[MPlayer-dev-eng] [PATCH] zr: revised version

Sun Feb 10 22:23:41 CET 2002

Hello A'rpi,

Here is the revised cleanup patch for vo_zr.c. Summary:

-typo in the first comment of jpeg_enc.c is corrected

-jpeg_enc loses knowledge about interlacing, it is all handled by vo_zr.

-buffers are passed via jpeg_encode and not jpeg_init, to allow the
encoder to encode directly from the buffer passed to draw_frame (this
optimization is not implemented, only made possible by this architectural 
change)

-global variables have disappeared from jpeg_enc; all info is contained in
jpeg_enc_t. This allows other parts of mplayer to also use the encoder.

-definitions have moved to jpeg_enc.h

-information for the encoder is contained in an MpegEncContext to allow
jpeg_enc to call functions from libavcodec directly (they almost all want
a pointer to an MpegEncContext). A lot of cruft (Huffman-tables,
quantization-tables, jpeg write functions, quantizer) has been removed
from jpeg_enc.c.

Further improvements will follow.

The patch is tested and it applies cleanly to the current CVS tree and it
compiles fine (also with --disable-mmx). Yes, I know the patch is big, but
it is the result of a rewrite of jpeg_enc.c (I _could_ split the patch into
a set of smaller patches but it would be a tedious chore, please don't
make me do it :-)).

Greetings,

Rik.

--------
Nothing is ever a total loss; it can always serve as a bad example.
-------------- next part --------------
diff -Naur main/libvo/jpeg_enc.c main.dev/libvo/jpeg_enc.c

--- main/libvo/jpeg_enc.c	Sat Jan 26 01:52:59 2002
+++ main.dev/libvo/jpeg_enc.c	Sun Feb 10 21:46:33 2002
@@ -19,11 +19,10 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  *
  * For an excellent introduction to the JPEG format, see:
- * http://www.ece.purdue.edu/~bourman/grad-labs/lab8/pdf/lab.pdf
+ * http://www.ece.purdue.edu/~bouman/grad-labs/lab8/pdf/lab.pdf
  */
 
 
-/* stuff from libavcodec/common.h */
 
 #include <sys/types.h>
 #include <stdio.h>
@@ -33,37 +32,34 @@
 #include "fastmemcpy.h"
 #endif
 #include "../mp_msg.h"
-#include "../libavcodec/common.h"
+#include "../libavcodec/avcodec.h"
 #include "../libavcodec/dsputil.h"
+#include "../libavcodec/mpegvideo.h"
 
+#include "jpeg_enc.h"
 
-static int height, width, fields, cheap_upsample, qscale, bw = 0, first = 1;
+/* zr_mjpeg_encode_mb needs access to these tables for the black & white 
+ * option */
+typedef struct MJpegContext {
+    UINT8 huff_size_dc_luminance[12];
+    UINT16 huff_code_dc_luminance[12];
+    UINT8 huff_size_dc_chrominance[12];
+    UINT16 huff_code_dc_chrominance[12];
 
-/* from dsputils.c */
+    UINT8 huff_size_ac_luminance[256];
+    UINT16 huff_code_ac_luminance[256];
+    UINT8 huff_size_ac_chrominance[256];
+    UINT16 huff_code_ac_chrominance[256];
+} MJpegContext;
 
-static DCTELEM **blck;
 
-extern void (*av_fdct)(DCTELEM *b);
+/* A very important function pointer */
+extern int (*dct_quantize)(MpegEncContext *s, 
+		DCTELEM *block, int n, int qscale);
 
-static UINT8 zr_zigzag_direct[64] = {
-    0, 1, 8, 16, 9, 2, 3, 10,
-    17, 24, 32, 25, 18, 11, 4, 5,
-    12, 19, 26, 33, 40, 48, 41, 34,
-    27, 20, 13, 6, 7, 14, 21, 28,
-    35, 42, 49, 56, 57, 50, 43, 36,
-    29, 22, 15, 23, 30, 37, 44, 51,
-    58, 59, 52, 45, 38, 31, 39, 46,
-    53, 60, 61, 54, 47, 55, 62, 63
-};
-
-/* bit output */
-
-static PutBitContext pb;
 
-/* from mpegvideo.c */
-
-#define QMAT_SHIFT 25
-#define QMAT_SHIFT_MMX 19
+/* Begin excessive code duplication ************************************/
+/* Code coming from mpegvideo.c and mjpeg.c in ../libavcodec ***********/
 
 static const unsigned short aanscales[64] = {
     /* precomputed values scaled up by 14 bits */
@@ -77,528 +73,40 @@
     4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
 };
 
-
-static unsigned int simple_mmx_permutation[64]={
-	0x00, 0x08, 0x01, 0x09, 0x04, 0x0C, 0x05, 0x0D,
-	0x10, 0x18, 0x11, 0x19, 0x14, 0x1C, 0x15, 0x1D,
-	0x02, 0x0A, 0x03, 0x0B, 0x06, 0x0E, 0x07, 0x0F,
-	0x12, 0x1A, 0x13, 0x1B, 0x16, 0x1E, 0x17, 0x1F,
-	0x20, 0x28, 0x21, 0x29, 0x24, 0x2C, 0x25, 0x2D,
-	0x30, 0x38, 0x31, 0x39, 0x34, 0x3C, 0x35, 0x3D,
-	0x22, 0x2A, 0x23, 0x2B, 0x26, 0x2E, 0x27, 0x2F,
-	0x32, 0x3A, 0x33, 0x3B, 0x36, 0x3E, 0x37, 0x3F,
-};
-
-#if 0
-void block_permute(short int *block)
-{
-    int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
-    int i;
-
-    for(i=0;i<8;i++) {
-        tmp1 = block[1];
-        tmp2 = block[2];
-        tmp3 = block[3];
-        tmp4 = block[4];
-        tmp5 = block[5];
-        tmp6 = block[6];
-        block[1] = tmp2;
-        block[2] = tmp4;
-        block[3] = tmp6;
-        block[4] = tmp1;
-        block[5] = tmp3;
-        block[6] = tmp5;
-        block += 8;
-    }
-}
-#endif
-
-static int q_intra_matrix[64];
-
-static int dct_quantize(DCTELEM *block, int n,
-                        int qscale)
-{
-    int i, j, level, last_non_zero, q;
-    const int *qmat;
-
-    av_fdct (block);
-
-    /* we need this permutation so that we correct the IDCT
-       permutation. will be moved into DCT code */
-    //block_permute(block);
-
-    /*if (n < 4)
-        q = s->y_dc_scale;
-    else
-        q = s->c_dc_scale;
-    q = q << 3;*/
-    q = 64;   
-    /* note: block[0] is assumed to be positive */
-    block[0] = (block[0] + (q >> 1)) / q;
-    i = 1;
-    last_non_zero = 0;
-
-    qmat = q_intra_matrix;
-    for(;i<64;i++) {
-        j = zr_zigzag_direct[i];
-        level = block[j];
-        level = level * qmat[j];
-        /* XXX: slight error for the low range. Test should be equivalent to
-           (level <= -(1 << (QMAT_SHIFT - 3)) || level >= (1 <<
-           (QMAT_SHIFT - 3)))
-        */
-        if (((level << (31 - (QMAT_SHIFT - 3))) >> (31 - (QMAT_SHIFT - 3))) != 
-            level) {
-            level = level / (1 << (QMAT_SHIFT - 3));
-            /* XXX: currently, this code is not optimal. the range should be:
-               mpeg1: -255..255
-               mpeg2: -2048..2047
-               h263:  -128..127
-               mpeg4: -2048..2047
-            */
-            if (level > 255)
-                level = 255;
-            else if (level < -255)
-                level = -255;
-            block[j] = level;
-            last_non_zero = i;
-        } else {
-            block[j] = 0;
-        }
-	
-    }
-    return last_non_zero;
-}
-
-static int dct_quantize_mmx(DCTELEM *block, int n, int qscale)
-{
-    int i, j, level, last_non_zero, q;
-    const int *qmat;
-    DCTELEM *b = block;
-
-    /*for (i = 0; i < 8; i++) {
-	    printf("%i %i %i %i %i %i %i %i\n", b[8*i], b[8*i+1], b[8*i+2],
-			    b[8*i+3], b[8*i+4], b[8*i+5], b[8*i+6], b[8*i+7]);
-    }*/
-    av_fdct (block);
-    /*for (i = 0; i < 8; i++) {
-	    printf("%i %i %i %i %i %i %i %i\n", b[8*i], b[8*i+1], b[8*i+2],
-			    b[8*i+3], b[8*i+4], b[8*i+5], b[8*i+6], b[8*i+7]);
-    }*/
-
-
-    /* we need this permutation so that we correct the IDCT
-       permutation. will be moved into DCT code */
-    //block_permute(block);
-
-    //if (n < 2)
-        q = 8;
-    /*else
-        q = 8;*/
-    
-    /* note: block[0] is assumed to be positive */
-    block[0] = (block[0] + (q >> 1)) / q;
-    i = 1;
-    last_non_zero = 0;
-    qmat = q_intra_matrix;
-
-    for(;i<64;i++) {
-        j = zr_zigzag_direct[i];
-        level = block[j];
-        level = level * qmat[j];
-        /* XXX: slight error for the low range. Test should be equivalent to
-           (level <= -(1 << (QMAT_SHIFT_MMX - 3)) || level >= (1 <<
-           (QMAT_SHIFT_MMX - 3)))
-        */
-        if (((level << (31 - (QMAT_SHIFT_MMX - 3))) >> (31 - (QMAT_SHIFT_MMX - 3))) != 
-            level) {
-            level = level / (1 << (QMAT_SHIFT_MMX - 3));
-            /* XXX: currently, this code is not optimal. the range should be:
-               mpeg1: -255..255
-               mpeg2: -2048..2047
-               h263:  -128..127
-               mpeg4: -2048..2047
-	    *  jpeg: -1024..1023   11 bit */
-            if (level > 1023)
-                level = 1023;
-            else if (level < -1024)
-                level = -1024;
-            block[j] = level;
-            last_non_zero = i;
-        } else {
-            block[j] = 0;
-        }
-    }
-    /*for (i = 0; i < 8; i++) {
-	    printf("%i %i %i %i %i %i %i %i\n", b[8*i], b[8*i+1], b[8*i+2],
-			    b[8*i+3], b[8*i+4], b[8*i+5], b[8*i+6], b[8*i+7]);
-    }*/
-
-    return last_non_zero;
-}
-
-static void convert_matrix(int *qmat, const unsigned short *quant_matrix, 
-		int qscale)
+static void convert_matrix(int *qmat, UINT16 *qmat16, const UINT16 *quant_matrix, int qscale)
 {
     int i;
 
     if (av_fdct == jpeg_fdct_ifast) {
         for(i=0;i<64;i++) {
             /* 16 <= qscale * quant_matrix[i] <= 7905 */
-            /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */
+            /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
+            /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
+            /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
             
-            qmat[i] = (int)(((unsigned long long)1 << (QMAT_SHIFT + 11)) / 
-                            (aanscales[i] * qscale * quant_matrix[i]));
+            qmat[block_permute_op(i)] = (int)((UINT64_C(1) << (QMAT_SHIFT + 11)) / 
+                            (aanscales[i] * qscale * quant_matrix[block_permute_op(i)]));
         }
     } else {
         for(i=0;i<64;i++) {
             /* We can safely suppose that 16 <= quant_matrix[i] <= 255
-               So 16 <= qscale * quant_matrix[i] <= 7905
-               so (1 << QMAT_SHIFT) / 16 >= qmat[i] >= (1 << QMAT_SHIFT) / 7905
+               So 16           <= qscale * quant_matrix[i]             <= 7905
+               so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
+               so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
             */
-            qmat[i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
-        }
-    }
-}
-
-#define SOF0	0xC0
-#define SOI	0xD8
-#define	EOI	0xD9
-#define DQT	0xDB
-#define DHT	0xC4
-#define SOS	0xDA
-
-/* this is almost the quantisation table, used for luminance and chrominance */
-/*short int zr_default_intra_matrix[64] = {
-    16,  11,  10,  16,  24,  40,  51,  61,
-    12,  12,  14,  19,  26,  58,  60,  55,
-    14,  13,  16,  24,  40,  57,  69,  56,
-    14,  17,  22,  29,  51,  87,  80,  62,
-    18,  22,  37,  56,  68, 109, 103,  77,
-    24,  35,  55,  64,  81, 104, 113,  92,
-    49,  64,  78,  87, 103, 121, 120, 101,
-    72,  92,  95,  98, 112, 100, 103,  99
-};*/
-/*
-short int default_intra_matrix[64] = {
-	8, 16, 19, 22, 26, 27, 29, 34,
-	16, 16, 22, 24, 27, 29, 34, 37,
-	19, 22, 26, 27, 29, 34, 34, 38,
-	22, 22, 26, 27, 29, 34, 37, 40,
-	22, 26, 27, 29, 32, 35, 40, 48,
-	26, 27, 29, 32, 35, 40, 48, 58,
-	26, 27, 29, 34, 38, 46, 56, 69,
-	27, 29, 35, 38, 46, 56, 69, 83
-};
-*/
-extern short int default_intra_matrix[64];
-
-static short int intra_matrix[64];
-
-/* Set up the standard Huffman tables (cf. JPEG standard section K.3) */
-/* IMPORTANT: these are only valid for 8-bit data precision! */
-static const unsigned char bits_dc_luminance[17] =
-{ /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 };
-static const unsigned char val_dc_luminance[] =
-{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
-
-#if 0
-static const unsigned char bits_dc_chrominance[17] =
-{ /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
-static const unsigned char val_dc_chrominance[] =
-{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
-#endif
-
-static const unsigned char bits_ac_luminance[17] =
-{ /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d };
-static const unsigned char val_ac_luminance[] =
-{ 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
-  0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
-  0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
-  0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
-  0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
-  0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
-  0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
-  0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
-  0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
-  0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
-  0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
-  0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
-  0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
-  0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
-  0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
-  0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
-  0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
-  0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
-  0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
-  0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
-  0xf9, 0xfa 
-};
-
-#if 0
-static const unsigned char bits_ac_chrominance[17] =
-{ /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 };
-
-static const unsigned char val_ac_chrominance[] =
-{ 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
-  0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
-  0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
-  0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
-  0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
-  0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
-  0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
-  0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
-  0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
-  0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
-  0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
-  0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
-  0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
-  0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
-  0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
-  0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
-  0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
-  0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
-  0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
-  0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
-  0xf9, 0xfa 
-};
-#endif
-
-static unsigned char huff_size_dc_luminance[12];
-static unsigned short huff_code_dc_luminance[12];
-#if 0
-unsigned char huff_size_dc_chrominance[12];
-unsigned short huff_code_dc_chrominance[12];
-#endif
-
-static unsigned char huff_size_ac_luminance[256];
-static unsigned short huff_code_ac_luminance[256];
-#if 0
-unsigned char huff_size_ac_chrominance[256];
-unsigned short huff_code_ac_chrominance[256];
-#endif 
-
-static int last_dc[3];
-static int block_last_index[4];
-
-/* isn't this function nicer than the one in the libjpeg ? */
-static void build_huffman_codes(unsigned char *huff_size, 
-		unsigned short *huff_code, const unsigned char *bits_table, 
-		const unsigned char *val_table)
-{
-    int i, j, k,nb, code, sym;
-
-    code = 0;
-    k = 0;
-    for(i=1;i<=16;i++) {
-        nb = bits_table[i];
-        for(j=0;j<nb;j++) {
-            sym = val_table[k++];
-            huff_size[sym] = i;
-            huff_code[sym] = code;
-            code++;
+            qmat[i]   = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
+            qmat16[i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[block_permute_op(i)]);
         }
-        code <<= 1;
     }
 }
 
-static int zr_mjpeg_init()
-{
-    /* build all the huffman tables */
-    build_huffman_codes(huff_size_dc_luminance, huff_code_dc_luminance,
-                        bits_dc_luminance, val_dc_luminance);
-    //build_huffman_codes(huff_size_dc_chrominance, huff_code_dc_chrominance,
-    //                    bits_dc_chrominance, val_dc_chrominance);
-    build_huffman_codes(huff_size_ac_luminance, huff_code_ac_luminance,
-                        bits_ac_luminance, val_ac_luminance);
-    //build_huffman_codes(huff_size_ac_chrominance, huff_code_ac_chrominance,
-    //                    bits_ac_chrominance, val_ac_chrominance);
-    
-    return 0;
-}
-
-static void zr_mjpeg_close()
-{
-}
-
-static inline void put_marker(PutBitContext *p, int code)
-{
-    put_bits(p, 8, 0xff);
-    put_bits(p, 8, code);
-}
-
-/* table_class: 0 = DC coef, 1 = AC coefs */
-static int put_huffman_table(int table_class, int table_id,
-                             const unsigned char *bits_table, 
-			     const unsigned char *value_table)
-{
-    PutBitContext *p = &pb;
-    int n, i;
-
-    put_bits(p, 4, table_class);
-    put_bits(p, 4, table_id);
-
-    n = 0;
-    for(i=1;i<=16;i++) {
-        n += bits_table[i];
-        put_bits(p, 8, bits_table[i]);
-    }
-
-    for(i=0;i<n;i++)
-        put_bits(p, 8, value_table[i]);
-
-    return n + 17;
-}
-
-static void jpeg_qtable_header()
-{
-    PutBitContext *p = &pb;
-    int i, j, size;
-
-    /* quant matrixes */
-    put_marker(p, DQT);
-    put_bits(p, 16, 2 + 1 * (1 + 64));
-    put_bits(p, 4, 0); /* 8 bit precision */
-    put_bits(p, 4, 0); /* table 0 */
-    for(i=0;i<64;i++) {
-        j = zr_zigzag_direct[i];
-        put_bits(p, 8, intra_matrix[j]);
-    }
-}
-
-static void jpeg_htable_header() {
-    PutBitContext *p = &pb;
-    int i, j, size;
-    unsigned char *ptr;
-    /* huffman table */
-    put_marker(p, DHT);
-    flush_put_bits(p);
-    ptr = p->buf_ptr;
-    put_bits(p, 16, 0); /* patched later */
-    size = 2;
-    size += put_huffman_table(0, 0, bits_dc_luminance, val_dc_luminance);
-  //  size += put_huffman_table(0, 1, bits_dc_chrominance, val_dc_chrominance);
-    
-    ptr[0] = size >> 8;
-    ptr[1] = size;
-    put_marker(p, DHT);
-    flush_put_bits(p);
-    ptr = p->buf_ptr;
-    put_bits(p, 16, 0); /* patched later */
-    size = 2;
-    size += put_huffman_table(1, 0, bits_ac_luminance, val_ac_luminance);
-   // size += put_huffman_table(1, 1, bits_ac_chrominance, val_ac_chrominance);
-    ptr[0] = size >> 8;
-    ptr[1] = size;
-}
-
-static void zr_mjpeg_picture_header()
-{
-    put_marker(&pb, SOI);
-
-    if (first) {
-    	jpeg_qtable_header();
-    	jpeg_htable_header();
-	first = 0;
-    }
-    put_marker(&pb, SOF0);
-
-    put_bits(&pb, 16, 17);
-    put_bits(&pb, 8, 8); /* 8 bits/component */
-    put_bits(&pb, 16, height);
-    put_bits(&pb, 16, width);
-    put_bits(&pb, 8, 3); /* 3 components */
-    
-    /* Y component */
-    put_bits(&pb, 8, 0); /* component number */
-    put_bits(&pb, 4, 2); /* H factor */
-    put_bits(&pb, 4, 1); /* V factor */
-    put_bits(&pb, 8, 0); /* select matrix */
-    
-    /* Cb component */
-    put_bits(&pb, 8, 1); /* component number */
-    put_bits(&pb, 4, 1); /* H factor */
-    put_bits(&pb, 4, 1); /* V factor */
-    put_bits(&pb, 8, 0); /* select matrix */
-
-    /* Cr component */
-    put_bits(&pb, 8, 2); /* component number */
-    put_bits(&pb, 4, 1); /* H factor */
-    put_bits(&pb, 4, 1); /* V factor */
-    put_bits(&pb, 8, 0); /* select matrix */
-
-
-    /* scan header */
-    put_marker(&pb, SOS);
-    put_bits(&pb, 16, 12); /* length */
-    put_bits(&pb, 8, 3); /* 3 components */
-    
-    /* Y component */
-    put_bits(&pb, 8, 0); /* index */
-    put_bits(&pb, 4, 0); /* DC huffman table index */
-    put_bits(&pb, 4, 0); /* AC huffman table index */
-    
-    /* Cb component */
-    put_bits(&pb, 8, 1); /* index */
-    put_bits(&pb, 4, 0); /* DC huffman table index */
-    put_bits(&pb, 4, 0); /* AC huffman table index */
-    
-    /* Cr component */
-    put_bits(&pb, 8, 2); /* index */
-    put_bits(&pb, 4, 0); /* DC huffman table index */
-    put_bits(&pb, 4, 0); /* AC huffman table index */
-
-    put_bits(&pb, 8, 0); /* Ss (not used) */
-    put_bits(&pb, 8, 63); /* Se (not used) */
-    put_bits(&pb, 8, 0); /* (not used) */
-}
-
-static void zr_flush_buffer(PutBitContext *s)
-{
-    int size;
-    if (s->write_data) {
-        size = s->buf_ptr - s->buf;
-        if (size > 0)
-            s->write_data(s->opaque, s->buf, size);
-        s->buf_ptr = s->buf;
-        s->data_out_size += size;
-    }
-}
-
-/* pad the end of the output stream with ones */
-static void zr_jflush_put_bits(PutBitContext *s)
-{
-    unsigned int b;
-    s->bit_buf |= ~1U >> s->bit_cnt; /* set all the unused bits to one */
-
-    while (s->bit_cnt > 0) {
-        b = s->bit_buf >> 24;
-        *s->buf_ptr++ = b;
-        if (b == 0xff)
-            *s->buf_ptr++ = 0;
-        s->bit_buf<<=8;
-        s->bit_cnt-=8;
-    }
-    zr_flush_buffer(s);
-    s->bit_cnt=0;
-    s->bit_buf=0;
-}
-
-static void zr_mjpeg_picture_trailer()
-{
-    zr_jflush_put_bits(&pb);
-    put_marker(&pb, EOI);
-}
-
-static inline void encode_dc(int val, unsigned char *huff_size, 
-		unsigned short *huff_code)
+static inline void encode_dc(MpegEncContext *s, int val, 
+                             UINT8 *huff_size, UINT16 *huff_code)
 {
     int mant, nbits;
 
     if (val == 0) {
-	 //   printf("dc val=0 ");
-        jput_bits(&pb, huff_size[0], huff_code[0]);
-	//printf("dc encoding %d %d\n", huff_size[0], huff_code[0]);
+        jput_bits(&s->pb, huff_size[0], huff_code[0]);
     } else {
         mant = val;
         if (val < 0) {
@@ -612,57 +120,48 @@
             val = val >> 1;
             nbits++;
         }
-	/*nbits = av_log2(val);*/
             
-	//printf("dc ");
-        jput_bits(&pb, huff_size[nbits], huff_code[nbits]);
-	//printf("dc encoding %d %d\n", huff_size[nbits], huff_code[nbits]);
+        jput_bits(&s->pb, huff_size[nbits], huff_code[nbits]);
         
-	//printf("dc ");
-        jput_bits(&pb, nbits, mant & ((1 << nbits) - 1));
-	//printf("dc encoding %d %d\n", huff_size[nbits], huff_code[nbits]);
+        jput_bits(&s->pb, nbits, mant & ((1 << nbits) - 1));
     }
 }
 
-static void encode_block(DCTELEM *b, int n)
+static void encode_block(MpegEncContext *s, DCTELEM *block, int n)
 {
     int mant, nbits, code, i, j;
     int component, dc, run, last_index, val;
-    unsigned char *huff_size_ac;
-    unsigned short *huff_code_ac;
+    MJpegContext *m = s->mjpeg_ctx;
+    UINT8 *huff_size_ac;
+    UINT16 *huff_code_ac;
     
     /* DC coef */
-    component = (n <= 1 ? 0 : n - 2 + 1);
-    dc = b[0]; /* overflow is impossible */
-    /*for (i = 0; i < 8; i++) {
-	    printf("%i %i %i %i %i %i %i %i\n", b[8*i], b[8*i+1], b[8*i+2],
-			    b[8*i+3], b[8*i+4], b[8+i*5], b[8+i*6], b[8+i*7]);
-    }*/
-    val = dc - last_dc[component];
-    //if (n < 2) {
-        encode_dc(val, huff_size_dc_luminance, huff_code_dc_luminance);
-        huff_size_ac = huff_size_ac_luminance;
-        huff_code_ac = huff_code_ac_luminance;
-    //} else {
-    //    encode_dc(val, huff_size_dc_chrominance, huff_code_dc_chrominance);
-    //    huff_size_ac = huff_size_ac_chrominance;
-    //    huff_code_ac = huff_code_ac_chrominance;
-    //}
-    last_dc[component] = dc;
+    component = (n <= 3 ? 0 : n - 4 + 1);
+    dc = block[0]; /* overflow is impossible */
+    val = dc - s->last_dc[component];
+    if (n < 4) {
+        encode_dc(s, val, m->huff_size_dc_luminance, m->huff_code_dc_luminance);
+        huff_size_ac = m->huff_size_ac_luminance;
+        huff_code_ac = m->huff_code_ac_luminance;
+    } else {
+        encode_dc(s, val, m->huff_size_dc_chrominance, m->huff_code_dc_chrominance);
+        huff_size_ac = m->huff_size_ac_chrominance;
+        huff_code_ac = m->huff_code_ac_chrominance;
+    }
+    s->last_dc[component] = dc;
     
     /* AC coefs */
     
     run = 0;
-    last_index = block_last_index[n];
+    last_index = s->block_last_index[n];
     for(i=1;i<=last_index;i++) {
-        j = zr_zigzag_direct[i];
-        val = b[j];
+        j = zigzag_direct[i];
+        val = block[j];
         if (val == 0) {
             run++;
         } else {
             while (run >= 16) {
-		//printf("ac 16 white ");
-                jput_bits(&pb, huff_size_ac[0xf0], huff_code_ac[0xf0]);
+                jput_bits(&s->pb, huff_size_ac[0xf0], huff_code_ac[0xf0]);
                 run -= 16;
             }
             mant = val;
@@ -679,221 +178,299 @@
             }
             code = (run << 4) | nbits;
 
-	    //printf("ac ");
-            jput_bits(&pb, huff_size_ac[code], huff_code_ac[code]);
+            jput_bits(&s->pb, huff_size_ac[code], huff_code_ac[code]);
         
-	    //printf("ac ");
-            jput_bits(&pb, nbits, mant & ((1 << nbits) - 1));
+            jput_bits(&s->pb, nbits, mant & ((1 << nbits) - 1));
             run = 0;
         }
     }
 
     /* output EOB only if not already 64 values */
-    if (last_index < 63 || run != 0) {
-	//printf("ac EOB ");
-        jput_bits(&pb, huff_size_ac[0], huff_code_ac[0]);
-    }
+    if (last_index < 63 || run != 0)
+        jput_bits(&s->pb, huff_size_ac[0], huff_code_ac[0]);
 }
 
-static void zr_mjpeg_encode_mb(DCTELEM **bla)
-{
-    encode_block(*(bla), 0);
-    encode_block(*(bla+1), 1);
-    if (bw) {
-    	jput_bits(&pb, 12, 512+128+8+2); /* 2 times code for 'no color'
-				      * 001010001010 */
-    } else {
-	    encode_block(*(bla+2), 2);
-	    encode_block(*(bla+3), 3);
-    }
+/* End excessive code duplication **************************************/
+
+/* this function is a reproduction of the one in mjpeg, it includes two
+ * changes, it allows for black&white encoding (it skips the U and V
+ * macroblocks and it outputs the huffman code for 'no change' (dc) and
+ * 'all zero' (ac)) and it takes 4 macroblocks (422) instead of 6 (420) */
+static void zr_mjpeg_encode_mb(jpeg_enc_t *j) {
+
+	MJpegContext *m = j->s->mjpeg_ctx;
+
+	encode_block(j->s, j->s->block[0], 0);
+	encode_block(j->s, j->s->block[1], 1);
+	if (j->bw) {
+		/* U */
+		jput_bits(&j->s->pb, m->huff_size_dc_chrominance[0],
+				m->huff_code_dc_chrominance[0]);
+		jput_bits(&j->s->pb, m->huff_size_ac_chrominance[0],
+				m->huff_code_ac_chrominance[0]);
+		/* V */
+		jput_bits(&j->s->pb, m->huff_size_dc_chrominance[0],
+				m->huff_code_dc_chrominance[0]);
+		jput_bits(&j->s->pb, m->huff_size_ac_chrominance[0],
+				m->huff_code_ac_chrominance[0]);
+    	} else {
+		/* we trick encode_block here so that it uses
+		 * chrominance huffman tables instead of luminance ones 
+		 * (see the effect of second argument of encode_block) */
+		encode_block(j->s, j->s->block[2], 4); 
+		encode_block(j->s, j->s->block[3], 5);
+    	}
 }
 
-static int mb_width, mb_height, mb_x, mb_y;
-static unsigned char *y_data, *u_data, *v_data;
-static int y_ps, u_ps, v_ps, y_rs, u_rs, v_rs;
-static char code[256*1024]; // 256kb!
 /* this function can take all kinds of YUV colorspaces
- * YV12, YVYU, UYVY. The necesary parameters must be set up by te caller
+ * YV12, YVYU, UYVY. The necessary parameters must be set up by the caller
  * y_ps means "y pixel size", y_rs means "y row size".
- * For YUYV, for example, is u = y + 1, v = y + 3, y_ps = 2, u_ps = 4
- * v_ps = 4, y_rs = u_rs = v_rs.
+ * For YUYV, for example, is u_buf = y_buf + 1, v_buf = y_buf + 3, 
+ * y_ps = 2, u_ps = 4, v_ps = 4, y_rs = u_rs = v_rs.
+ *
+ *  The actual buffers must be passed with jpeg_enc_frame, this is
+ *  to make it possible to call encode on the buffer provided by the
+ *  codec in draw_frame.
  *  
  * The data is straightened out at the moment it is put in DCT
  * blocks, there are therefore no spurious memcopies involved */
-/* Notice that w must be a multiple of 16 and h must be a multiple of
- * fields*8 */
+/* Notice that w must be a multiple of 16 and h must be a multiple of 8 */
 /* We produce YUV422 jpegs, the colors must be subsampled horizontally,
  * if the colors are also subsampled vertically, then this function
  * performs cheap upsampling (better solution will be: a DCT that is
  * optimized in the case that every two rows are the same) */
 /* cu = 0 means 'No cheap upsampling'
  * cu = 1 means 'perform cheap upsampling' */
-void mjpeg_encoder_init(int w, int h, 
-		unsigned char* y, int y_psize, int y_rsize, 
-		unsigned char* u, int u_psize, int u_rsize,
-		unsigned char* v, int v_psize, int v_rsize,
-		int f, int cu, int q, int b) {
-	int i;
-	mp_msg(MSGT_VO, MSGL_V, "JPEnc init: %dx%d %p %d %d %p %d %d %p %d %d\n",
-			w, h, y, y_psize, y_rsize, 
-			u, u_psize, u_rsize,
-			v, v_psize, v_rsize);
-	y_data = y; u_data = u; v_data = v;
-	y_ps = y_psize; u_ps = u_psize; v_ps = v_psize;
-	y_rs = y_rsize*f; 
-	u_rs = u_rsize*f; 
-	v_rs = v_rsize*f;
-	width = w;
-	height = h/f;
-	fields = f;
-	qscale = q;
-	cheap_upsample = cu;
-	mb_width = width/16;
-	mb_height = height/8;
-	bw = b;
-	zr_mjpeg_init();
-	i = 0;
-	intra_matrix[0] = default_intra_matrix[0];
-	for (i = 1; i < 64; i++) {
-		intra_matrix[i] = (default_intra_matrix[i]*qscale) >> 3;
+/* The encoder doesn't know anything about interlacing: for interlaced
+ * output, pass half the height and double the rowstride. Which field gets
+ * encoded is decided by which buffers are passed to jpeg_enc_frame */
+jpeg_enc_t *jpeg_enc_init(int w, int h, int y_psize, int y_rsize, 
+		int u_psize, int u_rsize, int v_psize, int v_rsize,
+		int cu, int q, int b) {
+	jpeg_enc_t *j;
+	int i = 0;
+	mp_msg(MSGT_VO, MSGL_V, "JPEnc init: %dx%d %d %d %d %d %d %d\n",
+			w, h, y_psize, y_rsize, u_psize, 
+			u_rsize, v_psize, v_rsize);
+
+	j = malloc(sizeof(jpeg_enc_t));
+	if (j == NULL) return NULL;
+
+	j->s = malloc(sizeof(MpegEncContext));
+	if (j->s == NULL) {
+		free(j);
+		return NULL;
 	}
-	if (
-#ifdef HAVE_MMX
-			av_fdct != fdct_mmx && 
-#endif
-			av_fdct != jpeg_fdct_ifast) {
-		/* libavcodec is probably not yet initialized */
-		av_fdct = jpeg_fdct_ifast;
+
+	/* info on how to access the pixels */
+	j->y_ps = y_psize; 
+	j->u_ps = u_psize; 
+	j->v_ps = v_psize;
+	j->y_rs = y_rsize; 
+	j->u_rs = u_rsize; 
+	j->v_rs = v_rsize;
+
+	j->s->width = w;
+	j->s->height = h;
+	j->s->qscale = q;
+
+	j->s->out_format = FMT_MJPEG;
+	j->s->intra_only = 1;
+	j->s->encoding = 1;
+	j->s->pict_type = I_TYPE;
+	j->s->y_dc_scale = 8;
+	j->s->c_dc_scale = 8;
+
+	j->s->mjpeg_write_tables = 1;
+	j->s->mjpeg_vsample[0] = 1;
+	j->s->mjpeg_vsample[1] = 1;
+	j->s->mjpeg_vsample[2] = 1;
+	j->s->mjpeg_hsample[0] = 2;
+	j->s->mjpeg_hsample[1] = 1;
+	j->s->mjpeg_hsample[2] = 1;
+
+	j->cheap_upsample = cu;
+	j->bw = b;
+
+	/* if libavcodec is used by the decoder then we must not
+	 * initialize again, but if it is not initialized then we must
+	 * initialize it here. There must be a better way to find out
+	 * if it is initialized */
+	if (av_fdct != jpeg_fdct_ifast 
 #ifdef HAVE_MMX
-		dsputil_init_mmx();
+			&& av_fdct != fdct_mmx
 #endif
+			) {
+		/* we need to initialize libavcodec */
+		avcodec_init();
+	}
+
+	if (mjpeg_init(j->s) < 0) {
+		free(j->s);
+		free(j);
+		return NULL;
+	}
+
+	if (MPV_common_init(j->s) < 0) {
+		free(j->s);
+		free(j);
+		return NULL;
 	}
-	convert_matrix(q_intra_matrix, intra_matrix, 8);
-	blck = malloc(4*sizeof(DCTELEM*));
-	blck[0] = malloc(64*sizeof(DCTELEM));
-	blck[1] = malloc(64*sizeof(DCTELEM));
-	blck[2] = malloc(64*sizeof(DCTELEM));
-	blck[3] = malloc(64*sizeof(DCTELEM));
+
+	/* correct the value for sc->mb_height */
+	j->s->mb_height = j->s->height/8;
+	j->s->mb_intra = 1;
+
+	j->s->intra_matrix[0] = default_intra_matrix[0];
+	for (i = 1; i < 64; i++) 
+		j->s->intra_matrix[i] = 
+			(default_intra_matrix[i]*j->s->qscale) >> 3;
+	convert_matrix(j->s->q_intra_matrix, j->s->q_intra_matrix16,
+			j->s->intra_matrix, 8);
+	return j;
 }	
 
-int mjpeg_encode_frame(char *bufr, int field) {
-	int i, j, k, l;
+int jpeg_enc_frame(jpeg_enc_t *j, unsigned char *y_data, 
+		unsigned char *u_data, unsigned char *v_data, char *bufr) {
+	int i, k, mb_x, mb_y;
 	short int *dest;
 	unsigned char *source;
 	/* initialize the buffer */
-	if (field == 1) {
-		y_data += y_rs/2;
-		u_data += u_rs/2;
-		v_data += v_rs/2;
-	}
-	init_put_bits(&pb, bufr, 1024*256, NULL, NULL);
 
-	zr_mjpeg_picture_header();
+	init_put_bits(&j->s->pb, bufr, 1024*256, NULL, NULL);
+
+	mjpeg_picture_header(j->s);
 
-	last_dc[0] = 128; last_dc[1] = 128; last_dc[2] = 128;
-	mb_x = 0;
-	mb_y = 0;
-	for (mb_y = 0; mb_y < mb_height; mb_y++) {
-		for (mb_x = 0; mb_x < mb_width; mb_x++) {
-			//printf("Processing macroblock mb_x=%d, mb_y=%d, mb_width=%d, mb_height=%d, size=%d\n", mb_x, mb_y, mb_width, mb_height, pb.buf_ptr - pb.buf);
+	j->s->last_dc[0] = 128; 
+	j->s->last_dc[1] = 128; 
+	j->s->last_dc[2] = 128;
+
+	for (mb_y = 0; mb_y < j->s->mb_height; mb_y++) {
+		for (mb_x = 0; mb_x < j->s->mb_width; mb_x++) {
+			/* conversion 8 to 16 bit and filling of blocks
+			 * must be mmx optimized */
 			/* fill 2 Y macroblocks and one U and one V */
-			source = mb_y * 8 * y_rs + 16 * y_ps * mb_x + y_data;
-			dest = blck[0];
+			source = mb_y * 8 * j->y_rs + 
+				16 * j->y_ps * mb_x + y_data;
+			dest = j->s->block[0];
 			for (i = 0; i < 8; i++) {
-				for (j = 0; j < 8; j++) {
-					dest[j] = source[j*y_ps];
+				for (k = 0; k < 8; k++) {
+					dest[k] = source[k*j->y_ps];
 				}
 				dest += 8;
-				source += y_rs;
+				source += j->y_rs;
 			}
-			source = mb_y * 8 * y_rs + (16*mb_x + 8)*y_ps + y_data;
-			dest = blck[1];
+			source = mb_y * 8 * j->y_rs + 
+				(16*mb_x + 8)*j->y_ps + y_data;
+			dest = j->s->block[1];
 			for (i = 0; i < 8; i++) {
-				for (j = 0; j < 8; j++) {
-					dest[j] = source[j*y_ps];
+				for (k = 0; k < 8; k++) {
+					dest[k] = source[k*j->y_ps];
 				}
 				dest += 8;
-				source += y_rs;
+				source += j->y_rs;
 			}
-			if (!bw) {
-			if (cheap_upsample) {
-				source = mb_y*4*u_rs + 8*mb_x*u_ps + u_data;
-				dest = blck[2];
+			if (!j->bw && j->cheap_upsample) {
+				source = mb_y*4*j->u_rs + 
+					8*mb_x*j->u_ps + u_data;
+				dest = j->s->block[2];
 				for (i = 0; i < 4; i++) {
-					for (j = 0; j < 8; j++) {
-						dest[j] = source[j*u_ps];
-						dest[j+8] = source[j*u_ps];
+					for (k = 0; k < 8; k++) {
+						dest[k] = source[k*j->u_ps];
+						dest[k+8] = source[k*j->u_ps];
 					}
 					dest += 16;
-					source += u_rs;
+					source += j->u_rs;
 				}
-				source = mb_y*4*v_rs + 8*mb_x*v_ps + v_data;
-				dest = blck[3];
+				source = mb_y*4*j->v_rs + 
+					8*mb_x*j->v_ps + v_data;
+				dest = j->s->block[3];
 				for (i = 0; i < 4; i++) {
-					for (j = 0; j < 8; j++) {
-						dest[j] = source[j*v_ps];
-						dest[j+8] = source[j*v_ps];
+					for (k = 0; k < 8; k++) {
+						dest[k] = source[k*j->v_ps];
+						dest[k+8] = source[k*j->v_ps]; /* same source row again */
 					}
 					dest += 16;
-					source += u_rs;
+					source += j->v_rs; /* V plane: step by V's own row stride */
 				}
-			} else {
-				source = mb_y*8*u_rs + 8*mb_x*u_ps + u_data;
-				dest = blck[2];
+			} else if (!j->bw && !j->cheap_upsample) {
+				source = mb_y*8*j->u_rs + 
+					8*mb_x*j->u_ps + u_data;
+				dest = j->s->block[2];
 				for (i = 0; i < 8; i++) {
-					for (j = 0; j < 8; j++) {
-						dest[j] = source[j*u_ps];
-					}
+					for (k = 0; k < 8; k++) 
+						dest[k] = source[k*j->u_ps];
 					dest += 8;
-					source += u_rs;
+					source += j->u_rs;
 				}
-				source = mb_y*8*v_rs + 8*mb_x*v_ps + v_data;
-				dest = blck[3];
+				source = mb_y*8*j->v_rs + 
+					8*mb_x*j->v_ps + v_data;
+				dest = j->s->block[3];
 				for (i = 0; i < 8; i++) {
-					for (j = 0; j < 8; j++) {
-						dest[j] = source[j*v_ps];
-					}
+					for (k = 0; k < 8; k++) 
+						dest[k] = source[k*j->v_ps];
 					dest += 8;
-					source += u_rs;
+					source += j->v_rs; /* V plane: step by V's own row stride */
 				}
 			}
-			}
-			/* so, **blck is filled now... */
+			emms_c(); /* is this really needed? */
 
-			for(i = 0; i < 2; i++) {
-				if (av_fdct == jpeg_fdct_ifast)
-					block_last_index[i] = 
-						dct_quantize(blck[i], 
-								i, qscale);
-				else
-					block_last_index[i] = 
-						dct_quantize_mmx(blck[i],
-								i, qscale);
+			j->s->block_last_index[0] = 
+				dct_quantize(j->s, j->s->block[0], 
+						0, j->s->qscale);
+			j->s->block_last_index[1] = 
+				dct_quantize(j->s, j->s->block[1], 
+						1, j->s->qscale);
+
+			if (!j->bw) {
+				j->s->block_last_index[4] =
+					dct_quantize(j->s, j->s->block[2], 
+							4, j->s->qscale);
+				j->s->block_last_index[5] =
+					dct_quantize(j->s, j->s->block[3], 
+							5, j->s->qscale);
 			}
-			if (!bw) {
-			for(i = 2; i < 4; i++) {
-				if (av_fdct == jpeg_fdct_ifast)
-					block_last_index[i] = 
-						dct_quantize(blck[i], 
-								i, qscale);
-				else
-					block_last_index[i] = 
-						dct_quantize_mmx(blck[i],
-								i, qscale);
-			}
-			}
-				zr_mjpeg_encode_mb(blck);
+			zr_mjpeg_encode_mb(j);
 		}
 	}
 	emms_c();
-	zr_mjpeg_picture_trailer();
-	flush_put_bits(&pb);	
-	zr_mjpeg_close();
-	if (field == 1) {
-		y_data -= y_rs/2;
-		u_data -= u_rs/2;
-		v_data -= v_rs/2;
-	}
-	return pb.buf_ptr - pb.buf;
+	mjpeg_picture_trailer(j->s);
+	flush_put_bits(&j->s->pb);	
+
+	if (j->s->mjpeg_write_tables == 1)
+		j->s->mjpeg_write_tables = 0;
+	
+	return j->s->pb.buf_ptr - j->s->pb.buf;
+}
+
+void jpeg_enc_uninit(jpeg_enc_t *j) {
+	mjpeg_close(j->s);
+	free(j->s);
+	free(j);
 }
 
+#if 0
+
+#define		W	32	
+#define		H	32
+
+int quant_store[MBR+1][MBC+1];
+unsigned char buf[W*H*3/2];
+char code[256*1024];
+
+
+main() {
+	int i, size;
+	FILE *fp;
+
+	memset(buf, 0, W*H);
+	memset(buf+W*H, 255, W*H/4);
+	memset(buf+5*W*H/4, 0, W*H/4);
+	mjpeg_encoder_init(W, H, 1, W, 1, W/2, 1, W/2, 1, 1, 0);
+
+	size = mjpeg_encode_frame(buf, buf+W*H, buf+5*W*H/4, code);
+	fp = fopen("test.jpg", "w");
+	fwrite(code, 1, size, fp);
+	fclose(fp);
+}
+#endif
diff -Naur main/libvo/jpeg_enc.h main.dev/libvo/jpeg_enc.h
--- main/libvo/jpeg_enc.h	Thu Jan  1 01:00:00 1970
+++ main.dev/libvo/jpeg_enc.h	Sun Feb 10 21:46:33 2002
@@ -0,0 +1,61 @@
+/* Straightforward (to be) optimized JPEG encoder for the YUV422 format 
+ * based on mjpeg code from ffmpeg. 
+ *
+ * Copyright (c) 2002, Rik Snel
+ * Parts from ffmpeg Copyright (c) 2000, 2001 Gerard Lantau
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * For an excellent introduction to the JPEG format, see:
+ * http://www.ece.purdue.edu/~bouman/grad-labs/lab8/pdf/lab.pdf
+ */
+
+#ifndef JPEG_ENC_H
+#define JPEG_ENC_H
+
+/* State of one encoder instance. The *_ps fields are the byte distance
+ * between horizontally adjacent samples of a plane, the *_rs fields the
+ * byte distance between successive rows of that plane. */
+typedef struct {
+	struct MpegEncContext *s; /* libavcodec context used for encoding */
+	int cheap_upsample; /* nonzero: every chroma row is emitted twice */
+	int bw; /* nonzero: chroma is skipped (black & white) */
+	int y_ps;
+	int u_ps;
+	int v_ps;
+	int y_rs;
+	int u_rs;
+	int v_rs;
+} jpeg_enc_t;
+
+/* Creates an encoder. cu and b are stored as cheap_upsample and bw;
+ * w and h are presumably the picture size, the *_psize / *_rsize values
+ * presumably become the *_ps / *_rs fields and q presumably selects the
+ * quantizer scale (TODO confirm against jpeg_enc.c).
+ * Returns NULL when initialization fails. */
+jpeg_enc_t *jpeg_enc_init(int w, int h, int y_psize, int y_rsize, 
+		int u_psize, int u_rsize, int v_psize, int v_rsize,
+		int cu, int q, int b);
+
+/* Encodes one picture from the given Y, U and V sample pointers into bufr
+ * and returns the number of bytes written. The bit writer is set up for
+ * 1024*256 bytes, so bufr should be at least that large. */
+int jpeg_enc_frame(jpeg_enc_t *j, unsigned char *y_data, 
+		unsigned char *u_data, unsigned char *v_data, char *bufr);
+
+/* Closes the mjpeg encoder and frees j->s and j; j must not be NULL. */
+void jpeg_enc_uninit(jpeg_enc_t *j);
+
+#endif /* JPEG_ENC_H */
diff -Naur main/libvo/vo_zr.c main.dev/libvo/vo_zr.c
--- main/libvo/vo_zr.c	Sun Feb 10 18:57:10 2002
+++ main.dev/libvo/vo_zr.c	Sun Feb 10 21:46:33 2002
@@ -25,6 +25,8 @@
 #include "../cfgparser.h"
 #include "fastmemcpy.h"
 
+#include "jpeg_enc.h"
+
 LIBVO_EXTERN (zr)
 
 static vo_info_t vo_info = 
@@ -47,7 +49,9 @@
 static int vdec = 1;
 static int hdec = 1;
 static int size;
-static int quality = 1;
+static int quality = 2;
+static unsigned char *y_data, *u_data, *v_data;
+static int y_stride, u_stride, v_stride;
 
 typedef struct {
 	int width;
@@ -61,6 +65,7 @@
 static uint8_t *image=NULL;
 static uint8_t *buf=NULL;
 
+static jpeg_enc_t *j;
 
 /* Variables needed for Zoran */
 
@@ -80,13 +85,6 @@
 #endif
 char *device = NULL;
 
-
-extern int mjpeg_encode_frame(char *bufr, int field);
-extern void mjpeg_encoder_init(int w, int h, unsigned char *y,
-		int y_psize, int y_rsize, unsigned char *u,
-		int u_psize, int u_rsize, unsigned char *v,
-		int v_psize, int v_rsize, int f, int cu, int q, int b);
-
 int zoran_getcap() {
 	char* dev = device ? device : VO_ZR_DEFAULT_DEVICE;
 	vdes = open(dev, O_RDWR);
@@ -197,7 +195,7 @@
 static uint32_t config(uint32_t width, uint32_t height, uint32_t d_width, 
 	uint32_t d_height, uint32_t fullscreen, char *title, uint32_t format,const vo_tune_info_t *info)
 {
-	int j, stretchx, stretchy;
+	int i, stretchx, stretchy;
 	/* this allows to crop parts from incoming picture,
 	 * for easy 512x240 -> 352x240 */
 	/* These values must be multples of 2 */
@@ -294,17 +292,17 @@
 	 * width 720 (exactly right for the Buz) after decimation 360,
 	 * after padding up to a multiple of 16 368, display 736 -> too
 	 * large). In these situations we auto(re)crop. */
-	j = 16*((g.width - 1)/(hdec*16) + 1);
-	if (stretchx*j > vc.maxwidth) {
-		g.xoff += 2*((g.width - hdec*(j-16))/4);
+	i = 16*((g.width - 1)/(hdec*16) + 1);
+	if (stretchx*i > vc.maxwidth) {
+		g.xoff += 2*((g.width - hdec*(i-16))/4);
 		/* g.off must be a multiple of 2 */
-		g.width = hdec*(j - 16);
+		g.width = hdec*(i - 16);
 		g.set = 0; /* we abuse this field to report that g has changed*/
 	}
-	j = 8*fields*((g.height - 1)/(vdec*fields*8) + 1);
-	if (stretchy*j > vc.maxheight) {
-		g.yoff += 2*((g.height - vdec*(j - 8*fields))/4);
-		g.height = vdec*(j - 8*fields);
+	i = 8*fields*((g.height - 1)/(vdec*fields*8) + 1);
+	if (stretchy*i > vc.maxheight) {
+		g.yoff += 2*((g.height - vdec*(i - 8*fields))/4);
+		g.height = vdec*(i - 8*fields);
 		g.set = 0;
 	}
 	if (!g.set) 
@@ -338,32 +336,54 @@
 			memset(image, 0, image_width*image_height);
 			memset(image + size, 0x80, image_width*image_height/4);
 			memset(image + 3*size/2, 0x80, image_width*image_height/4);
-			mjpeg_encoder_init(image_width/hdec, image_height,
-					image, hdec, image_width,
-					image + image_width*image_height, 
-					hdec, image_width/2,
-					image + 3*image_width*image_height/2, 
-					hdec, image_width/2, fields, 1, 
-					quality, bw);
+			y_data = image;
+			u_data = image + image_width*image_height;
+			v_data = image + 3*image_width*image_height/2;
+			
+			y_stride = image_width;
+			u_stride = image_width/2;
+			v_stride = image_width/2;
+
+			j = jpeg_enc_init(image_width/hdec, 
+					image_height/fields,
+					hdec, y_stride*fields,
+					hdec, u_stride*fields,
+					hdec, v_stride*fields, 
+					1, quality, bw);
 			break;
 		case IMGFMT_YUY2:
-			for (j = 0; j < 2*size; j+=4) {
-				image[j] = 0;
-				image[j+1] = 0x80;
-				image[j+2] = 0;
-				image[j+3] = 0x80;
+			for (i = 0; i < 2*size; i+=4) {
+				image[i] = 0;
+				image[i+1] = 0x80;
+				image[i+2] = 0;
+				image[i+3] = 0x80;
 			}
-			mjpeg_encoder_init(image_width/hdec, image_height,
-					image, hdec*2, image_width*2,
-					image + 1, hdec*4, image_width*2,
-					image + 3, hdec*4, image_width*2,
-					fields, 0, quality, bw);
+
+			y_data = image;
+			u_data = image + 1;
+			v_data = image + 3;
+
+			y_stride = 2*image_width;
+			u_stride = 2*image_width;
+			v_stride = 2*image_width;
+
+			j = jpeg_enc_init(image_width/hdec, 
+					image_height/fields,
+					hdec*2, y_stride*fields,
+					hdec*4, u_stride*fields,
+					hdec*4, v_stride*fields,
+					0, quality, bw);
 			break;
 		default:
 			mp_msg(MSGT_VO, MSGL_FATAL, "internal inconsistency in vo_zr\n");
 	}
 
 
+	if (j == NULL) {
+		mp_msg(MSGT_VO, MSGL_ERR, "Error initializing the jpeg encoder\n");
+		return 1;
+	}
+
 	if (init_zoran(stretchx, stretchy)) {
 		return 1;
 	}
@@ -379,9 +399,9 @@
 }
 
 static void flip_page (void) {
-	int i, j, k;
-	/*FILE *fp;
-	char filename[100];*/
+	int i, k;
+	//FILE *fp;
+	//char filename[100];
 	/* do we have a free buffer? */
 	if (queue-synco < zrq.count) {
 		frame = queue;
@@ -393,9 +413,12 @@
 	}
 	k=0;
 	for (i = 0; i < fields; i++) 
-		k+=mjpeg_encode_frame(buf+frame*zrq.size+k, i);
-	/* Warning, Quantization and Huffman tables are only
-	 * written in the first frame by default (to preserver bandwidth) */
+		k+=jpeg_enc_frame(j, y_data + i*y_stride, 
+				u_data + i*u_stride, v_data + i*v_stride, 
+				buf+frame*zrq.size+k);
+	/* Warning: Only the first jpeg image contains huffman- and 
+	 * quantisation tables, so don't expect files other than
+	 * test0001.jpg to be readable */
 	/*sprintf(filename, "test%04d.jpg", framenum);
 	fp = fopen(filename, "w");
 	if (!fp) exit(1);
@@ -418,7 +441,7 @@
 	int i;
 	char *source, *dest;
 	//printf("draw frame called\n");
-	source = src[0] + 2*g.yoff*image_width + 2*g.xoff;
+	source = src[0] + 2*g.yoff*vdec*stride + 2*g.xoff;
 	dest = image + 2*off_y;
 	for (i = 0; i < g.height/vdec; i++) {
 		memcpy(dest, source, image_width*2);
@@ -435,6 +458,7 @@
 }
 
 static void uninit(void) {
+	if (j) { jpeg_enc_uninit(j); j = NULL; } /* j stays NULL if config() never set it */
 	uninit_zoran();
 }
 
@@ -563,7 +587,7 @@
 	return 1;
     }else if (!strcasecmp(opt, "zrquality")) {
         i = atoi(param);
-	if (i < 1 || i > 20) return ERR_OUT_OF_RANGE;
+	if (i < 2 || i > 20) return ERR_OUT_OF_RANGE;
 	quality = i;
 	return 1;
     }else if (!strcasecmp(opt, "zrnorm")) {
@@ -595,7 +619,7 @@
 		    "              this switch allows you to see the effects\n"
 		    "              of too much decimation\n"
 		    "  -zrbw       display in black&white (speed increase)\n"
-		    "  -zrquality  jpeg compression quality [BEST] 1 - 20 [VERY BAD]\n"
+		    "  -zrquality  jpeg compression quality [BEST] 2 - 20 [VERY BAD]\n"
 		    "  -zrdev      playback device (example -zrdev /dev/video1\n"
 		    "  -zrnorm     specify norm PAL/NTSC [dev: leave at current setting]\n"
 		    "\n"
@@ -623,7 +647,7 @@
   else if (!strcasecmp(param, "zrvdec"))
     vdec = 1;
   else if (!strcasecmp(param, "zrquality"))
-    quality = 1;
+    quality = 2;
   else if (!strcasecmp(param, "zrnorm"))
     norm = VIDEO_MODE_AUTO;