[Mplayer-cvslog] CVS: main/libmp1e/video dct.c,NONE,1.1 dct.h,NONE,1.1 dct_ieee.h,NONE,1.1 dct_mmx.s,NONE,1.1 dct_ref.c,NONE,1.1 filter.c,NONE,1.1 filter_mmx.s,NONE,1.1 libvideo.h,NONE,1.1 mblock.c,NONE,1.1 mblock.h,NONE,1.1 motion.c,NONE,1.1 motion.h,NONE,1.1 motion_mmx.s,NONE,1.1 motion_sse2.s,NONE,1.1 mpeg.h,NONE,1.1 mpeg1.c,NONE,1.1 tables.c,NONE,1.1 video.h,NONE,1.1 vlc.c,NONE,1.1 vlc.h,NONE,1.1 vlc_mmx.s,NONE,1.1
David Holm
mswitch at mplayer.dev.hu
Wed Dec 5 00:58:13 CET 2001
- Previous message: [Mplayer-cvslog] CVS: main/libmp1e/systems libsystems.h,NONE,1.1 mpeg.h,NONE,1.1 mpeg1.c,NONE,1.1 mpeg2.c,NONE,1.1 output.h,NONE,1.1 rte_output.c,NONE,1.1 systems.c,NONE,1.1 systems.h,NONE,1.1 vcd.c,NONE,1.1
- Next message: [Mplayer-cvslog] CVS: main/DOCS DXR3,1.5,1.6
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /cvsroot/mplayer/main/libmp1e/video
In directory mplayer:/var/tmp.root/cvs-serv18843
Added Files:
dct.c dct.h dct_ieee.h dct_mmx.s dct_ref.c filter.c
filter_mmx.s libvideo.h mblock.c mblock.h motion.c motion.h
motion_mmx.s motion_sse2.s mpeg.h mpeg1.c tables.c video.h
vlc.c vlc.h vlc_mmx.s
Log Message:
--- NEW FILE ---
/*
* MPEG-1 Real Time Encoder
*
* Copyright (C) 1999-2000 Michael H. Schimek
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* $Id: dct.c,v 1.1 2001/12/04 23:58:09 mswitch Exp $ */
#include <assert.h>
#include "../common/math.h"
#include "../common/mmx.h"
#include "dct.h"
#include "dct_ieee.h"
#include "mpeg.h"
#include "video.h"
// static char sh1[8] = { 15, 14, 13, 13, 12, 12, 12, 11 };
// static char sh2[8] = { 16, 14, 13, 13, 13, 12, 12, 12 };
/*
* Decomposition of the quantiser scale q (table index, 0..31) into an odd
* factor and a power of two. For every q in 1..31 the entries satisfy
*
* ((q > 16) ? q & ~1 : q) == ((ltp[q] * 2 + 1) << lts[q])
*
* i.e. ltp[q] selects the odd factor (2 * ltp[q] + 1) and lts[q] is the
* left shift. Odd q above 16 share the entry of q - 1. Entry 0 is a
* placeholder and does not satisfy the identity (it evaluates to 1).
*/
char ltp[32] __attribute__ ((aligned (MIN(32, CACHE_LINE)))) = {
0, 0, 0, 1, 0, 2, 1, 3, 0, 4, 2, 5, 1, 6, 3, 7,
0, 0, 4, 4, 2, 2, 5, 5, 1, 1, 6, 6, 3, 3, 7, 7,
};
/* Shift counts belonging to ltp[]; see the identity above. */
char lts[32] __attribute__ ((aligned (MIN(32, CACHE_LINE)))) = {
0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 4, 1, 1, 2, 2, 1, 1, 3, 3, 1, 1, 2, 2, 1, 1,
};
/*
* align(n): attribute suffix for the table definitions below. When enabled
* it requests n-byte alignment and applies SECTION("video_tables")
* (SECTION is defined outside this file). When __GNUC_MINOR__ is below 90
* the macro expands to nothing and the objects get default placement.
*
* NOTE(review): only the minor version is tested, so any compiler whose
* minor number is below 90 (regardless of major version) takes the empty
* branch -- confirm this is still the intended condition.
*/
#if __GNUC_MINOR__ < 90
#define align(n)
#else
#define align(n) __attribute__ ((SECTION("video_tables") aligned (n)))
#endif
/*
* Global 64-bit constants. All of the c* objects below up to c2q are
* assigned in init_dct(); the name suffixes mirror the expressions used
* there (e.g. cC4_14 is built from C4 * S14).
*/
mmx_t c0 align(8);
mmx_t c1 align(8);
mmx_t c2 align(8);
mmx_t c4 align(8);
mmx_t c1_15 align(8);
mmx_t c1_16 align(8);
mmx_t c1_17 align(8);
mmx_t c1_15w align(8);
mmx_t c128 align(8);
mmx_t c255 align(8);
mmx_t c128_6 align(8);
mmx_t c1b align(8);
mmx_t cC4_14 align(8);
mmx_t cC4_15 align(8);
mmx_t c1C6_13 align(8);
mmx_t cC4C6_14 align(8);
mmx_t cC2C61_13 align(8);
mmx_t cC2626_15 align(8);
mmx_t cC6262_15 align(8);
mmx_t c256 align(8);
/* Not written by init_dct(); presumably scratch storage for the assembly routines -- TODO confirm. */
mmx_t mm8, mm9;
mmx_t c2q align(8);
/* Lookup tables filled in by init_dct(). */
char mmx_q_fdct_intra_sh[32] align(MIN(CACHE_LINE,32));
short mmx_q_fdct_intra_q_lut[8][8][8] align(CACHE_LINE);
short mmx_q_fdct_inter_lut[6][8][2] align(CACHE_LINE);
short mmx_q_fdct_inter_lut0[8][1] align(CACHE_LINE);
short mmx_q_fdct_inter_q[32] align(CACHE_LINE);
mmx_t mmx_q_idct_inter_tab[16] align(CACHE_LINE);
short mmx_q_idct_intra_q_lut[8][8][8] align(CACHE_LINE);
/* Temporaries; apart from c1x none of these is written in this file. */
mmx_t cfae; // fdct_inter temp
mmx_t csh; // fdct_inter temp
mmx_t crnd; // fdct_inter temp
mmx_t c1x; // idct_intra2 temp (initialised at the end of init_dct)
mmx_t shift, mask, mask0; // idct_intra2 temp
#define R2 sqrt(2.0)
/*
 * init_dct - initialise the global MMX constants and the quantisation /
 * DCT lookup tables defined in this file.
 *
 * The function carries the GCC constructor attribute, so it is run
 * automatically during program start-up; it takes no arguments and
 * returns nothing.
 */
static void init_dct(void) __attribute__ ((constructor));

static void
init_dct(void)
{
	double mmx_inter_lut[8][8];
	int q, v, u, sh, max;
	double Cu, Cv, m;
	int shq[8];

	/* Constants used throughout the video encoder */
	c0 = MMXRW(0);
	c1 = MMXRW(1);
	c2 = MMXRW(2);
	c4 = MMXRW(4);
	c1_15 = MMXRD(1 << 15);
	c1_16 = MMXRD(1 << 16);
	c1_17 = MMXRD(1 << 17);
	c1_15w = MMXRW(0x8000);
	c128 = MMXRW(128);
	c255 = MMXRW(255);
	c128_6 = MMXRW(128 << 6);
	c1b = MMXRB(1);
	c256 = MMXRW(1 << 8);

	/* Fixed-point cosine products; the suffix names the scale (Sxx). */
	cC4_14 = MMXRW(lroundn(C4 * S14));
	cC4_15 = MMXRW(lroundn(C4 * S15));
	c1C6_13 = MMXRW(lroundn((1.0 / C6) * S13));
	cC4C6_14 = MMXRW(lroundn((C4 / C6) * S14));
	cC2C61_13 = MMXRW(lroundn((C2 / C6 - 1) * S13));
	cC2626_15 = MMXW(lroundn(C2 * S15), -lroundn(C6 * S15), lroundn(C2 * S15), -lroundn(C6 * S15));
	cC6262_15 = MMXW(lroundn(C6 * S15), +lroundn(C2 * S15), lroundn(C6 * S15), +lroundn(C2 * S15));
	// c3a = MMXRW(0);
	// c5a = MMXRW(128 * 32 + 16);
	// c5b = MMXRW(16);
	c2q.uq = 2ULL;

	/* Inverse DCT (inter) constants, pre-divided by sqrt(2). */
	mmx_q_idct_inter_tab[0] = MMXW(+lroundn(S15*C1/R2), +lroundn(S15*C7/R2), +lroundn(S15*C1/R2), +lroundn(S15*C7/R2));
	mmx_q_idct_inter_tab[1] = MMXW(+lroundn(S15*C7/R2), -lroundn(S15*C1/R2), +lroundn(S15*C7/R2), -lroundn(S15*C1/R2));
	mmx_q_idct_inter_tab[2] = MMXW(+lroundn(S15*C3/R2), +lroundn(S15*C5/R2), +lroundn(S15*C3/R2), +lroundn(S15*C5/R2));
	mmx_q_idct_inter_tab[3] = MMXW(-lroundn(S15*C5/R2), +lroundn(S15*C3/R2), -lroundn(S15*C5/R2), +lroundn(S15*C3/R2));
	mmx_q_idct_inter_tab[4] = MMXRW(lroundn(S15*C1/R2));
	mmx_q_idct_inter_tab[5] = MMXRW(lroundn(S15*(C7+C1)/R2));
	mmx_q_idct_inter_tab[6] = MMXRW(lroundn(S15*(C7-C1)/R2));
	mmx_q_idct_inter_tab[7] = MMXRW(lroundn(S15*C5/R2));
	mmx_q_idct_inter_tab[8] = MMXRW(lroundn(S15*(C3+C5)/R2));
	mmx_q_idct_inter_tab[9] = MMXRW(lroundn(S15*(C3-C5)/R2));
	mmx_q_idct_inter_tab[10] = MMXRD(1024);
	mmx_q_idct_inter_tab[11] = MMXRW((8 << 2) + 2);
	mmx_q_idct_inter_tab[12] = MMXRW(lroundn(S15*C2/R2));
	mmx_q_idct_inter_tab[13] = MMXRW(lroundn(S15*(C6+C2)/R2));
	mmx_q_idct_inter_tab[14] = MMXRW(lroundn(S15*(C6-C2)/R2));
	mmx_q_idct_inter_tab[15] = MMXRW(lroundn(S16*(C6-C2)/R2));

	/*
	 * Intra forward quantisation tables, one per odd divisor (2 * q + 1).
	 * The whole table is rebuilt with an increasing scale 1 << sh until
	 * the largest entry seen reaches 16384. Because the for-increment
	 * runs before the exit test, shq[q] ends up one larger than the
	 * shift that produced the final table.
	 *
	 * NOTE(review): entries are written at [q][u][v] but the maximum is
	 * tracked by reading [q][v][u]; for u > v this reads the value left
	 * by the previous sh pass. Left untouched because the derived shift
	 * table depends on the exact outcome -- confirm whether intended.
	 */
	for (q = 0; q < 8; q++) {
		for (sh = max = 0; max < 16384; sh++)
			for (v = max = 0; v < 8; v++)
				for (u = 0; u < 8; u++) {
					Cu = (u == 0) ? 1 : (cos(u * M_PI / 16.0) * sqrt(2.0));
					Cv = (v == 0) ? 1 : (cos(v * M_PI / 16.0) * sqrt(2.0));
					m = 1.0 / (Cu * Cv * 8.0);

					if (u == 0 || u == 4) m *= 0.125;
					if (u == 2 || u == 6) m *= 0.25;
					if (u & 1) m *= C6 * 0.5;
					if (v == 0 || v == 4) m *= 2;
					if (v == 2 || v == 6) m *= 4;
					if (v & 1) m *= C6 * 8;
					if (u == 0 && v == 0) m = 0;

					mmx_q_fdct_intra_q_lut[q][u][v] = lroundn(
						m
						* 8
						/ default_intra_quant_matrix[v][u]
						/ (2 * q + 1)
						* (double)(1 << sh));

					if (mmx_q_fdct_intra_q_lut[q][v][u] > max)
						max = mmx_q_fdct_intra_q_lut[q][v][u];

					mmx_q_fdct_intra_q_lut[q][0][0] = 0;
				}
		shq[q] = sh;
	}

	/* Per-quantiser shift, combining the table scale with the power-of-two part of q. */
	for (q = 1; q < 32; q++) {
		int ltsi = lts[q], ltpi = ltp[q];

		mmx_q_fdct_intra_sh[q] = shq[ltpi] + ltsi - 17;
	}

	/* Floating-point scale factors for the inter forward DCT tables. */
	for (v = 0; v < 8; v++) {
		for (u = 0; u < 8; u++) {
			Cu = (u == 0) ? 1.0 : (cos(u * M_PI / 16.0) * sqrt(2.0));
			Cv = (v == 0) ? 1.0 : (cos(v * M_PI / 16.0) * sqrt(2.0));

			if (v == 2 || v == 6) Cv = 1.0;

			m = 1.0 / (Cu * Cv * 8.0);

			if (u & 1) m *= C6;
			if (u == 0 || u == 4 || u == 7) m /= 4.0;
			if (u == 2 || u == 5) m /= 2.0;
			if (u == 6) m /= 8.0;

			mmx_inter_lut[v][u] = m;
		}
	}

	for (u = 0; u < 8; u++) {
		mmx_q_fdct_inter_lut0[u][0] = lroundn(mmx_inter_lut[0][u] * S19);
		mmx_q_fdct_inter_lut[1][u][0] = lroundn(mmx_inter_lut[0][u] * +(C2 + C6) * S18);
		mmx_q_fdct_inter_lut[1][u][1] = lroundn(mmx_inter_lut[0][u] * +(C2 - C6) * S18);
		mmx_q_fdct_inter_lut[4][u][0] = lroundn(mmx_inter_lut[0][u] * +(C2 - C6) * S18);
		mmx_q_fdct_inter_lut[4][u][1] = lroundn(mmx_inter_lut[0][u] * -(C2 + C6) * S18);
		mmx_q_fdct_inter_lut[0][u][1] = +(mmx_q_fdct_inter_lut[0][u][0] = lroundn(mmx_inter_lut[1][u] * S19));
		mmx_q_fdct_inter_lut[2][u][1] = -(mmx_q_fdct_inter_lut[2][u][0] = lroundn(mmx_inter_lut[3][u] * S19));
		mmx_q_fdct_inter_lut[3][u][1] = +(mmx_q_fdct_inter_lut[3][u][0] = lroundn(mmx_inter_lut[5][u] * S19));
		mmx_q_fdct_inter_lut[5][u][1] = -(mmx_q_fdct_inter_lut[5][u][0] = lroundn(mmx_inter_lut[7][u] * S17));
	}

	/*
	 * Reciprocal of 2 * q in S15 fixed point. q == 0 would divide by
	 * zero and hand an infinite value to lroundn(), so that slot is
	 * set to 0 explicitly and the loop starts at 1 (like the
	 * mmx_q_fdct_intra_sh loop above).
	 */
	mmx_q_fdct_inter_q[0] = 0;
	for (q = 1; q < 32; q++)
		mmx_q_fdct_inter_q[q] = lroundn(S15 / q / 2.0);

	/* Intra inverse quantisation: 4 * matrix entry * odd divisor, DC slot zeroed. */
	for (q = 0; q < 8; q++) {
		for (v = 0; v < 8; v++)
			for (u = 0; u < 8; u++)
				if (u + v == 0)
					mmx_q_idct_intra_q_lut[q][v][u] = 0;
				else
					mmx_q_idct_intra_q_lut[q][v][u] =
						4 * default_intra_quant_matrix[v][u] * (q * 2 + 1);
		// dump(mmx_q_idct_intra_q_lut[q]);
	}

	c1x = MMXRW(((8 + 128 * 16) << 2) + 2);
}
--- NEW FILE ---
/*
* MPEG-1 Real Time Encoder
*
* Copyright (C) 1999-2000 Michael H. Schimek
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* $Id: dct.h,v 1.1 2001/12/04 23:58:09 mswitch Exp $ */
/*
* reg(n): GCC regparm attribute; asks the compiler to pass the first n
* integer arguments in registers instead of on the stack.
*/
#define reg(n) __attribute__ ((regparm (n)))
/*
* Forward DCT / inverse DCT entry points. Each takes the quantiser scale;
* the inter variants additionally take the input blocks (fdct) or the
* coded block pattern (idct). The fdct_inter functions return an
* unsigned int.
* NOTE(review): dct_ref.c defines fdct_intra() etc. without the mp1e_
* prefix -- confirm how these names are mapped.
*/
extern void mp1e_fdct_intra(int quant_scale) reg(1);
extern unsigned int mp1e_fdct_inter(short iblock[6][8][8],
int quant_scale) reg(2);
extern void mp1e_mpeg1_idct_intra(int quant_scale) reg(1);
extern void mp1e_mpeg1_idct_inter(int quant_scale,
unsigned int cbp) reg(2);
/* MMX variants of the above (presumably implemented in dct_mmx.s -- TODO confirm). */
extern void mp1e_mmx_fdct_intra(int quant_scale) reg(1);
extern unsigned int mp1e_mmx_fdct_inter(short iblock[6][8][8],
int quant_scale) reg(2);
extern void mp1e_mmx_mpeg1_idct_intra(int quant_scale) reg(1);
extern void mp1e_mmx_mpeg1_idct_intra2(int quant_scale) reg(1);
extern void mp1e_mmx_mpeg1_idct_inter(int quant_scale,
unsigned int cbp) reg(2);
extern void mp1e_mmx_copy_refblock(void);
--- NEW FILE ---
/*
* MPEG-1 Real Time Encoder
*
* Copyright (C) 1999-2000 Michael H. Schimek
*
* Based on code written by Tom G. Lane
* and released to public domain 11/22/93.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* $Id: dct_ieee.h,v 1.1 2001/12/04 23:58:09 mswitch Exp $ */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
/* Pi as a double literal, independent of the non-ISO M_PI macro. */
#define IEEE_PI 3.14159265358979323846
/* Cn = cos(n * pi / 16); each use expands to a call of cos(). */
#define C0 cos(0.0 * IEEE_PI / 16.0) // 1.0
#define C1 cos(1.0 * IEEE_PI / 16.0) // 0.9808
#define C2 cos(2.0 * IEEE_PI / 16.0) // 0.9239
#define C3 cos(3.0 * IEEE_PI / 16.0) // 0.8315
#define C4 cos(4.0 * IEEE_PI / 16.0) // 0.7071
#define C5 cos(5.0 * IEEE_PI / 16.0) // 0.5556
#define C6 cos(6.0 * IEEE_PI / 16.0) // 0.3827
#define C7 cos(7.0 * IEEE_PI / 16.0) // 0.1951
/* Snn = 2^nn as a double; fixed-point scale factors. */
#define S13 ((double)(1 << 13))
#define S14 ((double)(1 << 14))
#define S15 ((double)(1 << 15))
#define S16 ((double)(1 << 16))
#define S17 ((double)(1 << 17))
#define S18 ((double)(1 << 18))
#define S19 ((double)(1 << 19))
/*
typedef void dct_func(short [8][8]);
typedef void qdct_func(int q, short [8][8]);
extern int ieee_round(double val);
extern void ieee_ref_fdct(short block[8][8]);
extern void ieee_ref_idct(short block[8][8]);
extern void mpeg_intra_quant(int q, short block[8][8]);
extern void mpeg_inter_quant(int q, short block[8][8]);
extern void mpeg1_intra_iquant(int q, short block[8][8]);
extern void mpeg1_inter_iquant(int q, short block[8][8]);
extern void mpeg2_intra_iquant(int q, short block[8][8]);
extern void mpeg2_inter_iquant(int q, short block[8][8]);
extern void ieee_randomize(short block[8][8], long minpix, long maxpix, long sign);
extern void rake_pattern(short block[8][8], long minpix, long maxpix, long sign);
extern void (* randomize)(short [8][8], long, long, long);
extern void q_fdct_test(qdct_func *fdct, qdct_func *quant, long minpix, long maxpix, long sign, int iterations, unsigned int quant_mask);
extern void q_idct_test(qdct_func *idct, qdct_func *quant, qdct_func *iquant, long minpix, long maxpix, long sign, int iterations, unsigned int quant_mask);
extern void ieee_idct_test(char *name, dct_func *idct, long minpix, long maxpix, long sign, int iterations);
extern void fdct_test(char *name, dct_func *fdct, long minpix, long maxpix, long sign, int iterations);
extern void ieee_1180(char *name, dct_func *idct);
*/
/*
 * Number of scalar elements in a two-dimensional array. Only meaningful
 * for true arrays, not for pointers.
 */
#define __elements(block) (sizeof(block) / sizeof((block)[0][0]))

/*
 * Transpose an 8x8 block in place by exchanging every element above the
 * diagonal with its counterpart below. Requires swap() to be in scope.
 */
#define mirror(block)							\
do {									\
	int _r, _c;							\
	for (_c = 1; _c < 8; _c++)					\
		for (_r = 0; _r < _c; _r++)				\
			swap((block)[_r][_c], (block)[_c][_r]);		\
} while (0)

/* Add n to every element of the block (the block is walked as a flat array). */
#define trans(block, n)							\
do {									\
	int _k = 0;							\
	while (_k < __elements(block)) {				\
		(block)[0][_k] += n;					\
		_k++;							\
	}								\
} while (0)

/* Copy s into d element by element; the element count is taken from d. */
#define copy(d, s)							\
do {									\
	int _k = 0;							\
	while (_k < __elements(d)) {					\
		(d)[0][_k] = (s)[0][_k];				\
		_k++;							\
	}								\
} while (0)

/* Set every element of the block to zero. */
#define clear(block)							\
do {									\
	int _k = 0;							\
	while (_k < __elements(block)) {				\
		(block)[0][_k] = 0.0;					\
		_k++;							\
	}								\
} while (0)
/*
 * Print a two-dimensional block to stderr: a header line with the
 * argument's source text, then every element as %11.4f, starting a new
 * line after each row of the block, followed by an empty line.
 */
#define dump(block)							\
do {									\
	int _n;								\
	int _cols = sizeof((block)[0]) / sizeof((block)[0][0]);	\
	fprintf(stderr, #block ":\n");					\
	for (_n = 0; _n < __elements(block); _n++) {			\
		char _sep = ((_n + 1) % _cols == 0) ? '\n' : ' ';	\
		fprintf(stderr, "%11.4f%c",				\
			(double)(block)[0][_n], _sep);			\
	}								\
	fprintf(stderr, "\n");						\
} while (0)
/*
 * Print the smallest and largest element of a two-dimensional block to
 * stderr, prefixed with the argument's source text.
 *
 * The minimum and maximum are tracked independently: with an "else if"
 * an element that lowers the minimum would never be considered for the
 * maximum, so a descending (or single-element) block would report the
 * -1e30 start value as its maximum.
 */
#define peak(block)							\
do {									\
	int _i;								\
	double _min = 1e30, _max = -1e30;				\
	for (_i = 0; _i < __elements(block); _i++) {			\
		if ((block)[0][_i] < _min)				\
			_min = (block)[0][_i];				\
		if ((block)[0][_i] > _max)				\
			_max = (block)[0][_i];				\
	}								\
	fprintf(stderr, #block ": %11.4f ... %11.4f\n",		\
		_min, _max);						\
} while (0)
/*
 * Element-wise maximum of the absolute values of bl1 and bl2, stored in
 * res. The element count is taken from res. Requires MAX() in scope.
 */
#define maxabs(res, bl1, bl2)						\
do {									\
	int _k = 0;							\
	while (_k < __elements(res)) {					\
		(res)[0][_k] = MAX(fabs((bl1)[0][_k]),			\
				   fabs((bl2)[0][_k]));			\
		_k++;							\
	}								\
} while (0)
--- NEW FILE ---
#
# MPEG-1 Real Time Encoder
#
# Copyright (C) 1999-2001 Michael H. Schimek
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
[...2415 lines suppressed...]
movq 5*8(%eax),%mm5; movq %mm2,(%edx,%esi); // 1
movq 6*8(%eax),%mm6; packuswb %mm5,%mm4;
movq 7*8(%eax),%mm7; movq %mm4,(%edx,%esi,2); // 2
movq 8*8(%eax),%mm0; packuswb %mm7,%mm6;
movq 9*8(%eax),%mm1; movq %mm6,(%edi,%esi,2); // 3
movq 10*8(%eax),%mm2; packuswb %mm1,%mm0;
movq 11*8(%eax),%mm3; movq %mm0,(%edx,%esi,4); // 4
movq 12*8(%eax),%mm4; leal (%edi,%esi,4),%edi;
movq 13*8(%eax),%mm5; packuswb %mm3,%mm2;
movq 14*8(%eax),%mm6; movq %mm2,(%edi); // 5
movq 15*8(%eax),%mm7; packuswb %mm5,%mm4;
leal 128(%eax),%eax; movq %mm4,(%edi,%esi); // 6
cmpl $mb_address+6*8,%ebx; packuswb %mm7,%mm6;
movq %mm6,(%edi,%esi,2); jne 1b; // 7
popl %edi;
popl %esi;
popl %ebx;
ret;
--- NEW FILE ---
/*
* MPEG-1 Real Time Encoder
*
* Copyright (C) 1999-2000 Michael H. Schimek
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* $Id: dct_ref.c,v 1.1 2001/12/04 23:58:09 mswitch Exp $ */
#include "dct.h"
#include "mpeg.h"
#include "video.h"
#include "dct_ieee.h"
#include "../common/math.h"
#define FLOAT float
static FLOAT aan_fwd_lut[8][8];
static FLOAT aan_inv_lut[8][8];
/*
 * Fill aan_fwd_lut[][] and aan_inv_lut[][] with the per-coefficient
 * scale factors used by the reference transforms in this file:
 * fwd = 1 / (Cu * Cv * 8), inv = Cu * Cv / 8, where C0 = 1 and
 * Cn = cos(n * pi / 16) * sqrt(2) otherwise.
 *
 * Carries the GCC constructor attribute, so it runs at program start-up.
 */
static void aan_lut_init(void) __attribute__ ((constructor));

static void
aan_lut_init(void)
{
	int n;

	for (n = 0; n < 64; n++) {
		int v = n / 8, u = n % 8;
		double Cu = 1.0, Cv = 1.0;

		if (u != 0)
			Cu = cos(u * M_PI / 16.0) * sqrt(2.0);
		if (v != 0)
			Cv = cos(v * M_PI / 16.0) * sqrt(2.0);

		aan_fwd_lut[v][u] = 1.0 / (Cu * Cv * 8.0);
		aan_inv_lut[v][u] = 1.0 * (Cu * Cv / 8.0);
	}
}
/*
* One-dimensional 8-point forward DCT butterfly.
*
* in:  eight input samples
* out: eight transform outputs; they are not normalised here -- the
*      callers in this file multiply the 2-D result by aan_fwd_lut[][].
*
* in and out must not overlap: out[] is written while in[] is still
* being read (out[0..6] are stored before the odd part loads in[]).
*/
static void
aan_double_1d_fdct(FLOAT *in, FLOAT *out)
{
FLOAT tmp0, tmp1, tmp2, tmp3;
FLOAT tmp4, tmp5, tmp6, tmp7;
/* even */
/* sums of mirrored sample pairs */
tmp0 = in[0] + in[7];
tmp1 = in[1] + in[6];
tmp2 = in[2] + in[5];
tmp3 = in[3] + in[4];
tmp4 = tmp0 + tmp3;
tmp5 = tmp1 + tmp2;
out[0] = tmp4 + tmp5;
out[4] = tmp4 - tmp5;
tmp0 -= tmp3;
tmp1 -= tmp2;
/* single rotation by C4 yields outputs 2 and 6 */
tmp1 = (tmp0 + tmp1) * C4;
out[2] = tmp0 + tmp1;
out[6] = tmp0 - tmp1;
/* odd */
/* differences of mirrored sample pairs */
tmp7 = in[0] - in[7];
tmp6 = in[1] - in[6];
tmp5 = in[2] - in[5];
tmp4 = in[3] - in[4];
/* order matters: each line uses the not-yet-updated right-hand operand */
tmp4 += tmp5;
tmp5 += tmp6;
tmp6 += tmp7;
/* tmp3 is reused as the shared C6 product */
tmp3 = (tmp4 - tmp6) * C6;
tmp4 = (tmp4 * (C2 - C6)) + tmp3;
tmp6 = (tmp6 * (C2 + C6)) + tmp3;
tmp5 = tmp5 * C4;
tmp3 = tmp7 + tmp5;
tmp7 = tmp7 - tmp5;
out[5] = tmp7 + tmp4;
out[3] = tmp7 - tmp4;
out[1] = tmp3 + tmp6;
out[7] = tmp3 - tmp6;
}
/*
* One-dimensional 8-point inverse DCT butterfly, counterpart of
* aan_double_1d_fdct().
*
* in:  eight coefficients; the callers in this file pre-multiply them
*      by aan_inv_lut[][]
* out: eight reconstructed samples
*
* All of in[] is read before the first store to out[].
*/
static void
aan_double_1d_idct(FLOAT *in, FLOAT *out)
{
FLOAT tmp0, tmp1, tmp2, tmp3;
FLOAT tmp4, tmp5, tmp6, tmp7, tmp8;
/* odd */
tmp5 = in[5] + in[3];
tmp6 = in[5] - in[3];
tmp8 = in[1] + in[7];
tmp4 = in[1] - in[7];
tmp7 = tmp8 + tmp5;
tmp8 = tmp8 - tmp5;
/* tmp5 is reused as the shared 2*C2 product */
tmp5 = +2.0 * C2 * (tmp4 + tmp6);
tmp6 = -2.0 * (C2 + C6) * tmp6 + tmp5;
tmp4 = -2.0 * (C2 - C6) * tmp4 + tmp5;
tmp5 = +2.0 * C4 * tmp8;
/* order matters: each line consumes the value produced by the previous one */
tmp6 -= tmp7;
tmp5 -= tmp6;
tmp4 -= tmp5;
/* even */
tmp2 = in[2] + in[6];
tmp8 = in[2] - in[6];
tmp8 = tmp8 * 2.0 * C4 - tmp2;
tmp0 = in[0] + in[4];
tmp1 = in[0] - in[4];
tmp3 = tmp0 - tmp2;
tmp0 = tmp0 + tmp2;
tmp2 = tmp1 - tmp8;
tmp1 = tmp1 + tmp8;
/* final butterfly: even part +/- odd part */
out[0] = tmp0 + tmp7;
out[1] = tmp1 + tmp6;
out[2] = tmp2 + tmp5;
out[3] = tmp3 + tmp4;
out[4] = tmp3 - tmp4;
out[5] = tmp2 - tmp5;
out[6] = tmp1 - tmp6;
out[7] = tmp0 - tmp7;
}
// #define SATURATE(val, min, max) saturate((val), (min), (max))
/*
* SATURATE currently expands to its first argument only: the min/max
* arguments are ignored and no clamping happens in the forward transforms
* below. The commented-out alternative above would clamp via saturate().
*/
#define SATURATE(val, min, max) (val)
/*
* Saturation in RL/VLC routines with overflow feedback, see there.
*/
/*
 * Reference forward DCT and intra quantisation.
 *
 * For each of the six blocks in mblock[0], subtracts 128 from every
 * sample, applies the two-pass float transform, scales with
 * aan_fwd_lut and quantises into mblock[1]:
 *   - the first coefficient uses a fixed divisor of 8,
 *   - all others use default_intra_quant_matrix * quant_scale,
 * both with a half-divisor bias in the direction given by sign().
 * The quantised block is finally transposed in place.
 *
 * quant_scale: quantiser scale; must be non-zero (it is a divisor).
 */
void
fdct_intra(int quant_scale)
{
	int blk;

	emms();

	for (blk = 0; blk < 6; blk++) {
		FLOAT coef[8][8], pass[8][8];
		int n, level, step;

		/* Load the samples, removing the 128 offset. */
		for (n = 0; n < 64; n++)
			coef[0][n] = mblock[0][blk][0][n] - 128;

		/* Row pass, transpose, second pass, transpose back. */
		for (n = 0; n < 8; n++)
			aan_double_1d_fdct(coef[n], pass[n]);
		mirror(pass);
		for (n = 0; n < 8; n++)
			aan_double_1d_fdct(pass[n], coef[n]);
		mirror(coef);

		/* First coefficient: fixed step of 8. */
		level = lroundn(coef[0][0] * aan_fwd_lut[0][0]);
		mblock[1][blk][0][0] = SATURATE((level + 4 * sign(level)) / 8, -255, +255);

		/* Remaining coefficients: matrix entry times quantiser scale. */
		n = 1;
		while (n < 64) {
			level = lroundn(coef[0][n] * aan_fwd_lut[0][n]);
			step = default_intra_quant_matrix[0][n] * quant_scale;
			mblock[1][blk][0][n] = SATURATE((8 * level + sign(level) * (step >> 1)) / step, -255, +255);
			n++;
		}

		mirror(mblock[1][blk]);
	}
}
/*
 * Reference forward DCT and inter quantisation.
 *
 * Transforms the six blocks of iblock, scales with aan_fwd_lut, divides
 * each coefficient by 2 * quant_scale and stores the result in mblock[0]
 * (each block is transposed in place afterwards).
 *
 * iblock:      six 8x8 input blocks
 * quant_scale: quantiser scale; must be non-zero (it is a divisor)
 *
 * Returns the coded block pattern: bit (0x20 >> i) is set when block i
 * has at least one non-zero quantised coefficient.
 */
unsigned int
fdct_inter(short iblock[6][8][8], int quant_scale)
{
	int coded = 0;
	int blk;

	emms();

	for (blk = 0; blk < 6; blk++) {
		FLOAT coef[8][8], pass[8][8];
		int n, level;

		for (n = 0; n < 64; n++)
			coef[0][n] = iblock[blk][0][n];

		/* Row pass, transpose, second pass, transpose back. */
		for (n = 0; n < 8; n++)
			aan_double_1d_fdct(coef[n], pass[n]);
		mirror(pass);
		for (n = 0; n < 8; n++)
			aan_double_1d_fdct(pass[n], coef[n]);
		mirror(coef);

		for (n = 0; n < 64; n++) {
			level = lroundn(coef[0][n] * aan_fwd_lut[0][n]);
			mblock[0][blk][0][n] = SATURATE(level / (2 * quant_scale), -255, +255);

			/* Any surviving coefficient marks the block as coded. */
			if (mblock[0][blk][0][n] != 0)
				coded |= 0x20 >> blk;
		}

		mirror(mblock[0][blk]);
	}

	return coded;
}
/*
 * Reference intra inverse quantisation and inverse DCT.
 *
 * For each of the six blocks in mblock[1]: transposes the block in
 * place, dequantises it (first coefficient times 8, the others with
 * default_intra_quant_matrix * quant_scale / 8, adjusting even values
 * by sign() and clamping to -2048..2047), runs the two-pass float
 * inverse transform and writes the result plus 128, clamped to 0..255,
 * into the picture at newref.
 *
 * The destination pointer advances cumulatively by
 * mb_address.block[i].offset; rows are mb_address.block[i].pitch apart.
 */
void
mpeg1_idct_intra(int quant_scale)
{
	unsigned char *dst = newref;
	int blk;

	emms();

	for (blk = 0; blk < 6; blk++) {
		FLOAT coef[8][8], pass[8][8];
		unsigned char *row;
		int n, r, c, level;

		dst += mb_address.block[blk].offset;

		mirror(mblock[1][blk]);

		coef[0][0] = mblock[1][blk][0][0] * 8 * aan_inv_lut[0][0];

		for (n = 1; n < 64; n++) {
			level = (int)(mblock[1][blk][0][n] *
				default_intra_quant_matrix[0][n] * quant_scale) / 8;

			/* mismatch control */
			if ((level & 1) == 0)
				level -= sign(level);

			coef[0][n] = aan_inv_lut[0][n] * saturate(level, -2048, 2047);
		}

		for (n = 0; n < 8; n++)
			aan_double_1d_idct(coef[n], pass[n]);
		mirror(pass);
		for (n = 0; n < 8; n++)
			aan_double_1d_idct(pass[n], coef[n]);
		mirror(coef);

		row = dst;
		for (r = 0; r < 8; r++) {
			for (c = 0; c < 8; c++)
				row[c] = saturate(lroundn(coef[r][c]) + 128, 0, 255);
			row += mb_address.block[blk].pitch;
		}
	}
}
/*
 * Reference inter inverse quantisation and inverse DCT.
 *
 * quant_scale: quantiser scale
 * cbp:         coded block pattern; bit (0x20 >> i) set means block i
 *              carries coefficients in mblock[0][i]
 *
 * Coded blocks are transposed in place, dequantised as
 * (2 * level + sign(level)) * quant_scale with the even-value adjustment
 * and clamping to -2048..2047, inverse transformed and added to
 * mblock[3][i], clamped to 0..255. Uncoded blocks copy mblock[3][i]
 * unchanged. Output goes to newref, advanced cumulatively by
 * mb_address.block[i].offset with rows block[i].pitch apart.
 *
 * (The original also carried a disabled variant that clamped the
 * residual to -128..127 before adding it.)
 */
void
mpeg1_idct_inter(int quant_scale, unsigned int cbp)
{
	FLOAT coef[8][8], pass[8][8];
	unsigned char *dst = newref;
	int blk, n, r, c, level;

	emms();

	for (blk = 0; blk < 6; blk++) {
		unsigned char *row;

		dst += mb_address.block[blk].offset;
		row = dst;

		if (!(cbp & (0x20 >> blk))) {
			/* Not coded: the output is the stored block as is. */
			for (r = 0; r < 8; r++) {
				for (c = 0; c < 8; c++)
					row[c] = mblock[3][blk][r][c];
				row += mb_address.block[blk].pitch;
			}
			continue;
		}

		mirror(mblock[0][blk]);

		for (n = 0; n < 64; n++) {
			level = (2 * mblock[0][blk][0][n] + sign(mblock[0][blk][0][n])) * quant_scale;

			/* mismatch control */
			if ((level & 1) == 0)
				level -= sign(level);

			coef[0][n] = aan_inv_lut[0][n] * saturate(level, -2048, 2047);
		}

		for (n = 0; n < 8; n++)
			aan_double_1d_idct(coef[n], pass[n]);
		mirror(pass);
		for (n = 0; n < 8; n++)
			aan_double_1d_idct(pass[n], coef[n]);
		mirror(coef);

		for (r = 0; r < 8; r++) {
			for (c = 0; c < 8; c++)
				row[c] = saturate(lroundn(coef[r][c]) + mblock[3][blk][r][c], 0, 255);
			row += mb_address.block[blk].pitch;
		}
	}
}
/*
 * Intra inverse quantisation and inverse DCT with sum-based mismatch
 * control (the parity of the coefficient sum decides whether the last
 * coefficient has its lowest bit toggled).
 *
 * For each of the six blocks in mblock[1]: transposes the block in
 * place, dequantises it, inverse transforms and writes the result plus
 * 128, clamped to 0..255, to newref.
 *
 * The destination advances by mb_address.block[i].offset for block i,
 * exactly as mpeg1_idct_intra() does. The previous code added
 * block[0].offset on every iteration, which placed blocks 1..5 at
 * addresses unrelated to their own offset while still using their own
 * pitch.
 */
void
mpeg2_idct_intra(int quant_scale)
{
	int i, j, k, val, sum;
	unsigned char *p, *new = newref;

	emms();

	for (i = 0; i < 6; i++) {
		FLOAT F[8][8], t[8][8];

		new += mb_address.block[i].offset;

		mirror(mblock[1][i]);

		F[0][0] = (sum = mblock[1][i][0][0] * 8) * aan_inv_lut[0][0];

		for (j = 1; j < 64; j++) {
			val = (int)(mblock[1][i][0][j] *
				default_intra_quant_matrix[0][j] * quant_scale) / 8;

			sum += val = saturate(val, -2048, 2047);

			/* mismatch control: even sum -> toggle LSB of the last coefficient */
			if (j == 63 && !(sum & 1))
				val ^= 1;

			F[0][j] = aan_inv_lut[0][j] * val;
		}

		for (j = 0; j < 8; j++)
			aan_double_1d_idct(F[j], t[j]);
		mirror(t);
		for (j = 0; j < 8; j++)
			aan_double_1d_idct(t[j], F[j]);
		mirror(F);

		for (j = 0, p = new; j < 8; j++) {
			for (k = 0; k < 8; k++)
				p[k] = saturate(lroundn(F[j][k]) + 128, 0, 255);
			p += mb_address.block[i].pitch;
		}
	}
}
/*
 * Inter inverse quantisation and inverse DCT with sum-based mismatch
 * control.
 *
 * quant_scale: quantiser scale
 * cbp:         coded block pattern; bit (0x20 >> i) set means block i
 *              carries coefficients in mblock[0][i]
 *
 * Coded blocks are transposed in place, dequantised, inverse
 * transformed and added to mblock[3][i], clamped to 0..255; uncoded
 * blocks copy mblock[3][i] unchanged. Output goes to newref.
 *
 * The destination advances by mb_address.block[i].offset for block i,
 * exactly as mpeg1_idct_inter() does. The previous code added
 * block[0].offset on every iteration while using block[i].pitch.
 */
void
mpeg2_idct_inter(int quant_scale, unsigned int cbp)
{
	FLOAT F[8][8], t[8][8];
	unsigned char *new = newref;
	int i, j, k, val, sum;

	emms();

	for (i = 0; i < 6; i++) {
		new += mb_address.block[i].offset;

		if (cbp & (0x20 >> i)) {
			unsigned char *p = new;

			mirror(mblock[0][i]);

			for (j = 0, sum = 0; j < 64; j++) {
				val = (2 * mblock[0][i][0][j] + sign(mblock[0][i][0][j])) * quant_scale;

				sum += val = saturate(val, -2048, 2047);

				/* mismatch control */
				if (j == 63 && !(sum & 1))
					val ^= 1;

				F[0][j] = aan_inv_lut[0][j] * val;
			}

			for (j = 0; j < 8; j++)
				aan_double_1d_idct(F[j], t[j]);
			mirror(t);
			for (j = 0; j < 8; j++)
				aan_double_1d_idct(t[j], F[j]);
			mirror(F);

			for (j = 0; j < 8; j++) {
				for (k = 0; k < 8; k++)
#if 1
					p[k] = saturate(lroundn(F[j][k]) + mblock[3][i][j][k], 0, 255);
#else
					p[k] = saturate(saturate(lroundn(F[j][k]), -128, 127) + mblock[3][i][j][k], 0, 255);
#endif
				p += mb_address.block[i].pitch;
			}
		} else {
			unsigned char *p = new;

			/* Not coded: the output is the stored block as is. */
			for (j = 0; j < 8; j++) {
				for (k = 0; k < 8; k++)
					p[k] = mblock[3][i][j][k];
				p += mb_address.block[i].pitch;
			}
		}
	}
}
--- NEW FILE ---
/*
* MPEG-1 Real Time Encoder
*
* Copyright (C) 1999-2000 Michael H. Schimek
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* $Id: filter.c,v 1.1 2001/12/04 23:58:09 mswitch Exp $ */
#include "../common/log.h"
#include "../common/mmx.h"
#include "../common/math.h"
#include "../options.h"
#include "video.h"
/*
* Currently selected input filter. The routines in this file with this
* signature take two buffer pointers and return an int (the luma spatial
* activity in the reference implementation below).
*/
int (* filter)(unsigned char *, unsigned char *);
// removed bool temporal_interpolation;
/* Permutation of the six block indices; not referenced in the visible part of this file. */
const char cbp_order[6] = { 5, 4, 3, 1, 2, 0 };
/*
* Human-readable filter names. Index 0 is "invalid"; entries marked
* REMOVED are kept so the remaining indices stay unchanged. The trailing
* empty strings are unused slots.
* NOTE(review): presumably indexed by the filter mode number -- confirm
* against the option parser.
*/
const char *
filter_labels[] = {
"invalid",
"YUV 4:2:0 fastest",
"YUYV 4:2:2 fastest",
"YUYV 4:2:2 w/vertical decimation",
"YUYV 4:2:2 w/temporal interpolation", /* REMOVED */
"YUYV 4:2:2 w/vertical interpolation",
"YUYV 4:2:2 field progressive 50/60 Hz",
"YUYV 4:2:2 50/60 Hz w/temporal interpolation", /* REMOVED */
"YVU 4:2:0 fastest",
"",
"",
"",
};
/*
* Source buffer geometry used by the filters below: filter_y_offs is a
* byte offset added to the buffer start and filter_y_pitch the byte
* distance between lines. filter_u_offs / filter_v_offs are not used by
* the C filters visible in this file.
*/
/* static */ int filter_y_offs,
filter_u_offs,
filter_v_offs,
filter_y_pitch;
/* Filter implementations defined elsewhere (presumably filter_mmx.s -- TODO confirm). */
extern int mmx_YUV_420(unsigned char *, unsigned char *);
extern int mmx_YUYV_422(unsigned char *, unsigned char *);
extern int mmx_YUYV_422_2v(unsigned char *, unsigned char *);
extern int mmx_YUYV_422_ti(unsigned char *, unsigned char *);
extern int mmx_YUYV_422_vi(unsigned char *, unsigned char *);
/*
 * Reference YUYV 4:2:2 input filter.
 *
 * Copies the macroblock at (mb_row, mb_col) from the packed source
 * buffer into mblock[0]: even bytes go to the luma blocks (16 lines of
 * 8 samples each at byte offsets 0 and 16), bytes 1 and 3 of every
 * four-byte group on every second line go to blocks 4 and 5.
 *
 * buffer: start of the source picture
 * unused: ignored
 *
 * Returns s2 * 256 - s * s over the 256 luma samples (luma spatial
 * activity).
 */
int
YUYV_422(unsigned char *buffer, unsigned char *unused)
{
	const unsigned char *mb;
	unsigned int luma, sum = 0, sum_sq = 0;
	int row, col, k;

	mb = buffer + (filter_y_pitch * mb_row * 16 + mb_col * 16 * 2 + filter_y_offs);

	for (row = 0; row < 16; row++) {
		const unsigned char *line = mb + row * filter_y_pitch;

		for (col = 0; col < 8; col++) {
			// Note block order Y0 Y2 Y1 Y3
			mblock[0][0][row][col] = (short) line[col * 2 + 0];
			mblock[0][2][row][col] = (short) line[col * 2 + 16];
		}
	}

	for (row = 0; row < 8; row++) {
		const unsigned char *line = mb + row * filter_y_pitch * 2;

		for (col = 0; col < 8; col++) {
			mblock[0][4][row][col] = (short) line[col * 4 + 1];
			mblock[0][5][row][col] = (short) line[col * 4 + 3];
		}
	}

	/* Accumulate sum and sum of squares over the four luma blocks. */
	for (k = 0; k < 4 * 64; k++) {
		luma = mblock[0][0][0][k];
		sum += luma;
		sum_sq += luma * luma;
	}

	return sum_sq * 256 - (sum * sum); // luma spatial activity
}
/* Filter that color_trap() forwards to; it is not assigned in the visible part of this file. */
static int (* color_pred)(unsigned char *, unsigned char *);
/* Hum. Could add rendered subpictures. */
/*
* Wrapper filter: runs color_pred() on the two buffers, then overwrites
* 32 consecutive quadwords (256 bytes) starting at mblock[0][4][0][0]
* with the 64-bit constant c128 (set to MMXRW(128) by init_dct() in
* dct.c), and returns color_pred()'s result unchanged.
* NOTE(review): presumably this replaces both chroma blocks with the
* neutral value 128, i.e. forces a monochrome picture -- confirm.
*/
static int
color_trap(unsigned char *buffer1, unsigned char *buffer2)
{
int r = color_pred(buffer1, buffer2);
/* %0 is the destination address, bound to the "D" register constraint */
asm volatile (
"\t movq c128,%%mm0;\n"
"\t movq %%mm0,(%0); movq %%mm0,1*8(%0);\n"
"\t movq %%mm0,2*8(%0); movq %%mm0,3*8(%0);\n"
"\t movq %%mm0,4*8(%0); movq %%mm0,5*8(%0);\n"
"\t movq %%mm0,6*8(%0); movq %%mm0,7*8(%0);\n"
"\t movq %%mm0,8*8(%0); movq %%mm0,9*8(%0);\n"
"\t movq %%mm0,10*8(%0); movq %%mm0,11*8(%0);\n"
"\t movq %%mm0,12*8(%0); movq %%mm0,13*8(%0);\n"
"\t movq %%mm0,14*8(%0); movq %%mm0,15*8(%0);\n"
"\t movq %%mm0,16*8(%0); movq %%mm0,17*8(%0);\n"
"\t movq %%mm0,18*8(%0); movq %%mm0,19*8(%0);\n"
"\t movq %%mm0,20*8(%0); movq %%mm0,21*8(%0);\n"
"\t movq %%mm0,22*8(%0); movq %%mm0,23*8(%0);\n"
"\t movq %%mm0,24*8(%0); movq %%mm0,25*8(%0);\n"
"\t movq %%mm0,26*8(%0); movq %%mm0,27*8(%0);\n"
"\t movq %%mm0,28*8(%0); movq %%mm0,29*8(%0);\n"
"\t movq %%mm0,30*8(%0); movq %%mm0,31*8(%0);\n"
:: "D" (&mblock[0][4][0][0]) : "cc", "memory" FPU_REGS);
return r;
}
/*
 * Experimental low pass filter.
 *
 * Like YUYV_422(), but every luma sample is the weighted average of a
 * 5x5 neighbourhood whose top-left tap is the sample's own position
 * (weights sum to 144, rounded to nearest). The second source is
 * ignored; chroma is copied unfiltered. Returns the luma spatial
 * activity s2 * 256 - s * s.
 */
int
YUYV_422_exp1(unsigned char *buffer, unsigned char *unused)
{
	static const char
	kernel[5][5] = {
		{ 1, 3, 4, 3, 1 },
		{ 3, 9, 12, 9, 3 },
		{ 4, 12, 16, 12, 4 },
		{ 3, 9, 12, 9, 3 },
		{ 1, 3, 4, 3, 1 },
	};
	const unsigned char *mb;
	unsigned int acc, luma, sum = 0, sum_sq = 0;
	int row, col, ky, kx, half, k;

	// if (mb_row <= 0 || mb_row >= mb_last_row)
	//	return mmx_YUYV_422(buffer, NULL);

	mb = buffer + (filter_y_pitch * mb_row * 16 + mb_col * 16 * 2 + filter_y_offs);

	for (row = 0; row < 16; row++)
		for (col = 0; col < 8; col++)
			/* half 0 feeds block 0 (byte offset 0), half 1 block 2 (byte offset 16) */
			for (half = 0; half < 2; half++) {
				acc = 0;

				for (ky = 0; ky < 5; ky++)
					for (kx = 0; kx < 5; kx++)
						acc += mb[(row + ky) * filter_y_pitch + (col + kx) * 2 + half * 16] * kernel[ky][kx];

				mblock[0][half * 2][row][col] = (acc + 72) / 144;
			}

	for (row = 0; row < 8; row++) {
		const unsigned char *line = mb + row * filter_y_pitch * 2;

		for (col = 0; col < 8; col++) {
			mblock[0][4][row][col] = (short) line[col * 4 + 1];
			mblock[0][5][row][col] = (short) line[col * 4 + 3];
		}
	}

	for (k = 0; k < 4 * 64; k++) {
		luma = mblock[0][0][0][k];
		sum += luma;
		sum_sq += luma * luma;
	}

	return sum_sq * 256 - (sum * sum);
}
/*
 * 3x3 kernel with weights 1-2-1 / 2-4-2 / 1-2-1 (sum 16) around the
 * sample at c; dy is the byte distance between kernel rows, dx between
 * kernel columns. Returns the unnormalised weighted sum.
 */
static unsigned int
exp2_kernel_121(const unsigned char *c, int dy, int dx)
{
	unsigned int n;

	n = c[-dy - dx] + c[-dy + dx] + c[dy - dx] + c[dy + dx];
	n += (c[-dy] + c[dy] + c[-dx] + c[dx]) * 2;
	n += c[0] * 4;

	return n;
}

/*
 * Experimental low pass filter.
 *
 * First runs mmx_YUYV_422() on the macroblock. If the activity it
 * returns is below 65536 * 128 that result is returned as is. Otherwise
 * the macroblock in mblock[0] is rebuilt with every luma and chroma
 * sample replaced by a centred 3x3 weighted average (rounded to
 * nearest), and the luma spatial activity of the filtered data is
 * returned.
 *
 * The kernel reads one line above and one sample left of the
 * macroblock, so the caller must provide that margin.
 */
int
YUYV_422_exp2(unsigned char *buffer, unsigned char *buffer2)
{
	const unsigned char *mb;
	unsigned int luma, sum = 0, sum_sq = 0;
	int row, col, k, activity;

	activity = mmx_YUYV_422(buffer, buffer2);
	// activity = mmx_YUYV_422_ti(buffer, buffer2);

	// if (mb_row <= 0 || mb_row >= mb_last_row)
	//	return activity;

	if (activity < 65536 * 128)
		return activity;

	mb = buffer + (filter_y_pitch * mb_row * 16 + mb_col * 16 * 2 + filter_y_offs);

	/* Luma: samples are 2 bytes apart, lines one pitch apart. */
	for (row = 0; row < 16; row++)
		for (col = 0; col < 8; col++) {
			const unsigned char *c = mb + row * filter_y_pitch + col * 2;

			mblock[0][0][row][col] = (exp2_kernel_121(c, filter_y_pitch, 2) + 8) >> 4;
			mblock[0][2][row][col] = (exp2_kernel_121(c + 16, filter_y_pitch, 2) + 8) >> 4;
		}

	// mblock[0][0][0][0] = 0;

	/* Chroma: samples are 4 bytes apart, every second line is used. */
	for (row = 0; row < 8; row++)
		for (col = 0; col < 8; col++) {
			const unsigned char *c = mb + row * filter_y_pitch * 2 + col * 4;

			mblock[0][4][row][col] = (exp2_kernel_121(c + 1, filter_y_pitch * 2, 4) + 8) >> 4;
			mblock[0][5][row][col] = (exp2_kernel_121(c + 3, filter_y_pitch * 2, 4) + 8) >> 4;
		}

	for (k = 0; k < 4 * 64; k++) {
		luma = mblock[0][0][0][k];
		sum += luma;
		sum_sq += luma * luma;
	}

	return sum_sq * 256 - (sum * sum);
}
/*
 * 3x3 kernel with weights 1-2-1 / 2-4-2 / 1-2-1 (sum 16) whose top-left
 * tap is at tl; dy is the distance between kernel rows, dx between
 * kernel columns. Returns the unnormalised weighted sum.
 */
static unsigned int
exp3_kernel_121(const unsigned char *tl, int dy, int dx)
{
	unsigned int n;

	n = tl[0] + tl[2 * dx] + tl[2 * dy] + tl[2 * dy + 2 * dx];
	n += (tl[dx] + tl[2 * dy + dx] + tl[dy] + tl[dy + 2 * dx]) * 2;
	n += tl[dy + dx] * 4;

	return n;
}

/*
 * Experimental low pass filter.
 *
 * Averages the two source buffers (rounding up) into a static 19-line
 * by 40-byte scratch area, taking every second source line starting one
 * line above row mb_row * 32 and four bytes left of the macroblock.
 * The scratch data is then filtered with a 3x3 weighted kernel into
 * mblock[0] (luma blocks 0 and 2, chroma blocks 4 and 5), and the luma
 * spatial activity s2 * 256 - s * s is returned.
 *
 * Not reentrant: the scratch area is a function-local static.
 */
int
YUYV_422_exp3(unsigned char *buffer, unsigned char *buffer2)
{
	static unsigned char temp[19 * 40];
	const unsigned char *src1, *src2;
	unsigned int luma, sum = 0, sum_sq = 0;
	int row, col, k;

	src1 = buffer + (filter_y_pitch * (mb_row * 32 - 1) + mb_col * 16 * 2 + filter_y_offs);
	src2 = buffer2 + (filter_y_pitch * (mb_row * 32 - 1) + mb_col * 16 * 2 + filter_y_offs);

	for (row = 0; row < 19; row++) {
		unsigned char *line = temp + row * 40;

		for (col = 0; col < 40; col++)
			line[col] = (src1[col - 4] + src2[col - 4] + 1) >> 1;

		src1 += filter_y_pitch * 2;
		src2 += filter_y_pitch * 2;
	}

	/* Luma: scratch lines are 40 bytes apart, samples 2 bytes apart. */
	for (row = 0; row < 16; row++)
		for (col = 0; col < 8; col++) {
			const unsigned char *tl = temp + row * 40 + col * 2;

			mblock[0][0][row][col] = (exp3_kernel_121(tl, 40, 2) + 8) >> 4;
			mblock[0][2][row][col] = (exp3_kernel_121(tl + 16, 40, 2) + 8) >> 4;
		}

	// mblock[0][0][0][0] = 0;

	/* Chroma: every second scratch line, samples 4 bytes apart. */
	for (row = 0; row < 8; row++)
		for (col = 0; col < 8; col++) {
			const unsigned char *tl = temp + row * 40 * 2 + col * 4;

			mblock[0][4][row][col] = (exp3_kernel_121(tl + 1, 40 * 2, 4) + 8) >> 4;
			mblock[0][5][row][col] = (exp3_kernel_121(tl + 3, 40 * 2, 4) + 8) >> 4;
		}

	for (k = 0; k < 4 * 64; k++) {
		luma = mblock[0][0][0][k];
		sum += luma;
		sum_sq += luma * luma;
	}

	return sum_sq * 256 - (sum * sum);
}
/*
 * Mean of those samples in the 4 x 4 neighbourhood of <centre>
 * (offsets -2 ... +1 in both directions, samples two bytes apart,
 * lines filter_y_pitch apart) which differ from the centre sample by
 * no more than 40. The centre itself always qualifies, so the divisor
 * is never zero. The result is rounded to nearest.
 */
static unsigned int
exp4_similar_mean(const unsigned char *centre)
{
	unsigned int total = 0, hits = 0;
	unsigned int ref, cand;
	int dy, dx;

	ref = centre[0];

	for (dy = -2; dy < +2; dy++)
		for (dx = -2; dx < +2; dx++) {
			cand = centre[dy * filter_y_pitch + dx * 2];

			if (40 >= nbabs(cand - ref)) {
				total += cand;
				hits++;
			}
		}

	return (total + (hits >> 1)) / hits;
}

/*
 * Experimental filter: each luma sample of the macroblock addressed
 * by mb_row, mb_col is replaced by the mean of its similar neighbours
 * (see exp4_similar_mean), chroma is copied unfiltered from every
 * second source line. The second argument is not used.
 *
 * Returns 256 * (sum of squares) - (sum)^2 over the 256 luma samples
 * written to mblock[0][0..3].
 */
int
YUYV_422_exp4(unsigned char *buffer, unsigned char *unused)
{
	unsigned int sample, sum = 0, sum_sq = 0;
	int row, col, k;

	buffer += filter_y_pitch * mb_row * 16 + mb_col * 16 * 2 + filter_y_offs;

	/* Luma, left and right 8 x 16 halves */
	for (row = 0; row < 16; row++) {
		const unsigned char *line = buffer + row * filter_y_pitch;

		for (col = 0; col < 8; col++) {
			mblock[0][0][row][col] = exp4_similar_mean(line + col * 2);
			mblock[0][2][row][col] = exp4_similar_mean(line + col * 2 + 16);
		}
	}

	/* Chroma, bytes 1 and 3 of every four byte group on even lines */
	for (row = 0; row < 8; row++) {
		const unsigned char *line = buffer + row * filter_y_pitch * 2;

		for (col = 0; col < 8; col++) {
			mblock[0][4][row][col] = (short) line[col * 4 + 1];
			mblock[0][5][row][col] = (short) line[col * 4 + 3];
		}
	}

	/* Statistics over the four luma blocks, addressed as one flat run */
	for (k = 0; k < 4 * 64; k++) {
		sample = mblock[0][0][0][k];
		sum += sample;
		sum_sq += sample * sample;
	}

	return sum_sq * 256 - (sum * sum);
}
/*
* Input:
* grab_width, grab_height (pixels)
* [encoded image] width, height (pixels)
* pitch (line distance, Y or YUYV, bytes)
*
* Assumed:
* Y plane size = pitch * grab_height,
* U,V or V,U - Y distance = 4,5 * Y plane size / 4
* U,V pitch = pitch / 2
*
* Output:
* width, height (pixels)
* filter initialized
*
* Details:
* Selects the input filter routine for the global filter_mode, clips
* the encoded size to what the grabbed picture can supply in whole
* 16 x 16 macroblocks, centers the encoding window and stores the
* resulting line pitch and plane offsets in filter_y_pitch,
* filter_y_offs, filter_u_offs and filter_v_offs. When luma_only is
* set the selected routine is saved in color_pred and color_trap is
* installed as filter instead.
*/
void
filter_init(int pitch)
{
int padded_width, padded_height;
/* y_bpp: bytes per pixel step in the Y (or packed YUYV) plane;
   scale_x, scale_y: source pixels consumed per encoded pixel */
int y_bpp = 2, scale_x = 1, scale_y = 1;
int off_x, off_y;
/* Stays 0 for the packed modes, so the u/v offsets are only
   meaningful for the planar modes */
int uv_size = 0;
/* Plane order in units of uv_size: U at 4, V at 5 (swapped for YVU) */
int u = 4, v = 5;
// temporal_interpolation = FALSE;
switch (filter_mode) {
case CM_YVU:
u = 5; v = 4;
/* fall through: YVU only differs from YUV in the plane order */
case CM_YUV:
filter = mmx_YUV_420;
uv_size = pitch * grab_height / 4;
y_bpp = 1;
break;
case CM_YUYV:
case CM_YUYV_PROGRESSIVE:
filter = mmx_YUYV_422;
break;
case CM_YUYV_EXP:
filter = YUYV_422_exp2;
// temporal_interpolation = FALSE;
/* The experimental filters read samples outside the macroblock,
   hence the encoded size is limited to the grab size minus 16 */
width = saturate(grab_width, 1, grab_width - 16);
height = saturate(grab_height, 1, grab_height - 16);
break;
case CM_YUYV_EXP2:
filter = YUYV_422_exp4;
// temporal_interpolation = FALSE;
width = saturate(grab_width, 1, grab_width - 16);
height = saturate(grab_height, 1, grab_height - 16);
break;
case CM_YUYV_EXP_VERTICAL_DECIMATION:
/* NOTE(review): FAIL presumably does not return, which would make
   the remainder of this case dead code - confirm */
FAIL("Sorry, the selected filter mode was experimental and is no longer available.\n");
filter = YUYV_422_exp3;
// temporal_interpolation = TRUE;
scale_y = 2;
width = saturate(grab_width, 1, grab_width - 16);
height = saturate(grab_height / 2, 1, grab_height / 2 - 16);
break;
case CM_YUYV_VERTICAL_DECIMATION:
filter = mmx_YUYV_422_2v;
/* Two source lines are consumed per encoded line */
scale_y = 2;
break;
case CM_YUYV_VERTICAL_INTERPOLATION:
filter = mmx_YUYV_422_vi;
break;
case CM_YUYV_TEMPORAL_INTERPOLATION:
case CM_YUYV_PROGRESSIVE_TEMPORAL:
/* NOTE(review): as above, the assignment after FAIL is presumably
   unreachable - confirm */
FAIL("Sorry, the selected filter mode (temporal interpolation) is no longer available.\n");
filter = mmx_YUYV_422_ti;
// temporal_interpolation = TRUE;
break;
default:
FAIL("Filter '%s' out of order",
filter_labels[filter_mode]);
}
/*
* Need a clipping mechanism (or padded buffers?), currently
* all memory accesses as 16 x 16 mblocks. Step #2: clear outside
* blocks to all zero and all outside samples to average of
* inside samples (for prediction and FDCT).
*/
/* Source area covered when the encoded size is rounded up to whole
   macroblocks */
padded_width = ((width + 15) & -16) * scale_x;
padded_height = ((height + 15) & -16) * scale_y;
/* If that exceeds the grabbed picture, round the encoded size down
   to the largest multiple of 16 the source can supply */
if (padded_width > grab_width) {
width = (grab_width / scale_x) & -16;
padded_width = width * scale_x;
}
if (padded_height > grab_height) {
height = (grab_height / scale_y) & -16;
padded_height = height * scale_y;
}
/* Center the encoding window */
off_x = (grab_width - width * scale_x + 1) >> 1;
off_y = (grab_height - height * scale_y + 1) >> 1;
/* Shift the window back when its padded extent would cross the
   right or bottom edge of the grabbed picture */
if (off_x + padded_width > grab_width)
off_x = grab_width - padded_width;
if (off_y + padded_height > grab_height)
off_y = grab_height - padded_height;
filter_y_pitch = pitch;
filter_y_offs = pitch * off_y + off_x * y_bpp;
/* Chroma planes follow the Y plane at u (v) * uv_size; the window
   offset inside a chroma plane is a quarter of the luma offset */
filter_u_offs = uv_size * u + (filter_y_offs >> 2);
filter_v_offs = uv_size * v + (filter_y_offs >> 2);
printv(2, "Filter '%s'\n", filter_labels[filter_mode]);
if (luma_only) {
/* Keep the real filter in color_pred and route calls through
   color_trap */
color_pred = filter;
filter = color_trap;
}
}
--- NEW FILE ---
#
# MPEG-1 Real Time Encoder
#
# Copyright (C) 1999-2000 Michael H. Schimek
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# $Id: filter_mmx.s,v 1.1 2001/12/04 23:58:09 mswitch Exp $
# int
# mmx_YUV_420(unsigned char *buffer, unsigned char *unused)
#
# Copies the macroblock addressed by the globals mb_row, mb_col from a
# planar source picture into mblock, widening each byte to a 16 bit word:
#   8 x 8 bytes read at filter_u_offs + chroma are stored at mblock + 512,
#   8 x 8 bytes read at filter_v_offs + chroma are stored at mblock + 640,
#   16 x 16 bytes read at filter_y_offs + buffer + row * 16 + mb_col * 16
#   are stored at mblock + 0, the left 8 columns (16 rows) first, then
#   the right 8 columns.
# Chroma lines are filter_y_pitch / 2 bytes apart, luma lines
# filter_y_pitch bytes.
#
# While the luma samples are copied %mm6 accumulates their sum and %mm7
# the sum of their squares. The value returned in %eax is
# 256 * sum_of_squares - sum * sum.
#
# The second stack argument is never read. %edi, %edx, %esi, %ebx and
# %ecx are saved on entry and restored on exit. The MMX registers are
# clobbered and no emms is executed in this routine.
.text
.align 16
.globl mmx_YUV_420
mmx_YUV_420:
leal -20(%esp),%esp;
movl %edi,16(%esp);
movl 1*4+20(%esp),%edi; // buffer
movl %edx,12(%esp);
movl filter_y_pitch,%edx;
movl %esi,8(%esp);
movl mb_row,%esi;
movl %ebx,4(%esp);
movl mb_col,%ebx;
movl %ecx,0(%esp);
imull %edx,%esi; // row = filter_y_pitch * mb_row
sall $2,%esi;
leal (%esi,%ebx,8),%eax;
leal (%esi,%ebx,4),%esi;
addl %edi,%eax; // chroma = buffer + row * 4 + mb_col * 8
leal (%edi,%esi,4),%esi;
movl %eax,%ebx;
addl filter_u_offs,%eax; // filter_u_offs + chroma
addl filter_v_offs,%ebx; // filter_v_offs + chroma
addl filter_y_offs,%esi; // filter_y_offs + buffer + row * 16 + mb_col * 16
movl $mblock,%edi;
/* Cb, Cr */
/* %edx is halved to the chroma pitch here and doubled back at the end
   of this section; %mm7 is the zero used for byte to word unpacking */
movq (%eax),%mm0; shrl $1,%edx;
movq (%ebx),%mm1; pxor %mm7,%mm7;
movq (%eax,%edx),%mm4; movq %mm0,%mm2;
movq (%ebx,%edx),%mm5; punpcklbw %mm7,%mm0;
movq (%ebx,%edx,2),%mm6; punpckhbw %mm7,%mm2;
movq %mm0,512+0*16+0(%edi); movq %mm1,%mm3;
movq (%eax,%edx,2),%mm0; punpcklbw %mm7,%mm1;
movq %mm2,512+0*16+8(%edi); punpckhbw %mm7,%mm3;
movq %mm1,640+0*16+0(%edi); movq %mm4,%mm2;
lea (%ebx,%edx,2),%ebx; punpcklbw %mm7,%mm4;
movq %mm3,640+0*16+8(%edi); punpckhbw %mm7,%mm2;
lea (%eax,%edx,2),%eax; movq %mm5,%mm3;
movq %mm4,512+1*16+0(%edi); punpcklbw %mm7,%mm5;
movq (%eax,%edx),%mm4; punpckhbw %mm7,%mm3;
movq %mm2,512+1*16+8(%edi); lea (%eax,%edx,2),%eax;
movq (%ebx,%edx),%mm2; lea (%ebx,%edx,2),%ebx;
movq %mm5,640+1*16+0(%edi); movq %mm0,%mm5;
movq %mm3,640+1*16+8(%edi); punpcklbw %mm7,%mm0;
movq (%eax),%mm1; punpckhbw %mm7,%mm5;
movq %mm0,512+2*16+0(%edi); movq %mm6,%mm3;
movq (%ebx),%mm0; punpcklbw %mm7,%mm6;
movq %mm5,512+2*16+8(%edi); punpckhbw %mm7,%mm3;
movq %mm6,640+2*16+0(%edi); movq %mm4,%mm5;
movq (%eax,%edx),%mm6; punpcklbw %mm7,%mm4;
movq %mm3,640+2*16+8(%edi); punpckhbw %mm7,%mm5;
movq %mm4,512+3*16+0(%edi); movq %mm2,%mm3;
movq (%ebx,%edx),%mm4; punpcklbw %mm7,%mm2;
movq %mm5,512+3*16+8(%edi); punpckhbw %mm7,%mm3;
movq %mm2,640+3*16+0(%edi); movq %mm1,%mm2;
movq %mm3,640+3*16+8(%edi); punpcklbw %mm7,%mm1;
movq (%eax,%edx,2),%mm3; punpckhbw %mm7,%mm2;
movq %mm1,512+4*16+0(%edi); lea (%eax,%edx,2),%eax;
movq (%ebx,%edx,2),%mm1; movq %mm0,%mm5;
lea (%ebx,%edx,2),%ebx; punpcklbw %mm7,%mm0;
movq %mm2,512+4*16+8(%edi); punpckhbw %mm7,%mm5;
movq %mm0,640+4*16+0(%edi); movq %mm6,%mm2;
movq (%eax,%edx),%mm0; punpcklbw %mm7,%mm6;
movq %mm5,640+4*16+8(%edi); punpckhbw %mm7,%mm2;
movq %mm6,512+5*16+0(%edi); movq %mm4,%mm6;
movq (%ebx,%edx),%mm5; punpcklbw %mm7,%mm4;
movq %mm2,512+5*16+8(%edi); punpckhbw %mm7,%mm6;
movq %mm3,%mm2; punpcklbw %mm7,%mm3;
movq %mm4,640+5*16+0(%edi); punpckhbw %mm7,%mm2;
movq %mm1,%mm4; punpcklbw %mm7,%mm1;
movq %mm6,640+5*16+8(%edi); punpckhbw %mm7,%mm4;
movq %mm3,512+6*16+0(%edi); movq %mm0,%mm3;
movq %mm2,512+6*16+8(%edi); punpcklbw %mm7,%mm0;
movq %mm1,640+6*16+0(%edi); punpckhbw %mm7,%mm3;
movq (%esi,%edx,2),%mm1; movq %mm5,%mm2;
movq %mm0,512+7*16+0(%edi); punpcklbw %mm7,%mm5;
movq (%esi),%mm0; punpckhbw %mm7,%mm2;
movq %mm3,512+7*16+8(%edi); movl %esi,%eax;
movl %esi,%ebx; movl $7,%ecx;
movq %mm4,640+6*16+8(%edi); movq %mm7,%mm6;
movq %mm5,640+7*16+0(%edi); movq %mm7,%mm5;
movq %mm2,640+7*16+8(%edi); shll $1,%edx;
/* Y left 8 x 16 */
/* From here on %mm5 = 0 (unpack), %mm6 = running sum (words),
   %mm7 = running sum of squares (dwords). Each pass handles two
   lines; seven passes in the loop, the eighth is unrolled below */
1:
movq %mm0,%mm2; punpcklbw %mm5,%mm0;
paddw %mm0,%mm6; punpckhbw %mm5,%mm2;
paddw %mm2,%mm6; movq %mm0,%mm3;
movq %mm2,%mm4; pmaddwd %mm3,%mm3;
movq %mm0,(%edi); lea (%eax,%edx,2),%eax;
movq (%eax),%mm0; pmaddwd %mm4,%mm4;
movq %mm2,8(%edi); paddd %mm3,%mm7;
movq %mm1,%mm2; addl $32,%edi;
paddd %mm4,%mm7; punpcklbw %mm5,%mm1;
paddw %mm1,%mm6; punpckhbw %mm5,%mm2;
paddw %mm2,%mm6; movq %mm1,%mm3;
movq %mm2,%mm4; pmaddwd %mm3,%mm3;
movq %mm1,-16(%edi); pmaddwd %mm4,%mm4;
movq (%eax,%edx),%mm1; decl %ecx;
movq %mm2,-8(%edi); paddd %mm3,%mm7;
paddd %mm4,%mm7; jne 1b;
/* Last two lines of the left half; %eax is reset to the top of the
   right half (%esi + 8) and its first two lines are preloaded */
movl %esi,%eax; movq %mm0,%mm2;
addl $8,%eax; punpcklbw %mm5,%mm0;
paddw %mm0,%mm6; punpckhbw %mm5,%mm2;
paddw %mm2,%mm6; movq %mm0,%mm3;
movq %mm2,%mm4; pmaddwd %mm3,%mm3;
movq %mm0,(%edi); pmaddwd %mm4,%mm4;
movq (%eax),%mm0; paddd %mm3,%mm7;
movq %mm2,8(%edi); movq %mm1,%mm2;
paddd %mm4,%mm7; punpcklbw %mm5,%mm1;
paddw %mm1,%mm6; punpckhbw %mm5,%mm2;
paddw %mm2,%mm6; movq %mm1,%mm3;
movq %mm2,%mm4; pmaddwd %mm3,%mm3;
movq %mm1,16(%edi); pmaddwd %mm4,%mm4;
movq %mm2,24(%edi); addl $32,%edi
paddd %mm3,%mm7; movl $7,%ecx;
movq (%eax,%edx),%mm1; paddd %mm4,%mm7;
/* Y right 8 x 16 */
2:
movq %mm0,%mm2; punpcklbw %mm5,%mm0;
paddw %mm0,%mm6; punpckhbw %mm5,%mm2;
paddw %mm2,%mm6; movq %mm0,%mm3;
movq %mm2,%mm4; pmaddwd %mm3,%mm3;
movq %mm0,(%edi); lea (%eax,%edx,2),%eax;
movq (%eax),%mm0; pmaddwd %mm4,%mm4;
movq %mm2,8(%edi); paddd %mm3,%mm7;
movq %mm1,%mm2; addl $32,%edi;
paddd %mm4,%mm7; punpcklbw %mm5,%mm1;
paddw %mm1,%mm6; punpckhbw %mm5,%mm2;
paddw %mm2,%mm6; movq %mm1,%mm3;
movq %mm2,%mm4; pmaddwd %mm3,%mm3;
movq %mm1,-16(%edi); pmaddwd %mm4,%mm4;
movq (%eax,%edx),%mm1; decl %ecx;
movq %mm2,-8(%edi); paddd %mm3,%mm7;
paddd %mm4,%mm7; jne 2b;
/* Last two lines of the right half, then the horizontal reduction of
   both accumulators and the register restore, interleaved */
movq %mm0,%mm2; punpcklbw %mm5,%mm0;
paddw %mm0,%mm6; punpckhbw %mm5,%mm2;
paddw %mm2,%mm6; movq %mm0,%mm3;
movq %mm2,%mm4; pmaddwd %mm3,%mm3;
movq %mm0,(%edi); pmaddwd %mm4,%mm4;
movq %mm2,8(%edi); paddd %mm3,%mm7;
movq %mm1,%mm2; punpcklbw %mm5,%mm1;
paddd %mm4,%mm7; punpckhbw %mm5,%mm2;
paddw %mm1,%mm6; movq %mm1,%mm3;
paddw %mm2,%mm6; pmaddwd %mm3,%mm3;
movq %mm2,%mm4; paddd %mm3,%mm7;
movq %mm1,16(%edi); pmaddwd %mm4,%mm4;
movq %mm2,24(%edi); movq %mm6,%mm2;
paddd %mm4,%mm7; psllq $32,%mm6;
paddw %mm2,%mm6; movq %mm7,%mm4;
movq %mm6,%mm3; pslld $16,%mm6;
paddw %mm3,%mm6; psrlq $32,%mm7;
paddd %mm4,%mm7; psrlq $48,%mm6;
movd %mm6,%eax; pslld $8,%mm7;
/* %eax = sum; mull squares it (clobbering %edx, which is popped
   afterwards); %edi = 256 * sum of squares; the result is negated
   to 256 * sum_of_squares - sum * sum */
popl %ecx; mull %eax;
popl %ebx; movd %mm7,%edi;
popl %esi; subl %edi,%eax;
popl %edx; negl %eax;
popl %edi;
ret
# int
# mmx_YUYV_422(unsigned char *buffer, unsigned char *unused)
#
# Splits the macroblock addressed by the globals mb_row, mb_col of a
# packed source picture into mblock. The source address is
#   buffer + mb_row * 16 * filter_y_pitch + mb_col * 16 * 2 + filter_y_offs,
# 16 lines of 32 bytes are read.
#
# Every source word is masked with the constant c255 (defined elsewhere,
# presumably 0x00ff in each word, i.e. it keeps the luma byte - confirm)
# and stored as a 16 bit sample: the first 16 bytes of a line go to
# mblock + 0 (16 bytes per line), the second 16 bytes to mblock + 256.
# The remaining bytes of the first line of each line pair are sorted by
# word shuffles into two groups and stored at mblock + 512 and
# mblock + 640 (16 bytes per line pair); those of the second line of a
# pair are discarded.
#
# %mm6 accumulates the sum of the masked samples, %mm7 the sum of their
# squares. The value returned in %eax is 256 * sum_of_squares - sum * sum.
#
# The second stack argument is never read. %ecx, %edx, %ebx and %edi are
# saved on entry and restored on exit. The MMX registers are clobbered
# and no emms is executed in this routine.
.text
.align 16
.globl mmx_YUYV_422
mmx_YUYV_422:
leal -16(%esp),%esp;
movl %ecx,12(%esp);
movl mb_col,%eax; // + mb_col * 16 * 2
movl %edx,8(%esp);
sall $5,%eax;
movl filter_y_pitch,%edx;
movl %ebx,4(%esp);
imull mb_row,%edx; // + mb_row * 16 * filter_y_pitch
movl %edi,(%esp);
pxor %mm6,%mm6;
sall $4,%edx;
movq c255,%mm5;
addl %edx,%eax;
addl filter_y_offs,%eax; // + filter_y_offs
pxor %mm7,%mm7;
addl 1*4+16(%esp),%eax; // + buffer
movq (%eax),%mm0;
movl $mblock+512,%ebx; // mblock[0][4] (chroma)
movl $mblock,%edi;
movl $7,%ecx;
movl filter_y_pitch,%edx;
/* One pass per line pair: seven passes in the loop, the eighth is
   unrolled after it. %eax advances by two lines, %edi by 32 bytes and
   %ebx by 16 bytes per pass */
1:
movq 8(%eax),%mm4; movq %mm0,%mm3;
movq %mm0,%mm1; punpcklwd %mm4,%mm3;
movq %mm3,%mm2; punpckhwd %mm4,%mm1;
pand %mm5,%mm0; punpcklwd %mm1,%mm3;
pand %mm5,%mm4; punpckhwd %mm1,%mm2;
movq %mm0,(%edi); paddw %mm0,%mm6;
psrlw $8,%mm3; pmaddwd %mm0,%mm0;
movq %mm4,8(%edi); paddw %mm4,%mm6;
psrlw $8,%mm2; pmaddwd %mm4,%mm4;
movq %mm3,(%ebx); paddd %mm0,%mm7;
movq 16(%eax),%mm0; leal 32(%edi),%edi;
movq %mm2,128+0(%ebx); paddd %mm4,%mm7;
movq 24(%eax),%mm4; movq %mm0,%mm3;
movq %mm0,%mm1; punpcklwd %mm4,%mm3;
decl %ecx; punpckhwd %mm4,%mm1;
movq %mm3,%mm2; punpcklwd %mm1,%mm3;
pand %mm5,%mm0; punpckhwd %mm1,%mm2;
movq (%eax,%edx),%mm1; pand %mm5,%mm4;
movq %mm0,256-32(%edi); paddw %mm0,%mm6;
psrlw $8,%mm3; pmaddwd %mm0,%mm0;
movq %mm4,256+8-32(%edi); paddw %mm4,%mm6;
psrlw $8,%mm2; pmaddwd %mm4,%mm4;
movq %mm3,8(%ebx); paddd %mm0,%mm7;
movq (%eax,%edx,2),%mm0; pand %mm5,%mm1;
movq %mm2,128+8(%ebx); paddd %mm4,%mm7;
movq 8(%eax,%edx),%mm2; paddw %mm1,%mm6;
movq 16(%eax,%edx),%mm3; pand %mm5,%mm2;
movq %mm1,16-32(%edi); paddw %mm2,%mm6;
movq 24(%eax,%edx),%mm4; pmaddwd %mm1,%mm1;
leal (%eax,%edx,2),%eax; pand %mm5,%mm3;
movq %mm2,24-32(%edi); pmaddwd %mm2,%mm2;
pand %mm5,%mm4; paddw %mm3,%mm6;
paddd %mm1,%mm7; paddw %mm4,%mm6;
movq %mm4,%mm1; pmaddwd %mm4,%mm4;
movq %mm3,256+16-32(%edi); pmaddwd %mm3,%mm3;
leal 16(%ebx),%ebx; paddd %mm2,%mm7;
movq %mm1,256+24-32(%edi); paddd %mm4,%mm7;
paddd %mm3,%mm7; jne 1b
/* Final line pair (no read ahead), followed by the horizontal
   reduction of both accumulators */
movq 8(%eax),%mm4; movq %mm0,%mm3;
movq %mm0,%mm1; punpcklwd %mm4,%mm3;
movq %mm3,%mm2; punpckhwd %mm4,%mm1;
pand %mm5,%mm0; punpcklwd %mm1,%mm3;
pand %mm5,%mm4; punpckhwd %mm1,%mm2;
movq %mm0,(%edi); paddw %mm0,%mm6;
psrlw $8,%mm3; pmaddwd %mm0,%mm0;
movq %mm4,8(%edi); paddw %mm4,%mm6;
psrlw $8,%mm2; pmaddwd %mm4,%mm4;
movq %mm3,(%ebx); paddd %mm0,%mm7;
movq 16(%eax),%mm0; leal 32(%edi),%edi;
movq %mm2,128+0(%ebx); paddd %mm4,%mm7;
movq 24(%eax),%mm4; movq %mm0,%mm3;
movq %mm0,%mm1; punpcklwd %mm4,%mm3;
movq %mm3,%mm2; punpckhwd %mm4,%mm1;
pand %mm5,%mm0; punpcklwd %mm1,%mm3;
movq %mm0,256-32(%edi); punpckhwd %mm1,%mm2;
movq (%eax,%edx),%mm1; pand %mm5,%mm4;
psrlw $8,%mm3; paddw %mm0,%mm6;
movq %mm4,256+8-32(%edi); pmaddwd %mm0,%mm0;
psrlw $8,%mm2; paddw %mm4,%mm6;
movq %mm3,8(%ebx); pmaddwd %mm4,%mm4;
paddd %mm0,%mm7; pand %mm5,%mm1;
movq %mm2,128+8(%ebx); paddd %mm4,%mm7;
movq 8(%eax,%edx),%mm2; paddw %mm1,%mm6;
movq 16(%eax,%edx),%mm3; pand %mm5,%mm2;
movq %mm1,16-32(%edi); paddw %mm2,%mm6;
movq 24(%eax,%edx),%mm4; pmaddwd %mm1,%mm1;
movq %mm2,24-32(%edi); pand %mm5,%mm3;
paddw %mm3,%mm6; pmaddwd %mm2,%mm2;
paddd %mm1,%mm7; pand %mm5,%mm4;
movq %mm3,256+16-32(%edi); paddw %mm4,%mm6;
paddd %mm2,%mm7; pmaddwd %mm3,%mm3;
movq %mm4,256+24-32(%edi); pmaddwd %mm4,%mm4;
movq %mm6,%mm2; psllq $32,%mm6;
paddd %mm3,%mm7; paddw %mm2,%mm6;
paddd %mm4,%mm7; movq %mm6,%mm3;
movq %mm7,%mm5; psrlq $32,%mm7;
paddd %mm5,%mm7; pslld $16,%mm6;
paddw %mm3,%mm6; pslld $8,%mm7;
movd %mm7,%ecx; psrlq $48,%mm6;
movd %mm6,%eax;
/* %eax = sum, %ecx = 256 * sum of squares; mull clobbers %edx, which
   is restored by the pop that follows */
popl %edi;
mull %eax;
popl %ebx;
subl %ecx,%eax;
popl %edx;
negl %eax;
popl %ecx;
ret
# int
# mmx_YUYV_422_2v(unsigned char *buffer, unsigned char *unused)
#
# Entry code: computes two source pointers for the macroblock addressed
# by the globals mb_row, mb_col,
#   s1 (%esi) = buffer + filter_y_pitch * mb_row * 32 + mb_col * 32
#               + filter_y_offs,
#   s2 (%eax) = s1 + filter_y_pitch,
# sets the line step %edx to filter_y_pitch * 2 and falls through into
# filter_s2t. The second stack argument is never read.
#
# filter_s2t: shared tail, also entered by jump from mmx_YUYV_422_ti and
# mmx_YUYV_422_vi. Expects
#   %esi = s1, %eax = s2, %edx = line step,
#   the caller's %edx, %esi, %ebx, %ecx, %edi saved at 16, 12, 8, 4 and
#   0(%esp) with the return address at 20(%esp).
# Eight passes are made. Each pass reads 32 bytes from the four lines
# s1, s2, s1 + step and s2 + step and then advances s1 and s2 by two
# steps. With c255, c1 and c2 being constants defined elsewhere
# (presumably 255, 1 and 2 in every word - confirm):
#   - the bytes kept by the c255 mask are averaged between s1 and s2 as
#     (a + b + 1) >> 1, giving two output lines per pass, stored as
#     words at mblock + 0 (first 16 source bytes) and mblock + 256
#     (second 16 source bytes), 16 bytes per line;
#   - the high bytes of the words of all four lines are added, sorted
#     into two groups by word shuffles, rounded as (sum + 2) >> 2 and
#     stored at mblock + 512 and mblock + 640, 16 bytes per pass.
# %mm6 accumulates the sum of the averaged samples, %mm7 the sum of
# their squares. The value returned in %eax is
# 256 * sum_of_squares - sum * sum.
#
# The saved registers are restored before returning. The MMX registers
# are clobbered and no emms is executed in this routine.
.text
.align 16
.globl mmx_YUYV_422_2v
mmx_YUYV_422_2v:
leal -20(%esp),%esp;
movl %edx,16(%esp);
movl filter_y_pitch,%edx;
movl %esi,12(%esp);
movl mb_row,%esi;
movl %ebx,8(%esp);
sall $5,%esi;
movl %ecx,4(%esp);
imull %edx,%esi;
movl mb_col,%eax;
movl %edi,(%esp);
sall $5,%eax;
addl filter_y_offs,%esi;
addl %eax,%esi;
addl 1*4+20(%esp),%esi; // s1 = buffer + filter_y_pitch * mb_row * 32 + mb_col * 32 + filter_y_offs
leal (%esi,%edx),%eax; // s2 = s1 + filter_y_pitch
sall $1,%edx; // filter_y_pitch * 2
.align 16
filter_s2t:
movl $mblock,%edi; movl $8,%ecx;
movl $mblock+512,%ebx; pxor %mm6,%mm6;
movq c255,%mm5; pxor %mm7,%mm7;
1:
movq (%esi),%mm0; leal 16(%ebx),%ebx;
movq (%eax),%mm1; movq %mm0,%mm2;
pand %mm5,%mm0; movq %mm1,%mm4;
paddw c1,%mm0; pand %mm5,%mm1;
movq (%eax,%edx),%mm3; paddw %mm0,%mm1;
movq (%esi,%edx),%mm0; psrlw $1,%mm1;
paddw %mm1,%mm6; psrlw $8,%mm2;
movq %mm1,(%edi); pmaddwd %mm1,%mm1;
decl %ecx; psrlw $8,%mm4;
paddw %mm2,%mm4; movq %mm0,%mm2;
paddd %mm1,%mm7; pand %mm5,%mm0;
paddw c1,%mm0; movq %mm3,%mm1;
pand %mm5,%mm1; psrlw $8,%mm2;
paddw %mm0,%mm1; psrlw $8,%mm3;
movq 8(%esi),%mm0; psrlw $1,%mm1;
paddw %mm1,%mm6; paddw %mm2,%mm4;
movq %mm1,16(%edi); pmaddwd %mm1,%mm1;
movq 8(%eax),%mm5; movq %mm0,%mm2;
paddw %mm3,%mm4; movq %mm5,%mm3;
pand c255,%mm0; paddd %mm1,%mm7;
paddw c1,%mm0; psrlw $8,%mm2;
pand c255,%mm3; psrlw $8,%mm5;
paddw %mm0,%mm3; paddw %mm2,%mm5;
movq 8(%esi,%edx),%mm0; psrlw $1,%mm3;
paddw %mm3,%mm6; movq %mm0,%mm2;
movq %mm3,8(%edi); pmaddwd %mm3,%mm3;
pand c255,%mm0; psrlw $8,%mm2;
movq 8(%eax,%edx),%mm1; paddw %mm2,%mm5;
paddd %mm3,%mm7; movq %mm1,%mm3;
pand c255,%mm1; psrlw $8,%mm3;
paddw %mm0,%mm1; paddw %mm3,%mm5;
paddw c1,%mm1; movq %mm4,%mm3;
movq 16(%esi),%mm0; psrlw $1,%mm1;
paddw %mm1,%mm6; punpcklwd %mm5,%mm4;
movq %mm1,24(%edi); pmaddwd %mm1,%mm1;
punpckhwd %mm5,%mm3; movq %mm4,%mm5;
movq %mm0,%mm2; punpcklwd %mm3,%mm4;
paddw c2,%mm4; punpckhwd %mm3,%mm5;
paddw c2,%mm5; paddd %mm1,%mm7;
movq 16(%eax),%mm1; psraw $2,%mm4;
movq c255,%mm3; psraw $2,%mm5;
movq %mm4,-16(%ebx); pand %mm3,%mm0;
movq %mm5,128+0-16(%ebx); movq %mm1,%mm4;
paddw c1,%mm0; pand %mm3,%mm1;
paddw %mm0,%mm1; psrlw $8,%mm2;
movq 16(%esi,%edx),%mm0; psrlw $1,%mm1;
movq %mm1,256(%edi); paddw %mm1,%mm6;
pmaddwd %mm1,%mm1; psrlw $8,%mm4;
movq 16(%eax,%edx),%mm5; paddw %mm2,%mm4;
movq %mm0,%mm2; pand %mm3,%mm0;
paddd %mm1,%mm7; movq %mm5,%mm1;
pand %mm3,%mm1; psrlw $8,%mm2;
paddw %mm0,%mm1; psrlw $8,%mm5;
paddw c1,%mm1; paddw %mm2,%mm4;
movq 24(%esi),%mm0; psrlw $1,%mm1;
movq %mm1,256+16(%edi); paddw %mm1,%mm6;
pmaddwd %mm1,%mm1; paddw %mm5,%mm4;
movq 24(%eax),%mm3; movq %mm0,%mm2;
pand c255,%mm0; movq %mm3,%mm5;
paddw c1,%mm0; psrlw $8,%mm2;
pand c255,%mm5; psrlw $8,%mm3;
paddw %mm0,%mm5; paddw %mm2,%mm3;
movq 24(%esi,%edx),%mm0; psrlw $1,%mm5;
movq %mm5,256+8(%edi); paddw %mm5,%mm6;
paddd %mm1,%mm7; pmaddwd %mm5,%mm5;
movq 24(%eax,%edx),%mm1; movq %mm0,%mm2;
psrlw $8,%mm2; leal (%esi,%edx,2),%esi;
pand c255,%mm0; paddw %mm2,%mm3;
paddd %mm5,%mm7; leal (%eax,%edx,2),%eax;
movq %mm1,%mm5; movq %mm4,%mm2;
pand c255,%mm1; psrlw $8,%mm5;
paddw %mm5,%mm3; paddw %mm0,%mm1;
paddw c1,%mm1; punpcklwd %mm3,%mm4;
psrlw $1,%mm1; punpckhwd %mm3,%mm2;
movq %mm4,%mm3; punpcklwd %mm2,%mm4;
paddw %mm1,%mm6; punpckhwd %mm2,%mm3;
movq %mm1,256+24(%edi); pmaddwd %mm1,%mm1;
paddw c2,%mm4; leal 32(%edi),%edi;
paddw c2,%mm3; psraw $2,%mm4;
movq c255,%mm5; psraw $2,%mm3;
movq %mm4,8-16(%ebx); paddd %mm1,%mm7;
movq %mm3,128+8-16(%ebx); jne 1b;
/* Horizontal reduction: pmaddwd with c1 folds the word sums in %mm6
   into dwords; %edx = sum, %eax = 256 * sum of squares. The register
   restores are interleaved, %edx being popped last because it holds
   the sum until the subtraction */
pmaddwd c1,%mm6; movq %mm7,%mm0;
psrlq $32,%mm7; popl %edi;
paddd %mm0,%mm7; popl %ecx;
movq %mm6,%mm5; psrlq $32,%mm6;
paddd %mm5,%mm6; pslld $8,%mm7;
movd %mm6,%edx; popl %ebx;
movd %mm7,%eax; imul %edx,%edx;
subl %edx,%eax; popl %esi;
popl %edx;
ret
# int
# mmx_YUYV_422_ti(unsigned char *buffer1, unsigned char *buffer2)
#
# Entry code for the shared tail filter_s2t that pairs the same
# macroblock of two source pictures. It saves %edx, %esi, %ebx, %ecx and
# %edi in the stack layout filter_s2t restores from, then sets
#   %esi = s1 = buffer1 + filter_y_pitch * mb_row * 16 + mb_col * 32
#               + filter_y_offs,
#   %eax = s2 = buffer2 + the same offset,
#   %edx = filter_y_pitch (line step),
# and jumps to filter_s2t, which produces the macroblock and the return
# value. With this setup filter_s2t combines each line of buffer1 with
# the line at the same position in buffer2.
#
# The second argument is fetched before the stack pointer is lowered,
# hence the offset 2*4 without the frame size added.
.text
.align 16
.globl mmx_YUYV_422_ti
mmx_YUYV_422_ti:
movl 2*4(%esp),%eax;
leal -20(%esp),%esp;
movl %edx,16(%esp);
movl filter_y_pitch,%edx; // filter_y_pitch
movl %esi,12(%esp);
movl mb_row,%esi;
movl %ebx,8(%esp);
sall $4,%esi;
movl %ecx,4(%esp);
imull %edx,%esi;
movl %edi,(%esp);
movl mb_col,%ecx;
sall $5,%ecx;
addl filter_y_offs,%esi;
addl %ecx,%esi;
addl %esi,%eax; // s2 = buffer2 + filter_y_pitch * mb_row * 16 + mb_col * 32 + filter_y_offs
addl 1*4+20(%esp),%esi; // s1 = buffer1 + filter_y_pitch * mb_row * 16 + mb_col * 32 + filter_y_offs
jmp filter_s2t;
# int
# mmx_YUYV_422_vi(unsigned char *buffer, unsigned char *unused)
#
# Entry code for the shared tail filter_s2t that pairs every source line
# with the line below it.
#
# When mb_row is not less than mb_last_row the partially built stack
# frame is dropped again (only %esi had been modified) and control is
# passed to mmx_YUYV_422, so the bottom macroblock row is processed
# without reading a line below it.
#
# Otherwise %edx, %esi, %ebx, %ecx and %edi are saved in the stack layout
# filter_s2t restores from and
#   %esi = s1 = buffer + filter_y_pitch * mb_row * 16 + mb_col * 32
#               + filter_y_offs,
#   %eax = s2 = s1 + filter_y_pitch,
#   %edx = filter_y_pitch (line step),
# are set up before jumping to filter_s2t, which produces the macroblock
# and the return value.
#
# NOTE(review): the load of the second argument into %eax below is dead,
# %eax is overwritten by the leal that computes s2.
.text
.align 16
.globl mmx_YUYV_422_vi
mmx_YUYV_422_vi:
leal -20(%esp),%esp;
movl %esi,12(%esp);
movl mb_row,%esi;
movl %edx,16(%esp);
cmpl mb_last_row,%esi;
movl %ebx,8(%esp);
jl 1f;
movl 12(%esp),%esi;
leal 20(%esp),%esp;
jmp mmx_YUYV_422;
.p2align 4,,7
1:
movl filter_y_pitch,%edx; // filter_y_pitch
movl %ecx,4(%esp);
movl %edi,(%esp);
sall $4,%esi;
movl 2*4+20(%esp),%eax;
movl mb_col,%ecx;
imull %edx,%esi;
sall $5,%ecx;
addl filter_y_offs,%esi;
addl %ecx,%esi;
addl 1*4+20(%esp),%esi; // s1 = buffer + filter_y_pitch * mb_row * 16 + mb_col * 32 + filter_y_offs
leal (%esi,%edx),%eax; // s2 = buffer + filter_y_pitch * (mb_row * 16 + 1) + mb_col * 32 + filter_y_offs
jmp filter_s2t;
--- NEW FILE ---
/*
* MPEG-1 Real Time Encoder
*
* Copyright (C) 1999-2001 Michael H. Schimek
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* $Id: libvideo.h,v 1.1 2001/12/04 23:58:09 mswitch Exp $ */
#ifndef LIBVIDEO_H
#define LIBVIDEO_H

/*
 * Public interface of the video codec library. Guarded against
 * multiple inclusion like the other headers of this directory.
 */

#include "../rtepriv.h"
#include "../systems/libsystems.h"
#include "video.h" // XXX REMOVE

/* Codec class of the MPEG-1 video encoder (defined elsewhere). */
extern rte_codec_class mp1e_mpeg1_video_codec;

/*
 * Initializes the video encoder.
 *
 * codec                     - codec instance to initialize
 * cpu_type                  - CPU type identifier; presumably selects the
 *                             optimized routines - confirm against the
 *                             definition
 * coded_width, coded_height - size of the encoded picture
 * motion_min, motion_max    - motion search limits; units are not visible
 *                             from this header
 * capture_fifo              - fifo delivering the captured pictures
 * module, mux               - output module id and multiplexer the coded
 *                             stream is attached to (presumed - confirm)
 */
extern void
video_init(rte_codec *codec, int cpu_type,
	   int coded_width, int coded_height,
	   int motion_min, int motion_max,
	   fifo *capture_fifo,
	   unsigned int module, multiplexer *mux);

#endif /* LIBVIDEO_H */
--- NEW FILE ---
/*
* MPEG-1 Real Time Encoder
*
* Copyright (C) 1999-2000 Michael H. Schimek
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* $Id: mblock.c,v 1.1 2001/12/04 23:58:09 mswitch Exp $ */
#include "video.h"
#include "../common/math.h"
int mb_col, mb_row, // current
mb_width, mb_height,
mb_last_col, mb_last_row,
mb_num;
uint8_t * newref; /* future reference frame buffer */
/*
* Packed reference buffer format is
* [mb_height]
* [mb_width] - for all macroblocks of a frame
* [6] - Y0, Y2, Y1, Y3, Cb, Cr
* [8][8] - 8 bit unsigned samples, e. g. according to ITU-R Rec. 601
*/
struct mb_addr mb_address __attribute__ ((aligned (MIN(CACHE_LINE, 64))));
short mblock[7][6][8][8] __attribute__ ((aligned (4096)));
/*
* Buffer for current macroblock
* [7] - intra, forward, backward, interpolated
* [6] - Y0, Y2, Y1, Y3, Cb, Cr
* [8][8] - samples, block difference, dct coefficients
*/
/*
 * Derives the macroblock geometry from the picture size in pixels.
 * Width and height are first limited to 1 ... MAX_WIDTH and
 * 1 ... MAX_HEIGHT, then rounded up to whole 16 x 16 macroblocks.
 * Sets mb_width, mb_height, mb_last_col, mb_last_row and mb_num.
 */
void
video_coding_size(int width, int height)
{
	int cols = (saturate(width, 1, MAX_WIDTH) + 15) >> 4;
	int rows = (saturate(height, 1, MAX_HEIGHT) + 15) >> 4;

	mb_width = cols;
	mb_height = rows;

	/* Index of the last macroblock column and row */
	mb_last_col = cols - 1;
	mb_last_row = rows - 1;

	/* Macroblocks per picture */
	mb_num = cols * rows;
}
/*
 * B picture: encode & discard; I or P picture must be encoded ahead of
 * all B pictures forward referencing the I or P picture, i.e. we will
 * stack as many captured pictures as there are B pictures in a row
 * plus the following I or P. The capture module may add one or two
 * more for double buffering.
 *
 * gop_sequence is a string of the picture types 'I', 'P' and 'B';
 * scanning stops at the first other character (normally the
 * terminating NUL) or after 1024 characters.
 *
 * Returns the length of the longest run of consecutive 'B' pictures
 * plus one. A run that ends the string is counted as well: its B
 * pictures still have to wait for a following I or P picture
 * (presumably the first picture of the repeated sequence - confirm),
 * so ignoring it would report too small a look-ahead for sequences
 * such as "IBB".
 */
int
video_look_ahead(char *gop_sequence)
{
	int longest_run = 0;
	int run = 0;
	int i;

	for (i = 0; i < 1024; i++) {
		char type = gop_sequence[i];

		if (type == 'B') {
			run++;
		} else if (type == 'I' || type == 'P') {
			if (run > longest_run)
				longest_run = run;
			run = 0;
		} else {
			break; /* end of sequence */
		}
	}

	/* Account for B pictures at the very end of the sequence */
	if (run > longest_run)
		longest_run = run;

	return longest_run + 1;
}
--- NEW FILE ---
/*
* MPEG-1 Real Time Encoder
*
* Copyright (C) 1999-2000 Michael H. Schimek
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* $Id: mblock.h,v 1.1 2001/12/04 23:58:09 mswitch Exp $ */
#ifndef MBLOCK_H
#define MBLOCK_H
/* OBSOLETE */
#endif // MBLOCK_H
--- NEW FILE ---
/*
* MPEG-1 Real Time Encoder
* Motion compensation V3.1.39
*
* Copyright (C) 2001 Michael H. Schimek
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
[...3899 lines suppressed...]
for (i = 0; i < 8; i++) {
for (j = 0; j < 8; j++) {
mblock[1][4][i][j] = mblock[0][4][i][j] - p1[j];
mblock[2][4][i][j] = mblock[0][4][i][j] - p2[j];
mblock[3][4][i][j] = mblock[0][4][i][j] - ((p1[j] + p2[j] + 1) >> 1);
mblock[1][5][i][j] = mblock[0][5][i][j] - p1[j + mb_address.block[5].offset];
mblock[2][5][i][j] = mblock[0][5][i][j] - p2[j + mb_address.block[5].offset];
mblock[3][5][i][j] = mblock[0][5][i][j] - ((p1[j + mb_address.block[5].offset] + p2[j + mb_address.block[5].offset] + 1) >> 1);
}
p1 += mb_address.block[4].pitch;
p2 += mb_address.block[4].pitch;
}
*vmc1 = sf * 256;
*vmc2 = sb * 256;
return si * 256;
}
--- NEW FILE ---
/*
* MPEG-1 Real Time Encoder
*
* Copyright (C) 2001 Michael H. Schimek
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* $Id: motion.h,v 1.1 2001/12/04 23:58:09 mswitch Exp $ */
#ifndef MOTION_H
#define MOTION_H
#include "vlc.h"
#include "mblock.h"
#define reg(n) __attribute__ ((regparm (n)))
extern int motion;
extern int mm_buf_offs;
/* motion.c */
typedef unsigned int (search_fn)(int *dhx, int *dhy, unsigned char *from,
int x, int y, int range, short dest[6][8][8]);
extern search_fn mmx_search, _3dn_search, sse_search, sse2_search;
extern search_fn * search;
extern unsigned int predict_forward_packed(unsigned char *from) reg(1);
extern unsigned int predict_forward_planar(unsigned char *from) reg(1);
extern unsigned int predict_backward_packed(unsigned char *from) reg(1);
extern unsigned int predict_bidirectional_packed(unsigned char *from1, unsigned char *from2, unsigned int *vmc1, unsigned int *vmc2);
extern unsigned int predict_bidirectional_planar(unsigned char *from1, unsigned char *from2, unsigned int *vmc1, unsigned int *vmc2);
extern unsigned int predict_forward_motion(struct motion *M, unsigned char *, int);
extern unsigned int predict_bidirectional_motion(mpeg1_context *mpeg1, struct motion *M, unsigned int *, unsigned int *, int);
extern void zero_forward_motion(void);
extern void t7(int range, int dist);
/* motion_mmx.s */
/*
* NB we use mmx_predict_forward also for backward prediction (in B pictures
* within a closed gop, low profile) discarding the reconstruction.
* No mmx_predict_bidi_planar, use reference version.
*/
extern unsigned int mmx_predict_forward_packed(unsigned char *) reg(1);
extern unsigned int mmx_predict_forward_planar(unsigned char *) reg(1);
extern unsigned int mmx_predict_bidirectional_packed(unsigned char *from1, unsigned char *from2, unsigned int *vmc1, unsigned int *vmc2);
/*
* Attention mmx_mbsum uses mblock[4] as permanent scratch in picture_i|p();
* Source mblock[0], dest mm_mbrow and bp
*/
extern void mmx_mbsum(char * /* eax */) reg(1);
extern int mmx_sad(unsigned char t[16][16] /* eax */, unsigned char *p /* edx */, int pitch /* ecx */) reg(3);
extern int sse_sad(unsigned char t[16][16] /* eax */, unsigned char *p /* edx */, int pitch /* ecx */) reg(3);
/* <t> must be 16 byte aligned */
extern int sse2_sad(unsigned char t[16][16] /* eax */, unsigned char *p /* edx */, int pitch /* ecx */) reg(3);
#endif /* MOTION_H */
--- NEW FILE ---
#
# MPEG-1 Real Time Encoder
#
# Copyright (C) 1999-2000 Michael H. Schimek
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
[...1211 lines suppressed...]
psadbw 6*32+8(%eax),%mm1;
paddw %mm1,%mm7;
movq 8(%edx),%mm1;
psadbw 6*32+16(%eax),%mm2;
paddw %mm2,%mm7;
movq (%edx,%ecx),%mm2;
psadbw 6*32+24(%eax),%mm3;
paddw %mm3,%mm7;
movq 8(%edx,%ecx),%mm3;
leal (%edx,%ecx,2),%edx;
psadbw 7*32(%eax),%mm0;
paddw %mm0,%mm7;
psadbw 7*32+8(%eax),%mm1;
paddw %mm1,%mm7;
psadbw 7*32+16(%eax),%mm2;
paddw %mm2,%mm7;
psadbw 7*32+24(%eax),%mm3;
paddw %mm3,%mm7;
movd %mm7,%eax;
ret;
--- NEW FILE ---
#
# MPEG-1 Real Time Encoder
#
# Copyright (C) 2001 Michael H. Schimek
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# $Id: motion_sse2.s,v 1.1 2001/12/04 23:58:09 mswitch Exp $
# int sse2_sad(unsigned char t[16][16], unsigned char *p, int pitch)
#
# Sum of absolute differences between a 16 x 16 byte block and a
# reference area. Register arguments:
#   %eax = t, sixteen consecutive rows of 16 bytes; used as psadbw
#          memory operand and therefore required to be 16 byte aligned,
#   %edx = p, first row of the reference area (read with unaligned
#          loads),
#   %ecx = pitch, distance between reference rows in bytes.
# Reference rows are loaded up to four rows ahead of the psadbw that
# consumes them; %xmm7 accumulates the per-quadword sums, which are
# folded at the end. The result is returned by movd in %eax.
# Clobbers %edx, %xmm0 - %xmm3, %xmm6 and %xmm7.
.text
.align 16
.globl sse2_sad
# (%eax) assumed 16 byte aligned
sse2_sad:
movdqu (%edx),%xmm0;
pxor %xmm7,%xmm7;
movdqu (%edx,%ecx),%xmm1;
leal (%edx,%ecx,2),%edx;
movdqu (%edx),%xmm2;
movdqu (%edx,%ecx),%xmm3;
leal (%edx,%ecx,2),%edx;
psadbw (%eax),%xmm0;
paddw %xmm0,%xmm7;
movdqu (%edx),%xmm0;
psadbw 1*16(%eax),%xmm1;
paddw %xmm1,%xmm7;
movdqu (%edx,%ecx),%xmm1;
leal (%edx,%ecx,2),%edx;
psadbw 2*16(%eax),%xmm2;
paddw %xmm2,%xmm7;
movdqu (%edx),%xmm2;
psadbw 3*16(%eax),%xmm3;
paddw %xmm3,%xmm7;
movdqu (%edx,%ecx),%xmm3;
leal (%edx,%ecx,2),%edx;
psadbw 4*16(%eax),%xmm0;
paddw %xmm0,%xmm7;
movdqu (%edx),%xmm0;
psadbw 5*16(%eax),%xmm1;
paddw %xmm1,%xmm7;
movdqu (%edx,%ecx),%xmm1;
leal (%edx,%ecx,2),%edx;
psadbw 6*16(%eax),%xmm2;
paddw %xmm2,%xmm7;
movdqu (%edx),%xmm2;
psadbw 7*16(%eax),%xmm3;
paddw %xmm3,%xmm7;
movdqu (%edx,%ecx),%xmm3;
leal (%edx,%ecx,2),%edx;
psadbw 8*16(%eax),%xmm0;
paddw %xmm0,%xmm7;
movdqu (%edx),%xmm0;
psadbw 9*16(%eax),%xmm1;
paddw %xmm1,%xmm7;
movdqu (%edx,%ecx),%xmm1;
leal (%edx,%ecx,2),%edx;
psadbw 10*16(%eax),%xmm2;
paddw %xmm2,%xmm7;
movdqu (%edx),%xmm2;
psadbw 11*16(%eax),%xmm3;
paddw %xmm3,%xmm7;
movdqu (%edx,%ecx),%xmm3;
psadbw 12*16(%eax),%xmm0;
paddw %xmm0,%xmm7;
psadbw 13*16(%eax),%xmm1;
paddw %xmm1,%xmm7;
psadbw 14*16(%eax),%xmm2;
paddw %xmm2,%xmm7;
psadbw 15*16(%eax),%xmm3;
paddw %xmm3,%xmm7;
# The shuffle immediate evaluates to 0x4e, i.e. it swaps the two
# quadwords, so the add below folds the upper partial sum into the lower.
pshufd $1*64+0*16+3*4+2,%xmm7,%xmm6;
paddw %xmm6,%xmm7;
movd %xmm7,%eax;
ret;
--- NEW FILE ---
/*
* MPEG-1 Real Time Encoder
*
* Copyright (C) 1999-2000 Michael H. Schimek
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* $Id: mpeg.h,v 1.1 2001/12/04 23:58:09 mswitch Exp $ */
#ifndef MPEG_H
#define MPEG_H
/*
 * 32-bit start codes: the 0x000001 start code prefix followed by the
 * one-byte start code value.
 */
#define PICTURE_START_CODE 0x00000100L
#define SLICE_START_CODE 0x00000101L
#define USER_DATA_START_CODE 0x000001B2L
#define SEQUENCE_HEADER_CODE 0x000001B3L
#define SEQUENCE_ERROR_CODE 0x000001B4L
#define EXTENSION_START_CODE 0x000001B5L
#define SEQUENCE_END_CODE 0x000001B7L
#define GROUP_START_CODE 0x000001B8L
/*
 * Picture types.  Numbering starts at 1; the values are spelled out
 * because code may rely on them.
 */
typedef enum {
	I_TYPE = 1,
	P_TYPE = 2,
	B_TYPE = 3,
	D_TYPE = 4
} picture_type;
/*
 * Extension identifiers.  Values 1..5 and 7..10 are assigned; 6 is
 * deliberately left out of the sequence.
 */
typedef enum {
	SEQUENCE_EXTENSION_ID			= 1,
	SEQUENCE_DISPLAY_EXTENSION_ID		= 2,
	QUANT_MATRIX_EXTENSION_ID		= 3,
	COPYRIGHT_EXTENSION_ID			= 4,
	SEQUENCE_SCALABLE_EXTENSION_ID		= 5,
	PICTURE_DISPLAY_EXTENSION_ID		= 7,
	PICTURE_CODING_EXTENSION_ID		= 8,
	PICTURE_SPATIAL_SCALABLE_EXTENSION_ID	= 9,
	PICTURE_TEMPORAL_SCALABLE_EXTENSION_ID	= 10
} extension_id;
/*
 * Macroblock prediction types.  The values start at 0 and are written
 * out because they can serve as table indices.
 */
typedef enum {
	MB_INTRA	= 0,
	MB_FORWARD	= 1,
	MB_BACKWARD	= 2,
	MB_INTERP	= 3
} mb_type;
/* tables.c */
/* Constant tables taken from ISO 13818-2 */
extern const double frame_rate_value[16];
extern const unsigned char default_intra_quant_matrix[8][8];
extern const unsigned char default_inter_quant_matrix[8][8];
extern const unsigned char scan[2][8][8];
extern const unsigned char quantiser_scale[2][32];
/* Variable length codes in the packed form produced by the VLC() macro of tables.c */
extern const unsigned long long macroblock_address_increment_vlc[33];
extern const unsigned long long coded_block_pattern_vlc[64];
extern const unsigned long long motion_code_vlc[17];
extern const unsigned long long dct_dc_size_luma_vlc[12];
extern const unsigned long long dct_dc_size_chroma_vlc[12];
/* Unpacks a VLC() value: stores the code bits through the pointer, returns the bit length */
extern int mp1e_vlc(unsigned long long, unsigned int *);
/* Looks up the code for a run/level pair; returns its bit length or -1 if the pair has no code */
extern int mp1e_dct_coeff_vlc(int table, int run, int level, unsigned int *);
#endif /* MPEG_H */
--- NEW FILE ---
/*
* MPEG-1 Real Time Encoder
*
* Copyright (C) 1999-2001 Michael H. Schimek
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
[...2414 lines suppressed...]
rte_helper_reset_options(&mpeg1->codec);
return &mpeg1->codec;
}
/*
 * Codec class record for the MPEG-1 video encoder: the public
 * description (stream type, keyword, label) plus the constructor,
 * destructor and option callbacks defined in this file.
 */
rte_codec_class
mp1e_mpeg1_video_codec = {
.public = {
.stream_type = RTE_STREAM_VIDEO,
.keyword = "mpeg1_video",
.label = "MPEG-1 Video",
},
.new = codec_new,
.delete = codec_delete,
.option_enum = option_enum,
.option_get = option_get,
.option_set = option_set,
.option_print = option_print,
};
--- NEW FILE ---
/*
* MPEG-1 Real Time Encoder
*
* Copyright (C) 1999-2000 Michael H. Schimek
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* $Id: tables.c,v 1.1 2001/12/04 23:58:09 mswitch Exp $ */
#include "mpeg.h"
/*
 * ISO 13818-2 Table 6.4
 *
 * Frame rates in frames per second (presumably indexed by the frame
 * rate code). Entry 0 is 0; only entries 0..8 have initializers, so the
 * remaining entries of the 16-element array are zero as well.
 */
const double
frame_rate_value[16] =
{
0,
24000.0 / 1001, 24.0,
25.0, 30000.0 / 1001, 30.0,
50.0, 60000.0 / 1001, 60.0
};
/*
 * ISO 13818-2 6.3.11
 *
 * Default quantiser matrix for intra blocks, as an 8 x 8 array.
 */
const unsigned char
default_intra_quant_matrix[8][8] =
{
{ 8, 16, 19, 22, 26, 27, 29, 34 },
{ 16, 16, 22, 24, 27, 29, 34, 37 },
{ 19, 22, 26, 27, 29, 34, 34, 38 },
{ 22, 22, 26, 27, 29, 34, 37, 40 },
{ 22, 26, 27, 29, 32, 35, 40, 48 },
{ 26, 27, 29, 32, 35, 40, 48, 58 },
{ 26, 27, 29, 34, 38, 46, 56, 69 },
{ 27, 29, 35, 38, 46, 56, 69, 83 }
};
/*
 * Default quantiser matrix for non-intra blocks: every one of the
 * 8 x 8 entries is 16.
 */
const unsigned char
default_inter_quant_matrix[8][8] =
{
{ 16, 16, 16, 16, 16, 16, 16, 16 },
{ 16, 16, 16, 16, 16, 16, 16, 16 },
{ 16, 16, 16, 16, 16, 16, 16, 16 },
{ 16, 16, 16, 16, 16, 16, 16, 16 },
{ 16, 16, 16, 16, 16, 16, 16, 16 },
{ 16, 16, 16, 16, 16, 16, 16, 16 },
{ 16, 16, 16, 16, 16, 16, 16, 16 },
{ 16, 16, 16, 16, 16, 16, 16, 16 },
};
/*
 * ISO 13818-2 Figure 7-2, 7-3
 *
 * Coefficient scanning patterns: scan[0] is the zig-zag pattern,
 * scan[1] the alternate pattern. Each entry holds the position
 * (0 ... 63) in scanning order of the coefficient at that array
 * position, so each sub-table is a permutation of 0 ... 63.
 */
const unsigned char
scan[2][8][8] =
{
{
{ 0, 1, 5, 6, 14, 15, 27, 28 },
{ 2, 4, 7, 13, 16, 26, 29, 42 },
{ 3, 8, 12, 17, 25, 30, 41, 43 },
{ 9, 11, 18, 24, 31, 40, 44, 53 },
{ 10, 19, 23, 32, 39, 45, 52, 54 },
{ 20, 22, 33, 38, 46, 51, 55, 60 },
{ 21, 34, 37, 47, 50, 56, 59, 61 },
{ 35, 36, 48, 49, 57, 58, 62, 63 }
}, {
{ 0, 4, 6, 20, 22, 36, 38, 52 },
{ 1, 5, 7, 21, 23, 37, 39, 53 },
{ 2, 8, 19, 24, 34, 40, 50, 54 },
{ 3, 9, 18, 25, 35, 41, 51, 55 },
{ 10, 17, 26, 30, 42, 46, 56, 60 },
{ 11, 16, 27, 31, 43, 47, 57, 61 },
{ 12, 15, 28, 32, 44, 48, 58, 62 },
{ 13, 14, 29, 33, 45, 49, 59, 63 }
}
};
/*
 * ISO 13818-2 Table 7.6
 *
 * Quantiser scale for each of the 32 quantiser scale codes. Row 0 is
 * the linear mapping (2 * code), row 1 the non-linear mapping. Entry 0
 * of both rows is 0.
 */
const unsigned char
quantiser_scale[2][32] =
{
{ 0, 2, 4, 6, 8, 10, 12, 14,
16, 18, 20, 22, 24, 26, 28, 30,
32, 34, 36, 38, 40, 42, 44, 46,
48, 50, 52, 54, 56, 58, 60, 62 },
{ 0, 1, 2, 3, 4, 5, 6, 7,
8, 10, 12, 14, 16, 18, 20, 22,
24, 28, 32, 36, 40, 44, 48, 52,
56, 64, 72, 80, 88, 96, 104, 112 }
};
/*
 * Variable Length Codes
 *
 * VLC(bits) takes a code written as a string of binary digits and packs
 * it into one integer: the digits are pasted into an octal literal, so
 * binary digit k (counted from the right) ends up in bit 3 * k + 5, and
 * the number of digits is stored in the low 5 bits. mp1e_vlc() undoes
 * this packing.
 *
 * The literal carries an explicit ULL suffix. Without it a literal
 * that fits in int is shifted as an int, and << 5 then overflows for
 * codes whose 10th or 11th digit is 1 (e.g. VLC(1111111111)), losing
 * the leading bit of the code.
 */
#define VLC(bits) (((0 ## bits ## ULL) << 5) | (sizeof(# bits) - 1)) // MAX. 19 BITS!
/*
 * ISO 13818 Table B-1
 * Variable length codes for macroblock_address_increment
 *
 * Entry i is the code for an increment of i + 1 (the first entry,
 * '1', is increment 1). The macroblock_escape code is not part of the
 * table.
 */
const unsigned long long
macroblock_address_increment_vlc[33] =
{
VLC(1),
VLC(011),
VLC(010),
VLC(0011),
VLC(0010),
VLC(00011),
VLC(00010),
VLC(0000111),
VLC(0000110),
VLC(00001011),
VLC(00001010),
VLC(00001001),
VLC(00001000),
VLC(00000111),
VLC(00000110),
VLC(0000010111),
VLC(0000010110),
VLC(0000010101),
VLC(0000010100),
VLC(0000010011),
VLC(0000010010),
VLC(00000100011),
VLC(00000100010),
VLC(00000100001),
VLC(00000100000),
VLC(00000011111),
VLC(00000011110),
VLC(00000011101),
VLC(00000011100),
VLC(00000011011),
VLC(00000011010),
VLC(00000011001),
VLC(00000011000)
// VLC(00000001000) macroblock_escape code
};
/*
 * ISO 13818-2 Table B-9
 * Variable length codes for coded_block_pattern
 *
 * Indexed by the coded block pattern value 0 ... 63.
 */
const unsigned long long
coded_block_pattern_vlc[64] =
{
VLC(000000001), // This entry shall not be used with 4:2:0 chrominance structure
VLC(01011),
VLC(01001),
VLC(001101),
VLC(1101),
VLC(0010111),
VLC(0010011),
VLC(00011111),
VLC(1100),
VLC(0010110),
VLC(0010010),
VLC(00011110),
VLC(10011),
VLC(00011011),
VLC(00010111),
VLC(00010011),
VLC(1011),
VLC(0010101),
VLC(0010001),
VLC(00011101),
VLC(10001),
VLC(00011001),
VLC(00010101),
VLC(00010001),
VLC(001111),
VLC(00001111),
VLC(00001101),
VLC(000000011),
VLC(01111),
VLC(00001011),
VLC(00000111),
VLC(000000111),
VLC(1010),
VLC(0010100),
VLC(0010000),
VLC(00011100),
VLC(001110),
VLC(00001110),
VLC(00001100),
VLC(000000010),
VLC(10000),
VLC(00011000),
VLC(00010100),
VLC(00010000),
VLC(01110),
VLC(00001010),
VLC(00000110),
VLC(000000110),
VLC(10010),
VLC(00011010),
VLC(00010110),
VLC(00010010),
VLC(01101),
VLC(00001001),
VLC(00000101),
VLC(000000101),
VLC(01100),
VLC(00001000),
VLC(00000100),
VLC(000000100),
VLC(111),
VLC(01010),
VLC(01000),
VLC(001100)
};
/*
 * ISO 13818 Table B-10
 * Variable length codes for motion_code (not including sign bit)
 *
 * Indexed by the magnitude of motion_code, 0 ... 16.
 */
const unsigned long long
motion_code_vlc[17] =
{
VLC(1), // 0
VLC(01), // 1
VLC(001),
VLC(0001),
VLC(000011),
VLC(0000101),
VLC(0000100),
VLC(0000011),
VLC(000001011),
VLC(000001010),
VLC(000001001),
VLC(0000010001),
VLC(0000010000),
VLC(0000001111),
VLC(0000001110),
VLC(0000001101), // 15
VLC(0000001100) // 16
};
/*
 * ISO 13818-2 Table B-12
 * Variable length codes for dct_dc_size_luminance
 *
 * Indexed by dct_dc_size, 0 ... 11.
 */
const unsigned long long
dct_dc_size_luma_vlc[12] =
{
VLC(100),
VLC(00),
VLC(01),
VLC(101),
VLC(110),
VLC(1110),
VLC(11110),
VLC(111110),
VLC(1111110),
VLC(11111110),
VLC(111111110),
VLC(111111111)
};
/*
 * ISO 13818-2 Table B-13
 * Variable length codes for dct_dc_size_chrominance
 *
 * Indexed by dct_dc_size, 0 ... 11.
 */
const unsigned long long
dct_dc_size_chroma_vlc[12] =
{
VLC(00),
VLC(01),
VLC(10),
VLC(110),
VLC(1110),
VLC(11110),
VLC(111110),
VLC(1111110),
VLC(11111110),
VLC(111111110),
VLC(1111111110),
VLC(1111111111)
};
/*
 * One entry of a DCT coefficient code table: the packed VLC() value and
 * the run / level pair it stands for.
 *
 * run and level are explicitly signed: the tables end with a
 * { ..., -1, -1 } entry and the lookup loop tests run >= 0, which never
 * becomes false where plain char is unsigned.
 */
struct dct_coeff {
	unsigned long long	code;
	signed char		run, level;
};
/*
 * ISO 13818-2 Table B-14
 * DCT coefficients table zero (not including sign bit)
 *
 * Each entry is { code, run, level }. End of Block, the special first
 * coefficient code and the Escape code are left out (see the comments
 * in the list). The final { VLC(0), -1, -1 } entry terminates the
 * search in mp1e_dct_coeff_vlc().
 */
static const struct dct_coeff
dct_coeff_zero_vlc[] =
{
// VLC(10) End of Block
// { VLC(1), 0, 1 } This code shall be used
// for the first (DC) coefficient of a non-intra block
{ VLC(11), 0, 1 },
{ VLC(011), 1, 1 },
{ VLC(0100), 0, 2 },
{ VLC(0101), 2, 1 },
{ VLC(00101), 0, 3 },
{ VLC(00111), 3, 1 },
{ VLC(00110), 4, 1 },
{ VLC(000110), 1, 2 },
{ VLC(000111), 5, 1 },
{ VLC(000101), 6, 1 },
{ VLC(000100), 7, 1 },
{ VLC(0000110), 0, 4 },
{ VLC(0000100), 2, 2 },
{ VLC(0000111), 8, 1 },
{ VLC(0000101), 9, 1 },
// VLC(000001) Escape code
{ VLC(00100110), 0, 5 },
{ VLC(00100001), 0, 6 },
{ VLC(00100101), 1, 3 },
{ VLC(00100100), 3, 2 },
{ VLC(00100111), 10, 1 },
{ VLC(00100011), 11, 1 },
{ VLC(00100010), 12, 1 },
{ VLC(00100000), 13, 1 },
{ VLC(0000001010), 0, 7 },
{ VLC(0000001100), 1, 4 },
{ VLC(0000001011), 2, 3 },
{ VLC(0000001111), 4, 2 },
{ VLC(0000001001), 5, 2 },
{ VLC(0000001110), 14, 1 },
{ VLC(0000001101), 15, 1 },
{ VLC(0000001000), 16, 1 },
{ VLC(000000011101), 0, 8 },
{ VLC(000000011000), 0, 9 },
{ VLC(000000010011), 0, 10 },
{ VLC(000000010000), 0, 11 },
{ VLC(000000011011), 1, 5 },
{ VLC(000000010100), 2, 4 },
{ VLC(000000011100), 3, 3 },
{ VLC(000000010010), 4, 3 },
{ VLC(000000011110), 6, 2 },
{ VLC(000000010101), 7, 2 },
{ VLC(000000010001), 8, 2 },
{ VLC(000000011111), 17, 1 },
{ VLC(000000011010), 18, 1 },
{ VLC(000000011001), 19, 1 },
{ VLC(000000010111), 20, 1 },
{ VLC(000000010110), 21, 1 },
{ VLC(0000000011010), 0, 12 },
{ VLC(0000000011001), 0, 13 },
{ VLC(0000000011000), 0, 14 },
{ VLC(0000000010111), 0, 15 },
{ VLC(0000000010110), 1, 6 },
{ VLC(0000000010101), 1, 7 },
{ VLC(0000000010100), 2, 5 },
{ VLC(0000000010011), 3, 4 },
{ VLC(0000000010010), 5, 3 },
{ VLC(0000000010001), 9, 2 },
{ VLC(0000000010000), 10, 2 },
{ VLC(0000000011111), 22, 1 },
{ VLC(0000000011110), 23, 1 },
{ VLC(0000000011101), 24, 1 },
{ VLC(0000000011100), 25, 1 },
{ VLC(0000000011011), 26, 1 },
{ VLC(00000000011111), 0, 16 },
{ VLC(00000000011110), 0, 17 },
{ VLC(00000000011101), 0, 18 },
{ VLC(00000000011100), 0, 19 },
{ VLC(00000000011011), 0, 20 },
{ VLC(00000000011010), 0, 21 },
{ VLC(00000000011001), 0, 22 },
{ VLC(00000000011000), 0, 23 },
{ VLC(00000000010111), 0, 24 },
{ VLC(00000000010110), 0, 25 },
{ VLC(00000000010101), 0, 26 },
{ VLC(00000000010100), 0, 27 },
{ VLC(00000000010011), 0, 28 },
{ VLC(00000000010010), 0, 29 },
{ VLC(00000000010001), 0, 30 },
{ VLC(00000000010000), 0, 31 },
{ VLC(000000000011000), 0, 32 },
{ VLC(000000000010111), 0, 33 },
{ VLC(000000000010110), 0, 34 },
{ VLC(000000000010101), 0, 35 },
{ VLC(000000000010100), 0, 36 },
{ VLC(000000000010011), 0, 37 },
{ VLC(000000000010010), 0, 38 },
{ VLC(000000000010001), 0, 39 },
{ VLC(000000000010000), 0, 40 },
{ VLC(000000000011111), 1, 8 },
{ VLC(000000000011110), 1, 9 },
{ VLC(000000000011101), 1, 10 },
{ VLC(000000000011100), 1, 11 },
{ VLC(000000000011011), 1, 12 },
{ VLC(000000000011010), 1, 13 },
{ VLC(000000000011001), 1, 14 },
{ VLC(0000000000010011), 1, 15 },
{ VLC(0000000000010010), 1, 16 },
{ VLC(0000000000010001), 1, 17 },
{ VLC(0000000000010000), 1, 18 },
{ VLC(0000000000010100), 6, 3 },
{ VLC(0000000000011010), 11, 2 },
{ VLC(0000000000011001), 12, 2 },
{ VLC(0000000000011000), 13, 2 },
{ VLC(0000000000010111), 14, 2 },
{ VLC(0000000000010110), 15, 2 },
{ VLC(0000000000010101), 16, 2 },
{ VLC(0000000000011111), 27, 1 },
{ VLC(0000000000011110), 28, 1 },
{ VLC(0000000000011101), 29, 1 },
{ VLC(0000000000011100), 30, 1 },
{ VLC(0000000000011011), 31, 1 },
{ VLC(0), -1, -1 }
};
/*
 * ISO 13818-2 Table B-15
 * DCT coefficients table one (not including sign bit)
 *
 * Each entry is { code, run, level }. End of Block and the Escape code
 * are left out (see the comments in the list). The final
 * { VLC(0), -1, -1 } entry terminates the search in
 * mp1e_dct_coeff_vlc().
 */
static const struct dct_coeff
dct_coeff_one_vlc[] =
{
// VLC(0110) End of Block
{ VLC(10), 0, 1 },
{ VLC(010), 1, 1 },
{ VLC(110), 0, 2 },
{ VLC(00101), 2, 1 },
{ VLC(0111), 0, 3 },
{ VLC(00111), 3, 1 },
{ VLC(000110), 4, 1 },
{ VLC(00110), 1, 2 },
{ VLC(000111), 5, 1 },
{ VLC(0000110), 6, 1 },
{ VLC(0000100), 7, 1 },
{ VLC(11100), 0, 4 },
{ VLC(0000111), 2, 2 },
{ VLC(0000101), 8, 1 },
{ VLC(1111000), 9, 1 },
// VLC(000001) Escape code
{ VLC(11101), 0, 5 },
{ VLC(000101), 0, 6 },
{ VLC(1111001), 1, 3 },
{ VLC(00100110), 3, 2 },
{ VLC(1111010), 10, 1 },
{ VLC(00100001), 11, 1 },
{ VLC(00100101), 12, 1 },
{ VLC(00100100), 13, 1 },
{ VLC(000100), 0, 7 },
{ VLC(00100111), 1, 4 },
{ VLC(11111100), 2, 3 },
{ VLC(11111101), 4, 2 },
{ VLC(000000100), 5, 2 },
{ VLC(000000101), 14, 1 },
{ VLC(000000111), 15, 1 },
{ VLC(0000001101), 16, 1 },
{ VLC(1111011), 0, 8 },
{ VLC(1111100), 0, 9 },
{ VLC(00100011), 0, 10 },
{ VLC(00100010), 0, 11 },
{ VLC(00100000), 1, 5 },
{ VLC(0000001100), 2, 4 },
{ VLC(000000011100), 3, 3 },
{ VLC(000000010010), 4, 3 },
{ VLC(000000011110), 6, 2 },
{ VLC(000000010101), 7, 2 },
{ VLC(000000010001), 8, 2 },
{ VLC(000000011111), 17, 1 },
{ VLC(000000011010), 18, 1 },
{ VLC(000000011001), 19, 1 },
{ VLC(000000010111), 20, 1 },
{ VLC(000000010110), 21, 1 },
{ VLC(11111010), 0, 12 },
{ VLC(11111011), 0, 13 },
{ VLC(11111110), 0, 14 },
{ VLC(11111111), 0, 15 },
{ VLC(0000000010110), 1, 6 },
{ VLC(0000000010101), 1, 7 },
{ VLC(0000000010100), 2, 5 },
{ VLC(0000000010011), 3, 4 },
{ VLC(0000000010010), 5, 3 },
{ VLC(0000000010001), 9, 2 },
{ VLC(0000000010000), 10, 2 },
{ VLC(0000000011111), 22, 1 },
{ VLC(0000000011110), 23, 1 },
{ VLC(0000000011101), 24, 1 },
{ VLC(0000000011100), 25, 1 },
{ VLC(0000000011011), 26, 1 },
{ VLC(00000000011111), 0, 16 },
{ VLC(00000000011110), 0, 17 },
{ VLC(00000000011101), 0, 18 },
{ VLC(00000000011100), 0, 19 },
{ VLC(00000000011011), 0, 20 },
{ VLC(00000000011010), 0, 21 },
{ VLC(00000000011001), 0, 22 },
{ VLC(00000000011000), 0, 23 },
{ VLC(00000000010111), 0, 24 },
{ VLC(00000000010110), 0, 25 },
{ VLC(00000000010101), 0, 26 },
{ VLC(00000000010100), 0, 27 },
{ VLC(00000000010011), 0, 28 },
{ VLC(00000000010010), 0, 29 },
{ VLC(00000000010001), 0, 30 },
{ VLC(00000000010000), 0, 31 },
{ VLC(000000000011000), 0, 32 },
{ VLC(000000000010111), 0, 33 },
{ VLC(000000000010110), 0, 34 },
{ VLC(000000000010101), 0, 35 },
{ VLC(000000000010100), 0, 36 },
{ VLC(000000000010011), 0, 37 },
{ VLC(000000000010010), 0, 38 },
{ VLC(000000000010001), 0, 39 },
{ VLC(000000000010000), 0, 40 },
{ VLC(000000000011111), 1, 8 },
{ VLC(000000000011110), 1, 9 },
{ VLC(000000000011101), 1, 10 },
{ VLC(000000000011100), 1, 11 },
{ VLC(000000000011011), 1, 12 },
{ VLC(000000000011010), 1, 13 },
{ VLC(000000000011001), 1, 14 },
{ VLC(0000000000010011), 1, 15 },
{ VLC(0000000000010010), 1, 16 },
{ VLC(0000000000010001), 1, 17 },
{ VLC(0000000000010000), 1, 18 },
{ VLC(0000000000010100), 6, 3 },
{ VLC(0000000000011010), 11, 2 },
{ VLC(0000000000011001), 12, 2 },
{ VLC(0000000000011000), 13, 2 },
{ VLC(0000000000010111), 14, 2 },
{ VLC(0000000000010110), 15, 2 },
{ VLC(0000000000010101), 16, 2 },
{ VLC(0000000000011111), 27, 1 },
{ VLC(0000000000011110), 28, 1 },
{ VLC(0000000000011101), 29, 1 },
{ VLC(0000000000011100), 30, 1 },
{ VLC(0000000000011011), 31, 1 },
{ VLC(0), -1, -1 }
};
/*
 * Unpack a value built by VLC().
 *
 * vlc_octet holds the digit count in its low 5 bits and, above them,
 * one code bit in every third bit position. Up to 19 code bits are
 * collected, right-aligned, into *code.
 *
 * Returns the bit length (the low 5 bits of vlc_octet).
 */
int
mp1e_vlc(unsigned long long vlc_octet, unsigned int *code)
{
	unsigned long long digits = vlc_octet >> 5;
	unsigned int packed = 0;
	int bit;

	for (bit = 0; bit < 19; bit++) {
		/* digit 'bit' lives in the lowest bit of its 3-bit group */
		packed |= (unsigned int) ((digits >> (bit * 3)) & 1) << bit;
	}

	*code = packed;

	return (int) (vlc_octet & 0x1F);
}
/*
 * Find dct_vlc, not including sign bit
 * (append 0 for positive level, 1 for negative level)
 *
 * table selects dct_coeff_one_vlc when non-zero, dct_coeff_zero_vlc
 * otherwise. On a match the code is stored in *vlcp and its bit length
 * returned; -1 is returned when the table has no entry for this
 * run / level pair.
 */
int
mp1e_dct_coeff_vlc(int table, int run, int level, unsigned int *vlcp)
{
	const struct dct_coeff *entry = dct_coeff_zero_vlc;

	if (table)
		entry = dct_coeff_one_vlc;

	/* Both tables end with an entry whose run is negative */
	while (entry->run >= 0) {
		if (entry->run == run && entry->level == level)
			return mp1e_vlc(entry->code, vlcp);
		entry++;
	}

	return -1; // No vlc for this run/length combination
}
--- NEW FILE ---
/*
* MPEG-1 Real Time Encoder
*
* Copyright (C) 1999-2000 Michael H. Schimek
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* $Id: video.h,v 1.1 2001/12/04 23:58:10 mswitch Exp $ */
#ifndef VIDEO_H
#define VIDEO_H
#include <stdint.h>
#include "../common/bstream.h"
#include "../common/fifo.h"
#include "../common/log.h"
#include "../common/math.h"
#include "../common/sync.h"
#include "mblock.h"
#include "libvideo.h"
#include "mpeg.h"
/* Size limits; the trailing comments give the range each limit may be set to */
#define MAX_WIDTH 1024 /* 1 ... 4096 */
#define MAX_HEIGHT 1024 /* 1 ... 2800 */
/* Shorthand for the GCC regparm function attribute */
#define reg(n) __attribute__ ((regparm (n)))
/* Number of elements of an array (must be a real array, not a pointer) */
#define elements(array) (sizeof(array) / sizeof(array[0]))
/*
 * Rate control state, read and updated by rc_picture_start(),
 * rc_quant() and rc_picture_end(). The members after the "auto" marker
 * are the ones rc_picture_start() assigns at the start of every
 * picture.
 */
struct rc {
int ni, np, nb, ob; /* picture types per GOP */
long long Ei, Ep, Eb;
long long gop_count;
double ei, ep, eb;
int G0, Gn; /* estimated target bits per GOP */
double G4;
int Tavg; /* estimated avg. bits per frame */
int Tmin; /* minimum target bits per frame */
int R; /* remaining bits in GOP */
double Xi, Xp, Xb; /* global complexity measure */
double d0i, d0p, d0b; /* virtual buffer fullness */
double r31; /* reaction parameter */
double avg_acti, avg_actp; /* avg spatial activity, intra/inter */
/* auto */
double act_sumi, act_sump; /* sum spatial activity, intra/inter */
double Ti, Tmb;
int T;
};
/*
 * Max. successive P pictures when overriding gop_sequence
 * (error accumulation) and max. successive B pictures we can stack up
 */
#define MAX_P_SUCC 3
#define MAX_B_SUCC 31
/* Weight applied to the B picture complexity Xb in rc_picture_start() */
#define B_SHARE 1.4
/*
 * Set up rate control for the picture about to be coded.
 *
 * Computes the target size rc->T of the picture from the bits remaining
 * in the GOP (rc->R), the pictures of each type still expected and the
 * complexity measures Xi / Xp / Xb, never going below rc->Tmin. Ti is
 * preset to the negated buffer fullness of the picture type, Tmb to
 * the per-macroblock share of T, and the activity sums are cleared.
 */
static inline void
rc_picture_start(struct rc *rc, picture_type type, int mb_num)
{
	if (type == I_TYPE) {
		/*
		 *  T = lroundn(R / (+ (ni) * Xi / (Xi * 1.0)
		 *		     + (np) * Xp / (Xi * 1.0)
		 *		     + (nb) * Xb / (Xi * 1.4)));
		 */
		rc->T = lroundn(rc->R / ((rc->ni + rc->ei)
					 + ((rc->np + rc->ep) * rc->Xp
					    + (rc->nb + rc->eb) * rc->Xb / B_SHARE)
					 / rc->Xi));
		rc->Ti = -rc->d0i;
	} else if (type == P_TYPE) {
		rc->T = lroundn(rc->R / ((rc->np + rc->ep)
					 + ((rc->ni + rc->ei) * rc->Xi
					    + (rc->nb + rc->eb) * rc->Xb / B_SHARE)
					 / rc->Xp));
		rc->Ti = -rc->d0p;
	} else if (type == B_TYPE) {
		/*
		 *  T = lroundn(R / (+ (ni + ei) * Xi * 1.4 / Xb
		 *		     + (np + ep) * Xp * 1.4 / Xb
		 *		     + (nb + eb) * Xb / Xb));
		 */
		rc->T = lroundn(rc->R / (((rc->ni + rc->ei) * rc->Xi
					  + (rc->np + rc->ep) * rc->Xp) * B_SHARE
					 / rc->Xb + (rc->nb + rc->eb)));
		rc->Ti = -rc->d0b;
	} else {
		FAIL("!reached");
	}

	/* Enforce the minimum picture size */
	if (rc->Tmin > rc->T)
		rc->T = rc->Tmin;

	/*
	 * NOTE(review): T and mb_num are both int, so this division
	 * truncates before the result is stored in the double Tmb --
	 * confirm that is intended.
	 */
	rc->Tmb = rc->T / mb_num;

	rc->act_sump = 0.0;
	rc->act_sumi = 0.0;
}
/*
 * Choose the quantiser for the next macroblock.
 *
 * The activity of the macroblock (acti for intra, actp for all other
 * types) is added to the running sums and normalised against the
 * average activity; the quantiser follows from the difference between
 * the bits produced so far (bits_out) and the running target rc->Ti,
 * scaled by rc->r31. For all types except MB_INTERP the result is
 * shifted right by qs and saturated to 1 ... quant_max. rc->Ti then
 * advances by one macroblock share.
 */
static inline int
rc_quant(struct rc *rc, mb_type type,
	 double acti, double actp,
	 int bits_out, int qs, int quant_max)
{
	int quant;

	if (type == MB_INTRA) {
		rc->act_sumi += acti;
		acti = (2.0 * acti + rc->avg_acti) / (acti + 2.0 * rc->avg_acti);
		quant = lroundn((bits_out - rc->Ti) * rc->r31 * acti);
		quant = saturate(quant >> qs, 1, quant_max);
		rc->Ti += rc->Tmb;
	} else if (type == MB_FORWARD || type == MB_BACKWARD
		   || type == MB_INTERP) {
		rc->act_sumi += acti;
		rc->act_sump += actp;
		actp = (2.0 * actp + rc->avg_actp) / (actp + 2.0 * rc->avg_actp);
		quant = lroundn((bits_out - rc->Ti) * rc->r31 * actp);
		/* Interpolated macroblocks keep the raw value */
		if (type != MB_INTERP)
			quant = saturate(quant >> qs, 1, quant_max);
		rc->Ti += rc->Tmb;
	} else {
		FAIL("!reached");
	}

	return quant;
}
/*
 * Update rate control after a picture has been coded.
 *
 * S is the size of the coded picture in bits, quant_sum the sum of the
 * quantisers used and mb_num the number of macroblocks. Refreshes the
 * average activities (the inter average only for P and B pictures),
 * the complexity measure of the picture type and its virtual buffer
 * fullness.
 */
static inline void
rc_picture_end(struct rc *rc, picture_type type,
	       int S, int quant_sum, int mb_num)
{
	double *complexity, *fullness;
	int inter = 1;

	switch (type) {
	case I_TYPE:
		complexity = &rc->Xi;
		fullness = &rc->d0i;
		inter = 0;
		break;

	case P_TYPE:
		complexity = &rc->Xp;
		fullness = &rc->d0p;
		break;

	case B_TYPE:
		complexity = &rc->Xb;
		fullness = &rc->d0b;
		break;

	default:
		FAIL("!reached");
		return;
	}

	rc->avg_acti = rc->act_sumi / mb_num;
	if (inter)
		rc->avg_actp = rc->act_sump / mb_num;

	*complexity = lroundn(S * (double) quant_sum / mb_num);
	*fullness += S - rc->T; /* bits encoded - estimated bits */
}
/*
 * One queued input frame; mpeg1_context keeps an array of
 * MAX_B_SUCC of these (stack) plus the most recent one (last).
 */
typedef struct stacked_frame {
uint8_t * org;
buffer * buffer;
double time;
int skipped;
} stacked_frame;
/*
 * Context of the MPEG-1 video encoder: header templates, the picture
 * coding and prediction function pointers, the stack of queued frames,
 * rate control state, input/output plumbing and the user options. The
 * rte_codec member (codec) is embedded in this structure.
 */
typedef struct mpeg1_context mpeg1_context;
struct mpeg1_context {
uint8_t seq_header_template[32];
uint8_t * zerop_template; /* empty P picture */
int Sz; /* .. size in bytes */
int (* picture_i)(mpeg1_context *, uint8_t *org);
int (* picture_p)(mpeg1_context *, uint8_t *org,
int dist, int forward_motion);
int (* picture_b)(mpeg1_context *, uint8_t *org,
int dist, int forward_motion,
int backward_motion);
unsigned int (* predict_forward)(uint8_t *from) reg(1);
unsigned int (* predict_bidirectional)(uint8_t *from1, uint8_t *from2,
unsigned int *vmc1,
unsigned int *vmc2);
stacked_frame stack[MAX_B_SUCC];
stacked_frame last;
/* frames encoded (coding order) */
int gop_frame_count; /* .. in current GOP (display order) */
int seq_frame_count; /* .. since last sequence header */
double skip_rate_acc;
double drop_timeout;
double time_per_frame;
double frames_per_sec;
uint8_t * oldref; /* past reference frame buffer */
bool insert_gop_header;
bool closed_gop; /* random access point, no fwd ref */
bool referenced; /* by other P or B pictures */
struct rc rc;
int p_succ;
int skipped_fake;
int skipped_zero;
uint8_t * banner;
consumer cons;
int mb_cx_row;
int mb_cx_thresh;
int motion_min;
int motion_max;
int coded_width;
int coded_height;
int frames_per_seqhdr;
/* input */
synchr_stream sstr;
double coded_elapsed;
/* Output */
fifo * fifo;
producer prod;
double coded_time_elapsed;
double coded_frame_rate;
double coded_frame_period;
/* Options */
rte_codec codec;
int bit_rate;
int frame_rate_code;
double virtual_frame_rate;
char * gop_sequence;
int skip_method;
bool motion_compensation;
bool monochrome;
char * anno;
};
extern mpeg1_context vseg;
extern uint8_t * newref; /* future reference frame buffer */
extern int mb_col, mb_row,
mb_width, mb_height,
mb_last_col, mb_last_row,
mb_num;
extern short mblock[7][6][8][8];
/*
 * Addressing of the current macroblock: an offset / pitch pair for each
 * of the six blocks, the luma and chroma steps applied per column and
 * per row (see mba_col_incr() and mba_row_incr()), and the chroma start
 * offset loaded by reset_mba().
 */
extern struct mb_addr {
struct {
int offset;
int pitch;
} block[6];
struct {
int lum;
int chrom;
} col, row;
int chrom_0;
} mb_address;
/*
 * Macroblock address stepping. Only the offsets of block 0 and block 4
 * are maintained here: reset_mba() rewinds them to 0 and
 * mb_address.chrom_0, mba_col_incr() and mba_row_incr() add the luma
 * step to block 0 and the chroma step to block 4.
 */
#define reset_mba() \
do { \
mb_address.block[0].offset = 0; \
mb_address.block[4].offset = mb_address.chrom_0; \
} while (0)
#define mba_col_incr() \
do { \
mb_address.block[0].offset += mb_address.col.lum; \
mb_address.block[4].offset += mb_address.col.chrom; \
} while (0)
#define mba_row_incr() \
do { \
mb_address.block[0].offset += mb_address.row.lum; \
mb_address.block[4].offset += mb_address.row.chrom; \
} while (0)
#define video_align(n) __attribute__ ((aligned (n)))
extern struct bs_rec video_out video_align(32);
extern int dropped;
extern int (* filter)(unsigned char *, unsigned char *);
extern const char * filter_labels[];
extern long long video_frame_count;
extern long long video_frames_dropped;
// extern fifo * video_fifo;
// extern pthread_t video_thread_id;
extern void * mpeg1_video_ipb(void *capture_fifo);
extern void conv_init(int);
extern fifo * v4l_init(double *frame_rate);
extern fifo * v4l2_init(double *frame_rate);
extern fifo * file_init(double *frame_rate);
extern void filter_init(int pitch);
extern void video_coding_size(int width, int height);
extern int video_look_ahead(char *gop_sequence);
/*
 * CM_* mode identifiers.
 * Don't change the order; the values are spelled out for that reason.
 * XXX rethink
 */
enum {
	CM_INVALID				= 0,
	CM_YUV					= 1,
	CM_YUYV					= 2,
	CM_YUYV_VERTICAL_DECIMATION		= 3,
	CM_YUYV_TEMPORAL_INTERPOLATION		= 4,
	CM_YUYV_VERTICAL_INTERPOLATION		= 5,
	CM_YUYV_PROGRESSIVE			= 6,
	CM_YUYV_PROGRESSIVE_TEMPORAL		= 7,
	CM_YUYV_EXP				= 8,
	CM_YUYV_EXP_VERTICAL_DECIMATION		= 9,
	CM_YUYV_EXP2				= 10,
	CM_YVU					= 11,
	CM_NUM_MODES	/* number of modes, keep last */
};
#endif /* VIDEO_H */
--- NEW FILE ---
/*
* MPEG-1 Real Time Encoder
*
* Copyright (C) 1999-2000 Michael H. Schimek
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* $Id: vlc.c,v 1.1 2001/12/04 23:58:10 mswitch Exp $ */
#include <assert.h>
#include <limits.h>
#include "../common/bstream.h"
#include "../common/log.h"
#include "mpeg.h"
#include "vlc.h"
#define align(n) __attribute__ ((SECTION("vlc_tables") aligned (n)))
// XXX
int dc_dct_pred[2][3];
/*
* Tables
*/
VLC2 mp1e_coded_block_pattern[64] align(CACHE_LINE);
VLC2 mp1e_macroblock_address_increment[33] align(CACHE_LINE);
VLCM mp1e_motion_vector_component[480] align(CACHE_LINE);
/*
* ISO/IEC 13818-2 Table B-2
* Variable length codes for macroblock_type in I-pictures
*
* '1' Intra
* '01 xxxxx' Intra, Quant
*/
/*
* ISO/IEC 13818-2 Table B-3
* Variable length codes for macroblock_type in P-pictures
*
* '1' MC, Coded
* '01' No MC, Coded
* '001' MC, Not Coded
* '0001 1' Intra
* '0001 0' MC, Coded, Quant
* '0000 1' No MC, Coded, Quant
* '0000 01' Intra, Quant
*/
/*
 * ISO/IEC 13818-2 Table B-4
 * Variable length codes for macroblock_type in B-pictures
 *
 * The entries are listed in the order Intra, Fwd, Bwd, Interp. This
 * variant leaves room for the 5 quantiser bits (xxxxx) and already
 * contains the motion vector codes for (0, 0). The first two members of
 * an entry are the code and its total length; the third looks like the
 * number of bits following the xxxxx field -- TODO confirm against the
 * VLC4 definition.
 */
VLC4
mp1e_macroblock_type_b_nomc_quant[4] align(16) =
{
{ 0x0020, 11, 0 }, /* '0000 01 xxxxx' (Intra, Quant) */
{ 0x0183, 13, 2 }, /* '0000 11 xxxxx 11' (Fwd, Coded, Quant, MV (0, 0)) */
{ 0x0103, 13, 2 }, /* '0000 10 xxxxx 11' (Bwd, Coded, Quant, MV (0, 0)) */
{ 0x040F, 14, 4 } /* '0001 0 xxxxx 11 11' (Interp, Coded, Quant, */
/* FMV (0, 0), BMV (0, 0)) */
};
/*
 * B-picture macroblock_type codes for coded macroblocks without a
 * quantiser change, with the motion vector codes for (0, 0) appended.
 * Entries are { code, length } in the order Intra, Fwd, Bwd, Interp.
 */
VLC2
mp1e_macroblock_type_b_nomc[4] align(8) =
{
{ 0x03, 5 }, /* '0001 1' (Intra) */
{ 0x0F, 6 }, /* '0011 11' (Fwd, Coded, MV (0, 0)) */
{ 0x0F, 5 }, /* '011 11' (Bwd, Coded, MV (0, 0)) */
{ 0x3F, 6 }, /* '11 11 11' (Interp, Coded, FMV (0, 0), BMV (0, 0)) */
};
/*
 * B-picture macroblock_type codes for macroblocks that are not coded,
 * with the motion vector codes for (0, 0) appended. Entries are
 * { code, length } in the order Intra, Fwd, Bwd, Interp; the Intra
 * entry is empty because intra macroblocks are always coded.
 */
VLC2
mp1e_macroblock_type_b_nomc_notc[4] align(8) =
{
{ 0, 0 }, /* Intra always coded */
{ 0x0B, 6 }, /* '0010 11' (Fwd, Not Coded, MV (0, 0)) */
{ 0x0B, 5 }, /* '010 11' (Bwd, Not Coded, MV (0, 0)) */
{ 0x2F, 6 }, /* '10 11 11' (Interp, Not Coded, FMV (0, 0), BMV (0, 0)) */
};
/*
 * B-picture macroblock_type codes with room for the 5 quantiser bits
 * (xxxxx, left zero here) and no motion vector codes. Entries are
 * { code, length } in the order Intra, Fwd, Bwd, Interp.
 */
VLC2
mp1e_macroblock_type_b_quant[4] align(8) =
{
{ 0x020, 11 }, /* '0000 01 xxxxx' (Intra, Quant) */
{ 0x060, 11 }, /* '0000 11 xxxxx' (Fwd, Coded, Quant) */
{ 0x040, 11 }, /* '0000 10 xxxxx' (Bwd, Coded, Quant) */
{ 0x040, 10 } /* '0001 0 xxxxx' (Interp, Coded, Quant) */
};
#if 0
/* Systematic VLCs */
VLC2
mp1e_macroblock_type_b[4] align(8) =
{
{ 0x03, 5 }, /* '0001 1' (Intra) */
{ 0x03, 4 }, /* '0011' (Fwd, Coded) */
{ 0x03, 3 }, /* '011' (Bwd, Coded) */
{ 0x03, 2 }, /* '11' (Interp, Coded) */
};
VLC2
mp1e_macroblock_type_b_notc[4] align(8) =
{
{ 0, 0 }, /* Intra always coded */
{ 0x02, 4 }, /* '0010' (Fwd, Not Coded) */
{ 0x02, 3 }, /* '010' (Bwd, Not Coded) */
{ 0x02, 2 }, /* '10' (Interp, Not Coded) */
};
#endif
unsigned char mp1e_iscan[8][8] align(CACHE_LINE);
VLC8 mp1e_dc_vlc_intra[5][12] align(CACHE_LINE);
VLC2 mp1e_ac_vlc_zero[176] align(CACHE_LINE);
VLC2 mp1e_ac_vlc_one[176] align(CACHE_LINE);
// XXX
extern short mblock[7][6][8][8];
extern struct bs_rec video_out;
extern const char cbp_order[6];
/*
 * Build the run-time VLC lookup tables from the packed tables in
 * tables.c:
 *
 *  - mp1e_macroblock_address_increment[], mp1e_coded_block_pattern[]
 *  - mp1e_motion_vector_component[], one section per f_code starting
 *    at index ((1 << (f_code - 1)) - 1) * 32
 *  - mp1e_dc_vlc_intra[][], the DC size codes alone and prefixed with
 *    the End of Block code of the preceding block
 *  - mp1e_ac_vlc_zero[], mp1e_ac_vlc_one[]: for each run a header
 *    entry { code = run, length = entries for this run including the
 *    header } followed by the codes for level 1, 2, ...
 *  - mp1e_iscan[], the forward zig-zag scanning pattern
 */
void
mp1e_vlc_init(void)
{
	int i, j;
	unsigned int code;
	int dct_dc_size;
	int run, level, length;
	int f_code;

	/* Variable length codes for macroblock address increment */

	for (i = 0; i < 33; i++) {
		mp1e_macroblock_address_increment[i].length =
			mp1e_vlc(macroblock_address_increment_vlc[i], &code);
		mp1e_macroblock_address_increment[i].code = code;

		assert(code <= UCHAR_MAX);
	}

	/* Variable length codes for coded block pattern */

	for (i = 0; i < 64; i++) {
		int cbp, k;

		/* Reorder the six block bits of i according to cbp_order */
		for (cbp = k = 0; k < 6; k++)
			if (i & (1 << k))
				cbp |= 0x20 >> cbp_order[k]; /* (5 - k) */

		mp1e_coded_block_pattern[i].length =
			mp1e_vlc(coded_block_pattern_vlc[cbp], &code);
		mp1e_coded_block_pattern[i].code = code;

		assert(code <= UCHAR_MAX);
	}

	/* Variable length codes for motion vector component */

	for (f_code = F_CODE_MIN; f_code <= F_CODE_MAX; f_code++) {
		int r_size = f_code - 1;
		int f1 = (1 << r_size) - 1;

		for (i = 0; i < 16 << f_code; i++) {
			int motion_code, motion_residual;
			/* The upper half of the index range stands for negative deltas */
			int delta = (i < (16 << r_size)) ? i : i - (16 << f_code);

			motion_code = (abs(delta) + f1) >> r_size;
			motion_residual = (abs(delta) + f1) & f1;

			length = mp1e_vlc(motion_code_vlc[motion_code], &code);

			if (motion_code != 0) {
				code = code * 2 + (delta < 0); /* sign */
				length++;
			}

			if (f_code > 1 && motion_code != 0) {
				code = (code << r_size) + motion_residual;
				length += r_size;
			}

			assert(code < (1 << 12) && length < 16);

			mp1e_motion_vector_component[f1 * 32 + i].code = code;
			mp1e_motion_vector_component[f1 * 32 + i].length = length;
#if 0
			fprintf(stderr, "MV %02x %-2d ", i, delta);

			for (j = length - 1; j >= 0; j--)
				fprintf(stderr, "%d", (code & (1 << j)) > 0);

			fprintf(stderr, "\n");
#endif
		}
	}

	/* Variable length codes for intra DC coefficient */

	for (dct_dc_size = 0; dct_dc_size < 12; dct_dc_size++) {
		unsigned int luma, chroma;
		int luma_len = mp1e_vlc(dct_dc_size_luma_vlc[dct_dc_size], &luma);
		int chroma_len = mp1e_vlc(dct_dc_size_chroma_vlc[dct_dc_size], &chroma);

		/* Intra DC luma VLC */
		mp1e_dc_vlc_intra[0][dct_dc_size].length =
			luma_len + dct_dc_size;
		mp1e_dc_vlc_intra[0][dct_dc_size].code =
			luma << dct_dc_size;

		/* Intra DC luma VLC with EOB ('10' table B-14) of prev. block */
		mp1e_dc_vlc_intra[1][dct_dc_size].length =
			luma_len + dct_dc_size + 2;
		mp1e_dc_vlc_intra[1][dct_dc_size].code =
			((0x2 << luma_len) | luma) << dct_dc_size;

		/* Intra DC chroma VLC with EOB of previous block */
		mp1e_dc_vlc_intra[2][dct_dc_size].length =
			chroma_len + dct_dc_size + 2;
		mp1e_dc_vlc_intra[2][dct_dc_size].code =
			((0x2 << chroma_len) | chroma) << dct_dc_size;

		/* Intra DC luma VLC with EOB ('0110' table B-15) of prev. block */
		mp1e_dc_vlc_intra[3][dct_dc_size].length =
			luma_len + dct_dc_size + 4;
		mp1e_dc_vlc_intra[3][dct_dc_size].code =
			((0x6 << luma_len) | luma) << dct_dc_size;

		/* Intra DC chroma VLC with EOB of previous block */
		mp1e_dc_vlc_intra[4][dct_dc_size].length =
			chroma_len + dct_dc_size + 4;
		mp1e_dc_vlc_intra[4][dct_dc_size].code =
			((0x6 << chroma_len) | chroma) << dct_dc_size;
	}

	/* Variable length codes for AC coefficients (table B-14) */

	for (i = run = 0; run < 64; run++) {
		/* The header entry is written next, so i must be a valid index */
		assert(i < elements(mp1e_ac_vlc_zero));

		mp1e_ac_vlc_zero[j = i++].code = run;

		for (level = 1;
		     (length = mp1e_dct_coeff_vlc(0, run, level, &code)) > 0;
		     level++, i++) {
			assert(i < elements(mp1e_ac_vlc_zero));
			assert((code << 1) <= UCHAR_MAX);

			mp1e_ac_vlc_zero[i].length = length + 1;
			mp1e_ac_vlc_zero[i].code = code << 1; /* sign 0 */
		}

		mp1e_ac_vlc_zero[j].length = i - j;
	}

	/* Variable length codes for AC coefficients (table B-15) */

	for (i = run = 0; run < 64; run++) {
		assert(i < elements(mp1e_ac_vlc_one));

		mp1e_ac_vlc_one[j = i++].code = run;

		for (level = 1;
		     (length = mp1e_dct_coeff_vlc(1, run, level, &code)) > 0;
		     level++, i++) {
			assert(i < elements(mp1e_ac_vlc_one));
			assert((code << 0) <= UCHAR_MAX);

			mp1e_ac_vlc_one[i].length = length + 1;
			mp1e_ac_vlc_one[i].code = code << 0;
				/* no sign (would need 9 bits) */
		}

		/*
		 * The run header belongs to table one; storing it in
		 * mp1e_ac_vlc_zero left every header of this table at
		 * length 0.
		 */
		mp1e_ac_vlc_one[j].length = i - j;
	}

	/*
	 *  Forward zig-zag scanning pattern
	 */
	for (i = 0; i < 64; i++) {
		/*
		 * Same elements as scan[0][0][i] and mp1e_iscan[0][pos],
		 * addressed by row and column so that no index leaves its
		 * array dimension.
		 */
		int pos = (scan[0][i >> 3][i & 7] - 1) & 63;

		/* iscan[0][63 - scan[0][0][i]] = (i & 7) * 8 + (i >> 3); */
		mp1e_iscan[pos >> 3][pos & 7] = (i & 7) * 8 + (i >> 3);
	}
}
/* Reference */
#if 0
/*
 * Reference C version of the intra macroblock coder.  It is compiled out
 * by the surrounding #if 0 and relies on GCC extensions (a nested
 * function and x86 inline assembly).
 *
 * Codes the six blocks mblock[1][0..5] in the order 0, 2, 1, 3, 4, 5 and
 * finishes with a 2-bit end-of-block code.  Returns 0 on success, or
 * non-zero when some coefficient lies outside -255..255; in that case the
 * DC predictors in dc_dct_pred[0][] are restored from the copy taken on
 * entry.
 *
 * NOTE(review): the tables are referenced here as ac_vlc_zero, iscan and
 * dc_vlc_intra, while vlc.h declares them with an mp1e_ prefix; the code
 * would need renaming before it could be enabled again.
 */
int
mp1e_mpeg1_encode_intra(void)
{
int v;
/*
 * Code one 8x8 block.  dc_pred points at the DC predictor to use and
 * update; dc_vlc is the DC size table, indexed by the bit size of the
 * DC difference.  Returns 1 as soon as a coefficient outside -255..255
 * is met (bits already written for this block are not taken back),
 * 0 otherwise.
 */
int
encode_block(short block[8][8], int *dc_pred, VLC8 *dc_vlc)
{
/* DC coefficient */
{
register int val = block[0][0] - *dc_pred, size;
/*
* Find first set bit, starting at msb with 0 -> 0.
*/
/* size = index of the highest set bit of abs(val) plus one, 0 for val == 0 */
asm volatile (
" bsrl %1,%0\n"
" jnz 1f\n"
" movl $-1,%0\n"
"1:\n"
" incl %0\n"
: "=&r" (size) : "r" (abs(val)));
/* Negative differences are sent as the low 'size' bits of val - 1 */
if (val < 0) {
val--;
val ^= (-1 << size);
}
/* The table code is already shifted left to leave room for val */
bputl(&video_out, dc_vlc[size].code | val, dc_vlc[size].length);
*dc_pred = block[0][0];
}
/* AC coefficients */
{
/*
 * p points at the header entry of the current run group: its code
 * field holds the run and its length field the number of entries in
 * the group.  p[level] is the code for (run, level).
 */
VLC2 *p = ac_vlc_zero;
int i;
for (i = 1; i < 64; i++) {
int ulevel, slevel = block[0][iscan[0][(i - 1) & 63]];
if (slevel) {
ulevel = abs(slevel);
if (ulevel < (int) p->length) {
p += ulevel;
/* Table codes carry a zero sign bit in bit 0; set it for negatives */
bputl(&video_out, p->code | ((slevel >> 31) & 1), p->length);
} else {
/* No table entry for this (run, level): use an escape code */
int len;
if (slevel > 127) {
if (slevel > 255)
return 1;
/* %000001 escape, 6 bit run, %00000000, slevel & 0xFF */
slevel = 0x0400000 | (p->code << 16) | (slevel & 0xFF);
len = 28;
} else if (slevel < -127) {
if (slevel < -255)
return 1;
/* %000001 escape, 6 bit run, %10000000, slevel (sic) & 0xFF */
slevel = 0x0408000 | (p->code << 16) | (slevel & 0xFF);
len = 28;
} else {
/* %000001 escape, 6 bit run, slevel & 0xFF */
slevel = (1 << 14) | (p->code << 8) | (slevel & 0xFF);
len = 20;
}
bputl(&video_out, slevel, len);
}
p = ac_vlc_zero; /* run = 0 */
} else
p += p->length; /* run++ */
}
}
return 0;
}
/* Keep a copy of the predictors so a failed attempt can be undone */
dc_dct_pred[1][0] = dc_dct_pred[0][0];
dc_dct_pred[1][1] = dc_dct_pred[0][1];
dc_dct_pred[1][2] = dc_dct_pred[0][2];
/*
 * Blocks 0-3 share predictor [0][0], block 4 uses [0][1] and block 5
 * [0][2].  No end-of-block code is written between blocks here;
 * presumably DC tables 1 and 2 carry the previous block's EOB as a
 * prefix (table 2 is built that way in mp1e_vlc_init) -- confirm for
 * table 1.
 */
v = encode_block(mblock[1][0], &dc_dct_pred[0][0], dc_vlc_intra[0]);
v |= encode_block(mblock[1][2], &dc_dct_pred[0][0], dc_vlc_intra[1]);
v |= encode_block(mblock[1][1], &dc_dct_pred[0][0], dc_vlc_intra[1]);
v |= encode_block(mblock[1][3], &dc_dct_pred[0][0], dc_vlc_intra[1]);
v |= encode_block(mblock[1][4], &dc_dct_pred[0][1], dc_vlc_intra[2]);
v |= encode_block(mblock[1][5], &dc_dct_pred[0][2], dc_vlc_intra[2]);
bputl(&video_out, 0x2, 2); /* EOB '10' (ISO 13818-2 table B-14) */
/*
* Saturation is rarely needed, so the forward quantisation code
* skips the step. This routine detects excursions in uncritical
* path and reports but saturates because saturation often causes
* a visibly annoying reconstruction error.
*/
/*
 * NOTE(review): the code above only reports out-of-range levels
 * (return 1), it does not clamp them; the sentence above probably
 * means "reports but does not saturate" -- confirm with the caller.
 */
if (v) {
dc_dct_pred[0][0] = dc_dct_pred[1][0];
dc_dct_pred[0][1] = dc_dct_pred[1][1];
dc_dct_pred[0][2] = dc_dct_pred[1][2];
}
return v;
}
/*
 * Reference C version of the inter (non-intra) macroblock coder.  It is
 * compiled out by the surrounding #if 0 and relies on a GCC nested
 * function.
 *
 * iblock holds the six 8x8 blocks of the macroblock; cbp selects the
 * blocks to code: bit 5 = block 0, bit 4 = block 1, bit 3 = block 2,
 * bit 2 = block 3, bit 1 = block 4, bit 0 = block 5.  Blocks are written
 * in the order 0, 2, 1, 3, 4, 5.  Returns 0 on success, or non-zero when
 * some coefficient lies outside -255..255.
 *
 * NOTE(review): ac_vlc_zero and iscan are referenced without the mp1e_
 * prefix used by the declarations in vlc.h.
 */
int
mp1e_mpeg1_encode_inter(short iblock[6][8][8], unsigned int cbp)
{
int v = 0;
/*
 * Code one 8x8 block followed by its end-of-block code.  Returns 1 as
 * soon as a coefficient outside -255..255 is met (no EOB is written in
 * that case), 0 otherwise.
 */
int
encode_block(short block[8][8])
{
VLC2 *p = ac_vlc_zero; /* ISO 13818-2 table B-14 */
int i = 1, len, ulevel, slevel;
/* DC coefficient */
ulevel = abs(slevel = block[0][0]);
if (ulevel == 1) {
/* First coefficient +1 or -1: 2-bit code '1s' with s the sign bit */
bputl(&video_out, 0x2 | ((slevel >> 31) & 1), 2);
} else
/*
 * Otherwise treat it like any other coefficient: with i == 0 the
 * index below becomes iscan[0][63], which is then expected to
 * address block[0][0] -- TODO confirm against the scan table.
 */
i = 0;
/* AC coefficients */
while (i < 64) {
if ((slevel = block[0][iscan[0][(i - 1) & 63]])) {
ulevel = abs(slevel);
if (ulevel < (int) p->length) {
p += ulevel;
/* Table codes carry a zero sign bit in bit 0; set it for negatives */
bputl(&video_out, p->code | ((slevel >> 31) & 1), p->length);
} else {
/* No table entry for this (run, level): use an escape code */
if (slevel > 127) {
if (slevel > 255)
return 1;
/* %000001 escape, 6 bit run, %00000000, slevel & 0xFF */
slevel = 0x0400000 | (p->code << 16) | (slevel & 0xFF);
len = 28;
} else if (slevel < -127) {
if (slevel < -255)
return 1;
/* %000001 escape, 6 bit run, %10000000, slevel (sic) & 0xFF */
slevel = 0x0408000 | (p->code << 16) | (slevel & 0xFF);
len = 28;
} else {
/* %000001 escape, 6 bit run, slevel & 0xFF */
slevel = (1 << 14) | (p->code << 8) | (slevel & 0xFF);
len = 20;
}
bputl(&video_out, slevel, len);
}
p = ac_vlc_zero; /* run = 0 */
} else
p += p->length; /* run++ */
i++;
}
/* End of block, 2-bit code '10' */
bputl(&video_out, 0x2, 2);
return 0;
}
/* watch cbp_order */
if (cbp & (1 << 5)) v = encode_block(iblock[0]);
if (cbp & (1 << 3)) v |= encode_block(iblock[2]);
if (cbp & (1 << 4)) v |= encode_block(iblock[1]);
if (cbp & (1 << 2)) v |= encode_block(iblock[3]);
if (cbp & (1 << 1)) v |= encode_block(iblock[4]);
if (cbp & (1 << 0)) v |= encode_block(iblock[5]);
return v;
}
#endif
--- NEW FILE ---
/*
* MPEG-1 Real Time Encoder
*
* Copyright (C) 1999-2000 Michael H. Schimek
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* $Id: vlc.h,v 1.1 2001/12/04 23:58:10 mswitch Exp $ */
#ifndef VLC_H
#define VLC_H
#include "../common/math.h"
#include "video.h"
/*
 * Two-byte table entry: a code of at most 8 bits and a length.
 *
 * In mp1e_ac_vlc_zero / mp1e_ac_vlc_one the first entry of each run
 * group is a header (code = run, length = number of entries in the
 * group); the following entries hold the code for level 1, 2, ... and
 * its length in bits.
 *
 * vlc_mmx.s reads the fields by byte offset (code at +0, length at +1,
 * element size 2), so the layout must not change.
 */
typedef struct {
unsigned char code;
unsigned char length;
} VLC2;
/*
 * Table entry packed into bit-fields: a 12-bit code and a 4-bit length.
 * Element type of mp1e_motion_vector_component.
 */
typedef struct {
unsigned code : 12;
unsigned length : 4;
} VLCM;
/*
 * Table entry with a 16-bit code and two byte-sized lengths.
 * Element type of mp1e_macroblock_type_b_nomc_quant.
 * NOTE(review): mv_length is presumably a second length related to
 * motion vector data -- confirm where the table is filled in.
 */
typedef struct {
unsigned short code;
unsigned char length, mv_length;
} VLC4;
/*
 * Eight-byte table entry: 32-bit code and 32-bit length.
 * Element type of mp1e_dc_vlc_intra, where the code is stored already
 * shifted left by the DC size so the difference bits can be ORed in.
 *
 * vlc_mmx.s reads the fields by byte offset (code at +0, length at +4,
 * element size 8), so the layout must not change.
 */
typedef struct {
unsigned int code;
unsigned int length;
} VLC8;
/*
 * Intra DC predictors.  Row 0 holds the live values (cleared by
 * reset_dct_pred()); column 0 is used for blocks 0-3, column 1 for
 * block 4 and column 2 for block 5 of a macroblock.
 */
extern int dc_dct_pred[2][3];
/* Code tables; presumably filled in by mp1e_vlc_init() -- confirm */
extern VLC2 mp1e_coded_block_pattern[64];
extern VLC2 mp1e_macroblock_address_increment[33];
/* 480 entries; motion_init() selects a sub-table by f_code (offsets 0, 32, 96, 224) */
extern VLCM mp1e_motion_vector_component[480];
extern VLC4 mp1e_macroblock_type_b_nomc_quant[4];
extern VLC2 mp1e_macroblock_type_b_nomc[4];
extern VLC2 mp1e_macroblock_type_b_nomc_notc[4];
extern VLC2 mp1e_macroblock_type_b_quant[4];
/* Scan order table: entry k is the element offset of a coefficient inside an 8x8 block */
extern unsigned char mp1e_iscan[8][8];
/*
 * Intra DC size codes, one row of 12 sizes per variant.  As built by
 * mp1e_vlc_init(): row 2 = chroma with a 2-bit '10' prefix, row 3 =
 * luma with a 4-bit '0110' prefix, row 4 = chroma with a 4-bit '0110'
 * prefix (the prefixes are the end-of-block code of the previous
 * block).  Rows 0 and 1 are luma variants.
 */
extern VLC8 mp1e_dc_vlc_intra[5][12];
/* AC coefficient codes grouped by run, table B-14 (zero) and B-15 (one); see VLC2 */
extern VLC2 mp1e_ac_vlc_zero[176];
extern VLC2 mp1e_ac_vlc_one[176];
/* Builds the tables above; call once before encoding */
extern void mp1e_vlc_init(void);
/*
 * Macroblock coders.  The intra variants take their input from global
 * state; the inter variants code the blocks of mblock selected by cbp.
 * The mpeg1 variants return 0 on success and non-zero when a
 * coefficient is out of range; presumably the mpeg2 variants follow the
 * same convention -- confirm.
 */
extern int mp1e_mpeg1_encode_intra(void);
extern int mp1e_mpeg1_encode_inter(short mblock[6][8][8],
unsigned int cbp);
extern int mp1e_mpeg2_encode_intra(void);
extern int mp1e_mpeg2_encode_inter(short mblock[6][8][8],
unsigned int cbp);
/* Hand-written x86/MMX versions, implemented in vlc_mmx.s */
extern int mp1e_p6_mpeg1_encode_intra(void);
extern int mp1e_p6_mpeg1_encode_inter(short mblock[6][8][8],
unsigned int cbp);
/*
 * Clear the three live intra DC predictors (row 0 of dc_dct_pred).
 * Row 1 is left untouched.
 */
static inline
void reset_dct_pred(void)
{
	int component;

	for (component = 0; component < 3; component++) {
		dc_dct_pred[0][component] = 0;
	}
}
/* Bounds applied to the f_code computed by motion_init() */
#define F_CODE_MIN 1
#define F_CODE_MAX 4
/*
 * Per-direction motion vector coding state, set up by motion_init().
 */
struct motion {
VLCM * vlc; /* sub-table of mp1e_motion_vector_component for f_code */
int f_code; /* F_CODE_MIN..F_CODE_MAX */
int f_mask; /* 0xFF >> (4 - f_code) */
int src_range; /* requested range limited to 4 .. max_range */
int max_range; /* 4 << f_code */
/* presumably predicted and current vector components -- TODO confirm */
int PMV[2], MV[2];
};
/*
 * Prepare *m for a search range of about `range`.
 *
 * The range is first limited to mpeg1->motion_min .. motion_max, then an
 * f_code between F_CODE_MIN and F_CODE_MAX is derived from it.  All
 * fields of *m except MV[] are initialised; the predictor PMV[] starts
 * at zero.
 */
static inline void
motion_init(mpeg1_context *mpeg1, struct motion *m, int range)
{
	int fc;
	int limit;

	range = saturate(range, mpeg1->motion_min, mpeg1->motion_max);
	fc = saturate(ffsr(range - 1) - 1, F_CODE_MIN, F_CODE_MAX);
	limit = 4 << fc;

	m->f_code = fc;
	m->f_mask = 0xFF >> (4 - fc);

	m->max_range = limit;
	m->src_range = saturate(range, 4, limit);

	/*
	 * Sub-table offsets 0, 32, 96, 224 for f_code 1..4; the same as
	 * ((1 << (fc - 1)) - 1) * 32.
	 */
	m->vlc = mp1e_motion_vector_component + ((15 << fc) & 480);

	m->PMV[0] = 0;
	m->PMV[1] = 0;
}
#endif /* VLC_H */
--- NEW FILE ---
#
# MPEG-1 Real Time Encoder
#
# Copyright (C) 1999-2000 Michael H. Schimek
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# $Id: vlc_mmx.s,v 1.1 2001/12/04 23:58:10 mswitch Exp $
# int
# mp1e_p6_mpeg1_encode_intra(void)
#
# Codes the six intra blocks stored at mblock+768 (128 bytes = 64 signed
# words per block) in the order 0, 2, 1, 3, 4, 5, then writes a final
# 2-bit code of value 2 (end of block) through mmx_bputl.
#
# Returns 0 in %eax on success.  Returns 1 when a coefficient magnitude
# above 255 is met; dc_dct_pred and the bit count at video_out are then
# left unwritten, but %mm7 and the output pointer at video_out+4 may
# already have advanced.
#
# All integer registers except %eax are preserved.  %mm1-%mm7 are used;
# %mm7 is read without being initialised here, so it must hold the
# pending output bits on entry.
#
# Inside the block helper (local label 1) %esp is used as the loop
# counter and does NOT point at the stack: nothing there may push, pop
# or call.  The real stack pointer is parked in %mm6.
		.text
		.align		16
		.globl		mp1e_p6_mpeg1_encode_intra
mp1e_p6_mpeg1_encode_intra:
	pushl	%ebp;
	pushl	%edi;
	pushl	%edx;
	leal	mp1e_dc_vlc_intra,%edi;		# DC table row 0
	pushl	%esi;
	leal	mblock+0*128+768,%esi;		# block 0
	pushl	%ebx;
	movl	video_out,%ebp;			# bit count of the accumulator
	pushl	%ecx;
	movl	dc_dct_pred,%ebx;		# luma predictor
	call	1f;
	movswl	mblock+0*128+768,%ebx;		# predictor = DC of block 0
	leal	mblock+2*128+768,%esi;		# block 2
	leal	mp1e_dc_vlc_intra+12*8,%edi;	# DC table row 1
	call	1f;
	movswl	mblock+2*128+768,%ebx;
	leal	mblock+1*128+768,%esi;		# block 1
	leal	mp1e_dc_vlc_intra+12*8,%edi;
	call	1f;
	movswl	mblock+1*128+768,%ebx;
	leal	mblock+3*128+768,%esi;		# block 3
	leal	mp1e_dc_vlc_intra+12*8,%edi;
	call	1f;
	movl	dc_dct_pred+4,%ebx;		# first chroma predictor
	leal	mblock+4*128+768,%esi;		# block 4
	leal	mp1e_dc_vlc_intra+24*8,%edi;	# DC table row 2
	call	1f;
	movl	dc_dct_pred+8,%ebx;		# second chroma predictor
	leal	mblock+5*128+768,%esi;		# block 5
	leal	mp1e_dc_vlc_intra+24*8,%edi;
	call	1f;
	# All blocks coded: the new predictors are the DC values of
	# blocks 3, 4 and 5.
	movswl	mblock+3*128+768,%eax;
	movswl	mblock+4*128+768,%ebx;
	movl	%eax,dc_dct_pred;
	movswl	mblock+5*128+768,%ecx;
	movl	%ebx,dc_dct_pred+4;
	movl	%ecx,dc_dct_pred+8;
	movl	%ebp,video_out;			# store the bit count back
	# mmx_bputl takes its arguments in registers: %eax = &video_out,
	# %ecx and %edx = value and bit count (both 2 here).
	movl	$video_out,%eax;
	movl	$2,%ecx;
	movl	$2,%edx;
	call	mmx_bputl;
	movl	(%esp),%ecx;
	movl	4(%esp),%ebx;
	movl	8(%esp),%esi;
	xorl	%eax,%eax;			# return 0
	movl	12(%esp),%edx;
	movl	16(%esp),%edi;
	movl	20(%esp),%ebp;
	leal	24(%esp),%esp;
	ret;

# Local label 1: code one block.
#   in:  %esi = block, %edi = DC table row, %ebx = DC predictor,
#        %ebp = bit count, %mm7 = bit accumulator
#   out: %ebp, %mm7 updated; %eax, %ebx, %ecx, %edx, %edi, %mm1-%mm6
#        clobbered
# On overflow it does not return to its caller but unwinds the whole
# function with %eax = 1.
		.align	16
1:	movd	%esp,%mm6;		# park the stack pointer
	movl	$0,%ecx;		# size 0 for a zero difference; relies
					# on bsrl leaving %ecx alone for a zero
					# source (NOTE(review): formally
					# undefined, confirm for target CPUs)
	movswl	(%esi),%eax;
	subl	%ebx,%eax;		# eax = DC - predictor
	movl	%eax,%ebx;
	cdq;				# edx = 0 or -1 (sign mask)
	xorl	%edx,%eax;
	subl	%edx,%eax;		# eax = abs(difference)
	bsrl	%eax,%ecx;		# index of the highest set bit
	setnz	%al;			# al = 1 unless the difference is 0
	addl	%edx,%ebx;		# negative: difference - 1
	movl	$-63,%esp;		# loop counter for 63 AC coefficients
	addb	%al,%cl;		# size = bit index + 1, or 0
	sall	%cl,%edx;		# sign mask << size
	xorl	%edx,%ebx;		# negative: keep only the low size bits
	orl	(%edi,%ecx,8),%ebx;	# merge the pre-shifted size code
	addl	4(%edi,%ecx,8),%ebp;	# bit count += code length
	jmp	4f;

# Local label 2: AC loop.  %ebx = element offset of the coefficient,
# %edi = header entry of the current run group.
		.align	16
2:	movswl	(%esi,%ebx,2),%eax;
	movzbl	1(%edi),%ecx;		# entries in this run group
	testl	%eax,%eax;
	jne	3f;
	movzbl	mp1e_iscan+63(%esp),%ebx;	# next coefficient offset
	incl	%esp;
	leal	(%edi,%ecx,2),%edi;	# run++: skip to the next group
	jle	2b;			# flags from incl: loop while <= 0
	movd	%mm6,%esp;
	ret;

3:	cdq;
	xorl	%edx,%eax;
	subl	%edx,%eax;		# eax = abs(level), edx = sign mask
	cmpl	%ecx,%eax;
	jge	5f;			# no table entry: escape
	movzbl	(%edi,%eax,2),%ebx;	# code
	movzbl	1(%edi,%eax,2),%ecx;	# length
	subl	%edx,%ebx;		# negative: set the sign bit (code + 1)
	addl	%ecx,%ebp;

# Local label 4: append the code in %ebx; %ebp already includes its
# length.
4:	movl	$64,%edi;
	movd	%ebx,%mm2;
	subl	%ebp,%edi;		# free bits left in the accumulator
	movd	%edi,%mm1;
	jle	7f;			# full: flush 64 bits first
	leal	mp1e_ac_vlc_zero,%edi;	# run = 0
	psllq	%mm1,%mm2;		# align the code below earlier bits
	movzbl	mp1e_iscan+63(%esp),%ebx;
	incl	%esp;
	por	%mm2,%mm7;
	jle	2b;
	movd	%mm6,%esp;
	ret;

# Local label 5: escape codes.
5:	movzbl	(%edi),%ecx;		# run (code field of the group header)
	movswl	(%esi,%ebx,2),%edx;	# signed level
	cmpl	$127,%eax;
	jg	6f;
	# 20 bits: 000001, 6 bit run, 8 bit level
	andl	$255,%edx;
	sall	$8,%ecx;
	leal	16384(%ecx,%edx),%ebx;
	addl	$20,%ebp;
	jmp	4b;
	# 28 bits: 000001, 6 bit run, 00000000 or 10000000, low 8 bits of
	# the level.  Mask 33023 = 0x80FF keeps the sign in bit 15.
6:	sall	$16,%ecx;
	andl	$33023,%edx;
	leal	4194304(%ecx,%edx),%ebx;
	addl	$28,%ebp;
	# Compare immediately before the branch: addl (like sall and andl)
	# rewrites the flags, so an earlier cmpl would be lost and every
	# level of magnitude 128..255 would take the error exit.
	cmpl	$255,%eax;
	jle	4b;
	# Magnitude above 255: unwind and return 1.
	movd	%mm6,%esp;
	addl	$4,%esp;		# drop the return address of call 1f
	movl	$1,%eax;
	popl	%ecx;
	popl	%ebx;
	popl	%esi;
	popl	%edx;
	popl	%edi;
	popl	%ebp;
	ret;

# Local label 7: the accumulator is full.  Complete it with the upper
# part of the new code, store 64 bits big-endian at the output pointer
# (video_out+4) and restart the accumulator with the remaining bits.
		.align	16
7:	movq	video_out+16,%mm3;	# presumably the constant 64 - confirm
	movq	%mm2,%mm5;
	leal	mp1e_ac_vlc_zero,%edi;	# run = 0
	pxor	%mm4,%mm4;
	psubd	%mm1,%mm4;		# mm4 = bit count - 64 = leftover bits
	movd	%mm4,%ebp;		# which is the new bit count
	psubd	%mm4,%mm3;
	psrld	%mm4,%mm5;		# part of the code that still fits
	movl	video_out+4,%ecx;
	por	%mm5,%mm7;
	movd	%mm7,%eax;
	movzbl	mp1e_iscan+63(%esp),%ebx;
	psrlq	$32,%mm7;
	bswap	%eax;
	leal	8(%ecx),%edx;
	movl	%eax,4(%ecx);		# low dword, byte swapped
	movd	%mm7,%eax;
	bswap	%eax;
	psllq	%mm3,%mm2;		# left-align the leftover bits
	incl	%esp;
	movq	%mm2,%mm7;
	movl	%eax,(%ecx);		# high dword, byte swapped
	movl	%edx,video_out+4;	# advance the output pointer
	jle	2b;			# flags still from incl
	movd	%mm6,%esp;
	ret;
# int
# mp1e_p6_mpeg1_encode_inter(short mblock[6][8][8], unsigned int cbp)
#
# Codes the blocks of mblock selected by cbp (bit 5 = block 0, bit 4 =
# block 1, bit 3 = block 2, bit 2 = block 3, bit 1 = block 4, bit 0 =
# block 5) in the order 0, 2, 1, 3, 4, 5.  Each block is followed by a
# 2-bit code of value 2 (end of block) written through mmx_bputl.
#
# Returns 0 in %eax on success.  Returns 1 when a coefficient magnitude
# above 255 is met; the bit count at video_out is then not written back
# for that block, but %mm7 and the output pointer at video_out+4 may
# already have advanced.
#
# %esi, %ebp, %edi and %ebx are preserved; %ecx and %edx are not.
# %mm1-%mm7 are used; %mm7 is read without being initialised here, so it
# must hold the pending output bits on entry.
#
# Inside the block helper (local label 1) %esp is used as the loop
# counter and does NOT point at the stack: nothing there may push, pop
# or call.  The real stack pointer is parked in %mm6.
		.text
		.align		16
		.globl		mp1e_p6_mpeg1_encode_inter
mp1e_p6_mpeg1_encode_inter:
	testl	$32,1*4+4(%esp);	# cbp bit 5; the flags survive the
					# pushes and the movl below
	pushl	%esi
	movl	2*4+0(%esp),%esi;	# mblock, i.e. block 0
	pushl	%ebp
	pushl	%edi
	pushl	%ebx
	je	2f;
	call	1f;
	movl	5*4+0(%esp),%esi;
2:	testl	$8,5*4+4(%esp);		# block 2
	je	2f;
	leal	2*128(%esi),%esi;
	call	1f;
	movl	5*4+0(%esp),%esi;
2:	testl	$16,5*4+4(%esp);	# block 1
	je	2f;
	leal	1*128(%esi),%esi;
	call	1f;
	movl	5*4+0(%esp),%esi;
2:	testl	$4,5*4+4(%esp);		# block 3
	je	2f;
	leal	3*128(%esi),%esi;
	call	1f;
	movl	5*4+0(%esp),%esi;
2:	testl	$2,5*4+4(%esp);		# block 4
	je	2f;
	leal	4*128(%esi),%esi;
	call	1f;
	movl	5*4+0(%esp),%esi;
2:	testl	$1,5*4+4(%esp);		# block 5
	je	2f;
	leal	5*128(%esi),%esi;
	call	1f;
2:
	xorl	%eax,%eax		# return 0
	popl	%ebx
	popl	%edi
	popl	%ebp
	popl	%esi
	ret

# Local label 1: code one block and its end-of-block code.
#   in:  %esi = block, %mm7 = bit accumulator
#   clobbers %eax, %ebx, %ecx, %edx, %ebp, %edi, %mm1-%mm6
# In the loop %ebx holds the bit count (only its low byte is updated)
# and %ebp the element offset of the coefficient or the code to append.
# On overflow it does not return to its caller but unwinds the whole
# function with %eax = 1.
		.align	16
1:	movswl	(%esi),%eax;		# first coefficient
	movl	$0,%ebp;		# element offset 0
	movd	%esp,%mm6;		# park the stack pointer
	movl	video_out,%ebx;		# bit count of the accumulator
	movl	$-63,%esp;		# loop counter
	leal	mp1e_ac_vlc_zero,%edi;	# run = 0
	cdq;
	xorl	%edx,%eax;
	subl	%edx,%eax;		# eax = abs(first coefficient)
	decl	%eax;
	jne	3f			# not +1 or -1: handle it in the loop
	# First coefficient +1 or -1: 2-bit code 1s, s = sign bit.
	movl	$2,%ebp;
	subl	%edx,%ebp;		# 2, or 3 when negative
	addl	$2,%ebx;
	jmp	9f;

# Local label 3: coefficient loop.  %ebp = element offset, %edi = header
# entry of the current run group.
		.align	16
3:	movswl	(%esi,%ebp,2),%eax;
	testl	%eax,%eax;
	movzbl	1(%edi),%ecx;		# entries in this run group (movzbl
					# keeps the flags of testl)
	jne	4f;
	movzbl	mp1e_iscan+63(%esp),%ebp;	# next coefficient offset
	incl	%esp;
	leal	(%edi,%ecx,2),%edi;	# run++: skip to the next group
	jle	3b;			# flags from incl: loop while <= 0
	# Local label 0: block done.  Store the bit count and tail-call
	# mmx_bputl with %eax = &video_out and %ecx = %edx = 2 (value and
	# bit count of the end-of-block code); it returns to our caller.
0:	movl	%ebx,video_out;
	movd	%mm6,%esp;
	movl	$video_out,%eax;
	movl	$2,%ecx;
	movl	$2,%edx;
	jmp	mmx_bputl;

4:	cdq;
	xorl	%edx,%eax;
	subl	%edx,%eax;		# eax = abs(level), edx = sign mask
	cmpl	%ecx,%eax;
	jge	5f;			# no table entry: escape
	movzbl	(%edi,%eax,2),%ebp;	# code
	addb	1(%edi,%eax,2),%bl;	# bit count += length
	subl	%edx,%ebp;		# negative: set the sign bit (code + 1)

# Local label 9: append the code in %ebp; %ebx already includes its
# length.
9:	movl	$64,%edi;
	movd	%ebp,%mm2;
	subl	%ebx,%edi;		# free bits left in the accumulator
	movd	%edi,%mm1;
	jle	8f;			# full: flush 64 bits first
	leal	mp1e_ac_vlc_zero,%edi;	# run = 0
	movzbl	mp1e_iscan+63(%esp),%ebp;
	psllq	%mm1,%mm2;		# align the code below earlier bits
	incl	%esp;
	por	%mm2,%mm7;
	jle	3b;
	jmp	0b;

# Local label 5: escape codes.
		.align	16
5:	movswl	(%esi,%ebp,2),%ebp;	# signed level
	movzbl	(%edi),%ecx;		# run (code field of the group header)
	cmpl	$127,%eax;
	jg	6f;
	# 20 bits: 000001, 6 bit run, 8 bit level
	sall	$8,%ecx;
	andl	$255,%ebp;
	leal	16384(%ecx,%ebp),%ebp;
	addb	$20,%bl;
	jmp	9b;
	# 28 bits: 000001, 6 bit run, 00000000 or 10000000, low 8 bits of
	# the level.  Mask 33023 = 0x80FF keeps the sign in bit 15.
6:	sall	$16,%ecx;
	andl	$33023,%ebp;
	leal	4194304(%ecx,%ebp),%ebp;
	addb	$28,%bl;
	# Compare immediately before the branch: sall, andl and addb all
	# rewrite the flags, so an earlier cmpl would be lost and every
	# level of magnitude 128..255 would take the error exit.
	cmpl	$255,%eax;
	jle	9b;
	# Magnitude above 255: unwind and return 1.
	movd	%mm6,%esp;
	addl	$4,%esp;		# drop the return address of call 1f
	popl	%ebx;
	popl	%edi;
	movl	$1,%eax;
	popl	%ebp;
	popl	%esi;
	ret;

# Local label 8: the accumulator is full.  Complete it with the upper
# part of the new code, store 64 bits big-endian at the output pointer
# (video_out+4) and restart the accumulator with the remaining bits.
		.align	16
8:	leal	mp1e_ac_vlc_zero,%edi;	# run = 0
	movq	video_out+16,%mm3;	# presumably the constant 64 - confirm
	movq	%mm2,%mm5;
	pxor	%mm4,%mm4;
	psubd	%mm1,%mm4;		# mm4 = bit count - 64 = leftover bits
	movd	%mm4,%ebx;		# which is the new bit count
	psrld	%mm4,%mm5;		# part of the code that still fits
	movl	video_out+4,%ecx;
	por	%mm5,%mm7;
	psubd	%mm4,%mm3;
	movzbl	mp1e_iscan+63(%esp),%ebp;
	movd	%mm7,%eax;
	psrlq	$32,%mm7;
	psllq	%mm3,%mm2;		# left-align the leftover bits
	leal	8(%ecx),%edx;
	bswap	%eax;
	movl	%eax,4(%ecx);		# low dword, byte swapped
	movd	%mm7,%eax;
	incl	%esp;
	movl	%edx,video_out+4;	# advance the output pointer
	movq	%mm2,%mm7;
	bswap	%eax;
	movl	%eax,(%ecx);		# high dword, byte swapped
	jle	3b;			# flags still from incl
	jmp	0b;
- Previous message: [Mplayer-cvslog] CVS: main/libmp1e/systems libsystems.h,NONE,1.1 mpeg.h,NONE,1.1 mpeg1.c,NONE,1.1 mpeg2.c,NONE,1.1 output.h,NONE,1.1 rte_output.c,NONE,1.1 systems.c,NONE,1.1 systems.h,NONE,1.1 vcd.c,NONE,1.1
- Next message: [Mplayer-cvslog] CVS: main/DOCS DXR3,1.5,1.6
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the MPlayer-cvslog
mailing list