[Ffmpeg-devel] H.264 encoder
Panagiotis Issaris
takis.issaris
Thu Oct 5 15:35:50 CEST 2006
Hi,
On Thu, Oct 05, 2006 at 02:43:39AM +0200, Michael Niedermayer wrote:
>[...]
> > > > > h264.c h264_diff_dct_c() looks so much nicer, is this faster?
> > The h264_diff_dct_c() function had been #if 0'ed out. Has it ever been
> > tested and confirmed working correctly?
> >
> > I'm asking because I'm getting image corruption with this function
> > integrated, and knowing if this function is correct can help me find the
> > problem.
>
> hmm, if you see corruption then its likely not working correctly, so
> remove it or merge it with your or whatever ...
I've removed this one.
> > > [...]
> > > > + if (topavail && w == 16 && h == 16 && srcleft->topblock != 0 && srcleft->topblock->available)
> > > > + {
> > > > + // Plane prediction
> > >
> > > use h264.c:pred*x*_plane_c and the other ones from h264.c
> > Fixed.
>
> great :)
:)
Furthermore, I've reused the fill_rectangle() function from h264.c to fix
another issue you remarked.
I've attached an updated patch, which contains the fixes I have mentioned in
this and previous e-mails. It does not address all issues yet, so I'm sending it
purely for archival purposes ;-)
With friendly regards,
Takis
-------------- next part --------------
diff --git a/Changelog b/Changelog
index 1bcfe36..b9e1917 100644
--- a/Changelog
+++ b/Changelog
@@ -59,6 +59,7 @@ version <next>
- VP5 video decoder
- VP6 video decoder
- WavPack lossless audio decoder
+- Native H.264 encoder
version 0.4.9-pre1:
diff --git a/doc/ffmpeg-doc.texi b/doc/ffmpeg-doc.texi
index c41807d..76644eb 100644
--- a/doc/ffmpeg-doc.texi
+++ b/doc/ffmpeg-doc.texi
@@ -772,7 +772,7 @@ following image formats are supported:
@item WMV8 @tab X @tab X @tab not completely working
@item H.261 @tab X @tab X
@item H.263(+) @tab X @tab X @tab also known as RealVideo 1.0
-@item H.264 @tab @tab X
+@item H.264 @tab X @tab X
@item RealVideo 1.0 @tab X @tab X
@item RealVideo 2.0 @tab X @tab X
@item MJPEG @tab X @tab X
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index fbf8e0b..74b90f9 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -87,6 +87,7 @@ OBJS-$(CONFIG_FRAPS_DECODER) +
OBJS-$(CONFIG_H261_DECODER) += h261.o
OBJS-$(CONFIG_H261_ENCODER) += h261.o
OBJS-$(CONFIG_H264_DECODER) += h264.o
+OBJS-$(CONFIG_H264_ENCODER) += h264enc.o h264cavlc.o h264dsp.o
OBJS-$(CONFIG_HUFFYUV_DECODER) += huffyuv.o
OBJS-$(CONFIG_HUFFYUV_ENCODER) += huffyuv.o
OBJS-$(CONFIG_IDCIN_DECODER) += idcinvideo.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index fdd80fc..d575908 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -75,7 +75,7 @@ #ifdef CONFIG_MPEG1VIDEO_ENCODER
register_avcodec(&mpeg1video_encoder);
#endif //CONFIG_MPEG1VIDEO_ENCODER
#ifdef CONFIG_H264_ENCODER
-// register_avcodec(&h264_encoder);
+ register_avcodec(&h264_encoder);
#endif //CONFIG_H264_ENCODER
#ifdef CONFIG_MPEG2VIDEO_ENCODER
register_avcodec(&mpeg2video_encoder);
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index ad7c776..db9e8a3 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -142,6 +142,7 @@ enum CodecID {
CODEC_ID_VP5,
CODEC_ID_VP6,
CODEC_ID_VP6F,
+ CODEC_ID_FFH264,
/* various pcm "codecs" */
CODEC_ID_PCM_S16LE= 0x10000,
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index ae5902f..1e3ce7f 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -2624,6 +2624,11 @@ void ff_put_vc1_mspel_mc00_c(uint8_t *ds
}
#endif /* CONFIG_VC1_DECODER||CONFIG_WMV3_DECODER */
+#if defined(CONFIG_H264_ENCODER)
+/* H264 specific */
+void ff_h264dsp_init(DSPContext* c, AVCodecContext *avctx);
+#endif /* CONFIG_H264_ENCODER */
+
static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
uint8_t *cm = cropTbl + MAX_NEG_CROP;
int i;
@@ -4081,6 +4086,9 @@ #endif
#if defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
ff_vc1dsp_init(c,avctx);
#endif
+#if defined(CONFIG_H264_ENCODER)
+ ff_h264dsp_init(c,avctx);
+#endif
c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 34d91ab..1038e5d 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -57,6 +57,13 @@ void ff_h264_idct8_dc_add_c(uint8_t *dst
void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block);
void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block);
+void ff_h264_dct_c(DCTELEM inblock[4][4], DCTELEM outblock[4][4]);
+void ff_h264_hadamard_mult4x4_c(DCTELEM Y[4][4]);
+void ff_h264_transform_dct_quant_c(int16_t block[4][4], int QP, int dontscaleDC);
+void ff_h264_hadamard_quant_4x4_c(DCTELEM Y[4][4], int QP);
+void ff_h264_hadamard_quant_2x2_c(int16_t Y[2][2], int QP);
+void ff_h264_hadamard_invquant_4x4_c(DCTELEM Y[4][4], int QP);
+void ff_h264_transform_inverse_quant_dct_add_c(int16_t block[4][4], int QP, int dontscaleDC, uint8_t *dst, int stride);
void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1,
const float *src2, int src3, int blocksize, int step);
@@ -376,10 +383,19 @@ #define FF_PARTTRANS_IDCT_PERM 5
#define BASIS_SHIFT 16
#define RECON_SHIFT 6
+ /* h264 functions */
void (*h264_idct_add)(uint8_t *dst, DCTELEM *block, int stride);
void (*h264_idct8_add)(uint8_t *dst, DCTELEM *block, int stride);
void (*h264_idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
void (*h264_idct8_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
+ void (*h264_dct)(DCTELEM inblock[4][4], DCTELEM outblock[4][4]);
+ void (*h264_idct_notranspose_add)(uint8_t *dst, DCTELEM *block, int stride);
+ void (*h264_hadamard_mult4x4)(DCTELEM Y[4][4]);
+ void (*h264_hadamard_quant_2x2)(int16_t Y[2][2], int QP);
+ void (*h264_hadamard_quant_4x4)(DCTELEM Y[4][4], int QP);
+ void (*h264_hadamard_invquant_4x4)(DCTELEM Y[4][4], int QP);
+ void (*h264_transform_dct_quant)(int16_t block[4][4], int QP, int dontscaleDC);
+ void (*h264_transform_inverse_quant_dct_add)(int16_t block[4][4], int QP, int dontscaleDC, uint8_t *dst, int stride);
/* snow wavelet */
void (*vertical_compose97i)(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width);
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 8602276..70cebff 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -163,20 +163,6 @@ typedef struct H264Context{
MpegEncContext s;
int nal_ref_idc;
int nal_unit_type;
-#define NAL_SLICE 1
-#define NAL_DPA 2
-#define NAL_DPB 3
-#define NAL_DPC 4
-#define NAL_IDR_SLICE 5
-#define NAL_SEI 6
-#define NAL_SPS 7
-#define NAL_PPS 8
-#define NAL_AUD 9
-#define NAL_END_SEQUENCE 10
-#define NAL_END_STREAM 11
-#define NAL_FILLER_DATA 12
-#define NAL_SPS_EXT 13
-#define NAL_AUXILIARY_SLICE 19
uint8_t *rbsp_buffer;
unsigned int rbsp_buffer_size;
@@ -420,13 +406,22 @@ #else
#endif
}
+const uint8_t rem6[52]={
+0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
+};
+
+const uint8_t div6[52]={
+0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
+};
+
+
/**
* fill a rectangle.
* @param h height of the rectangle, should be a constant
* @param w width of the rectangle, should be a constant
* @param size the size of val (1 or 4), should be a constant
*/
-static always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
+always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
uint8_t *p= (uint8_t*)vp;
assert(size==1 || size==4);
assert(w<=4);
@@ -2033,42 +2028,6 @@ static inline int get_chroma_qp(int chro
return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
}
-
-#if 0
-static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
- int i;
- //FIXME try int temp instead of block
-
- for(i=0; i<4; i++){
- const int d0= src1[0 + i*stride] - src2[0 + i*stride];
- const int d1= src1[1 + i*stride] - src2[1 + i*stride];
- const int d2= src1[2 + i*stride] - src2[2 + i*stride];
- const int d3= src1[3 + i*stride] - src2[3 + i*stride];
- const int z0= d0 + d3;
- const int z3= d0 - d3;
- const int z1= d1 + d2;
- const int z2= d1 - d2;
-
- block[0 + 4*i]= z0 + z1;
- block[1 + 4*i]= 2*z3 + z2;
- block[2 + 4*i]= z0 - z1;
- block[3 + 4*i]= z3 - 2*z2;
- }
-
- for(i=0; i<4; i++){
- const int z0= block[0*4 + i] + block[3*4 + i];
- const int z3= block[0*4 + i] - block[3*4 + i];
- const int z1= block[1*4 + i] + block[2*4 + i];
- const int z2= block[1*4 + i] - block[2*4 + i];
-
- block[0*4 + i]= z0 + z1;
- block[1*4 + i]= 2*z3 + z2;
- block[2*4 + i]= z0 - z1;
- block[3*4 + i]= z3 - 2*z2;
- }
-}
-#endif
-
//FIXME need to check that this doesnt overflow signed 32 bit for low qp, i am not sure, it's very close
//FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
@@ -2355,7 +2314,7 @@ static void pred4x4_horizontal_down_c(ui
src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
}
-static void pred16x16_vertical_c(uint8_t *src, int stride){
+void pred16x16_vertical_c(uint8_t *src, int stride){
int i;
const uint32_t a= ((uint32_t*)(src-stride))[0];
const uint32_t b= ((uint32_t*)(src-stride))[1];
@@ -2370,7 +2329,7 @@ static void pred16x16_vertical_c(uint8_t
}
}
-static void pred16x16_horizontal_c(uint8_t *src, int stride){
+void pred16x16_horizontal_c(uint8_t *src, int stride){
int i;
for(i=0; i<16; i++){
@@ -2381,7 +2340,7 @@ static void pred16x16_horizontal_c(uint8
}
}
-static void pred16x16_dc_c(uint8_t *src, int stride){
+void pred16x16_dc_c(uint8_t *src, int stride){
int i, dc=0;
for(i=0;i<16; i++){
@@ -2435,7 +2394,7 @@ static void pred16x16_top_dc_c(uint8_t *
}
}
-static void pred16x16_128_dc_c(uint8_t *src, int stride){
+void pred16x16_128_dc_c(uint8_t *src, int stride){
int i;
for(i=0; i<16; i++){
@@ -2486,11 +2445,11 @@ static inline void pred16x16_plane_compa
}
}
-static void pred16x16_plane_c(uint8_t *src, int stride){
+void pred16x16_plane_c(uint8_t *src, int stride){
pred16x16_plane_compat_c(src, stride, 0);
}
-static void pred8x8_vertical_c(uint8_t *src, int stride){
+void pred8x8_vertical_c(uint8_t *src, int stride){
int i;
const uint32_t a= ((uint32_t*)(src-stride))[0];
const uint32_t b= ((uint32_t*)(src-stride))[1];
@@ -2501,7 +2460,7 @@ static void pred8x8_vertical_c(uint8_t *
}
}
-static void pred8x8_horizontal_c(uint8_t *src, int stride){
+void pred8x8_horizontal_c(uint8_t *src, int stride){
int i;
for(i=0; i<8; i++){
@@ -2510,7 +2469,7 @@ static void pred8x8_horizontal_c(uint8_t
}
}
-static void pred8x8_128_dc_c(uint8_t *src, int stride){
+void pred8x8_128_dc_c(uint8_t *src, int stride){
int i;
for(i=0; i<8; i++){
@@ -2564,7 +2523,7 @@ static void pred8x8_top_dc_c(uint8_t *sr
}
-static void pred8x8_dc_c(uint8_t *src, int stride){
+void pred8x8_dc_c(uint8_t *src, int stride){
int i;
int dc0, dc1, dc2, dc3;
@@ -2589,7 +2548,7 @@ static void pred8x8_dc_c(uint8_t *src, i
}
}
-static void pred8x8_plane_c(uint8_t *src, int stride){
+void pred8x8_plane_c(uint8_t *src, int stride){
int j, k;
int a;
uint8_t *cm = cropTbl + MAX_NEG_CROP;
diff --git a/libavcodec/h264cavlc.c b/libavcodec/h264cavlc.c
new file mode 100644
index 0000000..5d8cf96
--- /dev/null
+++ b/libavcodec/h264cavlc.c
@@ -0,0 +1,311 @@
+/*
+ * H.264 encoder
+ * Copyright (c) 2006 Expertisecentrum Digitale Media, UHasselt
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "common.h"
+#include "dsputil.h"
+#include "avcodec.h"
+#include "bitstream.h"
+#include "mpegvideo.h"
+#include "h264data.h"
+#include "h264encdata.h"
+
+//#define DEBUG_H264CAVLC
+
+static int length_table[7][4095];
+static int code_table[7][4095];
+/* Precompute the CAVLC level codes: for each vlcnum (0..6) and level (-2047..2047), entry [level+2047] holds the bit length and the code value. */
+void h264cavlc_generate_tables(void)
+{
+ int vlcnum, level;
+ for (vlcnum=0; vlcnum<7; vlcnum++)
+ {
+ for(level=-2047; level<2048; level++)
+ {
+ int sign = level < 0;
+ int levabs = ABS(level);
+ int len, inf;
+ // NOTE(review): level == 0 produces meaningless entries (e.g. length -1 for vlcnum 0); presumably zero is never looked up -- confirm
+ if (vlcnum == 0)
+ {
+ // table 0: plain unary code below 8, 4-bit escape suffix for 8..15, 12-bit escape suffix above
+ if (levabs < 8)
+ {
+ len = levabs * 2 + sign - 1;
+ inf = 1;
+ }
+ else if (levabs < 8+8)
+ {
+ len = 14 + 1 + 4;
+ inf = (1 << 4) | ((levabs - 8) << 1) | sign;
+ }
+ else
+ {
+ len = 14 + 2 + 12;
+ inf = (0x1 << 12) | ((levabs - 16)<< 1) | sign;
+ }
+ length_table[vlcnum][level+2047] = len;
+ code_table[vlcnum][level+2047] = inf;
+ }
+ else
+ {
+ int iCodeword;
+ int iLength;
+ int shift = vlcnum-1;
+ int escape = (15<<shift)+1;
+ int numPrefix;
+ int sufmask = ~((0xffffffff)<<shift);
+ int suffix;
+ // split |level|-1 into a unary prefix count and a `shift`-bit suffix
+ numPrefix = (levabs-1)>>shift;
+ suffix = (levabs-1)&sufmask;
+
+#ifdef DEBUG_H264CAVLC
+ printf("numPrefix %d | suffix %d | levabs %d | escape %d | sufmask %d | vlcnum %d | level %d | sign %d\n",
+ numPrefix,suffix,levabs,escape,sufmask,vlcnum,level,sign);
+#endif // DEBUG_H264CAVLC
+ if (levabs < escape)
+ {
+ iLength = numPrefix + vlcnum + 1;
+ iCodeword = (1<<(shift+1))|(suffix<<1)|sign;
+ }
+ else
+ {
+ iLength = 28;
+ iCodeword = (1<<12)|((levabs-escape)<<1)|sign;
+ }
+ len = iLength;
+ inf = iCodeword;
+
+#ifdef DEBUG_H264CAVLC
+ printf("len %d | code %d\n",len,inf);
+#endif // DEBUG_H264CAVLC
+
+ length_table[vlcnum][level+2047] = len;
+ code_table[vlcnum][level+2047] = inf;
+ }
+ }
+ }
+}
+/* Write one level using the precomputed tables. NOTE(review): a level outside [-2047, 2047] would index past the tables -- confirm callers clamp. */
+static inline void h264cavlc_encode_vlc_level(PutBitContext *b, int vlcnum, int16_t level)
+{
+ int16_t index;
+ index = level+2047; // the tables store level L at position L+2047
+ put_bits(b,length_table[vlcnum][index],code_table[vlcnum][index]);
+#ifdef DEBUG_H264CAVLC
+// av_log(NULL, AV_LOG_DEBUG, "Encoded level with number %d\n",code_table[vlcnum][index]);
+#endif
+}
+/* Write total_zeros using the length/code pair from row vlcnum of total_zeros_len / total_zeros_bits. */
+static inline void h264cavlc_encode_vlc_totalzeros(PutBitContext *b, int vlcnum, int total_zeros)
+{
+ put_bits(b,total_zeros_len[vlcnum][total_zeros],total_zeros_bits[vlcnum][total_zeros]);
+}
+/* Write one run_before value using the length/code pair from row vlcnum of run_len / run_bits. */
+static inline void h264cavlc_encode_vlc_run(PutBitContext *b, int vlcnum, int runbefore)
+{
+ put_bits(b,run_len[vlcnum][runbefore],run_bits[vlcnum][runbefore]);
+}
+/* Write coeff_token from table lookup_table; the entry index is trailing_ones + 4*total_coeffs. */
+static inline void h264cavlc_encode_vlc_coefftoken(PutBitContext *b, int lookup_table, int total_coeffs, int trailing_ones)
+{
+ put_bits(b,coeff_token_len[lookup_table][trailing_ones+total_coeffs*4],coeff_token_bits[lookup_table][trailing_ones+total_coeffs*4]);
+}
+/* Write coeff_token for a chroma DC block from the dedicated chroma DC table; the entry index is trailing_ones + 4*total_coeffs. */
+static inline void h264cavlc_encode_vlc_coefftoken_chromadc(PutBitContext *b, int total_coeffs, int trailing_ones)
+{
+ put_bits(b,chroma_dc_coeff_token_len[trailing_ones + total_coeffs * 4],chroma_dc_coeff_token_bits[trailing_ones + total_coeffs * 4]);
+}
+/* Write chroma DC total_zeros as `value` zero bits followed by a 1; the closing 1 is omitted when vlcnum + value == 3. */
+static inline void h264cavlc_encode_vlc_totalzeros_chromadc(PutBitContext *b, int vlcnum, int value)
+{
+ if(vlcnum + value == 3) put_bits(b, value , 0);
+ else put_bits(b, value+1, 1);
+}
+/* Pick the coeff_token table (0..3) from the counts na and nb: their rounded-up average when both are >= 0, else whichever is >= 0, else 0. */
+static inline int h264cavlc_get_lookup_table(int na, int nb)
+{
+ int nc = 0;
+ static const int8_t lookup_table[8] = {0, 0, 1, 1, 2, 2, 2, 2}; // constant data: no need to rebuild it on every call
+
+ if (na >= 0 && nb >= 0)
+ {
+ nc = na+nb+1;
+ nc >>= 1;
+ }
+ else
+ {
+ if (na >= 0) // nB < 0
+ nc = na;
+ else if (nb >= 0) // nA < 0
+ nc = nb;
+ }
+
+ return (nc < 8) ? lookup_table[nc] : 3;
+}
+/* CAVLC-encode len coefficients into b (coeff_token, trailing-one signs, levels, total_zeros, run_before); na/nb pick the coeff_token table; returns the nonzero count. */
+int h264cavlc_encode(PutBitContext *b, int16_t *coefficients, int len, int na, int nb, int is_chroma_dc)
+{
+ static const int8_t increment_vlcnum[6] = { 0, 3, 6, 12, 24, 48 };
+ // increment_vlcnum[v]: once an absolute level exceeds this, the level table number v is bumped to v+1
+ int i, t;
+ int total_coeffs;
+ int trailing_ones;
+ int total_zeros;
+ int numlevels;
+ int16_t levels[256];
+ int16_t zeros[256];
+ // levels[k] / zeros[k]: the k-th nonzero coefficient and the zero run in front of it; NOTE(review): assumes len <= 256 -- confirm
+#ifdef DEBUG_H264CAVLC
+ for (i = 0 ; i < len ; i++)
+ av_log(NULL, AV_LOG_DEBUG, "%6d",coefficients[i]);
+ av_log(NULL, AV_LOG_DEBUG, "\n");
+#endif
+
+ // Count trailing ones, total non-zero coefficients and the number of non-trailing zeros
+
+ total_coeffs = 0;
+ trailing_ones = 0;
+ total_zeros = 0; // For now, we'll count the number of zeros at the end
+ for (i = 0 ; i < len ; i++)
+ {
+ int16_t val = coefficients[i];
+ if (val != 0)
+ {
+ levels[total_coeffs] = val;
+ zeros[total_coeffs] = total_zeros;
+ if (val == -1 || val == +1)
+ trailing_ones++;
+ else
+ trailing_ones = 0;
+ total_coeffs++;
+ total_zeros = 0;
+ }
+ else
+ total_zeros++;
+ }
+ if (trailing_ones > 3)
+ trailing_ones = 3;
+
+ total_zeros = len - total_zeros - total_coeffs; // The actual value of zeros (except the zeros at the end)
+ numlevels = total_coeffs - trailing_ones;
+
+ // Encode coeff_token. This is different for Chroma DC values
+
+ if (!is_chroma_dc)
+ {
+ int lookupTable = h264cavlc_get_lookup_table(na,nb);
+#ifdef DEBUG_H264CAVLC
+// av_log(NULL, AV_LOG_DEBUG, "Luma: vlc=%d #c=%d #t1=%d\n", lookupTable, total_coeffs, trailing_ones);
+#endif
+ h264cavlc_encode_vlc_coefftoken(b,lookupTable,total_coeffs,trailing_ones);
+ }
+ else
+ {
+#ifdef DEBUG_H264CAVLC
+// av_log(NULL, AV_LOG_DEBUG, "Chroma: #c=%d #t1=%d\n", total_coeffs, trailing_ones);
+#endif
+ h264cavlc_encode_vlc_coefftoken_chromadc(b,total_coeffs,trailing_ones);
+ }
+ if (total_coeffs == 0) // Only zeros here, nothing left to do
+ return 0;
+
+ // Encode the trailing one sign bits
+
+ for (i = total_coeffs-1, t = trailing_ones ; t > 0 ; i--, t--)
+ {
+ put_bits(b,1, levels[i] <= 0);
+ }
+
+ // Encode levels of the remaining nonzero coefficients
+
+ if (numlevels > 0)
+ {
+ int level_two_or_higher = 1;
+ int firstlevel = 1;
+ int vlcnum;
+
+ if (total_coeffs > 3 && trailing_ones == 3)
+ level_two_or_higher = 0;
+
+ vlcnum = total_coeffs > 10 && trailing_ones < 3;
+
+ for (i = numlevels-1 ; i >= 0 ; i--)
+ {
+ int16_t val = levels[i];
+ int16_t level = ABS(val);
+
+ if (level_two_or_higher)
+ {
+ val -= (val>>31)|1;
+ level_two_or_higher = 0;
+ }
+
+#ifdef DEBUG_H264CAVLC
+// av_log(NULL, AV_LOG_DEBUG, "Encoding level %d with vlc %d\n",val,vlcnum);
+#endif
+ h264cavlc_encode_vlc_level(b,vlcnum,val);
+
+ // update VLC table
+ if (vlcnum < 6 && level > increment_vlcnum[vlcnum])
+ vlcnum++;
+
+ if (firstlevel)
+ {
+ firstlevel = 0;
+ if (level > 3)
+ vlcnum = 2;
+ }
+ }
+ }
+
+ // If necessary, encode the amount of non-trailing zeros
+
+ if (total_coeffs < len)
+ {
+ int vlcnum = total_coeffs-1;
+
+#ifdef DEBUG_H264CAVLC
+// av_log(NULL, AV_LOG_DEBUG, "Encoding total_zeros %d with vlc %d\n",total_zeros,vlcnum);
+#endif
+
+ if (!is_chroma_dc)
+ h264cavlc_encode_vlc_totalzeros(b,vlcnum,total_zeros);
+ else
+ h264cavlc_encode_vlc_totalzeros_chromadc(b,vlcnum,total_zeros);
+ }
+
+ // If necessary, encode the run_before values
+
+ for (i = total_coeffs-1 ; i > 0 && total_zeros > 0 ; i--)
+ {
+ int runbefore = zeros[i];
+ int vlcnum = FFMIN(total_zeros-1, 6);
+
+#ifdef DEBUG_H264CAVLC
+// av_log(NULL, AV_LOG_DEBUG, "Encoding run %d with vlc %d\n",runbefore,vlcnum);
+#endif
+
+ h264cavlc_encode_vlc_run(b,vlcnum,runbefore);
+ total_zeros -= runbefore;
+ }
+
+ return total_coeffs;
+}
+
diff --git a/libavcodec/h264data.h b/libavcodec/h264data.h
index 1dd9daf..6ee2204 100644
--- a/libavcodec/h264data.h
+++ b/libavcodec/h264data.h
@@ -51,6 +51,24 @@ #define DC_128_PRED8x8 6
#define EXTENDED_SAR 255
+/* NAL unit types */
+enum {
+NAL_SLICE=1,
+NAL_DPA,
+NAL_DPB,
+NAL_DPC,
+NAL_IDR_SLICE,
+NAL_SEI,
+NAL_SPS,
+NAL_PPS,
+NAL_AUD,
+NAL_END_SEQUENCE,
+NAL_END_STREAM,
+NAL_FILLER_DATA,
+NAL_SPS_EXT,
+NAL_AUXILIARY_SLICE=19
+};
+
static const AVRational pixel_aspect[14]={
{0, 1},
{1, 1},
@@ -486,15 +504,6 @@ static const PMbInfo b_sub_mb_type_info[
{MB_TYPE_8x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 4, },
};
-
-static const uint8_t rem6[52]={
-0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
-};
-
-static const uint8_t div6[52]={
-0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
-};
-
static const uint8_t default_scaling4[2][16]={
{ 6,13,20,28,
13,20,28,32,
diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c
new file mode 100644
index 0000000..ba2292f
--- /dev/null
+++ b/libavcodec/h264dsp.c
@@ -0,0 +1,260 @@
+/*
+ * H.264/MPEG-4 Part 10 (Base profile) encoder.
+ *
+ * DSP functions
+ *
+ * Copyright (c) 2006 Expertisecentrum Digitale Media, UHasselt
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file h264dsp.c
+ * H.264 encoder related DSP utils
+ *
+ */
+
+
+#include <stdio.h>
+#include "dsputil.h"
+
+extern const int16_t ff_h264_MF00[6];
+extern const int16_t ff_h264_V00[6];
+extern const uint8_t div6[52];
+extern const uint8_t rem6[52];
+
+#define COPY_SIGN(A, B) ((A ^ (B>>31)) - (B>>31))
+
+#define FF_H264_TRANSFORM_DCT_QUANT_C_ELEMENT(X, Y) \
+ block[X][Y] = COPY_SIGN(((ABS((int32_t)outblock[X][Y])*MF[mod][X][Y]+f) >> qbits), outblock[X][Y])
+
+#define FF_H264_TRANSFORM_DCT_QUANT_C_LINE(X) \
+ FF_H264_TRANSFORM_DCT_QUANT_C_ELEMENT(X,0); \
+ FF_H264_TRANSFORM_DCT_QUANT_C_ELEMENT(X,1); \
+ FF_H264_TRANSFORM_DCT_QUANT_C_ELEMENT(X,2); \
+ FF_H264_TRANSFORM_DCT_QUANT_C_ELEMENT(X,3);
+
+// Transform block with ff_h264_dct_c() and overwrite it with the quantized result: sign-preserving (|c|*MF[QP%6] + f) >> qbits, qbits = 15 + QP/6, f = 2^qbits/3.
+// We always work with transposed input blocks, to avoid a distinction between C and mmx implementations. With dontscaleDC set, [0][0] is stored unquantized.
+void ff_h264_transform_dct_quant_c(int16_t block[4][4], int QP, int dontscaleDC) // y,x indexing
+{
+ static const int16_t MF[6][4][4] =
+ {
+ { { 13107, 8066, 13107, 8066}, { 8066, 5243, 8066, 5243}, { 13107, 8066, 13107, 8066}, { 8066, 5243, 8066, 5243} },
+ { { 11916, 7490, 11916, 7490}, { 7490, 4660, 7490, 4660}, { 11916, 7490, 11916, 7490}, { 7490, 4660, 7490, 4660} },
+ { { 10082, 6554, 10082, 6554}, { 6554, 4194, 6554, 4194}, { 10082, 6554, 10082, 6554}, { 6554, 4194, 6554, 4194} },
+ { { 9362, 5825, 9362, 5825}, { 5825, 3647, 5825, 3647}, { 9362, 5825, 9362, 5825}, { 5825, 3647, 5825, 3647} },
+ { { 8192, 5243, 8192, 5243}, { 5243, 3355, 5243, 3355}, { 8192, 5243, 8192, 5243}, { 5243, 3355, 5243, 3355} },
+ { { 7282, 4559, 7282, 4559}, { 4559, 2893, 4559, 2893}, { 7282, 4559, 7282, 4559}, { 4559, 2893, 4559, 2893} }
+ };
+ int32_t qbits = 15 + div6[QP];
+ int32_t f = (1<<qbits)/3;
+ int mod = rem6[QP];
+ DCTELEM outblock[4][4];
+ // outblock receives the transform output; block is then rewritten element by element
+ ff_h264_dct_c(block, outblock);
+ // the else below governs only element (0,0); the other three elements of row 0 are always quantized
+ if (dontscaleDC)
+ block[0][0] = outblock[0][0];
+ else
+ FF_H264_TRANSFORM_DCT_QUANT_C_ELEMENT(0,0);
+ FF_H264_TRANSFORM_DCT_QUANT_C_ELEMENT(0,1);
+ FF_H264_TRANSFORM_DCT_QUANT_C_ELEMENT(0,2);
+ FF_H264_TRANSFORM_DCT_QUANT_C_ELEMENT(0,3);
+
+ FF_H264_TRANSFORM_DCT_QUANT_C_LINE(1);
+ FF_H264_TRANSFORM_DCT_QUANT_C_LINE(2);
+ FF_H264_TRANSFORM_DCT_QUANT_C_LINE(3);
+}
+
+#define H264_TRANSFORM_INVERSE_QUANT_DCT_ADD_C_ELEMENT(X, Y) \
+ elem[X][Y] = ((int32_t)block[X][Y]*V[mod][X][Y]) << shift;
+
+#define H264_TRANSFORM_INVERSE_QUANT_DCT_ADD_C_LINE(X) \
+ H264_TRANSFORM_INVERSE_QUANT_DCT_ADD_C_ELEMENT(X, 0) \
+ H264_TRANSFORM_INVERSE_QUANT_DCT_ADD_C_ELEMENT(X, 1) \
+ H264_TRANSFORM_INVERSE_QUANT_DCT_ADD_C_ELEMENT(X, 2) \
+ H264_TRANSFORM_INVERSE_QUANT_DCT_ADD_C_ELEMENT(X, 3)
+
+#define H264_TRANSFORM_INVERSE_QUANT_DCT_ADD_C_ELEMENT2(X, Y) \
+ elem[X][Y] = ((int32_t)block[X][Y]*V[mod][X][Y]+add) >> shift;
+
+#define H264_TRANSFORM_INVERSE_QUANT_DCT_ADD_C_LINE2(X) \
+ H264_TRANSFORM_INVERSE_QUANT_DCT_ADD_C_ELEMENT2(X, 0) \
+ H264_TRANSFORM_INVERSE_QUANT_DCT_ADD_C_ELEMENT2(X, 1) \
+ H264_TRANSFORM_INVERSE_QUANT_DCT_ADD_C_ELEMENT2(X, 2) \
+ H264_TRANSFORM_INVERSE_QUANT_DCT_ADD_C_ELEMENT2(X, 3)
+/* Dequantize block with V[QP%6] into a temporary and pass it to ff_h264_idct_add_c(dst, ..., stride); block itself is only read.
+ * V already carries a factor 16, so QP >= 24 uses coeff*V << (QP/6 - 4) and lower QP uses (coeff*V + add) >> (4 - QP/6), add being half the divisor.
+ * With dontscaleDC set, the DC term [0][0] is copied through unscaled. QP indexes the 52-entry div6/rem6 tables. */
+void ff_h264_transform_inverse_quant_dct_add_c(int16_t block[4][4], int QP, int dontscaleDC, uint8_t *dst, int stride) // y,x indexing
+{
+ static const int16_t V[6][4][4] =
+ {
+ { { 10*16, 13*16, 10*16, 13*16}, { 13*16, 16*16, 13*16, 16*16}, { 10*16, 13*16, 10*16, 13*16}, { 13*16, 16*16, 13*16, 16*16} },
+ { { 11*16, 14*16, 11*16, 14*16}, { 14*16, 18*16, 14*16, 18*16}, { 11*16, 14*16, 11*16, 14*16}, { 14*16, 18*16, 14*16, 18*16} },
+ { { 13*16, 16*16, 13*16, 16*16}, { 16*16, 20*16, 16*16, 20*16}, { 13*16, 16*16, 13*16, 16*16}, { 16*16, 20*16, 16*16, 20*16} },
+ { { 14*16, 18*16, 14*16, 18*16}, { 18*16, 23*16, 18*16, 23*16}, { 14*16, 18*16, 14*16, 18*16}, { 18*16, 23*16, 18*16, 23*16} },
+ { { 16*16, 20*16, 16*16, 20*16}, { 20*16, 25*16, 20*16, 25*16}, { 16*16, 20*16, 16*16, 20*16}, { 20*16, 25*16, 20*16, 25*16} },
+ { { 18*16, 23*16, 18*16, 23*16}, { 23*16, 29*16, 23*16, 29*16}, { 18*16, 23*16, 18*16, 23*16}, { 23*16, 29*16, 23*16, 29*16} }
+ };
+ DCTELEM elem[4][4];
+ int mod = rem6[QP];
+
+ if (QP >= 24)
+ {
+ int shift = div6[QP]-4;
+
+ if (dontscaleDC)
+ elem[0][0] = block[0][0];
+ else
+ H264_TRANSFORM_INVERSE_QUANT_DCT_ADD_C_ELEMENT(0, 0);
+ H264_TRANSFORM_INVERSE_QUANT_DCT_ADD_C_ELEMENT(0, 1);
+ H264_TRANSFORM_INVERSE_QUANT_DCT_ADD_C_ELEMENT(0, 2);
+ H264_TRANSFORM_INVERSE_QUANT_DCT_ADD_C_ELEMENT(0, 3);
+
+ H264_TRANSFORM_INVERSE_QUANT_DCT_ADD_C_LINE(1);
+ H264_TRANSFORM_INVERSE_QUANT_DCT_ADD_C_LINE(2);
+ H264_TRANSFORM_INVERSE_QUANT_DCT_ADD_C_LINE(3);
+ }
+ else
+ {
+ int add = (1<<(3-div6[QP]));
+ int shift = (4-div6[QP]);
+ if (dontscaleDC)
+ elem[0][0] = block[0][0];
+ else
+ H264_TRANSFORM_INVERSE_QUANT_DCT_ADD_C_ELEMENT2(0, 0);
+ H264_TRANSFORM_INVERSE_QUANT_DCT_ADD_C_ELEMENT2(0, 1);
+ H264_TRANSFORM_INVERSE_QUANT_DCT_ADD_C_ELEMENT2(0, 2);
+ H264_TRANSFORM_INVERSE_QUANT_DCT_ADD_C_ELEMENT2(0, 3);
+ H264_TRANSFORM_INVERSE_QUANT_DCT_ADD_C_LINE2(1);
+ H264_TRANSFORM_INVERSE_QUANT_DCT_ADD_C_LINE2(2);
+ H264_TRANSFORM_INVERSE_QUANT_DCT_ADD_C_LINE2(3);
+ }
+
+ ff_h264_idct_add_c(dst,&(elem[0][0]),stride);
+}
+
+#define FF_H264_HADAMARD_QUANT_4X4_C_ELEMENT(A, B) \
+ Y[A][B] = COPY_SIGN((((ABS(Y[A][B])>>1) * MF + f2) >> shift), Y[A][B]);
+
+#define FF_H264_HADAMARD_QUANT_4X4_C_LINE(X) \
+ FF_H264_HADAMARD_QUANT_4X4_C_ELEMENT(X, 0); \
+ FF_H264_HADAMARD_QUANT_4X4_C_ELEMENT(X, 1); \
+ FF_H264_HADAMARD_QUANT_4X4_C_ELEMENT(X, 2); \
+ FF_H264_HADAMARD_QUANT_4X4_C_ELEMENT(X, 3);
+
+/**
+ * Quantize the 4x4 Hadamard-transformed DC values in place, keeping each sign (the element macro halves |Y| first):
+ * |ZD(i,j)| = (|YD(i,j)| MF(0,0) + 2 f) >> (qbits + 1), qbits = 15 + QP/6, f = 2^qbits / 3, MF(0,0) = ff_h264_MF00[QP%6]
+ */
+void ff_h264_hadamard_quant_4x4_c(DCTELEM Y[4][4], int QP)
+{
+ int qbits = 15 + div6[QP];
+ int f2 = ((1 << qbits) / 3) * 2; // 2*f; writing it as (2/3) is integer division, i.e. 0, which dropped the rounding offset
+ int shift = (qbits + 1);
+ int mod = rem6[QP];
+
+ int32_t MF = ff_h264_MF00[mod];
+
+ FF_H264_HADAMARD_QUANT_4X4_C_LINE(0);
+ FF_H264_HADAMARD_QUANT_4X4_C_LINE(1);
+ FF_H264_HADAMARD_QUANT_4X4_C_LINE(2);
+ FF_H264_HADAMARD_QUANT_4X4_C_LINE(3);
+}
+
+#define H264_HADAMARD_INVQUANT_4X4_C_LOWQP_ELEMENT(A, B) \
+ Y[A][B] = (Y[A][B]*V + f) >> shift;
+
+#define H264_HADAMARD_INVQUANT_4X4_C_LOWQP_LINE(A) \
+ H264_HADAMARD_INVQUANT_4X4_C_LOWQP_ELEMENT(A, 0) \
+ H264_HADAMARD_INVQUANT_4X4_C_LOWQP_ELEMENT(A, 1) \
+ H264_HADAMARD_INVQUANT_4X4_C_LOWQP_ELEMENT(A, 2) \
+ H264_HADAMARD_INVQUANT_4X4_C_LOWQP_ELEMENT(A, 3)
+
+#define H264_HADAMARD_INVQUANT_4X4_C_HIGHQP_ELEMENT(A,B) \
+ Y[A][B] = (Y[A][B]*V) << shift ;
+
+#define H264_HADAMARD_INVQUANT_4X4_C_HIGHQP_LINE(A) \
+ H264_HADAMARD_INVQUANT_4X4_C_HIGHQP_ELEMENT(A, 0) \
+ H264_HADAMARD_INVQUANT_4X4_C_HIGHQP_ELEMENT(A, 1) \
+ H264_HADAMARD_INVQUANT_4X4_C_HIGHQP_ELEMENT(A, 2) \
+ H264_HADAMARD_INVQUANT_4X4_C_HIGHQP_ELEMENT(A, 3)
+
+/*
+ * Dequantize the 4x4 DC values in place with V = ff_h264_V00[QP%6]. Only if qpprime_y_zero_transform_bypass_flag == 0
+ */
+void ff_h264_hadamard_invquant_4x4_c(DCTELEM Y[4][4], int QP)
+{
+ int mod = rem6[QP];
+ // QP < 36 (QP/6 < 6): (Y*V + f) >> (6 - QP/6) with f = half the divisor; otherwise (Y*V) << (QP/6 - 6)
+ if (QP < 36)
+ {
+ int qbits = div6[QP];
+ int shift = 6-qbits;
+ int f = (1 << (5-qbits));
+
+ int32_t V = ff_h264_V00[mod];
+
+ H264_HADAMARD_INVQUANT_4X4_C_LOWQP_LINE(0);
+ H264_HADAMARD_INVQUANT_4X4_C_LOWQP_LINE(1);
+ H264_HADAMARD_INVQUANT_4X4_C_LOWQP_LINE(2);
+ H264_HADAMARD_INVQUANT_4X4_C_LOWQP_LINE(3);
+ }
+ else
+ {
+ int shift = div6[QP] - 6;
+ int32_t V = ff_h264_V00[mod];
+
+ H264_HADAMARD_INVQUANT_4X4_C_HIGHQP_LINE(0);
+ H264_HADAMARD_INVQUANT_4X4_C_HIGHQP_LINE(1);
+ H264_HADAMARD_INVQUANT_4X4_C_HIGHQP_LINE(2);
+ H264_HADAMARD_INVQUANT_4X4_C_HIGHQP_LINE(3);
+ }
+}
+
+#define FF_H264_HADAMARD_QUANT_2X2_C_ELEMENT(A, B) \
+ Y[A][B] = COPY_SIGN(((ABS(Y[A][B])*MF + f2) >> shift),Y[A][B])
+
+/**
+ * Quantize a 2x2 block of DC values in place, keeping each input's sign:
+ * |ZD(i,j)| = (|YD(i,j)| MF(0,0) + 2 f) >> (qbits + 1), qbits = 15 + QP/6, f = 2^qbits / 3, MF(0,0) = ff_h264_MF00[QP%6]
+ */
+void ff_h264_hadamard_quant_2x2_c(int16_t Y[2][2], int QP)
+{
+ int qbits = 15 + div6[QP];
+ int f2 = ((1 << qbits) / 3)*2;
+ int shift = qbits+1;
+ int32_t MF = ff_h264_MF00[rem6[QP]];
+
+ FF_H264_HADAMARD_QUANT_2X2_C_ELEMENT(0, 0);
+ FF_H264_HADAMARD_QUANT_2X2_C_ELEMENT(0, 1);
+ FF_H264_HADAMARD_QUANT_2X2_C_ELEMENT(1, 0);
+ FF_H264_HADAMARD_QUANT_2X2_C_ELEMENT(1, 1);
+}
+/* Point the H.264 encoder function pointers of c at the C implementations. NOTE(review): avctx is not used here. */
+void ff_h264dsp_init(DSPContext* c, AVCodecContext *avctx)
+{
+ c->h264_dct = ff_h264_dct_c;
+ c->h264_idct_notranspose_add = ff_h264_idct_add_c;
+ c->h264_hadamard_mult4x4 = ff_h264_hadamard_mult4x4_c;
+ c->h264_hadamard_quant_2x2 = ff_h264_hadamard_quant_2x2_c;
+ c->h264_hadamard_quant_4x4 = ff_h264_hadamard_quant_4x4_c;
+ c->h264_hadamard_invquant_4x4 = ff_h264_hadamard_invquant_4x4_c;
+ c->h264_transform_dct_quant = ff_h264_transform_dct_quant_c;
+ c->h264_transform_inverse_quant_dct_add = ff_h264_transform_inverse_quant_dct_add_c;
+}
+
diff --git a/libavcodec/h264enc.c b/libavcodec/h264enc.c
new file mode 100644
index 0000000..47499d9
--- /dev/null
+++ b/libavcodec/h264enc.c
@@ -0,0 +1,2472 @@
+/*
+ * H.264 encoder
+ * Copyright (c) 2006 Expertisecentrum Digitale Media, UHasselt
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#include "common.h"
+#include "avcodec.h"
+#include "bitstream.h"
+#include "golomb.h"
+#include "mpegvideo.h"
+#include "h264data.h"
+#include "dsputil.h"
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "h264enc.h"
+
+
+#define DEFAULT_QP 30 // QP chosen by ff_h264_encoder_init() when avctx->global_quality is 0
+#define NUMBER_OF_FRAMES 2 // number of reconstructed frame buffers allocated at init
+#define RATECONTROLINTERVAL 0.5 // scaled by time_base.den/time_base.num to size the blocksize history; presumably seconds
+#define CHROMA_QP_INDEX_OFFSET_MAX 12 // upper clip bound applied to chroma_qp_index_offset
+#define CHROMA_QP_INDEX_OFFSET_MIN -12 // lower clip bound applied to chroma_qp_index_offset
+/*
+ * Residual extraction macros.
+ *
+ * H264_COPY_4X4BLOCK_TRANSPOSED_PART handles source row A of the 4x4 block
+ * whose top-left sample is at (xoffset, yoffset): the four differences
+ * src1 - src2 of that row are stored in column A of dest, i.e. dest holds
+ * the block transposed. H264_COPY_4X4BLOCK_TRANSPOSED does this for all
+ * four rows.
+ *
+ * H264_COPY_16X16BLOCK and H264_COPY_8X8BLOCK split a 16x16 / 8x8 area into
+ * 4x4 blocks; dest[by][bx] receives the block at x = 4*bx, y = 4*by.
+ *
+ * The arguments are pasted textually and are not parenthesized, so src1 and
+ * src2 are indexed as src[row][column] after any prefix written by the
+ * caller (e.g. a cast, which then applies to the indexed element).
+ */
+#define H264_COPY_4X4BLOCK_TRANSPOSED_PART(A, xoffset, yoffset, dest, src1, src2) \
+ dest[0][A] = src1[yoffset+A][xoffset+0]-src2[yoffset+A][xoffset+0]; \
+ dest[1][A] = src1[yoffset+A][xoffset+1]-src2[yoffset+A][xoffset+1]; \
+ dest[2][A] = src1[yoffset+A][xoffset+2]-src2[yoffset+A][xoffset+2]; \
+ dest[3][A] = src1[yoffset+A][xoffset+3]-src2[yoffset+A][xoffset+3];
+
+#define H264_COPY_4X4BLOCK_TRANSPOSED(xoffset,yoffset,dest,src1,src2) \
+{ \
+ H264_COPY_4X4BLOCK_TRANSPOSED_PART(0, xoffset, yoffset, dest, src1, src2); \
+ H264_COPY_4X4BLOCK_TRANSPOSED_PART(1, xoffset, yoffset, dest, src1, src2); \
+ H264_COPY_4X4BLOCK_TRANSPOSED_PART(2, xoffset, yoffset, dest, src1, src2); \
+ H264_COPY_4X4BLOCK_TRANSPOSED_PART(3, xoffset, yoffset, dest, src1, src2); \
+}
+
+#define H264_COPY_16X16BLOCK(dest,src1,src2) \
+{ \
+ H264_COPY_4X4BLOCK_TRANSPOSED(0,0,dest[0][0],src1,src2); \
+ H264_COPY_4X4BLOCK_TRANSPOSED(4,0,dest[0][1],src1,src2); \
+ H264_COPY_4X4BLOCK_TRANSPOSED(8,0,dest[0][2],src1,src2); \
+ H264_COPY_4X4BLOCK_TRANSPOSED(12,0,dest[0][3],src1,src2); \
+ H264_COPY_4X4BLOCK_TRANSPOSED(0,4,dest[1][0],src1,src2); \
+ H264_COPY_4X4BLOCK_TRANSPOSED(4,4,dest[1][1],src1,src2); \
+ H264_COPY_4X4BLOCK_TRANSPOSED(8,4,dest[1][2],src1,src2); \
+ H264_COPY_4X4BLOCK_TRANSPOSED(12,4,dest[1][3],src1,src2); \
+ H264_COPY_4X4BLOCK_TRANSPOSED(0,8,dest[2][0],src1,src2); \
+ H264_COPY_4X4BLOCK_TRANSPOSED(4,8,dest[2][1],src1,src2); \
+ H264_COPY_4X4BLOCK_TRANSPOSED(8,8,dest[2][2],src1,src2); \
+ H264_COPY_4X4BLOCK_TRANSPOSED(12,8,dest[2][3],src1,src2); \
+ H264_COPY_4X4BLOCK_TRANSPOSED(0,12,dest[3][0],src1,src2); \
+ H264_COPY_4X4BLOCK_TRANSPOSED(4,12,dest[3][1],src1,src2); \
+ H264_COPY_4X4BLOCK_TRANSPOSED(8,12,dest[3][2],src1,src2); \
+ H264_COPY_4X4BLOCK_TRANSPOSED(12,12,dest[3][3],src1,src2); \
+}
+
+#define H264_COPY_8X8BLOCK(dest,src1,src2) \
+{ \
+ H264_COPY_4X4BLOCK_TRANSPOSED(0,0,dest[0][0],src1,src2); \
+ H264_COPY_4X4BLOCK_TRANSPOSED(4,0,dest[0][1],src1,src2); \
+ H264_COPY_4X4BLOCK_TRANSPOSED(0,4,dest[1][0],src1,src2); \
+ H264_COPY_4X4BLOCK_TRANSPOSED(4,4,dest[1][1],src1,src2); \
+}
+/*
+ * Prototypes of helpers that are not defined in this part of the file.
+ * NOTE(review): the pred*_c() and fill_rectangle() helpers are presumably
+ * the ones from h264.c, as discussed in the accompanying mail; they are
+ * declared by hand here instead of through a shared header.
+ */
+int h264cavlc_encode(PutBitContext *b, int16_t *coefficients, int len, int nA, int nB, int isChromaDC); // return value is stored as the block's nonzero count by the callers below
+void h264cavlc_generate_tables(); // called once from ff_h264_encoder_init()
+void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride);
+
+void pred16x16_vertical_c(uint8_t *src, int stride);
+void pred8x8_vertical_c(uint8_t *src, int stride);
+void pred16x16_horizontal_c(uint8_t *src, int stride);
+void pred8x8_horizontal_c(uint8_t *src, int stride);
+void pred16x16_plane_c(uint8_t *src, int stride);
+void pred8x8_plane_c(uint8_t *src, int stride);
+void pred16x16_128_dc_c(uint8_t *src, int stride);
+void pred8x8_128_dc_c(uint8_t *src, int stride);
+void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size);
+
+static int8_t mbtype_map[4][3][2]; // filled by ff_h264_init_tables(); indexed [luma mode][CodedBlockPatternChroma][CodedBlockPatternLuma != 0] to get the Intra16x16 mb_type
+
+/**
+ * For a specific picture, this function sets the correct Y, U and V row
+ * start addresses for each macroblock and resets its 'available' flag.
+ *
+ * Macroblock (x, y) covers the 16x16 luma area starting at (16*x, 16*y) and
+ * the 8x8 chroma areas starting at (8*x, 8*y); Y[i], U[i] and V[i] point at
+ * the first sample of row i of that area inside p.
+ *
+ * @param p             picture providing the plane pointers and line sizes
+ * @param mb_map        mb_height rows of mb_width macroblocks each
+ * @param mb_width      number of macroblocks per row
+ * @param mb_height     number of macroblock rows
+ * @param setneighbours if non-zero, topblock/leftblock/rightblock are linked
+ *                      to the adjacent macroblocks (NULL at the picture
+ *                      border); if zero, all three are set to NULL
+ *
+ * The previous version spelled the neighbour case out as four nearly
+ * identical loops and wrote to mb_map[0][0] even for an empty map; a single
+ * pass over the map avoids both.
+ */
+static void ff_h264_assign_macroblocks(AVPicture *p, MacroBlock **mb_map, int mb_width, int mb_height, int setneighbours)
+{
+    const int Ylinesize = p->linesize[0];
+    const int Ulinesize = p->linesize[1];
+    const int Vlinesize = p->linesize[2];
+    int x, y, i;
+
+    for (y = 0 ; y < mb_height ; y++)
+    {
+        for (x = 0 ; x < mb_width ; x++)
+        {
+            MacroBlock *mb = &(mb_map[y][x]);
+
+            for (i = 0 ; i < 8 ; i++)
+            {
+                mb->U[i] = p->data[1]+((x<<3)+((y<<3)+i)*Ulinesize);
+                mb->V[i] = p->data[2]+((x<<3)+((y<<3)+i)*Vlinesize);
+            }
+            for (i = 0 ; i < 16 ; i++)
+                mb->Y[i] = p->data[0]+((x<<4)+((y<<4)+i)*Ylinesize);
+
+            if (setneighbours)
+            {
+                mb->topblock   = (y > 0)          ? &(mb_map[y-1][x]) : NULL;
+                mb->leftblock  = (x > 0)          ? &(mb_map[y][x-1]) : NULL;
+                mb->rightblock = (x < mb_width-1) ? &(mb_map[y][x+1]) : NULL;
+            }
+            else
+            {
+                mb->topblock   = NULL;
+                mb->leftblock  = NULL;
+                mb->rightblock = NULL;
+            }
+            mb->available = 0;
+        }
+    }
+}
+
+/**
+ * Marks every macroblock of the map as not available and fills its
+ * Y/U/V nonzero-count arrays with 0xff bytes (i.e. -1 per int entry).
+ */
+static void ff_h264_clear_nonzero_markers(MacroBlock **mb_map, int mb_width, int mb_height)
+{
+    int row, col;
+
+    for (row = 0 ; row < mb_height ; row++)
+    {
+        for (col = 0 ; col < mb_width ; col++)
+        {
+            MacroBlock *mb = &(mb_map[row][col]);
+
+            /* all bits set: every counter reads back as -1 */
+            memset(&(mb->Y_nonzero[0][0]), 0xff, sizeof(int)*16);
+            memset(&(mb->U_nonzero[0][0]), 0xff, sizeof(int)*4);
+            memset(&(mb->V_nonzero[0][0]), 0xff, sizeof(int)*4);
+
+            mb->available = 0;
+        }
+    }
+}
+
+/**
+ * Fills mbtype_map so that
+ *   mbtype_map[m][c][l] = 1 + m + 4*c + 12*l
+ * for m in 0..3, c in 0..2 and l in 0..1.
+ */
+static void ff_h264_init_tables()
+{
+    int luma_mode, cbp_chroma, cbp_luma;
+
+    for (cbp_luma = 0; cbp_luma < 2; cbp_luma++)
+    {
+        for (cbp_chroma = 0; cbp_chroma < 3; cbp_chroma++)
+        {
+            for (luma_mode = 0; luma_mode < 4; luma_mode++)
+            {
+                mbtype_map[luma_mode][cbp_chroma][cbp_luma] =
+                    1 + luma_mode + 4*cbp_chroma + 12*cbp_luma;
+            }
+        }
+    }
+}
+
+/**
+ * Initializes the encoder context stored in avctx->priv_data.
+ *
+ * Only PIX_FMT_YUV420P input is accepted. The coded picture size is the
+ * frame size rounded up to whole macroblocks; when rounding was necessary,
+ * frame cropping is enabled and the right/bottom crop offsets describe the
+ * padding that was added.
+ *
+ * The function allocates the work pictures, the macroblock maps, the
+ * reconstructed frame buffers and the rate-control history, derives the
+ * initial QP from avctx->global_quality and finally builds the CAVLC and
+ * mb_type tables.
+ *
+ * Changes compared to the previous version:
+ *  - the crop offsets are computed from the padding (16*mb - size) instead
+ *    of from size%16, which was only correct when size%16 == 8;
+ *  - every av_malloc() result is checked before it is used;
+ *  - the rate-control history holds at least one entry, also for streams
+ *    with fewer than 1/RATECONTROLINTERVAL frames per second.
+ *
+ * NOTE(review): on failure the buffers allocated so far are not released
+ * here; whether a close callback takes care of that cannot be told from
+ * this part of the file.
+ *
+ * @return 0 on success, -1 on unsupported pixel format or allocation failure
+ */
+static int ff_h264_encoder_init(AVCodecContext *avctx)
+{
+    H264Context *t = (H264Context *)avctx->priv_data;
+    uint8_t *buf;
+    int s, x, y, i, res;
+    int width, height;
+
+    switch(avctx->pix_fmt){
+    case PIX_FMT_YUV420P:
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "format not supported\n");
+        return -1;
+    }
+
+    t->frame_cropping_flag = 0;
+    t->frame_crop_left_offset = 0;
+    t->frame_crop_right_offset = 0;
+    t->frame_crop_top_offset = 0;
+    t->frame_crop_bottom_offset = 0;
+
+    width = avctx->width;
+    height = avctx->height;
+
+    t->mb_width = width/16;
+    t->mb_height = height/16;
+    t->frame_width = width;
+    t->frame_height = height;
+
+    /* If the width is not a multiple of 16, enable cropping. The offset is
+     * the number of padding columns, in units of two luma samples. */
+    if (( width % 16) !=0 )
+    {
+        t->mb_width++;
+        t->frame_cropping_flag = 1;
+        t->frame_crop_left_offset = 0;
+        t->frame_crop_right_offset = (t->mb_width*16 - width)/2;
+    }
+
+    /* If the height is not a multiple of 16, enable cropping. The offset is
+     * the number of padding rows, in units of two luma samples. */
+    if (( height % 16) !=0 )
+    {
+        t->mb_height++;
+        t->frame_cropping_flag = 1;
+        t->frame_crop_top_offset = 0;
+        t->frame_crop_bottom_offset = (t->mb_height*16 - height)/2;
+    }
+
+    /* Round the framesize upwards to a multiple of 16 */
+    width = t->mb_width * 16;
+    height = t->mb_height * 16;
+    t->refframe_width = width;
+    t->refframe_height = height;
+
+    s = avpicture_get_size(avctx->pix_fmt, width, height);
+    res = avpicture_alloc(&t->pi, avctx->pix_fmt, width, height);
+    if (res) {
+        av_log(avctx, AV_LOG_ERROR, "Problem allocating picture\n");
+        return -1;
+    }
+    res = avpicture_alloc(&t->po, avctx->pix_fmt, width, height);
+    if (res) {
+        av_log(avctx, AV_LOG_ERROR, "Problem allocating picture\n");
+        return -1;
+    }
+
+    t->pi_data0 = (uint8_t *)t->pi.data[0];
+    t->po_data0 = (uint8_t *)t->po.data[0];
+    t->bufsize = s*2;
+    t->frame_num = 0;
+
+    /* Macroblock map of the input frame copy */
+    t->mb_map = (MacroBlock **)av_malloc(sizeof(MacroBlock*) * t->mb_height);
+    if (!t->mb_map)
+        goto alloc_failed;
+    for (y = 0 ; y < t->mb_height ; y++)
+    {
+        t->mb_map[y] = (MacroBlock *)av_malloc(sizeof(MacroBlock) * t->mb_width);
+        if (!t->mb_map[y])
+            goto alloc_failed;
+        for (x = 0 ; x < t->mb_width ; x++)
+        {
+            t->mb_map[y][x].Y_width = 16;
+            t->mb_map[y][x].Y_height = 16;
+        }
+    }
+
+    /* Reconstructed frames, each with its own picture and macroblock map */
+    t->framebufsize = NUMBER_OF_FRAMES;
+    t->reconstructed_frames = (FrameInfo **)av_malloc(sizeof(FrameInfo *)*t->framebufsize);
+    if (!t->reconstructed_frames)
+        goto alloc_failed;
+
+    for (i = 0 ; i < t->framebufsize ; i++)
+    {
+        FrameInfo *frame = (FrameInfo *)av_malloc(sizeof(FrameInfo));
+
+        t->reconstructed_frames[i] = frame;
+        if (!frame)
+            goto alloc_failed;
+
+        buf = av_malloc(s);
+        if (!buf)
+            goto alloc_failed;
+        avpicture_fill(&(frame->reconstructed_picture), buf, PIX_FMT_YUV420P, width, height);
+
+        frame->reconstructed_mb_map = (MacroBlock **)av_malloc(sizeof(MacroBlock*) * t->mb_height);
+        if (!frame->reconstructed_mb_map)
+            goto alloc_failed;
+        for (y = 0 ; y < t->mb_height ; y++)
+        {
+            frame->reconstructed_mb_map[y] = (MacroBlock *)av_malloc(sizeof(MacroBlock) * t->mb_width);
+            if (!frame->reconstructed_mb_map[y])
+                goto alloc_failed;
+            for (x = 0 ; x < t->mb_width ; x++)
+            {
+                frame->reconstructed_mb_map[y][x].Y_width = 16;
+                frame->reconstructed_mb_map[y][x].Y_height = 16;
+            }
+        }
+        ff_h264_assign_macroblocks(&(frame->reconstructed_picture),frame->reconstructed_mb_map,t->mb_width,t->mb_height,1);
+    }
+
+    /* A global_quality of 0 selects the default QP without fixed-QP mode */
+    if (!avctx->global_quality)
+    {
+        t->QP = DEFAULT_QP;
+        t->use_fixed_qp = 0;
+    }
+    else
+    {
+        t->QP = avctx->global_quality / FF_QP2LAMBDA;
+        t->use_fixed_qp = 1;
+    }
+    t->PPS_QP = t->QP;
+
+    t->chroma_qp_index_offset = avctx->chromaoffset;
+    t->chroma_qp_index_offset = clip(t->chroma_qp_index_offset, CHROMA_QP_INDEX_OFFSET_MIN, CHROMA_QP_INDEX_OFFSET_MAX);
+    t->IDRcount = 64;
+    t->IDR_frame_num = 0;
+
+    // init dsp
+    dsputil_init(&(t->dspcontext),avctx);
+    t->Y_stride = t->reconstructed_frames[0]->reconstructed_picture.linesize[0];
+    t->U_stride = t->reconstructed_frames[0]->reconstructed_picture.linesize[1];
+    t->V_stride = t->reconstructed_frames[0]->reconstructed_picture.linesize[2];
+
+    // Create an AVPicture instance with the same dimensions as the reference pictures to hold a copy
+    // of the input frame
+    buf = (uint8_t *)av_malloc(s);
+    if (!buf)
+        goto alloc_failed;
+    avpicture_fill(&(t->input_frame_copy), buf, PIX_FMT_YUV420P, width, height);
+    memset(buf,0,s);
+
+    // Assign the macroblock map to this copy of the input image
+    ff_h264_assign_macroblocks(&(t->input_frame_copy),t->mb_map,t->mb_width,t->mb_height,0);
+
+    // Blocksize history, we use a separate history for I and P frame
+    t->milliseconds_per_frame = (1000*avctx->time_base.num)/avctx->time_base.den;
+    t->blocksize_history_length = (RATECONTROLINTERVAL*avctx->time_base.den)/avctx->time_base.num;
+    if (t->blocksize_history_length < 1)
+        t->blocksize_history_length = 1; // never allocate an empty history
+    t->blocksize_history = (int64_t *)av_malloc(sizeof(int64_t)*t->blocksize_history_length);
+    if (!t->blocksize_history)
+        goto alloc_failed;
+    t->blocksize_history_pos = 0;
+    t->blocksize_history_num_filled = 0;
+    t->blocksize_history_total_milliseconds = 0;
+    t->blocksize_history_sum = 0;
+    for (i = 0 ; i < t->blocksize_history_length ; i++)
+        t->blocksize_history[i] = 0;
+
+    h264cavlc_generate_tables();
+    ff_h264_init_tables();
+    return 0;
+
+alloc_failed:
+    av_log(avctx, AV_LOG_ERROR, "Problem allocating memory\n");
+    return -1;
+}
+
+/**
+ * Wraps the payload collected in b2 into a NAL unit written at dest.
+ *
+ * b2 is byte-aligned and flushed first. The output consists of the start
+ * code 00 00 00 01, one header byte (a zero bit, nal_ref_idc and
+ * nal_unit_type) and the payload, in which an emulation prevention byte
+ * 0x03 is inserted after every pair of zero bytes that is followed by a
+ * byte smaller than 4.
+ *
+ * @param dest     output buffer
+ * @param destsize in: size of dest; out: reduced by the number of bytes written
+ * @return pointer to the first byte after the written NAL unit
+ *
+ * NOTE(review): the copy loop does not compare its write position against
+ * *destsize; the caller has to provide a large enough buffer.
+ */
+static uint8_t *ff_h264_write_nal_unit(int nal_ref_idc, int nal_unit_type, uint8_t *dest, int *destsize,
+                                       PutBitContext *b2)
+{
+    PutBitContext header;
+    uint8_t *payload;
+    int payload_len;
+    int in = 0;
+    int out;
+
+    // Align b2 on a byte boundary
+    align_put_bits(b2);
+    payload_len = put_bits_count(b2)/8;
+    flush_put_bits(b2);
+    payload = b2->buf;
+
+    // Start code and NAL header
+    init_put_bits(&header,dest,*destsize);
+    put_bits(&header,16,0);
+    put_bits(&header,16,0x01);
+    put_bits(&header,1,0);             // forbidden zero bit
+    put_bits(&header,2,nal_ref_idc);   // nal_ref_idc
+    put_bits(&header,5,nal_unit_type); // nal_unit_type
+    flush_put_bits(&header);
+
+    out = 5; // four start code bytes plus the header byte
+
+    while (in < payload_len)
+    {
+        int needs_escape = in + 2 < payload_len &&
+                           (payload[in] == 0 && payload[in+1] == 0 && payload[in+2] < 4);
+
+        if (!needs_escape)
+        {
+            dest[out++] = payload[in++];
+            continue;
+        }
+
+        dest[out++] = payload[in++];
+        dest[out++] = payload[in++];
+        dest[out++] = 0x03; // emulation prevention byte
+    }
+
+    *destsize -= out;
+    return dest+out;
+}
+
+/**
+ * Writes one plane of raw 8-bit samples for an I_PCM macroblock.
+ * The first h rows contribute their first w samples; every row is padded
+ * with zero samples up to size columns and zero rows are appended up to
+ * size rows.
+ */
+static void ff_h264_put_pcm_plane(PutBitContext *b, uint8_t **rows, int w, int h, int size)
+{
+    int x, y;
+
+    for (y = 0 ; y < h ; y++)
+    {
+        for (x = 0 ; x < w ; x++)
+            put_bits(b,8,rows[y][x]);
+        for ( ; x < size ; x++)
+            put_bits(b,8,0);
+    }
+    for ( ; y < size ; y++)
+    {
+        for (x = 0 ; x < size ; x++)
+            put_bits(b,8,0);
+    }
+}
+
+/**
+ * Encodes macroblock mb as I_PCM: mb_type 25, alignment to a byte boundary,
+ * then the raw Y, U and V samples. The samples are also copied into copy_mb,
+ * whose nonzero counts are all set to 16 and which is marked as available.
+ */
+static void ff_h264_encode_I_PCM(MacroBlock *mb, PutBitContext *b, MacroBlock *copy_mb)
+{
+    const int luma_w = mb->Y_width;
+    const int luma_h = mb->Y_height;
+    const int chroma_w = luma_w >> 1;
+    const int chroma_h = luma_h >> 1;
+    int row, col;
+
+    set_ue_golomb(b, 25); // mb_type = I_PCM
+    align_put_bits(b);
+
+    // luma: write, then copy
+    ff_h264_put_pcm_plane(b, mb->Y, luma_w, luma_h, 16);
+
+    for (row = 0 ; row < luma_h ; row++)
+        for (col = 0 ; col < luma_w ; col++)
+            copy_mb->Y[row][col] = mb->Y[row][col];
+
+    // chroma: write U and V, then copy both
+    ff_h264_put_pcm_plane(b, mb->U, chroma_w, chroma_h, 8);
+    ff_h264_put_pcm_plane(b, mb->V, chroma_w, chroma_h, 8);
+
+    for (row = 0 ; row < chroma_h ; row++)
+    {
+        for (col = 0 ; col < chroma_w ; col++)
+        {
+            copy_mb->U[row][col] = mb->U[row][col];
+            copy_mb->V[row][col] = mb->V[row][col];
+        }
+    }
+
+    // store the nonzero counts (set to 16 for I_PCM blocks)
+    fill_rectangle(copy_mb->Y_nonzero, 4, 4, 4, 16, sizeof(int));
+    fill_rectangle(copy_mb->U_nonzero, 2, 2, 2, 16, sizeof(int));
+    fill_rectangle(copy_mb->V_nonzero, 2, 2, 2, 16, sizeof(int));
+
+    copy_mb->available = 1;
+}
+
+/**
+ * Forward 4x4 integer transform.
+ * inblock is expected in transposed layout, outblock is written in normal
+ * layout. All of inblock is read before outblock is written.
+ */
+void ff_h264_dct_c(DCTELEM inblock[4][4],DCTELEM outblock[4][4])
+{
+    DCTELEM pieces[4][4];
+    int k;
+
+    /* first pass: combine the four rows of inblock, one column at a time */
+    for (k = 0; k < 4; k++)
+    {
+        const int r0 = inblock[0][k];
+        const int r1 = inblock[1][k];
+        const int r2 = inblock[2][k];
+        const int r3 = inblock[3][k];
+
+        pieces[0][k] = r0+r1+r2+r3;
+        pieces[1][k] = (r0<<1)+r1-r2-(r3<<1);
+        pieces[2][k] = r0-r1-r2+r3;
+        pieces[3][k] = r0-(r1<<1)+(r2<<1)-r3;
+    }
+
+    /* second pass: combine the four entries of each row of pieces and
+     * store the results as a column of outblock */
+    for (k = 0; k < 4; k++)
+    {
+        const int c0 = pieces[k][0];
+        const int c1 = pieces[k][1];
+        const int c2 = pieces[k][2];
+        const int c3 = pieces[k][3];
+
+        outblock[0][k] = c0+c1+c2+c3;
+        outblock[1][k] = (c0<<1)+c1-c2-(c3<<1);
+        outblock[2][k] = c0-c1-c2+c3;
+        outblock[3][k] = c0-(c1<<1)+(c2<<1)-c3;
+    }
+}
+
+
+/**
+ * One pass of the 4x4 Hadamard transform: the four entries of row k of in
+ * are combined with the sign patterns ++++, ++--, +--+ and +-+- and the
+ * four results are stored in column k of out.
+ */
+static inline void ff_h264_hadamard_pass4x4(DCTELEM in[4][4], DCTELEM out[4][4])
+{
+    int k;
+
+    for (k = 0; k < 4; k++)
+    {
+        const int a = in[k][0];
+        const int b = in[k][1];
+        const int c = in[k][2];
+        const int d = in[k][3];
+
+        out[0][k] = a+b+c+d;
+        out[1][k] = a+b-c-d;
+        out[2][k] = a-b-c+d;
+        out[3][k] = a-b+c-d;
+    }
+}
+
+/**
+ * In-place 4x4 Hadamard transform of Y (no scaling is applied).
+ */
+void ff_h264_hadamard_mult4x4_c(DCTELEM Y[4][4])
+{
+    DCTELEM pieces[4][4];
+
+    ff_h264_hadamard_pass4x4(Y, pieces);
+    ff_h264_hadamard_pass4x4(pieces, Y);
+}
+
+/**
+ * In-place 2x2 Hadamard transform of Y (no scaling is applied):
+ *   Y[0][0] <- Y00+Y01+Y10+Y11    Y[0][1] <- Y00-Y01+Y10-Y11
+ *   Y[1][0] <- Y00+Y01-Y10-Y11    Y[1][1] <- Y00-Y01-Y10+Y11
+ */
+static inline void ff_h264_hadamard_mult_2x2(int16_t Y[2][2])
+{
+    const int16_t top_sum  = Y[0][0]+Y[0][1];
+    const int16_t top_diff = Y[0][0]-Y[0][1];
+    const int16_t bot_sum  = Y[1][0]+Y[1][1];
+    const int16_t bot_diff = Y[1][0]-Y[1][1];
+
+    Y[0][0] = top_sum+bot_sum;
+    Y[0][1] = top_diff+bot_diff;
+    Y[1][0] = top_sum-bot_sum;
+    Y[1][1] = top_diff-bot_diff;
+}
+
+/* Quantizer multipliers and dequantizer scale factors for position (0,0),
+ * indexed by QP%6. */
+const int16_t ff_h264_MF00[6] = {
+    13107, 11916, 10082, 9362, 8192, 7282
+};
+const int16_t ff_h264_V00[6] = {
+    10*16, 11*16, 13*16, 14*16, 16*16, 18*16
+};
+
+/**
+ * In-place inverse quantization of a 2x2 block:
+ *   Y[i][j] <- (Y[i][j] * (ff_h264_V00[QP%6] << (QP/6))) >> 5
+ */
+static inline void ff_h264_hadamard_invquant_2x2(int16_t Y[2][2], int QP)
+{
+    int32_t scale = ff_h264_V00[QP%6];
+    int row, col;
+
+    scale <<= QP/6;
+
+    for (row = 0; row < 2; row++)
+        for (col = 0; col < 2; col++)
+            Y[row][col] = (Y[row][col]*scale) >> 5;
+}
+
+/* Plane selectors for ff_h264_neighbour_count_nonzero() */
+#define NEIGHBOUR_SUBTYPE_Y 0
+#define NEIGHBOUR_SUBTYPE_U 1
+#define NEIGHBOUR_SUBTYPE_V 2
+
+/*
+ * Looks up the nonzero counts of the left (*nA) and top (*nB) neighbour of
+ * sub-block (x, y) in the given count array. P is the last row/column index
+ * of that array and is used when the neighbour lies in the adjacent
+ * macroblock; -1 is stored when that macroblock does not exist.
+ * Expects mb, x, y, nA and nB to be in scope.
+ */
+#define H264_NEIGHBOUR_COUNT_NONZERO_PLANE(PLANE, P) \
+    { \
+        *nA = (x != 0)      ? mb->PLANE[y][x-1] \
+            : mb->leftblock ? mb->leftblock->PLANE[y][P] \
+            :                 -1; \
+        *nB = (y != 0)      ? mb->PLANE[y-1][x] \
+            : mb->topblock  ? mb->topblock->PLANE[P][x] \
+            :                 -1; \
+    }
+
+/**
+ * Fetches the nonzero coefficient counts of the blocks to the left (*nA)
+ * and above (*nB) sub-block (x, y) of macroblock mb.
+ *
+ * @param type NEIGHBOUR_SUBTYPE_Y selects the luma counts,
+ *             NEIGHBOUR_SUBTYPE_U the U counts and any other value the
+ *             V counts
+ */
+static inline void ff_h264_neighbour_count_nonzero(MacroBlock *mb, int type, int x, int y, int *nA, int *nB)
+{
+    switch (type)
+    {
+    case NEIGHBOUR_SUBTYPE_Y:
+        H264_NEIGHBOUR_COUNT_NONZERO_PLANE(Y_nonzero, 3)
+        break;
+    case NEIGHBOUR_SUBTYPE_U:
+        H264_NEIGHBOUR_COUNT_NONZERO_PLANE(U_nonzero, 1)
+        break;
+    default:
+        H264_NEIGHBOUR_COUNT_NONZERO_PLANE(V_nonzero, 1)
+        break;
+    }
+}
+
+/*
+ * Increments count if the coefficient x is non-zero and then limits x to
+ * the range -2047..2047.
+ *
+ * clip() returns the limited value instead of modifying its argument, so
+ * the result has to be assigned back; the previous version dropped it and
+ * therefore never clipped anything. x must be an lvalue and is evaluated
+ * more than once.
+ */
+#define H264_COUNT_AND_CLIP(x,count) \
+{\
+    if ((x) != 0)\
+        (count)++;\
+    (x) = clip((x), -2047, 2047);\
+}
+
+/*
+ * Applies H264_COUNT_AND_CLIP to the 15 coefficients of the 4x4 block x
+ * other than x[0][0]; that coefficient is neither counted nor clipped here.
+ */
+#define H264_COUNT_AND_CLIP_SUBBLOCK(x,count)\
+{\
+    H264_COUNT_AND_CLIP(x[0][1],count);\
+    H264_COUNT_AND_CLIP(x[0][2],count);\
+    H264_COUNT_AND_CLIP(x[0][3],count);\
+    H264_COUNT_AND_CLIP(x[1][0],count);\
+    H264_COUNT_AND_CLIP(x[1][1],count);\
+    H264_COUNT_AND_CLIP(x[1][2],count);\
+    H264_COUNT_AND_CLIP(x[1][3],count);\
+    H264_COUNT_AND_CLIP(x[2][0],count);\
+    H264_COUNT_AND_CLIP(x[2][1],count);\
+    H264_COUNT_AND_CLIP(x[2][2],count);\
+    H264_COUNT_AND_CLIP(x[2][3],count);\
+    H264_COUNT_AND_CLIP(x[3][0],count);\
+    H264_COUNT_AND_CLIP(x[3][1],count);\
+    H264_COUNT_AND_CLIP(x[3][2],count);\
+    H264_COUNT_AND_CLIP(x[3][3],count);\
+}
+
+static const int8_t zigzagx[16] = { 0,1,0,0,1,2,3,2,1,0,1,2,3,3,2,3 }; // second (column) index of the i-th coefficient in zigzag scan order
+static const int8_t zigzagy[16] = { 0,0,1,2,1,0,0,1,2,3,3,2,1,2,3,3 }; // first (row) index of the i-th coefficient in zigzag scan order
+/*
+ * Copies the 2x2 block PLANE into coefficients[0..3] in the order
+ * [0][0], [0][1], [1][0], [1][1] and encodes it with h264cavlc_encode()
+ * with the isChromaDC argument set to 1.
+ * Expects b and coefficients to be in scope. The expansion is a plain
+ * sequence of statements, so it must be used inside a braced block.
+ */
+#define H264_ENCODE_INTRA16X16_RESIDUAL_COEFFICIENTS(PLANE) \
+ coefficients[0] = PLANE[0][0]; \
+ coefficients[1] = PLANE[0][1]; \
+ coefficients[2] = PLANE[1][0]; \
+ coefficients[3] = PLANE[1][1]; \
+ h264cavlc_encode(b,coefficients,4,-1,-1,1); // nA and nB are not used in this case
+
+/**
+ * Writes the macroblock header and the residual of an Intra16x16
+ * macroblock to b.
+ *
+ * All coefficients are limited to -2047..2047 before they are written.
+ * This happens in place, so the caller sees the limited values in YD, UD,
+ * VD and residual afterwards. The coded block patterns are derived from
+ * the number of non-zero AC and chroma DC coefficients and select, together
+ * with lumamode, the mb_type through mbtype_map. The syntax elements are
+ * written in this order: mb_type, intra_chroma_pred_mode, mb_qp_delta (0),
+ * luma DC, luma AC (if any), chroma DC and chroma AC (if any).
+ *
+ * The nonzero counts of mb are updated with the values returned by
+ * h264cavlc_encode(), or set to 0 for blocks that are not coded.
+ *
+ * Changes compared to the previous version: the result of clip() on the
+ * luma DC coefficients is stored instead of being discarded, the chroma DC
+ * test that was always true after the early return is gone, and the inner
+ * block coordinates no longer shadow the outer x/y.
+ *
+ * @param b          bitstream writer
+ * @param YD         4x4 luma DC coefficients
+ * @param UD         2x2 chroma DC coefficients of U
+ * @param VD         2x2 chroma DC coefficients of V
+ * @param residual   4x4 blocks holding the AC coefficients
+ * @param lumamode   first index into mbtype_map (luma prediction mode)
+ * @param chromamode value written as intra_chroma_pred_mode
+ * @param mb         macroblock whose nonzero counts are read and updated
+ */
+static void ff_h264_encode_intra16x16_residual(PutBitContext *b,DCTELEM YD[4][4],DCTELEM UD[2][2],DCTELEM VD[2][2],
+        Residual *residual, int lumamode, int chromamode, MacroBlock *mb)
+{
+    int lumaACcount = 0;
+    int chromaDCcount = 0;
+    int chromaACcount = 0;
+    int CodedBlockPatternChroma;
+    int CodedBlockPatternLuma;
+    int x,y,i,j,k;
+    int16_t coefficients[256];
+    int nA,nB;
+
+    // count the non-zero AC coefficients and limit their range
+
+    for (y = 0 ; y < 4 ; y++)
+        for (x = 0 ; x < 4 ; x++)
+            H264_COUNT_AND_CLIP_SUBBLOCK(residual->part4x4Y[y][x],lumaACcount);
+
+    for (y = 0 ; y < 2 ; y++)
+    {
+        for (x = 0 ; x < 2 ; x++)
+        {
+            H264_COUNT_AND_CLIP_SUBBLOCK(residual->part4x4U[y][x],chromaACcount);
+            H264_COUNT_AND_CLIP_SUBBLOCK(residual->part4x4V[y][x],chromaACcount);
+        }
+    }
+
+    // same for the chroma DC coefficients
+
+    for (y = 0 ; y < 2 ; y++)
+    {
+        for (x = 0 ; x < 2 ; x++)
+        {
+            H264_COUNT_AND_CLIP(UD[y][x],chromaDCcount);
+            H264_COUNT_AND_CLIP(VD[y][x],chromaDCcount);
+        }
+    }
+
+    // the luma DC coefficients are always coded, so they are only limited
+
+    for (y = 0 ; y < 4 ; y++)
+        for (x = 0 ; x < 4 ; x++)
+            YD[y][x] = clip(YD[y][x], -2047, 2047);
+
+    if (chromaACcount)
+        CodedBlockPatternChroma = 2;
+    else
+        CodedBlockPatternChroma = !!chromaDCcount;
+
+    // actually it is 15 in the ITU spec, but it is used as an array index here
+    CodedBlockPatternLuma = (lumaACcount != 0);
+
+    set_ue_golomb(b, mbtype_map[lumamode][CodedBlockPatternChroma][CodedBlockPatternLuma]); // mb_type
+    set_ue_golomb(b, chromamode); // intra_chroma_pred_mode
+    set_se_golomb(b, 0); // mb_qp_delta
+
+    // encode luma DC coefficients
+
+    ff_h264_neighbour_count_nonzero(mb,NEIGHBOUR_SUBTYPE_Y,0,0,&nA,&nB);
+    for (i = 0 ; i < 16 ; i++)
+        coefficients[i] = YD[zigzagy[i]][zigzagx[i]];
+    h264cavlc_encode(b,coefficients,16,nA,nB,0);
+
+    // encode luma AC coefficients: the four 8x8 quadrants in turn, and the
+    // four 4x4 blocks inside each quadrant
+
+    if (CodedBlockPatternLuma > 0)
+    {
+        for (j = 0 ; j < 4 ; j++)
+        {
+            int X = (j&1) << 1;
+            int Y = j&2;
+
+            for (i = 0 ; i < 4 ; i++)
+            {
+                int bx = (i&1)+X;
+                int by = (i>>1)+Y;
+
+                // skip the DC position, which is coded separately
+                for (k = 0 ; k < 15 ; k++)
+                    coefficients[k] = residual->part4x4Y[by][bx][zigzagy[k+1]][zigzagx[k+1]];
+                ff_h264_neighbour_count_nonzero(mb,NEIGHBOUR_SUBTYPE_Y,bx,by,&nA,&nB);
+                mb->Y_nonzero[by][bx] = h264cavlc_encode(b,coefficients,15,nA,nB,0);
+            }
+        }
+    }
+    else
+        memset(mb->Y_nonzero, 0, sizeof(mb->Y_nonzero));
+
+    if (CodedBlockPatternChroma == 0)
+    {
+        memset(mb->U_nonzero, 0, sizeof(mb->U_nonzero));
+        memset(mb->V_nonzero, 0, sizeof(mb->V_nonzero));
+        return;
+    }
+
+    // encode chroma DC coefficients (CodedBlockPatternChroma is 1 or 2 here)
+
+    H264_ENCODE_INTRA16X16_RESIDUAL_COEFFICIENTS(UD);
+    H264_ENCODE_INTRA16X16_RESIDUAL_COEFFICIENTS(VD);
+
+    // encode chroma AC coefficients: all U blocks first, then all V blocks
+
+    if (CodedBlockPatternChroma == 2)
+    {
+        for (i = 0 ; i < 4 ; i++)
+        {
+            int bx = i&1;
+            int by = i>>1;
+
+            for (k = 0 ; k < 15 ; k++)
+                coefficients[k] = residual->part4x4U[by][bx][zigzagy[k+1]][zigzagx[k+1]];
+            ff_h264_neighbour_count_nonzero(mb,NEIGHBOUR_SUBTYPE_U,bx,by,&nA,&nB);
+            mb->U_nonzero[by][bx] = h264cavlc_encode(b,coefficients,15,nA,nB,0);
+        }
+
+        for (i = 0 ; i < 4 ; i++)
+        {
+            int bx = i&1;
+            int by = i>>1;
+
+            for (k = 0 ; k < 15 ; k++)
+                coefficients[k] = residual->part4x4V[by][bx][zigzagy[k+1]][zigzagx[k+1]];
+            ff_h264_neighbour_count_nonzero(mb,NEIGHBOUR_SUBTYPE_V,bx,by,&nA,&nB);
+            mb->V_nonzero[by][bx] = h264cavlc_encode(b,coefficients,15,nA,nB,0);
+        }
+    }
+    else
+    {
+        memset(mb->U_nonzero, 0, sizeof(mb->U_nonzero));
+        memset(mb->V_nonzero, 0, sizeof(mb->V_nonzero));
+    }
+}
+/*
+ * Encodes targetmb as an Intra16x16 macroblock into b and leaves the
+ * reconstructed macroblock in destmb.
+ *
+ * Steps, in the order in which they are carried out:
+ *  1. a prediction mode is chosen from the availability of the left and top
+ *     neighbours of destmb and the prediction is written into the Y, U and V
+ *     planes of destmb;
+ *  2. the residual targetmb - destmb is split into transposed 4x4 blocks in
+ *     t->residual and passed through h264_transform_dct_quant();
+ *  3. the [0][0] coefficient of every block is collected into YD/UD/VD,
+ *     Hadamard transformed and quantized;
+ *  4. ff_h264_encode_intra16x16_residual() writes the macroblock to b;
+ *  5. the DC coefficients are transformed back and dequantized, put back
+ *     into the blocks, and h264_transform_inverse_quant_dct_add() adds the
+ *     decoded residual to the prediction in destmb;
+ *  6. destmb is marked as available.
+ *
+ * NOTE(review): the prediction helpers are passed t->refframe_width (halved
+ * for chroma) as stride while the inverse transform uses t->Y_stride,
+ * t->U_stride and t->V_stride; presumably these are equal for the buffers
+ * used here -- confirm.
+ */
+static void ff_h264_encode_Intra_16x16(H264Context *t, MacroBlock *targetmb, PutBitContext *b,
+ MacroBlock *destmb)
+{
+ int x,y;
+ int w,h,w2,h2;
+ DCTELEM YD[4][4];
+ DCTELEM UD[2][2];
+ DCTELEM VD[2][2];
+ int qPI;
+ int QPc;
+ int QPy = t->QP;
+ int lumapredmode = 2; // always overwritten by one of the four branches below
+ int chromapredmode = 0; // always overwritten by one of the four branches below
+ int leftavail = 0;
+ int topavail = 0;
+
+ qPI = t->QP + t->chroma_qp_index_offset;
+ qPI = clip(qPI, 0, 51);
+ QPc = chroma_qp[qPI]; // chroma QP looked up from the offset, clipped luma QP
+
+ w = targetmb->Y_width;
+ h = targetmb->Y_height;
+ w2 = w>>1; // not used in this function
+ h2 = h>>1; // not used in this function
+
+ if (destmb->leftblock != NULL && destmb->leftblock->available)
+ leftavail = 1;
+ if (destmb->topblock != NULL && destmb->topblock->available)
+ topavail = 1;
+
+ // TODO: use better strategy to determine intra16x16 encoding mode
+
+ if (leftavail)
+ {
+ MacroBlock *srcleft = destmb->leftblock;
+
+ // plane prediction additionally needs a full-size macroblock and an available top-left neighbour
+ if (topavail && w == 16 && h == 16 && srcleft->topblock != 0 && srcleft->topblock->available)
+ {
+ // Plane prediction
+ pred16x16_plane_c(destmb->Y[0], t->refframe_width);
+ pred8x8_plane_c(destmb->U[0], t->refframe_width>>1);
+ pred8x8_plane_c(destmb->V[0], t->refframe_width>>1);
+ lumapredmode = PLANE_PRED8x8; // NOTE(review): 8x8 constant used for the luma mode; presumably the values coincide -- confirm
+ chromapredmode = PLANE_PRED8x8;
+ }
+ else
+ {
+ // Horizontal prediction
+ pred16x16_horizontal_c(destmb->Y[0], t->refframe_width);
+ pred8x8_horizontal_c(destmb->U[0], t->refframe_width>>1);
+ pred8x8_horizontal_c(destmb->V[0], t->refframe_width>>1);
+ lumapredmode = HOR_PRED8x8; // NOTE(review): 8x8 constant used for the luma mode; presumably the values coincide -- confirm
+ chromapredmode = HOR_PRED8x8;
+ }
+ }
+ else // no left neighbour
+ {
+ if (topavail)
+ {
+ // Vertical prediction
+ pred16x16_vertical_c(destmb->Y[0], t->refframe_width);
+ pred8x8_vertical_c(destmb->U[0], t->refframe_width>>1);
+ pred8x8_vertical_c(destmb->V[0], t->refframe_width>>1);
+ lumapredmode = VERT_PRED;
+ chromapredmode = VERT_PRED8x8;
+ }
+ else // nothing available, encode a standard DC block
+ {
+ pred16x16_128_dc_c(destmb->Y[0], t->refframe_width);
+ pred8x8_128_dc_c(destmb->U[0], t->refframe_width>>1);
+ pred8x8_128_dc_c(destmb->V[0], t->refframe_width>>1);
+ lumapredmode = DC_PRED;
+ chromapredmode = DC_PRED8x8;
+ }
+ }
+
+ // Residual = source - prediction; the (int16_t) prefix is pasted in front of each indexed sample by the macros
+ H264_COPY_16X16BLOCK(t->residual.part4x4Y,(int16_t)targetmb->Y,(int16_t)destmb->Y);
+ H264_COPY_8X8BLOCK(t->residual.part4x4U,(int16_t)targetmb->U,(int16_t)destmb->U);
+ H264_COPY_8X8BLOCK(t->residual.part4x4V,(int16_t)targetmb->V,(int16_t)destmb->V);
+
+ // Transform residual: DCT
+
+ for (y = 0 ; y < 4 ; y++)
+ {
+ for (x = 0 ; x < 4 ; x++)
+ {
+ t->dspcontext.h264_transform_dct_quant(t->residual.part4x4Y[y][x],QPy,1); // NOTE(review): meaning of the third argument is not visible here; presumably it leaves the DC coefficient unquantized
+ }
+ }
+ for (y = 0 ; y < 2 ; y++)
+ {
+ for (x = 0 ; x < 2 ; x++)
+ {
+ t->dspcontext.h264_transform_dct_quant(t->residual.part4x4U[y][x],QPc,1);
+ t->dspcontext.h264_transform_dct_quant(t->residual.part4x4V[y][x],QPc,1);
+ }
+ }
+
+ // Hadamard
+
+ // For luma
+ for (y = 0 ; y < 4 ; y++)
+ for (x = 0 ; x < 4 ; x++)
+ YD[y][x] = t->residual.part4x4Y[y][x][0][0]; // collect the DC coefficient of every 4x4 block
+
+ t->dspcontext.h264_hadamard_mult4x4(YD);
+ t->dspcontext.h264_hadamard_quant_4x4(YD,QPy);
+
+ // For U
+ for (y = 0 ; y < 2 ; y++)
+ for (x = 0 ; x < 2 ; x++)
+ UD[y][x] = t->residual.part4x4U[y][x][0][0];
+ ff_h264_hadamard_mult_2x2(UD);
+ t->dspcontext.h264_hadamard_quant_2x2(UD,QPc);
+
+ // For V
+ for (y = 0 ; y < 2 ; y++)
+ for (x = 0 ; x < 2 ; x++)
+ VD[y][x] = t->residual.part4x4V[y][x][0][0];
+ ff_h264_hadamard_mult_2x2(VD);
+ t->dspcontext.h264_hadamard_quant_2x2(VD,QPc);
+ // Encode macroblock
+
+ ff_h264_encode_intra16x16_residual(b,YD,UD,VD,&(t->residual),lumapredmode,chromapredmode,destmb);
+
+ // Inverse hadamard
+
+ // For luma
+ t->dspcontext.h264_hadamard_mult4x4(YD);
+ t->dspcontext.h264_hadamard_invquant_4x4(YD,QPy);
+ for (y = 0 ; y < 4 ; y++)
+ for (x = 0 ; x < 4 ; x++)
+ t->residual.part4x4Y[y][x][0][0] = YD[y][x]; // put the decoded DC coefficient back into its block
+
+ // For U
+ ff_h264_hadamard_mult_2x2(UD);
+ ff_h264_hadamard_invquant_2x2(UD,QPc);
+ for (y = 0 ; y < 2 ; y++)
+ for (x = 0 ; x < 2 ; x++)
+ t->residual.part4x4U[y][x][0][0] = UD[y][x];
+ // For V
+ ff_h264_hadamard_mult_2x2(VD);
+ ff_h264_hadamard_invquant_2x2(VD,QPc);
+ for (y = 0 ; y < 2 ; y++)
+ for (x = 0 ; x < 2 ; x++)
+ t->residual.part4x4V[y][x][0][0] = VD[y][x];
+
+ // Inverse DCT and add
+
+ for (y = 0 ; y < 4 ; y++)
+ {
+ for (x = 0 ; x < 4 ; x++)
+ {
+ t->dspcontext.h264_transform_inverse_quant_dct_add(t->residual.part4x4Y[y][x],QPy,1,&(destmb->Y[y*4][x*4]),t->Y_stride);
+ }
+ }
+ for (y = 0 ; y < 2 ; y++)
+ {
+ for (x = 0 ; x < 2 ; x++)
+ {
+ t->dspcontext.h264_transform_inverse_quant_dct_add(t->residual.part4x4U[y][x],QPc,1,&(destmb->U[y*4][x*4]),t->U_stride);
+ t->dspcontext.h264_transform_inverse_quant_dct_add(t->residual.part4x4V[y][x],QPc,1,&(destmb->V[y*4][x*4]),t->V_stride);
+ }
+ }
+
+ destmb->available = 1; // the reconstruction can now serve as a neighbour for later macroblocks
+}
+/*
+ * Helpers for deriving CodedBlockPatternLuma.
+ *
+ * H264_CODEDBLOCKPATTERN_4X4CHECK(a,b) sets done to 1 when any of the 16
+ * coefficients of residual->part4x4Y[a][b] is non-zero; it does nothing if
+ * done is already set.
+ *
+ * H264_CODEDBLOCKPATTERN_8X8CHECK(i,j,shift) clears done, checks the four
+ * 4x4 blocks with row index i..i+1 and column index j..j+1 and, if one of
+ * them has a non-zero coefficient, sets bit 'shift' of
+ * CodedBlockPatternLuma.
+ *
+ * Both expect residual, done, x, y and CodedBlockPatternLuma to be in scope
+ * and expand to several statements without enclosing braces, so they must
+ * not be used as the body of an unbraced if/else or loop.
+ */
+#define H264_CODEDBLOCKPATTERN_4X4CHECK(a,b) \
+ for (y = 0 ; !done && y < 4 ; y++)\
+ for (x = 0 ; !done && x < 4 ; x++)\
+ if (residual->part4x4Y[a][b][y][x] != 0) \
+ done = 1;
+#define H264_CODEDBLOCKPATTERN_8X8CHECK(i,j,shift) \
+ done = 0;\
+ H264_CODEDBLOCKPATTERN_4X4CHECK(i+0,j+0)\
+ H264_CODEDBLOCKPATTERN_4X4CHECK(i+0,j+1)\
+ H264_CODEDBLOCKPATTERN_4X4CHECK(i+1,j+0)\
+ H264_CODEDBLOCKPATTERN_4X4CHECK(i+1,j+1)\
+ if (done)\
+ CodedBlockPatternLuma |= (1 << shift);
+
+/**
+ * Write the macroblock layer of one inter 16x16 macroblock, or account for it
+ * as skipped.
+ *
+ * @param t                 encoder context; mb_skip_run is read and updated
+ * @param b                 bitstream writer receiving the syntax elements
+ * @param mv_x,mv_y         motion vector difference that is written to the stream
+ * @param mv_x2,mv_y2       difference between the motion vector and the skip
+ *                          prediction; skipping requires both to be 0
+ * @param residual          quantized coefficients; luma is clipped to
+ *                          [-2047,2047] in place, chroma is passed through
+ *                          H264_COUNT_AND_CLIP_SUBBLOCK
+ * @param UD,VD             quantized 2x2 chroma DC coefficients, passed
+ *                          through H264_COUNT_AND_CLIP
+ * @param pred_frame_index  unused
+ * @param mb                macroblock whose Y/U/V_nonzero tables are updated
+ * @param last_macroblock   nonzero for the last macroblock of the picture, so
+ *                          that a pending run of skipped macroblocks is flushed
+ */
+static void ff_h264_encode_inter16x16_residual(H264Context *t, PutBitContext *b,int mv_x,int mv_y,int mv_x2,int mv_y2,
+        Residual *residual,
+        DCTELEM UD[2][2],DCTELEM VD[2][2],int pred_frame_index,MacroBlock *mb,
+        int last_macroblock)
+{
+    // code number written for each value of coded_block_pattern (chroma << 4 | luma)
+    static const int8_t me_map[] = { 0, 2, 3, 7, 4, 8,17,13, 5,18, 9,14,10,15,16,
+                                    11, 1,32,33,36,34,37,44,40,35,45,38,41,39,42,
+                                    43,19, 6,24,25,20,26,21,46,28,27,47,22,29,23,
+                                    30,31,12};
+    int coded_block_pattern;
+    int CodedBlockPatternLuma;
+    int CodedBlockPatternChroma;
+    int16_t coefficients[256];
+    int x,y,i,j,k;
+    int chromaACcount;
+    int chromaDCcount;
+    int nA,nB;
+
+    // coded_block_pattern, luma part: bit j is set when 8x8 block j (raster
+    // order inside the macroblock) holds at least one nonzero coefficient
+
+    CodedBlockPatternLuma = 0;
+    for (j = 0 ; j < 4 ; j++)
+    {
+        int X = (j&1) << 1;
+        int Y = j&2;
+        int nonzero = 0;
+
+        for (i = 0 ; i < 4 && !nonzero ; i++)
+        {
+            int bx = (i&1)+X;
+            int by = (i>>1)+Y;
+
+            for (y = 0 ; y < 4 && !nonzero ; y++)
+                for (x = 0 ; x < 4 && !nonzero ; x++)
+                    if (residual->part4x4Y[by][bx][y][x] != 0)
+                        nonzero = 1;
+        }
+        if (nonzero)
+            CodedBlockPatternLuma |= 1 << j;
+    }
+
+    // limit too large values in luma; clip() returns the clipped value, so it
+    // has to be stored back to take effect
+    for (y = 0 ; y < 4 ; y++)
+        for (x = 0 ; x < 4 ; x++)
+            for (i = 0 ; i < 4 ; i++)
+                for (j = 0 ; j < 4 ; j++)
+                    residual->part4x4Y[y][x][i][j] = clip(residual->part4x4Y[y][x][i][j], -2047, 2047);
+
+    // coded_block_pattern, chroma part: 2 when AC coefficients are present,
+    // 1 when only DC coefficients are present, 0 otherwise
+
+    chromaDCcount = 0;
+    chromaACcount = 0;
+    for (y = 0 ; y < 2 ; y++)
+    {
+        for (x = 0 ; x < 2 ; x++)
+        {
+            H264_COUNT_AND_CLIP_SUBBLOCK(residual->part4x4U[y][x],chromaACcount);
+            H264_COUNT_AND_CLIP_SUBBLOCK(residual->part4x4V[y][x],chromaACcount);
+        }
+    }
+    for (y = 0 ; y < 2 ; y++)
+    {
+        for (x = 0 ; x < 2 ; x++)
+        {
+            H264_COUNT_AND_CLIP(UD[y][x],chromaDCcount);
+            H264_COUNT_AND_CLIP(VD[y][x],chromaDCcount);
+        }
+    }
+
+    if (chromaACcount)
+        CodedBlockPatternChroma= 2;
+    else
+        CodedBlockPatternChroma= !!chromaDCcount;
+
+    if (mv_x2 == 0 && mv_y2 == 0 && CodedBlockPatternChroma == 0 && CodedBlockPatternLuma == 0) // entirely predictable
+    {
+        t->mb_skip_run++;
+        // mb_skip_run is an unsigned Exp-Golomb element, exactly as in the
+        // non-skipped branch below
+        if (last_macroblock)
+            set_ue_golomb(b, t->mb_skip_run);
+    }
+    else
+    {
+        set_ue_golomb(b, t->mb_skip_run); // mb_skip_run
+        t->mb_skip_run = 0;
+
+        set_ue_golomb(b, 0); // mb_type = P_L0_16x16
+
+        // mb_pred()
+
+        set_se_golomb(b, mv_x);
+        set_se_golomb(b, mv_y);
+
+        coded_block_pattern = (CodedBlockPatternChroma << 4)|CodedBlockPatternLuma;
+        set_ue_golomb(b,me_map[coded_block_pattern]);
+    }
+
+    // residual()
+
+    if (CodedBlockPatternLuma == 0 && CodedBlockPatternChroma == 0) // nothing left to do
+    {
+        memset(mb->Y_nonzero, 0, sizeof(mb->Y_nonzero));
+        memset(mb->U_nonzero, 0, sizeof(mb->U_nonzero));
+        memset(mb->V_nonzero, 0, sizeof(mb->V_nonzero));
+        return;
+    }
+
+    set_se_golomb(b, 0); // mb_qp_delta
+
+    // encode luma levels, one 8x8 block (four 4x4 blocks) at a time
+    for (j = 0 ; j < 4 ; j++)
+    {
+        int X = (j&1) << 1;
+        int Y = j&2;
+        int coded = (CodedBlockPatternLuma >> j)&1;
+
+        for (i = 0 ; i < 4 ; i++)
+        {
+            int bx = (i&1)+X;
+            int by = (i>>1)+Y;
+
+            if (!coded)
+            {
+                mb->Y_nonzero[by][bx] = 0;
+                continue;
+            }
+
+            for (k = 0 ; k < 16 ; k++)
+                coefficients[k] = residual->part4x4Y[by][bx][zigzagy[k]][zigzagx[k]];
+            ff_h264_neighbour_count_nonzero(mb,NEIGHBOUR_SUBTYPE_Y,bx,by,&nA,&nB);
+            mb->Y_nonzero[by][bx] = h264cavlc_encode(b,coefficients,16,nA,nB,0);
+        }
+    }
+
+    // chroma DC levels
+    if (CodedBlockPatternChroma != 0)
+    {
+        coefficients[0] = UD[0][0];
+        coefficients[1] = UD[0][1];
+        coefficients[2] = UD[1][0];
+        coefficients[3] = UD[1][1];
+        h264cavlc_encode(b,coefficients,4,-1,-1,1); // nA and nB are not used in this case
+
+        coefficients[0] = VD[0][0];
+        coefficients[1] = VD[0][1];
+        coefficients[2] = VD[1][0];
+        coefficients[3] = VD[1][1];
+        h264cavlc_encode(b,coefficients,4,-1,-1,1); // nA and nB are not used in this case
+    }
+
+    // chroma AC levels: the DC position (first zigzag entry) is left out
+    if (CodedBlockPatternChroma == 2)
+    {
+        for (i = 0 ; i < 4 ; i++)
+        {
+            int bx = i&1;
+            int by = i>>1;
+
+            for (k = 0 ; k < 15 ; k++)
+                coefficients[k] = residual->part4x4U[by][bx][zigzagy[k+1]][zigzagx[k+1]];
+            ff_h264_neighbour_count_nonzero(mb,NEIGHBOUR_SUBTYPE_U,bx,by,&nA,&nB);
+            mb->U_nonzero[by][bx] = h264cavlc_encode(b,coefficients,15,nA,nB,0);
+        }
+
+        for (i = 0 ; i < 4 ; i++)
+        {
+            int bx = i&1;
+            int by = i>>1;
+
+            for (k = 0 ; k < 15 ; k++)
+                coefficients[k] = residual->part4x4V[by][bx][zigzagy[k+1]][zigzagx[k+1]];
+            ff_h264_neighbour_count_nonzero(mb,NEIGHBOUR_SUBTYPE_V,bx,by,&nA,&nB);
+            mb->V_nonzero[by][bx] = h264cavlc_encode(b,coefficients,15,nA,nB,0);
+        }
+    }
+    else
+    {
+        for (y = 0 ; y < 2 ; y++)
+        {
+            for (x = 0 ; x < 2 ; x++)
+            {
+                mb->U_nonzero[y][x] = 0;
+                mb->V_nonzero[y][x] = 0;
+            }
+        }
+    }
+}
+
+/**
+ * Fill one chroma plane of a macroblock from the reference picture.
+ *
+ * @param dst     row pointers of the destination plane
+ * @param src     reference sample matching the top-left destination sample
+ * @param stride  line size of the reference plane
+ * @param w2,h2   width and height of the area to fill
+ * @param xmod    nonzero to average each sample with its right neighbour
+ * @param ymod    nonzero to average each sample with the sample below it
+ */
+static void ff_h264_predict_chroma_plane(uint8_t **dst, const uint8_t *src, int stride,
+                                         int w2, int h2, int xmod, int ymod)
+{
+    int row, col;
+
+    for (row = 0 ; row < h2 ; row++, src += stride)
+    {
+        for (col = 0 ; col < w2 ; col++)
+        {
+            int value;
+
+            // only the samples needed by the selected case are read
+            if (xmod == 0 && ymod == 0)
+                value = src[col];
+            else if (xmod == 0)
+                value = ((int)src[col]+(int)src[col+stride]+1)/2;
+            else if (ymod == 0)
+                value = ((int)src[col]+(int)src[col+1]+1)/2;
+            else
+                value = ((int)src[col]+(int)src[col+1]+(int)src[col+stride+1]+(int)src[col+stride]+2)/4;
+
+            dst[row][col] = (uint8_t)value;
+        }
+    }
+}
+
+/**
+ * Build the motion compensated prediction of macroblock (mbx,mby) in destmb.
+ *
+ * Luma is copied from the reconstructed reference picture at the position
+ * displaced by (mvx/4, mvy/4). Chroma is taken at half that position; when
+ * the luma position is odd in a direction, neighbouring chroma samples are
+ * averaged in that direction.
+ */
+static void ff_h264_predict(H264Context *t, MacroBlock *destmb, FrameInfo *refframe, int mbx, int mby, int mvx, int mvy)
+{
+    AVPicture *ref = &(refframe->reconstructed_picture);
+    const int luma_w = destmb->Y_width;
+    const int luma_h = destmb->Y_height;
+    const int srcx = (mbx << 4)+(mvx/4);
+    const int srcy = (mby << 4)+(mvy/4);
+    const int xmod = srcx & 1;
+    const int ymod = srcy & 1;
+    const uint8_t *luma = ref->data[0]+srcy*ref->linesize[0]+srcx;
+    int row, col;
+
+    for (row = 0 ; row < luma_h ; row++, luma += ref->linesize[0])
+        for (col = 0 ; col < luma_w ; col++)
+            destmb->Y[row][col] = luma[col];
+
+    ff_h264_predict_chroma_plane(destmb->U,
+                                 ref->data[1]+(srcy/2)*ref->linesize[1]+srcx/2,
+                                 ref->linesize[1], luma_w>>1, luma_h>>1, xmod, ymod);
+    ff_h264_predict_chroma_plane(destmb->V,
+                                 ref->data[2]+(srcy/2)*ref->linesize[2]+srcx/2,
+                                 ref->linesize[2], luma_w>>1, luma_h>>1, xmod, ymod);
+}
+
+#define MAXSEARCHSTEPS 8
+#define SEARCHWIDTH 1
+
+/**
+ * Estimate the cost of predicting targetmb from the reference block whose
+ * top-left luma sample is at (scanx,scany).
+ *
+ * The cost is the sum of the mv_len_table entries for the two vector
+ * components and a weight interpolated linearly from
+ * sae_codeblocksize_relation using the scaled sum of absolute errors.
+ *
+ * @param chroma_cmp  index of the pix_abs[1][] comparison used for chroma
+ * @param xvec,yvec   motion vector difference that would have to be coded
+ */
+static int ff_h264_motion_candidate_cost(H264Context *t, MacroBlock *targetmb, FrameInfo *refframe,
+                                         int scanx, int scany, int chroma_cmp, int xvec, int yvec)
+{
+    AVPicture *ref = &(refframe->reconstructed_picture);
+    int lo, hi;
+    int weight;
+    int bits;
+    int sae = t->dspcontext.pix_abs[0][0](0,targetmb->Y[0],
+                  ref->data[0] + scany * ref->linesize[0] + scanx,
+                  ref->linesize[0], 16);
+
+    sae += t->dspcontext.pix_abs[1][chroma_cmp](0,targetmb->U[0],
+               ref->data[1] + (scany/2) * ref->linesize[1] + scanx/2,
+               ref->linesize[1], 8);
+    sae += t->dspcontext.pix_abs[1][chroma_cmp](0,targetmb->V[0],
+               ref->data[2] + (scany/2) * ref->linesize[2] + scanx/2,
+               ref->linesize[2], 8);
+    sae = FFMIN(sae>>4, 2047);
+
+    // table columns are 256 apart; interpolate between the two surrounding ones
+    lo = sae>>8;
+    hi = FFMIN((lo+1), 8);
+
+    bits = mv_len_table[xvec+MVTABLE_OFFSET]
+         + mv_len_table[yvec+MVTABLE_OFFSET];
+    weight = sae_codeblocksize_relation[t->QP>>2][lo];
+    weight += (sae_codeblocksize_relation[t->QP>>2][hi]
+               - sae_codeblocksize_relation[t->QP>>2][lo] )
+              * (sae - (lo << 8)) / ( (hi << 8)
+              - (lo << 8) );
+    bits += weight;
+
+    return bits;
+}
+
+/**
+ * Search a motion vector for targetmb and store the matching prediction.
+ *
+ * Starting from the zero vector, the candidates within SEARCHWIDTH samples
+ * of the current position are evaluated and the search moves to the cheapest
+ * one. It stops when no candidate is cheaper or after MAXSEARCHSTEPS moves.
+ * The prediction for the best position is written to destmb and the vector,
+ * multiplied by 4, is returned through mvx and mvy.
+ *
+ * @param pred_mvx,pred_mvy  predicted vector; only the difference to it is
+ *                           taken into account in the cost
+ */
+static void ff_h264_find_motion_vector_and_prediction(H264Context *t, MacroBlock *targetmb, FrameInfo *refframe,
+        int mbx, int mby, int *mvx, int *mvy,
+        int pred_mvx, int pred_mvy, MacroBlock *destmb)
+{
+    const int x = mbx << 4;
+    const int y = mby << 4;
+    int bestx = x;
+    int besty = y;
+    int curx = x;
+    int cury = y;
+    int numsteps = 0;
+    int best_mvx, best_mvy;
+    // cost of the zero vector; it's actually the difference to the
+    // prediction which will be encoded!
+    int minbitsize = ff_h264_motion_candidate_cost(t, targetmb, refframe, x, y, 0,
+                                                   -pred_mvx, -pred_mvy);
+
+    while (numsteps < MAXSEARCHSTEPS)
+    {
+        int startx = curx - SEARCHWIDTH;
+        int starty = cury - SEARCHWIDTH;
+        int stopx = curx + SEARCHWIDTH + 1;
+        int stopy = cury + SEARCHWIDTH + 1;
+        int foundbetter = 0;
+        int scanx, scany;
+
+        // keep the 16x16 reference block inside the reference picture
+        if (startx < 0)
+            startx = 0;
+        if (starty < 0)
+            starty = 0;
+        if (stopx > t->refframe_width - 16 + 1)
+            stopx = t->refframe_width - 16 + 1;
+        if (stopy > t->refframe_height - 16 + 1)
+            stopy = t->refframe_height -16 + 1;
+
+        for(scany = starty; scany < stopy; scany++)
+        {
+            for(scanx = startx; scanx < stopx; scanx++)
+            {
+                int bitsize;
+
+                if (curx == scanx && cury == scany)
+                    continue; // the current position has already been evaluated
+
+                bitsize = ff_h264_motion_candidate_cost(t, targetmb, refframe, scanx, scany,
+                                                        scanx%2 + (scany%2)*2,
+                                                        (scanx-x)*4-pred_mvx,
+                                                        (scany-y)*4-pred_mvy);
+                if (bitsize < minbitsize)
+                {
+                    minbitsize = bitsize;
+                    bestx = scanx;
+                    besty = scany;
+                    foundbetter = 1;
+                }
+            }
+        }
+
+        if (!foundbetter)
+            break;
+
+        curx = bestx;
+        cury = besty;
+        numsteps++;
+    }
+
+    best_mvx = (bestx - x) * 4;
+    best_mvy = (besty - y) * 4;
+
+    ff_h264_predict(t, destmb, refframe, mbx, mby, best_mvx, best_mvy);
+
+    *mvx = best_mvx;
+    *mvy = best_mvy;
+}
+
+/**
+ * Predict the motion vector of destmb from its neighbouring macroblocks.
+ *
+ * A is the left, B the top and C the top-right neighbour; when C is not
+ * available the top-left neighbour D takes its place. A neighbour counts as
+ * available when its pointer is non-NULL and its available flag is set.
+ *
+ * @param mvpred_x,mvpred_y    receive the prediction for a coded vector: the
+ *                             vector of the only available neighbour when
+ *                             exactly one of A, B, C is available, otherwise
+ *                             the component-wise median of A, B and C with
+ *                             unavailable neighbours counted as (0,0)
+ * @param mvpred_x2,mvpred_y2  receive the prediction used for the skip
+ *                             decision: (0,0) when A or B is unavailable or
+ *                             has a zero vector, otherwise the same value as
+ *                             mvpred_x,mvpred_y
+ */
+static void ff_h264_estimate_motion_vectors(MacroBlock *destmb, int *mvpred_x, int *mvpred_y, int *mvpred_x2, int *mvpred_y2)
+{
+    int mvAx = 0, mvAy = 0;
+    int mvBx = 0, mvBy = 0;
+    int mvCx = 0, mvCy = 0;
+    int mvDx = 0, mvDy = 0;
+    int Aavail = 0;
+    int Bavail = 0;
+    int Cavail = 0;
+    int Davail = 0;
+
+    if (destmb->leftblock != NULL && destmb->leftblock->available)
+    {
+        Aavail = 1;
+        mvAx = destmb->leftblock->mv_x;
+        mvAy = destmb->leftblock->mv_y;
+    }
+    if (destmb->topblock != NULL)
+    {
+        MacroBlock *topblock = destmb->topblock;
+
+        if (topblock->available)
+        {
+            Bavail = 1;
+            mvBx = topblock->mv_x;
+            mvBy = topblock->mv_y;
+        }
+        if (topblock->leftblock != NULL && topblock->leftblock->available)
+        {
+            Davail = 1;
+            mvDx = topblock->leftblock->mv_x;
+            mvDy = topblock->leftblock->mv_y;
+        }
+        if (topblock->rightblock != NULL && topblock->rightblock->available)
+        {
+            Cavail = 1;
+            mvCx = topblock->rightblock->mv_x;
+            mvCy = topblock->rightblock->mv_y;
+        }
+    }
+
+    if (!Cavail)
+    {
+        Cavail = Davail;
+        mvCx = mvDx;
+        mvCy = mvDy;
+    }
+
+    if (Aavail + Bavail + Cavail == 1)
+    {
+        // Only one neighbour refers to the reference picture: its vector is
+        // the prediction. Taking the median with two zero vectors would give
+        // (0,0) instead whenever that neighbour is B or C.
+        if (Aavail)
+        {
+            *mvpred_x = mvAx;
+            *mvpred_y = mvAy;
+        }
+        else if (Bavail)
+        {
+            *mvpred_x = mvBx;
+            *mvpred_y = mvBy;
+        }
+        else
+        {
+            *mvpred_x = mvCx;
+            *mvpred_y = mvCy;
+        }
+    }
+    else
+    {
+        *mvpred_x = mid_pred(mvAx,mvBx,mvCx);
+        *mvpred_y = mid_pred(mvAy,mvBy,mvCy);
+    }
+
+    if (!Aavail || !Bavail || (mvAx == 0 && mvAy == 0) || (mvBx == 0 && mvBy == 0))
+    {
+        *mvpred_x2 = 0;
+        *mvpred_y2 = 0;
+    }
+    else
+    {
+        *mvpred_x2 = *mvpred_x;
+        *mvpred_y2 = *mvpred_y;
+    }
+}
+
+/**
+ * Filter one line of luma samples across a block edge.
+ *
+ * Book p. 184, spec p. 182
+ *
+ * @param p   samples on one side of the edge, p[0] being next to the edge;
+ *            p[0], p[1] and p[2] may be modified
+ * @param q   samples on the other side, q[0] being next to the edge;
+ *            q[0], q[1] and q[2] may be modified
+ * @param QP  index into alpha_table, beta_table and tc0_table
+ * @param bS  boundary strength: 0 leaves the samples untouched, 4 selects the
+ *            strong filter, other values select the filter whose correction
+ *            is limited through tc0_table[QP][bS-1]
+ */
+static inline void ff_h264_deblocking_filter_line_luma(int p[4], int q[4], int QP, int bS)
+{
+    int pa0, pa1, pa2, qa0, qa1, qa2;
+    int alpha, beta;
+    int aP, aQ;
+
+    if (bS == 0)
+        return;
+
+    alpha = alpha_table[QP];
+    beta = beta_table[QP];
+
+    // the edge is only filtered when the step across it and the activity on
+    // both sides are below the thresholds
+    if (ABS(p[0] - q[0]) >= alpha) /* (1) */
+        return;
+    if (ABS(p[1] - p[0]) >= beta) /* (2) */
+        return;
+    if (ABS(q[1] - q[0]) >= beta) /* (3) */
+        return;
+
+    pa0 = p[0];
+    pa1 = p[1];
+    pa2 = p[2];
+    qa0 = q[0];
+    qa1 = q[1];
+    qa2 = q[2];
+
+    aP = ABS(p[2] - p[0]);
+    aQ = ABS(q[2] - q[0]);
+
+    if (bS == 4)
+    {
+        int small_step = ABS(p[0] - q[0]) < ((alpha>>2) + 2);
+
+        if (aP < beta && small_step)
+        {
+            // Luminance filtering
+            pa0 = (p[2] + 2*p[1] + 2*p[0] + 2*q[0] + q[1] + 4) >> 3; /* (20) */
+            pa1 = (p[2] + p[1] + p[0] + q[0] + 2) >> 2; /* (21) */
+            pa2 = (2*p[3] + 3*p[2] + p[1] + p[0] + q[0] + 4) >> 3; /* (22) */
+        }
+        else
+            pa0 = (2*p[1] + p[0] + q[1] + 2) >> 2; /* (23) */
+
+        if (aQ < beta && small_step)
+        {
+            // Luminance filtering
+            qa0 = (p[1] + 2*p[0] + 2*q[0] + 2*q[1] + q[2] + 4) >> 3; /* (20) */
+            qa1 = (p[0] + q[0] + q[1] + q[2] + 2) >> 2; /* (21) */
+            qa2 = (2*q[3] + 3*q[2] + q[1] + q[0] + p[0] + 4) >> 3; /* (22) */
+        }
+        else
+            qa0 = (2*q[1] + q[0] + p[1] + 2) >> 2; /* (23) */
+    }
+    else
+    {
+        int c1 = tc0_table[QP][bS-1];
+        int c0 = c1;
+        int delta0;
+        // q[0] - p[0] may be negative; multiply rather than left-shift it
+        int delta0i = ((q[0] - p[0])*4 + (p[1] - q[1]) + 4) >> 3;
+
+        if (aP < beta) /* condition (8) */
+        {
+            int deltap1i = (p[2] + ((p[0] + q[0] + 1) >> 1) - 2*p[1]) >> 1;
+
+            /* c0 should be incremented for each condition being true, 8-473 */
+            c0++;
+            pa1 = p[1] + clip(deltap1i, -c1, c1);
+        }
+
+        if (aQ < beta) /* condition (9) */
+        {
+            int deltaq1i = (q[2] + ((p[0] + q[0] + 1) >> 1) - 2*q[1]) >> 1;
+
+            /* c0 should be incremented for each condition being true, 8-473 */
+            c0++;
+            qa1 = q[1] + clip(deltaq1i, -c1, c1);
+        }
+
+        delta0 = clip(delta0i, -c0, c0);
+        pa0 = clip_uint8(p[0] + delta0);
+        qa0 = clip_uint8(q[0] - delta0);
+    }
+    p[0] = pa0;
+    p[1] = pa1;
+    p[2] = pa2;
+    q[0] = qa0;
+    q[1] = qa1;
+    q[2] = qa2;
+}
+
+/**
+ * Filter one line of chroma samples across a block edge.
+ *
+ * Only the samples next to the edge, p[0] and q[0], can change.
+ *
+ * @param p   samples on one side of the edge, p[0] being next to the edge
+ * @param q   samples on the other side, q[0] being next to the edge
+ * @param QP  index into alpha_table, beta_table and tc0_table
+ * @param bS  boundary strength: 0 leaves the samples untouched, 4 selects the
+ *            strong filter, other values select the filter whose correction
+ *            is limited to tc0_table[QP][bS-1] + 1
+ */
+static inline void ff_h264_deblocking_filter_line_chroma(int p[4], int q[4], int QP, int bS)
+{
+    int pa0, qa0;
+    int alpha, beta;
+
+    if (bS == 0)
+        return;
+
+    alpha = alpha_table[QP];
+    beta = beta_table[QP];
+
+    // the edge is only filtered when the step across it and the activity on
+    // both sides are below the thresholds
+    if (ABS(p[0] - q[0]) >= alpha) /* (1) */
+        return;
+    if (ABS(p[1] - p[0]) >= beta) /* (2) */
+        return;
+    if (ABS(q[1] - q[0]) >= beta) /* (3) */
+        return;
+
+    if (bS == 4)
+    {
+        pa0 = ((p[1]<<1) + p[0] + q[1] + 2) >> 2; /* (23) */
+        qa0 = ((q[1]<<1) + q[0] + p[1] + 2) >> 2; /* (23) */
+    }
+    else
+    {
+        int c0 = tc0_table[QP][bS-1] + 1; /* p. 191, (8-474) */
+        // q[0] - p[0] may be negative; multiply rather than left-shift it
+        int delta0i = ((q[0] - p[0])*4 + (p[1] - q[1]) + 4) >> 3;
+        int delta0 = clip(delta0i, -c0, c0);
+
+        pa0 = clip_uint8(p[0] + delta0);
+        qa0 = clip_uint8(q[0] - delta0);
+    }
+
+    // p[1], p[2], q[1] and q[2] keep their values
+    p[0] = pa0;
+    q[0] = qa0;
+}
+
+/**
+ * Load the four samples on each side of an edge, run the luma or chroma line
+ * filter on them and store the three samples nearest to the edge back.
+ *
+ * @param ps,qs      addresses of the samples, index 0 being next to the edge
+ * @param is_chroma  selects ff_h264_deblocking_filter_line_chroma
+ */
+static inline void ff_h264_deblock_filter_samples(uint8_t *ps[4], uint8_t *qs[4], int QP, int bS, int is_chroma)
+{
+    int p[4],q[4];
+    int i;
+
+    for (i = 0 ; i < 4 ; i++)
+    {
+        p[i] = *ps[i];
+        q[i] = *qs[i];
+    }
+
+    if (is_chroma)
+        ff_h264_deblocking_filter_line_chroma(p,q,QP,bS);
+    else
+        ff_h264_deblocking_filter_line_luma(p,q,QP,bS);
+
+    for (i = 0 ; i < 3 ; i++)
+    {
+        *ps[i] = p[i];
+        *qs[i] = q[i];
+    }
+}
+
+/**
+ * Filter across a vertical edge within one row of samples: the p samples are
+ * prow[px], prow[px-1], ... and the q samples are qrow[qx], qrow[qx+1], ...
+ */
+static inline void ff_h264_deblock_vertical_line(uint8_t *prow, int px, uint8_t *qrow, int qx,
+                                                 int QP, int bS, int is_chroma)
+{
+    uint8_t *ps[4], *qs[4];
+    int i;
+
+    for (i = 0 ; i < 4 ; i++)
+    {
+        ps[i] = prow + px - i;
+        qs[i] = qrow + qx + i;
+    }
+    ff_h264_deblock_filter_samples(ps,qs,QP,bS,is_chroma);
+}
+
+/**
+ * Filter across a horizontal edge within column x: the p samples are taken
+ * from rows prows[py], prows[py-1], ... and the q samples from rows
+ * qrows[qy], qrows[qy+1], ...
+ */
+static inline void ff_h264_deblock_horizontal_line(uint8_t **prows, int py, uint8_t **qrows, int qy, int x,
+                                                   int QP, int bS, int is_chroma)
+{
+    uint8_t *ps[4], *qs[4];
+    int i;
+
+    for (i = 0 ; i < 4 ; i++)
+    {
+        ps[i] = prows[py - i] + x;
+        qs[i] = qrows[qy + i] + x;
+    }
+    ff_h264_deblock_filter_samples(ps,qs,QP,bS,is_chroma);
+}
+
+/**
+ * Boundary strength on an edge between two macroblocks: 4 in IDR pictures,
+ * otherwise 2 when either 4x4 block next to the edge has a nonzero marker,
+ * 1 when the motion vectors differ by 4 or more in a component, 0 otherwise.
+ */
+static inline int ff_h264_mb_edge_strength(int isIDR, int nonzero_p, int nonzero_q, MacroBlock *pmb, MacroBlock *qmb)
+{
+    if (isIDR)
+        return 4;
+    if (nonzero_p != 0 || nonzero_q != 0)
+        return 2;
+    if (ABS(pmb->mv_x - qmb->mv_x) >= 4 || ABS(pmb->mv_y - qmb->mv_y) >= 4)
+        return 1;
+    return 0;
+}
+
+/**
+ * Boundary strength on an edge inside a macroblock: 3 in IDR pictures,
+ * otherwise 2 when either 4x4 block next to the edge has a nonzero marker and
+ * 0 if not (one motion vector per 16x16 block, so there will be no difference
+ * between the motion vectors).
+ */
+static inline int ff_h264_inner_edge_strength(int isIDR, int nonzero_p, int nonzero_q)
+{
+    if (isIDR)
+        return 3;
+    return (nonzero_p != 0 || nonzero_q != 0) ? 2 : 0;
+}
+
+/**
+ * Deblock one reconstructed macroblock.
+ *
+ * The vertical edges are filtered first (left macroblock edge, then the
+ * internal luma edges at x = 4, 8, 12 and the internal chroma edge at x = 4),
+ * followed by the horizontal edges in the same fashion. Chroma lines reuse
+ * the boundary strength of the luma line at twice their position.
+ *
+ * @param filter_left_edge  nonzero to filter the edge shared with mb->leftblock
+ * @param filter_top_edge   nonzero to filter the edge shared with mb->topblock
+ * @param isIDR             nonzero when the picture is an IDR picture
+ * @param QPYav,QPCav       QP values handed to the luma and chroma line filters
+ */
+static void ff_h264_deblock_macroblock(MacroBlock *mb, int filter_left_edge, int filter_top_edge, int isIDR, int QPYav, int QPCav)
+{
+    int bS[4][16];
+    int x,y;
+    int edge;
+
+    // First step is filtering of vertical edges
+
+    // first filter left edge
+    if (filter_left_edge)
+    {
+        MacroBlock *leftmb = mb->leftblock;
+
+        // first Y
+        for (y = 0 ; y < 16 ; y++)
+        {
+            bS[0][y] = ff_h264_mb_edge_strength(isIDR, leftmb->Y_nonzero[y>>2][3], mb->Y_nonzero[y>>2][0], leftmb, mb);
+            ff_h264_deblock_vertical_line(leftmb->Y[y],15,mb->Y[y],0,QPYav,bS[0][y],0);
+        }
+
+        // then U and V
+        for (y = 0 ; y < 8 ; y++)
+        {
+            ff_h264_deblock_vertical_line(leftmb->U[y],7,mb->U[y],0,QPCav,bS[0][y<<1],1);
+            ff_h264_deblock_vertical_line(leftmb->V[y],7,mb->V[y],0,QPCav,bS[0][y<<1],1);
+        }
+    }
+
+    // then the internal vertical edges
+
+    // first Y
+    for (edge = 1 ; edge < 4 ; edge++)
+    {
+        x = edge << 2;
+
+        for (y = 0 ; y < 16 ; y++)
+        {
+            bS[edge][y] = ff_h264_inner_edge_strength(isIDR, mb->Y_nonzero[y>>2][edge-1], mb->Y_nonzero[y>>2][edge]);
+            ff_h264_deblock_vertical_line(mb->Y[y],x-1,mb->Y[y],x,QPYav,bS[edge][y],0);
+        }
+    }
+
+    // then U and V
+    for (y = 0 ; y < 8 ; y++)
+    {
+        ff_h264_deblock_vertical_line(mb->U[y],3,mb->U[y],4,QPCav,bS[2][y<<1],1);
+        ff_h264_deblock_vertical_line(mb->V[y],3,mb->V[y],4,QPCav,bS[2][y<<1],1);
+    }
+
+    // Next step is filtering of horizontal edges
+
+    // first, filter top edge
+    if (filter_top_edge)
+    {
+        MacroBlock *topmb = mb->topblock;
+
+        // first Y
+        for (x = 0 ; x < 16 ; x++)
+        {
+            bS[0][x] = ff_h264_mb_edge_strength(isIDR, topmb->Y_nonzero[3][x>>2], mb->Y_nonzero[0][x>>2], topmb, mb);
+            ff_h264_deblock_horizontal_line(topmb->Y,15,mb->Y,0,x,QPYav,bS[0][x],0);
+        }
+
+        // then U and V
+        for (x = 0 ; x < 8 ; x++)
+        {
+            ff_h264_deblock_horizontal_line(topmb->U,7,mb->U,0,x,QPCav,bS[0][x<<1],1);
+            ff_h264_deblock_horizontal_line(topmb->V,7,mb->V,0,x,QPCav,bS[0][x<<1],1);
+        }
+    }
+
+    // then the internal horizontal edges
+
+    // first Y
+    for (edge = 1 ; edge < 4 ; edge++)
+    {
+        y = edge << 2;
+
+        for (x = 0 ; x < 16 ; x++)
+        {
+            bS[edge][x] = ff_h264_inner_edge_strength(isIDR, mb->Y_nonzero[edge-1][x>>2], mb->Y_nonzero[edge][x>>2]);
+            ff_h264_deblock_horizontal_line(mb->Y,y-1,mb->Y,y,x,QPYav,bS[edge][x],0);
+        }
+    }
+
+    // then U and V
+    for (x = 0 ; x < 8 ; x++)
+    {
+        ff_h264_deblock_horizontal_line(mb->U,3,mb->U,4,x,QPCav,bS[2][x<<1],1);
+        ff_h264_deblock_horizontal_line(mb->V,3,mb->V,4,x,QPCav,bS[2][x<<1],1);
+    }
+}
+
+/**
+ * Deblock every macroblock of a reconstructed frame in raster order.
+ *
+ * The left edge of a macroblock is filtered unless it lies in the first
+ * column, the top edge unless it lies in the first row; for the top-left
+ * macroblock no filtering is done at the outer edges.
+ */
+static void ff_h264_deblock(H264Context *t, FrameInfo *frame, int isIDR, int QPYav, int QPCav)
+{
+    const int cols = t->mb_width;
+    const int rows = t->mb_height;
+    int mbx;
+    int mby = 0;
+
+    do
+    {
+        const int below_first_row = (mby != 0);
+
+        // the first macroblock of a row has no left neighbour
+        ff_h264_deblock_macroblock(&(frame->reconstructed_mb_map[mby][0]),0,below_first_row,isIDR,QPYav,QPCav);
+        for (mbx = 1 ; mbx < cols ; mbx++)
+            ff_h264_deblock_macroblock(&(frame->reconstructed_mb_map[mby][mbx]),1,below_first_row,isIDR,QPYav,QPCav);
+    } while (++mby < rows);
+}
+
+/**
+ * Encode one macroblock as inter 16x16 and reconstruct it.
+ *
+ * The steps are: predict the motion vector, search the motion vector and
+ * build the prediction in destmb, transform and quantize the difference
+ * between targetmb and the prediction, write the macroblock to the
+ * bitstream, and finally add the inverse transformed residual to the
+ * prediction so that destmb holds the reconstructed samples.
+ *
+ * @param t                context; its residual member is used as work area
+ * @param targetmb         macroblock of the input picture
+ * @param b                bitstream writer
+ * @param destmb           macroblock of the picture being reconstructed
+ * @param previous_frames  reference frames; only previous_frames[0] is used
+ * @param num_prev_frames  unused
+ * @param mbx,mby          macroblock position in macroblock units
+ */
+static void ff_h264_encode_Inter_16x16(H264Context *t, MacroBlock *targetmb, PutBitContext *b,
+        MacroBlock *destmb, FrameInfo **previous_frames,
+        int num_prev_frames, int mbx, int mby)
+{
+    int y,h,x,w;
+    int w2,h2;
+    int qPI;
+    int QPc;
+    int QPy = t->QP;
+    int16_t UD[2][2];
+    int16_t VD[2][2];
+    int mvx = 0;
+    int mvy = 0;
+    int pred_mvx = 0;
+    int pred_mvy = 0;
+    int pred_mvx2 = 0;
+    int pred_mvy2 = 0;
+
+    // chroma QP is looked up from the luma QP plus the configured offset
+    qPI = t->QP + t->chroma_qp_index_offset;
+    qPI = clip(qPI, 0, 51);
+    QPc = chroma_qp[qPI];
+
+    w = targetmb->Y_width;
+    h = targetmb->Y_height;
+    w2 = w>>1;
+    h2 = h>>1;
+
+    // Find motion vector and prediction
+
+    ff_h264_estimate_motion_vectors(destmb, &pred_mvx, &pred_mvy, &pred_mvx2, &pred_mvy2);
+    ff_h264_find_motion_vector_and_prediction(t, targetmb, previous_frames[0], mbx, mby, &mvx, &mvy,
+                                              pred_mvx, pred_mvy, destmb);
+
+    // Calculate residual
+
+    H264_COPY_16X16BLOCK(t->residual.part4x4Y,(int16_t)targetmb->Y,(int16_t)destmb->Y);
+    H264_COPY_8X8BLOCK(t->residual.part4x4U,(int16_t)targetmb->U,(int16_t)destmb->U);
+    H264_COPY_8X8BLOCK(t->residual.part4x4V,(int16_t)targetmb->V,(int16_t)destmb->V);
+
+    // Transform residual: DCT
+
+    for (y = 0 ; y < 4 ; y++)
+    {
+        for (x = 0 ; x < 4 ; x++)
+        {
+            t->dspcontext.h264_transform_dct_quant(t->residual.part4x4Y[y][x],QPy,0);
+        }
+    }
+    for (y = 0 ; y < 2 ; y++)
+    {
+        for (x = 0 ; x < 2 ; x++)
+        {
+            t->dspcontext.h264_transform_dct_quant(t->residual.part4x4U[y][x],QPc,1);
+            t->dspcontext.h264_transform_dct_quant(t->residual.part4x4V[y][x],QPc,1);
+        }
+    }
+
+    // The DC coefficients of the four chroma blocks of each plane are
+    // gathered and transformed separately
+
+    // For U
+    for (y = 0 ; y < 2 ; y++)
+        for (x = 0 ; x < 2 ; x++)
+            UD[y][x] = t->residual.part4x4U[y][x][0][0];
+    ff_h264_hadamard_mult_2x2(UD);
+    t->dspcontext.h264_hadamard_quant_2x2(UD, QPc);
+
+    // For V
+    for (y = 0 ; y < 2 ; y++)
+        for (x = 0 ; x < 2 ; x++)
+            VD[y][x] = t->residual.part4x4V[y][x][0][0];
+    ff_h264_hadamard_mult_2x2(VD);
+    t->dspcontext.h264_hadamard_quant_2x2(VD,QPc);
+
+    // Encode motion vectors, residual, ...
+
+    destmb->mv_x = mvx;
+    destmb->mv_y = mvy;
+
+    ff_h264_encode_inter16x16_residual(t, b, mvx-pred_mvx, mvy-pred_mvy, mvx-pred_mvx2, mvy-pred_mvy2,
+                                       &(t->residual), UD, VD, 0, destmb, (mbx == t->mb_width-1 && mby == t->mb_height-1));
+
+    // Inverse hadamard
+
+    // For U
+    ff_h264_hadamard_mult_2x2(UD);
+    ff_h264_hadamard_invquant_2x2(UD,QPc);
+    for (y = 0 ; y < 2 ; y++)
+        for (x = 0 ; x < 2 ; x++)
+            t->residual.part4x4U[y][x][0][0] = UD[y][x];
+    // For V
+    ff_h264_hadamard_mult_2x2(VD);
+    ff_h264_hadamard_invquant_2x2(VD,QPc);
+    for (y = 0 ; y < 2 ; y++)
+        for (x = 0 ; x < 2 ; x++)
+            t->residual.part4x4V[y][x][0][0] = VD[y][x];
+
+    // Inverse DCT and add
+
+    for (y = 0 ; y < 4 ; y++)
+    {
+        for (x = 0 ; x < 4 ; x++)
+        {
+            t->dspcontext.h264_transform_inverse_quant_dct_add(t->residual.part4x4Y[y][x],QPy,0,&(destmb->Y[y*4][x*4]),t->Y_stride);
+        }
+    }
+    for (y = 0 ; y < 2 ; y++)
+    {
+        for (x = 0 ; x < 2 ; x++)
+        {
+            // each plane is addressed with its own stride
+            t->dspcontext.h264_transform_inverse_quant_dct_add(t->residual.part4x4U[y][x],QPc,1,&(destmb->U[y*4][x*4]),t->U_stride);
+            t->dspcontext.h264_transform_inverse_quant_dct_add(t->residual.part4x4V[y][x],QPc,1,&(destmb->V[y*4][x*4]),t->V_stride);
+        }
+    }
+
+    destmb->available = 1;
+}
+
+/**
+ * Move the quantizer one step towards the requested bitrate.
+ *
+ * The bitrate achieved over the recorded history is compared with
+ * avctx->bit_rate: QP is lowered by one when the target is higher and raised
+ * by one otherwise, staying within 0..51. Nothing happens while no
+ * history has been recorded.
+ */
+static void ff_h264_control_bitrate(AVCodecContext *avctx, H264Context *t)
+{
+    int64_t achieved;
+
+    if (!t->blocksize_history_total_milliseconds)
+        return;
+
+    achieved = (t->blocksize_history_sum*1000)/t->blocksize_history_total_milliseconds;
+
+    if (avctx->bit_rate > achieved)
+    {
+        // increase quality
+        if (t->QP > 0)
+            t->QP--;
+    }
+    else if (t->QP < 51)
+    {
+        // decrease quality
+        t->QP++;
+    }
+}
+
+/**
+ * Encode one picture.
+ *
+ * Every IDRcount-th picture is coded as an IDR picture made of intra 16x16
+ * macroblocks and is preceded by a sequence and a picture parameter set; all
+ * other pictures are coded as P pictures made of inter 16x16 macroblocks
+ * predicted from the previously reconstructed picture.
+ *
+ * @param avctx     codec context; priv_data holds the H264Context
+ * @param buf       output buffer receiving the NAL units
+ * @param buf_size  size of buf in bytes
+ * @param data      input picture, read as an AVPicture in YUV420P format
+ * @return number of bytes written to buf, or -1 when buf is too small for
+ *         the coded picture (the picture is then dropped and the encoder
+ *         state is not advanced)
+ */
+static int ff_h264_encode(AVCodecContext *avctx, uint8_t *buf, int buf_size, void *data)
+{
+    H264Context *t = (H264Context *)avctx->priv_data;
+    PutBitContext b;
+    int mbx, mby;
+    uint8_t *dest;
+    int destlen, i;
+    int encoded_size;
+    FrameInfo *tmp;
+    int QPy, QPc, qPI, isIDR = 0;
+
+    if (t->frame_num % t->IDRcount == 0)
+        isIDR = 1;
+
+    destlen = t->bufsize;
+    dest = t->po_data0;
+
+    // Copy the input image. Macroblocks were already assigned in the initialization step
+    img_copy(&(t->input_frame_copy),(AVPicture *)data,PIX_FMT_YUV420P,t->frame_width,t->frame_height);
+
+    // reconstructed_frames[0] will be used to reconstruct the image
+    ff_h264_clear_nonzero_markers(t->reconstructed_frames[0]->reconstructed_mb_map,t->mb_width,t->mb_height);
+
+    if (isIDR)
+    {
+        // sequence parameter set rbsp
+
+        init_put_bits(&b,t->pi_data0,t->bufsize);
+
+        put_bits(&b,8,66); // profile_idc = 66 in Baseline
+        put_bits(&b,1,0); // constraint_set0_flag
+        put_bits(&b,1,0); // constraint_set1_flag
+        put_bits(&b,1,0); // constraint_set2_flag
+        put_bits(&b,1,0); // constraint_set3_flag
+        put_bits(&b,4,0); // reserved_zero_bits
+        put_bits(&b,8,40); // level_idc, p. 262, 10*level number
+
+        set_ue_golomb(&b,0); // seq_parameter_set_id
+        set_ue_golomb(&b,2); // log2_max_frame_num_minus4, i.e. frame_num takes 6 bits
+        set_ue_golomb(&b,2); // pic_order_cnt_type
+        set_ue_golomb(&b,16); // num_ref_frames [0, 16] (make sure we can use enough)
+
+        put_bits(&b,1,0); // gaps_in_frame_num_value_allowed_flag
+
+        set_ue_golomb(&b,t->mb_width-1); // pic_width_in_mbs_minus1
+        set_ue_golomb(&b,t->mb_height-1); // pic_height_in_map_units_minus1
+
+        put_bits(&b, 1, 1); // frame_mbs_only_flag = 1 in Baseline
+        put_bits(&b, 1, 0); // direct_8x8_inference_flag
+        put_bits(&b, 1, t->frame_cropping_flag); // frame_cropping_flag
+
+        if (t->frame_cropping_flag)
+        {
+            set_ue_golomb(&b, t->frame_crop_left_offset);
+            set_ue_golomb(&b, t->frame_crop_right_offset);
+            set_ue_golomb(&b, t->frame_crop_top_offset);
+            set_ue_golomb(&b, t->frame_crop_bottom_offset);
+        }
+
+        put_bits(&b, 1, 0); // vui_parameters_present_flag
+        put_bits(&b, 1, 1); // rbsp_stop_one_bit
+
+        dest = ff_h264_write_nal_unit(1,NAL_SPS,dest,&destlen,&b);
+
+        // Baseline: nal_unit_type not in [2,4]
+
+        // picture parameter set
+
+        init_put_bits(&b,t->pi_data0,t->bufsize);
+
+        set_ue_golomb(&b,0); // pic_parameter_set_id
+        set_ue_golomb(&b,0); // seq_parameter_set_id
+        put_bits(&b,1,0); // entropy_coding_mode 0 = CAVLC
+        put_bits(&b,1,0); // pic_order_present_flag
+        set_ue_golomb(&b,0); // num_slice_groups_minus1 Only one slice group
+        // List0 is needed for enabling P-slices
+        set_ue_golomb(&b,0); // num_ref_idx_l0_active_minus1 Using at most the previous frame for prediction
+        set_ue_golomb(&b,0); // num_ref_idx_l1_active_minus1 Definitely not using list 1 in baseline
+        put_bits(&b,1,0); // weighted_pred_flag Is 0 in baseline
+        put_bits(&b,2,0); // weighted_bipred_idc Is 0 in baseline
+
+        set_se_golomb(&b,t->PPS_QP-26); // pic_init_qp_minus26
+        set_se_golomb(&b,0); // pic_init_qs_minus26
+
+        set_se_golomb(&b,t->chroma_qp_index_offset); // chroma_qp_index_offset
+
+        put_bits(&b,1,0); // deblocking_filter_control_present_flag
+
+        put_bits(&b,1,0); // constrained_intra_pred_flag
+        put_bits(&b,1,0); // redundant_pic_cnt_present
+
+        put_bits(&b,1,1); // rbsp_stop_one_bit
+
+        dest = ff_h264_write_nal_unit(1,NAL_PPS,dest,&destlen,&b);
+
+    }
+
+    // IDR slice or P slice
+
+    init_put_bits(&b,t->pi_data0,t->bufsize);
+
+    // Slice header
+    set_ue_golomb(&b, 0); // first_mb_in_slice
+
+    if (isIDR)
+        set_ue_golomb(&b, 7); // slice_type
+    else
+        set_ue_golomb(&b, 5); // slice_type
+    // 0: current slice is P-slice
+    // 2: current slice is I-slice
+    // 5: current and all other slices are P-slices (0 or 5)
+    // 7: current and all other slices are I-slices (2 or 7)
+
+    set_ue_golomb(&b, 0); // pic_parameter_set_id
+    put_bits(&b, 6, t->frame_num%t->IDRcount); // frame_num
+    if (isIDR)
+        set_ue_golomb(&b, t->IDR_frame_num); // idr_pic_id
+    else
+        put_bits(&b, 1, 0); // num_ref_idx_active_override_flag
+
+    // Two zero bits follow in both slice types. In an IDR slice they are
+    // no_output_of_prior_pics_flag and long_term_reference_flag of
+    // dec_ref_pic_marking(); in a P slice they are
+    // ref_pic_list_reordering_flag_l0 and adaptive_ref_pic_marking_mode_flag.
+    put_bits(&b, 1, 0);
+    put_bits(&b, 1, 0);
+    set_se_golomb(&b, t->QP - t->PPS_QP); // slice_qp_delta
+
+    // Slice data
+
+    if (isIDR)
+    {
+        for(mby = 0; mby < t->mb_height ; mby++)
+            for(mbx = 0 ; mbx < t->mb_width ; mbx++)
+                ff_h264_encode_Intra_16x16(t,&(t->mb_map[mby][mbx]),&b,&(t->reconstructed_frames[0]->reconstructed_mb_map[mby][mbx]));
+    }
+    else // Inter encoded frame
+    {
+        t->mb_skip_run = 0;
+
+        for(mby = 0; mby < t->mb_height ; mby++)
+            for(mbx = 0 ; mbx < t->mb_width ; mbx++)
+                ff_h264_encode_Inter_16x16(t,&(t->mb_map[mby][mbx]),&b,&(t->reconstructed_frames[0]->reconstructed_mb_map[mby][mbx]),&(t->reconstructed_frames[1]),t->framebufsize-1,mbx,mby);
+    }
+
+    QPy = t->QP;
+
+    qPI = t->QP + t->chroma_qp_index_offset;
+    qPI = clip(qPI, 0, 51);
+    QPc = chroma_qp[qPI];
+
+    ff_h264_deblock(t,t->reconstructed_frames[0],isIDR,QPy,QPc);
+
+    // Trailing bits
+
+    put_bits(&b,1,1); // rbsp_stop_one_bit
+
+    if (isIDR)
+        dest = ff_h264_write_nal_unit(1,NAL_IDR_SLICE,dest,&destlen,&b);
+    else
+        dest = ff_h264_write_nal_unit(1,NAL_SLICE,dest,&destlen,&b);
+
+    // never write more than the caller's buffer can hold
+    encoded_size = t->bufsize-destlen;
+    if (encoded_size > buf_size)
+        return -1;
+
+    // cycle frame buffer
+
+    tmp = t->reconstructed_frames[t->framebufsize-1];
+    for (i = t->framebufsize-1 ; i > 0 ; i--)
+        t->reconstructed_frames[i] = t->reconstructed_frames[i-1];
+    t->reconstructed_frames[0] = tmp;
+
+    // copy the encoded bytes
+    memcpy(buf,t->po_data0,encoded_size);
+
+    // update history information: the history is a ring buffer holding the
+    // size in bits of the most recent pictures
+    t->blocksize_history_sum -= t->blocksize_history[t->blocksize_history_pos];
+    t->blocksize_history_sum += encoded_size*8;
+    t->blocksize_history[t->blocksize_history_pos] = encoded_size*8;
+
+    t->blocksize_history_pos++;
+    if (t->blocksize_history_pos == t->blocksize_history_length)
+        t->blocksize_history_pos = 0;
+    if (t->blocksize_history_num_filled < t->blocksize_history_length)
+    {
+        t->blocksize_history_num_filled++;
+        t->blocksize_history_total_milliseconds += t->milliseconds_per_frame;
+    }
+
+    if (!t->use_fixed_qp)
+        ff_h264_control_bitrate(avctx,t);
+
+    // adjust frame numbers
+    t->frame_num++;
+    if (isIDR)
+        t->IDR_frame_num++;
+    return encoded_size;
+}
+
+/**
+ * Release everything the encoder context owns.
+ *
+ * An end of stream NAL unit followed by a zero byte is first written into
+ * the context's own output buffer; after that the work pictures, the
+ * macroblock maps, the reconstructed frames, the copy of the input frame and
+ * the bitrate history are freed.
+ *
+ * @return always 0
+ */
+static int ff_h264_encoder_close(AVCodecContext *avctx)
+{
+    H264Context *t = (H264Context *)avctx->priv_data;
+    PutBitContext b;
+    uint8_t *dest = t->po_data0;
+    int destlen = t->bufsize;
+    int row, frame;
+
+    // write end of stream
+
+    init_put_bits(&b,t->pi_data0,t->bufsize);
+    dest = ff_h264_write_nal_unit(0,NAL_END_STREAM,dest,&destlen,&b);
+
+    *dest++ = 0;
+    destlen--;
+
+    // clean up
+
+    avpicture_free(&t->pi);
+    avpicture_free(&t->po);
+
+    // macroblock map of the input picture
+    for (row = 0 ; row < t->mb_height ; row++)
+        av_free(t->mb_map[row]);
+    av_free(t->mb_map);
+
+    // reconstructed frames with their sample data and macroblock maps
+    for (frame = 0 ; frame < t->framebufsize ; frame++)
+    {
+        FrameInfo *info = t->reconstructed_frames[frame];
+
+        av_free(info->reconstructed_picture.data[0]);
+
+        for (row = 0 ; row < t->mb_height ; row++)
+            av_free(info->reconstructed_mb_map[row]);
+
+        av_free(info->reconstructed_mb_map);
+        av_free(info);
+    }
+    av_free(t->reconstructed_frames);
+
+    av_free(t->input_frame_copy.data[0]);
+
+    av_free(t->blocksize_history);
+
+    return 0;
+}
+
+#ifdef CONFIG_ENCODERS
+AVCodec h264_encoder = { // codec description, only compiled when encoders are enabled
+ "ffh264",
+ CODEC_TYPE_VIDEO,
+ CODEC_ID_FFH264,
+ sizeof(H264Context), // ff_h264_encode and ff_h264_encoder_close use avctx->priv_data as an H264Context
+ ff_h264_encoder_init,
+ ff_h264_encode, // encodes one picture per call
+ ff_h264_encoder_close, // frees what the context owns
+ .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1}, // YUV420P is the only listed format; -1 closes the list
+};
+#endif
+
diff --git a/libavcodec/h264enc.h b/libavcodec/h264enc.h
new file mode 100644
index 0000000..c962618
--- /dev/null
+++ b/libavcodec/h264enc.h
@@ -0,0 +1,105 @@
+/*
+ * H.264 encoder
+ * Copyright (c) 2006 Expertisecentrum Digitale Media, UHasselt
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "h264encdata.h"
+
+/**
+ * Per-macroblock view of a picture: pointers into the Y, U and V data, nonzero flags per 4x4 part, links to neighbouring macroblocks and a motion vector.
+ */
+typedef struct MacroBlock
+{
+ uint8_t *Y[16]; ///< luma pointers; presumably one per pixel row of the 16x16 block - confirm
+ uint8_t *U[8]; ///< chroma U pointers; presumably one per pixel row of the 8x8 block - confirm
+ uint8_t *V[8]; ///< chroma V pointers; presumably one per pixel row of the 8x8 block - confirm
+ int Y_width; ///< NOTE(review): presumably the usable luma width of this block (edge blocks may be partial) - confirm
+ int Y_height; ///< NOTE(review): presumably the usable luma height of this block - confirm
+ int Y_nonzero[4][4]; ///< per 4x4 luma part, indexed y,x
+ int U_nonzero[2][2]; ///< per 4x4 U part, presumably indexed y,x like Y_nonzero
+ int V_nonzero[2][2]; ///< per 4x4 V part, presumably indexed y,x like Y_nonzero
+ struct MacroBlock *leftblock,*topblock,*rightblock; ///< neighbouring macroblocks; callers compare these against 0 before dereferencing
+ int available; ///< checked on neighbours before they are used for prediction
+ int mv_x; ///< motion vector, horizontal component
+ int mv_y; ///< motion vector, vertical component
+} MacroBlock;
+
+typedef struct Residual // int16 data for one macroblock, split into 4x4 parts: 4x4 parts for Y, 2x2 parts each for U and V
+{
+ int16_t part4x4Y[4][4][4][4]; ///< ypos and xpos of 4x4 part, followed by y,x of pixel
+ int16_t part4x4U[2][2][4][4]; ///< ypos and xpos of 4x4 part, followed by y,x of pixel
+ int16_t part4x4V[2][2][4][4]; ///< ypos and xpos of 4x4 part, followed by y,x of pixel
+} Residual;
+
+typedef struct FrameInfo // one reconstructed frame; the encoder context keeps an array of these and frees each on close
+{
+ AVPicture reconstructed_picture; ///< pixel data of the reconstructed frame; data[0] is freed on close
+ MacroBlock **reconstructed_mb_map; ///< macroblock map of reconstructed picture, one row array per macroblock row
+} FrameInfo;
+
+typedef struct H264Context // private state of the H.264 encoder, reached through avctx->priv_data
+{
+ uint8_t *pi_data0; ///< buffer handed to init_put_bits as the bit writer target
+ uint8_t *po_data0; ///< buffer that finished NAL units are written into
+ int bufsize; ///< size used for both buffers above
+ int frame_num; ///< incremented after every encoded frame
+ int IDR_frame_num; ///< incremented after every encoded IDR frame
+ MacroBlock **mb_map; ///< macroblock map for input picture
+ FrameInfo **reconstructed_frames;
+ int framebufsize; ///< length of previous array
+ int mb_width; ///< width in macroblocks
+ int mb_height; ///< height in macroblocks
+ int QP;
+ int PPS_QP; ///< The QP value stored in the picture parameter set
+ int chroma_qp_index_offset;
+ int IDRcount;
+ int frame_cropping_flag;
+ int frame_crop_left_offset;
+ int frame_crop_right_offset;
+ int frame_crop_top_offset;
+ int frame_crop_bottom_offset;
+ Residual residual;
+
+ MpegEncContext s;
+ AVPicture pi, po; ///< both are released with avpicture_free on close
+
+ DSPContext dspcontext;
+ int Y_stride;
+ int U_stride;
+ int V_stride;
+
+ int frame_width;
+ int frame_height;
+ int refframe_width;
+ int refframe_height;
+
+ AVPicture input_frame_copy; ///< buffer to hold copy of input frame
+ int mb_skip_run;
+
+ int64_t *blocksize_history; ///< ring buffer of encoded frame sizes in bits
+ int blocksize_history_length; ///< capacity of the ring buffer; the write position wraps to 0 here
+ int blocksize_history_total_milliseconds; ///< grows by milliseconds_per_frame for each entry added while the buffer is filling
+ int milliseconds_per_frame;
+ int blocksize_history_pos; ///< next write position in the ring buffer
+ int blocksize_history_num_filled; ///< number of entries written so far, capped at blocksize_history_length
+ int64_t blocksize_history_sum; ///< accumulated frame sizes in bits
+
+ int use_fixed_qp; ///< when nonzero, ff_h264_control_bitrate is not called after a frame
+
+} H264Context;
+
+
diff --git a/libavcodec/h264encdata.h b/libavcodec/h264encdata.h
new file mode 100644
index 0000000..93bb647
--- /dev/null
+++ b/libavcodec/h264encdata.h
@@ -0,0 +1,110 @@
+/*
+ * H.264 encoder
+ * Copyright (c) 2006 Expertisecentrum Digitale Media, UHasselt
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define MVTABLE_OFFSET (128*4) ///< index of the entry for value 0 in mv_len_table
+// mv_len_table[v + MVTABLE_OFFSET] for v in -MVTABLE_OFFSET..+MVTABLE_OFFSET; the values equal the bit length of the signed Exp-Golomb code of v (1 for 0, 3 for +-1, ... 21 at both ends). Presumably used to cost motion vector components - confirm at the call sites.
+static const char mv_len_table[MVTABLE_OFFSET*2+1] =
+{
+ 21,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,17,17,17,17,17,17,17,17,17,17,17,17,17,
+ 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+ 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+ 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+ 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+ 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+ 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+ 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+ 17,17,17,17,17,17,17,17,17,17,15,15,15,15,15,
+ 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
+ 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
+ 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
+ 15,15,15,15,15,15,15,15,15,15,15,15,15,15,13,
+ 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
+ 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
+ 13,11,11,11,11,11,11,11,11,11,11,11,11,11,11,
+ 11,11,9,9,9,9,9,9,9,9,7,7,7,7,5,
+ 5,3,1,3,5,5,7,7,7,7,9,9,9,9,9,
+ 9,9,9,11,11,11,11,11,11,11,11,11,11,11,11,
+ 11,11,11,11,13,13,13,13,13,13,13,13,13,13,13,
+ 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
+ 13,13,13,13,13,13,15,15,15,15,15,15,15,15,15,
+ 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
+ 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
+ 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
+ 15,15,15,15,15,15,15,15,15,15,17,17,17,17,17,
+ 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+ 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+ 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+ 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+ 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+ 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+ 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+ 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+ 17,17,17,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,
+ 19,19,19,19,21
+};
+
+static const int sae_codeblocksize_relation[13][8] = { // 13x8 lookup table. NOTE(review): going by the name it relates an SAE measure to coded block size, but the meaning of the row and column indices is not visible here - confirm at the point of use
+{55,1590,1796,1865,1925,1977,2010,2021},
+{27,1328,1525,1593,1649,1699,1729,1741},
+{20,1097,1288,1353,1405,1449,1479,1485},
+{17,878,1063,1130,1177,1217,1244,1250},
+{16,667,875,945,992,1027,1051,1055},
+{18,491,704,790,835,869,891,895},
+{0,352,538,658,718,747,769,774},
+{0,243,398,500,561,643,672,683},
+{0,163,278,363,446,487,518,568},
+{0,98,197,259,313,368,425,453},
+{0,53,133,186,224,259,293,326},
+{0,22,81,126,162,188,210,231},
+{0,14,47,79,106,135,156,173}
+};
+
diff --git a/tests/Makefile b/tests/Makefile
index 835fab6..892484a 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -20,7 +20,7 @@ test-server: vsynth1/0.pgm asynth1.sw
@$(VPATH)/server-regression.sh $(SERVER_REFFILE) $(VPATH)/test.conf
# fast regression tests for all codecs
-codectest mpeg4 mpeg ac3 snow snowll: vsynth1/0.pgm vsynth2/0.pgm asynth1.sw tiny_psnr$(EXESUF)
+codectest mpeg4 mpeg ac3 snow snowll ffh264: vsynth1/0.pgm vsynth2/0.pgm asynth1.sw tiny_psnr$(EXESUF)
@$(VPATH)/regression.sh $@ $(REFFILE1) vsynth1
@$(VPATH)/regression.sh $@ $(REFFILE2) vsynth2
diff --git a/tests/ffmpeg.regression.ref b/tests/ffmpeg.regression.ref
index 1aec954..3cf62d7 100644
--- a/tests/ffmpeg.regression.ref
+++ b/tests/ffmpeg.regression.ref
@@ -59,6 +59,10 @@ stddev: 8.08 PSNR:29.97 bytes:7602176
2415378 ./data/a-h263p.avi
28fd12ac0b168252d81df6f6e60a5d17 *./data/out.yuv
stddev: 2.07 PSNR:41.76 bytes:7602176
+5b4fae455aa041e99d3467f2a67dd76b *./data/a-ffh264.mp4
+3014465 ./data/a-ffh264.mp4
+ba88c2a8e9bba81581dea8c1bbc03ad5 *./data/out.yuv
+stddev: 0.68 PSNR:51.39 bytes:7602176
d84b65558cd386064ab7a126d66c4744 *./data/a-odivx.mp4
554499 ./data/a-odivx.mp4
57aed19df5cbada4b05991527ee72ebe *./data/out.yuv
diff --git a/tests/regression.sh b/tests/regression.sh
index 9ded777..1e1663d 100755
--- a/tests/regression.sh
+++ b/tests/regression.sh
@@ -40,6 +40,8 @@ elif [ "$1" = "snow" ] ; then
do_snow=y
elif [ "$1" = "snowll" ] ; then
do_snowll=y
+elif [ "$1" = "ffh264" ] ; then
+ do_ffh264=y
elif [ "$1" = "libavtest" ] ; then
do_libav=y
logfile="$datadir/libav.regression"
@@ -82,6 +84,7 @@ else
do_svq1=y
do_snow=y
do_snowll=y
+ do_ffh264=y
do_adpcm_yam=y
do_dv=y
do_dv50=y
@@ -297,6 +300,16 @@ do_ffmpeg $raw_dst -y -i $file -f rawvid
fi
###################################
+if [ -n "$do_ffh264" ] ; then
+# h264 encoding
+file=${outfile}ffh264.mp4
+do_ffmpeg $file -y -qscale 10 -f pgmyuv -i $raw_src -an -vcodec ffh264 -vtag avc1 $file
+
+# h264 decoding
+do_ffmpeg $raw_dst -y -i $file -f rawvideo $raw_dst
+fi
+
+###################################
if [ -n "$do_mpeg4" ] ; then
# mpeg4
file=${outfile}odivx.mp4
diff --git a/tests/rotozoom.regression.ref b/tests/rotozoom.regression.ref
index 5652f2d..1a80264 100644
--- a/tests/rotozoom.regression.ref
+++ b/tests/rotozoom.regression.ref
@@ -59,6 +59,10 @@ stddev: 5.44 PSNR:33.41 bytes:7602176
869200 ./data/a-h263p.avi
80fb224bebbe2e04f228da7485b905c5 *./data/out.yuv
stddev: 1.91 PSNR:42.49 bytes:7602176
+f42c060a951e4711a1bcf91b06936021 *./data/a-ffh264.mp4
+2219112 ./data/a-ffh264.mp4
+c390adcb2747e35c4aaef623d1e17837 *./data/out.yuv
+stddev: 0.71 PSNR:51.06 bytes:7602176
286c5a5fca0d3e17ba6ede970b8318b8 *./data/a-odivx.mp4
120150 ./data/a-odivx.mp4
e8c90899c32e11e7e4d1354dab0b8f28 *./data/out.yuv
More information about the ffmpeg-devel
mailing list