[FFmpeg-devel] [PATCH] Unfinished GSoC qual task (16bit VQA Video Decoder)
Michael Niedermayer
michaelni
Wed Apr 29 01:12:09 CEST 2009
On Tue, Apr 28, 2009 at 10:01:14PM +0200, Adam Iglewski wrote:
> Hi,
>
> Sorry for the delay. Updated (and improved I hope) patch attached.
>
> Michael Niedermayer pisze:
> [...]
>>>>> +static inline void vqa_copy_hc_block(unsigned short *pixels,int
>>>>> pixel_ptr,int stride,unsigned short *codebook,
>>>>> + int vector_index,int block_h)
>>>>> +{
>>>>> + int pixel_y;
>>>>> + for (pixel_y = 0; pixel_y < block_h; pixel_y++) {
>>>>> + pixels[pixel_ptr] = codebook[vector_index++];
>>>>> + pixels[pixel_ptr+1] = codebook[vector_index++];
>>>>> + pixels[pixel_ptr+2] = codebook[vector_index++];
>>>>> + pixels[pixel_ptr+3] = codebook[vector_index++];
>>>>> + pixel_ptr += stride;
>>>>> + }
>>>>> +}
>>>> vector_index is redundant, so is pixel_ptr
>>> Fixed. And as Vitor suggested now using memcpy.
>> memcpy might be a bad choice, did you benchmark this one?
>> personally id cast to uint64_t and do a single assignment ...
>
> I did benchmark by putting START_TIMER after opening brace
> and STOP_TIMER("vqa_copy_hc_block") just before closing brace
> of this function. I hope this is the correct way to benchmark
> things in FFmpeg? If true it looks like memcpy solution is
> fastest.
>
> [...]
>> +#define CHECK_BLOCKS_COUNT(n) \
>> + if ((total_blocks - n) < 0) { \
>> + av_log(s->avctx, AV_LOG_ERROR, " VQA video warning: writing %d
> blocks will exceed frame block count %d\n", \
>> + n, total_blocks); \
>> + av_log(s->avctx, AV_LOG_ERROR, " VQA video warning: action code %X,
>> action count %d\n", \
>> + vptr_action_code & 0xe000, n); \
>> + return; \
>> + }
>> macros are not a solution to code duplication, the code is still
>> duplicated
>> in the binary
>
> OK. Removed.
>
>> [...]
>>> @@ -230,6 +251,8 @@ static void decode_format80(const unsign
>>> count = AV_RL16(&src[src_index]);
>>> src_index += 2;
>>> src_pos = AV_RL16(&src[src_index]);
>>> + if(new_format)
>>> + src_pos = dest_index-src_pos;
>> indent ...
>
> Fixed.
>
>> [...]
>>> + block_x = block_y = 0;
>>> + while(total_blocks > 0) {
>>> +
>>> + pixels = (uint16_t *)s->frame.data[0] + (block_y *
>>> s->vector_height * stride)+block_x*block_inc;
>>> + vptr_action_code = bytestream_get_le16(&src);
>>> +
>>> + switch(vptr_action_code & 0xe000) {
>>> +
>>> + /* Skip Count blocks. Count is (Val & 0x1fff). */
>>> + case 0x0000:
>>> + blocks_done = vptr_action_code & 0x1fff;
>>> + break;
>>> +
>>> + /* Write block number (Val & 0xff) Count times.
>>> + * Count is (((Val/256) & 0x1f)+1)*2
>>> + */
>>> + case 0x2000:
>>> + vector_index = (vptr_action_code & 0xff) << index_shift;
>>> + blocks_done = (((vptr_action_code>>8) & 0x1f)+1) << 1;
>>> + CHECK_BLOCKS_COUNT(blocks_done);
>>> +
>>> + for(i=0;i<blocks_done;i++) {
>>> +
>>> vqa_copy_hc_block(pixels,stride,&codebook[vector_index],s->vector_height);
>>> + pixels += block_inc;
>>> + }
>>> + break;
>>> +
>>> + /* Write block number (Val & 0xff) and then write Count
>>> blocks
>>> + * getting their indexes by reading next Count bytes from
>>> + * the VPTR chunk. Count is (((Val/256) & 0x1f)+1)*2
>>> + */
>>> + case 0x4000:
>>> + vector_index = (vptr_action_code & 0xff) << index_shift;
>>> + blocks_done = (((vptr_action_code>>8) & 0x1f)+1) << 1;
>>> + CHECK_BLOCKS_COUNT(blocks_done+1);
>>> +
>>> +
>>> vqa_copy_hc_block(pixels,stride,&codebook[vector_index],s->vector_height);
>>> + pixels += block_inc;
>>> +
>>> + for(i=0;i<blocks_done;i++) {
>>> + vector_index = *src++;
>>> + vector_index <<= index_shift;
>>> +
>>> vqa_copy_hc_block(pixels,stride,&codebook[vector_index],s->vector_height);
>>> + pixels += block_inc;
>>> + }
>>> + blocks_done++;
>>> + break;
>>> +
>>> + /* Write block (Val & 0x1fff).*/
>>> + case 0x6000:
>>> + vector_index = (vptr_action_code & 0x1fff) <<
>>> index_shift;
>>> + CHECK_BLOCKS_COUNT(1);
>>> +
>>> vqa_copy_hc_block(pixels,stride,&codebook[vector_index],s->vector_height);
>>> + blocks_done=1;
>>> + break;
>>> +
>>> + /* Write block (Val & 0x1fff) Count times.
>>> + * Count is the next byte from the VPTR chunk
>>> + */
>>> + case 0xa000:
>>> + vector_index = (vptr_action_code & 0x1fff) <<
>>> index_shift;
>>> + blocks_done = *src++;
>>> + CHECK_BLOCKS_COUNT(blocks_done);
>>> +
>>> + for(i=0;i<blocks_done;i++) {
>>> +
>>> vqa_copy_hc_block(pixels,stride,&codebook[vector_index],s->vector_height);
>>> + pixels += block_inc;
>>> + }
>> theres alot of code duplication in there, its basically the same thing
>> done over and over again
>
> Rewritten.
>
>>> + break;
>>> + }
>>> + block_y += (block_x + blocks_done) / blocks_wide;
>>> + block_x = (block_x + blocks_done) % blocks_wide;
>>> + total_blocks -= blocks_done;
>> can count cross the right border and continue to the next row?
>> if yes, your code is buggy
>> if no, it can be simplified into a for(all rows) for(all columns)
>
> I've reread specs and in fact every row is processed
> independent of other rows. Rewritten.
>
>> [...]
>>> @@ -581,12 +750,22 @@ static int vqa_decode_frame(AVCodecConte
>>> av_log(s->avctx, AV_LOG_ERROR, " VQA Video: get_buffer()
>>> failed\n");
>>> return -1;
>>> }
>>> + } else {
>>> + s->frame.reference = 1;
>>> + s->frame.buffer_hints = FF_BUFFER_HINTS_VALID |
>>> FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE;
>>> + if (avctx->reget_buffer(avctx, &s->frame)) {
>>> + av_log(s->avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
>>> + return -1;
>>> + }
>>> + }
>> indent
>
> Fixed.
>
>> [...]
>>> --- ffmpeg/libavcodec/adpcm.c 2009-04-20 18:29:22.000000000 +0200
>>> +++ ffmpeg_work/libavcodec/adpcm.c 2009-04-26 00:27:30.000000000 +0200
>>> @@ -1005,6 +1005,7 @@ static int adpcm_decode_frame(AVCodecCon
>>> if (cs->step_index < 0) cs->step_index = 0;
>>> if (cs->step_index > 88) cs->step_index = 88;
>>> + case CODEC_ID_ADPCM_IMA_WS:
>>> m= (buf_size - (src - buf))>>st;
>>> for(i=0; i<m; i++) {
>>> *samples++ = adpcm_ima_expand_nibble(&c->status[0], src[i] &
>>> 0x0F, 4);
>>> @@ -1156,25 +1157,6 @@ static int adpcm_decode_frame(AVCodecCon
>>> src++;
>>> }
>>> break;
>>> - case CODEC_ID_ADPCM_IMA_WS:
>>> - /* no per-block initialization; just start decoding the data */
>>> - while (src < buf + buf_size) {
>>> -
>>> - if (st) {
>>> - *samples++ = adpcm_ima_expand_nibble(&c->status[0],
>>> - src[0] >> 4 , 3);
>>> - *samples++ = adpcm_ima_expand_nibble(&c->status[1],
>>> - src[0] & 0x0F, 3);
>>> - } else {
>>> - *samples++ = adpcm_ima_expand_nibble(&c->status[0],
>>> - src[0] >> 4 , 3);
>>> - *samples++ = adpcm_ima_expand_nibble(&c->status[0],
>>> - src[0] & 0x0F, 3);
>>> - }
>>> -
>>> - src++;
>>> - }
>>> - break;
>>> case CODEC_ID_ADPCM_XA:
>>> while (buf_size >= 128) {
>>> xa_decode(samples, src, &c->status[0], &c->status[1],
>> That will break the existing cases that use CODEC_ID_ADPCM_IMA_WS i assume
>> [...]
>
> Yes if there are VQA files with stereo sound where nibbles are ordered
CODEC_ID_ADPCM_IMA_WS is used by apc.c and westwood.c
> like this:
>
> LL RR LL RR ....
>
> But I couldn't find any in VQA samples directory. Assuming that such files
> do exists what is the correct way to fix this? Adding new codec id? If not
if your case is identical to CODEC_ID_ADPCM_XA then use CODEC_ID_ADPCM_XA
ad dont change another to become identical to it.
> - how demuxer can pass some additional info to codec? Using extradata
> field? In this case it would be simple flag meaning that stereo samples are
> ordered in one way or the other.
[...]
divide by 2^x could use >> maybe
/home/michael/add_vqa_hc.diff:103:+ int stride = s->frame.linesize[0] / 2;
/home/michael/add_vqa_hc.diff:110:+ blocks_wide = s->width / 4;
missing } prior to else
/home/michael/add_vqa_hc.diff:34:+ else
/home/michael/add_vqa_hc.diff:117:+ else
{ should be on the same line as the related previous statement
/home/michael/add_vqa_hc.diff:121:+ {
/home/michael/add_vqa_hc.diff:124:+ {
Missing changelog entry (ignore if minor change)
[...]
> +static void vqa_decode_hc_video_chunk(VqaContext *s,const unsigned char *src,unsigned int src_size)
> +{
> + int block_x, block_y; /* block width and height iterators */
> + int blocks_wide, blocks_high; /* width and height in 4x4 blocks */
> + int block_inc;
> + int index_shift;
> + int i;
> +
> + /* decoding parameters */
> + int blocks_done;
can be moved into the loop
> + uint16_t *pixels,*frame_end;
> + uint16_t *codebook = (uint16_t *)s->codebook;
> + int stride = s->frame.linesize[0] / 2;
> +
> + int vptr_action_code;
> + int vector_index = 0;
> + int get_vectors_from_stream;
redundant, the type can be checked directly
> +
> + blocks_done = 0;
redundant
> + blocks_wide = s->width / 4;
> + blocks_high = s->height / s->vector_height;
> + block_inc = 4;
> + frame_end = (uint16_t *)s->frame.data[0] + s->height * stride + s->width;
> +
> + if (s->vector_height == 4)
> + index_shift = 4;
> + else
> + index_shift = 3;
> +
> + for(block_y=0; block_y < blocks_high; block_y ++)
> + {
> + pixels = (uint16_t *)s->frame.data[0] + (block_y * s->vector_height * stride);
> + for(block_x=0; block_x < blocks_wide; )
> + {
> + get_vectors_from_stream=0;
> + vptr_action_code = bytestream_get_le16(&src);
> +
> + switch(vptr_action_code & 0xe000) {
> +
> + /* Skip Count blocks. Count is (Val & 0x1fff). */
> + case 0x0000:
> + blocks_done = vptr_action_code & 0x1fff;
> + block_x += blocks_done;
> + pixels += blocks_done * block_inc;
> + continue;
> +
> + /* Write block number (Val & 0xff) Count times.
> + * Count is (((Val/256) & 0x1f)+1)*2
> + */
> + case 0x2000:
> + vector_index = (vptr_action_code & 0xff) << index_shift;
> + blocks_done = (((vptr_action_code>>8) & 0x1f)+1) << 1;
> + break;
> +
> + /* Write block number (Val & 0xff) and then write Count blocks
> + * getting their indexes by reading next Count bytes from
> + * the VPTR chunk. Count is (((Val/256) & 0x1f)+1)*2
> + */
> + case 0x4000:
> + vector_index = (vptr_action_code & 0xff) << index_shift;
> + blocks_done = ((((vptr_action_code>>8) & 0x1f)+1) << 1)+1;
> + get_vectors_from_stream=1;
> + break;
> +
> + /* Write block (Val & 0x1fff).*/
> + case 0x6000:
> + vector_index = (vptr_action_code & 0x1fff) << index_shift;
> + blocks_done=1;
> + break;
> +
> + /* Write block (Val & 0x1fff) Count times.
> + * Count is the next byte from the VPTR chunk
> + */
> + case 0xa000:
> + vector_index = (vptr_action_code & 0x1fff) << index_shift;
> + blocks_done = *src++;
> + break;
> + }
> +
> + if(pixels + s->vector_height * stride + blocks_done * block_inc > frame_end) {
> + av_log(s->avctx, AV_LOG_ERROR, " VQA video: too many blocks in frame.\n");
> + return;
> + }
> +
> + for(i=0;i<blocks_done;i++) {
> + vqa_copy_hc_block(pixels,stride,&codebook[vector_index],s->vector_height);
> + pixels += block_inc;
> + if(i && get_vectors_from_stream) {
> + vector_index = *src++;
> + vector_index <<= index_shift;
> + }
> + }
> + block_x += blocks_done;
block_x += blocks_done;
while( blocks_done-- ){
}
[...]
> @@ -308,6 +433,8 @@ static void vqa_decode_chunk(VqaContext
> int cpl0_chunk = -1;
> int cplz_chunk = -1;
> int vptz_chunk = -1;
> + int vptr_chunk = -1;
> + int vprz_chunk = -1;
>
> int x, y;
> int lines = 0;
> @@ -354,6 +481,14 @@ static void vqa_decode_chunk(VqaContext
> vptz_chunk = index;
> break;
>
> + case VPTR_TAG:
> + vptr_chunk = index;
> + break;
> +
> + case VPRZ_TAG:
> + vprz_chunk = index;
> + break;
> +
> default:
> av_log(s->avctx, AV_LOG_ERROR, " VQA video: Found unknown chunk type: %c%c%c%c (%08X)\n",
> (chunk_type >> 24) & 0xFF,
IIRC mike said they are in correct order in files ...
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Avoid a single point of failure, be that a person or equipment.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090429/c369437f/attachment.pgp>
More information about the ffmpeg-devel
mailing list