[FFmpeg-devel] [PATCH] Unfinished GSoC qual task (16bit VQA Video Decoder)

Wed Apr 29 01:12:09 CEST 2009

On Tue, Apr 28, 2009 at 10:01:14PM +0200, Adam Iglewski wrote:
> Hi,
>
> Sorry for the delay. Updated (and improved I hope) patch attached.
>
> Michael Niedermayer pisze:
> [...]
>>>>>  +static inline void vqa_copy_hc_block(unsigned short *pixels,int 
>>>>> pixel_ptr,int stride,unsigned short *codebook,
>>>>> +                                      int vector_index,int block_h)
>>>>> +{
>>>>> +    int pixel_y;
>>>>> +    for (pixel_y = 0; pixel_y < block_h; pixel_y++) {
>>>>> +        pixels[pixel_ptr] = codebook[vector_index++];
>>>>> +        pixels[pixel_ptr+1] = codebook[vector_index++];
>>>>> +        pixels[pixel_ptr+2] = codebook[vector_index++];
>>>>> +        pixels[pixel_ptr+3] = codebook[vector_index++];
>>>>> +        pixel_ptr += stride;
>>>>> +    }
>>>>> +}
>>>> vector_index is redundant, so is pixel_ptr
>>> Fixed. And as Vitor suggested now using memcpy.
>> memcpy might be a bad choice, did you benchmark this one?
>> personally id cast to uint64_t and do a single assignment ...
>
> I did benchmark by putting START_TIMER after opening brace
> and STOP_TIMER("vqa_copy_hc_block") just before closing brace
> of this function. I hope this is the correct way to benchmark
> things in FFmpeg? If true it looks like memcpy solution is
> fastest.
>
> [...]
>> +#define CHECK_BLOCKS_COUNT(n) \
>> +  if ((total_blocks - n) < 0) { \
>> +    av_log(s->avctx, AV_LOG_ERROR, " VQA video warning: writing %d 
> blocks will exceed frame block count %d\n", \
>> +      n, total_blocks); \
>> +    av_log(s->avctx, AV_LOG_ERROR, " VQA video warning: action code %X, 
>> action count %d\n", \
>> +      vptr_action_code & 0xe000, n); \
>> +    return; \
>> +  }
>> macros are not a solution to code duplication, the code is still 
>> duplicated
>> in the binary
>
> OK. Removed.
>
>> [...]
>>> @@ -230,6 +251,8 @@ static void decode_format80(const unsign
>>>              count = AV_RL16(&src[src_index]);
>>>              src_index += 2;
>>>              src_pos = AV_RL16(&src[src_index]);
>>> +            if(new_format)
>>> +              src_pos = dest_index-src_pos;
>> indent ...
>
> Fixed.
>
>> [...]
>>> +    block_x = block_y = 0;
>>> +    while(total_blocks > 0) {
>>> +
>>> +        pixels = (uint16_t *)s->frame.data[0] + (block_y * 
>>> s->vector_height * stride)+block_x*block_inc;
>>> +        vptr_action_code = bytestream_get_le16(&src);
>>> +
>>> +        switch(vptr_action_code & 0xe000) {
>>> +
>>> +            /* Skip Count blocks. Count is (Val & 0x1fff). */
>>> +            case 0x0000:
>>> +                blocks_done = vptr_action_code & 0x1fff;
>>> +                break;
>>> +
>>> +            /* Write block number (Val & 0xff) Count times.
>>> +             * Count is (((Val/256) & 0x1f)+1)*2
>>> +             */
>>> +            case 0x2000:
>>> +                vector_index = (vptr_action_code & 0xff) << index_shift;
>>> +                blocks_done = (((vptr_action_code>>8) & 0x1f)+1) << 1;
>>> +                CHECK_BLOCKS_COUNT(blocks_done);
>>> +
>>> +                for(i=0;i<blocks_done;i++) {
>>> +                    
>>> vqa_copy_hc_block(pixels,stride,&codebook[vector_index],s->vector_height);
>>> +                    pixels += block_inc;
>>> +                }
>>> +                break;
>>> +
>>> +            /* Write block number (Val & 0xff) and then write Count 
>>> blocks
>>> +             * getting their indexes by reading next Count bytes from
>>> +             * the VPTR chunk. Count is (((Val/256) & 0x1f)+1)*2
>>> +             */
>>> +            case 0x4000:
>>> +                vector_index = (vptr_action_code & 0xff) << index_shift;
>>> +                blocks_done = (((vptr_action_code>>8) & 0x1f)+1) << 1;
>>> +                CHECK_BLOCKS_COUNT(blocks_done+1);
>>> +
>>> +                
>>> vqa_copy_hc_block(pixels,stride,&codebook[vector_index],s->vector_height);
>>> +                pixels += block_inc;
>>> +
>>> +                for(i=0;i<blocks_done;i++) {
>>> +                    vector_index = *src++;
>>> +                    vector_index <<= index_shift;
>>> +                    
>>> vqa_copy_hc_block(pixels,stride,&codebook[vector_index],s->vector_height);
>>> +                    pixels += block_inc;
>>> +                }
>>> +                blocks_done++;
>>> +                break;
>>> +
>>> +            /* Write block (Val & 0x1fff).*/
>>> +            case 0x6000:
>>> +                vector_index = (vptr_action_code & 0x1fff) << 
>>> index_shift;
>>> +                CHECK_BLOCKS_COUNT(1);
>>> +                
>>> vqa_copy_hc_block(pixels,stride,&codebook[vector_index],s->vector_height);
>>> +                blocks_done=1;
>>> +                break;
>>> +
>>> +            /* Write block (Val & 0x1fff) Count times.
>>> +             * Count is the next byte from the VPTR chunk
>>> +             */
>>> +            case 0xa000:
>>> +                vector_index = (vptr_action_code & 0x1fff) << 
>>> index_shift;
>>> +                blocks_done = *src++;
>>> +                CHECK_BLOCKS_COUNT(blocks_done);
>>> +
>>> +                for(i=0;i<blocks_done;i++) {
>>> +                    
>>> vqa_copy_hc_block(pixels,stride,&codebook[vector_index],s->vector_height);
>>> +                    pixels += block_inc;
>>> +                }
>> theres alot of code duplication in there, its basically the same thing
>> done over and over again
>
> Rewritten.
>
>>> +                break;
>>> +        }
>>> +        block_y += (block_x + blocks_done) / blocks_wide;
>>> +        block_x  = (block_x + blocks_done) % blocks_wide;
>>> +        total_blocks -= blocks_done;
>> can count cross the right border and continue to the next row?
>> if yes, your code is buggy
>> if no, it can be simplified into a for(all rows) for(all columns)
>
> I've reread specs and in fact every row is processed
> independent of other rows. Rewritten.
>
>> [...]
>>> @@ -581,12 +750,22 @@ static int vqa_decode_frame(AVCodecConte
>>>          av_log(s->avctx, AV_LOG_ERROR, "  VQA Video: get_buffer() 
>>> failed\n");
>>>          return -1;
>>>      }
>>> +    } else {
>>> +    s->frame.reference = 1;
>>> +    s->frame.buffer_hints = FF_BUFFER_HINTS_VALID | 
>>> FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE;
>>> +    if (avctx->reget_buffer(avctx, &s->frame)) {
>>> +        av_log(s->avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
>>> +        return -1;
>>> +    }
>>> +    }
>> indent
>
> Fixed.
>
>> [...]
>>> --- ffmpeg/libavcodec/adpcm.c	2009-04-20 18:29:22.000000000 +0200
>>> +++ ffmpeg_work/libavcodec/adpcm.c	2009-04-26 00:27:30.000000000 +0200
>>> @@ -1005,6 +1005,7 @@ static int adpcm_decode_frame(AVCodecCon
>>>          if (cs->step_index < 0) cs->step_index = 0;
>>>          if (cs->step_index > 88) cs->step_index = 88;
>>>  +    case CODEC_ID_ADPCM_IMA_WS:
>>>          m= (buf_size - (src - buf))>>st;
>>>          for(i=0; i<m; i++) {
>>>              *samples++ = adpcm_ima_expand_nibble(&c->status[0], src[i] & 
>>> 0x0F, 4);
>>> @@ -1156,25 +1157,6 @@ static int adpcm_decode_frame(AVCodecCon
>>>              src++;
>>>          }
>>>          break;
>>> -    case CODEC_ID_ADPCM_IMA_WS:
>>> -        /* no per-block initialization; just start decoding the data */
>>> -        while (src < buf + buf_size) {
>>> -
>>> -            if (st) {
>>> -                *samples++ = adpcm_ima_expand_nibble(&c->status[0],
>>> -                    src[0] >> 4  , 3);
>>> -                *samples++ = adpcm_ima_expand_nibble(&c->status[1],
>>> -                    src[0] & 0x0F, 3);
>>> -            } else {
>>> -                *samples++ = adpcm_ima_expand_nibble(&c->status[0],
>>> -                    src[0] >> 4  , 3);
>>> -                *samples++ = adpcm_ima_expand_nibble(&c->status[0],
>>> -                    src[0] & 0x0F, 3);
>>> -            }
>>> -
>>> -            src++;
>>> -        }
>>> -        break;
>>>      case CODEC_ID_ADPCM_XA:
>>>          while (buf_size >= 128) {
>>>              xa_decode(samples, src, &c->status[0], &c->status[1],
>> That will break the existing cases that use CODEC_ID_ADPCM_IMA_WS i assume
>> [...]
>
> Yes if there are VQA files with stereo sound where nibbles are ordered

CODEC_ID_ADPCM_IMA_WS is used by apc.c and westwood.c

> like this:
>
> LL RR LL RR ....
>
> But I couldn't find any in VQA samples directory. Assuming that such files 
> do exists what is the correct way to fix this? Adding new codec id? If not 

if your case is identical to CODEC_ID_ADPCM_XA then use CODEC_ID_ADPCM_XA
ad dont change another to become identical to it.

> - how demuxer can pass some additional info to codec? Using extradata 
> field? In this case it would be simple flag meaning that stereo samples are 
> ordered in one way or the other.

[...]

divide by 2^x could use >> maybe
/home/michael/add_vqa_hc.diff:103:+    int stride = s->frame.linesize[0] / 2;
/home/michael/add_vqa_hc.diff:110:+    blocks_wide = s->width / 4;

missing } prior to else
/home/michael/add_vqa_hc.diff:34:+    else
/home/michael/add_vqa_hc.diff:117:+    else

{ should be on the same line as the related previous statement
/home/michael/add_vqa_hc.diff:121:+    {
/home/michael/add_vqa_hc.diff:124:+        {

Missing changelog entry (ignore if minor change)

[...]
> +static void vqa_decode_hc_video_chunk(VqaContext *s,const unsigned char *src,unsigned int src_size)
> +{
> +    int block_x, block_y;  /* block width and height iterators */
> +    int blocks_wide, blocks_high;  /* width and height in 4x4 blocks */
> +    int block_inc;
> +    int index_shift;
> +    int i;
> +

> +    /* decoding parameters */
> +    int blocks_done;

can be moved into the loop

> +    uint16_t *pixels,*frame_end;
> +    uint16_t *codebook = (uint16_t *)s->codebook;
> +    int stride = s->frame.linesize[0] / 2;
> +
> +    int vptr_action_code;
> +    int vector_index = 0;

> +    int get_vectors_from_stream;

redundant, the type can be checked directly

> +
> +    blocks_done = 0;

redundant

> +    blocks_wide = s->width / 4;
> +    blocks_high = s->height / s->vector_height;
> +    block_inc = 4;
> +    frame_end = (uint16_t *)s->frame.data[0] + s->height * stride + s->width;
> +
> +    if (s->vector_height == 4)
> +        index_shift = 4;
> +    else
> +        index_shift = 3;
> +
> +    for(block_y=0; block_y < blocks_high; block_y ++)
> +    {
> +        pixels = (uint16_t *)s->frame.data[0] + (block_y * s->vector_height * stride);
> +        for(block_x=0; block_x < blocks_wide; )
> +        {
> +            get_vectors_from_stream=0;
> +            vptr_action_code = bytestream_get_le16(&src);
> +
> +            switch(vptr_action_code & 0xe000) {
> +
> +                /* Skip Count blocks. Count is (Val & 0x1fff). */
> +                case 0x0000:
> +                    blocks_done = vptr_action_code & 0x1fff;
> +                    block_x += blocks_done;
> +                    pixels += blocks_done * block_inc;
> +                    continue;
> +
> +                /* Write block number (Val & 0xff) Count times.
> +                 * Count is (((Val/256) & 0x1f)+1)*2
> +                 */
> +                case 0x2000:
> +                    vector_index = (vptr_action_code & 0xff) << index_shift;
> +                    blocks_done = (((vptr_action_code>>8) & 0x1f)+1) << 1;
> +                    break;
> +
> +                /* Write block number (Val & 0xff) and then write Count blocks
> +                 * getting their indexes by reading next Count bytes from
> +                 * the VPTR chunk. Count is (((Val/256) & 0x1f)+1)*2
> +                 */
> +                case 0x4000:
> +                    vector_index = (vptr_action_code & 0xff) << index_shift;
> +                    blocks_done = ((((vptr_action_code>>8) & 0x1f)+1) << 1)+1;
> +                    get_vectors_from_stream=1;
> +                    break;
> +
> +                /* Write block (Val & 0x1fff).*/
> +                case 0x6000:
> +                    vector_index = (vptr_action_code & 0x1fff) << index_shift;
> +                    blocks_done=1;
> +                    break;
> +
> +                /* Write block (Val & 0x1fff) Count times.
> +                 * Count is the next byte from the VPTR chunk
> +                 */
> +                case 0xa000:
> +                    vector_index = (vptr_action_code & 0x1fff) << index_shift;
> +                    blocks_done = *src++;
> +                    break;
> +            }
> +
> +            if(pixels + s->vector_height * stride + blocks_done * block_inc > frame_end) {
> +                av_log(s->avctx, AV_LOG_ERROR, "  VQA video: too many blocks in frame.\n");
> +                return;
> +            }
> +
> +            for(i=0;i<blocks_done;i++) {
> +                vqa_copy_hc_block(pixels,stride,&codebook[vector_index],s->vector_height);
> +                pixels += block_inc;
> +                if(i && get_vectors_from_stream) {
> +                    vector_index = *src++;
> +                    vector_index <<= index_shift;
> +                }
> +            }
> +            block_x += blocks_done;

block_x += blocks_done;
while( blocks_done-- ){
}

[...]
> @@ -308,6 +433,8 @@ static void vqa_decode_chunk(VqaContext 
>      int cpl0_chunk = -1;
>      int cplz_chunk = -1;
>      int vptz_chunk = -1;
> +    int vptr_chunk = -1;
> +    int vprz_chunk = -1;
>  
>      int x, y;
>      int lines = 0;
> @@ -354,6 +481,14 @@ static void vqa_decode_chunk(VqaContext 
>              vptz_chunk = index;
>              break;
>  
> +        case VPTR_TAG:
> +            vptr_chunk = index;
> +            break;
> +
> +        case VPRZ_TAG:
> +            vprz_chunk = index;
> +            break;
> +
>          default:
>              av_log(s->avctx, AV_LOG_ERROR, "  VQA video: Found unknown chunk type: %c%c%c%c (%08X)\n",
>              (chunk_type >> 24) & 0xFF,

IIRC mike said they are in correct order in files ...

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Avoid a single point of failure, be that a person or equipment.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090429/c369437f/attachment.pgp>