[FFmpeg-devel] [PATCH] Fix unaligned fill_rectangle in rv34.c
Reimar Döffinger
Reimar.Doeffinger
Thu Aug 27 21:57:56 CEST 2009
On Sat, Aug 22, 2009 at 04:11:19PM +0200, Michael Niedermayer wrote:
> let me try again
> if you write a rectangle of 8xC then there exists a semantic partitioning
> of sizeof=8, and things could be aligned accordingly, if not you cannot
> use fill_rectangle()
> you would have to replace r->avail_cache + 5 by + 6 maybe and make other
> related changes, its perfectly fine as well if you dont and remove all
> calls to fill_rectangle() from your code but fill_rectangle() requires
> aligned memory.
So can we get over with this and use attached patch?
The important cases needs something far simpler than fill_rectangle,
so it is not that bad code-wise I think.
-------------- next part --------------
Index: libavcodec/rv34.c
===================================================================
--- libavcodec/rv34.c (revision 19710)
+++ libavcodec/rv34.c (working copy)
@@ -238,6 +238,29 @@
/** @} */ // transform
+/**
+ * fills a square block with a given uint32_t value
+ * @param dst start of block to fill
+ * @param size width and height of block, must be a either 2 or 4
+ * @param stride line stride to use
+ * @param value value to fill with
+ */
+static av_always_inline void fill_uint32_block(uint32_t *dst, int size, int stride, uint32_t value)
+{
+ if (size == 4) {
+ dst[0 * stride + 0] = dst[0 * stride + 1] =
+ dst[0 * stride + 0] = dst[0 * stride + 1] =
+ dst[1 * stride + 0] = dst[1 * stride + 1] =
+ dst[1 * stride + 2] = dst[1 * stride + 3] =
+ dst[2 * stride + 0] = dst[2 * stride + 1] =
+ dst[2 * stride + 2] = dst[2 * stride + 3] =
+ dst[3 * stride + 0] = dst[3 * stride + 1] =
+ dst[3 * stride + 2] = dst[3 * stride + 3] = value;
+ } else {
+ dst[0 * stride + 0] = dst[0 * stride + 1] =
+ dst[1 * stride + 0] = dst[1 * stride + 1] = value;
+ }
+}
/**
* @defgroup block RV30/40 4x4 block decoding functions
@@ -585,7 +608,7 @@
}
}
if(block_type == RV34_MB_B_BACKWARD || block_type == RV34_MB_B_FORWARD)
- fill_rectangle(cur_pic->motion_val[!dir][mv_pos], 2, 2, s->b8_stride, 0, 4);
+ fill_uint32_block(cur_pic->motion_val[!dir][mv_pos], 2, s->b8_stride, 0);
}
/**
@@ -806,11 +829,11 @@
switch(block_type){
case RV34_MB_TYPE_INTRA:
case RV34_MB_TYPE_INTRA16x16:
- fill_rectangle(s->current_picture_ptr->motion_val[0][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride], 2, 2, s->b8_stride, 0, 4);
+ fill_uint32_block(s->current_picture_ptr->motion_val[0][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride], 2, s->b8_stride, 0);
return 0;
case RV34_MB_SKIP:
if(s->pict_type == FF_P_TYPE){
- fill_rectangle(s->current_picture_ptr->motion_val[0][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride], 2, 2, s->b8_stride, 0, 4);
+ fill_uint32_block(s->current_picture_ptr->motion_val[0][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride], 2, s->b8_stride, 0);
rv34_mc_1mv (r, block_type, 0, 0, 0, 2, 2, 0);
break;
}
@@ -818,8 +841,8 @@
//surprisingly, it uses motion scheme from next reference frame
next_bt = s->next_picture_ptr->mb_type[s->mb_x + s->mb_y * s->mb_stride];
if(IS_INTRA(next_bt) || IS_SKIP(next_bt)){
- fill_rectangle(s->current_picture_ptr->motion_val[0][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride], 2, 2, s->b8_stride, 0, 4);
- fill_rectangle(s->current_picture_ptr->motion_val[1][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride], 2, 2, s->b8_stride, 0, 4);
+ fill_uint32_block(s->current_picture_ptr->motion_val[0][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride], 2, s->b8_stride, 0);
+ fill_uint32_block(s->current_picture_ptr->motion_val[1][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride], 2, s->b8_stride, 0);
}else
for(j = 0; j < 2; j++)
for(i = 0; i < 2; i++)
@@ -830,7 +853,7 @@
rv34_mc_2mv(r, block_type);
else
rv34_mc_2mv_skip(r);
- fill_rectangle(s->current_picture_ptr->motion_val[0][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride], 2, 2, s->b8_stride, 0, 4);
+ fill_uint32_block(s->current_picture_ptr->motion_val[0][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride], 2, s->b8_stride, 0);
break;
case RV34_MB_P_16x16:
case RV34_MB_P_MIX16x16:
@@ -987,7 +1010,7 @@
intra_types += r->intra_types_stride;
}
intra_types -= r->intra_types_stride * 4;
- fill_rectangle(r->avail_cache + 5, 2, 2, 4, 0, 4);
+ fill_uint32_block(r->avail_cache + 5, 2, 4, 0);
for(j = 0; j < 2; j++){
idx = 5 + j*4;
for(i = 0; i < 2; i++, cbp >>= 1, idx++){
@@ -1173,7 +1196,7 @@
// Calculate which neighbours are available. Maybe it's worth optimizing too.
memset(r->avail_cache, 0, sizeof(r->avail_cache));
- fill_rectangle(r->avail_cache + 5, 2, 2, 4, 1, 4);
+ fill_uint32_block(r->avail_cache + 5, 2, 4, 1);
dist = (s->mb_x - s->resync_mb_x) + (s->mb_y - s->resync_mb_y) * s->mb_width;
if(s->mb_x && dist)
r->avail_cache[4] =
More information about the ffmpeg-devel
mailing list