[FFmpeg-devel] [PATCH] Yet another stab at RGB48 support

Tue May 19 04:02:44 CEST 2009

On Sat, May 16, 2009 at 09:24:13AM +0300, Kostya wrote:
> On Fri, May 15, 2009 at 04:12:17PM +0200, Michael Niedermayer wrote:
> > On Fri, May 15, 2009 at 08:56:28AM +0300, Kostya wrote:
> > > On Wed, May 13, 2009 at 01:59:19AM +0200, Michael Niedermayer wrote:
> > > > On Sun, May 10, 2009 at 03:21:23PM +0300, Kostya wrote:
> > > > > On Sat, May 09, 2009 at 07:31:20PM +0200, Diego Biurrun wrote:
> > > > > > On Sat, May 09, 2009 at 02:28:18PM +0300, Kostya wrote:
> > > > > > > $subj
> > > > > > 
> > > > > > Changelog update is missing.
> > > > > > 
> > > > > > $nits below
> > > > > > 
> > > > > [...]
> > > > > reformatted, splitted some long lines.
> > > > > Since this is diff against libswscale, no Changelog entry (but I
> > > > > remember about it).
> > > >
> > > > [...]
> > > > > +static int rgb48togray16_template(SwsContext *c, uint8_t *src[],
> > > > > +                                  int srcStride[], int srcSliceY,
> > > > > +                                  int srcSliceH, uint8_t *dst[],
> > > > > +                                  int dstStride[], const int srcLE,
> > > > > +                                  const int dstLE) {
> > > > > +    int length      = c->srcW,
> > > > > +        y           = srcSliceY,
> > > > > +        height      = srcSliceH,
> > > > > +        stride_s2   = srcStride[0] / 2,
> > > > > +        stride_d2   = dstStride[0] / 2,
> > > > > +        i;
> > > > > +    uint16_t *s = (uint16_t *) src[0], *d = (uint16_t *) dst[0] + stride_d2 * y;
> > > > > +
> > > > > +    for (i = 0; i < height; i++) {
> > > > > +             if (!srcLE && !dstLE) rgb48togray16row_template(s, d, length, 0, 0);
> > > > > +        else if (!srcLE &&  dstLE) rgb48togray16row_template(s, d, length, 0, 1);
> > > > > +        else if ( srcLE && !dstLE) rgb48togray16row_template(s, d, length, 1, 0);
> > > > > +        else                       rgb48togray16row_template(s, d, length, 1, 1);
> > > > > +        s += stride_s2;
> > > > > +        d += stride_d2;
> > > > > +    }
> > > > > +    return height;
> > > > > +}
> > > > > +
> > > > 
> > > > > +#define FUNC_RGB48TOGRAY16(name, sLE, dLE) \
> > > > > +    static int name(SwsContext *c, uint8_t *s[], int sS[], int sSY, int sSH, \
> > > > > +                    uint8_t *d[], int dS[]) { \
> > > > > +        return rgb48togray16_template(c, s, sS, sSY, sSH, d, dS, sLE, dLE); \
> > > > > +    }
> > > > > +
> > > > > +FUNC_RGB48TOGRAY16( rgb48letogray16le, 1, 1)
> > > > > +FUNC_RGB48TOGRAY16( rgb48betogray16le, 0, 1)
> > > > > +FUNC_RGB48TOGRAY16( rgb48letogray16be, 1, 0)
> > > > > +FUNC_RGB48TOGRAY16( rgb48betogray16be, 0, 0)
> > > > 
> > > > bloat, a single function does the trick too in this case
> > > 
> > > huh? What do you mean "single function"? 
> > 
> > rgb48togray16_template()
>  
> merged
>  
> > > That's the only place where low bits of 16-bit variable are used.
> > > 
> > > > [...]
> > > > > +static inline void rgb1516to48(const uint8_t *src, uint8_t *d, long src_size,
> > > > > +                               const int be, const int g6, const int bgr)
> > > > > +{
> > > > 
> > > > > +    const uint16_t *s = (const uint16_t*) src, *end = s + src_size/2;
> > > > 
> > > > unreadable
> > > 
> > > split 
> > 
> > better
> 
> in some other places too 
> 
> > > 
> > > > [...]
> > > > > +static void rgb48to48(const uint8_t *src, uint8_t *dst, long src_size)
> > > > > +{
> > > > > +    long i = 23 - src_size;
> > > > > +    uint8_t *d = dst - i;
> > > > > +    const uint8_t *s = src - i;
> > > > > +#if HAVE_MMX
> > > > > +    __asm__ volatile(
> > > > > +    "test           %0,   %0            \n\t"
> > > > > +    "jns            2f                  \n\t"
> > > > > +    PREFETCH"     (%1,    %0)           \n\t"
> > > > > +    ASMALIGN(4)
> > > > > +    "1:                                 \n\t"
> > > > > +    PREFETCH"    32(%1,   %0)           \n\t"
> > > > > +    "movq          (%1,   %0), %%mm0    \n\t"
> > > > > +    "movq         8(%1,   %0), %%mm1    \n\t"
> > > > > +    "movq        16(%1,   %0), %%mm2    \n\t"
> > > > > +    "movq        %%mm0, %%mm3           \n\t"
> > > > > +    "movq        %%mm1, %%mm4           \n\t"
> > > > > +    "movq        %%mm2, %%mm5           \n\t"
> > > > > +    "psllw          $8, %%mm0           \n\t"
> > > > > +    "psllw          $8, %%mm1           \n\t"
> > > > > +    "psllw          $8, %%mm2           \n\t"
> > > > > +    "psrlw          $8, %%mm3           \n\t"
> > > > > +    "psrlw          $8, %%mm4           \n\t"
> > > > > +    "psrlw          $8, %%mm5           \n\t"
> > > > > +    "por         %%mm3, %%mm0           \n\t"
> > > > > +    "por         %%mm4, %%mm1           \n\t"
> > > > > +    "por         %%mm5, %%mm2           \n\t"
> > > > > +    "movq        %%mm0,   (%2,  %0)     \n\t"
> > > > > +    "movq        %%mm1,  8(%2,  %0)     \n\t"
> > > > > +    "movq        %%mm2, 16(%2,  %0)     \n\t"
> > > > > +    "add           $24,    %0           \n\t"
> > > > > +    "js 1b                              \n\t"
> > > > > +    "2:                                 \n\t"
> > > > > +    SFENCE"                             \n\t"
> > > > > +    EMMS"                               \n\t"
> > > > > +    : "+&r"(i)
> > > > > +    : "r" (s), "r" (d)
> > > > > +    : "memory");
> > > > > +#endif
> > > > > +    for (; i < 21; i += 4) {
> > > > > +        unsigned int x = *(uint32_t*)&s[i];
> > > > > +        *(uint32_t*)&d[i] = ((x>>8) & 0x00FF00FF) + ((x<<8) & 0xFF00FF00);
> > > > > +    }
> > > > > +    if (i < 23) {
> > > > > +        d[i]   = s[i+1];
> > > > > +        d[i+1] = s[i];
> > > > > +    }
> > > > 
> > > > is this some duplicate of a gray le <-> be function?
> > > 
> > > Haven't found that byteswapping function but should be, yes. 
> > 
> > planarCopy()
>  
> Hacked it a bit to handle conversion instead of these functions. 
>  
> > > 
> > > > > +}
> > > > > +
> > > > > +static void rgb48leto24(const uint8_t *s, uint8_t *dst, long src_size)
> > > > > +{
> > > > > +    uint8_t *d = dst;
> > > > > +    const uint8_t *end = s + src_size;
> > > > > +#if HAVE_MMX
> > > > > +    __asm__ volatile(
> > > > > +    PREFETCH"     (%1)          \n\t"
> > > > > +    "jmp 2f                     \n\t"
> > > > > +    ASMALIGN(4)
> > > > > +    "1:                         \n\t"
> > > > > +    PREFETCH"   32(%1)          \n\t"
> > > > > +    "movq         (%1), %%mm0   \n\t"
> > > > > +    "movq        8(%1), %%mm1   \n\t"
> > > > > +    "movq       16(%1), %%mm2   \n\t"
> > > > > +    "movq       24(%1), %%mm3   \n\t"
> > > > > +    "movq       32(%1), %%mm4   \n\t"
> > > > > +    "movq       40(%1), %%mm5   \n\t"
> > > > > +    "psrlw          $8, %%mm0   \n\t"
> > > > > +    "psrlw          $8, %%mm1   \n\t"
> > > > > +    "psrlw          $8, %%mm2   \n\t"
> > > > > +    "psrlw          $8, %%mm3   \n\t"
> > > > > +    "psrlw          $8, %%mm4   \n\t"
> > > > > +    "psrlw          $8, %%mm5   \n\t"
> > > > > +    "packuswb    %%mm1, %%mm0   \n\t"
> > > > > +    "packuswb    %%mm3, %%mm2   \n\t"
> > > > > +    "packuswb    %%mm5, %%mm4   \n\t"
> > > > > +    "movq        %%mm0,   (%0)  \n\t"
> > > > > +    "movq        %%mm2,  8(%0)  \n\t"
> > > > > +    "movq        %%mm4, 16(%0)  \n\t"
> > > > > +    "add           $24,    %0   \n\t"
> > > > > +    "add           $48,    %1   \n\t"
> > > > > +    "2:                         \n\t"
> > > > > +    "cmp            %1,    %2   \n\t"
> > > > > +    "ja 1b                      \n\t"
> > > > > +    SFENCE"                     \n\t"
> > > > > +    EMMS"                       \n\t"
> > > > > +    : "+&r"(d), "+&r"(s)
> > > > > +    : "r" (end-47)
> > > > > +    : "memory");
> > > > > +#endif
> > > > > +    for (; s < end; s += 2, d++)
> > > > > +        *d = *(s+1);
> > > > > +}
> > > > 
> > > > same?
> > > 
> > > No. That was byte-swapping, this is byte-shaving. 
> > 
> > same, in the sense that this too is code duplication
> 
> gone

[...]

> @@ -2054,6 +2064,8 @@
>          uint8_t *srcPtr= src[0];
>          if(srcFormat == PIX_FMT_RGB32_1 || srcFormat == PIX_FMT_BGR32_1)
>              srcPtr += ALT32_CORR;
> +        if(srcFormat == PIX_FMT_RGB48LE && !is16BPS(dstFormat))
> +            srcPtr++; // we ignore low byte and can use the same functions then
>  
>          if (dstStride[0]*srcBpp == srcStride[0]*dstBpp && srcStride[0] > 0)
>              conv(srcPtr, dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);

same as what? please rewrite that comment in verbose mode

> @@ -2172,6 +2184,8 @@

diffs with -p would be nice ...

[...]

/------------------------------------------------\

> @@ -299,6 +301,8 @@
>  #define is16BPS(x)      (           \
>             (x)==PIX_FMT_GRAY16BE    \
>          || (x)==PIX_FMT_GRAY16LE    \
> +        || (x)==PIX_FMT_RGB48BE     \
> +        || (x)==PIX_FMT_RGB48LE     \
>          || (x)==PIX_FMT_YUV420PLE   \
>          || (x)==PIX_FMT_YUV422PLE   \
>          || (x)==PIX_FMT_YUV444PLE   \

\------------hunk ok-----------------------------/

[...]

/------------------------------------------------\

>  #define isBGR(x)        (           \
>             (x)==PIX_FMT_BGR32       \
> @@ -376,6 +382,9 @@
>  static inline int fmt_depth(int fmt)
>  {
>      switch(fmt) {
> +        case PIX_FMT_RGB48BE:
> +        case PIX_FMT_RGB48LE:
> +            return 48;
>          case PIX_FMT_BGRA:
>          case PIX_FMT_ABGR:
>          case PIX_FMT_RGBA:

\------------hunk ok-----------------------------/

[...]
> +SwsFunc sws_hires_gray16rgbConv(SwsContext *c)
> +{
> +    const enum PixelFormat srcFormat = c->srcFormat;
> +    const enum PixelFormat dstFormat = c->dstFormat;
> +
> +    if (isGray16(srcFormat) && (dstFormat == PIX_FMT_RGB48BE ||
> +                                dstFormat == PIX_FMT_RGB48LE)) {
> +        return gray16torgb48;
> +    }
> +    if (isGray16(dstFormat) && (srcFormat == PIX_FMT_RGB48BE ||
> +                                srcFormat == PIX_FMT_RGB48LE)) {
> +        if (dstFormat == PIX_FMT_GRAY16BE) {
> +            if (srcFormat == PIX_FMT_RGB48BE)
> +                return rgb48betogray16be;
> +            else
> +                return rgb48letogray16be;
> +        } else {
> +            if (srcFormat == PIX_FMT_RGB48BE)
> +                return rgb48betogray16le;
> +            else
> +                return rgb48letogray16le;
> +        }
> +    }
> +
> +    return NULL;
> +}

the if / else looks messy why are this not 2 functions?

also dont forget to test the code with swscale-example, that is all new
convertions should work and have small differences to the correct image

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

GMX, the mailprovider that uses RBL lists to reject mails from your friends
running their own mailserver at home. The mailprovider that obscures the
origin of mails (mis)identified as viruses. The mailprovider that improves
security my disallowing more secure forms of authentication.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090519/9b54dbbf/attachment.pgp>