[FFmpeg-devel] [PATCH] Yet another stab at RGB48 support
Kostya
kostya.shishkov
Sat May 16 08:24:13 CEST 2009
On Fri, May 15, 2009 at 04:12:17PM +0200, Michael Niedermayer wrote:
> On Fri, May 15, 2009 at 08:56:28AM +0300, Kostya wrote:
> > On Wed, May 13, 2009 at 01:59:19AM +0200, Michael Niedermayer wrote:
> > > On Sun, May 10, 2009 at 03:21:23PM +0300, Kostya wrote:
> > > > On Sat, May 09, 2009 at 07:31:20PM +0200, Diego Biurrun wrote:
> > > > > On Sat, May 09, 2009 at 02:28:18PM +0300, Kostya wrote:
> > > > > > $subj
> > > > >
> > > > > Changelog update is missing.
> > > > >
> > > > > $nits below
> > > > >
> > > > [...]
> > > > reformatted, splitted some long lines.
> > > > Since this is diff against libswscale, no Changelog entry (but I
> > > > remember about it).
> > >
> > > [...]
> > > > +static int rgb48togray16_template(SwsContext *c, uint8_t *src[],
> > > > + int srcStride[], int srcSliceY,
> > > > + int srcSliceH, uint8_t *dst[],
> > > > + int dstStride[], const int srcLE,
> > > > + const int dstLE) {
> > > > + int length = c->srcW,
> > > > + y = srcSliceY,
> > > > + height = srcSliceH,
> > > > + stride_s2 = srcStride[0] / 2,
> > > > + stride_d2 = dstStride[0] / 2,
> > > > + i;
> > > > + uint16_t *s = (uint16_t *) src[0], *d = (uint16_t *) dst[0] + stride_d2 * y;
> > > > +
> > > > + for (i = 0; i < height; i++) {
> > > > + if (!srcLE && !dstLE) rgb48togray16row_template(s, d, length, 0, 0);
> > > > + else if (!srcLE && dstLE) rgb48togray16row_template(s, d, length, 0, 1);
> > > > + else if ( srcLE && !dstLE) rgb48togray16row_template(s, d, length, 1, 0);
> > > > + else rgb48togray16row_template(s, d, length, 1, 1);
> > > > + s += stride_s2;
> > > > + d += stride_d2;
> > > > + }
> > > > + return height;
> > > > +}
> > > > +
> > >
> > > > +#define FUNC_RGB48TOGRAY16(name, sLE, dLE) \
> > > > + static int name(SwsContext *c, uint8_t *s[], int sS[], int sSY, int sSH, \
> > > > + uint8_t *d[], int dS[]) { \
> > > > + return rgb48togray16_template(c, s, sS, sSY, sSH, d, dS, sLE, dLE); \
> > > > + }
> > > > +
> > > > +FUNC_RGB48TOGRAY16( rgb48letogray16le, 1, 1)
> > > > +FUNC_RGB48TOGRAY16( rgb48betogray16le, 0, 1)
> > > > +FUNC_RGB48TOGRAY16( rgb48letogray16be, 1, 0)
> > > > +FUNC_RGB48TOGRAY16( rgb48betogray16be, 0, 0)
> > >
> > > bloat, a single function does the trick too in this case
> >
> > huh? What do you mean "single function"?
>
> rgb48togray16_template()
merged
> > That's the only place where low bits of 16-bit variable are used.
> >
> > > [...]
> > > > +static inline void rgb1516to48(const uint8_t *src, uint8_t *d, long src_size,
> > > > + const int be, const int g6, const int bgr)
> > > > +{
> > >
> > > > + const uint16_t *s = (const uint16_t*) src, *end = s + src_size/2;
> > >
> > > unreadable
> >
> > split
>
> better
in some other places too
> >
> > > [...]
> > > > +static void rgb48to48(const uint8_t *src, uint8_t *dst, long src_size)
> > > > +{
> > > > + long i = 23 - src_size;
> > > > + uint8_t *d = dst - i;
> > > > + const uint8_t *s = src - i;
> > > > +#if HAVE_MMX
> > > > + __asm__ volatile(
> > > > + "test %0, %0 \n\t"
> > > > + "jns 2f \n\t"
> > > > + PREFETCH" (%1, %0) \n\t"
> > > > + ASMALIGN(4)
> > > > + "1: \n\t"
> > > > + PREFETCH" 32(%1, %0) \n\t"
> > > > + "movq (%1, %0), %%mm0 \n\t"
> > > > + "movq 8(%1, %0), %%mm1 \n\t"
> > > > + "movq 16(%1, %0), %%mm2 \n\t"
> > > > + "movq %%mm0, %%mm3 \n\t"
> > > > + "movq %%mm1, %%mm4 \n\t"
> > > > + "movq %%mm2, %%mm5 \n\t"
> > > > + "psllw $8, %%mm0 \n\t"
> > > > + "psllw $8, %%mm1 \n\t"
> > > > + "psllw $8, %%mm2 \n\t"
> > > > + "psrlw $8, %%mm3 \n\t"
> > > > + "psrlw $8, %%mm4 \n\t"
> > > > + "psrlw $8, %%mm5 \n\t"
> > > > + "por %%mm3, %%mm0 \n\t"
> > > > + "por %%mm4, %%mm1 \n\t"
> > > > + "por %%mm5, %%mm2 \n\t"
> > > > + "movq %%mm0, (%2, %0) \n\t"
> > > > + "movq %%mm1, 8(%2, %0) \n\t"
> > > > + "movq %%mm2, 16(%2, %0) \n\t"
> > > > + "add $24, %0 \n\t"
> > > > + "js 1b \n\t"
> > > > + "2: \n\t"
> > > > + SFENCE" \n\t"
> > > > + EMMS" \n\t"
> > > > + : "+&r"(i)
> > > > + : "r" (s), "r" (d)
> > > > + : "memory");
> > > > +#endif
> > > > + for (; i < 21; i += 4) {
> > > > + unsigned int x = *(uint32_t*)&s[i];
> > > > + *(uint32_t*)&d[i] = ((x>>8) & 0x00FF00FF) + ((x<<8) & 0xFF00FF00);
> > > > + }
> > > > + if (i < 23) {
> > > > + d[i] = s[i+1];
> > > > + d[i+1] = s[i];
> > > > + }
> > >
> > > is this some duplicate of a gray le <-> be function?
> >
> > Haven't found that byteswapping function but should be, yes.
>
> planarCopy()
Hacked it a bit to handle conversion instead of these functions.
> >
> > > > +}
> > > > +
> > > > +static void rgb48leto24(const uint8_t *s, uint8_t *dst, long src_size)
> > > > +{
> > > > + uint8_t *d = dst;
> > > > + const uint8_t *end = s + src_size;
> > > > +#if HAVE_MMX
> > > > + __asm__ volatile(
> > > > + PREFETCH" (%1) \n\t"
> > > > + "jmp 2f \n\t"
> > > > + ASMALIGN(4)
> > > > + "1: \n\t"
> > > > + PREFETCH" 32(%1) \n\t"
> > > > + "movq (%1), %%mm0 \n\t"
> > > > + "movq 8(%1), %%mm1 \n\t"
> > > > + "movq 16(%1), %%mm2 \n\t"
> > > > + "movq 24(%1), %%mm3 \n\t"
> > > > + "movq 32(%1), %%mm4 \n\t"
> > > > + "movq 40(%1), %%mm5 \n\t"
> > > > + "psrlw $8, %%mm0 \n\t"
> > > > + "psrlw $8, %%mm1 \n\t"
> > > > + "psrlw $8, %%mm2 \n\t"
> > > > + "psrlw $8, %%mm3 \n\t"
> > > > + "psrlw $8, %%mm4 \n\t"
> > > > + "psrlw $8, %%mm5 \n\t"
> > > > + "packuswb %%mm1, %%mm0 \n\t"
> > > > + "packuswb %%mm3, %%mm2 \n\t"
> > > > + "packuswb %%mm5, %%mm4 \n\t"
> > > > + "movq %%mm0, (%0) \n\t"
> > > > + "movq %%mm2, 8(%0) \n\t"
> > > > + "movq %%mm4, 16(%0) \n\t"
> > > > + "add $24, %0 \n\t"
> > > > + "add $48, %1 \n\t"
> > > > + "2: \n\t"
> > > > + "cmp %1, %2 \n\t"
> > > > + "ja 1b \n\t"
> > > > + SFENCE" \n\t"
> > > > + EMMS" \n\t"
> > > > + : "+&r"(d), "+&r"(s)
> > > > + : "r" (end-47)
> > > > + : "memory");
> > > > +#endif
> > > > + for (; s < end; s += 2, d++)
> > > > + *d = *(s+1);
> > > > +}
> > >
> > > same?
> >
> > No. That was byte-swapping, this is byte-shaving.
>
> same, in the sense that this too is code duplication
gone
> ill read/review the rest once you removed trivial code duplication
>
> [...]
> --
> Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 48bpp.patch
Type: text/x-diff
Size: 18408 bytes
Desc: not available
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090516/c4ca77d9/attachment.patch>
More information about the ffmpeg-devel
mailing list