[FFmpeg-devel] [PATCH] pngdsp x86: use scalar loop for unaligned dest buffers.

Benoit Fouet benoit.fouet at free.fr
Tue Dec 2 14:31:49 CET 2014


Fixes ticket #4148

Signed-off-by: Christophe Gisquet <christophe.gisquet at gmail.com>
Signed-off-by: Benoit Fouet <benoit.fouet at gmail.com>
---
Add TODO
Update function prototype documentation
Mention ticket 4148
---
 libavcodec/pngdsp.h       | 2 ++
 libavcodec/x86/pngdsp.asm | 7 ++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/libavcodec/pngdsp.h b/libavcodec/pngdsp.h
index 1475b0c..24d5b7d 100644
--- a/libavcodec/pngdsp.h
+++ b/libavcodec/pngdsp.h
@@ -25,6 +25,8 @@
 #include <stdint.h>
 
 typedef struct PNGDSPContext {
+    /* src1 must be 16-aligned; dst and src2 must share the same alignment.
+     * If dst and src2 are not 16-aligned, a scalar version will be used. */
     void (*add_bytes_l2)(uint8_t *dst  /* align 16 */,
                          uint8_t *src1 /* align 16 */,
                          uint8_t *src2 /* align 16 */, int w);
diff --git a/libavcodec/x86/pngdsp.asm b/libavcodec/x86/pngdsp.asm
index 8e23ccf..078c73f 100644
--- a/libavcodec/x86/pngdsp.asm
+++ b/libavcodec/x86/pngdsp.asm
@@ -36,9 +36,14 @@ cglobal add_bytes_l2, 4, 6, %1, dst, src1, src2, wa, w, i
     movsxd             waq, wad
 %endif
     xor                 iq, iq
+    mov                 wq, waq
+
+    ; test unaligned dst buffer
+    ; TODO have an optimized unaligned version
+    test dstq, (mmsize-1)
+    jnz .end_s
 
     ; vector loop
-    mov                 wq, waq
     and                waq, ~(mmsize*2-1)
     jmp .end_v
 .loop_v:
-- 
2.2.0



More information about the ffmpeg-devel mailing list