[FFmpeg-devel] [PATCH] pngdsp x86: use scalar loop for unaligned dest buffers.
Benoit Fouet
benoit.fouet at free.fr
Tue Dec 2 14:31:49 CET 2014
Fixes ticket #4148
Signed-off-by: Christophe Gisquet <christophe.gisquet at gmail.com>
Signed-off-by: Benoit Fouet <benoit.fouet at gmail.com>
---
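Changes in this revision of the patch:
  * Add a TODO about providing an optimized unaligned version.
  * Update the function prototype documentation.
  * Mention ticket #4148.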
---
libavcodec/pngdsp.h | 2 ++
libavcodec/x86/pngdsp.asm | 7 ++++++-
2 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/libavcodec/pngdsp.h b/libavcodec/pngdsp.h
index 1475b0c..24d5b7d 100644
--- a/libavcodec/pngdsp.h
+++ b/libavcodec/pngdsp.h
@@ -25,6 +25,8 @@
#include <stdint.h>
typedef struct PNGDSPContext {
+ /* src1 must be 16-aligned; dst and src2 must share the same alignment.
+ * If dst and src2 are not 16-aligned, a scalar version will be used. */
void (*add_bytes_l2)(uint8_t *dst /* align 16 */,
uint8_t *src1 /* align 16 */,
uint8_t *src2 /* align 16 */, int w);
diff --git a/libavcodec/x86/pngdsp.asm b/libavcodec/x86/pngdsp.asm
index 8e23ccf..078c73f 100644
--- a/libavcodec/x86/pngdsp.asm
+++ b/libavcodec/x86/pngdsp.asm
@@ -36,9 +36,14 @@ cglobal add_bytes_l2, 4, 6, %1, dst, src1, src2, wa, w, i
movsxd waq, wad
%endif
xor iq, iq
+ mov wq, waq
+
+ ; test unaligned dst buffer
+ ; TODO have an optimized unaligned version
+ test dstq, (mmsize-1)
+ jnz .end_s
; vector loop
- mov wq, waq
and waq, ~(mmsize*2-1)
jmp .end_v
.loop_v:
--
2.2.0
More information about the ffmpeg-devel
mailing list