[FFmpeg-devel] [PATCH 1/3] sbc: implement SBC codec (low-complexity subband codec)
Aurelien Jacobs
aurel at gnuage.org
Mon Nov 6 01:35:18 EET 2017
This was originally based on libsbc, and was fully integrated into ffmpeg.
---
doc/general.texi | 2 +
libavcodec/Makefile | 4 +
libavcodec/allcodecs.c | 2 +
libavcodec/arm/Makefile | 3 +
libavcodec/arm/sbcdsp_armv6.S | 245 ++++++++++++++
libavcodec/arm/sbcdsp_init_arm.c | 105 ++++++
libavcodec/arm/sbcdsp_neon.S | 714 +++++++++++++++++++++++++++++++++++++++
libavcodec/avcodec.h | 2 +
libavcodec/codec_desc.c | 12 +
libavcodec/sbc.c | 316 +++++++++++++++++
libavcodec/sbc.h | 121 +++++++
libavcodec/sbcdec.c | 469 +++++++++++++++++++++++++
libavcodec/sbcdec_data.c | 127 +++++++
libavcodec/sbcdec_data.h | 44 +++
libavcodec/sbcdsp.c | 569 +++++++++++++++++++++++++++++++
libavcodec/sbcdsp.h | 86 +++++
libavcodec/sbcdsp_data.c | 335 ++++++++++++++++++
libavcodec/sbcdsp_data.h | 57 ++++
libavcodec/sbcenc.c | 461 +++++++++++++++++++++++++
libavcodec/x86/Makefile | 2 +
libavcodec/x86/sbcdsp.asm | 290 ++++++++++++++++
libavcodec/x86/sbcdsp_init.c | 51 +++
22 files changed, 4017 insertions(+)
create mode 100644 libavcodec/arm/sbcdsp_armv6.S
create mode 100644 libavcodec/arm/sbcdsp_init_arm.c
create mode 100644 libavcodec/arm/sbcdsp_neon.S
create mode 100644 libavcodec/sbc.c
create mode 100644 libavcodec/sbc.h
create mode 100644 libavcodec/sbcdec.c
create mode 100644 libavcodec/sbcdec_data.c
create mode 100644 libavcodec/sbcdec_data.h
create mode 100644 libavcodec/sbcdsp.c
create mode 100644 libavcodec/sbcdsp.h
create mode 100644 libavcodec/sbcdsp_data.c
create mode 100644 libavcodec/sbcdsp_data.h
create mode 100644 libavcodec/sbcenc.c
create mode 100644 libavcodec/x86/sbcdsp.asm
create mode 100644 libavcodec/x86/sbcdsp_init.c
diff --git a/doc/general.texi b/doc/general.texi
index 9e6ae13435..baaa308dcf 100644
--- a/doc/general.texi
+++ b/doc/general.texi
@@ -1096,6 +1096,8 @@ following image formats are supported:
@tab Real low bitrate AC-3 codec
@item RealAudio Lossless @tab @tab X
@item RealAudio SIPR / ACELP.NET @tab @tab X
+@item SBC (low-complexity subband codec) @tab X @tab X
+ @tab Used in Bluetooth A2DP
@item Shorten @tab @tab X
@item Sierra VMD audio @tab @tab X
@tab Used in Sierra VMD files.
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 3a33361f33..17648a1c3d 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -576,6 +576,10 @@ OBJS-$(CONFIG_SUBVIEWER_DECODER) += subviewerdec.o ass.o
OBJS-$(CONFIG_SUNRAST_DECODER) += sunrast.o
OBJS-$(CONFIG_SUNRAST_ENCODER) += sunrastenc.o
OBJS-$(CONFIG_LIBRSVG_DECODER) += librsvgdec.o
+OBJS-$(CONFIG_SBC_DECODER) += sbcdec.o sbcdec_data.o sbc.o
+OBJS-$(CONFIG_SBC_ENCODER) += sbcenc.o sbc.o sbcdsp.o sbcdsp_data.o
+OBJS-$(CONFIG_MSBC_DECODER) += sbcdec.o sbcdec_data.o sbc.o
+OBJS-$(CONFIG_MSBC_ENCODER) += sbcenc.o sbc.o sbcdsp.o sbcdsp_data.o
OBJS-$(CONFIG_SVQ1_DECODER) += svq1dec.o svq1.o svq13.o h263data.o
OBJS-$(CONFIG_SVQ1_ENCODER) += svq1enc.o svq1.o h263data.o \
h263.o ituh263enc.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 98655ddd7c..95cf67ce20 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -452,6 +452,7 @@ static void register_all(void)
REGISTER_DECODER(MP3ON4FLOAT, mp3on4float);
REGISTER_DECODER(MPC7, mpc7);
REGISTER_DECODER(MPC8, mpc8);
+ REGISTER_ENCDEC (MSBC, msbc);
REGISTER_ENCDEC (NELLYMOSER, nellymoser);
REGISTER_DECODER(ON2AVC, on2avc);
REGISTER_ENCDEC (OPUS, opus);
@@ -465,6 +466,7 @@ static void register_all(void)
REGISTER_DECODER(SHORTEN, shorten);
REGISTER_DECODER(SIPR, sipr);
REGISTER_DECODER(SMACKAUD, smackaud);
+ REGISTER_ENCDEC (SBC, sbc);
REGISTER_ENCDEC (SONIC, sonic);
REGISTER_ENCODER(SONIC_LS, sonic_ls);
REGISTER_DECODER(TAK, tak);
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index 1eeac5449e..fd2401f4e5 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -42,6 +42,7 @@ OBJS-$(CONFIG_DCA_DECODER) += arm/synth_filter_init_arm.o
OBJS-$(CONFIG_HEVC_DECODER) += arm/hevcdsp_init_arm.o
OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_init_arm.o
OBJS-$(CONFIG_RV40_DECODER) += arm/rv40dsp_init_arm.o
+OBJS-$(CONFIG_SBC_ENCODER) += arm/sbcdsp_init_arm.o
OBJS-$(CONFIG_VORBIS_DECODER) += arm/vorbisdsp_init_arm.o
OBJS-$(CONFIG_VP6_DECODER) += arm/vp6dsp_init_arm.o
OBJS-$(CONFIG_VP9_DECODER) += arm/vp9dsp_init_10bpp_arm.o \
@@ -81,6 +82,7 @@ ARMV6-OBJS-$(CONFIG_VP8DSP) += arm/vp8_armv6.o \
# decoders/encoders
ARMV6-OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_armv6.o
+ARMV6-OBJS-$(CONFIG_SBC_ENCODER) += arm/sbcdsp_armv6.o
# VFP optimizations
@@ -140,6 +142,7 @@ NEON-OBJS-$(CONFIG_HEVC_DECODER) += arm/hevcdsp_init_neon.o \
NEON-OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_neon.o
NEON-OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_neon.o \
arm/rv40dsp_neon.o
+NEON-OBJS-$(CONFIG_SBC_ENCODER) += arm/sbcdsp_neon.o
NEON-OBJS-$(CONFIG_VORBIS_DECODER) += arm/vorbisdsp_neon.o
NEON-OBJS-$(CONFIG_VP6_DECODER) += arm/vp6dsp_neon.o
NEON-OBJS-$(CONFIG_VP9_DECODER) += arm/vp9itxfm_16bpp_neon.o \
diff --git a/libavcodec/arm/sbcdsp_armv6.S b/libavcodec/arm/sbcdsp_armv6.S
new file mode 100644
index 0000000000..f1ff845798
--- /dev/null
+++ b/libavcodec/arm/sbcdsp_armv6.S
@@ -0,0 +1,245 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017 Aurelien Jacobs <aurel at gnuage.org>
+ * Copyright (C) 2008-2010 Nokia Corporation
+ * Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org>
+ * Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch>
+ * Copyright (C) 2005-2006 Brad Midgley <bmidgley at xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC ARMv6 optimizations. The instructions are scheduled for ARM11 pipeline.
+ */
+
+#include "libavutil/arm/asm.S"
+
+function ff_sbc_analyze_4_armv6, export=1
+ @ r0 = in, r1 = out (four 32-bit results), r2 = consts (cos table at byte offset 80)
+ push {r1, r3-r7, lr}
+ push {r8-r12, r14}
+ ldrd r4, r5, [r0, #0]
+ ldrd r6, r7, [r2, #0]
+ ldrd r8, r9, [r0, #16]
+ ldrd r10, r11, [r2, #16]
+ mov r14, #0x8000 @ rounding bias: each sum is later reduced to its bits 31:16
+ smlad r3, r4, r6, r14 @ t1[0] accumulator, seeded with the bias
+ smlad r12, r5, r7, r14 @ t1[1] accumulator, seeded with the bias
+ ldrd r4, r5, [r0, #32]
+ ldrd r6, r7, [r2, #32]
+ smlad r3, r8, r10, r3
+ smlad r12, r9, r11, r12
+ ldrd r8, r9, [r0, #48]
+ ldrd r10, r11, [r2, #48]
+ smlad r3, r4, r6, r3
+ smlad r12, r5, r7, r12
+ ldrd r4, r5, [r0, #64]
+ ldrd r6, r7, [r2, #64]
+ smlad r3, r8, r10, r3
+ smlad r12, r9, r11, r12
+ ldrd r8, r9, [r0, #8]
+ ldrd r10, r11, [r2, #8]
+ smlad r3, r4, r6, r3 @ t1[0] is done
+ smlad r12, r5, r7, r12 @ t1[1] is done
+ ldrd r4, r5, [r0, #24]
+ ldrd r6, r7, [r2, #24]
+ pkhtb r3, r12, r3, asr #16 @ combine t1[0] and t1[1]
+ smlad r12, r8, r10, r14 @ t1[2] accumulator, seeded with the bias
+ smlad r14, r9, r11, r14 @ t1[3] accumulator; r14 no longer holds the bias after this
+ ldrd r8, r9, [r0, #40]
+ ldrd r10, r11, [r2, #40]
+ smlad r12, r4, r6, r12
+ smlad r14, r5, r7, r14
+ ldrd r4, r5, [r0, #56]
+ ldrd r6, r7, [r2, #56]
+ smlad r12, r8, r10, r12
+ smlad r14, r9, r11, r14
+ ldrd r8, r9, [r0, #72]
+ ldrd r10, r11, [r2, #72]
+ smlad r12, r4, r6, r12
+ smlad r14, r5, r7, r14
+ ldrd r4, r5, [r2, #80] @ start loading cos table
+ smlad r12, r8, r10, r12 @ t1[2] is done
+ smlad r14, r9, r11, r14 @ t1[3] is done
+ ldrd r6, r7, [r2, #88]
+ ldrd r8, r9, [r2, #96]
+ ldrd r10, r11, [r2, #104] @ cos table fully loaded
+ pkhtb r12, r14, r12, asr #16 @ combine t1[2] and t1[3]
+ smuad r4, r3, r4 @ r4-r7: products of packed t1[0:1] with the cos table
+ smuad r5, r3, r5
+ smlad r4, r12, r8, r4 @ r4-r7: add products of packed t1[2:3]
+ smlad r5, r12, r9, r5
+ smuad r6, r3, r6
+ smuad r7, r3, r7
+ smlad r6, r12, r10, r6
+ smlad r7, r12, r11, r7
+ pop {r8-r12, r14}
+ stmia r1, {r4, r5, r6, r7} @ write the four results to out
+ pop {r1, r3-r7, pc} @ restore registers and return
+endfunc
+
+function ff_sbc_analyze_8_armv6, export=1
+ @ r0 = in, r1 = out (eight 32-bit results), r2 = consts (second table at byte offset 160)
+ push {r1, r3-r7, lr}
+ push {r8-r12, r14}
+ ldrd r4, r5, [r0, #24]
+ ldrd r6, r7, [r2, #24]
+ ldrd r8, r9, [r0, #56]
+ ldrd r10, r11, [r2, #56]
+ mov r14, #0x8000 @ rounding bias: each sum is later reduced to its bits 31:16
+ smlad r3, r4, r6, r14 @ t1[6] accumulator, seeded with the bias
+ smlad r12, r5, r7, r14 @ t1[7] accumulator, seeded with the bias
+ ldrd r4, r5, [r0, #88]
+ ldrd r6, r7, [r2, #88]
+ smlad r3, r8, r10, r3
+ smlad r12, r9, r11, r12
+ ldrd r8, r9, [r0, #120]
+ ldrd r10, r11, [r2, #120]
+ smlad r3, r4, r6, r3
+ smlad r12, r5, r7, r12
+ ldrd r4, r5, [r0, #152]
+ ldrd r6, r7, [r2, #152]
+ smlad r3, r8, r10, r3
+ smlad r12, r9, r11, r12
+ ldrd r8, r9, [r0, #16]
+ ldrd r10, r11, [r2, #16]
+ smlad r3, r4, r6, r3 @ t1[6] is done
+ smlad r12, r5, r7, r12 @ t1[7] is done
+ ldrd r4, r5, [r0, #48]
+ ldrd r6, r7, [r2, #48]
+ pkhtb r3, r12, r3, asr #16 @ combine t1[6] and t1[7]
+ str r3, [sp, #-4]! @ save to stack
+ smlad r3, r8, r10, r14 @ t1[4] accumulator, seeded with the bias
+ smlad r12, r9, r11, r14 @ t1[5] accumulator, seeded with the bias
+ ldrd r8, r9, [r0, #80]
+ ldrd r10, r11, [r2, #80]
+ smlad r3, r4, r6, r3
+ smlad r12, r5, r7, r12
+ ldrd r4, r5, [r0, #112]
+ ldrd r6, r7, [r2, #112]
+ smlad r3, r8, r10, r3
+ smlad r12, r9, r11, r12
+ ldrd r8, r9, [r0, #144]
+ ldrd r10, r11, [r2, #144]
+ smlad r3, r4, r6, r3
+ smlad r12, r5, r7, r12
+ ldrd r4, r5, [r0, #0]
+ ldrd r6, r7, [r2, #0]
+ smlad r3, r8, r10, r3 @ t1[4] is done
+ smlad r12, r9, r11, r12 @ t1[5] is done
+ ldrd r8, r9, [r0, #32]
+ ldrd r10, r11, [r2, #32]
+ pkhtb r3, r12, r3, asr #16 @ combine t1[4] and t1[5]
+ str r3, [sp, #-4]! @ save to stack
+ smlad r3, r4, r6, r14 @ t1[0] accumulator, seeded with the bias
+ smlad r12, r5, r7, r14 @ t1[1] accumulator, seeded with the bias
+ ldrd r4, r5, [r0, #64]
+ ldrd r6, r7, [r2, #64]
+ smlad r3, r8, r10, r3
+ smlad r12, r9, r11, r12
+ ldrd r8, r9, [r0, #96]
+ ldrd r10, r11, [r2, #96]
+ smlad r3, r4, r6, r3
+ smlad r12, r5, r7, r12
+ ldrd r4, r5, [r0, #128]
+ ldrd r6, r7, [r2, #128]
+ smlad r3, r8, r10, r3
+ smlad r12, r9, r11, r12
+ ldrd r8, r9, [r0, #8]
+ ldrd r10, r11, [r2, #8]
+ smlad r3, r4, r6, r3 @ t1[0] is done
+ smlad r12, r5, r7, r12 @ t1[1] is done
+ ldrd r4, r5, [r0, #40]
+ ldrd r6, r7, [r2, #40]
+ pkhtb r3, r12, r3, asr #16 @ combine t1[0] and t1[1]
+ smlad r12, r8, r10, r14 @ t1[2] accumulator, seeded with the bias
+ smlad r14, r9, r11, r14 @ t1[3] accumulator; r14 no longer holds the bias after this
+ ldrd r8, r9, [r0, #72]
+ ldrd r10, r11, [r2, #72]
+ smlad r12, r4, r6, r12
+ smlad r14, r5, r7, r14
+ ldrd r4, r5, [r0, #104]
+ ldrd r6, r7, [r2, #104]
+ smlad r12, r8, r10, r12
+ smlad r14, r9, r11, r14
+ ldrd r8, r9, [r0, #136]
+ ldrd r10, r11, [r2, #136]! @ writeback: r2 = consts + 136, hence the (160 - 136 + n) offsets below
+ smlad r12, r4, r6, r12
+ smlad r14, r5, r7, r14
+ ldrd r4, r5, [r2, #(160 - 136 + 0)]
+ smlad r12, r8, r10, r12 @ t1[2] is done
+ smlad r14, r9, r11, r14 @ t1[3] is done
+ ldrd r6, r7, [r2, #(160 - 136 + 8)]
+ smuad r4, r3, r4
+ smuad r5, r3, r5
+ pkhtb r12, r14, r12, asr #16 @ combine t1[2] and t1[3]
+ @ r3 = t2[0:1]
+ @ r12 = t2[2:3]
+ pop {r0, r14} @ t2[4:5], t2[6:7]
+ ldrd r8, r9, [r2, #(160 - 136 + 32)]
+ smuad r6, r3, r6
+ smuad r7, r3, r7
+ ldrd r10, r11, [r2, #(160 - 136 + 40)]
+ smlad r4, r12, r8, r4
+ smlad r5, r12, r9, r5
+ ldrd r8, r9, [r2, #(160 - 136 + 64)]
+ smlad r6, r12, r10, r6
+ smlad r7, r12, r11, r7
+ ldrd r10, r11, [r2, #(160 - 136 + 72)]
+ smlad r4, r0, r8, r4
+ smlad r5, r0, r9, r5
+ ldrd r8, r9, [r2, #(160 - 136 + 96)]
+ smlad r6, r0, r10, r6
+ smlad r7, r0, r11, r7
+ ldrd r10, r11, [r2, #(160 - 136 + 104)]
+ smlad r4, r14, r8, r4
+ smlad r5, r14, r9, r5
+ ldrd r8, r9, [r2, #(160 - 136 + 16 + 0)]
+ smlad r6, r14, r10, r6
+ smlad r7, r14, r11, r7
+ ldrd r10, r11, [r2, #(160 - 136 + 16 + 8)]
+ stmia r1!, {r4, r5} @ write the first two results, advance out
+ smuad r4, r3, r8
+ smuad r5, r3, r9
+ ldrd r8, r9, [r2, #(160 - 136 + 16 + 32)]
+ stmia r1!, {r6, r7} @ write the next two results, advance out
+ smuad r6, r3, r10
+ smuad r7, r3, r11
+ ldrd r10, r11, [r2, #(160 - 136 + 16 + 40)]
+ smlad r4, r12, r8, r4
+ smlad r5, r12, r9, r5
+ ldrd r8, r9, [r2, #(160 - 136 + 16 + 64)]
+ smlad r6, r12, r10, r6
+ smlad r7, r12, r11, r7
+ ldrd r10, r11, [r2, #(160 - 136 + 16 + 72)]
+ smlad r4, r0, r8, r4
+ smlad r5, r0, r9, r5
+ ldrd r8, r9, [r2, #(160 - 136 + 16 + 96)]
+ smlad r6, r0, r10, r6
+ smlad r7, r0, r11, r7
+ ldrd r10, r11, [r2, #(160 - 136 + 16 + 104)]
+ smlad r4, r14, r8, r4
+ smlad r5, r14, r9, r5
+ smlad r6, r14, r10, r6
+ smlad r7, r14, r11, r7
+ pop {r8-r12, r14}
+ stmia r1!, {r4, r5, r6, r7} @ write the last four results
+ pop {r1, r3-r7, pc} @ restore registers and return
+endfunc
diff --git a/libavcodec/arm/sbcdsp_init_arm.c b/libavcodec/arm/sbcdsp_init_arm.c
new file mode 100644
index 0000000000..6bf7e729ef
--- /dev/null
+++ b/libavcodec/arm/sbcdsp_init_arm.c
@@ -0,0 +1,105 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017 Aurelien Jacobs <aurel at gnuage.org>
+ * Copyright (C) 2008-2010 Nokia Corporation
+ * Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org>
+ * Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch>
+ * Copyright (C) 2005-2006 Brad Midgley <bmidgley at xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC ARMv6 optimization for some basic "building bricks"
+ */
+
+#include "libavutil/cpu.h"
+#include "libavutil/arm/cpu.h"
+#include "libavcodec/sbcdsp.h"
+
+void ff_sbc_analyze_4_armv6(const int16_t *in, int32_t *out, const int16_t *consts); /* sbcdsp_armv6.S */
+void ff_sbc_analyze_8_armv6(const int16_t *in, int32_t *out, const int16_t *consts); /* sbcdsp_armv6.S */
+
+void ff_sbc_analyze_4_neon(const int16_t *in, int32_t *out, const int16_t *consts); /* sbcdsp_neon.S, as are all prototypes below */
+void ff_sbc_analyze_8_neon(const int16_t *in, int32_t *out, const int16_t *consts);
+void ff_sbc_calc_scalefactors_neon(int32_t sb_sample_f[16][2][8],
+ uint32_t scale_factor[2][8],
+ int blocks, int channels, int subbands);
+int ff_sbc_calc_scalefactors_j_neon(int32_t sb_sample_f[16][2][8], /* returns the 'joint' bitmask */
+ uint32_t scale_factor[2][8],
+ int blocks, int subbands);
+int ff_sbc_enc_process_input_4s_neon(int position, const uint8_t *pcm, /* returns the updated position */
+ int16_t X[2][SBC_X_BUFFER_SIZE],
+ int nsamples, int nchannels);
+int ff_sbc_enc_process_input_8s_neon(int position, const uint8_t *pcm, /* returns the updated position */
+ int16_t X[2][SBC_X_BUFFER_SIZE],
+ int nsamples, int nchannels);
+
+DECLARE_ALIGNED(SBC_ALIGN, int32_t, ff_sbcdsp_joint_bits_mask)[8] = { /* read four lanes at a time by ff_sbc_calc_scalefactors_j_neon */
+ 8, 4, 2, 1, 128, 64, 32, 16 /* per-lane bit values: ANDed with the joint-stereo selection mask, then summed into 'joint' */
+};
+
+#if HAVE_BIGENDIAN /* PERM(a, b, c, d): byte indices of 16-bit elements a, b, c, d, with the two bytes of each element swapped */
+#define PERM(a, b, c, d) { \
+ (a * 2) + 1, (a * 2) + 0, \
+ (b * 2) + 1, (b * 2) + 0, \
+ (c * 2) + 1, (c * 2) + 0, \
+ (d * 2) + 1, (d * 2) + 0 \
+ }
+#else /* little endian: same byte indices, bytes of each element kept in ascending order */
+#define PERM(a, b, c, d) { \
+ (a * 2) + 0, (a * 2) + 1, \
+ (b * 2) + 0, (b * 2) + 1, \
+ (c * 2) + 0, (c * 2) + 1, \
+ (d * 2) + 0, (d * 2) + 1 \
+ }
+#endif /* HAVE_BIGENDIAN */
+
+DECLARE_ALIGNED(SBC_ALIGN, uint8_t, ff_sbc_input_perm_4)[2][8] = { /* vtbl.8 byte indices used by ff_sbc_enc_process_input_4s_neon */
+ PERM(7, 3, 6, 4),
+ PERM(0, 2, 1, 5)
+};
+
+DECLARE_ALIGNED(SBC_ALIGN, uint8_t, ff_sbc_input_perm_8)[4][8] = { /* vtbl.8 byte indices used by ff_sbc_enc_process_input_8s_neon */
+ PERM(15, 7, 14, 8),
+ PERM(13, 9, 12, 10),
+ PERM(11, 3, 6, 0),
+ PERM( 5, 1, 4, 2)
+};
+
+av_cold void ff_sbcdsp_init_arm(SBCDSPContext *s)
+{
+ const int flags = av_get_cpu_flags();
+
+ if (have_armv6(flags)) { /* ARMv6 supplies only the two analysis filters */
+ s->sbc_analyze_8 = ff_sbc_analyze_8_armv6;
+ s->sbc_analyze_4 = ff_sbc_analyze_4_armv6;
+ }
+
+ if (!have_neon(flags)) /* without NEON there is nothing more to install */
+ return;
+ s->sbc_calc_scalefactors_j = ff_sbc_calc_scalefactors_j_neon;
+ s->sbc_calc_scalefactors = ff_sbc_calc_scalefactors_neon;
+ s->sbc_analyze_8 = ff_sbc_analyze_8_neon; /* replaces the ARMv6 pointer set above, if any */
+ s->sbc_analyze_4 = ff_sbc_analyze_4_neon;
+ if (s->increment != 1) { /* input-processing routines are installed only when increment != 1 */
+ s->sbc_enc_process_input_8s = ff_sbc_enc_process_input_8s_neon;
+ s->sbc_enc_process_input_4s = ff_sbc_enc_process_input_4s_neon;
+ }
+}
diff --git a/libavcodec/arm/sbcdsp_neon.S b/libavcodec/arm/sbcdsp_neon.S
new file mode 100644
index 0000000000..d83d21d202
--- /dev/null
+++ b/libavcodec/arm/sbcdsp_neon.S
@@ -0,0 +1,714 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017 Aurelien Jacobs <aurel at gnuage.org>
+ * Copyright (C) 2008-2010 Nokia Corporation
+ * Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org>
+ * Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch>
+ * Copyright (C) 2005-2006 Brad Midgley <bmidgley at xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC ARM NEON optimizations
+ */
+
+#include "libavutil/arm/asm.S"
+#include "neon.S"
+
+#define SBC_PROTO_FIXED_SCALE 16
+
+function ff_sbc_analyze_4_neon, export=1 /* r0 = in, r1 = out (four 32-bit results), r2 = consts */
+ /* TODO: merge even and odd cases (or even merge all four calls to this
+ * function) in order to have only aligned reads from 'in' array
+ * and reduce number of load instructions */
+ vld1.16 {d4, d5}, [r0, :64]!
+ vld1.16 {d16, d17}, [r2, :128]! /* consts live in d16-d19: d8-d15 are callee-saved (AAPCS) and must not be clobbered */
+
+ vmull.s16 q0, d4, d16
+ vld1.16 {d6, d7}, [r0, :64]!
+ vmull.s16 q1, d5, d17
+ vld1.16 {d18, d19}, [r2, :128]!
+
+ vmlal.s16 q0, d6, d18
+ vld1.16 {d4, d5}, [r0, :64]!
+ vmlal.s16 q1, d7, d19
+ vld1.16 {d16, d17}, [r2, :128]!
+
+ vmlal.s16 q0, d4, d16
+ vld1.16 {d6, d7}, [r0, :64]!
+ vmlal.s16 q1, d5, d17
+ vld1.16 {d18, d19}, [r2, :128]!
+
+ vmlal.s16 q0, d6, d18
+ vld1.16 {d4, d5}, [r0, :64]!
+ vmlal.s16 q1, d7, d19
+ vld1.16 {d16, d17}, [r2, :128]!
+
+ vmlal.s16 q0, d4, d16
+ vmlal.s16 q1, d5, d17
+
+ vpadd.s32 d0, d0, d1
+ vpadd.s32 d1, d2, d3
+
+ vrshrn.s32 d0, q0, SBC_PROTO_FIXED_SCALE /* round, shift and narrow the four sums to 16 bits */
+
+ vld1.16 {d2, d3, d4, d5}, [r2, :128]! /* second coefficient table, directly after the first in consts */
+
+ vdup.i32 d1, d0[1] /* TODO: can be eliminated */
+ vdup.i32 d0, d0[0] /* TODO: can be eliminated */
+
+ vmull.s16 q3, d2, d0
+ vmull.s16 q8, d3, d0 /* q8 replaces the callee-saved q4 as second accumulator */
+ vmlal.s16 q3, d4, d1
+ vmlal.s16 q8, d5, d1
+
+ vpadd.s32 d0, d6, d7 /* TODO: can be eliminated */
+ vpadd.s32 d1, d16, d17 /* TODO: can be eliminated */
+
+ vst1.32 {d0, d1}, [r1, :128]
+
+ bx lr
+endfunc
+
+function ff_sbc_analyze_8_neon, export=1 /* r0 = in, r1 = out (eight 32-bit results), r2 = consts */
+ /* TODO: merge even and odd cases (or even merge all four calls to this
+ * function) in order to have only aligned reads from 'in' array
+ * and reduce number of load instructions */
+ vld1.16 {d4, d5}, [r0, :64]!
+ vld1.16 {d20, d21}, [r2, :128]! /* consts live in d20-d23: d8-d15 are callee-saved (AAPCS) and must not be clobbered */
+
+ vmull.s16 q12, d4, d20 /* accumulators are q12, q13, q8, q9 (q12/q13 replace the callee-saved q6/q7) */
+ vld1.16 {d6, d7}, [r0, :64]!
+ vmull.s16 q13, d5, d21
+ vld1.16 {d22, d23}, [r2, :128]!
+ vmull.s16 q8, d6, d22
+ vld1.16 {d4, d5}, [r0, :64]!
+ vmull.s16 q9, d7, d23
+ vld1.16 {d20, d21}, [r2, :128]!
+
+ vmlal.s16 q12, d4, d20
+ vld1.16 {d6, d7}, [r0, :64]!
+ vmlal.s16 q13, d5, d21
+ vld1.16 {d22, d23}, [r2, :128]!
+ vmlal.s16 q8, d6, d22
+ vld1.16 {d4, d5}, [r0, :64]!
+ vmlal.s16 q9, d7, d23
+ vld1.16 {d20, d21}, [r2, :128]!
+
+ vmlal.s16 q12, d4, d20
+ vld1.16 {d6, d7}, [r0, :64]!
+ vmlal.s16 q13, d5, d21
+ vld1.16 {d22, d23}, [r2, :128]!
+ vmlal.s16 q8, d6, d22
+ vld1.16 {d4, d5}, [r0, :64]!
+ vmlal.s16 q9, d7, d23
+ vld1.16 {d20, d21}, [r2, :128]!
+
+ vmlal.s16 q12, d4, d20
+ vld1.16 {d6, d7}, [r0, :64]!
+ vmlal.s16 q13, d5, d21
+ vld1.16 {d22, d23}, [r2, :128]!
+ vmlal.s16 q8, d6, d22
+ vld1.16 {d4, d5}, [r0, :64]!
+ vmlal.s16 q9, d7, d23
+ vld1.16 {d20, d21}, [r2, :128]!
+
+ vmlal.s16 q12, d4, d20
+ vld1.16 {d6, d7}, [r0, :64]!
+ vmlal.s16 q13, d5, d21
+ vld1.16 {d22, d23}, [r2, :128]!
+
+ vmlal.s16 q8, d6, d22
+ vmlal.s16 q9, d7, d23
+
+ vpadd.s32 d0, d24, d25
+ vpadd.s32 d1, d26, d27
+ vpadd.s32 d2, d16, d17
+ vpadd.s32 d3, d18, d19
+
+ vrshr.s32 q0, q0, SBC_PROTO_FIXED_SCALE /* round and shift the eight sums ... */
+ vrshr.s32 q1, q1, SBC_PROTO_FIXED_SCALE
+ vmovn.s32 d0, q0 /* ... then narrow them to 16 bits */
+ vmovn.s32 d1, q1
+
+ vdup.i32 d3, d1[1] /* TODO: can be eliminated */
+ vdup.i32 d2, d1[0] /* TODO: can be eliminated */
+ vdup.i32 d1, d0[1] /* TODO: can be eliminated */
+ vdup.i32 d0, d0[0] /* TODO: can be eliminated */
+
+ vld1.16 {d4, d5}, [r2, :128]! /* second coefficient table, directly after the first in consts */
+ vmull.s16 q12, d4, d0
+ vld1.16 {d6, d7}, [r2, :128]!
+ vmull.s16 q13, d5, d0
+ vmull.s16 q8, d6, d0
+ vmull.s16 q9, d7, d0
+
+ vld1.16 {d4, d5}, [r2, :128]!
+ vmlal.s16 q12, d4, d1
+ vld1.16 {d6, d7}, [r2, :128]!
+ vmlal.s16 q13, d5, d1
+ vmlal.s16 q8, d6, d1
+ vmlal.s16 q9, d7, d1
+
+ vld1.16 {d4, d5}, [r2, :128]!
+ vmlal.s16 q12, d4, d2
+ vld1.16 {d6, d7}, [r2, :128]!
+ vmlal.s16 q13, d5, d2
+ vmlal.s16 q8, d6, d2
+ vmlal.s16 q9, d7, d2
+
+ vld1.16 {d4, d5}, [r2, :128]!
+ vmlal.s16 q12, d4, d3
+ vld1.16 {d6, d7}, [r2, :128]!
+ vmlal.s16 q13, d5, d3
+ vmlal.s16 q8, d6, d3
+ vmlal.s16 q9, d7, d3
+
+ vpadd.s32 d0, d24, d25 /* TODO: can be eliminated */
+ vpadd.s32 d1, d26, d27 /* TODO: can be eliminated */
+ vpadd.s32 d2, d16, d17 /* TODO: can be eliminated */
+ vpadd.s32 d3, d18, d19 /* TODO: can be eliminated */
+
+ vst1.32 {d0, d1, d2, d3}, [r1, :128]
+
+ bx lr
+endfunc
+
+function ff_sbc_calc_scalefactors_neon, export=1
+ @ parameters
+ @ r0 = sb_sample_f
+ @ r1 = scale_factor
+ @ r2 = blocks
+ @ r3 = channels
+ @ r4 = subbands (passed on the stack, loaded into r4 below)
+ @ local variables
+ @ r5 = in_loop_1
+ @ r6 = in
+ @ r7 = out_loop_1
+ @ r8 = out
+ @ r9 = ch
+ @ r10 = sb
+ @ r11 = inc
+ @ r12 = blk
+
+ push {r1-r2, r4-r12} @ 11 registers = 44 bytes
+ ldr r4, [sp, #44] @ first stack argument, now above the 44 saved bytes
+ mov r11, #64 @ byte step between consecutive blocks of sb_sample_f
+
+ mov r9, #0
+1:
+ add r5, r0, r9, lsl#5 @ 32 bytes per channel
+ add r7, r1, r9, lsl#5
+
+ mov r10, #0
+2:
+ add r6, r5, r10, lsl#2 @ 4 bytes per subband
+ add r8, r7, r10, lsl#2
+ mov r12, r2
+
+ vmov.s32 q0, #0
+ vmov.s32 q1, #0x8000 @ 1 << SCALE_OUT_BITS
+ vmov.s32 q14, #1
+ vmov.s32 q15, #16 @ 31 - SCALE_OUT_BITS
+ vadd.s32 q1, q1, q14
+3:
+ vld1.32 {d16, d17}, [r6, :128], r11
+ vabs.s32 q8, q8
+ vld1.32 {d18, d19}, [r6, :128], r11
+ vabs.s32 q9, q9
+ vld1.32 {d20, d21}, [r6, :128], r11
+ vabs.s32 q10, q10
+ vld1.32 {d22, d23}, [r6, :128], r11
+ vabs.s32 q11, q11
+ vmax.s32 q0, q0, q8
+ vmax.s32 q1, q1, q9
+ vmax.s32 q0, q0, q10
+ vmax.s32 q1, q1, q11
+ subs r12, r12, #4 @ four blocks are consumed per iteration
+ bgt 3b
+ vmax.s32 q0, q0, q1 @ per-subband maximum of |sample| and (1 << SCALE_OUT_BITS) + 1
+ vsub.s32 q0, q0, q14
+ vclz.s32 q0, q0
+ vsub.s32 q0, q15, q0 @ scale factor = (31 - SCALE_OUT_BITS) - clz(max - 1)
+ vst1.32 {d0, d1}, [r8, :128]
+
+ add r10, r10, #4 @ four subbands are handled per pass
+ cmp r10, r4
+ blt 2b
+
+ add r9, r9, #1
+ cmp r9, r3
+ blt 1b
+
+ pop {r1-r2, r4-r12}
+ bx lr
+endfunc
+
+/*
+ * constants: q13 = (31 - SCALE_OUT_BITS)
+ * q14 = 1
+ * input: q0 - ((1 << SCALE_OUT_BITS) + 1)
+ * r5 - samples for channel 0
+ * r6 - samples for channel 1
+ * output: q0, q1 - scale factors without joint stereo
+ * q2, q3 - scale factors with joint stereo
+ * q15 - joint stereo selection mask
+ */
+.macro calc_scalefactors
+ vmov.s32 q1, q0
+ vmov.s32 q2, q0
+ vmov.s32 q3, q0
+ mov r3, r2 @ r3 counts down from r2; r5/r6 advance by r11 per iteration
+1:
+ vld1.32 {d18, d19}, [r6, :128], r11
+ vbic.s32 q11, q9, q14 @ channel 1 samples with bit 0 cleared
+ vld1.32 {d16, d17}, [r5, :128], r11
+ vhadd.s32 q10, q8, q11 @ halving add of channel 0 and masked channel 1
+ vhsub.s32 q11, q8, q11 @ halving subtract of channel 0 and masked channel 1
+ vabs.s32 q8, q8
+ vabs.s32 q9, q9
+ vabs.s32 q10, q10
+ vabs.s32 q11, q11
+ vmax.s32 q0, q0, q8
+ vmax.s32 q1, q1, q9
+ vmax.s32 q2, q2, q10
+ vmax.s32 q3, q3, q11
+ subs r3, r3, #1
+ bgt 1b
+ vsub.s32 q0, q0, q14
+ vsub.s32 q1, q1, q14
+ vsub.s32 q2, q2, q14
+ vsub.s32 q3, q3, q14
+ vclz.s32 q0, q0
+ vclz.s32 q1, q1
+ vclz.s32 q2, q2
+ vclz.s32 q3, q3
+ vsub.s32 q0, q13, q0 @ scale factor = q13 - clz(max - 1)
+ vsub.s32 q1, q13, q1
+ vsub.s32 q2, q13, q2
+ vsub.s32 q3, q13, q3
+.endm
+
+/*
+ * constants: q14 = 1
+ * input: q15 - joint stereo selection mask
+ * r5 - value set by calc_scalefactors macro
+ * r6 - value set by calc_scalefactors macro
+ */
+.macro update_joint_stereo_samples
+ sub r8, r6, r11 @ r8/r7: one step (r11 bytes) before r6/r5
+ sub r7, r5, r11
+ sub r6, r6, r11, asl #1 @ r6/r5: two steps before their incoming values
+ sub r5, r5, r11, asl #1
+ vld1.32 {d18, d19}, [r6, :128]
+ vbic.s32 q11, q9, q14
+ vld1.32 {d16, d17}, [r5, :128]
+ vld1.32 {d2, d3}, [r8, :128]
+ vbic.s32 q3, q1, q14
+ vld1.32 {d0, d1}, [r7, :128]
+ vhsub.s32 q10, q8, q11
+ vhadd.s32 q11, q8, q11
+ vhsub.s32 q2, q0, q3
+ vhadd.s32 q3, q0, q3
+ vbif.s32 q10, q9, q15 @ where the mask is clear, keep the original sample
+ vbif.s32 d22, d16, d30
+ sub r11, r10, r11, asl #1 @ r11 = r10 - 2 * r11 (the caller in this file sets r10 to 0): step backwards two at a time
+ sub r3, r2, #2
+2:
+ vbif.s32 d23, d17, d31
+ vst1.32 {d20, d21}, [r6, :128], r11
+ vbif.s32 d4, d2, d30
+ vld1.32 {d18, d19}, [r6, :128]
+ vbif.s32 d5, d3, d31
+ vst1.32 {d22, d23}, [r5, :128], r11
+ vbif.s32 d6, d0, d30
+ vld1.32 {d16, d17}, [r5, :128]
+ vbif.s32 d7, d1, d31
+ vst1.32 {d4, d5}, [r8, :128], r11
+ vbic.s32 q11, q9, q14
+ vld1.32 {d2, d3}, [r8, :128]
+ vst1.32 {d6, d7}, [r7, :128], r11
+ vbic.s32 q3, q1, q14
+ vld1.32 {d0, d1}, [r7, :128]
+ vhsub.s32 q10, q8, q11
+ vhadd.s32 q11, q8, q11
+ vhsub.s32 q2, q0, q3
+ vhadd.s32 q3, q0, q3
+ vbif.s32 q10, q9, q15
+ vbif.s32 d22, d16, d30
+ subs r3, r3, #2 @ two blocks are rewritten per iteration
+ bgt 2b
+ sub r11, r10, r11, asr #1 @ undo the scaling above: r11 is the positive single step again
+ vbif.s32 d23, d17, d31
+ vst1.32 {d20, d21}, [r6, :128]
+ vbif.s32 q2, q1, q15
+ vst1.32 {d22, d23}, [r5, :128]
+ vbif.s32 q3, q0, q15
+ vst1.32 {d4, d5}, [r8, :128]
+ vst1.32 {d6, d7}, [r7, :128]
+.endm
+
+function ff_sbc_calc_scalefactors_j_neon, export=1
+ @ parameters
+ @ r0 = in = sb_sample_f
+ @ r1 = out = scale_factor
+ @ r2 = blocks
+ @ r3 = subbands
+ @ local variables
+ @ r4 = consts = ff_sbcdsp_joint_bits_mask
+ @ r5 = in0
+ @ r6 = in1
+ @ r7 = out0
+ @ r8 = out1
+ @ r10 = zero
+ @ r11 = inc
+ @ return r0 = joint
+
+ push {r3-r11}
+ movrelx r4, X(ff_sbcdsp_joint_bits_mask)
+ mov r10, #0
+ mov r11, #64 @ byte step between consecutive blocks of sb_sample_f
+
+ vmov.s32 q14, #1
+ vmov.s32 q13, #16 @ 31 - SCALE_OUT_BITS
+
+ cmp r3, #4
+ bne 8f @ any subbands value other than 4 takes the 8-subband path
+
+4: @ 4 subbands
+ add r5, r0, #0
+ add r6, r0, #32 @ channel 1 starts 32 bytes after channel 0
+ add r7, r1, #0
+ add r8, r1, #32
+ vmov.s32 q0, #0x8000 @ 1 << SCALE_OUT_BITS
+ vadd.s32 q0, q0, q14
+
+ calc_scalefactors
+
+ @ check whether to use joint stereo for subbands 0, 1, 2
+ vadd.s32 q15, q0, q1
+ vadd.s32 q9, q2, q3
+ vmov.s32 d31[1], r10 @ last subband -> no joint
+ vld1.32 {d16, d17}, [r4, :128]!
+ vcgt.s32 q15, q15, q9 @ mask set where the non-joint scale factor sum is larger
+
+ @ calculate and save to memory 'joint' variable
+ @ update and save scale factors to memory
+ vand.s32 q8, q8, q15
+ vbit.s32 q0, q2, q15
+ vpadd.s32 d16, d16, d17
+ vbit.s32 q1, q3, q15
+ vpadd.s32 d16, d16, d16
+ vst1.32 {d0, d1}, [r7, :128]
+ vst1.32 {d2, d3}, [r8, :128]
+ vmov.32 r0, d16[0] @ return value; r0 is not needed as a pointer after this
+
+ update_joint_stereo_samples
+ b 9f
+
+8: @ 8 subbands
+ add r5, r0, #16 @ upper four subbands first (16 bytes in)
+ add r6, r0, #48
+ add r7, r1, #16
+ add r8, r1, #48
+ vmov.s32 q0, #0x8000 @ 1 << SCALE_OUT_BITS
+ vadd.s32 q0, q0, q14
+
+ calc_scalefactors
+
+ @ check whether to use joint stereo for subbands 4, 5, 6
+ vadd.s32 q15, q0, q1
+ vadd.s32 q9, q2, q3
+ vmov.s32 d31[1], r10 @ last subband -> no joint
+ vld1.32 {d16, d17}, [r4, :128]!
+ vcgt.s32 q15, q15, q9
+
+ @ calculate part of 'joint' variable and save it to d24
+ @ update and save scale factors to memory
+ vand.s32 q8, q8, q15
+ vbit.s32 q0, q2, q15
+ vpadd.s32 d16, d16, d17
+ vbit.s32 q1, q3, q15
+ vst1.32 {d0, d1}, [r7, :128]
+ vst1.32 {d2, d3}, [r8, :128]
+ vpadd.s32 d24, d16, d16
+
+ update_joint_stereo_samples
+
+ add r5, r0, #0 @ then the lower four subbands
+ add r6, r0, #32
+ add r7, r1, #0
+ add r8, r1, #32
+ vmov.s32 q0, #0x8000 @ 1 << SCALE_OUT_BITS
+ vadd.s32 q0, q0, q14
+
+ calc_scalefactors
+
+ @ check whether to use joint stereo for subbands 0, 1, 2, 3
+ vadd.s32 q15, q0, q1
+ vadd.s32 q9, q2, q3
+ vld1.32 {d16, d17}, [r4, :128]! @ second half of the bit table (r4 was advanced by the load above)
+ vcgt.s32 q15, q15, q9
+
+ @ combine last part of 'joint' with d24 and save to memory
+ @ update and save scale factors to memory
+ vand.s32 q8, q8, q15
+ vbit.s32 q0, q2, q15
+ vpadd.s32 d16, d16, d17
+ vbit.s32 q1, q3, q15
+ vpadd.s32 d16, d16, d16
+ vst1.32 {d0, d1}, [r7, :128]
+ vadd.s32 d16, d16, d24
+ vst1.32 {d2, d3}, [r8, :128]
+ vmov.32 r0, d16[0] @ return value
+
+ update_joint_stereo_samples
+9:
+ pop {r3-r11}
+ bx lr
+endfunc
+
+function ff_sbc_enc_process_input_4s_neon, export=1
+ @ parameters
+ @ r0 = position
+ @ r1 = pcm
+ @ r2 = X
+ @ r3 = nsamples
+ @ r4 = nchannels (passed on the stack, loaded into r4 below)
+ @ local variables
+ @ r5 = ff_sbc_input_perm_4
+ @ r6 = src / x
+ @ r7 = dst / y
+
+ push {r1, r3-r7} @ 6 registers = 24 bytes
+ ldr r4, [sp, #24] @ first stack argument, now above the 24 saved bytes
+ movrelx r5, X(ff_sbc_input_perm_4)
+
+ @ handle X buffer wraparound
+ cmp r0, r3
+ bge 1f @ if (position < nsamples)
+ add r7, r2, #576 @ &X[0][SBC_X_BUFFER_SIZE - 40]
+ add r6, r2, r0, lsl#1 @ &X[0][position]
+ vld1.16 {d0, d1, d2, d3}, [r6, :128]!
+ vst1.16 {d0, d1, d2, d3}, [r7, :128]!
+ vld1.16 {d0, d1, d2, d3}, [r6, :128]!
+ vst1.16 {d0, d1, d2, d3}, [r7, :128]!
+ vld1.16 {d0}, [r6, :64]!
+ vst1.16 {d0}, [r7, :64]!
+ cmp r4, #1
+ ble 2f @ if (nchannels > 1)
+ add r7, r2, #1232 @ &X[1][SBC_X_BUFFER_SIZE - 40]
+ add r6, r2, #656
+ add r6, r6, r0, lsl#1 @ &X[1][position]
+ vld1.16 {d0, d1, d2, d3}, [r6, :128]!
+ vst1.16 {d0, d1, d2, d3}, [r7, :128]!
+ vld1.16 {d0, d1, d2, d3}, [r6, :128]!
+ vst1.16 {d0, d1, d2, d3}, [r7, :128]!
+ vld1.16 {d0}, [r6, :64]!
+ vst1.16 {d0}, [r7, :64]!
+2:
+ mov r0, #288 @ SBC_X_BUFFER_SIZE - 40
+1:
+
+ add r6, r2, r0, lsl#1 @ &X[0][position]
+ add r7, r6, #656 @ &X[1][position]
+
+ cmp r4, #1
+ ble 8f @ if (nchannels > 1)
+ tst r1, #1
+ beq 7f @ if (pcm & 1)
+ @ poor 'pcm' alignment
+ vld1.8 {d0, d1}, [r5, :128] @ byte permutation table
+1:
+ sub r6, r6, #16 @ X is filled downwards, 8 samples per channel per iteration
+ sub r7, r7, #16
+ sub r0, r0, #8
+ vld1.8 {d4, d5}, [r1]!
+ vuzp.16 d4, d5 @ split the interleaved 16-bit samples into two channels
+ vld1.8 {d20, d21}, [r1]!
+ vuzp.16 d20, d21
+ vswp d5, d20
+ vtbl.8 d16, {d4, d5}, d0
+ vtbl.8 d17, {d4, d5}, d1
+ vtbl.8 d18, {d20, d21}, d0
+ vtbl.8 d19, {d20, d21}, d1
+ vst1.16 {d16, d17}, [r6, :128]
+ vst1.16 {d18, d19}, [r7, :128]
+ subs r3, r3, #8
+ bgt 1b
+ b 9f
+7:
+ @ proper 'pcm' alignment
+ vld1.8 {d0, d1}, [r5, :128] @ byte permutation table
+1:
+ sub r6, r6, #16
+ sub r7, r7, #16
+ sub r0, r0, #8
+ vld2.16 {d4, d5}, [r1]! @ de-interleaving load: one channel per register
+ vld2.16 {d20, d21}, [r1]!
+ vswp d5, d20
+ vtbl.8 d16, {d4, d5}, d0
+ vtbl.8 d17, {d4, d5}, d1
+ vtbl.8 d18, {d20, d21}, d0
+ vtbl.8 d19, {d20, d21}, d1
+ vst1.16 {d16, d17}, [r6, :128]
+ vst1.16 {d18, d19}, [r7, :128]
+ subs r3, r3, #8
+ bgt 1b
+ b 9f
+8:
+ @ mono
+ vld1.8 {d0, d1}, [r5, :128] @ byte permutation table
+1:
+ sub r6, r6, #16
+ sub r0, r0, #8
+ vld1.8 {d4, d5}, [r1]!
+ vtbl.8 d16, {d4, d5}, d0
+ vtbl.8 d17, {d4, d5}, d1
+ vst1.16 {d16, d17}, [r6, :128]
+ subs r3, r3, #8
+ bgt 1b
+9:
+ pop {r1, r3-r7}
+ bx lr @ r0 holds the updated position
+endfunc
+
+function ff_sbc_enc_process_input_8s_neon, export=1
+ @ parameters
+ @ r0 = position
+ @ r1 = pcm
+ @ r2 = X
+ @ r3 = nsamples
+ @ r4 = nchannels (passed on the stack, loaded into r4 below)
+ @ local variables
+ @ r5 = ff_sbc_input_perm_8
+ @ r6 = src
+ @ r7 = dst
+
+ push {r1, r3-r7} @ 6 registers = 24 bytes
+ ldr r4, [sp, #24] @ first stack argument, now above the 24 saved bytes
+ movrelx r5, X(ff_sbc_input_perm_8)
+
+ @ handle X buffer wraparound
+ cmp r0, r3
+ bge 1f @ if (position < nsamples)
+ add r7, r2, #512 @ &X[0][SBC_X_BUFFER_SIZE - 72]
+ add r6, r2, r0, lsl#1 @ &X[0][position]
+ vld1.16 {d0, d1, d2, d3}, [r6, :128]!
+ vst1.16 {d0, d1, d2, d3}, [r7, :128]!
+ vld1.16 {d0, d1, d2, d3}, [r6, :128]!
+ vst1.16 {d0, d1, d2, d3}, [r7, :128]!
+ vld1.16 {d0, d1, d2, d3}, [r6, :128]!
+ vst1.16 {d0, d1, d2, d3}, [r7, :128]!
+ vld1.16 {d0, d1, d2, d3}, [r6, :128]!
+ vst1.16 {d0, d1, d2, d3}, [r7, :128]!
+ vld1.16 {d0, d1}, [r6, :128]!
+ vst1.16 {d0, d1}, [r7, :128]!
+ cmp r4, #1
+ ble 2f @ if (nchannels > 1)
+ add r7, r2, #1168 @ &X[1][SBC_X_BUFFER_SIZE - 72]
+ add r6, r2, #656
+ add r6, r6, r0, lsl#1 @ &X[1][position]
+ vld1.16 {d0, d1, d2, d3}, [r6, :128]!
+ vst1.16 {d0, d1, d2, d3}, [r7, :128]!
+ vld1.16 {d0, d1, d2, d3}, [r6, :128]!
+ vst1.16 {d0, d1, d2, d3}, [r7, :128]!
+ vld1.16 {d0, d1, d2, d3}, [r6, :128]!
+ vst1.16 {d0, d1, d2, d3}, [r7, :128]!
+ vld1.16 {d0, d1, d2, d3}, [r6, :128]!
+ vst1.16 {d0, d1, d2, d3}, [r7, :128]!
+ vld1.16 {d0, d1}, [r6, :128]!
+ vst1.16 {d0, d1}, [r7, :128]!
+2:
+ mov r0, #256 @ SBC_X_BUFFER_SIZE - 72
+1:
+
+ add r6, r2, r0, lsl#1 @ &X[0][position]
+ add r7, r6, #656 @ &X[1][position]
+
+ cmp r4, #1
+ ble 8f @ if (nchannels > 1)
+ tst r1, #1
+ beq 7f @ if (pcm & 1)
+ @ poor 'pcm' alignment
+ vld1.8 {d0, d1, d2, d3}, [r5, :128] @ byte permutation table
+1:
+ sub r6, r6, #32 @ X is filled downwards, 16 samples per channel per iteration
+ sub r7, r7, #32
+ sub r0, r0, #16
+ vld1.8 {d4, d5, d6, d7}, [r1]!
+ vuzp.16 q2, q3 @ split the interleaved 16-bit samples into two channels
+ vld1.8 {d20, d21, d22, d23}, [r1]!
+ vuzp.16 q10, q11
+ vswp q3, q10
+ vtbl.8 d16, {d4, d5, d6, d7}, d0
+ vtbl.8 d17, {d4, d5, d6, d7}, d1
+ vtbl.8 d18, {d4, d5, d6, d7}, d2
+ vtbl.8 d19, {d4, d5, d6, d7}, d3
+ vst1.16 {d16, d17, d18, d19}, [r6, :128]
+ vtbl.8 d16, {d20, d21, d22, d23}, d0
+ vtbl.8 d17, {d20, d21, d22, d23}, d1
+ vtbl.8 d18, {d20, d21, d22, d23}, d2
+ vtbl.8 d19, {d20, d21, d22, d23}, d3
+ vst1.16 {d16, d17, d18, d19}, [r7, :128]
+ subs r3, r3, #16
+ bgt 1b
+ b 9f
+7:
+ @ proper 'pcm' alignment
+ vld1.8 {d0, d1, d2, d3}, [r5, :128] @ byte permutation table
+1:
+ sub r6, r6, #32
+ sub r7, r7, #32
+ sub r0, r0, #16
+ vld2.16 {d4, d5, d6, d7}, [r1]! @ de-interleaving load: one channel per register pair
+ vld2.16 {d20, d21, d22, d23}, [r1]!
+ vswp q3, q10
+ vtbl.8 d16, {d4, d5, d6, d7}, d0
+ vtbl.8 d17, {d4, d5, d6, d7}, d1
+ vtbl.8 d18, {d4, d5, d6, d7}, d2
+ vtbl.8 d19, {d4, d5, d6, d7}, d3
+ vst1.16 {d16, d17, d18, d19}, [r6, :128]
+ vtbl.8 d16, {d20, d21, d22, d23}, d0
+ vtbl.8 d17, {d20, d21, d22, d23}, d1
+ vtbl.8 d18, {d20, d21, d22, d23}, d2
+ vtbl.8 d19, {d20, d21, d22, d23}, d3
+ vst1.16 {d16, d17, d18, d19}, [r7, :128]
+ subs r3, r3, #16
+ bgt 1b
+ b 9f
+8:
+ @ mono
+ vld1.8 {d0, d1, d2, d3}, [r5, :128] @ byte permutation table
+1:
+ sub r6, r6, #32
+ sub r0, r0, #16
+ vld1.8 {d4, d5, d6, d7}, [r1]!
+ vtbl.8 d16, {d4, d5, d6, d7}, d0
+ vtbl.8 d17, {d4, d5, d6, d7}, d1
+ vtbl.8 d18, {d4, d5, d6, d7}, d2
+ vtbl.8 d19, {d4, d5, d6, d7}, d3
+ vst1.16 {d16, d17, d18, d19}, [r6, :128]
+ subs r3, r3, #16
+ bgt 1b
+9:
+ pop {r1, r3-r7}
+ bx lr @ r0 holds the updated position
+endfunc
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index c4134424f0..2d541bf64a 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -632,6 +632,8 @@ enum AVCodecID {
AV_CODEC_ID_ATRAC3AL,
AV_CODEC_ID_ATRAC3PAL,
AV_CODEC_ID_DOLBY_E,
+ AV_CODEC_ID_SBC,
+ AV_CODEC_ID_MSBC,
/* subtitle codecs */
AV_CODEC_ID_FIRST_SUBTITLE = 0x17000, ///< A dummy ID pointing at the start of subtitle codecs.
diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index 92bf1d2681..8d613507e0 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -2859,6 +2859,18 @@ static const AVCodecDescriptor codec_descriptors[] = {
.long_name = NULL_IF_CONFIG_SMALL("ADPCM MTAF"),
.props = AV_CODEC_PROP_LOSSY,
},
+ {
+ .id = AV_CODEC_ID_SBC,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "sbc",
+ .long_name = NULL_IF_CONFIG_SMALL("SBC (low-complexity subband codec)"),
+ },
+ {
+ .id = AV_CODEC_ID_MSBC,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "msbc",
+ .long_name = NULL_IF_CONFIG_SMALL("mSBC (wideband speech mono SBC)"),
+ },
/* subtitle codecs */
{
diff --git a/libavcodec/sbc.c b/libavcodec/sbc.c
new file mode 100644
index 0000000000..99d02cc56a
--- /dev/null
+++ b/libavcodec/sbc.c
@@ -0,0 +1,316 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017 Aurelien Jacobs <aurel at gnuage.org>
+ * Copyright (C) 2012-2013 Intel Corporation
+ * Copyright (C) 2008-2010 Nokia Corporation
+ * Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org>
+ * Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch>
+ * Copyright (C) 2005-2008 Brad Midgley <bmidgley at xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC common functions for the encoder and decoder
+ */
+
+#include "avcodec.h"
+#include "sbc.h"
+
+/*
+ * Byte-wise lookup table for the CRC-8 computed by ff_sbc_crc8() below
+ * (which calculates the CRC-8 of the first len bits in data).
+ * ff_sbc_crc8() indexes it with (current crc ^ input byte) to get the next
+ * crc value; entry 1 is 0x1D, the same constant the bit-wise tail loop of
+ * ff_sbc_crc8() XORs in.
+ */
+static const uint8_t crc_table[256] = {
+ 0x00, 0x1D, 0x3A, 0x27, 0x74, 0x69, 0x4E, 0x53,
+ 0xE8, 0xF5, 0xD2, 0xCF, 0x9C, 0x81, 0xA6, 0xBB,
+ 0xCD, 0xD0, 0xF7, 0xEA, 0xB9, 0xA4, 0x83, 0x9E,
+ 0x25, 0x38, 0x1F, 0x02, 0x51, 0x4C, 0x6B, 0x76,
+ 0x87, 0x9A, 0xBD, 0xA0, 0xF3, 0xEE, 0xC9, 0xD4,
+ 0x6F, 0x72, 0x55, 0x48, 0x1B, 0x06, 0x21, 0x3C,
+ 0x4A, 0x57, 0x70, 0x6D, 0x3E, 0x23, 0x04, 0x19,
+ 0xA2, 0xBF, 0x98, 0x85, 0xD6, 0xCB, 0xEC, 0xF1,
+ 0x13, 0x0E, 0x29, 0x34, 0x67, 0x7A, 0x5D, 0x40,
+ 0xFB, 0xE6, 0xC1, 0xDC, 0x8F, 0x92, 0xB5, 0xA8,
+ 0xDE, 0xC3, 0xE4, 0xF9, 0xAA, 0xB7, 0x90, 0x8D,
+ 0x36, 0x2B, 0x0C, 0x11, 0x42, 0x5F, 0x78, 0x65,
+ 0x94, 0x89, 0xAE, 0xB3, 0xE0, 0xFD, 0xDA, 0xC7,
+ 0x7C, 0x61, 0x46, 0x5B, 0x08, 0x15, 0x32, 0x2F,
+ 0x59, 0x44, 0x63, 0x7E, 0x2D, 0x30, 0x17, 0x0A,
+ 0xB1, 0xAC, 0x8B, 0x96, 0xC5, 0xD8, 0xFF, 0xE2,
+ 0x26, 0x3B, 0x1C, 0x01, 0x52, 0x4F, 0x68, 0x75,
+ 0xCE, 0xD3, 0xF4, 0xE9, 0xBA, 0xA7, 0x80, 0x9D,
+ 0xEB, 0xF6, 0xD1, 0xCC, 0x9F, 0x82, 0xA5, 0xB8,
+ 0x03, 0x1E, 0x39, 0x24, 0x77, 0x6A, 0x4D, 0x50,
+ 0xA1, 0xBC, 0x9B, 0x86, 0xD5, 0xC8, 0xEF, 0xF2,
+ 0x49, 0x54, 0x73, 0x6E, 0x3D, 0x20, 0x07, 0x1A,
+ 0x6C, 0x71, 0x56, 0x4B, 0x18, 0x05, 0x22, 0x3F,
+ 0x84, 0x99, 0xBE, 0xA3, 0xF0, 0xED, 0xCA, 0xD7,
+ 0x35, 0x28, 0x0F, 0x12, 0x41, 0x5C, 0x7B, 0x66,
+ 0xDD, 0xC0, 0xE7, 0xFA, 0xA9, 0xB4, 0x93, 0x8E,
+ 0xF8, 0xE5, 0xC2, 0xDF, 0x8C, 0x91, 0xB6, 0xAB,
+ 0x10, 0x0D, 0x2A, 0x37, 0x64, 0x79, 0x5E, 0x43,
+ 0xB2, 0xAF, 0x88, 0x95, 0xC6, 0xDB, 0xFC, 0xE1,
+ 0x5A, 0x47, 0x60, 0x7D, 0x2E, 0x33, 0x14, 0x09,
+ 0x7F, 0x62, 0x45, 0x58, 0x0B, 0x16, 0x31, 0x2C,
+ 0x97, 0x8A, 0xAD, 0xB0, 0xE3, 0xFE, 0xD9, 0xC4
+};
+
+/**
+ * Compute the SBC CRC-8 over the first len bits of data.
+ *
+ * Whole bytes are folded in through crc_table; the remaining len % 8 bits
+ * (if any) are taken MSB first from the following byte and processed one
+ * bit at a time.
+ *
+ * @param data input buffer, must hold at least (len + 7) / 8 bytes
+ * @param len  number of bits to process
+ * @return the resulting CRC-8 value (the initial value is 0x0f)
+ */
+uint8_t ff_sbc_crc8(const uint8_t *data, size_t len)
+{
+    size_t nb_bytes = len / 8;
+    size_t nb_bits  = len % 8;
+    uint8_t crc = 0x0f;
+    size_t i;
+
+    for (i = 0; i < nb_bytes; i++)
+        crc = crc_table[crc ^ data[i]];
+
+    /* Only touch the trailing byte when it actually carries bits: reading
+     * data[nb_bytes] unconditionally overruns a buffer that is exactly
+     * len / 8 bytes long. */
+    if (nb_bits) {
+        uint8_t octet = data[nb_bytes];
+
+        for (i = 0; i < nb_bits; i++) {
+            int bit = ((octet ^ crc) & 0x80) >> 7;
+
+            crc = ((crc & 0x7f) << 1) ^ (bit ? 0x1d : 0);
+            octet <<= 1;
+        }
+    }
+
+    return crc;
+}
+
+/* A2DP specification: Appendix B, page 69 */
+/* Loudness offsets for 4 subbands, indexed as [frame->frequency][subband];
+ * subtracted from the scale factor when the LOUDNESS allocation is used. */
+static const int sbc_offset4[4][4] = {
+ { -1, 0, 0, 0 },
+ { -2, 0, 0, 1 },
+ { -2, 0, 0, 1 },
+ { -2, 0, 0, 1 }
+};
+
+/* A2DP specification: Appendix B, page 69 */
+/* Loudness offsets for 8 subbands, indexed as [frame->frequency][subband];
+ * subtracted from the scale factor when the LOUDNESS allocation is used. */
+static const int sbc_offset8[4][8] = {
+ { -2, 0, 0, 0, 0, 0, 0, 1 },
+ { -3, 0, 0, 0, 0, 0, 1, 2 },
+ { -4, 0, 0, 0, 0, 0, 1, 2 },
+ { -4, 0, 0, 0, 0, 0, 1, 2 }
+};
+
+/*
+ * Code straight from the spec to calculate the bits array.
+ *
+ * frame: unpacked frame; its frequency, mode, channels, allocation, bitpool
+ * and scale_factor fields are read
+ * bits: output, bits[ch][sb] receives the number of bits (0..16) allocated
+ * to subband sb of channel ch
+ * subbands: number of subbands to process (the caller in this file passes
+ * the constants 4 or 8)
+ *
+ * MONO and DUAL_CHANNEL frames distribute frame->bitpool bits to every
+ * channel separately; STEREO and JOINT_STEREO frames share a single bitpool
+ * between both channels.
+ */
+static av_always_inline void sbc_calculate_bits_internal(
+ const struct sbc_frame *frame, int (*bits)[8], int subbands)
+{
+ uint8_t sf = frame->frequency;
+
+ if (frame->mode == MONO || frame->mode == DUAL_CHANNEL) {
+ int bitneed[2][8], loudness, max_bitneed, bitcount, slicecount, bitslice;
+ int ch, sb;
+
+ /* every channel is allocated on its own */
+ for (ch = 0; ch < frame->channels; ch++) {
+ max_bitneed = 0;
+ if (frame->allocation == SNR) {
+ /* SNR: the bit need is the scale factor itself */
+ for (sb = 0; sb < subbands; sb++) {
+ bitneed[ch][sb] = frame->scale_factor[ch][sb];
+ if (bitneed[ch][sb] > max_bitneed)
+ max_bitneed = bitneed[ch][sb];
+ }
+ } else {
+ /* LOUDNESS: scale factor minus the offset table entry,
+ * halved when positive; -5 for a zero scale factor */
+ for (sb = 0; sb < subbands; sb++) {
+ if (frame->scale_factor[ch][sb] == 0)
+ bitneed[ch][sb] = -5;
+ else {
+ if (subbands == 4)
+ loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb];
+ else
+ loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb];
+ if (loudness > 0)
+ bitneed[ch][sb] = loudness / 2;
+ else
+ bitneed[ch][sb] = loudness;
+ }
+ if (bitneed[ch][sb] > max_bitneed)
+ max_bitneed = bitneed[ch][sb];
+ }
+ }
+
+ /* lower bitslice step by step until the bits counted so far plus
+ * the bits of the next slice reach the bitpool */
+ bitcount = 0;
+ slicecount = 0;
+ bitslice = max_bitneed + 1;
+ do {
+ bitslice--;
+ bitcount += slicecount;
+ slicecount = 0;
+ for (sb = 0; sb < subbands; sb++) {
+ if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16))
+ slicecount++;
+ else if (bitneed[ch][sb] == bitslice + 1)
+ slicecount += 2;
+ }
+ } while (bitcount + slicecount < frame->bitpool);
+
+ /* the last slice fits exactly: take it as well */
+ if (bitcount + slicecount == frame->bitpool) {
+ bitcount += slicecount;
+ bitslice--;
+ }
+
+ /* initial allocation: bitneed - bitslice, clamped to 16, or 0 */
+ for (sb = 0; sb < subbands; sb++) {
+ if (bitneed[ch][sb] < bitslice + 2)
+ bits[ch][sb] = 0;
+ else {
+ bits[ch][sb] = bitneed[ch][sb] - bitslice;
+ if (bits[ch][sb] > 16)
+ bits[ch][sb] = 16;
+ }
+ }
+
+ /* first pass over the left-over bits, in subband order */
+ for (sb = 0; bitcount < frame->bitpool &&
+ sb < subbands; sb++) {
+ if ((bits[ch][sb] >= 2) && (bits[ch][sb] < 16)) {
+ bits[ch][sb]++;
+ bitcount++;
+ } else if ((bitneed[ch][sb] == bitslice + 1) && (frame->bitpool > bitcount + 1)) {
+ bits[ch][sb] = 2;
+ bitcount += 2;
+ }
+ }
+
+ /* second pass: one more bit per subband while bits remain */
+ for (sb = 0; bitcount < frame->bitpool &&
+ sb < subbands; sb++) {
+ if (bits[ch][sb] < 16) {
+ bits[ch][sb]++;
+ bitcount++;
+ }
+ }
+
+ }
+
+ } else if (frame->mode == STEREO || frame->mode == JOINT_STEREO) {
+ int bitneed[2][8], loudness, max_bitneed, bitcount, slicecount, bitslice;
+ int ch, sb;
+
+ /* same steps as above, but max_bitneed, bitslice and bitcount are
+ * shared by both channels */
+ max_bitneed = 0;
+ if (frame->allocation == SNR) {
+ for (ch = 0; ch < 2; ch++) {
+ for (sb = 0; sb < subbands; sb++) {
+ bitneed[ch][sb] = frame->scale_factor[ch][sb];
+ if (bitneed[ch][sb] > max_bitneed)
+ max_bitneed = bitneed[ch][sb];
+ }
+ }
+ } else {
+ for (ch = 0; ch < 2; ch++) {
+ for (sb = 0; sb < subbands; sb++) {
+ if (frame->scale_factor[ch][sb] == 0)
+ bitneed[ch][sb] = -5;
+ else {
+ if (subbands == 4)
+ loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb];
+ else
+ loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb];
+ if (loudness > 0)
+ bitneed[ch][sb] = loudness / 2;
+ else
+ bitneed[ch][sb] = loudness;
+ }
+ if (bitneed[ch][sb] > max_bitneed)
+ max_bitneed = bitneed[ch][sb];
+ }
+ }
+ }
+
+ bitcount = 0;
+ slicecount = 0;
+ bitslice = max_bitneed + 1;
+ do {
+ bitslice--;
+ bitcount += slicecount;
+ slicecount = 0;
+ for (ch = 0; ch < 2; ch++) {
+ for (sb = 0; sb < subbands; sb++) {
+ if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16))
+ slicecount++;
+ else if (bitneed[ch][sb] == bitslice + 1)
+ slicecount += 2;
+ }
+ }
+ } while (bitcount + slicecount < frame->bitpool);
+
+ if (bitcount + slicecount == frame->bitpool) {
+ bitcount += slicecount;
+ bitslice--;
+ }
+
+ for (ch = 0; ch < 2; ch++) {
+ for (sb = 0; sb < subbands; sb++) {
+ if (bitneed[ch][sb] < bitslice + 2) {
+ bits[ch][sb] = 0;
+ } else {
+ bits[ch][sb] = bitneed[ch][sb] - bitslice;
+ if (bits[ch][sb] > 16)
+ bits[ch][sb] = 16;
+ }
+ }
+ }
+
+ /* first pass over the left-over bits, alternating channel 0 and 1
+ * within each subband */
+ ch = 0;
+ sb = 0;
+ while (bitcount < frame->bitpool) {
+ if ((bits[ch][sb] >= 2) && (bits[ch][sb] < 16)) {
+ bits[ch][sb]++;
+ bitcount++;
+ } else if ((bitneed[ch][sb] == bitslice + 1) && (frame->bitpool > bitcount + 1)) {
+ bits[ch][sb] = 2;
+ bitcount += 2;
+ }
+ if (ch == 1) {
+ ch = 0;
+ sb++;
+ if (sb >= subbands)
+ break;
+ } else
+ ch = 1;
+ }
+
+ /* second pass in the same order: one more bit while bits remain */
+ ch = 0;
+ sb = 0;
+ while (bitcount < frame->bitpool) {
+ if (bits[ch][sb] < 16) {
+ bits[ch][sb]++;
+ bitcount++;
+ }
+ if (ch == 1) {
+ ch = 0;
+ sb++;
+ if (sb >= subbands)
+ break;
+ } else
+ ch = 1;
+ }
+
+ }
+
+}
+
+/*
+ * Computes the bit allocation of frame into bits. The subband count is
+ * handed to the always-inline worker as a literal constant: 4 when
+ * frame->subbands is 4, 8 in every other case.
+ */
+void ff_sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
+{
+    if (frame->subbands != 4) {
+        sbc_calculate_bits_internal(frame, bits, 8);
+        return;
+    }
+
+    sbc_calculate_bits_internal(frame, bits, 4);
+}
diff --git a/libavcodec/sbc.h b/libavcodec/sbc.h
new file mode 100644
index 0000000000..169e38f4c1
--- /dev/null
+++ b/libavcodec/sbc.h
@@ -0,0 +1,121 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017 Aurelien Jacobs <aurel at gnuage.org>
+ * Copyright (C) 2012-2014 Intel Corporation
+ * Copyright (C) 2008-2010 Nokia Corporation
+ * Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org>
+ * Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch>
+ * Copyright (C) 2005-2006 Brad Midgley <bmidgley at xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC common definitions for the encoder and decoder
+ */
+
+#ifndef AVCODEC_SBC_H
+#define AVCODEC_SBC_H
+
+#include "avcodec.h"
+
+/* number of blocks stored in sbc_frame.blocks for mSBC frames */
+#define MSBC_BLOCKS 15
+
+/* sampling frequency (2-bit value kept in sbc_frame.frequency) */
+#define SBC_FREQ_16000 0x00
+#define SBC_FREQ_32000 0x01
+#define SBC_FREQ_44100 0x02
+#define SBC_FREQ_48000 0x03
+
+/* blocks (2-bit value kept in sbc_frame.block_mode) */
+#define SBC_BLK_4 0x00
+#define SBC_BLK_8 0x01
+#define SBC_BLK_12 0x02
+#define SBC_BLK_16 0x03
+
+/* channel mode (2-bit value kept in sbc_frame.mode) */
+#define SBC_MODE_MONO 0x00
+#define SBC_MODE_DUAL_CHANNEL 0x01
+#define SBC_MODE_STEREO 0x02
+#define SBC_MODE_JOINT_STEREO 0x03
+
+/* allocation method (1-bit value kept in sbc_frame.allocation) */
+#define SBC_AM_LOUDNESS 0x00
+#define SBC_AM_SNR 0x01
+
+/* subbands (1-bit value kept in sbc_frame.subband_mode) */
+#define SBC_SB_4 0x00
+#define SBC_SB_8 0x01
+
+/* synchronisation words (first byte of a packed SBC / mSBC frame) */
+#define SBC_SYNCWORD 0x9C
+#define MSBC_SYNCWORD 0xAD
+
+/* extra bits of precision for the synthesis filter input data */
+#define SBCDEC_FIXED_EXTRA_BITS 2
+
+/*
+ * Enforce 16 byte alignment for the data, which is supposed to be used
+ * with SIMD optimized code.
+ */
+#define SBC_ALIGN 16
+
+/* This structure contains an unpacked SBC frame.
+ Yes, there is probably quite some unused space herein */
+struct sbc_frame {
+ /* sampling frequency index, one of SBC_FREQ_* */
+ uint8_t frequency;
+ /* block count code, one of SBC_BLK_* */
+ uint8_t block_mode;
+ /* number of blocks in the frame */
+ uint8_t blocks;
+ /* channel mode */
+ enum {
+ MONO = SBC_MODE_MONO,
+ DUAL_CHANNEL = SBC_MODE_DUAL_CHANNEL,
+ STEREO = SBC_MODE_STEREO,
+ JOINT_STEREO = SBC_MODE_JOINT_STEREO
+ } mode;
+ /* number of channels: 1 for MONO, 2 for the other modes */
+ uint8_t channels;
+ /* bit allocation method */
+ enum {
+ LOUDNESS = SBC_AM_LOUDNESS,
+ SNR = SBC_AM_SNR
+ } allocation;
+ /* subband count code, SBC_SB_4 or SBC_SB_8 */
+ uint8_t subband_mode;
+ /* number of subbands: 4 or 8 */
+ uint8_t subbands;
+ /* size of the bit pool distributed by ff_sbc_calculate_bits() */
+ uint8_t bitpool;
+ /* NOTE(review): not referenced by the code visible in this part of the
+ * patch - confirm its meaning against the encoder */
+ uint16_t codesize;
+ /* the decoder stores the return value of its unpack function here */
+ uint16_t length;
+
+ /* bit number x set means joint stereo has been used in subband x */
+ uint8_t joint;
+
+ /* only the lower 4 bits of every element are to be used */
+ DECLARE_ALIGNED(SBC_ALIGN, uint32_t, scale_factor)[2][8];
+
+ /* raw integer subband samples in the frame */
+ DECLARE_ALIGNED(SBC_ALIGN, int32_t, sb_sample_f)[16][2][8];
+
+ /* modified subband samples, indexed as [block][channel][subband] */
+ DECLARE_ALIGNED(SBC_ALIGN, int32_t, sb_sample)[16][2][8];
+
+ /* original pcm audio samples, indexed as [channel][sample] */
+ DECLARE_ALIGNED(SBC_ALIGN, int16_t, pcm_sample)[2][16*8];
+};
+
+/* Returns the CRC-8 of the first len bits (not bytes) of data. */
+uint8_t ff_sbc_crc8(const uint8_t *data, size_t len);
+/* Fills bits[channel][subband] with the number of bits allocated to every
+ * subband of frame. */
+void ff_sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]);
+
+#endif /* AVCODEC_SBC_H */
diff --git a/libavcodec/sbcdec.c b/libavcodec/sbcdec.c
new file mode 100644
index 0000000000..f2a40ad117
--- /dev/null
+++ b/libavcodec/sbcdec.c
@@ -0,0 +1,469 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017 Aurelien Jacobs <aurel at gnuage.org>
+ * Copyright (C) 2012-2013 Intel Corporation
+ * Copyright (C) 2008-2010 Nokia Corporation
+ * Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org>
+ * Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch>
+ * Copyright (C) 2005-2008 Brad Midgley <bmidgley at xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC decoder implementation
+ */
+
+#include <stdbool.h>
+#include "avcodec.h"
+#include "internal.h"
+#include "sbc.h"
+#include "sbcdec_data.h"
+
+/* Synthesis filter state kept between frames, one set per channel. */
+struct sbc_decoder_state {
+ /* filter history written and read by sbc_synthesize_four/eight() */
+ int32_t V[2][170];
+ /* current write position inside V for every synthesis matrix row */
+ int offset[2][16];
+};
+
+/* Private decoder context shared by the SBC and mSBC decoders. */
+typedef struct SBCDecContext {
+ AVClass *class;
+ /* the frame currently being decoded */
+ DECLARE_ALIGNED(SBC_ALIGN, struct sbc_frame, frame);
+ /* synthesis filter state */
+ DECLARE_ALIGNED(SBC_ALIGN, struct sbc_decoder_state, dsp);
+ /* header parser selected at init: sbc_unpack_frame or msbc_unpack_frame */
+ int (*unpack_frame)(const uint8_t *data, struct sbc_frame *frame,
+ size_t len);
+} SBCDecContext;
+
+/*
+ * Unpacks the part of a packed frame that follows the 4 header bytes:
+ * joint stereo flags, scale factors and quantized subband samples, which
+ * are reconstructed into frame->sb_sample. The header fields of frame
+ * (mode, channels, subbands, blocks, allocation, bitpool, ...) must already
+ * have been filled in by the caller.
+ *
+ * data:  start of the packed frame (data[0] is the sync word)
+ * frame: frame to fill in
+ * len:   number of bytes available in data; both callers in this file
+ *        reject len < 4 before calling
+ *
+ * Returns the length in bytes of the packed frame, or a negative
+ * value on error. The error codes returned here are:
+ *
+ *  -1   Data stream too short
+ *  -3   CRC8 incorrect
+ *
+ * (the callers use -2 for an incorrect sync byte / header and -4 for a
+ * bitpool value out of bounds)
+ */
+static int sbc_unpack_frame_internal(const uint8_t *data,
+                                     struct sbc_frame *frame, size_t len)
+{
+    unsigned int consumed;
+    /* Will copy the parts of the header that are relevant to crc
+     * calculation here */
+    uint8_t crc_header[11] = { 0 };
+    int crc_pos = 0;
+    int32_t temp;
+
+    uint32_t audio_sample;
+    int ch, sb, blk, bit;   /* channel, subband, block and bit standard
+                               counters */
+    int bits[2][8];         /* bits distribution */
+    uint32_t levels[2][8];  /* levels derived from that */
+
+    /* sync word, two header bytes and the CRC byte */
+    consumed = 32;
+
+    crc_header[0] = data[1];
+    crc_header[1] = data[2];
+    crc_pos = 16;
+
+    if (frame->mode == JOINT_STEREO) {
+        if (len * 8 < consumed + frame->subbands)
+            return -1;
+
+        /* one flag per subband except the last, MSB first in data[4] */
+        frame->joint = 0x00;
+        for (sb = 0; sb < frame->subbands - 1; sb++)
+            frame->joint |= ((data[4] >> (7 - sb)) & 0x01) << sb;
+        if (frame->subbands == 4)
+            crc_header[crc_pos / 8] = data[4] & 0xf0;
+        else
+            crc_header[crc_pos / 8] = data[4];
+
+        consumed += frame->subbands;
+        crc_pos  += frame->subbands;
+    }
+
+    if (len * 8 < consumed + (4 * frame->subbands * frame->channels))
+        return -1;
+
+    /* 4-bit scale factors, also mirrored into the CRC input */
+    for (ch = 0; ch < frame->channels; ch++) {
+        for (sb = 0; sb < frame->subbands; sb++) {
+            /* FIXME assert(consumed % 4 == 0); */
+            frame->scale_factor[ch][sb] =
+                (data[consumed >> 3] >> (4 - (consumed & 0x7))) & 0x0F;
+            crc_header[crc_pos >> 3] |=
+                frame->scale_factor[ch][sb] << (4 - (crc_pos & 0x7));
+
+            consumed += 4;
+            crc_pos  += 4;
+        }
+    }
+
+    if (data[3] != ff_sbc_crc8(crc_header, crc_pos))
+        return -3;
+
+    ff_sbc_calculate_bits(frame, bits);
+
+    for (ch = 0; ch < frame->channels; ch++) {
+        for (sb = 0; sb < frame->subbands; sb++)
+            levels[ch][sb] = (1 << bits[ch][sb]) - 1;
+    }
+
+    for (blk = 0; blk < frame->blocks; blk++) {
+        for (ch = 0; ch < frame->channels; ch++) {
+            for (sb = 0; sb < frame->subbands; sb++) {
+                uint32_t shift;
+
+                /* no bits allocated: the sample is zero */
+                if (levels[ch][sb] == 0) {
+                    frame->sb_sample[blk][ch][sb] = 0;
+                    continue;
+                }
+
+                shift = frame->scale_factor[ch][sb] +
+                        1 + SBCDEC_FIXED_EXTRA_BITS;
+
+                audio_sample = 0;
+                for (bit = 0; bit < bits[ch][sb]; bit++) {
+                    /* consumed is the index of the bit about to be read,
+                     * so it must stay strictly below len * 8; comparing
+                     * with '>' would allow reading data[len] */
+                    if (consumed >= len * 8)
+                        return -1;
+
+                    if ((data[consumed >> 3] >> (7 - (consumed & 0x7))) & 0x01)
+                        audio_sample |= 1 << (bits[ch][sb] - bit - 1);
+
+                    consumed++;
+                }
+
+                frame->sb_sample[blk][ch][sb] = (int32_t)
+                    (((((uint64_t) audio_sample << 1) | 1) << shift) /
+                    levels[ch][sb]) - (1 << shift);
+            }
+        }
+    }
+
+    /* undo joint stereo coding: flagged subbands carry sum and difference */
+    if (frame->mode == JOINT_STEREO) {
+        for (blk = 0; blk < frame->blocks; blk++) {
+            for (sb = 0; sb < frame->subbands; sb++) {
+                if (frame->joint & (0x01 << sb)) {
+                    temp = frame->sb_sample[blk][0][sb] +
+                           frame->sb_sample[blk][1][sb];
+                    frame->sb_sample[blk][1][sb] =
+                        frame->sb_sample[blk][0][sb] -
+                        frame->sb_sample[blk][1][sb];
+                    frame->sb_sample[blk][0][sb] = temp;
+                }
+            }
+        }
+    }
+
+    /* round the bit count up to a whole number of bytes */
+    if ((consumed & 0x7) != 0)
+        consumed += 8 - (consumed & 0x7);
+
+    return consumed >> 3;
+}
+
+/*
+ * Parses the header of a packed SBC frame into frame and then unpacks the
+ * rest of the frame.
+ *
+ * Returns the length in bytes of the packed frame, -1 when fewer than 4
+ * bytes are available, -2 when the sync byte is wrong, -4 when the bitpool
+ * exceeds the limit for the channel mode, or whatever error
+ * sbc_unpack_frame_internal() reports.
+ */
+static int sbc_unpack_frame(const uint8_t *data, struct sbc_frame *frame,
+                            size_t len)
+{
+    uint8_t hdr;
+    int max_bitpool;
+
+    if (len < 4)
+        return -1;
+    if (data[0] != SBC_SYNCWORD)
+        return -2;
+
+    hdr = data[1];
+
+    frame->frequency  = (hdr >> 6) & 0x03;
+    frame->block_mode = (hdr >> 4) & 0x03;
+    /* SBC_BLK_4 .. SBC_BLK_16 (0 .. 3) stand for 4, 8, 12 and 16 blocks */
+    frame->blocks     = 4 * (frame->block_mode + 1);
+
+    frame->mode       = (hdr >> 2) & 0x03;
+    /* only MONO is single channel */
+    frame->channels   = frame->mode == MONO ? 1 : 2;
+
+    frame->allocation = (hdr >> 1) & 0x01;
+
+    frame->subband_mode = hdr & 0x01;
+    frame->subbands     = frame->subband_mode ? 8 : 4;
+
+    frame->bitpool = data[2];
+
+    /* the bitpool is per channel for MONO and DUAL_CHANNEL and shared by
+     * both channels for STEREO and JOINT_STEREO */
+    if (frame->mode == MONO || frame->mode == DUAL_CHANNEL)
+        max_bitpool = 16 * frame->subbands;
+    else
+        max_bitpool = 32 * frame->subbands;
+
+    if (frame->bitpool > max_bitpool)
+        return -4;
+
+    return sbc_unpack_frame_internal(data, frame, len);
+}
+
+/*
+ * Checks the fixed 3-byte mSBC header, fills frame with the fixed mSBC
+ * parameters and then unpacks the rest of the frame.
+ *
+ * Returns the length in bytes of the packed frame, -1 when fewer than 4
+ * bytes are available, -2 when the header does not match, or whatever
+ * error sbc_unpack_frame_internal() reports.
+ */
+static int msbc_unpack_frame(const uint8_t *data,
+                             struct sbc_frame *frame, size_t len)
+{
+    if (len < 4)
+        return -1;
+
+    /* sync word followed by two zero bytes */
+    if (data[0] != MSBC_SYNCWORD || data[1] != 0 || data[2] != 0)
+        return -2;
+
+    frame->mode         = MONO;
+    frame->channels     = 1;
+    frame->frequency    = SBC_FREQ_16000;
+    frame->allocation   = LOUDNESS;
+    frame->subband_mode = 1;
+    frame->subbands     = 8;
+    frame->block_mode   = SBC_BLK_4;
+    frame->blocks       = MSBC_BLOCKS;
+    frame->bitpool      = 26;
+
+    return sbc_unpack_frame_internal(data, frame, len);
+}
+
+/*
+ * Resets the synthesis filter state: clears the history buffer V and sets
+ * the offsets of both channels to 10, 20, ..., 160.
+ */
+static void sbc_decoder_init(struct sbc_decoder_state *state)
+{
+    int ch, idx;
+
+    memset(state->V, 0, sizeof(state->V));
+
+    for (ch = 0; ch < 2; ch++) {
+        for (idx = 0; idx < FF_ARRAY_ELEMS(state->offset[ch]); idx++)
+            state->offset[ch][idx] = 10 * (idx + 1);
+    }
+}
+
+/*
+ * Synthesizes the 4 PCM samples of block blk of channel ch from the
+ * 4 subband samples in frame->sb_sample[blk][ch] and stores them, clipped
+ * to 16 bits, in frame->pcm_sample[ch][blk * 4 ...]. Updates the filter
+ * history and offsets of that channel in state.
+ */
+static inline void sbc_synthesize_four(struct sbc_decoder_state *state,
+ struct sbc_frame *frame, int ch, int blk)
+{
+ int i, k, idx;
+ int32_t *v = state->V[ch];
+ int *offset = state->offset[ch];
+
+ for (i = 0; i < 8; i++) {
+ /* Shifting */
+ offset[i]--;
+ if (offset[i] < 0) {
+ /* wrap around to index 79 and copy v[0..8] to v[80..88];
+ * the filter below reads up to v[offset + 9] */
+ offset[i] = 79;
+ memcpy(v + 80, v, 9 * sizeof(*v));
+ }
+
+ /* Distribute the new matrix value to the shifted position */
+ v[offset[i]] =
+ ( ff_synmatrix4[i][0] * frame->sb_sample[blk][ch][0] +
+ ff_synmatrix4[i][1] * frame->sb_sample[blk][ch][1] +
+ ff_synmatrix4[i][2] * frame->sb_sample[blk][ch][2] +
+ ff_synmatrix4[i][3] * frame->sb_sample[blk][ch][3] ) >> 15;
+ }
+
+ /* Compute the samples */
+ for (idx = 0, i = 0; i < 4; i++, idx += 5) {
+ /* even taps use row i, odd taps use row i + 4 */
+ k = (i + 4) & 0xf;
+
+ /* Store in output, Q0 */
+ frame->pcm_sample[ch][blk * 4 + i] = av_clip_int16(
+ ( v[offset[i] + 0] * ff_sbc_proto_4_40m0[idx + 0] +
+ v[offset[k] + 1] * ff_sbc_proto_4_40m1[idx + 0] +
+ v[offset[i] + 2] * ff_sbc_proto_4_40m0[idx + 1] +
+ v[offset[k] + 3] * ff_sbc_proto_4_40m1[idx + 1] +
+ v[offset[i] + 4] * ff_sbc_proto_4_40m0[idx + 2] +
+ v[offset[k] + 5] * ff_sbc_proto_4_40m1[idx + 2] +
+ v[offset[i] + 6] * ff_sbc_proto_4_40m0[idx + 3] +
+ v[offset[k] + 7] * ff_sbc_proto_4_40m1[idx + 3] +
+ v[offset[i] + 8] * ff_sbc_proto_4_40m0[idx + 4] +
+ v[offset[k] + 9] * ff_sbc_proto_4_40m1[idx + 4] ) >> 15);
+ }
+}
+
+/*
+ * Synthesizes the 8 PCM samples of block blk of channel ch from the
+ * 8 subband samples in frame->sb_sample[blk][ch] and stores them, clipped
+ * to 16 bits, in frame->pcm_sample[ch][blk * 8 ...]. Updates the filter
+ * history and offsets of that channel in state.
+ */
+static inline void sbc_synthesize_eight(struct sbc_decoder_state *state,
+ struct sbc_frame *frame,
+ int ch, int blk)
+{
+ int i, k, idx;
+ int32_t *v = state->V[ch];
+ int *offset = state->offset[ch];
+
+ for (i = 0; i < 16; i++) {
+ /* Shifting */
+ offset[i]--;
+ if (offset[i] < 0) {
+ /* wrap around to index 159 and copy v[0..8] to v[160..168];
+ * the filter below reads up to v[offset + 9] */
+ offset[i] = 159;
+ memcpy(v + 160, v, 9 * sizeof(*v));
+ }
+
+ /* Distribute the new matrix value to the shifted position */
+ v[offset[i]] =
+ ( ff_synmatrix8[i][0] * frame->sb_sample[blk][ch][0] +
+ ff_synmatrix8[i][1] * frame->sb_sample[blk][ch][1] +
+ ff_synmatrix8[i][2] * frame->sb_sample[blk][ch][2] +
+ ff_synmatrix8[i][3] * frame->sb_sample[blk][ch][3] +
+ ff_synmatrix8[i][4] * frame->sb_sample[blk][ch][4] +
+ ff_synmatrix8[i][5] * frame->sb_sample[blk][ch][5] +
+ ff_synmatrix8[i][6] * frame->sb_sample[blk][ch][6] +
+ ff_synmatrix8[i][7] * frame->sb_sample[blk][ch][7] ) >> 15;
+ }
+
+ /* Compute the samples */
+ for (idx = 0, i = 0; i < 8; i++, idx += 5) {
+ /* even taps use row i, odd taps use row i + 8 */
+ k = (i + 8) & 0xf;
+
+ /* Store in output, Q0 */
+ frame->pcm_sample[ch][blk * 8 + i] = av_clip_int16(
+ ( v[offset[i] + 0] * ff_sbc_proto_8_80m0[idx + 0] +
+ v[offset[k] + 1] * ff_sbc_proto_8_80m1[idx + 0] +
+ v[offset[i] + 2] * ff_sbc_proto_8_80m0[idx + 1] +
+ v[offset[k] + 3] * ff_sbc_proto_8_80m1[idx + 1] +
+ v[offset[i] + 4] * ff_sbc_proto_8_80m0[idx + 2] +
+ v[offset[k] + 5] * ff_sbc_proto_8_80m1[idx + 2] +
+ v[offset[i] + 6] * ff_sbc_proto_8_80m0[idx + 3] +
+ v[offset[k] + 7] * ff_sbc_proto_8_80m1[idx + 3] +
+ v[offset[i] + 8] * ff_sbc_proto_8_80m0[idx + 4] +
+ v[offset[k] + 9] * ff_sbc_proto_8_80m1[idx + 4] ) >> 15);
+ }
+}
+
+/*
+ * Runs the synthesis filter over every block of every channel of frame,
+ * filling frame->pcm_sample.
+ *
+ * Returns the number of PCM samples produced per channel
+ * (blocks * subbands), or AVERROR(EIO) when frame->subbands is neither
+ * 4 nor 8.
+ */
+static int sbc_synthesize_audio(struct sbc_decoder_state *state,
+                                struct sbc_frame *frame)
+{
+    const int nb_subbands = frame->subbands;
+    int ch, blk;
+
+    if (nb_subbands != 4 && nb_subbands != 8)
+        return AVERROR(EIO);
+
+    for (ch = 0; ch < frame->channels; ch++) {
+        for (blk = 0; blk < frame->blocks; blk++) {
+            if (nb_subbands == 4)
+                sbc_synthesize_four(state, frame, ch, blk);
+            else
+                sbc_synthesize_eight(state, frame, ch, blk);
+        }
+    }
+
+    return frame->blocks * nb_subbands;
+}
+
+/* Init callback of the SBC decoder: resets the synthesis state and selects
+ * the regular SBC header parser. Always returns 0. */
+static int sbc_decode_init(AVCodecContext *avctx)
+{
+    SBCDecContext *ctx = avctx->priv_data;
+
+    sbc_decoder_init(&ctx->dsp);
+    ctx->unpack_frame = sbc_unpack_frame;
+
+    return 0;
+}
+
+/* Init callback of the mSBC decoder: resets the synthesis state and selects
+ * the mSBC header parser. Always returns 0. */
+static int msbc_decode_init(AVCodecContext *avctx)
+{
+    SBCDecContext *ctx = avctx->priv_data;
+
+    sbc_decoder_init(&ctx->dsp);
+    ctx->unpack_frame = msbc_unpack_frame;
+
+    return 0;
+}
+
+/**
+ * Decode callback shared by the SBC and mSBC decoders: unpacks one frame
+ * from avpkt, runs the synthesis filter and writes the result as
+ * interleaved signed 16-bit samples into the output frame.
+ *
+ * @param avctx         codec context, priv_data is the SBCDecContext
+ * @param data          output AVFrame
+ * @param got_frame_ptr set to 1 when a frame has been produced
+ * @param avpkt         input packet holding the packed frame
+ * @return the number of bytes of the packed frame on success, the
+ *         non-positive value reported by the unpack function or a negative
+ *         error code otherwise
+ */
+static int sbc_decode_frame(AVCodecContext *avctx,
+                            void *data, int *got_frame_ptr,
+                            AVPacket *avpkt)
+{
+    SBCDecContext *sbc = avctx->priv_data;
+    int i, ch, samples, ret, frame_length;
+    AVFrame *frame = data;
+    int16_t *ptr;
+
+    if (!sbc)
+        return AVERROR(EIO);
+
+    /* Keep the result in an int first: frame.length is a uint16_t, so
+     * storing a negative error code there turns it into a large positive
+     * length and the error check below would never trigger. */
+    frame_length = sbc->unpack_frame(avpkt->data, &sbc->frame, avpkt->size);
+    if (frame_length <= 0)
+        return frame_length;
+    sbc->frame.length = frame_length;
+
+    samples = sbc_synthesize_audio(&sbc->dsp, &sbc->frame);
+    if (samples < 0)
+        return samples;
+
+    frame->nb_samples = samples;
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+        return ret;
+    ptr = (int16_t *)frame->data[0];
+
+    /* NOTE(review): this writes samples * sbc->frame.channels values; the
+     * buffer is presumably sized from the codec context's channel count,
+     * confirm that both always agree. */
+    for (i = 0; i < samples; i++)
+        for (ch = 0; ch < sbc->frame.channels; ch++)
+            *ptr++ = sbc->frame.pcm_sample[ch][i];
+
+    *got_frame_ptr = 1;
+
+    return frame_length;
+}
+
+#if CONFIG_SBC_DECODER
+/* SBC decoder registration: mono or stereo, signed 16-bit output,
+ * 16, 32, 44.1 or 48 kHz. */
+AVCodec ff_sbc_decoder = {
+ .name = "sbc",
+ .long_name = NULL_IF_CONFIG_SMALL("SBC (low-complexity subband codec)"),
+ .type = AVMEDIA_TYPE_AUDIO,
+ .id = AV_CODEC_ID_SBC,
+ .priv_data_size = sizeof(SBCDecContext),
+ .init = sbc_decode_init,
+ .decode = sbc_decode_frame,
+ .capabilities = AV_CODEC_CAP_DR1,
+ .channel_layouts = (const uint64_t[]) { AV_CH_LAYOUT_MONO,
+ AV_CH_LAYOUT_STEREO, 0},
+ .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16,
+ AV_SAMPLE_FMT_NONE },
+ .supported_samplerates = (const int[]) { 16000, 32000, 44100, 48000, 0 },
+};
+#endif
+
+#if CONFIG_MSBC_DECODER
+/* mSBC decoder registration: mono, signed 16-bit output, 16 kHz; shares the
+ * decode callback with the SBC decoder and differs only in its init. */
+AVCodec ff_msbc_decoder = {
+ .name = "msbc",
+ .long_name = NULL_IF_CONFIG_SMALL("mSBC (wideband speech mono SBC)"),
+ .type = AVMEDIA_TYPE_AUDIO,
+ .id = AV_CODEC_ID_MSBC,
+ .priv_data_size = sizeof(SBCDecContext),
+ .init = msbc_decode_init,
+ .decode = sbc_decode_frame,
+ .capabilities = AV_CODEC_CAP_DR1,
+ .channel_layouts = (const uint64_t[]) { AV_CH_LAYOUT_MONO, 0},
+ .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16,
+ AV_SAMPLE_FMT_NONE },
+ .supported_samplerates = (const int[]) { 16000, 0 },
+};
+#endif
diff --git a/libavcodec/sbcdec_data.c b/libavcodec/sbcdec_data.c
new file mode 100644
index 0000000000..2152162207
--- /dev/null
+++ b/libavcodec/sbcdec_data.c
@@ -0,0 +1,127 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017 Aurelien Jacobs <aurel at gnuage.org>
+ * Copyright (C) 2008-2010 Nokia Corporation
+ * Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org>
+ * Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch>
+ * Copyright (C) 2005-2006 Brad Midgley <bmidgley at xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC decoder tables
+ */
+
+#include <stdint.h>
+#include "sbcdec_data.h"
+#include "sbc.h"
+
+/*
+ * Scaling helpers for the table constants below: the value is converted to
+ * int32_t and shifted right by 12 bits (SS4, 4-subband prototype filter),
+ * 14 bits (SS8, 8-subband prototype filter) or
+ * 11 + 1 + SBCDEC_FIXED_EXTRA_BITS bits (SN4 / SN8, synthesis matrices).
+ * The argument and the shift count are parenthesized so that the result
+ * does not depend on the precedence of '+' over '>>' or on the form of the
+ * argument.
+ */
+#define SS4(val) ((int32_t)(val) >> 12)
+#define SS8(val) ((int32_t)(val) >> 14)
+#define SN4(val) ((int32_t)(val) >> (11 + 1 + SBCDEC_FIXED_EXTRA_BITS))
+#define SN8(val) ((int32_t)(val) >> (11 + 1 + SBCDEC_FIXED_EXTRA_BITS))
+
+/* 4-subband prototype filter coefficients applied to the even taps
+ * (v[offset + 0, 2, 4, 6, 8]) by the decoder: 5 values per output sample. */
+const int32_t ff_sbc_proto_4_40m0[] = {
+ SS4(0x00000000), SS4(0xffa6982f), SS4(0xfba93848), SS4(0x0456c7b8),
+ SS4(0x005967d1), SS4(0xfffb9ac7), SS4(0xff589157), SS4(0xf9c2a8d8),
+ SS4(0x027c1434), SS4(0x0019118b), SS4(0xfff3c74c), SS4(0xff137330),
+ SS4(0xf81b8d70), SS4(0x00ec1b8b), SS4(0xfff0b71a), SS4(0xffe99b00),
+ SS4(0xfef84470), SS4(0xf6fb4370), SS4(0xffcdc351), SS4(0xffe01dc7)
+};
+
+/* 4-subband prototype filter coefficients applied to the odd taps
+ * (v[offset + 1, 3, 5, 7, 9]) by the decoder: 5 values per output sample. */
+const int32_t ff_sbc_proto_4_40m1[] = {
+ SS4(0xffe090ce), SS4(0xff2c0475), SS4(0xf694f800), SS4(0xff2c0475),
+ SS4(0xffe090ce), SS4(0xffe01dc7), SS4(0xffcdc351), SS4(0xf6fb4370),
+ SS4(0xfef84470), SS4(0xffe99b00), SS4(0xfff0b71a), SS4(0x00ec1b8b),
+ SS4(0xf81b8d70), SS4(0xff137330), SS4(0xfff3c74c), SS4(0x0019118b),
+ SS4(0x027c1434), SS4(0xf9c2a8d8), SS4(0xff589157), SS4(0xfffb9ac7)
+};
+
+/* 8-subband prototype filter coefficients applied to the even taps
+ * (v[offset + 0, 2, 4, 6, 8]) by the decoder: 5 values per output sample. */
+const int32_t ff_sbc_proto_8_80m0[] = {
+ SS8(0x00000000), SS8(0xfe8d1970), SS8(0xee979f00), SS8(0x11686100),
+ SS8(0x0172e690), SS8(0xfff5bd1a), SS8(0xfdf1c8d4), SS8(0xeac182c0),
+ SS8(0x0d9daee0), SS8(0x00e530da), SS8(0xffe9811d), SS8(0xfd52986c),
+ SS8(0xe7054ca0), SS8(0x0a00d410), SS8(0x006c1de4), SS8(0xffdba705),
+ SS8(0xfcbc98e8), SS8(0xe3889d20), SS8(0x06af2308), SS8(0x000bb7db),
+ SS8(0xffca00ed), SS8(0xfc3fbb68), SS8(0xe071bc00), SS8(0x03bf7948),
+ SS8(0xffc4e05c), SS8(0xffb54b3b), SS8(0xfbedadc0), SS8(0xdde26200),
+ SS8(0x0142291c), SS8(0xff960e94), SS8(0xff9f3e17), SS8(0xfbd8f358),
+ SS8(0xdbf79400), SS8(0xff405e01), SS8(0xff7d4914), SS8(0xff8b1a31),
+ SS8(0xfc1417b8), SS8(0xdac7bb40), SS8(0xfdbb828c), SS8(0xff762170)
+};
+
+/* 8-subband prototype filter coefficients applied to the odd taps
+ * (v[offset + 1, 3, 5, 7, 9]) by the decoder: 5 values per output sample. */
+const int32_t ff_sbc_proto_8_80m1[] = {
+ SS8(0xff7c272c), SS8(0xfcb02620), SS8(0xda612700), SS8(0xfcb02620),
+ SS8(0xff7c272c), SS8(0xff762170), SS8(0xfdbb828c), SS8(0xdac7bb40),
+ SS8(0xfc1417b8), SS8(0xff8b1a31), SS8(0xff7d4914), SS8(0xff405e01),
+ SS8(0xdbf79400), SS8(0xfbd8f358), SS8(0xff9f3e17), SS8(0xff960e94),
+ SS8(0x0142291c), SS8(0xdde26200), SS8(0xfbedadc0), SS8(0xffb54b3b),
+ SS8(0xffc4e05c), SS8(0x03bf7948), SS8(0xe071bc00), SS8(0xfc3fbb68),
+ SS8(0xffca00ed), SS8(0x000bb7db), SS8(0x06af2308), SS8(0xe3889d20),
+ SS8(0xfcbc98e8), SS8(0xffdba705), SS8(0x006c1de4), SS8(0x0a00d410),
+ SS8(0xe7054ca0), SS8(0xfd52986c), SS8(0xffe9811d), SS8(0x00e530da),
+ SS8(0x0d9daee0), SS8(0xeac182c0), SS8(0xfdf1c8d4), SS8(0xfff5bd1a)
+};
+
+/* Synthesis matrix for 4 subbands: each of the 8 rows is multiplied with
+ * the 4 subband samples of a block to produce one new history value. */
+const int32_t ff_synmatrix4[8][4] = {
+ { SN4(0x05a82798), SN4(0xfa57d868), SN4(0xfa57d868), SN4(0x05a82798) },
+ { SN4(0x030fbc54), SN4(0xf89be510), SN4(0x07641af0), SN4(0xfcf043ac) },
+ { SN4(0x00000000), SN4(0x00000000), SN4(0x00000000), SN4(0x00000000) },
+ { SN4(0xfcf043ac), SN4(0x07641af0), SN4(0xf89be510), SN4(0x030fbc54) },
+ { SN4(0xfa57d868), SN4(0x05a82798), SN4(0x05a82798), SN4(0xfa57d868) },
+ { SN4(0xf89be510), SN4(0xfcf043ac), SN4(0x030fbc54), SN4(0x07641af0) },
+ { SN4(0xf8000000), SN4(0xf8000000), SN4(0xf8000000), SN4(0xf8000000) },
+ { SN4(0xf89be510), SN4(0xfcf043ac), SN4(0x030fbc54), SN4(0x07641af0) }
+};
+
+/* Synthesis matrix for 8 subbands: each of the 16 rows is multiplied with
+ * the 8 subband samples of a block to produce one new history value. */
+const int32_t ff_synmatrix8[16][8] = {
+ { SN8(0x05a82798), SN8(0xfa57d868), SN8(0xfa57d868), SN8(0x05a82798),
+ SN8(0x05a82798), SN8(0xfa57d868), SN8(0xfa57d868), SN8(0x05a82798) },
+ { SN8(0x0471ced0), SN8(0xf8275a10), SN8(0x018f8b84), SN8(0x06a6d988),
+ SN8(0xf9592678), SN8(0xfe70747c), SN8(0x07d8a5f0), SN8(0xfb8e3130) },
+ { SN8(0x030fbc54), SN8(0xf89be510), SN8(0x07641af0), SN8(0xfcf043ac),
+ SN8(0xfcf043ac), SN8(0x07641af0), SN8(0xf89be510), SN8(0x030fbc54) },
+ { SN8(0x018f8b84), SN8(0xfb8e3130), SN8(0x06a6d988), SN8(0xf8275a10),
+ SN8(0x07d8a5f0), SN8(0xf9592678), SN8(0x0471ced0), SN8(0xfe70747c) },
+ { SN8(0x00000000), SN8(0x00000000), SN8(0x00000000), SN8(0x00000000),
+ SN8(0x00000000), SN8(0x00000000), SN8(0x00000000), SN8(0x00000000) },
+ { SN8(0xfe70747c), SN8(0x0471ced0), SN8(0xf9592678), SN8(0x07d8a5f0),
+ SN8(0xf8275a10), SN8(0x06a6d988), SN8(0xfb8e3130), SN8(0x018f8b84) },
+ { SN8(0xfcf043ac), SN8(0x07641af0), SN8(0xf89be510), SN8(0x030fbc54),
+ SN8(0x030fbc54), SN8(0xf89be510), SN8(0x07641af0), SN8(0xfcf043ac) },
+ { SN8(0xfb8e3130), SN8(0x07d8a5f0), SN8(0xfe70747c), SN8(0xf9592678),
+ SN8(0x06a6d988), SN8(0x018f8b84), SN8(0xf8275a10), SN8(0x0471ced0) },
+ { SN8(0xfa57d868), SN8(0x05a82798), SN8(0x05a82798), SN8(0xfa57d868),
+ SN8(0xfa57d868), SN8(0x05a82798), SN8(0x05a82798), SN8(0xfa57d868) },
+ { SN8(0xf9592678), SN8(0x018f8b84), SN8(0x07d8a5f0), SN8(0x0471ced0),
+ SN8(0xfb8e3130), SN8(0xf8275a10), SN8(0xfe70747c), SN8(0x06a6d988) },
+ { SN8(0xf89be510), SN8(0xfcf043ac), SN8(0x030fbc54), SN8(0x07641af0),
+ SN8(0x07641af0), SN8(0x030fbc54), SN8(0xfcf043ac), SN8(0xf89be510) },
+ { SN8(0xf8275a10), SN8(0xf9592678), SN8(0xfb8e3130), SN8(0xfe70747c),
+ SN8(0x018f8b84), SN8(0x0471ced0), SN8(0x06a6d988), SN8(0x07d8a5f0) },
+ { SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000),
+ SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000) },
+ { SN8(0xf8275a10), SN8(0xf9592678), SN8(0xfb8e3130), SN8(0xfe70747c),
+ SN8(0x018f8b84), SN8(0x0471ced0), SN8(0x06a6d988), SN8(0x07d8a5f0) },
+ { SN8(0xf89be510), SN8(0xfcf043ac), SN8(0x030fbc54), SN8(0x07641af0),
+ SN8(0x07641af0), SN8(0x030fbc54), SN8(0xfcf043ac), SN8(0xf89be510) },
+ { SN8(0xf9592678), SN8(0x018f8b84), SN8(0x07d8a5f0), SN8(0x0471ced0),
+ SN8(0xfb8e3130), SN8(0xf8275a10), SN8(0xfe70747c), SN8(0x06a6d988) }
+};
diff --git a/libavcodec/sbcdec_data.h b/libavcodec/sbcdec_data.h
new file mode 100644
index 0000000000..1b79d1de23
--- /dev/null
+++ b/libavcodec/sbcdec_data.h
@@ -0,0 +1,44 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017 Aurelien Jacobs <aurel at gnuage.org>
+ * Copyright (C) 2008-2010 Nokia Corporation
+ * Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org>
+ * Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch>
+ * Copyright (C) 2005-2006 Brad Midgley <bmidgley at xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC decoder tables
+ */
+
+#ifndef AVCODEC_SBCDEC_DATA_H
+#define AVCODEC_SBCDEC_DATA_H
+
+#include <stdint.h>
+
+/*
+ * Tables declared without a bound: their length is not visible through this
+ * header, so includers cannot apply sizeof / array-size macros to them.
+ * NOTE(review): the definitions presumably live in sbcdec_data.c -- confirm.
+ */
+extern const int32_t ff_sbc_proto_4_40m0[];
+extern const int32_t ff_sbc_proto_4_40m1[];
+extern const int32_t ff_sbc_proto_8_80m0[];
+extern const int32_t ff_sbc_proto_8_80m1[];
+/* Two-dimensional tables with fixed 8x4 and 16x8 dimensions. */
+extern const int32_t ff_synmatrix4[8][4];
+extern const int32_t ff_synmatrix8[16][8];
+
+#endif /* AVCODEC_SBCDEC_DATA_H */
diff --git a/libavcodec/sbcdsp.c b/libavcodec/sbcdsp.c
new file mode 100644
index 0000000000..0cdf5ef5aa
--- /dev/null
+++ b/libavcodec/sbcdsp.c
@@ -0,0 +1,569 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017 Aurelien Jacobs <aurel at gnuage.org>
+ * Copyright (C) 2012-2013 Intel Corporation
+ * Copyright (C) 2008-2010 Nokia Corporation
+ * Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org>
+ * Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch>
+ * Copyright (C) 2005-2006 Brad Midgley <bmidgley at xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC basic "building bricks"
+ */
+
+#include <stdint.h>
+#include <limits.h>
+#include <string.h>
+#include "libavutil/common.h"
+#include "libavutil/intmath.h"
+#include "libavutil/intreadwrite.h"
+#include "sbc.h"
+#include "sbcdsp.h"
+#include "sbcdsp_data.h"
+
+/*
+ * A reference C code of analysis filter with SIMD-friendly tables
+ * reordering and code layout. This code can be used to develop platform
+ * specific SIMD optimizations. Also it may be used as some kind of test
+ * for compiler autovectorization capabilities (who knows, if the compiler
+ * is very good at this stuff, hand optimized assembly may be not strictly
+ * needed for some platform).
+ *
+ * Note: It is also possible to make a simple variant of analysis filter,
+ * which needs only a single constants table without taking care about
+ * even/odd cases. This simple variant of filter can be implemented without
+ * input data permutation. The only thing that would be lost is the
+ * possibility to use pairwise SIMD multiplications. But for some simple
+ * CPU cores without SIMD extensions it can be useful. If anybody is
+ * interested in implementing such variant of a filter, sourcecode from
+ * bluez versions 4.26/4.27 can be used as a reference and the history of
+ * the changes in git repository done around that time may be worth checking.
+ */
+
+/*
+ * Reference 4-subband analysis of one block.
+ *
+ * in:     input samples; in[0..39] are read
+ * out:    receives 4 values
+ * consts: 40 polyphase coefficients followed by 16 cos coefficients
+ */
+static void sbc_analyze_4_simd(const int16_t *in, int32_t *out,
+                               const int16_t *consts)
+{
+    /* the cos table starts right after the 40 polyphase coefficients */
+    const int16_t *cos_tab = consts + 40;
+    int32_t acc[4];
+    int16_t scaled[4];
+    int i, k, tap;
+
+    /* preload every accumulator with the rounding coefficient */
+    for (k = 0; k < 4; k++)
+        acc[k] = (int32_t) 1 << (SBC_PROTO_FIXED4_SCALE - 1);
+
+    /* low pass polyphase filter: five groups of 8 coefficients, each group
+     * feeding one pair of products into every accumulator */
+    for (tap = 0; tap < 40; tap += 8) {
+        for (k = 0; k < 4; k++) {
+            acc[k] += (int32_t) in[tap + 2 * k]     * consts[tap + 2 * k];
+            acc[k] += (int32_t) in[tap + 2 * k + 1] * consts[tap + 2 * k + 1];
+        }
+    }
+
+    /* scaling */
+    for (k = 0; k < 4; k++)
+        scaled[k] = acc[k] >> SBC_PROTO_FIXED4_SCALE;
+
+    /* do the cos transform: two rows of 8 coefficients, each row combining
+     * one pair of scaled values into all four outputs */
+    for (k = 0; k < 4; k++)
+        acc[k] = 0;
+    for (i = 0; i < 2; i++) {
+        for (k = 0; k < 4; k++) {
+            acc[k] += (int32_t) scaled[2 * i]     * cos_tab[8 * i + 2 * k];
+            acc[k] += (int32_t) scaled[2 * i + 1] * cos_tab[8 * i + 2 * k + 1];
+        }
+    }
+
+    for (k = 0; k < 4; k++)
+        out[k] = acc[k] >> (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS);
+}
+
+/*
+ * Reference 8-subband analysis of one block.
+ *
+ * in:     input samples; in[0..79] are read
+ * out:    receives 8 values
+ * consts: 80 polyphase coefficients followed by 64 cos coefficients
+ */
+static void sbc_analyze_8_simd(const int16_t *in, int32_t *out,
+                               const int16_t *consts)
+{
+    /* the cos table starts right after the 80 polyphase coefficients */
+    const int16_t *cos_tab = consts + 80;
+    int32_t acc[8];
+    int16_t scaled[8];
+    int i, k, tap;
+
+    /* preload every accumulator with the rounding coefficient */
+    for (k = 0; k < 8; k++)
+        acc[k] = (int32_t) 1 << (SBC_PROTO_FIXED8_SCALE - 1);
+
+    /* low pass polyphase filter: five groups of 16 coefficients, each group
+     * feeding one pair of products into every accumulator */
+    for (tap = 0; tap < 80; tap += 16) {
+        for (k = 0; k < 8; k++) {
+            acc[k] += (int32_t) in[tap + 2 * k]     * consts[tap + 2 * k];
+            acc[k] += (int32_t) in[tap + 2 * k + 1] * consts[tap + 2 * k + 1];
+        }
+    }
+
+    /* scaling */
+    for (k = 0; k < 8; k++)
+        scaled[k] = acc[k] >> SBC_PROTO_FIXED8_SCALE;
+
+    /* do the cos transform: four rows of 16 coefficients, each row combining
+     * one pair of scaled values into all eight outputs */
+    for (k = 0; k < 8; k++)
+        acc[k] = 0;
+    for (i = 0; i < 4; i++) {
+        for (k = 0; k < 8; k++) {
+            acc[k] += (int32_t) scaled[2 * i]     * cos_tab[16 * i + 2 * k];
+            acc[k] += (int32_t) scaled[2 * i + 1] * cos_tab[16 * i + 2 * k + 1];
+        }
+    }
+
+    for (k = 0; k < 8; k++)
+        out[k] = acc[k] >> (SBC_COS_TABLE_FIXED8_SCALE - SCALE_OUT_BITS);
+}
+
+/*
+ * Analyze four consecutive 4-subband blocks through s->sbc_analyze_4.
+ * Blocks are taken from x + 12 down to x + 0 and written to out,
+ * out + out_stride, ...; the constant table alternates odd, even, odd, even.
+ */
+static inline void sbc_analyze_4b_4s_simd(SBCDSPContext *s,
+                                          int16_t *x, int32_t *out, int out_stride)
+{
+    int blk;
+
+    for (blk = 0; blk < 4; blk++) {
+        const int16_t *consts = (blk & 1)
+            ? ff_sbcdsp_analysis_consts_fixed4_simd_even
+            : ff_sbcdsp_analysis_consts_fixed4_simd_odd;
+
+        s->sbc_analyze_4(x + 12 - 4 * blk, out, consts);
+        /* advance only between blocks, never past the last one */
+        if (blk < 3)
+            out += out_stride;
+    }
+
+    emms_c();
+}
+
+/*
+ * Analyze four consecutive 8-subband blocks through s->sbc_analyze_8.
+ * Blocks are taken from x + 24 down to x + 0 and written to out,
+ * out + out_stride, ...; the constant table alternates odd, even, odd, even.
+ */
+static inline void sbc_analyze_4b_8s_simd(SBCDSPContext *s,
+                                          int16_t *x, int32_t *out, int out_stride)
+{
+    int blk;
+
+    for (blk = 0; blk < 4; blk++) {
+        const int16_t *consts = (blk & 1)
+            ? ff_sbcdsp_analysis_consts_fixed8_simd_even
+            : ff_sbcdsp_analysis_consts_fixed8_simd_odd;
+
+        s->sbc_analyze_8(x + 24 - 8 * blk, out, consts);
+        /* advance only between blocks, never past the last one */
+        if (blk < 3)
+            out += out_stride;
+    }
+
+    emms_c();
+}
+
+static inline void sbc_analyze_1b_8s_simd_even(SBCDSPContext *s,
+                                               int16_t *x, int32_t *out,
+                                               int out_stride);
+
+/*
+ * Common body of the single-block 8-subband analyzers: run one analysis
+ * with the given constant table, then install "next" as the handler that
+ * the following call through s->sbc_analyze_8s will reach.
+ */
+static inline void sbc_analyze_1b_8s_step(SBCDSPContext *s,
+                                          int16_t *x, int32_t *out,
+                                          const int16_t *consts,
+                                          void (*next)(SBCDSPContext *,
+                                                       int16_t *, int32_t *,
+                                                       int))
+{
+    s->sbc_analyze_8(x, out, consts);
+    s->sbc_analyze_8s = next;
+
+    emms_c();
+}
+
+/* One block with the "odd" table; hands over to the "even" variant.
+ * out_stride is unused. */
+static inline void sbc_analyze_1b_8s_simd_odd(SBCDSPContext *s,
+                                              int16_t *x, int32_t *out,
+                                              int out_stride)
+{
+    sbc_analyze_1b_8s_step(s, x, out,
+                           ff_sbcdsp_analysis_consts_fixed8_simd_odd,
+                           sbc_analyze_1b_8s_simd_even);
+}
+
+/* One block with the "even" table; hands over to the "odd" variant.
+ * out_stride is unused. */
+static inline void sbc_analyze_1b_8s_simd_even(SBCDSPContext *s,
+                                               int16_t *x, int32_t *out,
+                                               int out_stride)
+{
+    sbc_analyze_1b_8s_step(s, x, out,
+                           ff_sbcdsp_analysis_consts_fixed8_simd_even,
+                           sbc_analyze_1b_8s_simd_odd);
+}
+
+/*
+ * Fetch the i-th 16-bit sample through AV_RN16 at byte offset 2 * i from
+ * "pcm". The macro captures a variable named pcm from the scope in which it
+ * is expanded.
+ */
+#define PCM(i) AV_RN16(pcm + 2*(i))
+
+/*
+ * Internal helper functions for input data processing. In order to get
+ * optimal performance, it is important to have "nsamples" and "nchannels"
+ * arguments used with this inline function as compile time constants.
+ */
+
+/*
+ * Deinterleave and reorder input for the 4-subband analysis filter.
+ *
+ * Samples are consumed 8 at a time per channel and stored below "position"
+ * in X[ch]; at most the first two channels are written. Returns the updated
+ * position.
+ */
+static av_always_inline int sbc_encoder_process_input_s4_internal(
+    int position, const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE],
+    int nsamples, int nchannels)
+{
+    int ch;
+
+    /* handle X buffer wraparound: move the old samples to the buffer top */
+    if (position < nsamples) {
+        for (ch = 0; ch < 2 && ch < nchannels; ch++)
+            memcpy(&X[ch][SBC_X_BUFFER_SIZE - 40], &X[ch][position],
+                   36 * sizeof(int16_t));
+        position = SBC_X_BUFFER_SIZE - 40;
+    }
+
+    /* copy/permute audio samples, one group of 8 per iteration */
+    for (; nsamples >= 8; nsamples -= 8) {
+        position -= 8;
+        for (ch = 0; ch < 2 && ch < nchannels; ch++) {
+            int16_t *dst = &X[ch][position];
+
+            dst[0] = PCM(ch + 7 * nchannels);
+            dst[1] = PCM(ch + 3 * nchannels);
+            dst[2] = PCM(ch + 6 * nchannels);
+            dst[3] = PCM(ch + 4 * nchannels);
+            dst[4] = PCM(ch + 0 * nchannels);
+            dst[5] = PCM(ch + 2 * nchannels);
+            dst[6] = PCM(ch + 1 * nchannels);
+            dst[7] = PCM(ch + 5 * nchannels);
+        }
+        /* 8 samples of 2 bytes for every interleaved channel */
+        pcm += 16 * nchannels;
+    }
+
+    return position;
+}
+
+/*
+ * Deinterleave and reorder input for the 8-subband analysis filter.
+ *
+ * Full groups of 16 samples per channel are stored below "position" in
+ * X[ch]; a leading or trailing half group of 8 samples is placed into the
+ * slots the 16-sample layout reserves for that half. At most the first two
+ * channels are written. Returns the updated position.
+ */
+static av_always_inline int sbc_encoder_process_input_s8_internal(
+    int position, const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE],
+    int nsamples, int nchannels)
+{
+    int ch;
+
+    /* handle X buffer wraparound: move the old samples to the buffer top */
+    if (position < nsamples) {
+        for (ch = 0; ch < 2 && ch < nchannels; ch++)
+            memcpy(&X[ch][SBC_X_BUFFER_SIZE - 72], &X[ch][position],
+                   72 * sizeof(int16_t));
+        position = SBC_X_BUFFER_SIZE - 72;
+    }
+
+    /* position is in the middle of a 16-sample group: the next 8 input
+     * samples fill the slots that the full layout below assigns to samples
+     * 8..15 (indices 0 and 2..8), hence the "- 8" in the source offsets */
+    if (position % 16 == 8) {
+        position -= 8;
+        nsamples -= 8;
+        for (ch = 0; ch < 2 && ch < nchannels; ch++) {
+            int16_t *dst = &X[ch][position];
+
+            dst[0] = PCM(ch + (15-8) * nchannels);
+            dst[2] = PCM(ch + (14-8) * nchannels);
+            dst[3] = PCM(ch + (8-8) * nchannels);
+            dst[4] = PCM(ch + (13-8) * nchannels);
+            dst[5] = PCM(ch + (9-8) * nchannels);
+            dst[6] = PCM(ch + (12-8) * nchannels);
+            dst[7] = PCM(ch + (10-8) * nchannels);
+            dst[8] = PCM(ch + (11-8) * nchannels);
+        }
+
+        pcm += 16 * nchannels;
+    }
+
+    /* copy/permute audio samples, one full group of 16 per iteration */
+    for (; nsamples >= 16; nsamples -= 16) {
+        position -= 16;
+        for (ch = 0; ch < 2 && ch < nchannels; ch++) {
+            int16_t *dst = &X[ch][position];
+
+            dst[0]  = PCM(ch + 15 * nchannels);
+            dst[1]  = PCM(ch + 7 * nchannels);
+            dst[2]  = PCM(ch + 14 * nchannels);
+            dst[3]  = PCM(ch + 8 * nchannels);
+            dst[4]  = PCM(ch + 13 * nchannels);
+            dst[5]  = PCM(ch + 9 * nchannels);
+            dst[6]  = PCM(ch + 12 * nchannels);
+            dst[7]  = PCM(ch + 10 * nchannels);
+            dst[8]  = PCM(ch + 11 * nchannels);
+            dst[9]  = PCM(ch + 3 * nchannels);
+            dst[10] = PCM(ch + 6 * nchannels);
+            dst[11] = PCM(ch + 0 * nchannels);
+            dst[12] = PCM(ch + 5 * nchannels);
+            dst[13] = PCM(ch + 1 * nchannels);
+            dst[14] = PCM(ch + 4 * nchannels);
+            dst[15] = PCM(ch + 2 * nchannels);
+        }
+        pcm += 32 * nchannels;
+    }
+
+    /* trailing half group: write the slots the full layout assigns to
+     * samples 0..7 (indices 1 and 9..15 of the group). position only moves
+     * by 8, so index 1 of the group is dst[-7], below the returned position */
+    if (nsamples == 8) {
+        position -= 8;
+        for (ch = 0; ch < 2 && ch < nchannels; ch++) {
+            int16_t *dst = &X[ch][position];
+
+            dst[-7] = PCM(ch + 7 * nchannels);
+            dst[1]  = PCM(ch + 3 * nchannels);
+            dst[2]  = PCM(ch + 6 * nchannels);
+            dst[3]  = PCM(ch + 0 * nchannels);
+            dst[4]  = PCM(ch + 5 * nchannels);
+            dst[5]  = PCM(ch + 1 * nchannels);
+            dst[6]  = PCM(ch + 4 * nchannels);
+            dst[7]  = PCM(ch + 2 * nchannels);
+        }
+    }
+
+    return position;
+}
+
+/*
+ * Input data processing functions. The data is endian converted if needed,
+ * channels are deinterleaved and audio samples are reordered for use in
+ * SIMD-friendly analysis filter function. The results are put into "X"
+ * array, getting appended to the previous data (or it is better to say
+ * prepended, as the buffer is filled from top to bottom). Old data is
+ * discarded when needed, but availability of (10 * nrof_subbands)
+ * contiguous samples is always guaranteed for the input to the analysis
+ * filter. This is achieved by copying a sufficient part of old data
+ * to the top of the buffer on buffer wraparound.
+ */
+
+/*
+ * 4-subband input entry point. Dispatches to the inlined worker with a
+ * literal channel count (1 or 2) so each call site is specialized.
+ */
+static int sbc_enc_process_input_4s(int position, const uint8_t *pcm,
+                                    int16_t X[2][SBC_X_BUFFER_SIZE],
+                                    int nsamples, int nchannels)
+{
+    if (nchannels <= 1)
+        return sbc_encoder_process_input_s4_internal(
+            position, pcm, X, nsamples, 1);
+
+    return sbc_encoder_process_input_s4_internal(
+        position, pcm, X, nsamples, 2);
+}
+
+/*
+ * 8-subband input entry point. Dispatches to the inlined worker with a
+ * literal channel count (1 or 2) so each call site is specialized.
+ */
+static int sbc_enc_process_input_8s(int position, const uint8_t *pcm,
+                                    int16_t X[2][SBC_X_BUFFER_SIZE],
+                                    int nsamples, int nchannels)
+{
+    if (nchannels <= 1)
+        return sbc_encoder_process_input_s8_internal(
+            position, pcm, X, nsamples, 1);
+
+    return sbc_encoder_process_input_s8_internal(
+        position, pcm, X, nsamples, 2);
+}
+
+/*
+ * Compute one scale factor per channel and subband.
+ *
+ * For each (channel, subband) the values |sample| - 1 of all non-zero
+ * samples across the blocks are OR-ed into a word that always has bit
+ * SCALE_OUT_BITS set; the scale factor is the position of the highest set
+ * bit of that word relative to SCALE_OUT_BITS.
+ */
+static void sbc_calc_scalefactors(int32_t sb_sample_f[16][2][8],
+                                  uint32_t scale_factor[2][8],
+                                  int blocks, int channels, int subbands)
+{
+    int ch, sb, blk;
+
+    for (ch = 0; ch < channels; ch++) {
+        for (sb = 0; sb < subbands; sb++) {
+            /* the seed bit makes the smallest possible result zero */
+            uint32_t bits = 1 << SCALE_OUT_BITS;
+
+            for (blk = 0; blk < blocks; blk++) {
+                int32_t mag = FFABS(sb_sample_f[blk][ch][sb]);
+
+                if (mag)
+                    bits |= mag - 1;
+            }
+
+            scale_factor[ch][sb] = (31 - SCALE_OUT_BITS) - ff_clz(bits);
+        }
+    }
+}
+
+/* OR |sample| - 1 into bits; a zero sample leaves bits untouched. */
+static inline uint32_t sbc_or_magnitude(uint32_t bits, int32_t sample)
+{
+    int32_t mag = FFABS(sample);
+
+    return mag ? bits | (mag - 1) : bits;
+}
+
+/*
+ * Compute stereo scale factors and decide per subband on joint coding.
+ *
+ * For every subband except the last, the two channels are also evaluated as
+ * (ch0 >> 1) + (ch1 >> 1) and (ch0 >> 1) - (ch1 >> 1). When that pair yields
+ * a smaller scale factor sum, the samples in sb_sample_f are replaced by it
+ * and the subband is flagged.
+ *
+ * Returns a bitmask in which bit (subbands - 1 - sb) is set for every
+ * subband sb that uses joint stereo.
+ */
+static int sbc_calc_scalefactors_j(int32_t sb_sample_f[16][2][8],
+                                   uint32_t scale_factor[2][8],
+                                   int blocks, int subbands)
+{
+    int blk, sb, joint = 0;
+    uint32_t bits0, bits1;
+
+    /* last subband does not use joint stereo */
+    sb = subbands - 1;
+    bits0 = 1 << SCALE_OUT_BITS;
+    bits1 = 1 << SCALE_OUT_BITS;
+    for (blk = 0; blk < blocks; blk++) {
+        bits0 = sbc_or_magnitude(bits0, sb_sample_f[blk][0][sb]);
+        bits1 = sbc_or_magnitude(bits1, sb_sample_f[blk][1][sb]);
+    }
+    scale_factor[0][sb] = (31 - SCALE_OUT_BITS) - ff_clz(bits0);
+    scale_factor[1][sb] = (31 - SCALE_OUT_BITS) - ff_clz(bits1);
+
+    /* the rest of subbands can use joint stereo */
+    for (sb--; sb >= 0; sb--) {
+        int32_t sb_sample_j[16][2];
+        uint32_t sf_j0, sf_j1;
+
+        /* scale factors of the plain pair; build the joint pair as we go */
+        bits0 = 1 << SCALE_OUT_BITS;
+        bits1 = 1 << SCALE_OUT_BITS;
+        for (blk = 0; blk < blocks; blk++) {
+            int32_t s0 = sb_sample_f[blk][0][sb];
+            int32_t s1 = sb_sample_f[blk][1][sb];
+
+            sb_sample_j[blk][0] = (s0 >> 1) + (s1 >> 1);
+            sb_sample_j[blk][1] = (s0 >> 1) - (s1 >> 1);
+            bits0 = sbc_or_magnitude(bits0, s0);
+            bits1 = sbc_or_magnitude(bits1, s1);
+        }
+        scale_factor[0][sb] = (31 - SCALE_OUT_BITS) - ff_clz(bits0);
+        scale_factor[1][sb] = (31 - SCALE_OUT_BITS) - ff_clz(bits1);
+
+        /* scale factors of the joint pair */
+        bits0 = 1 << SCALE_OUT_BITS;
+        bits1 = 1 << SCALE_OUT_BITS;
+        for (blk = 0; blk < blocks; blk++) {
+            bits0 = sbc_or_magnitude(bits0, sb_sample_j[blk][0]);
+            bits1 = sbc_or_magnitude(bits1, sb_sample_j[blk][1]);
+        }
+        sf_j0 = (31 - SCALE_OUT_BITS) - ff_clz(bits0);
+        sf_j1 = (31 - SCALE_OUT_BITS) - ff_clz(bits1);
+
+        /* decide whether to use joint stereo for this subband */
+        if ((scale_factor[0][sb] + scale_factor[1][sb]) > sf_j0 + sf_j1) {
+            joint |= 1 << (subbands - 1 - sb);
+            scale_factor[0][sb] = sf_j0;
+            scale_factor[1][sb] = sf_j1;
+            for (blk = 0; blk < blocks; blk++) {
+                sb_sample_f[blk][0][sb] = sb_sample_j[blk][0];
+                sb_sample_f[blk][1][sb] = sb_sample_j[blk][1];
+            }
+        }
+    }
+
+    /* bitmask with the information about subbands using joint stereo */
+    return joint;
+}
+
+/*
+ * Detect CPU features and setup function pointers
+ */
+av_cold void ff_sbcdsp_init(SBCDSPContext *s)
+{
+    /* C reference analysis kernels */
+    s->sbc_analyze_4 = sbc_analyze_4_simd;
+    s->sbc_analyze_8 = sbc_analyze_8_simd;
+
+    /* Block drivers: the 8-subband one depends on s->increment, which
+     * therefore has to be set before this function is called */
+    s->sbc_analyze_4s = sbc_analyze_4b_4s_simd;
+    s->sbc_analyze_8s = s->increment == 1 ? sbc_analyze_1b_8s_simd_odd
+                                          : sbc_analyze_4b_8s_simd;
+
+    /* Input reordering / deinterleaving */
+    s->sbc_enc_process_input_4s = sbc_enc_process_input_4s;
+    s->sbc_enc_process_input_8s = sbc_enc_process_input_8s;
+
+    /* Scale factors calculation */
+    s->sbc_calc_scalefactors   = sbc_calc_scalefactors;
+    s->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j;
+
+    /* Architecture hooks run last so they can replace the defaults above */
+    if (ARCH_ARM)
+        ff_sbcdsp_init_arm(s);
+    if (ARCH_X86)
+        ff_sbcdsp_init_x86(s);
+}
diff --git a/libavcodec/sbcdsp.h b/libavcodec/sbcdsp.h
new file mode 100644
index 0000000000..334c058e6d
--- /dev/null
+++ b/libavcodec/sbcdsp.h
@@ -0,0 +1,86 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017 Aurelien Jacobs <aurel at gnuage.org>
+ * Copyright (C) 2008-2010 Nokia Corporation
+ * Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org>
+ * Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch>
+ * Copyright (C) 2005-2006 Brad Midgley <bmidgley at xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC basic "building bricks"
+ */
+
+#ifndef AVCODEC_SBCDSP_H
+#define AVCODEC_SBCDSP_H
+
+#include "sbc.h"
+#include "sbcdsp_data.h"
+
+/* Subtracted from the cos table scale when the analysis output is shifted,
+ * and used as the seed bit position in the scale factor calculation. */
+#define SCALE_OUT_BITS 15
+/* Number of int16_t entries in each per-channel row of the X buffer. */
+#define SBC_X_BUFFER_SIZE 328
+
+typedef struct sbc_dsp_context SBCDSPContext;
+
+/*
+ * DSP state and dispatch table. ff_sbcdsp_init() fills in the function
+ * pointers; it reads "increment" while doing so.
+ */
+struct sbc_dsp_context {
+ /* NOTE(review): presumably the current index into X as taken and returned
+ * by the sbc_enc_process_input_* callbacks -- confirm against the caller */
+ int position;
+ /* Number of consecutive blocks handled by the encoder */
+ uint8_t increment;
+ /* Two rows of samples, matching the X parameter of the
+ * sbc_enc_process_input_* callbacks */
+ DECLARE_ALIGNED(SBC_ALIGN, int16_t, X)[2][SBC_X_BUFFER_SIZE];
+ /* Single-block analysis kernels for 4 and 8 subbands; "consts" is one of
+ * the ff_sbcdsp_analysis_consts_* tables */
+ void (*sbc_analyze_4)(const int16_t *in, int32_t *out, const int16_t *consts);
+ void (*sbc_analyze_8)(const int16_t *in, int32_t *out, const int16_t *consts);
+ /* Polyphase analysis filter for 4 subbands configuration,
+ * it handles "increment" blocks at once */
+ void (*sbc_analyze_4s)(SBCDSPContext *s,
+ int16_t *x, int32_t *out, int out_stride);
+ /* Polyphase analysis filter for 8 subbands configuration,
+ * it handles "increment" blocks at once */
+ void (*sbc_analyze_8s)(SBCDSPContext *s,
+ int16_t *x, int32_t *out, int out_stride);
+ /* Process input data (deinterleave, endian conversion, reordering),
+ * depending on the number of subbands and input data byte order */
+ int (*sbc_enc_process_input_4s)(int position, const uint8_t *pcm,
+ int16_t X[2][SBC_X_BUFFER_SIZE],
+ int nsamples, int nchannels);
+ int (*sbc_enc_process_input_8s)(int position, const uint8_t *pcm,
+ int16_t X[2][SBC_X_BUFFER_SIZE],
+ int nsamples, int nchannels);
+ /* Scale factors calculation */
+ void (*sbc_calc_scalefactors)(int32_t sb_sample_f[16][2][8],
+ uint32_t scale_factor[2][8],
+ int blocks, int channels, int subbands);
+ /* Scale factors calculation with joint stereo support */
+ int (*sbc_calc_scalefactors_j)(int32_t sb_sample_f[16][2][8],
+ uint32_t scale_factor[2][8],
+ int blocks, int subbands);
+};
+
+/*
+ * Initialize pointers to the functions which are the basic "building bricks"
+ * of SBC codec. Best implementation is selected based on target CPU
+ * capabilities.
+ */
+void ff_sbcdsp_init(SBCDSPContext *s);
+
+/*
+ * Architecture-specific initializers. ff_sbcdsp_init() calls them, guarded
+ * by ARCH_ARM / ARCH_X86, after it has installed the C implementations.
+ */
+void ff_sbcdsp_init_arm(SBCDSPContext *s);
+void ff_sbcdsp_init_x86(SBCDSPContext *s);
+
+#endif /* AVCODEC_SBCDSP_H */
diff --git a/libavcodec/sbcdsp_data.c b/libavcodec/sbcdsp_data.c
new file mode 100644
index 0000000000..3007a23bc7
--- /dev/null
+++ b/libavcodec/sbcdsp_data.c
@@ -0,0 +1,335 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017 Aurelien Jacobs <aurel at gnuage.org>
+ * Copyright (C) 2008-2010 Nokia Corporation
+ * Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org>
+ * Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch>
+ * Copyright (C) 2005-2006 Brad Midgley <bmidgley at xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * miscellaneous SBC tables
+ */
+
+#include "sbcdsp_data.h"
+
+/*
+ * Float to fixed-point converters used to build the tables below.
+ * Each one multiplies its argument by
+ * 1 << (sizeof(int16_t) * CHAR_BIT - 1), adds 0.5 and converts to int32_t,
+ * which rounds a non-negative argument to the nearest integer. The tables
+ * keep the argument non-negative and apply the sign outside, as -F(...).
+ * The PROTO variants additionally double the argument.
+ * NOTE(review): CHAR_BIT needs <limits.h>; presumably it is pulled in
+ * through sbcdsp_data.h -- confirm.
+ */
+#define F_PROTO4(x) (int32_t) ((x * 2) * \
+ ((int32_t) 1 << (sizeof(int16_t) * CHAR_BIT - 1)) + 0.5)
+#define F_COS4(x) (int32_t) ((x) * \
+ ((int32_t) 1 << (sizeof(int16_t) * CHAR_BIT - 1)) + 0.5)
+#define F_PROTO8(x) (int32_t) ((x * 2) * \
+ ((int32_t) 1 << (sizeof(int16_t) * CHAR_BIT - 1)) + 0.5)
+#define F_COS8(x) (int32_t) ((x) * \
+ ((int32_t) 1 << (sizeof(int16_t) * CHAR_BIT - 1)) + 0.5)
+
+/*
+ * Constant tables for the use in SIMD optimized analysis filters
+ * Each table consists of two parts:
+ * 1. reordered "proto" table
+ * 2. reordered "cos" table
+ *
+ * Due to non-symmetrical reordering, separate tables for "even"
+ * and "odd" cases are needed
+ */
+
+/*
+ * "Even" constants for the 4-subband analysis: entries 0..39 are the
+ * reordered proto coefficients, entries 40..55 the reordered cos
+ * coefficients. Every proto coefficient is multiplied by one of C0..C3 and
+ * the cos coefficients are divided by the same constants.
+ */
+DECLARE_ALIGNED(SBC_ALIGN, const int16_t, ff_sbcdsp_analysis_consts_fixed4_simd_even)[40 + 16] = {
+#define C0 1.0932568993
+#define C1 1.3056875580
+#define C2 1.3056875580
+#define C3 1.6772280856
+
+#define F(x) F_PROTO4(x)
+ F(0.00000000E+00 * C0), F(3.83720193E-03 * C0),
+ F(5.36548976E-04 * C1), F(2.73370904E-03 * C1),
+ F(3.06012286E-03 * C2), F(3.89205149E-03 * C2),
+ F(0.00000000E+00 * C3), -F(1.49188357E-03 * C3),
+ F(1.09137620E-02 * C0), F(2.58767811E-02 * C0),
+ F(2.04385087E-02 * C1), F(3.21939290E-02 * C1),
+ F(7.76463494E-02 * C2), F(6.13245186E-03 * C2),
+ F(0.00000000E+00 * C3), -F(2.88757392E-02 * C3),
+ F(1.35593274E-01 * C0), F(2.94315332E-01 * C0),
+ F(1.94987841E-01 * C1), F(2.81828203E-01 * C1),
+ -F(1.94987841E-01 * C2), F(2.81828203E-01 * C2),
+ F(0.00000000E+00 * C3), -F(2.46636662E-01 * C3),
+ -F(1.35593274E-01 * C0), F(2.58767811E-02 * C0),
+ -F(7.76463494E-02 * C1), F(6.13245186E-03 * C1),
+ -F(2.04385087E-02 * C2), F(3.21939290E-02 * C2),
+ F(0.00000000E+00 * C3), F(2.88217274E-02 * C3),
+ -F(1.09137620E-02 * C0), F(3.83720193E-03 * C0),
+ -F(3.06012286E-03 * C1), F(3.89205149E-03 * C1),
+ -F(5.36548976E-04 * C2), F(2.73370904E-03 * C2),
+ F(0.00000000E+00 * C3), -F(1.86581691E-03 * C3),
+#undef F
+#define F(x) F_COS4(x)
+ F(0.7071067812 / C0), F(0.9238795325 / C1),
+ -F(0.7071067812 / C0), F(0.3826834324 / C1),
+ -F(0.7071067812 / C0), -F(0.3826834324 / C1),
+ F(0.7071067812 / C0), -F(0.9238795325 / C1),
+ F(0.3826834324 / C2), -F(1.0000000000 / C3),
+ -F(0.9238795325 / C2), -F(1.0000000000 / C3),
+ F(0.9238795325 / C2), -F(1.0000000000 / C3),
+ -F(0.3826834324 / C2), -F(1.0000000000 / C3),
+#undef F
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+};
+
+/*
+ * "Odd" constants for the 4-subband analysis: entries 0..39 are the
+ * reordered proto coefficients, entries 40..55 the reordered cos
+ * coefficients. Every proto coefficient is multiplied by one of C0..C3 and
+ * the cos coefficients are divided by the same constants.
+ */
+DECLARE_ALIGNED(SBC_ALIGN, const int16_t, ff_sbcdsp_analysis_consts_fixed4_simd_odd)[40 + 16] = {
+#define C0 1.3056875580
+#define C1 1.6772280856
+#define C2 1.0932568993
+#define C3 1.3056875580
+
+#define F(x) F_PROTO4(x)
+ F(2.73370904E-03 * C0), F(5.36548976E-04 * C0),
+ -F(1.49188357E-03 * C1), F(0.00000000E+00 * C1),
+ F(3.83720193E-03 * C2), F(1.09137620E-02 * C2),
+ F(3.89205149E-03 * C3), F(3.06012286E-03 * C3),
+ F(3.21939290E-02 * C0), F(2.04385087E-02 * C0),
+ -F(2.88757392E-02 * C1), F(0.00000000E+00 * C1),
+ F(2.58767811E-02 * C2), F(1.35593274E-01 * C2),
+ F(6.13245186E-03 * C3), F(7.76463494E-02 * C3),
+ F(2.81828203E-01 * C0), F(1.94987841E-01 * C0),
+ -F(2.46636662E-01 * C1), F(0.00000000E+00 * C1),
+ F(2.94315332E-01 * C2), -F(1.35593274E-01 * C2),
+ F(2.81828203E-01 * C3), -F(1.94987841E-01 * C3),
+ F(6.13245186E-03 * C0), -F(7.76463494E-02 * C0),
+ F(2.88217274E-02 * C1), F(0.00000000E+00 * C1),
+ F(2.58767811E-02 * C2), -F(1.09137620E-02 * C2),
+ F(3.21939290E-02 * C3), -F(2.04385087E-02 * C3),
+ F(3.89205149E-03 * C0), -F(3.06012286E-03 * C0),
+ -F(1.86581691E-03 * C1), F(0.00000000E+00 * C1),
+ F(3.83720193E-03 * C2), F(0.00000000E+00 * C2),
+ F(2.73370904E-03 * C3), -F(5.36548976E-04 * C3),
+#undef F
+#define F(x) F_COS4(x)
+ F(0.9238795325 / C0), -F(1.0000000000 / C1),
+ F(0.3826834324 / C0), -F(1.0000000000 / C1),
+ -F(0.3826834324 / C0), -F(1.0000000000 / C1),
+ -F(0.9238795325 / C0), -F(1.0000000000 / C1),
+ F(0.7071067812 / C2), F(0.3826834324 / C3),
+ -F(0.7071067812 / C2), -F(0.9238795325 / C3),
+ -F(0.7071067812 / C2), F(0.9238795325 / C3),
+ F(0.7071067812 / C2), -F(0.3826834324 / C3),
+#undef F
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+};
+
+/*
+ * "Even" constants for the 8-subband analysis: entries 0..79 are the
+ * reordered proto coefficients, entries 80..143 the reordered cos
+ * coefficients. Every proto coefficient is multiplied by one of C0..C7 and
+ * the cos coefficients are divided by the same constants.
+ */
+DECLARE_ALIGNED(SBC_ALIGN, const int16_t, ff_sbcdsp_analysis_consts_fixed8_simd_even)[80 + 64] = {
+#define C0 2.7906148894
+#define C1 2.4270044280
+#define C2 2.8015616024
+#define C3 3.1710363741
+#define C4 2.5377944043
+#define C5 2.4270044280
+#define C6 2.8015616024
+#define C7 3.1710363741
+
+#define F(x) F_PROTO8(x)
+ F(0.00000000E+00 * C0), F(2.01182542E-03 * C0),
+ F(1.56575398E-04 * C1), F(1.78371725E-03 * C1),
+ F(3.43256425E-04 * C2), F(1.47640169E-03 * C2),
+ F(5.54620202E-04 * C3), F(1.13992507E-03 * C3),
+ -F(8.23919506E-04 * C4), F(0.00000000E+00 * C4),
+ F(2.10371989E-03 * C5), F(3.49717454E-03 * C5),
+ F(1.99454554E-03 * C6), F(1.64973098E-03 * C6),
+ F(1.61656283E-03 * C7), F(1.78805361E-04 * C7),
+ F(5.65949473E-03 * C0), F(1.29371806E-02 * C0),
+ F(8.02941163E-03 * C1), F(1.53184106E-02 * C1),
+ F(1.04584443E-02 * C2), F(1.62208471E-02 * C2),
+ F(1.27472335E-02 * C3), F(1.59045603E-02 * C3),
+ -F(1.46525263E-02 * C4), F(0.00000000E+00 * C4),
+ F(8.85757540E-03 * C5), F(5.31873032E-02 * C5),
+ F(2.92408442E-03 * C6), F(3.90751381E-02 * C6),
+ -F(4.91578024E-03 * C7), F(2.61098752E-02 * C7),
+ F(6.79989431E-02 * C0), F(1.46955068E-01 * C0),
+ F(8.29847578E-02 * C1), F(1.45389847E-01 * C1),
+ F(9.75753918E-02 * C2), F(1.40753505E-01 * C2),
+ F(1.11196689E-01 * C3), F(1.33264415E-01 * C3),
+ -F(1.23264548E-01 * C4), F(0.00000000E+00 * C4),
+ F(1.45389847E-01 * C5), -F(8.29847578E-02 * C5),
+ F(1.40753505E-01 * C6), -F(9.75753918E-02 * C6),
+ F(1.33264415E-01 * C7), -F(1.11196689E-01 * C7),
+ -F(6.79989431E-02 * C0), F(1.29371806E-02 * C0),
+ -F(5.31873032E-02 * C1), F(8.85757540E-03 * C1),
+ -F(3.90751381E-02 * C2), F(2.92408442E-03 * C2),
+ -F(2.61098752E-02 * C3), -F(4.91578024E-03 * C3),
+ F(1.46404076E-02 * C4), F(0.00000000E+00 * C4),
+ F(1.53184106E-02 * C5), -F(8.02941163E-03 * C5),
+ F(1.62208471E-02 * C6), -F(1.04584443E-02 * C6),
+ F(1.59045603E-02 * C7), -F(1.27472335E-02 * C7),
+ -F(5.65949473E-03 * C0), F(2.01182542E-03 * C0),
+ -F(3.49717454E-03 * C1), F(2.10371989E-03 * C1),
+ -F(1.64973098E-03 * C2), F(1.99454554E-03 * C2),
+ -F(1.78805361E-04 * C3), F(1.61656283E-03 * C3),
+ -F(9.02154502E-04 * C4), F(0.00000000E+00 * C4),
+ F(1.78371725E-03 * C5), -F(1.56575398E-04 * C5),
+ F(1.47640169E-03 * C6), -F(3.43256425E-04 * C6),
+ F(1.13992507E-03 * C7), -F(5.54620202E-04 * C7),
+#undef F
+#define F(x) F_COS8(x)
+ F(0.7071067812 / C0), F(0.8314696123 / C1),
+ -F(0.7071067812 / C0), -F(0.1950903220 / C1),
+ -F(0.7071067812 / C0), -F(0.9807852804 / C1),
+ F(0.7071067812 / C0), -F(0.5555702330 / C1),
+ F(0.7071067812 / C0), F(0.5555702330 / C1),
+ -F(0.7071067812 / C0), F(0.9807852804 / C1),
+ -F(0.7071067812 / C0), F(0.1950903220 / C1),
+ F(0.7071067812 / C0), -F(0.8314696123 / C1),
+ F(0.9238795325 / C2), F(0.9807852804 / C3),
+ F(0.3826834324 / C2), F(0.8314696123 / C3),
+ -F(0.3826834324 / C2), F(0.5555702330 / C3),
+ -F(0.9238795325 / C2), F(0.1950903220 / C3),
+ -F(0.9238795325 / C2), -F(0.1950903220 / C3),
+ -F(0.3826834324 / C2), -F(0.5555702330 / C3),
+ F(0.3826834324 / C2), -F(0.8314696123 / C3),
+ F(0.9238795325 / C2), -F(0.9807852804 / C3),
+ -F(1.0000000000 / C4), F(0.5555702330 / C5),
+ -F(1.0000000000 / C4), -F(0.9807852804 / C5),
+ -F(1.0000000000 / C4), F(0.1950903220 / C5),
+ -F(1.0000000000 / C4), F(0.8314696123 / C5),
+ -F(1.0000000000 / C4), -F(0.8314696123 / C5),
+ -F(1.0000000000 / C4), -F(0.1950903220 / C5),
+ -F(1.0000000000 / C4), F(0.9807852804 / C5),
+ -F(1.0000000000 / C4), -F(0.5555702330 / C5),
+ F(0.3826834324 / C6), F(0.1950903220 / C7),
+ -F(0.9238795325 / C6), -F(0.5555702330 / C7),
+ F(0.9238795325 / C6), F(0.8314696123 / C7),
+ -F(0.3826834324 / C6), -F(0.9807852804 / C7),
+ -F(0.3826834324 / C6), F(0.9807852804 / C7),
+ F(0.9238795325 / C6), -F(0.8314696123 / C7),
+ -F(0.9238795325 / C6), F(0.5555702330 / C7),
+ F(0.3826834324 / C6), -F(0.1950903220 / C7),
+#undef F
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+};
+
+DECLARE_ALIGNED(SBC_ALIGN, const int16_t, ff_sbcdsp_analysis_consts_fixed8_simd_odd)[80 + 64] = { /* 80 "proto" entries followed by 64 "cos" entries */
+#define C0 2.5377944043 /* C0..C7: multiplied into the proto entries, divided back out of the cos entries */
+#define C1 2.4270044280
+#define C2 2.8015616024
+#define C3 3.1710363741
+#define C4 2.7906148894
+#define C5 2.4270044280
+#define C6 2.8015616024
+#define C7 3.1710363741
+
+#define F(x) F_PROTO8(x) /* proto part: 40 rows of 2 values */
+ F(0.00000000E+00 * C0), -F(8.23919506E-04 * C0),
+ F(1.56575398E-04 * C1), F(1.78371725E-03 * C1),
+ F(3.43256425E-04 * C2), F(1.47640169E-03 * C2),
+ F(5.54620202E-04 * C3), F(1.13992507E-03 * C3),
+ F(2.01182542E-03 * C4), F(5.65949473E-03 * C4),
+ F(2.10371989E-03 * C5), F(3.49717454E-03 * C5),
+ F(1.99454554E-03 * C6), F(1.64973098E-03 * C6),
+ F(1.61656283E-03 * C7), F(1.78805361E-04 * C7),
+ F(0.00000000E+00 * C0), -F(1.46525263E-02 * C0),
+ F(8.02941163E-03 * C1), F(1.53184106E-02 * C1),
+ F(1.04584443E-02 * C2), F(1.62208471E-02 * C2),
+ F(1.27472335E-02 * C3), F(1.59045603E-02 * C3),
+ F(1.29371806E-02 * C4), F(6.79989431E-02 * C4),
+ F(8.85757540E-03 * C5), F(5.31873032E-02 * C5),
+ F(2.92408442E-03 * C6), F(3.90751381E-02 * C6),
+ -F(4.91578024E-03 * C7), F(2.61098752E-02 * C7),
+ F(0.00000000E+00 * C0), -F(1.23264548E-01 * C0),
+ F(8.29847578E-02 * C1), F(1.45389847E-01 * C1),
+ F(9.75753918E-02 * C2), F(1.40753505E-01 * C2),
+ F(1.11196689E-01 * C3), F(1.33264415E-01 * C3),
+ F(1.46955068E-01 * C4), -F(6.79989431E-02 * C4),
+ F(1.45389847E-01 * C5), -F(8.29847578E-02 * C5),
+ F(1.40753505E-01 * C6), -F(9.75753918E-02 * C6),
+ F(1.33264415E-01 * C7), -F(1.11196689E-01 * C7),
+ F(0.00000000E+00 * C0), F(1.46404076E-02 * C0),
+ -F(5.31873032E-02 * C1), F(8.85757540E-03 * C1),
+ -F(3.90751381E-02 * C2), F(2.92408442E-03 * C2),
+ -F(2.61098752E-02 * C3), -F(4.91578024E-03 * C3),
+ F(1.29371806E-02 * C4), -F(5.65949473E-03 * C4),
+ F(1.53184106E-02 * C5), -F(8.02941163E-03 * C5),
+ F(1.62208471E-02 * C6), -F(1.04584443E-02 * C6),
+ F(1.59045603E-02 * C7), -F(1.27472335E-02 * C7),
+ F(0.00000000E+00 * C0), -F(9.02154502E-04 * C0),
+ -F(3.49717454E-03 * C1), F(2.10371989E-03 * C1),
+ -F(1.64973098E-03 * C2), F(1.99454554E-03 * C2),
+ -F(1.78805361E-04 * C3), F(1.61656283E-03 * C3),
+ F(2.01182542E-03 * C4), F(0.00000000E+00 * C4),
+ F(1.78371725E-03 * C5), -F(1.56575398E-04 * C5),
+ F(1.47640169E-03 * C6), -F(3.43256425E-04 * C6),
+ F(1.13992507E-03 * C7), -F(5.54620202E-04 * C7),
+#undef F
+#define F(x) F_COS8(x) /* cos part: 32 rows of 2 values */
+ -F(1.0000000000 / C0), F(0.8314696123 / C1),
+ -F(1.0000000000 / C0), -F(0.1950903220 / C1),
+ -F(1.0000000000 / C0), -F(0.9807852804 / C1),
+ -F(1.0000000000 / C0), -F(0.5555702330 / C1),
+ -F(1.0000000000 / C0), F(0.5555702330 / C1),
+ -F(1.0000000000 / C0), F(0.9807852804 / C1),
+ -F(1.0000000000 / C0), F(0.1950903220 / C1),
+ -F(1.0000000000 / C0), -F(0.8314696123 / C1),
+ F(0.9238795325 / C2), F(0.9807852804 / C3),
+ F(0.3826834324 / C2), F(0.8314696123 / C3),
+ -F(0.3826834324 / C2), F(0.5555702330 / C3),
+ -F(0.9238795325 / C2), F(0.1950903220 / C3),
+ -F(0.9238795325 / C2), -F(0.1950903220 / C3),
+ -F(0.3826834324 / C2), -F(0.5555702330 / C3),
+ F(0.3826834324 / C2), -F(0.8314696123 / C3),
+ F(0.9238795325 / C2), -F(0.9807852804 / C3),
+ F(0.7071067812 / C4), F(0.5555702330 / C5),
+ -F(0.7071067812 / C4), -F(0.9807852804 / C5),
+ -F(0.7071067812 / C4), F(0.1950903220 / C5),
+ F(0.7071067812 / C4), F(0.8314696123 / C5),
+ F(0.7071067812 / C4), -F(0.8314696123 / C5),
+ -F(0.7071067812 / C4), -F(0.1950903220 / C5),
+ -F(0.7071067812 / C4), F(0.9807852804 / C5),
+ F(0.7071067812 / C4), -F(0.5555702330 / C5),
+ F(0.3826834324 / C6), F(0.1950903220 / C7),
+ -F(0.9238795325 / C6), -F(0.5555702330 / C7),
+ F(0.9238795325 / C6), F(0.8314696123 / C7),
+ -F(0.3826834324 / C6), -F(0.9807852804 / C7),
+ -F(0.3826834324 / C6), F(0.9807852804 / C7),
+ F(0.9238795325 / C6), -F(0.8314696123 / C7),
+ -F(0.9238795325 / C6), F(0.5555702330 / C7),
+ F(0.3826834324 / C6), -F(0.1950903220 / C7),
+#undef F
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+};
diff --git a/libavcodec/sbcdsp_data.h b/libavcodec/sbcdsp_data.h
new file mode 100644
index 0000000000..12839fb3c3
--- /dev/null
+++ b/libavcodec/sbcdsp_data.h
@@ -0,0 +1,57 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017 Aurelien Jacobs <aurel at gnuage.org>
+ * Copyright (C) 2008-2010 Nokia Corporation
+ * Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org>
+ * Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch>
+ * Copyright (C) 2005-2006 Brad Midgley <bmidgley at xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * miscellaneous SBC tables
+ */
+
+#ifndef AVCODEC_SBCDSP_DATA_H
+#define AVCODEC_SBCDSP_DATA_H
+
+#include "sbc.h"
+
+#define SBC_PROTO_FIXED4_SCALE ((sizeof(int16_t) * CHAR_BIT - 1) + 1) /* evaluates to 16 */
+#define SBC_COS_TABLE_FIXED4_SCALE ((sizeof(int16_t) * CHAR_BIT - 1) ) /* evaluates to 15 */
+#define SBC_PROTO_FIXED8_SCALE ((sizeof(int16_t) * CHAR_BIT - 1) + 1) /* evaluates to 16 */
+#define SBC_COS_TABLE_FIXED8_SCALE ((sizeof(int16_t) * CHAR_BIT - 1) ) /* evaluates to 15 */
+
+/*
+ * Constant tables for use in the SIMD-optimized analysis filters.
+ * Each table consists of two parts:
+ * 1. reordered "proto" table
+ * 2. reordered "cos" table
+ *
+ * Due to non-symmetrical reordering, separate tables for the "even"
+ * and "odd" cases are needed.
+ */
+
+extern const int16_t ff_sbcdsp_analysis_consts_fixed4_simd_even[];
+extern const int16_t ff_sbcdsp_analysis_consts_fixed4_simd_odd[];
+extern const int16_t ff_sbcdsp_analysis_consts_fixed8_simd_even[];
+extern const int16_t ff_sbcdsp_analysis_consts_fixed8_simd_odd[];
+
+#endif /* AVCODEC_SBCDSP_DATA_H */
diff --git a/libavcodec/sbcenc.c b/libavcodec/sbcenc.c
new file mode 100644
index 0000000000..94a0331495
--- /dev/null
+++ b/libavcodec/sbcenc.c
@@ -0,0 +1,461 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017 Aurelien Jacobs <aurel at gnuage.org>
+ * Copyright (C) 2012-2013 Intel Corporation
+ * Copyright (C) 2008-2010 Nokia Corporation
+ * Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org>
+ * Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch>
+ * Copyright (C) 2005-2008 Brad Midgley <bmidgley at xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC encoder implementation
+ */
+
+#include <stdbool.h>
+#include "libavutil/opt.h"
+#include "avcodec.h"
+#include "internal.h"
+#include "put_bits.h"
+#include "sbc.h"
+#include "sbcdsp.h"
+
+typedef struct SBCEncContext { /* private data shared by the "sbc" and "msbc" encoders */
+ AVClass *class;
+
+ uint8_t frequency; /* SBC_FREQ_* code, copied into the frame header */
+ int blocks; /* "blocks" option; sbc_encode_init() rewrites it as (blocks >> 2) - 1 */
+ int subbands; /* "subbands" option; sbc_encode_init() rewrites it as subbands >> 3 (non-zero selects 8) */
+ uint8_t mode; /* SBC_MODE_* channel mode */
+ int allocation; /* "snr" option: non-zero selects SNR instead of loudness */
+ int bitpool; /* "bitpool" option; compared with frame.bitpool on every frame */
+
+ int joint_stereo; /* "joint_stereo" option */
+ int dual_channel; /* "dual_channel" option */
+
+ bool init; /* set once the first call to sbc_encode_frame() has filled frame and dsp */
+ bool msbc; /* set by msbc_encode_init() */
+ DECLARE_ALIGNED(SBC_ALIGN, struct sbc_frame, frame);
+ DECLARE_ALIGNED(SBC_ALIGN, SBCDSPContext, dsp);
+ size_t (*pack_frame)(AVPacket *avpkt, struct sbc_frame *frame, int joint); /* sbc_pack_frame or msbc_pack_frame */
+} SBCEncContext;
+
+static int sbc_analyze_audio(SBCDSPContext *s, struct sbc_frame *frame)
+{ /* Runs the 4- or 8-subband analysis callback over s->X for every channel, filling frame->sb_sample_f. */
+ int ch, blk;
+ int16_t *x; /* read cursor into s->X[ch] */
+
+ switch (frame->subbands) {
+ case 4:
+ for (ch = 0; ch < frame->channels; ch++) {
+ x = &s->X[ch][s->position - 4 *
+ s->increment + frame->blocks * 4];
+ for (blk = 0; blk < frame->blocks;
+ blk += s->increment) { /* blk advances by s->increment per call */
+ s->sbc_analyze_4s(
+ s, x,
+ frame->sb_sample_f[blk][ch],
+ frame->sb_sample_f[blk + 1][ch] -
+ frame->sb_sample_f[blk][ch]); /* element distance between block blk and blk + 1 */
+ x -= 4 * s->increment; /* the cursor walks backwards through X */
+ }
+ }
+ return frame->blocks * 4;
+
+ case 8:
+ for (ch = 0; ch < frame->channels; ch++) {
+ x = &s->X[ch][s->position - 8 *
+ s->increment + frame->blocks * 8];
+ for (blk = 0; blk < frame->blocks;
+ blk += s->increment) { /* blk advances by s->increment per call */
+ s->sbc_analyze_8s(
+ s, x,
+ frame->sb_sample_f[blk][ch],
+ frame->sb_sample_f[blk + 1][ch] -
+ frame->sb_sample_f[blk][ch]); /* element distance between block blk and blk + 1 */
+ x -= 8 * s->increment; /* the cursor walks backwards through X */
+ }
+ }
+ return frame->blocks * 8;
+
+ default: /* any subband count other than 4 or 8 */
+ return AVERROR(EIO);
+ }
+}
+
+/*
+ * Packs the scale factors and quantized samples of frame into avpkt, right
+ * after the 4 header bytes. Bytes 1-2 must already be set; byte 3 gets the CRC-8.
+ * Returns the number of payload bytes written (the 4 header bytes excluded).
+ */
+static av_always_inline size_t sbc_pack_frame_internal(AVPacket *avpkt,
+ struct sbc_frame *frame, int frame_subbands,
+ int frame_channels, int joint)
+{
+ PutBitContext pb;
+
+ /* Will copy the header parts for CRC-8 calculation here */
+ uint8_t crc_header[11] = { 0 };
+ int crc_pos = 0;
+
+ uint32_t audio_sample;
+
+ int ch, sb, blk; /* channel, subband and block counters */
+ int bits[2][8]; /* bits distribution */
+ uint32_t levels[2][8]; /* levels are derived from that */
+ uint32_t sb_sample_delta[2][8];
+
+ /* Can't fill in crc yet */
+ crc_header[0] = avpkt->data[1];
+ crc_header[1] = avpkt->data[2];
+ crc_pos = 16;
+
+ init_put_bits(&pb, avpkt->data + 4, avpkt->size - 4); /* the writer starts 4 bytes in, so only size - 4 bytes remain */
+
+ if (frame->mode == JOINT_STEREO) {
+ put_bits(&pb, frame_subbands, joint); /* one join flag bit per subband */
+ crc_header[crc_pos >> 3] = joint;
+ crc_pos += frame_subbands;
+ }
+
+ for (ch = 0; ch < frame_channels; ch++) {
+ for (sb = 0; sb < frame_subbands; sb++) {
+ put_bits(&pb, 4, frame->scale_factor[ch][sb] & 0x0F);
+ crc_header[crc_pos >> 3] <<= 4; /* mirror the 4-bit scale factor into the CRC input */
+ crc_header[crc_pos >> 3] |= frame->scale_factor[ch][sb] & 0x0F;
+ crc_pos += 4;
+ }
+ }
+
+ /* align the last crc byte */
+ if (crc_pos % 8)
+ crc_header[crc_pos >> 3] <<= 8 - (crc_pos % 8);
+
+ avpkt->data[3] = ff_sbc_crc8(crc_header, crc_pos); /* crc_pos is a length in bits */
+
+ ff_sbc_calculate_bits(frame, bits);
+
+ for (ch = 0; ch < frame_channels; ch++) {
+ for (sb = 0; sb < frame_subbands; sb++) {
+ levels[ch][sb] = ((1 << bits[ch][sb]) - 1) <<
+ (32 - (frame->scale_factor[ch][sb] +
+ SCALE_OUT_BITS + 2));
+ sb_sample_delta[ch][sb] = (uint32_t) 1 <<
+ (frame->scale_factor[ch][sb] +
+ SCALE_OUT_BITS + 1);
+ }
+ }
+
+ for (blk = 0; blk < frame->blocks; blk++) {
+ for (ch = 0; ch < frame_channels; ch++) {
+ for (sb = 0; sb < frame_subbands; sb++) {
+
+ if (bits[ch][sb] == 0) /* nothing is written for subbands that got no bits */
+ continue;
+
+ audio_sample = ((uint64_t) levels[ch][sb] *
+ (sb_sample_delta[ch][sb] +
+ frame->sb_sample_f[blk][ch][sb])) >> 32;
+
+ put_bits(&pb, bits[ch][sb], audio_sample);
+ }
+ }
+ }
+
+ flush_put_bits(&pb);
+
+ return (put_bits_count(&pb) + 7) / 8; /* bits written, rounded up to whole bytes */
+}
+
+static size_t sbc_pack_frame(AVPacket *avpkt, struct sbc_frame *frame, int joint)
+{ /* Writes header bytes 0-2 of an SBC frame, then packs the rest via sbc_pack_frame_internal(). */
+ int frame_subbands = 4;
+
+ avpkt->data[0] = SBC_SYNCWORD;
+
+ avpkt->data[1] = (frame->frequency & 0x03) << 6; /* bits 7-6: frequency */
+ avpkt->data[1] |= (frame->block_mode & 0x03) << 4; /* bits 5-4: block mode */
+ avpkt->data[1] |= (frame->mode & 0x03) << 2; /* bits 3-2: channel mode */
+ avpkt->data[1] |= (frame->allocation & 0x01) << 1; /* bit 1: allocation method */
+
+ avpkt->data[2] = frame->bitpool;
+
+ if (frame->subbands != 4)
+ frame_subbands = 8;
+
+ if ((frame->mode == MONO || frame->mode == DUAL_CHANNEL) &&
+ frame->bitpool > frame_subbands << 4) /* limit is 16 * subbands */
+ return -5; /* NOTE(review): -5 converted to size_t is a huge value, and sbc_encode_frame() ignores the result */
+
+ if ((frame->mode == STEREO || frame->mode == JOINT_STEREO) &&
+ frame->bitpool > frame_subbands << 5) /* limit is 32 * subbands */
+ return -5; /* NOTE(review): same wrap-around as above */
+
+ if (frame->subbands == 4) {
+ if (frame->channels == 1)
+ return sbc_pack_frame_internal(avpkt, frame, 4, 1, joint);
+ else
+ return sbc_pack_frame_internal(avpkt, frame, 4, 2, joint);
+ } else {
+ avpkt->data[1] |= 0x01; /* bit 0: set when 8 subbands are used */
+ if (frame->channels == 1)
+ return sbc_pack_frame_internal(avpkt, frame, 8, 1, joint);
+ else
+ return sbc_pack_frame_internal(avpkt, frame, 8, 2, joint);
+ }
+}
+
+static size_t msbc_pack_frame(AVPacket *avpkt, struct sbc_frame *frame, int joint)
+{ /* mSBC variant of sbc_pack_frame(): fixed header bytes, always packed as 8 subbands / 1 channel. */
+ avpkt->data[0] = MSBC_SYNCWORD;
+ avpkt->data[1] = 0; /* bytes 1 and 2 are written as zero for mSBC */
+ avpkt->data[2] = 0;
+
+ return sbc_pack_frame_internal(avpkt, frame, 8, 1, joint);
+}
+
+static void sbc_encoder_init(bool msbc, SBCDSPContext *s,
+ const struct sbc_frame *frame)
+{ /* Resets the DSP input buffer state and installs the DSP callbacks via ff_sbcdsp_init(). */
+ memset(&s->X, 0, sizeof(s->X));
+ s->position = (SBC_X_BUFFER_SIZE - frame->subbands * 9) & ~7; /* rounded down to a multiple of 8 */
+ if (msbc)
+ s->increment = 1; /* step used by sbc_analyze_audio() for mSBC */
+ else
+ s->increment = 4; /* step used by sbc_analyze_audio() for regular SBC */
+
+ ff_sbcdsp_init(s);
+}
+
+static int sbc_encode_init(AVCodecContext *avctx)
+{ /* init callback of the "sbc" encoder: validates the options and turns them into header codes. */
+ SBCEncContext *sbc = avctx->priv_data;
+
+ if (sbc->joint_stereo && sbc->dual_channel) {
+ av_log(avctx, AV_LOG_ERROR, "joint_stereo and dual_channel "
+ "can't be used at the same time.\n");
+ return AVERROR(EINVAL);
+ }
+
+ sbc->pack_frame = sbc_pack_frame;
+
+ sbc->frequency = SBC_FREQ_44100; /* default, overridden below when the sample rate matches */
+ sbc->mode = SBC_MODE_STEREO;
+ if (sbc->joint_stereo)
+ sbc->mode = SBC_MODE_JOINT_STEREO;
+ else if (sbc->dual_channel)
+ sbc->mode = SBC_MODE_DUAL_CHANNEL;
+ sbc->subbands >>= 3; /* option value 8 -> 1, values 4..7 -> 0 */
+ sbc->blocks = (sbc->blocks >> 2) - 1; /* option value 4/8/12/16 -> 0/1/2/3 */
+
+ if (!avctx->frame_size)
+ avctx->frame_size = 4*(sbc->subbands + 1) * 4*(sbc->blocks + 1); /* subbands * blocks samples */
+
+ for (int i = 0; avctx->codec->supported_samplerates[i]; i++)
+ if (avctx->sample_rate == avctx->codec->supported_samplerates[i])
+ sbc->frequency = i; /* the list index is used directly as the frequency code */
+
+ if (avctx->channels == 1)
+ sbc->mode = SBC_MODE_MONO; /* mono input overrides the stereo mode options */
+
+ return 0;
+}
+
+static int msbc_encode_init(AVCodecContext *avctx)
+{ /* init callback of the "msbc" encoder: every parameter is fixed, nothing is read from options. */
+ SBCEncContext *sbc = avctx->priv_data;
+
+ sbc->msbc = true;
+ sbc->pack_frame = msbc_pack_frame;
+
+ sbc->frequency = SBC_FREQ_16000;
+ sbc->blocks = MSBC_BLOCKS;
+ sbc->subbands = SBC_SB_8;
+ sbc->mode = SBC_MODE_MONO;
+ sbc->allocation = SBC_AM_LOUDNESS;
+ sbc->bitpool = 26;
+
+ if (!avctx->frame_size)
+ avctx->frame_size = 8 * MSBC_BLOCKS; /* 8 subbands * MSBC_BLOCKS blocks */
+
+ return 0;
+}
+
+/* Returns the size in bytes of one encoded frame (4 header bytes included) for the current settings */
+static size_t sbc_get_frame_length(SBCEncContext *sbc)
+{
+ int ret;
+ uint8_t subbands, channels, blocks, joint, bitpool;
+
+ if (sbc->init && sbc->frame.bitpool == sbc->bitpool) /* nothing changed: reuse the cached length */
+ return sbc->frame.length;
+
+ subbands = sbc->subbands ? 8 : 4;
+ if (sbc->msbc)
+ blocks = MSBC_BLOCKS;
+ else
+ blocks = 4 + (sbc->blocks * 4); /* block-mode code 0..3 -> 4/8/12/16 blocks */
+ channels = sbc->mode == SBC_MODE_MONO ? 1 : 2;
+ joint = sbc->mode == SBC_MODE_JOINT_STEREO ? 1 : 0;
+ bitpool = sbc->bitpool;
+
+ ret = 4 + (4 * subbands * channels) / 8; /* header + 4-bit scale factors */
+ /* This term is not always evenly divisible by 8, so round it up */
+ if (channels == 1 || sbc->mode == SBC_MODE_DUAL_CHANNEL)
+ ret += ((blocks * channels * bitpool) + 7) / 8;
+ else
+ ret += (((joint ? subbands : 0) + blocks * bitpool) + 7) / 8; /* joint stereo adds one flag bit per subband */
+
+ return ret;
+}
+
+static int sbc_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
+ const AVFrame *frame, int *got_packet_ptr)
+{ /* encode2 callback of both encoders: turns one AVFrame into one packed frame in avpkt. */
+ SBCEncContext *sbc = avctx->priv_data;
+ int (*sbc_enc_process_input)(int position,
+ const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE],
+ int nsamples, int nchannels);
+ int ret;
+
+ if (!sbc)
+ return AVERROR(EIO);
+
+ if (!sbc->init) { /* first frame: derive the frame description from the context */
+ sbc->frame.frequency = sbc->frequency;
+ sbc->frame.mode = sbc->mode;
+ sbc->frame.channels = sbc->mode == SBC_MODE_MONO ? 1 : 2;
+ sbc->frame.allocation = sbc->allocation;
+ sbc->frame.subband_mode = sbc->subbands;
+ sbc->frame.subbands = sbc->subbands ? 8 : 4;
+ sbc->frame.block_mode = sbc->blocks;
+ if (sbc->msbc)
+ sbc->frame.blocks = MSBC_BLOCKS;
+ else
+ sbc->frame.blocks = 4 + (sbc->blocks * 4);
+ sbc->frame.bitpool = sbc->bitpool;
+ sbc->frame.codesize = sbc->frame.subbands * sbc->frame.blocks
+ * sbc->frame.channels * 2; /* input bytes per frame: 2 bytes per sample */
+ sbc->frame.length = sbc_get_frame_length(sbc);
+
+ sbc_encoder_init(sbc->msbc, &sbc->dsp, &sbc->frame);
+ sbc->init = true;
+ } else if (sbc->frame.bitpool != sbc->bitpool) { /* bitpool changed since the previous frame */
+ sbc->frame.length = sbc_get_frame_length(sbc);
+ sbc->frame.bitpool = sbc->bitpool;
+ }
+
+ /* input must be large enough to encode a complete frame */
+ if (frame->nb_samples * sbc->frame.channels * 2 < sbc->frame.codesize)
+ return 0; /* no packet is produced; *got_packet_ptr is left untouched */
+
+ if ((ret = ff_alloc_packet2(avctx, avpkt, sbc->frame.length, 0)) < 0)
+ return ret;
+
+ /* Select the needed input data processing function and call it */
+ if (sbc->frame.subbands == 8) {
+ sbc_enc_process_input = sbc->dsp.sbc_enc_process_input_8s;
+ } else {
+ sbc_enc_process_input = sbc->dsp.sbc_enc_process_input_4s;
+ }
+
+ sbc->dsp.position = sbc_enc_process_input(
+ sbc->dsp.position, frame->data[0],
+ sbc->dsp.X, sbc->frame.subbands * sbc->frame.blocks,
+ sbc->frame.channels);
+
+ sbc_analyze_audio(&sbc->dsp, &sbc->frame); /* return value is not checked */
+
+ if (sbc->frame.mode == JOINT_STEREO) {
+ int j = sbc->dsp.sbc_calc_scalefactors_j(
+ sbc->frame.sb_sample_f, sbc->frame.scale_factor,
+ sbc->frame.blocks, sbc->frame.subbands);
+ sbc->pack_frame(avpkt, &sbc->frame, j); /* j is passed on as the joint argument */
+ } else {
+ sbc->dsp.sbc_calc_scalefactors(
+ sbc->frame.sb_sample_f, sbc->frame.scale_factor,
+ sbc->frame.blocks, sbc->frame.channels,
+ sbc->frame.subbands);
+ sbc->pack_frame(avpkt, &sbc->frame, 0);
+ }
+
+ *got_packet_ptr = 1;
+ return 0;
+}
+
+#define OFFSET(x) offsetof(SBCEncContext, x)
+#define AE AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+static const AVOption options[] = { /* option table referenced by sbc_class */
+ { "joint_stereo", "use joint stereo",
+ OFFSET(joint_stereo), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AE },
+ { "dual_channel", "use dual channel",
+ OFFSET(dual_channel), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AE },
+ { "subbands", "number of subbands (4 or 8)",
+ OFFSET(subbands), AV_OPT_TYPE_INT, { .i64 = 8 }, 4, 8, AE }, /* NOTE(review): 5..7 are accepted and become 4 after the >> 3 in sbc_encode_init() */
+ { "bitpool", "bitpool value",
+ OFFSET(bitpool), AV_OPT_TYPE_INT, { .i64 = 32 }, 0, 255, AE },
+ { "blocks", "number of blocks (4, 8, 12 or 16)",
+ OFFSET(blocks), AV_OPT_TYPE_INT, { .i64 = 16 }, 4, 16, AE }, /* NOTE(review): other values in 4..16 are rounded down by the >> 2 in sbc_encode_init() */
+ { "snr", "use SNR mode (instead of loudness)",
+ OFFSET(allocation), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AE },
+ { NULL },
+};
+
+static const AVClass sbc_class = { /* used as .priv_class by ff_sbc_encoder only */
+ .class_name = "sbc encoder",
+ .item_name = av_default_item_name,
+ .option = options,
+ .version = LIBAVUTIL_VERSION_INT,
+};
+
+#if CONFIG_SBC_ENCODER
+AVCodec ff_sbc_encoder = { /* SBC encoder: mono or stereo, S16 input, configurable through sbc_class */
+ .name = "sbc",
+ .long_name = NULL_IF_CONFIG_SMALL("SBC (low-complexity subband codec)"),
+ .type = AVMEDIA_TYPE_AUDIO,
+ .id = AV_CODEC_ID_SBC,
+ .priv_data_size = sizeof(SBCEncContext),
+ .init = sbc_encode_init,
+ .encode2 = sbc_encode_frame,
+ .channel_layouts = (const uint64_t[]) { AV_CH_LAYOUT_MONO,
+ AV_CH_LAYOUT_STEREO, 0},
+ .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16,
+ AV_SAMPLE_FMT_NONE },
+ .supported_samplerates = (const int[]) { 16000, 32000, 44100, 48000, 0 }, /* sbc_encode_init() uses the index in this list as the frequency code */
+ .priv_class = &sbc_class,
+};
+#endif
+
+#if CONFIG_MSBC_ENCODER
+AVCodec ff_msbc_encoder = { /* mSBC encoder: mono, 16 kHz, S16 input; no .priv_class, so no private options */
+ .name = "msbc",
+ .long_name = NULL_IF_CONFIG_SMALL("mSBC (wideband speech mono SBC)"),
+ .type = AVMEDIA_TYPE_AUDIO,
+ .id = AV_CODEC_ID_MSBC,
+ .priv_data_size = sizeof(SBCEncContext),
+ .init = msbc_encode_init,
+ .encode2 = sbc_encode_frame, /* shared with the SBC encoder */
+ .channel_layouts = (const uint64_t[]) { AV_CH_LAYOUT_MONO, 0},
+ .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16,
+ AV_SAMPLE_FMT_NONE },
+ .supported_samplerates = (const int[]) { 16000, 0 },
+};
+#endif
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index a805cd37b4..2350c8bbee 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -63,6 +63,7 @@ OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp_init.o
OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp_init.o
OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp_init.o
OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp_init.o
+OBJS-$(CONFIG_SBC_ENCODER) += x86/sbcdsp_init.o
OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_init.o
OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o
OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp_init.o
@@ -172,6 +173,7 @@ X86ASM-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp.o
X86ASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o
X86ASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
X86ASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o
+X86ASM-OBJS-$(CONFIG_SBC_ENCODER) += x86/sbcdsp.o
X86ASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o
X86ASM-OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp.o
X86ASM-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o
diff --git a/libavcodec/x86/sbcdsp.asm b/libavcodec/x86/sbcdsp.asm
new file mode 100644
index 0000000000..ecf5298490
--- /dev/null
+++ b/libavcodec/x86/sbcdsp.asm
@@ -0,0 +1,290 @@
+;******************************************************************************
+;* SIMD optimized SBC encoder DSP functions
+;*
+;* Copyright (C) 2017 Aurelien Jacobs <aurel at gnuage.org>
+;* Copyright (C) 2008-2010 Nokia Corporation
+;* Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org>
+;* Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch>
+;* Copyright (C) 2005-2006 Brad Midgley <bmidgley at xmission.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+scale_mask: times 2 dd 0x8000 ; 1 << (SBC_PROTO_FIXED4_SCALE - 1)
+
+SECTION .text
+
+;*******************************************************************
+;void ff_sbc_analyze_4(const int16_t *in, int32_t *out, const int16_t *consts);
+;*******************************************************************
+INIT_MMX mmx
+cglobal sbc_analyze_4, 3, 3, 4, in, out, consts ; reads 80 bytes of in, 112 bytes of consts, writes 16 bytes to out
+ movq m0, [inq]
+ movq m1, [inq+8]
+ pmaddwd m0, [constsq]
+ pmaddwd m1, [constsq+8]
+ paddd m0, [scale_mask] ; rounding term for the psrad by 16 below
+ paddd m1, [scale_mask]
+
+ movq m2, [inq+16] ; remaining four 16-byte groups are accumulated into m0/m1
+ movq m3, [inq+24]
+ pmaddwd m2, [constsq+16]
+ pmaddwd m3, [constsq+24]
+ paddd m0, m2
+ paddd m1, m3
+
+ movq m2, [inq+32]
+ movq m3, [inq+40]
+ pmaddwd m2, [constsq+32]
+ pmaddwd m3, [constsq+40]
+ paddd m0, m2
+ paddd m1, m3
+
+ movq m2, [inq+48]
+ movq m3, [inq+56]
+ pmaddwd m2, [constsq+48]
+ pmaddwd m3, [constsq+56]
+ paddd m0, m2
+ paddd m1, m3
+
+ movq m2, [inq+64]
+ movq m3, [inq+72]
+ pmaddwd m2, [constsq+64]
+ pmaddwd m3, [constsq+72]
+ paddd m0, m2
+ paddd m1, m3
+
+ psrad m0, 16 ; SBC_PROTO_FIXED4_SCALE
+ psrad m1, 16 ; SBC_PROTO_FIXED4_SCALE
+ packssdw m0, m0 ; saturate the sums to 16 bit; both halves hold the same pair
+ packssdw m1, m1
+
+ movq m2, m0 ; second stage: multiply by the table part starting at consts+80
+ pmaddwd m0, [constsq+80]
+ pmaddwd m2, [constsq+88]
+
+ movq m3, m1
+ pmaddwd m1, [constsq+96]
+ pmaddwd m3, [constsq+104]
+ paddd m0, m1
+ paddd m2, m3
+
+ movq [outq ], m0 ; four 32-bit results
+ movq [outq+8], m2
+
+ RET ; no emms here
+
+
+
+;*******************************************************************
+;void ff_sbc_analyze_8(const int16_t *in, int32_t *out, const int16_t *consts);
+;*******************************************************************
+INIT_MMX mmx
+cglobal sbc_analyze_8, 3, 3, 4, in, out, consts ; reads 160 bytes of in, 288 bytes of consts, writes 32 bytes to out
+ movq m0, [inq]
+ movq m1, [inq+8]
+ movq m2, [inq+16]
+ movq m3, [inq+24]
+ pmaddwd m0, [constsq]
+ pmaddwd m1, [constsq+8]
+ pmaddwd m2, [constsq+16]
+ pmaddwd m3, [constsq+24]
+ paddd m0, [scale_mask] ; rounding term for the psrad by 16 below
+ paddd m1, [scale_mask]
+ paddd m2, [scale_mask]
+ paddd m3, [scale_mask]
+
+ movq m4, [inq+32] ; remaining four 32-byte groups are accumulated into m0..m3
+ movq m5, [inq+40]
+ movq m6, [inq+48]
+ movq m7, [inq+56]
+ pmaddwd m4, [constsq+32]
+ pmaddwd m5, [constsq+40]
+ pmaddwd m6, [constsq+48]
+ pmaddwd m7, [constsq+56]
+ paddd m0, m4
+ paddd m1, m5
+ paddd m2, m6
+ paddd m3, m7
+
+ movq m4, [inq+64]
+ movq m5, [inq+72]
+ movq m6, [inq+80]
+ movq m7, [inq+88]
+ pmaddwd m4, [constsq+64]
+ pmaddwd m5, [constsq+72]
+ pmaddwd m6, [constsq+80]
+ pmaddwd m7, [constsq+88]
+ paddd m0, m4
+ paddd m1, m5
+ paddd m2, m6
+ paddd m3, m7
+
+ movq m4, [inq+96]
+ movq m5, [inq+104]
+ movq m6, [inq+112]
+ movq m7, [inq+120]
+ pmaddwd m4, [constsq+96]
+ pmaddwd m5, [constsq+104]
+ pmaddwd m6, [constsq+112]
+ pmaddwd m7, [constsq+120]
+ paddd m0, m4
+ paddd m1, m5
+ paddd m2, m6
+ paddd m3, m7
+
+ movq m4, [inq+128]
+ movq m5, [inq+136]
+ movq m6, [inq+144]
+ movq m7, [inq+152]
+ pmaddwd m4, [constsq+128]
+ pmaddwd m5, [constsq+136]
+ pmaddwd m6, [constsq+144]
+ pmaddwd m7, [constsq+152]
+ paddd m0, m4
+ paddd m1, m5
+ paddd m2, m6
+ paddd m3, m7
+
+ psrad m0, 16 ; SBC_PROTO_FIXED8_SCALE
+ psrad m1, 16 ; SBC_PROTO_FIXED8_SCALE
+ psrad m2, 16 ; SBC_PROTO_FIXED8_SCALE
+ psrad m3, 16 ; SBC_PROTO_FIXED8_SCALE
+
+ packssdw m0, m0 ; saturate the sums to 16 bit; both halves hold the same pair
+ packssdw m1, m1
+ packssdw m2, m2
+ packssdw m3, m3
+
+ movq m4, m0 ; second stage, outputs 0..3: table part starting at consts+160
+ movq m5, m0
+ pmaddwd m4, [constsq+160]
+ pmaddwd m5, [constsq+168]
+
+ movq m6, m1
+ movq m7, m1
+ pmaddwd m6, [constsq+192]
+ pmaddwd m7, [constsq+200]
+ paddd m4, m6
+ paddd m5, m7
+
+ movq m6, m2
+ movq m7, m2
+ pmaddwd m6, [constsq+224]
+ pmaddwd m7, [constsq+232]
+ paddd m4, m6
+ paddd m5, m7
+
+ movq m6, m3
+ movq m7, m3
+ pmaddwd m6, [constsq+256]
+ pmaddwd m7, [constsq+264]
+ paddd m4, m6
+ paddd m5, m7
+
+ movq [outq ], m4
+ movq [outq+8], m5
+
+ movq m5, m0 ; second stage, outputs 4..7; m0..m3 are consumed here
+ pmaddwd m0, [constsq+176]
+ pmaddwd m5, [constsq+184]
+
+ movq m7, m1
+ pmaddwd m1, [constsq+208]
+ pmaddwd m7, [constsq+216]
+ paddd m0, m1
+ paddd m5, m7
+
+ movq m7, m2
+ pmaddwd m2, [constsq+240]
+ pmaddwd m7, [constsq+248]
+ paddd m0, m2
+ paddd m5, m7
+
+ movq m7, m3
+ pmaddwd m3, [constsq+272]
+ pmaddwd m7, [constsq+280]
+ paddd m0, m3
+ paddd m5, m7
+
+ movq [outq+16], m0
+ movq [outq+24], m5
+
+ RET ; no emms here
+
+
+;*******************************************************************
+;void ff_sbc_calc_scalefactors(int32_t sb_sample_f[16][2][8],
+; uint32_t scale_factor[2][8],
+; int blocks, int channels, int subbands)
+;*******************************************************************
+INIT_MMX mmx
+cglobal sbc_calc_scalefactors, 5, 10, 3, sb_sample_f, scale_factor, blocks, channels, subbands, ch, sb, sa, sf, blk ; ten named GPRs: request all ten so blk (r9) is preserved. NOTE(review): x86-32 offers only 7
+ shl channelsd, 5 ; channels * 32 = end byte offset (8 dwords per channel)
+ mov chq, 0
+.loop_1: ; one iteration per channel, chq is the channel byte offset
+ lea saq, [sb_sample_fq + chq]
+ lea sfq, [scale_factorq + chq]
+
+ mov sbd, 0
+.loop_2: ; one iteration per pair of subbands
+ ; blk = (blocks - 1) * 64;
+ lea blkq, [blocksq - 1]
+ shl blkd, 6
+
+ movq m0, [scale_mask] ; start from 0x8000 so bsr below never sees zero
+.loop_3: ; walk the blocks from last to first, 64 bytes apart
+ movq m1, [saq+blkq]
+ pxor m2, m2
+ pcmpgtd m1, m2 ; -1 where the sample is > 0
+ paddd m1, [saq+blkq] ; x - 1 for positive x, x otherwise
+ pcmpgtd m2, m1 ; -1 where that value is negative
+ pxor m1, m2 ; ~x for negative x: the result is non-negative
+
+ por m0, m1 ; accumulate every bit seen in this subband pair
+
+ sub blkd, 64
+ jns .loop_3
+
+ movd blkd, m0
+ psrlq m0, 32
+ bsr blkd, blkd ; index of the highest set bit
+ sub blkd, 15 ; SCALE_OUT_BITS
+ mov [sfq], blkd
+
+ movd blkd, m0
+ bsr blkd, blkd
+ sub blkd, 15 ; SCALE_OUT_BITS
+ mov [sfq+4], blkd
+
+ add saq, 8 ; next pair of 32-bit samples / scale factors
+ add sfq, 8
+
+ add sbd, 2
+ cmp sbd, subbandsd
+ jl .loop_2
+
+ add chd, 32
+ cmp chd, channelsd
+ jl .loop_1
+
+ emms
+ RET
diff --git a/libavcodec/x86/sbcdsp_init.c b/libavcodec/x86/sbcdsp_init.c
new file mode 100644
index 0000000000..86effecfdf
--- /dev/null
+++ b/libavcodec/x86/sbcdsp_init.c
@@ -0,0 +1,51 @@
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017 Aurelien Jacobs <aurel at gnuage.org>
+ * Copyright (C) 2008-2010 Nokia Corporation
+ * Copyright (C) 2004-2010 Marcel Holtmann <marcel at holtmann.org>
+ * Copyright (C) 2004-2005 Henryk Ploetz <henryk at ploetzli.ch>
+ * Copyright (C) 2005-2006 Brad Midgley <bmidgley at xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC MMX optimization for some basic "building bricks"
+ */
+
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/sbcdsp.h"
+
+void ff_sbc_analyze_4_mmx(const int16_t *in, int32_t *out, const int16_t *consts); /* implemented in x86/sbcdsp.asm */
+void ff_sbc_analyze_8_mmx(const int16_t *in, int32_t *out, const int16_t *consts); /* implemented in x86/sbcdsp.asm */
+void ff_sbc_calc_scalefactors_mmx(int32_t sb_sample_f[16][2][8],
+ uint32_t scale_factor[2][8],
+ int blocks, int channels, int subbands); /* implemented in x86/sbcdsp.asm */
+
+av_cold void ff_sbcdsp_init_x86(SBCDSPContext *s)
+{ /* Replaces three SBCDSPContext callbacks with the MMX versions when external MMX is usable. */
+ int cpu_flags = av_get_cpu_flags();
+
+ if (EXTERNAL_MMX(cpu_flags)) {
+ s->sbc_analyze_4 = ff_sbc_analyze_4_mmx;
+ s->sbc_analyze_8 = ff_sbc_analyze_8_mmx;
+ s->sbc_calc_scalefactors = ff_sbc_calc_scalefactors_mmx;
+ }
+}
--
2.15.0
More information about the ffmpeg-devel
mailing list