[FFmpeg-devel] [PATCH] avfilter: add hrtfm filter
Paul B Mahol
onemda at gmail.com
Fri Mar 16 21:44:53 EET 2018
Signed-off-by: Paul B Mahol <onemda at gmail.com>
---
doc/filters.texi | 60 ++++++
libavfilter/Makefile | 1 +
libavfilter/af_hrtfm.c | 486 +++++++++++++++++++++++++++++++++++++++++++++++
libavfilter/allfilters.c | 1 +
4 files changed, 548 insertions(+)
create mode 100644 libavfilter/af_hrtfm.c
diff --git a/doc/filters.texi b/doc/filters.texi
index bd43a7ac6e..c298054325 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -3218,6 +3218,66 @@ Change highpass width.
Syntax for the command is : "@var{width}"
@end table
+@section hrtfm
+
+Apply simple Head Related Transfer Function Model to audio stream.
+
+hrtfm filter creates virtual loudspeakers around the user for binaural
+listening via headphones (audio formats up to 9 channels supported).
+
+This is a very simple implementation which does not use any HRIRs.
+
+It accepts the following parameters:
+
+@table @option
+@item hradius
+Set head radius of listener. In meters. Default value is @code{0.0891}. Allowed range is from @code{0.05} to @code{0.25}.
+
+@item sspeed
+Set sound speed in meters per second. Default value is @code{334}.
+Allowed range is from @code{300} to @code{400}.
+
+@item amin
+Set minimum alpha. Default value is @code{0.05}.
+Allowed range is from @code{0.01} to @code{1}.
+
+@item gain
+Set output gain in dB. Default value is @code{0}.
+Allowed range is from @code{-20} to @code{40}.
+
+@item rotation
+Set rotation of virtual loudspeakers in deg. Default is @code{0}.
+Allowed range is from @code{-360} to @code{360}.
+
+@item elevation
+Set elevation of virtual speakers in deg. Default is @code{0}.
+Allowed range is from @code{-90} to @code{90}.
+
+@item speakers
+Set custom positions of virtual loudspeakers. Syntax for this option is:
+<CH> <AZIM> <ELEV>[|<CH> <AZIM> <ELEV>|...].
+Each virtual loudspeaker is described by a short channel name followed by
+its azimuth and elevation in degrees.
+Virtual loudspeaker descriptions are separated by '|'.
+For example to override front left and front right channel positions use:
+'speakers=FL 45 15|FR 345 15'.
+Descriptions with unrecognised channel names are ignored.
+
+@item lfegain
+Set LFE gain in dB. Default value is @code{0}.
+Allowed range is from @code{-11} to @code{11}.
+@end table
+
+@subsection Examples
+
+@itemize
+@item
+Apply filter with custom head radius, speed of sound and minimum alpha:
+@example
+hrtfm=hradius=0.09:sspeed=334:amin=0.01
+@end example
+@end itemize
+
@section join
Join multiple input streams into one multi-channel stream.
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index fc16512e2c..65783a8443 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -99,6 +99,7 @@ OBJS-$(CONFIG_HAAS_FILTER) += af_haas.o
OBJS-$(CONFIG_HDCD_FILTER) += af_hdcd.o
OBJS-$(CONFIG_HEADPHONE_FILTER) += af_headphone.o
OBJS-$(CONFIG_HIGHPASS_FILTER) += af_biquads.o
+OBJS-$(CONFIG_HRTFM_FILTER) += af_hrtfm.o
OBJS-$(CONFIG_JOIN_FILTER) += af_join.o
OBJS-$(CONFIG_LADSPA_FILTER) += af_ladspa.o
OBJS-$(CONFIG_LOUDNORM_FILTER) += af_loudnorm.o ebur128.o
diff --git a/libavfilter/af_hrtfm.c b/libavfilter/af_hrtfm.c
new file mode 100644
index 0000000000..a9ac95f9a5
--- /dev/null
+++ b/libavfilter/af_hrtfm.c
@@ -0,0 +1,486 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <limits.h>
+#include <math.h>
+
+#include "libavutil/avstring.h"
+#include "libavutil/ffmath.h"
+#include "libavutil/opt.h"
+
+#include "audio.h"
+#include "avfilter.h"
+#include "internal.h"
+
+typedef struct ChParams { /* per-input-channel speaker position, filter coefficients and filter state */
+ float azim; /* azimuth of the virtual loudspeakers */
+ float elev; /* elevation of the virtual loudspeakers */
+ int lfe; /* set to 1 by get_speaker_pos() for LFE channels; filter_frame() mixes those in unfiltered */
+
+ float Al[2], Bl[2], al[2], bl[2]; /* left-ear coefficients filled by hsfilter(): A/B first stage, a/b second stage */
+ float Ar[2], Br[2], ar[2], br[2]; /* same coefficients for the right ear */
+
+ float gain[2]; /* linear gain of the delayed copy, from shoulder(); [0] left ear, [1] right ear */
+ int M[2]; /* delay length in samples, from shoulder(); [0] left ear, [1] right ear */
+
+ float *delayed_samples[2]; /* delay buffers of M[] samples each, used circularly by dfilter() */
+ int delayed_index[2]; /* current position inside each delay buffer */
+
+ float cache1_in[2]; /* previous input sample of the first bfilter() stage, per ear */
+ float cache1_out[2]; /* previous output sample of the first bfilter() stage, per ear */
+
+ float cache2_in[2]; /* previous input sample of the second bfilter() stage, per ear */
+ float cache2_out[2]; /* previous output sample of the second bfilter() stage, per ear */
+} ChParams;
+
+typedef struct VirtualSpeaker { /* position override for one channel, filled by parse_speaker_pos() */
+ uint8_t set; /* 1 once a position has been parsed for this channel */
+ float azim; /* overriding azimuth */
+ float elev; /* overriding elevation; 0 when only the azimuth was given */
+} VirtualSpeaker;
+
+typedef struct HRTFMContext { /* private filter context; the option fields are described by hrtfm_options */
+ const AVClass *class;
+
+ float sspeed; /* "sspeed" option: sound speed */
+ float hradius; /* "hradius" option: head radius */
+ float alfa_min; /* "amin" option: minimum alfa handed to hsfilter() */
+ float gain; /* filter gain (in dB) */
+ float lfe_gain; /* "lfegain" option: LFE gain (in dB) */
+ float gain_lfe; /* linear scale applied to LFE channels, computed in config_input() */
+ float rotation; /* rotation of virtual loudspeakers (in degrees) */
+ float elevation; /* elevation of virtual loudspeakers (in deg.) */
+ char *speakers_pos; /* custom positions of the virtual loudspeakers */
+
+ ChParams *params; /* array of n_conv per-channel states, allocated in config_input() */
+
+ int n_conv; /* number of input channels */
+
+ VirtualSpeaker vspkrpos[64]; /* overrides indexed by the channel's bit position in the layout mask */
+} HRTFMContext;
+
+/**
+ * Negotiate the supported formats: AV_SAMPLE_FMT_FLT samples, any channel
+ * layout on the input, stereo on the output and any sample rate.
+ *
+ * @return 0 on success, a non-zero error code otherwise
+ */
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterFormats *sample_fmts = NULL;
+    AVFilterFormats *sample_rates;
+    AVFilterChannelLayouts *in_layouts;
+    AVFilterChannelLayouts *out_layouts = NULL;
+    int err;
+
+    /* sample format shared by input and output */
+    if ((err = ff_add_format(&sample_fmts, AV_SAMPLE_FMT_FLT)) != 0)
+        return err;
+    if ((err = ff_set_common_formats(ctx, sample_fmts)) != 0)
+        return err;
+
+    /* input side: accept every channel layout */
+    in_layouts = ff_all_channel_layouts();
+    if (!in_layouts)
+        return AVERROR(ENOMEM);
+    if ((err = ff_channel_layouts_ref(in_layouts, &ctx->inputs[0]->out_channel_layouts)) != 0)
+        return err;
+
+    /* output side: stereo only */
+    if ((err = ff_add_channel_layout(&out_layouts, AV_CH_LAYOUT_STEREO)) != 0)
+        return err;
+    if ((err = ff_channel_layouts_ref(out_layouts, &ctx->outputs[0]->in_channel_layouts)) != 0)
+        return err;
+
+    /* sample rate shared by input and output */
+    sample_rates = ff_all_samplerates();
+    if (!sample_rates)
+        return AVERROR(ENOMEM);
+
+    return ff_set_common_samplerates(ctx, sample_rates);
+}
+
+/**
+ * Parse a single channel name (e.g. "FL") at the start of *arg.
+ *
+ * @param arg      in/out: string to parse; advanced past the name on success,
+ *                 left untouched on failure
+ * @param rchannel out: bit position of the channel in the channel-layout mask
+ * @param buf      out: buffer of at least 8 bytes receiving the parsed name;
+ *                 it is an empty string when no name could be read
+ * @return 0 on success, AVERROR(EINVAL) if no name is found or if the name
+ *         does not denote exactly one channel
+ */
+static int parse_channel_name(char **arg, int *rchannel, char *buf)
+{
+    int len = 0, i, channel_id = 0;
+    int64_t layout, layout0;
+
+    /* keep buf a valid string even when nothing is parsed: callers log it */
+    buf[0] = '\0';
+
+    /* try to parse a channel name, e.g. "FL"; sscanf() returns EOF, which is
+     * non-zero, on empty input, so require exactly one conversion */
+    if (sscanf(*arg, "%7[A-Z]%n", buf, &len) != 1)
+        return AVERROR(EINVAL);
+
+    layout0 = layout = av_get_channel_layout(buf);
+    /* channel_id <- first set bit in layout */
+    for (i = 32; i > 0; i >>= 1) {
+        if (layout >= 1LL << i) {
+            channel_id += i;
+            layout >>= i;
+        }
+    }
+    /* reject layouts that are not a single channel */
+    if (channel_id >= 64 || layout0 != 1LL << channel_id)
+        return AVERROR(EINVAL);
+
+    *rchannel = channel_id;
+    *arg += len;
+    return 0;
+}
+
+/**
+ * Parse the "speakers" option string into s->vspkrpos[].
+ *
+ * The string is a '|'-separated list of "<CH> <AZIM> [<ELEV>]" entries.
+ * Entries whose channel name cannot be parsed, or that carry no position,
+ * are skipped with a warning. A missing elevation defaults to 0.
+ *
+ * @param ctx               filter context
+ * @param in_channel_layout input channel layout (currently unused)
+ */
+static void parse_speaker_pos(AVFilterContext *ctx, int64_t in_channel_layout)
+{
+    HRTFMContext *s = ctx->priv;
+    char *arg, *tokenizer, *p, *args = av_strdup(s->speakers_pos);
+
+    if (!args)
+        return;
+    p = args;
+
+    while ((arg = av_strtok(p, "|", &tokenizer))) {
+        char buf[8] = "";
+        float azim, elev;
+        int out_ch_id, nb_values;
+
+        p = NULL;
+        if (parse_channel_name(&arg, &out_ch_id, buf)) {
+            /* buf may hold nothing when no name was read; arg is only
+             * advanced on success, so it still points at the whole entry */
+            av_log(ctx, AV_LOG_WARNING, "Failed to parse \'%s\' as channel name.\n", arg);
+            continue;
+        }
+
+        /* one scan covers both "<AZIM> <ELEV>" and "<AZIM>" alone */
+        nb_values = sscanf(arg, "%f %f", &azim, &elev);
+        if (nb_values < 1) {
+            av_log(ctx, AV_LOG_WARNING, "Failed to parse position of channel \'%s\'.\n", buf);
+            continue;
+        }
+
+        s->vspkrpos[out_ch_id].set  = 1;
+        s->vspkrpos[out_ch_id].azim = azim;
+        s->vspkrpos[out_ch_id].elev = nb_values == 2 ? elev : 0;
+    }
+
+    av_free(args);
+}
+
+/**
+ * Assign an azimuth and elevation to every input channel.
+ *
+ * Each channel present in the input layout gets a built-in default position,
+ * which a matching entry of the "speakers" option replaces. The global
+ * rotation and elevation options are then added. LFE channels are flagged
+ * in ChParams.lfe.
+ *
+ * @return 0 on success, AVERROR(EINVAL) for more than 16 input channels or
+ *         for a channel without a known position
+ */
+static int get_speaker_pos(AVFilterContext *ctx)
+{
+    HRTFMContext *s = ctx->priv;
+    uint64_t channels_layout = ctx->inputs[0]->channel_layout;
+    int m, ch, n_conv = ctx->inputs[0]->channels; /* get no. input channels */
+
+    if (n_conv > 16)
+        return AVERROR(EINVAL);
+
+    if (s->speakers_pos)
+        parse_speaker_pos(ctx, channels_layout);
+
+    /* set speaker positions according to input channel configuration: */
+    for (m = 0, ch = 0; ch < n_conv && m < 64; m++) {
+        const uint64_t mask = channels_layout & (1ULL << m);
+        ChParams *p;
+        /* most cases below set only the azimuth and the LFE cases set
+         * nothing, so both values need a defined starting point */
+        float azim = 0, elev = 0;
+
+        /* bit m is not part of the layout: there is no channel to position */
+        if (!mask)
+            continue;
+
+        p = &s->params[ch++];
+
+        switch (mask) {
+        case AV_CH_FRONT_LEFT:            azim =  -30;            break;
+        case AV_CH_FRONT_RIGHT:           azim =   30;            break;
+        case AV_CH_FRONT_CENTER:          azim =    0;            break;
+        case AV_CH_LOW_FREQUENCY:
+        case AV_CH_LOW_FREQUENCY_2:       p->lfe = 1;             break;
+        case AV_CH_BACK_LEFT:             azim = -140;            break;
+        case AV_CH_BACK_RIGHT:            azim =  140;            break;
+        case AV_CH_BACK_CENTER:           azim =  180;            break;
+        case AV_CH_SIDE_LEFT:             azim =  -90;            break;
+        case AV_CH_SIDE_RIGHT:            azim =   90;            break;
+        case AV_CH_FRONT_LEFT_OF_CENTER:  azim =  -15;            break;
+        case AV_CH_FRONT_RIGHT_OF_CENTER: azim =   15;            break;
+        case AV_CH_TOP_CENTER:            azim =    0; elev = 90; break;
+        case AV_CH_TOP_FRONT_LEFT:        azim =  -30; elev = 45; break;
+        case AV_CH_TOP_FRONT_CENTER:      azim =    0; elev = 45; break;
+        case AV_CH_TOP_FRONT_RIGHT:       azim =   30; elev = 45; break;
+        case AV_CH_TOP_BACK_LEFT:         azim = -140; elev = 45; break;
+        case AV_CH_TOP_BACK_RIGHT:        azim =  140; elev = 45; break;
+        case AV_CH_TOP_BACK_CENTER:       azim =  180; elev = 45; break;
+        case AV_CH_WIDE_LEFT:             azim =  -90;            break;
+        case AV_CH_WIDE_RIGHT:            azim =   90;            break;
+        case AV_CH_SURROUND_DIRECT_LEFT:  azim =  -90;            break;
+        case AV_CH_SURROUND_DIRECT_RIGHT: azim =   90;            break;
+        case AV_CH_STEREO_LEFT:           azim =  -90;            break;
+        case AV_CH_STEREO_RIGHT:          azim =   90;            break;
+        default:
+            return AVERROR(EINVAL);
+        }
+
+        /* a user-supplied position replaces the built-in default */
+        if (s->vspkrpos[m].set) {
+            azim = s->vspkrpos[m].azim;
+            elev = s->vspkrpos[m].elev;
+        }
+
+        p->azim = azim + s->rotation;
+        p->elev = elev + s->elevation;
+    }
+
+    return 0;
+}
+
+static void hsfilter(float angle, int Fs, float hradius, float sspeed, float alfa_min, /* angle in degrees; callers pass the sample rate as Fs */
+ float *B0, float *B1, float *A0, float *A1, /* out: first section, numerator B0/B1 and denominator A0/A1 */
+ float *b0, float *b1, float *a0, float *a1) /* out: second section, numerator b0/b1 and denominator a0/a1 */
+{ /* Compute the coefficients of the two first-order sections that filter_frame() runs in series for one ear. */
+ float theta = angle + 90;
+ float theta0 = 180;
+ float w0 = sspeed / hradius;
+ float alfa = 1 + alfa_min / 2 + (1 - alfa_min / 2) * cosf(theta / theta0 * M_PI); /* alfa_min when the cosine is -1, 2 when it is 1 */
+ float gdelay, ac;
+
+ *B0 = ( alfa + w0 / Fs) / (1 + w0 / Fs);
+ *B1 = (-alfa + w0 / Fs) / (1 + w0 / Fs);
+
+ *A0 = 1;
+ *A1 = -(1 - w0 / Fs) / (1 + w0 / Fs);
+
+ if (fabsf(theta) < 90) /* gdelay is expressed in samples (scaled by Fs / w0) */
+ gdelay = -Fs / w0 * (cosf(theta * M_PI / 180) - 1);
+ else
+ gdelay = Fs / w0 * ((fabsf(theta) - 90) * M_PI / 180 + 1);
+
+ ac = (1 - gdelay) / (1 + gdelay); /* coefficient of the second section, derived from gdelay */
+
+ *b0 = ac; /* numerator {ac, 1} over denominator {1, ac}: a first-order allpass */
+ *b1 = 1;
+ *a0 = 1;
+ *a1 = ac;
+}
+
+/**
+ * Compute the gain and length of the delayed copy that dfilter() adds to
+ * one ear's signal.
+ *
+ * @param angle     azimuth in degrees
+ * @param elevation elevation in degrees
+ * @param Fs        sample rate
+ * @param dBgain    gain of the delayed copy in dB
+ * @param gain      out: linear gain of the delayed copy
+ * @param M         out: delay length in samples, always at least 1
+ */
+static void shoulder(float angle, float elevation, int Fs, float dBgain,
+                     float *gain, int *M)
+{
+    float theta = angle;
+    float phi   = elevation;
+    /* delay in milliseconds: it is divided by 1000 before being scaled by Fs.
+     * NOTE(review): the constants match the shoulder-echo delay of the
+     * Brown/Duda structural model, where only the inner term is squared;
+     * confirm the placement of the power against the reference. */
+    float delay = (1.2 * (180 - theta) / 180) * powf(1.f - 0.00004f * ((phi - 80) * (180 / (180 + theta))), 2);
+    double nb_samples;
+
+    /* theta == -180 divides by zero and theta > 180 gives a negative value;
+     * neither is a usable delay, so fall back to the shortest one */
+    if (!isfinite(delay) || delay < 0)
+        delay = 0;
+
+    nb_samples = round(delay / 1000 * Fs);
+
+    *gain = ff_exp10f(dBgain / 20);
+
+    /* dfilter() indexes a buffer of M samples, so M must be at least 1; the
+     * upper clamp keeps the conversion to int well defined */
+    if (nb_samples < 1)
+        *M = 1;
+    else if (nb_samples > INT_MAX)
+        *M = INT_MAX;
+    else
+        *M = nb_samples;
+}
+
+/**
+ * Configure the filter for the input link: allocate the per-channel state,
+ * place the virtual speakers and derive the filter coefficients and delay
+ * lines of both ears for every channel.
+ *
+ * Memory allocated before a failure is released by uninit().
+ *
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    HRTFMContext *s = ctx->priv;
+    int ret, ch, ear;
+
+    s->n_conv   = inlink->channels;
+    s->gain_lfe = expf((s->gain - 3 * s->n_conv + s->lfe_gain) / 20 * M_LN10);
+
+    s->params = av_calloc(s->n_conv, sizeof(*s->params));
+    if (!s->params)
+        return AVERROR(ENOMEM);
+
+    /* get speaker positions */
+    if ((ret = get_speaker_pos(ctx)) < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Couldn't get speaker positions. Input channel configuration not supported.\n");
+        return ret;
+    }
+
+    for (ch = 0; ch < inlink->channels; ch++) {
+        ChParams *p = &s->params[ch];
+
+        /* the right ear sees the same speaker at the mirrored azimuth */
+        hsfilter( p->azim, inlink->sample_rate, s->hradius, s->sspeed, s->alfa_min,
+                 &p->Bl[0], &p->Bl[1], &p->Al[0], &p->Al[1],
+                 &p->bl[0], &p->bl[1], &p->al[0], &p->al[1]);
+        hsfilter(-p->azim, inlink->sample_rate, s->hradius, s->sspeed, s->alfa_min,
+                 &p->Br[0], &p->Br[1], &p->Ar[0], &p->Ar[1],
+                 &p->br[0], &p->br[1], &p->ar[0], &p->ar[1]);
+
+        shoulder( p->azim, p->elev, inlink->sample_rate, 0, &p->gain[0], &p->M[0]);
+        shoulder(-p->azim, p->elev, inlink->sample_rate, 0, &p->gain[1], &p->M[1]);
+
+        for (ear = 0; ear < 2; ear++) {
+            /* dfilter() reads and writes delayed_samples[0..M-1], so a zero
+             * or negative length would leave it without a valid buffer */
+            if (p->M[ear] < 1)
+                p->M[ear] = 1;
+
+            p->delayed_samples[ear] = av_calloc(p->M[ear], sizeof(float));
+            if (!p->delayed_samples[ear])
+                return AVERROR(ENOMEM);
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Run one sample through a first-order IIR section:
+ * y[n] = b0 * x[n] + b1 * x[n-1] - a1 * y[n-1].
+ *
+ * @param input  current input sample x[n]
+ * @param icache in/out: previous input sample, updated to input
+ * @param ocache in/out: previous output sample, updated to the result
+ * @return the filtered sample y[n]
+ */
+static float bfilter(float input, float *icache, float *ocache,
+                     float b0, float b1, float a1)
+{
+    const float y = input * b0 + *icache * b1 - *ocache * a1;
+
+    *ocache = y;
+    *icache = input;
+
+    return y;
+}
+
+/**
+ * Add a delayed, scaled copy of the signal to the current sample.
+ *
+ * @param input           current input sample
+ * @param gain            linear gain applied to the delayed sample
+ * @param M               delay length in samples
+ * @param delayed_samples circular buffer holding the last M input samples
+ * @param delayed_index   in/out: current position in the circular buffer
+ * @return input plus gain times the input from M samples earlier
+ */
+static float dfilter(float input, float gain, int M,
+                     float *delayed_samples, int *delayed_index)
+{
+    float output;
+
+    /* an empty delay line cannot be indexed; treat it as a delay of zero
+     * samples, i.e. the "delayed" sample is the current one */
+    if (M <= 0 || !delayed_samples)
+        return input + gain * input;
+
+    output = gain * delayed_samples[*delayed_index];
+
+    /* overwrite the oldest sample and advance, wrapping at the buffer end */
+    delayed_samples[*delayed_index] = input;
+    (*delayed_index)++;
+    if (*delayed_index >= M)
+        *delayed_index = 0;
+
+    return input + output;
+}
+
+/**
+ * Render one interleaved float input frame to a stereo output frame.
+ *
+ * LFE channels are mixed into both ears with the LFE gain only. Every other
+ * channel goes, per ear, through the two bfilter() stages and dfilter()
+ * before being summed. Samples outside [-1, 1] are counted and reported but
+ * not altered.
+ *
+ * Takes ownership of in.
+ *
+ * @return the result of ff_filter_frame(), or a negative AVERROR code
+ */
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFrame *out = ff_get_audio_buffer(outlink, in->nb_samples);
+    HRTFMContext *s = ctx->priv;
+    const float scale = expf((s->gain - 3 * s->n_conv) / 20 * M_LN10);
+    unsigned n_clippings[2] = { 0 };
+    const float *src = (const float *)in->data[0];
+    float *dst;
+    int ch, n, ret;
+
+    if (!out) {
+        av_frame_free(&in);
+        return AVERROR(ENOMEM);
+    }
+
+    ret = av_frame_copy_props(out, in);
+    if (ret < 0) {
+        av_frame_free(&in);
+        av_frame_free(&out);
+        return ret;
+    }
+
+    dst = (float *)out->data[0];
+
+    for (n = 0; n < in->nb_samples; n++, dst += 2) {
+        dst[0] = dst[1] = 0;
+
+        for (ch = 0; ch < in->channels; ch++, src++) {
+            ChParams *p = &s->params[ch];
+            float left, right;
+
+            if (p->lfe) {
+                left = right = src[0];
+                dst[0] += s->gain_lfe * left;
+                dst[1] += s->gain_lfe * right;
+            } else {
+                left = bfilter(src[0], &p->cache1_in[0], &p->cache1_out[0],
+                               p->Bl[0], p->Bl[1], p->Al[1]);
+
+                left = bfilter(left, &p->cache2_in[0], &p->cache2_out[0],
+                               p->bl[0], p->bl[1], p->al[1]);
+
+                right = bfilter(src[0], &p->cache1_in[1], &p->cache1_out[1],
+                                p->Br[0], p->Br[1], p->Ar[1]);
+
+                right = bfilter(right, &p->cache2_in[1], &p->cache2_out[1],
+                                p->br[0], p->br[1], p->ar[1]);
+
+                left += dfilter(left, p->gain[0], p->M[0],
+                                p->delayed_samples[0], &p->delayed_index[0]);
+
+                right += dfilter(right, p->gain[1], p->M[1],
+                                 p->delayed_samples[1], &p->delayed_index[1]);
+
+                dst[0] += scale * left;
+                dst[1] += scale * right;
+            }
+        }
+
+        if (fabsf(dst[0]) > 1)
+            n_clippings[0]++;
+        if (fabsf(dst[1]) > 1)
+            n_clippings[1]++;
+    }
+
+    /* display warning message if clipping occurred */
+    if (n_clippings[0] + n_clippings[1] > 0) {
+        /* the counters are unsigned, so they need %u rather than %d */
+        av_log(ctx, AV_LOG_WARNING, "%u of %d samples clipped. Please reduce gain.\n",
+               n_clippings[0] + n_clippings[1], out->nb_samples * 2);
+    }
+
+    av_frame_free(&in);
+    return ff_filter_frame(outlink, out);
+}
+
+/**
+ * Release the per-channel delay buffers and the per-channel state array.
+ */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    HRTFMContext *s = ctx->priv;
+    int i;
+
+    /* nothing was allocated */
+    if (!s->params)
+        return;
+
+    for (i = 0; i < s->n_conv; i++) {
+        av_freep(&s->params[i].delayed_samples[0]);
+        av_freep(&s->params[i].delayed_samples[1]);
+    }
+
+    av_freep(&s->params);
+}
+
+#define OFFSET(x) offsetof(HRTFMContext, x) /* byte offset of an option field inside the private context */
+#define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM /* flags shared by every option below */
+
+static const AVOption hrtfm_options[] = { /* each entry: name, help text, field offset, type, default, minimum, maximum, flags */
+ { "hradius", "set head radius", OFFSET(hradius), AV_OPT_TYPE_FLOAT, {.dbl=0.0891},0.05,0.25,.flags = FLAGS },
+ { "sspeed", "set sound speed", OFFSET(sspeed), AV_OPT_TYPE_FLOAT, {.dbl=334}, 300, 400, .flags = FLAGS },
+ { "amin", "set alfa min", OFFSET(alfa_min), AV_OPT_TYPE_FLOAT, {.dbl=0.05}, 0.01, 1, .flags = FLAGS },
+ { "gain", "set gain in dB", OFFSET(gain), AV_OPT_TYPE_FLOAT, {.dbl=0}, -20, 40, .flags = FLAGS },
+ { "rotation", "set rotation" , OFFSET(rotation), AV_OPT_TYPE_FLOAT, {.dbl=0}, -360, 360, .flags = FLAGS },
+ { "elevation", "set elevation", OFFSET(elevation), AV_OPT_TYPE_FLOAT, {.dbl=0}, -90, 90, .flags = FLAGS },
+ { "speakers", "set speaker custom positions", OFFSET(speakers_pos), AV_OPT_TYPE_STRING, {.str=0}, 0, 0, .flags = FLAGS },
+ { "lfegain", "set lfe gain in dB", OFFSET(lfe_gain), AV_OPT_TYPE_FLOAT, {.dbl=0}, -11, 11, .flags = FLAGS },
+ { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(hrtfm); /* presumably defines hrtfm_class from hrtfm_options; the filter definition references &hrtfm_class */
+
+static const AVFilterPad hrtfm_inputs[] = { /* the single audio input pad */
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_AUDIO,
+ .config_props = config_input, /* allocates the per-channel state and computes the filter coefficients */
+ .filter_frame = filter_frame, /* renders each input frame to stereo */
+ },
+ { NULL }
+};
+
+static const AVFilterPad hrtfm_outputs[] = { /* the single audio output pad; query_formats() restricts it to stereo */
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_AUDIO,
+ },
+ { NULL }
+};
+
+AVFilter ff_af_hrtfm = { /* filter definition tying together the callbacks, options and pads of this file */
+ .name = "hrtfm",
+ .description = NULL_IF_CONFIG_SMALL("Apply Head Related Transfer Function Model filter."),
+ .priv_size = sizeof(HRTFMContext),
+ .priv_class = &hrtfm_class,
+ .query_formats = query_formats,
+ .uninit = uninit, /* frees the per-channel state allocated by config_input() */
+ .inputs = hrtfm_inputs,
+ .outputs = hrtfm_outputs,
+};
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index cc423af738..cf795febc1 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -110,6 +110,7 @@ static void register_all(void)
REGISTER_FILTER(HDCD, hdcd, af);
REGISTER_FILTER(HEADPHONE, headphone, af);
REGISTER_FILTER(HIGHPASS, highpass, af);
+ REGISTER_FILTER(HRTFM, hrtfm, af);
REGISTER_FILTER(JOIN, join, af);
REGISTER_FILTER(LADSPA, ladspa, af);
REGISTER_FILTER(LOUDNORM, loudnorm, af);
--
2.11.0
More information about the ffmpeg-devel
mailing list