[FFmpeg-devel] [PATCH 2/2] lavfi: Add libspatialaudio filter

Mon Nov 25 12:43:09 EET 2024

---
 configure                               |   5 +
 libavfilter/Makefile                    |   1 +
 libavfilter/af_libspatialaudio.cpp      | 294 ++++++++++++++++++++++++
 libavfilter/af_libspatialaudio_c.c      | 199 ++++++++++++++++
 libavfilter/af_libspatialaudio_common.h |  47 ++++
 libavfilter/allfilters.c                |   1 +
 6 files changed, 547 insertions(+)
 create mode 100644 libavfilter/af_libspatialaudio.cpp
 create mode 100644 libavfilter/af_libspatialaudio_c.c
 create mode 100644 libavfilter/af_libspatialaudio_common.h

diff --git a/configure b/configure
index ff26aa2da7..30b3a35d64 100755
--- a/configure
+++ b/configure
@@ -274,6 +274,7 @@ External library support:
   --enable-libsmbclient    enable Samba protocol via libsmbclient [no]
   --enable-libsnappy       enable Snappy compression, needed for hap encoding [no]
   --enable-libsoxr         enable Include libsoxr resampling [no]
+  --enable-libspatialaudio enable ambisonics/binaural renderer support via libspatialaudio [no]
   --enable-libspeex        enable Speex de/encoding via libspeex [no]
   --enable-libsrt          enable Haivision SRT protocol via libsrt [no]
   --enable-libssh          enable SFTP protocol via libssh [no]
@@ -1991,6 +1992,7 @@ EXTERNAL_LIBRARY_LIST="
     libsnappy
     libsoxr
+    libspatialaudio
     libspeex
     libsrt
     libssh
     libsvtav1
@@ -3994,6 +3996,8 @@ signature_filter_deps="gpl avcodec avformat"
 smartblur_filter_deps="gpl swscale"
 sobel_opencl_filter_deps="opencl"
 sofalizer_filter_deps="libmysofa"
+libspatialaudio_filter_deps="libspatialaudio"
+libspatialaudio_filter_extralibs="-lstdc++"
 spp_filter_deps="gpl avcodec"
 spp_filter_select="idctdsp fdctdsp me_cmp pixblockdsp"
 sr_filter_deps="avformat swscale"
@@ -7043,6 +7047,7 @@ enabled libsnappy         && require libsnappy snappy-c.h snappy_compress -lsnap
 enabled libsoxr           && require libsoxr soxr.h soxr_create -lsoxr
 enabled libssh            && require_pkg_config libssh "libssh >= 0.6.0" libssh/sftp.h sftp_init
 enabled libspeex          && require_pkg_config libspeex speex speex/speex.h speex_decoder_init
+enabled libspatialaudio   && require_pkg_config_cxx libspatialaudio "spatialaudio >= 0.3.1" spatialaudio/Ambisonics.h "CAmbisonicDecoder"
 enabled libsrt            && require_pkg_config libsrt "srt >= 1.3.0" srt/srt.h srt_socket
 enabled libsvtav1         && require_pkg_config libsvtav1 "SvtAv1Enc >= 0.9.0" EbSvtAv1Enc.h svt_av1_enc_init_handle
 enabled libtensorflow     && require libtensorflow tensorflow/c/c_api.h TF_Version -ltensorflow
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 4d9681768b..f47d8a9912 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -154,6 +154,7 @@ OBJS-$(CONFIG_LADSPA_FILTER)                 += af_ladspa.o
 OBJS-$(CONFIG_LOUDNORM_FILTER)               += af_loudnorm.o ebur128.o
 OBJS-$(CONFIG_LOWPASS_FILTER)                += af_biquads.o
 OBJS-$(CONFIG_LOWSHELF_FILTER)               += af_biquads.o
+OBJS-$(CONFIG_LIBSPATIALAUDIO_FILTER)        += af_libspatialaudio_c.o af_libspatialaudio.o
 OBJS-$(CONFIG_LV2_FILTER)                    += af_lv2.o
 OBJS-$(CONFIG_MCOMPAND_FILTER)               += af_mcompand.o
 OBJS-$(CONFIG_PAN_FILTER)                    += af_pan.o
diff --git a/libavfilter/af_libspatialaudio.cpp b/libavfilter/af_libspatialaudio.cpp
new file mode 100644
index 0000000000..593cf5400b
--- /dev/null
+++ b/libavfilter/af_libspatialaudio.cpp
@@ -0,0 +1,294 @@
+/*
+ * Copyright (C) 2024 Marvin Scholz
+ * Copyright (C) 2017 VLC authors and VideoLAN
+ *
+ * Authors: Marvin Scholz <epirat07 at gmail.com>
+ *
+ * Heavily inspired from VLC media players' spatialaudio.cpp
+ *   Authors: Adrien Maglo <magsoft at videolan.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ */
+
+extern "C" {
+    #include "libavutil/avassert.h"
+    #include "libavutil/mem.h"
+    #include "avfilter.h"
+    #include "filters.h"
+    #include "libavfilter/af_libspatialaudio_common.h"
+}
+
+#include <algorithm>
+#include <iterator>
+#include <vector>
+#include <map>
+
+#include <spatialaudio/Ambisonics.h>
+#include <spatialaudio/SpeakersBinauralizer.h>
+
+// C++ state behind the opaque handle that the C filter code passes around.
+struct SpatialaudioContext
+{
+    SpatialaudioContext() {}
+    ~SpatialaudioContext() {}
+
+    enum SpatialaudioMode
+    {
+        AMBISONICS_DECODER = 0, // Ambisonics decoding
+        AMBISONICS_BINAURAL_DECODER, // Ambisonics decoding to binaural
+    };
+
+    void *logctx = nullptr; // passed to av_log()
+
+    SpatialaudioMode mode = AMBISONICS_DECODER; // read by process(); must never be indeterminate
+
+    CAmbisonicBinauralizer binauralDecoder;
+    SpeakersBinauralizer binauralizer; // not referenced by the functions in this file
+    CAmbisonicDecoder speakerDecoder;
+    CAmbisonicProcessor processor;
+    CAmbisonicZoomer zoomer;
+
+    std::string hrtfPath;
+
+    // Ambisonic order
+    unsigned ambisonicOrder = 0;
+    // Non-diegetic channels
+    unsigned nondiegeticChCount = 0;
+
+    unsigned inChCount = 0;
+    unsigned outChCount = 0;
+
+    /* View point */
+    struct {
+        float theta = 0.f;
+        float phi = 0.f;
+        float roll = 0.f;
+        float zoom = 0.f;
+    } viewpoint;
+
+};
+
+// Speaker positions according to Rec. ITU-R BS.2051-3 (azimuth, elevation, distance)
+static const std::map<enum AVChannel, PolarPoint> speakerPositions = {
+    { AV_CHAN_FRONT_LEFT,    { DegreesToRadians(+30),   0.f, 1.0f } },
+    { AV_CHAN_FRONT_RIGHT,   { DegreesToRadians(-30),   0.f, 1.0f } },
+    { AV_CHAN_SIDE_LEFT,     { DegreesToRadians(+110),  0.f, 1.0f } },
+    { AV_CHAN_SIDE_RIGHT,    { DegreesToRadians(-110),  0.f, 1.0f } },
+    { AV_CHAN_BACK_LEFT,     { DegreesToRadians(+145),  0.f, 1.0f } },
+    { AV_CHAN_BACK_RIGHT,    { DegreesToRadians(-145),  0.f, 1.0f } },
+    { AV_CHAN_BACK_CENTER,   { DegreesToRadians(+180),  0.f, 1.0f } }, // behind the listener, not in front
+    { AV_CHAN_FRONT_CENTER,  { DegreesToRadians(+0),    0.f, 1.0f } },
+    { AV_CHAN_LOW_FREQUENCY, { DegreesToRadians(+0),    0.f, 0.5f } },
+};
+
+// L/R channels
+// This list must be ordered so that each left channel is directly followed by its right channel!
+static const std::vector<enum AVChannel> stereoChannels = {
+    AV_CHAN_FRONT_LEFT,
+    AV_CHAN_FRONT_RIGHT,
+
+    AV_CHAN_SIDE_LEFT,
+    AV_CHAN_SIDE_RIGHT,
+
+    AV_CHAN_BACK_LEFT,
+    AV_CHAN_BACK_RIGHT,
+};
+
+// Construct the context in av_malloc()ed storage; returns 0 or AVERROR(ENOMEM).
+int spatialaudio_context_create(struct SpatialaudioContext **spctx, void *logctx)
+{
+    void *mem = av_malloc(sizeof(SpatialaudioContext));
+    if (mem == NULL)
+        return AVERROR(ENOMEM);
+    SpatialaudioContext *self = *spctx = new(mem) SpatialaudioContext();
+    self->logctx = logctx;
+    return 0;
+}
+
+// Store the orientation in radians (yaw is negated) and turn fov into a zoom amount.
+void spatialaudio_context_set_viewpoint(struct SpatialaudioContext *spctx, float yaw, float pitch, float roll, float fov)
+{
+    const float fov_span = FOV_DEGREES_DEFAULT - FOV_DEGREES_MIN;
+
+    spctx->viewpoint.roll   =  DegreesToRadians(roll);
+    spctx->viewpoint.phi    =  DegreesToRadians(pitch);
+    spctx->viewpoint.theta  = -DegreesToRadians(yaw);
+    // no unzoom as it does not really make sense.
+    spctx->viewpoint.zoom = (fov >= FOV_DEGREES_DEFAULT) ? 0.f : (FOV_DEGREES_DEFAULT - fov) / fov_span;
+}
+
+void spatialaudio_context_set_hrtf_path(struct SpatialaudioContext *spctx, const char *hrtf_path)
+{
+    spctx->hrtfPath.assign(hrtf_path ? hrtf_path : ""); // a NULL path is stored as the empty string
+}
+
+// Validate the ambisonic input layout and record its order and non-diegetic channel count.
+int spatialaudio_context_configure_input(struct SpatialaudioContext *spctx, const AVChannelLayout *in_layout)
+{
+    spctx->inChCount = in_layout->nb_channels;
+    if (in_layout->order != AV_CHANNEL_ORDER_AMBISONIC) {
+        av_log(spctx->logctx, AV_LOG_ERROR, "Input channel layout is not ambisonic\n");
+        return AVERROR(EINVAL);
+    }
+    int order = av_channel_layout_ambisonic_order(in_layout);
+    if (order < 0 || order > AMB_MAX_ORDER) {
+        av_log(spctx->logctx, AV_LOG_ERROR, "unsupported/invalid ambisonic order\n");
+        return AVERROR(EINVAL);
+    }
+    spctx->ambisonicOrder = order;
+    // Whatever exceeds the (order + 1)^2 ambisonic components is counted as non-diegetic.
+    spctx->nondiegeticChCount = spctx->inChCount - (spctx->ambisonicOrder + 1) * (spctx->ambisonicOrder + 1);
+    av_log(spctx->logctx, AV_LOG_VERBOSE, "channels: %u, ambisonic order: %u, non-diegetic channels: %u\n",
+        spctx->inChCount, spctx->ambisonicOrder, spctx->nondiegeticChCount);
+
+    // Non-diegetic audio is only accepted as exactly one stereo pair.
+    if (spctx->nondiegeticChCount > 0 &&
+        (spctx->nondiegeticChCount != 2 ||
+            av_channel_layout_subset(in_layout, AV_CH_LAYOUT_STEREO) != AV_CH_LAYOUT_STEREO))
+    {
+        av_log(spctx->logctx, AV_LOG_ERROR, "Invalid amount of non-diegetic channels: %u\n", spctx->nondiegeticChCount);
+        return AVERROR(EINVAL);
+    }
+    return 0;
+}
+
+// Select binaural or loudspeaker decoding for out_layout and configure decoder, processor and zoomer.
+int spatialaudio_context_configure_output(struct SpatialaudioContext *spctx, const AVChannelLayout *out_layout, int samplerate)
+{
+    // Plain aggregate initialization; a C-style compound literal is not standard C++.
+    static const AVChannelLayout binaural_layout = AV_CHANNEL_LAYOUT_BINAURAL;
+    spctx->outChCount = out_layout->nb_channels;
+
+    if (spctx->outChCount == 2 &&
+        !av_channel_layout_compare(out_layout, &binaural_layout))
+    {
+        spctx->mode = SpatialaudioContext::AMBISONICS_BINAURAL_DECODER;
+        if (spctx->hrtfPath.empty()) {
+            av_log(spctx->logctx, AV_LOG_ERROR, "HRTF file path needs to be provided for binaural rendering\n");
+            return AVERROR(EINVAL);
+        }
+
+        unsigned tailLength = 0;
+        if (!spctx->binauralDecoder.Configure(spctx->ambisonicOrder, true, samplerate,
+            AMB_BLOCK_TIME_LEN, tailLength, spctx->hrtfPath))
+        {
+            av_log(spctx->logctx, AV_LOG_ERROR, "Failure creating binaural ambisonic decoder\n");
+            return AVERROR(EINVAL);
+        }
+    } else {
+        // process() branches on mode, so it has to be set on this path as well.
+        spctx->mode = SpatialaudioContext::AMBISONICS_DECODER;
+        if (spctx->outChCount == 1 ||
+           !spctx->speakerDecoder.Configure(spctx->ambisonicOrder, true, AMB_BLOCK_TIME_LEN,
+               kAmblib_CustomSpeakerSetUp, spctx->outChCount))
+        {
+            av_log(spctx->logctx, AV_LOG_ERROR, "Failure creating the ambisonics decoder\n");
+            return AVERROR(EINVAL);
+        }
+
+        for (size_t idx = 0; idx < spctx->outChCount; idx++) {
+            char name[32];
+            enum AVChannel channel = av_channel_layout_channel_from_index(out_layout, idx);
+            av_channel_name(name, sizeof(name), channel);
+            PolarPoint point = {};
+            auto it = speakerPositions.find(channel);
+            if (it != speakerPositions.end()) {
+                point = it->second;
+                av_log(spctx->logctx, AV_LOG_VERBOSE, "Setting point for channel '%s':\t Azi.: %.2f, Elev.: %.2f, Dist.: %.2f\n",
+                    name, point.fAzimuth, point.fElevation, point.fDistance);
+            } else {
+                av_log(spctx->logctx, AV_LOG_WARNING, "No position information for channel '%s', assuming Azi.: %.2f, Elev.: %.2f, Dist.: %.2f\n",
+                    name, point.fAzimuth, point.fElevation, point.fDistance);
+            }
+            spctx->speakerDecoder.SetPosition(idx, point);
+        }
+        // Let the decoder pick up the custom speaker positions set above.
+        spctx->speakerDecoder.Refresh();
+    }
+
+    if (!spctx->processor.Configure(spctx->ambisonicOrder, true, AMB_BLOCK_TIME_LEN, 0)) {
+        av_log(spctx->logctx, AV_LOG_ERROR, "Failure creating the ambisonics processor\n");
+        return AVERROR_EXTERNAL;
+    }
+    if (!spctx->zoomer.Configure(spctx->ambisonicOrder, true, AMB_BLOCK_TIME_LEN, 0)) {
+        av_log(spctx->logctx, AV_LOG_ERROR, "Failure creating the ambisonics zoomer\n");
+        return AVERROR_EXTERNAL;
+    }
+    return 0;
+}
+
+// Mix in the non-diegetic (headlocked) channels into the output
+static void add_nondiegetic_channels(struct SpatialaudioContext *spctx, AVFrame *in, AVFrame *out)
+{
+    static const AVChannelLayout binaural_layout = AV_CHANNEL_LAYOUT_BINAURAL;
+    const AVChannelLayout *out_layout = &out->ch_layout;
+    // Binaural output is not covered by stereoChannels; there, index 0/1 is taken as left/right.
+    const bool binaural = !av_channel_layout_compare(out_layout, &binaural_layout);
+    // The non-diegetic channels are at the end
+    unsigned nondiegeticChStart = spctx->inChCount - spctx->nondiegeticChCount;
+
+    for (size_t idx = 0; idx < spctx->outChCount; idx++) {
+        unsigned lr = (unsigned)idx; // 0 for left, 1 for right
+
+        if (!binaural) {
+            enum AVChannel channel = av_channel_layout_channel_from_index(out_layout, idx);
+            auto it = std::find(stereoChannels.cbegin(), stereoChannels.cend(), channel);
+            if (it == stereoChannels.cend())
+                continue; // neither a left nor a right speaker: leave it untouched
+            // stereoChannels alternates left/right, so the parity of the position gives the side
+            lr = std::distance(stereoChannels.cbegin(), it) % 2;
+        }
+        float *in_plane = ((float **)in->extended_data)[nondiegeticChStart + lr];
+        float *out_plane = ((float **)out->extended_data)[idx];
+        for (size_t i = 0; i < out->nb_samples; i++)
+            out_plane[i] = out_plane[i] / 2.f + in_plane[i] / 2.f;
+    }
+}
+
+// Rotate, zoom and decode one frame from in into out; returns 0 or a negative AVERROR code.
+int spatialaudio_context_process(struct SpatialaudioContext *spctx, AVFrame *in, AVFrame *out)
+{
+    const unsigned ambiChCount = spctx->inChCount - spctx->nondiegeticChCount;
+    CBFormat inData;
+    if (!inData.Configure(spctx->ambisonicOrder, true, in->nb_samples))
+        return AVERROR_EXTERNAL; // do not hand an unconfigured buffer to the decoders
+    av_assert2(in->ch_layout.nb_channels >= 0 && (unsigned)in->ch_layout.nb_channels >= ambiChCount);
+    for (unsigned i = 0; i < ambiChCount; ++i)
+        inData.InsertStream((float *)in->extended_data[i], i, in->nb_samples);
+
+    Orientation ori(spctx->viewpoint.theta, spctx->viewpoint.phi, spctx->viewpoint.roll);
+    spctx->processor.SetOrientation(ori);
+    spctx->processor.Refresh();
+    spctx->processor.Process(&inData, inData.GetSampleCount());
+
+    spctx->zoomer.SetZoom(spctx->viewpoint.zoom);
+    spctx->zoomer.Refresh();
+    spctx->zoomer.Process(&inData, inData.GetSampleCount());
+
+    if (spctx->mode == SpatialaudioContext::AMBISONICS_BINAURAL_DECODER)
+        spctx->binauralDecoder.Process(&inData, (float **)out->extended_data, out->nb_samples);
+    else
+        spctx->speakerDecoder.Process(&inData, out->nb_samples, (float **)out->extended_data);
+    if (spctx->nondiegeticChCount > 0)
+        add_nondiegetic_channels(spctx, in, out);
+    return 0;
+}
+
+void spatialaudio_context_destroy(struct SpatialaudioContext **spctx)
+{
+    if (SpatialaudioContext *self = *spctx) // nothing to destruct when *spctx is NULL
+        self->~SpatialaudioContext();       // explicit call: the object lives in av_malloc()ed storage
+    av_freep(spctx);
+}
diff --git a/libavfilter/af_libspatialaudio_c.c b/libavfilter/af_libspatialaudio_c.c
new file mode 100644
index 0000000000..a21079c3a7
--- /dev/null
+++ b/libavfilter/af_libspatialaudio_c.c
@@ -0,0 +1,199 @@
+/*
+ * Copyright (C) 2024 Marvin Scholz
+ *
+ * Authors: Marvin Scholz <epirat07 at gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ */
+
+#include "libavutil/mem.h"
+#include "libavutil/tx.h"
+#include "libavutil/avstring.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/intmath.h"
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "filters.h"
+#include "formats.h"
+#include "audio.h"
+
+#include "libavfilter/af_libspatialaudio_common.h"
+
+struct SpatialaudioContextC {
+    const AVClass *class;
+    struct SpatialaudioContext *spctx; // opaque C++ state, created in init()
+
+    // The output channel layout to mix to
+    AVChannelLayout output_layout;
+
+    float fov;   // "fov" option, degrees
+    float roll;  // "roll" option, degrees
+    float pitch; // "pitch" option, degrees
+    float yaw;   // "yaw" option, degrees
+
+    // Path of the HRTF file (only used for binaural rendering)
+    char *hrtf_path;
+};
+
+// Render one input frame into a newly allocated output frame and pass it downstream.
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    struct SpatialaudioContextC *s = ctx->priv;
+    AVFrame *out = ff_get_audio_buffer(outlink, in->nb_samples);
+    int ret = out ? av_frame_copy_props(out, in) : AVERROR(ENOMEM);
+
+    if (ret >= 0)
+        ret = spatialaudio_context_process(s->spctx, in, out);
+    // The input is consumed on every path; the output is only kept on
+    // success (it used to leak when processing failed).
+    av_frame_free(&in);
+    if (ret < 0) {
+        av_frame_free(&out);
+        return ret;
+    }
+    return ff_filter_frame(outlink, out);
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    struct SpatialaudioContextC *priv = ctx->priv;
+    spatialaudio_context_destroy(&priv->spctx); // destroy() checks for a NULL context itself
+}
+
+// Create the C++ context and hand over the viewpoint and HRTF path options.
+static av_cold int init(AVFilterContext *ctx)
+{
+    struct SpatialaudioContextC *priv = ctx->priv;
+    int err = spatialaudio_context_create(&priv->spctx, ctx);
+    if (err >= 0) {
+        spatialaudio_context_set_viewpoint(priv->spctx, priv->yaw, priv->pitch, priv->roll, priv->fov);
+        spatialaudio_context_set_hrtf_path(priv->spctx, priv->hrtf_path);
+    }
+    return err < 0 ? err : 0;
+}
+
+// Validate the input layout and request blocks of exactly AMB_BLOCK_TIME_LEN samples.
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    struct SpatialaudioContextC *priv = ctx->priv;
+    FilterLink *link = ff_filter_link(inlink);
+    int err = spatialaudio_context_configure_input(priv->spctx, &inlink->ch_layout);
+
+    if (err < 0)
+        return err;
+    link->max_samples = AMB_BLOCK_TIME_LEN;
+    link->min_samples = AMB_BLOCK_TIME_LEN;
+    return 0;
+}
+
+// Configure the renderer for the output link's channel layout and sample rate.
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    struct SpatialaudioContextC *priv = ctx->priv;
+    int err = spatialaudio_context_configure_output(priv->spctx, &outlink->ch_layout,
+                                                    outlink->sample_rate);
+
+    // Only negative error codes are passed on; anything else is plain success.
+    return err < 0 ? err : 0;
+
+}
+
+// Format negotiation: offer FLTP as the only sample format and pin the output layout.
+static int query_formats(const AVFilterContext *ctx,
+                         AVFilterFormatsConfig **cfg_in,
+                         AVFilterFormatsConfig **cfg_out)
+{
+    static const enum AVSampleFormat sample_fmts[] = {
+        AV_SAMPLE_FMT_FLTP,
+        AV_SAMPLE_FMT_NONE
+    };
+    struct SpatialaudioContextC *priv = ctx->priv;
+    AVFilterChannelLayouts *layouts = NULL;
+    int err;
+
+    // Each step only runs if the previous one succeeded; the first
+    // negative error code is what gets returned.
+    err = ff_set_common_formats_from_list2(ctx, cfg_in, cfg_out, sample_fmts);
+
+    // Build a one-entry layout list from the "channel_layout" option ...
+    if (err >= 0)
+        err = ff_add_channel_layout(&layouts, &priv->output_layout);
+
+    // ... and attach it to the first output configuration only.
+    if (err >= 0)
+        err = ff_channel_layouts_ref(layouts, &cfg_out[0]->channel_layouts);
+
+    return err < 0 ? err : 0;
+}
+
+#define OFFSET(x) offsetof(struct SpatialaudioContextC, x)
+#define FLAGS (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM)
+// User options; fov/roll/pitch/yaw are given in degrees and converted in set_viewpoint().
+static const AVOption libspatialaudio_options[] = {
+    { "channel_layout", "Output channel layout", OFFSET(output_layout),
+        AV_OPT_TYPE_CHLAYOUT, { .str = "stereo" }, .flags = FLAGS },
+
+    { "fov", "FoV in the soundsphere", OFFSET(fov),
+        AV_OPT_TYPE_FLOAT, { .dbl = FOV_DEGREES_DEFAULT }, FOV_DEGREES_MIN, FOV_DEGREES_MAX, .flags = FLAGS },
+
+    { "roll", "Roll to apply to the position in the soundsphere",OFFSET(roll),
+        AV_OPT_TYPE_FLOAT, { .dbl = 0.f }, -180.f, 180.f, .flags = FLAGS },
+
+    { "pitch","Pitch to apply to the position in the soundsphere",OFFSET(pitch),
+        AV_OPT_TYPE_FLOAT, { .dbl = 0.f }, -180.f, 180.f, .flags = FLAGS },
+
+    { "yaw",  "Yaw to apply to the position in the soundsphere",  OFFSET(yaw),
+        AV_OPT_TYPE_FLOAT, { .dbl = 0.f }, -180.f, 180.f, .flags = FLAGS },
+
+    { "hrtf_path", "Path to the HRTF file to use for binauralization", OFFSET(hrtf_path),
+        AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
+
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(libspatialaudio);
+
+static const AVFilterPad inputs[] = {
+    {
+        .name           = "default",
+        .type           = AVMEDIA_TYPE_AUDIO,
+        .config_props   = config_input, // checks the ambisonic layout, sets the block size
+        .filter_frame   = filter_frame, // renders each incoming frame
+    },
+};
+
+static const AVFilterPad outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .config_props = config_output, // configures decoder, processor and zoomer
+    },
+};
+
+const AVFilter ff_af_libspatialaudio = {
+    .name          = "libspatialaudio",
+    .description   = NULL_IF_CONFIG_SMALL("Spatial audio rendering using libspatialaudio"),
+    .priv_size     = sizeof(struct SpatialaudioContextC),
+    .priv_class    = &libspatialaudio_class, // generated by AVFILTER_DEFINE_CLASS(libspatialaudio)
+    .init          = init,   // creates the C++ context
+    .uninit        = uninit, // destroys the C++ context
+    FILTER_INPUTS(inputs),
+    FILTER_OUTPUTS(outputs),
+    FILTER_QUERY_FUNC2(query_formats),
+};
diff --git a/libavfilter/af_libspatialaudio_common.h b/libavfilter/af_libspatialaudio_common.h
new file mode 100644
index 0000000000..a07b05d940
--- /dev/null
+++ b/libavfilter/af_libspatialaudio_common.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2024 Marvin Scholz
+ *
+ * Authors: Marvin Scholz <epirat07 at gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ */
+#ifndef AVFILTER_AF_LIBSPATIALAUDIO_COMMON_H
+#define AVFILTER_AF_LIBSPATIALAUDIO_COMMON_H
+
+#include "avfilter.h"
+
+#ifdef __cplusplus
+#define SPATIALEXTERNC extern "C"
+#else
+#define SPATIALEXTERNC
+#endif
+
+#define AMB_BLOCK_TIME_LEN 1024 // block length in samples
+#define AMB_MAX_ORDER 3
+
+#define FOV_DEGREES_MIN       20.f
+#define FOV_DEGREES_MAX       150.f
+#define FOV_DEGREES_DEFAULT   80.f
+
+struct SpatialaudioContext; // opaque here, defined in af_libspatialaudio.cpp
+// C-callable bridge shared by the C filter and the C++ implementation.
+SPATIALEXTERNC int spatialaudio_context_create(struct SpatialaudioContext **spctx, void *logctx);
+SPATIALEXTERNC void spatialaudio_context_set_viewpoint(struct SpatialaudioContext *spctx, float yaw, float pitch, float roll, float fov);
+SPATIALEXTERNC void spatialaudio_context_set_hrtf_path(struct SpatialaudioContext *spctx, const char *hrtf_path);
+SPATIALEXTERNC int spatialaudio_context_configure_input(struct SpatialaudioContext *spctx, const AVChannelLayout *in_layout);
+SPATIALEXTERNC int spatialaudio_context_configure_output(struct SpatialaudioContext *spctx, const AVChannelLayout *out_layout, int samplerate);
+SPATIALEXTERNC int spatialaudio_context_process(struct SpatialaudioContext *spctx, AVFrame *in, AVFrame *out);
+SPATIALEXTERNC void spatialaudio_context_destroy(struct SpatialaudioContext **spctx);
+#endif /* AVFILTER_AF_LIBSPATIALAUDIO_COMMON_H */
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 9819f0f95b..a2ef33fc30 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -136,6 +136,7 @@ extern const AVFilter ff_af_highpass;
 extern const AVFilter ff_af_highshelf;
 extern const AVFilter ff_af_join;
 extern const AVFilter ff_af_ladspa;
+extern const AVFilter ff_af_libspatialaudio;
 extern const AVFilter ff_af_loudnorm;
 extern const AVFilter ff_af_lowpass;
 extern const AVFilter ff_af_lowshelf;
-- 
2.39.5 (Apple Git-154)