[FFmpeg-devel] [PATCH 2/2] avfilter: add showspectrumpic filter
Paul B Mahol
onemda at gmail.com
Fri Jan 1 10:00:31 CET 2016
Signed-off-by: Paul B Mahol <onemda at gmail.com>
---
doc/filters.texi | 106 +++++++++++++++++++
libavfilter/Makefile | 1 +
libavfilter/allfilters.c | 1 +
libavfilter/avf_showspectrum.c | 233 ++++++++++++++++++++++++++++++++++++++---
4 files changed, 326 insertions(+), 15 deletions(-)
diff --git a/doc/filters.texi b/doc/filters.texi
index 8aa3b47..224099d 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -14704,6 +14704,112 @@ ffplay -f lavfi 'amovie=input.mp3, asplit [a][out1];
@end example
@end itemize
+ at section showspectrumpic
+
+Convert input audio to a single video frame, representing the audio frequency
+spectrum.
+
+The filter accepts the following options:
+
+ at table @option
+ at item size, s
+Specify the video size for the output. For the syntax of this option, check the
+ at ref{video size syntax,,"Video size" section in the ffmpeg-utils manual,ffmpeg-utils}.
+Default value is @code{640x512}.
+
+ at item mode
+Specify display mode.
+
+It accepts the following values:
+ at table @samp
+ at item combined
+all channels are displayed in the same row
+ at item separate
+all channels are displayed in separate rows
+ at end table
+Default value is @samp{combined}.
+
+ at item color
+Specify display color mode.
+
+It accepts the following values:
+ at table @samp
+ at item channel
+each channel is displayed in a separate color
+ at item intensity
+each channel is displayed using the same color scheme
+ at item rainbow
+each channel is displayed using the rainbow color scheme
+ at item moreland
+each channel is displayed using the moreland color scheme
+ at item nebulae
+each channel is displayed using the nebulae color scheme
+ at item fire
+each channel is displayed using the fire color scheme
+ at end table
+Default value is @samp{channel}.
+
+ at item scale
+Specify scale used for calculating intensity color values.
+
+It accepts the following values:
+ at table @samp
+ at item lin
+linear
+ at item sqrt
+square root, default
+ at item cbrt
+cubic root
+ at item log
+logarithmic
+ at end table
+Default value is @samp{sqrt}.
+
+ at item saturation
+Set saturation modifier for displayed colors. Negative values provide
+alternative color scheme. @code{0} is no saturation at all.
+Saturation must be in [-10.0, 10.0] range.
+Default value is @code{1}.
+
+ at item win_func
+Set window function.
+
+It accepts the following values:
+ at table @samp
+ at item rect
+ at item bartlett
+ at item hann
+ at item hanning
+ at item hamming
+ at item blackman
+ at item welch
+ at item flattop
+ at item bharris
+ at item bnuttall
+ at item bhann
+ at item sine
+ at item nuttall
+ at item lanczos
+ at item gauss
+ at end table
+Default value is @code{hann}.
+
+ at item orientation
+Set orientation of time vs frequency axis. Can be @code{vertical} or
+ at code{horizontal}. Default is @code{vertical}.
+ at end table
+
+ at subsection Examples
+
+ at itemize
+ at item
+Extract an audio spectrogram of a whole audio track
+in a 1024x1024 picture using @command{ffmpeg}:
+ at example
+ffmpeg -i audio.flac -lavfi showspectrumpic=s=1024x1024 spectrogram.png
+ at end example
+ at end itemize
+
@section showvolume
Convert input audio volume to a video output.
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index e334016..689da73 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -286,6 +286,7 @@ OBJS-$(CONFIG_CONCAT_FILTER) += avf_concat.o
OBJS-$(CONFIG_SHOWCQT_FILTER) += avf_showcqt.o lswsutils.o lavfutils.o
OBJS-$(CONFIG_SHOWFREQS_FILTER) += avf_showfreqs.o window_func.o
OBJS-$(CONFIG_SHOWSPECTRUM_FILTER) += avf_showspectrum.o window_func.o
+OBJS-$(CONFIG_SHOWSPECTRUMPIC_FILTER) += avf_showspectrum.o window_func.o
OBJS-$(CONFIG_SHOWVOLUME_FILTER) += avf_showvolume.o
OBJS-$(CONFIG_SHOWWAVES_FILTER) += avf_showwaves.o
OBJS-$(CONFIG_SHOWWAVESPIC_FILTER) += avf_showwaves.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index a039a39..2267e88 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -306,6 +306,7 @@ void avfilter_register_all(void)
REGISTER_FILTER(SHOWCQT, showcqt, avf);
REGISTER_FILTER(SHOWFREQS, showfreqs, avf);
REGISTER_FILTER(SHOWSPECTRUM, showspectrum, avf);
+ REGISTER_FILTER(SHOWSPECTRUMPIC, showspectrumpic, avf);
REGISTER_FILTER(SHOWVOLUME, showvolume, avf);
REGISTER_FILTER(SHOWWAVES, showwaves, avf);
REGISTER_FILTER(SHOWWAVESPIC, showwavespic, avf);
diff --git a/libavfilter/avf_showspectrum.c b/libavfilter/avf_showspectrum.c
index 0fa1be1..2992c2f 100644
--- a/libavfilter/avf_showspectrum.c
+++ b/libavfilter/avf_showspectrum.c
@@ -63,6 +63,7 @@ typedef struct {
int rdft_bits; ///< number of bits (RDFT window size = 1<<rdft_bits)
FFTSample **rdft_data; ///< bins holder for each (displayed) channels
float *window_func_lut; ///< Window function LUT
+ float **magnitudes;
int win_func;
int win_size;
double win_scale;
@@ -186,6 +187,9 @@ static av_cold void uninit(AVFilterContext *ctx)
av_freep(&s->rdft_data[i]);
av_freep(&s->rdft_data);
av_freep(&s->window_func_lut);
+ for (i = 0; i < s->nb_display_channels; i++)
+ av_freep(&s->magnitudes[i]);
+ av_freep(&s->magnitudes);
av_frame_free(&s->outpicref);
av_audio_fifo_free(s->fifo);
}
@@ -267,6 +271,15 @@ static int config_output(AVFilterLink *outlink)
av_freep(&s->rdft_data);
s->nb_display_channels = inlink->channels;
+ s->magnitudes = av_calloc(s->nb_display_channels, sizeof(*s->magnitudes));
+ if (!s->magnitudes)
+ return AVERROR(ENOMEM);
+ for (i = 0; i < s->nb_display_channels; i++) {
+ s->magnitudes[i] = av_calloc(s->orientation == VERTICAL ? s->h: s->w, sizeof(**s->magnitudes));
+ if (!s->magnitudes[i])
+ return AVERROR(ENOMEM);
+ }
+
s->rdft_data = av_calloc(s->nb_display_channels, sizeof(*s->rdft_data));
if (!s->rdft_data)
return AVERROR(ENOMEM);
@@ -370,23 +383,13 @@ static int request_frame(AVFilterLink *outlink)
return ret;
}
-static int plot_spectrum_column(AVFilterLink *inlink, AVFrame *insamples)
+static void run_rdft(ShowSpectrumContext *s, AVFrame *fin)
{
- int ret;
- AVFilterContext *ctx = inlink->dst;
- AVFilterLink *outlink = ctx->outputs[0];
- ShowSpectrumContext *s = ctx->priv;
- AVFrame *outpicref = s->outpicref;
- const double w = s->win_scale;
- int h = s->orientation == VERTICAL ? s->channel_height : s->channel_width;
-
- int ch, plane, n, x, y;
-
- av_assert0(insamples->nb_samples == s->win_size);
+ int ch, n;
/* fill RDFT input with the number of samples available */
for (ch = 0; ch < s->nb_display_channels; ch++) {
- const int16_t *p = (int16_t *)insamples->extended_data[ch];
+ const int16_t *p = (int16_t *)fin->extended_data[ch];
for (n = 0; n < s->win_size; n++)
s->rdft_data[ch][n] = p[n] * s->window_func_lut[n];
@@ -395,12 +398,61 @@ static int plot_spectrum_column(AVFilterLink *inlink, AVFrame *insamples)
/* run RDFT on each samples set */
for (ch = 0; ch < s->nb_display_channels; ch++)
av_rdft_calc(s->rdft, s->rdft_data[ch]);
+}
- /* fill a new spectrum column */
#define RE(y, ch) s->rdft_data[ch][2 * (y) + 0]
#define IM(y, ch) s->rdft_data[ch][2 * (y) + 1]
#define MAGNITUDE(y, ch) hypot(RE(y, ch), IM(y, ch))
+static void calc_magnitudes(ShowSpectrumContext *s)
+{
+ int ch, y, h = s->orientation == VERTICAL ? s->h: s->w;
+
+ for (ch = 0; ch < s->nb_display_channels; ch++) {
+ float *magnitudes = s->magnitudes[ch];
+
+ for (y = 0; y < h; y++)
+ magnitudes[y] = MAGNITUDE(y, ch);
+ }
+}
+
+static void acalc_magnitudes(ShowSpectrumContext *s)
+{
+ int ch, y, h = s->orientation == VERTICAL ? s->h: s->w;
+
+ for (ch = 0; ch < s->nb_display_channels; ch++) {
+ float *magnitudes = s->magnitudes[ch];
+
+ for (y = 0; y < h; y++)
+ magnitudes[y] += MAGNITUDE(y, ch);
+ }
+}
+
+static void scale_magnitudes(ShowSpectrumContext *s, float scale)
+{
+ int ch, y, h = s->orientation == VERTICAL ? s->h: s->w;
+
+ for (ch = 0; ch < s->nb_display_channels; ch++) {
+ float *magnitudes = s->magnitudes[ch];
+
+ for (y = 0; y < h; y++)
+ magnitudes[y] *= scale;
+ }
+}
+
+static int plot_spectrum_column(AVFilterLink *inlink, AVFrame *insamples)
+{
+ int ret;
+ AVFilterContext *ctx = inlink->dst;
+ AVFilterLink *outlink = ctx->outputs[0];
+ ShowSpectrumContext *s = ctx->priv;
+ AVFrame *outpicref = s->outpicref;
+ const double w = s->win_scale;
+ int h = s->orientation == VERTICAL ? s->channel_height : s->channel_width;
+
+ int ch, plane, x, y;
+
+ /* fill a new spectrum column */
/* initialize buffer for combining to black */
if (s->orientation == VERTICAL) {
for (y = 0; y < outlink->h; y++) {
@@ -417,6 +469,7 @@ static int plot_spectrum_column(AVFilterLink *inlink, AVFrame *insamples)
}
for (ch = 0; ch < s->nb_display_channels; ch++) {
+ float *magnitudes = s->magnitudes[ch];
float yf, uf, vf;
/* decide color range */
@@ -471,7 +524,7 @@ static int plot_spectrum_column(AVFilterLink *inlink, AVFrame *insamples)
float *out = &s->combine_buffer[3 * row];
/* get magnitude */
- float a = w * MAGNITUDE(y, ch);
+ float a = w * magnitudes[y];
/* apply scale */
switch (s->scale) {
@@ -631,6 +684,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
if (ret < 0)
goto fail;
+ av_assert0(fin->nb_samples == s->win_size);
+
+ run_rdft(s, fin);
+ calc_magnitudes(s);
+
ret = plot_spectrum_column(inlink, fin);
av_frame_free(&fin);
av_audio_fifo_drain(s->fifo, s->skip_samples);
@@ -672,3 +730,148 @@ AVFilter ff_avf_showspectrum = {
.outputs = showspectrum_outputs,
.priv_class = &showspectrum_class,
};
+
+static const AVOption showspectrumpic_options[] = {
+ { "size", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str = "640x512"}, 0, 0, FLAGS },
+ { "s", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str = "640x512"}, 0, 0, FLAGS },
+ { "mode", "set channel display mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=COMBINED}, COMBINED, NB_MODES-1, FLAGS, "mode" },
+ { "combined", "combined mode", 0, AV_OPT_TYPE_CONST, {.i64=COMBINED}, 0, 0, FLAGS, "mode" },
+ { "separate", "separate mode", 0, AV_OPT_TYPE_CONST, {.i64=SEPARATE}, 0, 0, FLAGS, "mode" },
+ { "color", "set channel coloring", OFFSET(color_mode), AV_OPT_TYPE_INT, {.i64=CHANNEL}, CHANNEL, NB_CLMODES-1, FLAGS, "color" },
+ { "channel", "separate color for each channel", 0, AV_OPT_TYPE_CONST, {.i64=CHANNEL}, 0, 0, FLAGS, "color" },
+ { "intensity", "intensity based coloring", 0, AV_OPT_TYPE_CONST, {.i64=INTENSITY}, 0, 0, FLAGS, "color" },
+ { "rainbow", "rainbow based coloring", 0, AV_OPT_TYPE_CONST, {.i64=RAINBOW}, 0, 0, FLAGS, "color" },
+ { "moreland", "moreland based coloring", 0, AV_OPT_TYPE_CONST, {.i64=MORELAND}, 0, 0, FLAGS, "color" },
+ { "nebulae", "nebulae based coloring", 0, AV_OPT_TYPE_CONST, {.i64=NEBULAE}, 0, 0, FLAGS, "color" },
+ { "fire", "fire based coloring", 0, AV_OPT_TYPE_CONST, {.i64=FIRE}, 0, 0, FLAGS, "color" },
+ { "scale", "set display scale", OFFSET(scale), AV_OPT_TYPE_INT, {.i64=SQRT}, LINEAR, NB_SCALES-1, FLAGS, "scale" },
+ { "sqrt", "square root", 0, AV_OPT_TYPE_CONST, {.i64=SQRT}, 0, 0, FLAGS, "scale" },
+ { "cbrt", "cubic root", 0, AV_OPT_TYPE_CONST, {.i64=CBRT}, 0, 0, FLAGS, "scale" },
+ { "log", "logarithmic", 0, AV_OPT_TYPE_CONST, {.i64=LOG}, 0, 0, FLAGS, "scale" },
+ { "lin", "linear", 0, AV_OPT_TYPE_CONST, {.i64=LINEAR}, 0, 0, FLAGS, "scale" },
+ { "saturation", "color saturation multiplier", OFFSET(saturation), AV_OPT_TYPE_FLOAT, {.dbl = 1}, -10, 10, FLAGS },
+ { "win_func", "set window function", OFFSET(win_func), AV_OPT_TYPE_INT, {.i64 = WFUNC_HANNING}, 0, NB_WFUNC-1, FLAGS, "win_func" },
+ { "rect", "Rectangular", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_RECT}, 0, 0, FLAGS, "win_func" },
+ { "bartlett", "Bartlett", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BARTLETT}, 0, 0, FLAGS, "win_func" },
+ { "hann", "Hann", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HANNING}, 0, 0, FLAGS, "win_func" },
+ { "hanning", "Hanning", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HANNING}, 0, 0, FLAGS, "win_func" },
+ { "hamming", "Hamming", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HAMMING}, 0, 0, FLAGS, "win_func" },
+ { "blackman", "Blackman", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BLACKMAN}, 0, 0, FLAGS, "win_func" },
+ { "welch", "Welch", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_WELCH}, 0, 0, FLAGS, "win_func" },
+ { "flattop", "Flat-top", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_FLATTOP}, 0, 0, FLAGS, "win_func" },
+ { "bharris", "Blackman-Harris", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BHARRIS}, 0, 0, FLAGS, "win_func" },
+ { "bnuttall", "Blackman-Nuttall", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BNUTTALL}, 0, 0, FLAGS, "win_func" },
+ { "bhann", "Bartlett-Hann", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BHANN}, 0, 0, FLAGS, "win_func" },
+ { "sine", "Sine", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_SINE}, 0, 0, FLAGS, "win_func" },
+ { "nuttall", "Nuttall", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_NUTTALL}, 0, 0, FLAGS, "win_func" },
+ { "lanczos", "Lanczos", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_LANCZOS}, 0, 0, FLAGS, "win_func" },
+ { "gauss", "Gauss", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_GAUSS}, 0, 0, FLAGS, "win_func" },
+ { "orientation", "set orientation", OFFSET(orientation), AV_OPT_TYPE_INT, {.i64=VERTICAL}, 0, NB_ORIENTATIONS-1, FLAGS, "orientation" },
+ { "vertical", NULL, 0, AV_OPT_TYPE_CONST, {.i64=VERTICAL}, 0, 0, FLAGS, "orientation" },
+ { "horizontal", NULL, 0, AV_OPT_TYPE_CONST, {.i64=HORIZONTAL}, 0, 0, FLAGS, "orientation" },
+ { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(showspectrumpic);
+
+static int showspectrumpic_request_frame(AVFilterLink *outlink)
+{
+ ShowSpectrumContext *s = outlink->src->priv;
+ AVFilterLink *inlink = outlink->src->inputs[0];
+ int ret;
+
+ ret = ff_request_frame(inlink);
+ if (ret == AVERROR_EOF && s->outpicref) {
+ int samples = av_audio_fifo_size(s->fifo);
+ int consumed = 0;
+ int sz = s->orientation == VERTICAL ? s->w: s->h;
+ int ch, spf, spb;
+ AVFrame *fin;
+
+ if (samples / sz < s->win_size) {
+ spf = lrint(ceil(samples / (float)sz)) % s->win_size;
+ spb = samples / sz;
+ } else {
+ spf = samples % s->win_size;
+ spb = samples / sz;
+ }
+
+ if (!spf)
+ spf = s->win_size;
+ s->sliding = FULLFRAME;
+ fin = ff_get_audio_buffer(inlink, s->win_size);
+ if (!fin)
+ return AVERROR(ENOMEM);
+
+ while (av_audio_fifo_size(s->fifo) > 0) {
+ ret = av_audio_fifo_peek(s->fifo, (void **)fin->extended_data, s->win_size);
+ if (ret < 0) {
+ av_frame_free(&fin);
+ return ret;
+ }
+ av_audio_fifo_drain(s->fifo, spf);
+
+ run_rdft(s, fin);
+ acalc_magnitudes(s);
+
+ consumed += spf;
+ if (consumed >= spb) {
+ int h = s->orientation == VERTICAL ? s->h: s->w;
+
+ scale_magnitudes(s, 1. / (consumed / spf));
+ plot_spectrum_column(inlink, fin);
+ consumed = 0;
+ for (ch = 0; ch < s->nb_display_channels; ch++)
+ memset(s->magnitudes[ch], 0, h * sizeof(float));
+ }
+ }
+
+ av_frame_free(&fin);
+ s->outpicref->pts = 0;
+ ret = ff_filter_frame(outlink, s->outpicref);
+ s->outpicref = NULL;
+ }
+
+ return ret;
+}
+
+static int showspectrumpic_filter_frame(AVFilterLink *inlink, AVFrame *insamples)
+{
+ AVFilterContext *ctx = inlink->dst;
+ ShowSpectrumContext *s = ctx->priv;
+ int ret;
+
+ ret = av_audio_fifo_write(s->fifo, (void **)insamples->extended_data, insamples->nb_samples);
+ av_frame_free(&insamples);
+ return ret;
+}
+
+static const AVFilterPad showspectrumpic_inputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_AUDIO,
+ .filter_frame = showspectrumpic_filter_frame,
+ },
+ { NULL }
+};
+
+static const AVFilterPad showspectrumpic_outputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .config_props = config_output,
+ .request_frame = showspectrumpic_request_frame,
+ },
+ { NULL }
+};
+
+AVFilter ff_avf_showspectrumpic = {
+ .name = "showspectrumpic",
+ .description = NULL_IF_CONFIG_SMALL("Convert input audio to a spectrum video output single picture."),
+ .uninit = uninit,
+ .query_formats = query_formats,
+ .priv_size = sizeof(ShowSpectrumContext),
+ .inputs = showspectrumpic_inputs,
+ .outputs = showspectrumpic_outputs,
+ .priv_class = &showspectrumpic_class,
+};
--
1.9.1
More information about the ffmpeg-devel
mailing list