[FFmpeg-devel] [PATCH v2 01/10] lavfi/nlmeans: random code shuffling to help compiler

Clément Bœsch u at pkh.me
Mon May 7 20:24:13 EEST 2018


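This makes nlmeans_slice() slightly faster, at least on GCC 7.3, by computing the per-row src and wa pointers once per row instead of re-indexing from the buffer base for every pixel.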
---
 libavfilter/vf_nlmeans.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavfilter/vf_nlmeans.c b/libavfilter/vf_nlmeans.c
index e4952e187e..d222d3913e 100644
--- a/libavfilter/vf_nlmeans.c
+++ b/libavfilter/vf_nlmeans.c
@@ -368,7 +368,6 @@ static int nlmeans_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs
     int x, y;
     NLMeansContext *s = ctx->priv;
     const struct thread_data *td = arg;
-    const uint8_t *src = td->src;
     const int src_linesize = td->src_linesize;
     const int process_h = td->endy - td->starty;
     const int slice_start = (process_h *  jobnr   ) / nb_jobs;
@@ -377,14 +376,15 @@ static int nlmeans_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs
     const int endy   = td->starty + slice_end;
 
     for (y = starty; y < endy; y++) {
+        const uint8_t *src = td->src + y*src_linesize;
+        struct weighted_avg *wa = s->wa + y*s->wa_linesize;
         for (x = td->startx; x < td->endx; x++) {
             const int patch_diff_sq = get_integral_patch_value(td->ii_start, s->ii_lz_32, x, y, td->p);
             if (patch_diff_sq < s->max_meaningful_diff) {
-                struct weighted_avg *wa = &s->wa[y*s->wa_linesize + x];
                 const int weight_lut_idx = patch_diff_sq * s->pdiff_lut_scale;
                 const double weight = s->weight_lut[weight_lut_idx]; // exp(-patch_diff_sq * s->pdiff_scale)
-                wa->total_weight += weight;
-                wa->sum += weight * src[y*src_linesize + x];
+                wa[x].total_weight += weight;
+                wa[x].sum += weight * src[x];
             }
         }
     }
-- 
2.17.0



More information about the ffmpeg-devel mailing list