[FFmpeg-devel] [PATCH V3] lavf/vf_ocr: add subregion support
Steven Liu
lq at chinaffmpeg.org
Mon Jul 5 14:41:08 EEST 2021
> 2021年6月18日 下午11:56,Lingjiang Fang <vacingfang at foxmail.com> 写道:
>
>
> fix doc errors, ping for review, thanks :)
> ---
> doc/filters.texi | 8 ++++++++
> libavfilter/vf_ocr.c | 35 ++++++++++++++++++++++++++++++++++-
> 2 files changed, 42 insertions(+), 1 deletion(-)
>
> diff --git a/doc/filters.texi b/doc/filters.texi
> index da8f7d7726..041fd28c57 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -15451,6 +15451,14 @@ Set character whitelist.
>
> @item blacklist
> Set character blacklist.
> +
> +@item x, y
> +Set top-left corner of the subregion, in pixels, default is (0,0).
> +
> +@item w, h
> +Set width and height of the subregion, in pixels,
> +default is the bottom-right part from given top-left corner.
> +
> @end table
>
> The filter exports recognized text as the frame metadata @code{lavfi.ocr.text}.
> diff --git a/libavfilter/vf_ocr.c b/libavfilter/vf_ocr.c
> index 6de474025a..e96dce2d87 100644
> --- a/libavfilter/vf_ocr.c
> +++ b/libavfilter/vf_ocr.c
> @@ -33,6 +33,8 @@ typedef struct OCRContext {
> char *language;
> char *whitelist;
> char *blacklist;
> + int x, y;
> + int w, h;
>
> TessBaseAPI *tess;
> } OCRContext;
> @@ -45,6 +47,10 @@ static const AVOption ocr_options[] = {
> { "language", "set language", OFFSET(language), AV_OPT_TYPE_STRING, {.str="eng"}, 0, 0, FLAGS },
> { "whitelist", "set character whitelist", OFFSET(whitelist), AV_OPT_TYPE_STRING, {.str="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.:;,-+_!?\"'[]{}()<>|/\\=*&%$#@!~ "}, 0, 0, FLAGS },
> { "blacklist", "set character blacklist", OFFSET(blacklist), AV_OPT_TYPE_STRING, {.str=""}, 0, 0, FLAGS },
> + { "x", "top x of sub region", OFFSET(x), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS },
> + { "y", "top y of sub region", OFFSET(y), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS },
> + { "w", "width of sub region", OFFSET(w), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS },
> + { "h", "height of sub region", OFFSET(h), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS },
> { NULL }
> };
>
> @@ -93,6 +99,21 @@ static int query_formats(AVFilterContext *ctx)
> return ff_set_common_formats(ctx, fmts_list);
> }
>
> +static void check_fix(int *x, int *y, int *w, int *h, int pic_w, int pic_h)
> +{
> + // 0 <= x < pic_w
> + if (*x >= pic_w)
> + *x = 0;
> + // 0 <= y < pic_h
> + if (*y >= pic_h)
> + *y = 0;
> +
> + if (*w == 0 || *w + *x > pic_w)
> + *w = pic_w - *x;
> + if (*h == 0 || *h + *y > pic_h)
> + *h = pic_h - *y;
> +}
What about checking whether the logo area fits in the frame in config_input (i.e. with .config_props = config_input)?
> +
> static int filter_frame(AVFilterLink *inlink, AVFrame *in)
> {
> AVDictionary **metadata = &in->metadata;
> @@ -102,8 +123,20 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
> char *result;
> int *confs;
>
> + // TODO(vacing): support expression
> + int x = s->x;
> + int y = s->y;
> + int w = s->w;
> + int h = s->h;
> + check_fix(&x, &y, &w, &h, in->width, in->height);
> + if ( x != s->x || y != s->y ||
> + (s->w != 0 && w != s->w) || (s->h != 0 && h != s->h)) {
> + av_log(s, AV_LOG_WARNING, "config error, subregion changed to x=%d, y=%d, w=%d, h=%d\n",
> + x, y, w, h);
> + }
> +
> result = TessBaseAPIRect(s->tess, in->data[0], 1,
> - in->linesize[0], 0, 0, in->width, in->height);
> + in->linesize[0], x, y, w, h);
> confs = TessBaseAPIAllWordConfidences(s->tess);
> av_dict_set(metadata, "lavfi.ocr.text", result, 0);
> for (int i = 0; confs[i] != -1; i++) {
> --
> 2.29.2
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
>
Thanks
Steven Liu
More information about the ffmpeg-devel
mailing list