[FFmpeg-devel] [PATCH] lavf/vf_ocr: add subregion support
Gyan Doshi
ffmpeg at gyani.pro
Thu Jun 17 09:27:00 EEST 2021
On 2021-06-17 11:31, Lingjiang Fang wrote:
> ---
> doc/filters.texi | 7 +++++++
> libavfilter/vf_ocr.c | 30 +++++++++++++++++++++++++++++-
> 2 files changed, 36 insertions(+), 1 deletion(-)
>
> diff --git a/doc/filters.texi b/doc/filters.texi
> index da8f7d7726..9c650a2a5a 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -15451,6 +15451,13 @@ Set character whitelist.
>
> @item blacklist
> Set character blacklist.
> +
> + at item x, y
> +Set top point position of subregion, not support expression now
--> Set position of top-left corner, in pixels.
> +
> + at item w, h
> +Set Width and height of subregion
s/Width/width
> +
> @end table
>
> The filter exports recognized text as the frame metadata @code{lavfi.ocr.text}.
> diff --git a/libavfilter/vf_ocr.c b/libavfilter/vf_ocr.c
> index 6de474025a..7beb101679 100644
> --- a/libavfilter/vf_ocr.c
> +++ b/libavfilter/vf_ocr.c
> @@ -33,6 +33,8 @@ typedef struct OCRContext {
> char *language;
> char *whitelist;
> char *blacklist;
> + int x, y;
> + int w, h;
>
> TessBaseAPI *tess;
> } OCRContext;
> @@ -45,6 +47,10 @@ static const AVOption ocr_options[] = {
> { "language", "set language", OFFSET(language), AV_OPT_TYPE_STRING, {.str="eng"}, 0, 0, FLAGS },
> { "whitelist", "set character whitelist", OFFSET(whitelist), AV_OPT_TYPE_STRING, {.str="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.:;,-+_!?\"'[]{}()<>|/\\=*&%$#@!~ "}, 0, 0, FLAGS },
> { "blacklist", "set character blacklist", OFFSET(blacklist), AV_OPT_TYPE_STRING, {.str=""}, 0, 0, FLAGS },
> + { "x", "top x of sub region", OFFSET(x), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS },
> + { "y", "top y of sub region", OFFSET(y), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS },
> + { "w", "width of sub region", OFFSET(w), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS },
> + { "h", "height of sub region", OFFSET(h), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS },
> { NULL }
> };
>
> @@ -73,6 +79,19 @@ static av_cold int init(AVFilterContext *ctx)
> return 0;
> }
>
> +static int config_input(AVFilterLink *inlink)
> +{
> + OCRContext *s = inlink->dst->priv;
> +
> + // may call many times, we don't check w/h here
> + if (s->x < 0 || s->y < 0) {
> + s->x = 0;
> + s->y = 0;
These options are AV_OPT_TYPE_INT with their range set to 0..INT_MAX, so the
option parser should already reject negative values; this check looks unnecessary.
Regards,
Gyan
> + }
> +
> + return 0;
> +}
> +
> static int query_formats(AVFilterContext *ctx)
> {
> static const enum AVPixelFormat pix_fmts[] = {
> @@ -101,9 +120,17 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
> OCRContext *s = ctx->priv;
> char *result;
> int *confs;
> + int w = s->w;
> + int h = s->h;
> +
> + if (w <= 0 || h <= 0) {
> + w = in->width;
> + h = in->height;
> + }
>
> + av_log(s, AV_LOG_ERROR, "x=%d, y=%d, w=%d, h=%d\n", s->x, s->y, w, h);
> result = TessBaseAPIRect(s->tess, in->data[0], 1,
> - in->linesize[0], 0, 0, in->width, in->height);
> + in->linesize[0], s->x, s->y, w, h);
> confs = TessBaseAPIAllWordConfidences(s->tess);
> av_dict_set(metadata, "lavfi.ocr.text", result, 0);
> for (int i = 0; confs[i] != -1; i++) {
> @@ -134,6 +161,7 @@ static const AVFilterPad ocr_inputs[] = {
> .name = "default",
> .type = AVMEDIA_TYPE_VIDEO,
> .filter_frame = filter_frame,
> + .config_props = config_input,
> },
> { NULL }
> };
More information about the ffmpeg-devel mailing list