[FFmpeg-devel] [PATCH] avcodec/encode: restructure the core encoding code

Tue May 26 17:35:39 EEST 2020

This commit follows the same logic as 061a0c14bb, but for the encode API: The
new public encoding API will no longer be a wrapper around the old deprecated
one, and the internal API used by the encoders now consists of a single
receive_packet() callback that pulls frames as required.

amf encoders adapted by James Almer
librav1e encoder adapted by James Almer
nvidia encoders adapted by James Almer
MediaFoundation encoders adapted by James Almer
vaapi encoders adapted by Linjie Fu
v4l2_m2m encoders adapted by Andriy Gelman

Signed-off-by: James Almer <jamrial at gmail.com>
---
 libavcodec/amfenc.c             |  43 ++---
 libavcodec/amfenc.h             |   2 -
 libavcodec/amfenc_h264.c        |   1 -
 libavcodec/amfenc_hevc.c        |   1 -
 libavcodec/avcodec.h            |  12 +-
 libavcodec/encode.c             | 286 ++++++++++++++++++++++++--------
 libavcodec/encode.h             |  39 +++++
 libavcodec/internal.h           |   7 +-
 libavcodec/librav1e.c           |  51 ++++--
 libavcodec/mfenc.c              |  58 ++++---
 libavcodec/nvenc.c              |  72 ++++----
 libavcodec/nvenc.h              |   9 +-
 libavcodec/nvenc_h264.c         |   6 -
 libavcodec/nvenc_hevc.c         |   4 -
 libavcodec/utils.c              |  10 +-
 libavcodec/v4l2_m2m.c           |   8 +
 libavcodec/v4l2_m2m.h           |   3 +
 libavcodec/v4l2_m2m_enc.c       |  15 +-
 libavcodec/vaapi_encode.c       |  26 ++-
 libavcodec/vaapi_encode.h       |   3 +-
 libavcodec/vaapi_encode_h264.c  |   1 -
 libavcodec/vaapi_encode_h265.c  |   1 -
 libavcodec/vaapi_encode_mjpeg.c |   1 -
 libavcodec/vaapi_encode_mpeg2.c |   1 -
 libavcodec/vaapi_encode_vp8.c   |   1 -
 libavcodec/vaapi_encode_vp9.c   |   1 -
 26 files changed, 442 insertions(+), 220 deletions(-)
 create mode 100644 libavcodec/encode.h

diff --git a/libavcodec/amfenc.c b/libavcodec/amfenc.c
index 876fddd2b6..da0652943d 100644
--- a/libavcodec/amfenc.c
+++ b/libavcodec/amfenc.c
@@ -33,6 +33,7 @@
 #include "libavutil/time.h"
 
 #include "amfenc.h"
+#include "encode.h"
 #include "internal.h"
 
 #if CONFIG_D3D11VA
@@ -588,17 +589,27 @@ static void amf_release_buffer_with_frame_ref(AMFBuffer *frame_ref_storage_buffe
     frame_ref_storage_buffer->pVtbl->Release(frame_ref_storage_buffer);
 }
 
-int ff_amf_send_frame(AVCodecContext *avctx, const AVFrame *frame)
+int ff_amf_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
 {
     AmfContext *ctx = avctx->priv_data;
     AMFSurface *surface;
     AMF_RESULT  res;
     int         ret;
+    AMF_RESULT  res_query;
+    AMFData    *data = NULL;
+    AVFrame    *frame = ctx->delayed_frame;
+    int         block_and_wait;
 
     if (!ctx->encoder)
         return AVERROR(EINVAL);
 
-    if (!frame) { // submit drain
+    if (!frame->buf[0]) {
+        ret = ff_encode_get_frame(avctx, frame);
+        if (ret < 0 && ret != AVERROR_EOF)
+            return ret;
+    }
+
+    if (!frame->buf[0]) { // submit drain
         if (!ctx->eof) { // submit drain one time only
             if (ctx->delayed_surface != NULL) {
                 ctx->delayed_drain = 1; // input queue is full: resubmit Drain() in ff_amf_receive_packet
@@ -613,15 +624,10 @@ int ff_amf_send_frame(AVCodecContext *avctx, const AVFrame *frame)
                     AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "Drain() failed with error %d\n", res);
                 }
             }
-        } else{
-            return AVERROR_EOF;
         }
-    } else { // submit frame
+    } else if (!ctx->delayed_surface) { // submit frame
         int hw_surface = 0;
 
-        if (ctx->delayed_surface != NULL) {
-            return AVERROR(EAGAIN); // should not happen when called from ffmpeg, other clients may resubmit
-        }
         // prepare surface from frame
         switch (frame->format) {
 #if CONFIG_D3D11VA
@@ -693,38 +699,23 @@ int ff_amf_send_frame(AVCodecContext *avctx, const AVFrame *frame)
             break;
         }
 
-
         // submit surface
         res = ctx->encoder->pVtbl->SubmitInput(ctx->encoder, (AMFData*)surface);
         if (res == AMF_INPUT_FULL) { // handle full queue
             //store surface for later submission
             ctx->delayed_surface = surface;
-            if (surface->pVtbl->GetMemoryType(surface) == AMF_MEMORY_DX11) {
-                av_frame_ref(ctx->delayed_frame, frame);
-            }
         } else {
+            int64_t pts = frame->pts;
             surface->pVtbl->Release(surface);
             AMF_RETURN_IF_FALSE(ctx, res == AMF_OK, AVERROR_UNKNOWN, "SubmitInput() failed with error %d\n", res);
 
-            if ((ret = timestamp_queue_enqueue(avctx, frame->pts)) < 0) {
+            av_frame_unref(frame);
+            if ((ret = timestamp_queue_enqueue(avctx, pts)) < 0) {
                 return ret;
             }
-
         }
     }
-    return 0;
-}
-int ff_amf_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
-{
-    int             ret;
-    AMF_RESULT      res;
-    AMF_RESULT      res_query;
-    AmfContext     *ctx = avctx->priv_data;
-    AMFData        *data = NULL;
-    int             block_and_wait;
 
-    if (!ctx->encoder)
-        return AVERROR(EINVAL);
 
     do {
         block_and_wait = 0;
diff --git a/libavcodec/amfenc.h b/libavcodec/amfenc.h
index b1361842bd..80658c6b2a 100644
--- a/libavcodec/amfenc.h
+++ b/libavcodec/amfenc.h
@@ -129,8 +129,6 @@ int ff_amf_encode_close(AVCodecContext *avctx);
 /**
 * Ecoding one frame - common function for all AMF encoders
 */
-
-int ff_amf_send_frame(AVCodecContext *avctx, const AVFrame *frame);
 int ff_amf_receive_packet(AVCodecContext *avctx, AVPacket *avpkt);
 
 /**
diff --git a/libavcodec/amfenc_h264.c b/libavcodec/amfenc_h264.c
index 7f2817f115..7a8029f3b7 100644
--- a/libavcodec/amfenc_h264.c
+++ b/libavcodec/amfenc_h264.c
@@ -383,7 +383,6 @@ AVCodec ff_h264_amf_encoder = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_H264,
     .init           = amf_encode_init_h264,
-    .send_frame     = ff_amf_send_frame,
     .receive_packet = ff_amf_receive_packet,
     .close          = ff_amf_encode_close,
     .priv_data_size = sizeof(AmfContext),
diff --git a/libavcodec/amfenc_hevc.c b/libavcodec/amfenc_hevc.c
index 77e57d2461..8b2302f2af 100644
--- a/libavcodec/amfenc_hevc.c
+++ b/libavcodec/amfenc_hevc.c
@@ -313,7 +313,6 @@ AVCodec ff_hevc_amf_encoder = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_HEVC,
     .init           = amf_encode_init_hevc,
-    .send_frame     = ff_amf_send_frame,
     .receive_packet = ff_amf_receive_packet,
     .close          = ff_amf_encode_close,
     .priv_data_size = sizeof(AmfContext),
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index f10b7a06ec..d283b8f6cb 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -2692,14 +2692,10 @@ typedef struct AVCodec {
     int (*decode)(AVCodecContext *, void *outdata, int *outdata_size, AVPacket *avpkt);
     int (*close)(AVCodecContext *);
     /**
-     * Encode API with decoupled packet/frame dataflow. The API is the
-     * same as the avcodec_ prefixed APIs (avcodec_send_frame() etc.), except
-     * that:
-     * - never called if the codec is closed or the wrong type,
-     * - if AV_CODEC_CAP_DELAY is not set, drain frames are never sent,
-     * - only one drain frame is ever passed down,
-     */
-    int (*send_frame)(AVCodecContext *avctx, const AVFrame *frame);
+     * Encode API with decoupled frame/packet dataflow. This function is called
+     * to get one output packet. It should call ff_encode_get_frame() to obtain
+     * input data.
+     */
     int (*receive_packet)(AVCodecContext *avctx, AVPacket *avpkt);
 
     /**
diff --git a/libavcodec/encode.c b/libavcodec/encode.c
index b850f86d0c..e70f4c2518 100644
--- a/libavcodec/encode.c
+++ b/libavcodec/encode.c
@@ -26,6 +26,7 @@
 #include "libavutil/samplefmt.h"
 
 #include "avcodec.h"
+#include "encode.h"
 #include "frame_thread_encoder.h"
 #include "internal.h"
 
@@ -78,14 +79,10 @@ int ff_alloc_packet(AVPacket *avpkt, int size)
 /**
  * Pad last frame with silence.
  */
-static int pad_last_frame(AVCodecContext *s, AVFrame **dst, const AVFrame *src)
+static int pad_last_frame(AVCodecContext *s, AVFrame *frame, const AVFrame *src)
 {
-    AVFrame *frame = NULL;
     int ret;
 
-    if (!(frame = av_frame_alloc()))
-        return AVERROR(ENOMEM);
-
     frame->format         = src->format;
     frame->channel_layout = src->channel_layout;
     frame->channels       = src->channels;
@@ -106,12 +103,10 @@ static int pad_last_frame(AVCodecContext *s, AVFrame **dst, const AVFrame *src)
                                       s->channels, s->sample_fmt)) < 0)
         goto fail;
 
-    *dst = frame;
-
     return 0;
 
 fail:
-    av_frame_free(&frame);
+    av_frame_unref(frame);
     return ret;
 }
 
@@ -182,7 +177,11 @@ int attribute_align_arg avcodec_encode_audio2(AVCodecContext *avctx,
             }
 
             if (frame->nb_samples < avctx->frame_size) {
-                ret = pad_last_frame(avctx, &padded_frame, frame);
+                if (!(padded_frame = av_frame_alloc())) {
+                    ret = AVERROR(ENOMEM);
+                    goto end;
+                }
+                ret = pad_last_frame(avctx, padded_frame, frame);
                 if (ret < 0)
                     goto end;
 
@@ -363,101 +362,252 @@ int avcodec_encode_subtitle(AVCodecContext *avctx, uint8_t *buf, int buf_size,
     return ret;
 }
 
-static int do_encode(AVCodecContext *avctx, const AVFrame *frame, int *got_packet)
+int ff_encode_get_frame(AVCodecContext *avctx, AVFrame *frame)
+{
+    AVCodecInternal *avci = avctx->internal;
+
+    if (avci->draining)
+        return AVERROR_EOF;
+
+    if (!avci->buffer_frame->buf[0])
+        return AVERROR(EAGAIN);
+
+    av_frame_move_ref(frame, avci->buffer_frame);
+
+    return 0;
+}
+
+static int encode_simple_internal(AVCodecContext *avctx, AVPacket *avpkt)
 {
+    AVCodecInternal   *avci = avctx->internal;
+    EncodeSimpleContext *es = &avci->es;
+    AVFrame          *frame = es->in_frame;
+    int got_packet;
     int ret;
-    *got_packet = 0;
 
-    av_packet_unref(avctx->internal->buffer_pkt);
-    avctx->internal->buffer_pkt_valid = 0;
+    if (avci->draining_done)
+        return AVERROR_EOF;
 
-    if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
-        ret = avcodec_encode_video2(avctx, avctx->internal->buffer_pkt,
-                                    frame, got_packet);
-    } else if (avctx->codec_type == AVMEDIA_TYPE_AUDIO) {
-        ret = avcodec_encode_audio2(avctx, avctx->internal->buffer_pkt,
-                                    frame, got_packet);
-    } else {
-        ret = AVERROR(EINVAL);
+    if (!frame->buf[0] && !avci->draining) {
+        av_frame_unref(frame);
+        ret = ff_encode_get_frame(avctx, frame);
+        if (ret < 0 && ret != AVERROR_EOF)
+            return ret;
+    }
+
+    if (!frame->buf[0]) {
+        if (!(avctx->codec->capabilities & AV_CODEC_CAP_DELAY ||
+              (avci->frame_thread_encoder && avctx->active_thread_type & FF_THREAD_FRAME)))
+            return AVERROR_EOF;
+
+        // Flushing is signaled with a NULL frame
+        frame = NULL;
+    }
+
+    got_packet = 0;
+
+    av_assert0(avctx->codec->encode2);
+
+    if (CONFIG_FRAME_THREAD_ENCODER &&
+        avci->frame_thread_encoder && (avctx->active_thread_type & FF_THREAD_FRAME))
+        ret = ff_thread_video_encode_frame(avctx, avpkt, frame, &got_packet);
+    else {
+        ret = avctx->codec->encode2(avctx, avpkt, frame, &got_packet);
+        if (avctx->codec->type == AVMEDIA_TYPE_VIDEO && !ret && got_packet &&
+            !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
+            avpkt->pts = avpkt->dts = frame->pts;
     }
 
-    if (ret >= 0 && *got_packet) {
+    av_assert0(ret <= 0);
+
+    emms_c();
+
+    if (!ret && got_packet) {
+        if (avpkt->data) {
+            ret = av_packet_make_refcounted(avpkt);
+            if (ret < 0)
+                goto end;
+        }
+
+        if (frame && !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY)) {
+            if (avctx->codec->type == AVMEDIA_TYPE_AUDIO) {
+                if (avpkt->pts == AV_NOPTS_VALUE)
+                    avpkt->pts = frame->pts;
+                if (!avpkt->duration)
+                    avpkt->duration = ff_samples_to_time_base(avctx,
+                                                              frame->nb_samples);
+            }
+        }
+        if (avctx->codec->type == AVMEDIA_TYPE_AUDIO) {
+            /* NOTE: if we add any audio encoders which output non-keyframe packets,
+             *       this needs to be moved to the encoders, but for now we can do it
+             *       here to simplify things */
+            avpkt->flags |= AV_PKT_FLAG_KEY;
+            avpkt->dts = avpkt->pts;
+        }
+    }
+
+    if (avci->draining && !got_packet)
+        avci->draining_done = 1;
+
+end:
+    if (ret < 0 || !got_packet)
+        av_packet_unref(avpkt);
+
+    if (frame) {
+        if (!ret)
+            avctx->frame_number++;
+        av_frame_unref(frame);
+    }
+
+    if (got_packet)
         // Encoders must always return ref-counted buffers.
         // Side-data only packets have no data and can be not ref-counted.
-        av_assert0(!avctx->internal->buffer_pkt->data || avctx->internal->buffer_pkt->buf);
-        avctx->internal->buffer_pkt_valid = 1;
-        ret = 0;
-    } else {
-        av_packet_unref(avctx->internal->buffer_pkt);
+        av_assert0(!avpkt->data || avpkt->buf);
+
+    return ret;
+}
+
+static int encode_simple_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
+{
+    int ret;
+
+    while (!avpkt->data && !avpkt->side_data) {
+        ret = encode_simple_internal(avctx, avpkt);
+        if (ret < 0)
+            return ret;
+    }
+
+    return 0;
+}
+
+static int encode_receive_packet_internal(AVCodecContext *avctx, AVPacket *avpkt)
+{
+    AVCodecInternal *avci = avctx->internal;
+    int ret;
+
+    if (avci->draining_done)
+        return AVERROR_EOF;
+
+    av_assert0(!avpkt->data && !avpkt->side_data);
+
+    if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
+        if ((avctx->flags & AV_CODEC_FLAG_PASS1) && avctx->stats_out)
+            avctx->stats_out[0] = '\0';
+        if (av_image_check_size2(avctx->width, avctx->height, avctx->max_pixels, AV_PIX_FMT_NONE, 0, avctx))
+            return AVERROR(EINVAL);
     }
 
+    if (avctx->codec->receive_packet) {
+        ret = avctx->codec->receive_packet(avctx, avpkt);
+        if (!ret)
+            // Encoders must always return ref-counted buffers.
+            // Side-data only packets have no data and can be not ref-counted.
+            av_assert0(!avpkt->data || avpkt->buf);
+    } else
+        ret = encode_simple_receive_packet(avctx, avpkt);
+
+    if (ret == AVERROR_EOF)
+        avci->draining_done = 1;
+
     return ret;
 }
 
+static int encode_send_frame_internal(AVCodecContext *avctx, const AVFrame *src)
+{
+    AVCodecInternal *avci = avctx->internal;
+    AVFrame *dst = avci->buffer_frame;
+    int ret;
+
+    if (avctx->codec->type == AVMEDIA_TYPE_AUDIO) {
+        /* extract audio service type metadata */
+        AVFrameSideData *sd = av_frame_get_side_data(src, AV_FRAME_DATA_AUDIO_SERVICE_TYPE);
+        if (sd && sd->size >= sizeof(enum AVAudioServiceType))
+            avctx->audio_service_type = *(enum AVAudioServiceType*)sd->data;
+
+        /* check for valid frame size */
+        if (avctx->codec->capabilities & AV_CODEC_CAP_SMALL_LAST_FRAME) {
+            if (src->nb_samples > avctx->frame_size) {
+                av_log(avctx, AV_LOG_ERROR, "more samples than frame size\n");
+                return AVERROR(EINVAL);
+            }
+        } else if (!(avctx->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE)) {
+            /* if we already got an undersized frame, that must have been the last */
+            if (avctx->internal->last_audio_frame) {
+                av_log(avctx, AV_LOG_ERROR, "frame_size (%d) was not respected for a non-last frame\n", avctx->frame_size);
+                return AVERROR(EINVAL);
+            }
+
+            if (src->nb_samples < avctx->frame_size) {
+                ret = pad_last_frame(avctx, dst, src);
+                if (ret < 0)
+                    return ret;
+
+                avctx->internal->last_audio_frame = 1;
+            } else if (src->nb_samples > avctx->frame_size) {
+                av_log(avctx, AV_LOG_ERROR, "nb_samples (%d) != frame_size (%d)\n", src->nb_samples, avctx->frame_size);
+                return AVERROR(EINVAL);
+            }
+        }
+    }
+
+    if (!dst->data[0]) {
+        ret = av_frame_ref(dst, src);
+        if (ret < 0)
+             return ret;
+    }
+
+    return 0;
+}
+
 int attribute_align_arg avcodec_send_frame(AVCodecContext *avctx, const AVFrame *frame)
 {
+    AVCodecInternal *avci = avctx->internal;
+    int ret;
+
     if (!avcodec_is_open(avctx) || !av_codec_is_encoder(avctx->codec))
         return AVERROR(EINVAL);
 
-    if (avctx->internal->draining)
+    if (avci->draining)
         return AVERROR_EOF;
 
-    if (!frame) {
-        avctx->internal->draining = 1;
+    if (avci->buffer_frame->data[0])
+        return AVERROR(EAGAIN);
 
-        if (!(avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
-            return 0;
+    if (!frame) {
+        avci->draining = 1;
+    } else {
+        ret = encode_send_frame_internal(avctx, frame);
+        if (ret < 0)
+            return ret;
     }
 
-    if (avctx->codec->send_frame)
-        return avctx->codec->send_frame(avctx, frame);
-
-    // Emulation via old API. Do it here instead of avcodec_receive_packet, because:
-    // 1. if the AVFrame is not refcounted, the copying will be much more
-    //    expensive than copying the packet data
-    // 2. assume few users use non-refcounted AVPackets, so usually no copy is
-    //    needed
-
-    if (avctx->internal->buffer_pkt_valid)
-        return AVERROR(EAGAIN);
+    if (!avci->buffer_pkt->data && !avci->buffer_pkt->side_data) {
+        ret = encode_receive_packet_internal(avctx, avci->buffer_pkt);
+        if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
+            return ret;
+    }
 
-    return do_encode(avctx, frame, &(int){0});
+    return 0;
 }
 
 int attribute_align_arg avcodec_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
 {
+    AVCodecInternal *avci = avctx->internal;
+    int ret;
+
     av_packet_unref(avpkt);
 
     if (!avcodec_is_open(avctx) || !av_codec_is_encoder(avctx->codec))
         return AVERROR(EINVAL);
 
-    if (avctx->codec->receive_packet) {
-        int ret;
-        if (avctx->internal->draining && !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
-            return AVERROR_EOF;
-        ret = avctx->codec->receive_packet(avctx, avpkt);
-        if (!ret)
-            // Encoders must always return ref-counted buffers.
-            // Side-data only packets have no data and can be not ref-counted.
-            av_assert0(!avpkt->data || avpkt->buf);
-        return ret;
-    }
-
-    // Emulation via old API.
-
-    if (!avctx->internal->buffer_pkt_valid) {
-        int got_packet;
-        int ret;
-        if (!avctx->internal->draining)
-            return AVERROR(EAGAIN);
-        ret = do_encode(avctx, NULL, &got_packet);
+    if (avci->buffer_pkt->data || avci->buffer_pkt->side_data) {
+        av_packet_move_ref(avpkt, avci->buffer_pkt);
+    } else {
+        ret = encode_receive_packet_internal(avctx, avpkt);
         if (ret < 0)
             return ret;
-        if (ret >= 0 && !got_packet)
-            return AVERROR_EOF;
     }
 
-    av_packet_move_ref(avpkt, avctx->internal->buffer_pkt);
-    avctx->internal->buffer_pkt_valid = 0;
     return 0;
 }
diff --git a/libavcodec/encode.h b/libavcodec/encode.h
new file mode 100644
index 0000000000..dfa9cb2d97
--- /dev/null
+++ b/libavcodec/encode.h
@@ -0,0 +1,39 @@
+/*
+ * generic encoding-related code
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ENCODE_H
+#define AVCODEC_ENCODE_H
+
+#include "libavutil/frame.h"
+
+#include "avcodec.h"
+
+/**
+ * Called by encoders to get the next frame for encoding.
+ *
+ * @param frame An empty frame to be filled with data.
+ * @return 0 if a new reference has been successfully written to frame
+ *         AVERROR(EAGAIN) if no data is currently available
+ *         AVERROR_EOF if end of stream has been reached, so no more data
+ *                     will be available
+ */
+int ff_encode_get_frame(AVCodecContext *avctx, AVFrame *frame);
+
+#endif /* AVCODEC_ENCODE_H */
diff --git a/libavcodec/internal.h b/libavcodec/internal.h
index 0e3415d69b..0b0888265f 100644
--- a/libavcodec/internal.h
+++ b/libavcodec/internal.h
@@ -113,6 +113,10 @@ typedef struct DecodeSimpleContext {
     AVFrame  *out_frame;
 } DecodeSimpleContext;
 
+typedef struct EncodeSimpleContext {
+    AVFrame *in_frame;
+} EncodeSimpleContext;
+
 typedef struct AVCodecInternal {
     /**
      * Whether the parent AVCodecContext is a copy of the context which had
@@ -151,6 +155,8 @@ typedef struct AVCodecInternal {
 
     void *frame_thread_encoder;
 
+    EncodeSimpleContext es;
+
     /**
      * Number of audio samples to skip at the start of the next decoded frame
      */
@@ -170,7 +176,6 @@ typedef struct AVCodecInternal {
      * buffers for using new encode/decode API through legacy API
      */
     AVPacket *buffer_pkt;
-    int buffer_pkt_valid; // encoding: packet without data can be valid
     AVFrame *buffer_frame;
     int draining_done;
     int compat_decode_warned;
diff --git a/libavcodec/librav1e.c b/libavcodec/librav1e.c
index 6f9b4cce4c..e9b82a724a 100644
--- a/libavcodec/librav1e.c
+++ b/libavcodec/librav1e.c
@@ -30,12 +30,15 @@
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 #include "avcodec.h"
+#include "encode.h"
 #include "internal.h"
 
 typedef struct librav1eContext {
     const AVClass *class;
 
     RaContext *ctx;
+    AVFrame *frame;
+    RaFrame *rframe;
     AVBSFContext *bsf;
 
     uint8_t *pass_data;
@@ -165,7 +168,12 @@ static av_cold int librav1e_encode_close(AVCodecContext *avctx)
         rav1e_context_unref(ctx->ctx);
         ctx->ctx = NULL;
     }
+    if (ctx->rframe) {
+        rav1e_frame_unref(ctx->rframe);
+        ctx->rframe = NULL;
+    }
 
+    av_frame_free(&ctx->frame);
     av_bsf_free(&ctx->bsf);
     av_freep(&ctx->pass_data);
 
@@ -180,6 +188,10 @@ static av_cold int librav1e_encode_init(AVCodecContext *avctx)
     int rret;
     int ret = 0;
 
+    ctx->frame = av_frame_alloc();
+    if (!ctx->frame)
+        return AVERROR(ENOMEM);
+
     cfg = rav1e_config_default();
     if (!cfg) {
         av_log(avctx, AV_LOG_ERROR, "Could not allocate rav1e config.\n");
@@ -416,18 +428,27 @@ end:
     return ret;
 }
 
-static int librav1e_send_frame(AVCodecContext *avctx, const AVFrame *frame)
+static int librav1e_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
 {
     librav1eContext *ctx = avctx->priv_data;
-    RaFrame *rframe = NULL;
+    RaFrame *rframe = ctx->rframe;
+    RaPacket *rpkt = NULL;
     int ret;
 
-    if (frame) {
+    if (!rframe) {
+        AVFrame *frame = ctx->frame;
+
+        ret = ff_encode_get_frame(avctx, frame);
+        if (ret < 0 && ret != AVERROR_EOF)
+            return ret;
+
+        if (frame->buf[0]) {
         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
 
         rframe = rav1e_frame_new(ctx->ctx);
         if (!rframe) {
             av_log(avctx, AV_LOG_ERROR, "Could not allocate new rav1e frame.\n");
+            av_frame_unref(frame);
             return AVERROR(ENOMEM);
         }
 
@@ -438,17 +459,23 @@ static int librav1e_send_frame(AVCodecContext *avctx, const AVFrame *frame)
                                    (frame->height >> shift) * frame->linesize[i],
                                    frame->linesize[i], bytes);
         }
+        av_frame_unref(frame);
+        }
     }
 
     ret = rav1e_send_frame(ctx->ctx, rframe);
     if (rframe)
+        if (ret == RA_ENCODER_STATUS_ENOUGH_DATA) {
+            ctx->rframe = rframe; /* Queue is full. Store the RaFrame to retry next call */
+        } else {
          rav1e_frame_unref(rframe); /* No need to unref if flushing. */
+            ctx->rframe = NULL;
+        }
 
     switch (ret) {
     case RA_ENCODER_STATUS_SUCCESS:
-        break;
     case RA_ENCODER_STATUS_ENOUGH_DATA:
-        return AVERROR(EAGAIN);
+        break;
     case RA_ENCODER_STATUS_FAILURE:
         av_log(avctx, AV_LOG_ERROR, "Could not send frame: %s\n", rav1e_status_to_str(ret));
         return AVERROR_EXTERNAL;
@@ -457,15 +484,6 @@ static int librav1e_send_frame(AVCodecContext *avctx, const AVFrame *frame)
         return AVERROR_UNKNOWN;
     }
 
-    return 0;
-}
-
-static int librav1e_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
-{
-    librav1eContext *ctx = avctx->priv_data;
-    RaPacket *rpkt = NULL;
-    int ret;
-
 retry:
 
     if (avctx->flags & AV_CODEC_FLAG_PASS1) {
@@ -490,9 +508,7 @@ retry:
         }
         return AVERROR_EOF;
     case RA_ENCODER_STATUS_ENCODED:
-        if (avctx->internal->draining)
-            goto retry;
-        return AVERROR(EAGAIN);
+        goto retry;
     case RA_ENCODER_STATUS_NEED_MORE_DATA:
         if (avctx->internal->draining) {
             av_log(avctx, AV_LOG_ERROR, "Unexpected error when receiving packet after EOF.\n");
@@ -592,7 +608,6 @@ AVCodec ff_librav1e_encoder = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_AV1,
     .init           = librav1e_encode_init,
-    .send_frame     = librav1e_send_frame,
     .receive_packet = librav1e_receive_packet,
     .close          = librav1e_encode_close,
     .priv_data_size = sizeof(librav1eContext),
diff --git a/libavcodec/mfenc.c b/libavcodec/mfenc.c
index 83f26b3cc6..913e9a23ea 100644
--- a/libavcodec/mfenc.c
+++ b/libavcodec/mfenc.c
@@ -22,6 +22,7 @@
 #define _WIN32_WINNT 0x0602
 #endif
 
+#include "encode.h"
 #include "mf_utils.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/opt.h"
@@ -32,6 +33,7 @@
 
 typedef struct MFContext {
     AVClass *av_class;
+    AVFrame *frame;
     int is_video, is_audio;
     GUID main_subtype;
     IMFTransform *mft;
@@ -400,26 +402,6 @@ static int mf_send_sample(AVCodecContext *avctx, IMFSample *sample)
     return 0;
 }
 
-static int mf_send_frame(AVCodecContext *avctx, const AVFrame *frame)
-{
-    MFContext *c = avctx->priv_data;
-    int ret;
-    IMFSample *sample = NULL;
-    if (frame) {
-        sample = mf_avframe_to_sample(avctx, frame);
-        if (!sample)
-            return AVERROR(ENOMEM);
-        if (c->is_video && c->codec_api) {
-            if (frame->pict_type == AV_PICTURE_TYPE_I || !c->sample_sent)
-                ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncVideoForceKeyFrame, FF_VAL_VT_UI4(1));
-        }
-    }
-    ret = mf_send_sample(avctx, sample);
-    if (sample)
-        IMFSample_Release(sample);
-    return ret;
-}
-
 static int mf_receive_sample(AVCodecContext *avctx, IMFSample **out_sample)
 {
     MFContext *c = avctx->priv_data;
@@ -502,9 +484,36 @@ static int mf_receive_sample(AVCodecContext *avctx, IMFSample **out_sample)
 
 static int mf_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
 {
-    IMFSample *sample;
+    MFContext *c = avctx->priv_data;
+    IMFSample *sample = NULL;
     int ret;
 
+    if (!c->frame->buf[0]) {
+        ret = ff_encode_get_frame(avctx, c->frame);
+        if (ret < 0 && ret != AVERROR_EOF)
+            return ret;
+    }
+
+    if (c->frame->buf[0]) {
+        sample = mf_avframe_to_sample(avctx, c->frame);
+        if (!sample) {
+            av_frame_unref(c->frame);
+            return AVERROR(ENOMEM);
+        }
+        if (c->is_video && c->codec_api) {
+            if (c->frame->pict_type == AV_PICTURE_TYPE_I || !c->sample_sent)
+                ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncVideoForceKeyFrame, FF_VAL_VT_UI4(1));
+        }
+    }
+
+    ret = mf_send_sample(avctx, sample);
+    if (sample)
+        IMFSample_Release(sample);
+    if (ret != AVERROR(EAGAIN))
+        av_frame_unref(c->frame);
+    if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
+        return ret;
+
     ret = mf_receive_sample(avctx, &sample);
     if (ret < 0)
         return ret;
@@ -1036,6 +1045,10 @@ static int mf_init(AVCodecContext *avctx)
     const CLSID *subtype = ff_codec_to_mf_subtype(avctx->codec_id);
     int use_hw = 0;
 
+    c->frame = av_frame_alloc();
+    if (!c->frame)
+        return AVERROR(ENOMEM);
+
     c->is_audio = avctx->codec_type == AVMEDIA_TYPE_AUDIO;
     c->is_video = !c->is_audio;
     c->reorder_delay = AV_NOPTS_VALUE;
@@ -1124,6 +1137,8 @@ static int mf_close(AVCodecContext *avctx)
 
     ff_free_mf(&c->mft);
 
+    av_frame_free(&c->frame);
+
     av_freep(&avctx->extradata);
     avctx->extradata_size = 0;
 
@@ -1148,7 +1163,6 @@ static int mf_close(AVCodecContext *avctx)
         .priv_data_size = sizeof(MFContext),                                   \
         .init           = mf_init,                                             \
         .close          = mf_close,                                            \
-        .send_frame     = mf_send_frame,                                       \
         .receive_packet = mf_receive_packet,                                   \
         EXTRA                                                                  \
         .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HYBRID,            \
diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
index cbdddd33f9..8dfff5e5d8 100644
--- a/libavcodec/nvenc.c
+++ b/libavcodec/nvenc.c
@@ -30,6 +30,7 @@
 #include "libavutil/avassert.h"
 #include "libavutil/mem.h"
 #include "libavutil/pixdesc.h"
+#include "encode.h"
 #include "internal.h"
 
 #define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, dl_fn->cuda_dl, x)
@@ -1503,6 +1504,8 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx)
     av_freep(&ctx->surfaces);
     ctx->nb_surfaces = 0;
 
+    av_frame_free(&ctx->frame);
+
     if (ctx->nvencoder) {
         p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
 
@@ -1556,6 +1559,10 @@ av_cold int ff_nvenc_encode_init(AVCodecContext *avctx)
         ctx->data_pix_fmt = avctx->pix_fmt;
     }
 
+    ctx->frame = av_frame_alloc();
+    if (!ctx->frame)
+        return AVERROR(ENOMEM);
+
     if ((ret = nvenc_load_libraries(avctx)) < 0)
         return ret;
 
@@ -1873,9 +1880,7 @@ static int process_output_surface(AVCodecContext *avctx, AVPacket *pkt, NvencSur
         goto error;
     }
 
-    res = pkt->data ?
-        ff_alloc_packet2(avctx, pkt, lock_params.bitstreamSizeInBytes, lock_params.bitstreamSizeInBytes) :
-        av_new_packet(pkt, lock_params.bitstreamSizeInBytes);
+    res = av_new_packet(pkt, lock_params.bitstreamSizeInBytes);
 
     if (res < 0) {
         p_nvenc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->output_surface);
@@ -2061,7 +2066,7 @@ static void reconfig_encoder(AVCodecContext *avctx, const AVFrame *frame)
     }
 }
 
-int ff_nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
+static int nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
 {
     NVENCSTATUS nv_status;
     NvencSurface *tmp_out_surf, *in_surf;
@@ -2079,15 +2084,7 @@ int ff_nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
     if ((!ctx->cu_context && !ctx->d3d11_device) || !ctx->nvencoder)
         return AVERROR(EINVAL);
 
-    if (ctx->encoder_flushing) {
-        if (avctx->internal->draining)
-            return AVERROR_EOF;
-
-        ctx->encoder_flushing = 0;
-        av_fifo_reset(ctx->timestamp_list);
-    }
-
-    if (frame) {
+    if (frame && frame->buf[0]) {
         in_surf = get_free_frame(ctx);
         if (!in_surf)
             return AVERROR(EAGAIN);
@@ -2147,7 +2144,6 @@ int ff_nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
         nvenc_codec_specific_pic_params(avctx, &pic_params, sei_data);
     } else {
         pic_params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
-        ctx->encoder_flushing = 1;
     }
 
     res = nvenc_push_context(avctx);
@@ -2165,7 +2161,7 @@ int ff_nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
         nv_status != NV_ENC_ERR_NEED_MORE_INPUT)
         return nvenc_print_error(avctx, nv_status, "EncodePicture failed!");
 
-    if (frame) {
+    if (frame && frame->buf[0]) {
         av_fifo_generic_write(ctx->output_surface_queue, &in_surf, sizeof(in_surf), NULL);
         timestamp_queue_enqueue(ctx->timestamp_list, frame->pts);
     }
@@ -2188,10 +2184,25 @@ int ff_nvenc_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
 
     NvencContext *ctx = avctx->priv_data;
 
+    AVFrame *frame = ctx->frame;
+
     if ((!ctx->cu_context && !ctx->d3d11_device) || !ctx->nvencoder)
         return AVERROR(EINVAL);
 
-    if (output_ready(avctx, ctx->encoder_flushing)) {
+    if (!frame->buf[0]) {
+        res = ff_encode_get_frame(avctx, frame);
+        if (res < 0 && res != AVERROR_EOF)
+            return res;
+    }
+
+    res = nvenc_send_frame(avctx, frame);
+    if (res < 0) {
+        if (res != AVERROR(EAGAIN))
+            return res;
+    } else
+        av_frame_unref(frame);
+
+    if (output_ready(avctx, avctx->internal->draining)) {
         av_fifo_generic_read(ctx->output_surface_ready_queue, &tmp_out_surf, sizeof(tmp_out_surf), NULL);
 
         res = nvenc_push_context(avctx);
@@ -2208,7 +2219,7 @@ int ff_nvenc_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
             return res;
 
         av_fifo_generic_write(ctx->unused_surface_queue, &tmp_out_surf, sizeof(tmp_out_surf), NULL);
-    } else if (ctx->encoder_flushing) {
+    } else if (avctx->internal->draining) {
         return AVERROR_EOF;
     } else {
         return AVERROR(EAGAIN);
@@ -2217,31 +2228,10 @@ int ff_nvenc_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
     return 0;
 }
 
-int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
-                          const AVFrame *frame, int *got_packet)
+av_cold void ff_nvenc_encode_flush(AVCodecContext *avctx)
 {
     NvencContext *ctx = avctx->priv_data;
-    int res;
-
-    if (!ctx->encoder_flushing) {
-        res = ff_nvenc_send_frame(avctx, frame);
-        if (res < 0)
-            return res;
-    }
-
-    res = ff_nvenc_receive_packet(avctx, pkt);
-    if (res == AVERROR(EAGAIN) || res == AVERROR_EOF) {
-        *got_packet = 0;
-    } else if (res < 0) {
-        return res;
-    } else {
-        *got_packet = 1;
-    }
 
-    return 0;
-}
-
-av_cold void ff_nvenc_encode_flush(AVCodecContext *avctx)
-{
-    ff_nvenc_send_frame(avctx, NULL);
+    nvenc_send_frame(avctx, NULL);
+    av_fifo_reset(ctx->timestamp_list);
 }
diff --git a/libavcodec/nvenc.h b/libavcodec/nvenc.h
index 7a415a4835..007721aa66 100644
--- a/libavcodec/nvenc.h
+++ b/libavcodec/nvenc.h
@@ -138,6 +138,8 @@ typedef struct NvencContext
     CUstream cu_stream;
     ID3D11Device *d3d11_device;
 
+    AVFrame *frame;
+
     int nb_surfaces;
     NvencSurface *surfaces;
 
@@ -146,8 +148,6 @@ typedef struct NvencContext
     AVFifoBuffer *output_surface_ready_queue;
     AVFifoBuffer *timestamp_list;
 
-    int encoder_flushing;
-
     struct {
         void *ptr;
         int ptr_index;
@@ -203,13 +203,8 @@ int ff_nvenc_encode_init(AVCodecContext *avctx);
 
 int ff_nvenc_encode_close(AVCodecContext *avctx);
 
-int ff_nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame);
-
 int ff_nvenc_receive_packet(AVCodecContext *avctx, AVPacket *pkt);
 
-int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
-                          const AVFrame *frame, int *got_packet);
-
 void ff_nvenc_encode_flush(AVCodecContext *avctx);
 
 extern const enum AVPixelFormat ff_nvenc_pix_fmts[];
diff --git a/libavcodec/nvenc_h264.c b/libavcodec/nvenc_h264.c
index c877cf4a30..e973e9ee92 100644
--- a/libavcodec/nvenc_h264.c
+++ b/libavcodec/nvenc_h264.c
@@ -180,9 +180,7 @@ AVCodec ff_nvenc_encoder = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_H264,
     .init           = nvenc_old_init,
-    .send_frame     = ff_nvenc_send_frame,
     .receive_packet = ff_nvenc_receive_packet,
-    .encode2        = ff_nvenc_encode_frame,
     .close          = ff_nvenc_encode_close,
     .flush          = ff_nvenc_encode_flush,
     .priv_data_size = sizeof(NvencContext),
@@ -212,9 +210,7 @@ AVCodec ff_nvenc_h264_encoder = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_H264,
     .init           = nvenc_old_init,
-    .send_frame     = ff_nvenc_send_frame,
     .receive_packet = ff_nvenc_receive_packet,
-    .encode2        = ff_nvenc_encode_frame,
     .close          = ff_nvenc_encode_close,
     .flush          = ff_nvenc_encode_flush,
     .priv_data_size = sizeof(NvencContext),
@@ -244,9 +240,7 @@ AVCodec ff_h264_nvenc_encoder = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_H264,
     .init           = ff_nvenc_encode_init,
-    .send_frame     = ff_nvenc_send_frame,
     .receive_packet = ff_nvenc_receive_packet,
-    .encode2        = ff_nvenc_encode_frame,
     .close          = ff_nvenc_encode_close,
     .flush          = ff_nvenc_encode_flush,
     .priv_data_size = sizeof(NvencContext),
diff --git a/libavcodec/nvenc_hevc.c b/libavcodec/nvenc_hevc.c
index 7f12b56a46..434dc6679e 100644
--- a/libavcodec/nvenc_hevc.c
+++ b/libavcodec/nvenc_hevc.c
@@ -168,9 +168,7 @@ AVCodec ff_nvenc_hevc_encoder = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_HEVC,
     .init           = nvenc_old_init,
-    .send_frame     = ff_nvenc_send_frame,
     .receive_packet = ff_nvenc_receive_packet,
-    .encode2        = ff_nvenc_encode_frame,
     .close          = ff_nvenc_encode_close,
     .priv_data_size = sizeof(NvencContext),
     .priv_class     = &nvenc_hevc_class,
@@ -198,9 +196,7 @@ AVCodec ff_hevc_nvenc_encoder = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_HEVC,
     .init           = ff_nvenc_encode_init,
-    .send_frame     = ff_nvenc_send_frame,
     .receive_packet = ff_nvenc_receive_packet,
-    .encode2        = ff_nvenc_encode_frame,
     .close          = ff_nvenc_encode_close,
     .flush          = ff_nvenc_encode_flush,
     .priv_data_size = sizeof(NvencContext),
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 3899a13b99..cfaf6a2ae6 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -93,7 +93,7 @@ void av_fast_padded_mallocz(void *ptr, unsigned int *size, size_t min_size)
 
 int av_codec_is_encoder(const AVCodec *codec)
 {
-    return codec && (codec->encode_sub || codec->encode2 ||codec->send_frame);
+    return codec && (codec->encode_sub || codec->encode2 || codec->receive_packet);
 }
 
 int av_codec_is_decoder(const AVCodec *codec)
@@ -587,11 +587,13 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
     avci->compat_decode_frame = av_frame_alloc();
     avci->buffer_frame = av_frame_alloc();
     avci->buffer_pkt = av_packet_alloc();
+    avci->es.in_frame = av_frame_alloc();
     avci->ds.in_pkt = av_packet_alloc();
     avci->last_pkt_props = av_packet_alloc();
     if (!avci->to_free      || !avci->compat_decode_frame ||
         !avci->buffer_frame || !avci->buffer_pkt          ||
-        !avci->ds.in_pkt    || !avci->last_pkt_props) {
+        !avci->es.in_frame  || !avci->ds.in_pkt           ||
+        !avci->last_pkt_props) {
         ret = AVERROR(ENOMEM);
         goto free_and_end;
     }
@@ -1045,6 +1047,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
         av_packet_free(&avci->last_pkt_props);
 
         av_packet_free(&avci->ds.in_pkt);
+        av_frame_free(&avci->es.in_frame);
         av_bsf_free(&avci->bsf);
 
         av_buffer_unref(&avci->pool);
@@ -1080,8 +1083,8 @@ void avcodec_flush_buffers(AVCodecContext *avctx)
     av_frame_unref(avci->buffer_frame);
     av_frame_unref(avci->compat_decode_frame);
     av_packet_unref(avci->buffer_pkt);
-    avci->buffer_pkt_valid = 0;
 
+    av_frame_unref(avci->es.in_frame);
     av_packet_unref(avci->ds.in_pkt);
 
     if (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME)
@@ -1143,6 +1146,7 @@ av_cold int avcodec_close(AVCodecContext *avctx)
         av_packet_free(&avctx->internal->last_pkt_props);
 
         av_packet_free(&avctx->internal->ds.in_pkt);
+        av_frame_free(&avctx->internal->es.in_frame);
 
         av_buffer_unref(&avctx->internal->pool);
 
diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c
index e48b3a8ccf..d8d872ea09 100644
--- a/libavcodec/v4l2_m2m.c
+++ b/libavcodec/v4l2_m2m.c
@@ -329,6 +329,7 @@ static void v4l2_m2m_destroy_context(void *opaque, uint8_t *context)
     sem_destroy(&s->refsync);
 
     close(s->fd);
+    av_frame_free(&s->frame);
 
     av_free(s);
 }
@@ -415,5 +416,12 @@ int ff_v4l2_m2m_create_context(V4L2m2mPriv *priv, V4L2m2mContext **s)
     priv->context->self_ref = priv->context_ref;
     priv->context->fd = -1;
 
+    priv->context->frame = av_frame_alloc();
+    if (!priv->context->frame) {
+        av_buffer_unref(&priv->context_ref);
+        *s = NULL; /* freed when unreferencing context_ref */
+        return AVERROR(ENOMEM);
+    }
+
     return 0;
 }
diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h
index 456281f48c..b67b216331 100644
--- a/libavcodec/v4l2_m2m.h
+++ b/libavcodec/v4l2_m2m.h
@@ -58,6 +58,9 @@ typedef struct V4L2m2mContext {
     int draining;
     AVPacket buf_pkt;
 
+    /* Reference to a frame. Only used during encoding */
+    AVFrame *frame;
+
     /* Reference to self; only valid while codec is active. */
     AVBufferRef *self_ref;
 
diff --git a/libavcodec/v4l2_m2m_enc.c b/libavcodec/v4l2_m2m_enc.c
index a21a7b6c65..2b39651185 100644
--- a/libavcodec/v4l2_m2m_enc.c
+++ b/libavcodec/v4l2_m2m_enc.c
@@ -24,6 +24,7 @@
 #include <linux/videodev2.h>
 #include <sys/ioctl.h>
 #include <search.h>
+#include "encode.h"
 #include "libavcodec/avcodec.h"
 #include "libavcodec/internal.h"
 #include "libavutil/pixdesc.h"
@@ -288,11 +289,24 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
     V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
     V4L2Context *const capture = &s->capture;
     V4L2Context *const output = &s->output;
+    AVFrame *frame = s->frame;
     int ret;
 
     if (s->draining)
         goto dequeue;
 
+    ret = ff_encode_get_frame(avctx, frame);
+    if (ret < 0 && ret != AVERROR_EOF)
+        return ret;
+
+    if (ret == AVERROR_EOF)
+        frame = NULL;
+
+    ret = v4l2_send_frame(avctx, frame);
+    av_frame_unref(frame);
+    if (ret < 0)
+        return ret;
+
     if (!output->streamon) {
         ret = ff_v4l2_context_set_status(output, VIDIOC_STREAMON);
         if (ret) {
@@ -411,7 +425,6 @@ static const AVCodecDefault v4l2_m2m_defaults[] = {
         .priv_data_size = sizeof(V4L2m2mPriv), \
         .priv_class     = &v4l2_m2m_ ## NAME ##_enc_class, \
         .init           = v4l2_encode_init, \
-        .send_frame     = v4l2_send_frame, \
         .receive_packet = v4l2_receive_packet, \
         .close          = v4l2_encode_close, \
         .defaults       = v4l2_m2m_defaults, \
diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
index cb05ebd774..98026f451a 100644
--- a/libavcodec/vaapi_encode.c
+++ b/libavcodec/vaapi_encode.c
@@ -25,6 +25,7 @@
 #include "libavutil/pixdesc.h"
 
 #include "vaapi_encode.h"
+#include "encode.h"
 #include "avcodec.h"
 
 const AVCodecHWConfigInternal *ff_vaapi_encode_hw_configs[] = {
@@ -1043,7 +1044,7 @@ static int vaapi_encode_check_frame(AVCodecContext *avctx,
     return 0;
 }
 
-int ff_vaapi_encode_send_frame(AVCodecContext *avctx, const AVFrame *frame)
+static int vaapi_encode_send_frame(AVCodecContext *avctx, AVFrame *frame)
 {
     VAAPIEncodeContext *ctx = avctx->priv_data;
     VAAPIEncodePicture *pic;
@@ -1066,9 +1067,7 @@ int ff_vaapi_encode_send_frame(AVCodecContext *avctx, const AVFrame *frame)
             err = AVERROR(ENOMEM);
             goto fail;
         }
-        err = av_frame_ref(pic->input_image, frame);
-        if (err < 0)
-            goto fail;
+        av_frame_move_ref(pic->input_image, frame);
 
         if (ctx->input_order == 0 || frame->pict_type == AV_PICTURE_TYPE_I)
             pic->force_idr = 1;
@@ -1114,8 +1113,20 @@ int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
 {
     VAAPIEncodeContext *ctx = avctx->priv_data;
     VAAPIEncodePicture *pic;
+    AVFrame *frame = ctx->frame;
     int err;
 
+    err = ff_encode_get_frame(avctx, frame);
+    if (err < 0 && err != AVERROR_EOF)
+        return err;
+
+    if (err == AVERROR_EOF)
+        frame = NULL;
+
+    err = vaapi_encode_send_frame(avctx, frame);
+    if (err < 0)
+        return err;
+
     if (!ctx->pic_start) {
         if (ctx->end_of_stream)
             return AVERROR_EOF;
@@ -2214,6 +2225,11 @@ av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
     VAStatus vas;
     int err;
 
+    ctx->frame = av_frame_alloc();
+    if (!ctx->frame) {
+        return AVERROR(ENOMEM);
+    }
+
     if (!avctx->hw_frames_ctx) {
         av_log(avctx, AV_LOG_ERROR, "A hardware frames reference is "
                "required to associate the encoding device.\n");
@@ -2391,6 +2407,8 @@ av_cold int ff_vaapi_encode_close(AVCodecContext *avctx)
         ctx->va_config = VA_INVALID_ID;
     }
 
+    av_frame_free(&ctx->frame);
+
     av_freep(&ctx->codec_sequence_params);
     av_freep(&ctx->codec_picture_params);
 
diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
index 1329f6428f..85d2a8f616 100644
--- a/libavcodec/vaapi_encode.h
+++ b/libavcodec/vaapi_encode.h
@@ -328,6 +328,8 @@ typedef struct VAAPIEncodeContext {
     // If the driver does not support ROI then warn the first time we
     // encounter a frame with ROI side data.
     int             roi_warned;
+
+    AVFrame         *frame;
 } VAAPIEncodeContext;
 
 enum {
@@ -419,7 +421,6 @@ typedef struct VAAPIEncodeType {
 } VAAPIEncodeType;
 
 
-int ff_vaapi_encode_send_frame(AVCodecContext *avctx, const AVFrame *frame);
 int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt);
 
 int ff_vaapi_encode_init(AVCodecContext *avctx);
diff --git a/libavcodec/vaapi_encode_h264.c b/libavcodec/vaapi_encode_h264.c
index e195650ef7..b8d9c7b507 100644
--- a/libavcodec/vaapi_encode_h264.c
+++ b/libavcodec/vaapi_encode_h264.c
@@ -1351,7 +1351,6 @@ AVCodec ff_h264_vaapi_encoder = {
     .id             = AV_CODEC_ID_H264,
     .priv_data_size = sizeof(VAAPIEncodeH264Context),
     .init           = &vaapi_encode_h264_init,
-    .send_frame     = &ff_vaapi_encode_send_frame,
     .receive_packet = &ff_vaapi_encode_receive_packet,
     .close          = &vaapi_encode_h264_close,
     .priv_class     = &vaapi_encode_h264_class,
diff --git a/libavcodec/vaapi_encode_h265.c b/libavcodec/vaapi_encode_h265.c
index 92e0510911..c42d51592c 100644
--- a/libavcodec/vaapi_encode_h265.c
+++ b/libavcodec/vaapi_encode_h265.c
@@ -1287,7 +1287,6 @@ AVCodec ff_hevc_vaapi_encoder = {
     .id             = AV_CODEC_ID_HEVC,
     .priv_data_size = sizeof(VAAPIEncodeH265Context),
     .init           = &vaapi_encode_h265_init,
-    .send_frame     = &ff_vaapi_encode_send_frame,
     .receive_packet = &ff_vaapi_encode_receive_packet,
     .close          = &vaapi_encode_h265_close,
     .priv_class     = &vaapi_encode_h265_class,
diff --git a/libavcodec/vaapi_encode_mjpeg.c b/libavcodec/vaapi_encode_mjpeg.c
index 9f9ed811a4..6974ab2cad 100644
--- a/libavcodec/vaapi_encode_mjpeg.c
+++ b/libavcodec/vaapi_encode_mjpeg.c
@@ -559,7 +559,6 @@ AVCodec ff_mjpeg_vaapi_encoder = {
     .id             = AV_CODEC_ID_MJPEG,
     .priv_data_size = sizeof(VAAPIEncodeMJPEGContext),
     .init           = &vaapi_encode_mjpeg_init,
-    .send_frame     = &ff_vaapi_encode_send_frame,
     .receive_packet = &ff_vaapi_encode_receive_packet,
     .close          = &vaapi_encode_mjpeg_close,
     .priv_class     = &vaapi_encode_mjpeg_class,
diff --git a/libavcodec/vaapi_encode_mpeg2.c b/libavcodec/vaapi_encode_mpeg2.c
index 02c76552ef..1dcc84d75c 100644
--- a/libavcodec/vaapi_encode_mpeg2.c
+++ b/libavcodec/vaapi_encode_mpeg2.c
@@ -697,7 +697,6 @@ AVCodec ff_mpeg2_vaapi_encoder = {
     .id             = AV_CODEC_ID_MPEG2VIDEO,
     .priv_data_size = sizeof(VAAPIEncodeMPEG2Context),
     .init           = &vaapi_encode_mpeg2_init,
-    .send_frame     = &ff_vaapi_encode_send_frame,
     .receive_packet = &ff_vaapi_encode_receive_packet,
     .close          = &vaapi_encode_mpeg2_close,
     .priv_class     = &vaapi_encode_mpeg2_class,
diff --git a/libavcodec/vaapi_encode_vp8.c b/libavcodec/vaapi_encode_vp8.c
index cff926baae..51039fa19a 100644
--- a/libavcodec/vaapi_encode_vp8.c
+++ b/libavcodec/vaapi_encode_vp8.c
@@ -252,7 +252,6 @@ AVCodec ff_vp8_vaapi_encoder = {
     .id             = AV_CODEC_ID_VP8,
     .priv_data_size = sizeof(VAAPIEncodeVP8Context),
     .init           = &vaapi_encode_vp8_init,
-    .send_frame     = &ff_vaapi_encode_send_frame,
     .receive_packet = &ff_vaapi_encode_receive_packet,
     .close          = &ff_vaapi_encode_close,
     .priv_class     = &vaapi_encode_vp8_class,
diff --git a/libavcodec/vaapi_encode_vp9.c b/libavcodec/vaapi_encode_vp9.c
index 8514b85991..4f3b55ed2d 100644
--- a/libavcodec/vaapi_encode_vp9.c
+++ b/libavcodec/vaapi_encode_vp9.c
@@ -286,7 +286,6 @@ AVCodec ff_vp9_vaapi_encoder = {
     .id             = AV_CODEC_ID_VP9,
     .priv_data_size = sizeof(VAAPIEncodeVP9Context),
     .init           = &vaapi_encode_vp9_init,
-    .send_frame     = &ff_vaapi_encode_send_frame,
     .receive_packet = &ff_vaapi_encode_receive_packet,
     .close          = &ff_vaapi_encode_close,
     .priv_class     = &vaapi_encode_vp9_class,
-- 
2.26.2