[FFmpeg-devel] [PATCH] avdevice/v4l2: Switch to wrapped AVFrames and implement strides
Ridley Combs
rcombs at rcombs.me
Fri Sep 29 10:56:03 EEST 2023
> On Sep 29, 2023, at 00:52, Asahi Lina via ffmpeg-devel <ffmpeg-devel at ffmpeg.org> wrote:
>
> V4L2 provides a line stride to the client for hardware that has
> alignment requirements. rawvideo cannot represent this, so switch to
> wrapped_avframe for raw video formats and calculate the plane strides
> manually.
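>
> For context: a wrapped_avframe packet carries a pointer to the AVFrame
> itself rather than flattened pixel data, so per-plane strides survive
> intact. Roughly, the consumer side unwraps it like this (illustrative
> sketch only, not code from this patch):
>
>     AVFrame *in = (AVFrame *)pkt->data; /* valid: AV_PKT_FLAG_TRUSTED is set */
>     av_frame_move_ref(out, in);         /* hand the frame to the caller */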
>
> This is slightly messy: the existing helper APIs expect dimensions and
> an alignment value, while v4l2 provides the stride of plane 0 and
> leaves the strides of the subsequent planes implied, so we need to
> open-code the logic that calculates the plane strides.
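>
> As a standalone sketch of that open-coded rule (illustrative only;
> desc, max_step and max_step_comp come from av_pix_fmt_desc_get() and
> av_image_fill_max_pixsteps(), as in the hunk below):
>
>     linesize[0] = bytesperline;             /* plane 0: reported by the driver */
>     for (int i = 1; i < 4; i++) {
>         /* components 1 and 2 are chroma: apply horizontal subsampling */
>         int sh = (max_step_comp[i] == 1 || max_step_comp[i] == 2)
>                ? desc->log2_chroma_w : 0;
>         linesize[i] = (linesize[0] * max_step[i] / max_step[0]) >> sh;
>     }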
>
> This makes vertical video work properly on Apple Macs with "1080p"
> cameras, whose sensors are actually square and can capture resolutions
> such as 1080x1920, which require each line to be padded to a multiple
> of 64 bytes.
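>
> (Worked example: a 1080-pixel-wide row of 8-bit luma is 1080 bytes,
> which gets padded up to the next multiple of 64, i.e. 1088; the
> half-width chroma planes of YUV420P then end up with the implied
> 1088 >> 1 = 544-byte stride.)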
>
> In principle, this could be extended to support the V4L2 multiplanar
> API, though there seem to be practically no capture (not M2M) drivers
> that support this, so it's not terribly useful right now.
>
> Signed-off-by: Asahi Lina <lina at asahilina.net>
> ---
> libavdevice/v4l2-common.c | 68 +++++++++++------------
> libavdevice/v4l2.c | 138 +++++++++++++++++++++++++++++++++++++++-------
> 2 files changed, 151 insertions(+), 55 deletions(-)
>
> diff --git a/libavdevice/v4l2-common.c b/libavdevice/v4l2-common.c
> index b5b4448a3154..944ffe3d87e1 100644
> --- a/libavdevice/v4l2-common.c
> +++ b/libavdevice/v4l2-common.c
> @@ -19,53 +19,53 @@
> #include "v4l2-common.h"
>
> const struct fmt_map ff_fmt_conversion_table[] = {
> - //ff_fmt codec_id v4l2_fmt
> - { AV_PIX_FMT_YUV420P, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_YUV420 },
> - { AV_PIX_FMT_YUV420P, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_YVU420 },
> - { AV_PIX_FMT_YUV422P, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_YUV422P },
> - { AV_PIX_FMT_YUYV422, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_YUYV },
> - { AV_PIX_FMT_UYVY422, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_UYVY },
> - { AV_PIX_FMT_YUV411P, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_YUV411P },
> - { AV_PIX_FMT_YUV410P, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_YUV410 },
> - { AV_PIX_FMT_YUV410P, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_YVU410 },
> - { AV_PIX_FMT_RGB555LE,AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_RGB555 },
> - { AV_PIX_FMT_RGB555BE,AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_RGB555X },
> - { AV_PIX_FMT_RGB565LE,AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_RGB565 },
> - { AV_PIX_FMT_RGB565BE,AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_RGB565X },
> - { AV_PIX_FMT_BGR24, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_BGR24 },
> - { AV_PIX_FMT_RGB24, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_RGB24 },
> + //ff_fmt codec_id v4l2_fmt
> + { AV_PIX_FMT_YUV420P, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_YUV420 },
> + { AV_PIX_FMT_YUV420P, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_YVU420 },
> + { AV_PIX_FMT_YUV422P, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_YUV422P },
> + { AV_PIX_FMT_YUYV422, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_YUYV },
> + { AV_PIX_FMT_UYVY422, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_UYVY },
> + { AV_PIX_FMT_YUV411P, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_YUV411P },
> + { AV_PIX_FMT_YUV410P, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_YUV410 },
> + { AV_PIX_FMT_YUV410P, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_YVU410 },
> + { AV_PIX_FMT_RGB555LE,AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_RGB555 },
> + { AV_PIX_FMT_RGB555BE,AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_RGB555X },
> + { AV_PIX_FMT_RGB565LE,AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_RGB565 },
> + { AV_PIX_FMT_RGB565BE,AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_RGB565X },
> + { AV_PIX_FMT_BGR24, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_BGR24 },
> + { AV_PIX_FMT_RGB24, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_RGB24 },
> #ifdef V4L2_PIX_FMT_XBGR32
> - { AV_PIX_FMT_BGR0, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_XBGR32 },
> - { AV_PIX_FMT_0RGB, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_XRGB32 },
> - { AV_PIX_FMT_BGRA, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_ABGR32 },
> - { AV_PIX_FMT_ARGB, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_ARGB32 },
> + { AV_PIX_FMT_BGR0, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_XBGR32 },
> + { AV_PIX_FMT_0RGB, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_XRGB32 },
> + { AV_PIX_FMT_BGRA, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_ABGR32 },
> + { AV_PIX_FMT_ARGB, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_ARGB32 },
> #endif
> - { AV_PIX_FMT_BGR0, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_BGR32 },
> - { AV_PIX_FMT_0RGB, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_RGB32 },
> - { AV_PIX_FMT_GRAY8, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_GREY },
> + { AV_PIX_FMT_BGR0, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_BGR32 },
> + { AV_PIX_FMT_0RGB, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_RGB32 },
> + { AV_PIX_FMT_GRAY8, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_GREY },
> #ifdef V4L2_PIX_FMT_Y16
> - { AV_PIX_FMT_GRAY16LE,AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_Y16 },
> + { AV_PIX_FMT_GRAY16LE,AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_Y16 },
> #endif
> #ifdef V4L2_PIX_FMT_Z16
> - { AV_PIX_FMT_GRAY16LE,AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_Z16 },
> + { AV_PIX_FMT_GRAY16LE,AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_Z16 },
> #endif
> - { AV_PIX_FMT_NV12, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_NV12 },
> - { AV_PIX_FMT_NONE, AV_CODEC_ID_MJPEG, V4L2_PIX_FMT_MJPEG },
> - { AV_PIX_FMT_NONE, AV_CODEC_ID_MJPEG, V4L2_PIX_FMT_JPEG },
> + { AV_PIX_FMT_NV12, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_NV12 },
> + { AV_PIX_FMT_NONE, AV_CODEC_ID_MJPEG, V4L2_PIX_FMT_MJPEG },
> + { AV_PIX_FMT_NONE, AV_CODEC_ID_MJPEG, V4L2_PIX_FMT_JPEG },
> #ifdef V4L2_PIX_FMT_H264
> - { AV_PIX_FMT_NONE, AV_CODEC_ID_H264, V4L2_PIX_FMT_H264 },
> + { AV_PIX_FMT_NONE, AV_CODEC_ID_H264, V4L2_PIX_FMT_H264 },
> #endif
> #ifdef V4L2_PIX_FMT_MPEG4
> - { AV_PIX_FMT_NONE, AV_CODEC_ID_MPEG4, V4L2_PIX_FMT_MPEG4 },
> + { AV_PIX_FMT_NONE, AV_CODEC_ID_MPEG4, V4L2_PIX_FMT_MPEG4 },
> #endif
> #ifdef V4L2_PIX_FMT_CPIA1
> - { AV_PIX_FMT_NONE, AV_CODEC_ID_CPIA, V4L2_PIX_FMT_CPIA1 },
> + { AV_PIX_FMT_NONE, AV_CODEC_ID_CPIA, V4L2_PIX_FMT_CPIA1 },
> #endif
> #ifdef V4L2_PIX_FMT_SRGGB8
> - { AV_PIX_FMT_BAYER_BGGR8, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_SBGGR8 },
> - { AV_PIX_FMT_BAYER_GBRG8, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_SGBRG8 },
> - { AV_PIX_FMT_BAYER_GRBG8, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_SGRBG8 },
> - { AV_PIX_FMT_BAYER_RGGB8, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_SRGGB8 },
> + { AV_PIX_FMT_BAYER_BGGR8, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_SBGGR8 },
> + { AV_PIX_FMT_BAYER_GBRG8, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_SGBRG8 },
> + { AV_PIX_FMT_BAYER_GRBG8, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_SGRBG8 },
> + { AV_PIX_FMT_BAYER_RGGB8, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_SRGGB8 },
> #endif
> { AV_PIX_FMT_NONE, AV_CODEC_ID_NONE, 0 },
> };
> diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
> index 5e85d1a2b34e..534aa79b639e 100644
> --- a/libavdevice/v4l2.c
> +++ b/libavdevice/v4l2.c
> @@ -83,7 +83,10 @@ struct video_data {
> AVClass *class;
> int fd;
> int pixelformat; /* V4L2_PIX_FMT_* */
> + int pix_fmt; /* AV_PIX_FMT_* */
> int width, height;
> + int bytesperline;
> + int linesize[AV_NUM_DATA_POINTERS];
> int frame_size;
> int interlaced;
> int top_field_first;
> @@ -240,6 +243,7 @@ static int device_init(AVFormatContext *ctx, int *width, int *height,
> s->interlaced = 1;
> }
>
> + s->bytesperline = fmt.fmt.pix.bytesperline;
> return res;
> }
>
> @@ -501,9 +505,18 @@ static int convert_timestamp(AVFormatContext *ctx, int64_t *ts)
> return 0;
> }
>
> +static void v4l2_free_frame(void *opaque, uint8_t *data)
> +{
> + AVFrame *frame = (AVFrame*)data;
> +
> + av_frame_free(&frame);
> +}
> +
> static int mmap_read_frame(AVFormatContext *ctx, AVPacket *pkt)
> {
> struct video_data *s = ctx->priv_data;
> + AVBufferRef *avbuf = NULL;
> + AVFrame *frame = NULL;
> struct v4l2_buffer buf = {
> .type = V4L2_BUF_TYPE_VIDEO_CAPTURE,
> .memory = V4L2_MEMORY_MMAP
> @@ -560,13 +573,13 @@ static int mmap_read_frame(AVFormatContext *ctx, AVPacket *pkt)
> /* Image is at s->buff_start[buf.index] */
> if (atomic_load(&s->buffers_queued) == FFMAX(s->buffers / 8, 1)) {
> /* when we start getting low on queued buffers, fall back on copying data */
> - res = av_new_packet(pkt, buf.bytesused);
> - if (res < 0) {
> - av_log(ctx, AV_LOG_ERROR, "Error allocating a packet.\n");
> + avbuf = av_buffer_alloc(buf.bytesused);
> + if (!avbuf) {
> + av_log(ctx, AV_LOG_ERROR, "Error allocating a buffer.\n");
> enqueue_buffer(s, &buf);
> - return res;
> + return AVERROR(ENOMEM);
> }
> - memcpy(pkt->data, s->buf_start[buf.index], buf.bytesused);
> + memcpy(avbuf->data, s->buf_start[buf.index], buf.bytesused);
>
> res = enqueue_buffer(s, &buf);
> if (res) {
> @@ -576,9 +589,6 @@ static int mmap_read_frame(AVFormatContext *ctx, AVPacket *pkt)
> } else {
> struct buff_data *buf_descriptor;
>
> - pkt->data = s->buf_start[buf.index];
> - pkt->size = buf.bytesused;
> -
> buf_descriptor = av_malloc(sizeof(struct buff_data));
> if (!buf_descriptor) {
> /* Something went wrong... Since av_malloc() failed, we cannot even
> @@ -592,19 +602,66 @@ static int mmap_read_frame(AVFormatContext *ctx, AVPacket *pkt)
> buf_descriptor->index = buf.index;
> buf_descriptor->s = s;
>
> - pkt->buf = av_buffer_create(pkt->data, pkt->size, mmap_release_buffer,
> - buf_descriptor, 0);
> - if (!pkt->buf) {
> + avbuf = av_buffer_create(s->buf_start[buf.index], buf.bytesused,
> + mmap_release_buffer, buf_descriptor, 0);
> + if (!avbuf) {
> av_log(ctx, AV_LOG_ERROR, "Failed to create a buffer\n");
> enqueue_buffer(s, &buf);
> av_freep(&buf_descriptor);
> return AVERROR(ENOMEM);
> }
> }
> +
> + if (ctx->video_codec_id == AV_CODEC_ID_WRAPPED_AVFRAME) {
> + frame = av_frame_alloc();
> +
> + if (!frame) {
> + av_log(ctx, AV_LOG_ERROR, "Failed to create an AVFrame\n");
> + goto err_free;
> + }
> +
> + frame->buf[0] = avbuf;
> + avbuf = NULL; /* ownership passed to the frame */
> +
> + memcpy(frame->linesize, s->linesize, sizeof(s->linesize));
> + res = av_image_fill_pointers(frame->data, s->pix_fmt, s->height,
> + frame->buf[0]->data, frame->linesize);
> + if (res < 0) {
> + av_log(ctx, AV_LOG_ERROR, "Failed to compute data pointers\n");
> + goto err_free;
> + }
> +
> + frame->format = s->pix_fmt;
> + frame->width = s->width;
> + frame->height = s->height;
> +
> + pkt->buf = av_buffer_create((uint8_t*)frame, sizeof(*frame),
> + &v4l2_free_frame, ctx, 0);
> + if (!pkt->buf) {
> + av_log(ctx, AV_LOG_ERROR, "Failed to create an AVBuffer\n");
> + goto err_free;
> + }
> +
> + pkt->data = (uint8_t*)frame;
> + pkt->size = sizeof(*frame);
> + pkt->flags |= AV_PKT_FLAG_TRUSTED;
> + frame = NULL;
> + } else {
> + pkt->buf = avbuf;
> + pkt->data = avbuf->data;
> + pkt->size = buf.bytesused;
> + avbuf = NULL;
> + }
> +
> pkt->pts = buf_ts.tv_sec * INT64_C(1000000) + buf_ts.tv_usec;
> convert_timestamp(ctx, &pkt->pts);
>
> return pkt->size;
> +
> +err_free:
> + av_buffer_unref(&avbuf);
> + av_frame_free(&frame); /* also unrefs frame->buf[0] if it was set */
> + return AVERROR(ENOMEM);
> }
>
> static int mmap_start(AVFormatContext *ctx)
> @@ -957,9 +1013,56 @@ static int v4l2_read_header(AVFormatContext *ctx)
> goto fail;
>
> st->codecpar->format = ff_fmt_v4l2ff(desired_format, codec_id);
> - if (st->codecpar->format != AV_PIX_FMT_NONE)
> - s->frame_size = av_image_get_buffer_size(st->codecpar->format,
> - s->width, s->height, 1);
> + s->pix_fmt = st->codecpar->format;
> +
> + if (st->codecpar->format != AV_PIX_FMT_NONE) {
> + size_t sizes[4];
> + ptrdiff_t linesize[4];
> + const AVPixFmtDescriptor *desc;
> + int max_step [4];
> + int max_step_comp[4];
> +
> + /*
> + * Per the V4L2 spec, for the single-planar API the remaining plane
> + * strides are implied: plane 0's stride, scaled by each plane's
> + * pixel step and divided by the horizontal subsampling factor.
> + * For example, NV12's plane 1 holds interleaved UV at half
> + * resolution (pixel step 2), so its stride equals plane 0's.
> + */
> + desc = av_pix_fmt_desc_get(st->codecpar->format);
> + av_image_fill_max_pixsteps(max_step, max_step_comp, desc);
> +
> + if (!s->bytesperline) {
> + av_log(ctx, AV_LOG_WARNING,
> + "The V4L2 driver did not set bytesperline, guessing.\n");
> + s->bytesperline = s->width * max_step[0];
> + }
> +
> + s->linesize[0] = s->bytesperline;
> + for (int i = 1; i < 4; i++) {
> + int sh = (max_step_comp[i] == 1 || max_step_comp[i] == 2) ? desc->log2_chroma_w : 0;
> + s->linesize[i] = (s->linesize[0] * max_step[i] / max_step[0]) >> sh;
> + }
> +
> + /* av_image_fill_plane_sizes() takes ptrdiff_t strides, so convert */
> + for (int i = 0; i < 4; i++)
> + linesize[i] = s->linesize[i];
> +
> + res = av_image_fill_plane_sizes(sizes, s->pix_fmt, s->height, linesize);
> + if (res < 0) {
> + av_log(ctx, AV_LOG_ERROR, "failed to fill plane sizes\n");
> + goto fail;
> + }
> +
> + s->frame_size = 0;
> + for (int i = 0; i < 4; i++) {
> + if (sizes[i] > INT_MAX - s->frame_size) {
> + res = AVERROR(EINVAL);
> + goto fail;
> + }
> + s->frame_size += sizes[i];
> + }
> + }
>
> if ((res = mmap_init(ctx)) ||
> (res = mmap_start(ctx)) < 0)
> @@ -969,16 +1072,9 @@ static int v4l2_read_header(AVFormatContext *ctx)
>
> st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
> st->codecpar->codec_id = codec_id;
> - if (codec_id == AV_CODEC_ID_RAWVIDEO)
> - st->codecpar->codec_tag =
> - avcodec_pix_fmt_to_codec_tag(st->codecpar->format);
> - else if (codec_id == AV_CODEC_ID_H264) {
> + if (codec_id == AV_CODEC_ID_H264) {
> avpriv_stream_set_need_parsing(st, AVSTREAM_PARSE_FULL_ONCE);
> }
> - if (desired_format == V4L2_PIX_FMT_YVU420)
> - st->codecpar->codec_tag = MKTAG('Y', 'V', '1', '2');
> - else if (desired_format == V4L2_PIX_FMT_YVU410)
> - st->codecpar->codec_tag = MKTAG('Y', 'V', 'U', '9');
> st->codecpar->width = s->width;
> st->codecpar->height = s->height;
> if (st->avg_frame_rate.den)
>
> ---
> base-commit: b643af4acb471c3cb09b02afcc511498c9674347
> change-id: 20230929-v4l2-strides-443dc65b5cb8
>
> Thank you,
> ~~ Lina
>
LGTM; I'll wait a week and apply assuming nobody has any issues with this.
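
For anyone who wants to test locally: something like the following
(device path and capture size are examples) should exercise the
padded-stride path on an affected machine:

    ffmpeg -f v4l2 -video_size 1080x1920 -i /dev/video0 -frames:v 1 out.png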