[FFmpeg-devel] [PATCH] liavcodec: add bit-rate support to RoQ video encoder
Victor Luchits
vluchits at gmail.com
Sun Jan 21 22:19:43 EET 2024
One can now use the bitrate option (-b) to specify bit rate of the video
stream in the RoQ encoder. The option only becomes effective for values
above 800kbit/s, which is roughly equivalent to bandwidth of a 1x-speed
CD-ROM drive, minus the bandwidth taken up by stereo DPCM stream. Values
below this threshold produce visually inadequate results.
Original patch by Joseph Fenton aka Chilly Willy
Signed-off-by: Victor Luchits <vluchits at gmail.com>
---
Changelog | 1 +
libavcodec/roqvideo.h | 1 +
libavcodec/roqvideodec.c | 15 +++++
libavcodec/roqvideoenc.c | 118 ++++++++++++++++++++++++++++++++++-----
libavcodec/version.h | 2 +-
5 files changed, 123 insertions(+), 14 deletions(-)
diff --git a/Changelog b/Changelog
index c40b6d08fd..6974312f9d 100644
--- a/Changelog
+++ b/Changelog
@@ -22,6 +22,7 @@ version <next>:
- ffmpeg CLI -bsf option may now be used for input as well as output
- ffmpeg CLI options may now be used as -/opt <path>, which is equivalent
to -opt <contents of file <path>>
+- RoQ video bit rate option support
version 6.1:
- libaribcaption decoder
diff --git a/libavcodec/roqvideo.h b/libavcodec/roqvideo.h
index 2c2e42884d..6d30bcaada 100644
--- a/libavcodec/roqvideo.h
+++ b/libavcodec/roqvideo.h
@@ -43,6 +43,7 @@ typedef struct RoqContext {
AVFrame *last_frame;
AVFrame *current_frame;
int width, height;
+ int key_frame;
roq_cell cb2x2[256];
roq_qcell cb4x4[256];
diff --git a/libavcodec/roqvideodec.c b/libavcodec/roqvideodec.c
index bfc69a65c9..07d6b8bb8f 100644
--- a/libavcodec/roqvideodec.c
+++ b/libavcodec/roqvideodec.c
@@ -70,6 +70,7 @@ static void roqvideo_decode_frame(RoqContext *ri,
GetByteContext *gb)
chunk_start = bytestream2_tell(gb);
xpos = ypos = 0;
+ ri->key_frame = 1;
if (chunk_size > bytestream2_get_bytes_left(gb)) {
av_log(ri->logctx, AV_LOG_ERROR, "Chunk does not fit in input
buffer\n");
@@ -92,12 +93,14 @@ static void roqvideo_decode_frame(RoqContext *ri,
GetByteContext *gb)
switch(vqid) {
case RoQ_ID_MOT:
+ ri->key_frame = 0;
break;
case RoQ_ID_FCC: {
int byte = bytestream2_get_byte(gb);
mx = 8 - (byte >> 4) - ((signed char) (chunk_arg
>> 8));
my = 8 - (byte & 0xf) - ((signed char) chunk_arg);
ff_apply_motion_8x8(ri, xp, yp, mx, my);
+ ri->key_frame = 0;
break;
}
case RoQ_ID_SLD:
@@ -125,12 +128,14 @@ static void roqvideo_decode_frame(RoqContext *ri,
GetByteContext *gb)
vqflg_pos--;
switch(vqid) {
case RoQ_ID_MOT:
+ ri->key_frame = 0;
break;
case RoQ_ID_FCC: {
int byte = bytestream2_get_byte(gb);
mx = 8 - (byte >> 4) - ((signed char)
(chunk_arg >> 8));
my = 8 - (byte & 0xf) - ((signed char)
chunk_arg);
ff_apply_motion_4x4(ri, x, y, mx, my);
+ ri->key_frame = 0;
break;
}
case RoQ_ID_SLD:
@@ -214,6 +219,16 @@ static int roq_decode_frame(AVCodecContext *avctx,
AVFrame *rframe,
if ((ret = av_frame_ref(rframe, s->current_frame)) < 0)
return ret;
+
+ /* Keyframe when no MOT or FCC codes in frame */
+ if (s->key_frame) {
+ av_log(avctx, AV_LOG_VERBOSE, "\nFound keyframe!\n");
+ rframe->pict_type = AV_PICTURE_TYPE_I;
+ avpkt->flags |= AV_PKT_FLAG_KEY;
+ } else {
+ rframe->pict_type = AV_PICTURE_TYPE_P;
+ }
+
*got_frame = 1;
/* shuffle frames */
diff --git a/libavcodec/roqvideoenc.c b/libavcodec/roqvideoenc.c
index 0933abf4f9..bcead80bbd 100644
--- a/libavcodec/roqvideoenc.c
+++ b/libavcodec/roqvideoenc.c
@@ -79,6 +79,9 @@
/* The cast is useful when multiplying it by INT_MAX */
#define ROQ_LAMBDA_SCALE ((uint64_t) FF_LAMBDA_SCALE)
+/* The default minimum bitrate, set around the value of a 1x speed
CD-ROM drive */
+#define ROQ_DEFAULT_MIN_BIT_RATE 800*1024
+
typedef struct RoqCodebooks {
int numCB4;
int numCB2;
@@ -136,6 +139,8 @@ typedef struct RoqEncContext {
struct ELBGContext *elbg;
AVLFG randctx;
uint64_t lambda;
+ uint64_t last_lambda;
+ int lambda_delta;
motion_vect *this_motion4;
motion_vect *last_motion4;
@@ -887,8 +892,9 @@ static int generate_new_codebooks(RoqEncContext *enc)
return 0;
}
-static int roq_encode_video(RoqEncContext *enc)
+static int roq_encode_video(AVCodecContext *avctx)
{
+ RoqEncContext *const enc = avctx->priv_data;
RoqTempData *const tempData = &enc->tmp_data;
RoqContext *const roq = &enc->common;
int ret;
@@ -910,14 +916,14 @@ static int roq_encode_video(RoqEncContext *enc)
/* Quake 3 can't handle chunks bigger than 65535 bytes */
if (tempData->mainChunkSize/8 > 65535 && enc->quake3_compat) {
- if (enc->lambda > 100000) {
+ if (enc->lambda > 100000000) {
av_log(roq->logctx, AV_LOG_ERROR, "Cannot encode video in
Quake compatible form\n");
return AVERROR(EINVAL);
}
av_log(roq->logctx, AV_LOG_ERROR,
"Warning, generated a frame too big for Quake (%d >
65535), "
- "now switching to a bigger qscale value.\n",
- tempData->mainChunkSize/8);
+ "now switching to a bigger qscale value (%d).\n",
+ tempData->mainChunkSize/8, (int)enc->lambda);
enc->lambda *= 1.5;
tempData->mainChunkSize = 0;
memset(tempData->used_option, 0, sizeof(tempData->used_option));
@@ -931,6 +937,80 @@ static int roq_encode_video(RoqEncContext *enc)
remap_codebooks(enc);
+ /* bit rate control code - could make encoding very slow */
+ if (avctx->bit_rate > ROQ_DEFAULT_MIN_BIT_RATE) {
+ /* a bit rate has been specified - try to match it */
+ int ftotal = (tempData->mainChunkSize / 8 + tempData->numCB2*6
+ tempData->numCB4*4) * avctx->time_base.den * 8;
+ int64_t tol = avctx->bit_rate_tolerance;
+
+ /* tolerance > bit rate, set to 5% of the bit rate */
+ if (tol > avctx->bit_rate)
+ tol = avctx->bit_rate / 20;
+
+ av_log(roq->logctx, AV_LOG_VERBOSE,
+ "\nDesired bit rate (%d kbps), "
+ "Bit rate tolerance (%d), "
+ "Frame rate (%d)\n",
+ (int)avctx->bit_rate, (int)tol, avctx->time_base.den);
+
+ if (ftotal > (avctx->bit_rate + tol)) {
+ /* frame is too big - increase qscale */
+ if (enc->lambda > 100000000) {
+ av_log(roq->logctx, AV_LOG_ERROR, "\nCannot encode
video at desired bitrate\n");
+ return AVERROR(EINVAL);
+ }
+ enc->lambda_delta = enc->lambda_delta <= 0 ? 1 :
enc->lambda_delta < 65536 ? enc->lambda_delta*2 : 65536;
+ enc->last_lambda = enc->lambda;
+ enc->lambda += enc->lambda_delta;
+ av_log(roq->logctx, AV_LOG_INFO,
+ "\nGenerated a frame too big for desired bit rate
(%d kbps), "
+ "now switching to a bigger qscale value (%d).\n",
+ ftotal / 1000, (int)enc->lambda);
+ tempData->mainChunkSize = 0;
+ memset(tempData->used_option, 0,
sizeof(tempData->used_option));
+ memset(tempData->codebooks.usedCB4, 0,
+ sizeof(tempData->codebooks.usedCB4));
+ memset(tempData->codebooks.usedCB2, 0,
+ sizeof(tempData->codebooks.usedCB2));
+
+ goto retry_encode;
+ } else if (ftotal < (avctx->bit_rate - tol)) {
+ /* frame is too small - decrease qscale */
+ if (enc->lambda <= 1) {
+ av_log(roq->logctx, AV_LOG_WARNING,
+ "\nGenerated a frame too small for desired bit
rate (%d kbps), "
+ "qscale value cannot be lowered any further
(%d).\n",
+ ftotal / 1000, (int)enc->lambda);
+ } else if ((enc->lambda - enc->last_lambda) == 1) {
+ av_log(roq->logctx, AV_LOG_WARNING,
+ "\nCannot find qscale that gives desired bit
rate within desired tolerance, "
+ "using lower bitrate (%d kbps) with higher
qscale value (%d).\n",
+ ftotal / 1000, (int)enc->lambda);
+ } else {
+ enc->lambda_delta = 0;
+ if (enc->lambda == enc->last_lambda) {
+ enc->lambda >>= 1;
+ enc->last_lambda = enc->lambda;
+ } else {
+ enc->lambda = enc->last_lambda;
+ //enc->lambda *= (float)(tempData->mainChunkSize *
avctx->time_base.den) / avctx->bit_rate;
+ av_log(roq->logctx, AV_LOG_INFO,
+ "\nGenerated a frame too small for desired
bit rate (%d kbps), "
+ "reverting lambda and using smaller inc on
qscale (%d).\n",
+ ftotal / 1000, (int)enc->lambda);
+ }
+ tempData->mainChunkSize = 0;
+ memset(tempData->used_option, 0,
sizeof(tempData->used_option));
+ memset(tempData->codebooks.usedCB4, 0,
+ sizeof(tempData->codebooks.usedCB4));
+ memset(tempData->codebooks.usedCB2, 0,
+ sizeof(tempData->codebooks.usedCB2));
+
+ goto retry_encode;
+ }
+ }
+ }
+ write_codebooks(enc);
reconstruct_and_encode_image(enc, roq->width, roq->height,
@@ -980,19 +1060,26 @@ static av_cold int roq_encode_init(AVCodecContext
*avctx)
return AVERROR(EINVAL);
}
- if (avctx->width > 65535 || avctx->height > 65535) {
- av_log(avctx, AV_LOG_ERROR, "Dimensions are max %d\n",
enc->quake3_compat ? 32768 : 65535);
+ if (enc->quake3_compat && ((avctx->width > 32767 || avctx->height >
32767))) {
+ av_log(avctx, AV_LOG_ERROR, "Dimensions are max %d\n", 32767);
+ return AVERROR(EINVAL);
+ }
+ else if (avctx->width > 65535 || avctx->height > 65535) {
+ av_log(avctx, AV_LOG_ERROR, "Dimensions are max %d\n", 65535);
return AVERROR(EINVAL);
}
- if
(((avctx->width)&(avctx->width-1))||((avctx->height)&(avctx->height-1)))
+ if (enc->quake3_compat &&
((avctx->width)&(avctx->width-1))||((avctx->height)&(avctx->height-1)))
av_log(avctx, AV_LOG_ERROR, "Warning: dimensions not power of
two, this is not supported by quake\n");
roq->width = avctx->width;
roq->height = avctx->height;
+ enc->lambda = 2*ROQ_LAMBDA_SCALE;
enc->framesSinceKeyframe = 0;
enc->first_frame = 1;
+ enc->last_lambda = 1;
+ enc->lambda_delta = 0;
roq->last_frame = av_frame_alloc();
roq->current_frame = av_frame_alloc();
@@ -1043,9 +1130,11 @@ static void
roq_write_video_info_chunk(RoqEncContext *enc)
/* Height */
bytestream_put_le16(&enc->out_buf, enc->common.height);
- /* Unused in Quake 3, mimics the output of the real encoder */
+ /* Quake 3: starting size for cell subdivision */
bytestream_put_byte(&enc->out_buf, 0x08);
bytestream_put_byte(&enc->out_buf, 0x00);
+
+ /* Quake 3: ending size for cell subdivision */
bytestream_put_byte(&enc->out_buf, 0x04);
bytestream_put_byte(&enc->out_buf, 0x00);
}
@@ -1059,10 +1148,13 @@ static int roq_encode_frame(AVCodecContext
*avctx, AVPacket *pkt,
enc->frame_to_enc = frame;
- if (frame->quality)
- enc->lambda = frame->quality - 1;
- else
- enc->lambda = 2*ROQ_LAMBDA_SCALE;
+ if (avctx->bit_rate <= ROQ_DEFAULT_MIN_BIT_RATE) {
+ /* no specific bit rate desired, use frame quality */
+ if (frame->quality)
+ enc->lambda = frame->quality - 1;
+ else
+ enc->lambda = 2*ROQ_LAMBDA_SCALE;
+ }
/* 138 bits max per 8x8 block +
* 256 codebooks*(6 bytes 2x2 + 4 bytes 4x4) + 8 bytes frame
header */
@@ -1089,7 +1181,7 @@ static int roq_encode_frame(AVCodecContext *avctx,
AVPacket *pkt,
}
/* Encode the actual frame */
- ret = roq_encode_video(enc);
+ ret = roq_encode_video(avctx);
if (ret < 0)
return ret;
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 376388c5bb..581151cda7 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -30,7 +30,7 @@
#include "version_major.h"
#define LIBAVCODEC_VERSION_MINOR 37
-#define LIBAVCODEC_VERSION_MICRO 100
+#define LIBAVCODEC_VERSION_MICRO 101
#define LIBAVCODEC_VERSION_INT
AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
LIBAVCODEC_VERSION_MINOR, \
--
2.25.1
More information about the ffmpeg-devel
mailing list