[MPlayer-dev-eng] [PATCH] Add support for ARIB STD-B24 captions v4

Thu Oct 29 23:24:29 CET 2009

>> +struct arib_conv_state {
>> +	unsigned char G[4];
>> +	unsigned char G_WIDTH[4];
>> +	unsigned char GL, GR;
>> +	unsigned char SS;
>
> Avoid uppercase names, and something more descriptive
> would be nice, too.
They're mostly names out of the spec which is why they're capitalized.

> Also for generic data using uint8_t is nicer than
> unsigned char IMO.
Sure.

>
>> +static const unsigned int arib_default_color_pal[] = {
>> +	//AABBGGRR
>> +	0xFF000000,
>> +	0xFF0000FF,
>> +	0xFF00FF00,
>> +	0xFF00FFFF,
>> +	0xFFFF0000,
>> +	0xFFFF00FF,
>> +	0xFFFFFF00,
>> +	0xFFFFFFFF
>> +};
>
> Putting alpha in a separate table in case it is ever needed would allow
> to get rid of some & 0x00FFFFFF in the code...
>
I can just eliminate alpha for now since it's not handled at the moment.

>> +	long long start = state.pts * 1000;
>
> Use int64_t I'd say.
>
Ok. I do prefer those standard types.

>> +static void arib_prepare_buf(void)
>> +{
>> +	int len;
>> +	state.inptr = state.inbuf;
>> +	len = snprintf(state.inbuf, sizeof(state.inbuf),
>> "{\\a1\\c&H%06X&\\fs%d}",
>> +		       arib_default_color_pal[state.fg_color] & 0x00FFFFFF,
>> +		       state.font_height + state.font_size_adj * 8);
>> +	if (len >= 0)
>> +		state.inptr += len;
>
> This is wrong, the result of snprintf can be more than the number of
> characters
> printed, and inptr would then point outside the buffer.
> I think you meant state.inptr += strlen(state.inptr)
>
Well according to the man page, I can add a check for when it's equal or
longer than the buffer.

>> +static int arib_process_escapecode(const unsigned char *data, size_t
>> len)
>> +{
>> +	int params[4] = { 0, -1, -1, -1 };
>> +	int current_param = 0;
>> +	size_t skip = 1;
>> +
>> +	while (skip <= len) {
>> +		unsigned char val = data[skip];
>> +		skip++;
>
> Can be one line.
>
Ok.

>> +		if (val == 0x3b && current_param < 4)
>> +			params[++current_param] = 0;
>> +		else if (val >= 0x30 && val < 0x3A) {
>> +			params[current_param] *= 10;
>> +			params[current_param] += val & 0x0F;
>> +		} else if (val >= 0x40 && val < 0x80) {
>
> what about val == 0x3c, 0x3e, 0x3f, 0x00 - 0x3f and 0x80 - 0xff?
>
They either require no special handling or are invalid characters as part
of an escape sequence.

>> +			case 0x53:
>> +				if (params[1] < 0)
>> +					state.format = params[0];
>> +				else
>> +					arib_stub("unhandled SWF [%d, %d, %d, %d]\n", params[0],
>> params[1], params[2], params[3]);
>> +				arib_flush_buf();
>> +				break;
>> +			case 0x61:
>> +				arib_flush_buf();
>> +				state.x = params[0];
>> +				state.y = params[1];
>> +				break;
>
> I'd place the arib_flush_buf consistently, either always on top or always
> at the bottom.
>
I think I can get away with keeping them at the top.

>> +static int arib_process_controlcode(const unsigned char *data, size_t
>> len)
>
> I think almost all functions miss doxygen documentation, but this on in
> particular
> needs an explanation for the return value.
>
Returns the number of characters in the buffer that were parsed.

>> +	// C0
>
> What is that supposed to mean?
>
Control set 0. Part of the spec.

>> +	case 0x1B: // ESC
>> +		// Graphic set designation
>> +		if (data[1] == 0x24) {
>> +			if (data[2] == 0x28) {
>
> You either need to check len or document padding or minimum size
> requirements in the function documentation.
>
Yeah I should add some length checking here in case the buffer is
unexpectedly truncated.

>> +				state.G[0] = data[4] | GS_DRCS;
>> +				state.G_WIDTH[0] = 2;
>> +				return 5;
>> +			} else if (data[2] >= 0x29 && data[2] <= 0x2b) {
>> +				state.G_WIDTH[data[2] - 0x28] = 2;
>> +				if (data[3] == 0x20) {
>> +					state.G[data[2] - 0x28] = data[4] | GS_DRCS;
>> +					return 5;
>
> Uh, what is the point of the data[2] == 0x28 case above, it seems to be
> the same code as this...
>
It's slightly different. data[3] is checked in the 2nd case whereas the
first case doesn't check it (and merely assumes data[3] is 0x20). I can
merge these cases since I'm not sure one is any better than another.

>> +				} else {
>> +					state.G[data[2] - 0x28] = data[3];
>> +					return 4;
>> +				}
>> +			} else {
>> +				state.G[0] = data[2];
>> +				state.G_WIDTH[0] = 2;
>> +				return 3;
>> +			}
>> +		} else if (data[1] >= 0x28 && data[1] <= 0x2b) {
>> +			state.G_WIDTH[data[1] - 0x28] = 1;
>> +			if (data[2] == 0x20) {
>> +				state.G[data[1] - 0x28] = data[3] | GS_DRCS;
>> +				return 4;
>> +			} else {
>> +				state.G[data[1] - 0x28] = data[2];
>> +				return 3;
>> +			}
>
> And this code is almost a duplicate of the above.
>
I'll see if I can reduce the duplication.

>> +		} else if (data[1] == 0x6e) {
>> +			state.GL = 2;
>> +			return 2;
>> +		} else if (data[1] == 0x6f) {
>> +			state.GL = 3;
>> +			return 2;
>
> state.GL = 0x70 - data[1]; for both
>
Ehhh... can reduce the number of lines some.. fine.

>> +		} else if (data[1] == 0x7e) {
>> +			state.GR = 1;
>> +			return 2;
>> +		} else if (data[1] == 0x7d) {
>> +			state.GR = 2;
>> +			return 2;
>> +		} else if (data[1] == 0x7c) {
>> +			state.GR = 3;
>> +			return 2;
>
> state.GR = 0x7f - data[1];
>
Sure.

>> +			size_t left = sizeof(state.inbuf) - (state.inptr - state.inbuf);
>
> a inbuf_end pointer would allow to simplify this to
> state.inbuf_end - state.inptr, with the additional advantage that it is
> easy to change to malloced buffers at a later time.
>
I was thinking of adding an actual buf_left counter somewhere. I think
other parts of the code can take advantage of it. I'll see if a pointer to
the end works better.

>> +			len = snprintf(state.inptr, left, "{\\c%06X}",
>> +				       arib_default_color_pal[state.fg_color] & 0x00FFFFFF);
>> +			if (len >= 0)
>> +				state.inptr += len;
>
> another misuse of snprintf return value.
> Also this piece of code seems to be duplicated quite often, I'd say it
> should be
> in a special append_inbuf function or so.
>
I'll see what I can do.

>> +static const unsigned char *arib_default_macros[] = {
>> +	"\x1B\x24\x42\x1B\x29\x4A\x1B\x2A\x30\x1B\x2B\x20\x70\x0F\x1B\x7D",
>> +	"\x1B\x24\x42\x1B\x29\x31\x1B\x2A\x30\x1B\x2B\x20\x70\x0F\x1B\x7D",
>> +	"\x1B\x24\x42\x1B\x29\x20\x41\x1B\x2A\x30\x1B\x2B\x20\x70\x0F\x1B\x7D",
>> +	"\x1B\x28\x32\x1B\x29\x34\x1B\x2A\x35\x1B\x2B\x20\x70\x0F\x1B\x7D",
>> +	"\x1B\x28\x32\x1B\x29\x33\x1B\x2A\x35\x1B\x2B\x20\x70\x0F\x1B\x7D",
>> +	"\x1B\x28\x32\x1B\x29\x20\x41\x1B\x2A\x35\x1B\x2B\x20\x70\x0F\x1B\x7D",
>> +	"\x1B\x28\x20\x41\x1B\x29\x20\x42\x1B\x2A\x20\x43\x1B\x2B\x20\x70\x0F\x1B\x7D",
>> +	"\x1B\x28\x20\x44\x1B\x29\x20\x45\x1B\x2A\x20\x46\x1B\x2B\x20\x70\x0F\x1B\x7D",
>> +	"\x1B\x28\x20\x47\x1B\x29\x20\x48\x1B\x2A\x20\x49\x1B\x2B\x20\x70\x0F\x1B\x7D",
>> +	"\x1B\x28\x20\x4A\x1B\x29\x20\x4B\x1B\x2A\x20\x4C\x1B\x2B\x20\x70\x0F\x1B\x7D",
>> +	"\x1B\x28\x20\x4D\x1B\x29\x20\x4E\x1B\x2A\x20\x4F\x1B\x2B\x20\x70\x0F\x1B\x7D",
>> +	"\x1B\x24\x42\x1B\x29\x20\x42\x1B\x2A\x30\x1B\x2B\x20\x70\x0F\x1B\x7D",
>> +	"\x1B\x24\x42\x1B\x29\x20\x43\x1B\x2A\x30\x1B\x2B\x20\x70\x0F\x1B\x7D",
>> +	"\x1B\x24\x42\x1B\x29\x20\x44\x1B\x2A\x30\x1B\x2B\x20\x70\x0F\x1B\x7D",
>> +	"\x1B\x28\x31\x1B\x29\x30\x1B\x2A\x4A\x1B\x2B\x20\x70\x0F\x1B\x7D",
>> +	"\x1B\x28\x4A\x1B\x29\x32\x1B\x2A\x20\x41\x1B\x2B\x20\x70\x0F\x1B\x7D",
>
> Please don't use string constants for binary data.
> Also changing the type to [][20] should save memory and a pointer
> dereference on access.
>
This isn't exactly binary data. They're special strings (macros) full of
escape codes which are run through arib_process_statement to change the
current state. They're specified using \x to better match how they're
specified in the spec.

>> +static void arib_encode_sjis(unsigned char row, unsigned char col)
>> +{
>> +	size_t charbytes, outbytes;
>> +	char charbuf[2];
>> +	char *charptr = charbuf;
>> +	charbuf[0] = ((row + 1) >> 1) + (row <= 94 ? 112 : 176);
>> +
>> +	if (row & 0x01)
>> +		charbuf[1] = col + 31 + (col >= 96);
>> +	else
>> +		charbuf[1] = col + 126;
>> +
>> +	charbytes = 2;
>> +	outbytes = sizeof(state.inbuf) - (state.inptr - state.inbuf);
>> +	iconv(conv, &charptr, &charbytes, (char **)&state.inptr, &outbytes);
>
> I'm tempted to suggest to use a lookup-table instead of iconv...
> At least for the cases where the parameters to this functions
> are constant I'd suggest to code the result directly.
> I'd even consider to write the strings directly as UTF-8, though
> I am not 100% sure how well the compilers and other people's
> editors handle that...
>
The kanji -> unicode conversion involves a 94x94 sized look up table.
Would be nice but would take a bit more work than just using iconv's
shift_jis converter.

>> +			arib_process_statement(arib_default_macros[row & 0xF],
>> +					strlen(arib_default_macros[row & 0xF]));
>
> Use an extra variable instead of writing the arib_default_macros... stuff
> twice.
>
Ok.

>> +static void arib_process_statement(const unsigned char *data, size_t
>> len)
>> +{
>> +	int skip;
>> +
>> +	while (len > 0) {
>> +		switch ((*data >> 5) & 0x3) {
>> +		case 0x3:
>> +		case 0x2:
>> +		case 0x1:
>> +			if ((*data & 0x7F) == 0x20 ||
>> +			    (*data & 0x7F) == 0x7F) {
>> +				if (!state.inptr)
>> +					arib_prepare_buf();
>> +				*state.inptr++ = *data;
>> +				skip = 1;
>> +			} else if (*data & 0x80) { // GR
>> +				skip = arib_process_char(state.GR, data[0] & 0x7F, data[1] & 0x7F);
>> +			} else { // GL
>> +				skip = arib_process_char(state.SS ? state.SS : state.GL, data[0],
>> data[1]);
>> +				state.SS = 0;
>> +			}
>> +			len -= skip;
>> +			data += skip;
>
> If you use a data_end = data + len; you don't need to update len.
> Also it allows to pass &data to e.g. arib_process_char and have that
> function
> update the "data" pointer instead of returning a skip value.
>
I'll see about doing this.

>> +struct caption_data_group {
>> +	unsigned char id_version;
>> +	unsigned char link_num;
>> +	unsigned char last_link_num;
>> +	unsigned char size[2];
>> +	unsigned char data[0];
>> +} __attribute__ ((__packed__));
>
> No further packed structs will ever again be allowed in MPlayer if I
> have anything to say.
> Also some compilers have issues with [0] and some with [] declarations in
> structs.
> Since you don't use many part of this struct avoiding it should not really
> make the code any less readable.
Yeah no big deal. I can get rid of it.

Thanks,
-Michael Wu