
lavc/ccaption_dec: implement real_time option

This new mode is useful for realtime decoding of closed captions so they
can be displayed along with mpeg2 frames.

Closed caption streams contain two major types of captions:

- POPON captions, which are buffered off-screen and displayed
  only after an EOC (end of caption, a.k.a. display buffer) command

- PAINTON/ROLLUP captions, which are written to the display as soon as
  they arrive.

In a typical real-time eia608 decoder, commands like EOC (end of
caption; display buffer), EDM (erase display memory) and EBM (erase
buffered memory) perform their expected functions as soon as the
commands are processed. This is implemented in the real_time branches
added in this commit.

Before this commit, and in the !real_time branches after this commit,
the decoder implements its own variant of this logic, geared
specifically towards buffered decoding. It does so by actively ignoring
commands like EBM (erase buffered memory) and by re-using the
non-display buffer to hold the previous caption while the new one is
received. This is the opposite of the real-time decoder, which uses the
non-display buffer to hold the new caption while the display buffer is
still showing the current one.

In addition to ignoring EBM, the buffered decoder also has custom
implementations for EDM and EOC. An EDM (erase display memory) command
flushes the existing contents before clearing the screen, and EOC
similarly always flushes the active buffer (the previous subtitle)
before flipping buffers.
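
For illustration only (this sketch is not part of the commit, and every
name in it other than EOC/EDM/EBM and real_time is invented): a toy
model of how the two modes react to the three control codes. emit()
stands in for the decoder's reap_screen()/capture_screen() step, and its
empty-caption check mirrors the (*ctx->buffer.str || ctx->real_time)
test in decode() in the diff below.

    /* Toy model of the buffered vs real_time dispatch (not the lavc code). */
    #include <stdio.h>
    #include <string.h>

    struct screen { char text[64]; };

    struct ctx {
        int real_time;        /* 0 = buffered (default), 1 = new real_time mode */
        struct screen scr[2];
        int active;           /* index of the display buffer */
    };

    static void emit(struct ctx *c)
    {
        /* buffered mode drops empty captions; real_time emits them so the
         * display actually gets cleared */
        if (c->real_time || c->scr[c->active].text[0])
            printf("SUB: \"%s\"\n", c->scr[c->active].text);
    }

    static void handle_edm(struct ctx *c)   /* erase display memory */
    {
        if (!c->real_time)
            emit(c);                        /* buffered: flush before wiping */
        c->scr[c->active].text[0] = '\0';
        if (c->real_time)
            emit(c);                        /* real-time: empty caption clears display */
    }

    static void handle_eoc(struct ctx *c)   /* end of caption: display the buffer */
    {
        if (!c->real_time)
            handle_edm(c);                  /* buffered: reap the previous caption first */
        c->active = !c->active;             /* flip display / non-display buffers */
        if (c->real_time)
            emit(c);                        /* real-time: show the new caption right away */
    }

    static void handle_ebm(struct ctx *c)   /* erase buffered (non-displayed) memory */
    {
        if (c->real_time)                   /* buffered mode ignores EBM on purpose */
            c->scr[!c->active].text[0] = '\0';
    }

    int main(void)
    {
        struct ctx c = { .real_time = 1 };
        strcpy(c.scr[!c.active].text, "HELLO");  /* caption assembled off-screen */
        handle_eoc(&c);   /* real-time: "HELLO" is emitted as soon as EOC arrives */
        handle_edm(&c);   /* then an empty caption once the display is erased */
        handle_ebm(&c);   /* no-op here; in buffered mode it is ignored entirely */
        return 0;
    }

Changing real_time to 0 in main() makes the same sequence print "HELLO"
only when the later EDM arrives, i.e. one command late, which is the
buffered behaviour described above.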
Authored by Aman Gupta on 2016-01-08 19:01:22 -08:00, committed by Clément Bœsch
parent d6f1abe9ce
commit 5c041e2ccc


--- a/libavcodec/ccaption_dec.c
+++ b/libavcodec/ccaption_dec.c
@@ -116,6 +116,7 @@ struct Screen {
 
 typedef struct CCaptionSubContext {
     AVClass *class;
+    int real_time;
     struct Screen screen[2];
     int active_screen;
     uint8_t cursor_row;
@@ -130,6 +131,8 @@ typedef struct CCaptionSubContext {
     /* visible screen time */
     int64_t startv_time;
     int64_t end_time;
+    int screen_touched;
+    int64_t last_real_time;
     char prev_cmd[2];
     /* buffer to store pkt data */
     AVBufferRef *pktbuf;
@@ -187,6 +190,8 @@ static void flush_decoder(AVCodecContext *avctx)
     ctx->cursor_font = 0;
     ctx->cursor_color = 0;
     ctx->active_screen = 0;
+    ctx->last_real_time = 0;
+    ctx->screen_touched = 0;
     ctx->buffer_changed = 0;
     av_bprint_clear(&ctx->buffer);
 }
@@ -426,15 +431,33 @@ static void handle_edm(CCaptionSubContext *ctx, int64_t pts)
 {
     struct Screen *screen = ctx->screen + ctx->active_screen;
 
-    reap_screen(ctx, pts);
+    // In buffered mode, keep writing to screen until it is wiped.
+    // Before wiping the display, capture contents to emit subtitle.
+    if (!ctx->real_time)
+        reap_screen(ctx, pts);
+
     screen->row_used = 0;
+
+    // In realtime mode, emit an empty caption so the last one doesn't
+    // stay on the screen.
+    if (ctx->real_time)
+        reap_screen(ctx, pts);
 }
 
 static void handle_eoc(CCaptionSubContext *ctx, int64_t pts)
 {
-    handle_edm(ctx,pts);
+    // In buffered mode, we wait til the *next* EOC and
+    // reap what was already on the screen since the last EOC.
+    if (!ctx->real_time)
+        handle_edm(ctx,pts);
+
     ctx->active_screen = !ctx->active_screen;
     ctx->cursor_column = 0;
+
+    // In realtime mode, we display the buffered contents (after
+    // flipping the buffer to active above) as soon as EOC arrives.
+    if (ctx->real_time)
+        reap_screen(ctx, pts);
 }
 
 static void handle_delete_end_of_row(CCaptionSubContext *ctx, char hi, char lo)
@@ -456,6 +479,9 @@ static void handle_char(CCaptionSubContext *ctx, char hi, char lo, int64_t pts)
     }
     write_char(ctx, screen, 0);
 
+    if (ctx->mode != CCMODE_POPON)
+        ctx->screen_touched = 1;
+
     /* reset prev command since character can repeat */
     ctx->prev_cmd[0] = 0;
     ctx->prev_cmd[1] = 0;
@@ -505,10 +531,20 @@ static void process_cc608(CCaptionSubContext *ctx, int64_t pts, uint8_t hi, uint8_t lo)
     case 0x2d:
         /* carriage return */
         ff_dlog(ctx, "carriage return\n");
-        reap_screen(ctx, pts);
+        if (!ctx->real_time)
+            reap_screen(ctx, pts);
         roll_up(ctx);
         ctx->cursor_column = 0;
         break;
+    case 0x2e:
+        /* erase buffered (non displayed) memory */
+        // Only in realtime mode. In buffered mode, we re-use the inactive screen
+        // for our own buffering.
+        if (ctx->real_time) {
+            struct Screen *screen = ctx->screen + !ctx->active_screen;
+            screen->row_used = 0;
+        }
+        break;
     case 0x2f:
         /* end of caption */
         ff_dlog(ctx, "handle_eoc\n");
@@ -560,24 +596,54 @@ static int decode(AVCodecContext *avctx, void *data, int *got_sub, AVPacket *avpkt)
             continue;
         else
             process_cc608(ctx, avpkt->pts, *(bptr + i + 1) & 0x7f, *(bptr + i + 2) & 0x7f);
-        if (ctx->buffer_changed && *ctx->buffer.str)
+
+        if (!ctx->buffer_changed)
+            continue;
+        ctx->buffer_changed = 0;
+
+        if (*ctx->buffer.str || ctx->real_time)
         {
-            int start_time = av_rescale_q(ctx->start_time, avctx->time_base, ass_tb);
-            int end_time = av_rescale_q(ctx->end_time, avctx->time_base, ass_tb);
+            int64_t sub_pts = ctx->real_time ? avpkt->pts : ctx->start_time;
+            int start_time = av_rescale_q(sub_pts, avctx->time_base, ass_tb);
+            int duration = -1;
+            if (!ctx->real_time) {
+                int end_time = av_rescale_q(ctx->end_time, avctx->time_base, ass_tb);
+                duration = end_time - start_time;
+            }
             ff_dlog(ctx, "cdp writing data (%s)\n",ctx->buffer.str);
-            ret = ff_ass_add_rect_bprint(sub, &ctx->buffer, start_time, end_time - start_time);
+            ret = ff_ass_add_rect_bprint(sub, &ctx->buffer, start_time, duration);
             if (ret < 0)
                 return ret;
-            sub->pts = av_rescale_q(ctx->start_time, avctx->time_base, AV_TIME_BASE_Q);
+            sub->pts = av_rescale_q(sub_pts, avctx->time_base, AV_TIME_BASE_Q);
             ctx->buffer_changed = 0;
+            ctx->last_real_time = avpkt->pts;
+            ctx->screen_touched = 0;
         }
     }
 
+    if (ctx->real_time && ctx->screen_touched &&
+        avpkt->pts > ctx->last_real_time + av_rescale_q(20, ass_tb, avctx->time_base)) {
+        ctx->last_real_time = avpkt->pts;
+        ctx->screen_touched = 0;
+
+        capture_screen(ctx);
+        ctx->buffer_changed = 0;
+
+        int start_time = av_rescale_q(avpkt->pts, avctx->time_base, ass_tb);
+        ret = ff_ass_add_rect_bprint(sub, &ctx->buffer, start_time, -1);
+        if (ret < 0)
+            return ret;
+
+        sub->pts = av_rescale_q(avpkt->pts, avctx->time_base, AV_TIME_BASE_Q);
+    }
+
     *got_sub = sub->num_rects > 0;
     return ret;
 }
 
+#define OFFSET(x) offsetof(CCaptionSubContext, x)
+#define SD AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_DECODING_PARAM
 static const AVOption options[] = {
+    { "real_time", "emit subtitle events as they are decoded for real-time display", OFFSET(real_time), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, SD },
     {NULL}
 };
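
Not part of the commit, but for context: a minimal sketch of how an
application could opt into the new behaviour through the standard
AVOptions mechanism when opening the closed-caption decoder. The helper
name open_cc_decoder() is made up for this example; the API calls are
the usual libavcodec ones.

    #include <libavcodec/avcodec.h>
    #include <libavutil/dict.h>

    /* Hypothetical helper: open the EIA-608 decoder with real_time enabled. */
    int open_cc_decoder(AVCodecContext **out)
    {
        const AVCodec *dec = avcodec_find_decoder(AV_CODEC_ID_EIA_608);
        AVCodecContext *ctx;
        AVDictionary *opts = NULL;
        int ret;

        if (!dec)
            return AVERROR_DECODER_NOT_FOUND;
        ctx = avcodec_alloc_context3(dec);
        if (!ctx)
            return AVERROR(ENOMEM);

        /* private decoder option added by this commit; the default of 0
         * keeps the old buffered behaviour */
        av_dict_set(&opts, "real_time", "1", 0);

        ret = avcodec_open2(ctx, dec, &opts);
        av_dict_free(&opts);
        if (ret < 0) {
            avcodec_free_context(&ctx);
            return ret;
        }
        *out = ctx;
        return 0;
    }

Because the option defaults to 0, existing callers keep the buffered
behaviour unless they explicitly opt in; the SD flags mark it as a
subtitle-decoding option.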