/*
 * Apple ProRes encoder
 *
 * Copyright (c) 2011 Anatoliy Wasserman
 * Copyright (c) 2012 Konstantin Shishkov
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <limits.h>
#include <string.h>

#include "libavutil/mem.h"
#include "libavutil/mem_internal.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "avcodec.h"
#include "codec_internal.h"
#include "encode.h"
#include "fdctdsp.h"
#include "put_bits.h"
#include "profiles.h"
#include "bytestream.h"
#include "proresdata.h"
#include "proresenc_kostya_common.h"

/** Number of trellis nodes reserved per slice column (one per candidate quantiser). */
#define TRELLIS_WIDTH 16
/** Score marking a trellis path as unusable; parenthesised so it is safe in any expression. */
#define SCORE_LIMIT   (INT_MAX / 2)

/** One node of the per-row quantiser search trellis. */
struct TrellisNode {
    int prev_node;  ///< index of the best predecessor node, -1 if none was chosen yet
    int quant;      ///< quantiser this node stands for
    int bits;       ///< accumulated bit count along the best path ending here
    int score;      ///< accumulated error score along the best path ending here
};

/** Scratch data owned by a single worker thread during the quantiser search. */
typedef struct ProresThreadData {
    /* per-plane slice data: 64 coefficients * up to 4 blocks per macroblock */
    DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
    /* 16x16 staging area used when a macroblock crosses the picture edge */
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
    int16_t custom_q[64];          ///< luma matrix built on the fly for q >= MAX_STORED_Q
    int16_t custom_chroma_q[64];   ///< chroma matrix built on the fly for q >= MAX_STORED_Q
    struct TrellisNode *nodes;
} ProresThreadData;

/**
 * Read one slice worth of samples from a plane and run the forward DCT on
 * every 8x8 block of it.
 *
 * @param ctx           encoder context providing fdct and fdsp
 * @param src           first sample of the slice inside the plane
 * @param linesize      distance between two rows of src, in bytes
 * @param x             horizontal position of the slice in the plane, in samples
 * @param y             vertical position of the slice in the plane, in samples
 * @param w             plane width in samples
 * @param h             plane height in samples
 * @param blocks        output, 64 coefficients per 8x8 block
 * @param emu_buf       16x16 scratch buffer for macroblocks crossing the edge
 * @param mbs_per_slice number of macroblocks in the slice
 * @param blocks_per_mb number of 8x8 blocks per macroblock in this plane
 * @param is_chroma     selects the block ordering inside a macroblock
 */
static void get_slice_data(ProresContext *ctx, const uint16_t *src,
                           ptrdiff_t linesize, int x, int y, int w, int h,
                           int16_t *blocks, uint16_t *emu_buf,
                           int mbs_per_slice, int blocks_per_mb, int is_chroma)
{
    const uint16_t *esrc;
    const int mb_width = 4 * blocks_per_mb;
    ptrdiff_t elinesize;
    int i, j, k;

    for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
        /* macroblock lies completely right of the plane: zero what is left */
        if (x >= w) {
            memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
                              * sizeof(*blocks));
            return;
        }
        if (x + mb_width <= w && y + 16 <= h) {
            /* fully inside the plane, transform straight from the source */
            esrc      = src;
            elinesize = linesize;
        } else {
            int bw, bh, pix;

            esrc      = emu_buf;
            elinesize = 16 * sizeof(*emu_buf);

            bw = FFMIN(w - x, mb_width);
            bh = FFMIN(h - y, 16);

            /* copy the valid part and repeat the last column to the right */
            for (j = 0; j < bh; j++) {
                memcpy(emu_buf + j * 16,
                       (const uint8_t*)src + j * linesize,
                       bw * sizeof(*src));
                pix = emu_buf[j * 16 + bw - 1];
                for (k = bw; k < mb_width; k++)
                    emu_buf[j * 16 + k] = pix;
            }
            /* repeat the last valid row downwards */
            for (; j < 16; j++)
                memcpy(emu_buf + j * 16,
                       emu_buf + (bh - 1) * 16,
                       mb_width * sizeof(*emu_buf));
        }
        /*
         * elinesize is in bytes while esrc points to 16-bit samples, so
         * "esrc + elinesize * 4" advances by eight rows.
         */
        if (!is_chroma) {
            /* order: top-left, top-right, bottom-left, bottom-right */
            ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
            blocks += 64;
            if (blocks_per_mb > 2) {
                ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
                blocks += 64;
            }
            ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
            blocks += 64;
            if (blocks_per_mb > 2) {
                ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
                blocks += 64;
            }
        } else {
            /* order: top-left, bottom-left, top-right, bottom-right */
            ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
            blocks += 64;
            ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
            blocks += 64;
            if (blocks_per_mb > 2) {
                ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
                blocks += 64;
                ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
                blocks += 64;
            }
        }

        x += mb_width;
    }
}

/**
 * Copy one slice of the alpha plane into a 16-row buffer and rescale the
 * samples to the coded alpha depth.
 *
 * Samples are shifted right by 2 when abits is 8, otherwise they are
 * expanded with (v << 6) | (v >> 4). Columns right of the picture repeat
 * the last valid sample, rows below the picture repeat the previous row.
 *
 * @param ctx           encoder context (not used here)
 * @param src           first alpha sample of the slice
 * @param linesize      distance between two rows of src, in bytes
 * @param x             horizontal slice position in samples
 * @param y             vertical slice position in samples
 * @param w             plane width in samples
 * @param h             plane height in samples
 * @param blocks        output, 16 rows of 16 * mbs_per_slice samples
 * @param mbs_per_slice number of macroblocks in the slice
 * @param abits         coded alpha depth
 */
static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
                           ptrdiff_t linesize, int x, int y, int w, int h,
                           uint16_t *blocks, int mbs_per_slice, int abits)
{
    const int slice_width = 16 * mbs_per_slice;
    int i, j, copy_w, copy_h;

    copy_w = FFMIN(w - x, slice_width);
    copy_h = FFMIN(h - y, 16);
    for (i = 0; i < copy_h; i++) {
        memcpy(blocks, src, copy_w * sizeof(*src));
        if (abits == 8)
            for (j = 0; j < copy_w; j++)
                blocks[j] >>= 2;
        else
            for (j = 0; j < copy_w; j++)
                blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
        for (j = copy_w; j < slice_width; j++)
            blocks[j] = blocks[copy_w - 1];
        blocks += slice_width;
        src    += linesize >> 1;   /* linesize is in bytes, src in 16-bit units */
    }
    for (; i < 16; i++) {
        memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
        blocks += slice_width;
    }
}

/**
 * Write an unsigned rice/exp golomb codeword.
 *
 * @param pb       bit writer
 * @param codebook packed descriptor: bits 0-1 hold switch_bits - 1,
 *                 bits 2-4 the exp-Golomb order, bits 5 and up the Rice order
 * @param val      non-negative value to code
 */
static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
{
    unsigned int rice_order, exp_order, switch_bits, switch_val;
    int exponent;

    /* number of prefix bits to switch between Rice and expGolomb */
    switch_bits = (codebook & 3) + 1;
    rice_order  =  codebook >> 5;       /* rice code order */
    exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */

    switch_val  = switch_bits << rice_order;

    if (val >= switch_val) {
        val -= switch_val - (1 << exp_order);
        exponent = av_log2(val);

        put_bits(pb, exponent - exp_order + switch_bits, 0);
        put_bits(pb, exponent + 1, val);
    } else {
        exponent = val >> rice_order;

        if (exponent)
            put_bits(pb, exponent, 0);
        put_bits(pb, 1, 1);
        if (rice_order)
            put_sbits(pb, rice_order, val);
    }
}

/** All ones for a negative 32-bit int, zero otherwise. */
#define GET_SIGN(x)  ((x) >> 31)
/** Map a signed value to an unsigned code: 0, -1, 1, -2, ... -> 0, 1, 2, 3, ... */
#define MAKE_CODE(x) (((x) * 2) ^ GET_SIGN(x))

/**
 * Write the DC coefficients of all blocks of a slice plane.
 *
 * The first DC is coded directly, every following one as a difference to
 * its predecessor with a codebook chosen from the previous code.
 *
 * @param pb               bit writer
 * @param blocks           coefficient blocks, 64 entries each
 * @param blocks_per_slice number of blocks
 * @param scale            DC quantiser
 */
static void encode_dcs(PutBitContext *pb, int16_t *blocks,
                       int blocks_per_slice, int scale)
{
    int i;
    int codebook = 5, code, dc, prev_dc, delta, sign, new_sign;

    prev_dc = (blocks[0] - 0x4000) / scale;
    encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
    sign     = 0;
    blocks  += 64;

    for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
        dc       = (blocks[0] - 0x4000) / scale;
        delta    = dc - prev_dc;
        new_sign = GET_SIGN(delta);
        /* negate the difference when the previous difference was negative */
        delta    = (delta ^ sign) - sign;
        code     = MAKE_CODE(delta);
        encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
        codebook = FFMIN(code, 6);
        sign     = new_sign;
        prev_dc  = dc;
    }
}

/**
 * Write the AC coefficients of all blocks of a slice plane as (run, level,
 * sign) triples.
 *
 * Coefficients are visited scan position by scan position across all
 * blocks, i.e. position scan[1] of every block first, then scan[2], ...
 *
 * @param pb               bit writer
 * @param blocks           coefficient blocks, 64 entries each
 * @param blocks_per_slice number of blocks
 * @param scan             scan order table
 * @param qmat             quantisation matrix indexed by coefficient position
 */
static void encode_acs(PutBitContext *pb, int16_t *blocks,
                       int blocks_per_slice,
                       const uint8_t *scan, const int16_t *qmat)
{
    int idx, i;
    int prev_run = 4;
    int prev_level = 2;
    int run = 0, level;
    int max_coeffs, abs_level;

    max_coeffs = blocks_per_slice << 6;

    for (i = 1; i < 64; i++) {
        for (idx = scan[i]; idx < max_coeffs; idx += 64) {
            level = blocks[idx] / qmat[scan[i]];
            if (level) {
                abs_level = FFABS(level);
                encode_vlc_codeword(pb, ff_prores_run_to_cb[prev_run], run);
                encode_vlc_codeword(pb, ff_prores_level_to_cb[prev_level], abs_level - 1);
                put_sbits(pb, 1, GET_SIGN(level));

                prev_run   = FFMIN(run, 15);
                prev_level = FFMIN(abs_level, 9);
                run        = 0;
            } else {
                run++;
            }
        }
    }
}

/**
 * Code one already transformed colour plane of a slice: DCs first, ACs after.
 *
 * src and linesize are not used by this function.
 */
static void encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
                               const uint16_t *src, ptrdiff_t linesize,
                               int mbs_per_slice, int16_t *blocks,
                               int blocks_per_mb,
                               const int16_t *qmat)
{
    int blocks_per_slice = mbs_per_slice * blocks_per_mb;

    encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
    encode_acs(pb, blocks, blocks_per_slice, ctx->scantable, qmat);
}

/**
 * Write the difference between two alpha samples.
 *
 * Small non-zero differences use a short form (flag 0, magnitude - 1,
 * sign); everything else, including a zero difference, is written as
 * flag 1 followed by abits raw bits.
 *
 * @param pb    bit writer
 * @param cur   current alpha sample
 * @param prev  previous alpha sample
 * @param abits coded alpha depth
 */
static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
{
    const int dbits = (abits == 8) ? 4 : 7;
    const int dsize = 1 << (dbits - 1);
    int diff = cur - prev;

    /* wrap the difference into the range [-dsize, (1 << abits) - dsize) */
    diff = av_zero_extend(diff, abits);
    if (diff >= (1 << abits) - dsize)
        diff -= 1 << abits;
    if (diff < -dsize || diff > dsize || !diff) {
        put_bits(pb, 1, 1);
        put_bits(pb, abits, diff);
    } else {
        put_bits(pb, 1, 0);
        put_bits(pb, dbits - 1, FFABS(diff) - 1);
        put_bits(pb, 1, diff < 0);
    }
}

/**
 * Write the length of a run of identical alpha samples.
 *
 * A zero run is a single 1 bit; otherwise a 0 bit is followed by the run
 * in 4 bits (run < 16) or in 15 bits.
 */
static void put_alpha_run(PutBitContext *pb, int run)
{
    if (run) {
        put_bits(pb, 1, 0);
        if (run < 0x10)
            put_bits(pb, 4, run);
        else
            put_bits(pb, 15, run);
    } else {
        put_bits(pb, 1, 1);
    }
}

// todo alpha quantisation for high quants

/**
 * Code the alpha samples of a slice as alternating differences and runs.
 *
 * The predictor starts at the all-ones value for the coded depth.
 * quant is not used yet.
 */
static void encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
                               int mbs_per_slice, uint16_t *blocks,
                               int quant)
{
    const int abits = ctx->alpha_bits;
    const int mask  = (1 << abits) - 1;
    const int num_coeffs = mbs_per_slice * 256;
    int prev = mask, cur;
    int idx = 0;
    int run = 0;

    cur = blocks[idx++];
    put_alpha_diff(pb, cur, prev, abits);
    prev = cur;
    do {
        cur = blocks[idx++];
        if (cur != prev) {
            put_alpha_run (pb, run);
            put_alpha_diff(pb, cur, prev, abits);
            prev = cur;
            run  = 0;
        } else {
            run++;
        }
    } while (idx < num_coeffs);
    put_alpha_run(pb, run);
}

/**
 * Encode all planes of one slice with the given quantiser.
 *
 * @param avctx         codec context
 * @param pic           frame being encoded
 * @param pb            bit writer positioned at the start of the slice data
 * @param sizes         receives the coded size in bytes of every plane
 * @param x             slice position in macroblocks
 * @param y             slice position in macroblocks
 * @param quant         quantiser for this slice
 * @param mbs_per_slice number of macroblocks in this slice
 * @return total number of bytes written for the slice
 */
static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
                        PutBitContext *pb,
                        int sizes[4], int x, int y, int quant,
                        int mbs_per_slice)
{
    ProresContext *ctx = avctx->priv_data;
    int i, xp, yp;
    int total_size = 0;
    const uint16_t *src;
    int num_cblocks, pwidth, line_add;
    ptrdiff_t linesize;
    int is_chroma;
    uint16_t *qmat;
    uint16_t *qmat_chroma;

    /* with two pictures per frame, pick the line offset of the current field */
    if (ctx->pictures_per_frame == 1)
        line_add = 0;
    else
        line_add = ctx->cur_picture_idx ^ !(pic->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST);

    if (ctx->force_quant) {
        qmat = ctx->quants[0];
        qmat_chroma = ctx->quants_chroma[0];
    } else if (quant < MAX_STORED_Q) {
        qmat = ctx->quants[quant];
        qmat_chroma = ctx->quants_chroma[quant];
    } else {
        /* no precomputed table for this quantiser, build the matrices now */
        qmat = ctx->custom_q;
        qmat_chroma = ctx->custom_chroma_q;
        for (i = 0; i < 64; i++) {
            qmat[i] = ctx->quant_mat[i] * quant;
            qmat_chroma[i] = ctx->quant_chroma_mat[i] * quant;
        }
    }

    for (i = 0; i < ctx->num_planes; i++) {
        is_chroma    = (i == 1 || i == 2);
        if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
            xp          = x << 4;
            yp          = y << 4;
            num_cblocks = 4;
            pwidth      = avctx->width;
        } else {
            xp          = x << 3;
            yp          = y << 4;
            num_cblocks = 2;
            pwidth      = avctx->width >> 1;
        }

        linesize = pic->linesize[i] * ctx->pictures_per_frame;
        src = (const uint16_t*)(pic->data[i] + yp * linesize +
                                 line_add * pic->linesize[i]) + xp;

        if (i < 3) {
            get_slice_data(ctx, src, linesize, xp, yp,
                           pwidth, avctx->height / ctx->pictures_per_frame,
                           ctx->blocks[0], ctx->emu_buf,
                           mbs_per_slice, num_cblocks, is_chroma);
            if (!is_chroma) {/* luma quant */
                encode_slice_plane(ctx, pb, src, linesize,
                                   mbs_per_slice, ctx->blocks[0],
                                   num_cblocks, qmat);
            } else { /* chroma plane */
                encode_slice_plane(ctx, pb, src, linesize,
                                   mbs_per_slice, ctx->blocks[0],
                                   num_cblocks, qmat_chroma);
            }
        } else {
            get_alpha_data(ctx, src, linesize, xp, yp,
                           pwidth, avctx->height / ctx->pictures_per_frame,
                           ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
            encode_alpha_plane(ctx, pb, mbs_per_slice, ctx->blocks[0], quant);
        }
        /* every plane ends on a byte boundary so its size can be recorded */
        flush_put_bits(pb);
        sizes[i]   = put_bytes_output(pb) - total_size;
        total_size = put_bytes_output(pb);
    }
    return total_size;
}

/**
 * Return the number of bits encode_vlc_codeword() emits for the same
 * codebook and value.
 */
static inline int estimate_vlc(unsigned codebook, int val)
{
    unsigned int rice_order, exp_order, switch_bits, switch_val;
    int exponent;

    /* number of prefix bits to switch between Rice and expGolomb */
    switch_bits = (codebook & 3) + 1;
    rice_order  =  codebook >> 5;       /* rice code order */
    exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */

    switch_val  = switch_bits << rice_order;

    if (val >= switch_val) {
        val -= switch_val - (1 << exp_order);
        exponent = av_log2(val);

        return exponent * 2 - exp_order + switch_bits + 1;
    } else {
        return (val >> rice_order) + rice_order + 1;
    }
}

/**
 * Estimate the bits needed for the DC coefficients of a slice plane and
 * add the quantisation residuals to *error.
 *
 * @param error            accumulated error, updated in place
 * @param blocks           coefficient blocks, 64 entries each
 * @param blocks_per_slice number of blocks
 * @param scale            DC quantiser
 * @return estimated number of bits
 */
static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
                        int scale)
{
    int i;
    int codebook = 5, code, dc, prev_dc, delta, sign, new_sign;
    int bits;

    prev_dc  = (blocks[0] - 0x4000) / scale;
    bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
    sign     = 0;
    blocks  += 64;
    /*
     * NOTE(review): blocks has already been advanced, so this adds the
     * residual of the second block (added once more by the loop below) and
     * never that of the first one. It looks unintended, but changing it
     * alters the rate control decisions and thus the encoder output, so it
     * is kept as is - confirm before touching.
     */
    *error  += FFABS(blocks[0] - 0x4000) % scale;

    for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
        dc       = (blocks[0] - 0x4000) / scale;
        *error  += FFABS(blocks[0] - 0x4000) % scale;
        delta    = dc - prev_dc;
        new_sign = GET_SIGN(delta);
        delta    = (delta ^ sign) - sign;
        code     = MAKE_CODE(delta);
        bits    += estimate_vlc(ff_prores_dc_codebook[codebook], code);
        codebook = FFMIN(code, 6);
        sign     = new_sign;
        prev_dc  = dc;
    }

    return bits;
}

/**
 * Estimate the bits needed for the AC coefficients of a slice plane and
 * add the quantisation residuals to *error.
 *
 * Mirrors the traversal of encode_acs(); the extra "+ 1" per non-zero
 * level accounts for the sign bit.
 *
 * @return estimated number of bits
 */
static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
                        const uint8_t *scan, const int16_t *qmat)
{
    int idx, i;
    int prev_run = 4;
    int prev_level = 2;
    int run, level;
    int max_coeffs, abs_level;
    int bits = 0;

    max_coeffs = blocks_per_slice << 6;
    run        = 0;

    for (i = 1; i < 64; i++) {
        for (idx = scan[i]; idx < max_coeffs; idx += 64) {
            level   = blocks[idx] / qmat[scan[i]];
            *error += FFABS(blocks[idx]) % qmat[scan[i]];
            if (level) {
                abs_level = FFABS(level);
                bits += estimate_vlc(ff_prores_run_to_cb[prev_run], run);
                bits += estimate_vlc(ff_prores_level_to_cb[prev_level],
                                     abs_level - 1) + 1;

                prev_run   = FFMIN(run, 15);
                prev_level = FFMIN(abs_level, 9);
                run    = 0;
            } else {
                run++;
            }
        }
    }

    return bits;
}

/**
 * Estimate the coded size of one colour plane of a slice, rounded up to
 * whole bytes, using the coefficients stored in td->blocks[plane].
 *
 * src and linesize are not used by this function.
 *
 * @return estimated number of bits, a multiple of 8
 */
static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
                                const uint16_t *src, ptrdiff_t linesize,
                                int mbs_per_slice,
                                int blocks_per_mb,
                                const int16_t *qmat, ProresThreadData *td)
{
    int blocks_per_slice;
    int bits;

    blocks_per_slice = mbs_per_slice * blocks_per_mb;

    bits  = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
    bits += estimate_acs(error, td->blocks[plane], blocks_per_slice, ctx->scantable, qmat);

    return FFALIGN(bits, 8);
}

/**
 * Return the number of bits put_alpha_diff() emits for the same arguments.
 */
static int est_alpha_diff(int cur, int prev, int abits)
{
    const int dbits = (abits == 8) ? 4 : 7;
    const int dsize = 1 << (dbits - 1);
    int diff = cur - prev;

    diff = av_zero_extend(diff, abits);
    if (diff >= (1 << abits) - dsize)
        diff -= 1 << abits;
    if (diff < -dsize || diff > dsize || !diff)
        return abits + 1;
    else
        return dbits + 1;
}

/**
 * Estimate the coded size in bits of the alpha plane of a slice.
 *
 * Runs are charged 1, 4 or 15 bits here, which is not exactly what
 * put_alpha_run() writes (it adds a flag bit to non-zero runs and also
 * codes a trailing zero run), so the result is an approximation.
 *
 * src and linesize are not used by this function.
 */
static int estimate_alpha_plane(ProresContext *ctx,
                                const uint16_t *src, ptrdiff_t linesize,
                                int mbs_per_slice, int16_t *blocks)
{
    const int abits = ctx->alpha_bits;
    const int mask  = (1 << abits) - 1;
    const int num_coeffs = mbs_per_slice * 256;
    int prev = mask, cur;
    int idx = 0;
    int run = 0;
    int bits;

    cur = blocks[idx++];
    bits = est_alpha_diff(cur, prev, abits);
    prev = cur;
    do {
        cur = blocks[idx++];
        if (cur != prev) {
            if (!run)
                bits++;
            else if (run < 0x10)
                bits += 4;
            else
                bits += 15;
            bits += est_alpha_diff(cur, prev, abits);
            prev = cur;
            run  = 0;
        } else {
            run++;
        }
    } while (idx < num_coeffs);

    if (run) {
        if (run < 0x10)
            bits += 4;
        else
            bits += 15;
    }

    return bits;
}

/**
 * Evaluate every candidate quantiser for one slice and extend the trellis
 * of the current macroblock row by one column.
 *
 * Candidates are min_quant..max_quant of the profile plus one extra node at
 * index max_quant + 1 holding an "overquant" value that is searched upwards
 * (below 128) until the slice fits into bits_per_mb * mbs_per_slice.
 *
 * @param avctx         codec context
 * @param trellis_node  index of the first node of this slice's column
 * @param x             slice position in macroblocks
 * @param y             slice position in macroblocks
 * @param mbs_per_slice number of macroblocks in this slice
 * @param td            scratch data of the calling thread
 * @return index of the node with the lowest score in this column
 */
static int find_slice_quant(AVCodecContext *avctx,
                            int trellis_node, int x, int y, int mbs_per_slice,
                            ProresThreadData *td)
{
    ProresContext *ctx = avctx->priv_data;
    int i, q, pq, xp, yp;
    const uint16_t *src;
    int num_cblocks[MAX_PLANES], pwidth;
    int is_chroma[MAX_PLANES];
    const int min_quant = ctx->profile_info->min_quant;
    const int max_quant = ctx->profile_info->max_quant;
    int error, bits, bits_limit;
    int mbs, prev, cur, new_score;
    int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
    int overquant;
    uint16_t *qmat;
    uint16_t *qmat_chroma;
    int linesize[4], line_add;
    int alpha_bits = 0;

    if (ctx->pictures_per_frame == 1)
        line_add = 0;
    else
        line_add = ctx->cur_picture_idx ^ !(ctx->pic->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST);
    /* macroblocks of this row up to and including the current slice */
    mbs = x + mbs_per_slice;

    /* fetch (and for colour planes transform) the slice data once */
    for (i = 0; i < ctx->num_planes; i++) {
        is_chroma[i]    = (i == 1 || i == 2);
        if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
            xp             = x << 4;
            yp             = y << 4;
            num_cblocks[i] = 4;
            pwidth         = avctx->width;
        } else {
            xp             = x << 3;
            yp             = y << 4;
            num_cblocks[i] = 2;
            pwidth         = avctx->width >> 1;
        }

        linesize[i] = ctx->pic->linesize[i] * ctx->pictures_per_frame;
        src = (const uint16_t *)(ctx->pic->data[i] + yp * linesize[i] +
                                 line_add * ctx->pic->linesize[i]) + xp;

        if (i < 3) {
            get_slice_data(ctx, src, linesize[i], xp, yp,
                           pwidth, avctx->height / ctx->pictures_per_frame,
                           td->blocks[i], td->emu_buf,
                           mbs_per_slice, num_cblocks[i], is_chroma[i]);
        } else {
            get_alpha_data(ctx, src, linesize[i], xp, yp,
                           pwidth, avctx->height / ctx->pictures_per_frame,
                           td->blocks[i], mbs_per_slice, ctx->alpha_bits);
        }
    }

    for (q = min_quant; q < max_quant + 2; q++) {
        td->nodes[trellis_node + q].prev_node = -1;
        td->nodes[trellis_node + q].quant     = q;
    }

    if (ctx->alpha_bits)
        alpha_bits = estimate_alpha_plane(ctx, src, linesize[3],
                                          mbs_per_slice, td->blocks[3]);
    // todo: maybe perform coarser quantising to fit into frame size when needed
    for (q = min_quant; q <= max_quant; q++) {
        bits  = alpha_bits;
        error = 0;
        bits += estimate_slice_plane(ctx, &error, 0,
                                     src, linesize[0],
                                     mbs_per_slice,
                                     num_cblocks[0],
                                     ctx->quants[q], td); /* estimate luma plane */
        for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
            bits += estimate_slice_plane(ctx, &error, i,
                                         src, linesize[i],
                                         mbs_per_slice,
                                         num_cblocks[i],
                                         ctx->quants_chroma[q], td);
        }
        if (bits > 65000 * 8)
            error = SCORE_LIMIT;

        slice_bits[q]  = bits;
        slice_score[q] = error;
    }
    if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
        /* max_quant already fits, reuse it with a slightly worse score */
        slice_bits[max_quant + 1]  = slice_bits[max_quant];
        slice_score[max_quant + 1] = slice_score[max_quant] + 1;
        overquant = max_quant;
    } else {
        /* raise the quantiser until the slice fits the per-slice budget */
        for (q = max_quant + 1; q < 128; q++) {
            bits  = alpha_bits;
            error = 0;
            if (q < MAX_STORED_Q) {
                qmat = ctx->quants[q];
                qmat_chroma = ctx->quants_chroma[q];
            } else {
                qmat = td->custom_q;
                qmat_chroma = td->custom_chroma_q;
                for (i = 0; i < 64; i++) {
                    qmat[i] = ctx->quant_mat[i] * q;
                    qmat_chroma[i] = ctx->quant_chroma_mat[i] * q;
                }
            }
            bits += estimate_slice_plane(ctx, &error, 0,
                                         src, linesize[0],
                                         mbs_per_slice,
                                         num_cblocks[0],
                                         qmat, td);/* estimate luma plane */
            for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
                bits += estimate_slice_plane(ctx, &error, i,
                                             src, linesize[i],
                                             mbs_per_slice,
                                             num_cblocks[i],
                                             qmat_chroma, td);
            }
            if (bits <= ctx->bits_per_mb * mbs_per_slice)
                break;
        }

        slice_bits[max_quant + 1]  = bits;
        slice_score[max_quant + 1] = error;
        overquant = q;
    }
    td->nodes[trellis_node + max_quant + 1].quant = overquant;

    /* connect every node of this column to its best predecessor */
    bits_limit = mbs * ctx->bits_per_mb;
    for (pq = min_quant; pq < max_quant + 2; pq++) {
        prev = trellis_node - TRELLIS_WIDTH + pq;

        for (q = min_quant; q < max_quant + 2; q++) {
            cur = trellis_node + q;

            bits  = td->nodes[prev].bits + slice_bits[q];
            error = slice_score[q];
            if (bits > bits_limit)
                error = SCORE_LIMIT;

            if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
                new_score = td->nodes[prev].score + error;
            else
                new_score = SCORE_LIMIT;
            if (td->nodes[cur].prev_node == -1 ||
                td->nodes[cur].score >= new_score) {

                td->nodes[cur].bits      = bits;
                td->nodes[cur].score     = new_score;
                td->nodes[cur].prev_node = prev;
            }
        }
    }

    /* pick the cheapest node; on equal scores the later one wins */
    error = td->nodes[trellis_node + min_quant].score;
    pq    = trellis_node + min_quant;
    for (q = min_quant + 1; q < max_quant + 2; q++) {
        if (td->nodes[trellis_node + q].score <= error) {
            error = td->nodes[trellis_node + q].score;
            pq    = trellis_node + q;
        }
    }

    return pq;
}

/**
 * execute2() job: choose the quantisers for all slices of macroblock row
 * jobnr and store them in ctx->slice_q.
 *
 * The trellis is built slice by slice, then walked backwards through the
 * prev_node links starting from the best node of the last slice.
 *
 * @return 0
 */
static int find_quant_thread(AVCodecContext *avctx, void *arg,
                             int jobnr, int threadnr)
{
    ProresContext *ctx = avctx->priv_data;
    ProresThreadData *td = ctx->tdata + threadnr;
    int mbs_per_slice = ctx->mbs_per_slice;
    int x, y = jobnr, mb, q = 0;

    for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
        /* halve the slice width until it fits the remaining macroblocks */
        while (ctx->mb_width - x < mbs_per_slice)
            mbs_per_slice >>= 1;
        q = find_slice_quant(avctx,
                             (mb + 1) * TRELLIS_WIDTH, x, y,
                             mbs_per_slice, td);
    }

    for (x = ctx->slices_width - 1; x >= 0; x--) {
        ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
        q = td->nodes[q].prev_node;
    }

    return 0;
}

/**
 * Encode one frame into a packet.
 *
 * Writes the frame atom and frame header, then for every picture of the
 * frame a picture header, the slice size table and the slices. The packet
 * is grown on demand when the remaining room may not hold two more slices
 * of the largest size seen so far.
 *
 * @param avctx      codec context
 * @param pkt        output packet
 * @param pic        input frame
 * @param got_packet set to 1 on success
 * @return 0 on success, a negative error code otherwise
 */
static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                        const AVFrame *pic, int *got_packet)
{
    ProresContext *ctx = avctx->priv_data;
    uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
    uint8_t *picture_size_pos;
    PutBitContext pb;
    int x, y, i, mb, q = 0;
    int sizes[4] = { 0 };
    int slice_hdr_size = 2 * ctx->num_planes;
    int frame_size, picture_size, slice_size;
    int pkt_size, ret;
    int max_slice_size = (ctx->frame_size_upper_bound - 200) / (ctx->pictures_per_frame * ctx->slices_per_picture + 1);
    uint8_t frame_flags;

    ctx->pic = pic;
    pkt_size = ctx->frame_size_upper_bound;

    if ((ret = ff_alloc_packet(avctx, pkt, pkt_size + FF_INPUT_BUFFER_MIN_SIZE)) < 0)
        return ret;

    orig_buf = pkt->data;

    // frame atom
    orig_buf += 4;                              // frame size
    bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
    buf = orig_buf;

    // frame header
    tmp = buf;
    buf += 2;                                   // frame header size will be stored here
    bytestream_put_be16  (&buf, ctx->chroma_factor != CFACTOR_Y422 || ctx->alpha_bits ? 1 : 0);
    bytestream_put_buffer(&buf, ctx->vendor, 4);
    bytestream_put_be16  (&buf, avctx->width);
    bytestream_put_be16  (&buf, avctx->height);

    frame_flags = ctx->chroma_factor << 6;
    if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT)
        frame_flags |= (pic->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST) ? 0x04 : 0x08;
    bytestream_put_byte  (&buf, frame_flags);

    bytestream_put_byte  (&buf, 0);             // reserved
    bytestream_put_byte  (&buf, pic->color_primaries);
    bytestream_put_byte  (&buf, pic->color_trc);
    bytestream_put_byte  (&buf, pic->colorspace);
    bytestream_put_byte  (&buf, ctx->alpha_bits >> 3);
    bytestream_put_byte  (&buf, 0);             // reserved
    if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
        bytestream_put_byte  (&buf, 0x03);      // matrix flags - both matrices are present
        bytestream_put_buffer(&buf, ctx->quant_mat, 64);        // luma quantisation matrix
        bytestream_put_buffer(&buf, ctx->quant_chroma_mat, 64); // chroma quantisation matrix
    } else {
        bytestream_put_byte  (&buf, 0x00);      // matrix flags - default matrices are used
    }
    bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size

    for (ctx->cur_picture_idx = 0;
         ctx->cur_picture_idx < ctx->pictures_per_frame;
         ctx->cur_picture_idx++) {
        // picture header
        picture_size_pos = buf + 1;
        bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
        buf += 4;                                   // picture data size will be stored here
        bytestream_put_be16  (&buf, ctx->slices_per_picture);
        bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs

        // seek table - will be filled during slice encoding
        slice_sizes = buf;
        buf += ctx->slices_per_picture * 2;

        // slices
        if (!ctx->force_quant) {
            ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
                                  ctx->mb_height);
            if (ret)
                return ret;
        }

        for (y = 0; y < ctx->mb_height; y++) {
            int mbs_per_slice = ctx->mbs_per_slice;
            for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
                q = ctx->force_quant ? ctx->force_quant
                                     : ctx->slice_q[mb + y * ctx->slices_width];

                while (ctx->mb_width - x < mbs_per_slice)
                    mbs_per_slice >>= 1;

                bytestream_put_byte(&buf, slice_hdr_size << 3);
                slice_hdr = buf;
                buf += slice_hdr_size - 1;
                if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
                    uint8_t *start = pkt->data;
                    // Recompute new size according to max_slice_size
                    // and deduce delta
                    int delta = 200 + (ctx->pictures_per_frame *
                                ctx->slices_per_picture + 1) *
                                max_slice_size - pkt_size;

                    delta = FFMAX(delta, 2 * max_slice_size);
                    ctx->frame_size_upper_bound += delta;

                    if (!ctx->warn) {
                        avpriv_request_sample(avctx,
                                              "Packet too small: is %i,"
                                              " needs %i (slice: %i). "
                                              "Correct allocation",
                                              pkt_size, delta, max_slice_size);
                        ctx->warn = 1;
                    }

                    ret = av_grow_packet(pkt, delta);
                    if (ret < 0)
                        return ret;

                    pkt_size += delta;
                    // the packet data may have moved, rebase every pointer into it
                    orig_buf         = pkt->data + (orig_buf         - start);
                    buf              = pkt->data + (buf              - start);
                    picture_size_pos = pkt->data + (picture_size_pos - start);
                    slice_sizes      = pkt->data + (slice_sizes      - start);
                    slice_hdr        = pkt->data + (slice_hdr        - start);
                    tmp              = pkt->data + (tmp              - start);
                }
                init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)));
                ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
                                   mbs_per_slice);
                if (ret < 0)
                    return ret;

                // slice header: quantiser, then the sizes of all planes but the last
                bytestream_put_byte(&slice_hdr, q);
                slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
                for (i = 0; i < ctx->num_planes - 1; i++) {
                    bytestream_put_be16(&slice_hdr, sizes[i]);
                    slice_size += sizes[i];
                }
                bytestream_put_be16(&slice_sizes, slice_size);
                buf += slice_size - slice_hdr_size;
                if (max_slice_size < slice_size)
                    max_slice_size = slice_size;
            }
        }

        picture_size = buf - (picture_size_pos - 1);
        bytestream_put_be32(&picture_size_pos, picture_size);
    }

    // step back to the start of the frame atom and store the frame size
    orig_buf -= 8;
    frame_size = buf - orig_buf;
    bytestream_put_be32(&orig_buf, frame_size);

    pkt->size = frame_size;
    *got_packet = 1;

    return 0;
}

/**
 * Free the per-thread trellis storage and the slice quantiser table.
 */
static av_cold int encode_close(AVCodecContext *avctx)
{
    ProresContext *ctx = avctx->priv_data;
    int i;

    if (ctx->tdata) {
        for (i = 0; i < avctx->thread_count; i++)
            av_freep(&ctx->tdata[i].nodes);
    }
    av_freep(&ctx->tdata);
    av_freep(&ctx->slice_q);

    return 0;
}

/**
 * Copy an 8x8 block of 16-bit samples into block and run the forward DCT
 * of fdsp on it in place.
 *
 * @param fdsp     DSP context providing fdct
 * @param src      top-left sample of the block
 * @param linesize distance between two rows of src, in bytes
 * @param block    output, 64 coefficients
 */
static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
                        ptrdiff_t linesize, int16_t *block)
{
    int x, y;
    const uint16_t *tsrc = src;

    for (y = 0; y < 8; y++) {
        for (x = 0; x < 8; x++)
            block[y * 8 + x] = tsrc[x];
        tsrc += linesize >> 1;
    }
    fdsp->fdct(block);
}

/**
 * Initialise the encoder: run the shared setup, install the DCT and, unless
 * a fixed quantiser is forced, allocate the data for the quantiser search.
 *
 * Nothing is freed here on failure; the codec sets
 * FF_CODEC_CAP_INIT_CLEANUP.
 *
 * @return 0 on success, a negative error code otherwise
 */
static av_cold int encode_init(AVCodecContext *avctx)
{
    ProresContext *ctx = avctx->priv_data;
    int err = 0, i, j, min_quant, max_quant;

    err = ff_prores_kostya_encode_init(avctx, ctx, avctx->pix_fmt);
    if (err < 0)
        return err;

    ctx->fdct = prores_fdct;
    ff_fdctdsp_init(&ctx->fdsp, avctx);

    if (!ctx->force_quant) {
        min_quant = ctx->profile_info->min_quant;
        max_quant = ctx->profile_info->max_quant;

        ctx->slice_q = av_malloc_array(ctx->slices_per_picture, sizeof(*ctx->slice_q));
        if (!ctx->slice_q)
            return AVERROR(ENOMEM);

        ctx->tdata = av_calloc(avctx->thread_count, sizeof(*ctx->tdata));
        if (!ctx->tdata)
            return AVERROR(ENOMEM);

        for (j = 0; j < avctx->thread_count; j++) {
            /* one column per slice plus a leading start column */
            ctx->tdata[j].nodes = av_malloc_array(ctx->slices_width + 1,
                                                  TRELLIS_WIDTH
                                                  * sizeof(*ctx->tdata->nodes));
            if (!ctx->tdata[j].nodes)
                return AVERROR(ENOMEM);
            /* the start column costs nothing and has no predecessor */
            for (i = min_quant; i < max_quant + 2; i++) {
                ctx->tdata[j].nodes[i].prev_node = -1;
                ctx->tdata[j].nodes[i].bits      = 0;
                ctx->tdata[j].nodes[i].score     = 0;
            }
        }
    }

    return 0;
}

#define OFFSET(x) offsetof(ProresContext, x)
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)

static const AVOption options[] = {
    { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
        AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
    { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
        { .i64 = PRORES_PROFILE_AUTO },
        PRORES_PROFILE_AUTO, PRORES_PROFILE_4444XQ, VE, .unit = "profile" },
    { "auto",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_AUTO },
        0, 0, VE, .unit = "profile" },
    { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
        0, 0, VE, .unit = "profile" },
    { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
        0, 0, VE, .unit = "profile" },
    { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
        0, 0, VE, .unit = "profile" },
    { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
        0, 0, VE, .unit = "profile" },
    { "4444",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
        0, 0, VE, .unit = "profile" },
    { "4444xq",        NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444XQ },
        0, 0, VE, .unit = "profile" },
    { "vendor", "vendor ID", OFFSET(vendor),
        AV_OPT_TYPE_STRING, { .str = "Lavc" }, 0, 0, VE },
    { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
        AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
    { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, .unit = "quant_mat" },
    { "auto",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
        0, 0, VE, .unit = "quant_mat" },
    { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
        0, 0, VE, .unit = "quant_mat" },
    { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
        0, 0, VE, .unit = "quant_mat" },
    { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
        0, 0, VE, .unit = "quant_mat" },
    { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
        0, 0, VE, .unit = "quant_mat" },
    { "default",       NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
        0, 0, VE, .unit = "quant_mat" },
    { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
        { .i64 = 16 }, 0, 16, VE },
    { NULL }
};

static const AVClass proresenc_class = {
    .class_name = "ProRes encoder",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};

const FFCodec ff_prores_ks_encoder = {
    .p.name         = "prores_ks",
    CODEC_LONG_NAME("Apple ProRes (iCodec Pro)"),
    .p.type         = AVMEDIA_TYPE_VIDEO,
    .p.id           = AV_CODEC_ID_PRORES,
    .priv_data_size = sizeof(ProresContext),
    .init           = encode_init,
    .close          = encode_close,
    FF_CODEC_ENCODE_CB(encode_frame),
    .p.capabilities = AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS |
                      AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,
    CODEC_PIXFMTS(AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
                  AV_PIX_FMT_YUVA444P10),
    .color_ranges   = AVCOL_RANGE_MPEG,
    .p.priv_class   = &proresenc_class,
    .p.profiles     = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
};