From 97b2322fda885e6c5c32c751ad02b7fb8c02af75 Mon Sep 17 00:00:00 2001 From: v0lt Date: Sat, 14 Dec 2024 11:31:15 +0300 Subject: [PATCH] =?UTF-8?q?=D0=9E=D0=B1=D0=BD=D0=BE=D0=B2=D0=BB=D0=B5?= =?UTF-8?q?=D0=BD=20ffmpeg=20n7.2-dev-897-g90af8e07b0.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/Changelog.Rus.txt | 3 + docs/Changelog.txt | 3 + .../ffmpeg/libavcodec/aac/aacdec_usac.c | 5 +- .../libavcodec/cbs_h266_syntax_template.c | 4 +- src/ExtLib/ffmpeg/libavcodec/hevc/dsp.c | 2 + src/ExtLib/ffmpeg/libavcodec/hevc/dsp.h | 1 + src/ExtLib/ffmpeg/libavcodec/jpeg2000dec.c | 6 +- src/ExtLib/ffmpeg/libavcodec/jpeg2000htdec.c | 6 +- src/ExtLib/ffmpeg/libavcodec/pngdec.c | 12 +- src/ExtLib/ffmpeg/libavcodec/vvc/cabac.c | 10 +- src/ExtLib/ffmpeg/libavcodec/vvc/ctu.c | 14 +- src/ExtLib/ffmpeg/libavcodec/vvc/dec.c | 2 - src/ExtLib/ffmpeg/libavcodec/vvc/dec.h | 4 +- src/ExtLib/ffmpeg/libavcodec/vvc/dsp.c | 9 + .../ffmpeg/libavcodec/vvc/intra_template.c | 5 +- src/ExtLib/ffmpeg/libavcodec/vvc/mvs.c | 4 +- src/ExtLib/ffmpeg/libavcodec/vvc/ps.c | 8 +- src/ExtLib/ffmpeg/libavutil/channel_layout.c | 1 + src/ExtLib/ffmpeg/libavutil/channel_layout.h | 4 +- src/ExtLib/ffmpeg/libavutil/cpu.c | 6 + src/ExtLib/ffmpeg/libavutil/cpu.h | 3 + src/ExtLib/ffmpeg/libavutil/cpu_internal.h | 2 + src/ExtLib/ffmpeg/libavutil/csp.c | 364 +++++++++++++++++- src/ExtLib/ffmpeg/libavutil/csp.h | 46 ++- src/ExtLib/ffmpeg/libavutil/version.h | 4 +- src/ExtLib/ffmpeg/libswscale/graph.c | 12 +- src/ExtLib/ffmpeg/libswscale/hscale.c | 6 +- src/ExtLib/ffmpeg/libswscale/swscale.c | 122 +++++- .../ffmpeg/libswscale/swscale_internal.h | 26 +- .../ffmpeg/libswscale/swscale_unscaled.c | 2 + src/ExtLib/ffmpeg/libswscale/utils.c | 2 +- .../ffmpeg/libswscale/x86/range_convert.asm | 159 +++++--- src/ExtLib/ffmpeg/libswscale/x86/swscale.c | 50 ++- 33 files changed, 756 insertions(+), 151 deletions(-) diff --git a/docs/Changelog.Rus.txt b/docs/Changelog.Rus.txt index 
a6c785bedb..ebd4daaa1b 100644 --- a/docs/Changelog.Rus.txt +++ b/docs/Changelog.Rus.txt @@ -21,6 +21,9 @@ YouTube Обновлен испанский перевод (автор IPeluchito). Обновлен румынский перевод (автор Andrei Miloiu). +Обновлены библиотеки: + ffmpeg n7.2-dev-897-g90af8e07b0. + 1.8.2 - 2024-12-07 ============================= diff --git a/docs/Changelog.txt b/docs/Changelog.txt index b017198ed9..93625ad622 100644 --- a/docs/Changelog.txt +++ b/docs/Changelog.txt @@ -21,6 +21,9 @@ Updated Italian translation (by mapi68). Updated Spanish translation (by IPeluchito). Updated Romanian translation (by Andrei Miloiu). +Updated libraries: + ffmpeg n7.2-dev-897-g90af8e07b0. + 1.8.2 - 2024-12-07 ============================= diff --git a/src/ExtLib/ffmpeg/libavcodec/aac/aacdec_usac.c b/src/ExtLib/ffmpeg/libavcodec/aac/aacdec_usac.c index 1b79d19a30..2938e69387 100644 --- a/src/ExtLib/ffmpeg/libavcodec/aac/aacdec_usac.c +++ b/src/ExtLib/ffmpeg/libavcodec/aac/aacdec_usac.c @@ -265,6 +265,7 @@ static int decode_usac_extension(AACDecContext *ac, AACUsacElemConfig *e, /* No configuration needed - fallthrough (len should be 0) */ default: skip_bits(gb, 8*ext_config_len); + e->ext.type = ID_EXT_ELE_FILL; break; }; @@ -917,8 +918,10 @@ static int decode_usac_stereo_info(AACDecContext *ac, AACUSACConfig *usac, } ret = setup_sce(ac, sce1, usac); - if (ret < 0) + if (ret < 0) { + ics2->max_sfb = 0; return ret; + } ret = setup_sce(ac, sce2, usac); if (ret < 0) diff --git a/src/ExtLib/ffmpeg/libavcodec/cbs_h266_syntax_template.c b/src/ExtLib/ffmpeg/libavcodec/cbs_h266_syntax_template.c index 43def9220f..9a6e6f5e01 100644 --- a/src/ExtLib/ffmpeg/libavcodec/cbs_h266_syntax_template.c +++ b/src/ExtLib/ffmpeg/libavcodec/cbs_h266_syntax_template.c @@ -1151,7 +1151,7 @@ static int FUNC(sps)(CodedBitstreamContext *ctx, RWContext *rw, else infer(sps_subpic_width_minus1[0], max_width_minus1); if (current->sps_pic_height_max_in_luma_samples > ctb_size_y) - us(hlen, sps_subpic_height_minus1[0], 0, 
max_width_minus1, 1, 0); + us(hlen, sps_subpic_height_minus1[0], 0, max_height_minus1, 1, 0); else infer(sps_subpic_height_minus1[0], max_height_minus1); if (!current->sps_independent_subpics_flag) { @@ -2812,7 +2812,7 @@ static int FUNC(picture_header) (CodedBitstreamContext *ctx, RWContext *rw, 0, 2 * (ctb_log2_size_y - min_cb_log2_size_y)); if (sps->sps_max_mtt_hierarchy_depth_intra_slice_chroma != 0) { unsigned int min_qt_log2_size_intra_c = - sps->sps_log2_diff_min_qt_min_cb_intra_slice_chroma + + current->ph_log2_diff_min_qt_min_cb_intra_slice_chroma + min_cb_log2_size_y; ue(ph_log2_diff_max_bt_min_qt_intra_slice_chroma, 0, FFMIN(6, ctb_log2_size_y) - min_qt_log2_size_intra_c); diff --git a/src/ExtLib/ffmpeg/libavcodec/hevc/dsp.c b/src/ExtLib/ffmpeg/libavcodec/hevc/dsp.c index 2f7e9cea9b..ce33ec950e 100644 --- a/src/ExtLib/ffmpeg/libavcodec/hevc/dsp.c +++ b/src/ExtLib/ffmpeg/libavcodec/hevc/dsp.c @@ -265,6 +265,8 @@ int i = 0; ff_hevc_dsp_init_arm(hevcdsp, bit_depth); #elif ARCH_PPC ff_hevc_dsp_init_ppc(hevcdsp, bit_depth); +#elif ARCH_WASM + ff_hevc_dsp_init_wasm(hevcdsp, bit_depth); #elif ARCH_X86 ff_hevc_dsp_init_x86(hevcdsp, bit_depth); #elif ARCH_MIPS diff --git a/src/ExtLib/ffmpeg/libavcodec/hevc/dsp.h b/src/ExtLib/ffmpeg/libavcodec/hevc/dsp.h index 02b8e0e8e2..4277d695ba 100644 --- a/src/ExtLib/ffmpeg/libavcodec/hevc/dsp.h +++ b/src/ExtLib/ffmpeg/libavcodec/hevc/dsp.h @@ -133,6 +133,7 @@ extern const int8_t ff_hevc_qpel_filters[4][16]; void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth); void ff_hevc_dsp_init_arm(HEVCDSPContext *c, const int bit_depth); void ff_hevc_dsp_init_ppc(HEVCDSPContext *c, const int bit_depth); +void ff_hevc_dsp_init_wasm(HEVCDSPContext *c, const int bit_depth); void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth); void ff_hevc_dsp_init_mips(HEVCDSPContext *c, const int bit_depth); void ff_hevc_dsp_init_loongarch(HEVCDSPContext *c, const int bit_depth); diff --git 
a/src/ExtLib/ffmpeg/libavcodec/jpeg2000dec.c b/src/ExtLib/ffmpeg/libavcodec/jpeg2000dec.c index c9d8b025b1..84eebfd1b2 100644 --- a/src/ExtLib/ffmpeg/libavcodec/jpeg2000dec.c +++ b/src/ExtLib/ffmpeg/libavcodec/jpeg2000dec.c @@ -1886,10 +1886,10 @@ static void decode_sigpass(Jpeg2000T1Context *t1, int width, int height, if (ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ff_jpeg2000_getsigctxno(t1->flags[(y+1) * t1->stride + x+1] & flags_mask, bandno))) { int xorbit, ctxno = ff_jpeg2000_getsgnctxno(t1->flags[(y+1) * t1->stride + x+1] & flags_mask, &xorbit); if (t1->mqc.raw) { - t1->data[(y) * t1->stride + x] |= ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ctxno) << 31; + t1->data[(y) * t1->stride + x] |= (uint32_t)(ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ctxno)) << 31; t1->data[(y) * t1->stride + x] |= mask; } else { - t1->data[(y) * t1->stride + x] |= (ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ctxno) ^ xorbit) << 31; + t1->data[(y) * t1->stride + x] |= (uint32_t)(ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ctxno) ^ xorbit) << 31; t1->data[(y) * t1->stride + x] |= mask; } ff_jpeg2000_set_significance(t1, x, y, @@ -1969,7 +1969,7 @@ static void decode_clnpass(const Jpeg2000DecoderContext *s, Jpeg2000T1Context *t int xorbit; int ctxno = ff_jpeg2000_getsgnctxno(t1->flags[(y + 1) * t1->stride + x + 1] & flags_mask, &xorbit); - t1->data[(y) * t1->stride + x] |= (ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ctxno) ^ xorbit) << 31; + t1->data[(y) * t1->stride + x] |= (uint32_t)(ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ctxno) ^ xorbit) << 31; t1->data[(y) * t1->stride + x] |= mask; ff_jpeg2000_set_significance(t1, x, y, t1->data[(y) * t1->stride + x] & INT32_MIN); } diff --git a/src/ExtLib/ffmpeg/libavcodec/jpeg2000htdec.c b/src/ExtLib/ffmpeg/libavcodec/jpeg2000htdec.c index 186a6873ac..08140e06a9 100644 --- a/src/ExtLib/ffmpeg/libavcodec/jpeg2000htdec.c +++ b/src/ExtLib/ffmpeg/libavcodec/jpeg2000htdec.c @@ -1070,7 +1070,7 @@ static void 
jpeg2000_process_stripes_block(StateVars *sig_prop, int i_s, int j_s uint8_t *state_p = block_states + (i + 1) * stride + (j + 1); if ((state_p[0] >> HT_SHIFT_REF) & 1) { bit = jpeg2000_peek_bit(sig_prop, magref_segment, magref_length); - *sp |= (int32_t)bit << 31; + *sp |= (uint32_t)bit << 31; } } } @@ -1160,7 +1160,7 @@ jpeg2000_decode_magref_segment( uint16_t width, uint16_t block_height, const int jpeg2000_modify_state(i, j, stride, 1 << HT_SHIFT_REF_IND, block_states); bit = jpeg2000_import_magref_bit(&mag_ref, magref_segment, magref_length); tmp = 0xFFFFFFFE | (uint32_t)bit; - tmp <<= pLSB; + tmp = (uint32_t)tmp << pLSB; sp[0] &= tmp; sp[0] |= 1 << (pLSB - 1); // Add 0.5 (reconstruction parameter = 1/2) } @@ -1176,7 +1176,7 @@ jpeg2000_decode_magref_segment( uint16_t width, uint16_t block_height, const int jpeg2000_modify_state(i, j, stride, 1 << HT_SHIFT_REF_IND, block_states); bit = jpeg2000_import_magref_bit(&mag_ref, magref_segment, magref_length); tmp = 0xFFFFFFFE | (uint32_t)bit; - tmp <<= pLSB; + tmp = (uint32_t)tmp << pLSB; sp[0] &= tmp; sp[0] |= 1 << (pLSB - 1); // Add 0.5 (reconstruction parameter = 1/2) } diff --git a/src/ExtLib/ffmpeg/libavcodec/pngdec.c b/src/ExtLib/ffmpeg/libavcodec/pngdec.c index c5b32c166d..f8cb61775e 100644 --- a/src/ExtLib/ffmpeg/libavcodec/pngdec.c +++ b/src/ExtLib/ffmpeg/libavcodec/pngdec.c @@ -757,7 +757,7 @@ static int populate_avctx_color_fields(AVCodecContext *avctx, AVFrame *frame) if (clli) { /* * 0.0001 divisor value - * see: https://www.w3.org/TR/png-3/#cLLi-chunk + * see: https://www.w3.org/TR/png-3/#cLLI-chunk */ clli->MaxCLL = s->clli_max / 10000; clli->MaxFALL = s->clli_avg / 10000; @@ -1566,18 +1566,20 @@ static int decode_frame_common(AVCodecContext *avctx, PNGDecContext *s, break; } - case MKTAG('c', 'L', 'L', 'i'): + case MKTAG('c', 'L', 'L', 'i'): /* legacy spelling, for backwards compat */ + case MKTAG('c', 'L', 'L', 'I'): if (bytestream2_get_bytes_left(&gb_chunk) != 8) { - av_log(avctx, AV_LOG_WARNING, 
"Invalid cLLi chunk size: %d\n", bytestream2_get_bytes_left(&gb_chunk)); + av_log(avctx, AV_LOG_WARNING, "Invalid cLLI chunk size: %d\n", bytestream2_get_bytes_left(&gb_chunk)); break; } s->have_clli = 1; s->clli_max = bytestream2_get_be32u(&gb_chunk); s->clli_avg = bytestream2_get_be32u(&gb_chunk); break; - case MKTAG('m', 'D', 'C', 'v'): + case MKTAG('m', 'D', 'C', 'v'): /* legacy spelling, for backward compat */ + case MKTAG('m', 'D', 'C', 'V'): if (bytestream2_get_bytes_left(&gb_chunk) != 24) { - av_log(avctx, AV_LOG_WARNING, "Invalid mDCv chunk size: %d\n", bytestream2_get_bytes_left(&gb_chunk)); + av_log(avctx, AV_LOG_WARNING, "Invalid mDCV chunk size: %d\n", bytestream2_get_bytes_left(&gb_chunk)); break; } s->have_mdcv = 1; diff --git a/src/ExtLib/ffmpeg/libavcodec/vvc/cabac.c b/src/ExtLib/ffmpeg/libavcodec/vvc/cabac.c index 0d45eec7511..5510144893 100644 --- a/src/ExtLib/ffmpeg/libavcodec/vvc/cabac.c +++ b/src/ExtLib/ffmpeg/libavcodec/vvc/cabac.c @@ -1257,11 +1257,19 @@ int ff_vvc_pred_mode_ibc_flag(VVCLocalContext *lc, const int is_chroma) return GET_CABAC(PRED_MODE_IBC_FLAG + inc); } +static av_always_inline +uint8_t get_mip_inc(VVCLocalContext *lc, const uint8_t *ctx) +{ + uint8_t left = 0, top = 0; + get_left_top(lc, &left, &top, lc->cu->x0, lc->cu->y0, ctx, ctx); + return (left & 1) + (top & 1); +} + int ff_vvc_intra_mip_flag(VVCLocalContext *lc, const uint8_t *intra_mip_flag) { const int w = lc->cu->cb_width; const int h = lc->cu->cb_height; - const int inc = (w > h * 2 || h > w * 2) ? 3 : get_inc(lc, intra_mip_flag); + const int inc = (w > h * 2 || h > w * 2) ? 
3 : get_mip_inc(lc, intra_mip_flag); return GET_CABAC(INTRA_MIP_FLAG + inc); } diff --git a/src/ExtLib/ffmpeg/libavcodec/vvc/ctu.c b/src/ExtLib/ffmpeg/libavcodec/vvc/ctu.c index a32abdeb62..3624337fd7 100644 --- a/src/ExtLib/ffmpeg/libavcodec/vvc/ctu.c +++ b/src/ExtLib/ffmpeg/libavcodec/vvc/ctu.c @@ -946,6 +946,12 @@ static void derive_chroma_intra_pred_mode(VVCLocalContext *lc, } } +static av_always_inline uint8_t pack_mip_info(int intra_mip_flag, + int intra_mip_transposed_flag, int intra_mip_mode) +{ + return (intra_mip_mode << 2) | (intra_mip_transposed_flag << 1) | intra_mip_flag; +} + static void intra_luma_pred_modes(VVCLocalContext *lc) { VVCFrameContext *fc = lc->fc; @@ -974,9 +980,9 @@ static void intra_luma_pred_modes(VVCLocalContext *lc) int x = y_cb * pps->min_cb_width + x_cb; for (int y = 0; y < (cb_height>>log2_min_cb_size); y++) { int width = cb_width>>log2_min_cb_size; - memset(&fc->tab.imf[x], cu->intra_mip_flag, width); - fc->tab.imtf[x] = intra_mip_transposed_flag; - fc->tab.imm[x] = intra_mip_mode; + const uint8_t mip_info = pack_mip_info(cu->intra_mip_flag, + intra_mip_transposed_flag, intra_mip_mode); + memset(&fc->tab.imf[x], mip_info, width); x += pps->min_cb_width; } cu->intra_pred_mode_y = intra_mip_mode; @@ -1493,7 +1499,7 @@ static int hls_merge_data(VVCLocalContext *lc) static void hls_mvd_coding(VVCLocalContext *lc, Mv* mvd) { - int16_t mv[2]; + int32_t mv[2]; for (int i = 0; i < 2; i++) { mv[i] = ff_vvc_abs_mvd_greater0_flag(lc); diff --git a/src/ExtLib/ffmpeg/libavcodec/vvc/dec.c b/src/ExtLib/ffmpeg/libavcodec/vvc/dec.c index f558ee1a7e..6fa1c94275 100644 --- a/src/ExtLib/ffmpeg/libavcodec/vvc/dec.c +++ b/src/ExtLib/ffmpeg/libavcodec/vvc/dec.c @@ -128,7 +128,6 @@ static void min_cb_tl_init(TabList *l, VVCFrameContext *fc) tl_init(l, 1, changed); TL_ADD(imf, pic_size_in_min_cb); - TL_ADD(imm, pic_size_in_min_cb); for (int i = LUMA; i <= CHROMA; i++) TL_ADD(cb_width[i], pic_size_in_min_cb); //is_a0_available requires this @@ -143,7 
+142,6 @@ static void min_cb_nz_tl_init(TabList *l, VVCFrameContext *fc) tl_init(l, 0, changed); TL_ADD(skip, pic_size_in_min_cb); - TL_ADD(imtf, pic_size_in_min_cb); TL_ADD(ipm, pic_size_in_min_cb); for (int i = LUMA; i <= CHROMA; i++) { diff --git a/src/ExtLib/ffmpeg/libavcodec/vvc/dec.h b/src/ExtLib/ffmpeg/libavcodec/vvc/dec.h index f7cd5b678c..0f8f1f721d 100644 --- a/src/ExtLib/ffmpeg/libavcodec/vvc/dec.h +++ b/src/ExtLib/ffmpeg/libavcodec/vvc/dec.h @@ -161,9 +161,7 @@ typedef struct VVCFrameContext { uint8_t *skip; ///< CuSkipFlag[][] uint8_t *ispmf; ///< intra_sub_partitions_mode_flag uint8_t *msm[2]; ///< MttSplitMode[][][] in 32 pixels - uint8_t *imf; ///< IntraMipFlag[][] - uint8_t *imtf; ///< intra_mip_transposed_flag[][] - uint8_t *imm; ///< intra_mip_mode[][] + uint8_t *imf; ///< IntraMipFlag[][], intra_mip_transposed_flag[][], intra_mip_mode[][] uint8_t *ipm; ///< IntraPredModeY[][] uint8_t *cpm[2]; ///< CuPredMode[][][] uint8_t *msf; ///< MergeSubblockFlag[][] diff --git a/src/ExtLib/ffmpeg/libavcodec/vvc/dsp.c b/src/ExtLib/ffmpeg/libavcodec/vvc/dsp.c index 9bfa46b03d..af392f2754 100644 --- a/src/ExtLib/ffmpeg/libavcodec/vvc/dsp.c +++ b/src/ExtLib/ffmpeg/libavcodec/vvc/dsp.c @@ -44,6 +44,15 @@ static int vvc_sad(const int16_t *src0, const int16_t *src1, int dx, int dy, return sad; } +static av_always_inline void unpack_mip_info(int *intra_mip_transposed_flag, + int *intra_mip_mode, const uint8_t mip_info) +{ + if (intra_mip_transposed_flag) + *intra_mip_transposed_flag = (mip_info >> 1) & 0x1; + if (intra_mip_mode) + *intra_mip_mode = (mip_info >> 2) & 0xf; +} + typedef struct IntraEdgeParams { uint8_t* top; uint8_t* left; diff --git a/src/ExtLib/ffmpeg/libavcodec/vvc/intra_template.c b/src/ExtLib/ffmpeg/libavcodec/vvc/intra_template.c index 62342c8142..440ac5b6cc 100644 --- a/src/ExtLib/ffmpeg/libavcodec/vvc/intra_template.c +++ b/src/ExtLib/ffmpeg/libavcodec/vvc/intra_template.c @@ -627,8 +627,9 @@ static void FUNC(intra_pred)(const VVCLocalContext 
*lc, int x0, int y0, FUNC(prepare_intra_edge_params)(lc, &edge, src, stride, x, y, w, h, c_idx, is_intra_mip, mode, ref_idx, need_pdpc); if (is_intra_mip) { - int intra_mip_transposed_flag = SAMPLE_CTB(fc->tab.imtf, x_cb, y_cb); - int intra_mip_mode = SAMPLE_CTB(fc->tab.imm, x_cb, y_cb); + int intra_mip_transposed_flag; + int intra_mip_mode; + unpack_mip_info(&intra_mip_transposed_flag, &intra_mip_mode, intra_mip_flag); fc->vvcdsp.intra.pred_mip((uint8_t *)src, edge.top, edge.left, w, h, stride, intra_mip_mode, intra_mip_transposed_flag); diff --git a/src/ExtLib/ffmpeg/libavcodec/vvc/mvs.c b/src/ExtLib/ffmpeg/libavcodec/vvc/mvs.c index 86ad310035..566df158a8 100644 --- a/src/ExtLib/ffmpeg/libavcodec/vvc/mvs.c +++ b/src/ExtLib/ffmpeg/libavcodec/vvc/mvs.c @@ -1627,12 +1627,12 @@ static int ibc_spatial_candidates(const VVCLocalContext *lc, const int merge_idx init_neighbour_context(&nctx, lc); - if (check_available(a1, lc, 1)) { + if (check_available(a1, lc, 0)) { cand_list[num_cands++] = TAB_MVF(a1->x, a1->y).mv[L0]; if (num_cands > merge_idx) return 1; } - if (check_available(b1, lc, 1)) { + if (check_available(b1, lc, 0)) { const MvField *mvf = &TAB_MVF(b1->x, b1->y); if (!num_cands || !IS_SAME_MV(&cand_list[0], mvf->mv)) { cand_list[num_cands++] = mvf->mv[L0]; diff --git a/src/ExtLib/ffmpeg/libavcodec/vvc/ps.c b/src/ExtLib/ffmpeg/libavcodec/vvc/ps.c index 14cedfd1b3..283a7440b7 100644 --- a/src/ExtLib/ffmpeg/libavcodec/vvc/ps.c +++ b/src/ExtLib/ffmpeg/libavcodec/vvc/ps.c @@ -742,7 +742,7 @@ static int lmcs_derive_lut(VVCLMCS *lmcs, const H266RawAPS *rlmcs, const H266Raw return AVERROR_INVALIDDATA; lmcs->min_bin_idx = rlmcs->lmcs_min_bin_idx; - lmcs->max_bin_idx = LMCS_MAX_BIN_SIZE - 1 - rlmcs->lmcs_min_bin_idx; + lmcs->max_bin_idx = LMCS_MAX_BIN_SIZE - 1 - rlmcs->lmcs_delta_max_bin_idx; memset(cw, 0, sizeof(cw)); for (int i = lmcs->min_bin_idx; i <= lmcs->max_bin_idx; i++) @@ -1113,17 +1113,17 @@ static void scaling_derive(VVCScalingList *sl, const H266RawAPS 
*aps) //dc if (id >= SL_START_16x16) { if (!aps->scaling_list_copy_mode_flag[id] && !aps->scaling_list_pred_mode_flag[id]) { - sl->scaling_matrix_dc_rec[id - SL_START_16x16] = 8; + dc += 8; } else if (!aps->scaling_list_pred_id_delta[id]) { - sl->scaling_matrix_dc_rec[id - SL_START_16x16] = 16; + dc += 16; } else { const int ref_id = id - aps->scaling_list_pred_id_delta[id]; if (ref_id >= SL_START_16x16) dc += sl->scaling_matrix_dc_rec[ref_id - SL_START_16x16]; else dc += sl->scaling_matrix_rec[ref_id][0]; - sl->scaling_matrix_dc_rec[id - SL_START_16x16] = dc & 255; } + sl->scaling_matrix_dc_rec[id - SL_START_16x16] = dc & 255; } //ac diff --git a/src/ExtLib/ffmpeg/libavutil/channel_layout.c b/src/ExtLib/ffmpeg/libavutil/channel_layout.c index 0f320627ae..f4eb8a593d 100644 --- a/src/ExtLib/ffmpeg/libavutil/channel_layout.c +++ b/src/ExtLib/ffmpeg/libavutil/channel_layout.c @@ -221,6 +221,7 @@ static const struct channel_layout_name channel_layout_map[] = { { "7.1.4", AV_CHANNEL_LAYOUT_7POINT1POINT4_BACK }, { "7.2.3", AV_CHANNEL_LAYOUT_7POINT2POINT3 }, { "9.1.4", AV_CHANNEL_LAYOUT_9POINT1POINT4_BACK }, + { "9.1.6", AV_CHANNEL_LAYOUT_9POINT1POINT6 }, { "hexadecagonal", AV_CHANNEL_LAYOUT_HEXADECAGONAL }, { "binaural", AV_CHANNEL_LAYOUT_BINAURAL }, { "downmix", AV_CHANNEL_LAYOUT_STEREO_DOWNMIX, }, diff --git a/src/ExtLib/ffmpeg/libavutil/channel_layout.h b/src/ExtLib/ffmpeg/libavutil/channel_layout.h index 2012a72a53..1462badc93 100644 --- a/src/ExtLib/ffmpeg/libavutil/channel_layout.h +++ b/src/ExtLib/ffmpeg/libavutil/channel_layout.h @@ -248,10 +248,11 @@ enum AVChannelOrder { #define AV_CH_LAYOUT_7POINT1POINT4_BACK (AV_CH_LAYOUT_7POINT1POINT2|AV_CH_TOP_BACK_LEFT|AV_CH_TOP_BACK_RIGHT) #define AV_CH_LAYOUT_7POINT2POINT3 (AV_CH_LAYOUT_7POINT1POINT2|AV_CH_TOP_BACK_CENTER|AV_CH_LOW_FREQUENCY_2) #define AV_CH_LAYOUT_9POINT1POINT4_BACK (AV_CH_LAYOUT_7POINT1POINT4_BACK|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER) +#define AV_CH_LAYOUT_9POINT1POINT6 
(AV_CH_LAYOUT_9POINT1POINT4_BACK|AV_CH_TOP_SIDE_LEFT|AV_CH_TOP_SIDE_RIGHT) #define AV_CH_LAYOUT_HEXADECAGONAL (AV_CH_LAYOUT_OCTAGONAL|AV_CH_WIDE_LEFT|AV_CH_WIDE_RIGHT|AV_CH_TOP_BACK_LEFT|AV_CH_TOP_BACK_RIGHT|AV_CH_TOP_BACK_CENTER|AV_CH_TOP_FRONT_CENTER|AV_CH_TOP_FRONT_LEFT|AV_CH_TOP_FRONT_RIGHT) #define AV_CH_LAYOUT_BINAURAL (AV_CH_BINAURAL_LEFT|AV_CH_BINAURAL_RIGHT) #define AV_CH_LAYOUT_STEREO_DOWNMIX (AV_CH_STEREO_LEFT|AV_CH_STEREO_RIGHT) -#define AV_CH_LAYOUT_22POINT2 (AV_CH_LAYOUT_7POINT1POINT4_BACK|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER|AV_CH_BACK_CENTER|AV_CH_LOW_FREQUENCY_2|AV_CH_TOP_FRONT_CENTER|AV_CH_TOP_CENTER|AV_CH_TOP_SIDE_LEFT|AV_CH_TOP_SIDE_RIGHT|AV_CH_TOP_BACK_CENTER|AV_CH_BOTTOM_FRONT_CENTER|AV_CH_BOTTOM_FRONT_LEFT|AV_CH_BOTTOM_FRONT_RIGHT) +#define AV_CH_LAYOUT_22POINT2 (AV_CH_LAYOUT_9POINT1POINT6|AV_CH_BACK_CENTER|AV_CH_LOW_FREQUENCY_2|AV_CH_TOP_FRONT_CENTER|AV_CH_TOP_CENTER|AV_CH_TOP_BACK_CENTER|AV_CH_BOTTOM_FRONT_CENTER|AV_CH_BOTTOM_FRONT_LEFT|AV_CH_BOTTOM_FRONT_RIGHT) #define AV_CH_LAYOUT_7POINT1_TOP_BACK AV_CH_LAYOUT_5POINT1POINT2_BACK @@ -423,6 +424,7 @@ typedef struct AVChannelLayout { #define AV_CHANNEL_LAYOUT_7POINT1POINT4_BACK AV_CHANNEL_LAYOUT_MASK(12, AV_CH_LAYOUT_7POINT1POINT4_BACK) #define AV_CHANNEL_LAYOUT_7POINT2POINT3 AV_CHANNEL_LAYOUT_MASK(12, AV_CH_LAYOUT_7POINT2POINT3) #define AV_CHANNEL_LAYOUT_9POINT1POINT4_BACK AV_CHANNEL_LAYOUT_MASK(14, AV_CH_LAYOUT_9POINT1POINT4_BACK) +#define AV_CHANNEL_LAYOUT_9POINT1POINT6 AV_CHANNEL_LAYOUT_MASK(16, AV_CH_LAYOUT_9POINT1POINT6) #define AV_CHANNEL_LAYOUT_HEXADECAGONAL AV_CHANNEL_LAYOUT_MASK(16, AV_CH_LAYOUT_HEXADECAGONAL) #define AV_CHANNEL_LAYOUT_BINAURAL AV_CHANNEL_LAYOUT_MASK(2, AV_CH_LAYOUT_BINAURAL) #define AV_CHANNEL_LAYOUT_STEREO_DOWNMIX AV_CHANNEL_LAYOUT_MASK(2, AV_CH_LAYOUT_STEREO_DOWNMIX) diff --git a/src/ExtLib/ffmpeg/libavutil/cpu.c b/src/ExtLib/ffmpeg/libavutil/cpu.c index b118130370..11e136610a 100644 --- a/src/ExtLib/ffmpeg/libavutil/cpu.c +++ 
b/src/ExtLib/ffmpeg/libavutil/cpu.c @@ -68,6 +68,8 @@ static int get_cpu_flags(void) return ff_get_cpu_flags_ppc(); #elif ARCH_RISCV return ff_get_cpu_flags_riscv(); +#elif ARCH_WASM + return ff_get_cpu_flags_wasm(); #elif ARCH_X86 return ff_get_cpu_flags_x86(); #elif ARCH_LOONGARCH @@ -200,6 +202,8 @@ int av_parse_cpu_caps(unsigned *flags, const char *s) { "zbb", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVB_BASIC }, .unit = "flags" }, { "zvbb", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RV_ZVBB }, .unit = "flags" }, { "misaligned", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RV_MISALIGNED }, .unit = "flags" }, +#elif ARCH_WASM + { "simd128", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SIMD128 }, .unit = "flags" }, #endif { NULL }, }; @@ -283,6 +287,8 @@ size_t av_cpu_max_align(void) return ff_get_cpu_max_align_arm(); #elif ARCH_PPC return ff_get_cpu_max_align_ppc(); +#elif ARCH_WASM + return ff_get_cpu_max_align_wasm(); #elif ARCH_X86 return ff_get_cpu_max_align_x86(); #elif ARCH_LOONGARCH diff --git a/src/ExtLib/ffmpeg/libavutil/cpu.h b/src/ExtLib/ffmpeg/libavutil/cpu.h index 91d3c9e0a4..335d9bfa39 100644 --- a/src/ExtLib/ffmpeg/libavutil/cpu.h +++ b/src/ExtLib/ffmpeg/libavutil/cpu.h @@ -101,6 +101,9 @@ #define AV_CPU_FLAG_RV_MISALIGNED (1 <<10) ///< Fast misaligned accesses #define AV_CPU_FLAG_RVB (1 <<11) ///< B (bit manipulations) +// WASM extensions +#define AV_CPU_FLAG_SIMD128 (1 << 0) + /** * Return the flags which specify extensions supported by the CPU. 
* The returned value is affected by av_force_cpu_flags() if that was used diff --git a/src/ExtLib/ffmpeg/libavutil/cpu_internal.h b/src/ExtLib/ffmpeg/libavutil/cpu_internal.h index 5f8abcac3c..7dea9ad789 100644 --- a/src/ExtLib/ffmpeg/libavutil/cpu_internal.h +++ b/src/ExtLib/ffmpeg/libavutil/cpu_internal.h @@ -49,6 +49,7 @@ int ff_get_cpu_flags_aarch64(void); int ff_get_cpu_flags_arm(void); int ff_get_cpu_flags_ppc(void); int ff_get_cpu_flags_riscv(void); +int ff_get_cpu_flags_wasm(void); int ff_get_cpu_flags_x86(void); int ff_get_cpu_flags_loongarch(void); @@ -56,6 +57,7 @@ size_t ff_get_cpu_max_align_mips(void); size_t ff_get_cpu_max_align_aarch64(void); size_t ff_get_cpu_max_align_arm(void); size_t ff_get_cpu_max_align_ppc(void); +size_t ff_get_cpu_max_align_wasm(void); size_t ff_get_cpu_max_align_x86(void); size_t ff_get_cpu_max_align_loongarch(void); diff --git a/src/ExtLib/ffmpeg/libavutil/csp.c b/src/ExtLib/ffmpeg/libavutil/csp.c index 7ef822c60b..d69599f513 100644 --- a/src/ExtLib/ffmpeg/libavutil/csp.c +++ b/src/ExtLib/ffmpeg/libavutil/csp.c @@ -170,16 +170,36 @@ static double trc_bt709(double Lc) : a * pow(Lc, 0.45) - (a - 1.0); } +static double trc_bt709_inv(double E) +{ + const double a = BT709_alpha; + const double b = 4.500 * BT709_beta; + + return (0.0 > E) ? 0.0 + : ( b > E) ? E / 4.500 + : pow((E + (a - 1.0)) / a, 1.0 / 0.45); +} + static double trc_gamma22(double Lc) { return (0.0 > Lc) ? 0.0 : pow(Lc, 1.0/ 2.2); } +static double trc_gamma22_inv(double E) +{ + return (0.0 > E) ? 0.0 : pow(E, 2.2); +} + static double trc_gamma28(double Lc) { return (0.0 > Lc) ? 0.0 : pow(Lc, 1.0/ 2.8); } +static double trc_gamma28_inv(double E) +{ + return (0.0 > E) ? 
0.0 : pow(E, 2.8); +} + static double trc_smpte240M(double Lc) { const double a = 1.1115; @@ -190,6 +210,16 @@ static double trc_smpte240M(double Lc) : a * pow(Lc, 0.45) - (a - 1.0); } +static double trc_smpte240M_inv(double E) +{ + const double a = 1.1115; + const double b = 4.000 * 0.0228; + + return (0.0 > E) ? 0.0 + : ( b > E) ? E / 4.000 + : pow((E + (a - 1.0)) / a, 1.0 / 0.45); +} + static double trc_linear(double Lc) { return Lc; @@ -200,12 +230,22 @@ static double trc_log(double Lc) return (0.01 > Lc) ? 0.0 : 1.0 + log10(Lc) / 2.0; } +static double trc_log_inv(double E) +{ + return (0.0 > E) ? 0.01 : pow(10.0, 2.0 * (E - 1.0)); +} + static double trc_log_sqrt(double Lc) { // sqrt(10) / 1000 return (0.00316227766 > Lc) ? 0.0 : 1.0 + log10(Lc) / 2.5; } +static double trc_log_sqrt_inv(double E) +{ + return (0.0 > E) ? 0.00316227766 : pow(10.0, 2.5 * (E - 1.0)); +} + static double trc_iec61966_2_4(double Lc) { const double a = BT709_alpha; @@ -216,6 +256,16 @@ static double trc_iec61966_2_4(double Lc) : a * pow( Lc, 0.45) - (a - 1.0); } +static double trc_iec61966_2_4_inv(double E) +{ + const double a = BT709_alpha; + const double b = 4.500 * BT709_beta; + + return (-b >= E) ? -pow((-E + (a - 1.0)) / a, 1.0 / 0.45) + : ( b > E) ? E / 4.500 + : pow(( E + (a - 1.0)) / a, 1.0 / 0.45); +} + static double trc_bt1361(double Lc) { const double a = BT709_alpha; @@ -226,6 +276,16 @@ static double trc_bt1361(double Lc) : a * pow( Lc, 0.45) - (a - 1.0); } +static double trc_bt1361_inv(double E) +{ + const double a = BT709_alpha; + const double b = 4.500 * BT709_beta; + + return (-0.02025 >= E) ? -pow((-4.0 * E - (a - 1.0)) / a, 1.0 / 0.45) / 4.0 + : ( b > E) ? 
E / 4.500 + : pow(( E + (a - 1.0)) / a, 1.0 / 0.45); +} + static double trc_iec61966_2_1(double Lc) { const double a = 1.055; @@ -236,13 +296,30 @@ static double trc_iec61966_2_1(double Lc) : a * pow(Lc, 1.0 / 2.4) - (a - 1.0); } +static double trc_iec61966_2_1_inv(double E) +{ + const double a = 1.055; + const double b = 12.92 * 0.0031308; + + return (0.0 > E) ? 0.0 + : ( b > E) ? E / 12.92 + : pow((E + (a - 1.0)) / a, 2.4); + return E; +} + +#define PQ_c1 ( 3424.0 / 4096.0) /* c3-c2 + 1 */ +#define PQ_c2 ( 32.0 * 2413.0 / 4096.0) +#define PQ_c3 ( 32.0 * 2392.0 / 4096.0) +#define PQ_m (128.0 * 2523.0 / 4096.0) +#define PQ_n ( 0.25 * 2610.0 / 4096.0) + static double trc_smpte_st2084(double Lc) { - const double c1 = 3424.0 / 4096.0; // c3-c2 + 1 - const double c2 = 32.0 * 2413.0 / 4096.0; - const double c3 = 32.0 * 2392.0 / 4096.0; - const double m = 128.0 * 2523.0 / 4096.0; - const double n = 0.25 * 2610.0 / 4096.0; + const double c1 = PQ_c1; + const double c2 = PQ_c2; + const double c3 = PQ_c3; + const double m = PQ_m; + const double n = PQ_n; const double L = Lc / 10000.0; const double Ln = pow(L, n); @@ -251,24 +328,56 @@ static double trc_smpte_st2084(double Lc) } +static double trc_smpte_st2084_inv(double E) +{ + const double c1 = PQ_c1; + const double c2 = PQ_c2; + const double c3 = PQ_c3; + const double m = PQ_m; + const double n = PQ_n; + const double Em = pow(E, 1.0 / m); + + return (c1 > Em) ? 0.0 + : 10000.0 * pow((Em - c1) / (c2 - c3 * Em), 1.0 / n); +} + +#define DCI_L 48.00 +#define DCI_P 52.37 + static double trc_smpte_st428_1(double Lc) { - return (0.0 > Lc) ? 0.0 - : pow(48.0 * Lc / 52.37, 1.0 / 2.6); + return (0.0 > Lc) ? 0.0 : pow(DCI_L / DCI_P * Lc, 1.0 / 2.6); +} + +static double trc_smpte_st428_1_inv(double E) +{ + return (0.0 > E) ? 
0.0 : DCI_P / DCI_L * pow(E, 2.6); } +#define HLG_a 0.17883277 +#define HLG_b 0.28466892 +#define HLG_c 0.55991073 static double trc_arib_std_b67(double Lc) { // The function uses the definition from HEVC, which assumes that the peak // white is input level = 1. (this is equivalent to scaling E = Lc * 12 and // using the definition from the ARIB STD-B67 spec) - const double a = 0.17883277; - const double b = 0.28466892; - const double c = 0.55991073; + const double a = HLG_a; + const double b = HLG_b; + const double c = HLG_c; return (0.0 > Lc) ? 0.0 : (Lc <= 1.0 / 12.0 ? sqrt(3.0 * Lc) : a * log(12.0 * Lc - b) + c); } +static double trc_arib_std_b67_inv(double E) +{ + const double a = HLG_a; + const double b = HLG_b; + const double c = HLG_c; + return (0.0 > E) ? 0.0 : + (E <= 0.5 ? E * E / 3.0 : (exp((E - c) / a) + b) / 12.0); +} + static const av_csp_trc_function trc_funcs[AVCOL_TRC_NB] = { [AVCOL_TRC_BT709] = trc_bt709, [AVCOL_TRC_GAMMA22] = trc_gamma22, @@ -290,11 +399,238 @@ static const av_csp_trc_function trc_funcs[AVCOL_TRC_NB] = { av_csp_trc_function av_csp_trc_func_from_id(enum AVColorTransferCharacteristic trc) { - av_csp_trc_function func; if (trc >= AVCOL_TRC_NB) return NULL; - func = trc_funcs[trc]; - if (!func) + return trc_funcs[trc]; +} + +static const av_csp_trc_function trc_inv_funcs[AVCOL_TRC_NB] = { + [AVCOL_TRC_BT709] = trc_bt709_inv, + [AVCOL_TRC_GAMMA22] = trc_gamma22_inv, + [AVCOL_TRC_GAMMA28] = trc_gamma28_inv, + [AVCOL_TRC_SMPTE170M] = trc_bt709_inv, + [AVCOL_TRC_SMPTE240M] = trc_smpte240M_inv, + [AVCOL_TRC_LINEAR] = trc_linear, + [AVCOL_TRC_LOG] = trc_log_inv, + [AVCOL_TRC_LOG_SQRT] = trc_log_sqrt_inv, + [AVCOL_TRC_IEC61966_2_4] = trc_iec61966_2_4_inv, + [AVCOL_TRC_BT1361_ECG] = trc_bt1361_inv, + [AVCOL_TRC_IEC61966_2_1] = trc_iec61966_2_1_inv, + [AVCOL_TRC_BT2020_10] = trc_bt709_inv, + [AVCOL_TRC_BT2020_12] = trc_bt709_inv, + [AVCOL_TRC_SMPTE2084] = trc_smpte_st2084_inv, + [AVCOL_TRC_SMPTE428] = trc_smpte_st428_1_inv, + 
[AVCOL_TRC_ARIB_STD_B67] = trc_arib_std_b67_inv, +}; + +av_csp_trc_function av_csp_trc_func_inv_from_id(enum AVColorTransferCharacteristic trc) +{ + if (trc >= AVCOL_TRC_NB) + return NULL; + return trc_inv_funcs[trc]; +} + +static void eotf_linear(const double Lw, const double Lb, double E[3]) +{ + for (int i = 0; i < 3; i++) + E[i] = (Lw - Lb) * E[i] + Lb; +} + +static void eotf_linear_inv(const double Lw, const double Lb, double L[3]) +{ + for (int i = 0; i < 3; i++) + L[i] = (L[i] - Lb) / (Lw - Lb); +} + +#define WRAP_SDR_OETF(name) \ +static void oetf_##name(double L[3]) \ +{ \ + for (int i = 0; i < 3; i++) \ + L[i] = trc_##name(L[i]); \ +} \ + \ +static void oetf_##name##_inv(double E[3]) \ +{ \ + for (int i = 0; i < 3; i++) \ + E[i] = trc_##name##_inv(E[i]); \ +} + +WRAP_SDR_OETF(gamma22) +WRAP_SDR_OETF(gamma28) +WRAP_SDR_OETF(iec61966_2_1) + +#define WRAP_SDR_EOTF(name) \ +static void eotf_##name(double Lw, double Lb, double E[3]) \ +{ \ + oetf_##name##_inv(E); \ + eotf_linear(Lw, Lb, E); \ +} \ + \ +static void eotf_##name##_inv(double Lw, double Lb, double L[3]) \ +{ \ + eotf_linear_inv(Lw, Lb, L); \ + oetf_##name(L); \ +} + +WRAP_SDR_EOTF(gamma22) +WRAP_SDR_EOTF(gamma28) +WRAP_SDR_EOTF(iec61966_2_1) + +static void eotf_bt1886(const double Lw, const double Lb, double E[3]) +{ + const double Lw_inv = pow(Lw, 1.0 / 2.4); + const double Lb_inv = pow(Lb, 1.0 / 2.4); + const double a = pow(Lw_inv - Lb_inv, 2.4); + const double b = Lb_inv / (Lw_inv - Lb_inv); + + for (int i = 0; i < 3; i++) + E[i] = (-b > E[i]) ? 0.0 : a * pow(E[i] + b, 2.4); +} + +static void eotf_bt1886_inv(const double Lw, const double Lb, double L[3]) +{ + const double Lw_inv = pow(Lw, 1.0 / 2.4); + const double Lb_inv = pow(Lb, 1.0 / 2.4); + const double a = pow(Lw_inv - Lb_inv, 2.4); + const double b = Lb_inv / (Lw_inv - Lb_inv); + + for (int i = 0; i < 3; i++) + L[i] = (0.0 > L[i]) ? 
0.0 : pow(L[i] / a, 1.0 / 2.4) - b; +} + +static void eotf_smpte_st2084(const double Lw, const double Lb, double E[3]) +{ + for (int i = 0; i < 3; i++) + E[i] = trc_smpte_st2084_inv(E[i]); +} + +static void eotf_smpte_st2084_inv(const double Lw, const double Lb, double L[3]) +{ + for (int i = 0; i < 3; i++) + L[i] = trc_smpte_st2084(L[i]); +} + +/* This implementation assumes an SMPTE RP 431-2 reference projector (DCI) */ +#define DCI_L 48.00 +#define DCI_P 52.37 +#define DCI_X (42.94 / DCI_L) +#define DCI_Z (45.82 / DCI_L) + +static void eotf_smpte_st428_1(const double Lw_Y, const double Lb_Y, double E[3]) +{ + const double Lw[3] = { DCI_X * Lw_Y, Lw_Y, DCI_Z * Lw_Y }; + const double Lb[3] = { DCI_X * Lb_Y, Lb_Y, DCI_Z * Lb_Y }; + + for (int i = 0; i < 3; i++) { + E[i] = (0.0 > E[i]) ? 0.0 : pow(E[i], 2.6) * DCI_P / DCI_L; + E[i] = E[i] * (Lw[i] - Lb[i]) + Lb[i]; + } +} + +static void eotf_smpte_st428_1_inv(const double Lw_Y, const double Lb_Y, double L[3]) +{ + const double Lw[3] = { DCI_X * Lw_Y, Lw_Y, DCI_Z * Lw_Y }; + const double Lb[3] = { DCI_X * Lb_Y, Lb_Y, DCI_Z * Lb_Y }; + + for (int i = 0; i < 3; i++) { + L[i] = (L[i] - Lb[i]) / (Lw[i] - Lb[i]); + L[i] = (0.0 > L[i]) ? 0.0 : pow(L[i] * DCI_L / DCI_P, 1.0 / 2.6); + } +} + +static void eotf_arib_std_b67(const double Lw, const double Lb, double E[3]) +{ + const double gamma = fmax(1.2 + 0.42 * log10(Lw / 1000.0), 1.0); + + /** + * Note: This equation is technically only accurate if the contrast ratio + * Lw:Lb is greater than 12:1; otherwise we would need to use a different, + * significantly more complicated solution. Ignore this as a highly + * degenerate case, since any real world reference display will have a + * static contrast ratio multiple orders of magnitude higher. 
+ */ + const double beta = sqrt(3 * pow(Lb / Lw, 1.0 / gamma)); + double luma; + + for (int i = 0; i < 3; i++) + E[i] = trc_arib_std_b67_inv((1 - beta) * E[i] + beta); + + luma = 0.2627 * E[0] + 0.6780 * E[1] + 0.0593 * E[2]; + luma = pow(fmax(luma, 0.0), gamma - 1.0); + for (int i = 0; i < 3; i++) + E[i] *= Lw * luma; +} + +static void eotf_arib_std_b67_inv(const double Lw, const double Lb, double L[3]) +{ + const double gamma = fmax(1.2 + 0.42 * log10(Lw / 1000.0), 1.0); + const double beta = sqrt(3 * pow(Lb / Lw, 1 / gamma)); + double luma = 0.2627 * L[0] + 0.6780 * L[1] + 0.0593 * L[2]; + + if (luma > 0.0) { + luma = pow(luma / Lw, (1 - gamma) / gamma); + for (int i = 0; i < 3; i++) + L[i] *= luma / Lw; + } else { + L[0] = L[1] = L[2] = 0.0; + } + + for (int i = 0; i < 3; i++) + L[i] = (trc_arib_std_b67(L[i]) - beta) / (1 - beta); +} + +static const av_csp_eotf_function eotf_funcs[AVCOL_TRC_NB] = { + [AVCOL_TRC_BT709] = eotf_bt1886, + [AVCOL_TRC_GAMMA22] = eotf_gamma22, + [AVCOL_TRC_GAMMA28] = eotf_gamma28, + [AVCOL_TRC_SMPTE170M] = eotf_bt1886, + [AVCOL_TRC_SMPTE240M] = eotf_bt1886, + [AVCOL_TRC_LINEAR] = eotf_linear, + /* There is no EOTF associated with these logarithmic encodings, since they + * are defined purely for transmission of scene referred data. 
*/ + [AVCOL_TRC_LOG] = NULL, + [AVCOL_TRC_LOG_SQRT] = NULL, + /* BT.1886 is already defined for values below 0.0, as far as physically + * meaningful, so we can directly use it for extended range encodings */ + [AVCOL_TRC_IEC61966_2_4] = eotf_bt1886, + [AVCOL_TRC_BT1361_ECG] = eotf_bt1886, + [AVCOL_TRC_IEC61966_2_1] = eotf_iec61966_2_1, + [AVCOL_TRC_BT2020_10] = eotf_bt1886, + [AVCOL_TRC_BT2020_12] = eotf_bt1886, + [AVCOL_TRC_SMPTE2084] = eotf_smpte_st2084, + [AVCOL_TRC_SMPTE428] = eotf_smpte_st428_1, + [AVCOL_TRC_ARIB_STD_B67] = eotf_arib_std_b67, +}; + +av_csp_eotf_function av_csp_itu_eotf(enum AVColorTransferCharacteristic trc) +{ + if (trc < 0 || trc >= AVCOL_TRC_NB) + return NULL; + return eotf_funcs[trc]; +} + +static const av_csp_eotf_function eotf_inv_funcs[AVCOL_TRC_NB] = { + [AVCOL_TRC_BT709] = eotf_bt1886_inv, + [AVCOL_TRC_GAMMA22] = eotf_gamma22_inv, + [AVCOL_TRC_GAMMA28] = eotf_gamma28_inv, + [AVCOL_TRC_SMPTE170M] = eotf_bt1886_inv, + [AVCOL_TRC_SMPTE240M] = eotf_bt1886_inv, + [AVCOL_TRC_LINEAR] = eotf_linear_inv, + [AVCOL_TRC_LOG] = NULL, + [AVCOL_TRC_LOG_SQRT] = NULL, + [AVCOL_TRC_IEC61966_2_4] = eotf_bt1886_inv, + [AVCOL_TRC_BT1361_ECG] = eotf_bt1886_inv, + [AVCOL_TRC_IEC61966_2_1] = eotf_iec61966_2_1_inv, + [AVCOL_TRC_BT2020_10] = eotf_bt1886_inv, + [AVCOL_TRC_BT2020_12] = eotf_bt1886_inv, + [AVCOL_TRC_SMPTE2084] = eotf_smpte_st2084_inv, + [AVCOL_TRC_SMPTE428] = eotf_smpte_st428_1_inv, + [AVCOL_TRC_ARIB_STD_B67] = eotf_arib_std_b67_inv, +}; + +av_csp_eotf_function av_csp_itu_eotf_inv(enum AVColorTransferCharacteristic trc) +{ + if (trc < 0 || trc >= AVCOL_TRC_NB) return NULL; - return func; + return eotf_inv_funcs[trc]; } diff --git a/src/ExtLib/ffmpeg/libavutil/csp.h b/src/ExtLib/ffmpeg/libavutil/csp.h index 73bce52bc0..9b74c631d2 100644 --- a/src/ExtLib/ffmpeg/libavutil/csp.h +++ b/src/ExtLib/ffmpeg/libavutil/csp.h @@ -81,8 +81,12 @@ typedef struct AVColorPrimariesDesc { } AVColorPrimariesDesc; /** - * Function pointer representing a double -> 
double transfer function that performs - * an EOTF transfer inversion. This function outputs linear light. + * Function pointer representing a double -> double transfer function that + * performs either an OETF transfer function, or alternatively an inverse EOTF + * function (in particular, for SMPTE ST 2084 / PQ). This function inputs + * linear light, and outputs gamma encoded light. + * + * See ITU-T H.273 for more information. */ typedef double (*av_csp_trc_function)(double); @@ -143,6 +147,44 @@ double av_csp_approximate_trc_gamma(enum AVColorTransferCharacteristic trc); */ av_csp_trc_function av_csp_trc_func_from_id(enum AVColorTransferCharacteristic trc); +/** + * Returns the mathematical inverse of the corresponding TRC function. + */ +av_csp_trc_function av_csp_trc_func_inv_from_id(enum AVColorTransferCharacteristic trc); + +/** + * Function pointer representing an ITU EOTF transfer for a given reference + * display configuration. + * + * @param Lw The white point luminance of the display, in nits (cd/m^2). + * @param Lb The black point luminance of the display, in nits (cd/m^2). + */ +typedef void (*av_csp_eotf_function)(double Lw, double Lb, double c[3]); + +/** + * Returns the ITU EOTF corresponding to a given TRC. This converts from the + * signal level [0,1] to the raw output display luminance in nits (cd/m^2). + * This is done per channel in RGB space, except for AVCOL_TRC_SMPTE428, which + * assumes CIE XYZ in- and output. + * + * @return A pointer to the function implementing the given TRC, or NULL if no + * such function is defined. + * + * @note In general, the resulting function is defined (wherever possible) for + * out-of-range values, even though these values do not have a physical + * meaning on the given display. Users should clamp inputs (or outputs) + * if this behavior is not desired. + * + * This is also the case for functions like PQ, which are defined over an + * absolute signal range independent of the target display capabilities. 
+ */ +av_csp_eotf_function av_csp_itu_eotf(enum AVColorTransferCharacteristic trc); + +/** + * Returns the mathematical inverse of the corresponding EOTF. + */ +av_csp_eotf_function av_csp_itu_eotf_inv(enum AVColorTransferCharacteristic trc); + /** * @} */ diff --git a/src/ExtLib/ffmpeg/libavutil/version.h b/src/ExtLib/ffmpeg/libavutil/version.h index 6a4abcf7f5..e8c50ea4b2 100644 --- a/src/ExtLib/ffmpeg/libavutil/version.h +++ b/src/ExtLib/ffmpeg/libavutil/version.h @@ -79,8 +79,8 @@ */ #define LIBAVUTIL_VERSION_MAJOR 59 -#define LIBAVUTIL_VERSION_MINOR 47 -#define LIBAVUTIL_VERSION_MICRO 101 +#define LIBAVUTIL_VERSION_MINOR 50 +#define LIBAVUTIL_VERSION_MICRO 100 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ LIBAVUTIL_VERSION_MINOR, \ diff --git a/src/ExtLib/ffmpeg/libswscale/graph.c b/src/ExtLib/ffmpeg/libswscale/graph.c index ee9d9847a9..fbad1fe8c3 100644 --- a/src/ExtLib/ffmpeg/libswscale/graph.c +++ b/src/ExtLib/ffmpeg/libswscale/graph.c @@ -292,7 +292,7 @@ static void legacy_chr_pos(SwsGraph *graph, int *chr_pos, int override, int *war *chr_pos = override; } -static int init_legacy_subpass(SwsGraph *graph, SwsContext *sws, int cascaded, +static int init_legacy_subpass(SwsGraph *graph, SwsContext *sws, SwsPass *input, SwsPass **output) { SwsInternal *c = sws_internal(sws); @@ -308,11 +308,14 @@ static int init_legacy_subpass(SwsGraph *graph, SwsContext *sws, int cascaded, for (int i = 0; i < num_cascaded; i++) { SwsContext *sub = c->cascaded_context[i]; const int is_last = i + 1 == num_cascaded; - ret = init_legacy_subpass(graph, sub, 1, input, is_last ? output : &input); + ret = init_legacy_subpass(graph, sub, input, is_last ? 
output : &input); if (ret < 0) return ret; + /* Steal cascaded context, so we can free the parent */ + c->cascaded_context[i] = NULL; } + sws_free_context(&sws); return 0; } @@ -336,8 +339,7 @@ static int init_legacy_subpass(SwsGraph *graph, SwsContext *sws, int cascaded, if (!pass) return AVERROR(ENOMEM); pass->setup = setup_legacy_swscale; - if (!cascaded) /* parent context frees this automatically */ - pass->free = free_legacy_swscale; + pass->free = free_legacy_swscale; /** * For slice threading, we need to create sub contexts, similar to how @@ -452,7 +454,7 @@ static int add_legacy_sws_pass(SwsGraph *graph, SwsFormat src, SwsFormat dst, brightness, contrast, saturation); } - ret = init_legacy_subpass(graph, sws, 0, input, output); + ret = init_legacy_subpass(graph, sws, input, output); if (ret < 0) { sws_free_context(&sws); return ret; diff --git a/src/ExtLib/ffmpeg/libswscale/hscale.c b/src/ExtLib/ffmpeg/libswscale/hscale.c index 0e9de3fb46..6bdd922a2a 100644 --- a/src/ExtLib/ffmpeg/libswscale/hscale.c +++ b/src/ExtLib/ffmpeg/libswscale/hscale.c @@ -59,7 +59,8 @@ static int lum_h_scale(SwsInternal *c, SwsFilterDescriptor *desc, int sliceY, in } if (c->lumConvertRange) - c->lumConvertRange((int16_t*)dst[dst_pos], dstW); + c->lumConvertRange((int16_t*)dst[dst_pos], dstW, + c->lumConvertRange_coeff, c->lumConvertRange_offset); desc->dst->plane[0].sliceH += 1; @@ -192,7 +193,8 @@ static int chr_h_scale(SwsInternal *c, SwsFilterDescriptor *desc, int sliceY, in } if (c->chrConvertRange) - c->chrConvertRange((uint16_t*)dst1[dst_pos1+i], (uint16_t*)dst2[dst_pos2+i], dstW); + c->chrConvertRange((uint16_t*)dst1[dst_pos1+i], (uint16_t*)dst2[dst_pos2+i], dstW, + c->chrConvertRange_coeff, c->chrConvertRange_offset); desc->dst->plane[1].sliceH += 1; desc->dst->plane[2].sliceH += 1; diff --git a/src/ExtLib/ffmpeg/libswscale/swscale.c b/src/ExtLib/ffmpeg/libswscale/swscale.c index 3c4637c0a1..96634acfd6 100644 --- a/src/ExtLib/ffmpeg/libswscale/swscale.c +++ 
b/src/ExtLib/ffmpeg/libswscale/swscale.c @@ -156,75 +156,98 @@ static void hScale8To19_c(SwsInternal *c, int16_t *_dst, int dstW, // FIXME all pal and rgb srcFormats could do this conversion as well // FIXME all scalers more complex than bilinear could do half of this transform -static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width) +static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width, + uint32_t _coeff, int64_t _offset) { + uint16_t coeff = _coeff; + int32_t offset = _offset; int i; for (i = 0; i < width; i++) { - dstU[i] = (FFMIN(dstU[i], 30775) * 4663 - 9289992) >> 12; // -264 - dstV[i] = (FFMIN(dstV[i], 30775) * 4663 - 9289992) >> 12; // -264 + int U = (dstU[i] * coeff + offset) >> 14; + int V = (dstV[i] * coeff + offset) >> 14; + dstU[i] = FFMIN(U, (1 << 15) - 1); + dstV[i] = FFMIN(V, (1 << 15) - 1); } } -static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width) +static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width, + uint32_t _coeff, int64_t _offset) { + uint16_t coeff = _coeff; + int32_t offset = _offset; int i; for (i = 0; i < width; i++) { - dstU[i] = (dstU[i] * 1799 + 4081085) >> 11; // 1469 - dstV[i] = (dstV[i] * 1799 + 4081085) >> 11; // 1469 + dstU[i] = (dstU[i] * coeff + offset) >> 14; + dstV[i] = (dstV[i] * coeff + offset) >> 14; } } -static void lumRangeToJpeg_c(int16_t *dst, int width) +static void lumRangeToJpeg_c(int16_t *dst, int width, + uint32_t _coeff, int64_t _offset) { + uint16_t coeff = _coeff; + int32_t offset = _offset; int i; - for (i = 0; i < width; i++) - dst[i] = (FFMIN(dst[i], 30189) * 19077 - 39057361) >> 14; + for (i = 0; i < width; i++) { + int Y = (dst[i] * coeff + offset) >> 14; + dst[i] = FFMIN(Y, (1 << 15) - 1); + } } -static void lumRangeFromJpeg_c(int16_t *dst, int width) +static void lumRangeFromJpeg_c(int16_t *dst, int width, + uint32_t _coeff, int64_t _offset) { + uint16_t coeff = _coeff; + int32_t offset = _offset; int i; for (i = 0; i < width; i++) - 
dst[i] = (dst[i] * 14071 + 33561947) >> 14; + dst[i] = (dst[i] * coeff + offset) >> 14; } -static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width) +static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width, + uint32_t coeff, int64_t offset) { int i; int32_t *dstU = (int32_t *) _dstU; int32_t *dstV = (int32_t *) _dstV; for (i = 0; i < width; i++) { - dstU[i] = ((int)(FFMIN(dstU[i], 30775 << 4) * 4663U - (9289992 << 4))) >> 12; // -264 - dstV[i] = ((int)(FFMIN(dstV[i], 30775 << 4) * 4663U - (9289992 << 4))) >> 12; // -264 + int U = ((int64_t) dstU[i] * coeff + offset) >> 18; + int V = ((int64_t) dstV[i] * coeff + offset) >> 18; + dstU[i] = FFMIN(U, (1 << 19) - 1); + dstV[i] = FFMIN(V, (1 << 19) - 1); } } -static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width) +static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width, + uint32_t coeff, int64_t offset) { int i; int32_t *dstU = (int32_t *) _dstU; int32_t *dstV = (int32_t *) _dstV; for (i = 0; i < width; i++) { - dstU[i] = (dstU[i] * 1799 + (4081085 << 4)) >> 11; // 1469 - dstV[i] = (dstV[i] * 1799 + (4081085 << 4)) >> 11; // 1469 + dstU[i] = ((int64_t) dstU[i] * coeff + offset) >> 18; + dstV[i] = ((int64_t) dstV[i] * coeff + offset) >> 18; } } -static void lumRangeToJpeg16_c(int16_t *_dst, int width) +static void lumRangeToJpeg16_c(int16_t *_dst, int width, + uint32_t coeff, int64_t offset) { int i; int32_t *dst = (int32_t *) _dst; for (i = 0; i < width; i++) { - dst[i] = ((int)(FFMIN(dst[i], 30189 << 4) * 4769U - (39057361 << 2))) >> 12; + int Y = ((int64_t) dst[i] * coeff + offset) >> 18; + dst[i] = FFMIN(Y, (1 << 19) - 1); } } -static void lumRangeFromJpeg16_c(int16_t *_dst, int width) +static void lumRangeFromJpeg16_c(int16_t *_dst, int width, + uint32_t coeff, int64_t offset) { int i; int32_t *dst = (int32_t *) _dst; for (i = 0; i < width; i++) - dst[i] = ((int)(dst[i]*(14071U/4) + (33561947<<4)/4)) >> 12; + dst[i] = ((int64_t) dst[i] * coeff + 
offset) >> 18; } @@ -540,11 +563,68 @@ int ff_swscale(SwsInternal *c, const uint8_t *const src[], const int srcStride[] return dstY - lastDstY; } +/* + * Solve for coeff and offset: + * dst = ((src << src_shift) * coeff + offset) >> (mult_shift + src_shift) + * + * If SwsInternal->dstBpc is <= 14, coeff is uint16_t and offset is int32_t, + * otherwise (SwsInternal->dstBpc is > 14) coeff is uint32_t and offset is + * int64_t. + */ +static void solve_range_convert(uint16_t src_min, uint16_t src_max, + uint16_t dst_min, uint16_t dst_max, + int src_bits, int src_shift, int mult_shift, + uint32_t *coeff, int64_t *offset) +{ + uint16_t src_range = src_max - src_min; + uint16_t dst_range = dst_max - dst_min; + int total_shift = mult_shift + src_shift; + *coeff = AV_CEIL_RSHIFT(((uint64_t) dst_range << total_shift) / src_range, src_shift); + *offset = ((int64_t) dst_max << total_shift) - + ((int64_t) src_max << src_shift) * *coeff; +} + +static void init_range_convert_constants(SwsInternal *c) +{ + const int bit_depth = c->dstBpc ? c->dstBpc : 8; + const int src_bits = bit_depth <= 14 ? 15 : 19; + const int src_shift = src_bits - bit_depth; + const int mult_shift = bit_depth <= 14 ? 
14 : 18; + const uint16_t mpeg_min = 16U << (bit_depth - 8); + const uint16_t mpeg_max_lum = 235U << (bit_depth - 8); + const uint16_t mpeg_max_chr = 240U << (bit_depth - 8); + const uint16_t jpeg_max = (1U << bit_depth) - 1; + uint16_t src_min, src_max_lum, src_max_chr; + uint16_t dst_min, dst_max_lum, dst_max_chr; + if (c->opts.src_range) { + src_min = 0; + src_max_lum = jpeg_max; + src_max_chr = jpeg_max; + dst_min = mpeg_min; + dst_max_lum = mpeg_max_lum; + dst_max_chr = mpeg_max_chr; + } else { + src_min = mpeg_min; + src_max_lum = mpeg_max_lum; + src_max_chr = mpeg_max_chr; + dst_min = 0; + dst_max_lum = jpeg_max; + dst_max_chr = jpeg_max; + } + solve_range_convert(src_min, src_max_lum, dst_min, dst_max_lum, + src_bits, src_shift, mult_shift, + &c->lumConvertRange_coeff, &c->lumConvertRange_offset); + solve_range_convert(src_min, src_max_chr, dst_min, dst_max_chr, + src_bits, src_shift, mult_shift, + &c->chrConvertRange_coeff, &c->chrConvertRange_offset); +} + av_cold void ff_sws_init_range_convert(SwsInternal *c) { c->lumConvertRange = NULL; c->chrConvertRange = NULL; if (c->opts.src_range != c->opts.dst_range && !isAnyRGB(c->opts.dst_format)) { + init_range_convert_constants(c); if (c->dstBpc <= 14) { if (c->opts.src_range) { c->lumConvertRange = lumRangeFromJpeg_c; diff --git a/src/ExtLib/ffmpeg/libswscale/swscale_internal.h b/src/ExtLib/ffmpeg/libswscale/swscale_internal.h index 479b436a1e..768e394560 100644 --- a/src/ExtLib/ffmpeg/libswscale/swscale_internal.h +++ b/src/ExtLib/ffmpeg/libswscale/swscale_internal.h @@ -647,10 +647,28 @@ struct SwsInternal { const int32_t *filterPos, int filterSize); /** @} */ - /// Color range conversion function for luma plane if needed. - void (*lumConvertRange)(int16_t *dst, int width); - /// Color range conversion function for chroma planes if needed. - void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width); + /** + * Color range conversion functions if needed. 
+ * If SwsInternal->dstBpc is <= 14: + * - int16_t *dst (data is 15 bpc) + * - uint16_t coeff + * - int32_t offset + * Otherwise (SwsInternal->dstBpc is > 14): + * - int32_t *dst (data is 19 bpc) + * - uint32_t coeff + * - int64_t offset + */ + /** @{ */ + void (*lumConvertRange)(int16_t *dst, int width, + uint32_t coeff, int64_t offset); + void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width, + uint32_t coeff, int64_t offset); + /** @} */ + + uint32_t lumConvertRange_coeff; + uint32_t chrConvertRange_coeff; + int64_t lumConvertRange_offset; + int64_t chrConvertRange_offset; int needs_hcscale; ///< Set if there are chroma planes to be converted. diff --git a/src/ExtLib/ffmpeg/libswscale/swscale_unscaled.c b/src/ExtLib/ffmpeg/libswscale/swscale_unscaled.c index ded1d8f350..fea01e2528 100644 --- a/src/ExtLib/ffmpeg/libswscale/swscale_unscaled.c +++ b/src/ExtLib/ffmpeg/libswscale/swscale_unscaled.c @@ -230,6 +230,8 @@ static void nv24_to_yuv420p_chroma(uint8_t *dst1, int dstStride1, const uint8_t *src2 = src + srcStride; // average 4 pixels into 1 (interleaved U and V) for (int y = 0; y < h; y += 2) { + if (y + 1 == h) + src2 = src1; for (int x = 0; x < w; x++) { dst1[x] = (src1[4 * x + 0] + src1[4 * x + 2] + src2[4 * x + 0] + src2[4 * x + 2]) >> 2; diff --git a/src/ExtLib/ffmpeg/libswscale/utils.c b/src/ExtLib/ffmpeg/libswscale/utils.c index 26ea6062a0..32f90e366e 100644 --- a/src/ExtLib/ffmpeg/libswscale/utils.c +++ b/src/ExtLib/ffmpeg/libswscale/utils.c @@ -1535,7 +1535,7 @@ av_cold int ff_sws_init_single_context(SwsContext *sws, SwsFilter *srcFilter, /* drop every other pixel for chroma calculation unless user * wants full chroma */ - if (isAnyRGB(srcFormat) && !(flags & SWS_FULL_CHR_H_INP) && + if (isAnyRGB(srcFormat) && !(srcW & 1) && !(flags & SWS_FULL_CHR_H_INP) && srcFormat != AV_PIX_FMT_RGB8 && srcFormat != AV_PIX_FMT_BGR8 && srcFormat != AV_PIX_FMT_RGB4 && srcFormat != AV_PIX_FMT_BGR4 && srcFormat != AV_PIX_FMT_RGB4_BYTE && srcFormat != 
AV_PIX_FMT_BGR4_BYTE && diff --git a/src/ExtLib/ffmpeg/libswscale/x86/range_convert.asm b/src/ExtLib/ffmpeg/libswscale/x86/range_convert.asm index 97c7525448..e5b8866a1f 100644 --- a/src/ExtLib/ffmpeg/libswscale/x86/range_convert.asm +++ b/src/ExtLib/ffmpeg/libswscale/x86/range_convert.asm @@ -21,55 +21,68 @@ %include "libavutil/x86/x86util.asm" SECTION_RODATA - -chr_to_mult: times 4 dw 4663, 0 -chr_to_offset: times 4 dd -9289992 -%define chr_to_shift 12 - -chr_from_mult: times 4 dw 1799, 0 -chr_from_offset: times 4 dd 4081085 -%define chr_from_shift 11 - -lum_to_mult: times 4 dw 19077, 0 -lum_to_offset: times 4 dd -39057361 -%define lum_to_shift 14 - -lum_from_mult: times 4 dw 14071, 0 -lum_from_offset: times 4 dd 33561947 -%define lum_from_shift 14 +pack19: times 4 dd (1 << 19) - 1 SECTION .text -; NOTE: there is no need to clamp the input when converting to jpeg range -; (like we do in the C code) because packssdw will saturate the output. - ;----------------------------------------------------------------------------- ; lumConvertRange ; -; void ff_lumRangeToJpeg_(int16_t *dst, int width); -; void ff_lumRangeFromJpeg_(int16_t *dst, int width); +; void ff_lumRangeToJpeg{8,16}_(int16_t *dst, int width, +; uint32_t coeff, int64_t offset); +; void ff_lumRangeFromJpeg{8,16}_(int16_t *dst, int width, +; uint32_t coeff, int64_t offset); ; ;----------------------------------------------------------------------------- -%macro LUMCONVERTRANGE 4 -cglobal %1, 2, 2, 5, dst, width - shl widthd, 1 - VBROADCASTI128 m2, [%2] - VBROADCASTI128 m3, [%3] +%macro LUMCONVERTRANGE 2 +cglobal lumRange%1Jpeg%2, 4, 4, 5, dst, width, coeff, offset + shl widthd, %2 >> 3 + movd xm2, coeffd + VBROADCASTSS m2, xm2 +%if ARCH_X86_64 + movq xm3, offsetq +%else + movq xm3, offsetm +%endif +%if %2 == 16 + VBROADCASTSD m3, xm3 +%ifidni %1,To + VBROADCASTI128 m4, [pack19] +%endif +%elif %2 == 8 + VBROADCASTSS m3, xm3 pxor m4, m4 +%endif ; %2 == 8/16 add dstq, widthq neg widthq .loop: movu m0, 
[dstq+widthq] +%if %2 == 16 + pshufd m1, m0, 0xb1 + pmuldq m0, m2 + pmuldq m1, m2 + paddq m0, m3 + paddq m1, m3 + psrlq m0, 18 + psrlq m1, 18 + pshufd m0, m0, 0xd8 + pshufd m1, m1, 0xd8 + punpckldq m0, m1 +%ifidni %1,To + PMINSD m0, m4, m1 +%endif +%elif %2 == 8 punpckhwd m1, m0, m4 punpcklwd m0, m4 pmaddwd m0, m2 pmaddwd m1, m2 paddd m0, m3 paddd m1, m3 - psrad m0, %4 - psrad m1, %4 + psrad m0, 14 + psrad m1, 14 packssdw m0, m1 +%endif ; %2 == 8/16 movu [dstq+widthq], m0 add widthq, mmsize jl .loop @@ -79,23 +92,64 @@ cglobal %1, 2, 2, 5, dst, width ;----------------------------------------------------------------------------- ; chrConvertRange ; -; void ff_chrRangeToJpeg_(int16_t *dstU, int16_t *dstV, int width); -; void ff_chrRangeFromJpeg_(int16_t *dstU, int16_t *dstV, int width); +; void ff_chrRangeToJpeg{8,16}_(int16_t *dstU, int16_t *dstV, int width, +; uint32_t coeff, int64_t offset); +; void ff_chrRangeFromJpeg{8,16}_(int16_t *dstU, int16_t *dstV, int width, +; uint32_t coeff, int64_t offset); ; ;----------------------------------------------------------------------------- -%macro CHRCONVERTRANGE 4 -cglobal %1, 3, 3, 7, dstU, dstV, width - shl widthd, 1 - VBROADCASTI128 m4, [%2] - VBROADCASTI128 m5, [%3] +%macro CHRCONVERTRANGE 2 +cglobal chrRange%1Jpeg%2, 5, 5, 7, dstU, dstV, width, coeff, offset + shl widthd, %2 >> 3 + movd xm4, coeffd + VBROADCASTSS m4, xm4 +%if ARCH_X86_64 + movq xm5, offsetq +%else + movq xm5, offsetm +%endif +%if %2 == 16 + VBROADCASTSD m5, xm5 +%ifidni %1,To + VBROADCASTI128 m6, [pack19] +%endif +%elif %2 == 8 + VBROADCASTSS m5, xm5 pxor m6, m6 +%endif ; %2 == 8/16 add dstUq, widthq add dstVq, widthq neg widthq .loop: movu m0, [dstUq+widthq] movu m2, [dstVq+widthq] +%if %2 == 16 + pshufd m1, m0, 0xb1 + pshufd m3, m2, 0xb1 + pmuldq m0, m4 + pmuldq m1, m4 + pmuldq m2, m4 + pmuldq m3, m4 + paddq m0, m5 + paddq m1, m5 + paddq m2, m5 + paddq m3, m5 + psrlq m0, 18 + psrlq m1, 18 + psrlq m2, 18 + psrlq m3, 18 + pshufd m0, m0, 0xd8 + pshufd 
m1, m1, 0xd8 + pshufd m2, m2, 0xd8 + pshufd m3, m3, 0xd8 + punpckldq m0, m1 + punpckldq m2, m3 +%ifidni %1,To + PMINSD m0, m6, m1 + PMINSD m2, m6, m3 +%endif +%elif %2 == 8 punpckhwd m1, m0, m6 punpckhwd m3, m2, m6 punpcklwd m0, m6 @@ -108,12 +162,13 @@ cglobal %1, 3, 3, 7, dstU, dstV, width paddd m1, m5 paddd m2, m5 paddd m3, m5 - psrad m0, %4 - psrad m1, %4 - psrad m2, %4 - psrad m3, %4 + psrad m0, 14 + psrad m1, 14 + psrad m2, 14 + psrad m3, 14 packssdw m0, m1 packssdw m2, m3 +%endif ; %2 == 8/16 movu [dstUq+widthq], m0 movu [dstVq+widthq], m2 add widthq, mmsize @@ -122,15 +177,25 @@ cglobal %1, 3, 3, 7, dstU, dstV, width %endmacro INIT_XMM sse2 -LUMCONVERTRANGE lumRangeToJpeg, lum_to_mult, lum_to_offset, lum_to_shift -CHRCONVERTRANGE chrRangeToJpeg, chr_to_mult, chr_to_offset, chr_to_shift -LUMCONVERTRANGE lumRangeFromJpeg, lum_from_mult, lum_from_offset, lum_from_shift -CHRCONVERTRANGE chrRangeFromJpeg, chr_from_mult, chr_from_offset, chr_from_shift +LUMCONVERTRANGE To, 8 +CHRCONVERTRANGE To, 8 +LUMCONVERTRANGE From, 8 +CHRCONVERTRANGE From, 8 + +INIT_XMM sse4 +LUMCONVERTRANGE To, 16 +CHRCONVERTRANGE To, 16 +LUMCONVERTRANGE From, 16 +CHRCONVERTRANGE From, 16 %if HAVE_AVX2_EXTERNAL INIT_YMM avx2 -LUMCONVERTRANGE lumRangeToJpeg, lum_to_mult, lum_to_offset, lum_to_shift -CHRCONVERTRANGE chrRangeToJpeg, chr_to_mult, chr_to_offset, chr_to_shift -LUMCONVERTRANGE lumRangeFromJpeg, lum_from_mult, lum_from_offset, lum_from_shift -CHRCONVERTRANGE chrRangeFromJpeg, chr_from_mult, chr_from_offset, chr_from_shift +LUMCONVERTRANGE To, 8 +LUMCONVERTRANGE To, 16 +CHRCONVERTRANGE To, 8 +CHRCONVERTRANGE To, 16 +LUMCONVERTRANGE From, 8 +LUMCONVERTRANGE From, 16 +CHRCONVERTRANGE From, 8 +CHRCONVERTRANGE From, 16 %endif diff --git a/src/ExtLib/ffmpeg/libswscale/x86/swscale.c b/src/ExtLib/ffmpeg/libswscale/x86/swscale.c index 3e1f9f371f..a7985a3b01 100644 --- a/src/ExtLib/ffmpeg/libswscale/x86/swscale.c +++ b/src/ExtLib/ffmpeg/libswscale/x86/swscale.c @@ -451,34 +451,44 @@ 
INPUT_PLANAR_RGB_UV_ALL_DECL(avx2); INPUT_PLANAR_RGB_A_ALL_DECL(avx2); #endif -#define RANGE_CONVERT_FUNCS(opt) do { \ - if (c->dstBpc <= 14) { \ - if (c->opts.src_range) { \ - c->lumConvertRange = ff_lumRangeFromJpeg_ ##opt; \ - c->chrConvertRange = ff_chrRangeFromJpeg_ ##opt; \ - } else { \ - c->lumConvertRange = ff_lumRangeToJpeg_ ##opt; \ - c->chrConvertRange = ff_chrRangeToJpeg_ ##opt; \ - } \ +#define RANGE_CONVERT_FUNCS(opt, bpc) do { \ + if (c->opts.src_range) { \ + c->lumConvertRange = ff_lumRangeFromJpeg##bpc##_##opt; \ + c->chrConvertRange = ff_chrRangeFromJpeg##bpc##_##opt; \ + } else { \ + c->lumConvertRange = ff_lumRangeToJpeg##bpc##_##opt; \ + c->chrConvertRange = ff_chrRangeToJpeg##bpc##_##opt; \ } \ } while (0) -#define RANGE_CONVERT_FUNCS_DECL(opt) \ -void ff_lumRangeFromJpeg_ ##opt(int16_t *dst, int width); \ -void ff_chrRangeFromJpeg_ ##opt(int16_t *dstU, int16_t *dstV, int width); \ -void ff_lumRangeToJpeg_ ##opt(int16_t *dst, int width); \ -void ff_chrRangeToJpeg_ ##opt(int16_t *dstU, int16_t *dstV, int width); \ - -RANGE_CONVERT_FUNCS_DECL(sse2); -RANGE_CONVERT_FUNCS_DECL(avx2); +#define RANGE_CONVERT_FUNCS_DECL(opt, bpc) \ +void ff_lumRangeFromJpeg##bpc##_##opt(int16_t *dst, int width, \ + uint32_t coeff, int64_t offset); \ +void ff_chrRangeFromJpeg##bpc##_##opt(int16_t *dstU, int16_t *dstV, int width, \ + uint32_t coeff, int64_t offset); \ +void ff_lumRangeToJpeg##bpc##_##opt(int16_t *dst, int width, \ + uint32_t coeff, int64_t offset); \ +void ff_chrRangeToJpeg##bpc##_##opt(int16_t *dstU, int16_t *dstV, int width, \ + uint32_t coeff, int64_t offset); \ + +RANGE_CONVERT_FUNCS_DECL(sse2, 8) +RANGE_CONVERT_FUNCS_DECL(sse4, 16) +RANGE_CONVERT_FUNCS_DECL(avx2, 8) +RANGE_CONVERT_FUNCS_DECL(avx2, 16) av_cold void ff_sws_init_range_convert_x86(SwsInternal *c) { int cpu_flags = av_get_cpu_flags(); if (EXTERNAL_AVX2_FAST(cpu_flags)) { - RANGE_CONVERT_FUNCS(avx2); - } else if (EXTERNAL_SSE2(cpu_flags)) { - RANGE_CONVERT_FUNCS(sse2); + if (c->dstBpc 
<= 14) { + RANGE_CONVERT_FUNCS(avx2, 8); + } else { + RANGE_CONVERT_FUNCS(avx2, 16); + } + } else if (EXTERNAL_SSE2(cpu_flags) && c->dstBpc <= 14) { + RANGE_CONVERT_FUNCS(sse2, 8); + } else if (EXTERNAL_SSE4(cpu_flags) && c->dstBpc > 14) { + RANGE_CONVERT_FUNCS(sse4, 16); } }