From 0cb398986148e0fb94729a9c0220b1215a21e4fe Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Fri, 28 Apr 2023 11:33:38 +0200 Subject: [PATCH 01/98] lavc/decode: pass AVHWAccel instead of AVCodecHWConfigInternal to hwaccel_init() The only thing besides the hwaccel that this function uses from AVCodecHWConfigInternal is the pixel format, which should always match the hwaccel one. Will be useful in following commits. --- libavcodec/decode.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/libavcodec/decode.c b/libavcodec/decode.c index 360837a0adb95..18d85cf46f3a0 100644 --- a/libavcodec/decode.c +++ b/libavcodec/decode.c @@ -1111,12 +1111,10 @@ int avcodec_get_hw_frames_parameters(AVCodecContext *avctx, } static int hwaccel_init(AVCodecContext *avctx, - const AVCodecHWConfigInternal *hw_config) + const AVHWAccel *hwaccel) { - const AVHWAccel *hwaccel; int err; - hwaccel = hw_config->hwaccel; if (hwaccel->capabilities & AV_HWACCEL_CODEC_CAP_EXPERIMENTAL && avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) { av_log(avctx, AV_LOG_WARNING, "Ignoring experimental hwaccel: %s\n", @@ -1137,7 +1135,7 @@ static int hwaccel_init(AVCodecContext *avctx, if (err < 0) { av_log(avctx, AV_LOG_ERROR, "Failed setup for format %s: " "hwaccel initialisation returned error.\n", - av_get_pix_fmt_name(hw_config->public.pix_fmt)); + av_get_pix_fmt_name(hwaccel->pix_fmt)); av_freep(&avctx->internal->hwaccel_priv_data); avctx->hwaccel = NULL; return err; @@ -1271,7 +1269,7 @@ int ff_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt) if (hw_config->hwaccel) { av_log(avctx, AV_LOG_DEBUG, "Format %s requires hwaccel " "initialisation.\n", desc->name); - err = hwaccel_init(avctx, hw_config); + err = hwaccel_init(avctx, hw_config->hwaccel); if (err < 0) goto try_again; } From 0b9a8eac41f91af39f165cb06b464fcbda007191 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Fri, 28 Apr 2023 11:48:04 +0200 Subject: [PATCH 02/98] lavc/decode: stop 
duplicating code from hwaccel_uninit() --- libavcodec/avcodec.c | 5 ++--- libavcodec/decode.c | 4 ++-- libavcodec/hwconfig.h | 1 + 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/libavcodec/avcodec.c b/libavcodec/avcodec.c index 5a96899d505e8..db8226f9b3d59 100644 --- a/libavcodec/avcodec.c +++ b/libavcodec/avcodec.c @@ -39,6 +39,7 @@ #include "decode.h" #include "encode.h" #include "frame_thread_encoder.h" +#include "hwconfig.h" #include "internal.h" #include "thread.h" @@ -459,9 +460,7 @@ av_cold int avcodec_close(AVCodecContext *avctx) av_buffer_unref(&avci->pool); - if (avctx->hwaccel && avctx->hwaccel->uninit) - avctx->hwaccel->uninit(avctx); - av_freep(&avci->hwaccel_priv_data); + ff_hwaccel_uninit(avctx); av_bsf_free(&avci->bsf); diff --git a/libavcodec/decode.c b/libavcodec/decode.c index 18d85cf46f3a0..d75f831a741fe 100644 --- a/libavcodec/decode.c +++ b/libavcodec/decode.c @@ -1145,7 +1145,7 @@ static int hwaccel_init(AVCodecContext *avctx, return 0; } -static void hwaccel_uninit(AVCodecContext *avctx) +void ff_hwaccel_uninit(AVCodecContext *avctx) { if (avctx->hwaccel && avctx->hwaccel->uninit) avctx->hwaccel->uninit(avctx); @@ -1184,7 +1184,7 @@ int ff_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt) for (;;) { // Remove the previous hwaccel, if there was one. - hwaccel_uninit(avctx); + ff_hwaccel_uninit(avctx); user_choice = avctx->get_format(avctx, choices); if (user_choice == AV_PIX_FMT_NONE) { diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h index 721424912c46d..f03b744cdf4b5 100644 --- a/libavcodec/hwconfig.h +++ b/libavcodec/hwconfig.h @@ -39,6 +39,7 @@ typedef struct AVCodecHWConfigInternal { const AVHWAccel *hwaccel; } AVCodecHWConfigInternal; +void ff_hwaccel_uninit(AVCodecContext *avctx); // These macros are used to simplify AVCodecHWConfigInternal definitions. 
From 1fa18d3951ebfb36b0ae0f5f72a16622308bc2a5 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Fri, 28 Apr 2023 13:37:28 +0200 Subject: [PATCH 03/98] lavc/pthread_frame: add support for thread-safe hwaccels --- libavcodec/avcodec.h | 6 +++ libavcodec/hwconfig.h | 1 + libavcodec/pthread_frame.c | 101 ++++++++++++++++++++++++++++++------- 3 files changed, 89 insertions(+), 19 deletions(-) diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 06b1a120abed9..dad443c818589 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -2253,6 +2253,12 @@ typedef struct AVHWAccel { * that avctx->hwaccel_priv_data is invalid. */ int (*frame_params)(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx); + + /** + * Copy necessary context variables from a previous thread context to the current one. + * For thread-safe hwaccels only. + */ + int (*update_thread_context)(AVCodecContext *dst, const AVCodecContext *src); } AVHWAccel; /** diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h index f03b744cdf4b5..d88dc37c8c926 100644 --- a/libavcodec/hwconfig.h +++ b/libavcodec/hwconfig.h @@ -24,6 +24,7 @@ #define HWACCEL_CAP_ASYNC_SAFE (1 << 0) +#define HWACCEL_CAP_THREAD_SAFE (1 << 1) typedef struct AVCodecHWConfigInternal { diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c index 773e78ae3458c..28335231fd742 100644 --- a/libavcodec/pthread_frame.c +++ b/libavcodec/pthread_frame.c @@ -104,6 +104,12 @@ typedef struct PerThreadContext { int hwaccel_serializing; int async_serializing; + // set to 1 in ff_thread_finish_setup() when a threadsafe hwaccel is used; + // cannot check hwaccel caps directly, because + // worker threads clear hwaccel state for thread-unsafe hwaccels + // after each decode call + int hwaccel_threadsafe; + atomic_int debug_threads; ///< Set if the FF_DEBUG_THREADS option is set. 
} PerThreadContext; @@ -117,8 +123,8 @@ typedef struct FrameThreadContext { unsigned pthread_init_cnt; ///< Number of successfully initialized mutexes/conditions pthread_mutex_t buffer_mutex; ///< Mutex used to protect get/release_buffer(). /** - * This lock is used for ensuring threads run in serial when hwaccel - * is used. + * This lock is used for ensuring threads run in serial when thread-unsafe + * hwaccel is used. */ pthread_mutex_t hwaccel_mutex; pthread_mutex_t async_mutex; @@ -133,13 +139,19 @@ typedef struct FrameThreadContext { * While it is set, ff_thread_en/decode_frame won't return any results. */ - /* hwaccel state is temporarily stored here in order to transfer its ownership - * to the next decoding thread without the need for extra synchronization */ + /* hwaccel state for thread-unsafe hwaccels is temporarily stored here in + * order to transfer its ownership to the next decoding thread without the + * need for extra synchronization */ const AVHWAccel *stash_hwaccel; void *stash_hwaccel_context; void *stash_hwaccel_priv; } FrameThreadContext; +static int hwaccel_serial(const AVCodecContext *avctx) +{ + return avctx->hwaccel && !(avctx->hwaccel->caps_internal & HWACCEL_CAP_THREAD_SAFE); +} + static void async_lock(FrameThreadContext *fctx) { pthread_mutex_lock(&fctx->async_mutex); @@ -202,9 +214,9 @@ static attribute_align_arg void *frame_worker_thread(void *arg) * cannot be true here. 
*/ av_assert0(!p->hwaccel_serializing); - /* if the previous thread uses hwaccel then we take the lock to ensure - * the threads don't run concurrently */ - if (avctx->hwaccel) { + /* if the previous thread uses thread-unsafe hwaccel then we take the + * lock to ensure the threads don't run concurrently */ + if (hwaccel_serial(avctx)) { pthread_mutex_lock(&p->parent->hwaccel_mutex); p->hwaccel_serializing = 1; } @@ -220,7 +232,8 @@ static attribute_align_arg void *frame_worker_thread(void *arg) ff_thread_finish_setup(avctx); if (p->hwaccel_serializing) { - /* wipe hwaccel state to avoid stale pointers lying around; + /* wipe hwaccel state for thread-unsafe hwaccels to avoid stale + * pointers lying around; * the state was transferred to FrameThreadContext in * ff_thread_finish_setup(), so nothing is leaked */ avctx->hwaccel = NULL; @@ -230,7 +243,8 @@ static attribute_align_arg void *frame_worker_thread(void *arg) p->hwaccel_serializing = 0; pthread_mutex_unlock(&p->parent->hwaccel_mutex); } - av_assert0(!avctx->hwaccel); + av_assert0(!avctx->hwaccel || + (avctx->hwaccel->caps_internal & HWACCEL_CAP_THREAD_SAFE)); if (p->async_serializing) { p->async_serializing = 0; @@ -332,8 +346,49 @@ FF_ENABLE_DEPRECATION_WARNINGS if (codec->update_thread_context_for_user) err = codec->update_thread_context_for_user(dst, src); } else { - if (codec->update_thread_context) + const PerThreadContext *p_src = src->internal->thread_ctx; + PerThreadContext *p_dst = dst->internal->thread_ctx; + + if (codec->update_thread_context) { err = codec->update_thread_context(dst, src); + if (err < 0) + return err; + } + + // reset dst hwaccel state if needed + av_assert0(p_dst->hwaccel_threadsafe || + (!dst->hwaccel && !dst->internal->hwaccel_priv_data)); + if (p_dst->hwaccel_threadsafe && + (!p_src->hwaccel_threadsafe || dst->hwaccel != src->hwaccel)) { + ff_hwaccel_uninit(dst); + p_dst->hwaccel_threadsafe = 0; + } + + // propagate hwaccel state for threadsafe hwaccels + if 
(p_src->hwaccel_threadsafe) { + if (!dst->hwaccel) { + if (src->hwaccel->priv_data_size) { + av_assert0(src->hwaccel->update_thread_context); + + dst->internal->hwaccel_priv_data = + av_mallocz(src->hwaccel->priv_data_size); + if (!dst->internal->hwaccel_priv_data) + return AVERROR(ENOMEM); + } + dst->hwaccel = src->hwaccel; + } + av_assert0(dst->hwaccel == src->hwaccel); + + if (src->hwaccel->update_thread_context) { + err = src->hwaccel->update_thread_context(dst, src); + if (err < 0) { + av_log(dst, AV_LOG_ERROR, "Error propagating hwaccel state\n"); + ff_hwaccel_uninit(dst); + return err; + } + } + p_dst->hwaccel_threadsafe = 1; + } } return err; @@ -441,10 +496,12 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx, } /* transfer the stashed hwaccel state, if any */ - av_assert0(!p->avctx->hwaccel); - FFSWAP(const AVHWAccel*, p->avctx->hwaccel, fctx->stash_hwaccel); - FFSWAP(void*, p->avctx->hwaccel_context, fctx->stash_hwaccel_context); - FFSWAP(void*, p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv); + av_assert0(!p->avctx->hwaccel || p->hwaccel_threadsafe); + if (!p->hwaccel_threadsafe) { + FFSWAP(const AVHWAccel*, p->avctx->hwaccel, fctx->stash_hwaccel); + FFSWAP(void*, p->avctx->hwaccel_context, fctx->stash_hwaccel_context); + FFSWAP(void*, p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv); + } av_packet_unref(p->avpkt); ret = av_packet_ref(p->avpkt, avpkt); @@ -598,7 +655,10 @@ void ff_thread_finish_setup(AVCodecContext *avctx) { if (!(avctx->active_thread_type&FF_THREAD_FRAME)) return; - if (avctx->hwaccel && !p->hwaccel_serializing) { + p->hwaccel_threadsafe = avctx->hwaccel && + (avctx->hwaccel->caps_internal & HWACCEL_CAP_THREAD_SAFE); + + if (hwaccel_serial(avctx) && !p->hwaccel_serializing) { pthread_mutex_lock(&p->parent->hwaccel_mutex); p->hwaccel_serializing = 1; } @@ -611,13 +671,16 @@ void ff_thread_finish_setup(AVCodecContext *avctx) { async_lock(p->parent); } - /* save hwaccel state for 
passing to the next thread; + /* thread-unsafe hwaccels share a single private data instance, so we + * save hwaccel state for passing to the next thread; * this is done here so that this worker thread can wipe its own hwaccel * state after decoding, without requiring synchronization */ av_assert0(!p->parent->stash_hwaccel); - p->parent->stash_hwaccel = avctx->hwaccel; - p->parent->stash_hwaccel_context = avctx->hwaccel_context; - p->parent->stash_hwaccel_priv = avctx->internal->hwaccel_priv_data; + if (hwaccel_serial(avctx)) { + p->parent->stash_hwaccel = avctx->hwaccel; + p->parent->stash_hwaccel_context = avctx->hwaccel_context; + p->parent->stash_hwaccel_priv = avctx->internal->hwaccel_priv_data; + } pthread_mutex_lock(&p->progress_mutex); if(atomic_load(&p->state) == STATE_SETUP_FINISHED){ From 9499f9b53d65b377f9909813d0fc9d3d3d6d3a6e Mon Sep 17 00:00:00 2001 From: Lynne Date: Thu, 11 May 2023 03:55:47 +0200 Subject: [PATCH 04/98] lavc/decode: allow to allocate hwaccel_priv_data early --- libavcodec/decode.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/libavcodec/decode.c b/libavcodec/decode.c index d75f831a741fe..9ff132a15c3ca 100644 --- a/libavcodec/decode.c +++ b/libavcodec/decode.c @@ -1087,6 +1087,15 @@ int avcodec_get_hw_frames_parameters(AVCodecContext *avctx, if (!frames_ref) return AVERROR(ENOMEM); + if (!avctx->internal->hwaccel_priv_data) { + avctx->internal->hwaccel_priv_data = + av_mallocz(hwa->priv_data_size); + if (!avctx->internal->hwaccel_priv_data) { + av_buffer_unref(&frames_ref); + return AVERROR(ENOMEM); + } + } + ret = hwa->frame_params(avctx, frames_ref); if (ret >= 0) { AVHWFramesContext *frames_ctx = (AVHWFramesContext*)frames_ref->data; @@ -1122,7 +1131,7 @@ static int hwaccel_init(AVCodecContext *avctx, return AVERROR_PATCHWELCOME; } - if (hwaccel->priv_data_size) { + if (!avctx->internal->hwaccel_priv_data && hwaccel->priv_data_size) { avctx->internal->hwaccel_priv_data = 
av_mallocz(hwaccel->priv_data_size); if (!avctx->internal->hwaccel_priv_data) @@ -1288,6 +1297,9 @@ int ff_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt) --n; } + if (ret < 0) + ff_hwaccel_uninit(avctx); + av_freep(&choices); return ret; } From 32a50ee01b47e5166a391139359eb85dccc31bc6 Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 14 Dec 2022 00:02:11 +0100 Subject: [PATCH 05/98] h2645_vui: expose aspect_ratio_idc --- libavcodec/h2645_vui.c | 10 +++++----- libavcodec/h2645_vui.h | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/libavcodec/h2645_vui.c b/libavcodec/h2645_vui.c index 0633fcbddd2b9..93e83a9e1f747 100644 --- a/libavcodec/h2645_vui.c +++ b/libavcodec/h2645_vui.c @@ -42,15 +42,15 @@ void ff_h2645_decode_common_vui_params(GetBitContext *gb, H2645VUI *vui, void *l aspect_ratio_info_present_flag = get_bits1(gb); if (aspect_ratio_info_present_flag) { - uint8_t aspect_ratio_idc = get_bits(gb, 8); - if (aspect_ratio_idc < FF_ARRAY_ELEMS(ff_h2645_pixel_aspect)) - vui->sar = ff_h2645_pixel_aspect[aspect_ratio_idc]; - else if (aspect_ratio_idc == EXTENDED_SAR) { + vui->aspect_ratio_idc = get_bits(gb, 8); + if (vui->aspect_ratio_idc < FF_ARRAY_ELEMS(ff_h2645_pixel_aspect)) + vui->sar = ff_h2645_pixel_aspect[vui->aspect_ratio_idc]; + else if (vui->aspect_ratio_idc == EXTENDED_SAR) { vui->sar.num = get_bits(gb, 16); vui->sar.den = get_bits(gb, 16); } else av_log(logctx, AV_LOG_WARNING, - "Unknown SAR index: %u.\n", aspect_ratio_idc); + "Unknown SAR index: %u.\n", vui->aspect_ratio_idc); } else vui->sar = (AVRational){ 0, 1 }; diff --git a/libavcodec/h2645_vui.h b/libavcodec/h2645_vui.h index 638da7c36672e..f1aeab775879d 100644 --- a/libavcodec/h2645_vui.h +++ b/libavcodec/h2645_vui.h @@ -26,6 +26,7 @@ typedef struct H2645VUI { AVRational sar; + int aspect_ratio_idc; int overscan_info_present_flag; int overscan_appropriate_flag; From 99522caabd37e58138ce822cac7965af0cdf9d7d Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 14 Dec 
2022 00:03:44 +0100 Subject: [PATCH 06/98] h2645_vui: expose aspect_ratio_info_present_flag --- libavcodec/h2645_vui.c | 6 ++---- libavcodec/h2645_vui.h | 1 + 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/libavcodec/h2645_vui.c b/libavcodec/h2645_vui.c index 93e83a9e1f747..e5c7bf46f9b3d 100644 --- a/libavcodec/h2645_vui.c +++ b/libavcodec/h2645_vui.c @@ -36,12 +36,10 @@ void ff_h2645_decode_common_vui_params(GetBitContext *gb, H2645VUI *vui, void *logctx) { - int aspect_ratio_info_present_flag; - av_log(logctx, AV_LOG_DEBUG, "Decoding VUI\n"); - aspect_ratio_info_present_flag = get_bits1(gb); - if (aspect_ratio_info_present_flag) { + vui->aspect_ratio_info_present_flag = get_bits1(gb); + if (vui->aspect_ratio_info_present_flag) { vui->aspect_ratio_idc = get_bits(gb, 8); if (vui->aspect_ratio_idc < FF_ARRAY_ELEMS(ff_h2645_pixel_aspect)) vui->sar = ff_h2645_pixel_aspect[vui->aspect_ratio_idc]; diff --git a/libavcodec/h2645_vui.h b/libavcodec/h2645_vui.h index f1aeab775879d..2c839f4b01557 100644 --- a/libavcodec/h2645_vui.h +++ b/libavcodec/h2645_vui.h @@ -27,6 +27,7 @@ typedef struct H2645VUI { AVRational sar; int aspect_ratio_idc; + int aspect_ratio_info_present_flag; int overscan_info_present_flag; int overscan_appropriate_flag; From 8cb591f348c9dd3d447c1d1584947821984b4b9f Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 18 Mar 2022 15:11:02 +0100 Subject: [PATCH 07/98] h264_ps: expose pps_id --- libavcodec/h264_ps.c | 1 + libavcodec/h264_ps.h | 1 + 2 files changed, 2 insertions(+) diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c index d0d1e6590398e..4ec5bd4e80839 100644 --- a/libavcodec/h264_ps.c +++ b/libavcodec/h264_ps.c @@ -731,6 +731,7 @@ int ff_h264_decode_picture_parameter_set(GetBitContext *gb, AVCodecContext *avct if (!(bit_length & 7) && pps->data_size < sizeof(pps->data)) pps->data[pps->data_size++] = 0x80; + pps->pps_id = pps_id; pps->sps_id = get_ue_golomb_31(gb); if ((unsigned)pps->sps_id >= MAX_SPS_COUNT || 
!ps->sps_list[pps->sps_id]) { diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h index 5c35761fbc844..c3f0888f245a5 100644 --- a/libavcodec/h264_ps.h +++ b/libavcodec/h264_ps.h @@ -103,6 +103,7 @@ typedef struct SPS { * Picture parameter set */ typedef struct PPS { + unsigned int pps_id; unsigned int sps_id; int cabac; ///< entropy_coding_mode_flag int pic_order_present; ///< pic_order_present_flag From 951d2dc78a806068b2f333298f5d153ae8a58a25 Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 18 Mar 2022 16:17:33 +0100 Subject: [PATCH 08/98] h264_ps: set pic_scaling_matrix_present_flag --- libavcodec/h264_ps.c | 7 +++++-- libavcodec/h264_ps.h | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c index 4ec5bd4e80839..a94f5350c4541 100644 --- a/libavcodec/h264_ps.c +++ b/libavcodec/h264_ps.c @@ -226,6 +226,7 @@ static int decode_scaling_list(GetBitContext *gb, uint8_t *factors, int size, /* returns non zero if the provided SPS scaling matrix has been filled */ static int decode_scaling_matrices(GetBitContext *gb, const SPS *sps, const PPS *pps, int is_sps, + int present_flag, uint8_t(*scaling_matrix4)[16], uint8_t(*scaling_matrix8)[64]) { @@ -237,7 +238,7 @@ static int decode_scaling_matrices(GetBitContext *gb, const SPS *sps, fallback_sps ? 
sps->scaling_matrix8[3] : default_scaling8[1] }; int ret = 0; - if (get_bits1(gb)) { + if (present_flag) { ret |= decode_scaling_list(gb, scaling_matrix4[0], 16, default_scaling4[0], fallback[0]); // Intra, Y ret |= decode_scaling_list(gb, scaling_matrix4[1], 16, default_scaling4[0], scaling_matrix4[0]); // Intra, Cr ret |= decode_scaling_list(gb, scaling_matrix4[2], 16, default_scaling4[0], scaling_matrix4[1]); // Intra, Cb @@ -368,7 +369,7 @@ int ff_h264_decode_seq_parameter_set(GetBitContext *gb, AVCodecContext *avctx, goto fail; } sps->transform_bypass = get_bits1(gb); - ret = decode_scaling_matrices(gb, sps, NULL, 1, + ret = decode_scaling_matrices(gb, sps, NULL, 1, get_bits1(gb), sps->scaling_matrix4, sps->scaling_matrix8); if (ret < 0) goto fail; @@ -803,7 +804,9 @@ int ff_h264_decode_picture_parameter_set(GetBitContext *gb, AVCodecContext *avct bits_left = bit_length - get_bits_count(gb); if (bits_left > 0 && more_rbsp_data_in_pps(sps, avctx)) { pps->transform_8x8_mode = get_bits1(gb); + pps->pic_scaling_matrix_present_flag = get_bits1(gb); ret = decode_scaling_matrices(gb, sps, pps, 0, + pps->pic_scaling_matrix_present_flag, pps->scaling_matrix4, pps->scaling_matrix8); if (ret < 0) goto fail; diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h index c3f0888f245a5..d2413ae0f8ceb 100644 --- a/libavcodec/h264_ps.h +++ b/libavcodec/h264_ps.h @@ -119,6 +119,7 @@ typedef struct PPS { int constrained_intra_pred; ///< constrained_intra_pred_flag int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag int transform_8x8_mode; ///< transform_8x8_mode_flag + int pic_scaling_matrix_present_flag; uint8_t scaling_matrix4[6][16]; uint8_t scaling_matrix8[6][64]; uint8_t chroma_qp_table[2][QP_MAX_NUM+1]; ///< pre-scaled (with chroma_qp_index_offset) version of qp_table From 235a8bd97e51ea5a0e179b5a89cfcfb648066a4c Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 23 Nov 2022 15:59:23 +0100 Subject: [PATCH 09/98] h264_ps: comment pic_order_present better The 
official name which CBS uses is bottom_field_pic_order_in_frame_present_flag. --- libavcodec/h264_ps.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h index d2413ae0f8ceb..de4529b353446 100644 --- a/libavcodec/h264_ps.h +++ b/libavcodec/h264_ps.h @@ -106,7 +106,7 @@ typedef struct PPS { unsigned int pps_id; unsigned int sps_id; int cabac; ///< entropy_coding_mode_flag - int pic_order_present; ///< pic_order_present_flag + int pic_order_present; ///< bottom_field_pic_order_in_frame_present_flag int slice_group_count; ///< num_slice_groups_minus1 + 1 int mb_slice_group_map_type; unsigned int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1 From 090bc7547944a27cae3c24127ad06b0084751174 Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 14 Dec 2022 00:06:04 +0100 Subject: [PATCH 10/98] h264_ps: expose max_dec_frame_buffering --- libavcodec/h264_ps.c | 2 +- libavcodec/h264_ps.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c index a94f5350c4541..d9df570718543 100644 --- a/libavcodec/h264_ps.c +++ b/libavcodec/h264_ps.c @@ -176,7 +176,7 @@ static inline int decode_vui_parameters(GetBitContext *gb, void *logctx, get_ue_golomb_31(gb); /* log2_max_mv_length_horizontal */ get_ue_golomb_31(gb); /* log2_max_mv_length_vertical */ sps->num_reorder_frames = get_ue_golomb_31(gb); - get_ue_golomb_31(gb); /*max_dec_frame_buffering*/ + sps->max_dec_frame_buffering = get_ue_golomb_31(gb); if (get_bits_left(gb) < 0) { sps->num_reorder_frames = 0; diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h index de4529b353446..906bab72140df 100644 --- a/libavcodec/h264_ps.h +++ b/libavcodec/h264_ps.h @@ -80,6 +80,7 @@ typedef struct SPS { int32_t offset_for_ref_frame[256]; int bitstream_restriction_flag; int num_reorder_frames; + int max_dec_frame_buffering; int scaling_matrix_present; uint8_t scaling_matrix4[6][16]; uint8_t scaling_matrix8[6][64]; From 
8b497fd1cfea96da72cb73a34e728aa1bcec1114 Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 14 Dec 2022 00:09:08 +0100 Subject: [PATCH 11/98] h264_ps: expose bit rate and CPB size fields --- libavcodec/h264_ps.c | 9 +++++---- libavcodec/h264_ps.h | 4 ++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c index d9df570718543..c71330089d726 100644 --- a/libavcodec/h264_ps.c +++ b/libavcodec/h264_ps.c @@ -113,12 +113,13 @@ static inline int decode_hrd_parameters(GetBitContext *gb, void *logctx, return AVERROR_INVALIDDATA; } - get_bits(gb, 4); /* bit_rate_scale */ + sps->cpr_flag = 0x0; + sps->bit_rate_scale = get_bits(gb, 4); get_bits(gb, 4); /* cpb_size_scale */ for (i = 0; i < cpb_count; i++) { - get_ue_golomb_long(gb); /* bit_rate_value_minus1 */ - get_ue_golomb_long(gb); /* cpb_size_value_minus1 */ - get_bits1(gb); /* cbr_flag */ + sps->bit_rate_value[i] = get_ue_golomb_long(gb) + 1; /* bit_rate_value_minus1 + 1 */ + sps->cpb_size_value[i] = get_ue_golomb_long(gb) + 1; /* cpb_size_value_minus1 + 1 */ + sps->cpr_flag |= get_bits1(gb) << i; } sps->initial_cpb_removal_delay_length = get_bits(gb, 5) + 1; sps->cpb_removal_delay_length = get_bits(gb, 5) + 1; diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h index 906bab72140df..8adfbd710f375 100644 --- a/libavcodec/h264_ps.h +++ b/libavcodec/h264_ps.h @@ -89,6 +89,10 @@ typedef struct SPS { int pic_struct_present_flag; int time_offset_length; int cpb_cnt; ///< See H.264 E.1.2 + int bit_rate_scale; + uint32_t bit_rate_value[32]; ///< bit_rate_value_minus1 + 1 + uint32_t cpb_size_value[32]; ///< cpb_size_value_minus1 + 1 + uint32_t cpr_flag; int initial_cpb_removal_delay_length; ///< initial_cpb_removal_delay_length_minus1 + 1 int cpb_removal_delay_length; ///< cpb_removal_delay_length_minus1 + 1 int dpb_output_delay_length; ///< dpb_output_delay_length_minus1 + 1 From 0967f1971159a684513f339a43d988f5d97c2e7d Mon Sep 17 00:00:00 2001 From: Lynne Date: Thu, 15 Dec 
2022 17:05:35 +0100 Subject: [PATCH 12/98] h264_ps: expose scaling_matrix_present_mask Vulkan requires it. It technically also requires use_default_scaling_matrix_mask, but we can just be explicit and give it the matrix we fill in as non-default. --- libavcodec/h264_ps.c | 37 +++++++++++++++++++++---------------- libavcodec/h264_ps.h | 2 ++ 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c index c71330089d726..53446e9aabeb5 100644 --- a/libavcodec/h264_ps.c +++ b/libavcodec/h264_ps.c @@ -198,12 +198,14 @@ static inline int decode_vui_parameters(GetBitContext *gb, void *logctx, } static int decode_scaling_list(GetBitContext *gb, uint8_t *factors, int size, - const uint8_t *jvt_list, - const uint8_t *fallback_list) + const uint8_t *jvt_list, const uint8_t *fallback_list, + uint16_t *mask, int pos) { int i, last = 8, next = 8; const uint8_t *scan = size == 16 ? ff_zigzag_scan : ff_zigzag_direct; - if (!get_bits1(gb)) /* matrix not written, we use the predicted one */ + uint16_t seq_scaling_list_present_flag = get_bits1(gb); + *mask |= (seq_scaling_list_present_flag << pos); + if (!seq_scaling_list_present_flag) /* matrix not written, we use the predicted one */ memcpy(factors, fallback_list, size * sizeof(uint8_t)); else for (i = 0; i < size; i++) { @@ -227,7 +229,7 @@ static int decode_scaling_list(GetBitContext *gb, uint8_t *factors, int size, /* returns non zero if the provided SPS scaling matrix has been filled */ static int decode_scaling_matrices(GetBitContext *gb, const SPS *sps, const PPS *pps, int is_sps, - int present_flag, + int present_flag, uint16_t *mask, uint8_t(*scaling_matrix4)[16], uint8_t(*scaling_matrix8)[64]) { @@ -239,21 +241,22 @@ static int decode_scaling_matrices(GetBitContext *gb, const SPS *sps, fallback_sps ? 
sps->scaling_matrix8[3] : default_scaling8[1] }; int ret = 0; + *mask = 0x0; if (present_flag) { - ret |= decode_scaling_list(gb, scaling_matrix4[0], 16, default_scaling4[0], fallback[0]); // Intra, Y - ret |= decode_scaling_list(gb, scaling_matrix4[1], 16, default_scaling4[0], scaling_matrix4[0]); // Intra, Cr - ret |= decode_scaling_list(gb, scaling_matrix4[2], 16, default_scaling4[0], scaling_matrix4[1]); // Intra, Cb - ret |= decode_scaling_list(gb, scaling_matrix4[3], 16, default_scaling4[1], fallback[1]); // Inter, Y - ret |= decode_scaling_list(gb, scaling_matrix4[4], 16, default_scaling4[1], scaling_matrix4[3]); // Inter, Cr - ret |= decode_scaling_list(gb, scaling_matrix4[5], 16, default_scaling4[1], scaling_matrix4[4]); // Inter, Cb + ret |= decode_scaling_list(gb, scaling_matrix4[0], 16, default_scaling4[0], fallback[0], mask, 0); // Intra, Y + ret |= decode_scaling_list(gb, scaling_matrix4[1], 16, default_scaling4[0], scaling_matrix4[0], mask, 1); // Intra, Cr + ret |= decode_scaling_list(gb, scaling_matrix4[2], 16, default_scaling4[0], scaling_matrix4[1], mask, 2); // Intra, Cb + ret |= decode_scaling_list(gb, scaling_matrix4[3], 16, default_scaling4[1], fallback[1], mask, 3); // Inter, Y + ret |= decode_scaling_list(gb, scaling_matrix4[4], 16, default_scaling4[1], scaling_matrix4[3], mask, 4); // Inter, Cr + ret |= decode_scaling_list(gb, scaling_matrix4[5], 16, default_scaling4[1], scaling_matrix4[4], mask, 5); // Inter, Cb if (is_sps || pps->transform_8x8_mode) { - ret |= decode_scaling_list(gb, scaling_matrix8[0], 64, default_scaling8[0], fallback[2]); // Intra, Y - ret |= decode_scaling_list(gb, scaling_matrix8[3], 64, default_scaling8[1], fallback[3]); // Inter, Y + ret |= decode_scaling_list(gb, scaling_matrix8[0], 64, default_scaling8[0], fallback[2], mask, 6); // Intra, Y + ret |= decode_scaling_list(gb, scaling_matrix8[3], 64, default_scaling8[1], fallback[3], mask, 7); // Inter, Y if (sps->chroma_format_idc == 3) { - ret |= 
decode_scaling_list(gb, scaling_matrix8[1], 64, default_scaling8[0], scaling_matrix8[0]); // Intra, Cr - ret |= decode_scaling_list(gb, scaling_matrix8[4], 64, default_scaling8[1], scaling_matrix8[3]); // Inter, Cr - ret |= decode_scaling_list(gb, scaling_matrix8[2], 64, default_scaling8[0], scaling_matrix8[1]); // Intra, Cb - ret |= decode_scaling_list(gb, scaling_matrix8[5], 64, default_scaling8[1], scaling_matrix8[4]); // Inter, Cb + ret |= decode_scaling_list(gb, scaling_matrix8[1], 64, default_scaling8[0], scaling_matrix8[0], mask, 8); // Intra, Cr + ret |= decode_scaling_list(gb, scaling_matrix8[4], 64, default_scaling8[1], scaling_matrix8[3], mask, 9); // Inter, Cr + ret |= decode_scaling_list(gb, scaling_matrix8[2], 64, default_scaling8[0], scaling_matrix8[1], mask, 10); // Intra, Cb + ret |= decode_scaling_list(gb, scaling_matrix8[5], 64, default_scaling8[1], scaling_matrix8[4], mask, 11); // Inter, Cb } } if (!ret) @@ -371,6 +374,7 @@ int ff_h264_decode_seq_parameter_set(GetBitContext *gb, AVCodecContext *avctx, } sps->transform_bypass = get_bits1(gb); ret = decode_scaling_matrices(gb, sps, NULL, 1, get_bits1(gb), + &sps->scaling_matrix_present_mask, sps->scaling_matrix4, sps->scaling_matrix8); if (ret < 0) goto fail; @@ -808,6 +812,7 @@ int ff_h264_decode_picture_parameter_set(GetBitContext *gb, AVCodecContext *avct pps->pic_scaling_matrix_present_flag = get_bits1(gb); ret = decode_scaling_matrices(gb, sps, pps, 0, pps->pic_scaling_matrix_present_flag, + &pps->pic_scaling_matrix_present_mask, pps->scaling_matrix4, pps->scaling_matrix8); if (ret < 0) goto fail; diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h index 8adfbd710f375..e6756196352b2 100644 --- a/libavcodec/h264_ps.h +++ b/libavcodec/h264_ps.h @@ -82,6 +82,7 @@ typedef struct SPS { int num_reorder_frames; int max_dec_frame_buffering; int scaling_matrix_present; + uint16_t scaling_matrix_present_mask; uint8_t scaling_matrix4[6][16]; uint8_t scaling_matrix8[6][64]; int 
nal_hrd_parameters_present_flag; @@ -125,6 +126,7 @@ typedef struct PPS { int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag int transform_8x8_mode; ///< transform_8x8_mode_flag int pic_scaling_matrix_present_flag; + uint16_t pic_scaling_matrix_present_mask; uint8_t scaling_matrix4[6][16]; uint8_t scaling_matrix8[6][64]; uint8_t chroma_qp_table[2][QP_MAX_NUM+1]; ///< pre-scaled (with chroma_qp_index_offset) version of qp_table From c912a3aed286fa7cca85da731f196d4aa3eaedf8 Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 7 Dec 2022 01:29:57 +0100 Subject: [PATCH 13/98] hevc_ps: expose SPS and VPS headers --- libavcodec/hevc_ps.c | 100 ++++++++++++++++++++++--------------------- libavcodec/hevc_ps.h | 41 ++++++++++++++++++ 2 files changed, 93 insertions(+), 48 deletions(-) diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c index 043e1bf30854d..aaaca5e6c7275 100644 --- a/libavcodec/hevc_ps.c +++ b/libavcodec/hevc_ps.c @@ -357,81 +357,84 @@ static int parse_ptl(GetBitContext *gb, AVCodecContext *avctx, } static void decode_sublayer_hrd(GetBitContext *gb, unsigned int nb_cpb, - int subpic_params_present) + HEVCSublayerHdrParams *par, int subpic_params_present) { int i; for (i = 0; i < nb_cpb; i++) { - get_ue_golomb_long(gb); // bit_rate_value_minus1 - get_ue_golomb_long(gb); // cpb_size_value_minus1 + par->bit_rate_value_minus1[i] = get_ue_golomb_long(gb); + par->cpb_size_value_minus1[i] = get_ue_golomb_long(gb); if (subpic_params_present) { - get_ue_golomb_long(gb); // cpb_size_du_value_minus1 - get_ue_golomb_long(gb); // bit_rate_du_value_minus1 + par->cpb_size_du_value_minus1[i] = get_ue_golomb_long(gb); + par->bit_rate_du_value_minus1[i] = get_ue_golomb_long(gb); } - skip_bits1(gb); // cbr_flag + + par->cbr_flag = get_bits1(gb); } } static int decode_hrd(GetBitContext *gb, int common_inf_present, - int max_sublayers) + HEVCHdrParams *hdr, int max_sublayers) { - int nal_params_present = 0, vcl_params_present = 0; - int subpic_params_present = 0; 
- int i; - if (common_inf_present) { - nal_params_present = get_bits1(gb); - vcl_params_present = get_bits1(gb); - - if (nal_params_present || vcl_params_present) { - subpic_params_present = get_bits1(gb); - - if (subpic_params_present) { - skip_bits(gb, 8); // tick_divisor_minus2 - skip_bits(gb, 5); // du_cpb_removal_delay_increment_length_minus1 - skip_bits(gb, 1); // sub_pic_cpb_params_in_pic_timing_sei_flag - skip_bits(gb, 5); // dpb_output_delay_du_length_minus1 + hdr->flags.nal_hrd_parameters_present_flag = get_bits1(gb); + hdr->flags.vcl_hrd_parameters_present_flag = get_bits1(gb); + + if (hdr->flags.nal_hrd_parameters_present_flag || + hdr->flags.vcl_hrd_parameters_present_flag) { + hdr->flags.sub_pic_hrd_params_present_flag = get_bits1(gb); + + if (hdr->flags.sub_pic_hrd_params_present_flag) { + hdr->tick_divisor_minus2 = get_bits(gb, 8); + hdr->du_cpb_removal_delay_increment_length_minus1 = get_bits(gb, 5); + hdr->flags.sub_pic_cpb_params_in_pic_timing_sei_flag = get_bits1(gb); + hdr->dpb_output_delay_du_length_minus1 = get_bits(gb, 5); } - skip_bits(gb, 4); // bit_rate_scale - skip_bits(gb, 4); // cpb_size_scale + hdr->bit_rate_scale = get_bits(gb, 4); + hdr->cpb_size_scale = get_bits(gb, 4); - if (subpic_params_present) - skip_bits(gb, 4); // cpb_size_du_scale + if (hdr->flags.sub_pic_hrd_params_present_flag) + hdr->cpb_size_du_scale = get_bits(gb, 4); - skip_bits(gb, 5); // initial_cpb_removal_delay_length_minus1 - skip_bits(gb, 5); // au_cpb_removal_delay_length_minus1 - skip_bits(gb, 5); // dpb_output_delay_length_minus1 + hdr->initial_cpb_removal_delay_length_minus1 = get_bits(gb, 5); + hdr->au_cpb_removal_delay_length_minus1 = get_bits(gb, 5); + hdr->dpb_output_delay_length_minus1 = get_bits(gb, 5); } } - for (i = 0; i < max_sublayers; i++) { - int low_delay = 0; - unsigned int nb_cpb = 1; - int fixed_rate = get_bits1(gb); + for (int i = 0; i < max_sublayers; i++) { + hdr->flags.fixed_pic_rate_general_flag = get_bits1(gb); + + 
hdr->cpb_cnt_minus1[i] = 1; - if (!fixed_rate) - fixed_rate = get_bits1(gb); + if (!hdr->flags.fixed_pic_rate_general_flag) + hdr->flags.fixed_pic_rate_within_cvs_flag = get_bits1(gb); - if (fixed_rate) - get_ue_golomb_long(gb); // elemental_duration_in_tc_minus1 + if (hdr->flags.fixed_pic_rate_within_cvs_flag) + hdr->elemental_duration_in_tc_minus1[i] = get_ue_golomb_long(gb); else - low_delay = get_bits1(gb); + hdr->flags.low_delay_hrd_flag = get_bits1(gb); - if (!low_delay) { - nb_cpb = get_ue_golomb_long(gb) + 1; - if (nb_cpb < 1 || nb_cpb > 32) { - av_log(NULL, AV_LOG_ERROR, "nb_cpb %d invalid\n", nb_cpb); + if (!hdr->flags.low_delay_hrd_flag) { + hdr->cpb_cnt_minus1[i] = get_ue_golomb_long(gb); + if (hdr->cpb_cnt_minus1[i] > 31) { + av_log(NULL, AV_LOG_ERROR, "nb_cpb %d invalid\n", + hdr->cpb_cnt_minus1[i]); return AVERROR_INVALIDDATA; } } - if (nal_params_present) - decode_sublayer_hrd(gb, nb_cpb, subpic_params_present); - if (vcl_params_present) - decode_sublayer_hrd(gb, nb_cpb, subpic_params_present); + if (hdr->flags.nal_hrd_parameters_present_flag) + decode_sublayer_hrd(gb, hdr->cpb_cnt_minus1[i], &hdr->nal_params[i], + hdr->flags.sub_pic_hrd_params_present_flag); + + if (hdr->flags.vcl_hrd_parameters_present_flag) + decode_sublayer_hrd(gb, hdr->cpb_cnt_minus1[i], &hdr->vcl_params[i], + hdr->flags.sub_pic_hrd_params_present_flag); } + return 0; } @@ -538,7 +541,8 @@ int ff_hevc_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx, get_ue_golomb_long(gb); // hrd_layer_set_idx if (i) common_inf_present = get_bits1(gb); - decode_hrd(gb, common_inf_present, vps->vps_max_sub_layers); + decode_hrd(gb, common_inf_present, &vps->hdr[i], + vps->vps_max_sub_layers); } } get_bits1(gb); /* vps_extension_flag */ @@ -657,7 +661,7 @@ static void decode_vui(GetBitContext *gb, AVCodecContext *avctx, vui->vui_num_ticks_poc_diff_one_minus1 = get_ue_golomb_long(gb); vui->vui_hrd_parameters_present_flag = get_bits1(gb); if (vui->vui_hrd_parameters_present_flag) - 
decode_hrd(gb, 1, sps->max_sub_layers); + decode_hrd(gb, 1, &sps->hdr, sps->max_sub_layers); } vui->bitstream_restriction_flag = get_bits1(gb); diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h index 2124deb953d9d..0124f5d37f582 100644 --- a/libavcodec/hevc_ps.h +++ b/libavcodec/hevc_ps.h @@ -32,6 +32,43 @@ #include "h2645_vui.h" #include "hevc.h" +typedef struct HEVCSublayerHdrParams { + uint32_t bit_rate_value_minus1[HEVC_MAX_CPB_CNT]; + uint32_t cpb_size_value_minus1[HEVC_MAX_CPB_CNT]; + uint32_t cpb_size_du_value_minus1[HEVC_MAX_CPB_CNT]; + uint32_t bit_rate_du_value_minus1[HEVC_MAX_CPB_CNT]; + uint32_t cbr_flag; +} HEVCSublayerHdrParams; + +typedef struct HEVCHdrFlagParams { + uint32_t nal_hrd_parameters_present_flag; + uint32_t vcl_hrd_parameters_present_flag; + uint32_t sub_pic_hrd_params_present_flag; + uint32_t sub_pic_cpb_params_in_pic_timing_sei_flag; + uint32_t fixed_pic_rate_general_flag; + uint32_t fixed_pic_rate_within_cvs_flag; + uint32_t low_delay_hrd_flag; +} HEVCHdrFlagParams; + +typedef struct HEVCHdrParams { + HEVCHdrFlagParams flags; + + uint8_t tick_divisor_minus2; + uint8_t du_cpb_removal_delay_increment_length_minus1; + uint8_t dpb_output_delay_du_length_minus1; + uint8_t bit_rate_scale; + uint8_t cpb_size_scale; + uint8_t cpb_size_du_scale; + uint8_t initial_cpb_removal_delay_length_minus1; + uint8_t au_cpb_removal_delay_length_minus1; + uint8_t dpb_output_delay_length_minus1; + uint8_t cpb_cnt_minus1[HEVC_MAX_SUB_LAYERS]; + uint16_t elemental_duration_in_tc_minus1[HEVC_MAX_SUB_LAYERS]; + + HEVCSublayerHdrParams nal_params[HEVC_MAX_SUB_LAYERS]; + HEVCSublayerHdrParams vcl_params[HEVC_MAX_SUB_LAYERS]; +} HEVCHdrParams; + typedef struct ShortTermRPS { unsigned int num_negative_pics; int num_delta_pocs; @@ -108,6 +145,8 @@ typedef struct PTL { } PTL; typedef struct HEVCVPS { + HEVCHdrParams hdr[HEVC_MAX_LAYER_SETS]; + uint8_t vps_temporal_id_nesting_flag; int vps_max_layers; int vps_max_sub_layers; ///< vps_max_temporal_layers_minus1 + 
1 @@ -146,6 +185,8 @@ typedef struct HEVCSPS { HEVCWindow pic_conf_win; + HEVCHdrParams hdr; + int bit_depth; int bit_depth_chroma; int pixel_shift; From 1586250ca89dfaadc9414e99f22cadf11e59596d Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 7 Dec 2022 05:33:29 +0100 Subject: [PATCH 14/98] hevc_ps: expose pps_id --- libavcodec/hevc_ps.c | 2 +- libavcodec/hevc_ps.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c index aaaca5e6c7275..73f90b483119c 100644 --- a/libavcodec/hevc_ps.c +++ b/libavcodec/hevc_ps.c @@ -1776,7 +1776,7 @@ int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx, pps->log2_max_transform_skip_block_size = 2; // Coded parameters - pps_id = get_ue_golomb_long(gb); + pps_id = pps->pps_id = get_ue_golomb_long(gb); if (pps_id >= HEVC_MAX_PPS_COUNT) { av_log(avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", pps_id); ret = AVERROR_INVALIDDATA; diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h index 0124f5d37f582..e461b48943d1a 100644 --- a/libavcodec/hevc_ps.h +++ b/libavcodec/hevc_ps.h @@ -289,6 +289,7 @@ typedef struct HEVCSPS { } HEVCSPS; typedef struct HEVCPPS { + unsigned int pps_id; unsigned int sps_id; ///< seq_parameter_set_id uint8_t sign_data_hiding_flag; From 537a1a52fc132f0137f5f094341f254fa12fc06c Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 7 Dec 2022 06:42:44 +0100 Subject: [PATCH 15/98] hevc_ps: expose vps_id --- libavcodec/hevc_ps.c | 2 +- libavcodec/hevc_ps.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c index 73f90b483119c..581c9293ab195 100644 --- a/libavcodec/hevc_ps.c +++ b/libavcodec/hevc_ps.c @@ -464,7 +464,7 @@ int ff_hevc_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx, } memcpy(vps->data, gb->buffer, vps->data_size); - vps_id = get_bits(gb, 4); + vps_id = vps->vps_id = get_bits(gb, 4); if (get_bits(gb, 2) != 3) { // vps_reserved_three_2bits av_log(avctx, AV_LOG_ERROR, 
"vps_reserved_three_2bits is not three\n"); diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h index e461b48943d1a..eb9e6beac1ed4 100644 --- a/libavcodec/hevc_ps.h +++ b/libavcodec/hevc_ps.h @@ -145,6 +145,7 @@ typedef struct PTL { } PTL; typedef struct HEVCVPS { + unsigned int vps_id; HEVCHdrParams hdr[HEVC_MAX_LAYER_SETS]; uint8_t vps_temporal_id_nesting_flag; From eb7eb73d9a94c4a27c8d8725a9c3a73777af9b24 Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 7 Dec 2022 12:49:45 +0100 Subject: [PATCH 16/98] hevc_ps: expose pps_extension_present_flag --- libavcodec/hevc_ps.c | 3 ++- libavcodec/hevc_ps.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c index 581c9293ab195..61f4f7c1fca38 100644 --- a/libavcodec/hevc_ps.c +++ b/libavcodec/hevc_ps.c @@ -1956,7 +1956,8 @@ int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx, pps->slice_header_extension_present_flag = get_bits1(gb); - if (get_bits1(gb)) { // pps_extension_present_flag + pps->pps_extension_present_flag = get_bits1(gb); + if (pps->pps_extension_present_flag) { pps->pps_range_extensions_flag = get_bits1(gb); pps->pps_multilayer_extension_flag = get_bits1(gb); pps->pps_3d_extension_flag = get_bits1(gb); diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h index eb9e6beac1ed4..e78018c33912c 100644 --- a/libavcodec/hevc_ps.h +++ b/libavcodec/hevc_ps.h @@ -340,6 +340,7 @@ typedef struct HEVCPPS { int num_extra_slice_header_bits; uint8_t slice_header_extension_present_flag; uint8_t log2_max_transform_skip_block_size; + uint8_t pps_extension_present_flag; uint8_t pps_range_extensions_flag; uint8_t pps_multilayer_extension_flag; uint8_t pps_3d_extension_flag; From 1de1dd9955acafea4c779d387c36d5069a194d2e Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 7 Dec 2022 17:11:36 +0100 Subject: [PATCH 17/98] hevcdec: expose bits_used_for_short_term_rps --- libavcodec/hevcdec.c | 1 + libavcodec/hevcdec.h | 1 + 2 files changed, 2 insertions(+) 
diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c index 7e1bf4e9157d4..b01563177b8af 100644 --- a/libavcodec/hevcdec.c +++ b/libavcodec/hevcdec.c @@ -703,6 +703,7 @@ static int hls_slice_header(HEVCContext *s) if (ret < 0) return ret; + sh->bits_used_for_short_term_rps = pos - get_bits_left(gb); sh->short_term_rps = &sh->slice_rps; } else { int numbits, rps_idx; diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h index 94609e46993d1..04ec25d540b8a 100644 --- a/libavcodec/hevcdec.h +++ b/libavcodec/hevcdec.h @@ -268,6 +268,7 @@ typedef struct SliceHeader { ///< RPS coded in the slice header itself is stored here int short_term_ref_pic_set_sps_flag; + int bits_used_for_short_term_rps; int short_term_ref_pic_set_size; ShortTermRPS slice_rps; const ShortTermRPS *short_term_rps; From 79096b8c2083b5a5b257d9fd9ca39c16575d958f Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 7 Dec 2022 04:30:46 +0100 Subject: [PATCH 18/98] hevc_ps: expose log2_diff_max_min_transform_block_size --- libavcodec/hevc_ps.c | 20 ++++++++++---------- libavcodec/hevc_ps.h | 1 + 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c index 61f4f7c1fca38..5b5cd9d980794 100644 --- a/libavcodec/hevc_ps.c +++ b/libavcodec/hevc_ps.c @@ -856,9 +856,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, { HEVCWindow *ow; int ret = 0; - int log2_diff_max_min_transform_block_size; int bit_depth_chroma, start, vui_present, sublayer_ordering_info, num_comps; - int i; + int i, j; // Coded parameters @@ -993,12 +992,12 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, } } - sps->log2_min_cb_size = get_ue_golomb_long(gb) + 3; - sps->log2_diff_max_min_coding_block_size = get_ue_golomb_long(gb); - sps->log2_min_tb_size = get_ue_golomb_long(gb) + 2; - log2_diff_max_min_transform_block_size = get_ue_golomb_long(gb); - sps->log2_max_trafo_size = log2_diff_max_min_transform_block_size + - 
sps->log2_min_tb_size; + sps->log2_min_cb_size = get_ue_golomb_long(gb) + 3; + sps->log2_diff_max_min_coding_block_size = get_ue_golomb_long(gb); + sps->log2_min_tb_size = get_ue_golomb_long(gb) + 2; + sps->log2_diff_max_min_transform_block_size = get_ue_golomb_long(gb); + sps->log2_max_trafo_size = sps->log2_diff_max_min_transform_block_size + + sps->log2_min_tb_size; if (sps->log2_min_cb_size < 3 || sps->log2_min_cb_size > 30) { av_log(avctx, AV_LOG_ERROR, "Invalid value %d for log2_min_cb_size", sps->log2_min_cb_size); @@ -1015,8 +1014,9 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, return AVERROR_INVALIDDATA; } - if (log2_diff_max_min_transform_block_size < 0 || log2_diff_max_min_transform_block_size > 30) { - av_log(avctx, AV_LOG_ERROR, "Invalid value %d for log2_diff_max_min_transform_block_size", log2_diff_max_min_transform_block_size); + if (sps->log2_diff_max_min_transform_block_size > 30) { + av_log(avctx, AV_LOG_ERROR, "Invalid value %d for log2_diff_max_min_transform_block_size", + sps->log2_diff_max_min_transform_block_size); return AVERROR_INVALIDDATA; } diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h index e78018c33912c..344c5bbc7faa1 100644 --- a/libavcodec/hevc_ps.h +++ b/libavcodec/hevc_ps.h @@ -237,6 +237,7 @@ typedef struct HEVCSPS { unsigned int log2_max_trafo_size; unsigned int log2_ctb_size; unsigned int log2_min_pu_size; + unsigned int log2_diff_max_min_transform_block_size; int max_transform_hierarchy_depth_inter; int max_transform_hierarchy_depth_intra; From dc3cf3107e60a311719bab11b851b70861b00b68 Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 14 Dec 2022 00:25:48 +0100 Subject: [PATCH 19/98] hevc_ps: expose rps fields --- libavcodec/hevc_ps.c | 37 ++++++++++++++++++------------------- libavcodec/hevc_ps.h | 7 +++++++ 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c index 5b5cd9d980794..7181398e90b8f 100644 --- a/libavcodec/hevc_ps.c +++ 
b/libavcodec/hevc_ps.c @@ -100,51 +100,50 @@ static void remove_vps(HEVCParamSets *s, int id) int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, ShortTermRPS *rps, const HEVCSPS *sps, int is_slice_header) { - uint8_t rps_predict = 0; int delta_poc; int k0 = 0; int k = 0; int i; + rps->rps_predict = 0; + if (rps != sps->st_rps && sps->nb_st_rps) - rps_predict = get_bits1(gb); + rps->rps_predict = get_bits1(gb); - if (rps_predict) { + if (rps->rps_predict) { const ShortTermRPS *rps_ridx; int delta_rps; - unsigned abs_delta_rps; - uint8_t use_delta_flag = 0; - uint8_t delta_rps_sign; if (is_slice_header) { - unsigned int delta_idx = get_ue_golomb_long(gb) + 1; - if (delta_idx > sps->nb_st_rps) { + rps->delta_idx = get_ue_golomb_long(gb) + 1; + if (rps->delta_idx > sps->nb_st_rps) { av_log(avctx, AV_LOG_ERROR, "Invalid value of delta_idx in slice header RPS: %d > %d.\n", - delta_idx, sps->nb_st_rps); + rps->delta_idx, sps->nb_st_rps); return AVERROR_INVALIDDATA; } - rps_ridx = &sps->st_rps[sps->nb_st_rps - delta_idx]; + rps_ridx = &sps->st_rps[sps->nb_st_rps - rps->delta_idx]; rps->rps_idx_num_delta_pocs = rps_ridx->num_delta_pocs; } else rps_ridx = &sps->st_rps[rps - sps->st_rps - 1]; - delta_rps_sign = get_bits1(gb); - abs_delta_rps = get_ue_golomb_long(gb) + 1; - if (abs_delta_rps < 1 || abs_delta_rps > 32768) { + rps->delta_rps_sign = get_bits1(gb); + rps->abs_delta_rps = get_ue_golomb_long(gb) + 1; + if (rps->abs_delta_rps > 32768) { av_log(avctx, AV_LOG_ERROR, "Invalid value of abs_delta_rps: %d\n", - abs_delta_rps); + rps->abs_delta_rps); return AVERROR_INVALIDDATA; } - delta_rps = (1 - (delta_rps_sign << 1)) * abs_delta_rps; + delta_rps = (1 - (rps->delta_rps_sign << 1)) * rps->abs_delta_rps; for (i = 0; i <= rps_ridx->num_delta_pocs; i++) { int used = rps->used[k] = get_bits1(gb); + rps->use_delta_flag = 0; if (!used) - use_delta_flag = get_bits1(gb); + rps->use_delta_flag = get_bits1(gb); - if (used || use_delta_flag) { + if (used || 
rps->use_delta_flag) { if (i < rps_ridx->num_delta_pocs) delta_poc = delta_rps + rps_ridx->delta_poc[i]; else @@ -210,7 +209,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, if (rps->num_delta_pocs) { prev = 0; for (i = 0; i < rps->num_negative_pics; i++) { - delta_poc = get_ue_golomb_long(gb) + 1; + delta_poc = rps->delta_poc_s0[i] = get_ue_golomb_long(gb) + 1; if (delta_poc < 1 || delta_poc > 32768) { av_log(avctx, AV_LOG_ERROR, "Invalid value of delta_poc: %d\n", @@ -223,7 +222,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, } prev = 0; for (i = 0; i < nb_positive_pics; i++) { - delta_poc = get_ue_golomb_long(gb) + 1; + delta_poc = rps->delta_poc_s1[i] = get_ue_golomb_long(gb) + 1; if (delta_poc < 1 || delta_poc > 32768) { av_log(avctx, AV_LOG_ERROR, "Invalid value of delta_poc: %d\n", diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h index 344c5bbc7faa1..2b89cdc9fd6aa 100644 --- a/libavcodec/hevc_ps.h +++ b/libavcodec/hevc_ps.h @@ -70,9 +70,16 @@ typedef struct HEVCHdrParams { } HEVCHdrParams; typedef struct ShortTermRPS { + uint8_t rps_predict; + unsigned int delta_idx; + uint8_t use_delta_flag; + uint8_t delta_rps_sign; + unsigned int abs_delta_rps; unsigned int num_negative_pics; int num_delta_pocs; int rps_idx_num_delta_pocs; + int32_t delta_poc_s0[32]; + int32_t delta_poc_s1[32]; int32_t delta_poc[32]; uint8_t used[32]; } ShortTermRPS; From 9c6e79c06be1bf3d24447c700a407f8816e5166d Mon Sep 17 00:00:00 2001 From: Lynne Date: Tue, 28 Feb 2023 20:44:06 +0100 Subject: [PATCH 20/98] hevc_ps: expose vui_present flag --- libavcodec/hevc_ps.c | 6 +++--- libavcodec/hevc_ps.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c index 7181398e90b8f..995b70f65d24a 100644 --- a/libavcodec/hevc_ps.c +++ b/libavcodec/hevc_ps.c @@ -855,7 +855,7 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, { HEVCWindow *ow; int 
ret = 0; - int bit_depth_chroma, start, vui_present, sublayer_ordering_info, num_comps; + int bit_depth_chroma, start, sublayer_ordering_info, num_comps; int i, j; // Coded parameters @@ -1082,8 +1082,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, sps->sps_temporal_mvp_enabled_flag = get_bits1(gb); sps->sps_strong_intra_smoothing_enable_flag = get_bits1(gb); sps->vui.common.sar = (AVRational){0, 1}; - vui_present = get_bits1(gb); - if (vui_present) + sps->vui_present = get_bits1(gb); + if (sps->vui_present) decode_vui(gb, avctx, apply_defdispwin, sps); if (get_bits1(gb)) { // sps_extension_flag diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h index 2b89cdc9fd6aa..39212b43a7a93 100644 --- a/libavcodec/hevc_ps.h +++ b/libavcodec/hevc_ps.h @@ -211,6 +211,7 @@ typedef struct HEVCSPS { } temporal_layer[HEVC_MAX_SUB_LAYERS]; uint8_t temporal_id_nesting_flag; + int vui_present; VUI vui; PTL ptl; From 82483e38ca0283e51837d0d8786f67efb82f89ef Mon Sep 17 00:00:00 2001 From: Lynne Date: Tue, 28 Feb 2023 20:44:38 +0100 Subject: [PATCH 21/98] hevc_ps: expose sublayer_ordering_info_flag --- libavcodec/hevc_ps.c | 8 ++++---- libavcodec/hevc_ps.h | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c index 995b70f65d24a..8125d9cbd3bbe 100644 --- a/libavcodec/hevc_ps.c +++ b/libavcodec/hevc_ps.c @@ -855,7 +855,7 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, { HEVCWindow *ow; int ret = 0; - int bit_depth_chroma, start, sublayer_ordering_info, num_comps; + int bit_depth_chroma, start, num_comps; int i, j; // Coded parameters @@ -961,8 +961,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, return AVERROR_INVALIDDATA; } - sublayer_ordering_info = get_bits1(gb); - start = sublayer_ordering_info ? 0 : sps->max_sub_layers - 1; + sps->sublayer_ordering_info_flag = get_bits1(gb); + start = sps->sublayer_ordering_info_flag ? 
0 : sps->max_sub_layers - 1; for (i = start; i < sps->max_sub_layers; i++) { sps->temporal_layer[i].max_dec_pic_buffering = get_ue_golomb_long(gb) + 1; sps->temporal_layer[i].num_reorder_pics = get_ue_golomb_long(gb); @@ -983,7 +983,7 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, } } - if (!sublayer_ordering_info) { + if (!sps->sublayer_ordering_info_flag) { for (i = 0; i < start; i++) { sps->temporal_layer[i].max_dec_pic_buffering = sps->temporal_layer[start].max_dec_pic_buffering; sps->temporal_layer[i].num_reorder_pics = sps->temporal_layer[start].num_reorder_pics; diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h index 39212b43a7a93..182b671ef7f00 100644 --- a/libavcodec/hevc_ps.h +++ b/libavcodec/hevc_ps.h @@ -203,6 +203,7 @@ typedef struct HEVCSPS { unsigned int log2_max_poc_lsb; int pcm_enabled_flag; + uint8_t sublayer_ordering_info_flag; int max_sub_layers; struct { int max_dec_pic_buffering; From 6833f3324b0ac9fef1118e0463a04c5aa1828a58 Mon Sep 17 00:00:00 2001 From: Lynne Date: Tue, 28 Feb 2023 20:45:05 +0100 Subject: [PATCH 22/98] hevc_ps: expose conformance_window_flag --- libavcodec/hevc_ps.c | 3 ++- libavcodec/hevc_ps.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c index 8125d9cbd3bbe..f106932d43a18 100644 --- a/libavcodec/hevc_ps.c +++ b/libavcodec/hevc_ps.c @@ -904,7 +904,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, sps->height, 0, avctx)) < 0) return ret; - if (get_bits1(gb)) { // pic_conformance_flag + sps->conformance_window_flag = get_bits1(gb); + if (sps->conformance_window_flag) { int vert_mult = hevc_sub_height_c[sps->chroma_format_idc]; int horiz_mult = hevc_sub_width_c[sps->chroma_format_idc]; sps->pic_conf_win.left_offset = get_ue_golomb_long(gb) * horiz_mult; diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h index 182b671ef7f00..a9515cdb0e2de 100644 --- a/libavcodec/hevc_ps.h +++ 
b/libavcodec/hevc_ps.h @@ -191,6 +191,7 @@ typedef struct HEVCSPS { HEVCWindow output_window; + uint8_t conformance_window_flag; HEVCWindow pic_conf_win; HEVCHdrParams hdr; From 97272ef2f3adb99d70e4628c4ff3eceef3565828 Mon Sep 17 00:00:00 2001 From: Lynne Date: Tue, 28 Feb 2023 20:45:24 +0100 Subject: [PATCH 23/98] hevc_ps: expose sps_extension_present_flag --- libavcodec/hevc_ps.c | 3 ++- libavcodec/hevc_ps.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c index f106932d43a18..01b11ed42aa3b 100644 --- a/libavcodec/hevc_ps.c +++ b/libavcodec/hevc_ps.c @@ -1087,7 +1087,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, if (sps->vui_present) decode_vui(gb, avctx, apply_defdispwin, sps); - if (get_bits1(gb)) { // sps_extension_flag + sps->sps_extension_present_flag = get_bits1(gb); + if (sps->sps_extension_present_flag) { sps->sps_range_extension_flag = get_bits1(gb); sps->sps_multilayer_extension_flag = get_bits1(gb); sps->sps_3d_extension_flag = get_bits1(gb); diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h index a9515cdb0e2de..ef11e51ee72f3 100644 --- a/libavcodec/hevc_ps.h +++ b/libavcodec/hevc_ps.h @@ -217,6 +217,7 @@ typedef struct HEVCSPS { VUI vui; PTL ptl; + uint8_t sps_extension_present_flag; uint8_t scaling_list_enable_flag; ScalingList scaling_list; From a0dd9b2c9d83839a594abdce5214c92e36f1a730 Mon Sep 17 00:00:00 2001 From: Lynne Date: Sat, 25 Feb 2023 09:34:54 +0100 Subject: [PATCH 24/98] lavu: add 12-bit 2-plane 422 and 444 pixel formats --- libavutil/pixdesc.c | 48 +++++++++++++++++++++++++++++++++++++++++ libavutil/pixfmt.h | 8 +++++++ tests/ref/fate/imgutils | 4 ++++ 3 files changed, 60 insertions(+) diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c index 62a2ae08d9078..e1e0dd2a9ea10 100644 --- a/libavutil/pixdesc.c +++ b/libavutil/pixdesc.c @@ -2717,6 +2717,54 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = { .flags 
= AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_FLOAT | AV_PIX_FMT_FLAG_ALPHA, }, + [AV_PIX_FMT_P212BE] = { + .name = "p212be", + .nb_components = 3, + .log2_chroma_w = 1, + .log2_chroma_h = 0, + .comp = { + { 0, 2, 0, 4, 12 }, /* Y */ + { 1, 4, 0, 4, 12 }, /* U */ + { 1, 4, 2, 4, 12 }, /* V */ + }, + .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_BE, + }, + [AV_PIX_FMT_P212LE] = { + .name = "p212le", + .nb_components = 3, + .log2_chroma_w = 1, + .log2_chroma_h = 0, + .comp = { + { 0, 2, 0, 4, 12 }, /* Y */ + { 1, 4, 0, 4, 12 }, /* U */ + { 1, 4, 2, 4, 12 }, /* V */ + }, + .flags = AV_PIX_FMT_FLAG_PLANAR, + }, + [AV_PIX_FMT_P412BE] = { + .name = "p412be", + .nb_components = 3, + .log2_chroma_w = 0, + .log2_chroma_h = 0, + .comp = { + { 0, 2, 0, 4, 12 }, /* Y */ + { 1, 4, 0, 4, 12 }, /* U */ + { 1, 4, 2, 4, 12 }, /* V */ + }, + .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_BE, + }, + [AV_PIX_FMT_P412LE] = { + .name = "p412le", + .nb_components = 3, + .log2_chroma_w = 0, + .log2_chroma_h = 0, + .comp = { + { 0, 2, 0, 4, 12 }, /* Y */ + { 1, 4, 0, 4, 12 }, /* U */ + { 1, 4, 2, 4, 12 }, /* V */ + }, + .flags = AV_PIX_FMT_FLAG_PLANAR, + }, }; static const char * const color_range_names[] = { diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h index 37c2c79e01405..63e07ba64f752 100644 --- a/libavutil/pixfmt.h +++ b/libavutil/pixfmt.h @@ -420,6 +420,12 @@ enum AVPixelFormat { AV_PIX_FMT_RGBAF32BE, ///< IEEE-754 single precision packed RGBA 32:32:32:32, 128bpp, RGBARGBA..., big-endian AV_PIX_FMT_RGBAF32LE, ///< IEEE-754 single precision packed RGBA 32:32:32:32, 128bpp, RGBARGBA..., little-endian + AV_PIX_FMT_P212BE, ///< interleaved chroma YUV 4:2:2, 24bpp, data in the high bits, big-endian + AV_PIX_FMT_P212LE, ///< interleaved chroma YUV 4:2:2, 24bpp, data in the high bits, little-endian + + AV_PIX_FMT_P412BE, ///< interleaved chroma YUV 4:4:4, 36bpp, data in the high bits, big-endian + AV_PIX_FMT_P412LE, ///< interleaved chroma YUV 4:4:4, 36bpp, data in the high bits, 
little-endian + AV_PIX_FMT_NB ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions }; @@ -518,6 +524,8 @@ enum AVPixelFormat { #define AV_PIX_FMT_P210 AV_PIX_FMT_NE(P210BE, P210LE) #define AV_PIX_FMT_P410 AV_PIX_FMT_NE(P410BE, P410LE) +#define AV_PIX_FMT_P212 AV_PIX_FMT_NE(P212BE, P212LE) +#define AV_PIX_FMT_P412 AV_PIX_FMT_NE(P412BE, P412LE) #define AV_PIX_FMT_P216 AV_PIX_FMT_NE(P216BE, P216LE) #define AV_PIX_FMT_P416 AV_PIX_FMT_NE(P416BE, P416LE) diff --git a/tests/ref/fate/imgutils b/tests/ref/fate/imgutils index e79ec7e4b3dc2..02a755f2b7632 100644 --- a/tests/ref/fate/imgutils +++ b/tests/ref/fate/imgutils @@ -262,3 +262,7 @@ rgbf32be planes: 1, linesizes: 768 0 0 0, plane_sizes: 36864 0 rgbf32le planes: 1, linesizes: 768 0 0 0, plane_sizes: 36864 0 0 0, plane_offsets: 0 0 0, total_size: 36864 rgbaf32be planes: 1, linesizes: 1024 0 0 0, plane_sizes: 49152 0 0 0, plane_offsets: 0 0 0, total_size: 49152 rgbaf32le planes: 1, linesizes: 1024 0 0 0, plane_sizes: 49152 0 0 0, plane_offsets: 0 0 0, total_size: 49152 +p212be planes: 2, linesizes: 128 128 0 0, plane_sizes: 6144 6144 0 0, plane_offsets: 6144 0 0, total_size: 12288 +p212le planes: 2, linesizes: 128 128 0 0, plane_sizes: 6144 6144 0 0, plane_offsets: 6144 0 0, total_size: 12288 +p412be planes: 2, linesizes: 128 256 0 0, plane_sizes: 6144 12288 0 0, plane_offsets: 6144 0 0, total_size: 18432 +p412le planes: 2, linesizes: 128 256 0 0, plane_sizes: 6144 12288 0 0, plane_offsets: 6144 0 0, total_size: 18432 From 7af2b9d1bce5c9589bf2b91303954ee65d812d6c Mon Sep 17 00:00:00 2001 From: Lynne Date: Sat, 25 Feb 2023 09:36:58 +0100 Subject: [PATCH 25/98] lsws: add in/out support for the new 12-bit 2-plane 422 and 444 pixfmts --- libswscale/input.c | 8 ++++++++ libswscale/utils.c | 4 ++++ tests/ref/fate/sws-pixdesc-query | 26 ++++++++++++++++++++++++++ 3 files changed, 38 insertions(+) diff --git a/libswscale/input.c 
b/libswscale/input.c index d5676062a298f..41795c636ee73 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -1452,9 +1452,13 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) c->chrToYV12 = p010BEToUV_c; break; case AV_PIX_FMT_P012LE: + case AV_PIX_FMT_P212LE: + case AV_PIX_FMT_P412LE: c->chrToYV12 = p012LEToUV_c; break; case AV_PIX_FMT_P012BE: + case AV_PIX_FMT_P212BE: + case AV_PIX_FMT_P412BE: c->chrToYV12 = p012BEToUV_c; break; case AV_PIX_FMT_P016LE: @@ -1944,9 +1948,13 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) c->lumToYV12 = p010BEToY_c; break; case AV_PIX_FMT_P012LE: + case AV_PIX_FMT_P212LE: + case AV_PIX_FMT_P412LE: c->lumToYV12 = p012LEToY_c; break; case AV_PIX_FMT_P012BE: + case AV_PIX_FMT_P212BE: + case AV_PIX_FMT_P412BE: c->lumToYV12 = p012BEToY_c; break; case AV_PIX_FMT_GRAYF32LE: diff --git a/libswscale/utils.c b/libswscale/utils.c index 925c536bf17c3..a3a7a407509c7 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -248,8 +248,12 @@ static const FormatEntry format_entries[] = { [AV_PIX_FMT_X2BGR10LE] = { 1, 1 }, [AV_PIX_FMT_P210BE] = { 1, 1 }, [AV_PIX_FMT_P210LE] = { 1, 1 }, + [AV_PIX_FMT_P212BE] = { 1, 1 }, + [AV_PIX_FMT_P212LE] = { 1, 1 }, [AV_PIX_FMT_P410BE] = { 1, 1 }, [AV_PIX_FMT_P410LE] = { 1, 1 }, + [AV_PIX_FMT_P412BE] = { 1, 1 }, + [AV_PIX_FMT_P412LE] = { 1, 1 }, [AV_PIX_FMT_P216BE] = { 1, 1 }, [AV_PIX_FMT_P216LE] = { 1, 1 }, [AV_PIX_FMT_P416BE] = { 1, 1 }, diff --git a/tests/ref/fate/sws-pixdesc-query b/tests/ref/fate/sws-pixdesc-query index 14156a383cc39..fd7f2aefc0f18 100644 --- a/tests/ref/fate/sws-pixdesc-query +++ b/tests/ref/fate/sws-pixdesc-query @@ -67,8 +67,12 @@ isNBPS: p012le p210be p210le + p212be + p212le p410be p410le + p412be + p412le x2bgr10be x2bgr10le x2rgb10be @@ -160,8 +164,10 @@ isBE: p012be p016be p210be + p212be p216be p410be + p412be p416be rgb444be rgb48be @@ -226,10 +232,14 @@ isYUV: p016le p210be p210le + p212be + p212le p216be p216le p410be p410le + p412be + p412le p416be p416le 
uyvy422 @@ -338,10 +348,14 @@ isPlanarYUV: p016le p210be p210le + p212be + p212le p216be p216le p410be p410le + p412be + p412le p416be p416le yuv410p @@ -431,10 +445,14 @@ isSemiPlanarYUV: p016le p210be p210le + p212be + p212le p216be p216le p410be p410le + p412be + p412le p416be p416le @@ -853,10 +871,14 @@ Planar: p016le p210be p210le + p212be + p212le p216be p216le p410be p410le + p412be + p412le p416be p416le yuv410p @@ -1029,8 +1051,12 @@ DataInHighBits: p012le p210be p210le + p212be + p212le p410be p410le + p412be + p412le xv36be xv36le xyz12be From 4b2ebe570b5394fc5039df358fdd2ebec64cd25e Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 23 Nov 2022 15:15:04 +0100 Subject: [PATCH 26/98] hwcontext_vulkan: initialize and require instance version 1.3 This just bumps the required loader library version (libvulkan). All device-related features, such as video decoding, atomics, etc. are still optional and the code deals with their loss on a local level (e.g. the decoder or filter checks for the features it needs, not the hwcontext). Bumping the required version essentially packs all maintenance extensions which correct the spec rather than requiring to enable them individually. --- configure | 4 ++-- libavutil/hwcontext_vulkan.c | 2 +- libavutil/hwcontext_vulkan.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/configure b/configure index a54398c57fa50..5ea128babd4f6 100755 --- a/configure +++ b/configure @@ -7040,8 +7040,8 @@ enabled crystalhd && check_lib crystalhd "stdint.h libcrystalhd/libcrystalhd_if. "in maintaining it." 
if enabled vulkan; then - check_pkg_config_header_only vulkan "vulkan >= 1.2.189" "vulkan/vulkan.h" "defined VK_VERSION_1_2" || - check_cpp_condition vulkan "vulkan/vulkan.h" "defined(VK_VERSION_1_3) || (defined(VK_VERSION_1_2) && VK_HEADER_VERSION >= 189)" + check_pkg_config_header_only vulkan "vulkan >= 1.3.238" "vulkan/vulkan.h" "defined VK_VERSION_1_3" || + check_cpp_condition vulkan "vulkan/vulkan.h" "defined(VK_VERSION_1_4) || (defined(VK_VERSION_1_3) && VK_HEADER_VERSION >= 238)" fi if enabled x86; then diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index ffd4f5dec4ec5..4185fb6110f92 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -673,7 +673,7 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts) VkApplicationInfo application_info = { .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, .pEngineName = "libavutil", - .apiVersion = VK_API_VERSION_1_2, + .apiVersion = VK_API_VERSION_1_3, .engineVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR, LIBAVUTIL_VERSION_MINOR, LIBAVUTIL_VERSION_MICRO), diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h index df86c85b3ce2e..70c8379dc36e7 100644 --- a/libavutil/hwcontext_vulkan.h +++ b/libavutil/hwcontext_vulkan.h @@ -53,7 +53,7 @@ typedef struct AVVulkanDeviceContext { PFN_vkGetInstanceProcAddr get_proc_addr; /** - * Vulkan instance. Must be at least version 1.2. + * Vulkan instance. Must be at least version 1.3. 
*/ VkInstance inst; From ab17abfdd013606601e3972216af0209fb0c14f9 Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 23 Nov 2022 02:37:14 +0100 Subject: [PATCH 27/98] hwcontext_vulkan: enable support for YCbCr samplers --- libavutil/hwcontext_vulkan.c | 1 + libavutil/vulkan_functions.h | 2 ++ 2 files changed, 3 insertions(+) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 4185fb6110f92..f8cc2d9dbd540 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -1378,6 +1378,7 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, goto end; } p->device_features_1_2.timelineSemaphore = 1; + p->device_features_1_1.samplerYcbcrConversion = dev_features_1_1.samplerYcbcrConversion; /* Setup queue family */ if ((err = setup_queue_families(ctx, &dev_info))) diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h index d15a5d9a425c3..deb77495a2f7c 100644 --- a/libavutil/vulkan_functions.h +++ b/libavutil/vulkan_functions.h @@ -155,6 +155,8 @@ typedef enum FFVulkanExtensions { MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyPipeline) \ \ /* Sampler */ \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateSamplerYcbcrConversion) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroySamplerYcbcrConversion) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateSampler) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroySampler) \ \ From d9bfe10fe4488127f911cee13195f180417b6608 Mon Sep 17 00:00:00 2001 From: Lynne Date: Sun, 13 Mar 2022 09:06:06 +0100 Subject: [PATCH 28/98] hwcontext_vulkan: enable VK_KHR_synchronization2 if supported --- libavutil/hwcontext_vulkan.c | 16 ++++++++++++---- libavutil/vulkan_functions.h | 5 ++++- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index f8cc2d9dbd540..ae28a1109306a 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -89,6 +89,7 @@ typedef struct VulkanDevicePriv { /* Features */ VkPhysicalDeviceVulkan11Features 
device_features_1_1; VkPhysicalDeviceVulkan12Features device_features_1_2; + VkPhysicalDeviceVulkan13Features device_features_1_3; /* Queues */ uint32_t qfs[5]; @@ -346,7 +347,6 @@ static const VulkanOptExtension optional_device_exts[] = { /* Misc or required by other extensions */ { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, { VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, - { VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, /* Imports/exports */ { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_MEMORY }, @@ -1326,9 +1326,13 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, VkPhysicalDeviceTimelineSemaphoreFeatures timeline_features = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES, }; + VkPhysicalDeviceVulkan13Features dev_features_1_3 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES, + .pNext = &timeline_features, + }; VkPhysicalDeviceVulkan12Features dev_features_1_2 = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES, - .pNext = &timeline_features, + .pNext = &dev_features_1_3, }; VkPhysicalDeviceVulkan11Features dev_features_1_1 = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES, @@ -1340,8 +1344,7 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, }; VkDeviceCreateInfo dev_info = { - .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, - .pNext = &hwctx->device_features, + .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, }; hwctx->device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; @@ -1349,6 +1352,8 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, p->device_features_1_1.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES; p->device_features_1_1.pNext = &p->device_features_1_2; p->device_features_1_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES; + p->device_features_1_2.pNext = &p->device_features_1_3; + 
p->device_features_1_3.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES; ctx->free = vulkan_device_free; /* Create an instance if not given one */ @@ -1379,6 +1384,9 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, } p->device_features_1_2.timelineSemaphore = 1; p->device_features_1_1.samplerYcbcrConversion = dev_features_1_1.samplerYcbcrConversion; + p->device_features_1_3.synchronization2 = dev_features_1_3.synchronization2; + + dev_info.pNext = &hwctx->device_features; /* Setup queue family */ if ((err = setup_queue_families(ctx, &dev_info))) diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h index deb77495a2f7c..b18d73b5bbf6e 100644 --- a/libavutil/vulkan_functions.h +++ b/libavutil/vulkan_functions.h @@ -145,7 +145,10 @@ typedef enum FFVulkanExtensions { MACRO(1, 1, FF_VK_EXT_NO_FLAG, UpdateDescriptorSetWithTemplate) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateDescriptorUpdateTemplate) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyDescriptorUpdateTemplate) \ - \ + \ + /* sync2 */ \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdPipelineBarrier2) \ + \ /* Pipeline */ \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreatePipelineLayout) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyPipelineLayout) \ From 576cc9547c66f765031b0ca8a72b81ed74a820af Mon Sep 17 00:00:00 2001 From: Lynne Date: Sat, 29 Apr 2023 12:41:53 +0200 Subject: [PATCH 29/98] hwcontext_vulkan: use portability subset if available --- libavutil/hwcontext_vulkan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index ae28a1109306a..2434a800906bb 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -340,11 +340,13 @@ typedef struct VulkanOptExtension { } VulkanOptExtension; static const VulkanOptExtension optional_instance_exts[] = { - /* For future use */ + /* Pointless, here avoid zero-sized structs */ + { VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, }; static 
const VulkanOptExtension optional_device_exts[] = { /* Misc or required by other extensions */ + { VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, { VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, From ea5603dc1da73a4623e9764125867920182d1817 Mon Sep 17 00:00:00 2001 From: Lynne Date: Tue, 15 Mar 2022 23:00:32 +0100 Subject: [PATCH 30/98] hwcontext_vulkan: support threadsafe queue and frame operations --- libavutil/hwcontext_vulkan.c | 191 ++++++++++++++++++++++++++--------- libavutil/hwcontext_vulkan.h | 40 +++++++- 2 files changed, 182 insertions(+), 49 deletions(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 2434a800906bb..b511836b7bbff 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -27,6 +27,7 @@ #include #endif +#include "thread.h" #include #include "config.h" @@ -92,8 +93,10 @@ typedef struct VulkanDevicePriv { VkPhysicalDeviceVulkan13Features device_features_1_3; /* Queues */ - uint32_t qfs[5]; - int num_qfs; + pthread_mutex_t **qf_mutex; + uint32_t nb_tot_qfs; + uint32_t img_qfs[5]; + uint32_t nb_img_qfs; /* Debug callback */ VkDebugUtilsMessengerEXT debug_ctx; @@ -127,6 +130,8 @@ typedef struct VulkanFramesPriv { } VulkanFramesPriv; typedef struct AVVkFrameInternal { + pthread_mutex_t update_mutex; + #if CONFIG_CUDA /* Importing external memory into cuda is really expensive so we keep the * memory imported all the time */ @@ -1305,6 +1310,12 @@ static void vulkan_device_free(AVHWDeviceContext *ctx) if (p->libvulkan) dlclose(p->libvulkan); + for (uint32_t i = 0; i < p->nb_tot_qfs; i++) { + pthread_mutex_destroy(p->qf_mutex[i]); + av_freep(&p->qf_mutex[i]); + } + av_freep(&p->qf_mutex); + RELEASE_PROPS(hwctx->enabled_inst_extensions, hwctx->nb_enabled_inst_extensions); RELEASE_PROPS(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions); } @@ -1437,13 +1448,26 @@ static int 
vulkan_device_create_internal(AVHWDeviceContext *ctx, return err; } +static void lock_queue(AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index) +{ + VulkanDevicePriv *p = ctx->internal->priv; + pthread_mutex_lock(&p->qf_mutex[queue_family][index]); +} + +static void unlock_queue(AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index) +{ + VulkanDevicePriv *p = ctx->internal->priv; + pthread_mutex_unlock(&p->qf_mutex[queue_family][index]); +} + static int vulkan_device_init(AVHWDeviceContext *ctx) { int err; - uint32_t queue_num; + uint32_t qf_num; AVVulkanDeviceContext *hwctx = ctx->hwctx; VulkanDevicePriv *p = ctx->internal->priv; FFVulkanFunctions *vk = &p->vkfn; + VkQueueFamilyProperties *qf; int graph_index, comp_index, tx_index, enc_index, dec_index; /* Set device extension flags */ @@ -1482,12 +1506,37 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de); p->dev_is_intel = (p->props.properties.vendorID == 0x8086); - vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL); - if (!queue_num) { + vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, NULL); + if (!qf_num) { av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n"); return AVERROR_EXTERNAL; } + qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties)); + if (!qf) + return AVERROR(ENOMEM); + + vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, qf); + + p->qf_mutex = av_calloc(qf_num, sizeof(*p->qf_mutex)); + if (!p->qf_mutex) + return AVERROR(ENOMEM); + p->nb_tot_qfs = qf_num; + + for (uint32_t i = 0; i < qf_num; i++) { + p->qf_mutex[i] = av_calloc(qf[i].queueCount, sizeof(**p->qf_mutex)); + if (!p->qf_mutex[i]) + return AVERROR(ENOMEM); + for (uint32_t j = 0; j < qf[i].queueCount; j++) { + err = pthread_mutex_init(&p->qf_mutex[i][j], NULL); + if (err != 0) { + av_log(ctx, AV_LOG_ERROR, "pthread_mutex_init failed : %s\n", + av_err2str(err)); + return AVERROR(err); 
+ } + } + } + graph_index = hwctx->queue_family_index; comp_index = hwctx->queue_family_comp_index; tx_index = hwctx->queue_family_tx_index; @@ -1502,9 +1551,9 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) return AVERROR(EINVAL); \ } else if (fidx < 0 || ctx_qf < 0) { \ break; \ - } else if (ctx_qf >= queue_num) { \ + } else if (ctx_qf >= qf_num) { \ av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \ - type, ctx_qf, queue_num); \ + type, ctx_qf, qf_num); \ return AVERROR(EINVAL); \ } \ \ @@ -1521,7 +1570,7 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) tx_index = (ctx_qf == tx_index) ? -1 : tx_index; \ enc_index = (ctx_qf == enc_index) ? -1 : enc_index; \ dec_index = (ctx_qf == dec_index) ? -1 : dec_index; \ - p->qfs[p->num_qfs++] = ctx_qf; \ + p->img_qfs[p->nb_img_qfs++] = ctx_qf; \ } while (0) CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index, hwctx->nb_graphics_queues); @@ -1532,6 +1581,11 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) #undef CHECK_QUEUE + if (!hwctx->lock_queue) + hwctx->lock_queue = lock_queue; + if (!hwctx->unlock_queue) + hwctx->unlock_queue = unlock_queue; + /* Get device capabilities */ vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops); @@ -1733,9 +1787,6 @@ static void vulkan_free_internal(AVVkFrame *f) { AVVkFrameInternal *internal = f->internal; - if (!internal) - return; - #if CONFIG_CUDA if (internal->cuda_fc_ref) { AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data; @@ -1764,6 +1815,7 @@ static void vulkan_free_internal(AVVkFrame *f) } #endif + pthread_mutex_destroy(&internal->update_mutex); av_freep(&f->internal); } @@ -1924,9 +1976,11 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, uint32_t src_qf, dst_qf; VkImageLayout new_layout; VkAccessFlags new_access; + AVVulkanFramesContext *vkfc = hwfc->hwctx; const int planes = av_pix_fmt_count_planes(hwfc->sw_format); VulkanDevicePriv *p = 
hwfc->device_ctx->internal->priv; FFVulkanFunctions *vk = &p->vkfn; + AVFrame tmp = { .data[0] = (uint8_t *)frame }; uint64_t sem_sig_val[AV_NUM_DATA_POINTERS]; VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 }; @@ -1945,6 +1999,12 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, }; VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS]; + + if ((err = wait_start_exec_ctx(hwfc, ectx))) + return err; + + vkfc->lock_frame(hwfc, frame); + for (int i = 0; i < planes; i++) { wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; sem_sig_val[i] = frame->sem_value[i] + 1; @@ -1981,9 +2041,6 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, break; } - if ((err = wait_start_exec_ctx(hwfc, ectx))) - return err; - /* Change the image layout to something more optimal for writes. * This also signals the newly created semaphore, making it usable * for synchronization */ @@ -2009,7 +2066,10 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 0, NULL, planes, img_bar); - return submit_exec_ctx(hwfc, ectx, &s_info, frame, 0); + err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 0); + vkfc->unlock_frame(hwfc, frame); + + return err; } static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format, @@ -2091,10 +2151,10 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, .usage = usage, .samples = VK_SAMPLE_COUNT_1_BIT, - .pQueueFamilyIndices = p->qfs, - .queueFamilyIndexCount = p->num_qfs, - .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT : - VK_SHARING_MODE_EXCLUSIVE, + .pQueueFamilyIndices = p->img_qfs, + .queueFamilyIndexCount = p->nb_img_qfs, + .sharingMode = p->nb_img_qfs > 1 ? 
VK_SHARING_MODE_CONCURRENT : + VK_SHARING_MODE_EXCLUSIVE, }; get_plane_wh(&create_info.extent.width, &create_info.extent.height, @@ -2118,6 +2178,7 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, return AVERROR_EXTERNAL; } + f->queue_family[i] = p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0]; f->layout[i] = create_info.initialLayout; f->access[i] = 0x0; f->sem_value[i] = 0; @@ -2162,10 +2223,10 @@ static void try_export_flags(AVHWFramesContext *hwfc, VkPhysicalDeviceImageDrmFormatModifierInfoEXT phy_dev_mod_info = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT, .pNext = NULL, - .pQueueFamilyIndices = p->qfs, - .queueFamilyIndexCount = p->num_qfs, - .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT : - VK_SHARING_MODE_EXCLUSIVE, + .pQueueFamilyIndices = p->img_qfs, + .queueFamilyIndexCount = p->nb_img_qfs, + .sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT : + VK_SHARING_MODE_EXCLUSIVE, }; VkPhysicalDeviceExternalImageFormatInfo enext = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO, @@ -2260,6 +2321,16 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size) return NULL; } +static void lock_frame(AVHWFramesContext *fc, AVVkFrame *vkf) +{ + pthread_mutex_lock(&vkf->internal->update_mutex); +} + +static void unlock_frame(AVHWFramesContext *fc, AVVkFrame *vkf) +{ + pthread_mutex_unlock(&vkf->internal->update_mutex); +} + static void vulkan_frames_uninit(AVHWFramesContext *hwfc) { VulkanFramesPriv *fp = hwfc->internal->priv; @@ -2422,6 +2493,11 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc) return AVERROR(ENOMEM); } + if (!hwctx->lock_frame) + hwctx->lock_frame = lock_frame; + if (!hwctx->unlock_frame) + hwctx->unlock_frame = unlock_frame; + return 0; } @@ -2728,10 +2804,10 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT, 
.samples = VK_SAMPLE_COUNT_1_BIT, - .pQueueFamilyIndices = p->qfs, - .queueFamilyIndexCount = p->num_qfs, - .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT : - VK_SHARING_MODE_EXCLUSIVE, + .pQueueFamilyIndices = p->img_qfs, + .queueFamilyIndexCount = p->nb_img_qfs, + .sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT : + VK_SHARING_MODE_EXCLUSIVE, }; /* Image format verification */ @@ -2810,6 +2886,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f * offer us anything we could import and sync with, so instead * just signal the semaphore we created. */ + f->queue_family[i] = p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0]; f->layout[i] = create_info.initialLayout; f->access[i] = 0x0; f->sem_value[i] = 0; @@ -3018,20 +3095,12 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc, CU_AD_FORMAT_UNSIGNED_INT8; dst_f = (AVVkFrame *)frame->data[0]; - dst_int = dst_f->internal; - if (!dst_int || !dst_int->cuda_fc_ref) { - if (!dst_f->internal) - dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal)); - - if (!dst_int) - return AVERROR(ENOMEM); + if (!dst_int->cuda_fc_ref) { dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc); - if (!dst_int->cuda_fc_ref) { - av_freep(&dst_f->internal); + if (!dst_int->cuda_fc_ref) return AVERROR(ENOMEM); - } for (int i = 0; i < planes; i++) { CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = { @@ -3705,13 +3774,14 @@ static int unmap_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs, return err; } -static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f, +static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f, AVBufferRef **bufs, size_t *buf_offsets, const int *buf_stride, int w, int h, enum AVPixelFormat pix_fmt, int to_buf) { int err; AVVkFrame *frame = (AVVkFrame *)f->data[0]; + AVVulkanFramesContext *vkfc = hwfc->hwctx; VulkanFramesPriv *fp = hwfc->internal->priv; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; 
FFVulkanFunctions *vk = &p->vkfn; @@ -3746,11 +3816,13 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f, .waitSemaphoreCount = planes, }; - for (int i = 0; i < planes; i++) - sem_signal_values[i] = frame->sem_value[i] + 1; + vkfc->lock_frame(hwfc, frame); if ((err = wait_start_exec_ctx(hwfc, ectx))) - return err; + goto end; + + for (int i = 0; i < planes; i++) + sem_signal_values[i] = frame->sem_value[i] + 1; /* Change the image layout to something more optimal for transfers */ for (int i = 0; i < planes; i++) { @@ -3825,14 +3897,18 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f, if (!f->buf[ref]) break; if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1))) - return err; + goto end; } if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, planes))) - return err; - return submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref); + goto end; + err = submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref); } else { - return submit_exec_ctx(hwfc, ectx, &s_info, frame, 1); + err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 1); } + +end: + vkfc->unlock_frame(hwfc, frame); + return err; } static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, @@ -3961,8 +4037,9 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, } /* Copy buffers into/from image */ - err = transfer_image_buf(hwfc, vkf, bufs, buf_offsets, tmp.linesize, - swf->width, swf->height, swf->format, from); + err = transfer_image_buf(hwfc, (AVFrame *)vkf, bufs, buf_offsets, + tmp.linesize, swf->width, swf->height, swf->format, + from); if (from) { /* Map, copy buffer (which came FROM the VkImage) to the frame, unmap */ @@ -4143,7 +4220,25 @@ static int vulkan_frames_derive_to(AVHWFramesContext *dst_fc, AVVkFrame *av_vk_frame_alloc(void) { - return av_mallocz(sizeof(AVVkFrame)); + int err; + AVVkFrame *f = av_mallocz(sizeof(AVVkFrame)); + if (!f) + return NULL; + + f->internal = av_mallocz(sizeof(*f->internal)); + 
if (!f->internal) { + av_free(f); + return NULL; + } + + err = pthread_mutex_init(&f->internal->update_mutex, NULL); + if (err != 0) { + av_free(f->internal); + av_free(f); + return NULL; + } + + return f; } const HWContextType ff_hwcontext_type_vulkan = { diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h index 70c8379dc36e7..2663211927b23 100644 --- a/libavutil/hwcontext_vulkan.h +++ b/libavutil/hwcontext_vulkan.h @@ -27,6 +27,8 @@ #include "pixfmt.h" #include "frame.h" +typedef struct AVVkFrame AVVkFrame; + /** * @file * API-specific header for AV_HWDEVICE_TYPE_VULKAN. @@ -135,6 +137,19 @@ typedef struct AVVulkanDeviceContext { */ int queue_family_decode_index; int nb_decode_queues; + + /** + * Locks a queue, preventing other threads from submitting any command + * buffers to this queue. + * If set to NULL, will be set to lavu-internal functions that utilize a + * mutex. + */ + void (*lock_queue)(struct AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index); + + /** + * Similar to lock_queue(), unlocks a queue. Must only be called after locking. + */ + void (*unlock_queue)(struct AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index); } AVVulkanDeviceContext; /** @@ -195,6 +210,23 @@ typedef struct AVVulkanFramesContext { * av_hwframe_ctx_init(). */ AVVkFrameFlags flags; + + /** + * Locks a frame, preventing other threads from changing frame properties. + * If set to NULL, will be set to lavu-internal functions that utilize a + * mutex. + * Users SHOULD only ever lock just before command submission in order + * to get accurate frame properties, and unlock immediately after command + * submission without waiting for it to finish. + * + * If unset, will be set to lavu-internal functions that utilize a mutex. + */ + void (*lock_frame)(struct AVHWFramesContext *fc, AVVkFrame *vkf); + + /** + * Similar to lock_frame(), unlocks a frame. Must only be called after locking. 
+ */ + void (*unlock_frame)(struct AVHWFramesContext *fc, AVVkFrame *vkf); } AVVulkanFramesContext; /* @@ -210,7 +242,7 @@ typedef struct AVVulkanFramesContext { * @note the size of this structure is not part of the ABI, to allocate * you must use @av_vk_frame_alloc(). */ -typedef struct AVVkFrame { +struct AVVkFrame { /** * Vulkan images to which the memory is bound to. */ @@ -264,6 +296,12 @@ typedef struct AVVkFrame { * Describes the binding offset of each plane to the VkDeviceMemory. */ ptrdiff_t offset[AV_NUM_DATA_POINTERS]; + + /** + * Queue family of the images. Must be VK_QUEUE_FAMILY_IGNORED if + * the image was allocated with the CONCURRENT concurrency option. + */ + uint32_t queue_family[AV_NUM_DATA_POINTERS]; } AVVkFrame; /** From b5a7e11362c04b45ba5a90376b361939669874bf Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 23 Nov 2022 20:32:49 +0100 Subject: [PATCH 31/98] hwcontext_vulkan: remove contiguous memory hack The hack was added to enable exporting of vulkan images to DRM. On Intel hardware, specifically for DRM images, all planes must be allocated next to each other, due to hardware limitation, so the hack used a single large allocation and suballocated all planes from it. By natively supporting multiplane images, the driver is what decides the layout, so exporting just works. It's a hack because it conflicted heavily with image allocation, and with the whole ecosystem in general, before multiplane images were supported, which just made it redundant. This is also the commit which broke the hwcontext hardest and prompted the entire rewrite in the first place. 
--- libavutil/hwcontext_vulkan.c | 12 ------------ libavutil/hwcontext_vulkan.h | 6 +++--- libavutil/version.h | 1 + 3 files changed, 4 insertions(+), 15 deletions(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index b511836b7bbff..9eb872ca783d7 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -112,9 +112,6 @@ typedef struct VulkanDevicePriv { /* Nvidia */ int dev_is_nvidia; - - /* Intel */ - int dev_is_intel; } VulkanDevicePriv; typedef struct VulkanFramesPriv { @@ -1504,7 +1501,6 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) p->hprops.minImportedHostPointerAlignment); p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de); - p->dev_is_intel = (p->props.properties.vendorID == 0x8086); vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, NULL); if (!qf_num) { @@ -1629,8 +1625,6 @@ static int vulkan_device_derive(AVHWDeviceContext *ctx, return AVERROR_EXTERNAL; } - if (strstr(vendor, "Intel")) - dev_select.vendor_id = 0x8086; if (strstr(vendor, "AMD")) dev_select.vendor_id = 0x1002; @@ -2366,12 +2360,6 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc) if (!hwctx->usage) hwctx->usage = FF_VK_DEFAULT_USAGE_FLAGS; - if (!(hwctx->flags & AV_VK_FRAME_FLAG_NONE)) { - if (p->contiguous_planes == 1 || - ((p->contiguous_planes == -1) && p->dev_is_intel)) - hwctx->flags |= AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY; - } - modifier_info = vk_find_struct(hwctx->create_pnext, VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT); diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h index 2663211927b23..48e2e02cf5dcd 100644 --- a/libavutil/hwcontext_vulkan.h +++ b/libavutil/hwcontext_vulkan.h @@ -160,10 +160,10 @@ typedef enum AVVkFrameFlags { * device and tiling during av_hwframe_ctx_init(). */ AV_VK_FRAME_FLAG_NONE = (1ULL << 0), - /* Image planes will be allocated in a single VkDeviceMemory, rather - * than as per-plane VkDeviceMemory allocations. 
Required for exporting - * to VAAPI on Intel devices. */ +#if FF_API_VULKAN_CONTIGUOUS_MEMORY + /* DEPRECATED: does nothing. */ AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY = (1ULL << 1), +#endif } AVVkFrameFlags; /** diff --git a/libavutil/version.h b/libavutil/version.h index 8c7ea1a47a6dd..fa53fae768e91 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -118,6 +118,7 @@ #define FF_API_INTERLACED_FRAME (LIBAVUTIL_VERSION_MAJOR < 59) #define FF_API_FRAME_KEY (LIBAVUTIL_VERSION_MAJOR < 59) #define FF_API_PALETTE_HAS_CHANGED (LIBAVUTIL_VERSION_MAJOR < 59) +#define FF_API_VULKAN_CONTIGUOUS_MEMORY (LIBAVUTIL_VERSION_MAJOR < 59) /** * @} From cb5f6165b23c8d6eba4a42d77e777d1fefec8a12 Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 23 Nov 2022 20:35:51 +0100 Subject: [PATCH 32/98] hwcontext_vulkan: rename and expand vk_pixfmt_map to append VK_NULL_FORMAT --- libavutil/hwcontext_vulkan.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 9eb872ca783d7..82790cb014cbf 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -167,8 +167,8 @@ typedef struct AVVkFrameInternal { static const struct { enum AVPixelFormat pixfmt; - const VkFormat vkfmts[4]; -} vk_pixfmt_map[] = { + const VkFormat vkfmts[5]; +} vk_pixfmt_planar_map[] = { { AV_PIX_FMT_GRAY8, { VK_FORMAT_R8_UNORM } }, { AV_PIX_FMT_GRAY16, { VK_FORMAT_R16_UNORM } }, { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } }, @@ -244,9 +244,9 @@ static const struct { const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p) { - for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_map); i++) - if (vk_pixfmt_map[i].pixfmt == p) - return vk_pixfmt_map[i].vkfmts; + for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_planar_map); i++) + if (vk_pixfmt_planar_map[i].pixfmt == p) + return vk_pixfmt_planar_map[i].vkfmts; return NULL; } From 3a76069768afa314fb26c778dd7f63b8dfde8c6e Mon Sep 17 00:00:00 2001 
From: Lynne Date: Wed, 14 Dec 2022 00:52:15 +0100 Subject: [PATCH 33/98] hwcontext_vulkan: fix minor type issue in VulkanQueueCtx.buf_deps_alloc_size --- libavutil/hwcontext_vulkan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 82790cb014cbf..f796cbd689cf2 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -66,7 +66,7 @@ typedef struct VulkanQueueCtx { /* Buffer dependencies */ AVBufferRef **buf_deps; int nb_buf_deps; - int buf_deps_alloc_size; + unsigned int buf_deps_alloc_size; } VulkanQueueCtx; typedef struct VulkanExecCtx { From c1cbce5ebec5f9bc2959f5b217def8db13e3a773 Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 28 Dec 2022 05:55:17 +0100 Subject: [PATCH 34/98] hwcontext_vulkan: report nonCoherentAtomSize --- libavutil/hwcontext_vulkan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index f796cbd689cf2..8f80cc8e21dbf 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -1496,6 +1496,8 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) p->props.properties.limits.optimalBufferCopyRowPitchAlignment); av_log(ctx, AV_LOG_VERBOSE, " minMemoryMapAlignment: %"SIZE_SPECIFIER"\n", p->props.properties.limits.minMemoryMapAlignment); + av_log(ctx, AV_LOG_VERBOSE, " nonCoherentAtomSize: %"PRIu64"\n", + p->props.properties.limits.nonCoherentAtomSize); if (p->extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) av_log(ctx, AV_LOG_VERBOSE, " minImportedHostPointerAlignment: %"PRIu64"\n", p->hprops.minImportedHostPointerAlignment); From 326a2af41add311ad4270ca18764c50214ad303e Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 17 Feb 2023 03:15:02 +0100 Subject: [PATCH 35/98] hwcontext_vulkan: add support for descriptor buffers --- libavutil/hwcontext_vulkan.c | 13 ++++++++++++- libavutil/vulkan_functions.h | 9 +++++++++ libavutil/vulkan_loader.h | 1 + 3 files changed, 22 
insertions(+), 1 deletion(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 8f80cc8e21dbf..e68c4e3535f2a 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -91,6 +91,7 @@ typedef struct VulkanDevicePriv { VkPhysicalDeviceVulkan11Features device_features_1_1; VkPhysicalDeviceVulkan12Features device_features_1_2; VkPhysicalDeviceVulkan13Features device_features_1_3; + VkPhysicalDeviceDescriptorBufferFeaturesEXT desc_buf_features; /* Queues */ pthread_mutex_t **qf_mutex; @@ -351,6 +352,7 @@ static const VulkanOptExtension optional_device_exts[] = { { VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, { VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, + { VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, FF_VK_EXT_DESCRIPTOR_BUFFER, }, /* Imports/exports */ { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_MEMORY }, @@ -1336,9 +1338,13 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, VkPhysicalDeviceTimelineSemaphoreFeatures timeline_features = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES, }; + VkPhysicalDeviceDescriptorBufferFeaturesEXT desc_buf_features = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT, + .pNext = &timeline_features, + }; VkPhysicalDeviceVulkan13Features dev_features_1_3 = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES, - .pNext = &timeline_features, + .pNext = &desc_buf_features, }; VkPhysicalDeviceVulkan12Features dev_features_1_2 = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES, @@ -1364,6 +1370,8 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, p->device_features_1_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES; p->device_features_1_2.pNext = &p->device_features_1_3; p->device_features_1_3.sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES; + p->device_features_1_3.pNext = &p->desc_buf_features; + p->desc_buf_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT; ctx->free = vulkan_device_free; /* Create an instance if not given one */ @@ -1393,8 +1401,11 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, goto end; } p->device_features_1_2.timelineSemaphore = 1; + p->device_features_1_2.bufferDeviceAddress = dev_features_1_2.bufferDeviceAddress; p->device_features_1_1.samplerYcbcrConversion = dev_features_1_1.samplerYcbcrConversion; p->device_features_1_3.synchronization2 = dev_features_1_3.synchronization2; + p->desc_buf_features.descriptorBuffer = desc_buf_features.descriptorBuffer; + p->desc_buf_features.descriptorBufferPushDescriptors = desc_buf_features.descriptorBufferPushDescriptors; dev_info.pNext = &hwctx->device_features; diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h index b18d73b5bbf6e..4eccdad3c4117 100644 --- a/libavutil/vulkan_functions.h +++ b/libavutil/vulkan_functions.h @@ -37,6 +37,7 @@ typedef enum FFVulkanExtensions { FF_VK_EXT_EXTERNAL_WIN32_MEMORY = 1ULL << 6, /* VK_KHR_external_memory_win32 */ FF_VK_EXT_EXTERNAL_WIN32_SEM = 1ULL << 7, /* VK_KHR_external_semaphore_win32 */ #endif + FF_VK_EXT_DESCRIPTOR_BUFFER = 1ULL << 8, /* VK_EXT_descriptor_buffer */ FF_VK_EXT_NO_FLAG = 1ULL << 31, } FFVulkanExtensions; @@ -120,6 +121,7 @@ typedef enum FFVulkanExtensions { MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetBufferMemoryRequirements2) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateBuffer) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, BindBufferMemory) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetBufferDeviceAddress) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyBuffer) \ \ /* Image */ \ @@ -141,6 +143,13 @@ typedef enum FFVulkanExtensions { MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyDescriptorPool) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyDescriptorSetLayout) \ \ + /* Descriptor buffers */ \ + MACRO(1, 1, 
FF_VK_EXT_DESCRIPTOR_BUFFER, GetDescriptorSetLayoutSizeEXT) \ + MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, GetDescriptorSetLayoutBindingOffsetEXT) \ + MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, GetDescriptorEXT) \ + MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, CmdBindDescriptorBuffersEXT) \ + MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, CmdSetDescriptorBufferOffsetsEXT) \ + \ /* DescriptorUpdateTemplate */ \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, UpdateDescriptorSetWithTemplate) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateDescriptorUpdateTemplate) \ diff --git a/libavutil/vulkan_loader.h b/libavutil/vulkan_loader.h index 3f1ee6aa4672a..e08777db177ce 100644 --- a/libavutil/vulkan_loader.h +++ b/libavutil/vulkan_loader.h @@ -48,6 +48,7 @@ static inline uint64_t ff_vk_extensions_to_mask(const char * const *extensions, { VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_MEMORY }, { VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM }, #endif + { VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, FF_VK_EXT_DESCRIPTOR_BUFFER, }, }; FFVulkanExtensions mask = 0x0; From bb54999ecd492dc18f7eb1f0a9f295d9731c00da Mon Sep 17 00:00:00 2001 From: Lynne Date: Tue, 14 Mar 2023 22:10:05 +0100 Subject: [PATCH 36/98] hwcontext_vulkan: do not require libdrm to map VAAPI devices VAAPI is sadly on its way to becoming multiplatform.
--- libavutil/hwcontext_vulkan.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index e68c4e3535f2a..7051c5dda8436 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -42,14 +42,17 @@ #include "vulkan.h" #include "vulkan_loader.h" +#if CONFIG_VAAPI +#include "hwcontext_vaapi.h" +#endif + #if CONFIG_LIBDRM -#include -#include -#include "hwcontext_drm.h" #if CONFIG_VAAPI #include -#include "hwcontext_vaapi.h" #endif +#include +#include +#include "hwcontext_drm.h" #endif #if CONFIG_CUDA @@ -1627,7 +1630,6 @@ static int vulkan_device_derive(AVHWDeviceContext *ctx, * by the following checks (e.g. non-PCIe ARM GPU), having an empty * dev_select will mean it'll get picked. */ switch(src_ctx->type) { -#if CONFIG_LIBDRM #if CONFIG_VAAPI case AV_HWDEVICE_TYPE_VAAPI: { AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx; @@ -1644,6 +1646,7 @@ static int vulkan_device_derive(AVHWDeviceContext *ctx, return vulkan_device_create_internal(ctx, &dev_select, opts, flags); } #endif +#if CONFIG_LIBDRM case AV_HWDEVICE_TYPE_DRM: { AVDRMDeviceContext *src_hwctx = src_ctx->hwctx; From e34dbce301a0046459e9e37f83fde5a430e64988 Mon Sep 17 00:00:00 2001 From: Lynne Date: Tue, 14 Mar 2023 22:30:18 +0100 Subject: [PATCH 37/98] hwcontext_vulkan: use VK_EXT_physical_device_drm to derive DRM to Vulkan Finally, a way to directly identify a Vulkan device from a DRM device! 
--- libavutil/hwcontext_vulkan.c | 58 ++++++++++++++++++++++++++++++++---- libavutil/vulkan_functions.h | 1 + libavutil/vulkan_loader.h | 1 + 3 files changed, 55 insertions(+), 5 deletions(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 7051c5dda8436..8cb55b98cc062 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -50,6 +50,8 @@ #if CONFIG_VAAPI #include #endif +#include +#include #include #include #include "hwcontext_drm.h" @@ -356,6 +358,7 @@ static const VulkanOptExtension optional_device_exts[] = { { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, { VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, { VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, FF_VK_EXT_DESCRIPTOR_BUFFER, }, + { VK_EXT_PHYSICAL_DEVICE_DRM_EXTENSION_NAME, FF_VK_EXT_DEVICE_DRM }, /* Imports/exports */ { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_MEMORY }, @@ -762,8 +765,11 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts) typedef struct VulkanDeviceSelection { uint8_t uuid[VK_UUID_SIZE]; /* Will use this first unless !has_uuid */ int has_uuid; - const char *name; /* Will use this second unless NULL */ - uint32_t pci_device; /* Will use this third unless 0x0 */ + uint32_t drm_major; /* Will use this second unless !has_drm */ + uint32_t drm_minor; /* Will use this second unless !has_drm */ + uint32_t has_drm; /* has drm node info */ + const char *name; /* Will use this third unless NULL */ + uint32_t pci_device; /* Will use this fourth unless 0x0 */ uint32_t vendor_id; /* Last resort to find something deterministic */ int index; /* Finally fall back to index */ } VulkanDeviceSelection; @@ -790,6 +796,7 @@ static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select) VkPhysicalDevice *devices = NULL; VkPhysicalDeviceIDProperties *idp = NULL; VkPhysicalDeviceProperties2 *prop = NULL; + VkPhysicalDeviceDrmPropertiesEXT *drm_prop = NULL; 
AVVulkanDeviceContext *hwctx = ctx->hwctx; ret = vk->EnumeratePhysicalDevices(hwctx->inst, &num, NULL); @@ -822,8 +829,20 @@ static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select) goto end; } + if (p->vkctx.extensions & FF_VK_EXT_DEVICE_DRM) { + drm_prop = av_calloc(num, sizeof(*drm_prop)); + if (!drm_prop) { + err = AVERROR(ENOMEM); + goto end; + } + } + av_log(ctx, AV_LOG_VERBOSE, "GPU listing:\n"); for (int i = 0; i < num; i++) { + if (p->vkctx.extensions & FF_VK_EXT_DEVICE_DRM) { + drm_prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT; + idp[i].pNext = &drm_prop[i]; + } idp[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES; prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; prop[i].pNext = &idp[i]; @@ -845,6 +864,20 @@ static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select) av_log(ctx, AV_LOG_ERROR, "Unable to find device by given UUID!\n"); err = AVERROR(ENODEV); goto end; + } else if ((p->vkctx.extensions & FF_VK_EXT_DEVICE_DRM) && select->has_drm) { + for (int i = 0; i < num; i++) { + if ((select->drm_major == drm_prop[i].primaryMajor && + select->drm_minor == drm_prop[i].primaryMinor) || + (select->drm_major == drm_prop[i].renderMajor && + select->drm_minor == drm_prop[i].renderMinor)) { + choice = i; + goto end; + } + } + av_log(ctx, AV_LOG_ERROR, "Unable to find device by given DRM node numbers %i:%i!\n", + select->drm_major, select->drm_minor); + err = AVERROR(ENODEV); + goto end; } else if (select->name) { av_log(ctx, AV_LOG_VERBOSE, "Requested device: %s\n", select->name); for (int i = 0; i < num; i++) { @@ -904,6 +937,7 @@ static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select) av_free(devices); av_free(prop); av_free(idp); + av_free(drm_prop); return err; } @@ -1648,12 +1682,26 @@ static int vulkan_device_derive(AVHWDeviceContext *ctx, #endif #if CONFIG_LIBDRM case AV_HWDEVICE_TYPE_DRM: { + int err; + struct stat drm_node_info; + drmDevice 
*drm_dev_info; AVDRMDeviceContext *src_hwctx = src_ctx->hwctx; - drmDevice *drm_dev_info; - int err = drmGetDevice(src_hwctx->fd, &drm_dev_info); + err = fstat(src_hwctx->fd, &drm_node_info); + if (err) { + av_log(ctx, AV_LOG_ERROR, "Unable to get node info from DRM fd: %s!\n", + av_err2str(AVERROR(errno))); + return AVERROR_EXTERNAL; + } + + dev_select.drm_major = major(drm_node_info.st_dev); + dev_select.drm_minor = minor(drm_node_info.st_dev); + dev_select.has_drm = 1; + + err = drmGetDevice(src_hwctx->fd, &drm_dev_info); if (err) { - av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd!\n"); + av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd: %s!\n", + av_err2str(AVERROR(errno))); return AVERROR_EXTERNAL; } diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h index 4eccdad3c4117..ee0adf427ca76 100644 --- a/libavutil/vulkan_functions.h +++ b/libavutil/vulkan_functions.h @@ -38,6 +38,7 @@ typedef enum FFVulkanExtensions { FF_VK_EXT_EXTERNAL_WIN32_SEM = 1ULL << 7, /* VK_KHR_external_semaphore_win32 */ #endif FF_VK_EXT_DESCRIPTOR_BUFFER = 1ULL << 8, /* VK_EXT_descriptor_buffer */ + FF_VK_EXT_DEVICE_DRM = 1ULL << 9, /* VK_EXT_physical_device_drm */ FF_VK_EXT_NO_FLAG = 1ULL << 31, } FFVulkanExtensions; diff --git a/libavutil/vulkan_loader.h b/libavutil/vulkan_loader.h index e08777db177ce..2e6dfb4f4f5db 100644 --- a/libavutil/vulkan_loader.h +++ b/libavutil/vulkan_loader.h @@ -44,6 +44,7 @@ static inline uint64_t ff_vk_extensions_to_mask(const char * const *extensions, { VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_SEM }, { VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_HOST_MEMORY }, { VK_EXT_DEBUG_UTILS_EXTENSION_NAME, FF_VK_EXT_DEBUG_UTILS }, + { VK_EXT_PHYSICAL_DEVICE_DRM_EXTENSION_NAME, FF_VK_EXT_DEVICE_DRM }, #ifdef _WIN32 { VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_MEMORY }, { VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, 
FF_VK_EXT_EXTERNAL_WIN32_SEM }, From 289fc3ed79c12488316a0f464dc597d81ceaaa56 Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 23 Nov 2022 13:34:36 +0100 Subject: [PATCH 38/98] hwcontext_vulkan: add functions for video decoding --- libavutil/hwcontext_vulkan.c | 6 ++++++ libavutil/vulkan.c | 8 +++++--- libavutil/vulkan_functions.h | 20 ++++++++++++++++++++ libavutil/vulkan_loader.h | 4 ++++ 4 files changed, 35 insertions(+), 3 deletions(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 8cb55b98cc062..97845ec8248f2 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -370,6 +370,12 @@ static const VulkanOptExtension optional_device_exts[] = { { VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_MEMORY }, { VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM }, #endif + + /* Video encoding/decoding */ + { VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_QUEUE }, + { VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_QUEUE }, + { VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H264 }, + { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 }, }; /* Converts return values to strings */ diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 403f0b1f27660..6bf2c214b70e7 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -85,9 +85,11 @@ const char *ff_vk_ret2str(VkResult res) CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR); CASE(VK_ERROR_VALIDATION_FAILED_EXT); CASE(VK_ERROR_INVALID_SHADER_NV); - CASE(VK_ERROR_OUT_OF_POOL_MEMORY); - CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE); - CASE(VK_ERROR_NOT_PERMITTED_EXT); + CASE(VK_ERROR_VIDEO_PICTURE_LAYOUT_NOT_SUPPORTED_KHR); + CASE(VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR); + CASE(VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR); + CASE(VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR); + CASE(VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR); default: return "Unknown error"; 
} #undef CASE diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h index ee0adf427ca76..403382fe7f27a 100644 --- a/libavutil/vulkan_functions.h +++ b/libavutil/vulkan_functions.h @@ -39,6 +39,10 @@ typedef enum FFVulkanExtensions { #endif FF_VK_EXT_DESCRIPTOR_BUFFER = 1ULL << 8, /* VK_EXT_descriptor_buffer */ FF_VK_EXT_DEVICE_DRM = 1ULL << 9, /* VK_EXT_physical_device_drm */ + FF_VK_EXT_VIDEO_QUEUE = 1ULL << 10, /* VK_KHR_video_queue */ + FF_VK_EXT_VIDEO_DECODE_QUEUE = 1ULL << 11, /* VK_KHR_video_decode_queue */ + FF_VK_EXT_VIDEO_DECODE_H264 = 1ULL << 12, /* VK_EXT_video_decode_h264 */ + FF_VK_EXT_VIDEO_DECODE_H265 = 1ULL << 13, /* VK_EXT_video_decode_h265 */ FF_VK_EXT_NO_FLAG = 1ULL << 31, } FFVulkanExtensions; @@ -60,6 +64,8 @@ typedef enum FFVulkanExtensions { MACRO(1, 0, FF_VK_EXT_NO_FLAG, CreateDevice) \ MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceFeatures2) \ MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceProperties) \ + MACRO(1, 0, FF_VK_EXT_VIDEO_QUEUE, GetPhysicalDeviceVideoCapabilitiesKHR) \ + MACRO(1, 0, FF_VK_EXT_VIDEO_QUEUE, GetPhysicalDeviceVideoFormatPropertiesKHR) \ MACRO(1, 0, FF_VK_EXT_NO_FLAG, DeviceWaitIdle) \ MACRO(1, 0, FF_VK_EXT_NO_FLAG, DestroyDevice) \ \ @@ -159,6 +165,20 @@ typedef enum FFVulkanExtensions { /* sync2 */ \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdPipelineBarrier2) \ \ + /* Video queue */ \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CreateVideoSessionKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CreateVideoSessionParametersKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, GetVideoSessionMemoryRequirementsKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, BindVideoSessionMemoryKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CmdBeginVideoCodingKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CmdControlVideoCodingKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CmdEndVideoCodingKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, DestroyVideoSessionParametersKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, DestroyVideoSessionKHR) \ + \ + /* Video decoding */ 
\ + MACRO(1, 1, FF_VK_EXT_VIDEO_DECODE_QUEUE, CmdDecodeVideoKHR) \ + \ /* Pipeline */ \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreatePipelineLayout) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyPipelineLayout) \ diff --git a/libavutil/vulkan_loader.h b/libavutil/vulkan_loader.h index 2e6dfb4f4f5db..5380e2130300d 100644 --- a/libavutil/vulkan_loader.h +++ b/libavutil/vulkan_loader.h @@ -50,6 +50,10 @@ static inline uint64_t ff_vk_extensions_to_mask(const char * const *extensions, { VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM }, #endif { VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, FF_VK_EXT_DESCRIPTOR_BUFFER, }, + { VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_QUEUE }, + { VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_QUEUE }, + { VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H264 }, + { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 }, }; FFVulkanExtensions mask = 0x0; From 3c48a14462f4c281ebdcc56ce4cdbf229be663b0 Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 23 Nov 2022 15:18:21 +0100 Subject: [PATCH 39/98] hwcontext_vulkan: support PREP_MODE_DECODING in prepare_frame() --- libavutil/hwcontext_vulkan.c | 67 ++++++++++++++++++++++++++---------- 1 file changed, 48 insertions(+), 19 deletions(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 97845ec8248f2..3d3b82af4c92e 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -2030,7 +2030,9 @@ static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f, enum PrepMode { PREP_MODE_WRITE, PREP_MODE_EXTERNAL_EXPORT, - PREP_MODE_EXTERNAL_IMPORT + PREP_MODE_EXTERNAL_IMPORT, + PREP_MODE_DECODING_DST, + PREP_MODE_DECODING_DPB, }; static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, @@ -2039,7 +2041,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, int err; uint32_t src_qf, dst_qf; VkImageLayout new_layout; - VkAccessFlags new_access; + 
VkAccessFlags2 new_access; AVVulkanFramesContext *vkfc = hwfc->hwctx; const int planes = av_pix_fmt_count_planes(hwfc->sw_format); VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; @@ -2047,7 +2049,8 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, AVFrame tmp = { .data[0] = (uint8_t *)frame }; uint64_t sem_sig_val[AV_NUM_DATA_POINTERS]; - VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 }; + VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS] = { 0 }; + VkDependencyInfo dep_info; VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = { .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, @@ -2103,32 +2106,52 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, s_info.pWaitDstStageMask = wait_st; s_info.waitSemaphoreCount = planes; break; + case PREP_MODE_DECODING_DST: + new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR; + new_access = VK_ACCESS_TRANSFER_WRITE_BIT; + src_qf = VK_QUEUE_FAMILY_IGNORED; + dst_qf = VK_QUEUE_FAMILY_IGNORED; + break; + case PREP_MODE_DECODING_DPB: + new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR; + new_access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; + src_qf = VK_QUEUE_FAMILY_IGNORED; + dst_qf = VK_QUEUE_FAMILY_IGNORED; + break; } /* Change the image layout to something more optimal for writes. 
* This also signals the newly created semaphore, making it usable * for synchronization */ for (int i = 0; i < planes; i++) { - img_bar[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - img_bar[i].srcAccessMask = 0x0; - img_bar[i].dstAccessMask = new_access; - img_bar[i].oldLayout = frame->layout[i]; - img_bar[i].newLayout = new_layout; - img_bar[i].srcQueueFamilyIndex = src_qf; - img_bar[i].dstQueueFamilyIndex = dst_qf; - img_bar[i].image = frame->img[i]; - img_bar[i].subresourceRange.levelCount = 1; - img_bar[i].subresourceRange.layerCount = 1; - img_bar[i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + img_bar[i] = (VkImageMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, + .pNext = NULL, + .srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, + .srcAccessMask = 0x0, + .dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT, + .dstAccessMask = new_access, + .oldLayout = frame->layout[i], + .newLayout = new_layout, + .srcQueueFamilyIndex = src_qf, + .dstQueueFamilyIndex = dst_qf, + .image = frame->img[i], + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .levelCount = 1, + .layerCount = 1, + }, + }; frame->layout[i] = img_bar[i].newLayout; frame->access[i] = img_bar[i].dstAccessMask; } - vk->CmdPipelineBarrier(get_buf_exec_ctx(hwfc, ectx), - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, 0, NULL, 0, NULL, planes, img_bar); + vk->CmdPipelineBarrier2(get_buf_exec_ctx(hwfc, ectx), &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = planes, + }); err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 0); vkfc->unlock_frame(hwfc, frame); @@ -2369,7 +2392,13 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size) if (err) goto fail; - err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_WRITE); + if ( (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) && + !(hwctx->usage & 
VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)) + err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_DECODING_DPB); + else if (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR) + err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_DECODING_DST); + else + err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_WRITE); if (err) goto fail; From 16c91528a3d29a06ad1bdacff5c566fc88f9ce12 Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 17 Feb 2023 10:09:34 +0100 Subject: [PATCH 40/98] hwcontext_vulkan: load query-related functions Needed for both encoding and decoding. --- libavutil/vulkan_functions.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h index 403382fe7f27a..2a7c383dc132c 100644 --- a/libavutil/vulkan_functions.h +++ b/libavutil/vulkan_functions.h @@ -162,6 +162,15 @@ typedef enum FFVulkanExtensions { MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateDescriptorUpdateTemplate) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyDescriptorUpdateTemplate) \ \ + /* Queries */ \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateQueryPool) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetQueryPoolResults) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, ResetQueryPool) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdBeginQuery) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdEndQuery) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdResetQueryPool) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyQueryPool) \ + \ /* sync2 */ \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdPipelineBarrier2) \ \ From 3619c3d5b6be22eb221d2c0bb30797b03f5f4c7d Mon Sep 17 00:00:00 2001 From: Lynne Date: Tue, 14 Mar 2023 20:45:45 +0100 Subject: [PATCH 41/98] hwcontext_vulkan: enable GPU-assisted validation when debugging --- libavutil/hwcontext_vulkan.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 3d3b82af4c92e..28255929255ba 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -696,6 +696,9 @@ static int 
create_instance(AVHWDeviceContext *ctx, AVDictionary *opts) LIBAVUTIL_VERSION_MINOR, LIBAVUTIL_VERSION_MICRO), }; + VkValidationFeaturesEXT validation_features = { + .sType = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT, + }; VkInstanceCreateInfo inst_props = { .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, .pApplicationInfo = &application_info, @@ -726,6 +729,17 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts) if (err < 0) goto fail; + if (debug_mode) { + VkValidationFeatureEnableEXT feat_list[] = { + VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT, + VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT, + VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT, + }; + validation_features.pEnabledValidationFeatures = feat_list; + validation_features.enabledValidationFeatureCount = FF_ARRAY_ELEMS(feat_list); + inst_props.pNext = &validation_features; + } + /* Try to create the instance */ ret = vk->CreateInstance(&inst_props, hwctx->alloc, &hwctx->inst); From b8d33f1bddd6f739915736ac60d4e7218d716dc2 Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 23 Nov 2022 14:04:28 +0100 Subject: [PATCH 42/98] vulkan: lock queues before submitting operations --- libavutil/vulkan.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 6bf2c214b70e7..ad13b8f3cb907 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -625,7 +625,14 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e) return AVERROR_EXTERNAL; } + s->hwctx->lock_queue((AVHWDeviceContext *)s->device_ref->data, + e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues); + ret = vk->QueueSubmit(q->queue, 1, &s_info, q->fence); + + s->hwctx->unlock_queue((AVHWDeviceContext *)s->device_ref->data, + e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues); + if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n", ff_vk_ret2str(ret)); From 
a35d93bc840e2669c535f28d81cd76a5d76085b5 Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 23 Nov 2022 14:04:48 +0100 Subject: [PATCH 43/98] vulkan: define VK_NO_PROTOTYPES This just disables the vulkan headers from defining any symbols like vkCmdPipelineBarrier2(). Instead, all functions must be loaded via the loader and used as function pointers as vk->CmdPipelineBarrier2. Mostly just forces developers to write correct code, as using the symbols can be undesirable in case API users define their own function wrappers via the loader API. --- libavutil/vulkan.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index 90922c6cf3de3..11ea8d609e29b 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -19,6 +19,8 @@ #ifndef AVUTIL_VULKAN_H #define AVUTIL_VULKAN_H +#define VK_NO_PROTOTYPES + #include "pixdesc.h" #include "bprint.h" #include "hwcontext.h" From 55f9a620d29eba44ab1d62f3665e20d057b6e87a Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 23 Nov 2022 13:54:35 +0100 Subject: [PATCH 44/98] vulkan: add additional error codes --- libavutil/vulkan.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index ad13b8f3cb907..f2846e628a2a9 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -78,6 +78,12 @@ const char *ff_vk_ret2str(VkResult res) CASE(VK_ERROR_TOO_MANY_OBJECTS); CASE(VK_ERROR_FORMAT_NOT_SUPPORTED); CASE(VK_ERROR_FRAGMENTED_POOL); + CASE(VK_ERROR_UNKNOWN); + CASE(VK_ERROR_OUT_OF_POOL_MEMORY); + CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE); + CASE(VK_ERROR_FRAGMENTATION); + CASE(VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS); + CASE(VK_PIPELINE_COMPILE_REQUIRED); CASE(VK_ERROR_SURFACE_LOST_KHR); CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR); CASE(VK_SUBOPTIMAL_KHR); @@ -90,6 +96,13 @@ const char *ff_vk_ret2str(VkResult res) CASE(VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR); CASE(VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR); 
CASE(VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR); + CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT); + CASE(VK_ERROR_NOT_PERMITTED_KHR); + CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT); + CASE(VK_THREAD_IDLE_KHR); + CASE(VK_THREAD_DONE_KHR); + CASE(VK_OPERATION_DEFERRED_KHR); + CASE(VK_OPERATION_NOT_DEFERRED_KHR); default: return "Unknown error"; } #undef CASE From 2cc537307b2bc9685cc6654b9657b6fbe3b519dd Mon Sep 17 00:00:00 2001 From: Lynne Date: Thu, 10 Mar 2022 21:41:59 +0100 Subject: [PATCH 45/98] vulkan: fix comment statement about exec_queue blocking --- libavutil/vulkan.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index 11ea8d609e29b..107c12a746868 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -390,9 +390,7 @@ int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame, VkPipelineStageFlagBits in_wait_dst_flag); /** - * Submits a command buffer to the queue for execution. - * Will block until execution has finished in order to simplify resource - * management. + * Submits a command buffer to the queue for execution. Will not block. 
*/ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e); From 4e8af956f249cb312de021d768ce72c0c1c82eb6 Mon Sep 17 00:00:00 2001 From: Lynne Date: Thu, 17 Mar 2022 12:23:56 +0100 Subject: [PATCH 46/98] vulkan: add pNext argument to ff_vk_create_buf() --- libavfilter/vf_gblur_vulkan.c | 2 +- libavfilter/vf_overlay_vulkan.c | 2 +- libavfilter/vf_scale_vulkan.c | 2 +- libavutil/vulkan.c | 4 ++-- libavutil/vulkan.h | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/libavfilter/vf_gblur_vulkan.c b/libavfilter/vf_gblur_vulkan.c index e6ffc8c07359e..80d1dc61c0628 100644 --- a/libavfilter/vf_gblur_vulkan.c +++ b/libavfilter/vf_gblur_vulkan.c @@ -174,7 +174,7 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVk RET(ff_vk_init_pipeline_layout(&s->vkctx, pl)); RET(ff_vk_init_compute_pipeline(&s->vkctx, pl)); - RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize, + RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize, NULL, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); RET(ff_vk_map_buffers(&s->vkctx, params_buf, &kernel_mapped, 1, 0)); diff --git a/libavfilter/vf_overlay_vulkan.c b/libavfilter/vf_overlay_vulkan.c index 6db7baddfddc0..7a66cf12ad6ff 100644 --- a/libavfilter/vf_overlay_vulkan.c +++ b/libavfilter/vf_overlay_vulkan.c @@ -181,7 +181,7 @@ static av_cold int init_filter(AVFilterContext *ctx) } *par; err = ff_vk_create_buf(vkctx, &s->params_buf, - sizeof(*par), + sizeof(*par), NULL, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); if (err) diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c index 3b09f0dcc120d..d14b32277d321 100644 --- a/libavfilter/vf_scale_vulkan.c +++ b/libavfilter/vf_scale_vulkan.c @@ -253,7 +253,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) } RET(ff_vk_create_buf(vkctx, &s->params_buf, - sizeof(*par), + sizeof(*par), NULL, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index f2846e628a2a9..ae6adc5104d99 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -205,7 +205,7 @@ static int vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, return 0; } -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext, VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags) { int err; @@ -215,7 +215,7 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, VkBufferCreateInfo buf_spawn = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = NULL, + .pNext = pNext, .usage = usage, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .size = size, /* Gets FFALIGNED during alloc if host visible diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index 107c12a746868..c6cfb779fcd49 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -397,7 +397,7 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e); /** * Create a VkBuffer with the specified parameters. 
*/ -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext, VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags); /** From 4f1e99fcf960d294254d306e0797937d23eb598d Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 23 Nov 2022 13:03:58 +0100 Subject: [PATCH 47/98] vulkan: add ff_vk_qf_fill() --- libavutil/vulkan.c | 25 +++++++++++++++++++++++++ libavutil/vulkan.h | 9 +++++++++ 2 files changed, 34 insertions(+) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index ae6adc5104d99..eceef295a84c7 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -108,6 +108,31 @@ const char *ff_vk_ret2str(VkResult res) #undef CASE } +void ff_vk_qf_fill(FFVulkanContext *s) +{ + s->nb_qfs = 0; + + /* Simply fills in all unique queues into s->qfs */ + if (s->hwctx->queue_family_index >= 0) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_index; + if (!s->nb_qfs || s->qfs[0] != s->hwctx->queue_family_tx_index) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_tx_index; + if (!s->nb_qfs || (s->qfs[0] != s->hwctx->queue_family_comp_index && + s->qfs[1] != s->hwctx->queue_family_comp_index)) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_comp_index; + if (s->hwctx->queue_family_decode_index >= 0 && + (s->qfs[0] != s->hwctx->queue_family_decode_index && + s->qfs[1] != s->hwctx->queue_family_decode_index && + s->qfs[2] != s->hwctx->queue_family_decode_index)) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_decode_index; + if (s->hwctx->queue_family_encode_index >= 0 && + (s->qfs[0] != s->hwctx->queue_family_encode_index && + s->qfs[1] != s->hwctx->queue_family_encode_index && + s->qfs[2] != s->hwctx->queue_family_encode_index && + s->qfs[3] != s->hwctx->queue_family_encode_index)) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index; +} + void ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, VkQueueFlagBits dev_family, int nb_queues) { diff --git a/libavutil/vulkan.h 
b/libavutil/vulkan.h index c6cfb779fcd49..4540c3eda1184 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -207,6 +207,9 @@ typedef struct FFVulkanContext { AVHWFramesContext *frames; AVVulkanFramesContext *hwfc; + uint32_t qfs[5]; + int nb_qfs; + FFVkSPIRVCompiler *spirv_compiler; /* Properties */ @@ -249,6 +252,12 @@ int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt); */ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt); +/** + * Setup the queue families from the hardware device context. + * Necessary for image creation to work. + */ +void ff_vk_qf_fill(FFVulkanContext *s); + /** * Initialize a queue family with a specific number of queues. * If nb_queues == 0, use however many queues the queue family has. From 995d951f9c24e68198862dbdc25861e889326d75 Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 23 Nov 2022 13:05:59 +0100 Subject: [PATCH 48/98] vulkan: add ff_vk_image_create() --- libavutil/vulkan.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++ libavutil/vulkan.h | 11 ++++++ 2 files changed, 100 insertions(+) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index eceef295a84c7..212f134466752 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -401,6 +401,95 @@ void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf) vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc); } +int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *f, int idx, + int width, int height, VkFormat fmt, VkImageTiling tiling, + VkImageUsageFlagBits usage, VkImageCreateFlags flags, + void *create_pnext, VkDeviceMemory *mem, void *alloc_pnext) +{ + int err; + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + AVVulkanDeviceContext *hwctx = s->hwctx; + + VkExportSemaphoreCreateInfo ext_sem_info = { + .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO, +#ifdef _WIN32 + .handleTypes = IsWindows8OrGreater() + ? 
VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT + : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT, +#else + .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, +#endif + }; + + VkSemaphoreTypeCreateInfo sem_type_info = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, +#ifdef _WIN32 + .pNext = s->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM ? &ext_sem_info : NULL, +#else + .pNext = s->extensions & FF_VK_EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL, +#endif + .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE, + .initialValue = 0, + }; + + VkSemaphoreCreateInfo sem_spawn = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + .pNext = &sem_type_info, + }; + + /* Create the image */ + VkImageCreateInfo create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .pNext = create_pnext, + .imageType = VK_IMAGE_TYPE_2D, + .format = fmt, + .extent.depth = 1, + .mipLevels = 1, + .arrayLayers = 1, + .flags = flags, + .tiling = tiling, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .usage = usage, + .samples = VK_SAMPLE_COUNT_1_BIT, + .pQueueFamilyIndices = s->qfs, + .queueFamilyIndexCount = s->nb_qfs, + .sharingMode = s->nb_qfs > 1 ? VK_SHARING_MODE_CONCURRENT : + VK_SHARING_MODE_EXCLUSIVE, + }; + + ret = vk->CreateImage(hwctx->act_dev, &create_info, + hwctx->alloc, &f->img[0]); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Image creation failure: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR(EINVAL); + goto fail; + } + + /* Create semaphore */ + ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn, + hwctx->alloc, &f->sem[0]); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to create semaphore: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + f->queue_family[0] = s->nb_qfs > 1 ? 
VK_QUEUE_FAMILY_IGNORED : s->qfs[0]; + f->layout[0] = create_info.initialLayout; + f->access[0] = 0x0; + f->sem_value[0] = 0; + + f->flags = 0x0; + f->tiling = tiling; + + return 0; + +fail: + return err; +} + int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size, VkShaderStageFlagBits stage) { diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index 4540c3eda1184..a0baba7fc8d18 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -427,6 +427,17 @@ int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers, */ void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf); +/** + * Creates an image, allocates and binds memory in the given + * idx value of the dst frame. If mem is non-NULL, then no memory will be + * allocated, but instead the given memory will be bound to the image. + */ +int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *dst, int idx, + int width, int height, VkFormat fmt, VkImageTiling tiling, + VkImageUsageFlagBits usage, VkImageCreateFlags flags, + void *create_pnext, + VkDeviceMemory *mem, void *alloc_pnext); + /** * Frees the main Vulkan context. 
*/ From b0a8d7398c8d873aa3ceeb0505d0989d40100c12 Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 23 Nov 2022 14:03:34 +0100 Subject: [PATCH 49/98] vulkan: expose ff_vk_alloc_mem() --- libavutil/vulkan.c | 15 ++++++++------- libavutil/vulkan.h | 7 +++++++ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 212f134466752..7870de351d16d 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -174,9 +174,9 @@ void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf) qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues; } -static int vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, - VkMemoryPropertyFlagBits req_flags, void *alloc_extension, - VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem) +int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, + VkMemoryPropertyFlagBits req_flags, void *alloc_extension, + VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem) { VkResult ret; int index = -1; @@ -225,7 +225,8 @@ static int vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, return AVERROR(ENOMEM); } - *mem_flags |= s->mprops.memoryTypes[index].propertyFlags; + if (mem_flags) + *mem_flags |= s->mprops.memoryTypes[index].propertyFlags; return 0; } @@ -279,9 +280,9 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNe if (use_ded_mem) ded_alloc.buffer = buf->buf; - err = vk_alloc_mem(s, &req.memoryRequirements, flags, - use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext, - &buf->flags, &buf->mem); + err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags, + use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext, + &buf->flags, &buf->mem); if (err) return err; diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index a0baba7fc8d18..85836a7807587 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -258,6 +258,13 @@ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt); */ void ff_vk_qf_fill(FFVulkanContext *s); +/** + * Allocate device memory. 
+ */ +int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, + VkMemoryPropertyFlagBits req_flags, void *alloc_extension, + VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem); + /** * Initialize a queue family with a specific number of queues. * If nb_queues == 0, use however many queues the queue family has. From 146400aeb9458fc681b0a54a99a862cfbe3469cb Mon Sep 17 00:00:00 2001 From: Lynne Date: Tue, 29 Nov 2022 00:43:19 +0000 Subject: [PATCH 50/98] vulkan: support ignoring memory properties when allocating --- libavutil/vulkan.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 7870de351d16d..b1553c6537d53 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -188,7 +188,7 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, }; /* Align if we need to */ - if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + if ((req_flags != UINT32_MAX) && req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) req->size = FFALIGN(req->size, s->props.limits.minMemoryMapAlignment); alloc_info.allocationSize = req->size; @@ -201,7 +201,8 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, continue; /* The memory type flags must include our properties */ - if ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags) + if ((req_flags != UINT32_MAX) && + ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)) continue; /* Found a suitable memory type */ @@ -210,7 +211,7 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, } if (index < 0) { - av_log(s, AV_LOG_ERROR, "No memory type found for flags 0x%x\n", + av_log(s->device, AV_LOG_ERROR, "No memory type found for flags 0x%x\n", req_flags); return AVERROR(EINVAL); } From 231c78536c269470d52055fc3424e2fcf47bd775 Mon Sep 17 00:00:00 2001 From: Lynne Date: Thu, 15 Dec 2022 17:43:27 +0100 Subject: [PATCH 51/98] vulkan: allow alloc pNext in ff_vk_create_buf --- 
libavfilter/vf_gblur_vulkan.c | 2 +- libavfilter/vf_scale_vulkan.c | 2 +- libavutil/vulkan.c | 5 +++-- libavutil/vulkan.h | 3 ++- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/libavfilter/vf_gblur_vulkan.c b/libavfilter/vf_gblur_vulkan.c index 80d1dc61c0628..a6037e08888d7 100644 --- a/libavfilter/vf_gblur_vulkan.c +++ b/libavfilter/vf_gblur_vulkan.c @@ -174,7 +174,7 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVk RET(ff_vk_init_pipeline_layout(&s->vkctx, pl)); RET(ff_vk_init_compute_pipeline(&s->vkctx, pl)); - RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize, NULL, + RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize, NULL, NULL, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); RET(ff_vk_map_buffers(&s->vkctx, params_buf, &kernel_mapped, 1, 0)); diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c index d14b32277d321..cd37a861b1ab8 100644 --- a/libavfilter/vf_scale_vulkan.c +++ b/libavfilter/vf_scale_vulkan.c @@ -253,7 +253,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) } RET(ff_vk_create_buf(vkctx, &s->params_buf, - sizeof(*par), NULL, + sizeof(*par), NULL, NULL, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index b1553c6537d53..0bb5b1eebf4fa 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -232,7 +232,8 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, return 0; } -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext, +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, + void *pNext, void *alloc_pNext, VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags) { int err; @@ -254,7 +255,7 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNe }; VkMemoryDedicatedAllocateInfo ded_alloc = { .sType = 
VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, - .pNext = NULL, + .pNext = alloc_pNext, }; VkMemoryDedicatedRequirements ded_req = { .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index 85836a7807587..d75be2697719c 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -413,7 +413,8 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e); /** * Create a VkBuffer with the specified parameters. */ -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext, +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, + void *pNext, void *alloc_pNext, VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags); /** From 5a01b0f3f0bdff7cbd019f672c672b1dcf3d3bf0 Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 16 Dec 2022 00:37:53 +0100 Subject: [PATCH 52/98] vulkan: do not wait for device idle when destroying buffers This should be done explicitly. --- libavutil/vulkan.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 0bb5b1eebf4fa..0250f5aa3969b 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -396,8 +396,6 @@ void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf) if (!buf || !s->hwctx) return; - vk->DeviceWaitIdle(s->hwctx->act_dev); - if (buf->buf != VK_NULL_HANDLE) vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc); if (buf->mem != VK_NULL_HANDLE) From 65a433f479c7af0ad303882178a9dfdc6a3b83a2 Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 16 Dec 2022 01:47:42 +0100 Subject: [PATCH 53/98] vulkan: add size tracking to buffer structs --- libavutil/vulkan.c | 2 ++ libavutil/vulkan.h | 1 + 2 files changed, 3 insertions(+) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 0250f5aa3969b..faf5cd55084f8 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -295,6 +295,8 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, return AVERROR_EXTERNAL; 
} + buf->size = size; + return 0; } diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index d75be2697719c..f2c4a791021a9 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -94,6 +94,7 @@ typedef struct FFVkBuffer { VkBuffer buf; VkDeviceMemory mem; VkMemoryPropertyFlagBits flags; + size_t size; } FFVkBuffer; typedef struct FFVkQueueFamilyCtx { From bc1070f8867b7c4372c91b9fa6a8c54ce9783d0e Mon Sep 17 00:00:00 2001 From: Lynne Date: Mon, 19 Dec 2022 07:57:22 +0100 Subject: [PATCH 54/98] vulkan: use device properties 2 and add a convenience loader function --- libavutil/vulkan.c | 18 +++++++++++++++++- libavutil/vulkan.h | 8 +++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index faf5cd55084f8..8a583248d1607 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -108,6 +108,22 @@ const char *ff_vk_ret2str(VkResult res) #undef CASE } +void ff_vk_load_props(FFVulkanContext *s) +{ + FFVulkanFunctions *vk = &s->vkfn; + + s->driver_props = (VkPhysicalDeviceDriverProperties) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES, + }; + s->props = (VkPhysicalDeviceProperties2) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, + .pNext = &s->driver_props, + }; + + vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props); + vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops); +} + void ff_vk_qf_fill(FFVulkanContext *s) { s->nb_qfs = 0; @@ -189,7 +205,7 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, /* Align if we need to */ if ((req_flags != UINT32_MAX) && req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) - req->size = FFALIGN(req->size, s->props.limits.minMemoryMapAlignment); + req->size = FFALIGN(req->size, s->props.properties.limits.minMemoryMapAlignment); alloc_info.allocationSize = req->size; diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index f2c4a791021a9..2cd2c1f8fa847 100644 --- a/libavutil/vulkan.h +++ 
b/libavutil/vulkan.h @@ -197,7 +197,8 @@ typedef struct FFVulkanContext { FFVulkanFunctions vkfn; FFVulkanExtensions extensions; - VkPhysicalDeviceProperties props; + VkPhysicalDeviceProperties2 props; + VkPhysicalDeviceDriverProperties driver_props; VkPhysicalDeviceMemoryProperties mprops; AVBufferRef *device_ref; @@ -243,6 +244,11 @@ extern const VkComponentMapping ff_comp_identity_map; */ const char *ff_vk_ret2str(VkResult res); +/** + * Loads props/mprops/driver_props + */ +void ff_vk_load_props(FFVulkanContext *s); + /** * Returns 1 if the image is any sort of supported RGB */ From f552235c14fa931c876404b42c7e7499e8b8e7b7 Mon Sep 17 00:00:00 2001 From: Lynne Date: Thu, 22 Dec 2022 05:02:50 +0100 Subject: [PATCH 55/98] vulkan: minor indent fix, add support for synchronous submission/waiting --- libavutil/vulkan.c | 20 ++++++++++++++++++-- libavutil/vulkan.h | 9 +++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 8a583248d1607..b5e08ecc4695e 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -564,7 +564,7 @@ int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx, /* Create command pool */ ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create, - s->hwctx->alloc, &e->pool); + s->hwctx->alloc, &e->pool); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n", ff_vk_ret2str(ret)); @@ -631,11 +631,13 @@ int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e) ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } - } else { + } else if (!q->synchronous) { vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); vk->ResetFences(s->hwctx->act_dev, 1, &q->fence); } + q->synchronous = 0; + /* Discard queue dependencies */ ff_vk_discard_exec_deps(e); @@ -788,9 +790,23 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e) for (int i = 0; i < e->sem_sig_cnt; i++) *e->sem_sig_val_dst[i] += 1; + q->submitted = 
1; + return 0; } +void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e) +{ + FFVulkanFunctions *vk = &s->vkfn; + FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; + if (!q->submitted) + return; + + vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); + vk->ResetFences(s->hwctx->act_dev, 1, &q->fence); + q->synchronous = 1; +} + int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef **deps, int nb_deps) { diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index 2cd2c1f8fa847..a17cc4a34ef19 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -147,6 +147,9 @@ typedef struct FFVkQueueCtx { VkFence fence; VkQueue queue; + int synchronous; + int submitted; + /* Buffer dependencies */ AVBufferRef **buf_deps; int nb_buf_deps; @@ -417,6 +420,12 @@ int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame, */ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e); +/** + * Wait on a command buffer's execution. Mainly useful for debugging and + * development. + */ +void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e); + /** * Create a VkBuffer with the specified parameters. 
*/ From cafa1990776fc602537f9df9fd44340bf93e5cd6 Mon Sep 17 00:00:00 2001 From: Lynne Date: Thu, 22 Dec 2022 05:03:32 +0100 Subject: [PATCH 56/98] vulkan: add support for queries --- libavutil/vulkan.c | 118 +++++++++++++++++++++++++++++++++++++++++++++ libavutil/vulkan.h | 30 ++++++++++++ 2 files changed, 148 insertions(+) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index b5e08ecc4695e..de0c300c0e430 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -592,6 +592,114 @@ int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx, return 0; } +int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e, + int nb_queries, VkQueryType type, + int elem_64bits, void *create_pnext) +{ + VkResult ret; + size_t qd_size; + int nb_results = nb_queries; + int nb_statuses = 0 /* Once RADV has support, = nb_queries */; + int status_stride = 2; + int result_elem_size = elem_64bits ? 8 : 4; + FFVulkanFunctions *vk = &s->vkfn; + VkQueryPoolCreateInfo query_pool_info = { + .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, + .pNext = create_pnext, + .queryType = type, + .queryCount = nb_queries*e->qf->nb_queues, + }; + + if (e->query.pool) + return AVERROR(EINVAL); + + /* Video encode quieries produce two results per query */ + if (type == VK_QUERY_TYPE_VIDEO_ENCODE_BITSTREAM_BUFFER_RANGE_KHR) { + status_stride = 3; /* skip,skip,result,skip,skip,result */ + nb_results *= 2; + } else if (type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) { + status_stride = 1; + nb_results *= 0; + } + + qd_size = nb_results*result_elem_size + nb_statuses*result_elem_size; + + e->query.data = av_mallocz(e->qf->nb_queues*qd_size); + if (!e->query.data) + return AVERROR(ENOMEM); + + ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info, + s->hwctx->alloc, &e->query.pool); + if (ret != VK_SUCCESS) + return AVERROR_EXTERNAL; + + e->query.data_per_queue = qd_size; + e->query.nb_queries = nb_queries; + e->query.nb_results = nb_results; + e->query.nb_statuses = 
nb_statuses; + e->query.elem_64bits = elem_64bits; + e->query.status_stride = status_stride; + + return 0; +} + +int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e, + int query_idx, void **data, int64_t *status) +{ + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + uint8_t *qd; + int32_t *res32; + int64_t *res64; + int64_t res = 0; + VkQueryResultFlags qf = 0; + FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; + + if (!q->submitted) { + *data = NULL; + return 0; + } + + qd = e->query.data + e->qf->cur_queue*e->query.data_per_queue; + qf |= e->query.nb_results && e->query.nb_statuses ? + VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0; + qf |= e->query.elem_64bits ? VK_QUERY_RESULT_64_BIT : 0x0; + res32 = (int32_t *)(qd + e->query.nb_results*4); + res64 = (int64_t *)(qd + e->query.nb_results*8); + + ret = vk->GetQueryPoolResults(s->hwctx->act_dev, e->query.pool, + query_idx, + e->query.nb_queries, + e->query.data_per_queue, qd, + e->query.elem_64bits ? 8 : 4, qf); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Unable to perform query: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + if (e->query.nb_statuses && e->query.elem_64bits) { + for (int i = 0; i < e->query.nb_queries; i++) { + res = (res64[i] < res) || (res >= 0 && res64[i] > res) ? + res64[i] : res; + res64 += e->query.status_stride; + } + } else if (e->query.nb_statuses) { + for (int i = 0; i < e->query.nb_queries; i++) { + res = (res32[i] < res) || (res >= 0 && res32[i] > res) ? 
+ res32[i] : res; + res32 += e->query.status_stride; + } + } + + if (data) + *data = qd; + if (status) + *status = res; + + return 0; +} + void ff_vk_discard_exec_deps(FFVkExecContext *e) { FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; @@ -648,6 +756,12 @@ int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e) return AVERROR_EXTERNAL; } + if (e->query.pool) { + e->query.idx = e->qf->cur_queue*e->query.nb_queries; + vk->CmdResetQueryPool(e->bufs[e->qf->cur_queue], e->query.pool, + e->query.idx, e->query.nb_queries); + } + return 0; } @@ -790,6 +904,7 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e) for (int i = 0; i < e->sem_sig_cnt; i++) *e->sem_sig_val_dst[i] += 1; + e->query.idx = e->qf->cur_queue*e->query.nb_queries; q->submitted = 1; return 0; @@ -1483,7 +1598,10 @@ static void free_exec_ctx(FFVulkanContext *s, FFVkExecContext *e) vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, e->qf->nb_queues, e->bufs); if (e->pool) vk->DestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc); + if (e->query.pool) + vk->DestroyQueryPool(s->hwctx->act_dev, e->query.pool, s->hwctx->alloc); + av_freep(&e->query.data); av_freep(&e->bufs); av_freep(&e->queues); av_freep(&e->sem_sig); diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index a17cc4a34ef19..4bd1c9fc004dc 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -168,6 +168,19 @@ typedef struct FFVkExecContext { VkCommandBuffer *bufs; FFVkQueueCtx *queues; + struct { + int idx; + VkQueryPool pool; + uint8_t *data; + + int nb_queries; + int nb_results; + int nb_statuses; + int elem_64bits; + size_t data_per_queue; + int status_stride; + } query; + AVBufferRef ***deps; int *nb_deps; int *dep_alloc_size; @@ -371,6 +384,23 @@ void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl, int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx, FFVkQueueFamilyCtx *qf); +/** + * Create a query pool for a command context. 
+ * elem_64bits exists to troll driver devs for compliance. All results + * and statuses returned should be 32 bits, unless this is set, then it's 64bits. + */ +int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e, + int nb_queries, VkQueryType type, + int elem_64bits, void *create_pnext); + +/** + * Get results for query. + * Returns the status of the query. + * Sets *res to the status of the queries. + */ +int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e, + int query_idx, void **data, int64_t *status); + /** * Begin recording to the command buffer. Previous execution must have been * completed, which ff_vk_submit_exec_queue() will ensure. From 9141845182706490e8c6e37d96b0c2fffa865497 Mon Sep 17 00:00:00 2001 From: Lynne Date: Thu, 22 Dec 2022 17:37:51 +0100 Subject: [PATCH 57/98] vulkan: add support for retrieving queue, query and video properties --- libavutil/vulkan.c | 87 ++++++++++++++++++++++++++++++------ libavutil/vulkan.h | 14 ++++-- libavutil/vulkan_functions.h | 1 + 3 files changed, 85 insertions(+), 17 deletions(-) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index de0c300c0e430..6c9f91c7ef368 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -108,8 +108,9 @@ const char *ff_vk_ret2str(VkResult res) #undef CASE } -void ff_vk_load_props(FFVulkanContext *s) +int ff_vk_load_props(FFVulkanContext *s) { + uint32_t qc = 0; FFVulkanFunctions *vk = &s->vkfn; s->driver_props = (VkPhysicalDeviceDriverProperties) { @@ -120,8 +121,48 @@ void ff_vk_load_props(FFVulkanContext *s) .pNext = &s->driver_props, }; + vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props); vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops); + vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &qc, s->qf_props); + + if (s->qf_props) + return 0; + + s->qf_props = av_calloc(qc, sizeof(*s->qf_props)); + if (!s->qf_props) + return AVERROR(ENOMEM); + + s->query_props = av_calloc(qc, 
sizeof(*s->query_props)); + if (!s->qf_props) { + av_freep(&s->qf_props); + return AVERROR(ENOMEM); + } + + s->video_props = av_calloc(qc, sizeof(*s->video_props)); + if (!s->video_props) { + av_freep(&s->qf_props); + av_freep(&s->query_props); + return AVERROR(ENOMEM); + } + + for (uint32_t i = 0; i < qc; i++) { + s->query_props[i] = (VkQueueFamilyQueryResultStatusPropertiesKHR) { + .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR, + }; + s->video_props[i] = (VkQueueFamilyVideoPropertiesKHR) { + .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR, + .pNext = &s->query_props[i], + }; + s->qf_props[i] = (VkQueueFamilyProperties2) { + .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2, + .pNext = &s->video_props[i], + }; + } + + vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &qc, s->qf_props); + + return 0; } void ff_vk_qf_fill(FFVulkanContext *s) @@ -149,40 +190,54 @@ void ff_vk_qf_fill(FFVulkanContext *s) s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index; } -void ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, - VkQueueFlagBits dev_family, int nb_queues) +int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb) { + int ret, num; + switch (dev_family) { case VK_QUEUE_GRAPHICS_BIT: - qf->queue_family = s->hwctx->queue_family_index; - qf->actual_queues = s->hwctx->nb_graphics_queues; + ret = s->hwctx->queue_family_index; + num = s->hwctx->nb_graphics_queues; break; case VK_QUEUE_COMPUTE_BIT: - qf->queue_family = s->hwctx->queue_family_comp_index; - qf->actual_queues = s->hwctx->nb_comp_queues; + ret = s->hwctx->queue_family_comp_index; + num = s->hwctx->nb_comp_queues; break; case VK_QUEUE_TRANSFER_BIT: - qf->queue_family = s->hwctx->queue_family_tx_index; - qf->actual_queues = s->hwctx->nb_tx_queues; + ret = s->hwctx->queue_family_tx_index; + num = s->hwctx->nb_tx_queues; break; case VK_QUEUE_VIDEO_ENCODE_BIT_KHR: - qf->queue_family = s->hwctx->queue_family_encode_index; - 
qf->actual_queues = s->hwctx->nb_encode_queues; + ret = s->hwctx->queue_family_encode_index; + num = s->hwctx->nb_encode_queues; break; case VK_QUEUE_VIDEO_DECODE_BIT_KHR: - qf->queue_family = s->hwctx->queue_family_decode_index; - qf->actual_queues = s->hwctx->nb_decode_queues; + ret = s->hwctx->queue_family_decode_index; + num = s->hwctx->nb_decode_queues; break; default: av_assert0(0); /* Should never happen */ } + if (nb) + *nb = num; + + return ret; +} + +int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, + VkQueueFlagBits dev_family, int nb_queues) +{ + int ret; + + ret = qf->queue_family = ff_vk_qf_get_index(s, dev_family, &qf->actual_queues); + if (!nb_queues) qf->nb_queues = qf->actual_queues; else qf->nb_queues = nb_queues; - return; + return ret; } void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf) @@ -1669,6 +1724,10 @@ void ff_vk_uninit(FFVulkanContext *s) { FFVulkanFunctions *vk = &s->vkfn; + av_freep(&s->query_props); + av_freep(&s->qf_props); + av_freep(&s->video_props); + if (s->spirv_compiler) s->spirv_compiler->uninit(&s->spirv_compiler); diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index 4bd1c9fc004dc..4c38dbc2e6a48 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -216,6 +216,9 @@ typedef struct FFVulkanContext { VkPhysicalDeviceProperties2 props; VkPhysicalDeviceDriverProperties driver_props; VkPhysicalDeviceMemoryProperties mprops; + VkQueueFamilyQueryResultStatusPropertiesKHR *query_props; + VkQueueFamilyVideoPropertiesKHR *video_props; + VkQueueFamilyProperties2 *qf_props; AVBufferRef *device_ref; AVHWDeviceContext *device; @@ -263,7 +266,7 @@ const char *ff_vk_ret2str(VkResult res); /** * Loads props/mprops/driver_props */ -void ff_vk_load_props(FFVulkanContext *s); +int ff_vk_load_props(FFVulkanContext *s); /** * Returns 1 if the image is any sort of supported RGB @@ -288,12 +291,17 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, VkMemoryPropertyFlagBits req_flags, void *alloc_extension, 
VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem); +/** + * Get a queue family index and the number of queues. nb is optional. + */ +int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb); + /** * Initialize a queue family with a specific number of queues. * If nb_queues == 0, use however many queues the queue family has. */ -void ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, - VkQueueFlagBits dev_family, int nb_queues); +int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, + VkQueueFlagBits dev_family, int nb_queues); /** * Rotate through the queues in a queue family. diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h index 2a7c383dc132c..e06d0978078ac 100644 --- a/libavutil/vulkan_functions.h +++ b/libavutil/vulkan_functions.h @@ -77,6 +77,7 @@ typedef enum FFVulkanExtensions { MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceFormatProperties2) \ MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceImageFormatProperties2) \ MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceQueueFamilyProperties) \ + MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceQueueFamilyProperties2) \ \ /* Command pool */ \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateCommandPool) \ From 3f8f097f0d2953f6c64212feba6466e9a29688bf Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 28 Dec 2022 05:55:53 +0100 Subject: [PATCH 58/98] vulkan: return current queue index from ff_vk_qf_rotate() --- libavutil/vulkan.c | 3 ++- libavutil/vulkan.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 6c9f91c7ef368..0a92f894fa1af 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -240,9 +240,10 @@ int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, return ret; } -void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf) +int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf) { qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues; + return qf->cur_queue; } int ff_vk_alloc_mem(FFVulkanContext *s, 
VkMemoryRequirements *req, diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index 4c38dbc2e6a48..3f887a782e000 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -306,7 +306,7 @@ int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, /** * Rotate through the queues in a queue family. */ -void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf); +int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf); /** * Create a Vulkan sampler, will be auto-freed in ff_vk_filter_uninit() From c35232b94ad40cc011852f03a7b7fa113f0294f3 Mon Sep 17 00:00:00 2001 From: Lynne Date: Thu, 29 Dec 2022 21:16:21 +0100 Subject: [PATCH 59/98] vulkan: rewrite to support all necessary features This commit rewrites the majority of vulkan.c to enable its use as a general-purpose high-level utility code, usable for decoding, encoding, and filtering of video frames. The dependency system was rewritten to simplify management of execution. The image handling system was rewritten to accommodate multiplane images. Due to how related all the new features were, this is a single commit. --- libavutil/vulkan.c | 2163 ++++++++++++++++++---------------- libavutil/vulkan.h | 516 ++++---- libavutil/vulkan_functions.h | 1 + 3 files changed, 1357 insertions(+), 1323 deletions(-) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 0a92f894fa1af..cff13dcde2a37 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -1,4 +1,6 @@ /* + * Copyright (c) Lynne + * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -21,33 +23,6 @@ #include "vulkan.h" #include "vulkan_loader.h" -#if CONFIG_LIBGLSLANG -#include "vulkan_glslang.c" -#elif CONFIG_LIBSHADERC -#include "vulkan_shaderc.c" -#endif - -/* Generic macro for creating contexts which need to keep their addresses - * if another context is created.
*/ -#define FN_CREATING(ctx, type, shortname, array, num) \ -static av_always_inline type *create_ ##shortname(ctx *dctx) \ -{ \ - type **array, *sctx = av_mallocz(sizeof(*sctx)); \ - if (!sctx) \ - return NULL; \ - \ - array = av_realloc_array(dctx->array, sizeof(*dctx->array), dctx->num + 1);\ - if (!array) { \ - av_free(sctx); \ - return NULL; \ - } \ - \ - dctx->array = array; \ - dctx->array[dctx->num++] = sctx; \ - \ - return sctx; \ -} - const VkComponentMapping ff_comp_identity_map = { .r = VK_COMPONENT_SWIZZLE_IDENTITY, .g = VK_COMPONENT_SWIZZLE_IDENTITY, @@ -110,43 +85,50 @@ const char *ff_vk_ret2str(VkResult res) int ff_vk_load_props(FFVulkanContext *s) { - uint32_t qc = 0; FFVulkanFunctions *vk = &s->vkfn; + s->hprops = (VkPhysicalDeviceExternalMemoryHostPropertiesEXT) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT, + }; + s->desc_buf_props = (VkPhysicalDeviceDescriptorBufferPropertiesEXT) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_PROPERTIES_EXT, + .pNext = &s->hprops, + }; s->driver_props = (VkPhysicalDeviceDriverProperties) { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES, + .pNext = &s->desc_buf_props, }; s->props = (VkPhysicalDeviceProperties2) { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, .pNext = &s->driver_props, }; - vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props); vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops); - vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &qc, s->qf_props); if (s->qf_props) return 0; - s->qf_props = av_calloc(qc, sizeof(*s->qf_props)); + vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &s->tot_nb_qfs, NULL); + + s->qf_props = av_calloc(s->tot_nb_qfs, sizeof(*s->qf_props)); if (!s->qf_props) return AVERROR(ENOMEM); - s->query_props = av_calloc(qc, sizeof(*s->query_props)); + s->query_props = av_calloc(s->tot_nb_qfs, sizeof(*s->query_props)); if (!s->qf_props) { 
av_freep(&s->qf_props); return AVERROR(ENOMEM); } - s->video_props = av_calloc(qc, sizeof(*s->video_props)); + s->video_props = av_calloc(s->tot_nb_qfs, sizeof(*s->video_props)); if (!s->video_props) { av_freep(&s->qf_props); av_freep(&s->query_props); return AVERROR(ENOMEM); } - for (uint32_t i = 0; i < qc; i++) { + for (uint32_t i = 0; i < s->tot_nb_qfs; i++) { s->query_props[i] = (VkQueueFamilyQueryResultStatusPropertiesKHR) { .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR, }; @@ -160,37 +142,12 @@ int ff_vk_load_props(FFVulkanContext *s) }; } - vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &qc, s->qf_props); + vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &s->tot_nb_qfs, s->qf_props); return 0; } -void ff_vk_qf_fill(FFVulkanContext *s) -{ - s->nb_qfs = 0; - - /* Simply fills in all unique queues into s->qfs */ - if (s->hwctx->queue_family_index >= 0) - s->qfs[s->nb_qfs++] = s->hwctx->queue_family_index; - if (!s->nb_qfs || s->qfs[0] != s->hwctx->queue_family_tx_index) - s->qfs[s->nb_qfs++] = s->hwctx->queue_family_tx_index; - if (!s->nb_qfs || (s->qfs[0] != s->hwctx->queue_family_comp_index && - s->qfs[1] != s->hwctx->queue_family_comp_index)) - s->qfs[s->nb_qfs++] = s->hwctx->queue_family_comp_index; - if (s->hwctx->queue_family_decode_index >= 0 && - (s->qfs[0] != s->hwctx->queue_family_decode_index && - s->qfs[1] != s->hwctx->queue_family_decode_index && - s->qfs[2] != s->hwctx->queue_family_decode_index)) - s->qfs[s->nb_qfs++] = s->hwctx->queue_family_decode_index; - if (s->hwctx->queue_family_encode_index >= 0 && - (s->qfs[0] != s->hwctx->queue_family_encode_index && - s->qfs[1] != s->hwctx->queue_family_encode_index && - s->qfs[2] != s->hwctx->queue_family_encode_index && - s->qfs[3] != s->hwctx->queue_family_encode_index)) - s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index; -} - -int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb) +static int 
vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb) { int ret, num; @@ -226,24 +183,552 @@ int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb) } int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, - VkQueueFlagBits dev_family, int nb_queues) + VkQueueFlagBits dev_family) +{ + /* Fill in queue families from context if not done yet */ + if (!s->nb_qfs) { + s->nb_qfs = 0; + + /* Simply fills in all unique queues into s->qfs */ + if (s->hwctx->queue_family_index >= 0) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_index; + if (!s->nb_qfs || s->qfs[0] != s->hwctx->queue_family_tx_index) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_tx_index; + if (!s->nb_qfs || (s->qfs[0] != s->hwctx->queue_family_comp_index && + s->qfs[1] != s->hwctx->queue_family_comp_index)) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_comp_index; + if (s->hwctx->queue_family_decode_index >= 0 && + (s->qfs[0] != s->hwctx->queue_family_decode_index && + s->qfs[1] != s->hwctx->queue_family_decode_index && + s->qfs[2] != s->hwctx->queue_family_decode_index)) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_decode_index; + if (s->hwctx->queue_family_encode_index >= 0 && + (s->qfs[0] != s->hwctx->queue_family_encode_index && + s->qfs[1] != s->hwctx->queue_family_encode_index && + s->qfs[2] != s->hwctx->queue_family_encode_index && + s->qfs[3] != s->hwctx->queue_family_encode_index)) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index; + } + + return (qf->queue_family = vk_qf_get_index(s, dev_family, &qf->nb_queues)); +} + +void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool) { - int ret; + FFVulkanFunctions *vk = &s->vkfn; - ret = qf->queue_family = ff_vk_qf_get_index(s, dev_family, &qf->actual_queues); + for (int i = 0; i < pool->pool_size; i++) { + FFVkExecContext *e = &pool->contexts[i]; - if (!nb_queues) - qf->nb_queues = qf->actual_queues; - else - qf->nb_queues = nb_queues; + if (e->fence) { + 
vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX); + vk->DestroyFence(s->hwctx->act_dev, e->fence, s->hwctx->alloc); + } - return ret; + ff_vk_exec_discard_deps(s, e); + + av_free(e->frame_deps); + av_free(e->buf_deps); + av_free(e->queue_family_dst); + av_free(e->layout_dst); + av_free(e->access_dst); + av_free(e->frame_update); + av_free(e->frame_locked); + av_free(e->sem_sig); + av_free(e->sem_sig_val_dst); + av_free(e->sem_wait); + } + + if (pool->cmd_bufs) + vk->FreeCommandBuffers(s->hwctx->act_dev, pool->cmd_buf_pool, + pool->pool_size, pool->cmd_bufs); + if (pool->cmd_buf_pool) + vk->DestroyCommandPool(s->hwctx->act_dev, pool->cmd_buf_pool, s->hwctx->alloc); + if (pool->query_pool) + vk->DestroyQueryPool(s->hwctx->act_dev, pool->query_pool, s->hwctx->alloc); + + av_free(pool->query_data); + av_free(pool->cmd_bufs); + av_free(pool->contexts); +} + +int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, + FFVkExecPool *pool, int nb_contexts, + int nb_queries, VkQueryType query_type, int query_64bit, + const void *query_create_pnext) +{ + int err; + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + + VkCommandPoolCreateInfo cqueue_create; + VkCommandBufferAllocateInfo cbuf_create; + + atomic_init(&pool->idx, 0); + + /* Create command pool */ + cqueue_create = (VkCommandPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | + VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, + .queueFamilyIndex = qf->queue_family, + }; + ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create, + s->hwctx->alloc, &pool->cmd_buf_pool); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } + + /* Allocate space for command buffers */ + pool->cmd_bufs = av_malloc(nb_contexts*sizeof(*pool->cmd_bufs)); + if (!pool->cmd_bufs) { + err = AVERROR(ENOMEM); + goto fail; + } + + 
/* Allocate command buffer */ + cbuf_create = (VkCommandBufferAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + .commandPool = pool->cmd_buf_pool, + .commandBufferCount = nb_contexts, + }; + ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, + pool->cmd_bufs); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } + + /* Query pool */ + if (nb_queries) { + VkQueryPoolCreateInfo query_pool_info = { + .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, + .pNext = query_create_pnext, + .queryType = query_type, + .queryCount = nb_queries*nb_contexts, + }; + ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info, + s->hwctx->alloc, &pool->query_pool); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Query pool alloc failure: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } + + pool->nb_queries = nb_queries; + pool->query_status_stride = 2; + pool->query_results = nb_queries; + pool->query_statuses = 0; /* if radv supports it, nb_queries; */ + +#if CONFIG_VULKAN_ENCODE + /* Video encode quieries produce two results per query */ + if (query_type == VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR) { + pool->query_status_stride = 3; /* skip,skip,result,skip,skip,result */ + pool->query_results *= 2; + } else +#endif + if (query_type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) { + pool->query_status_stride = 1; + pool->query_results = 0; + pool->query_statuses = nb_queries; + } + + pool->qd_size = (pool->query_results + pool->query_statuses)*(query_64bit ? 
8 : 4); + + /* Allocate space for the query data */ + pool->query_data = av_calloc(nb_contexts, pool->qd_size); + if (!pool->query_data) { + err = AVERROR(ENOMEM); + goto fail; + } + } + + /* Allocate space for the contexts */ + pool->contexts = av_calloc(nb_contexts, sizeof(*pool->contexts)); + if (!pool->contexts) { + err = AVERROR(ENOMEM); + goto fail; + } + + pool->pool_size = nb_contexts; + + /* Init contexts */ + for (int i = 0; i < pool->pool_size; i++) { + FFVkExecContext *e = &pool->contexts[i]; + + /* Fence */ + VkFenceCreateInfo fence_create = { + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + .flags = VK_FENCE_CREATE_SIGNALED_BIT, + }; + ret = vk->CreateFence(s->hwctx->act_dev, &fence_create, s->hwctx->alloc, + &e->fence); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to create submission fence: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + e->idx = i; + e->parent = pool; + + /* Query data */ + e->query_data = ((uint8_t *)pool->query_data) + pool->qd_size*i; + e->query_idx = nb_queries*i; + + /* Command buffer */ + e->buf = pool->cmd_bufs[i]; + + /* Queue index distribution */ + e->qi = i % qf->nb_queues; + e->qf = qf->queue_family; + vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family, + e->qi, &e->queue); + } + + return 0; + +fail: + ff_vk_exec_pool_free(s, pool); + return err; +} + +VkResult ff_vk_exec_get_query(FFVulkanContext *s, FFVkExecContext *e, + void **data, int64_t *status) +{ + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + const FFVkExecPool *pool = e->parent; + + int32_t *res32 = e->query_data; + int64_t *res64 = e->query_data; + int64_t res = 0; + VkQueryResultFlags qf = 0; + + qf |= pool->query_64bit ? + VK_QUERY_RESULT_64_BIT : 0x0; + qf |= pool->query_statuses ? + VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0; + + ret = vk->GetQueryPoolResults(s->hwctx->act_dev, pool->query_pool, + e->query_idx, + pool->nb_queries, + pool->qd_size, e->query_data, + pool->query_64bit ? 
8 : 4, qf); + if (ret != VK_SUCCESS) + return ret; + + if (pool->query_statuses && pool->query_64bit) { + for (int i = 0; i < pool->query_statuses; i++) { + res = (res64[i] < res) || (res >= 0 && res64[i] > res) ? + res64[i] : res; + res64 += pool->query_status_stride; + } + } else if (pool->query_statuses) { + for (int i = 0; i < pool->query_statuses; i++) { + res = (res32[i] < res) || (res >= 0 && res32[i] > res) ? + res32[i] : res; + res32 += pool->query_status_stride; + } + } + + if (data) + *data = e->query_data; + if (status) + *status = res; + + return VK_SUCCESS; +} + +FFVkExecContext *ff_vk_exec_get(FFVkExecPool *pool) +{ + int idx = atomic_fetch_add_explicit(&pool->idx, 1, memory_order_relaxed); + idx %= pool->pool_size; + return &pool->contexts[idx]; +} + +void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e) +{ + FFVulkanFunctions *vk = &s->vkfn; + vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX); +} + +int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e) +{ + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + const FFVkExecPool *pool = e->parent; + + VkCommandBufferBeginInfo cmd_start = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + }; + + /* Create the fence and don't wait for it initially */ + vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX); + vk->ResetFences(s->hwctx->act_dev, 1, &e->fence); + + /* Discard queue dependencies */ + ff_vk_exec_discard_deps(s, e); + + ret = vk->BeginCommandBuffer(e->buf, &cmd_start); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to start command recoding: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + if (pool->nb_queries) + vk->CmdResetQueryPool(e->buf, pool->query_pool, + e->query_idx, pool->nb_queries); + + return 0; +} + +void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e) +{ + for (int j = 0; j < e->nb_buf_deps; j++) + 
av_buffer_unref(&e->buf_deps[j]); + e->nb_buf_deps = 0; + + for (int j = 0; j < e->nb_frame_deps; j++) { + AVFrame *f = e->frame_deps[j]; + if (e->frame_locked[j]) { + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + AVVulkanFramesContext *vkfc = hwfc->hwctx; + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + vkfc->unlock_frame(hwfc, vkf); + e->frame_locked[j] = 0; + } + e->frame_update[j] = 0; + if (f->buf[0]) + av_frame_free(&e->frame_deps[j]); + } + e->nb_frame_deps = 0; + + e->sem_wait_cnt = 0; + e->sem_sig_cnt = 0; + e->sem_sig_val_dst_cnt = 0; +} + +int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e, + AVBufferRef **deps, int nb_deps, int ref) +{ + AVBufferRef **dst = av_fast_realloc(e->buf_deps, &e->buf_deps_alloc_size, + (e->nb_buf_deps + nb_deps) * sizeof(*dst)); + if (!dst) { + ff_vk_exec_discard_deps(s, e); + return AVERROR(ENOMEM); + } + + e->buf_deps = dst; + + for (int i = 0; i < nb_deps; i++) { + e->buf_deps[e->nb_buf_deps] = ref ? av_buffer_ref(deps[i]) : deps[i]; + if (!e->buf_deps[e->nb_buf_deps]) { + ff_vk_exec_discard_deps(s, e); + return AVERROR(ENOMEM); + } + e->nb_buf_deps++; + } + + return 0; +} + +int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, + VkPipelineStageFlagBits2 wait_stage, + VkPipelineStageFlagBits2 signal_stage) +{ + uint8_t *frame_locked; + uint8_t *frame_update; + AVFrame **frame_deps; + VkImageLayout *layout_dst; + uint32_t *queue_family_dst; + VkAccessFlagBits *access_dst; + + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + AVVulkanFramesContext *vkfc = hwfc->hwctx; + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + int nb_images = ff_vk_count_images(vkf); + + /* Don't add duplicates */ + for (int i = 0; i < e->nb_frame_deps; i++) + if (e->frame_deps[i]->data[0] == f->data[0]) + return 1; + +#define ARR_REALLOC(str, arr, alloc_s, cnt) \ + do { \ + arr = av_fast_realloc(str->arr, alloc_s, (cnt + 1)*sizeof(*arr)); \ + if (!arr) { \ + 
ff_vk_exec_discard_deps(s, e); \ + return AVERROR(ENOMEM); \ + } \ + str->arr = arr; \ + } while (0) + + ARR_REALLOC(e, layout_dst, &e->layout_dst_alloc, e->nb_frame_deps); + ARR_REALLOC(e, queue_family_dst, &e->queue_family_dst_alloc, e->nb_frame_deps); + ARR_REALLOC(e, access_dst, &e->access_dst_alloc, e->nb_frame_deps); + + ARR_REALLOC(e, frame_locked, &e->frame_locked_alloc_size, e->nb_frame_deps); + ARR_REALLOC(e, frame_update, &e->frame_update_alloc_size, e->nb_frame_deps); + ARR_REALLOC(e, frame_deps, &e->frame_deps_alloc_size, e->nb_frame_deps); + + e->frame_deps[e->nb_frame_deps] = f->buf[0] ? av_frame_clone(f) : f; + if (!e->frame_deps[e->nb_frame_deps]) { + ff_vk_exec_discard_deps(s, e); + return AVERROR(ENOMEM); + } + + vkfc->lock_frame(hwfc, vkf); + e->frame_locked[e->nb_frame_deps] = 1; + e->frame_update[e->nb_frame_deps] = 0; + e->nb_frame_deps++; + + for (int i = 0; i < nb_images; i++) { + VkSemaphoreSubmitInfo *sem_wait; + VkSemaphoreSubmitInfo *sem_sig; + uint64_t **sem_sig_val_dst; + + ARR_REALLOC(e, sem_wait, &e->sem_wait_alloc, e->sem_wait_cnt); + ARR_REALLOC(e, sem_sig, &e->sem_sig_alloc, e->sem_sig_cnt); + ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt); + + e->sem_wait[e->sem_wait_cnt++] = (VkSemaphoreSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, + .semaphore = vkf->sem[i], + .value = vkf->sem_value[i], + .stageMask = wait_stage, + }; + + e->sem_sig[e->sem_sig_cnt++] = (VkSemaphoreSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, + .semaphore = vkf->sem[i], + .value = vkf->sem_value[i] + 1, + .stageMask = signal_stage, + }; + + e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = &vkf->sem_value[i]; + e->sem_sig_val_dst_cnt++; + } + + return 0; +} + +void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, + VkImageMemoryBarrier2 *bar, uint32_t *nb_img_bar) +{ + int i; + for (i = 0; i < e->nb_frame_deps; i++) + if (e->frame_deps[i]->data[0] == f->data[0]) 
+ break; + av_assert0(i < e->nb_frame_deps); + + /* Don't update duplicates */ + if (nb_img_bar && !e->frame_update[i]) + (*nb_img_bar)++; + + e->queue_family_dst[i] = bar->dstQueueFamilyIndex; + e->access_dst[i] = bar->dstAccessMask; + e->layout_dst[i] = bar->newLayout; + e->frame_update[i] = 1; +} + +int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e, + VkSemaphore *dst, uint64_t *dst_val, + AVFrame *f) +{ + uint64_t **sem_sig_val_dst; + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + + /* Reject unknown frames */ + int i; + for (i = 0; i < e->nb_frame_deps; i++) + if (e->frame_deps[i]->data[0] == f->data[0]) + break; + if (i == e->nb_frame_deps) + return AVERROR(EINVAL); + + ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt); + + *dst = vkf->sem[0]; + *dst_val = vkf->sem_value[0]; + + e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = dst_val; + e->sem_sig_val_dst_cnt++; + + return 0; } -int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf) +int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e) { - qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues; - return qf->cur_queue; + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + VkCommandBufferSubmitInfo cmd_buf_info = (VkCommandBufferSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, + .commandBuffer = e->buf, + }; + VkSubmitInfo2 submit_info = (VkSubmitInfo2) { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2, + .pCommandBufferInfos = &cmd_buf_info, + .commandBufferInfoCount = 1, + .pWaitSemaphoreInfos = e->sem_wait, + .waitSemaphoreInfoCount = e->sem_wait_cnt, + .pSignalSemaphoreInfos = e->sem_sig, + .signalSemaphoreInfoCount = e->sem_sig_cnt, + }; + + ret = vk->EndCommandBuffer(e->buf); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n", + ff_vk_ret2str(ret)); + ff_vk_exec_discard_deps(s, e); + return AVERROR_EXTERNAL; + } + + s->hwctx->lock_queue(s->device, e->qf, e->qi); + ret = vk->QueueSubmit2(e->queue, 1, 
&submit_info, e->fence); + s->hwctx->unlock_queue(s->device, e->qf, e->qi); + + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n", + ff_vk_ret2str(ret)); + ff_vk_exec_discard_deps(s, e); + return AVERROR_EXTERNAL; + } + + for (int i = 0; i < e->sem_sig_val_dst_cnt; i++) + *e->sem_sig_val_dst[i] += 1; + + /* Unlock all frames */ + for (int j = 0; j < e->nb_frame_deps; j++) { + if (e->frame_locked[j]) { + AVFrame *f = e->frame_deps[j]; + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + AVVulkanFramesContext *vkfc = hwfc->hwctx; + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + + if (e->frame_update[j]) { + int nb_images = ff_vk_count_images(vkf); + for (int i = 0; i < nb_images; i++) { + vkf->layout[i] = e->layout_dst[j]; + vkf->access[i] = e->access_dst[j]; + vkf->queue_family[i] = e->queue_family_dst[j]; + } + } + vkfc->unlock_frame(hwfc, vkf); + e->frame_locked[j] = 0; + } + } + + return 0; } int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, @@ -322,6 +807,10 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, but should be ok */ }; + VkMemoryAllocateFlagsInfo alloc_flags = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, + .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, + }; VkBufferMemoryRequirementsInfo2 req_desc = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, }; @@ -351,11 +840,18 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, /* In case the implementation prefers/requires dedicated allocation */ use_ded_mem = ded_req.prefersDedicatedAllocation | ded_req.requiresDedicatedAllocation; - if (use_ded_mem) + if (use_ded_mem) { ded_alloc.buffer = buf->buf; + ded_alloc.pNext = alloc_pNext; + alloc_pNext = &ded_alloc; + } + + if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) { + alloc_flags.pNext = alloc_pNext; + alloc_pNext = &alloc_flags; + } - err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags, - 
use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext, + err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags, alloc_pNext, &buf->flags, &buf->mem); if (err) return err; @@ -367,27 +863,72 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, return AVERROR_EXTERNAL; } + if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) { + VkBufferDeviceAddressInfo address_info = { + .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, + .buffer = buf->buf, + }; + buf->address = vk->GetBufferDeviceAddress(s->hwctx->act_dev, &address_info); + } + buf->size = size; return 0; } -int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[], +static void destroy_avvkbuf(void *opaque, uint8_t *data) +{ + FFVulkanContext *s = opaque; + FFVkBuffer *buf = (FFVkBuffer *)data; + ff_vk_free_buf(s, buf); + av_free(buf); +} + +int ff_vk_create_avbuf(FFVulkanContext *s, AVBufferRef **ref, size_t size, + void *pNext, void *alloc_pNext, + VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags) +{ + int err; + AVBufferRef *buf; + FFVkBuffer *vkb = av_mallocz(sizeof(*vkb)); + if (!vkb) + return AVERROR(ENOMEM); + + err = ff_vk_create_buf(s, vkb, size, pNext, alloc_pNext, usage, flags); + if (err < 0) { + av_free(vkb); + return err; + } + + buf = av_buffer_create((uint8_t *)vkb, sizeof(*vkb), destroy_avvkbuf, s, 0); + if (!buf) { + destroy_avvkbuf(s, (uint8_t *)vkb); + return AVERROR(ENOMEM); + } + + *ref = buf; + + return 0; +} + +int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer **buf, uint8_t *mem[], int nb_buffers, int invalidate) { VkResult ret; FFVulkanFunctions *vk = &s->vkfn; - VkMappedMemoryRange *inval_list = NULL; + VkMappedMemoryRange inval_list[64]; int inval_count = 0; for (int i = 0; i < nb_buffers; i++) { - ret = vk->MapMemory(s->hwctx->act_dev, buf[i].mem, 0, - VK_WHOLE_SIZE, 0, (void **)&mem[i]); + void *dst; + ret = vk->MapMemory(s->hwctx->act_dev, buf[i]->mem, 0, + VK_WHOLE_SIZE, 0, &dst); if (ret != VK_SUCCESS) { av_log(s, 
AV_LOG_ERROR, "Failed to map buffer memory: %s\n", ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } + mem[i] = dst; } if (!invalidate) @@ -396,16 +937,12 @@ int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[], for (int i = 0; i < nb_buffers; i++) { const VkMappedMemoryRange ival_buf = { .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, - .memory = buf[i].mem, + .memory = buf[i]->mem, .size = VK_WHOLE_SIZE, }; - if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + if (buf[i]->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) continue; - inval_list = av_fast_realloc(s->scratch, &s->scratch_size, - (++inval_count)*sizeof(*inval_list)); - if (!inval_list) - return AVERROR(ENOMEM); - inval_list[inval_count - 1] = ival_buf; + inval_list[inval_count++] = ival_buf; } if (inval_count) { @@ -421,29 +958,25 @@ int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[], return 0; } -int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers, +int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer **buf, int nb_buffers, int flush) { int err = 0; VkResult ret; FFVulkanFunctions *vk = &s->vkfn; - VkMappedMemoryRange *flush_list = NULL; + VkMappedMemoryRange flush_list[64]; int flush_count = 0; if (flush) { for (int i = 0; i < nb_buffers; i++) { const VkMappedMemoryRange flush_buf = { .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, - .memory = buf[i].mem, + .memory = buf[i]->mem, .size = VK_WHOLE_SIZE, }; - if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + if (buf[i]->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) continue; - flush_list = av_fast_realloc(s->scratch, &s->scratch_size, - (++flush_count)*sizeof(*flush_list)); - if (!flush_list) - return AVERROR(ENOMEM); - flush_list[flush_count - 1] = flush_buf; + flush_list[flush_count++] = flush_buf; } } @@ -458,7 +991,7 @@ int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers, } for (int i = 0; i < nb_buffers; i++) - 
vk->UnmapMemory(s->hwctx->act_dev, buf[i].mem); + vk->UnmapMemory(s->hwctx->act_dev, buf[i]->mem); return err; } @@ -470,547 +1003,109 @@ void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf) if (!buf || !s->hwctx) return; + if (buf->mapped_mem) + ff_vk_unmap_buffer(s, buf, 0); if (buf->buf != VK_NULL_HANDLE) vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc); if (buf->mem != VK_NULL_HANDLE) vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc); } -int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *f, int idx, - int width, int height, VkFormat fmt, VkImageTiling tiling, - VkImageUsageFlagBits usage, VkImageCreateFlags flags, - void *create_pnext, VkDeviceMemory *mem, void *alloc_pnext) +static void free_data_buf(void *opaque, uint8_t *data) { - int err; - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - AVVulkanDeviceContext *hwctx = s->hwctx; - - VkExportSemaphoreCreateInfo ext_sem_info = { - .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO, -#ifdef _WIN32 - .handleTypes = IsWindows8OrGreater() - ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT - : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT, -#else - .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, -#endif - }; - - VkSemaphoreTypeCreateInfo sem_type_info = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, -#ifdef _WIN32 - .pNext = s->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM ? &ext_sem_info : NULL, -#else - .pNext = s->extensions & FF_VK_EXT_EXTERNAL_FD_SEM ? 
&ext_sem_info : NULL, -#endif - .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE, - .initialValue = 0, - }; - - VkSemaphoreCreateInfo sem_spawn = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, - .pNext = &sem_type_info, - }; - - /* Create the image */ - VkImageCreateInfo create_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .pNext = create_pnext, - .imageType = VK_IMAGE_TYPE_2D, - .format = fmt, - .extent.depth = 1, - .mipLevels = 1, - .arrayLayers = 1, - .flags = flags, - .tiling = tiling, - .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - .usage = usage, - .samples = VK_SAMPLE_COUNT_1_BIT, - .pQueueFamilyIndices = s->qfs, - .queueFamilyIndexCount = s->nb_qfs, - .sharingMode = s->nb_qfs > 1 ? VK_SHARING_MODE_CONCURRENT : - VK_SHARING_MODE_EXCLUSIVE, - }; - - ret = vk->CreateImage(hwctx->act_dev, &create_info, - hwctx->alloc, &f->img[0]); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Image creation failure: %s\n", - ff_vk_ret2str(ret)); - err = AVERROR(EINVAL); - goto fail; - } - - /* Create semaphore */ - ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn, - hwctx->alloc, &f->sem[0]); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to create semaphore: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - f->queue_family[0] = s->nb_qfs > 1 ? 
VK_QUEUE_FAMILY_IGNORED : s->qfs[0]; - f->layout[0] = create_info.initialLayout; - f->access[0] = 0x0; - f->sem_value[0] = 0; - - f->flags = 0x0; - f->tiling = tiling; - - return 0; - -fail: - return err; -} - -int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size, - VkShaderStageFlagBits stage) -{ - VkPushConstantRange *pc; - - pl->push_consts = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts), - pl->push_consts_num + 1); - if (!pl->push_consts) - return AVERROR(ENOMEM); - - pc = &pl->push_consts[pl->push_consts_num++]; - memset(pc, 0, sizeof(*pc)); - - pc->stageFlags = stage; - pc->offset = offset; - pc->size = size; - - return 0; + FFVulkanContext *ctx = opaque; + FFVkBuffer *buf = (FFVkBuffer *)data; + ff_vk_free_buf(ctx, buf); + av_free(data); } -FN_CREATING(FFVulkanContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num) -int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx, - FFVkQueueFamilyCtx *qf) +static AVBufferRef *alloc_data_buf(void *opaque, size_t size) { - VkResult ret; - FFVkExecContext *e; - FFVulkanFunctions *vk = &s->vkfn; - - VkCommandPoolCreateInfo cqueue_create = { - .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, - .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, - .queueFamilyIndex = qf->queue_family, - }; - VkCommandBufferAllocateInfo cbuf_create = { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, - .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, - .commandBufferCount = qf->nb_queues, - }; - - e = create_exec_ctx(s); - if (!e) - return AVERROR(ENOMEM); - - e->qf = qf; - - e->queues = av_mallocz(qf->nb_queues * sizeof(*e->queues)); - if (!e->queues) - return AVERROR(ENOMEM); - - e->bufs = av_mallocz(qf->nb_queues * sizeof(*e->bufs)); - if (!e->bufs) - return AVERROR(ENOMEM); - - /* Create command pool */ - ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create, - s->hwctx->alloc, &e->pool); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Command pool creation 
failure: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - cbuf_create.commandPool = e->pool; - - /* Allocate command buffer */ - ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, e->bufs); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - for (int i = 0; i < qf->nb_queues; i++) { - FFVkQueueCtx *q = &e->queues[i]; - vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family, - i % qf->actual_queues, &q->queue); - } - - *ctx = e; + AVBufferRef *ref; + uint8_t *buf = av_mallocz(size); + if (!buf) + return NULL; - return 0; + ref = av_buffer_create(buf, size, free_data_buf, opaque, 0); + if (!ref) + av_free(buf); + return ref; } -int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e, - int nb_queries, VkQueryType type, - int elem_64bits, void *create_pnext) +int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool, + AVBufferRef **buf, VkBufferUsageFlags usage, + void *create_pNext, size_t size, + VkMemoryPropertyFlagBits mem_props) { - VkResult ret; - size_t qd_size; - int nb_results = nb_queries; - int nb_statuses = 0 /* Once RADV has support, = nb_queries */; - int status_stride = 2; - int result_elem_size = elem_64bits ? 
8 : 4; - FFVulkanFunctions *vk = &s->vkfn; - VkQueryPoolCreateInfo query_pool_info = { - .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, - .pNext = create_pnext, - .queryType = type, - .queryCount = nb_queries*e->qf->nb_queues, - }; - - if (e->query.pool) - return AVERROR(EINVAL); + int err; + AVBufferRef *ref; + FFVkBuffer *data; - /* Video encode quieries produce two results per query */ - if (type == VK_QUERY_TYPE_VIDEO_ENCODE_BITSTREAM_BUFFER_RANGE_KHR) { - status_stride = 3; /* skip,skip,result,skip,skip,result */ - nb_results *= 2; - } else if (type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) { - status_stride = 1; - nb_results *= 0; + if (!(*buf_pool)) { + *buf_pool = av_buffer_pool_init2(sizeof(FFVkBuffer), ctx, + alloc_data_buf, NULL); + if (!(*buf_pool)) + return AVERROR(ENOMEM); } - qd_size = nb_results*result_elem_size + nb_statuses*result_elem_size; - - e->query.data = av_mallocz(e->qf->nb_queues*qd_size); - if (!e->query.data) + *buf = ref = av_buffer_pool_get(*buf_pool); + if (!ref) return AVERROR(ENOMEM); - ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info, - s->hwctx->alloc, &e->query.pool); - if (ret != VK_SUCCESS) - return AVERROR_EXTERNAL; - - e->query.data_per_queue = qd_size; - e->query.nb_queries = nb_queries; - e->query.nb_results = nb_results; - e->query.nb_statuses = nb_statuses; - e->query.elem_64bits = elem_64bits; - e->query.status_stride = status_stride; - - return 0; -} - -int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e, - int query_idx, void **data, int64_t *status) -{ - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - uint8_t *qd; - int32_t *res32; - int64_t *res64; - int64_t res = 0; - VkQueryResultFlags qf = 0; - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; + data = (FFVkBuffer *)ref->data; + data->stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; + data->access = VK_ACCESS_2_NONE; - if (!q->submitted) { - *data = NULL; + if (data->size >= size) return 0; - } - qd = e->query.data + 
e->qf->cur_queue*e->query.data_per_queue; - qf |= e->query.nb_results && e->query.nb_statuses ? - VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0; - qf |= e->query.elem_64bits ? VK_QUERY_RESULT_64_BIT : 0x0; - res32 = (int32_t *)(qd + e->query.nb_results*4); - res64 = (int64_t *)(qd + e->query.nb_results*8); - - ret = vk->GetQueryPoolResults(s->hwctx->act_dev, e->query.pool, - query_idx, - e->query.nb_queries, - e->query.data_per_queue, qd, - e->query.elem_64bits ? 8 : 4, qf); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to perform query: %s!\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } + ff_vk_free_buf(ctx, data); + memset(data, 0, sizeof(*data)); - if (e->query.nb_statuses && e->query.elem_64bits) { - for (int i = 0; i < e->query.nb_queries; i++) { - res = (res64[i] < res) || (res >= 0 && res64[i] > res) ? - res64[i] : res; - res64 += e->query.status_stride; - } - } else if (e->query.nb_statuses) { - for (int i = 0; i < e->query.nb_queries; i++) { - res = (res32[i] < res) || (res >= 0 && res32[i] > res) ? 
- res32[i] : res; - res32 += e->query.status_stride; - } - } + av_log(ctx, AV_LOG_DEBUG, "Allocating buffer of %lu bytes for pool %p\n", + size, *buf_pool); - if (data) - *data = qd; - if (status) - *status = res; - - return 0; -} - -void ff_vk_discard_exec_deps(FFVkExecContext *e) -{ - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; - - for (int j = 0; j < q->nb_buf_deps; j++) - av_buffer_unref(&q->buf_deps[j]); - q->nb_buf_deps = 0; - - for (int j = 0; j < q->nb_frame_deps; j++) - av_frame_free(&q->frame_deps[j]); - q->nb_frame_deps = 0; - - e->sem_wait_cnt = 0; - e->sem_sig_cnt = 0; -} - -int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e) -{ - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; - - VkCommandBufferBeginInfo cmd_start = { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, - }; - - /* Create the fence and don't wait for it initially */ - if (!q->fence) { - VkFenceCreateInfo fence_spawn = { - .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, - }; - ret = vk->CreateFence(s->hwctx->act_dev, &fence_spawn, s->hwctx->alloc, - &q->fence); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to queue frame fence: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } else if (!q->synchronous) { - vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); - vk->ResetFences(s->hwctx->act_dev, 1, &q->fence); - } - - q->synchronous = 0; - - /* Discard queue dependencies */ - ff_vk_discard_exec_deps(e); - - ret = vk->BeginCommandBuffer(e->bufs[e->qf->cur_queue], &cmd_start); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to start command recoding: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - if (e->query.pool) { - e->query.idx = e->qf->cur_queue*e->query.nb_queries; - vk->CmdResetQueryPool(e->bufs[e->qf->cur_queue], e->query.pool, - e->query.idx, e->query.nb_queries); + err 
= ff_vk_create_buf(ctx, data, size, + create_pNext, NULL, usage, + mem_props); + if (err < 0) { + av_buffer_unref(&ref); + return err; } - return 0; -} - -VkCommandBuffer ff_vk_get_exec_buf(FFVkExecContext *e) -{ - return e->bufs[e->qf->cur_queue]; -} - -int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame, - VkPipelineStageFlagBits in_wait_dst_flag) -{ - AVFrame **dst; - AVVkFrame *f = (AVVkFrame *)frame->data[0]; - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; - AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data; - int planes = av_pix_fmt_count_planes(fc->sw_format); - - for (int i = 0; i < planes; i++) { - e->sem_wait = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc, - (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait)); - if (!e->sem_wait) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - - e->sem_wait_dst = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc, - (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst)); - if (!e->sem_wait_dst) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - - e->sem_wait_val = av_fast_realloc(e->sem_wait_val, &e->sem_wait_val_alloc, - (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_val)); - if (!e->sem_wait_val) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - - e->sem_sig = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc, - (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig)); - if (!e->sem_sig) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - - e->sem_sig_val = av_fast_realloc(e->sem_sig_val, &e->sem_sig_val_alloc, - (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val)); - if (!e->sem_sig_val) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); + if (mem_props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { + err = ff_vk_map_buffer(ctx, data, &data->mapped_mem, 0); + if (err < 0) { + av_buffer_unref(&ref); + return err; } - - e->sem_sig_val_dst = av_fast_realloc(e->sem_sig_val_dst, &e->sem_sig_val_dst_alloc, - (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val_dst)); 
- if (!e->sem_sig_val_dst) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - - e->sem_wait[e->sem_wait_cnt] = f->sem[i]; - e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag; - e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i]; - e->sem_wait_cnt++; - - e->sem_sig[e->sem_sig_cnt] = f->sem[i]; - e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i] + 1; - e->sem_sig_val_dst[e->sem_sig_cnt] = &f->sem_value[i]; - e->sem_sig_cnt++; - } - - dst = av_fast_realloc(q->frame_deps, &q->frame_deps_alloc_size, - (q->nb_frame_deps + 1) * sizeof(*dst)); - if (!dst) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); } - q->frame_deps = dst; - q->frame_deps[q->nb_frame_deps] = av_frame_clone(frame); - if (!q->frame_deps[q->nb_frame_deps]) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - q->nb_frame_deps++; - return 0; } -int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e) -{ - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; - - VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = { - .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, - .pWaitSemaphoreValues = e->sem_wait_val, - .pSignalSemaphoreValues = e->sem_sig_val, - .waitSemaphoreValueCount = e->sem_wait_cnt, - .signalSemaphoreValueCount = e->sem_sig_cnt, - }; - - VkSubmitInfo s_info = { - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .pNext = &s_timeline_sem_info, - - .commandBufferCount = 1, - .pCommandBuffers = &e->bufs[e->qf->cur_queue], - - .pWaitSemaphores = e->sem_wait, - .pWaitDstStageMask = e->sem_wait_dst, - .waitSemaphoreCount = e->sem_wait_cnt, - - .pSignalSemaphores = e->sem_sig, - .signalSemaphoreCount = e->sem_sig_cnt, - }; - - ret = vk->EndCommandBuffer(e->bufs[e->qf->cur_queue]); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - s->hwctx->lock_queue((AVHWDeviceContext *)s->device_ref->data, - 
e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues); - - ret = vk->QueueSubmit(q->queue, 1, &s_info, q->fence); - - s->hwctx->unlock_queue((AVHWDeviceContext *)s->device_ref->data, - e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues); - - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - for (int i = 0; i < e->sem_sig_cnt; i++) - *e->sem_sig_val_dst[i] += 1; - - e->query.idx = e->qf->cur_queue*e->query.nb_queries; - q->submitted = 1; - - return 0; -} - -void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e) -{ - FFVulkanFunctions *vk = &s->vkfn; - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; - if (!q->submitted) - return; - - vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); - vk->ResetFences(s->hwctx->act_dev, 1, &q->fence); - q->synchronous = 1; -} - -int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e, - AVBufferRef **deps, int nb_deps) +int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size, + VkShaderStageFlagBits stage) { - AVBufferRef **dst; - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; - - if (!deps || !nb_deps) - return 0; + VkPushConstantRange *pc; - dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size, - (q->nb_buf_deps + nb_deps) * sizeof(*dst)); - if (!dst) - goto err; + pl->push_consts = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts), + pl->push_consts_num + 1); + if (!pl->push_consts) + return AVERROR(ENOMEM); - q->buf_deps = dst; + pc = &pl->push_consts[pl->push_consts_num++]; + memset(pc, 0, sizeof(*pc)); - for (int i = 0; i < nb_deps; i++) { - q->buf_deps[q->nb_buf_deps] = deps[i]; - if (!q->buf_deps[q->nb_buf_deps]) - goto err; - q->nb_buf_deps++; - } + pc->stageFlags = stage; + pc->offset = offset; + pc->size = size; return 0; - -err: - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); } -FN_CREATING(FFVulkanContext, FFVkSampler, 
sampler, samplers, samplers_num) -FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, - int unnorm_coords, VkFilter filt) +int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler, + int unnorm_coords, VkFilter filt) { VkResult ret; FFVulkanFunctions *vk = &s->vkfn; @@ -1030,22 +1125,15 @@ FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, .unnormalizedCoordinates = unnorm_coords, }; - FFVkSampler *sctx = create_sampler(s); - if (!sctx) - return NULL; - ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info, - s->hwctx->alloc, &sctx->sampler[0]); + s->hwctx->alloc, sampler); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Unable to init sampler: %s\n", ff_vk_ret2str(ret)); - return NULL; + return AVERROR_EXTERNAL; } - for (int i = 1; i < 4; i++) - sctx->sampler[i] = sctx->sampler[0]; - - return sctx; + return 0; } int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt) @@ -1068,79 +1156,139 @@ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt) } typedef struct ImageViewCtx { - VkImageView view; + VkImageView views[AV_NUM_DATA_POINTERS]; + int nb_views; } ImageViewCtx; -static void destroy_imageview(void *opaque, uint8_t *data) +static void destroy_imageviews(void *opaque, uint8_t *data) { FFVulkanContext *s = opaque; FFVulkanFunctions *vk = &s->vkfn; ImageViewCtx *iv = (ImageViewCtx *)data; - vk->DestroyImageView(s->hwctx->act_dev, iv->view, s->hwctx->alloc); + for (int i = 0; i < iv->nb_views; i++) + vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc); + av_free(iv); } -int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e, - VkImageView *v, VkImage img, VkFormat fmt, - const VkComponentMapping map) +int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e, + VkImageView views[AV_NUM_DATA_POINTERS], + AVFrame *f) { int err; + VkResult ret; AVBufferRef *buf; FFVulkanFunctions *vk = &s->vkfn; - - VkImageViewCreateInfo imgview_spawn = { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .pNext = NULL, - 
.image = img, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = fmt, - .components = map, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }; + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + const VkFormat *rep_fmts = av_vkfmt_from_pixfmt(hwfc->sw_format); + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + const int nb_images = ff_vk_count_images(vkf); + const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format); ImageViewCtx *iv = av_mallocz(sizeof(*iv)); + if (!iv) + return AVERROR(ENOMEM); - VkResult ret = vk->CreateImageView(s->hwctx->act_dev, &imgview_spawn, - s->hwctx->alloc, &iv->view); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; + for (int i = 0; i < nb_planes; i++) { + VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_ASPECT_PLANE_0_BIT, + VK_IMAGE_ASPECT_PLANE_1_BIT, + VK_IMAGE_ASPECT_PLANE_2_BIT, }; + + VkImageViewCreateInfo view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = NULL, + .image = vkf->img[FFMIN(i, nb_images - 1)], + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = rep_fmts[i], + .components = ff_comp_identity_map, + .subresourceRange = { + .aspectMask = plane_aspect[(nb_planes != nb_images) + + i*(nb_planes != nb_images)], + .levelCount = 1, + .layerCount = 1, + }, + }; + + ret = vk->CreateImageView(s->hwctx->act_dev, &view_create_info, + s->hwctx->alloc, &iv->views[i]); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } + + iv->nb_views++; } - buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageview, s, 0); + buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageviews, s, 0); if (!buf) { - destroy_imageview(s, (uint8_t *)iv); - return 
AVERROR(ENOMEM); + err = AVERROR(ENOMEM); + goto fail; } /* Add to queue dependencies */ - err = ff_vk_add_dep_exec_ctx(s, e, &buf, 1); - if (err) { + err = ff_vk_exec_add_dep_buf(s, e, &buf, 1, 0); + if (err < 0) av_buffer_unref(&buf); - return err; - } - *v = iv->view; + memcpy(views, iv->views, nb_planes*sizeof(*views)); - return 0; + return err; + +fail: + for (int i = 0; i < iv->nb_views; i++) + vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc); + av_free(iv); + return err; } -FN_CREATING(FFVulkanPipeline, FFVkSPIRVShader, shader, shaders, shaders_num) -FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name, - VkShaderStageFlags stage) +void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e, + AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar, + VkPipelineStageFlags src_stage, + VkPipelineStageFlags dst_stage, + VkAccessFlagBits new_access, + VkImageLayout new_layout, + uint32_t new_qf) { - FFVkSPIRVShader *shd = create_shader(pl); - if (!shd) - return NULL; + int i, found; + AVVkFrame *vkf = (AVVkFrame *)pic->data[0]; + const int nb_images = ff_vk_count_images(vkf); + for (i = 0; i < e->nb_frame_deps; i++) + if (e->frame_deps[i]->data[0] == pic->data[0]) + break; + found = (i < e->nb_frame_deps) && (e->frame_update[i]) ? i : -1; + + for (int i = 0; i < nb_images; i++) { + bar[*nb_bar] = (VkImageMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, + .pNext = NULL, + .srcStageMask = src_stage, + .dstStageMask = dst_stage, + .srcAccessMask = found >= 0 ? e->access_dst[found] : vkf->access[i], + .dstAccessMask = new_access, + .oldLayout = found >= 0 ? e->layout_dst[found] : vkf->layout[0], + .newLayout = new_layout, + .srcQueueFamilyIndex = found >= 0 ? 
e->queue_family_dst[found] : vkf->queue_family[0], + .dstQueueFamilyIndex = new_qf, + .image = vkf->img[i], + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .layerCount = 1, + .levelCount = 1, + }, + }; + *nb_bar += 1; + } + + ff_vk_exec_update_frame(s, e, pic, &bar[*nb_bar - nb_images], NULL); +} +int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name, + VkShaderStageFlags stage) +{ av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED); shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; @@ -1151,22 +1299,24 @@ FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name, GLSLF(0, #version %i ,460); GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) ); GLSLC(0, ); + GLSLC(0, #extension GL_EXT_buffer_reference : require ); + GLSLC(0, #extension GL_EXT_buffer_reference2 : require ); - return shd; + return 0; } -void ff_vk_set_compute_shader_sizes(FFVkSPIRVShader *shd, int local_size[3]) +void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int x, int y, int z) { - shd->local_size[0] = local_size[0]; - shd->local_size[1] = local_size[1]; - shd->local_size[2] = local_size[2]; + shd->local_size[0] = x; + shd->local_size[1] = y; + shd->local_size[2] = z; av_bprintf(&shd->src, "layout (local_size_x = %i, " "local_size_y = %i, local_size_z = %i) in;\n\n", shd->local_size[0], shd->local_size[1], shd->local_size[2]); } -void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio) +void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio) { int line = 0; const char *p = shd->src.str; @@ -1188,36 +1338,24 @@ void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio) av_bprint_finalize(&buf, NULL); } -int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd, - const char *entrypoint) +void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd) +{ + FFVulkanFunctions *vk = &s->vkfn; + 
av_bprint_finalize(&shd->src, NULL); + + if (shd->shader.module) + vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module, s->hwctx->alloc); +} + +int ff_vk_shader_create(FFVulkanContext *s, FFVkSPIRVShader *shd, + uint8_t *spirv, size_t spirv_size, const char *entrypoint) { - int err; VkResult ret; FFVulkanFunctions *vk = &s->vkfn; VkShaderModuleCreateInfo shader_create; - uint8_t *spirv; - size_t spirv_size; - void *priv; shd->shader.pName = entrypoint; - if (!s->spirv_compiler) { -#if CONFIG_LIBGLSLANG - s->spirv_compiler = ff_vk_glslang_init(); -#elif CONFIG_LIBSHADERC - s->spirv_compiler = ff_vk_shaderc_init(); -#else - return AVERROR(ENOSYS); -#endif - if (!s->spirv_compiler) - return AVERROR(ENOMEM); - } - - err = s->spirv_compiler->compile_shader(s->spirv_compiler, s, shd, &spirv, - &spirv_size, entrypoint, &priv); - if (err < 0) - return err; - av_log(s, AV_LOG_VERBOSE, "Shader %s compiled! Size: %zu bytes\n", shd->name, spirv_size); @@ -1229,11 +1367,8 @@ int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd, ret = vk->CreateShaderModule(s->hwctx->act_dev, &shader_create, NULL, &shd->shader.module); - - s->spirv_compiler->free_shader(s->spirv_compiler, &priv); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to create shader module: %s\n", + av_log(s, AV_LOG_VERBOSE, "Error creating shader module: %s\n", ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } @@ -1262,132 +1397,88 @@ static const struct descriptor_props { [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER] = { sizeof(VkBufferView), "imageBuffer", 1, 0, 0, 0, }, }; -int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl, - FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc, - int num, int only_print_to_shader) +int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkSPIRVShader *shd, + FFVulkanDescriptorSetBinding *desc, int nb, + int read_only, int print_to_shader_only) { VkResult ret; - VkDescriptorSetLayout *layout; + int 
has_sampler = 0; FFVulkanFunctions *vk = &s->vkfn; + FFVulkanDescriptorSet *set; + VkDescriptorSetLayoutCreateInfo desc_create_layout; - if (only_print_to_shader) + if (print_to_shader_only) goto print; - pl->desc_layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout), - pl->desc_layout_num + pl->qf->nb_queues); - if (!pl->desc_layout) + /* Actual layout allocated for the pipeline */ + set = av_realloc_array(pl->desc_set, sizeof(*pl->desc_set), + pl->nb_descriptor_sets + 1); + if (!set) return AVERROR(ENOMEM); + pl->desc_set = set; + set = &set[pl->nb_descriptor_sets]; + memset(set, 0, sizeof(*set)); - pl->desc_set_initialized = av_realloc_array(pl->desc_set_initialized, - sizeof(*pl->desc_set_initialized), - pl->descriptor_sets_num + 1); - if (!pl->desc_set_initialized) + set->binding = av_calloc(nb, sizeof(*set->binding)); + if (!set->binding) return AVERROR(ENOMEM); - pl->desc_set_initialized[pl->descriptor_sets_num] = 0; - layout = &pl->desc_layout[pl->desc_layout_num]; - - { /* Create descriptor set layout descriptions */ - VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 }; - VkDescriptorSetLayoutBinding *desc_binding; - - desc_binding = av_mallocz(sizeof(*desc_binding)*num); - if (!desc_binding) - return AVERROR(ENOMEM); - - for (int i = 0; i < num; i++) { - desc_binding[i].binding = i; - desc_binding[i].descriptorType = desc[i].type; - desc_binding[i].descriptorCount = FFMAX(desc[i].elems, 1); - desc_binding[i].stageFlags = desc[i].stages; - desc_binding[i].pImmutableSamplers = desc[i].sampler ? 
- desc[i].sampler->sampler : - NULL; - } - - desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - desc_create_layout.pBindings = desc_binding; - desc_create_layout.bindingCount = num; - - for (int i = 0; i < pl->qf->nb_queues; i++) { - ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout, - s->hwctx->alloc, &layout[i]); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to init descriptor set " - "layout: %s\n", ff_vk_ret2str(ret)); - av_free(desc_binding); - return AVERROR_EXTERNAL; - } - } - - av_free(desc_binding); + set->binding_offset = av_calloc(nb, sizeof(*set->binding_offset)); + if (!set->binding_offset) { + av_freep(&set->binding); + return AVERROR(ENOMEM); } - { /* Pool each descriptor by type and update pool counts */ - for (int i = 0; i < num; i++) { - int j; - for (j = 0; j < pl->pool_size_desc_num; j++) - if (pl->pool_size_desc[j].type == desc[i].type) - break; - if (j >= pl->pool_size_desc_num) { - pl->pool_size_desc = av_realloc_array(pl->pool_size_desc, - sizeof(*pl->pool_size_desc), - ++pl->pool_size_desc_num); - if (!pl->pool_size_desc) - return AVERROR(ENOMEM); - memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize)); - } - pl->pool_size_desc[j].type = desc[i].type; - pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1)*pl->qf->nb_queues; - } - } + desc_create_layout = (VkDescriptorSetLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = nb, + .pBindings = set->binding, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT, + }; - { /* Create template creation struct */ - VkDescriptorUpdateTemplateCreateInfo *dt; - VkDescriptorUpdateTemplateEntry *des_entries; + for (int i = 0; i < nb; i++) { + set->binding[i].binding = i; + set->binding[i].descriptorType = desc[i].type; + set->binding[i].descriptorCount = FFMAX(desc[i].elems, 1); + set->binding[i].stageFlags = desc[i].stages; + 
set->binding[i].pImmutableSamplers = desc[i].samplers; - /* Freed after descriptor set initialization */ - des_entries = av_mallocz(num*sizeof(VkDescriptorUpdateTemplateEntry)); - if (!des_entries) - return AVERROR(ENOMEM); + if (desc[i].type == VK_DESCRIPTOR_TYPE_SAMPLER || + desc[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) + has_sampler |= 1; + } - for (int i = 0; i < num; i++) { - des_entries[i].dstBinding = i; - des_entries[i].descriptorType = desc[i].type; - des_entries[i].descriptorCount = FFMAX(desc[i].elems, 1); - des_entries[i].dstArrayElement = 0; - des_entries[i].offset = ((uint8_t *)desc[i].updater) - (uint8_t *)s; - des_entries[i].stride = descriptor_props[desc[i].type].struct_size; - } + set->usage = VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; + if (has_sampler) + set->usage |= VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT; - pl->desc_template_info = av_realloc_array(pl->desc_template_info, - sizeof(*pl->desc_template_info), - pl->total_descriptor_sets + pl->qf->nb_queues); - if (!pl->desc_template_info) - return AVERROR(ENOMEM); + ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout, + s->hwctx->alloc, &set->layout); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Unable to init descriptor set layout: %s", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } - dt = &pl->desc_template_info[pl->total_descriptor_sets]; - memset(dt, 0, sizeof(*dt)*pl->qf->nb_queues); + vk->GetDescriptorSetLayoutSizeEXT(s->hwctx->act_dev, set->layout, &set->layout_size); - for (int i = 0; i < pl->qf->nb_queues; i++) { - dt[i].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO; - dt[i].templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET; - dt[i].descriptorSetLayout = layout[i]; - dt[i].pDescriptorUpdateEntries = des_entries; - dt[i].descriptorUpdateEntryCount = num; - } - } + set->aligned_size = FFALIGN(set->layout_size, 
s->desc_buf_props.descriptorBufferOffsetAlignment); - pl->descriptor_sets_num++; + for (int i = 0; i < nb; i++) + vk->GetDescriptorSetLayoutBindingOffsetEXT(s->hwctx->act_dev, set->layout, + i, &set->binding_offset[i]); - pl->desc_layout_num += pl->qf->nb_queues; - pl->total_descriptor_sets += pl->qf->nb_queues; + set->read_only = read_only; + set->nb_bindings = nb; + pl->nb_descriptor_sets++; print: /* Write shader info */ - for (int i = 0; i < num; i++) { + for (int i = 0; i < nb; i++) { const struct descriptor_props *prop = &descriptor_props[desc[i].type]; - GLSLA("layout (set = %i, binding = %i", pl->descriptor_sets_num - 1, i); + GLSLA("layout (set = %i, binding = %i", pl->nb_descriptor_sets - 1, i); if (desc[i].mem_layout) GLSLA(", %s", desc[i].mem_layout); @@ -1412,185 +1503,268 @@ int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl, else if (desc[i].elems > 0) GLSLA("[%i]", desc[i].elems); - GLSLA(";\n"); + GLSLA(";"); + GLSLA("\n"); } GLSLA("\n"); return 0; } -void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl, - int set_id) +int ff_vk_exec_pipeline_register(FFVulkanContext *s, FFVkExecPool *pool, + FFVulkanPipeline *pl) { - FFVulkanFunctions *vk = &s->vkfn; + int err; - /* If a set has never been updated, update all queues' sets. 
*/ - if (!pl->desc_set_initialized[set_id]) { - for (int i = 0; i < pl->qf->nb_queues; i++) { - int idx = set_id*pl->qf->nb_queues + i; - vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev, - pl->desc_set[idx], - pl->desc_template[idx], - s); - } - pl->desc_set_initialized[set_id] = 1; - return; - } + pl->desc_bind = av_calloc(pl->nb_descriptor_sets, sizeof(*pl->desc_bind)); + if (!pl->desc_bind) + return AVERROR(ENOMEM); + + pl->bound_buffer_indices = av_calloc(pl->nb_descriptor_sets, + sizeof(*pl->bound_buffer_indices)); + if (!pl->bound_buffer_indices) + return AVERROR(ENOMEM); - set_id = set_id*pl->qf->nb_queues + pl->qf->cur_queue; + for (int i = 0; i < pl->nb_descriptor_sets; i++) { + FFVulkanDescriptorSet *set = &pl->desc_set[i]; + int nb = set->read_only ? 1 : pool->pool_size; + + err = ff_vk_create_buf(s, &set->buf, set->aligned_size*nb, + NULL, NULL, set->usage, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + if (err < 0) + return err; + + err = ff_vk_map_buffer(s, &set->buf, &set->desc_mem, 0); + if (err < 0) + return err; + + pl->desc_bind[i] = (VkDescriptorBufferBindingInfoEXT) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT, + .usage = set->usage, + .address = set->buf.address, + }; + + pl->bound_buffer_indices[i] = i; + } - vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev, - pl->desc_set[set_id], - pl->desc_template[set_id], - s); + return 0; } -void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e, - VkShaderStageFlagBits stage, int offset, - size_t size, void *src) +static inline void update_set_descriptor(FFVulkanContext *s, FFVkExecContext *e, + FFVulkanDescriptorSet *set, + int bind_idx, int array_idx, + VkDescriptorGetInfoEXT *desc_get_info, + size_t desc_size) { FFVulkanFunctions *vk = &s->vkfn; + const size_t exec_offset = set->read_only ? 
0 : set->aligned_size*e->idx; + void *desc = set->desc_mem + /* Base */ + exec_offset + /* Execution context */ + set->binding_offset[bind_idx] + /* Descriptor binding */ + array_idx*desc_size; /* Array position */ - vk->CmdPushConstants(e->bufs[e->qf->cur_queue], e->bound_pl->pipeline_layout, - stage, offset, size, src); + vk->GetDescriptorEXT(s->hwctx->act_dev, desc_get_info, desc_size, desc); } -int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl) +int ff_vk_set_descriptor_sampler(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkSampler *sampler) { - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - - pl->desc_staging = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_staging)); - if (!pl->desc_staging) - return AVERROR(ENOMEM); + FFVulkanDescriptorSet *desc_set = &pl->desc_set[set]; + VkDescriptorGetInfoEXT desc_get_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, + .type = desc_set->binding[bind].descriptorType, + }; - { /* Init descriptor set pool */ - VkDescriptorPoolCreateInfo pool_create_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - .poolSizeCount = pl->pool_size_desc_num, - .pPoolSizes = pl->pool_size_desc, - .maxSets = pl->total_descriptor_sets, - }; + switch (desc_get_info.type) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + desc_get_info.data.pSampler = sampler; + break; + default: + av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n", + set, bind, desc_get_info.type); + return AVERROR(EINVAL); + break; + }; - ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info, - s->hwctx->alloc, &pl->desc_pool); - av_freep(&pl->pool_size_desc); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to init descriptor set " - "pool: %s\n", ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } + update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info, + s->desc_buf_props.samplerDescriptorSize); - { /* Allocate 
descriptor sets */ - VkDescriptorSetAllocateInfo alloc_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = pl->desc_pool, - .descriptorSetCount = pl->total_descriptor_sets, - .pSetLayouts = pl->desc_layout, - }; + return 0; +} - pl->desc_set = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_set)); - if (!pl->desc_set) - return AVERROR(ENOMEM); +int ff_vk_set_descriptor_image(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkImageView view, VkImageLayout layout, VkSampler sampler) +{ + FFVulkanDescriptorSet *desc_set = &pl->desc_set[set]; + VkDescriptorGetInfoEXT desc_get_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, + .type = desc_set->binding[bind].descriptorType, + }; + VkDescriptorImageInfo desc_img_info = { + .imageView = view, + .sampler = sampler, + .imageLayout = layout, + }; + size_t desc_size; - ret = vk->AllocateDescriptorSets(s->hwctx->act_dev, &alloc_info, - pl->desc_set); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } + switch (desc_get_info.type) { + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + desc_get_info.data.pSampledImage = &desc_img_info; + desc_size = s->desc_buf_props.sampledImageDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + desc_get_info.data.pStorageImage = &desc_img_info; + desc_size = s->desc_buf_props.storageImageDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + desc_get_info.data.pInputAttachmentImage = &desc_img_info; + desc_size = s->desc_buf_props.inputAttachmentDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + desc_get_info.data.pCombinedImageSampler = &desc_img_info; + desc_size = s->desc_buf_props.combinedImageSamplerDescriptorSize; + break; + default: + av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n", + set, bind, 
desc_get_info.type); + return AVERROR(EINVAL); + break; + }; - { /* Finally create the pipeline layout */ - VkPipelineLayoutCreateInfo spawn_pipeline_layout = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pSetLayouts = (VkDescriptorSetLayout *)pl->desc_staging, - .pushConstantRangeCount = pl->push_consts_num, - .pPushConstantRanges = pl->push_consts, - }; + update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info, desc_size); - for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) - pl->desc_staging[spawn_pipeline_layout.setLayoutCount++] = pl->desc_layout[i]; + return 0; +} - ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout, - s->hwctx->alloc, &pl->pipeline_layout); - av_freep(&pl->push_consts); - pl->push_consts_num = 0; - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } +int ff_vk_set_descriptor_buffer(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkDeviceAddress addr, VkDeviceSize len, VkFormat fmt) +{ + FFVulkanDescriptorSet *desc_set = &pl->desc_set[set]; + VkDescriptorGetInfoEXT desc_get_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, + .type = desc_set->binding[bind].descriptorType, + }; + VkDescriptorAddressInfoEXT desc_buf_info = { + .address = addr, + .range = len, + .format = fmt, + }; + size_t desc_size; - { /* Descriptor template (for tightly packed descriptors) */ - VkDescriptorUpdateTemplateCreateInfo *dt; + switch (desc_get_info.type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + desc_get_info.data.pUniformBuffer = &desc_buf_info; + desc_size = s->desc_buf_props.uniformBufferDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + desc_get_info.data.pStorageBuffer = &desc_buf_info; + desc_size = s->desc_buf_props.storageBufferDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + 
desc_get_info.data.pUniformTexelBuffer = &desc_buf_info; + desc_size = s->desc_buf_props.uniformTexelBufferDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + desc_get_info.data.pStorageTexelBuffer = &desc_buf_info; + desc_size = s->desc_buf_props.storageTexelBufferDescriptorSize; + break; + default: + av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n", + set, bind, desc_get_info.type); + return AVERROR(EINVAL); + break; + }; - pl->desc_template = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_template)); - if (!pl->desc_template) - return AVERROR(ENOMEM); + update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info, desc_size); - /* Create update templates for the descriptor sets */ - for (int i = 0; i < pl->total_descriptor_sets; i++) { - dt = &pl->desc_template_info[i]; - dt->pipelineLayout = pl->pipeline_layout; - ret = vk->CreateDescriptorUpdateTemplate(s->hwctx->act_dev, - dt, s->hwctx->alloc, - &pl->desc_template[i]); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to init descriptor " - "template: %s\n", ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } + return 0; +} - /* Free the duplicated memory used for the template entries */ - for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) { - dt = &pl->desc_template_info[i]; - av_free((void *)dt->pDescriptorUpdateEntries); - } +void ff_vk_update_descriptor_img_array(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, AVFrame *f, + VkImageView *views, int set, int binding, + VkImageLayout layout, VkSampler sampler) +{ + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format); - av_freep(&pl->desc_template_info); - } + for (int i = 0; i < nb_planes; i++) + ff_vk_set_descriptor_image(s, pl, e, set, binding, i, + views[i], layout, sampler); +} - return 0; +void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e, + 
FFVulkanPipeline *pl, + VkShaderStageFlagBits stage, + int offset, size_t size, void *src) +{ + FFVulkanFunctions *vk = &s->vkfn; + vk->CmdPushConstants(e->buf, pl->pipeline_layout, + stage, offset, size, src); } -FN_CREATING(FFVulkanContext, FFVulkanPipeline, pipeline, pipelines, pipelines_num) -FFVulkanPipeline *ff_vk_create_pipeline(FFVulkanContext *s, FFVkQueueFamilyCtx *qf) +static int init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl) { - FFVulkanPipeline *pl = create_pipeline(s); - if (pl) - pl->qf = qf; + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + VkPipelineLayoutCreateInfo pipeline_layout_info; + + VkDescriptorSetLayout *desc_layouts = av_malloc(pl->nb_descriptor_sets* + sizeof(*desc_layouts)); /* one handle per set: element size, not pointer size */ + if (!desc_layouts) + return AVERROR(ENOMEM); + + for (int i = 0; i < pl->nb_descriptor_sets; i++) + desc_layouts[i] = pl->desc_set[i].layout; + + /* Finally create the pipeline layout */ + pipeline_layout_info = (VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pSetLayouts = desc_layouts, + .setLayoutCount = pl->nb_descriptor_sets, + .pushConstantRangeCount = pl->push_consts_num, + .pPushConstantRanges = pl->push_consts, + }; + + ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &pipeline_layout_info, + s->hwctx->alloc, &pl->pipeline_layout); + av_free(desc_layouts); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } - return pl; + return 0; } -int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl) +int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkSPIRVShader *shd) { - int i; + int err; VkResult ret; FFVulkanFunctions *vk = &s->vkfn; - VkComputePipelineCreateInfo pipe = { + VkComputePipelineCreateInfo pipeline_create_info; + + err = init_pipeline_layout(s, pl); + if (err < 0) + return err; + + pipeline_create_info = (VkComputePipelineCreateInfo) { .sType =
VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .flags = VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT, .layout = pl->pipeline_layout, + .stage = shd->shader, }; - for (i = 0; i < pl->shaders_num; i++) { - if (pl->shaders[i]->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT) { - pipe.stage = pl->shaders[i]->shader; - break; - } - } - if (i == pl->shaders_num) { - av_log(s, AV_LOG_ERROR, "Can't init compute pipeline, no shader\n"); - return AVERROR(EINVAL); - } - - ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, &pipe, + ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, + &pipeline_create_info, s->hwctx->alloc, &pl->pipeline); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n", @@ -1599,157 +1773,68 @@ int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl) } pl->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE; + pl->wg_size[0] = shd->local_size[0]; + pl->wg_size[1] = shd->local_size[1]; + pl->wg_size[2] = shd->local_size[2]; return 0; } -void ff_vk_bind_pipeline_exec(FFVulkanContext *s, FFVkExecContext *e, +void ff_vk_exec_bind_pipeline(FFVulkanContext *s, FFVkExecContext *e, FFVulkanPipeline *pl) { FFVulkanFunctions *vk = &s->vkfn; + VkDeviceSize offsets[1024]; - vk->CmdBindPipeline(e->bufs[e->qf->cur_queue], pl->bind_point, pl->pipeline); - - for (int i = 0; i < pl->descriptor_sets_num; i++) - pl->desc_staging[i] = pl->desc_set[i*pl->qf->nb_queues + pl->qf->cur_queue]; + /* Bind pipeline */ + vk->CmdBindPipeline(e->buf, pl->bind_point, pl->pipeline); - vk->CmdBindDescriptorSets(e->bufs[e->qf->cur_queue], pl->bind_point, - pl->pipeline_layout, 0, - pl->descriptor_sets_num, - (VkDescriptorSet *)pl->desc_staging, - 0, NULL); + if (pl->nb_descriptor_sets) { + for (int i = 0; i < pl->nb_descriptor_sets; i++) + offsets[i] = pl->desc_set[i].read_only ? 
0 : pl->desc_set[i].aligned_size*e->idx; - e->bound_pl = pl; -} - -static void free_exec_ctx(FFVulkanContext *s, FFVkExecContext *e) -{ - FFVulkanFunctions *vk = &s->vkfn; - - /* Make sure all queues have finished executing */ - for (int i = 0; i < e->qf->nb_queues; i++) { - FFVkQueueCtx *q = &e->queues[i]; - - if (q->fence) { - vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); - vk->ResetFences(s->hwctx->act_dev, 1, &q->fence); - } - - /* Free the fence */ - if (q->fence) - vk->DestroyFence(s->hwctx->act_dev, q->fence, s->hwctx->alloc); - - /* Free buffer dependencies */ - for (int j = 0; j < q->nb_buf_deps; j++) - av_buffer_unref(&q->buf_deps[j]); - av_free(q->buf_deps); - - /* Free frame dependencies */ - for (int j = 0; j < q->nb_frame_deps; j++) - av_frame_free(&q->frame_deps[j]); - av_free(q->frame_deps); + /* Bind descriptor buffers */ + vk->CmdBindDescriptorBuffersEXT(e->buf, pl->nb_descriptor_sets, pl->desc_bind); + /* Binding offsets */ + vk->CmdSetDescriptorBufferOffsetsEXT(e->buf, pl->bind_point, pl->pipeline_layout, + 0, pl->nb_descriptor_sets, + pl->bound_buffer_indices, offsets); } - - if (e->bufs) - vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, e->qf->nb_queues, e->bufs); - if (e->pool) - vk->DestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc); - if (e->query.pool) - vk->DestroyQueryPool(s->hwctx->act_dev, e->query.pool, s->hwctx->alloc); - - av_freep(&e->query.data); - av_freep(&e->bufs); - av_freep(&e->queues); - av_freep(&e->sem_sig); - av_freep(&e->sem_sig_val); - av_freep(&e->sem_sig_val_dst); - av_freep(&e->sem_wait); - av_freep(&e->sem_wait_dst); - av_freep(&e->sem_wait_val); - av_free(e); } -static void free_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl) +void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl) { FFVulkanFunctions *vk = &s->vkfn; - for (int i = 0; i < pl->shaders_num; i++) { - FFVkSPIRVShader *shd = pl->shaders[i]; - av_bprint_finalize(&shd->src, NULL); - 
vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module, - s->hwctx->alloc); - av_free(shd); - } - - vk->DestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc); - vk->DestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout, - s->hwctx->alloc); + if (pl->pipeline) + vk->DestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc); + if (pl->pipeline_layout) + vk->DestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout, + s->hwctx->alloc); - for (int i = 0; i < pl->desc_layout_num; i++) { - if (pl->desc_template && pl->desc_template[i]) - vk->DestroyDescriptorUpdateTemplate(s->hwctx->act_dev, pl->desc_template[i], - s->hwctx->alloc); - if (pl->desc_layout && pl->desc_layout[i]) - vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, pl->desc_layout[i], + for (int i = 0; i < pl->nb_descriptor_sets; i++) { + FFVulkanDescriptorSet *set = &pl->desc_set[i]; + if (set->buf.mem) + ff_vk_unmap_buffer(s, &set->buf, 0); + ff_vk_free_buf(s, &set->buf); + if (set->layout) + vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, set->layout, s->hwctx->alloc); + av_free(set->binding); + av_free(set->binding_offset); } - /* Also frees the descriptor sets */ - if (pl->desc_pool) - vk->DestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool, - s->hwctx->alloc); - - av_freep(&pl->desc_staging); av_freep(&pl->desc_set); - av_freep(&pl->shaders); - av_freep(&pl->desc_layout); - av_freep(&pl->desc_template); - av_freep(&pl->desc_set_initialized); + av_freep(&pl->desc_bind); av_freep(&pl->push_consts); pl->push_consts_num = 0; - - /* Only freed in case of failure */ - av_freep(&pl->pool_size_desc); - if (pl->desc_template_info) { - for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) { - VkDescriptorUpdateTemplateCreateInfo *dt = &pl->desc_template_info[i]; - av_free((void *)dt->pDescriptorUpdateEntries); - } - av_freep(&pl->desc_template_info); - } - - av_free(pl); } void ff_vk_uninit(FFVulkanContext *s) { - FFVulkanFunctions *vk = &s->vkfn; - 
av_freep(&s->query_props); av_freep(&s->qf_props); av_freep(&s->video_props); - if (s->spirv_compiler) - s->spirv_compiler->uninit(&s->spirv_compiler); - - for (int i = 0; i < s->exec_ctx_num; i++) - free_exec_ctx(s, s->exec_ctx[i]); - av_freep(&s->exec_ctx); - - for (int i = 0; i < s->samplers_num; i++) { - vk->DestroySampler(s->hwctx->act_dev, s->samplers[i]->sampler[0], - s->hwctx->alloc); - av_free(s->samplers[i]); - } - av_freep(&s->samplers); - - for (int i = 0; i < s->pipelines_num; i++) - free_pipeline(s, s->pipelines[i]); - av_freep(&s->pipelines); - - av_freep(&s->scratch); - s->scratch_size = 0; - - av_buffer_unref(&s->device_ref); av_buffer_unref(&s->frames_ref); } diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index 3f887a782e000..7f31ced41ddde 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -21,6 +21,8 @@ #define VK_NO_PROTOTYPES +#include + #include "pixdesc.h" #include "bprint.h" #include "hwcontext.h" @@ -28,11 +30,6 @@ #include "hwcontext_vulkan.h" #include "vulkan_loader.h" -#define FF_VK_DEFAULT_USAGE_FLAGS (VK_IMAGE_USAGE_SAMPLED_BIT | \ - VK_IMAGE_USAGE_STORAGE_BIT | \ - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | \ - VK_IMAGE_USAGE_TRANSFER_DST_BIT) - /* GLSL management macros */ #define INDENT(N) INDENT_##N #define INDENT_0 @@ -57,6 +54,8 @@ goto fail; \ } while (0) +#define DUP_SAMPLER(x) { x, x, x, x } + typedef struct FFVkSPIRVShader { const char *name; /* Name for id/debugging purposes */ AVBPrint src; @@ -64,19 +63,6 @@ typedef struct FFVkSPIRVShader { VkPipelineShaderStageCreateInfo shader; } FFVkSPIRVShader; -typedef struct FFVkSPIRVCompiler { - void *priv; - int (*compile_shader)(struct FFVkSPIRVCompiler *ctx, void *avctx, - struct FFVkSPIRVShader *shd, uint8_t **data, - size_t *size, const char *entrypoint, void **opaque); - void (*free_shader)(struct FFVkSPIRVCompiler *ctx, void **opaque); - void (*uninit)(struct FFVkSPIRVCompiler **ctx); -} FFVkSPIRVCompiler; - -typedef struct FFVkSampler { - VkSampler sampler[4]; -} 
FFVkSampler; - typedef struct FFVulkanDescriptorSetBinding { const char *name; VkDescriptorType type; @@ -86,8 +72,7 @@ typedef struct FFVulkanDescriptorSetBinding { uint32_t dimensions; /* Needed for e.g. sampler%iD */ uint32_t elems; /* 0 - scalar, 1 or more - vector */ VkShaderStageFlags stages; - FFVkSampler *sampler; /* Sampler to use for all elems */ - void *updater; /* Pointer to VkDescriptor*Info */ + VkSampler samplers[4]; /* Sampler to use for all elems */ } FFVulkanDescriptorSetBinding; typedef struct FFVkBuffer { @@ -95,119 +80,133 @@ typedef struct FFVkBuffer { VkDeviceMemory mem; VkMemoryPropertyFlagBits flags; size_t size; + VkDeviceAddress address; + + /* Local use only */ + VkPipelineStageFlags2 stage; + VkAccessFlags2 access; + + /* Only valid when allocated via ff_vk_get_pooled_buffer with HOST_VISIBLE */ + uint8_t *mapped_mem; } FFVkBuffer; typedef struct FFVkQueueFamilyCtx { int queue_family; int nb_queues; - int cur_queue; - int actual_queues; } FFVkQueueFamilyCtx; -typedef struct FFVulkanPipeline { - FFVkQueueFamilyCtx *qf; +typedef struct FFVulkanDescriptorSet { + VkDescriptorSetLayout layout; + FFVkBuffer buf; + uint8_t *desc_mem; + VkDeviceSize layout_size; + VkDeviceSize aligned_size; /* descriptorBufferOffsetAlignment */ + VkDeviceSize total_size; /* Once registered to an exec context */ + VkBufferUsageFlags usage; + + VkDescriptorSetLayoutBinding *binding; + VkDeviceSize *binding_offset; + int nb_bindings; + int read_only; +} FFVulkanDescriptorSet; + +typedef struct FFVulkanPipeline { VkPipelineBindPoint bind_point; /* Contexts */ VkPipelineLayout pipeline_layout; VkPipeline pipeline; - /* Shaders */ - FFVkSPIRVShader **shaders; - int shaders_num; - /* Push consts */ VkPushConstantRange *push_consts; int push_consts_num; + /* Workgroup */ + int wg_size[3]; + /* Descriptors */ - VkDescriptorSetLayout *desc_layout; - VkDescriptorPool desc_pool; - VkDescriptorSet *desc_set; -#if VK_USE_64_BIT_PTR_DEFINES == 1 - void **desc_staging; -#else 
- uint64_t *desc_staging; -#endif - VkDescriptorSetLayoutBinding **desc_binding; - VkDescriptorUpdateTemplate *desc_template; - int *desc_set_initialized; - int desc_layout_num; - int descriptor_sets_num; - int total_descriptor_sets; - int pool_size_desc_num; - - /* Temporary, used to store data in between initialization stages */ - VkDescriptorUpdateTemplateCreateInfo *desc_template_info; - VkDescriptorPoolSize *pool_size_desc; + FFVulkanDescriptorSet *desc_set; + VkDescriptorBufferBindingInfoEXT *desc_bind; + uint32_t *bound_buffer_indices; + int nb_descriptor_sets; } FFVulkanPipeline; -typedef struct FFVkQueueCtx { - VkFence fence; +typedef struct FFVkExecContext { + int idx; + const struct FFVkExecPool *parent; + + /* Queue for the execution context */ VkQueue queue; + int qf; + int qi; + + /* Command buffer for the context */ + VkCommandBuffer buf; + + /* Fence for the command buffer */ + VkFence fence; - int synchronous; - int submitted; + void *query_data; + int query_idx; /* Buffer dependencies */ AVBufferRef **buf_deps; int nb_buf_deps; - int buf_deps_alloc_size; + unsigned int buf_deps_alloc_size; /* Frame dependencies */ AVFrame **frame_deps; + unsigned int frame_deps_alloc_size; int nb_frame_deps; - int frame_deps_alloc_size; -} FFVkQueueCtx; - -typedef struct FFVkExecContext { - FFVkQueueFamilyCtx *qf; - VkCommandPool pool; - VkCommandBuffer *bufs; - FFVkQueueCtx *queues; - - struct { - int idx; - VkQueryPool pool; - uint8_t *data; - - int nb_queries; - int nb_results; - int nb_statuses; - int elem_64bits; - size_t data_per_queue; - int status_stride; - } query; + VkSemaphoreSubmitInfo *sem_wait; + unsigned int sem_wait_alloc; + int sem_wait_cnt; - AVBufferRef ***deps; - int *nb_deps; - int *dep_alloc_size; + VkSemaphoreSubmitInfo *sem_sig; + unsigned int sem_sig_alloc; + int sem_sig_cnt; - FFVulkanPipeline *bound_pl; + uint64_t **sem_sig_val_dst; + unsigned int sem_sig_val_dst_alloc; + int sem_sig_val_dst_cnt; - VkSemaphore *sem_wait; - int 
sem_wait_alloc; /* Allocated sem_wait */ - int sem_wait_cnt; + uint8_t *frame_locked; + unsigned int frame_locked_alloc_size; - uint64_t *sem_wait_val; - int sem_wait_val_alloc; + VkAccessFlagBits *access_dst; + unsigned int access_dst_alloc; - VkPipelineStageFlagBits *sem_wait_dst; - int sem_wait_dst_alloc; /* Allocated sem_wait_dst */ + VkImageLayout *layout_dst; + unsigned int layout_dst_alloc; - VkSemaphore *sem_sig; - int sem_sig_alloc; /* Allocated sem_sig */ - int sem_sig_cnt; + uint32_t *queue_family_dst; + unsigned int queue_family_dst_alloc; - uint64_t *sem_sig_val; - int sem_sig_val_alloc; - - uint64_t **sem_sig_val_dst; - int sem_sig_val_dst_alloc; + uint8_t *frame_update; + unsigned int frame_update_alloc_size; } FFVkExecContext; +typedef struct FFVkExecPool { + FFVkQueueFamilyCtx *qf; + FFVkExecContext *contexts; + atomic_int_least64_t idx; + + VkCommandPool cmd_buf_pool; + VkCommandBuffer *cmd_bufs; + int pool_size; + + VkQueryPool query_pool; + void *query_data; + int query_results; + int query_statuses; + int query_64bit; + int query_status_stride; + int nb_queries; + size_t qd_size; +} FFVkExecPool; + typedef struct FFVulkanContext { const AVClass *class; /* Filters and encoders use this */ @@ -216,14 +215,17 @@ typedef struct FFVulkanContext { VkPhysicalDeviceProperties2 props; VkPhysicalDeviceDriverProperties driver_props; VkPhysicalDeviceMemoryProperties mprops; + VkPhysicalDeviceExternalMemoryHostPropertiesEXT hprops; + VkPhysicalDeviceDescriptorBufferPropertiesEXT desc_buf_props; VkQueueFamilyQueryResultStatusPropertiesKHR *query_props; VkQueueFamilyVideoPropertiesKHR *video_props; VkQueueFamilyProperties2 *qf_props; + int tot_nb_qfs; - AVBufferRef *device_ref; AVHWDeviceContext *device; AVVulkanDeviceContext *hwctx; + AVBufferRef *input_frames_ref; AVBufferRef *frames_ref; AVHWFramesContext *frames; AVVulkanFramesContext *hwfc; @@ -231,28 +233,11 @@ typedef struct FFVulkanContext { uint32_t qfs[5]; int nb_qfs; - FFVkSPIRVCompiler 
*spirv_compiler; - /* Properties */ int output_width; int output_height; enum AVPixelFormat output_format; enum AVPixelFormat input_format; - - /* Samplers */ - FFVkSampler **samplers; - int samplers_num; - - /* Exec contexts */ - FFVkExecContext **exec_ctx; - int exec_ctx_num; - - /* Pipelines (each can have 1 shader of each type) */ - FFVulkanPipeline **pipelines; - int pipelines_num; - - void *scratch; /* Scratch memory used only in functions */ - unsigned int scratch_size; } FFVulkanContext; /* Identity mapping - r = r, b = b, g = g, a = a */ @@ -264,244 +249,207 @@ extern const VkComponentMapping ff_comp_identity_map; const char *ff_vk_ret2str(VkResult res); /** - * Loads props/mprops/driver_props - */ -int ff_vk_load_props(FFVulkanContext *s); - -/** - * Returns 1 if the image is any sort of supported RGB + * Returns 1 if pixfmt is a usable RGB format. */ int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt); /** - * Gets the glsl format string for a pixel format + * Returns the format to use for images in shaders. */ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt); /** - * Setup the queue families from the hardware device context. - * Necessary for image creation to work. - */ -void ff_vk_qf_fill(FFVulkanContext *s); - -/** - * Allocate device memory. - */ -int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, - VkMemoryPropertyFlagBits req_flags, void *alloc_extension, - VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem); - -/** - * Get a queue family index and the number of queues. nb is optional. + * Loads props/mprops/driver_props */ -int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb); +int ff_vk_load_props(FFVulkanContext *s); /** - * Initialize a queue family with a specific number of queues. - * If nb_queues == 0, use however many queues the queue family has. + * Chooses a QF and loads it into a context. 
*/ int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, - VkQueueFlagBits dev_family, int nb_queues); - -/** - * Rotate through the queues in a queue family. - */ -int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf); - -/** - * Create a Vulkan sampler, will be auto-freed in ff_vk_filter_uninit() - */ -FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, int unnorm_coords, - VkFilter filt); + VkQueueFlagBits dev_family); /** - * Create an imageview. - * Guaranteed to remain alive until the queue submission has finished executing, - * and will be destroyed after that. + * Allocates/frees an execution pool. + * ff_vk_exec_pool_init_desc() MUST be called if ff_vk_exec_descriptor_set_add() + * has been called. */ -int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e, - VkImageView *v, VkImage img, VkFormat fmt, - const VkComponentMapping map); +int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, + FFVkExecPool *pool, int nb_contexts, + int nb_queries, VkQueryType query_type, int query_64bit, + const void *query_create_pnext); +void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool); /** - * Define a push constant for a given stage into a pipeline. - * Must be called before the pipeline layout has been initialized. + * Retrieve an execution context from a pool. Threadsafe. */ -int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size, - VkShaderStageFlagBits stage); +FFVkExecContext *ff_vk_exec_get(FFVkExecPool *pool); /** - * Inits a pipeline. Everything in it will be auto-freed when calling - * ff_vk_filter_uninit(). + * Performs nb_queries queries and returns their results and statuses. + * Execution must have been waited on to produce valid results. */ -FFVulkanPipeline *ff_vk_create_pipeline(FFVulkanContext *s, FFVkQueueFamilyCtx *qf); +VkResult ff_vk_exec_get_query(FFVulkanContext *s, FFVkExecContext *e, + void **data, int64_t *status); /** - * Inits a shader for a specific pipeline. Will be auto-freed on uninit.
+ * Start/submit/wait an execution. + * ff_vk_exec_start() always waits on a submission, so using ff_vk_exec_wait() + * is not necessary (unless using it is just better). */ -FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name, - VkShaderStageFlags stage); +int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e); +int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e); +void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e); /** - * Writes the workgroup size for a shader. + * Execution dependency management. + * Can attach buffers to executions that will only be unref'd once the + * buffer has finished executing. + * Adding a frame dep will *lock the frame*, until either the dependencies + * are discarded, the execution is submitted, or a failure happens. + * update_frame will update the frame's properties before it is unlocked, + * only if submission was successful. */ -void ff_vk_set_compute_shader_sizes(FFVkSPIRVShader *shd, int local_size[3]); +int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e, + AVBufferRef **deps, int nb_deps, int ref); +int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, + VkPipelineStageFlagBits2 wait_stage, + VkPipelineStageFlagBits2 signal_stage); +void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, + VkImageMemoryBarrier2 *bar, uint32_t *nb_img_bar); +int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e, + VkSemaphore *dst, uint64_t *dst_val, + AVFrame *f); +void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e); /** - * Adds a descriptor set to the shader and registers them in the pipeline. + * Create an imageview and add it as a dependency to an execution. 
*/ -int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl, - FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc, - int num, int only_print_to_shader); +int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e, + VkImageView views[AV_NUM_DATA_POINTERS], + AVFrame *f); -/** - * Compiles the shader, entrypoint must be set to "main". - */ -int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd, - const char *entrypoint); +void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e, + AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar, + VkPipelineStageFlags src_stage, + VkPipelineStageFlags dst_stage, + VkAccessFlagBits new_access, + VkImageLayout new_layout, + uint32_t new_qf); /** - * Pretty print shader, mainly used by shader compilers. + * Memory/buffer/image allocation helpers. */ -void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio); - -/** - * Initializes the pipeline layout after all shaders and descriptor sets have - * been finished. - */ -int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl); +int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, + VkMemoryPropertyFlagBits req_flags, void *alloc_extension, + VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem); +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, + void *pNext, void *alloc_pNext, + VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags); +int ff_vk_create_avbuf(FFVulkanContext *s, AVBufferRef **ref, size_t size, + void *pNext, void *alloc_pNext, + VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags); /** - * Initializes a compute pipeline. Will pick the first shader with the - * COMPUTE flag set. + * Buffer management code. 
*/ -int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl); +int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer **buf, uint8_t *mem[], + int nb_buffers, int invalidate); +int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer **buf, int nb_buffers, + int flush); -/** - * Updates a descriptor set via the updaters defined. - * Can be called immediately after pipeline creation, but must be called - * at least once before queue submission. - */ -void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl, - int set_id); +static inline int ff_vk_map_buffer(FFVulkanContext *s, FFVkBuffer *buf, uint8_t **mem, + int invalidate) +{ + return ff_vk_map_buffers(s, (FFVkBuffer *[]){ buf }, mem, + 1, invalidate); +} -/** - * Init an execution context for command recording and queue submission. - * WIll be auto-freed on uninit. - */ -int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx, - FFVkQueueFamilyCtx *qf); +static inline int ff_vk_unmap_buffer(FFVulkanContext *s, FFVkBuffer *buf, int flush) +{ + return ff_vk_unmap_buffers(s, (FFVkBuffer *[]){ buf }, 1, flush); +} -/** - * Create a query pool for a command context. - * elem_64bits exists to troll driver devs for compliance. All results - * and statuses returned should be 32 bits, unless this is set, then it's 64bits. - */ -int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e, - int nb_queries, VkQueryType type, - int elem_64bits, void *create_pnext); +void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf); -/** - * Get results for query. - * Returns the status of the query. - * Sets *res to the status of the queries. - */ -int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e, - int query_idx, void **data, int64_t *status); +/** Initialize a pool and create AVBufferRefs containing FFVkBuffer. + * Threadsafe to use. Buffers are automatically mapped on creation if + * VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT is set in mem_props. 
Users should + * synchronize access themselves. Mainly meant for device-local buffers. */ +int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool, + AVBufferRef **buf, VkBufferUsageFlags usage, + void *create_pNext, size_t size, + VkMemoryPropertyFlagBits mem_props); /** - * Begin recording to the command buffer. Previous execution must have been - * completed, which ff_vk_submit_exec_queue() will ensure. + * Create a sampler. */ -int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e); +int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler, + int unnorm_coords, VkFilter filt); /** - * Add a command to bind the completed pipeline and its descriptor sets. - * Must be called after ff_vk_start_exec_recording() and before submission. + * Shader management. */ -void ff_vk_bind_pipeline_exec(FFVulkanContext *s, FFVkExecContext *e, - FFVulkanPipeline *pl); +int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name, + VkShaderStageFlags stage); +void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int x, int y, int z); +void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio); +int ff_vk_shader_create(FFVulkanContext *s, FFVkSPIRVShader *shd, + uint8_t *spirv, size_t spirv_size, const char *entrypoint); +void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd); /** - * Updates push constants. - * Must be called after binding a pipeline if any push constants were defined. + * Add/update push constants for execution. */ +int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size, + VkShaderStageFlagBits stage); void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e, - VkShaderStageFlagBits stage, int offset, - size_t size, void *src); + FFVulkanPipeline *pl, + VkShaderStageFlagBits stage, + int offset, size_t size, void *src); /** - * Gets the command buffer to use for this submission from the exe context. + * Add descriptor to a pipeline.
Must be called before pipeline init. */ -VkCommandBuffer ff_vk_get_exec_buf(FFVkExecContext *e); +int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkSPIRVShader *shd, + FFVulkanDescriptorSetBinding *desc, int nb, + int read_only, int print_to_shader_only); -/** - * Adds a generic AVBufferRef as a queue depenency. - */ -int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e, - AVBufferRef **deps, int nb_deps); - -/** - * Discards all queue dependencies - */ -void ff_vk_discard_exec_deps(FFVkExecContext *e); +/* Initialize/free a pipeline. */ +int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkSPIRVShader *shd); +void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl); /** - * Adds a frame as a queue dependency. This also manages semaphore signalling. - * Must be called before submission. + * Register a pipeline with an exec pool. + * Pool may be NULL if all descriptor sets are read-only. */ -int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame, - VkPipelineStageFlagBits in_wait_dst_flag); +int ff_vk_exec_pipeline_register(FFVulkanContext *s, FFVkExecPool *pool, + FFVulkanPipeline *pl); -/** - * Submits a command buffer to the queue for execution. Will not block. - */ -int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e); - -/** - * Wait on a command buffer's execution. Mainly useful for debugging and - * development. - */ -void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e); - -/** - * Create a VkBuffer with the specified parameters. - */ -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, - void *pNext, void *alloc_pNext, - VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags); - -/** - * Maps the buffer to userspace. Set invalidate to 1 if reading the contents - * is necessary. 
- */ -int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[], - int nb_buffers, int invalidate); - -/** - * Unmaps the buffer from userspace. Set flush to 1 to write and sync. - */ -int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers, - int flush); +/* Bind pipeline */ +void ff_vk_exec_bind_pipeline(FFVulkanContext *s, FFVkExecContext *e, + FFVulkanPipeline *pl); -/** - * Frees a buffer. - */ -void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf); +/* Update sampler/image/buffer descriptors. e may be NULL for read-only descriptors. */ +int ff_vk_set_descriptor_sampler(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkSampler *sampler); +int ff_vk_set_descriptor_image(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkImageView view, VkImageLayout layout, VkSampler sampler); +int ff_vk_set_descriptor_buffer(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkDeviceAddress addr, VkDeviceSize len, VkFormat fmt); -/** - * Creates an image, allocates and binds memory in the given - * idx value of the dst frame. If mem is non-NULL, then no memory will be - * allocated, but instead the given memory will be bound to the image. - */ -int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *dst, int idx, - int width, int height, VkFormat fmt, VkImageTiling tiling, - VkImageUsageFlagBits usage, VkImageCreateFlags flags, - void *create_pnext, - VkDeviceMemory *mem, void *alloc_pnext); +void ff_vk_update_descriptor_img_array(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, AVFrame *f, + VkImageView *views, int set, int binding, + VkImageLayout layout, VkSampler sampler); /** - * Frees the main Vulkan context. + * Frees main context. 
*/ void ff_vk_uninit(FFVulkanContext *s); diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h index e06d0978078ac..c81e12f27ec06 100644 --- a/libavutil/vulkan_functions.h +++ b/libavutil/vulkan_functions.h @@ -93,6 +93,7 @@ typedef enum FFVulkanExtensions { /* Queue */ \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetDeviceQueue) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, QueueSubmit) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, QueueSubmit2) \ \ /* Fences */ \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateFence) \ From b03572e07efac6d9f37b9ef3eebf5f5bff097204 Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 11 Jan 2023 09:37:18 +0100 Subject: [PATCH 60/98] vulkan: add ff_vk_count_images() --- libavutil/vulkan.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index 7f31ced41ddde..8ea009e4dab63 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -240,6 +240,15 @@ typedef struct FFVulkanContext { enum AVPixelFormat input_format; } FFVulkanContext; +static inline int ff_vk_count_images(AVVkFrame *f) +{ + int cnt = 0; + while (f->img[cnt]) + cnt++; + + return cnt; +} + /* Identity mapping - r = r, b = b, g = g, a = a */ extern const VkComponentMapping ff_comp_identity_map; From cb7c7fe892e744703f5a8b764f82266f117e4038 Mon Sep 17 00:00:00 2001 From: Lynne Date: Mon, 6 Mar 2023 00:19:12 +0100 Subject: [PATCH 61/98] vulkan: enable forcing of full subgroups --- libavutil/vulkan.c | 15 +++++++++++++-- libavutil/vulkan.h | 4 +++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index cff13dcde2a37..6ca361e054275 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -90,9 +90,13 @@ int ff_vk_load_props(FFVulkanContext *s) s->hprops = (VkPhysicalDeviceExternalMemoryHostPropertiesEXT) { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT, }; + s->subgroup_props = (VkPhysicalDeviceSubgroupSizeControlProperties) { + .sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES, + .pNext = &s->hprops, + }; s->desc_buf_props = (VkPhysicalDeviceDescriptorBufferPropertiesEXT) { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_PROPERTIES_EXT, - .pNext = &s->hprops, + .pNext = &s->subgroup_props, }; s->driver_props = (VkPhysicalDeviceDriverProperties) { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES, @@ -1287,13 +1291,20 @@ void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e, } int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name, - VkShaderStageFlags stage) + VkShaderStageFlags stage, uint32_t required_subgroup_size) { av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED); shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; shd->shader.stage = stage; + if (required_subgroup_size) { + shd->shader.flags |= VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT; + shd->shader.pNext = &shd->subgroup_info; + shd->subgroup_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO; + shd->subgroup_info.requiredSubgroupSize = required_subgroup_size; + } + shd->name = name; GLSLF(0, #version %i ,460); diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index 8ea009e4dab63..1dc7a61d9774d 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -61,6 +61,7 @@ typedef struct FFVkSPIRVShader { AVBPrint src; int local_size[3]; /* Compute shader workgroup sizes */ VkPipelineShaderStageCreateInfo shader; + VkPipelineShaderStageRequiredSubgroupSizeCreateInfo subgroup_info; } FFVkSPIRVShader; typedef struct FFVulkanDescriptorSetBinding { @@ -217,6 +218,7 @@ typedef struct FFVulkanContext { VkPhysicalDeviceMemoryProperties mprops; VkPhysicalDeviceExternalMemoryHostPropertiesEXT hprops; VkPhysicalDeviceDescriptorBufferPropertiesEXT desc_buf_props; + VkPhysicalDeviceSubgroupSizeControlProperties subgroup_props; VkQueueFamilyQueryResultStatusPropertiesKHR *query_props; 
VkQueueFamilyVideoPropertiesKHR *video_props; VkQueueFamilyProperties2 *qf_props; @@ -400,7 +402,7 @@ int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler, * Shader management. */ int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name, - VkShaderStageFlags stage); + VkShaderStageFlags stage, uint32_t required_subgroup_size); void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int x, int y, int z); void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio); int ff_vk_shader_create(FFVulkanContext *s, FFVkSPIRVShader *shd, From 27b52867bd2bf075d71e2910b72bf344b22a6d88 Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 24 Mar 2023 02:22:06 +0100 Subject: [PATCH 62/98] vulkan: make GLSL macro functions semicolumn-safe --- libavutil/vulkan.h | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index 1dc7a61d9774d..0831219d8fd31 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -40,12 +40,28 @@ #define INDENT_5 INDENT_4 INDENT_1 #define INDENT_6 INDENT_5 INDENT_1 #define C(N, S) INDENT(N) #S "\n" -#define GLSLC(N, S) av_bprintf(&shd->src, C(N, S)) -#define GLSLA(...) av_bprintf(&shd->src, __VA_ARGS__) -#define GLSLF(N, S, ...) av_bprintf(&shd->src, C(N, S), __VA_ARGS__) -#define GLSLD(D) GLSLC(0, ); \ - av_bprint_append_data(&shd->src, D, strlen(D)); \ - GLSLC(0, ) + +#define GLSLC(N, S) \ + do { \ + av_bprintf(&shd->src, C(N, S)); \ + } while (0) + +#define GLSLA(...) \ + do { \ + av_bprintf(&shd->src, __VA_ARGS__); \ + } while (0) + +#define GLSLF(N, S, ...) 
\ + do { \ + av_bprintf(&shd->src, C(N, S), __VA_ARGS__); \ + } while (0) + +#define GLSLD(D) \ + do { \ + av_bprintf(&shd->src, "\n"); \ + av_bprint_append_data(&shd->src, D, strlen(D)); \ + av_bprintf(&shd->src, "\n"); \ + } while (0) /* Helper, pretty much every Vulkan return value needs to be checked */ #define RET(x) \ From 083d7e2a005b31bf232e814e8f45e35035dc429e Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 11 Jan 2023 09:37:35 +0100 Subject: [PATCH 63/98] hwcontext_vulkan: rewrite to support multiplane surfaces This commit adds proper handling of multiplane images throughout all of the hwcontext code. To avoid breakage of individual components, the change is performed as a single commit. --- libavutil/hwcontext_vulkan.c | 791 +++++++++++++++++++---------------- libavutil/hwcontext_vulkan.h | 73 ++-- 2 files changed, 474 insertions(+), 390 deletions(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 28255929255ba..622dd811de19d 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -1,4 +1,6 @@ /* + * Copyright (c) Lynne + * * This file is part of FFmpeg. 
* * FFmpeg is free software; you can redistribute it and/or @@ -67,6 +69,8 @@ typedef struct VulkanQueueCtx { VkFence fence; VkQueue queue; int was_synchronous; + int qf; + int qidx; /* Buffer dependencies */ AVBufferRef **buf_deps; @@ -116,6 +120,9 @@ typedef struct VulkanDevicePriv { /* Option to allocate all image planes in a single allocation */ int contiguous_planes; + /* Disable multiplane images */ + int disable_multiplane; + /* Nvidia */ int dev_is_nvidia; } VulkanDevicePriv; @@ -150,112 +157,207 @@ typedef struct AVVkFrameInternal { #endif } AVVkFrameInternal; -#define ADD_VAL_TO_LIST(list, count, val) \ - do { \ - list = av_realloc_array(list, sizeof(*list), ++count); \ - if (!list) { \ - err = AVERROR(ENOMEM); \ - goto fail; \ - } \ - list[count - 1] = av_strdup(val); \ - if (!list[count - 1]) { \ - err = AVERROR(ENOMEM); \ - goto fail; \ - } \ - } while(0) +#define ASPECT_2PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT) +#define ASPECT_3PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT) -#define RELEASE_PROPS(props, count) \ - if (props) { \ - for (int i = 0; i < count; i++) \ - av_free((void *)((props)[i])); \ - av_free((void *)props); \ - } - -static const struct { +static const struct FFVkFormatEntry { + VkFormat vkf; enum AVPixelFormat pixfmt; - const VkFormat vkfmts[5]; -} vk_pixfmt_planar_map[] = { - { AV_PIX_FMT_GRAY8, { VK_FORMAT_R8_UNORM } }, - { AV_PIX_FMT_GRAY16, { VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } }, - - { AV_PIX_FMT_NV12, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, - { AV_PIX_FMT_NV21, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, - { AV_PIX_FMT_P010, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, - { AV_PIX_FMT_P012, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, - { AV_PIX_FMT_P016, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, - - { AV_PIX_FMT_NV16, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, - - { 
AV_PIX_FMT_NV24, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, - { AV_PIX_FMT_NV42, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, - - { AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { AV_PIX_FMT_YUV420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_YUV420P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_YUV420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - - { AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { AV_PIX_FMT_YUV422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_YUV422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_YUV422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - - { AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { AV_PIX_FMT_YUV444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_YUV444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_YUV444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - - { AV_PIX_FMT_YUVA420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { AV_PIX_FMT_YUVA420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - /* There is no AV_PIX_FMT_YUVA420P12 */ - { AV_PIX_FMT_YUVA420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - - { AV_PIX_FMT_YUVA422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { AV_PIX_FMT_YUVA422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_YUVA422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } 
}, - { AV_PIX_FMT_YUVA422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - - { AV_PIX_FMT_YUVA444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { AV_PIX_FMT_YUVA444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_YUVA444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_YUVA444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - - { AV_PIX_FMT_VUYX, { VK_FORMAT_R8G8B8A8_UNORM } }, - { AV_PIX_FMT_XV36, { VK_FORMAT_R16G16B16A16_UNORM } }, - - { AV_PIX_FMT_BGRA, { VK_FORMAT_B8G8R8A8_UNORM } }, - { AV_PIX_FMT_RGBA, { VK_FORMAT_R8G8B8A8_UNORM } }, - { AV_PIX_FMT_RGB24, { VK_FORMAT_R8G8B8_UNORM } }, - { AV_PIX_FMT_BGR24, { VK_FORMAT_B8G8R8_UNORM } }, - { AV_PIX_FMT_RGB48, { VK_FORMAT_R16G16B16_UNORM } }, - { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } }, - { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } }, - { AV_PIX_FMT_RGB565, { VK_FORMAT_R5G6B5_UNORM_PACK16 } }, - { AV_PIX_FMT_BGR565, { VK_FORMAT_B5G6R5_UNORM_PACK16 } }, - { AV_PIX_FMT_BGR0, { VK_FORMAT_B8G8R8A8_UNORM } }, - { AV_PIX_FMT_RGB0, { VK_FORMAT_R8G8B8A8_UNORM } }, - - /* Lower priority as there's an endianess-dependent overlap between these - * and rgba/bgr0, and PACK32 formats are more limited */ - { AV_PIX_FMT_BGR32, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } }, - { AV_PIX_FMT_0BGR32, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } }, - - { AV_PIX_FMT_X2RGB10, { VK_FORMAT_A2R10G10B10_UNORM_PACK32 } }, - - { AV_PIX_FMT_GBRAP, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { AV_PIX_FMT_GBRAP16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } }, - { AV_PIX_FMT_GBRAPF32, { 
VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } }, + VkImageAspectFlags aspect; + int vk_planes; + int nb_images; + int nb_images_fallback; + const VkFormat fallback[5]; +} vk_formats_list[] = { + /* Gray formats */ + { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GRAY8, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GRAY16, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GRAYF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32_SFLOAT } }, + + /* RGB formats */ + { VK_FORMAT_R16G16B16A16_UNORM, AV_PIX_FMT_XV36, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, + { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_BGRA, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } }, + { VK_FORMAT_R8G8B8A8_UNORM, AV_PIX_FMT_RGBA, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } }, + { VK_FORMAT_R8G8B8_UNORM, AV_PIX_FMT_RGB24, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8_UNORM } }, + { VK_FORMAT_B8G8R8_UNORM, AV_PIX_FMT_BGR24, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8_UNORM } }, + { VK_FORMAT_R16G16B16_UNORM, AV_PIX_FMT_RGB48, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16_UNORM } }, + { VK_FORMAT_R16G16B16A16_UNORM, AV_PIX_FMT_RGBA64, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, + { VK_FORMAT_R5G6B5_UNORM_PACK16, AV_PIX_FMT_RGB565, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R5G6B5_UNORM_PACK16 } }, + { VK_FORMAT_B5G6R5_UNORM_PACK16, AV_PIX_FMT_BGR565, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B5G6R5_UNORM_PACK16 } }, + { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_BGR0, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } }, + { VK_FORMAT_R8G8B8A8_UNORM, AV_PIX_FMT_RGB0, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } }, + { VK_FORMAT_A2R10G10B10_UNORM_PACK32, AV_PIX_FMT_X2RGB10, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, 
{ VK_FORMAT_A2R10G10B10_UNORM_PACK32 } }, + + /* Planar RGB */ + { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GBRAP, VK_IMAGE_ASPECT_COLOR_BIT, 1, 4, 4, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRAP16, VK_IMAGE_ASPECT_COLOR_BIT, 1, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRPF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 3, 3, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } }, + { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRAPF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 4, 4, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } }, + + /* Two-plane 420 YUV at 8, 10, 12 and 16 bits */ + { VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, AV_PIX_FMT_NV12, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, + { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P010, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P012, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + { VK_FORMAT_G16_B16R16_2PLANE_420_UNORM, AV_PIX_FMT_P016, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + + /* Two-plane 422 YUV at 8, 10 and 16 bits */ + { VK_FORMAT_G8_B8R8_2PLANE_422_UNORM, AV_PIX_FMT_NV16, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, + { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P210, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P212, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + { VK_FORMAT_G16_B16R16_2PLANE_422_UNORM, AV_PIX_FMT_P216, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + + /* Two-plane 444 YUV at 8, 10 and 16 
bits */ + { VK_FORMAT_G8_B8R8_2PLANE_444_UNORM, AV_PIX_FMT_NV24, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, + { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P410, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P412, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + { VK_FORMAT_G16_B16R16_2PLANE_444_UNORM, AV_PIX_FMT_P416, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + + /* Three-plane 420, 422, 444 at 8, 10, 12 and 16 bits */ + { VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, 
VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + + /* Single plane 422 at 8, 10 and 12 bits */ + { VK_FORMAT_G8B8G8R8_422_UNORM, AV_PIX_FMT_YUYV422, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } }, + { VK_FORMAT_B8G8R8G8_422_UNORM, AV_PIX_FMT_UYVY422, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } }, + { VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16, AV_PIX_FMT_Y210, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, + { VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16, AV_PIX_FMT_Y212, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, }; +static const int nb_vk_formats_list = FF_ARRAY_ELEMS(vk_formats_list); const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p) { - for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_planar_map); i++) - if (vk_pixfmt_planar_map[i].pixfmt == p) - return vk_pixfmt_planar_map[i].vkfmts; + for (int i = 0; i < nb_vk_formats_list; i++) + if (vk_formats_list[i].pixfmt == p) + return vk_formats_list[i].fallback; return NULL; } +static const struct FFVkFormatEntry *vk_find_format_entry(enum AVPixelFormat p) +{ + for (int i = 0; i < nb_vk_formats_list; i++) + if (vk_formats_list[i].pixfmt == p) + return &vk_formats_list[i]; + return NULL; +} + +/* Malitia pura, Khronos */ +#define FN_MAP_TO(dst_t, dst_name, src_t, src_name) \ + static av_unused dst_t map_ ##src_name## _to_ ##dst_name(src_t src) \ + { \ + dst_t dst = 0x0; \ + MAP_TO(VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT, \ + 
VK_IMAGE_USAGE_SAMPLED_BIT); \ + MAP_TO(VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT, \ + VK_IMAGE_USAGE_TRANSFER_SRC_BIT); \ + MAP_TO(VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT, \ + VK_IMAGE_USAGE_TRANSFER_DST_BIT); \ + MAP_TO(VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT, \ + VK_IMAGE_USAGE_STORAGE_BIT); \ + MAP_TO(VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT, \ + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); \ + MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_DECODE_OUTPUT_BIT_KHR, \ + VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR); \ + MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_DECODE_DPB_BIT_KHR, \ + VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR); \ + MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_ENCODE_DPB_BIT_KHR, \ + VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR); \ + MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_ENCODE_INPUT_BIT_KHR, \ + VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR); \ + return dst; \ + } + +#define MAP_TO(flag1, flag2) if (src & flag2) dst |= flag1; +FN_MAP_TO(VkFormatFeatureFlagBits2, feats, VkImageUsageFlags, usage) +#undef MAP_TO +#define MAP_TO(flag1, flag2) if (src & flag1) dst |= flag2; +FN_MAP_TO(VkImageUsageFlags, usage, VkFormatFeatureFlagBits2, feats) +#undef MAP_TO +#undef FN_MAP_TO + +static int vkfmt_from_pixfmt2(AVHWDeviceContext *dev_ctx, enum AVPixelFormat p, + VkImageTiling tiling, + VkFormat fmts[AV_NUM_DATA_POINTERS], + int *nb_images, VkImageAspectFlags *aspect, + VkImageUsageFlags *supported_usage, int disable_multiplane) +{ + AVVulkanDeviceContext *hwctx = dev_ctx->hwctx; + VulkanDevicePriv *priv = dev_ctx->internal->priv; + FFVulkanFunctions *vk = &priv->vkfn; + + const VkFormatFeatureFlagBits2 basic_flags = VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT | + VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT | + VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT; + + for (int i = 0; i < nb_vk_formats_list; i++) { + if (vk_formats_list[i].pixfmt == p) { + VkFormatProperties2 prop = { + .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, + }; + VkFormatFeatureFlagBits2 feats_primary, feats_secondary; + int basics_primary = 0, basics_secondary = 0; + + 
vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev, + vk_formats_list[i].vkf, + &prop); + + feats_primary = tiling == VK_IMAGE_TILING_LINEAR ? + prop.formatProperties.linearTilingFeatures : + prop.formatProperties.optimalTilingFeatures; + basics_primary = (feats_primary & basic_flags) == basic_flags; + + if (vk_formats_list[i].vkf != vk_formats_list[i].fallback[0]) { + vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev, + vk_formats_list[i].fallback[0], + &prop); + feats_secondary = tiling == VK_IMAGE_TILING_LINEAR ? + prop.formatProperties.linearTilingFeatures : + prop.formatProperties.optimalTilingFeatures; + basics_secondary = (feats_secondary & basic_flags) == basic_flags; + } else { + basics_secondary = basics_primary; + } + + if (basics_primary && !(disable_multiplane && vk_formats_list[i].vk_planes > 1)) { + if (fmts) + fmts[0] = vk_formats_list[i].vkf; + if (nb_images) + *nb_images = 1; + if (aspect) + *aspect = vk_formats_list[i].aspect; + if (supported_usage) + *supported_usage = map_feats_to_usage(feats_primary); + return 0; + } else if (basics_secondary) { + if (fmts) { + for (int j = 0; j < vk_formats_list[i].nb_images_fallback; j++) + fmts[j] = vk_formats_list[i].fallback[j]; + } + if (nb_images) + *nb_images = vk_formats_list[i].nb_images_fallback; + if (aspect) + *aspect = vk_formats_list[i].aspect; + if (supported_usage) + *supported_usage = map_feats_to_usage(feats_secondary); + return 0; + } else { + return AVERROR(ENOTSUP); + } + } + } + + return AVERROR(EINVAL); +} + static const void *vk_find_struct(const void *chain, VkStructureType stype) { const VkBaseInStructure *in = chain; @@ -281,33 +383,6 @@ static void vk_link_struct(void *chain, void *in) out->pNext = in; } -static int pixfmt_is_supported(AVHWDeviceContext *dev_ctx, enum AVPixelFormat p, - int linear) -{ - AVVulkanDeviceContext *hwctx = dev_ctx->hwctx; - VulkanDevicePriv *priv = dev_ctx->internal->priv; - FFVulkanFunctions *vk = &priv->vkfn; - const VkFormat *fmt = 
av_vkfmt_from_pixfmt(p); - int planes = av_pix_fmt_count_planes(p); - - if (!fmt) - return 0; - - for (int i = 0; i < planes; i++) { - VkFormatFeatureFlags flags; - VkFormatProperties2 prop = { - .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, - }; - vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev, fmt[i], &prop); - flags = linear ? prop.formatProperties.linearTilingFeatures : - prop.formatProperties.optimalTilingFeatures; - if (!(flags & FF_VK_DEFAULT_USAGE_FLAGS)) - return 0; - } - - return 1; -} - static int load_libvulkan(AVHWDeviceContext *ctx) { AVVulkanDeviceContext *hwctx = ctx->hwctx; @@ -442,6 +517,27 @@ static VkBool32 VKAPI_CALL vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEX return 0; } +#define ADD_VAL_TO_LIST(list, count, val) \ + do { \ + list = av_realloc_array(list, sizeof(*list), ++count); \ + if (!list) { \ + err = AVERROR(ENOMEM); \ + goto fail; \ + } \ + list[count - 1] = av_strdup(val); \ + if (!list[count - 1]) { \ + err = AVERROR(ENOMEM); \ + goto fail; \ + } \ + } while(0) + +#define RELEASE_PROPS(props, count) \ + if (props) { \ + for (int i = 0; i < count; i++) \ + av_free((void *)((props)[i])); \ + av_free((void *)props); \ + } + static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts, const char * const **dst, uint32_t *num, int debug) { @@ -690,6 +786,10 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts) AVVulkanDeviceContext *hwctx = ctx->hwctx; VkApplicationInfo application_info = { .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, + .pApplicationName = "ffmpeg", + .applicationVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR, + LIBAVUTIL_VERSION_MINOR, + LIBAVUTIL_VERSION_MICRO), .pEngineName = "libavutil", .apiVersion = VK_API_VERSION_1_3, .engineVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR, @@ -1173,6 +1273,8 @@ static int create_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd, VulkanQueueCtx *q = &cmd->queues[i]; vk->GetDeviceQueue(hwctx->act_dev, 
queue_family_index, i, &q->queue); q->was_synchronous = 1; + q->qf = queue_family_index; + q->qidx = i; } return 0; @@ -1308,6 +1410,7 @@ static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd, VkResult ret; VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx]; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; + AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; FFVulkanFunctions *vk = &p->vkfn; ret = vk->EndCommandBuffer(cmd->bufs[cmd->cur_queue_idx]); @@ -1321,7 +1424,9 @@ static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd, s_info->pCommandBuffers = &cmd->bufs[cmd->cur_queue_idx]; s_info->commandBufferCount = 1; + hwctx->lock_queue(hwfc->device_ctx, q->qf, q->qidx); ret = vk->QueueSubmit(q->queue, 1, s_info, q->fence); + hwctx->unlock_queue(hwfc->device_ctx, q->qf, q->qidx); if (ret != VK_SUCCESS) { av_log(hwfc, AV_LOG_ERROR, "Queue submission failure: %s\n", vk_ret2str(ret)); @@ -1336,7 +1441,6 @@ static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd, q->was_synchronous = synchronous; if (synchronous) { - AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; vk->WaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); vk->ResetFences(hwctx->act_dev, 1, &q->fence); unref_exec_ctx_deps(hwfc, cmd); @@ -1500,11 +1604,9 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, if (opt_d) p->use_linear_images = strtol(opt_d->value, NULL, 10); - opt_d = av_dict_get(opts, "contiguous_planes", NULL, 0); + opt_d = av_dict_get(opts, "disable_multiplane", NULL, 0); if (opt_d) - p->contiguous_planes = strtol(opt_d->value, NULL, 10); - else - p->contiguous_planes = -1; + p->disable_multiplane = strtol(opt_d->value, NULL, 10); hwctx->enabled_dev_extensions = dev_info.ppEnabledExtensionNames; hwctx->nb_enabled_dev_extensions = dev_info.enabledExtensionCount; @@ -1764,8 +1866,12 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx, int count = 0; VulkanDevicePriv *p = 
ctx->internal->priv; - for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++) - count += pixfmt_is_supported(ctx, i, p->use_linear_images); + for (enum AVPixelFormat i = 0; i < nb_vk_formats_list; i++) { + count += vkfmt_from_pixfmt2(ctx, vk_formats_list[i].pixfmt, + p->use_linear_images ? VK_IMAGE_TILING_LINEAR : + VK_IMAGE_TILING_OPTIMAL, + NULL, NULL, NULL, NULL, 0) >= 0; + } #if CONFIG_CUDA if (p->dev_is_nvidia) @@ -1778,9 +1884,14 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx, return AVERROR(ENOMEM); count = 0; - for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++) - if (pixfmt_is_supported(ctx, i, p->use_linear_images)) - constraints->valid_sw_formats[count++] = i; + for (enum AVPixelFormat i = 0; i < nb_vk_formats_list; i++) { + if (vkfmt_from_pixfmt2(ctx, vk_formats_list[i].pixfmt, + p->use_linear_images ? VK_IMAGE_TILING_LINEAR : + VK_IMAGE_TILING_OPTIMAL, + NULL, NULL, NULL, NULL, 0) >= 0) { + constraints->valid_sw_formats[count++] = vk_formats_list[i].pixfmt; + } + } #if CONFIG_CUDA if (p->dev_is_nvidia) @@ -1788,8 +1899,8 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx, #endif constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE; - constraints->min_width = 0; - constraints->min_height = 0; + constraints->min_width = 1; + constraints->min_height = 1; constraints->max_width = p->props.properties.limits.maxImageDimension2D; constraints->max_height = p->props.properties.limits.maxImageDimension2D; @@ -1863,7 +1974,7 @@ static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req, static void vulkan_free_internal(AVVkFrame *f) { - AVVkFrameInternal *internal = f->internal; + av_unused AVVkFrameInternal *internal = f->internal; #if CONFIG_CUDA if (internal->cuda_fc_ref) { @@ -1904,17 +2015,22 @@ static void vulkan_frame_free(void *opaque, uint8_t *data) AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; FFVulkanFunctions *vk = &p->vkfn; - int 
planes = av_pix_fmt_count_planes(hwfc->sw_format); + int nb_images = ff_vk_count_images(f); - /* We could use vkWaitSemaphores, but the validation layer seems to have - * issues tracking command buffer execution state on uninit. */ - vk->DeviceWaitIdle(hwctx->act_dev); + VkSemaphoreWaitInfo sem_wait = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, + .pSemaphores = f->sem, + .pValues = f->sem_value, + .semaphoreCount = nb_images, + }; + + vk->WaitSemaphores(hwctx->act_dev, &sem_wait, UINT64_MAX); vulkan_free_internal(f); - for (int i = 0; i < planes; i++) { - vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); - vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); + for (int i = 0; i < nb_images; i++) { + vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); + vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc); } @@ -1924,30 +2040,25 @@ static void vulkan_frame_free(void *opaque, uint8_t *data) static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f, void *alloc_pnext, size_t alloc_pnext_stride) { - int err; + int img_cnt = 0, err; VkResult ret; AVHWDeviceContext *ctx = hwfc->device_ctx; VulkanDevicePriv *p = ctx->internal->priv; FFVulkanFunctions *vk = &p->vkfn; AVVulkanFramesContext *hwfctx = hwfc->hwctx; - const int planes = av_pix_fmt_count_planes(hwfc->sw_format); VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } }; - VkMemoryRequirements cont_memory_requirements = { 0 }; - int cont_mem_size_list[AV_NUM_DATA_POINTERS] = { 0 }; - int cont_mem_size = 0; - AVVulkanDeviceContext *hwctx = ctx->hwctx; - for (int i = 0; i < planes; i++) { + while (f->img[img_cnt]) { int use_ded_mem; VkImageMemoryRequirementsInfo2 req_desc = { .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2, - .image = f->img[i], + .image = f->img[img_cnt], }; VkMemoryDedicatedAllocateInfo ded_alloc = { .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, - .pNext = (void 
*)(((uint8_t *)alloc_pnext) + i*alloc_pnext_stride), + .pNext = (void *)(((uint8_t *)alloc_pnext) + img_cnt*alloc_pnext_stride), }; VkMemoryDedicatedRequirements ded_req = { .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, @@ -1963,32 +2074,11 @@ static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f, req.memoryRequirements.size = FFALIGN(req.memoryRequirements.size, p->props.properties.limits.minMemoryMapAlignment); - if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY) { - if (ded_req.requiresDedicatedAllocation) { - av_log(hwfc, AV_LOG_ERROR, "Cannot allocate all planes in a single allocation, " - "device requires dedicated image allocation!\n"); - return AVERROR(EINVAL); - } else if (!i) { - cont_memory_requirements = req.memoryRequirements; - } else if (cont_memory_requirements.memoryTypeBits != - req.memoryRequirements.memoryTypeBits) { - av_log(hwfc, AV_LOG_ERROR, "The memory requirements differ between plane 0 " - "and %i, cannot allocate in a single region!\n", - i); - return AVERROR(EINVAL); - } - - cont_mem_size_list[i] = FFALIGN(req.memoryRequirements.size, - req.memoryRequirements.alignment); - cont_mem_size += cont_mem_size_list[i]; - continue; - } - /* In case the implementation prefers/requires dedicated allocation */ use_ded_mem = ded_req.prefersDedicatedAllocation | ded_req.requiresDedicatedAllocation; if (use_ded_mem) - ded_alloc.image = f->img[i]; + ded_alloc.image = f->img[img_cnt]; /* Allocate memory */ if ((err = alloc_mem(ctx, &req.memoryRequirements, @@ -1996,42 +2086,19 @@ static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, use_ded_mem ? 
&ded_alloc : (void *)ded_alloc.pNext, - &f->flags, &f->mem[i]))) + &f->flags, &f->mem[img_cnt]))) return err; - f->size[i] = req.memoryRequirements.size; - bind_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO; - bind_info[i].image = f->img[i]; - bind_info[i].memory = f->mem[i]; - } - - if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY) { - cont_memory_requirements.size = cont_mem_size; + f->size[img_cnt] = req.memoryRequirements.size; + bind_info[img_cnt].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO; + bind_info[img_cnt].image = f->img[img_cnt]; + bind_info[img_cnt].memory = f->mem[img_cnt]; - /* Allocate memory */ - if ((err = alloc_mem(ctx, &cont_memory_requirements, - f->tiling == VK_IMAGE_TILING_LINEAR ? - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT : - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - (void *)(((uint8_t *)alloc_pnext)), - &f->flags, &f->mem[0]))) - return err; - - f->size[0] = cont_memory_requirements.size; - - for (int i = 0, offset = 0; i < planes; i++) { - bind_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO; - bind_info[i].image = f->img[i]; - bind_info[i].memory = f->mem[0]; - bind_info[i].memoryOffset = offset; - - f->offset[i] = bind_info[i].memoryOffset; - offset += cont_mem_size_list[i]; - } + img_cnt++; } /* Bind the allocated memory to the images */ - ret = vk->BindImageMemory2(hwctx->act_dev, planes, bind_info); + ret = vk->BindImageMemory2(hwctx->act_dev, img_cnt, bind_info); if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n", vk_ret2str(ret)); @@ -2057,11 +2124,10 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, VkImageLayout new_layout; VkAccessFlags2 new_access; AVVulkanFramesContext *vkfc = hwfc->hwctx; - const int planes = av_pix_fmt_count_planes(hwfc->sw_format); VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; FFVulkanFunctions *vk = &p->vkfn; - AVFrame tmp = { .data[0] = (uint8_t *)frame }; uint64_t sem_sig_val[AV_NUM_DATA_POINTERS]; + int nb_images = 
ff_vk_count_images(frame); VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS] = { 0 }; VkDependencyInfo dep_info; @@ -2069,14 +2135,14 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = { .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, .pSignalSemaphoreValues = sem_sig_val, - .signalSemaphoreValueCount = planes, + .signalSemaphoreValueCount = nb_images, }; VkSubmitInfo s_info = { .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .pNext = &s_timeline_sem_info, .pSignalSemaphores = frame->sem, - .signalSemaphoreCount = planes, + .signalSemaphoreCount = nb_images, }; VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS]; @@ -2086,7 +2152,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, vkfc->lock_frame(hwfc, frame); - for (int i = 0; i < planes; i++) { + for (int i = 0; i < nb_images; i++) { wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; sem_sig_val[i] = frame->sem_value[i] + 1; } @@ -2104,10 +2170,10 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, src_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR; dst_qf = VK_QUEUE_FAMILY_IGNORED; s_timeline_sem_info.pWaitSemaphoreValues = frame->sem_value; - s_timeline_sem_info.waitSemaphoreValueCount = planes; + s_timeline_sem_info.waitSemaphoreValueCount = nb_images; s_info.pWaitSemaphores = frame->sem; s_info.pWaitDstStageMask = wait_st; - s_info.waitSemaphoreCount = planes; + s_info.waitSemaphoreCount = nb_images; break; case PREP_MODE_EXTERNAL_EXPORT: new_layout = VK_IMAGE_LAYOUT_GENERAL; @@ -2115,10 +2181,10 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, src_qf = VK_QUEUE_FAMILY_IGNORED; dst_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR; s_timeline_sem_info.pWaitSemaphoreValues = frame->sem_value; - s_timeline_sem_info.waitSemaphoreValueCount = planes; + s_timeline_sem_info.waitSemaphoreValueCount = nb_images; s_info.pWaitSemaphores = frame->sem; s_info.pWaitDstStageMask = wait_st; - 
s_info.waitSemaphoreCount = planes; + s_info.waitSemaphoreCount = nb_images; break; case PREP_MODE_DECODING_DST: new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR; @@ -2137,7 +2203,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, /* Change the image layout to something more optimal for writes. * This also signals the newly created semaphore, making it usable * for synchronization */ - for (int i = 0; i < planes; i++) { + for (int i = 0; i < nb_images; i++) { img_bar[i] = (VkImageMemoryBarrier2) { .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, .pNext = NULL, @@ -2152,8 +2218,8 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, .image = frame->img[i], .subresourceRange = (VkImageSubresourceRange) { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .layerCount = VK_REMAINING_ARRAY_LAYERS, .levelCount = 1, - .layerCount = 1, }, }; @@ -2164,7 +2230,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, vk->CmdPipelineBarrier2(get_buf_exec_ctx(hwfc, ectx), &(VkDependencyInfo) { .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, .pImageMemoryBarriers = img_bar, - .imageMemoryBarrierCount = planes, + .imageMemoryBarrierCount = nb_images, }); err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 0); @@ -2173,7 +2239,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, return err; } -static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format, +static inline void get_plane_wh(uint32_t *w, uint32_t *h, enum AVPixelFormat format, int frame_w, int frame_h, int plane) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format); @@ -2192,17 +2258,17 @@ static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format, static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, VkImageTiling tiling, VkImageUsageFlagBits usage, + VkImageCreateFlags flags, int nb_layers, void *create_pnext) { int err; VkResult ret; + AVVulkanFramesContext *hwfc_vk = hwfc->hwctx; 
AVHWDeviceContext *ctx = hwfc->device_ctx; VulkanDevicePriv *p = ctx->internal->priv; FFVulkanFunctions *vk = &p->vkfn; AVVulkanDeviceContext *hwctx = ctx->hwctx; - enum AVPixelFormat format = hwfc->sw_format; - const VkFormat *img_fmts = av_vkfmt_from_pixfmt(format); - const int planes = av_pix_fmt_count_planes(format); + AVVulkanFramesContext *frames = hwfc->hwctx; VkExportSemaphoreCreateInfo ext_sem_info = { .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO, @@ -2237,17 +2303,19 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, return AVERROR(ENOMEM); } + // TODO: check width and height for alignment in case of multiplanar (must be mod-2 if subsampled) + /* Create the images */ - for (int i = 0; i < planes; i++) { + for (int i = 0; (hwfc_vk->format[i] != VK_FORMAT_UNDEFINED); i++) { VkImageCreateInfo create_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .pNext = create_pnext, .imageType = VK_IMAGE_TYPE_2D, - .format = img_fmts[i], + .format = hwfc_vk->format[i], .extent.depth = 1, .mipLevels = 1, - .arrayLayers = 1, - .flags = VK_IMAGE_CREATE_ALIAS_BIT, + .arrayLayers = nb_layers, + .flags = flags, .tiling = tiling, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, .usage = usage, @@ -2259,7 +2327,7 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, }; get_plane_wh(&create_info.extent.width, &create_info.extent.height, - format, hwfc->width, hwfc->height, i); + hwfc->sw_format, hwfc->width, hwfc->height, i); ret = vk->CreateImage(hwctx->act_dev, &create_info, hwctx->alloc, &f->img[i]); @@ -2368,8 +2436,8 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size) AVVulkanFramesContext *hwctx = hwfc->hwctx; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; VulkanFramesPriv *fp = hwfc->internal->priv; - VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS]; VkExternalMemoryHandleTypeFlags e = 0x0; + VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS]; VkExternalMemoryImageCreateInfo eiinfo = {
.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, @@ -2385,10 +2453,6 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size) if (p->extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) try_export_flags(hwfc, &eiinfo.handleTypes, &e, VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT); - - if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS)) - try_export_flags(hwfc, &eiinfo.handleTypes, &e, - VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); #endif for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) { @@ -2397,8 +2461,8 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size) eminfo[i].handleTypes = e; } - err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, - eiinfo.handleTypes ? &eiinfo : NULL); + err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, hwctx->img_flags, + hwctx->nb_layers, eiinfo.handleTypes ? &eiinfo : NULL); if (err) return NULL; @@ -2461,104 +2525,89 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc) VulkanFramesPriv *fp = hwfc->internal->priv; AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - const VkImageDrmFormatModifierListCreateInfoEXT *modifier_info; - const int has_modifiers = !!(p->extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS); - - /* Default tiling flags */ - hwctx->tiling = hwctx->tiling ? hwctx->tiling : - has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT : - p->use_linear_images ? VK_IMAGE_TILING_LINEAR : - VK_IMAGE_TILING_OPTIMAL; - - if (!hwctx->usage) - hwctx->usage = FF_VK_DEFAULT_USAGE_FLAGS; - - modifier_info = vk_find_struct(hwctx->create_pnext, - VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT); - - /* Get the supported modifiers if the user has not given any. 
*/ - if (has_modifiers && !modifier_info) { - const VkFormat *fmt = av_vkfmt_from_pixfmt(hwfc->sw_format); - VkImageDrmFormatModifierListCreateInfoEXT *modifier_info; - FFVulkanFunctions *vk = &p->vkfn; - VkDrmFormatModifierPropertiesEXT *mod_props; - uint64_t *modifiers; - int modifier_count = 0; - - VkDrmFormatModifierPropertiesListEXT mod_props_list = { - .sType = VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT, - .pNext = NULL, - .drmFormatModifierCount = 0, - .pDrmFormatModifierProperties = NULL, - }; - VkFormatProperties2 prop = { - .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, - .pNext = &mod_props_list, - }; - - /* Get all supported modifiers */ - vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->phys_dev, fmt[0], &prop); - - if (!mod_props_list.drmFormatModifierCount) { - av_log(hwfc, AV_LOG_ERROR, "There are no supported modifiers for the given sw_format\n"); - return AVERROR(EINVAL); - } - - /* Createa structure to hold the modifier list info */ - modifier_info = av_mallocz(sizeof(*modifier_info)); - if (!modifier_info) - return AVERROR(ENOMEM); - - modifier_info->pNext = NULL; - modifier_info->sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT; - - /* Add structure to the image creation pNext chain */ - if (!hwctx->create_pnext) - hwctx->create_pnext = modifier_info; - else - vk_link_struct(hwctx->create_pnext, (void *)modifier_info); + VkImageUsageFlagBits supported_usage; + const struct FFVkFormatEntry *fmt; + int disable_multiplane = p->disable_multiplane || + (hwctx->flags & AV_VK_FRAME_FLAG_DISABLE_MULTIPLANE); - /* Backup the allocated struct to be freed later */ - fp->modifier_info = modifier_info; + /* Defaults */ + if (!hwctx->nb_layers) + hwctx->nb_layers = 1; - /* Allocate list of modifiers */ - modifiers = av_mallocz(mod_props_list.drmFormatModifierCount * - sizeof(*modifiers)); - if (!modifiers) - return AVERROR(ENOMEM); - - modifier_info->pDrmFormatModifiers = modifiers; - - /* Allocate a temporary list to 
hold all modifiers supported */ - mod_props = av_mallocz(mod_props_list.drmFormatModifierCount * - sizeof(*mod_props)); - if (!mod_props) - return AVERROR(ENOMEM); - - mod_props_list.pDrmFormatModifierProperties = mod_props; + /* VK_IMAGE_TILING_OPTIMAL == 0, can't check for it really */ + if (p->use_linear_images && + (hwctx->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)) + hwctx->tiling = VK_IMAGE_TILING_LINEAR; - /* Finally get all modifiers from the device */ - vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->phys_dev, fmt[0], &prop); - /* Reject any modifiers that don't match our requirements */ - for (int i = 0; i < mod_props_list.drmFormatModifierCount; i++) { - if (!(mod_props[i].drmFormatModifierTilingFeatures & hwctx->usage)) - continue; + fmt = vk_find_format_entry(hwfc->sw_format); + if (!fmt) { + av_log(hwfc, AV_LOG_ERROR, "Unsupported pixel format: %s!\n", + av_get_pix_fmt_name(hwfc->sw_format)); + return AVERROR(EINVAL); + } - modifiers[modifier_count++] = mod_props[i].drmFormatModifier; + if (hwctx->format[0] != VK_FORMAT_UNDEFINED) { + if (hwctx->format[0] != fmt->vkf) { + for (int i = 0; i < fmt->nb_images_fallback; i++) { + if (hwctx->format[i] != fmt->fallback[i]) { + av_log(hwfc, AV_LOG_ERROR, "Incompatible Vulkan format given " + "for the current sw_format %s!\n", + av_get_pix_fmt_name(hwfc->sw_format)); + return AVERROR(EINVAL); + } + } } - if (!modifier_count) { - av_log(hwfc, AV_LOG_ERROR, "None of the given modifiers supports" - " the usage flags!\n"); - av_freep(&mod_props); + /* Check if the sw_format itself is supported */ + err = vkfmt_from_pixfmt2(hwfc->device_ctx, hwfc->sw_format, + hwctx->tiling, NULL, + NULL, NULL, &supported_usage, 0); + if (err < 0) { + av_log(hwfc, AV_LOG_ERROR, "Unsupported sw format: %s!\n", + av_get_pix_fmt_name(hwfc->sw_format)); return AVERROR(EINVAL); } + } else { + err = vkfmt_from_pixfmt2(hwfc->device_ctx, hwfc->sw_format, + hwctx->tiling, hwctx->format, NULL, + NULL, &supported_usage, + 
disable_multiplane); + if (err < 0) + return err; + } - modifier_info->drmFormatModifierCount = modifier_count; - av_freep(&mod_props); + /* Image usage flags */ + if (!hwctx->usage) { + hwctx->usage = supported_usage & (VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR); + } + + /* Image creation flags. + * Only fill them in automatically if the image is not going to be used as + * a DPB-only image, and we have SAMPLED/STORAGE bits set. */ + if (!hwctx->img_flags) { + int is_lone_dpb = (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) && + !(hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR); + int sampleable = hwctx->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT); + if (sampleable && !is_lone_dpb) { + hwctx->img_flags = VK_IMAGE_CREATE_ALIAS_BIT; + if ((fmt->vk_planes > 1) && (hwctx->format[0] == fmt->vkf)) + hwctx->img_flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT | + VK_IMAGE_CREATE_EXTENDED_USAGE_BIT; + } } + if (!hwctx->lock_frame) + hwctx->lock_frame = lock_frame; + + if (!hwctx->unlock_frame) + hwctx->unlock_frame = unlock_frame; + err = create_exec_ctx(hwfc, &fp->conv_ctx, dev_hwctx->queue_family_comp_index, dev_hwctx->nb_comp_queues); @@ -2577,8 +2626,8 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc) return err; /* Test to see if allocation will fail */ - err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, - hwctx->create_pnext); + err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, hwctx->img_flags, + hwctx->nb_layers, hwctx->create_pnext); if (err) return err; @@ -2594,11 +2643,6 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc) return AVERROR(ENOMEM); } - if (!hwctx->lock_frame) - hwctx->lock_frame = lock_frame; - if (!hwctx->unlock_frame) - hwctx->unlock_frame = unlock_frame; - return 0; } @@ -2674,7 +2718,7 @@ static int vulkan_map_frame_to_mem(AVHWFramesContext 
*hwfc, AVFrame *dst, const AVFrame *src, int flags) { VkResult ret; - int err, mapped_mem_count = 0, mem_planes = 0; + int err, nb_mem = 0, mapped_mem_count = 0, mem_planes = 0; AVVkFrame *f = (AVVkFrame *)src->data[0]; AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; AVVulkanFramesContext *hwfctx = hwfc->hwctx; @@ -2694,7 +2738,7 @@ static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst, } if (!(f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) || - !(f->tiling == VK_IMAGE_TILING_LINEAR)) { + !(hwfctx->tiling == VK_IMAGE_TILING_LINEAR)) { av_log(hwfc, AV_LOG_ERROR, "Unable to map frame, not host visible " "and linear!\n"); err = AVERROR(EINVAL); @@ -2704,35 +2748,35 @@ static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst, dst->width = src->width; dst->height = src->height; - mem_planes = hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY ? 1 : planes; - for (int i = 0; i < mem_planes; i++) { + for (int i = 0; i < AV_NUM_DATA_POINTERS; i++) + nb_mem += !!f->mem[i]; + + for (int i = 0; i < nb_mem; i++) { ret = vk->MapMemory(hwctx->act_dev, f->mem[i], 0, VK_WHOLE_SIZE, 0, (void **)&dst->data[i]); if (ret != VK_SUCCESS) { - av_log(hwfc, AV_LOG_ERROR, "Failed to map image memory: %s\n", - vk_ret2str(ret)); + av_log(hwfc, AV_LOG_ERROR, "Failed to map %ith frame memory: %s\n", + i, vk_ret2str(ret)); err = AVERROR_EXTERNAL; goto fail; } mapped_mem_count++; } - if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY) { - for (int i = 0; i < planes; i++) - dst->data[i] = dst->data[0] + f->offset[i]; - } + for (int i = 0; i < planes; i++) + dst->data[i] = dst->data[i] + f->offset[i]; /* Check if the memory contents matter */ if (((flags & AV_HWFRAME_MAP_READ) || !(flags & AV_HWFRAME_MAP_OVERWRITE)) && !(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { VkMappedMemoryRange map_mem_ranges[AV_NUM_DATA_POINTERS] = { { 0 } }; - for (int i = 0; i < planes; i++) { + for (int i = 0; i < nb_mem; i++) { map_mem_ranges[i].sType = 
VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; map_mem_ranges[i].size = VK_WHOLE_SIZE; map_mem_ranges[i].memory = f->mem[i]; } - ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, planes, + ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, nb_mem, map_mem_ranges); if (ret != VK_SUCCESS) { av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate memory: %s\n", @@ -2774,25 +2818,25 @@ static void vulkan_unmap_from_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwma { AVVkFrame *f = hwmap->priv; AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; - const int planes = av_pix_fmt_count_planes(hwfc->sw_format); VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; FFVulkanFunctions *vk = &p->vkfn; + const int nb_images = ff_vk_count_images(f); VkSemaphoreWaitInfo wait_info = { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, .flags = 0x0, .pSemaphores = f->sem, .pValues = f->sem_value, - .semaphoreCount = planes, + .semaphoreCount = nb_images, }; vk->WaitSemaphores(hwctx->act_dev, &wait_info, UINT64_MAX); vulkan_free_internal(f); - for (int i = 0; i < planes; i++) { - vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); - vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); + for (int i = 0; i < nb_images; i++) { + vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); + vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc); } @@ -2843,6 +2887,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f AVVulkanDeviceContext *hwctx = ctx->hwctx; VulkanDevicePriv *p = ctx->internal->priv; FFVulkanFunctions *vk = &p->vkfn; + AVVulkanFramesContext *hwfctx = hwfc->hwctx; VulkanFramesPriv *fp = hwfc->internal->priv; const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0]; VkBindImageMemoryInfo bind_info[AV_DRM_MAX_PLANES]; @@ -2899,8 +2944,8 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f .extent.depth = 1, .mipLevels = 1, 
.arrayLayers = 1, - .flags = 0x0, /* ALIAS flag is implicit for imported images */ - .tiling = f->tiling, + .flags = 0x0, + .tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */ .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT, @@ -2993,7 +3038,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f f->sem_value[i] = 0; } - for (int i = 0; i < desc->nb_objects; i++) { + for (int i = 0; i < desc->nb_layers; i++) { /* Memory requirements */ VkImageMemoryRequirementsInfo2 req_desc = { .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2, @@ -3011,9 +3056,13 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f VkMemoryFdPropertiesKHR fdmp = { .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR, }; + /* This assumes that a layer will never be constructed from multiple + * objects. If that was to happen in the real world, this code would + * need to import each plane separately. + */ VkImportMemoryFdInfoKHR idesc = { .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR, - .fd = dup(desc->objects[i].fd), + .fd = dup(desc->objects[desc->layers[i].planes[0].object_index].fd), .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, }; VkMemoryDedicatedAllocateInfo ded_alloc = { @@ -3067,7 +3116,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f bind_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO; bind_info[bind_counts].pNext = planes > 1 ? 
&plane_info[bind_counts] : NULL; bind_info[bind_counts].image = f->img[i]; - bind_info[bind_counts].memory = f->mem[desc->layers[i].planes[j].object_index]; + bind_info[bind_counts].memory = f->mem[i]; /* Offset is already signalled via pPlaneLayouts above */ bind_info[bind_counts].memoryOffset = 0; @@ -3444,13 +3493,13 @@ static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst, #if CONFIG_LIBDRM #if CONFIG_VAAPI case AV_PIX_FMT_VAAPI: - if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS)) + if (p->extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS) return vulkan_map_from_vaapi(hwfc, dst, src, flags); else return AVERROR(ENOSYS); #endif case AV_PIX_FMT_DRM_PRIME: - if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS)) + if (p->extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS) return vulkan_map_from_drm(hwfc, dst, src, flags); else return AVERROR(ENOSYS); @@ -3630,13 +3679,13 @@ static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst, switch (dst->format) { #if CONFIG_LIBDRM case AV_PIX_FMT_DRM_PRIME: - if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS)) + if (p->extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS) return vulkan_map_to_drm(hwfc, dst, src, flags); else return AVERROR(ENOSYS); #if CONFIG_VAAPI case AV_PIX_FMT_VAAPI: - if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS)) + if (p->extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS) return vulkan_map_to_vaapi(hwfc, dst, src, flags); else return AVERROR(ENOSYS); @@ -3890,7 +3939,9 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f, int bar_num = 0; VkPipelineStageFlagBits sem_wait_dst[AV_NUM_DATA_POINTERS]; - const int planes = av_pix_fmt_count_planes(pix_fmt); + const int nb_images = ff_vk_count_images(frame); + int pixfmt_planes = av_pix_fmt_count_planes(pix_fmt); + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); VkImageMemoryBarrier 
img_bar[AV_NUM_DATA_POINTERS] = { 0 }; @@ -3903,8 +3954,8 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f, .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, .pWaitSemaphoreValues = frame->sem_value, .pSignalSemaphoreValues = sem_signal_values, - .waitSemaphoreValueCount = planes, - .signalSemaphoreValueCount = planes, + .waitSemaphoreValueCount = nb_images, + .signalSemaphoreValueCount = nb_images, }; VkSubmitInfo s_info = { @@ -3913,8 +3964,8 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f, .pSignalSemaphores = frame->sem, .pWaitSemaphores = frame->sem, .pWaitDstStageMask = sem_wait_dst, - .signalSemaphoreCount = planes, - .waitSemaphoreCount = planes, + .signalSemaphoreCount = nb_images, + .waitSemaphoreCount = nb_images, }; vkfc->lock_frame(hwfc, frame); @@ -3922,11 +3973,11 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f, if ((err = wait_start_exec_ctx(hwfc, ectx))) goto end; - for (int i = 0; i < planes; i++) + for (int i = 0; i < nb_images; i++) sem_signal_values[i] = frame->sem_value[i] + 1; /* Change the image layout to something more optimal for transfers */ - for (int i = 0; i < planes; i++) { + for (int i = 0; i < nb_images; i++) { VkImageLayout new_layout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; VkAccessFlags new_access = to_buf ? 
VK_ACCESS_TRANSFER_READ_BIT : @@ -3962,13 +4013,20 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f, 0, NULL, 0, NULL, bar_num, img_bar); /* Schedule a copy for each plane */ - for (int i = 0; i < planes; i++) { + for (int i = 0; i < pixfmt_planes; i++) { + int idx = FFMIN(i, nb_images - 1); + VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_ASPECT_PLANE_0_BIT, + VK_IMAGE_ASPECT_PLANE_1_BIT, + VK_IMAGE_ASPECT_PLANE_2_BIT, }; + ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data; VkBufferImageCopy buf_reg = { .bufferOffset = buf_offsets[i], .bufferRowLength = buf_stride[i] / desc->comp[i].step, .imageSubresource.layerCount = 1, - .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .imageSubresource.aspectMask = plane_aspect[(pixfmt_planes != nb_images) + + i*(pixfmt_planes != nb_images)], .imageOffset = { 0, 0, 0, }, }; @@ -3979,11 +4037,11 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f, buf_reg.imageExtent = (VkExtent3D){ p_w, p_h, 1, }; if (to_buf) - vk->CmdCopyImageToBuffer(cmd_buf, frame->img[i], frame->layout[i], + vk->CmdCopyImageToBuffer(cmd_buf, frame->img[idx], frame->layout[idx], vkbuf->buf, 1, &buf_reg); else - vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[i], - frame->layout[i], 1, &buf_reg); + vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[idx], + frame->layout[idx], 1, &buf_reg); } /* When uploading, do this asynchronously if the source is refcounted by @@ -4000,7 +4058,7 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f, if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1))) goto end; } - if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, planes))) + if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, pixfmt_planes))) goto end; err = submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref); } else { @@ -4020,6 +4078,7 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, AVVkFrame *f = (AVVkFrame 
*)vkf->data[0]; AVHWDeviceContext *dev_ctx = hwfc->device_ctx; AVVulkanDeviceContext *hwctx = dev_ctx->hwctx; + AVVulkanFramesContext *fc = hwfc->hwctx; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; FFVulkanFunctions *vk = &p->vkfn; @@ -4042,7 +4101,7 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, return AVERROR(EINVAL); /* For linear, host visiable images */ - if (f->tiling == VK_IMAGE_TILING_LINEAR && + if (fc->tiling == VK_IMAGE_TILING_LINEAR && f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { AVFrame *map = av_frame_alloc(); if (!map) diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h index 48e2e02cf5dcd..13c12a5351ee4 100644 --- a/libavutil/hwcontext_vulkan.h +++ b/libavutil/hwcontext_vulkan.h @@ -164,6 +164,10 @@ typedef enum AVVkFrameFlags { /* DEPRECATED: does nothing. */ AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY = (1ULL << 1), #endif + + /* Disables multiplane images. + * This is required to export/import images from CUDA. */ + AV_VK_FRAME_FLAG_DISABLE_MULTIPLANE = (1ULL << 2), } AVVkFrameFlags; /** @@ -171,26 +175,32 @@ typedef enum AVVkFrameFlags { */ typedef struct AVVulkanFramesContext { /** - * Controls the tiling of allocated frames. If left as optimal tiling, - * then during av_hwframe_ctx_init() will decide based on whether the device - * supports DRM modifiers, or if the linear_images flag is set, otherwise - * will allocate optimally-tiled images. + * Controls the tiling of allocated frames. + * If left as VK_IMAGE_TILING_OPTIMAL (0), will use optimal tiling. + * Can be set to VK_IMAGE_TILING_LINEAR to force linear images, + * or VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT to force DMABUF-backed + * images. + * @note Imported frames from other APIs ignore this. */ VkImageTiling tiling; /** - * Defines extra usage of output frames. If left as 0, the following bits - * are set: TRANSFER_SRC, TRANSFER_DST. SAMPLED and STORAGE. + * Defines extra usage of output frames. 
If non-zero, all flags MUST be + * supported by the VkFormat. Otherwise, will use supported flags amongst: + * - VK_IMAGE_USAGE_SAMPLED_BIT + * - VK_IMAGE_USAGE_STORAGE_BIT + * - VK_IMAGE_USAGE_TRANSFER_SRC_BIT + * - VK_IMAGE_USAGE_TRANSFER_DST_BIT */ VkImageUsageFlagBits usage; /** * Extension data for image creation. - * If VkImageDrmFormatModifierListCreateInfoEXT is present in the chain, - * and the device supports DRM modifiers, then images will be allocated - * with the specific requested DRM modifiers. + * If DRM tiling is used, a VkImageDrmFormatModifierListCreateInfoEXT structure + * can be added to specify the exact modifier to use. + * * Additional structures may be added at av_hwframe_ctx_init() time, - * which will be freed automatically on uninit(), so users need only free + * which will be freed automatically on uninit(), so users must only free * any structures they've allocated themselves. */ void *create_pnext; @@ -211,6 +221,25 @@ typedef struct AVVulkanFramesContext { */ AVVkFrameFlags flags; + /** + * Flags to set during image creation. If unset, defaults to + * VK_IMAGE_CREATE_ALIAS_BIT. + */ + VkImageCreateFlags img_flags; + + /** + * Vulkan format for each image. MUST be compatible with the pixel format. + * If unset, will be automatically set. + * There are at most two compatible formats for a frame - a multiplane + * format, and a single-plane multi-image format. + */ + VkFormat format[AV_NUM_DATA_POINTERS]; + + /** + * Number of layers each image will have. + */ + int nb_layers; + /** * Locks a frame, preventing other threads from changing frame properties. * If set to NULL, will be set to lavu-internal functions that utilize a @@ -230,14 +259,7 @@ typedef struct AVVulkanFramesContext { } AVVulkanFramesContext; /* - * Frame structure, the VkFormat of the image will always match - * the pool's sw_format. 
- * All frames, imported or allocated, will be created with the - * VK_IMAGE_CREATE_ALIAS_BIT flag set, so the memory may be aliased if needed. - * - * If all queue family indices in the device context are the same, - * images will be created with the EXCLUSIVE sharing mode. Otherwise, all images - * will be created using the CONCURRENT sharing mode. + * Frame structure. * * @note the size of this structure is not part of the ABI, to allocate * you must use @av_vk_frame_alloc(). @@ -249,7 +271,7 @@ struct AVVkFrame { VkImage img[AV_NUM_DATA_POINTERS]; /** - * The same tiling must be used for all images in the frame. + * Tiling for the frame. */ VkImageTiling tiling; @@ -267,13 +289,13 @@ struct AVVkFrame { VkMemoryPropertyFlagBits flags; /** - * Updated after every barrier + * Updated after every barrier. One per VkImage. */ VkAccessFlagBits access[AV_NUM_DATA_POINTERS]; VkImageLayout layout[AV_NUM_DATA_POINTERS]; /** - * Synchronization timeline semaphores, one for each sw_format plane. + * Synchronization timeline semaphores, one for each VkImage. * Must not be freed manually. Must be waited on at every submission using * the value in sem_value, and must be signalled at every submission, * using an incremented value. @@ -282,6 +304,7 @@ struct AVVkFrame { /** * Up to date semaphore value at which each image becomes accessible. + * One per VkImage. * Clients must wait on this value when submitting a command queue, * and increment it when signalling. */ @@ -293,16 +316,18 @@ struct AVVkFrame { struct AVVkFrameInternal *internal; /** - * Describes the binding offset of each plane to the VkDeviceMemory. + * Describes the binding offset of each image to the VkDeviceMemory. + * One per VkImage. */ ptrdiff_t offset[AV_NUM_DATA_POINTERS]; /** * Queue family of the images. Must be VK_QUEUE_FAMILY_IGNORED if * the image was allocated with the CONCURRENT concurrency option. + * One per VkImage. 
*/ uint32_t queue_family[AV_NUM_DATA_POINTERS]; -} AVVkFrame; +}; /** * Allocates a single AVVkFrame and initializes everything as 0. @@ -311,7 +336,7 @@ struct AVVkFrame { AVVkFrame *av_vk_frame_alloc(void); /** - * Returns the format of each image up to the number of planes for a given sw_format. + * Returns the optimal format for a given sw_format, one for each plane. * Returns NULL on unsupported formats. */ const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p); From 7bdf679b1cc0fd0779ff83686554796352d35139 Mon Sep 17 00:00:00 2001 From: Lynne Date: Sat, 18 Feb 2023 14:55:14 +0100 Subject: [PATCH 64/98] hwcontext_vulkan: remove linear+host_visible "fast" path The idea was that it's faster to map linear images and copy them via regular memcpy. This is a very niche use, plus very inconsistently useful, as it would only really be faster on a few Intel GPUs. Even then, using the non-cached memcpy would've been better. Instead, scrap this code. Drivers are better at figuring out what copy to use, and if we're host-mapping, it should actually be just as fast, if not faster. 
--- libavutil/hwcontext_vulkan.c | 158 +---------------------------------- 1 file changed, 2 insertions(+), 156 deletions(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 622dd811de19d..a0139871c9f22 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -2675,144 +2675,6 @@ static int vulkan_transfer_get_formats(AVHWFramesContext *hwfc, return 0; } -typedef struct VulkanMapping { - AVVkFrame *frame; - int flags; -} VulkanMapping; - -static void vulkan_unmap_frame(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap) -{ - VulkanMapping *map = hwmap->priv; - AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; - const int planes = av_pix_fmt_count_planes(hwfc->sw_format); - VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; - - /* Check if buffer needs flushing */ - if ((map->flags & AV_HWFRAME_MAP_WRITE) && - !(map->frame->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { - VkResult ret; - VkMappedMemoryRange flush_ranges[AV_NUM_DATA_POINTERS] = { { 0 } }; - - for (int i = 0; i < planes; i++) { - flush_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; - flush_ranges[i].memory = map->frame->mem[i]; - flush_ranges[i].size = VK_WHOLE_SIZE; - } - - ret = vk->FlushMappedMemoryRanges(hwctx->act_dev, planes, - flush_ranges); - if (ret != VK_SUCCESS) { - av_log(hwfc, AV_LOG_ERROR, "Failed to flush memory: %s\n", - vk_ret2str(ret)); - } - } - - for (int i = 0; i < planes; i++) - vk->UnmapMemory(hwctx->act_dev, map->frame->mem[i]); - - av_free(map); -} - -static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst, - const AVFrame *src, int flags) -{ - VkResult ret; - int err, nb_mem = 0, mapped_mem_count = 0, mem_planes = 0; - AVVkFrame *f = (AVVkFrame *)src->data[0]; - AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; - AVVulkanFramesContext *hwfctx = hwfc->hwctx; - const int planes = av_pix_fmt_count_planes(hwfc->sw_format); - VulkanDevicePriv *p = 
hwfc->device_ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; - - VulkanMapping *map = av_mallocz(sizeof(VulkanMapping)); - if (!map) - return AVERROR(EINVAL); - - if (src->format != AV_PIX_FMT_VULKAN) { - av_log(hwfc, AV_LOG_ERROR, "Cannot map from pixel format %s!\n", - av_get_pix_fmt_name(src->format)); - err = AVERROR(EINVAL); - goto fail; - } - - if (!(f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) || - !(hwfctx->tiling == VK_IMAGE_TILING_LINEAR)) { - av_log(hwfc, AV_LOG_ERROR, "Unable to map frame, not host visible " - "and linear!\n"); - err = AVERROR(EINVAL); - goto fail; - } - - dst->width = src->width; - dst->height = src->height; - - for (int i = 0; i < AV_NUM_DATA_POINTERS; i++) - nb_mem += !!f->mem[i]; - - for (int i = 0; i < nb_mem; i++) { - ret = vk->MapMemory(hwctx->act_dev, f->mem[i], 0, - VK_WHOLE_SIZE, 0, (void **)&dst->data[i]); - if (ret != VK_SUCCESS) { - av_log(hwfc, AV_LOG_ERROR, "Failed to map %ith frame memory: %s\n", - i, vk_ret2str(ret)); - err = AVERROR_EXTERNAL; - goto fail; - } - mapped_mem_count++; - } - - for (int i = 0; i < planes; i++) - dst->data[i] = dst->data[i] + f->offset[i]; - - /* Check if the memory contents matter */ - if (((flags & AV_HWFRAME_MAP_READ) || !(flags & AV_HWFRAME_MAP_OVERWRITE)) && - !(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { - VkMappedMemoryRange map_mem_ranges[AV_NUM_DATA_POINTERS] = { { 0 } }; - for (int i = 0; i < nb_mem; i++) { - map_mem_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; - map_mem_ranges[i].size = VK_WHOLE_SIZE; - map_mem_ranges[i].memory = f->mem[i]; - } - - ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, nb_mem, - map_mem_ranges); - if (ret != VK_SUCCESS) { - av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate memory: %s\n", - vk_ret2str(ret)); - err = AVERROR_EXTERNAL; - goto fail; - } - } - - for (int i = 0; i < planes; i++) { - VkImageSubresource sub = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - }; - VkSubresourceLayout layout; - 
vk->GetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout); - dst->linesize[i] = layout.rowPitch; - } - - map->frame = f; - map->flags = flags; - - err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, - &vulkan_unmap_frame, map); - if (err < 0) - goto fail; - - return 0; - -fail: - for (int i = 0; i < mapped_mem_count; i++) - vk->UnmapMemory(hwctx->act_dev, f->mem[i]); - - av_free(map); - return err; -} - #if CONFIG_LIBDRM static void vulkan_unmap_from_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap) { @@ -3692,8 +3554,9 @@ static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst, #endif #endif default: - return vulkan_map_frame_to_mem(hwfc, dst, src, flags); + break; } + return AVERROR(ENOSYS); } typedef struct ImageBuffer { @@ -4100,23 +3963,6 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, if (swf->width > hwfc->width || swf->height > hwfc->height) return AVERROR(EINVAL); - /* For linear, host visiable images */ - if (fc->tiling == VK_IMAGE_TILING_LINEAR && - f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { - AVFrame *map = av_frame_alloc(); - if (!map) - return AVERROR(ENOMEM); - map->format = swf->format; - - err = vulkan_map_frame_to_mem(hwfc, map, vkf, AV_HWFRAME_MAP_WRITE); - if (err) - return err; - - err = av_frame_copy((AVFrame *)(from ? swf : map), from ? 
map : swf); - av_frame_free(&map); - return err; - } - /* Create buffers */ for (int i = 0; i < planes; i++) { size_t req_size; From 272473e39ae04bd23311b442385cb8176556fcd2 Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 17 Feb 2023 04:14:08 +0100 Subject: [PATCH 65/98] hwcontext_vulkan: don't change properties if prepare_frame fails --- libavutil/hwcontext_vulkan.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index a0139871c9f22..4eab89e93cf68 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -2200,16 +2200,13 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, break; } - /* Change the image layout to something more optimal for writes. - * This also signals the newly created semaphore, making it usable - * for synchronization */ for (int i = 0; i < nb_images; i++) { img_bar[i] = (VkImageMemoryBarrier2) { .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, .pNext = NULL, - .srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, - .srcAccessMask = 0x0, - .dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT, + .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .srcAccessMask = frame->access[i], .dstAccessMask = new_access, .oldLayout = frame->layout[i], .newLayout = new_layout, @@ -2222,9 +2219,6 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, .levelCount = 1, }, }; - - frame->layout[i] = img_bar[i].newLayout; - frame->access[i] = img_bar[i].dstAccessMask; } vk->CmdPipelineBarrier2(get_buf_exec_ctx(hwfc, ectx), &(VkDependencyInfo) { @@ -2234,6 +2228,13 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, }); err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 0); + if (err >= 0) { + for (int i = 0; i < nb_images; i++) { + frame->layout[i] = img_bar[i].newLayout; + frame->access[i] = img_bar[i].dstAccessMask; + 
frame->queue_family[i] = img_bar[i].dstQueueFamilyIndex; + } + } vkfc->unlock_frame(hwfc, frame); return err; From d4f97cd34959f0dc4bf923b67ddc03bfdde996cf Mon Sep 17 00:00:00 2001 From: Lynne Date: Thu, 2 Mar 2023 13:02:25 +0100 Subject: [PATCH 66/98] hwcontext_vulkan: remove duplicate code, port to use generic vulkan utils The temporary AVFrame on stack enables us to use the common dependency/dispatch code in prepare_frame(). The prepare_frame() function is used for both frame initialization and frame import/export queue family transfer operations. In the former case, no AVFrame exists yet, so, as this is purely libavutil code, we create a temporary frame on stack. Otherwise, we'd need to allocate multiple frames somewhere, one for each possible command buffer dispatch. --- libavutil/Makefile | 2 +- libavutil/hwcontext_vulkan.c | 1005 +++++++--------------------------- libavutil/vulkan.h | 13 + 3 files changed, 209 insertions(+), 811 deletions(-) diff --git a/libavutil/Makefile b/libavutil/Makefile index dc9012f9a83a7..bd9c6f9e32778 100644 --- a/libavutil/Makefile +++ b/libavutil/Makefile @@ -195,7 +195,7 @@ OBJS-$(CONFIG_QSV) += hwcontext_qsv.o OBJS-$(CONFIG_VAAPI) += hwcontext_vaapi.o OBJS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.o OBJS-$(CONFIG_VDPAU) += hwcontext_vdpau.o -OBJS-$(CONFIG_VULKAN) += hwcontext_vulkan.o +OBJS-$(CONFIG_VULKAN) += hwcontext_vulkan.o vulkan.o OBJS-$(!CONFIG_VULKAN) += hwcontext_stub.o diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 4eab89e93cf68..ff2cde2dd650a 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -78,18 +78,13 @@ typedef struct VulkanQueueCtx { unsigned int buf_deps_alloc_size; } VulkanQueueCtx; -typedef struct VulkanExecCtx { - VkCommandPool pool; - VkCommandBuffer *bufs; - VulkanQueueCtx *queues; - int nb_queues; - int cur_queue_idx; -} VulkanExecCtx; - typedef struct VulkanDevicePriv { /* Vulkan library and loader functions */ void *libvulkan; -
FFVulkanFunctions vkfn; + + FFVulkanContext vkctx; + FFVkQueueFamilyCtx compute_qf; + FFVkQueueFamilyCtx transfer_qf; /* Properties */ VkPhysicalDeviceProperties2 props; @@ -111,9 +106,6 @@ typedef struct VulkanDevicePriv { /* Debug callback */ VkDebugUtilsMessengerEXT debug_ctx; - /* Extensions */ - FFVulkanExtensions extensions; - /* Settings */ int use_linear_images; @@ -129,11 +121,11 @@ typedef struct VulkanDevicePriv { typedef struct VulkanFramesPriv { /* Image conversions */ - VulkanExecCtx conv_ctx; + FFVkExecPool compute_exec; /* Image transfers */ - VulkanExecCtx upload_ctx; - VulkanExecCtx download_ctx; + FFVkExecPool upload_exec; + FFVkExecPool download_exec; /* Modifier info list to free at uninit */ VkImageDrmFormatModifierListCreateInfoEXT *modifier_info; @@ -292,7 +284,7 @@ static int vkfmt_from_pixfmt2(AVHWDeviceContext *dev_ctx, enum AVPixelFormat p, { AVVulkanDeviceContext *hwctx = dev_ctx->hwctx; VulkanDevicePriv *priv = dev_ctx->internal->priv; - FFVulkanFunctions *vk = &priv->vkfn; + FFVulkanFunctions *vk = &priv->vkctx.vkfn; const VkFormatFeatureFlagBits2 basic_flags = VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT | @@ -358,31 +350,6 @@ static int vkfmt_from_pixfmt2(AVHWDeviceContext *dev_ctx, enum AVPixelFormat p, return AVERROR(EINVAL); } -static const void *vk_find_struct(const void *chain, VkStructureType stype) -{ - const VkBaseInStructure *in = chain; - while (in) { - if (in->sType == stype) - return in; - - in = in->pNext; - } - - return NULL; -} - -static void vk_link_struct(void *chain, void *in) -{ - VkBaseOutStructure *out = chain; - if (!in) - return; - - while (out->pNext) - out = out->pNext; - - out->pNext = in; -} - static int load_libvulkan(AVHWDeviceContext *ctx) { AVVulkanDeviceContext *hwctx = ctx->hwctx; @@ -453,47 +420,6 @@ static const VulkanOptExtension optional_device_exts[] = { { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 }, }; -/* Converts return values to 
strings */ -static const char *vk_ret2str(VkResult res) -{ -#define CASE(VAL) case VAL: return #VAL - switch (res) { - CASE(VK_SUCCESS); - CASE(VK_NOT_READY); - CASE(VK_TIMEOUT); - CASE(VK_EVENT_SET); - CASE(VK_EVENT_RESET); - CASE(VK_INCOMPLETE); - CASE(VK_ERROR_OUT_OF_HOST_MEMORY); - CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY); - CASE(VK_ERROR_INITIALIZATION_FAILED); - CASE(VK_ERROR_DEVICE_LOST); - CASE(VK_ERROR_MEMORY_MAP_FAILED); - CASE(VK_ERROR_LAYER_NOT_PRESENT); - CASE(VK_ERROR_EXTENSION_NOT_PRESENT); - CASE(VK_ERROR_FEATURE_NOT_PRESENT); - CASE(VK_ERROR_INCOMPATIBLE_DRIVER); - CASE(VK_ERROR_TOO_MANY_OBJECTS); - CASE(VK_ERROR_FORMAT_NOT_SUPPORTED); - CASE(VK_ERROR_FRAGMENTED_POOL); - CASE(VK_ERROR_SURFACE_LOST_KHR); - CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR); - CASE(VK_SUBOPTIMAL_KHR); - CASE(VK_ERROR_OUT_OF_DATE_KHR); - CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR); - CASE(VK_ERROR_VALIDATION_FAILED_EXT); - CASE(VK_ERROR_INVALID_SHADER_NV); - CASE(VK_ERROR_OUT_OF_POOL_MEMORY); - CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE); - CASE(VK_ERROR_NOT_PERMITTED_EXT); - CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT); - CASE(VK_ERROR_INVALID_DEVICE_ADDRESS_EXT); - CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT); - default: return "Unknown error"; - } -#undef CASE -} - static VkBool32 VKAPI_CALL vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, VkDebugUtilsMessageTypeFlagsEXT messageType, const VkDebugUtilsMessengerCallbackDataEXT *data, @@ -544,7 +470,7 @@ static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts, const char *tstr; const char **extension_names = NULL; VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; AVVulkanDeviceContext *hwctx = ctx->hwctx; int err = 0, found, extensions_found = 0; @@ -607,7 +533,7 @@ static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts, continue; av_log(ctx, AV_LOG_VERBOSE, "Using %s extension %s\n", 
mod, tstr); - p->extensions |= optional_exts[i].flag; + p->vkctx.extensions |= optional_exts[i].flag; ADD_VAL_TO_LIST(extension_names, extensions_found, tstr); } @@ -623,7 +549,7 @@ static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts, if (found) { av_log(ctx, AV_LOG_VERBOSE, "Using %s extension %s\n", mod, tstr); ADD_VAL_TO_LIST(extension_names, extensions_found, tstr); - p->extensions |= FF_VK_EXT_DEBUG_UTILS; + p->vkctx.extensions |= FF_VK_EXT_DEBUG_UTILS; } else { av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n", tstr); @@ -675,7 +601,7 @@ static int check_validation_layers(AVHWDeviceContext *ctx, AVDictionary *opts, int found = 0, err = 0; VulkanDevicePriv *priv = ctx->internal->priv; - FFVulkanFunctions *vk = &priv->vkfn; + FFVulkanFunctions *vk = &priv->vkctx.vkfn; uint32_t sup_layer_count; VkLayerProperties *sup_layers; @@ -782,7 +708,7 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts) int err = 0, debug_mode = 0; VkResult ret; VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; AVVulkanDeviceContext *hwctx = ctx->hwctx; VkApplicationInfo application_info = { .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, @@ -810,7 +736,7 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts) return err; } - err = ff_vk_load_functions(ctx, vk, p->extensions, 0, 0); + err = ff_vk_load_functions(ctx, vk, p->vkctx.extensions, 0, 0); if (err < 0) { av_log(ctx, AV_LOG_ERROR, "Unable to load instance enumeration functions!\n"); return err; @@ -846,12 +772,12 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts) /* Check for errors */ if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Instance creation failure: %s\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); err = AVERROR_EXTERNAL; goto fail; } - err = ff_vk_load_functions(ctx, vk, p->extensions, 1, 0); + err = ff_vk_load_functions(ctx, vk, 
p->vkctx.extensions, 1, 0); if (err < 0) { av_log(ctx, AV_LOG_ERROR, "Unable to load instance functions!\n"); goto fail; @@ -912,7 +838,7 @@ static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select) uint32_t num; VkResult ret; VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; VkPhysicalDevice *devices = NULL; VkPhysicalDeviceIDProperties *idp = NULL; VkPhysicalDeviceProperties2 *prop = NULL; @@ -921,7 +847,7 @@ static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select) ret = vk->EnumeratePhysicalDevices(hwctx->inst, &num, NULL); if (ret != VK_SUCCESS || !num) { - av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", vk_ret2str(ret)); + av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", ff_vk_ret2str(ret)); return AVERROR(ENODEV); } @@ -932,7 +858,7 @@ static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select) ret = vk->EnumeratePhysicalDevices(hwctx->inst, &num, devices); if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Failed enumerating devices: %s\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); err = AVERROR(ENODEV); goto end; } @@ -1092,7 +1018,7 @@ static int setup_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd) float *weights; VkQueueFamilyProperties *qf = NULL; VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; AVVulkanDeviceContext *hwctx = ctx->hwctx; int graph_index, comp_index, tx_index, enc_index, dec_index; @@ -1220,241 +1146,10 @@ static int setup_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd) return 0; } -static int create_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd, - int queue_family_index, int num_queues) -{ - VkResult ret; - AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; - VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; - - VkCommandPoolCreateInfo 
cqueue_create = { - .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, - .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, - .queueFamilyIndex = queue_family_index, - }; - VkCommandBufferAllocateInfo cbuf_create = { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, - .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, - .commandBufferCount = num_queues, - }; - - cmd->nb_queues = num_queues; - - /* Create command pool */ - ret = vk->CreateCommandPool(hwctx->act_dev, &cqueue_create, - hwctx->alloc, &cmd->pool); - if (ret != VK_SUCCESS) { - av_log(hwfc, AV_LOG_ERROR, "Command pool creation failure: %s\n", - vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - cmd->bufs = av_mallocz(num_queues * sizeof(*cmd->bufs)); - if (!cmd->bufs) - return AVERROR(ENOMEM); - - cbuf_create.commandPool = cmd->pool; - - /* Allocate command buffer */ - ret = vk->AllocateCommandBuffers(hwctx->act_dev, &cbuf_create, cmd->bufs); - if (ret != VK_SUCCESS) { - av_log(hwfc, AV_LOG_ERROR, "Command buffer alloc failure: %s\n", - vk_ret2str(ret)); - av_freep(&cmd->bufs); - return AVERROR_EXTERNAL; - } - - cmd->queues = av_mallocz(num_queues * sizeof(*cmd->queues)); - if (!cmd->queues) - return AVERROR(ENOMEM); - - for (int i = 0; i < num_queues; i++) { - VulkanQueueCtx *q = &cmd->queues[i]; - vk->GetDeviceQueue(hwctx->act_dev, queue_family_index, i, &q->queue); - q->was_synchronous = 1; - q->qf = queue_family_index; - q->qidx = i; - } - - return 0; -} - -static void free_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd) -{ - AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; - VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; - - if (cmd->queues) { - for (int i = 0; i < cmd->nb_queues; i++) { - VulkanQueueCtx *q = &cmd->queues[i]; - - /* Make sure all queues have finished executing */ - if (q->fence && !q->was_synchronous) { - vk->WaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); - vk->ResetFences(hwctx->act_dev, 1, 
&q->fence); - } - - /* Free the fence */ - if (q->fence) - vk->DestroyFence(hwctx->act_dev, q->fence, hwctx->alloc); - - /* Free buffer dependencies */ - for (int j = 0; j < q->nb_buf_deps; j++) - av_buffer_unref(&q->buf_deps[j]); - av_free(q->buf_deps); - } - } - - if (cmd->bufs) - vk->FreeCommandBuffers(hwctx->act_dev, cmd->pool, cmd->nb_queues, cmd->bufs); - if (cmd->pool) - vk->DestroyCommandPool(hwctx->act_dev, cmd->pool, hwctx->alloc); - - av_freep(&cmd->queues); - av_freep(&cmd->bufs); - cmd->pool = VK_NULL_HANDLE; -} - -static VkCommandBuffer get_buf_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd) -{ - return cmd->bufs[cmd->cur_queue_idx]; -} - -static void unref_exec_ctx_deps(AVHWFramesContext *hwfc, VulkanExecCtx *cmd) -{ - VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx]; - - for (int j = 0; j < q->nb_buf_deps; j++) - av_buffer_unref(&q->buf_deps[j]); - q->nb_buf_deps = 0; -} - -static int wait_start_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd) -{ - VkResult ret; - AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; - VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx]; - VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; - - VkCommandBufferBeginInfo cmd_start = { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, - }; - - /* Create the fence and don't wait for it initially */ - if (!q->fence) { - VkFenceCreateInfo fence_spawn = { - .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, - }; - ret = vk->CreateFence(hwctx->act_dev, &fence_spawn, hwctx->alloc, - &q->fence); - if (ret != VK_SUCCESS) { - av_log(hwfc, AV_LOG_ERROR, "Failed to queue frame fence: %s\n", - vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } else if (!q->was_synchronous) { - vk->WaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); - vk->ResetFences(hwctx->act_dev, 1, &q->fence); - } - - /* Discard queue dependencies */ - unref_exec_ctx_deps(hwfc, 
cmd); - - ret = vk->BeginCommandBuffer(cmd->bufs[cmd->cur_queue_idx], &cmd_start); - if (ret != VK_SUCCESS) { - av_log(hwfc, AV_LOG_ERROR, "Unable to init command buffer: %s\n", - vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - return 0; -} - -static int add_buf_dep_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd, - AVBufferRef * const *deps, int nb_deps) -{ - AVBufferRef **dst; - VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx]; - - if (!deps || !nb_deps) - return 0; - - dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size, - (q->nb_buf_deps + nb_deps) * sizeof(*dst)); - if (!dst) - goto err; - - q->buf_deps = dst; - - for (int i = 0; i < nb_deps; i++) { - q->buf_deps[q->nb_buf_deps] = av_buffer_ref(deps[i]); - if (!q->buf_deps[q->nb_buf_deps]) - goto err; - q->nb_buf_deps++; - } - - return 0; - -err: - unref_exec_ctx_deps(hwfc, cmd); - return AVERROR(ENOMEM); -} - -static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd, - VkSubmitInfo *s_info, AVVkFrame *f, int synchronous) -{ - VkResult ret; - VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx]; - VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; - FFVulkanFunctions *vk = &p->vkfn; - - ret = vk->EndCommandBuffer(cmd->bufs[cmd->cur_queue_idx]); - if (ret != VK_SUCCESS) { - av_log(hwfc, AV_LOG_ERROR, "Unable to finish command buffer: %s\n", - vk_ret2str(ret)); - unref_exec_ctx_deps(hwfc, cmd); - return AVERROR_EXTERNAL; - } - - s_info->pCommandBuffers = &cmd->bufs[cmd->cur_queue_idx]; - s_info->commandBufferCount = 1; - - hwctx->lock_queue(hwfc->device_ctx, q->qf, q->qidx); - ret = vk->QueueSubmit(q->queue, 1, s_info, q->fence); - hwctx->unlock_queue(hwfc->device_ctx, q->qf, q->qidx); - if (ret != VK_SUCCESS) { - av_log(hwfc, AV_LOG_ERROR, "Queue submission failure: %s\n", - vk_ret2str(ret)); - unref_exec_ctx_deps(hwfc, cmd); - return AVERROR_EXTERNAL; - } - - if (f) - for (int i = 0; i < 
s_info->signalSemaphoreCount; i++) - f->sem_value[i]++; - - q->was_synchronous = synchronous; - - if (synchronous) { - vk->WaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); - vk->ResetFences(hwctx->act_dev, 1, &q->fence); - unref_exec_ctx_deps(hwfc, cmd); - } else { /* Rotate queues */ - cmd->cur_queue_idx = (cmd->cur_queue_idx + 1) % cmd->nb_queues; - } - - return 0; -} - static void vulkan_device_free(AVHWDeviceContext *ctx) { VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; AVVulkanDeviceContext *hwctx = ctx->hwctx; if (hwctx->act_dev) @@ -1488,7 +1183,7 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, VkResult ret; AVDictionaryEntry *opt_d; VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; AVVulkanDeviceContext *hwctx = ctx->hwctx; /* @@ -1591,7 +1286,7 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); for (int i = 0; i < dev_info.enabledExtensionCount; i++) av_free((void *)dev_info.ppEnabledExtensionNames[i]); av_free((void *)dev_info.ppEnabledExtensionNames); @@ -1633,7 +1328,7 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) uint32_t qf_num; AVVulkanDeviceContext *hwctx = ctx->hwctx; VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; VkQueueFamilyProperties *qf; int graph_index, comp_index, tx_index, enc_index, dec_index; @@ -1642,13 +1337,13 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) for (int j = 0; j < FF_ARRAY_ELEMS(optional_device_exts); j++) { if (!strcmp(hwctx->enabled_dev_extensions[i], optional_device_exts[j].name)) { - p->extensions |= optional_device_exts[j].flag; + p->vkctx.extensions |= optional_device_exts[j].flag; break; } } } - err = 
ff_vk_load_functions(ctx, vk, p->extensions, 1, 1); + err = ff_vk_load_functions(ctx, vk, p->vkctx.extensions, 1, 1); if (err < 0) { av_log(ctx, AV_LOG_ERROR, "Unable to load functions!\n"); return err; @@ -1668,7 +1363,7 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) p->props.properties.limits.minMemoryMapAlignment); av_log(ctx, AV_LOG_VERBOSE, " nonCoherentAtomSize: %"PRIu64"\n", p->props.properties.limits.nonCoherentAtomSize); - if (p->extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) + if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) av_log(ctx, AV_LOG_VERBOSE, " minImportedHostPointerAlignment: %"PRIu64"\n", p->hprops.minImportedHostPointerAlignment); @@ -1757,6 +1452,13 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) /* Get device capabilities */ vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops); + p->vkctx.device = ctx; + p->vkctx.hwctx = hwctx; + + ff_vk_load_props(&p->vkctx); + ff_vk_qf_init(&p->vkctx, &p->compute_qf, VK_QUEUE_COMPUTE_BIT); + ff_vk_qf_init(&p->vkctx, &p->transfer_qf, VK_QUEUE_TRANSFER_BIT); + return 0; } @@ -1921,7 +1623,7 @@ static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req, VkResult ret; int index = -1; VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; AVVulkanDeviceContext *dev_hwctx = ctx->hwctx; VkMemoryAllocateInfo alloc_info = { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, @@ -1963,7 +1665,7 @@ static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req, dev_hwctx->alloc, mem); if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); return AVERROR(ENOMEM); } @@ -2014,7 +1716,7 @@ static void vulkan_frame_free(void *opaque, uint8_t *data) AVHWFramesContext *hwfc = opaque; AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; 
+ FFVulkanFunctions *vk = &p->vkctx.vkfn; int nb_images = ff_vk_count_images(f); VkSemaphoreWaitInfo sem_wait = { @@ -2044,8 +1746,7 @@ static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f, VkResult ret; AVHWDeviceContext *ctx = hwfc->device_ctx; VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; - AVVulkanFramesContext *hwfctx = hwfc->hwctx; + FFVulkanFunctions *vk = &p->vkctx.vkfn; VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } }; AVVulkanDeviceContext *hwctx = ctx->hwctx; @@ -2101,7 +1802,7 @@ static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f, ret = vk->BindImageMemory2(hwctx->act_dev, img_cnt, bind_info); if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } @@ -2116,128 +1817,86 @@ enum PrepMode { PREP_MODE_DECODING_DPB, }; -static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, +static int prepare_frame(AVHWFramesContext *hwfc, FFVkExecPool *ectx, AVVkFrame *frame, enum PrepMode pmode) { int err; - uint32_t src_qf, dst_qf; - VkImageLayout new_layout; - VkAccessFlags2 new_access; - AVVulkanFramesContext *vkfc = hwfc->hwctx; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; - uint64_t sem_sig_val[AV_NUM_DATA_POINTERS]; - int nb_images = ff_vk_count_images(frame); + FFVulkanFunctions *vk = &p->vkctx.vkfn; + VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS]; + int nb_img_bar = 0; - VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS] = { 0 }; - VkDependencyInfo dep_info; + uint32_t dst_qf = VK_QUEUE_FAMILY_IGNORED; + VkImageLayout new_layout; + VkAccessFlags2 new_access; + VkPipelineStageFlagBits2 src_stage = VK_PIPELINE_STAGE_2_NONE; - VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = { - .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, - .pSignalSemaphoreValues = sem_sig_val, - .signalSemaphoreValueCount = nb_images, + /* This 
is dirty - but it works. The vulkan.c dependency system doesn't + * free non-refcounted frames, and non-refcounted hardware frames cannot + * happen anywhere outside of here. */ + AVBufferRef tmp_ref = { + .data = (uint8_t *)hwfc, }; - - VkSubmitInfo s_info = { - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .pNext = &s_timeline_sem_info, - .pSignalSemaphores = frame->sem, - .signalSemaphoreCount = nb_images, + AVFrame tmp_frame = { + .data[0] = (uint8_t *)frame, + .hw_frames_ctx = &tmp_ref, }; - VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS]; + VkCommandBuffer cmd_buf; + FFVkExecContext *exec = ff_vk_exec_get(ectx); + cmd_buf = exec->buf; + ff_vk_exec_start(&p->vkctx, exec); - if ((err = wait_start_exec_ctx(hwfc, ectx))) + err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, &tmp_frame, + VK_PIPELINE_STAGE_2_NONE, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT); + if (err < 0) return err; - vkfc->lock_frame(hwfc, frame); - - for (int i = 0; i < nb_images; i++) { - wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; - sem_sig_val[i] = frame->sem_value[i] + 1; - } - switch (pmode) { case PREP_MODE_WRITE: new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; new_access = VK_ACCESS_TRANSFER_WRITE_BIT; - src_qf = VK_QUEUE_FAMILY_IGNORED; - dst_qf = VK_QUEUE_FAMILY_IGNORED; break; case PREP_MODE_EXTERNAL_IMPORT: new_layout = VK_IMAGE_LAYOUT_GENERAL; new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT; - src_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR; - dst_qf = VK_QUEUE_FAMILY_IGNORED; - s_timeline_sem_info.pWaitSemaphoreValues = frame->sem_value; - s_timeline_sem_info.waitSemaphoreValueCount = nb_images; - s_info.pWaitSemaphores = frame->sem; - s_info.pWaitDstStageMask = wait_st; - s_info.waitSemaphoreCount = nb_images; break; case PREP_MODE_EXTERNAL_EXPORT: new_layout = VK_IMAGE_LAYOUT_GENERAL; new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT; - src_qf = VK_QUEUE_FAMILY_IGNORED; dst_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR; - s_timeline_sem_info.pWaitSemaphoreValues 
= frame->sem_value; - s_timeline_sem_info.waitSemaphoreValueCount = nb_images; - s_info.pWaitSemaphores = frame->sem; - s_info.pWaitDstStageMask = wait_st; - s_info.waitSemaphoreCount = nb_images; + src_stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; break; case PREP_MODE_DECODING_DST: new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR; new_access = VK_ACCESS_TRANSFER_WRITE_BIT; - src_qf = VK_QUEUE_FAMILY_IGNORED; - dst_qf = VK_QUEUE_FAMILY_IGNORED; break; case PREP_MODE_DECODING_DPB: new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR; new_access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; - src_qf = VK_QUEUE_FAMILY_IGNORED; - dst_qf = VK_QUEUE_FAMILY_IGNORED; break; } - for (int i = 0; i < nb_images; i++) { - img_bar[i] = (VkImageMemoryBarrier2) { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, - .pNext = NULL, - .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, - .srcAccessMask = frame->access[i], - .dstAccessMask = new_access, - .oldLayout = frame->layout[i], - .newLayout = new_layout, - .srcQueueFamilyIndex = src_qf, - .dstQueueFamilyIndex = dst_qf, - .image = frame->img[i], - .subresourceRange = (VkImageSubresourceRange) { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - .levelCount = 1, - }, - }; - } + ff_vk_frame_barrier(&p->vkctx, exec, &tmp_frame, img_bar, &nb_img_bar, + src_stage, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + new_access, new_layout, dst_qf); - vk->CmdPipelineBarrier2(get_buf_exec_ctx(hwfc, ectx), &(VkDependencyInfo) { + vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) { .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, .pImageMemoryBarriers = img_bar, - .imageMemoryBarrierCount = nb_images, + .imageMemoryBarrierCount = nb_img_bar, }); - err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 0); - if (err >= 0) { - for (int i = 0; i < nb_images; i++) { - frame->layout[i] = img_bar[i].newLayout; - frame->access[i] = 
img_bar[i].dstAccessMask; - frame->queue_family[i] = img_bar[i].dstQueueFamilyIndex; - } - } - vkfc->unlock_frame(hwfc, frame); + err = ff_vk_exec_submit(&p->vkctx, exec); + if (err < 0) + return err; - return err; + /* We can do this because there are no real dependencies */ + ff_vk_exec_discard_deps(&p->vkctx, exec); + + return 0; } static inline void get_plane_wh(uint32_t *w, uint32_t *h, enum AVPixelFormat format, @@ -2267,9 +1926,8 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, AVVulkanFramesContext *hwfc_vk = hwfc->hwctx; AVHWDeviceContext *ctx = hwfc->device_ctx; VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; AVVulkanDeviceContext *hwctx = ctx->hwctx; - AVVulkanFramesContext *frames = hwfc->hwctx; VkExportSemaphoreCreateInfo ext_sem_info = { .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO, @@ -2285,9 +1943,9 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, VkSemaphoreTypeCreateInfo sem_type_info = { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, #ifdef _WIN32 - .pNext = p->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM ? &ext_sem_info : NULL, + .pNext = p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM ? &ext_sem_info : NULL, #else - .pNext = p->extensions & FF_VK_EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL, + .pNext = p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_SEM ? 
&ext_sem_info : NULL, #endif .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE, .initialValue = 0, @@ -2334,7 +1992,7 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, hwctx->alloc, &f->img[i]); if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); err = AVERROR(EINVAL); goto fail; } @@ -2344,7 +2002,7 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, hwctx->alloc, &f->sem[i]); if (ret != VK_SUCCESS) { av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } @@ -2375,11 +2033,11 @@ static void try_export_flags(AVHWFramesContext *hwfc, AVVulkanFramesContext *hwctx = hwfc->hwctx; AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; const VkImageDrmFormatModifierListCreateInfoEXT *drm_mod_info = - vk_find_struct(hwctx->create_pnext, - VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT); + ff_vk_find_struct(hwctx->create_pnext, + VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT); int has_mods = hwctx->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && drm_mod_info; int nb_mods; @@ -2446,12 +2104,12 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size) }; #ifdef _WIN32 - if (p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) + if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) try_export_flags(hwfc, &eiinfo.handleTypes, &e, IsWindows8OrGreater() ? 
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT); #else - if (p->extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) + if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) try_export_flags(hwfc, &eiinfo.handleTypes, &e, VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT); #endif @@ -2473,11 +2131,11 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size) if ( (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) && !(hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)) - err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_DECODING_DPB); + err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_DECODING_DPB); else if (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR) - err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_DECODING_DST); + err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_DECODING_DST); else - err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_WRITE); + err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_WRITE); if (err) goto fail; @@ -2505,6 +2163,7 @@ static void unlock_frame(AVHWFramesContext *fc, AVVkFrame *vkf) static void vulkan_frames_uninit(AVHWFramesContext *hwfc) { + VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; VulkanFramesPriv *fp = hwfc->internal->priv; if (fp->modifier_info) { @@ -2513,9 +2172,9 @@ static void vulkan_frames_uninit(AVHWFramesContext *hwfc) av_freep(&fp->modifier_info); } - free_exec_ctx(hwfc, &fp->conv_ctx); - free_exec_ctx(hwfc, &fp->upload_ctx); - free_exec_ctx(hwfc, &fp->download_ctx); + ff_vk_exec_pool_free(&p->vkctx, &fp->compute_exec); + ff_vk_exec_pool_free(&p->vkctx, &fp->upload_exec); + ff_vk_exec_pool_free(&p->vkctx, &fp->download_exec); } static int vulkan_frames_init(AVHWFramesContext *hwfc) @@ -2524,7 +2183,6 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc) AVVkFrame *f; AVVulkanFramesContext *hwctx = hwfc->hwctx; VulkanFramesPriv *fp = hwfc->internal->priv; - AVVulkanDeviceContext *dev_hwctx = 
hwfc->device_ctx->hwctx; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; VkImageUsageFlagBits supported_usage; const struct FFVkFormatEntry *fmt; @@ -2609,20 +2267,18 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc) if (!hwctx->unlock_frame) hwctx->unlock_frame = unlock_frame; - err = create_exec_ctx(hwfc, &fp->conv_ctx, - dev_hwctx->queue_family_comp_index, - dev_hwctx->nb_comp_queues); + err = ff_vk_exec_pool_init(&p->vkctx, &p->compute_qf, &fp->compute_exec, + p->compute_qf.nb_queues*4, 0, 0, 0, NULL); if (err) return err; - err = create_exec_ctx(hwfc, &fp->upload_ctx, - dev_hwctx->queue_family_tx_index, - dev_hwctx->nb_tx_queues); + err = ff_vk_exec_pool_init(&p->vkctx, &p->transfer_qf, &fp->upload_exec, + p->transfer_qf.nb_queues*4, 0, 0, 0, NULL); if (err) return err; - err = create_exec_ctx(hwfc, &fp->download_ctx, - dev_hwctx->queue_family_tx_index, 1); + err = ff_vk_exec_pool_init(&p->vkctx, &p->transfer_qf, &fp->download_exec, + p->transfer_qf.nb_queues*4, 0, 0, 0, NULL); if (err) return err; @@ -2682,7 +2338,7 @@ static void vulkan_unmap_from_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwma AVVkFrame *f = hwmap->priv; AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; const int nb_images = ff_vk_count_images(f); VkSemaphoreWaitInfo wait_info = { @@ -2749,8 +2405,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f AVHWDeviceContext *ctx = hwfc->device_ctx; AVVulkanDeviceContext *hwctx = ctx->hwctx; VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; - AVVulkanFramesContext *hwfctx = hwfc->hwctx; + FFVulkanFunctions *vk = &p->vkctx.vkfn; VulkanFramesPriv *fp = hwfc->internal->priv; const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0]; VkBindImageMemoryInfo bind_info[AV_DRM_MAX_PLANES]; @@ -2854,7 +2509,7 @@ static int 
vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f &fmt_props, &props_ret); if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Cannot map DRM frame to Vulkan: %s\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); err = AVERROR_EXTERNAL; goto fail; } @@ -2877,7 +2532,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f hwctx->alloc, &f->img[i]); if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); err = AVERROR(EINVAL); goto fail; } @@ -2886,7 +2541,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f hwctx->alloc, &f->sem[i]); if (ret != VK_SUCCESS) { av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } @@ -2940,7 +2595,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f idesc.fd, &fdmp); if (ret != VK_SUCCESS) { av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); err = AVERROR_EXTERNAL; close(idesc.fd); goto fail; @@ -2992,12 +2647,12 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f ret = vk->BindImageMemory2(hwctx->act_dev, bind_counts, bind_info); if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); err = AVERROR_EXTERNAL; goto fail; } - err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_EXTERNAL_IMPORT); + err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_EXTERNAL_IMPORT); if (err) goto fail; @@ -3097,7 +2752,7 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc, const int planes = av_pix_fmt_count_planes(hwfc->sw_format); const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format); VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; 
AVHWFramesContext *cuda_fc = (AVHWFramesContext*)cuda_hwfc->data; AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx; @@ -3157,7 +2812,7 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc, &ext_desc.handle.win32.handle); if (ret != VK_SUCCESS) { av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a Win32 Handle: %s!\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); err = AVERROR_EXTERNAL; goto fail; } @@ -3185,7 +2840,7 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc, &ext_desc.handle.fd); if (ret != VK_SUCCESS) { av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD: %s!\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); err = AVERROR_EXTERNAL; goto fail; } @@ -3228,7 +2883,7 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc, #endif if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); err = AVERROR_EXTERNAL; goto fail; } @@ -3276,7 +2931,7 @@ static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc, dst_f = (AVVkFrame *)dst->data[0]; - err = prepare_frame(hwfc, &fp->upload_ctx, dst_f, PREP_MODE_EXTERNAL_EXPORT); + err = prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_EXPORT); if (err < 0) return err; @@ -3336,7 +2991,7 @@ static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc, av_log(hwfc, AV_LOG_VERBOSE, "Transfered CUDA image to Vulkan!\n"); - return err = prepare_frame(hwfc, &fp->upload_ctx, dst_f, PREP_MODE_EXTERNAL_IMPORT); + return err = prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_IMPORT); fail: CHECK_CU(cu->cuCtxPopCurrent(&dummy)); @@ -3356,13 +3011,13 @@ static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst, #if CONFIG_LIBDRM #if CONFIG_VAAPI case AV_PIX_FMT_VAAPI: - if (p->extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS) + if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS) return vulkan_map_from_vaapi(hwfc, dst, src, flags); else return AVERROR(ENOSYS); #endif case 
AV_PIX_FMT_DRM_PRIME: - if (p->extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS) + if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS) return vulkan_map_from_drm(hwfc, dst, src, flags); else return AVERROR(ENOSYS); @@ -3403,7 +3058,7 @@ static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst, VkResult ret; AVVkFrame *f = (AVVkFrame *)src->data[0]; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; VulkanFramesPriv *fp = hwfc->internal->priv; AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; AVVulkanFramesContext *hwfctx = hwfc->hwctx; @@ -3421,7 +3076,7 @@ static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst, if (!drm_desc) return AVERROR(ENOMEM); - err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_EXTERNAL_EXPORT); + err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_EXTERNAL_EXPORT); if (err < 0) goto end; @@ -3542,13 +3197,13 @@ static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst, switch (dst->format) { #if CONFIG_LIBDRM case AV_PIX_FMT_DRM_PRIME: - if (p->extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS) + if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS) return vulkan_map_to_drm(hwfc, dst, src, flags); else return AVERROR(ENOSYS); #if CONFIG_VAAPI case AV_PIX_FMT_VAAPI: - if (p->extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS) + if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS) return vulkan_map_to_vaapi(hwfc, dst, src, flags); else return AVERROR(ENOSYS); @@ -3560,29 +3215,6 @@ static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst, return AVERROR(ENOSYS); } -typedef struct ImageBuffer { - VkBuffer buf; - VkDeviceMemory mem; - VkMemoryPropertyFlagBits flags; - int mapped_mem; -} ImageBuffer; - -static void free_buf(void *opaque, uint8_t *data) -{ - AVHWDeviceContext *ctx = opaque; - AVVulkanDeviceContext *hwctx = ctx->hwctx; - VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; - ImageBuffer 
*vkbuf = (ImageBuffer *)data; - - if (vkbuf->buf) - vk->DestroyBuffer(hwctx->act_dev, vkbuf->buf, hwctx->alloc); - if (vkbuf->mem) - vk->FreeMemory(hwctx->act_dev, vkbuf->mem, hwctx->alloc); - - av_free(data); -} - static size_t get_req_buffer_size(VulkanDevicePriv *p, int *stride, int height) { size_t size; @@ -3592,202 +3224,6 @@ static size_t get_req_buffer_size(VulkanDevicePriv *p, int *stride, int height) return size; } -static int create_buf(AVHWDeviceContext *ctx, AVBufferRef **buf, - VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags, - size_t size, uint32_t req_memory_bits, int host_mapped, - void *create_pnext, void *alloc_pnext) -{ - int err; - VkResult ret; - int use_ded_mem; - AVVulkanDeviceContext *hwctx = ctx->hwctx; - VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; - - VkBufferCreateInfo buf_spawn = { - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = create_pnext, - .usage = usage, - .size = size, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - }; - - VkBufferMemoryRequirementsInfo2 req_desc = { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, - }; - VkMemoryDedicatedAllocateInfo ded_alloc = { - .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, - .pNext = alloc_pnext, - }; - VkMemoryDedicatedRequirements ded_req = { - .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, - }; - VkMemoryRequirements2 req = { - .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, - .pNext = &ded_req, - }; - - ImageBuffer *vkbuf = av_mallocz(sizeof(*vkbuf)); - if (!vkbuf) - return AVERROR(ENOMEM); - - vkbuf->mapped_mem = host_mapped; - - ret = vk->CreateBuffer(hwctx->act_dev, &buf_spawn, NULL, &vkbuf->buf); - if (ret != VK_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Failed to create buffer: %s\n", - vk_ret2str(ret)); - err = AVERROR_EXTERNAL; - goto fail; - } - - req_desc.buffer = vkbuf->buf; - - vk->GetBufferMemoryRequirements2(hwctx->act_dev, &req_desc, &req); - - /* In case the implementation 
prefers/requires dedicated allocation */ - use_ded_mem = ded_req.prefersDedicatedAllocation | - ded_req.requiresDedicatedAllocation; - if (use_ded_mem) - ded_alloc.buffer = vkbuf->buf; - - /* Additional requirements imposed on us */ - if (req_memory_bits) - req.memoryRequirements.memoryTypeBits &= req_memory_bits; - - err = alloc_mem(ctx, &req.memoryRequirements, flags, - use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext, - &vkbuf->flags, &vkbuf->mem); - if (err) - goto fail; - - ret = vk->BindBufferMemory(hwctx->act_dev, vkbuf->buf, vkbuf->mem, 0); - if (ret != VK_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n", - vk_ret2str(ret)); - err = AVERROR_EXTERNAL; - goto fail; - } - - *buf = av_buffer_create((uint8_t *)vkbuf, sizeof(*vkbuf), free_buf, ctx, 0); - if (!(*buf)) { - err = AVERROR(ENOMEM); - goto fail; - } - - return 0; - -fail: - free_buf(ctx, (uint8_t *)vkbuf); - return err; -} - -/* Skips mapping of host mapped buffers but still invalidates them */ -static int map_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs, uint8_t *mem[], - int nb_buffers, int invalidate) -{ - VkResult ret; - AVVulkanDeviceContext *hwctx = ctx->hwctx; - VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; - VkMappedMemoryRange invalidate_ctx[AV_NUM_DATA_POINTERS]; - int invalidate_count = 0; - - for (int i = 0; i < nb_buffers; i++) { - ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data; - if (vkbuf->mapped_mem) - continue; - - ret = vk->MapMemory(hwctx->act_dev, vkbuf->mem, 0, - VK_WHOLE_SIZE, 0, (void **)&mem[i]); - if (ret != VK_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n", - vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } - - if (!invalidate) - return 0; - - for (int i = 0; i < nb_buffers; i++) { - ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data; - const VkMappedMemoryRange ival_buf = { - .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, - .memory = vkbuf->mem, - .size = VK_WHOLE_SIZE, - 
}; - - /* For host imported memory Vulkan says to use platform-defined - * sync methods, but doesn't really say not to call flush or invalidate - * on original host pointers. It does explicitly allow to do that on - * host-mapped pointers which are then mapped again using vkMapMemory, - * but known implementations return the original pointers when mapped - * again. */ - if (vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) - continue; - - invalidate_ctx[invalidate_count++] = ival_buf; - } - - if (invalidate_count) { - ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, invalidate_count, - invalidate_ctx); - if (ret != VK_SUCCESS) - av_log(ctx, AV_LOG_WARNING, "Failed to invalidate memory: %s\n", - vk_ret2str(ret)); - } - - return 0; -} - -static int unmap_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs, - int nb_buffers, int flush) -{ - int err = 0; - VkResult ret; - AVVulkanDeviceContext *hwctx = ctx->hwctx; - VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; - VkMappedMemoryRange flush_ctx[AV_NUM_DATA_POINTERS]; - int flush_count = 0; - - if (flush) { - for (int i = 0; i < nb_buffers; i++) { - ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data; - const VkMappedMemoryRange flush_buf = { - .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, - .memory = vkbuf->mem, - .size = VK_WHOLE_SIZE, - }; - - if (vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) - continue; - - flush_ctx[flush_count++] = flush_buf; - } - } - - if (flush_count) { - ret = vk->FlushMappedMemoryRanges(hwctx->act_dev, flush_count, flush_ctx); - if (ret != VK_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Failed to flush memory: %s\n", - vk_ret2str(ret)); - err = AVERROR_EXTERNAL; /* We still want to try to unmap them */ - } - } - - for (int i = 0; i < nb_buffers; i++) { - ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data; - if (vkbuf->mapped_mem) - continue; - - vk->UnmapMemory(hwctx->act_dev, vkbuf->mem); - } - - return err; -} - static int 
transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f, AVBufferRef **bufs, size_t *buf_offsets, const int *buf_stride, int w, @@ -3795,86 +3231,46 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f, { int err; AVVkFrame *frame = (AVVkFrame *)f->data[0]; - AVVulkanFramesContext *vkfc = hwfc->hwctx; VulkanFramesPriv *fp = hwfc->internal->priv; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; - - int bar_num = 0; - VkPipelineStageFlagBits sem_wait_dst[AV_NUM_DATA_POINTERS]; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS]; + int nb_img_bar = 0; const int nb_images = ff_vk_count_images(frame); int pixfmt_planes = av_pix_fmt_count_planes(pix_fmt); - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); - VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 }; - VulkanExecCtx *ectx = to_buf ? &fp->download_ctx : &fp->upload_ctx; - VkCommandBuffer cmd_buf = get_buf_exec_ctx(hwfc, ectx); - - uint64_t sem_signal_values[AV_NUM_DATA_POINTERS]; + VkCommandBuffer cmd_buf; + FFVkExecContext *exec = ff_vk_exec_get(to_buf ? 
&fp->download_exec : + &fp->upload_exec); + cmd_buf = exec->buf; + ff_vk_exec_start(&p->vkctx, exec); - VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = { - .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, - .pWaitSemaphoreValues = frame->sem_value, - .pSignalSemaphoreValues = sem_signal_values, - .waitSemaphoreValueCount = nb_images, - .signalSemaphoreValueCount = nb_images, - }; - - VkSubmitInfo s_info = { - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .pNext = &s_timeline_sem_info, - .pSignalSemaphores = frame->sem, - .pWaitSemaphores = frame->sem, - .pWaitDstStageMask = sem_wait_dst, - .signalSemaphoreCount = nb_images, - .waitSemaphoreCount = nb_images, - }; - - vkfc->lock_frame(hwfc, frame); - - if ((err = wait_start_exec_ctx(hwfc, ectx))) - goto end; - - for (int i = 0; i < nb_images; i++) - sem_signal_values[i] = frame->sem_value[i] + 1; - - /* Change the image layout to something more optimal for transfers */ - for (int i = 0; i < nb_images; i++) { - VkImageLayout new_layout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL : - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - VkAccessFlags new_access = to_buf ? 
VK_ACCESS_TRANSFER_READ_BIT : - VK_ACCESS_TRANSFER_WRITE_BIT; - - sem_wait_dst[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + err = ff_vk_exec_add_dep_buf(&p->vkctx, exec, bufs, pixfmt_planes, 1); + if (err < 0) + return err; - /* If the layout matches and we have read access skip the barrier */ - if ((frame->layout[i] == new_layout) && (frame->access[i] & new_access)) - continue; + err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, f, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_TRANSFER_BIT); + if (err < 0) + return err; - img_bar[bar_num].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - img_bar[bar_num].srcAccessMask = 0x0; - img_bar[bar_num].dstAccessMask = new_access; - img_bar[bar_num].oldLayout = frame->layout[i]; - img_bar[bar_num].newLayout = new_layout; - img_bar[bar_num].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - img_bar[bar_num].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - img_bar[bar_num].image = frame->img[i]; - img_bar[bar_num].subresourceRange.levelCount = 1; - img_bar[bar_num].subresourceRange.layerCount = 1; - img_bar[bar_num].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - - frame->layout[i] = img_bar[bar_num].newLayout; - frame->access[i] = img_bar[bar_num].dstAccessMask; - - bar_num++; - } + ff_vk_frame_barrier(&p->vkctx, exec, f, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, + to_buf ? VK_ACCESS_TRANSFER_READ_BIT : + VK_ACCESS_TRANSFER_WRITE_BIT, + to_buf ? 
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL : + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VK_QUEUE_FAMILY_IGNORED); - if (bar_num) - vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, 0, - 0, NULL, 0, NULL, bar_num, img_bar); + vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); /* Schedule a copy for each plane */ for (int i = 0; i < pixfmt_planes; i++) { @@ -3884,7 +3280,7 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f, VK_IMAGE_ASPECT_PLANE_1_BIT, VK_IMAGE_ASPECT_PLANE_2_BIT, }; - ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data; + FFVkBuffer *vkbuf = (FFVkBuffer *)bufs[i]->data; VkBufferImageCopy buf_reg = { .bufferOffset = buf_offsets[i], .bufferRowLength = buf_stride[i] / desc->comp[i].step, @@ -3894,44 +3290,32 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f, .imageOffset = { 0, 0, 0, }, }; - int p_w, p_h; + uint32_t p_w, p_h; get_plane_wh(&p_w, &p_h, pix_fmt, w, h, i); buf_reg.bufferImageHeight = p_h; buf_reg.imageExtent = (VkExtent3D){ p_w, p_h, 1, }; if (to_buf) - vk->CmdCopyImageToBuffer(cmd_buf, frame->img[idx], frame->layout[idx], - vkbuf->buf, 1, &buf_reg); + vk->CmdCopyImageToBuffer(cmd_buf, frame->img[idx], + img_bar[0].newLayout, + vkbuf->buf, + 1, &buf_reg); else vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[idx], - frame->layout[idx], 1, &buf_reg); + img_bar[0].newLayout, + 1, &buf_reg); } - /* When uploading, do this asynchronously if the source is refcounted by - * keeping the buffers as a submission dependency. - * The hwcontext is guaranteed to not be freed until all frames are freed - * in the frames_unint function. 
- * When downloading to buffer, do this synchronously and wait for the - * queue submission to finish executing */ - if (!to_buf) { - int ref; - for (ref = 0; ref < AV_NUM_DATA_POINTERS; ref++) { - if (!f->buf[ref]) - break; - if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1))) - goto end; - } - if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, pixfmt_planes))) - goto end; - err = submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref); - } else { - err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 1); - } + err = ff_vk_exec_submit(&p->vkctx, exec); + if (err < 0) + return err; -end: - vkfc->unlock_frame(hwfc, frame); - return err; + /* Wait for the operation to complete when downloading */ + if (to_buf) + ff_vk_exec_wait(&p->vkctx, exec); + + return 0; } static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, @@ -3939,22 +3323,21 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, { int err = 0; VkResult ret; - AVVkFrame *f = (AVVkFrame *)vkf->data[0]; AVHWDeviceContext *dev_ctx = hwfc->device_ctx; AVVulkanDeviceContext *hwctx = dev_ctx->hwctx; - AVVulkanFramesContext *fc = hwfc->hwctx; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; AVFrame tmp; + FFVkBuffer *vkbufs[AV_NUM_DATA_POINTERS]; AVBufferRef *bufs[AV_NUM_DATA_POINTERS] = { 0 }; size_t buf_offsets[AV_NUM_DATA_POINTERS] = { 0 }; - int p_w, p_h; + uint32_t p_w, p_h; const int planes = av_pix_fmt_count_planes(swf->format); int host_mapped[AV_NUM_DATA_POINTERS] = { 0 }; - const int map_host = !!(p->extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY); + const int map_host = !!(p->vkctx.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY); if ((swf->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(swf->format))) { av_log(hwfc, AV_LOG_ERROR, "Unsupported software frame pixel format!\n"); @@ -4001,8 +3384,7 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame 
*vkf, import_desc.handleType, import_desc.pHostPointer, &p_props); - - if (ret == VK_SUCCESS) { + if (ret == VK_SUCCESS && p_props.memoryTypeBits) { host_mapped[i] = 1; buf_offsets[i] = offs; } @@ -4011,20 +3393,23 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, if (!host_mapped[i]) req_size = get_req_buffer_size(p, &tmp.linesize[i], p_h); - err = create_buf(dev_ctx, &bufs[i], - from ? VK_BUFFER_USAGE_TRANSFER_DST_BIT : - VK_BUFFER_USAGE_TRANSFER_SRC_BIT, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, - req_size, p_props.memoryTypeBits, host_mapped[i], - host_mapped[i] ? &create_desc : NULL, - host_mapped[i] ? &import_desc : NULL); - if (err) + err = ff_vk_create_avbuf(&p->vkctx, &bufs[i], req_size, + host_mapped[i] ? &create_desc : NULL, + host_mapped[i] ? &import_desc : NULL, + from ? VK_BUFFER_USAGE_TRANSFER_DST_BIT : + VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + (host_mapped[i] ? + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT : 0x0)); + if (err < 0) goto end; + + vkbufs[i] = (FFVkBuffer *)bufs[i]->data; } if (!from) { /* Map, copy image TO buffer (which then goes to the VkImage), unmap */ - if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 0))) + if ((err = ff_vk_map_buffers(&p->vkctx, vkbufs, tmp.data, planes, 0))) goto end; for (int i = 0; i < planes; i++) { @@ -4039,7 +3424,7 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, p_h); } - if ((err = unmap_buffers(dev_ctx, bufs, planes, 1))) + if ((err = ff_vk_unmap_buffers(&p->vkctx, vkbufs, planes, 1))) goto end; } @@ -4050,7 +3435,7 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, if (from) { /* Map, copy buffer (which came FROM the VkImage) to the frame, unmap */ - if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 0))) + if ((err = ff_vk_map_buffers(&p->vkctx, vkbufs, tmp.data, planes, 0))) goto end; for (int i = 0; i < planes; i++) { @@ -4065,7 +3450,7 @@ static int 
vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, p_h); } - if ((err = unmap_buffers(dev_ctx, bufs, planes, 1))) + if ((err = ff_vk_unmap_buffers(&p->vkctx, vkbufs, planes, 1))) goto end; } @@ -4085,11 +3470,11 @@ static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst, #if CONFIG_CUDA case AV_PIX_FMT_CUDA: #ifdef _WIN32 - if ((p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) && - (p->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM)) + if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) && + (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM)) #else - if ((p->extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) && - (p->extensions & FF_VK_EXT_EXTERNAL_FD_SEM)) + if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) && + (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_SEM)) #endif return vulkan_transfer_data_from_cuda(hwfc, dst, src); #endif @@ -4123,7 +3508,7 @@ static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst, dst_f = (AVVkFrame *)src->data[0]; - err = prepare_frame(hwfc, &fp->upload_ctx, dst_f, PREP_MODE_EXTERNAL_EXPORT); + err = prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_EXPORT); if (err < 0) return err; @@ -4183,7 +3568,7 @@ static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst, av_log(hwfc, AV_LOG_VERBOSE, "Transfered Vulkan image to CUDA!\n"); - return prepare_frame(hwfc, &fp->upload_ctx, dst_f, PREP_MODE_EXTERNAL_IMPORT); + return prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_IMPORT); fail: CHECK_CU(cu->cuCtxPopCurrent(&dummy)); @@ -4203,11 +3588,11 @@ static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst, #if CONFIG_CUDA case AV_PIX_FMT_CUDA: #ifdef _WIN32 - if ((p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) && - (p->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM)) + if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) && + (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM)) #else - if ((p->extensions & 
FF_VK_EXT_EXTERNAL_FD_MEMORY) && - (p->extensions & FF_VK_EXT_EXTERNAL_FD_SEM)) + if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) && + (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_SEM)) #endif return vulkan_transfer_data_to_cuda(hwfc, dst, src); #endif diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index 0831219d8fd31..ec03ba8b717b8 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -267,6 +267,19 @@ static inline int ff_vk_count_images(AVVkFrame *f) return cnt; } +static inline const void *ff_vk_find_struct(const void *chain, VkStructureType stype) +{ + const VkBaseInStructure *in = chain; + while (in) { + if (in->sType == stype) + return in; + + in = in->pNext; + } + + return NULL; +} + /* Identity mapping - r = r, b = b, g = g, a = a */ extern const VkComponentMapping ff_comp_identity_map; From 5c149a4ca33da6861fec6645fa3e9ab0dd3924a7 Mon Sep 17 00:00:00 2001 From: Lynne Date: Tue, 14 Mar 2023 21:38:55 +0100 Subject: [PATCH 67/98] hwcontext_vulkan: enable additional device properties --- libavutil/hwcontext_vulkan.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index ff2cde2dd650a..93b6c6de4b5b6 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -1248,6 +1248,8 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, COPY_FEATURE(hwctx->device_features, fragmentStoresAndAtomics) COPY_FEATURE(hwctx->device_features, vertexPipelineStoresAndAtomics) COPY_FEATURE(hwctx->device_features, shaderInt64) + COPY_FEATURE(hwctx->device_features, shaderInt16) + COPY_FEATURE(hwctx->device_features, shaderFloat64) #undef COPY_FEATURE /* We require timeline semaphores */ @@ -1256,10 +1258,27 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, err = AVERROR(ENOSYS); goto end; } + + p->device_features_1_1.samplerYcbcrConversion = dev_features_1_1.samplerYcbcrConversion; + 
p->device_features_1_1.storagePushConstant16 = dev_features_1_1.storagePushConstant16; + p->device_features_1_2.timelineSemaphore = 1; p->device_features_1_2.bufferDeviceAddress = dev_features_1_2.bufferDeviceAddress; - p->device_features_1_1.samplerYcbcrConversion = dev_features_1_1.samplerYcbcrConversion; + p->device_features_1_2.storagePushConstant8 = dev_features_1_2.storagePushConstant8; + p->device_features_1_2.shaderInt8 = dev_features_1_2.shaderInt8; + p->device_features_1_2.storageBuffer8BitAccess = dev_features_1_2.storageBuffer8BitAccess; + p->device_features_1_2.uniformAndStorageBuffer8BitAccess = dev_features_1_2.uniformAndStorageBuffer8BitAccess; + p->device_features_1_2.shaderFloat16 = dev_features_1_2.shaderFloat16; + p->device_features_1_2.shaderSharedInt64Atomics = dev_features_1_2.shaderSharedInt64Atomics; + p->device_features_1_2.vulkanMemoryModel = dev_features_1_2.vulkanMemoryModel; + p->device_features_1_2.vulkanMemoryModelDeviceScope = dev_features_1_2.vulkanMemoryModelDeviceScope; + p->device_features_1_2.hostQueryReset = dev_features_1_2.hostQueryReset; + p->device_features_1_3.synchronization2 = dev_features_1_3.synchronization2; + p->device_features_1_3.computeFullSubgroups = dev_features_1_3.computeFullSubgroups; + p->device_features_1_3.shaderZeroInitializeWorkgroupMemory = dev_features_1_3.shaderZeroInitializeWorkgroupMemory; + p->device_features_1_3.dynamicRendering = dev_features_1_3.dynamicRendering; + p->desc_buf_features.descriptorBuffer = desc_buf_features.descriptorBuffer; p->desc_buf_features.descriptorBufferPushDescriptors = desc_buf_features.descriptorBufferPushDescriptors; From 1b3dae983fe0b240b8fe681e66fe4ee57305e13e Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 17 Feb 2023 03:10:58 +0100 Subject: [PATCH 68/98] lavfi: add lavfi-only Vulkan infrastructure --- libavfilter/Makefile | 6 + libavfilter/vulkan_filter.c | 480 +++++++++++++++----- libavfilter/vulkan_filter.h | 39 +- {libavutil => libavfilter}/vulkan_glslang.c | 
19 +- {libavutil => libavfilter}/vulkan_shaderc.c | 8 +- libavfilter/vulkan_spirv.h | 45 ++ 6 files changed, 473 insertions(+), 124 deletions(-) rename {libavutil => libavfilter}/vulkan_glslang.c (95%) rename {libavutil => libavfilter}/vulkan_shaderc.c (96%) create mode 100644 libavfilter/vulkan_spirv.h diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 19283a71de193..c4b52d02575af 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -623,6 +623,10 @@ OBJS-$(CONFIG_AVSYNCTEST_FILTER) += src_avsynctest.o OBJS-$(CONFIG_AMOVIE_FILTER) += src_movie.o OBJS-$(CONFIG_MOVIE_FILTER) += src_movie.o +# vulkan libs +OBJS-$(CONFIG_LIBGLSLANG) += vulkan_glslang.o +OBJS-$(CONFIG_LIBSHADERC) += vulkan_shaderc.o + # Objects duplicated from other libraries for shared builds SHLIBOBJS += log2_tab.o @@ -636,6 +640,8 @@ SKIPHEADERS-$(CONFIG_QSVVPP) += qsvvpp.h stack_internal.h SKIPHEADERS-$(CONFIG_OPENCL) += opencl.h SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_vpp.h stack_internal.h SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_filter.h +SKIPHEADERS-$(CONFIG_LIBSHADERC) += vulkan_spirv.h +SKIPHEADERS-$(CONFIG_LIBGLSLANG) += vulkan_spirv.h TOOLS = graph2dot TESTPROGS = drawutils filtfmts formats integral diff --git a/libavfilter/vulkan_filter.c b/libavfilter/vulkan_filter.c index e22541bd23a78..b4d8f952b538e 100644 --- a/libavfilter/vulkan_filter.c +++ b/libavfilter/vulkan_filter.c @@ -1,4 +1,6 @@ /* + * Copyright (c) Lynne + * * This file is part of FFmpeg. 
* * FFmpeg is free software; you can redistribute it and/or @@ -18,107 +20,186 @@ #include "vulkan_filter.h" -static int vulkan_filter_set_device(AVFilterContext *avctx, - AVBufferRef *device) +int ff_vk_filter_init_context(AVFilterContext *avctx, FFVulkanContext *s, + AVBufferRef *frames_ref, + int width, int height, enum AVPixelFormat sw_format) { - FFVulkanContext *s = avctx->priv; + int err; + AVHWFramesContext *frames_ctx; + AVHWDeviceContext *device_ctx; + AVVulkanFramesContext *vk_frames; + AVVulkanDeviceContext *vk_dev; + AVBufferRef *device_ref = avctx->hw_device_ctx; + + /* Check if context is reusable as-is */ + if (frames_ref) { + int no_storage = 0; + FFVulkanFunctions *vk; + const VkFormat *sub = av_vkfmt_from_pixfmt(sw_format); + + frames_ctx = (AVHWFramesContext *)frames_ref->data; + device_ctx = (AVHWDeviceContext *)frames_ctx->device_ref->data; + vk_frames = frames_ctx->hwctx; + vk_dev = device_ctx->hwctx; + + /* Basic format validation */ + if (width != frames_ctx->width || + height != frames_ctx->height || + sw_format != frames_ctx->sw_format || + (vk_frames->tiling != VK_IMAGE_TILING_LINEAR && + vk_frames->tiling != VK_IMAGE_TILING_OPTIMAL) || + !(vk_frames->usage & VK_IMAGE_USAGE_SAMPLED_BIT)) { + goto skip; + } - av_buffer_unref(&s->device_ref); + if (vk_frames->usage & VK_IMAGE_USAGE_STORAGE_BIT) + goto accept; - s->device_ref = av_buffer_ref(device); - if (!s->device_ref) - return AVERROR(ENOMEM); + s->extensions = ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions, + vk_dev->nb_enabled_dev_extensions); + err = ff_vk_load_functions(device_ctx, &s->vkfn, s->extensions, 1, 1); + if (err < 0) + return err; + vk = &s->vkfn; + + /* Check if the subformats can do storage */ + for (int i = 0; sub[i] != VK_FORMAT_UNDEFINED; i++) { + VkFormatProperties2 prop = { + .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, + }; + vk->GetPhysicalDeviceFormatProperties2(vk_dev->phys_dev, sub[i], + &prop); + + if (vk_frames->tiling == VK_IMAGE_TILING_LINEAR) 
{ + no_storage |= !(prop.formatProperties.linearTilingFeatures & + VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT); + } else { + no_storage |= !(prop.formatProperties.optimalTilingFeatures & + VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT); + } + } - s->device = (AVHWDeviceContext*)s->device_ref->data; - s->hwctx = s->device->hwctx; + /* Check if it's usable */ + if (no_storage) { +skip: + device_ref = frames_ctx->device_ref; + frames_ref = NULL; + } else { +accept: + frames_ref = av_buffer_ref(frames_ref); + if (!frames_ref) + return AVERROR(ENOMEM); + } + } - return 0; -} + if (!frames_ref) { + if (!device_ref) { + av_log(avctx, AV_LOG_ERROR, + "Vulkan filtering requires a device context!\n"); + return AVERROR(EINVAL); + } -static int vulkan_filter_set_frames(AVFilterContext *avctx, - AVBufferRef *frames) -{ - FFVulkanContext *s = avctx->priv; + frames_ref = av_hwframe_ctx_alloc(device_ref); - av_buffer_unref(&s->frames_ref); + frames_ctx = (AVHWFramesContext *)frames_ref->data; + frames_ctx->format = AV_PIX_FMT_VULKAN; + frames_ctx->sw_format = sw_format; + frames_ctx->width = width; + frames_ctx->height = height; - s->frames_ref = av_buffer_ref(frames); - if (!s->frames_ref) - return AVERROR(ENOMEM); + vk_frames = frames_ctx->hwctx; + vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL; + vk_frames->usage = VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT; - return 0; + err = av_hwframe_ctx_init(frames_ref); + if (err < 0) { + av_buffer_unref(&frames_ref); + return err; + } + + device_ctx = (AVHWDeviceContext *)frames_ctx->device_ref->data; + vk_dev = device_ctx->hwctx; + } + + s->extensions = ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions, + vk_dev->nb_enabled_dev_extensions); + + /** + * libplacebo does not use descriptor buffers. 
+ */ + if (!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) && + strcmp(avctx->filter->name, "libplacebo")) { + av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires that " + "the %s extension is supported!\n", + VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME); + av_buffer_unref(&frames_ref); + return AVERROR(EINVAL); + } + + err = ff_vk_load_functions(device_ctx, &s->vkfn, s->extensions, 1, 1); + if (err < 0) { + av_buffer_unref(&frames_ref); + return err; + } + + s->frames_ref = frames_ref; + s->frames = frames_ctx; + s->hwfc = vk_frames; + s->device = device_ctx; + s->hwctx = device_ctx->hwctx; + + err = ff_vk_load_props(s); + if (err < 0) + av_buffer_unref(&s->frames_ref); + + return err; } int ff_vk_filter_config_input(AVFilterLink *inlink) { - int err; - AVFilterContext *avctx = inlink->dst; - FFVulkanContext *s = avctx->priv; - FFVulkanFunctions *vk = &s->vkfn; AVHWFramesContext *input_frames; + AVFilterContext *avctx = inlink->dst; + FFVulkanContext *s = inlink->dst->priv; if (!inlink->hw_frames_ctx) { - av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a " + av_log(inlink->dst, AV_LOG_ERROR, "Vulkan filtering requires a " "hardware frames context on the input.\n"); return AVERROR(EINVAL); } - /* Extract the device and default output format from the first input. */ - if (avctx->inputs[0] != inlink) - return 0; - input_frames = (AVHWFramesContext *)inlink->hw_frames_ctx->data; if (input_frames->format != AV_PIX_FMT_VULKAN) return AVERROR(EINVAL); - err = vulkan_filter_set_device(avctx, input_frames->device_ref); - if (err < 0) - return err; - err = vulkan_filter_set_frames(avctx, inlink->hw_frames_ctx); - if (err < 0) - return err; - - s->extensions = ff_vk_extensions_to_mask(s->hwctx->enabled_dev_extensions, - s->hwctx->nb_enabled_dev_extensions); - - err = ff_vk_load_functions(s->device, &s->vkfn, s->extensions, 1, 1); - if (err < 0) - return err; + /* Extract the device and default output format from the first input. 
*/ + if (avctx->inputs[0] != inlink) + return 0; - vk->GetPhysicalDeviceProperties(s->hwctx->phys_dev, &s->props); - vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops); + /* Save the ref, without reffing it */ + s->input_frames_ref = inlink->hw_frames_ctx; - /* Default output parameters match input parameters. */ - s->input_format = input_frames->sw_format; - if (s->output_format == AV_PIX_FMT_NONE) - s->output_format = input_frames->sw_format; - if (!s->output_width) - s->output_width = inlink->w; - if (!s->output_height) - s->output_height = inlink->h; + /* Defaults */ + s->output_format = input_frames->sw_format; + s->output_width = inlink->w; + s->output_height = inlink->h; return 0; } -int ff_vk_filter_config_output_inplace(AVFilterLink *outlink) +int ff_vk_filter_config_output(AVFilterLink *outlink) { int err; - AVFilterContext *avctx = outlink->src; - FFVulkanContext *s = avctx->priv; + FFVulkanContext *s = outlink->src->priv; av_buffer_unref(&outlink->hw_frames_ctx); - if (!s->device_ref) { - if (!avctx->hw_device_ctx) { - av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a " - "Vulkan device.\n"); - return AVERROR(EINVAL); - } - - err = vulkan_filter_set_device(avctx, avctx->hw_device_ctx); - if (err < 0) - return err; - } + err = ff_vk_filter_init_context(outlink->src, s, s->input_frames_ref, + s->output_width, s->output_height, + s->output_format); + if (err < 0) + return err; outlink->hw_frames_ctx = av_buffer_ref(s->frames_ref); if (!outlink->hw_frames_ctx) @@ -127,65 +208,246 @@ int ff_vk_filter_config_output_inplace(AVFilterLink *outlink) outlink->w = s->output_width; outlink->h = s->output_height; - return 0; + return err; } -int ff_vk_filter_config_output(AVFilterLink *outlink) +int ff_vk_filter_init(AVFilterContext *avctx) { - int err; - AVFilterContext *avctx = outlink->src; FFVulkanContext *s = avctx->priv; - AVBufferRef *output_frames_ref; - AVHWFramesContext *output_frames; - - av_buffer_unref(&outlink->hw_frames_ctx); 
- if (!s->device_ref) { - if (!avctx->hw_device_ctx) { - av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a " - "Vulkan device.\n"); - return AVERROR(EINVAL); - } + s->output_format = AV_PIX_FMT_NONE; - err = vulkan_filter_set_device(avctx, avctx->hw_device_ctx); - if (err < 0) - return err; - } + return 0; +} - output_frames_ref = av_hwframe_ctx_alloc(s->device_ref); - if (!output_frames_ref) { - err = AVERROR(ENOMEM); - goto fail; +int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanPipeline *pl, AVFrame *out_f, AVFrame *in_f, + VkSampler sampler, void *push_src, size_t push_size) +{ + int err = 0; + FFVulkanFunctions *vk = &vkctx->vkfn; + VkImageView in_views[AV_NUM_DATA_POINTERS]; + VkImageView out_views[AV_NUM_DATA_POINTERS]; + VkImageMemoryBarrier2 img_bar[37]; + int nb_img_bar = 0; + + /* Update descriptors and init the exec context */ + FFVkExecContext *exec = ff_vk_exec_get(e); + ff_vk_exec_start(vkctx, exec); + + ff_vk_exec_bind_pipeline(vkctx, exec, pl); + + if (push_src) + ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT, + 0, push_size, push_src); + + if (in_f) { + RET(ff_vk_exec_add_dep_frame(vkctx, exec, in_f, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(vkctx, exec, in_views, in_f)); + ff_vk_update_descriptor_img_array(vkctx, pl, exec, in_f, in_views, 0, 0, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + sampler); + ff_vk_frame_barrier(vkctx, exec, in_f, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_QUEUE_FAMILY_IGNORED); } - output_frames = (AVHWFramesContext*)output_frames_ref->data; - output_frames->format = AV_PIX_FMT_VULKAN; - output_frames->sw_format = s->output_format; - output_frames->width = s->output_width; - output_frames->height = s->output_height; + 
RET(ff_vk_exec_add_dep_frame(vkctx, exec, out_f, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(vkctx, exec, out_views, out_f)); + ff_vk_update_descriptor_img_array(vkctx, pl, exec, out_f, out_views, 0, !!in_f, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + ff_vk_frame_barrier(vkctx, exec, out_f, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + vk->CmdDispatch(exec->buf, + FFALIGN(vkctx->output_width, pl->wg_size[0])/pl->wg_size[0], + FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1], + pl->wg_size[2]); + + return ff_vk_exec_submit(vkctx, exec); +fail: + ff_vk_exec_discard_deps(vkctx, exec); + return err; +} - err = av_hwframe_ctx_init(output_frames_ref); - if (err < 0) { - av_log(avctx, AV_LOG_ERROR, "Failed to initialise output " - "frames: %d.\n", err); - goto fail; +int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanPipeline *pls[2], + AVFrame *out, AVFrame *tmp, AVFrame *in, + VkSampler sampler, void *push_src, size_t push_size) +{ + int err = 0; + FFVulkanFunctions *vk = &vkctx->vkfn; + VkImageView in_views[AV_NUM_DATA_POINTERS]; + VkImageView tmp_views[AV_NUM_DATA_POINTERS]; + VkImageView out_views[AV_NUM_DATA_POINTERS]; + VkImageMemoryBarrier2 img_bar[37]; + int nb_img_bar = 0; + + /* Update descriptors and init the exec context */ + FFVkExecContext *exec = ff_vk_exec_get(e); + ff_vk_exec_start(vkctx, exec); + + RET(ff_vk_exec_add_dep_frame(vkctx, exec, in, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_exec_add_dep_frame(vkctx, exec, tmp, + 
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_exec_add_dep_frame(vkctx, exec, out, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + + RET(ff_vk_create_imageviews(vkctx, exec, in_views, in)); + RET(ff_vk_create_imageviews(vkctx, exec, tmp_views, tmp)); + RET(ff_vk_create_imageviews(vkctx, exec, out_views, out)); + + ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_QUEUE_FAMILY_IGNORED); + ff_vk_frame_barrier(vkctx, exec, tmp, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + for (int i = 0; i < 2; i++) { + FFVulkanPipeline *pl = pls[i]; + AVFrame *src_f = !i ? in : tmp; + AVFrame *dst_f = !i ? tmp : out; + VkImageView *src_views = !i ? in_views : tmp_views; + VkImageView *dst_views = !i ? tmp_views : out_views; + + ff_vk_exec_bind_pipeline(vkctx, exec, pl); + + if (push_src) + ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT, + 0, push_size, push_src); + + ff_vk_update_descriptor_img_array(vkctx, pl, exec, src_f, src_views, 0, 0, + !i ? 
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : + VK_IMAGE_LAYOUT_GENERAL, + sampler); + ff_vk_update_descriptor_img_array(vkctx, pl, exec, dst_f, dst_views, 0, 1, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + + vk->CmdDispatch(exec->buf, + FFALIGN(vkctx->output_width, pl->wg_size[0])/pl->wg_size[0], + FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1], + pl->wg_size[2]); } - outlink->hw_frames_ctx = output_frames_ref; - outlink->w = s->output_width; - outlink->h = s->output_height; - - return 0; + return ff_vk_exec_submit(vkctx, exec); fail: - av_buffer_unref(&output_frames_ref); + ff_vk_exec_discard_deps(vkctx, exec); return err; } -int ff_vk_filter_init(AVFilterContext *avctx) +int ff_vk_filter_process_Nin(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanPipeline *pl, + AVFrame *out, AVFrame *in[], int nb_in, + VkSampler sampler, void *push_src, size_t push_size) { - FFVulkanContext *s = avctx->priv; - - s->output_format = AV_PIX_FMT_NONE; + int err = 0; + FFVulkanFunctions *vk = &vkctx->vkfn; + VkImageView in_views[16][AV_NUM_DATA_POINTERS]; + VkImageView out_views[AV_NUM_DATA_POINTERS]; + VkImageMemoryBarrier2 img_bar[128]; + int nb_img_bar = 0; + + /* Update descriptors and init the exec context */ + FFVkExecContext *exec = ff_vk_exec_get(e); + ff_vk_exec_start(vkctx, exec); + + /* Inputs */ + for (int i = 0; i < nb_in; i++) { + RET(ff_vk_exec_add_dep_frame(vkctx, exec, in[i], + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(vkctx, exec, in_views[i], in[i])); + + ff_vk_frame_barrier(vkctx, exec, in[i], img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_QUEUE_FAMILY_IGNORED); + } - return 0; + /* Output */ + RET(ff_vk_exec_add_dep_frame(vkctx, exec, out, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + 
RET(ff_vk_create_imageviews(vkctx, exec, out_views, out)); + ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + ff_vk_exec_bind_pipeline(vkctx, exec, pl); + + if (push_src) + ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT, + 0, push_size, push_src); + + for (int i = 0; i < nb_in; i++) + ff_vk_update_descriptor_img_array(vkctx, pl, exec, in[i], in_views[i], 0, i, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + sampler); + + ff_vk_update_descriptor_img_array(vkctx, pl, exec, out, out_views, 0, nb_in, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + + vk->CmdDispatch(exec->buf, + FFALIGN(vkctx->output_width, pl->wg_size[0])/pl->wg_size[0], + FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1], + pl->wg_size[2]); + + return ff_vk_exec_submit(vkctx, exec); +fail: + ff_vk_exec_discard_deps(vkctx, exec); + return err; } diff --git a/libavfilter/vulkan_filter.h b/libavfilter/vulkan_filter.h index bfdb9b2d7d84e..d2c14601d94de 100644 --- a/libavfilter/vulkan_filter.h +++ b/libavfilter/vulkan_filter.h @@ -1,4 +1,6 @@ /* + * Copyright (c) Lynne + * * This file is part of FFmpeg. 
* * FFmpeg is free software; you can redistribute it and/or @@ -26,9 +28,38 @@ /** * General lavfi IO functions */ -int ff_vk_filter_init (AVFilterContext *avctx); -int ff_vk_filter_config_input (AVFilterLink *inlink); -int ff_vk_filter_config_output (AVFilterLink *outlink); -int ff_vk_filter_config_output_inplace(AVFilterLink *outlink); +int ff_vk_filter_init (AVFilterContext *avctx); +int ff_vk_filter_config_input (AVFilterLink *inlink); +int ff_vk_filter_config_output(AVFilterLink *outlink); + +/** + * Can be called manually, if not using ff_vk_filter_config_output. + */ +int ff_vk_filter_init_context(AVFilterContext *avctx, FFVulkanContext *s, + AVBufferRef *frames_ref, + int width, int height, enum AVPixelFormat sw_format); + +/** + * Submit a compute shader with a zero/one input and single out for execution. + */ +int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanPipeline *pl, AVFrame *out_f, AVFrame *in_f, + VkSampler sampler, void *push_src, size_t push_size); + +/** + * Submit a compute shader with a single in and single out with 2 stages. 
+ */ +int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanPipeline *pls[2], + AVFrame *out, AVFrame *tmp, AVFrame *in, + VkSampler sampler, void *push_src, size_t push_size); + +/** + * Up to 16 inputs, one output + */ +int ff_vk_filter_process_Nin(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanPipeline *pl, + AVFrame *out, AVFrame *in[], int nb_in, + VkSampler sampler, void *push_src, size_t push_size); #endif /* AVFILTER_VULKAN_FILTER_H */ diff --git a/libavutil/vulkan_glslang.c b/libavfilter/vulkan_glslang.c similarity index 95% rename from libavutil/vulkan_glslang.c rename to libavfilter/vulkan_glslang.c index e7785f6d4055a..845a530ee0d8d 100644 --- a/libavutil/vulkan_glslang.c +++ b/libavfilter/vulkan_glslang.c @@ -21,8 +21,9 @@ #include #include -#include "mem.h" -#include "avassert.h" +#include "vulkan_spirv.h" +#include "libavutil/mem.h" +#include "libavutil/avassert.h" static pthread_mutex_t glslc_mutex = PTHREAD_MUTEX_INITIALIZER; static int glslc_refcount = 0; @@ -176,11 +177,13 @@ static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx, av_assert0(glslc_refcount); + *opaque = NULL; + if (!(glslc_shader = glslang_shader_create(&glslc_input))) return AVERROR(ENOMEM); if (!glslang_shader_preprocess(glslc_shader, &glslc_input)) { - ff_vk_print_shader(avctx, shd, AV_LOG_WARNING); + ff_vk_shader_print(avctx, shd, AV_LOG_WARNING); av_log(avctx, AV_LOG_ERROR, "Unable to preprocess shader: %s (%s)!\n", glslang_shader_get_info_log(glslc_shader), glslang_shader_get_info_debug_log(glslc_shader)); @@ -189,7 +192,7 @@ static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx, } if (!glslang_shader_parse(glslc_shader, &glslc_input)) { - ff_vk_print_shader(avctx, shd, AV_LOG_WARNING); + ff_vk_shader_print(avctx, shd, AV_LOG_WARNING); av_log(avctx, AV_LOG_ERROR, "Unable to parse shader: %s (%s)!\n", glslang_shader_get_info_log(glslc_shader), glslang_shader_get_info_debug_log(glslc_shader)); @@ -206,7 +209,7 @@ 
static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx, if (!glslang_program_link(glslc_program, GLSLANG_MSG_SPV_RULES_BIT | GLSLANG_MSG_VULKAN_RULES_BIT)) { - ff_vk_print_shader(avctx, shd, AV_LOG_WARNING); + ff_vk_shader_print(avctx, shd, AV_LOG_WARNING); av_log(avctx, AV_LOG_ERROR, "Unable to link shader: %s (%s)!\n", glslang_program_get_info_log(glslc_program), glslang_program_get_info_debug_log(glslc_program)); @@ -219,10 +222,10 @@ static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx, messages = glslang_program_SPIRV_get_messages(glslc_program); if (messages) { - ff_vk_print_shader(avctx, shd, AV_LOG_WARNING); + ff_vk_shader_print(avctx, shd, AV_LOG_WARNING); av_log(avctx, AV_LOG_WARNING, "%s\n", messages); } else { - ff_vk_print_shader(avctx, shd, AV_LOG_VERBOSE); + ff_vk_shader_print(avctx, shd, AV_LOG_VERBOSE); } glslang_shader_delete(glslc_shader); @@ -257,7 +260,7 @@ static void glslc_uninit(FFVkSPIRVCompiler **ctx) av_freep(ctx); } -static FFVkSPIRVCompiler *ff_vk_glslang_init(void) +FFVkSPIRVCompiler *ff_vk_glslang_init(void) { FFVkSPIRVCompiler *ret = av_mallocz(sizeof(*ret)); if (!ret) diff --git a/libavutil/vulkan_shaderc.c b/libavfilter/vulkan_shaderc.c similarity index 96% rename from libavutil/vulkan_shaderc.c rename to libavfilter/vulkan_shaderc.c index bd40edf187671..38be1030ad20d 100644 --- a/libavutil/vulkan_shaderc.c +++ b/libavfilter/vulkan_shaderc.c @@ -18,7 +18,8 @@ #include -#include "mem.h" +#include "libavutil/mem.h" +#include "vulkan_spirv.h" static int shdc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx, FFVkSPIRVShader *shd, uint8_t **data, @@ -43,6 +44,7 @@ static int shdc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx, }; shaderc_compile_options_t opts = shaderc_compile_options_initialize(); + *opaque = NULL; if (!opts) return AVERROR(ENOMEM); @@ -65,7 +67,7 @@ static int shdc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx, loglevel = err ? AV_LOG_ERROR : warn ? 
AV_LOG_WARNING : AV_LOG_VERBOSE; - ff_vk_print_shader(avctx, shd, loglevel); + ff_vk_shader_print(avctx, shd, loglevel); if (message && (err || warn)) av_log(avctx, loglevel, "%s\n", message); status = ret < FF_ARRAY_ELEMS(shdc_result) ? shdc_result[ret] : "unknown"; @@ -104,7 +106,7 @@ static void shdc_uninit(FFVkSPIRVCompiler **ctx) av_freep(ctx); } -static FFVkSPIRVCompiler *ff_vk_shaderc_init(void) +FFVkSPIRVCompiler *ff_vk_shaderc_init(void) { FFVkSPIRVCompiler *ret = av_mallocz(sizeof(*ret)); if (!ret) diff --git a/libavfilter/vulkan_spirv.h b/libavfilter/vulkan_spirv.h new file mode 100644 index 0000000000000..5638cd9696a0c --- /dev/null +++ b/libavfilter/vulkan_spirv.h @@ -0,0 +1,45 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVFILTER_VULKAN_SPIRV_H +#define AVFILTER_VULKAN_SPIRV_H + +#include "libavutil/vulkan.h" + +#include "vulkan.h" +#include "config.h" + +typedef struct FFVkSPIRVCompiler { + void *priv; + int (*compile_shader)(struct FFVkSPIRVCompiler *ctx, void *avctx, + struct FFVkSPIRVShader *shd, uint8_t **data, + size_t *size, const char *entrypoint, void **opaque); + void (*free_shader)(struct FFVkSPIRVCompiler *ctx, void **opaque); + void (*uninit)(struct FFVkSPIRVCompiler **ctx); +} FFVkSPIRVCompiler; + +#if CONFIG_LIBGLSLANG +FFVkSPIRVCompiler *ff_vk_glslang_init(void); +#define ff_vk_spirv_init ff_vk_glslang_init +#endif +#if CONFIG_LIBSHADERC +FFVkSPIRVCompiler *ff_vk_shaderc_init(void); +#define ff_vk_spirv_init ff_vk_shaderc_init +#endif + +#endif /* AVFILTER_VULKAN_SPIRV_H */ From 400e08d7abee3468289347bb39ee195221c623e1 Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 17 Feb 2023 03:11:19 +0100 Subject: [PATCH 69/98] avgblur_vulkan: port for the rewrite --- libavfilter/vf_avgblur_vulkan.c | 365 +++++++++--------------------- libavfilter/vf_chromaber_vulkan.c | 2 +- 2 files changed, 102 insertions(+), 265 deletions(-) diff --git a/libavfilter/vf_avgblur_vulkan.c b/libavfilter/vf_avgblur_vulkan.c index 6a54d158ce1fe..4873824c70476 100644 --- a/libavfilter/vf_avgblur_vulkan.c +++ b/libavfilter/vf_avgblur_vulkan.c @@ -1,4 +1,6 @@ /* + * Copyright (c) Lynne + * * This file is part of FFmpeg. 
* * FFmpeg is free software; you can redistribute it and/or @@ -19,23 +21,24 @@ #include "libavutil/random_seed.h" #include "libavutil/opt.h" #include "vulkan_filter.h" +#include "vulkan_spirv.h" #include "internal.h" -#define CGS 32 - typedef struct AvgBlurVulkanContext { FFVulkanContext vkctx; int initialized; + FFVkExecPool e; FFVkQueueFamilyCtx qf; - FFVkExecContext *exec; - FFVulkanPipeline *pl_hor; - FFVulkanPipeline *pl_ver; + VkSampler sampler; + FFVulkanPipeline pl; + FFVkSPIRVShader shd; - /* Shader updators, must be in the main filter struct */ - VkDescriptorImageInfo input_images[3]; - VkDescriptorImageInfo tmp_images[3]; - VkDescriptorImageInfo output_images[3]; + /* Push constants / options */ + struct { + float filter_norm[4]; + int32_t filter_len[2]; + } opts; int size_x; int size_y; @@ -43,46 +46,53 @@ typedef struct AvgBlurVulkanContext { } AvgBlurVulkanContext; static const char blur_kernel[] = { - C(0, shared vec4 cache[DIR(gl_WorkGroupSize) + FILTER_RADIUS*2 + 1]; ) - C(0, ) - C(0, void distort(const ivec2 pos, const int idx) ) - C(0, { ) - C(1, const uint cp = DIR(gl_LocalInvocationID) + FILTER_RADIUS; ) - C(0, ) - C(1, cache[cp] = texture(input_img[idx], pos); ) - C(0, ) - C(1, const ivec2 loc_l = pos - INC(FILTER_RADIUS); ) - C(1, cache[cp - FILTER_RADIUS] = texture(input_img[idx], loc_l); ) - C(0, ) - C(1, const ivec2 loc_h = pos + INC(DIR(gl_WorkGroupSize)); ) - C(1, cache[cp + DIR(gl_WorkGroupSize)] = texture(input_img[idx], loc_h); ) - C(0, ) - C(1, barrier(); ) - C(0, ) - C(1, vec4 sum = vec4(0); ) - C(1, for (int p = -FILTER_RADIUS; p <= FILTER_RADIUS; p++) ) - C(2, sum += cache[cp + p]; ) - C(0, ) - C(1, sum /= vec4(FILTER_RADIUS*2 + 1); ) - C(1, imageStore(output_img[idx], pos, sum); ) - C(0, } ) + C(0, void distort(const ivec2 pos, const int idx) ) + C(0, { ) + C(1, vec4 sum = vec4(0); ) + C(1, for (int y = -filter_len.y; y <= filter_len.y; y++) ) + C(1, for (int x = -filter_len.x; x <= filter_len.x; x++) ) + C(2, sum += 
texture(input_img[idx], pos + ivec2(x, y)); ) + C(0, ) + C(1, imageStore(output_img[idx], pos, sum * filter_norm); ) + C(0, } ) }; static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) { int err; - FFVkSPIRVShader *shd; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque; AvgBlurVulkanContext *s = ctx->priv; FFVulkanContext *vkctx = &s->vkctx; const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + FFVkSPIRVShader *shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } - FFVulkanDescriptorSetBinding desc_i[2] = { + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_LINEAR)); + RET(ff_vk_shader_init(&s->pl, &s->shd, "avgblur_compute", + VK_SHADER_STAGE_COMPUTE_BIT, 0)); + shd = &s->shd; + + ff_vk_shader_set_compute_sizes(shd, 32, 1, 1); + + desc = (FFVulkanDescriptorSetBinding []) { { .name = "input_img", .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .dimensions = 2, .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), }, { .name = "output_img", @@ -95,244 +105,68 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) }, }; - ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0); + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0)); - desc_i[0].sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_LINEAR); - if (!desc_i[0].sampler) - return AVERROR_EXTERNAL; + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, vec4 filter_norm; ); + GLSLC(1, ivec2 filter_len; ); + GLSLC(0, }; ); + GLSLC(0, ); - { /* Create shader for the horizontal pass */ - desc_i[0].updater = s->input_images; - desc_i[1].updater = s->tmp_images; - - s->pl_hor = 
ff_vk_create_pipeline(vkctx, &s->qf); - if (!s->pl_hor) - return AVERROR(ENOMEM); - - shd = ff_vk_init_shader(s->pl_hor, "avgblur_compute_hor", - VK_SHADER_STAGE_COMPUTE_BIT); - if (!shd) - return AVERROR(ENOMEM); - - ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 }); - - RET(ff_vk_add_descriptor_set(vkctx, s->pl_hor, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0)); - - GLSLF(0, #define FILTER_RADIUS (%i) ,s->size_x - 1); - GLSLC(0, #define INC(x) (ivec2(x, 0)) ); - GLSLC(0, #define DIR(var) (var.x) ); - GLSLD( blur_kernel ); - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 size; ); - GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - for (int i = 0; i < planes; i++) { - GLSLC(0, ); - GLSLF(1, size = imageSize(output_img[%i]); ,i); - GLSLC(1, if (IS_WITHIN(pos, size)) { ); - if (s->planes & (1 << i)) { - GLSLF(2, distort(pos, %i); ,i); - } else { - GLSLF(2, vec4 res = texture(input_img[%i], pos); ,i); - GLSLF(2, imageStore(output_img[%i], pos, res); ,i); - } - GLSLC(1, } ); - } - GLSLC(0, } ); - - RET(ff_vk_compile_shader(vkctx, shd, "main")); - - RET(ff_vk_init_pipeline_layout(vkctx, s->pl_hor)); - RET(ff_vk_init_compute_pipeline(vkctx, s->pl_hor)); - } + ff_vk_add_push_constant(&s->pl, 0, sizeof(s->opts), + VK_SHADER_STAGE_COMPUTE_BIT); - { /* Create shader for the vertical pass */ - desc_i[0].updater = s->tmp_images; - desc_i[1].updater = s->output_images; - - s->pl_ver = ff_vk_create_pipeline(vkctx, &s->qf); - if (!s->pl_ver) - return AVERROR(ENOMEM); - - shd = ff_vk_init_shader(s->pl_ver, "avgblur_compute_ver", - VK_SHADER_STAGE_COMPUTE_BIT); - if (!shd) - return AVERROR(ENOMEM); - - ff_vk_set_compute_shader_sizes(shd, (int [3]){ 1, CGS, 1 }); - - RET(ff_vk_add_descriptor_set(vkctx, s->pl_ver, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0)); - - GLSLF(0, #define FILTER_RADIUS (%i) ,s->size_y - 1); - GLSLC(0, #define INC(x) (ivec2(0, x)) ); - GLSLC(0, #define DIR(var) (var.y) ); - GLSLD( blur_kernel ); - GLSLC(0, void main() ); - GLSLC(0, { 
); - GLSLC(1, ivec2 size; ); - GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - for (int i = 0; i < planes; i++) { - GLSLC(0, ); - GLSLF(1, size = imageSize(output_img[%i]); ,i); - GLSLC(1, if (IS_WITHIN(pos, size)) { ); - if (s->planes & (1 << i)) { - GLSLF(2, distort(pos, %i); ,i); - } else { - GLSLF(2, vec4 res = texture(input_img[%i], pos); ,i); - GLSLF(2, imageStore(output_img[%i], pos, res); ,i); - } - GLSLC(1, } ); + GLSLD( blur_kernel ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + for (int i = 0; i < planes; i++) { + GLSLC(0, ); + GLSLF(1, size = imageSize(output_img[%i]); ,i); + GLSLC(1, if (!IS_WITHIN(pos, size)) ); + GLSLC(2, return; ); + if (s->planes & (1 << i)) { + GLSLF(1, distort(pos, %i); ,i); + } else { + GLSLF(1, vec4 res = texture(input_img[%i], pos); ,i); + GLSLF(1, imageStore(output_img[%i], pos, res); ,i); } - GLSLC(0, } ); - - RET(ff_vk_compile_shader(vkctx, shd, "main")); - - RET(ff_vk_init_pipeline_layout(vkctx, s->pl_ver)); - RET(ff_vk_init_compute_pipeline(vkctx, s->pl_ver)); } + GLSLC(0, } ); + + RET(spv->compile_shader(spv, ctx, &s->shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_create(vkctx, &s->shd, spv_data, spv_len, "main")); - /* Execution context */ - RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf)); + RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, &s->shd)); + RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl)); s->initialized = 1; + s->opts.filter_len[0] = s->size_x - 1; + s->opts.filter_len[1] = s->size_y - 1; + + s->opts.filter_norm[0] = s->opts.filter_len[0]*2 + 1; + s->opts.filter_norm[0] = 1.0/(s->opts.filter_norm[0]*s->opts.filter_norm[0]); + s->opts.filter_norm[1] = s->opts.filter_norm[0]; + s->opts.filter_norm[2] = s->opts.filter_norm[0]; + s->opts.filter_norm[3] = s->opts.filter_norm[0]; return 0; fail: - return err; -} - -static int process_frames(AVFilterContext *avctx, AVFrame 
*out_f, AVFrame *tmp_f, AVFrame *in_f) -{ - int err; - VkCommandBuffer cmd_buf; - AvgBlurVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - FFVulkanFunctions *vk = &vkctx->vkfn; - AVVkFrame *in = (AVVkFrame *)in_f->data[0]; - AVVkFrame *tmp = (AVVkFrame *)tmp_f->data[0]; - AVVkFrame *out = (AVVkFrame *)out_f->data[0]; - - const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format); - const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format); - - int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - - /* Update descriptors and init the exec context */ - ff_vk_start_exec_recording(vkctx, s->exec); - cmd_buf = ff_vk_get_exec_buf(s->exec); - - for (int i = 0; i < planes; i++) { - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->input_images[i].imageView, in->img[i], - input_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->tmp_images[i].imageView, tmp->img[i], - output_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->output_images[i].imageView, out->img[i], - output_formats[i], - ff_comp_identity_map)); - - s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->tmp_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - } - - ff_vk_update_descriptor_set(vkctx, s->pl_hor, 0); - ff_vk_update_descriptor_set(vkctx, s->pl_ver, 0); - - for (int i = 0; i < planes; i++) { - VkImageMemoryBarrier bar[] = { - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = in->layout[i], - .newLayout = s->input_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = in->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, 
- }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT, - .oldLayout = tmp->layout[i], - .newLayout = s->tmp_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = tmp->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .oldLayout = out->layout[i], - .newLayout = s->output_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = out->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - }; - - vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, - 0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar); - - in->layout[i] = bar[0].newLayout; - in->access[i] = bar[0].dstAccessMask; - - tmp->layout[i] = bar[1].newLayout; - tmp->access[i] = bar[1].dstAccessMask; - - out->layout[i] = bar[2].newLayout; - out->access[i] = bar[2].dstAccessMask; - } - - ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl_hor); - - vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS, - s->vkctx.output_height, 1); - - ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl_ver); - - vk->CmdDispatch(cmd_buf, s->vkctx.output_width, - FFALIGN(s->vkctx.output_height, CGS)/CGS, 1); - - ff_vk_add_exec_dep(vkctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(vkctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - - err = ff_vk_submit_exec_queue(vkctx,s->exec); - if (err) - return err; - - ff_vk_qf_rotate(&s->qf); + if (spv_opaque) + spv->free_shader(spv, 
&spv_opaque); + if (spv) + spv->uninit(&spv); return err; - -fail: - ff_vk_discard_exec_deps(s->exec); - return err; } static int avgblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) { int err; - AVFrame *tmp = NULL, *out = NULL; + AVFrame *out = NULL; AVFilterContext *ctx = link->dst; AvgBlurVulkanContext *s = ctx->priv; AVFilterLink *outlink = ctx->outputs[0]; @@ -343,29 +177,22 @@ static int avgblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) goto fail; } - tmp = ff_get_video_buffer(outlink, outlink->w, outlink->h); - if (!tmp) { - err = AVERROR(ENOMEM); - goto fail; - } - if (!s->initialized) RET(init_filter(ctx, in)); - RET(process_frames(ctx, out, tmp, in)); + RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, + out, in, s->sampler, &s->opts, sizeof(s->opts))); err = av_frame_copy_props(out, in); if (err < 0) goto fail; av_frame_free(&in); - av_frame_free(&tmp); return ff_filter_frame(outlink, out); fail: av_frame_free(&in); - av_frame_free(&tmp); av_frame_free(&out); return err; } @@ -373,6 +200,16 @@ static int avgblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) static void avgblur_vulkan_uninit(AVFilterContext *avctx) { AvgBlurVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl); + ff_vk_shader_free(vkctx, &s->shd); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); ff_vk_uninit(&s->vkctx); @@ -382,9 +219,9 @@ static void avgblur_vulkan_uninit(AVFilterContext *avctx) #define OFFSET(x) offsetof(AvgBlurVulkanContext, x) #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) static const AVOption avgblur_vulkan_options[] = { - { "sizeX", "Set horizontal radius", OFFSET(size_x), AV_OPT_TYPE_INT, {.i64 = 3}, 1, 32, .flags = FLAGS }, + { "sizeX", "Set horizontal radius", OFFSET(size_x), AV_OPT_TYPE_INT, { .i64 = 3 }, 1, 32, .flags = 
FLAGS }, + { "sizeY", "Set vertical radius", OFFSET(size_y), AV_OPT_TYPE_INT, { .i64 = 3 }, 1, 32, .flags = FLAGS }, { "planes", "Set planes to filter (bitmask)", OFFSET(planes), AV_OPT_TYPE_INT, {.i64 = 0xF}, 0, 0xF, .flags = FLAGS }, - { "sizeY", "Set vertical radius", OFFSET(size_y), AV_OPT_TYPE_INT, {.i64 = 3}, 1, 32, .flags = FLAGS }, { NULL }, }; diff --git a/libavfilter/vf_chromaber_vulkan.c b/libavfilter/vf_chromaber_vulkan.c index 62b99cc4d9190..5ebdaf4e434a0 100644 --- a/libavfilter/vf_chromaber_vulkan.c +++ b/libavfilter/vf_chromaber_vulkan.c @@ -59,7 +59,7 @@ static const char distort_chroma_kernel[] = { C(0, { ) C(1, vec2 p = ((vec2(pos)/vec2(size)) - 0.5f)*2.0f; ) C(1, float d = sqrt(p.x*p.x + p.y*p.y); ) - C(1, p *= d / (d* dist); ) + C(1, p *= d / (d*dist); ) C(1, vec4 res = texture(input_img[idx], (p/2.0f) + 0.5f); ) C(1, imageStore(output_img[idx], pos, res); ) C(0, } ) From 30a310fab50de5b0fba59709ec2c55d40cc1f94a Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 17 Feb 2023 03:11:43 +0100 Subject: [PATCH 70/98] blend_vulkan: port for the rewrite --- libavfilter/vf_blend_vulkan.c | 316 +++++++++++----------------------- 1 file changed, 103 insertions(+), 213 deletions(-) diff --git a/libavfilter/vf_blend_vulkan.c b/libavfilter/vf_blend_vulkan.c index 4cee688a22fcf..170992c3ef1ff 100644 --- a/libavfilter/vf_blend_vulkan.c +++ b/libavfilter/vf_blend_vulkan.c @@ -1,5 +1,7 @@ /* * copyright (c) 2021-2022 Wu Jianhua + * Copyright (c) Lynne + * * The blend modes are based on the blend.c. * * This file is part of FFmpeg. 
@@ -22,12 +24,11 @@ #include "libavutil/random_seed.h" #include "libavutil/opt.h" #include "vulkan_filter.h" +#include "vulkan_spirv.h" #include "internal.h" #include "framesync.h" #include "blend.h" -#define CGS 32 - #define IN_TOP 0 #define IN_BOTTOM 1 @@ -40,20 +41,18 @@ typedef struct FilterParamsVulkan { typedef struct BlendVulkanContext { FFVulkanContext vkctx; - FFVkQueueFamilyCtx qf; - FFVkExecContext *exec; - FFVulkanPipeline *pl; FFFrameSync fs; - VkDescriptorImageInfo top_images[3]; - VkDescriptorImageInfo bottom_images[3]; - VkDescriptorImageInfo output_images[3]; + int initialized; + FFVulkanPipeline pl; + FFVkExecPool e; + FFVkQueueFamilyCtx qf; + FFVkSPIRVShader shd; + VkSampler sampler; FilterParamsVulkan params[4]; double all_opacity; enum BlendMode all_mode; - - int initialized; } BlendVulkanContext; #define DEFINE_BLEND_MODE(MODE, EXPR) \ @@ -125,223 +124,103 @@ static int process_command(AVFilterContext *ctx, const char *cmd, const char *ar static av_cold int init_filter(AVFilterContext *avctx) { int err = 0; - FFVkSampler *sampler; - FFVkSPIRVShader *shd; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque; BlendVulkanContext *s = avctx->priv; FFVulkanContext *vkctx = &s->vkctx; const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + FFVkSPIRVShader *shd = &s->shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; - ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0); - - sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_LINEAR); - if (!sampler) + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); return AVERROR_EXTERNAL; - - s->pl = ff_vk_create_pipeline(vkctx, &s->qf); - if (!s->pl) - return AVERROR(ENOMEM); - - { - FFVulkanDescriptorSetBinding image_descs[] = { - { - .name = "top_images", - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->top_images, 
- .sampler = sampler, - }, - { - .name = "bottom_images", - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->bottom_images, - .sampler = sampler, - }, - { - .name = "output_images", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), - .mem_quali = "writeonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->output_images, - }, - }; - - shd = ff_vk_init_shader(s->pl, "blend_compute", image_descs[0].stages); - if (!shd) - return AVERROR(ENOMEM); - - ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, CGS, 1 }); - RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0)); - - for (int i = 0, j = 0; i < planes; i++) { - for (j = 0; j < i; j++) - if (s->params[i].blend_func == s->params[j].blend_func) - break; - /* note: the bracket is needed, for GLSLD is a macro with multiple statements. 
*/ - if (j == i) { - GLSLD(s->params[i].blend_func); - } - } - - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 size; ); - GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - for (int i = 0; i < planes; i++) { - GLSLC(0, ); - GLSLF(1, size = imageSize(output_images[%i]); ,i); - GLSLC(1, if (IS_WITHIN(pos, size)) { ); - GLSLF(2, const vec4 top = texture(top_images[%i], pos); ,i); - GLSLF(2, const vec4 bottom = texture(bottom_images[%i], pos); ,i); - GLSLF(2, const float opacity = %f; ,s->params[i].opacity); - GLSLF(2, vec4 dst = %s(top, bottom, opacity); ,s->params[i].blend); - GLSLC(0, ); - GLSLF(2, imageStore(output_images[%i], pos, dst); ,i); - GLSLC(1, } ); - } - GLSLC(0, } ); - - RET(ff_vk_compile_shader(vkctx, shd, "main")); - RET(ff_vk_init_pipeline_layout(vkctx, s->pl)); - RET(ff_vk_init_compute_pipeline(vkctx, s->pl)); } - RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf)); - - s->initialized = 1; - -fail: - return err; -} - -static int process_frames(AVFilterContext *avctx, AVFrame *out_frame, AVFrame *top_frame, AVFrame *bottom_frame) -{ - int err = 0; - VkCommandBuffer cmd_buf; - BlendVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - FFVulkanFunctions *vk = &s->vkctx.vkfn; - const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - - AVVkFrame *out = (AVVkFrame *)out_frame->data[0]; - AVVkFrame *top = (AVVkFrame *)top_frame->data[0]; - AVVkFrame *bottom = (AVVkFrame *)bottom_frame->data[0]; - - AVHWFramesContext *top_fc = (AVHWFramesContext*)top_frame->hw_frames_ctx->data; - AVHWFramesContext *bottom_fc = (AVHWFramesContext*)bottom_frame->hw_frames_ctx->data; - - const VkFormat *top_formats = av_vkfmt_from_pixfmt(top_fc->sw_format); - const VkFormat *bottom_formats = av_vkfmt_from_pixfmt(bottom_fc->sw_format); - const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format); - - ff_vk_start_exec_recording(vkctx, s->exec); - cmd_buf = ff_vk_get_exec_buf(s->exec); - - for (int i = 0; 
i < planes; i++) { - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->top_images[i].imageView, top->img[i], - top_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->bottom_images[i].imageView, bottom->img[i], - bottom_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->output_images[i].imageView, out->img[i], - output_formats[i], - ff_comp_identity_map)); - - s->top_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->bottom_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST)); + RET(ff_vk_shader_init(&s->pl, &s->shd, "blend_compute", + VK_SHADER_STAGE_COMPUTE_BIT, 0)); + + ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1); + + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "top_images", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "bottom_images", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "output_images", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 3, 0, 0)); + + for (int i = 0, j = 0; i < planes; i++) { + for (j = 0; j < i; j++) + if (s->params[i].blend_func == s->params[j].blend_func) + break; + /* note: the bracket is needed, for GLSLD is a macro 
with multiple statements. */ + if (j == i) { + GLSLD(s->params[i].blend_func); + } } - ff_vk_update_descriptor_set(vkctx, s->pl, 0); - + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); for (int i = 0; i < planes; i++) { - VkImageMemoryBarrier barriers[] = { - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = top->layout[i], - .newLayout = s->top_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = top->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = bottom->layout[i], - .newLayout = s->bottom_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = bottom->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .oldLayout = out->layout[i], - .newLayout = s->output_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = out->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - }; - - vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, - 0, NULL, 0, NULL, FF_ARRAY_ELEMS(barriers), barriers); - - top->layout[i] = barriers[0].newLayout; - top->access[i] = barriers[0].dstAccessMask; 
- - bottom->layout[i] = barriers[1].newLayout; - bottom->access[i] = barriers[1].dstAccessMask; - - out->layout[i] = barriers[2].newLayout; - out->access[i] = barriers[2].dstAccessMask; + GLSLC(0, ); + GLSLF(1, size = imageSize(output_images[%i]); ,i); + GLSLC(1, if (IS_WITHIN(pos, size)) { ); + GLSLF(2, const vec4 top = texture(top_images[%i], pos); ,i); + GLSLF(2, const vec4 bottom = texture(bottom_images[%i], pos); ,i); + GLSLF(2, const float opacity = %f; ,s->params[i].opacity); + GLSLF(2, vec4 dst = %s(top, bottom, opacity); ,s->params[i].blend); + GLSLC(0, ); + GLSLF(2, imageStore(output_images[%i], pos, dst); ,i); + GLSLC(1, } ); } + GLSLC(0, } ); - ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl); - vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS) / CGS, - FFALIGN(s->vkctx.output_height, CGS) / CGS, 1); - - ff_vk_add_exec_dep(vkctx, s->exec, top_frame, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(vkctx, s->exec, bottom_frame, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(vkctx, s->exec, out_frame, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + RET(spv->compile_shader(spv, avctx, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); - err = ff_vk_submit_exec_queue(vkctx, s->exec); - if (err) - return err; + RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd)); + RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl)); - ff_vk_qf_rotate(&s->qf); - - return 0; + s->initialized = 1; fail: - ff_vk_discard_exec_deps(s->exec); + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); + return err; } @@ -375,7 +254,9 @@ static int blend_frame(FFFrameSync *fs) RET(init_filter(avctx)); } - RET(process_frames(avctx, out, top, bottom)); + RET(ff_vk_filter_process_Nin(&s->vkctx, &s->e, &s->pl, + out, (AVFrame *[]){ top, bottom }, 2, + s->sampler, NULL, 0)); return ff_filter_frame(outlink, out); @@ -396,10 +277,19 @@ static av_cold int 
init(AVFilterContext *avctx) static av_cold void uninit(AVFilterContext *avctx) { BlendVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; - ff_framesync_uninit(&s->fs); + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl); + ff_vk_shader_free(vkctx, &s->shd); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); ff_vk_uninit(&s->vkctx); + ff_framesync_uninit(&s->fs); s->initialized = 0; } From c0a9c53abe25b74d184a48e19632f5db8c2ffff1 Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 17 Feb 2023 03:11:53 +0100 Subject: [PATCH 71/98] chromaber_vulkan: port for the rewrite --- libavfilter/vf_chromaber_vulkan.c | 285 ++++++++++-------------------- 1 file changed, 96 insertions(+), 189 deletions(-) diff --git a/libavfilter/vf_chromaber_vulkan.c b/libavfilter/vf_chromaber_vulkan.c index 5ebdaf4e434a0..dcce64304efa2 100644 --- a/libavfilter/vf_chromaber_vulkan.c +++ b/libavfilter/vf_chromaber_vulkan.c @@ -1,4 +1,6 @@ /* + * Copyright (c) Lynne + * * This file is part of FFmpeg. 
* * FFmpeg is free software; you can redistribute it and/or @@ -19,21 +21,18 @@ #include "libavutil/random_seed.h" #include "libavutil/opt.h" #include "vulkan_filter.h" +#include "vulkan_spirv.h" #include "internal.h" -#define CGROUPS (int [3]){ 32, 32, 1 } - typedef struct ChromaticAberrationVulkanContext { FFVulkanContext vkctx; int initialized; + FFVulkanPipeline pl; + FFVkExecPool e; FFVkQueueFamilyCtx qf; - FFVkExecContext *exec; - FFVulkanPipeline *pl; - - /* Shader updators, must be in the main filter struct */ - VkDescriptorImageInfo input_images[3]; - VkDescriptorImageInfo output_images[3]; + FFVkSPIRVShader shd; + VkSampler sampler; /* Push constants / options */ struct { @@ -68,205 +67,102 @@ static const char distort_chroma_kernel[] = { static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) { int err; - FFVkSampler *sampler; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque; ChromaticAberrationVulkanContext *s = ctx->priv; FFVulkanContext *vkctx = &s->vkctx; const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - - ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0); - - /* Create a sampler */ - sampler = ff_vk_init_sampler(vkctx, 0, VK_FILTER_LINEAR); - if (!sampler) - return AVERROR_EXTERNAL; - - s->pl = ff_vk_create_pipeline(vkctx, &s->qf); - if (!s->pl) - return AVERROR(ENOMEM); + FFVkSPIRVShader *shd = &s->shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; /* Normalize options */ s->opts.dist[0] = (s->opts.dist[0] / 100.0f) + 1.0f; s->opts.dist[1] = (s->opts.dist[1] / 100.0f) + 1.0f; - { /* Create the shader */ - FFVulkanDescriptorSetBinding desc_i[2] = { - { - .name = "input_img", - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->input_images, - .sampler = sampler, - }, - { - .name = "output_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = 
ff_vk_shader_rep_fmt(s->vkctx.output_format), - .mem_quali = "writeonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->output_images, - }, - }; - - FFVkSPIRVShader *shd = ff_vk_init_shader(s->pl, "chromaber_compute", - VK_SHADER_STAGE_COMPUTE_BIT); - if (!shd) - return AVERROR(ENOMEM); - - ff_vk_set_compute_shader_sizes(shd, CGROUPS); - - GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); - GLSLC(1, vec2 dist; ); - GLSLC(0, }; ); - GLSLC(0, ); - - ff_vk_add_push_constant(s->pl, 0, sizeof(s->opts), - VK_SHADER_STAGE_COMPUTE_BIT); - - RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0)); /* set 0 */ - - GLSLD( distort_chroma_kernel ); - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - if (planes == 1) { - GLSLC(1, distort_rgb(imageSize(output_img[0]), pos); ); - } else { - GLSLC(1, ivec2 size = imageSize(output_img[0]); ); - GLSLC(1, vec2 npos = vec2(pos)/vec2(size); ); - GLSLC(1, vec4 res = texture(input_img[0], npos); ); - GLSLC(1, imageStore(output_img[0], pos, res); ); - for (int i = 1; i < planes; i++) { - GLSLC(0, ); - GLSLF(1, size = imageSize(output_img[%i]); ,i); - GLSLC(1, if (IS_WITHIN(pos, size)) { ); - GLSLF(2, distort_chroma(%i, size, pos); ,i); - GLSLC(1, } else { ); - GLSLC(2, npos = vec2(pos)/vec2(size); ); - GLSLF(2, res = texture(input_img[%i], npos); ,i); - GLSLF(2, imageStore(output_img[%i], pos, res); ,i); - GLSLC(1, } ); - } - } - GLSLC(0, } ); + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } - RET(ff_vk_compile_shader(vkctx, shd, "main")); + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 0, VK_FILTER_LINEAR)); + RET(ff_vk_shader_init(&s->pl, &s->shd, "chromaber_compute", 
+ VK_SHADER_STAGE_COMPUTE_BIT, 0)); + + ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1); + + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, vec2 dist; ); + GLSLC(0, }; ); + GLSLC(0, ); + + ff_vk_add_push_constant(&s->pl, 0, sizeof(s->opts), + VK_SHADER_STAGE_COMPUTE_BIT); + + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "input_img", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "output_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0)); + + GLSLD( distort_chroma_kernel ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + if (planes == 1) { + GLSLC(1, distort_rgb(imageSize(output_img[0]), pos); ); + } else { + GLSLC(1, ivec2 size = imageSize(output_img[0]); ); + GLSLC(1, vec2 npos = vec2(pos)/vec2(size); ); + GLSLC(1, vec4 res = texture(input_img[0], npos); ); + GLSLC(1, imageStore(output_img[0], pos, res); ); + for (int i = 1; i < planes; i++) { + GLSLC(0, ); + GLSLF(1, size = imageSize(output_img[%i]); ,i); + GLSLC(1, if (!IS_WITHIN(pos, size)) ); + GLSLC(2, return; ); + GLSLF(1, distort_chroma(%i, size, pos); ,i); + } } + GLSLC(0, } ); - RET(ff_vk_init_pipeline_layout(vkctx, s->pl)); - RET(ff_vk_init_compute_pipeline(vkctx, s->pl)); + RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); - /* Execution context */ - RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf)); + RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd)); + RET(ff_vk_exec_pipeline_register(vkctx, &s->e, 
&s->pl)); s->initialized = 1; return 0; fail: - return err; -} - -static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f) -{ - int err = 0; - VkCommandBuffer cmd_buf; - ChromaticAberrationVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - FFVulkanFunctions *vk = &vkctx->vkfn; - AVVkFrame *in = (AVVkFrame *)in_f->data[0]; - AVVkFrame *out = (AVVkFrame *)out_f->data[0]; - int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format); - const VkFormat *ouput_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format); - - /* Update descriptors and init the exec context */ - ff_vk_start_exec_recording(vkctx, s->exec); - cmd_buf = ff_vk_get_exec_buf(s->exec); - - for (int i = 0; i < planes; i++) { - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->input_images[i].imageView, in->img[i], - input_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->output_images[i].imageView, out->img[i], - ouput_formats[i], - ff_comp_identity_map)); - - s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - } - - ff_vk_update_descriptor_set(vkctx, s->pl, 0); - - for (int i = 0; i < planes; i++) { - VkImageMemoryBarrier bar[2] = { - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = in->layout[i], - .newLayout = s->input_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = in->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .oldLayout = out->layout[i], - .newLayout = 
s->output_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = out->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - }; - - vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, - 0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar); - - in->layout[i] = bar[0].newLayout; - in->access[i] = bar[0].dstAccessMask; - - out->layout[i] = bar[1].newLayout; - out->access[i] = bar[1].dstAccessMask; - } - - ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl); - - ff_vk_update_push_exec(vkctx, s->exec, VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(s->opts), &s->opts); - - vk->CmdDispatch(cmd_buf, - FFALIGN(s->vkctx.output_width, CGROUPS[0])/CGROUPS[0], - FFALIGN(s->vkctx.output_height, CGROUPS[1])/CGROUPS[1], 1); - - ff_vk_add_exec_dep(vkctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(vkctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - - err = ff_vk_submit_exec_queue(vkctx, s->exec); - if (err) - return err; + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); - ff_vk_qf_rotate(&s->qf); - - return err; - -fail: - ff_vk_discard_exec_deps(s->exec); return err; } @@ -286,7 +182,8 @@ static int chromaber_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) if (!s->initialized) RET(init_filter(ctx, in)); - RET(process_frames(ctx, out, in)); + RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, out, in, + s->sampler, &s->opts, sizeof(s->opts))); err = av_frame_copy_props(out, in); if (err < 0) @@ -305,6 +202,16 @@ static int chromaber_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) static void chromaber_vulkan_uninit(AVFilterContext *avctx) { ChromaticAberrationVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + + 
ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl); + ff_vk_shader_free(vkctx, &s->shd); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); ff_vk_uninit(&s->vkctx); From ba23cb9a7fd6dfd957f36165efa7b1ec4a987bd9 Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 17 Feb 2023 03:12:42 +0100 Subject: [PATCH 72/98] flip_vulkan: port for the rewrite --- libavfilter/vf_flip_vulkan.c | 230 ++++++++++++----------------------- 1 file changed, 79 insertions(+), 151 deletions(-) diff --git a/libavfilter/vf_flip_vulkan.c b/libavfilter/vf_flip_vulkan.c index 6868e39ee6321..4279dd2123ab1 100644 --- a/libavfilter/vf_flip_vulkan.c +++ b/libavfilter/vf_flip_vulkan.c @@ -1,5 +1,7 @@ /* * copyright (c) 2021 Wu Jianhua + * Copyright (c) Lynne + * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -20,10 +22,9 @@ #include "libavutil/random_seed.h" #include "libavutil/opt.h" #include "vulkan_filter.h" +#include "vulkan_spirv.h" #include "internal.h" -#define CGS 32 - enum FlipType { FLIP_VERTICAL, FLIP_HORIZONTAL, @@ -32,32 +33,50 @@ enum FlipType { typedef struct FlipVulkanContext { FFVulkanContext vkctx; - FFVkQueueFamilyCtx qf; - FFVkExecContext *exec; - FFVulkanPipeline *pl; - - VkDescriptorImageInfo input_images[3]; - VkDescriptorImageInfo output_images[3]; int initialized; + FFVulkanPipeline pl; + FFVkExecPool e; + FFVkQueueFamilyCtx qf; + FFVkSPIRVShader shd; + VkSampler sampler; } FlipVulkanContext; static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in, enum FlipType type) { int err = 0; - FFVkSPIRVShader *shd; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque; FlipVulkanContext *s = ctx->priv; FFVulkanContext *vkctx = &s->vkctx; const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + FFVkSPIRVShader *shd = &s->shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, 
AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_LINEAR)); + RET(ff_vk_shader_init(&s->pl, &s->shd, "flip_compute", + VK_SHADER_STAGE_COMPUTE_BIT, 0)); + + ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1); - FFVulkanDescriptorSetBinding image_descs[] = { + desc = (FFVulkanDescriptorSetBinding []) { { .name = "input_image", .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .dimensions = 2, .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->input_images, + .samplers = DUP_SAMPLER(s->sampler), }, { .name = "output_image", @@ -67,167 +86,75 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in, enum FlipType .dimensions = 2, .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->output_images, }, }; - image_descs[0].sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_LINEAR); - if (!image_descs[0].sampler) - return AVERROR_EXTERNAL; + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0)); - ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0); - - { - s->pl = ff_vk_create_pipeline(vkctx, &s->qf); - if (!s->pl) - return AVERROR(ENOMEM); - - shd = ff_vk_init_shader(s->pl, "flip_compute", image_descs[0].stages); - if (!shd) - return AVERROR(ENOMEM); - - ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 }); - RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0)); - - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 size; ); - GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - for (int i = 0; i < planes; i++) { - GLSLC(0, ); - GLSLF(1, size = imageSize(output_image[%i]); ,i); - GLSLC(1, if (IS_WITHIN(pos, size)) { ); - switch (type) - { - case FLIP_HORIZONTAL: - GLSLF(2, vec4 res = 
texture(input_image[%i], ivec2(size.x - pos.x, pos.y)); ,i); - break; - case FLIP_VERTICAL: - GLSLF(2, vec4 res = texture(input_image[%i], ivec2(pos.x, size.y - pos.y)); ,i); - break; - case FLIP_BOTH: - GLSLF(2, vec4 res = texture(input_image[%i], ivec2(size.xy - pos.xy));, i); - break; - default: - GLSLF(2, vec4 res = texture(input_image[%i], pos); ,i); - break; - } - GLSLF(2, imageStore(output_image[%i], pos, res); ,i); - GLSLC(1, } ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + for (int i = 0; i < planes; i++) { + GLSLC(0, ); + GLSLF(1, size = imageSize(output_image[%i]); ,i); + GLSLC(1, if (IS_WITHIN(pos, size)) { ); + switch (type) + { + case FLIP_HORIZONTAL: + GLSLF(2, vec4 res = texture(input_image[%i], ivec2(size.x - pos.x, pos.y)); ,i); + break; + case FLIP_VERTICAL: + GLSLF(2, vec4 res = texture(input_image[%i], ivec2(pos.x, size.y - pos.y)); ,i); + break; + case FLIP_BOTH: + GLSLF(2, vec4 res = texture(input_image[%i], ivec2(size.xy - pos.xy));, i); + break; + default: + GLSLF(2, vec4 res = texture(input_image[%i], pos); ,i); + break; } - GLSLC(0, } ); - - RET(ff_vk_compile_shader(vkctx, shd, "main")); - RET(ff_vk_init_pipeline_layout(vkctx, s->pl)); - RET(ff_vk_init_compute_pipeline(vkctx, s->pl)); + GLSLF(2, imageStore(output_image[%i], pos, res); ,i); + GLSLC(1, } ); } + GLSLC(0, } ); + + RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); + + RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd)); + RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl)); - RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf)); s->initialized = 1; fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); + return err; } static av_cold void flip_vulkan_uninit(AVFilterContext *avctx) { FlipVulkanContext *s = avctx->priv; - ff_vk_uninit(&s->vkctx); - 
s->initialized = 0; -} - -static int process_frames(AVFilterContext *avctx, AVFrame *outframe, AVFrame *inframe) -{ - int err = 0; - VkCommandBuffer cmd_buf; - FlipVulkanContext *s = avctx->priv; FFVulkanContext *vkctx = &s->vkctx; - FFVulkanFunctions *vk = &s->vkctx.vkfn; - AVVkFrame *in = (AVVkFrame *)inframe->data[0]; - AVVkFrame *out = (AVVkFrame *)outframe->data[0]; - const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format); - const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format); - - ff_vk_start_exec_recording(vkctx, s->exec); - cmd_buf = ff_vk_get_exec_buf(s->exec); - - for (int i = 0; i < planes; i++) { - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->input_images[i].imageView, in->img[i], - input_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->output_images[i].imageView, out->img[i], - output_formats[i], - ff_comp_identity_map)); - - s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - } + FFVulkanFunctions *vk = &vkctx->vkfn; - ff_vk_update_descriptor_set(vkctx, s->pl, 0); + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl); + ff_vk_shader_free(vkctx, &s->shd); - for (int i = 0; i < planes; i++) { - VkImageMemoryBarrier barriers[] = { - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = in->layout[i], - .newLayout = s->input_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = in->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = 
VK_ACCESS_SHADER_WRITE_BIT, - .oldLayout = out->layout[i], - .newLayout = s->output_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = out->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - }; - - vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, - 0, NULL, 0, NULL, FF_ARRAY_ELEMS(barriers), barriers); - - in->layout[i] = barriers[0].newLayout; - in->access[i] = barriers[0].dstAccessMask; - - out->layout[i] = barriers[1].newLayout; - out->access[i] = barriers[1].dstAccessMask; - } - - ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl); - vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS, - s->vkctx.output_height, 1); - - ff_vk_add_exec_dep(vkctx, s->exec, inframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(vkctx, s->exec, outframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - - err = ff_vk_submit_exec_queue(vkctx, s->exec); - if (err) - return err; + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); - ff_vk_qf_rotate(&s->qf); + ff_vk_uninit(&s->vkctx); - return 0; -fail: - ff_vk_discard_exec_deps(s->exec); - return err; + s->initialized = 0; } static int filter_frame(AVFilterLink *link, AVFrame *in, enum FlipType type) @@ -247,7 +174,8 @@ static int filter_frame(AVFilterLink *link, AVFrame *in, enum FlipType type) if (!s->initialized) RET(init_filter(ctx, in, type)); - RET(process_frames(ctx, out, in)); + RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, out, in, + s->sampler, NULL, 0)); RET(av_frame_copy_props(out, in)); From d8b78eb6da35f1457a53e79a5b66c68dfc98ab48 Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 17 Feb 2023 03:12:55 +0100 Subject: [PATCH 73/98] gblur_vulkan: port for the rewrite --- libavfilter/vf_gblur_vulkan.c | 328 
+++++++++++----------------------- 1 file changed, 102 insertions(+), 226 deletions(-) diff --git a/libavfilter/vf_gblur_vulkan.c b/libavfilter/vf_gblur_vulkan.c index a6037e08888d7..0f0f5dff43243 100644 --- a/libavfilter/vf_gblur_vulkan.c +++ b/libavfilter/vf_gblur_vulkan.c @@ -1,5 +1,7 @@ /* * copyright (c) 2021-2022 Wu Jianhua + * Copyright (c) Lynne + * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -20,6 +22,7 @@ #include "libavutil/random_seed.h" #include "libavutil/opt.h" #include "vulkan_filter.h" +#include "vulkan_spirv.h" #include "internal.h" #define CGS 32 @@ -27,26 +30,23 @@ typedef struct GBlurVulkanContext { FFVulkanContext vkctx; - FFVkQueueFamilyCtx qf; - FFVkExecContext *exec; - FFVulkanPipeline *pl_hor; - FFVulkanPipeline *pl_ver; - FFVkBuffer params_buf_hor; - FFVkBuffer params_buf_ver; - - VkDescriptorImageInfo input_images[3]; - VkDescriptorImageInfo tmp_images[3]; - VkDescriptorImageInfo output_images[3]; - VkDescriptorBufferInfo params_desc_hor; - VkDescriptorBufferInfo params_desc_ver; int initialized; + FFVkExecPool e; + FFVkQueueFamilyCtx qf; + VkSampler sampler; + FFVulkanPipeline pl_hor; + FFVkSPIRVShader shd_hor; + FFVkBuffer params_hor; + FFVulkanPipeline pl_ver; + FFVkSPIRVShader shd_ver; + FFVkBuffer params_ver; + int size; int sizeV; int planes; float sigma; float sigmaV; - AVFrame *tmpframe; } GBlurVulkanContext; static const char gblur_func[] = { @@ -118,16 +118,17 @@ static av_cold void init_gaussian_params(GBlurVulkanContext *s) s->sizeV = s->size; else init_kernel_size(s, &s->sizeV); - - s->tmpframe = NULL; } -static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVkSPIRVShader *shd, - FFVkBuffer *params_buf, VkDescriptorBufferInfo *params_desc, - int ksize, float sigma) +static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, + FFVkSPIRVShader *shd, FFVkBuffer *params_buf, + int ksize, float sigma, FFVkSPIRVCompiler *spv) { int err = 0; 
uint8_t *kernel_mapped; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque; const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); @@ -137,7 +138,6 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVk .mem_quali = "readonly", .mem_layout = "std430", .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = NULL, .buf_content = NULL, }; @@ -145,10 +145,9 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVk if (!kernel_def) return AVERROR(ENOMEM); - buf_desc.updater = params_desc; buf_desc.buf_content = kernel_def; - RET(ff_vk_add_descriptor_set(&s->vkctx, pl, shd, &buf_desc, 1, 0)); + RET(ff_vk_pipeline_descriptor_set_add(&s->vkctx, pl, shd, &buf_desc, 1, 1, 0)); GLSLD( gblur_func ); GLSLC(0, void main() ); @@ -157,38 +156,43 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVk GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); for (int i = 0; i < planes; i++) { GLSLC(0, ); - GLSLF(1, size = imageSize(output_images[%i]); ,i); - GLSLC(1, if (IS_WITHIN(pos, size)) { ); + GLSLF(1, size = imageSize(output_images[%i]); ,i); + GLSLC(1, if (!IS_WITHIN(pos, size)) ); + GLSLC(2, return; ); if (s->planes & (1 << i)) { - GLSLF(2, gblur(pos, %i); ,i); + GLSLF(1, gblur(pos, %i); ,i); } else { - GLSLF(2, vec4 res = texture(input_images[%i], pos); ,i); - GLSLF(2, imageStore(output_images[%i], pos, res); ,i); + GLSLF(1, vec4 res = texture(input_images[%i], pos); ,i); + GLSLF(1, imageStore(output_images[%i], pos, res); ,i); } - GLSLC(1, } ); } GLSLC(0, } ); - RET(ff_vk_compile_shader(&s->vkctx, shd, "main")); + RET(spv->compile_shader(spv, s, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_create(&s->vkctx, shd, spv_data, spv_len, "main")); - RET(ff_vk_init_pipeline_layout(&s->vkctx, pl)); - RET(ff_vk_init_compute_pipeline(&s->vkctx, pl)); + RET(ff_vk_init_compute_pipeline(&s->vkctx, pl, shd)); + RET(ff_vk_exec_pipeline_register(&s->vkctx, 
&s->e, pl)); RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize, NULL, NULL, - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); - RET(ff_vk_map_buffers(&s->vkctx, params_buf, &kernel_mapped, 1, 0)); + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); + RET(ff_vk_map_buffer(&s->vkctx, params_buf, &kernel_mapped, 0)); init_gaussian_kernel((float *)kernel_mapped, sigma, ksize); - RET(ff_vk_unmap_buffers(&s->vkctx, params_buf, 1, 1)); - - params_desc->buffer = params_buf->buf; - params_desc->range = VK_WHOLE_SIZE; + RET(ff_vk_unmap_buffer(&s->vkctx, params_buf, 1)); - ff_vk_update_descriptor_set(&s->vkctx, pl, 1); + RET(ff_vk_set_descriptor_buffer(&s->vkctx, pl, NULL, 1, 0, 0, + params_buf->address, params_buf->size, + VK_FORMAT_UNDEFINED)); fail: av_free(kernel_def); + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); return err; } @@ -196,16 +200,35 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) { int err = 0; GBlurVulkanContext *s = ctx->priv; - FFVkSPIRVShader *shd; + FFVulkanContext *vkctx = &s->vkctx; const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - FFVulkanDescriptorSetBinding image_descs[] = { + FFVkSPIRVShader *shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_LINEAR)); + RET(ff_vk_shader_init(&s->pl_hor, &s->shd_hor, "gblur_hor_compute", + VK_SHADER_STAGE_COMPUTE_BIT, 0)); + RET(ff_vk_shader_init(&s->pl_ver, &s->shd_ver, "gblur_ver_compute", + VK_SHADER_STAGE_COMPUTE_BIT, 0)); + + desc = (FFVulkanDescriptorSetBinding []) { { .name = 
"input_images", .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .dimensions = 2, .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), }, { .name = "output_images", @@ -218,215 +241,64 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) }, }; - image_descs[0].sampler = ff_vk_init_sampler(&s->vkctx, 1, VK_FILTER_LINEAR); - if (!image_descs[0].sampler) - return AVERROR_EXTERNAL; - init_gaussian_params(s); - ff_vk_qf_init(&s->vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0); - { - /* Create shader for the horizontal pass */ - image_descs[0].updater = s->input_images; - image_descs[1].updater = s->tmp_images; - - s->pl_hor = ff_vk_create_pipeline(&s->vkctx, &s->qf); - if (!s->pl_hor) { - err = AVERROR(ENOMEM); - goto fail; - } - - shd = ff_vk_init_shader(s->pl_hor, "gblur_compute_hor", image_descs[0].stages); - if (!shd) { - err = AVERROR(ENOMEM); - goto fail; - } + shd = &s->shd_hor; + ff_vk_shader_set_compute_sizes(shd, 32, 1, 1); - ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 }); - RET(ff_vk_add_descriptor_set(&s->vkctx, s->pl_hor, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0)); + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl_hor, shd, desc, 2, 0, 0)); GLSLC(0, #define OFFSET (vec2(i, 0.0))); - RET(init_gblur_pipeline(s, s->pl_hor, shd, &s->params_buf_hor, &s->params_desc_hor, - s->size, s->sigma)); + RET(init_gblur_pipeline(s, &s->pl_hor, shd, &s->params_hor, s->size, s->sigma, spv)); } { - /* Create shader for the vertical pass */ - image_descs[0].updater = s->tmp_images; - image_descs[1].updater = s->output_images; - - s->pl_ver = ff_vk_create_pipeline(&s->vkctx, &s->qf); - if (!s->pl_ver) { - err = AVERROR(ENOMEM); - goto fail; - } + shd = &s->shd_ver; + ff_vk_shader_set_compute_sizes(shd, 1, 32, 1); - shd = ff_vk_init_shader(s->pl_ver, "gblur_compute_ver", image_descs[0].stages); - if (!shd) { - err = AVERROR(ENOMEM); - goto fail; - } - - ff_vk_set_compute_shader_sizes(shd, (int [3]){ 1, 
CGS, 1 }); - RET(ff_vk_add_descriptor_set(&s->vkctx, s->pl_ver, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0)); + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl_ver, shd, desc, 2, 0, 0)); GLSLC(0, #define OFFSET (vec2(0.0, i))); - RET(init_gblur_pipeline(s, s->pl_ver, shd, &s->params_buf_ver, &s->params_desc_ver, - s->sizeV, s->sigmaV)); + RET(init_gblur_pipeline(s, &s->pl_ver, shd, &s->params_ver, s->sizeV, s->sigmaV, spv)); } - RET(ff_vk_create_exec_ctx(&s->vkctx, &s->exec, &s->qf)); - s->initialized = 1; fail: + if (spv) + spv->uninit(&spv); + return err; } static av_cold void gblur_vulkan_uninit(AVFilterContext *avctx) { GBlurVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; - av_frame_free(&s->tmpframe); + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl_hor); + ff_vk_pipeline_free(vkctx, &s->pl_ver); + ff_vk_shader_free(vkctx, &s->shd_hor); + ff_vk_shader_free(vkctx, &s->shd_ver); + ff_vk_free_buf(vkctx, &s->params_hor); + ff_vk_free_buf(vkctx, &s->params_ver); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); - ff_vk_free_buf(&s->vkctx, &s->params_buf_hor); - ff_vk_free_buf(&s->vkctx, &s->params_buf_ver); ff_vk_uninit(&s->vkctx); s->initialized = 0; } -static int process_frames(AVFilterContext *avctx, AVFrame *outframe, AVFrame *inframe) -{ - int err; - VkCommandBuffer cmd_buf; - GBlurVulkanContext *s = avctx->priv; - FFVulkanFunctions *vk = &s->vkctx.vkfn; - - const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - - AVVkFrame *in = (AVVkFrame *)inframe->data[0]; - AVVkFrame *out = (AVVkFrame *)outframe->data[0]; - AVVkFrame *tmp = (AVVkFrame *)s->tmpframe->data[0]; - - const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format); - const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format); - - ff_vk_start_exec_recording(&s->vkctx, s->exec); - cmd_buf = 
ff_vk_get_exec_buf(s->exec); - - for (int i = 0; i < planes; i++) { - RET(ff_vk_create_imageview(&s->vkctx, s->exec, &s->input_images[i].imageView, - in->img[i], - input_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(&s->vkctx, s->exec, &s->tmp_images[i].imageView, - tmp->img[i], - output_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(&s->vkctx, s->exec, &s->output_images[i].imageView, - out->img[i], - output_formats[i], - ff_comp_identity_map)); - - s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->tmp_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - } - - ff_vk_update_descriptor_set(&s->vkctx, s->pl_hor, 0); - ff_vk_update_descriptor_set(&s->vkctx, s->pl_ver, 0); - - for (int i = 0; i < planes; i++) { - VkImageMemoryBarrier barriers[] = { - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = in->layout[i], - .newLayout = s->input_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = in->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT, - .oldLayout = tmp->layout[i], - .newLayout = s->tmp_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = tmp->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .oldLayout = out->layout[i], - 
.newLayout = s->output_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = out->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - }; - - vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, - 0, NULL, 0, NULL, FF_ARRAY_ELEMS(barriers), barriers); - - in->layout[i] = barriers[0].newLayout; - in->access[i] = barriers[0].dstAccessMask; - - tmp->layout[i] = barriers[1].newLayout; - tmp->access[i] = barriers[1].dstAccessMask; - - out->layout[i] = barriers[2].newLayout; - out->access[i] = barriers[2].dstAccessMask; - } - - ff_vk_bind_pipeline_exec(&s->vkctx, s->exec, s->pl_hor); - - vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS, - s->vkctx.output_height, 1); - - ff_vk_bind_pipeline_exec(&s->vkctx, s->exec, s->pl_ver); - - vk->CmdDispatch(cmd_buf,s->vkctx.output_width, - FFALIGN(s->vkctx.output_height, CGS)/CGS, 1); - - ff_vk_add_exec_dep(&s->vkctx, s->exec, inframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(&s->vkctx, s->exec, outframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - - err = ff_vk_submit_exec_queue(&s->vkctx, s->exec); - if (err) - return err; - - ff_vk_qf_rotate(&s->qf); - - return 0; - -fail: - ff_vk_discard_exec_deps(s->exec); - return err; -} - static int gblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) { int err; - AVFrame *out = NULL; + AVFrame *tmp = NULL, *out = NULL; AVFilterContext *ctx = link->dst; GBlurVulkanContext *s = ctx->priv; AVFilterLink *outlink = ctx->outputs[0]; @@ -437,28 +309,32 @@ static int gblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) goto fail; } - if (!s->initialized) { - RET(init_filter(ctx, in)); - s->tmpframe = ff_get_video_buffer(outlink, outlink->w, outlink->h); - if (!s->tmpframe) { - err = AVERROR(ENOMEM); - goto fail; - } + tmp = 
ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!tmp) { + err = AVERROR(ENOMEM); + goto fail; } - RET(process_frames(ctx, out, in)); + if (!s->initialized) + RET(init_filter(ctx, in)); - RET(av_frame_copy_props(out, in)); + RET(ff_vk_filter_process_2pass(&s->vkctx, &s->e, + (FFVulkanPipeline *[2]){ &s->pl_hor, &s->pl_ver }, + out, tmp, in, s->sampler, NULL, 0)); + + err = av_frame_copy_props(out, in); + if (err < 0) + goto fail; av_frame_free(&in); + av_frame_free(&tmp); return ff_filter_frame(outlink, out); fail: av_frame_free(&in); + av_frame_free(&tmp); av_frame_free(&out); - av_frame_free(&s->tmpframe); - return err; } From c229186015c5f213039b910036138b598863de01 Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 17 Feb 2023 03:13:05 +0100 Subject: [PATCH 74/98] overlay_vulkan: port for the rewrite --- libavfilter/vf_overlay_vulkan.c | 398 ++++++++++---------------------- 1 file changed, 123 insertions(+), 275 deletions(-) diff --git a/libavfilter/vf_overlay_vulkan.c b/libavfilter/vf_overlay_vulkan.c index 7a66cf12ad6ff..a05d9155be133 100644 --- a/libavfilter/vf_overlay_vulkan.c +++ b/libavfilter/vf_overlay_vulkan.c @@ -1,4 +1,6 @@ /* + * Copyright (c) Lynne + * * This file is part of FFmpeg. 
* * FFmpeg is free software; you can redistribute it and/or @@ -19,26 +21,26 @@ #include "libavutil/random_seed.h" #include "libavutil/opt.h" #include "vulkan_filter.h" +#include "vulkan_spirv.h" #include "internal.h" #include "framesync.h" -#define CGROUPS (int [3]){ 32, 32, 1 } - typedef struct OverlayVulkanContext { FFVulkanContext vkctx; + FFFrameSync fs; int initialized; + FFVulkanPipeline pl; + FFVkExecPool e; FFVkQueueFamilyCtx qf; - FFVkExecContext *exec; - FFVulkanPipeline *pl; - FFFrameSync fs; - FFVkBuffer params_buf; + FFVkSPIRVShader shd; + VkSampler sampler; - /* Shader updators, must be in the main filter struct */ - VkDescriptorImageInfo main_images[3]; - VkDescriptorImageInfo overlay_images[3]; - VkDescriptorImageInfo output_images[3]; - VkDescriptorBufferInfo params_desc; + /* Push constants / options */ + struct { + int32_t o_offset[2*3]; + int32_t o_size[2*3]; + } opts; int overlay_x; int overlay_y; @@ -80,279 +82,114 @@ static const char overlay_alpha[] = { static av_cold int init_filter(AVFilterContext *ctx) { int err; - FFVkSampler *sampler; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; /* tested at fail:, which is reachable before compile_shader() sets it */ OverlayVulkanContext *s = ctx->priv; FFVulkanContext *vkctx = &s->vkctx; const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - - ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0); - - sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_NEAREST); - if (!sampler) + const int ialpha = av_pix_fmt_desc_get(s->vkctx.input_format)->flags & AV_PIX_FMT_FLAG_ALPHA; + const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(s->vkctx.output_format); + FFVkSPIRVShader *shd = &s->shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); return AVERROR_EXTERNAL; - - s->pl = ff_vk_create_pipeline(vkctx, &s->qf); - if (!s->pl) - return AVERROR(ENOMEM); - - { /* Create the shader */ - const int ialpha =
av_pix_fmt_desc_get(s->vkctx.input_format)->flags & AV_PIX_FMT_FLAG_ALPHA; - - FFVulkanDescriptorSetBinding desc_i[3] = { - { - .name = "main_img", - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->main_images, - .sampler = sampler, - }, - { - .name = "overlay_img", - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->overlay_images, - .sampler = sampler, - }, - { - .name = "output_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), - .mem_quali = "writeonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->output_images, - }, - }; - - FFVulkanDescriptorSetBinding desc_b = { - .name = "params", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .mem_quali = "readonly", - .mem_layout = "std430", - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = &s->params_desc, - .buf_content = "ivec2 o_offset[3], o_size[3];", - }; - - FFVkSPIRVShader *shd = ff_vk_init_shader(s->pl, "overlay_compute", - VK_SHADER_STAGE_COMPUTE_BIT); - if (!shd) - return AVERROR(ENOMEM); - - ff_vk_set_compute_shader_sizes(shd, CGROUPS); - - RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0)); /* set 0 */ - RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, &desc_b, 1, 0)); /* set 1 */ - - GLSLD( overlay_noalpha ); - GLSLD( overlay_alpha ); - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - GLSLF(1, int planes = %i; ,planes); - GLSLC(1, for (int i = 0; i < planes; i++) { ); - if (ialpha) - GLSLC(2, overlay_alpha_opaque(i, pos); ); - else - GLSLC(2, overlay_noalpha(i, pos); ); - GLSLC(1, } ); - GLSLC(0, } ); - - RET(ff_vk_compile_shader(vkctx, shd, "main")); - } - - RET(ff_vk_init_pipeline_layout(vkctx, s->pl)); - 
RET(ff_vk_init_compute_pipeline(vkctx, s->pl)); - - { /* Create and update buffer */ - const AVPixFmtDescriptor *desc; - - /* NOTE: std430 requires the same identical struct layout, padding and - * alignment as C, so we're allowed to do this, as this will map - * exactly to what the shader recieves */ - struct { - int32_t o_offset[2*3]; - int32_t o_size[2*3]; - } *par; - - err = ff_vk_create_buf(vkctx, &s->params_buf, - sizeof(*par), NULL, - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); - if (err) - return err; - - err = ff_vk_map_buffers(vkctx, &s->params_buf, (uint8_t **)&par, 1, 0); - if (err) - return err; - - desc = av_pix_fmt_desc_get(s->vkctx.output_format); - - par->o_offset[0] = s->overlay_x; - par->o_offset[1] = s->overlay_y; - par->o_offset[2] = par->o_offset[0] >> desc->log2_chroma_w; - par->o_offset[3] = par->o_offset[1] >> desc->log2_chroma_h; - par->o_offset[4] = par->o_offset[0] >> desc->log2_chroma_w; - par->o_offset[5] = par->o_offset[1] >> desc->log2_chroma_h; - - par->o_size[0] = s->overlay_w; - par->o_size[1] = s->overlay_h; - par->o_size[2] = par->o_size[0] >> desc->log2_chroma_w; - par->o_size[3] = par->o_size[1] >> desc->log2_chroma_h; - par->o_size[4] = par->o_size[0] >> desc->log2_chroma_w; - par->o_size[5] = par->o_size[1] >> desc->log2_chroma_h; - - err = ff_vk_unmap_buffers(vkctx, &s->params_buf, 1, 1); - if (err) - return err; - - s->params_desc.buffer = s->params_buf.buf; - s->params_desc.range = VK_WHOLE_SIZE; - - ff_vk_update_descriptor_set(vkctx, s->pl, 1); } - /* Execution context */ - RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf)); + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST)); + RET(ff_vk_shader_init(&s->pl, &s->shd, "overlay_compute", + VK_SHADER_STAGE_COMPUTE_BIT, 0)); + + ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1); + + 
GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, ivec2 o_offset[3]; ); + GLSLC(1, ivec2 o_size[3]; ); + GLSLC(0, }; ); + GLSLC(0, ); + + ff_vk_add_push_constant(&s->pl, 0, sizeof(s->opts), + VK_SHADER_STAGE_COMPUTE_BIT); + + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "main_img", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "overlay_img", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "output_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 3, 0, 0)); + + GLSLD( overlay_noalpha ); + GLSLD( overlay_alpha ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + GLSLF(1, int planes = %i; ,planes); + GLSLC(1, for (int i = 0; i < planes; i++) { ); + if (ialpha) + GLSLC(2, overlay_alpha_opaque(i, pos); ); + else + GLSLC(2, overlay_noalpha(i, pos); ); + GLSLC(1, } ); + GLSLC(0, } ); + + RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); + + RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd)); + RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl)); + + s->opts.o_offset[0] = s->overlay_x; + s->opts.o_offset[1] = s->overlay_y; + s->opts.o_offset[2] = s->opts.o_offset[0] >> pix_desc->log2_chroma_w; + s->opts.o_offset[3] = s->opts.o_offset[1] >> pix_desc->log2_chroma_h; + s->opts.o_offset[4] = s->opts.o_offset[0] >> pix_desc->log2_chroma_w; + 
s->opts.o_offset[5] = s->opts.o_offset[1] >> pix_desc->log2_chroma_h; + + s->opts.o_size[0] = s->overlay_w; + s->opts.o_size[1] = s->overlay_h; + s->opts.o_size[2] = s->opts.o_size[0] >> pix_desc->log2_chroma_w; + s->opts.o_size[3] = s->opts.o_size[1] >> pix_desc->log2_chroma_h; + s->opts.o_size[4] = s->opts.o_size[0] >> pix_desc->log2_chroma_w; + s->opts.o_size[5] = s->opts.o_size[1] >> pix_desc->log2_chroma_h; s->initialized = 1; - return 0; - fail: - return err; -} - -static int process_frames(AVFilterContext *avctx, AVFrame *out_f, - AVFrame *main_f, AVFrame *overlay_f) -{ - int err; - VkCommandBuffer cmd_buf; - OverlayVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - FFVulkanFunctions *vk = &vkctx->vkfn; - int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - - AVVkFrame *out = (AVVkFrame *)out_f->data[0]; - AVVkFrame *main = (AVVkFrame *)main_f->data[0]; - AVVkFrame *overlay = (AVVkFrame *)overlay_f->data[0]; - - AVHWFramesContext *main_fc = (AVHWFramesContext*)main_f->hw_frames_ctx->data; - AVHWFramesContext *overlay_fc = (AVHWFramesContext*)overlay_f->hw_frames_ctx->data; - - const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format); - const VkFormat *main_sw_formats = av_vkfmt_from_pixfmt(main_fc->sw_format); - const VkFormat *overlay_sw_formats = av_vkfmt_from_pixfmt(overlay_fc->sw_format); - - /* Update descriptors and init the exec context */ - ff_vk_start_exec_recording(vkctx, s->exec); - cmd_buf = ff_vk_get_exec_buf(s->exec); - - for (int i = 0; i < planes; i++) { - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->main_images[i].imageView, main->img[i], - main_sw_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->overlay_images[i].imageView, overlay->img[i], - overlay_sw_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->output_images[i].imageView, out->img[i], - output_formats[i], - ff_comp_identity_map)); - - 
s->main_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->overlay_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - } - - ff_vk_update_descriptor_set(vkctx, s->pl, 0); - - for (int i = 0; i < planes; i++) { - VkImageMemoryBarrier bar[3] = { - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = main->layout[i], - .newLayout = s->main_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = main->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = overlay->layout[i], - .newLayout = s->overlay_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = overlay->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .oldLayout = out->layout[i], - .newLayout = s->output_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = out->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - }; - - vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, - 0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar); - - main->layout[i] = bar[0].newLayout; - main->access[i] = bar[0].dstAccessMask; - - 
overlay->layout[i] = bar[1].newLayout; - overlay->access[i] = bar[1].dstAccessMask; - - out->layout[i] = bar[2].newLayout; - out->access[i] = bar[2].dstAccessMask; - } - - ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl); - - vk->CmdDispatch(cmd_buf, - FFALIGN(s->vkctx.output_width, CGROUPS[0])/CGROUPS[0], - FFALIGN(s->vkctx.output_height, CGROUPS[1])/CGROUPS[1], 1); - - ff_vk_add_exec_dep(vkctx, s->exec, main_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(vkctx, s->exec, overlay_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(vkctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - - err = ff_vk_submit_exec_queue(vkctx, s->exec); - if (err) - return err; + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); - ff_vk_qf_rotate(&s->qf); - - return err; - -fail: - ff_vk_discard_exec_deps(s->exec); return err; } @@ -394,7 +231,9 @@ static int overlay_vulkan_blend(FFFrameSync *fs) goto fail; } - RET(process_frames(ctx, out, input_main, input_overlay)); + RET(ff_vk_filter_process_Nin(&s->vkctx, &s->e, &s->pl, + out, (AVFrame *[]){ input_main, input_overlay }, 2, + s->sampler, &s->opts, sizeof(s->opts))); err = av_frame_copy_props(out, input_main); if (err < 0) @@ -443,8 +282,17 @@ static av_cold int overlay_vulkan_init(AVFilterContext *avctx) static void overlay_vulkan_uninit(AVFilterContext *avctx) { OverlayVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl); + ff_vk_shader_free(vkctx, &s->shd); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); - ff_vk_free_buf(&s->vkctx, &s->params_buf); ff_vk_uninit(&s->vkctx); ff_framesync_uninit(&s->fs); From 37763d653aa2d2c53b68484e141dee6af0486668 Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 17 Feb 2023 03:13:32 +0100 Subject: [PATCH 75/98] scale_vulkan: port for the rewrite --- 
libavfilter/vf_scale_vulkan.c | 366 ++++++++++++---------------------- 1 file changed, 125 insertions(+), 241 deletions(-) diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c index cd37a861b1ab8..64f5e79afb64a 100644 --- a/libavfilter/vf_scale_vulkan.c +++ b/libavfilter/vf_scale_vulkan.c @@ -1,4 +1,6 @@ /* + * Copyright (c) Lynne + * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -19,12 +21,11 @@ #include "libavutil/random_seed.h" #include "libavutil/opt.h" #include "vulkan_filter.h" +#include "vulkan_spirv.h" #include "scale_eval.h" #include "internal.h" #include "colorspace.h" -#define CGROUPS (int [3]){ 32, 32, 1 } - enum ScalerFunc { F_BILINEAR = 0, F_NEAREST, @@ -35,15 +36,17 @@ enum ScalerFunc { typedef struct ScaleVulkanContext { FFVulkanContext vkctx; + int initialized; + FFVulkanPipeline pl; + FFVkExecPool e; FFVkQueueFamilyCtx qf; - FFVkExecContext *exec; - FFVulkanPipeline *pl; - FFVkBuffer params_buf; + FFVkSPIRVShader shd; + VkSampler sampler; - /* Shader updators, must be in the main filter struct */ - VkDescriptorImageInfo input_images[3]; - VkDescriptorImageInfo output_images[3]; - VkDescriptorBufferInfo params_desc; + /* Push constants / options */ + struct { + float yuv_matrix[4][4]; + } opts; char *out_format_string; char *w_expr; @@ -51,8 +54,6 @@ typedef struct ScaleVulkanContext { enum ScalerFunc scaler; enum AVColorRange out_range; - - int initialized; } ScaleVulkanContext; static const char scale_bilinear[] = { @@ -110,10 +111,15 @@ static const char write_444[] = { static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) { int err; - FFVkSampler *sampler; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; /* tested at fail:, which is reachable before compile_shader() sets it */ VkFilter sampler_mode; ScaleVulkanContext *s = ctx->priv; FFVulkanContext *vkctx = &s->vkctx; + FFVkSPIRVShader *shd = &s->shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; int crop_x = in->crop_left; int crop_y = in->crop_top; @@
-121,8 +127,6 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) int crop_h = in->height - (in->crop_top + in->crop_bottom); int in_planes = av_pix_fmt_count_planes(s->vkctx.input_format); - ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0); - switch (s->scaler) { case F_NEAREST: sampler_mode = VK_FILTER_NEAREST; @@ -132,264 +136,134 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) break; }; - /* Create a sampler */ - sampler = ff_vk_init_sampler(vkctx, 0, sampler_mode); - if (!sampler) + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); return AVERROR_EXTERNAL; + } - s->pl = ff_vk_create_pipeline(vkctx, &s->qf); - if (!s->pl) - return AVERROR(ENOMEM); - - { /* Create the shader */ - FFVulkanDescriptorSetBinding desc_i[2] = { - { - .name = "input_img", - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .dimensions = 2, - .elems = in_planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->input_images, - .sampler = sampler, - }, - { - .name = "output_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), - .mem_quali = "writeonly", - .dimensions = 2, - .elems = av_pix_fmt_count_planes(s->vkctx.output_format), - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->output_images, - }, - }; - - FFVulkanDescriptorSetBinding desc_b = { - .name = "params", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .mem_quali = "readonly", - .mem_layout = "std430", - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = &s->params_desc, - .buf_content = "mat4 yuv_matrix;", - }; - - FFVkSPIRVShader *shd = ff_vk_init_shader(s->pl, "scale_compute", - VK_SHADER_STAGE_COMPUTE_BIT); - if (!shd) - return AVERROR(ENOMEM); - - ff_vk_set_compute_shader_sizes(shd, CGROUPS); - - RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0)); /* set 0 */ - RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, 
&desc_b, 1, 0)); /* set 1 */ - - GLSLD( scale_bilinear ); - - if (s->vkctx.output_format != s->vkctx.input_format) { - GLSLD( rgb2yuv ); - } + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 0, sampler_mode)); + RET(ff_vk_shader_init(&s->pl, &s->shd, "scale_compute", + VK_SHADER_STAGE_COMPUTE_BIT, 0)); + + ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1); + + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, mat4 yuv_matrix; ); + GLSLC(0, }; ); + GLSLC(0, ); + + ff_vk_add_push_constant(&s->pl, 0, sizeof(s->opts), + VK_SHADER_STAGE_COMPUTE_BIT); + + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "input_img", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = in_planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "output_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = av_pix_fmt_count_planes(s->vkctx.output_format), + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; - switch (s->vkctx.output_format) { - case AV_PIX_FMT_NV12: GLSLD(write_nv12); break; - case AV_PIX_FMT_YUV420P: GLSLD( write_420); break; - case AV_PIX_FMT_YUV444P: GLSLD( write_444); break; - default: break; - } + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0)); - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 size; ); - GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - GLSLF(1, vec2 in_d = vec2(%i, %i); ,in->width, in->height); - GLSLF(1, vec2 c_r = vec2(%i, %i) / in_d; ,crop_w, crop_h); - GLSLF(1, vec2 c_o = vec2(%i, %i) / in_d; ,crop_x,crop_y); - GLSLC(0, ); - - if (s->vkctx.output_format == s->vkctx.input_format) { - for (int i = 0; i < desc_i[1].elems; i++) { - GLSLF(1, size = 
imageSize(output_img[%i]); ,i); - GLSLC(1, if (IS_WITHIN(pos, size)) { ); - switch (s->scaler) { - case F_NEAREST: - case F_BILINEAR: - GLSLF(2, vec4 res = scale_bilinear(%i, pos, c_r, c_o); ,i); - GLSLF(2, imageStore(output_img[%i], pos, res); ,i); - break; - }; - GLSLC(1, } ); - } - } else { - GLSLC(1, vec4 res = scale_bilinear(0, pos, c_r, c_o); ); - GLSLF(1, res = rgb2yuv(res, %i); ,s->out_range == AVCOL_RANGE_JPEG); - switch (s->vkctx.output_format) { - case AV_PIX_FMT_NV12: GLSLC(1, write_nv12(res, pos); ); break; - case AV_PIX_FMT_YUV420P: GLSLC(1, write_420(res, pos); ); break; - case AV_PIX_FMT_YUV444P: GLSLC(1, write_444(res, pos); ); break; - default: return AVERROR(EINVAL); - } - } + GLSLD( scale_bilinear ); + + if (s->vkctx.output_format != s->vkctx.input_format) { + GLSLD( rgb2yuv ); + } - GLSLC(0, } ); + switch (s->vkctx.output_format) { + case AV_PIX_FMT_NV12: GLSLD(write_nv12); break; + case AV_PIX_FMT_YUV420P: GLSLD( write_420); break; + case AV_PIX_FMT_YUV444P: GLSLD( write_444); break; + default: break; + } - RET(ff_vk_compile_shader(vkctx, shd, "main")); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + GLSLF(1, vec2 in_d = vec2(%i, %i); ,in->width, in->height); + GLSLF(1, vec2 c_r = vec2(%i, %i) / in_d; ,crop_w, crop_h); + GLSLF(1, vec2 c_o = vec2(%i, %i) / in_d; ,crop_x,crop_y); + GLSLC(0, ); + + if (s->vkctx.output_format == s->vkctx.input_format) { + for (int i = 0; i < desc[1].elems; i++) { /* desc[1] is the output_img binding: one iteration per output plane */ + GLSLF(1, size = imageSize(output_img[%i]); ,i); + GLSLC(1, if (IS_WITHIN(pos, size)) { ); + switch (s->scaler) { + case F_NEAREST: + case F_BILINEAR: + GLSLF(2, vec4 res = scale_bilinear(%i, pos, c_r, c_o); ,i); + GLSLF(2, imageStore(output_img[%i], pos, res); ,i); + break; + }; + GLSLC(1, } ); + } + } else { + GLSLC(1, vec4 res = scale_bilinear(0, pos, c_r, c_o); ); + GLSLF(1, res = rgb2yuv(res, %i); ,s->out_range == AVCOL_RANGE_JPEG); + switch (s->vkctx.output_format) { +
case AV_PIX_FMT_NV12: GLSLC(1, write_nv12(res, pos); ); break; + case AV_PIX_FMT_YUV420P: GLSLC(1, write_420(res, pos); ); break; + case AV_PIX_FMT_YUV444P: GLSLC(1, write_444(res, pos); ); break; + default: return AVERROR(EINVAL); + } } - RET(ff_vk_init_pipeline_layout(vkctx, s->pl)); - RET(ff_vk_init_compute_pipeline(vkctx, s->pl)); + GLSLC(0, } ); if (s->vkctx.output_format != s->vkctx.input_format) { const AVLumaCoefficients *lcoeffs; double tmp_mat[3][3]; - struct { - float yuv_matrix[4][4]; - } *par; - lcoeffs = av_csp_luma_coeffs_from_avcsp(in->colorspace); if (!lcoeffs) { av_log(ctx, AV_LOG_ERROR, "Unsupported colorspace\n"); return AVERROR(EINVAL); } - RET(ff_vk_create_buf(vkctx, &s->params_buf, - sizeof(*par), NULL, NULL, - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); - - RET(ff_vk_map_buffers(vkctx, &s->params_buf, (uint8_t **)&par, 1, 0)); - ff_fill_rgb2yuv_table(lcoeffs, tmp_mat); - memset(par, 0, sizeof(*par)); - for (int y = 0; y < 3; y++) for (int x = 0; x < 3; x++) - par->yuv_matrix[x][y] = tmp_mat[x][y]; - - par->yuv_matrix[3][3] = 1.0; - - RET(ff_vk_unmap_buffers(vkctx, &s->params_buf, 1, 1)); - - s->params_desc.buffer = s->params_buf.buf; - s->params_desc.range = VK_WHOLE_SIZE; - - ff_vk_update_descriptor_set(vkctx, s->pl, 1); + s->opts.yuv_matrix[x][y] = tmp_mat[x][y]; + s->opts.yuv_matrix[3][3] = 1.0; } - /* Execution context */ - RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf)); + RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); + + RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd)); + RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl)); s->initialized = 1; return 0; fail: - return err; -} - -static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f) -{ - int err = 0; - VkCommandBuffer cmd_buf; - ScaleVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - 
FFVulkanFunctions *vk = &vkctx->vkfn; - AVVkFrame *in = (AVVkFrame *)in_f->data[0]; - AVVkFrame *out = (AVVkFrame *)out_f->data[0]; - VkImageMemoryBarrier barriers[AV_NUM_DATA_POINTERS*2]; - int barrier_count = 0; - const int planes = av_pix_fmt_count_planes(s->vkctx.input_format); - const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format); - const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format); - - /* Update descriptors and init the exec context */ - ff_vk_start_exec_recording(vkctx, s->exec); - cmd_buf = ff_vk_get_exec_buf(s->exec); - - for (int i = 0; i < planes; i++) { - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->input_images[i].imageView, in->img[i], - input_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->output_images[i].imageView, out->img[i], - output_formats[i], - ff_comp_identity_map)); - - s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - } - - ff_vk_update_descriptor_set(vkctx, s->pl, 0); - - for (int i = 0; i < planes; i++) { - VkImageMemoryBarrier bar = { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = in->layout[i], - .newLayout = s->input_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = in->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }; - - memcpy(&barriers[barrier_count++], &bar, sizeof(VkImageMemoryBarrier)); - - in->layout[i] = bar.newLayout; - in->access[i] = bar.dstAccessMask; - } - - for (int i = 0; i < av_pix_fmt_count_planes(s->vkctx.output_format); i++) { - VkImageMemoryBarrier bar = { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = 
VK_ACCESS_SHADER_WRITE_BIT, - .oldLayout = out->layout[i], - .newLayout = s->output_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = out->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }; - - memcpy(&barriers[barrier_count++], &bar, sizeof(VkImageMemoryBarrier)); - - out->layout[i] = bar.newLayout; - out->access[i] = bar.dstAccessMask; - } - - vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, - 0, NULL, 0, NULL, barrier_count, barriers); - - ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl); - - vk->CmdDispatch(cmd_buf, - FFALIGN(vkctx->output_width, CGROUPS[0])/CGROUPS[0], - FFALIGN(vkctx->output_height, CGROUPS[1])/CGROUPS[1], 1); - - ff_vk_add_exec_dep(vkctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(vkctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - - err = ff_vk_submit_exec_queue(vkctx, s->exec); - if (err) - return err; - - ff_vk_qf_rotate(&s->qf); + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); return err; - -fail: - ff_vk_discard_exec_deps(s->exec); - return err; } static int scale_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) @@ -408,7 +282,8 @@ static int scale_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) if (!s->initialized) RET(init_filter(ctx, in)); - RET(process_frames(ctx, out, in)); + RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, out, in, + s->sampler, &s->opts, sizeof(s->opts))); err = av_frame_copy_props(out, in); if (err < 0) @@ -475,8 +350,17 @@ static int scale_vulkan_config_output(AVFilterLink *outlink) static void scale_vulkan_uninit(AVFilterContext *avctx) { ScaleVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + + ff_vk_exec_pool_free(vkctx, 
&s->e); + ff_vk_pipeline_free(vkctx, &s->pl); + ff_vk_shader_free(vkctx, &s->shd); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); - ff_vk_free_buf(&s->vkctx, &s->params_buf); ff_vk_uninit(&s->vkctx); s->initialized = 0; From fabd4a8d9aaf5685fd46ed1b977c9c0643bc6bcb Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 17 Feb 2023 03:13:43 +0100 Subject: [PATCH 76/98] transpose_vulkan: port for the rewrite --- libavfilter/vf_transpose_vulkan.c | 224 ++++++++++-------------------- 1 file changed, 76 insertions(+), 148 deletions(-) diff --git a/libavfilter/vf_transpose_vulkan.c b/libavfilter/vf_transpose_vulkan.c index 3b2ce4fb693ee..f9c0dd928d73c 100644 --- a/libavfilter/vf_transpose_vulkan.c +++ b/libavfilter/vf_transpose_vulkan.c @@ -1,5 +1,7 @@ /* * copyright (c) 2021 Wu Jianhua + * Copyright (c) Lynne + * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -20,41 +22,60 @@ #include "libavutil/random_seed.h" #include "libavutil/opt.h" #include "vulkan_filter.h" +#include "vulkan_spirv.h" #include "internal.h" #include "transpose.h" -#define CGS 32 - typedef struct TransposeVulkanContext { FFVulkanContext vkctx; - FFVkQueueFamilyCtx qf; - FFVkExecContext *exec; - FFVulkanPipeline *pl; - VkDescriptorImageInfo input_images[3]; - VkDescriptorImageInfo output_images[3]; + int initialized; + FFVulkanPipeline pl; + FFVkExecPool e; + FFVkQueueFamilyCtx qf; + FFVkSPIRVShader shd; + VkSampler sampler; int dir; int passthrough; - int initialized; } TransposeVulkanContext; static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) { - int err = 0; - FFVkSPIRVShader *shd; + int err; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque; TransposeVulkanContext *s = ctx->priv; FFVulkanContext *vkctx = &s->vkctx; + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + FFVkSPIRVShader *shd = &s->shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + + 
spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_LINEAR)); + RET(ff_vk_shader_init(&s->pl, &s->shd, "transpose_compute", + VK_SHADER_STAGE_COMPUTE_BIT, 0)); - FFVulkanDescriptorSetBinding image_descs[] = { + ff_vk_shader_set_compute_sizes(&s->shd, 32, 1, 1); + + desc = (FFVulkanDescriptorSetBinding []) { { .name = "input_images", .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .dimensions = 2, .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->input_images, + .samplers = DUP_SAMPLER(s->sampler), }, { .name = "output_images", @@ -64,154 +85,49 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) .dimensions = 2, .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->output_images, }, }; - image_descs[0].sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_LINEAR); - if (!image_descs[0].sampler) - return AVERROR_EXTERNAL; - - ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0); - - { - s->pl = ff_vk_create_pipeline(vkctx, &s->qf); - if (!s->pl) - return AVERROR(ENOMEM); - - shd = ff_vk_init_shader(s->pl, "transpose_compute", image_descs[0].stages); - if (!shd) - return AVERROR(ENOMEM); - - ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 }); - RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0)); - - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 size; ); - GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - for (int i = 0; i < planes; i++) { - GLSLC(0, ); - GLSLF(1, size = imageSize(output_images[%i]); ,i); - GLSLC(1, if (IS_WITHIN(pos, size)) { ); - if (s->dir == TRANSPOSE_CCLOCK) - GLSLF(2, vec4 res = texture(input_images[%i], ivec2(size.y - pos.y, pos.x)); 
,i); - else if (s->dir == TRANSPOSE_CLOCK_FLIP || s->dir == TRANSPOSE_CLOCK) { - GLSLF(2, vec4 res = texture(input_images[%i], ivec2(size.yx - pos.yx)); ,i); - if (s->dir == TRANSPOSE_CLOCK) - GLSLC(2, pos = ivec2(pos.x, size.y - pos.y); ); - } else - GLSLF(2, vec4 res = texture(input_images[%i], pos.yx); ,i); - GLSLF(2, imageStore(output_images[%i], pos, res); ,i); - GLSLC(1, } ); - } - GLSLC(0, } ); - - RET(ff_vk_compile_shader(vkctx, shd, "main")); - RET(ff_vk_init_pipeline_layout(vkctx, s->pl)); - RET(ff_vk_init_compute_pipeline(vkctx, s->pl)); - } - - RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf)); - s->initialized = 1; - -fail: - return err; -} - -static int process_frames(AVFilterContext *avctx, AVFrame *outframe, AVFrame *inframe) -{ - int err = 0; - VkCommandBuffer cmd_buf; - TransposeVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - FFVulkanFunctions *vk = &s->vkctx.vkfn; - const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - - AVVkFrame *in = (AVVkFrame *)inframe->data[0]; - AVVkFrame *out = (AVVkFrame *)outframe->data[0]; - - const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format); - const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format); - - ff_vk_start_exec_recording(vkctx, s->exec); - cmd_buf = ff_vk_get_exec_buf(s->exec); - - for (int i = 0; i < planes; i++) { - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->input_images[i].imageView, in->img[i], - input_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->output_images[i].imageView, out->img[i], - output_formats[i], - ff_comp_identity_map)); - - s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - } - - ff_vk_update_descriptor_set(vkctx, s->pl, 0); + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0)); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); 
+ GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); for (int i = 0; i < planes; i++) { - VkImageMemoryBarrier barriers[] = { - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = in->layout[i], - .newLayout = s->input_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = in->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .oldLayout = out->layout[i], - .newLayout = s->output_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = out->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - }; - - vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, - 0, NULL, 0, NULL, FF_ARRAY_ELEMS(barriers), barriers); - - in->layout[i] = barriers[0].newLayout; - in->access[i] = barriers[0].dstAccessMask; - - out->layout[i] = barriers[1].newLayout; - out->access[i] = barriers[1].dstAccessMask; + GLSLC(0, ); + GLSLF(1, size = imageSize(output_images[%i]); ,i); + GLSLC(1, if (IS_WITHIN(pos, size)) { ); + if (s->dir == TRANSPOSE_CCLOCK) + GLSLF(2, vec4 res = texture(input_images[%i], ivec2(size.y - pos.y, pos.x)); ,i); + else if (s->dir == TRANSPOSE_CLOCK_FLIP || s->dir == TRANSPOSE_CLOCK) { + GLSLF(2, vec4 res = texture(input_images[%i], ivec2(size.yx - pos.yx)); ,i); + if (s->dir == TRANSPOSE_CLOCK) + GLSLC(2, pos = ivec2(pos.x, size.y - pos.y); ); + } else + GLSLF(2, vec4 res = texture(input_images[%i], pos.yx); ,i); + GLSLF(2, imageStore(output_images[%i], pos, res); 
,i); + GLSLC(1, } ); } + GLSLC(0, } ); - ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl); - vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS, - s->vkctx.output_height, 1); + RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); - ff_vk_add_exec_dep(vkctx, s->exec, inframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(vkctx, s->exec, outframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd)); + RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl)); - err = ff_vk_submit_exec_queue(vkctx, s->exec); - if (err) - return err; - - ff_vk_qf_rotate(&s->qf); + s->initialized = 1; return 0; fail: - ff_vk_discard_exec_deps(s->exec); + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); + return err; } @@ -235,7 +151,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) if (!s->initialized) RET(init_filter(ctx, in)); - RET(process_frames(ctx, out, in)); + RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, out, in, + s->sampler, NULL, 0)); RET(av_frame_copy_props(out, in)); @@ -259,6 +176,17 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) static av_cold void transpose_vulkan_uninit(AVFilterContext *avctx) { TransposeVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl); + ff_vk_shader_free(vkctx, &s->shd); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); + ff_vk_uninit(&s->vkctx); s->initialized = 0; From 1226c202bb667b17b159c883e34c7fb1e5197de1 Mon Sep 17 00:00:00 2001 From: Lynne Date: Sun, 26 Feb 2023 22:36:51 +0100 Subject: [PATCH 77/98] lavfi: add bwdif_vulkan --- configure | 1 + libavfilter/Makefile | 1 + libavfilter/allfilters.c | 1 + 
libavfilter/vf_bwdif_vulkan.c | 416 ++++++++++++++++++++++++++++++++++ 4 files changed, 419 insertions(+) create mode 100644 libavfilter/vf_bwdif_vulkan.c diff --git a/configure b/configure index 5ea128babd4f6..1fbd1939c6fae 100755 --- a/configure +++ b/configure @@ -3640,6 +3640,7 @@ blend_vulkan_filter_deps="vulkan spirv_compiler" boxblur_filter_deps="gpl" boxblur_opencl_filter_deps="opencl gpl" bs2b_filter_deps="libbs2b" +bwdif_vulkan_filter_deps="vulkan spirv_compiler" chromaber_vulkan_filter_deps="vulkan spirv_compiler" colorkey_opencl_filter_deps="opencl" colormatrix_filter_deps="gpl" diff --git a/libavfilter/Makefile b/libavfilter/Makefile index c4b52d02575af..4bc30c37f88c2 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -213,6 +213,7 @@ OBJS-$(CONFIG_BOXBLUR_FILTER) += vf_boxblur.o boxblur.o OBJS-$(CONFIG_BOXBLUR_OPENCL_FILTER) += vf_avgblur_opencl.o opencl.o \ opencl/avgblur.o boxblur.o OBJS-$(CONFIG_BWDIF_FILTER) += vf_bwdif.o yadif_common.o +OBJS-$(CONFIG_BWDIF_VULKAN_FILTER) += vf_bwdif_vulkan.o yadif_common.o vulkan.o vulkan_filter.o OBJS-$(CONFIG_CAS_FILTER) += vf_cas.o OBJS-$(CONFIG_CCREPACK_FILTER) += vf_ccrepack.o OBJS-$(CONFIG_CHROMABER_VULKAN_FILTER) += vf_chromaber_vulkan.o vulkan.o vulkan_filter.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index 30447a11c8da1..8f88c1443f228 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -197,6 +197,7 @@ extern const AVFilter ff_vf_bm3d; extern const AVFilter ff_vf_boxblur; extern const AVFilter ff_vf_boxblur_opencl; extern const AVFilter ff_vf_bwdif; +extern const AVFilter ff_vf_bwdif_vulkan; extern const AVFilter ff_vf_cas; extern const AVFilter ff_vf_ccrepack; extern const AVFilter ff_vf_chromaber_vulkan; diff --git a/libavfilter/vf_bwdif_vulkan.c b/libavfilter/vf_bwdif_vulkan.c new file mode 100644 index 0000000000000..126e852e96d4f --- /dev/null +++ b/libavfilter/vf_bwdif_vulkan.c @@ -0,0 +1,416 @@ +/* + * Copyright (c) Lynne + * Copyright (C) 
2018 Philip Langdale + * Copyright (C) 2016 Thomas Mundt + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/random_seed.h" +#include "libavutil/opt.h" +#include "vulkan_filter.h" +#include "vulkan_spirv.h" +#include "yadif.h" +#include "internal.h" + +typedef struct BWDIFVulkanContext { + YADIFContext yadif; + FFVulkanContext vkctx; + + int initialized; + FFVkExecPool e; + FFVkQueueFamilyCtx qf; + VkSampler sampler; + FFVulkanPipeline pl; + FFVkSPIRVShader shd; +} BWDIFVulkanContext; + +typedef struct BWDIFParameters { + int parity; + int tff; + int current_field; +} BWDIFParameters; + +static const char filter_fn[] = { + "const vec4 coef_lf[2] = { vec4(4309), vec4(213), };\n" + "const vec4 coef_hf[3] = { vec4(5570), vec4(3801), vec4(1016) };\n" + "const vec4 coef_sp[2] = { vec4(5077), vec4(981), };\n" + C(0, ) + C(0, vec4 process_intra(vec4 cur[4]) ) + C(0, { ) + C(1, return (coef_sp[0]*(cur[1] + cur[2]) - coef_sp[1]*(cur[0] + cur[3])) / (1 << 13); ) + C(0, } ) + C(0, ) + C(0, vec4 process_line(vec4 prev2[5], vec4 prev1[2], vec4 cur[4], vec4 next1[2], vec4 next2[5]) ) + C(0, { ) + C(1, vec4 fc = cur[1]; ) + C(1, vec4 fe = cur[2]; ) + C(1, vec4 fs = prev2[2] + next2[2]; ) + C(1, vec4 fd = fs / 2; ) + C(0, ) + C(1, vec4 
temp_diff[3]; ) + C(1, temp_diff[0] = abs(prev2[2] - next2[2]); ) + C(1, temp_diff[1] = (abs(prev1[0] - fc) + abs(prev1[1] - fe)) / 2; ) + C(1, temp_diff[2] = (abs(next1[0] - fc) + abs(next1[1] - fe)) / 2; ) /* next-field difference; temp_diff[2] is read by the max() on the following line */ + C(1, vec4 diff = max(temp_diff[0] / 2, max(temp_diff[1], temp_diff[2])); ) + C(1, bvec4 diff_mask = equal(diff, vec4(0)); ) + C(0, ) + C(1, vec4 fbs = prev2[1] + next2[1]; ) + C(1, vec4 ffs = prev2[3] + next2[3]; ) + C(1, vec4 fb = (fbs / 2) - fc; ) + C(1, vec4 ff = (ffs / 2) - fe; ) + C(1, vec4 dc = fd - fc; ) + C(1, vec4 de = fd - fe; ) + C(1, vec4 mmax = max(de, max(dc, min(fb, ff))); ) + C(1, vec4 mmin = min(de, min(dc, max(fb, ff))); ) + C(1, diff = max(diff, max(mmin, -mmax)); ) + C(0, ) +" vec4 interpolate_all = (((coef_hf[0]*(fs) - coef_hf[1]*(fbs + ffs) +\n" +" coef_hf[2]*(prev2[0] + next2[0] + prev2[4] + next2[4])) / 4) +\n" +" coef_lf[0]*(fc + fe) - coef_lf[1]*(cur[0] + cur[3])) / (1 << 13);\n" +" vec4 interpolate_cur = (coef_sp[0]*(fc + fe) - coef_sp[1]*(cur[0] + cur[3])) / (1 << 13);\n" + C(0, ) + C(1, bvec4 interpolate_cnd1 = greaterThan(abs(fc - fe), temp_diff[0]); ) + C(1, vec4 dst = mix(interpolate_cur, interpolate_all, interpolate_cnd1); ) + C(1, return mix(dst, fd, diff_mask); ) + C(0, } ) +}; + +static av_cold int init_filter(AVFilterContext *ctx) +{ + int err; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; /* tested under the fail label, which is reachable before compile_shader() assigns it */ + BWDIFVulkanContext *s = ctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + FFVkSPIRVShader *shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST)); + RET(ff_vk_shader_init(&s->pl, &s->shd,
"bwdif_compute", + VK_SHADER_STAGE_COMPUTE_BIT, 0)); + shd = &s->shd; + + ff_vk_shader_set_compute_sizes(shd, 1, 64, 1); + + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "prev", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "cur", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "next", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "dst", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 4, 0, 0)); + + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, int parity; ); + GLSLC(1, int tff; ); + GLSLC(1, int current_field; ); + GLSLC(0, }; ); + + ff_vk_add_push_constant(&s->pl, 0, sizeof(BWDIFParameters), + VK_SHADER_STAGE_COMPUTE_BIT); + + GLSLD( filter_fn ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, vec4 res; ); + GLSLC(1, ivec2 size; ); + GLSLC(1, vec4 dcur[4]; ); + GLSLC(1, vec4 prev1[2]; ); + GLSLC(1, vec4 next1[2]; ); + GLSLC(1, vec4 prev2[5]; ); + GLSLC(1, vec4 next2[5]; ); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + GLSLC(1, bool filter_field = ((pos.y ^ parity) & 1) == 1; ); + GLSLF(1, bool is_intra = filter_field && (current_field == %i); ,YADIF_FIELD_END); + GLSLC(1, bool field_parity = (parity ^ tff) != 0; ); + GLSLC(0, ); + + for (int i = 0; i < planes; i++) { + GLSLC(0, ); + GLSLF(1, size = imageSize(dst[%i]); ,i); + GLSLC(1, 
if (!IS_WITHIN(pos, size)) { ); + GLSLC(2, return; ); + GLSLC(1, } else if (is_intra) { ); + GLSLF(2, dcur[0] = texture(cur[%i], pos - ivec2(0, 3)); ,i); + GLSLF(2, dcur[1] = texture(cur[%i], pos - ivec2(0, 1)); ,i); + GLSLF(2, dcur[2] = texture(cur[%i], pos + ivec2(0, 1)); ,i); + GLSLF(2, dcur[3] = texture(cur[%i], pos + ivec2(0, 3)); ,i); + GLSLC(0, ); + GLSLC(2, res = process_intra(dcur); ); + GLSLF(2, imageStore(dst[%i], pos, res); ,i); + GLSLC(1, } else if (filter_field) { ); + GLSLF(2, dcur[0] = texture(cur[%i], pos - ivec2(0, 3)); ,i); + GLSLF(2, dcur[1] = texture(cur[%i], pos - ivec2(0, 1)); ,i); + GLSLF(2, dcur[2] = texture(cur[%i], pos + ivec2(0, 1)); ,i); + GLSLF(2, dcur[3] = texture(cur[%i], pos + ivec2(0, 3)); ,i); + GLSLC(0, ); + GLSLF(2, prev1[0] = texture(prev[%i], pos - ivec2(0, 1)); ,i); + GLSLF(2, prev1[1] = texture(prev[%i], pos + ivec2(0, 1)); ,i); + GLSLC(0, ); + GLSLF(2, next1[0] = texture(next[%i], pos - ivec2(0, 1)); ,i); + GLSLF(2, next1[1] = texture(next[%i], pos + ivec2(0, 1)); ,i); + GLSLC(0, ); + GLSLC(2, if (field_parity) { ); + GLSLF(3, prev2[0] = texture(prev[%i], pos - ivec2(0, 4)); ,i); + GLSLF(3, prev2[1] = texture(prev[%i], pos - ivec2(0, 2)); ,i); + GLSLF(3, prev2[2] = texture(prev[%i], pos); ,i); + GLSLF(3, prev2[3] = texture(prev[%i], pos + ivec2(0, 2)); ,i); + GLSLF(3, prev2[4] = texture(prev[%i], pos + ivec2(0, 4)); ,i); + GLSLC(0, ); + GLSLF(3, next2[0] = texture(cur[%i], pos - ivec2(0, 4)); ,i); + GLSLF(3, next2[1] = texture(cur[%i], pos - ivec2(0, 2)); ,i); + GLSLF(3, next2[2] = texture(cur[%i], pos); ,i); + GLSLF(3, next2[3] = texture(cur[%i], pos + ivec2(0, 2)); ,i); + GLSLF(3, next2[4] = texture(cur[%i], pos + ivec2(0, 4)); ,i); + GLSLC(2, } else { ); + GLSLF(3, prev2[0] = texture(cur[%i], pos - ivec2(0, 4)); ,i); + GLSLF(3, prev2[1] = texture(cur[%i], pos - ivec2(0, 2)); ,i); + GLSLF(3, prev2[2] = texture(cur[%i], pos); ,i); + GLSLF(3, prev2[3] = texture(cur[%i], pos + ivec2(0, 2)); ,i); + GLSLF(3, prev2[4] = 
texture(cur[%i], pos + ivec2(0, 4)); ,i); + GLSLC(0, ); + GLSLF(3, next2[0] = texture(next[%i], pos - ivec2(0, 4)); ,i); + GLSLF(3, next2[1] = texture(next[%i], pos - ivec2(0, 2)); ,i); + GLSLF(3, next2[2] = texture(next[%i], pos); ,i); + GLSLF(3, next2[3] = texture(next[%i], pos + ivec2(0, 2)); ,i); + GLSLF(3, next2[4] = texture(next[%i], pos + ivec2(0, 4)); ,i); + GLSLC(2, } ); + GLSLC(0, ); + GLSLC(2, res = process_line(prev2, prev1, dcur, next1, next2); ); + GLSLF(2, imageStore(dst[%i], pos, res); ,i); + GLSLC(1, } else { ); + GLSLF(2, res = texture(cur[%i], pos); ,i); + GLSLF(2, imageStore(dst[%i], pos, res); ,i); + GLSLC(1, } ); + } + + GLSLC(0, } ); + + RET(spv->compile_shader(spv, ctx, &s->shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_create(vkctx, &s->shd, spv_data, spv_len, "main")); + + RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, &s->shd)); + RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl)); + + s->initialized = 1; + err = 0; + + /* Fall through on success: spv and spv_opaque are locals that nothing else keeps, so returning here would leak them. */ +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); + + return err; +} + +static void bwdif_vulkan_filter_frame(AVFilterContext *ctx, AVFrame *dst, + int parity, int tff) +{ + BWDIFVulkanContext *s = ctx->priv; + YADIFContext *y = &s->yadif; + BWDIFParameters params = { + .parity = parity, + .tff = tff, + .current_field = y->current_field, + }; + + ff_vk_filter_process_Nin(&s->vkctx, &s->e, &s->pl, dst, + (AVFrame *[]){ y->prev, y->cur, y->next }, 3, + s->sampler, &params, sizeof(params)); + + if (y->current_field == YADIF_FIELD_END) + y->current_field = YADIF_FIELD_NORMAL; +} + +static void bwdif_vulkan_uninit(AVFilterContext *avctx) +{ + BWDIFVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl); + ff_vk_shader_free(vkctx, &s->shd); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, +
vkctx->hwctx->alloc); + + ff_vk_uninit(&s->vkctx); + + s->initialized = 0; +} + +static int bwdif_vulkan_config_input(AVFilterLink *inlink) +{ + AVHWFramesContext *input_frames; + AVFilterContext *avctx = inlink->dst; + BWDIFVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + + if (!inlink->hw_frames_ctx) { + av_log(inlink->dst, AV_LOG_ERROR, "Vulkan filtering requires a " + "hardware frames context on the input.\n"); + return AVERROR(EINVAL); + } + + input_frames = (AVHWFramesContext *)inlink->hw_frames_ctx->data; + if (input_frames->format != AV_PIX_FMT_VULKAN) + return AVERROR(EINVAL); + + /* Extract the device and default output format from the first input. */ + if (avctx->inputs[0] != inlink) + return 0; + + /* Save the ref, without reffing it */ + vkctx->input_frames_ref = inlink->hw_frames_ctx; + + /* Defaults */ + vkctx->output_format = input_frames->sw_format; + vkctx->output_width = input_frames->width; + vkctx->output_height = input_frames->height; + + return 0; +} + +static int bwdif_vulkan_config_output(AVFilterLink *outlink) +{ + int err; + AVFilterContext *avctx = outlink->src; + BWDIFVulkanContext *s = avctx->priv; + YADIFContext *y = &s->yadif; + FFVulkanContext *vkctx = &s->vkctx; + + av_buffer_unref(&outlink->hw_frames_ctx); + + err = ff_vk_filter_init_context(avctx, vkctx, vkctx->input_frames_ref, + vkctx->output_width, vkctx->output_height, + vkctx->output_format); + if (err < 0) + return err; + + /* For logging */ + vkctx->class = y->class; + + outlink->hw_frames_ctx = av_buffer_ref(vkctx->frames_ref); + if (!outlink->hw_frames_ctx) + return AVERROR(ENOMEM); + + outlink->time_base = av_mul_q(avctx->inputs[0]->time_base, (AVRational){1, 2}); + outlink->w = vkctx->output_width; + outlink->h = vkctx->output_height; + + if (y->mode & 1) + outlink->frame_rate = av_mul_q(avctx->inputs[0]->frame_rate, + (AVRational){2, 1}); + + if (outlink->w < 4 || outlink->h < 4) { + av_log(avctx, AV_LOG_ERROR, "Video of less than 4 columns or 
lines is not " + "supported\n"); + return AVERROR(EINVAL); + } + + y->csp = av_pix_fmt_desc_get(vkctx->frames->sw_format); + y->filter = bwdif_vulkan_filter_frame; + + return init_filter(avctx); +} + +static const AVClass bwdif_vulkan_class = { + .class_name = "bwdif_vulkan", + .item_name = av_default_item_name, + .option = ff_yadif_options, + .version = LIBAVUTIL_VERSION_INT, + .category = AV_CLASS_CATEGORY_FILTER, +}; + +static const AVFilterPad bwdif_vulkan_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = ff_yadif_filter_frame, + .config_props = &bwdif_vulkan_config_input, + }, +}; + +static const AVFilterPad bwdif_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .request_frame = ff_yadif_request_frame, + .config_props = &bwdif_vulkan_config_output, + }, +}; + +const AVFilter ff_vf_bwdif_vulkan = { + .name = "bwdif_vulkan", + .description = NULL_IF_CONFIG_SMALL("Deinterlace Vulkan frames via bwdif"), + .priv_size = sizeof(BWDIFVulkanContext), + .init = &ff_vk_filter_init, + .uninit = &bwdif_vulkan_uninit, + FILTER_INPUTS(bwdif_vulkan_inputs), + FILTER_OUTPUTS(bwdif_vulkan_outputs), + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), + .priv_class = &bwdif_vulkan_class, + .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL, + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; From 1fd7328f079eaddb6d8a52cd2b5f7fe88299b697 Mon Sep 17 00:00:00 2001 From: Lynne Date: Thu, 10 Mar 2022 18:03:05 +0100 Subject: [PATCH 78/98] avcodec: add AVHWAccel.free_frame_priv callback --- libavcodec/av1dec.c | 4 ++-- libavcodec/avcodec.h | 8 ++++++++ libavcodec/decode.c | 20 ++++++++++++++++++++ libavcodec/decode.h | 11 +++++++++++ libavcodec/h264_slice.c | 3 ++- libavcodec/hevc_refs.c | 3 ++- libavcodec/mpegpicture.c | 4 +++- libavcodec/vp8.c | 2 +- libavcodec/vp9.c | 2 +- 9 files changed, 50 insertions(+), 7 deletions(-) diff --git a/libavcodec/av1dec.c b/libavcodec/av1dec.c index d46ee48335f56..9279ce3636e07 100644 --- 
a/libavcodec/av1dec.c +++ b/libavcodec/av1dec.c @@ -27,6 +27,7 @@ #include "libavutil/opt.h" #include "avcodec.h" #include "av1_parse.h" +#include "decode.h" #include "av1dec.h" #include "atsc_a53.h" #include "bytestream.h" @@ -863,8 +864,7 @@ static int av1_frame_alloc(AVCodecContext *avctx, AV1Frame *f) if (avctx->hwaccel) { const AVHWAccel *hwaccel = avctx->hwaccel; if (hwaccel->frame_priv_data_size) { - f->hwaccel_priv_buf = - av_buffer_allocz(hwaccel->frame_priv_data_size); + f->hwaccel_priv_buf = ff_hwaccel_frame_priv_alloc(avctx, hwaccel); if (!f->hwaccel_priv_buf) { ret = AVERROR(ENOMEM); goto fail; diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index dad443c818589..82c9aaab5345a 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -2259,6 +2259,14 @@ typedef struct AVHWAccel { * For thread-safe hwaccels only. */ int (*update_thread_context)(AVCodecContext *dst, const AVCodecContext *src); + + /** + * Callback to free the hwaccel-specific frame data. + * + * @param hwctx a pointer to an AVHWDeviceContext. + * @param data the per-frame hardware accelerator private data to be freed. 
+ */ + void (*free_frame_priv)(void *hwctx, uint8_t *data); } AVHWAccel; /** diff --git a/libavcodec/decode.c b/libavcodec/decode.c index 9ff132a15c3ca..a7c130207c17b 100644 --- a/libavcodec/decode.c +++ b/libavcodec/decode.c @@ -1718,3 +1718,39 @@ int ff_copy_palette(void *dst, const AVPacket *src, void *logctx) } return 0; } + +AVBufferRef *ff_hwaccel_frame_priv_alloc(AVCodecContext *avctx, + const AVHWAccel *hwaccel) +{ + AVBufferRef *ref; + AVHWFramesContext *frames_ctx; + uint8_t *data; + + /* Without a custom free callback the opaque pointer is never read, + * so no frames context is needed: allocate a plain zeroed buffer. */ + if (!hwaccel->free_frame_priv) + return av_buffer_allocz(hwaccel->frame_priv_data_size); + + /* free_frame_priv receives the device context, which is reached + * through hw_frames_ctx; refuse to dereference it when unset. */ + if (!avctx->hw_frames_ctx) { + av_log(avctx, AV_LOG_ERROR, "hwaccel frame private data requires " + "a hardware frames context.\n"); + return NULL; + } + frames_ctx = (AVHWFramesContext *)avctx->hw_frames_ctx->data; + + data = av_mallocz(hwaccel->frame_priv_data_size); + if (!data) + return NULL; + + ref = av_buffer_create(data, hwaccel->frame_priv_data_size, + hwaccel->free_frame_priv, + frames_ctx->device_ctx, 0); + if (!ref) { + av_free(data); + return NULL; + } + + return ref; +} diff --git a/libavcodec/decode.h b/libavcodec/decode.h index 8430ffbd66484..aaa29bc7f528a 100644 --- a/libavcodec/decode.h +++ b/libavcodec/decode.h @@ -150,4 +150,15 @@ int ff_reget_buffer(AVCodecContext *avctx, AVFrame *frame, int flags); int ff_side_data_update_matrix_encoding(AVFrame *frame, enum AVMatrixEncoding matrix_encoding); +/** + * Allocate a hwaccel frame private data and create an AVBufferRef + * from it.
+ * + * @param avctx The codec context which to attach as an opaque value + * @param hwaccel The hwaccel for which to allocate + * @return The allocated buffer + */ +AVBufferRef *ff_hwaccel_frame_priv_alloc(AVCodecContext *avctx, + const AVHWAccel *hwaccel); + #endif /* AVCODEC_DECODE_H */ diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c index be7a8e0b5abe3..d715cbb002a83 100644 --- a/libavcodec/h264_slice.c +++ b/libavcodec/h264_slice.c @@ -33,6 +33,7 @@ #include "libavutil/pixdesc.h" #include "libavutil/timecode.h" #include "internal.h" +#include "decode.h" #include "cabac.h" #include "cabac_functions.h" #include "decode.h" @@ -212,7 +213,7 @@ static int alloc_picture(H264Context *h, H264Picture *pic) const AVHWAccel *hwaccel = h->avctx->hwaccel; av_assert0(!pic->hwaccel_picture_private); if (hwaccel->frame_priv_data_size) { - pic->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size); + pic->hwaccel_priv_buf = ff_hwaccel_frame_priv_alloc(h->avctx, hwaccel); if (!pic->hwaccel_priv_buf) return AVERROR(ENOMEM); pic->hwaccel_picture_private = pic->hwaccel_priv_buf->data; diff --git a/libavcodec/hevc_refs.c b/libavcodec/hevc_refs.c index e9be02c489232..a4af6ca65682b 100644 --- a/libavcodec/hevc_refs.c +++ b/libavcodec/hevc_refs.c @@ -23,6 +23,7 @@ #include "libavutil/avassert.h" +#include "decode.h" #include "thread.h" #include "hevc.h" #include "hevcdec.h" @@ -121,7 +122,7 @@ static HEVCFrame *alloc_frame(HEVCContext *s) const AVHWAccel *hwaccel = s->avctx->hwaccel; av_assert0(!frame->hwaccel_picture_private); if (hwaccel->frame_priv_data_size) { - frame->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size); + frame->hwaccel_priv_buf = ff_hwaccel_frame_priv_alloc(s->avctx, hwaccel); if (!frame->hwaccel_priv_buf) goto fail; frame->hwaccel_picture_private = frame->hwaccel_priv_buf->data; diff --git a/libavcodec/mpegpicture.c b/libavcodec/mpegpicture.c index 3204a70578cb6..71c7a3fd706e8 100644 --- a/libavcodec/mpegpicture.c +++ 
b/libavcodec/mpegpicture.c @@ -27,6 +27,8 @@ #include "avcodec.h" #include "encode.h" +#include "internal.h" +#include "decode.h" #include "motion_est.h" #include "mpegpicture.h" #include "mpegutils.h" @@ -172,7 +174,7 @@ static int alloc_frame_buffer(AVCodecContext *avctx, Picture *pic, if (avctx->hwaccel) { assert(!pic->hwaccel_picture_private); if (avctx->hwaccel->frame_priv_data_size) { - pic->hwaccel_priv_buf = av_buffer_allocz(avctx->hwaccel->frame_priv_data_size); + pic->hwaccel_priv_buf = ff_hwaccel_frame_priv_alloc(avctx, avctx->hwaccel); if (!pic->hwaccel_priv_buf) { av_log(avctx, AV_LOG_ERROR, "alloc_frame_buffer() failed (hwaccel private data allocation)\n"); return -1; diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c index 2ab06c82939f6..b410e0eb79fd0 100644 --- a/libavcodec/vp8.c +++ b/libavcodec/vp8.c @@ -109,7 +109,7 @@ static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref) if (s->avctx->hwaccel) { const AVHWAccel *hwaccel = s->avctx->hwaccel; if (hwaccel->frame_priv_data_size) { - f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size); + f->hwaccel_priv_buf = ff_hwaccel_frame_priv_alloc(s->avctx, hwaccel); if (!f->hwaccel_priv_buf) goto fail; f->hwaccel_picture_private = f->hwaccel_priv_buf->data; diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c index d8a31507fa50d..03883d254b91d 100644 --- a/libavcodec/vp9.c +++ b/libavcodec/vp9.c @@ -136,7 +136,7 @@ static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f) const AVHWAccel *hwaccel = avctx->hwaccel; av_assert0(!f->hwaccel_picture_private); if (hwaccel->frame_priv_data_size) { - f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size); + f->hwaccel_priv_buf = ff_hwaccel_frame_priv_alloc(avctx, hwaccel); if (!f->hwaccel_priv_buf) goto fail; f->hwaccel_picture_private = f->hwaccel_priv_buf->data; From 5c4566a40815d709af3302ed189ff34ecf099d70 Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 6 Jan 2023 03:32:56 +0100 Subject: [PATCH 79/98] avcodec: add 
AVHWAccel.flush callback --- libavcodec/av1dec.c | 3 +++ libavcodec/avcodec.h | 5 +++++ libavcodec/h264dec.c | 3 +++ libavcodec/hevcdec.c | 3 +++ libavcodec/vp8.c | 3 +++ libavcodec/vp9.c | 3 +++ 6 files changed, 20 insertions(+) diff --git a/libavcodec/av1dec.c b/libavcodec/av1dec.c index 9279ce3636e07..aaa66ee7ea661 100644 --- a/libavcodec/av1dec.c +++ b/libavcodec/av1dec.c @@ -1412,6 +1412,9 @@ static void av1_decode_flush(AVCodecContext *avctx) av_buffer_unref(&itut_t35.payload_ref); ff_cbs_flush(s->cbc); + + if (avctx->hwaccel && avctx->hwaccel->flush) + avctx->hwaccel->flush(avctx); } #define OFFSET(x) offsetof(AV1DecContext, x) diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 82c9aaab5345a..84e29a02dbca6 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -2267,6 +2267,11 @@ typedef struct AVHWAccel { * @param data the per-frame hardware accelerator private data to be freed. */ void (*free_frame_priv)(void *hwctx, uint8_t *data); + + /** + * Callback to flush the hwaccel state. 
+ */ + void (*flush)(AVCodecContext *avctx); } AVHWAccel; /** diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c index 521b1e2235d83..a10b4bb85cfed 100644 --- a/libavcodec/h264dec.c +++ b/libavcodec/h264dec.c @@ -484,6 +484,9 @@ static void h264_decode_flush(AVCodecContext *avctx) ff_h264_free_tables(h); h->context_initialized = 0; + + if (avctx->hwaccel && avctx->hwaccel->flush) + avctx->hwaccel->flush(avctx); } static int get_last_needed_nal(H264Context *h) diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c index b01563177b8af..8764e0bd83ef4 100644 --- a/libavcodec/hevcdec.c +++ b/libavcodec/hevcdec.c @@ -3708,6 +3708,9 @@ static void hevc_decode_flush(AVCodecContext *avctx) av_buffer_unref(&s->rpu_buf); s->max_ra = INT_MAX; s->eos = 1; + + if (avctx->hwaccel && avctx->hwaccel->flush) + avctx->hwaccel->flush(avctx); } #define OFFSET(x) offsetof(HEVCContext, x) diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c index b410e0eb79fd0..50afe19b7a5bb 100644 --- a/libavcodec/vp8.c +++ b/libavcodec/vp8.c @@ -167,6 +167,9 @@ static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem) if (free_mem) free_buffers(s); + + if (avctx->hwaccel && avctx->hwaccel->flush) + avctx->hwaccel->flush(avctx); } static void vp8_decode_flush(AVCodecContext *avctx) diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c index 03883d254b91d..4f704ec0dd12f 100644 --- a/libavcodec/vp9.c +++ b/libavcodec/vp9.c @@ -1801,6 +1801,9 @@ static void vp9_decode_flush(AVCodecContext *avctx) vp9_frame_unref(avctx, &s->s.frames[i]); for (i = 0; i < 8; i++) ff_thread_release_ext_buffer(avctx, &s->s.refs[i]); + + if (avctx->hwaccel && avctx->hwaccel->flush) + avctx->hwaccel->flush(avctx); } static av_cold int vp9_decode_init(AVCodecContext *avctx) From da3294d12eae55b2b46ac8371e4282ba39cd417f Mon Sep 17 00:00:00 2001 From: Lynne Date: Sun, 18 Dec 2022 08:31:03 +0100 Subject: [PATCH 80/98] libavcodec: add Vulkan common video code --- configure | 2 +- libavcodec/Makefile | 2 + 
libavcodec/hwconfig.h | 2 + libavcodec/vulkan.c | 19 ++ libavcodec/vulkan.h | 24 +++ libavcodec/vulkan_video.c | 372 ++++++++++++++++++++++++++++++++++++++ libavcodec/vulkan_video.h | 98 ++++++++++ 7 files changed, 518 insertions(+), 1 deletion(-) create mode 100644 libavcodec/vulkan.c create mode 100644 libavcodec/vulkan.h create mode 100644 libavcodec/vulkan_video.c create mode 100644 libavcodec/vulkan_video.h diff --git a/configure b/configure index 1fbd1939c6fae..859cc4acebbf8 100755 --- a/configure +++ b/configure @@ -327,7 +327,6 @@ External library support: --disable-securetransport disable Secure Transport, needed for TLS support on OSX if openssl and gnutls are not used [autodetect] --enable-vapoursynth enable VapourSynth demuxer [no] - --disable-vulkan disable Vulkan code [autodetect] --disable-xlib disable xlib [autodetect] --disable-zlib disable zlib [autodetect] @@ -354,6 +353,7 @@ External library support: --disable-vaapi disable Video Acceleration API (mainly Unix/Intel) code [autodetect] --disable-vdpau disable Nvidia Video Decode and Presentation API for Unix code [autodetect] --disable-videotoolbox disable VideoToolbox code [autodetect] + --disable-vulkan disable Vulkan code [autodetect] Toolchain options: --arch=ARCH select architecture [$arch] diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 9c38240025ce7..8580d4ca1d719 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -982,6 +982,7 @@ OBJS-$(CONFIG_NVDEC) += nvdec.o OBJS-$(CONFIG_VAAPI) += vaapi_decode.o OBJS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.o OBJS-$(CONFIG_VDPAU) += vdpau.o +OBJS-$(CONFIG_VULKAN) += vulkan.o vulkan_video.o OBJS-$(CONFIG_AV1_D3D11VA_HWACCEL) += dxva2_av1.o OBJS-$(CONFIG_AV1_DXVA2_HWACCEL) += dxva2_av1.o @@ -1290,6 +1291,7 @@ SKIPHEADERS-$(CONFIG_XVMC) += xvmc.h SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_decode.h vaapi_hevc.h vaapi_encode.h SKIPHEADERS-$(CONFIG_VDPAU) += vdpau.h vdpau_internal.h SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.h 
vt_internal.h +SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_video.h SKIPHEADERS-$(CONFIG_V4L2_M2M) += v4l2_buffers.h v4l2_context.h v4l2_m2m.h SKIPHEADERS-$(CONFIG_ZLIB) += zlib_wrapper.h diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h index d88dc37c8c926..e8c6186151d7e 100644 --- a/libavcodec/hwconfig.h +++ b/libavcodec/hwconfig.h @@ -78,6 +78,8 @@ void ff_hwaccel_uninit(AVCodecContext *avctx); HW_CONFIG_HWACCEL(1, 1, 1, VDPAU, VDPAU, ff_ ## codec ## _vdpau_hwaccel) #define HWACCEL_VIDEOTOOLBOX(codec) \ HW_CONFIG_HWACCEL(1, 1, 1, VIDEOTOOLBOX, VIDEOTOOLBOX, ff_ ## codec ## _videotoolbox_hwaccel) +#define HWACCEL_VULKAN(codec) \ + HW_CONFIG_HWACCEL(1, 1, 1, VULKAN, VULKAN, ff_ ## codec ## _vulkan_hwaccel) #define HWACCEL_D3D11VA(codec) \ HW_CONFIG_HWACCEL(0, 0, 1, D3D11VA_VLD, NONE, ff_ ## codec ## _d3d11va_hwaccel) diff --git a/libavcodec/vulkan.c b/libavcodec/vulkan.c new file mode 100644 index 0000000000000..fc8a1fa47bad4 --- /dev/null +++ b/libavcodec/vulkan.c @@ -0,0 +1,19 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/vulkan.c" diff --git a/libavcodec/vulkan.h b/libavcodec/vulkan.h new file mode 100644 index 0000000000000..b15efd4addb13 --- /dev/null +++ b/libavcodec/vulkan.h @@ -0,0 +1,24 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VULKAN_H +#define AVCODEC_VULKAN_H + +#include "libavutil/vulkan.h" + +#endif /* AVCODEC_VULKAN_H */ diff --git a/libavcodec/vulkan_video.c b/libavcodec/vulkan_video.c new file mode 100644 index 0000000000000..e4624864ab709 --- /dev/null +++ b/libavcodec/vulkan_video.c @@ -0,0 +1,372 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <inttypes.h>
+
+#include "codec_id.h"
+
+#include "vulkan_video.h"
+
+/* Per-codec Vulkan extension flag and codec operation, indexed by codec ID.
+ * The encode members are only filled in when CONFIG_VULKAN_ENCODE is set;
+ * otherwise they stay zero-initialized. */
+const FFVkCodecMap ff_vk_codec_map[AV_CODEC_ID_FIRST_AUDIO] = {
+    [AV_CODEC_ID_H264] = {
+#if CONFIG_VULKAN_ENCODE
+        .encode_extension = FF_VK_EXT_VIDEO_ENCODE_H264,
+        .encode_op        = VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_EXT,
+#endif
+        .decode_extension = FF_VK_EXT_VIDEO_DECODE_H264,
+        .decode_op        = VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR,
+    },
+    [AV_CODEC_ID_HEVC] = {
+#if CONFIG_VULKAN_ENCODE
+        .encode_extension = FF_VK_EXT_VIDEO_ENCODE_H265,
+        .encode_op        = VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_EXT,
+#endif
+        .decode_extension = FF_VK_EXT_VIDEO_DECODE_H265,
+        .decode_op        = VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR,
+    },
+};
+
+#define ASPECT_2PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT)
+#define ASPECT_3PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT)
+
+/* VkFormat -> (pixel format, aspect mask) table.
+ * Lookups below return the FIRST entry whose VkFormat matches, so the order
+ * of entries sharing a VkFormat is significant and must not be changed
+ * casually. */
+static const struct FFVkFormatMapEntry {
+    VkFormat vkf;
+    enum AVPixelFormat pixfmt;
+    VkImageAspectFlags aspect;
+} vk_format_map[] = {
+    /* Gray formats */
+    { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GRAY8, VK_IMAGE_ASPECT_COLOR_BIT },
+    { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GRAY16, VK_IMAGE_ASPECT_COLOR_BIT },
+    { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GRAYF32, VK_IMAGE_ASPECT_COLOR_BIT },
+
+    /* RGB formats */
+    { VK_FORMAT_R16G16B16A16_UNORM, AV_PIX_FMT_XV36, VK_IMAGE_ASPECT_COLOR_BIT },
+    { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_BGRA, VK_IMAGE_ASPECT_COLOR_BIT },
+    { VK_FORMAT_R8G8B8A8_UNORM, AV_PIX_FMT_RGBA, VK_IMAGE_ASPECT_COLOR_BIT },
+    { VK_FORMAT_R8G8B8_UNORM, AV_PIX_FMT_RGB24, VK_IMAGE_ASPECT_COLOR_BIT },
+    { VK_FORMAT_B8G8R8_UNORM, AV_PIX_FMT_BGR24, VK_IMAGE_ASPECT_COLOR_BIT },
+    { VK_FORMAT_R16G16B16_UNORM, AV_PIX_FMT_RGB48, VK_IMAGE_ASPECT_COLOR_BIT },
+    { VK_FORMAT_R16G16B16A16_UNORM, AV_PIX_FMT_RGBA64, VK_IMAGE_ASPECT_COLOR_BIT },
+    { VK_FORMAT_R5G6B5_UNORM_PACK16, AV_PIX_FMT_RGB565, VK_IMAGE_ASPECT_COLOR_BIT },
+    { VK_FORMAT_B5G6R5_UNORM_PACK16, AV_PIX_FMT_BGR565, VK_IMAGE_ASPECT_COLOR_BIT },
+    { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_BGR0, VK_IMAGE_ASPECT_COLOR_BIT },
+    { VK_FORMAT_R8G8B8A8_UNORM, AV_PIX_FMT_RGB0, VK_IMAGE_ASPECT_COLOR_BIT },
+    { VK_FORMAT_A2R10G10B10_UNORM_PACK32, AV_PIX_FMT_X2RGB10, VK_IMAGE_ASPECT_COLOR_BIT },
+
+    /* Planar RGB */
+    { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GBRAP, VK_IMAGE_ASPECT_COLOR_BIT },
+    { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRAP16, VK_IMAGE_ASPECT_COLOR_BIT },
+    { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRPF32, VK_IMAGE_ASPECT_COLOR_BIT },
+    { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRAPF32, VK_IMAGE_ASPECT_COLOR_BIT },
+
+    /* Two-plane 420 YUV at 8, 10, 12 and 16 bits */
+    { VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, AV_PIX_FMT_NV12, ASPECT_2PLANE },
+    { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P010, ASPECT_2PLANE },
+    { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P012, ASPECT_2PLANE },
+    { VK_FORMAT_G16_B16R16_2PLANE_420_UNORM, AV_PIX_FMT_P016, ASPECT_2PLANE },
+
+    /* Two-plane 422 YUV at 8, 10, 12 and 16 bits */
+    { VK_FORMAT_G8_B8R8_2PLANE_422_UNORM, AV_PIX_FMT_NV16, ASPECT_2PLANE },
+    { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P210, ASPECT_2PLANE },
+    { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P212, ASPECT_2PLANE },
+    { VK_FORMAT_G16_B16R16_2PLANE_422_UNORM, AV_PIX_FMT_P216, ASPECT_2PLANE },
+
+    /* Two-plane 444 YUV at 8, 10, 12 and 16 bits */
+    { VK_FORMAT_G8_B8R8_2PLANE_444_UNORM, AV_PIX_FMT_NV24, ASPECT_2PLANE },
+    { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P410, ASPECT_2PLANE },
+    { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P412, ASPECT_2PLANE },
+    { VK_FORMAT_G16_B16R16_2PLANE_444_UNORM, AV_PIX_FMT_P416, ASPECT_2PLANE },
+
+    /* Three-plane 420, 422, 444 at 8, 10, 12 and 16 bits */
+    { VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P, ASPECT_3PLANE },
+    { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P10, ASPECT_3PLANE },
+    { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P12, ASPECT_3PLANE },
+    { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P16, ASPECT_3PLANE },
+    { VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P, ASPECT_3PLANE },
+    { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P10, ASPECT_3PLANE },
+    { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P12, ASPECT_3PLANE },
+    { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P16, ASPECT_3PLANE },
+    { VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P, ASPECT_3PLANE },
+    { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P10, ASPECT_3PLANE },
+    { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P12, ASPECT_3PLANE },
+    { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P16, ASPECT_3PLANE },
+
+    /* Single plane 422 at 8, 10 and 12 bits */
+    { VK_FORMAT_G8B8G8R8_422_UNORM, AV_PIX_FMT_YUYV422, VK_IMAGE_ASPECT_COLOR_BIT },
+    { VK_FORMAT_B8G8R8G8_422_UNORM, AV_PIX_FMT_UYVY422, VK_IMAGE_ASPECT_COLOR_BIT },
+    { VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16, AV_PIX_FMT_Y210, VK_IMAGE_ASPECT_COLOR_BIT },
+    { VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16, AV_PIX_FMT_Y212, VK_IMAGE_ASPECT_COLOR_BIT },
+};
+static const int nb_vk_format_map = FF_ARRAY_ELEMS(vk_format_map);
+
+/* Return the pixel format of the first table entry matching vkf,
+ * or AV_PIX_FMT_NONE when the format is not in the table. */
+enum AVPixelFormat ff_vk_pix_fmt_from_vkfmt(VkFormat vkf)
+{
+    for (int i = 0; i < nb_vk_format_map; i++)
+        if (vk_format_map[i].vkf == vkf)
+            return vk_format_map[i].pixfmt;
+    return AV_PIX_FMT_NONE;
+}
+
+/* Return the aspect mask of the first table entry matching vkf,
+ * or VK_IMAGE_ASPECT_NONE when the format is not in the table. */
+VkImageAspectFlags ff_vk_aspect_bits_from_vkfmt(VkFormat vkf)
+{
+    for (int i = 0; i < nb_vk_format_map; i++)
+        if (vk_format_map[i].vkf == vkf)
+            return vk_format_map[i].aspect;
+    return VK_IMAGE_ASPECT_NONE;
+}
+
+/* Map a pixel format descriptor to a Vulkan chroma subsampling flag.
+ * Single-component formats are monochrome; anything that is not 4:4:4,
+ * 4:2:2 or 4:2:0 yields VK_VIDEO_CHROMA_SUBSAMPLING_INVALID_KHR. */
+VkVideoChromaSubsamplingFlagBitsKHR ff_vk_subsampling_from_av_desc(const AVPixFmtDescriptor *desc)
+{
+    if (desc->nb_components == 1)
+        return VK_VIDEO_CHROMA_SUBSAMPLING_MONOCHROME_BIT_KHR;
+    else if (!desc->log2_chroma_w && !desc->log2_chroma_h)
+        return VK_VIDEO_CHROMA_SUBSAMPLING_444_BIT_KHR;
+    /* 4:2:2 halves the chroma width only (w shift 1, h shift 0). The previous
+     * test had the two shifts swapped, which matched 4:4:0 instead. */
+    else if (desc->log2_chroma_w == 1 && !desc->log2_chroma_h)
+        return VK_VIDEO_CHROMA_SUBSAMPLING_422_BIT_KHR;
+    else if (desc->log2_chroma_w == 1 && desc->log2_chroma_h == 1)
+        return VK_VIDEO_CHROMA_SUBSAMPLING_420_BIT_KHR;
+    return VK_VIDEO_CHROMA_SUBSAMPLING_INVALID_KHR;
+}
+
+/* Map a bit depth of 8, 10 or 12 to the matching Vulkan flag; every other
+ * value yields VK_VIDEO_COMPONENT_BIT_DEPTH_INVALID_KHR. */
+VkVideoComponentBitDepthFlagBitsKHR ff_vk_depth_from_av_depth(int depth)
+{
+    switch (depth) {
+    case 8: return VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR;
+    case 10: return VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR;
+    case 12: return VK_VIDEO_COMPONENT_BIT_DEPTH_12_BIT_KHR;
+    default: break;
+    }
+    return VK_VIDEO_COMPONENT_BIT_DEPTH_INVALID_KHR;
+}
+
+/* AVBuffer free callback for pool entries: unmaps and frees the Vulkan
+ * buffer, then releases the FFVkVideoBuffer storage itself.
+ * opaque is the FFVulkanContext handed to av_buffer_pool_init2(). */
+static void free_data_buf(void *opaque, uint8_t *data)
+{
+    FFVulkanContext *ctx = opaque;
+    FFVkVideoBuffer *buf = (FFVkVideoBuffer *)data;
+    ff_vk_unmap_buffer(ctx, &buf->buf, 0);
+    ff_vk_free_buf(ctx, &buf->buf);
+    av_free(data);
+}
+
+/* Pool allocation callback: returns a zeroed entry of the requested size
+ * wrapped in an AVBufferRef, or NULL on allocation failure. The Vulkan
+ * buffer inside is created lazily by ff_vk_video_get_buffer(). */
+static AVBufferRef *alloc_data_buf(void *opaque, size_t size)
+{
+    AVBufferRef *ref;
+    uint8_t *buf = av_mallocz(size);
+    if (!buf)
+        return NULL;
+
+    ref = av_buffer_create(buf, size, free_data_buf, opaque, 0);
+    if (!ref)
+        av_free(buf);
+    return ref;
+}
+
+/* Get a pooled, host-mapped buffer holding at least `size` bytes.
+ *
+ * The pool is created on first use. A pooled entry that is already large
+ * enough is returned as-is; otherwise its Vulkan buffer is replaced by a new
+ * one of at least 1 MiB, aligned to minBitstreamBufferSizeAlignment and
+ * rounded up to a power of two.
+ *
+ * On success *buf holds the reference and 0 is returned. On failure a
+ * negative AVERROR is returned; *buf is reset to NULL once a pool entry had
+ * been fetched, so the caller is never left with a released reference. */
+int ff_vk_video_get_buffer(FFVulkanContext *ctx, FFVkVideoCommon *s,
+                           AVBufferRef **buf, VkBufferUsageFlags usage,
+                           void *create_pNext, size_t size)
+{
+    int err;
+    AVBufferRef *ref;
+    FFVkVideoBuffer *data;
+
+    if (!s->buf_pool) {
+        s->buf_pool = av_buffer_pool_init2(sizeof(FFVkVideoBuffer), ctx,
+                                           alloc_data_buf, NULL);
+        if (!s->buf_pool)
+            return AVERROR(ENOMEM);
+    }
+
+    *buf = ref = av_buffer_pool_get(s->buf_pool);
+    if (!ref)
+        return AVERROR(ENOMEM);
+
+    data = (FFVkVideoBuffer *)ref->data;
+
+    if (data->buf.size >= size)
+        return 0;
+
+    /* No point in requesting anything smaller. */
+    size = FFMAX(size, 1024*1024);
+    size = FFALIGN(size, s->caps.minBitstreamBufferSizeAlignment);
+
+    /* Round up to the next power of two. Makes fragmentation management
+     * easier, and gives us ample headroom. The loop smears the top set bit
+     * across the full width of size_t, so it is also correct above 4 GiB. */
+    size--;
+    for (unsigned shift = 1; shift < sizeof(size) * 8; shift <<= 1)
+        size |= size >> shift;
+    size++;
+
+    ff_vk_free_buf(ctx, &data->buf);
+    memset(data, 0, sizeof(FFVkVideoBuffer));
+
+    err = ff_vk_create_buf(ctx, &data->buf, size,
+                           create_pNext, NULL, usage,
+                           VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+    if (err < 0)
+        goto fail;
+
+    /* Map the buffer */
+    err = ff_vk_map_buffer(ctx, &data->buf, &data->mem, 0);
+    if (err < 0)
+        goto fail;
+
+    return 0;
+
+fail:
+    /* Unref through the caller's pointer so *buf becomes NULL as well. */
+    av_buffer_unref(buf);
+    return err;
+}
+
+/* Destroy the video session, free every device memory allocation bound to
+ * it and tear down the bitstream buffer pool. Safe to call on a partially
+ * initialized FFVkVideoCommon (used by the init failure path). */
+av_cold void ff_vk_video_common_uninit(FFVulkanContext *s,
+                                       FFVkVideoCommon *common)
+{
+    FFVulkanFunctions *vk = &s->vkfn;
+
+    if (common->session) {
+        vk->DestroyVideoSessionKHR(s->hwctx->act_dev, common->session,
+                                   s->hwctx->alloc);
+        /* Non-dispatchable handle: not necessarily a pointer type. */
+        common->session = VK_NULL_HANDLE;
+    }
+
+    if (common->mem)
+        for (uint32_t i = 0; i < common->nb_mem; i++)
+            vk->FreeMemory(s->hwctx->act_dev, common->mem[i], s->hwctx->alloc);
+
+    av_freep(&common->mem);
+    common->nb_mem = 0;
+
+    av_buffer_pool_uninit(&common->buf_pool);
+}
+
+/* Create a video session from session_create, then query, allocate and bind
+ * the device memory it requires.
+ *
+ * log is the context passed to av_log(). Returns 0 on success. On failure a
+ * negative AVERROR is returned; failures after session creation release
+ * everything acquired so far via ff_vk_video_common_uninit(). */
+av_cold int ff_vk_video_common_init(void *log, FFVulkanContext *s,
+                                    FFVkVideoCommon *common,
+                                    VkVideoSessionCreateInfoKHR *session_create)
+{
+    int err;
+    VkResult ret;
+    FFVulkanFunctions *vk = &s->vkfn;
+    VkMemoryRequirements2 *mem_req = NULL;
+    VkVideoSessionMemoryRequirementsKHR *mem = NULL;
+    VkBindVideoSessionMemoryInfoKHR *bind_mem = NULL;
+
+    /* Create session */
+    ret = vk->CreateVideoSessionKHR(s->hwctx->act_dev, session_create,
+                                    s->hwctx->alloc, &common->session);
+    if (ret != VK_SUCCESS)
+        return AVERROR_EXTERNAL;
+
+    /* Get memory requirements: first call only retrieves the count */
+    ret = vk->GetVideoSessionMemoryRequirementsKHR(s->hwctx->act_dev,
+                                                   common->session,
+                                                   &common->nb_mem,
+                                                   NULL);
+    if (ret != VK_SUCCESS) {
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    /* Allocate all memory needed to actually allocate memory */
+    common->mem = av_mallocz(sizeof(*common->mem)*common->nb_mem);
+    if (!common->mem) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+    mem = av_mallocz(sizeof(*mem)*common->nb_mem);
+    if (!mem) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+    mem_req = av_mallocz(sizeof(*mem_req)*common->nb_mem);
+    if (!mem_req) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+    bind_mem = av_mallocz(sizeof(*bind_mem)*common->nb_mem);
+    if (!bind_mem) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    /* Set the needed fields to get the memory requirements */
+    for (uint32_t i = 0; i < common->nb_mem; i++) {
+        mem_req[i] = (VkMemoryRequirements2) {
+            .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
+        };
+        mem[i] = (VkVideoSessionMemoryRequirementsKHR) {
+            .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_MEMORY_REQUIREMENTS_KHR,
+            .memoryRequirements = mem_req[i].memoryRequirements,
+        };
+    }
+
+    /* Finally get the memory requirements */
+    ret = vk->GetVideoSessionMemoryRequirementsKHR(s->hwctx->act_dev,
+                                                   common->session, &common->nb_mem,
+                                                   mem);
+    if (ret != VK_SUCCESS) {
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    /* Now allocate each requested memory.
+     * For ricing, could pool together memory that ends up in the same index. */
+    for (uint32_t i = 0; i < common->nb_mem; i++) {
+        err = ff_vk_alloc_mem(s, &mem[i].memoryRequirements,
+                              UINT32_MAX, NULL, NULL, &common->mem[i]);
+        if (err < 0)
+            goto fail;
+
+        bind_mem[i] = (VkBindVideoSessionMemoryInfoKHR) {
+            .sType = VK_STRUCTURE_TYPE_BIND_VIDEO_SESSION_MEMORY_INFO_KHR,
+            .memory = common->mem[i],
+            .memoryBindIndex = mem[i].memoryBindIndex,
+            .memoryOffset = 0,
+            .memorySize = mem[i].memoryRequirements.size,
+        };
+
+        /* memorySize is 64 bits wide and memoryBindIndex is 32-bit unsigned;
+         * "%lu"/"%i" are only right on some ABIs. */
+        av_log(log, AV_LOG_VERBOSE, "Allocating %"PRIu64" bytes in bind index %"PRIu32" for video session\n",
+               (uint64_t)bind_mem[i].memorySize, bind_mem[i].memoryBindIndex);
+    }
+
+    /* Bind the allocated memory */
+    ret = vk->BindVideoSessionMemoryKHR(s->hwctx->act_dev, common->session,
+                                        common->nb_mem, bind_mem);
+    if (ret != VK_SUCCESS) {
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    av_freep(&mem);
+    av_freep(&mem_req);
+    av_freep(&bind_mem);
+
+    return 0;
+
+fail:
+    av_freep(&mem);
+    av_freep(&mem_req);
+    av_freep(&bind_mem);
+
+    ff_vk_video_common_uninit(s, common);
+    return err;
+}
diff --git a/libavcodec/vulkan_video.h b/libavcodec/vulkan_video.h
new file mode 100644
index 0000000000000..c10fcdcca1ba7
--- /dev/null
+++ b/libavcodec/vulkan_video.h
@@ -0,0 +1,98 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VULKAN_VIDEO_H
+#define AVCODEC_VULKAN_VIDEO_H
+
+#include "codec_id.h"
+#include "vulkan.h"
+
+/* NOTE(review): the operand of this #include was lost when the patch text was
+ * extracted (angle brackets stripped). The Vulkan video codec common header
+ * is the presumed original - confirm against the upstream commit. */
+#include <vk_video/vulkan_video_codecs_common.h>
+
+/* Split a packed version number into its fields: major in bits 22 and up,
+ * minor in bits 12-21, patch in bits 0-11. CODEC_VER() expands to the three
+ * values as a comma-separated argument list (e.g. for "%i.%i.%i"). */
+#define CODEC_VER_MAJ(ver) ((ver) >> 22)
+#define CODEC_VER_MIN(ver) (((ver) >> 12) & ((1 << 10) - 1))
+#define CODEC_VER_PAT(ver) ((ver) & ((1 << 12) - 1))
+#define CODEC_VER(ver) CODEC_VER_MAJ(ver), CODEC_VER_MIN(ver), CODEC_VER_PAT(ver)
+
+/**
+ * Vulkan extension flag and codec operation for one codec,
+ * for both the encode and the decode direction.
+ */
+typedef struct FFVkCodecMap {
+    FFVulkanExtensions encode_extension;
+    VkVideoCodecOperationFlagBitsKHR encode_op;
+    FFVulkanExtensions decode_extension;
+    VkVideoCodecOperationFlagBitsKHR decode_op;
+} FFVkCodecMap;
+
+/**
+ * State shared by users of a Vulkan video session: the session handle,
+ * the device memory bound to it, the queried capabilities and a pool of
+ * bitstream buffers.
+ */
+typedef struct FFVkVideoSession {
+    VkVideoSessionKHR session;
+    VkDeviceMemory *mem;     /* nb_mem allocations bound to the session */
+    uint32_t nb_mem;
+    VkVideoCapabilitiesKHR caps;
+
+    AVBufferPool *buf_pool;  /* created lazily by ff_vk_video_get_buffer() */
+} FFVkVideoCommon;
+
+/**
+ * Index is codec_id.
+ */
+extern const FFVkCodecMap ff_vk_codec_map[AV_CODEC_ID_FIRST_AUDIO];
+
+/**
+ * Get pixfmt from a Vulkan format.
+ * Returns AV_PIX_FMT_NONE for formats that are not mapped.
+ */
+enum AVPixelFormat ff_vk_pix_fmt_from_vkfmt(VkFormat vkf);
+
+/**
+ * Get aspect bits which include all planes from a VkFormat.
+ * Returns VK_IMAGE_ASPECT_NONE for formats that are not mapped.
+ */
+VkImageAspectFlags ff_vk_aspect_bits_from_vkfmt(VkFormat vkf);
+
+/**
+ * Get Vulkan's chroma subsampling from a pixfmt descriptor.
+ */
+VkVideoChromaSubsamplingFlagBitsKHR ff_vk_subsampling_from_av_desc(const AVPixFmtDescriptor *desc);
+
+/**
+ * Get Vulkan's bit depth from an integer depth of 8, 10 or 12.
+ * Any other value yields VK_VIDEO_COMPONENT_BIT_DEPTH_INVALID_KHR.
+ */
+VkVideoComponentBitDepthFlagBitsKHR ff_vk_depth_from_av_depth(int depth);
+
+/**
+ * Payload of the buffers handed out by ff_vk_video_get_buffer():
+ * the Vulkan buffer plus the host address it is mapped at.
+ */
+typedef struct FFVkVideoBuffer {
+    FFVkBuffer buf;
+    uint8_t *mem;
+} FFVkVideoBuffer;
+
+/**
+ * Get a mapped FFVkVideoBuffer with a specific guaranteed minimum size
+ * from a pool. On success, (*buf)->data points to the FFVkVideoBuffer.
+ */
+int ff_vk_video_get_buffer(FFVulkanContext *ctx, FFVkVideoCommon *s,
+                           AVBufferRef **buf, VkBufferUsageFlags usage,
+                           void *create_pNext, size_t size);
+
+/**
+ * Initialize video session, allocating and binding necessary memory.
+ */
+int ff_vk_video_common_init(void *log, FFVulkanContext *s,
+                            FFVkVideoCommon *common,
+                            VkVideoSessionCreateInfoKHR *session_create);
+
+/**
+ * Free video session and required resources.
+ */
+void ff_vk_video_common_uninit(FFVulkanContext *s, FFVkVideoCommon *common);
+
+#endif /* AVCODEC_VULKAN_VIDEO_H */

From ccd967a35aafbe32c62a36438ccbe79703da4ef0 Mon Sep 17 00:00:00 2001
From: Lynne
Date: Mon, 16 Jan 2023 07:23:27 +0100
Subject: [PATCH 81/98] libavcodec: add Vulkan common video decoding code

---
 libavcodec/Makefile | 2 +-
 libavcodec/vulkan_decode.c | 1145 ++++++++++++++++++++++++++++++++++++
 libavcodec/vulkan_decode.h | 175 ++++++
 3 files changed, 1321 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/vulkan_decode.c
 create mode 100644 libavcodec/vulkan_decode.h

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 8580d4ca1d719..5bc3d6ffabf1c 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1291,7 +1291,7 @@ SKIPHEADERS-$(CONFIG_XVMC) += xvmc.h
 SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_decode.h vaapi_hevc.h vaapi_encode.h
 SKIPHEADERS-$(CONFIG_VDPAU) += vdpau.h vdpau_internal.h
 SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.h vt_internal.h
-SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_video.h
+SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_video.h vulkan_decode.h
 SKIPHEADERS-$(CONFIG_V4L2_M2M) += v4l2_buffers.h v4l2_context.h v4l2_m2m.h
 SKIPHEADERS-$(CONFIG_ZLIB) += zlib_wrapper.h
 
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
new file mode 100644
index 0000000000000..9a82d6ad50130
--- /dev/null
+++ b/libavcodec/vulkan_decode.c
@@ -0,0 +1,1145 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "vulkan_video.h"
+#include "vulkan_decode.h"
+#include "config_components.h"
+
+#if CONFIG_H264_VULKAN_HWACCEL
+extern const VkExtensionProperties ff_vk_dec_h264_ext;
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+extern const VkExtensionProperties ff_vk_dec_hevc_ext;
+#endif
+
+/* Per-codec extension properties, indexed by codec ID. Only codecs whose
+ * Vulkan hwaccel is enabled at build time get an entry. */
+static const VkExtensionProperties *dec_ext[] = {
+#if CONFIG_H264_VULKAN_HWACCEL
+    [AV_CODEC_ID_H264] = &ff_vk_dec_h264_ext,
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+    [AV_CODEC_ID_HEVC] = &ff_vk_dec_hevc_ext,
+#endif
+};
+
+/* Copy the hwaccel decode state from src to dst: the shared context
+ * reference, the session parameters (only when src has some) and the frame
+ * ID allocation mask. Returns 0 or a negative AVERROR from
+ * av_buffer_replace(). */
+int ff_vk_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
+{
+    int err;
+    FFVulkanDecodeContext *src_ctx = src->internal->hwaccel_priv_data;
+    FFVulkanDecodeContext *dst_ctx = dst->internal->hwaccel_priv_data;
+
+    err = av_buffer_replace(&dst_ctx->shared_ref, src_ctx->shared_ref);
+    if (err < 0)
+        return err;
+
+    if (src_ctx->session_params) {
+        err = av_buffer_replace(&dst_ctx->session_params, src_ctx->session_params);
+        if (err < 0)
+            return err;
+    }
+
+    dst_ctx->frame_id_alloc_mask = src_ctx->frame_id_alloc_mask;
+
+    return 0;
+}
+
+/* Flag the decode context's parameters as changed. The t, b and s arguments
+ * are not used here. Always returns 0. */
+int ff_vk_params_changed(AVCodecContext *avctx, int t, const uint8_t *b, uint32_t s)
+{
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    dec->params_changed = 1;
+    return 0;
+}
+
+/* Create a 2D image view of src->img[0] in format vkf, chained to the
+ * context's YCbCr sampler conversion. On success the view is stored in
+ * *dst_view and the aspect mask looked up for vkf in *aspect.
+ * Returns 0, or AVERROR_EXTERNAL when view creation fails. */
+static int vk_decode_create_view(FFVulkanDecodeShared *ctx, VkImageView *dst_view,
+                                 VkImageAspectFlags *aspect, AVVkFrame *src,
+                                 VkFormat vkf)
+{
+    VkResult ret;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+    VkImageAspectFlags aspect_mask = ff_vk_aspect_bits_from_vkfmt(vkf);
+
+    VkSamplerYcbcrConversionInfo yuv_sampler_info = {
+        .sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO,
+        .conversion = ctx->yuv_sampler,
+    };
+    VkImageViewCreateInfo img_view_create_info = {
+        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+        .pNext = &yuv_sampler_info,
+        .viewType = VK_IMAGE_VIEW_TYPE_2D,
+        .format = vkf,
+        .image = src->img[0],
+        .components = (VkComponentMapping) {
+            .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+            .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+            .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+            .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+        },
+        .subresourceRange = (VkImageSubresourceRange) {
+            /* The view itself always uses the color aspect; the per-plane
+             * mask is only handed back to the caller through *aspect. */
+            .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+            .baseArrayLayer = 0,
+            .layerCount = VK_REMAINING_ARRAY_LAYERS,
+            .levelCount = 1,
+        },
+    };
+
+    ret = vk->CreateImageView(ctx->s.hwctx->act_dev, &img_view_create_info,
+                              ctx->s.hwctx->alloc, dst_view);
+    if (ret != VK_SUCCESS)
+        return AVERROR_EXTERNAL;
+
+    *aspect = aspect_mask;
+
+    return 0;
+}
+
+/* Allocate an AVFrame backed by a buffer from the DPB frames context.
+ * Returns NULL when either the frame or its buffer cannot be allocated. */
+static AVFrame *vk_get_dpb_pool(FFVulkanDecodeShared *ctx)
+{
+    int err;
+    AVFrame *avf = av_frame_alloc();
+    if (!avf)
+        return NULL;
+
+    err = av_hwframe_get_buffer(ctx->dpb_hwfc_ref, avf, 0x0);
+    if (err < 0)
+        av_frame_free(&avf);
+
+    return avf;
+}
+
+/* Reset the per-picture slice state and, unless the picture already has a
+ * reference view, create the image views needed to decode it.
+ *
+ * alloc_dpb selects a separate DPB image (the shared layered one when
+ * ctx->layered_dpb is set, otherwise a frame from the DPB pool); without it
+ * the output image doubles as the reference. An output view is created when
+ * there is no separate DPB or when is_current is set.
+ * Returns 0 or a negative AVERROR. */
+int ff_vk_decode_prepare_frame(FFVulkanDecodeShared *ctx, AVFrame *pic,
+                               FFVulkanDecodePicture *vkpic, int is_current,
+                               int alloc_dpb)
+{
+    int err;
+
+    vkpic->nb_slices = 0;
+    vkpic->slices_size = 0;
+
+    /* A reference view already exists, so the picture was prepared earlier
+     * (e.g. a blank frame the decoder made up for a missing reference):
+     * keep its views. */
+    if (vkpic->img_view_ref)
+        return 0;
+
+    vkpic->dpb_frame = NULL;
+    vkpic->img_view_ref = VK_NULL_HANDLE;
+    vkpic->img_view_out = VK_NULL_HANDLE;
+    vkpic->img_view_dest = VK_NULL_HANDLE;
+
+    if (ctx->layered_dpb && alloc_dpb) {
+        vkpic->img_view_ref = ctx->layered_view;
+        vkpic->img_aspect_ref = ctx->layered_aspect;
+    } else if (alloc_dpb) {
+        AVHWFramesContext *dpb_frames = (AVHWFramesContext *)ctx->dpb_hwfc_ref->data;
+        AVVulkanFramesContext *dpb_hwfc = dpb_frames->hwctx;
+
+        vkpic->dpb_frame = vk_get_dpb_pool(ctx);
+        if (!vkpic->dpb_frame)
+            return AVERROR(ENOMEM);
+
+        err = vk_decode_create_view(ctx, &vkpic->img_view_ref,
+                                    &vkpic->img_aspect_ref,
+                                    (AVVkFrame *)vkpic->dpb_frame->data[0],
+                                    dpb_hwfc->format[0]);
+        if (err < 0)
+            return err;
+
+        vkpic->img_view_dest = vkpic->img_view_ref;
+    }
+
+    if (!alloc_dpb || is_current) {
+        AVHWFramesContext *frames = (AVHWFramesContext *)pic->hw_frames_ctx->data;
+        AVVulkanFramesContext *hwfc = frames->hwctx;
+
+        err = vk_decode_create_view(ctx, &vkpic->img_view_out,
+                                    &vkpic->img_aspect,
+                                    (AVVkFrame *)pic->data[0],
+                                    hwfc->format[0]);
+        if (err < 0)
+            return err;
+
+        if (!alloc_dpb) {
+            vkpic->img_view_ref = vkpic->img_view_out;
+            vkpic->img_aspect_ref = vkpic->img_aspect;
+        }
+    }
+
+    return 0;
+}
+
+/* Append one slice to the picture's bitstream buffer.
+ *
+ * The slice's start offset is recorded in vp->slice_off (exposed through
+ * *offsets), the data is copied after the existing slices, optionally
+ * preceded by a 00 00 01 start code, and *nb_slices is incremented. The
+ * backing buffer is replaced by a larger pooled one when needed, with the
+ * slices gathered so far copied over.
+ * Returns 0 or a negative AVERROR. */
+int ff_vk_decode_add_slice(AVCodecContext *avctx, FFVulkanDecodePicture *vp,
+                           const uint8_t *data, size_t size, int add_startcode,
+                           uint32_t *nb_slices, const uint32_t **offsets)
+{
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = (FFVulkanDecodeShared *)dec->shared_ref->data;
+
+    static const uint8_t startcode_prefix[3] = { 0x0, 0x0, 0x1 };
+    const size_t startcode_len = add_startcode ? sizeof(startcode_prefix) : 0;
+    const int nb = *nb_slices;
+    uint8_t *slices;
+    uint32_t *slice_off;
+    FFVkVideoBuffer *vkbuf;
+
+    /* Leave room for the alignment padding applied at submission time */
+    size_t new_size = vp->slices_size + startcode_len + size +
+                      ctx->common.caps.minBitstreamBufferSizeAlignment;
+
+    /* Size by the element type, not by the pointer */
+    slice_off = av_fast_realloc(vp->slice_off, &vp->slice_off_max,
+                                (nb + 1)*sizeof(*slice_off));
+    if (!slice_off)
+        return AVERROR(ENOMEM);
+
+    *offsets = vp->slice_off = slice_off;
+    slice_off[nb] = vp->slices_size;
+
+    vkbuf = vp->slices_buf ? (FFVkVideoBuffer *)vp->slices_buf->data : NULL;
+    if (!vkbuf || vkbuf->buf.size < new_size) {
+        int err;
+        AVBufferRef *new_ref;
+        FFVkVideoBuffer *new_buf;
+        err = ff_vk_video_get_buffer(&ctx->s, &ctx->common, &new_ref,
+                                     VK_BUFFER_USAGE_VIDEO_DECODE_SRC_BIT_KHR,
+                                     ctx->s.hwfc->create_pnext, new_size);
+        if (err < 0)
+            return err;
+
+        new_buf = (FFVkVideoBuffer *)new_ref->data;
+
+        /* Copy data from the old buffer */
+        if (vkbuf) {
+            memcpy(new_buf->mem, vkbuf->mem, vp->slices_size);
+            av_buffer_unref(&vp->slices_buf);
+        }
+
+        vp->slices_buf = new_ref;
+        vkbuf = new_buf;
+    }
+    slices = vkbuf->mem;
+
+    /* Startcode */
+    memcpy(slices + vp->slices_size, startcode_prefix, startcode_len);
+
+    /* Slice data */
+    memcpy(slices + vp->slices_size + startcode_len, data, size);
+
+    *nb_slices = nb + 1;
+    vp->nb_slices++;
+    vp->slices_size += startcode_len + size;
+
+    return 0;
+}
+
+/* Reset the video session: records and submits a begin / control(RESET) /
+ * end coding sequence using the empty session parameters. Nothing is
+ * recorded or submitted if the command buffer cannot be started. */
+void ff_vk_decode_flush(AVCodecContext *avctx)
+{
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = (FFVulkanDecodeShared *)dec->shared_ref->data;
+
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+    VkVideoBeginCodingInfoKHR decode_start = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR,
+        .videoSession = ctx->common.session,
+        .videoSessionParameters = ctx->empty_session_params,
+    };
+    VkVideoCodingControlInfoKHR decode_ctrl = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_CODING_CONTROL_INFO_KHR,
+        .flags = VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR,
+    };
+    VkVideoEndCodingInfoKHR decode_end = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR,
+    };
+
+    VkCommandBuffer cmd_buf;
+    FFVkExecContext *exec = ff_vk_exec_get(&ctx->exec_pool);
+
+    /* Do not record into a command buffer that failed to start */
+    if (ff_vk_exec_start(&ctx->s, exec) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to start command buffer for flushing\n");
+        return;
+    }
+    cmd_buf = exec->buf;
+
+    vk->CmdBeginVideoCodingKHR(cmd_buf, &decode_start);
+    vk->CmdControlVideoCodingKHR(cmd_buf, &decode_ctrl);
+    vk->CmdEndVideoCodingKHR(cmd_buf, &decode_end);
+    ff_vk_exec_submit(&ctx->s, exec);
+}
+
+int ff_vk_decode_frame(AVCodecContext *avctx, + AVFrame *pic, FFVulkanDecodePicture *vp, + AVFrame *rpic[], FFVulkanDecodePicture *rvkp[]) +{ + int err; + VkResult ret; + VkCommandBuffer cmd_buf; + FFVkVideoBuffer *sd_buf; + + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = (FFVulkanDecodeShared *)dec->shared_ref->data; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + /* Output */ + AVVkFrame *vkf = (AVVkFrame *)pic->buf[0]->data; + + /* Quirks */ + const int layered_dpb = ctx->layered_dpb; + + VkVideoSessionParametersKHR *par = (VkVideoSessionParametersKHR *)dec->session_params->data; + VkVideoBeginCodingInfoKHR decode_start = { + .sType = VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR, + .videoSession = ctx->common.session, + .videoSessionParameters = *par, + .referenceSlotCount = vp->decode_info.referenceSlotCount, + .pReferenceSlots = vp->decode_info.pReferenceSlots, + }; + VkVideoEndCodingInfoKHR decode_end = { + .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR, + }; + + VkImageMemoryBarrier2 img_bar[37]; + int nb_img_bar = 0; + size_t data_size = FFALIGN(vp->slices_size, + ctx->common.caps.minBitstreamBufferSizeAlignment); + + FFVkExecContext *exec = ff_vk_exec_get(&ctx->exec_pool); + + /* The current decoding reference has to be bound as an inactive reference */ + VkVideoReferenceSlotInfoKHR *cur_vk_ref; + cur_vk_ref = (void *)&decode_start.pReferenceSlots[decode_start.referenceSlotCount]; + cur_vk_ref[0] = vp->ref_slot; + cur_vk_ref[0].slotIndex = -1; +
decode_start.referenceSlotCount++; + + if (ctx->exec_pool.nb_queries) { + int64_t prev_sub_res = 0; + ff_vk_exec_wait(&ctx->s, exec); + ret = ff_vk_exec_get_query(&ctx->s, exec, NULL, &prev_sub_res); + if (ret != VK_NOT_READY && ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to perform query: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + if (ret == VK_SUCCESS) + av_log(avctx, prev_sub_res < 0 ? AV_LOG_ERROR : AV_LOG_DEBUG, + "Result of previous frame decoding: %li\n", prev_sub_res); + } + + sd_buf = (FFVkVideoBuffer *)vp->slices_buf->data; + + /* Flush if needed */ + if (!(sd_buf->buf.flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { + VkMappedMemoryRange flush_buf = { + .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, + .memory = sd_buf->buf.mem, + .offset = 0, + .size = FFALIGN(vp->slices_size, + ctx->s.props.properties.limits.nonCoherentAtomSize), + }; + + ret = vk->FlushMappedMemoryRanges(ctx->s.hwctx->act_dev, 1, &flush_buf); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to flush memory: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + } + + vp->decode_info.srcBuffer = sd_buf->buf.buf; + vp->decode_info.srcBufferOffset = 0; + vp->decode_info.srcBufferRange = data_size; + + /* Start command buffer recording */ + err = ff_vk_exec_start(&ctx->s, exec); + if (err < 0) + return err; + cmd_buf = exec->buf; + + /* Slices */ + err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->slices_buf, 1, 0); + if (err < 0) + return err; + vp->slices_buf = NULL; /* Owned by the exec buffer from now on */ + + /* Parameters */ + err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &dec->session_params, 1, 1); + if (err < 0) + return err; + + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, pic, + VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR); + if (err < 0) + return err; + + err = ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value, + pic); + if (err < 0) + return err; + + /* 
Output image - change layout, as it comes from a pool */ + img_bar[nb_img_bar] = (VkImageMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, + .pNext = NULL, + .srcStageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + .srcAccessMask = VK_ACCESS_2_NONE, + .dstAccessMask = VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR, + .oldLayout = vkf->layout[0], + .newLayout = vp->dpb_frame ? VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR : + VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR, /* Spec, 07252 utter madness */ + .srcQueueFamilyIndex = vkf->queue_family[0], + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = vkf->img[0], + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = vp->img_aspect, + .layerCount = 1, + .levelCount = 1, + }, + }; + ff_vk_exec_update_frame(&ctx->s, exec, pic, + &img_bar[nb_img_bar], &nb_img_bar); + + /* Reference for the current image, if existing and not layered */ + if (vp->dpb_frame) { + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, vp->dpb_frame, + VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR); + if (err < 0) + return err; + } + + if (!layered_dpb) { + /* All references (apart from the current) for non-layered refs */ + + for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) { + AVFrame *ref_frame = rpic[i]; + FFVulkanDecodePicture *rvp = rvkp[i]; + AVFrame *ref = rvp->dpb_frame ? 
rvp->dpb_frame : ref_frame; + + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ref, + VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR); + if (err < 0) + return err; + + if (err == 0) { + err = ff_vk_exec_mirror_sem_value(&ctx->s, exec, + &rvp->sem, &rvp->sem_value, + ref); + if (err < 0) + return err; + } + + if (!rvp->dpb_frame) { + AVVkFrame *rvkf = (AVVkFrame *)ref->data[0]; + + img_bar[nb_img_bar] = (VkImageMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, + .pNext = NULL, + .srcStageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + .srcAccessMask = VK_ACCESS_2_NONE, + .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + .dstAccessMask = VK_ACCESS_2_VIDEO_DECODE_READ_BIT_KHR | + VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR, + .oldLayout = rvkf->layout[0], + .newLayout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR, + .srcQueueFamilyIndex = rvkf->queue_family[0], + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = rvkf->img[0], + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = rvp->img_aspect_ref, + .layerCount = 1, + .levelCount = 1, + }, + }; + ff_vk_exec_update_frame(&ctx->s, exec, ref, + &img_bar[nb_img_bar], &nb_img_bar); + } + } + } else if (vp->decode_info.referenceSlotCount || + vp->img_view_out != vp->img_view_ref) { + /* Single barrier for a single layered ref */ + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ctx->layered_frame, + VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR); + if (err < 0) + return err; + } + + /* Change image layout */ + vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + /* Start, use parameters, decode and end decoding */ + vk->CmdBeginVideoCodingKHR(cmd_buf, &decode_start); 
+
+    /* Start status query */
+    if (ctx->exec_pool.nb_queries)
+        vk->CmdBeginQuery(cmd_buf, ctx->exec_pool.query_pool, exec->query_idx + 0, 0);
+
+    vk->CmdDecodeVideoKHR(cmd_buf, &vp->decode_info);
+
+    /* End status query */
+    if (ctx->exec_pool.nb_queries)
+        vk->CmdEndQuery(cmd_buf, ctx->exec_pool.query_pool, exec->query_idx + 0);
+
+    vk->CmdEndVideoCodingKHR(cmd_buf, &decode_end);
+
+    /* End recording and submit for execution */
+    return ff_vk_exec_submit(&ctx->s, exec);
+}
+
+/* Waits on the picture's semaphore (when one is set), then releases the slice
+ * buffer, the slice offset array, the picture's image views and its DPB frame.
+ * The device functions are looked up through dev_ctx instead of a decoder
+ * context. */
+void ff_vk_decode_free_frame(AVHWDeviceContext *dev_ctx, FFVulkanDecodePicture *vp)
+{
+    AVVulkanDeviceContext *hwctx = dev_ctx->hwctx;
+    PFN_vkGetDeviceProcAddr device_proc_addr;
+    PFN_vkWaitSemaphores wait_semaphores;
+    PFN_vkDestroyImageView destroy_image_view;
+
+    VkSemaphoreWaitInfo sem_wait = (VkSemaphoreWaitInfo) {
+        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
+        .pSemaphores = &vp->sem,
+        .pValues = &vp->sem_value,
+        .semaphoreCount = 1,
+    };
+
+    /* Guaranteed to exist */
+    device_proc_addr = (PFN_vkGetDeviceProcAddr)hwctx->get_proc_addr(hwctx->inst, "vkGetDeviceProcAddr");
+    destroy_image_view = (PFN_vkDestroyImageView)device_proc_addr(hwctx->act_dev, "vkDestroyImageView");
+    wait_semaphores = (PFN_vkWaitSemaphores)device_proc_addr(hwctx->act_dev, "vkWaitSemaphores");
+
+    /* We do not have to lock the frame here because we're not interested
+     * in the actual current semaphore value, but only that it's later than
+     * the time we submitted the image for decoding. */
+    if (vp->sem)
+        wait_semaphores(hwctx->act_dev, &sem_wait, UINT64_MAX);
+
+    /* Free slices data */
+    av_buffer_unref(&vp->slices_buf);
+
+    /* TODO: use a pool in the decode context instead to avoid per-frame allocs. */
+    av_freep(&vp->slice_off);
+
+    /* Destroy image view (out) */
+    if (vp->img_view_out && vp->img_view_out != vp->img_view_dest)
+        destroy_image_view(hwctx->act_dev, vp->img_view_out, hwctx->alloc);
+
+    /* Destroy image view (ref, unlayered) */
+    if (vp->img_view_dest)
+        destroy_image_view(hwctx->act_dev, vp->img_view_dest, hwctx->alloc);
+
+    av_frame_free(&vp->dpb_frame);
+}
+
+/* AVBuffer free callback for the FFVulkanDecodeShared context allocated in
+ * vulkan_decode_check_init(). Tears down everything the shared context owns
+ * and then releases the context memory itself. opaque is unused. */
+static void free_common(void *opaque, uint8_t *data)
+{
+    FFVulkanDecodeShared *ctx = (FFVulkanDecodeShared *)data;
+    FFVulkanContext *s = &ctx->s;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    /* Wait on and free execution pool */
+    ff_vk_exec_pool_free(s, &ctx->exec_pool);
+
+    /* Destroy layered view */
+    if (ctx->layered_view)
+        vk->DestroyImageView(s->hwctx->act_dev, ctx->layered_view, s->hwctx->alloc);
+
+    /* This also frees all references from this pool */
+    av_frame_free(&ctx->layered_frame);
+    av_buffer_unref(&ctx->dpb_hwfc_ref);
+
+    /* Destroy parameters */
+    if (ctx->empty_session_params)
+        vk->DestroyVideoSessionParametersKHR(s->hwctx->act_dev,
+                                             ctx->empty_session_params,
+                                             s->hwctx->alloc);
+
+    ff_vk_video_common_uninit(s, &ctx->common);
+
+    /* The sampler only exists once ff_vk_decode_init() got far enough to
+     * create it. On earlier failures s->hwctx and the function table are
+     * still unset, so the call (and its argument evaluation) must be skipped. */
+    if (ctx->yuv_sampler)
+        vk->DestroySamplerYcbcrConversion(s->hwctx->act_dev, ctx->yuv_sampler,
+                                          s->hwctx->alloc);
+
+    ff_vk_uninit(s);
+
+    /* The buffer was created with this custom free callback, so nothing else
+     * releases the av_mallocz()'d context. */
+    av_free(ctx);
+}
+
+/* Since to even get decoder capabilities, we have to initialize quite a lot,
+ * this function does initialization and saves it to hwaccel_priv_data if
+ * available.
*/
+/* Arguments: frames_ref supplies the Vulkan device. width_align, height_align,
+ * pix_fmt, vk_fmt and dpb_dedicate are outputs which are only written when
+ * pix_fmt is non-NULL (ff_vk_decode_init() passes NULL for all of them to
+ * request the capability check alone).
+ * Returns 0 on success, a negative AVERROR code otherwise. */
+static int vulkan_decode_check_init(AVCodecContext *avctx, AVBufferRef *frames_ref,
+                                    int *width_align, int *height_align,
+                                    enum AVPixelFormat *pix_fmt, VkFormat *vk_fmt,
+                                    int *dpb_dedicate)
+{
+    VkResult ret;
+    int err, max_level;
+    const struct FFVkCodecMap *vk_codec = &ff_vk_codec_map[avctx->codec_id];
+    AVHWFramesContext *frames = (AVHWFramesContext *)frames_ref->data;
+    AVHWDeviceContext *device = (AVHWDeviceContext *)frames->device_ref->data;
+    AVVulkanDeviceContext *hwctx = device->hwctx;
+    enum AVPixelFormat source_format;
+    enum AVPixelFormat best_format;
+    VkFormat best_vkfmt;
+    int cur_profile = avctx->profile;
+    /* Overwritten below for the codecs that have a fallback profile; the
+     * initial value keeps the retry condition well-defined for the rest. */
+    int base_profile = cur_profile;
+
+    int dedicated_dpb;
+    int layered_dpb;
+
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx;
+
+    if (!dec->shared_ref) {
+        ctx = av_mallocz(sizeof(*ctx));
+        if (!ctx)
+            return AVERROR(ENOMEM);
+
+        dec->shared_ref = av_buffer_create((uint8_t *)ctx, sizeof(*ctx),
+                                           free_common, NULL, 0);
+        if (!dec->shared_ref) {
+            av_free(ctx);
+            return AVERROR(ENOMEM);
+        }
+    }
+
+    ctx = (FFVulkanDecodeShared *)dec->shared_ref->data;
+
+    FFVulkanExtensions *extensions = &ctx->s.extensions;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+    VkVideoCapabilitiesKHR *caps = &ctx->common.caps;
+
+    /* The profile chain lives in the shared context because the frames
+     * context keeps pointing at it (see hwfc->create_pnext). */
+    VkVideoDecodeCapabilitiesKHR *dec_caps = &ctx->dec_caps;
+    VkVideoDecodeH264ProfileInfoKHR *h264_profile = &ctx->h264_profile;
+    VkVideoDecodeH264ProfileInfoKHR *h265_profile = &ctx->h265_profile;
+    VkVideoDecodeUsageInfoKHR *usage = &ctx->usage;
+    VkVideoProfileInfoKHR *profile = &ctx->profile;
+    VkVideoProfileListInfoKHR *profile_list = &ctx->profile_list;
+
+    VkPhysicalDeviceVideoFormatInfoKHR fmt_info = {
+        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_FORMAT_INFO_KHR,
+        .pNext = profile_list,
+    };
+    VkVideoDecodeH264CapabilitiesKHR h264_caps = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_CAPABILITIES_KHR,
+    };
+    VkVideoDecodeH265CapabilitiesKHR h265_caps = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_CAPABILITIES_KHR,
+    };
+    VkVideoFormatPropertiesKHR *ret_info;
+    uint32_t nb_out_fmts = 0;
+
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
+    if (!desc)
+        return AVERROR(EINVAL);
+
+    if (ctx->init) {
+        /* Capabilities and the profile chain are already cached in ctx. A
+         * caller that only wants initialization is done; one that asked for
+         * the frame parameters still needs the format query below, otherwise
+         * its output arguments would be returned unset. */
+        if (!pix_fmt)
+            return 0;
+        dedicated_dpb = ctx->dedicated_dpb;
+        goto query_formats;
+    }
+
+    if (!vk_codec->decode_op)
+        return AVERROR(EINVAL);
+
+    *extensions = ff_vk_extensions_to_mask(hwctx->enabled_dev_extensions,
+                                           hwctx->nb_enabled_dev_extensions);
+
+    if (!(*extensions & FF_VK_EXT_VIDEO_DECODE_QUEUE)) {
+        av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n",
+               VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME);
+        return AVERROR(ENOSYS);
+    } else if (!vk_codec->decode_extension) {
+        av_log(avctx, AV_LOG_ERROR, "Unsupported codec for Vulkan decoding: %s!\n",
+               avcodec_get_name(avctx->codec_id));
+        return AVERROR(ENOSYS);
+    } else if (!(vk_codec->decode_extension & *extensions)) {
+        av_log(avctx, AV_LOG_ERROR, "Device does not support decoding %s!\n",
+               avcodec_get_name(avctx->codec_id));
+        return AVERROR(ENOSYS);
+    }
+
+    err = ff_vk_load_functions(device, vk, *extensions, 1, 1);
+    if (err < 0)
+        return err;
+
+repeat:
+    if (avctx->codec_id == AV_CODEC_ID_H264) {
+        base_profile = FF_PROFILE_H264_CONSTRAINED_BASELINE;
+        dec_caps->pNext = &h264_caps;
+        usage->pNext = h264_profile;
+        h264_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PROFILE_INFO_KHR;
+        h264_profile->stdProfileIdc = cur_profile;
+        h264_profile->pictureLayout = avctx->field_order == AV_FIELD_UNKNOWN ||
+                                      avctx->field_order == AV_FIELD_PROGRESSIVE ?
+                                      VK_VIDEO_DECODE_H264_PICTURE_LAYOUT_PROGRESSIVE_KHR :
+                                      VK_VIDEO_DECODE_H264_PICTURE_LAYOUT_INTERLACED_INTERLEAVED_LINES_BIT_KHR;
+    } else if (avctx->codec_id == AV_CODEC_ID_H265) {
+        base_profile = FF_PROFILE_HEVC_MAIN;
+        dec_caps->pNext = &h265_caps;
+        usage->pNext = h265_profile;
+        h265_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_INFO_KHR;
+        h265_profile->stdProfileIdc = cur_profile;
+    }
+
+    usage->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_USAGE_INFO_KHR;
+    usage->videoUsageHints = VK_VIDEO_DECODE_USAGE_DEFAULT_KHR;
+
+    profile->sType = VK_STRUCTURE_TYPE_VIDEO_PROFILE_INFO_KHR;
+    profile->pNext = usage;
+    profile->videoCodecOperation = vk_codec->decode_op;
+    profile->chromaSubsampling = ff_vk_subsampling_from_av_desc(desc);
+    profile->lumaBitDepth = ff_vk_depth_from_av_depth(desc->comp[0].depth);
+    profile->chromaBitDepth = profile->lumaBitDepth;
+
+    profile_list->sType = VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR;
+    profile_list->profileCount = 1;
+    profile_list->pProfiles = profile;
+
+    /* Get the capabilities of the decoder for the given profile */
+    caps->sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR;
+    caps->pNext = dec_caps;
+    dec_caps->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_CAPABILITIES_KHR;
+    /* dec_caps->pNext already filled in */
+
+    ret = vk->GetPhysicalDeviceVideoCapabilitiesKHR(hwctx->phys_dev, profile,
+                                                    caps);
+    /* AV_HWACCEL_FLAG_* bits are defined for hwaccel_flags, not for flags */
+    if (ret == VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR &&
+        avctx->hwaccel_flags & AV_HWACCEL_FLAG_ALLOW_PROFILE_MISMATCH &&
+        cur_profile != base_profile) {
+        cur_profile = base_profile;
+        av_log(avctx, AV_LOG_VERBOSE, "%s profile %s not supported, attempting "
+               "again with profile %s\n",
+               avcodec_get_name(avctx->codec_id),
+               avcodec_profile_name(avctx->codec_id, avctx->profile),
+               avcodec_profile_name(avctx->codec_id, base_profile));
+        goto repeat;
+    } else if (ret == VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR) {
+        av_log(avctx, AV_LOG_VERBOSE, "Unable to initialize video session: "
+               "%s profile \"%s\" not supported!\n",
+               avcodec_get_name(avctx->codec_id),
+               avcodec_profile_name(avctx->codec_id, cur_profile));
+        return AVERROR(EINVAL);
+    } else if (ret == VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR) {
+        av_log(avctx, AV_LOG_VERBOSE, "Unable to initialize video session: "
+               "format (%s) not supported!\n",
+               av_get_pix_fmt_name(avctx->sw_pix_fmt));
+        return AVERROR(EINVAL);
+    } else if (ret == VK_ERROR_FEATURE_NOT_PRESENT ||
+               ret == VK_ERROR_FORMAT_NOT_SUPPORTED) {
+        return AVERROR(EINVAL);
+    } else if (ret != VK_SUCCESS) {
+        return AVERROR_EXTERNAL;
+    }
+
+    max_level = avctx->codec_id == AV_CODEC_ID_H264 ? h264_caps.maxLevelIdc :
+                avctx->codec_id == AV_CODEC_ID_H265 ? h265_caps.maxLevelIdc :
+                0;
+
+    if (ctx) {
+        /* Report the profile that was actually queried, which differs from
+         * avctx->profile after a fallback to the base profile. */
+        av_log(avctx, AV_LOG_VERBOSE, "Decoder capabilities for %s profile \"%s\":\n",
+               avcodec_get_name(avctx->codec_id),
+               avcodec_profile_name(avctx->codec_id, cur_profile));
+        av_log(avctx, AV_LOG_VERBOSE, " Maximum level: %i\n",
+               max_level);
+        av_log(avctx, AV_LOG_VERBOSE, " Width: from %i to %i\n",
+               caps->minCodedExtent.width, caps->maxCodedExtent.width);
+        av_log(avctx, AV_LOG_VERBOSE, " Height: from %i to %i\n",
+               caps->minCodedExtent.height, caps->maxCodedExtent.height);
+        av_log(avctx, AV_LOG_VERBOSE, " Width alignment: %i\n",
+               caps->pictureAccessGranularity.width);
+        av_log(avctx, AV_LOG_VERBOSE, " Height alignment: %i\n",
+               caps->pictureAccessGranularity.height);
+        av_log(avctx, AV_LOG_VERBOSE, " Bitstream offset alignment: %"PRIu64"\n",
+               caps->minBitstreamBufferOffsetAlignment);
+        av_log(avctx, AV_LOG_VERBOSE, " Bitstream size alignment: %"PRIu64"\n",
+               caps->minBitstreamBufferSizeAlignment);
+        av_log(avctx, AV_LOG_VERBOSE, " Maximum references: %u\n",
+               caps->maxDpbSlots);
+        av_log(avctx, AV_LOG_VERBOSE, " Maximum active references: %u\n",
+               caps->maxActiveReferencePictures);
+        av_log(avctx, AV_LOG_VERBOSE, " Codec header version: %i.%i.%i (driver), %i.%i.%i (compiled)\n",
+               CODEC_VER(caps->stdHeaderVersion.specVersion),
+               CODEC_VER(dec_ext[avctx->codec_id]->specVersion));
+        av_log(avctx, AV_LOG_VERBOSE, " Decode modes:%s%s%s\n",
+               dec_caps->flags ? "" :
+                   " invalid",
+               dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR ?
+                   " reuse_dst_dpb" : "",
+               dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR ?
+                   " dedicated_dpb" : "");
+        av_log(avctx, AV_LOG_VERBOSE, " Capability flags:%s%s%s\n",
+               caps->flags ? "" :
+                   " none",
+               caps->flags & VK_VIDEO_CAPABILITY_PROTECTED_CONTENT_BIT_KHR ?
+                   " protected" : "",
+               caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR ?
+                   " separate_references" : "");
+    }
+
+    /* Check if decoding is possible with the given parameters */
+    if (avctx->coded_width < caps->minCodedExtent.width ||
+        avctx->coded_height < caps->minCodedExtent.height ||
+        avctx->coded_width > caps->maxCodedExtent.width ||
+        avctx->coded_height > caps->maxCodedExtent.height)
+        return AVERROR(EINVAL);
+
+    if (!(avctx->hwaccel_flags & AV_HWACCEL_FLAG_IGNORE_LEVEL) &&
+        avctx->level > max_level)
+        return AVERROR(EINVAL);
+
+    /* Some basic sanity checking */
+    if (!(dec_caps->flags & (VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR |
+                             VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR))) {
+        av_log(avctx, AV_LOG_ERROR, "Buggy driver signals invalid decoding mode: neither "
+               "VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR nor "
+               "VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR are set!\n");
+        return AVERROR_EXTERNAL;
+    /* The mask must be parenthesized: == binds tighter than &, so without the
+     * extra parentheses this branch could never be taken. */
+    } else if (((dec_caps->flags & (VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR |
+                                    VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR)) ==
+                VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR) &&
+               !(caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR)) {
+        av_log(avctx, AV_LOG_ERROR, "Cannot initialize Vulkan decoding session, buggy driver: "
+               "VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR set "
+               "but VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR is unset!\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    /* TODO: make dedicated_dpb tunable */
+    dedicated_dpb = !(dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR);
+    layered_dpb = !(caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR);
+
+    if (ctx) {
+        ctx->dedicated_dpb = dedicated_dpb;
+        ctx->layered_dpb = layered_dpb;
+        ctx->init = 1;
+    }
+
+    if (!pix_fmt)
+        return 0;
+
+query_formats:
+    if (dedicated_dpb) {
+        fmt_info.imageUsage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR;
+    } else {
+        fmt_info.imageUsage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR |
+                              VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR |
+                              VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
+                              VK_IMAGE_USAGE_SAMPLED_BIT;
+    }
+
+    /* Get the format of the images necessary */
+    ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(hwctx->phys_dev,
+                                                        &fmt_info,
+                                                        &nb_out_fmts, NULL);
+    if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED ||
+        (!nb_out_fmts && ret == VK_SUCCESS)) {
+        return AVERROR(EINVAL);
+    } else if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    ret_info = av_mallocz(sizeof(*ret_info)*nb_out_fmts);
+    if (!ret_info)
+        return AVERROR(ENOMEM);
+
+    for (int i = 0; i < nb_out_fmts; i++)
+        ret_info[i].sType = VK_STRUCTURE_TYPE_VIDEO_FORMAT_PROPERTIES_KHR;
+
+    ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(hwctx->phys_dev,
+                                                        &fmt_info,
+                                                        &nb_out_fmts, ret_info);
+    if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED ||
+        (!nb_out_fmts && ret == VK_SUCCESS)) {
+        av_free(ret_info);
+        return AVERROR(EINVAL);
+    } else if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n",
+               ff_vk_ret2str(ret));
+        av_free(ret_info);
+        return AVERROR_EXTERNAL;
+    }
+
+    /* Find a format to use */
+    *pix_fmt = best_format = AV_PIX_FMT_NONE;
+    *vk_fmt = best_vkfmt = VK_FORMAT_UNDEFINED;
+    source_format = avctx->sw_pix_fmt;
+
+    av_log(avctx, AV_LOG_DEBUG, "Choosing best pixel format for decoding from %i:\n", nb_out_fmts);
+    for (int i = 0; i < nb_out_fmts; i++) {
+        enum AVPixelFormat tmp = ff_vk_pix_fmt_from_vkfmt(ret_info[i].format);
+        if (tmp == AV_PIX_FMT_NONE) {
+            av_log(avctx, AV_LOG_WARNING, "Invalid/unknown Vulkan format %i!\n", ret_info[i].format);
+            continue;
+        }
+
+        best_format = av_find_best_pix_fmt_of_2(tmp, best_format, source_format, 0, NULL);
+        if (tmp == best_format)
+            best_vkfmt = ret_info[i].format;
+
+        av_log(avctx, AV_LOG_DEBUG, " %s%s (Vulkan ID: %i)\n",
+               av_get_pix_fmt_name(tmp), tmp == best_format ? "*" : "",
+               ret_info[i].format);
+    }
+
+    av_free(ret_info);
+
+    if (best_format == AV_PIX_FMT_NONE) {
+        av_log(avctx, AV_LOG_ERROR, "No valid/compatible pixel format found for decoding!\n");
+        return AVERROR(EINVAL);
+    } else {
+        av_log(avctx, AV_LOG_VERBOSE, "Chosen frame pixfmt: %s (Vulkan ID: %i)\n",
+               av_get_pix_fmt_name(best_format), best_vkfmt);
+    }
+
+    *pix_fmt = best_format;
+    *vk_fmt = best_vkfmt;
+
+    *width_align = caps->pictureAccessGranularity.width;
+    *height_align = caps->pictureAccessGranularity.height;
+    *dpb_dedicate = dedicated_dpb;
+
+    return 0;
+}
+
+int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx)
+{
+    VkFormat vkfmt;
+    int err, width_align, height_align, dedicated_dpb;
+    AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data;
+    AVVulkanFramesContext *hwfc = frames_ctx->hwctx;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx;
+
+    frames_ctx->sw_format = AV_PIX_FMT_NONE;
+
+    err = vulkan_decode_check_init(avctx, hw_frames_ctx,
+                                   &width_align, &height_align,
+                                   &frames_ctx->sw_format, &vkfmt,
+                                   &dedicated_dpb);
+    if (err < 0)
+        return err;
+
+    ctx = (FFVulkanDecodeShared *)dec->shared_ref->data;
+
+    frames_ctx->width = FFALIGN(avctx->coded_width, width_align);
+    frames_ctx->height = FFALIGN(avctx->coded_height, height_align);
+    frames_ctx->format =
AV_PIX_FMT_VULKAN;
+
+    hwfc->format[0] = vkfmt;
+    hwfc->create_pnext = &ctx->profile_list;
+    hwfc->tiling = VK_IMAGE_TILING_OPTIMAL;
+    hwfc->usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
+                  VK_IMAGE_USAGE_SAMPLED_BIT |
+                  VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR;
+
+    if (!dedicated_dpb)
+        hwfc->usage |= VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR;
+
+    return err;
+}
+
+/* AVBuffer free callback for session parameters: data holds a heap-allocated
+ * VkVideoSessionParametersKHR handle, opaque the FFVulkanDecodeShared context
+ * whose device and function table are used to destroy it. */
+void ff_vk_decode_free_params(void *opaque, uint8_t *data)
+{
+    FFVulkanDecodeShared *ctx = opaque;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+    VkVideoSessionParametersKHR *par = (VkVideoSessionParametersKHR *)data;
+    vk->DestroyVideoSessionParametersKHR(ctx->s.hwctx->act_dev, *par,
+                                         ctx->s.hwctx->alloc);
+    av_free(par);
+}
+
+/* Drops the per-context temporary pool and this context's references to the
+ * session parameters and to the shared decoder state. Always returns 0. */
+int ff_vk_decode_uninit(AVCodecContext *avctx)
+{
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    av_buffer_pool_uninit(&dec->tmp_pool);
+    av_buffer_unref(&dec->session_params);
+    av_buffer_unref(&dec->shared_ref);
+    return 0;
+}
+
+/* Sets up the shared decoder state: frames context, capability check, exec
+ * pool, video session, YCbCr sampler, the optional dedicated/layered DPB and
+ * the empty session parameters. Returns 0 on success or a negative AVERROR
+ * code. Every failure after the frames context was obtained goes through the
+ * fail label so that the partially built state is released again. */
+int ff_vk_decode_init(AVCodecContext *avctx)
+{
+    int err, qf, cxpos = 0, cypos = 0, nb_q = 0;
+    VkResult ret;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx;
+    FFVulkanContext *s;
+    FFVulkanFunctions *vk;
+    const VkVideoProfileListInfoKHR *profile_list;
+    FFVkQueueFamilyCtx qf_dec;
+
+    VkVideoDecodeH264SessionParametersCreateInfoKHR h264_params = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_CREATE_INFO_KHR,
+    };
+    VkVideoDecodeH265SessionParametersCreateInfoKHR h265_params = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_CREATE_INFO_KHR,
+    };
+    VkVideoSessionParametersCreateInfoKHR session_params_create = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR,
+        .pNext = avctx->codec_id == AV_CODEC_ID_H264 ? (void *)&h264_params :
+                 avctx->codec_id == AV_CODEC_ID_HEVC ? (void *)&h265_params :
+                 NULL,
+    };
+    VkVideoSessionCreateInfoKHR session_create = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_CREATE_INFO_KHR,
+    };
+    VkSamplerYcbcrConversionCreateInfo yuv_sampler_info = {
+        .sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO,
+        .components = ff_comp_identity_map,
+        .ycbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY,
+        .ycbcrRange = avctx->color_range == AVCOL_RANGE_MPEG, /* Ignored */
+    };
+
+    err = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_VULKAN);
+    if (err < 0)
+        return err;
+
+    /* Get parameters, capabilities and final pixel/vulkan format */
+    err = vulkan_decode_check_init(avctx, avctx->hw_frames_ctx,
+                                   NULL, NULL, NULL, NULL, NULL);
+    if (err < 0)
+        goto fail;
+
+    ctx = (FFVulkanDecodeShared *)dec->shared_ref->data;
+    s = &ctx->s;
+    vk = &ctx->s.vkfn;
+
+    s->frames_ref = av_buffer_ref(avctx->hw_frames_ctx);
+    if (!s->frames_ref) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+    s->frames = (AVHWFramesContext *)s->frames_ref->data;
+    s->hwfc = s->frames->hwctx;
+
+    s->device = (AVHWDeviceContext *)s->frames->device_ref->data;
+    s->hwctx = s->device->hwctx;
+
+    /* Load all properties */
+    err = ff_vk_load_props(s);
+    if (err < 0)
+        goto fail;
+
+    /* Create queue context */
+    qf = ff_vk_qf_init(s, &qf_dec, VK_QUEUE_VIDEO_DECODE_BIT_KHR);
+
+    /* Check for support */
+    if (!(s->video_props[qf].videoCodecOperations &
+          ff_vk_codec_map[avctx->codec_id].decode_op)) {
+        av_log(avctx, AV_LOG_ERROR, "Decoding %s not supported on the given "
+               "queue family %i!\n", avcodec_get_name(avctx->codec_id), qf);
+        err = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    /* Enable queries if supported */
+    if (s->query_props[qf].queryResultStatusSupport)
+        nb_q = 1;
+
+    /* ff_vk_frame_params() attaches the profile list to the frames context.
+     * NOTE(review): a frames context set up elsewhere presumably lacks it,
+     * in which case the lookup is expected to yield NULL - confirm. */
+    profile_list = ff_vk_find_struct(s->hwfc->create_pnext,
+                                     VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR);
+    if (!profile_list) {
+        av_log(avctx, AV_LOG_ERROR, "Video profile list missing from frames context!\n");
+        err = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    session_create.flags = 0x0;
+    session_create.queueFamilyIndex = s->hwctx->queue_family_decode_index;
+    session_create.maxCodedExtent = ctx->common.caps.maxCodedExtent;
+    session_create.maxDpbSlots = ctx->common.caps.maxDpbSlots;
+    session_create.maxActiveReferencePictures = ctx->common.caps.maxActiveReferencePictures;
+    session_create.pictureFormat = s->hwfc->format[0];
+    session_create.referencePictureFormat = session_create.pictureFormat;
+    session_create.pStdHeaderVersion = dec_ext[avctx->codec_id];
+    session_create.pVideoProfile = &profile_list->pProfiles[0];
+
+    /* Create decode exec context.
+     * 4 async contexts per thread seems like a good number. */
+    err = ff_vk_exec_pool_init(s, &qf_dec, &ctx->exec_pool, 4*avctx->thread_count,
+                               nb_q, VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR, 0,
+                               session_create.pVideoProfile);
+    if (err < 0)
+        goto fail;
+
+    err = ff_vk_video_common_init(avctx, s, &ctx->common, &session_create);
+    if (err < 0)
+        goto fail;
+
+    /* Get sampler */
+    av_chroma_location_enum_to_pos(&cxpos, &cypos, avctx->chroma_sample_location);
+    yuv_sampler_info.xChromaOffset = cxpos >> 7;
+    yuv_sampler_info.yChromaOffset = cypos >> 7;
+    yuv_sampler_info.format = s->hwfc->format[0];
+    ret = vk->CreateSamplerYcbcrConversion(s->hwctx->act_dev, &yuv_sampler_info,
+                                           s->hwctx->alloc, &ctx->yuv_sampler);
+    if (ret != VK_SUCCESS) {
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    /* If doing an out-of-place decoding, create a DPB pool */
+    if (ctx->dedicated_dpb) {
+        AVHWFramesContext *dpb_frames;
+        AVVulkanFramesContext *dpb_hwfc;
+
+        ctx->dpb_hwfc_ref = av_hwframe_ctx_alloc(s->frames->device_ref);
+        if (!ctx->dpb_hwfc_ref) {
+            err = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        dpb_frames = (AVHWFramesContext *)ctx->dpb_hwfc_ref->data;
+        dpb_frames->format = s->frames->format;
+        dpb_frames->sw_format = s->frames->sw_format;
+        dpb_frames->width = s->frames->width;
+        dpb_frames->height = s->frames->height;
+
+        dpb_hwfc = dpb_frames->hwctx;
+        dpb_hwfc->create_pnext = (void *)profile_list;
+        dpb_hwfc->format[0] = s->hwfc->format[0];
+        dpb_hwfc->tiling = VK_IMAGE_TILING_OPTIMAL;
+        dpb_hwfc->usage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR |
+                          VK_IMAGE_USAGE_SAMPLED_BIT; /* Shuts validator up. */
+
+        if (ctx->layered_dpb)
+            dpb_hwfc->nb_layers = ctx->common.caps.maxDpbSlots;
+
+        err = av_hwframe_ctx_init(ctx->dpb_hwfc_ref);
+        if (err < 0)
+            goto fail;
+
+        if (ctx->layered_dpb) {
+            ctx->layered_frame = vk_get_dpb_pool(ctx);
+            if (!ctx->layered_frame) {
+                err = AVERROR(ENOMEM);
+                goto fail;
+            }
+
+            err = vk_decode_create_view(ctx, &ctx->layered_view, &ctx->layered_aspect,
+                                        (AVVkFrame *)ctx->layered_frame->data[0],
+                                        s->hwfc->format[0]);
+            if (err < 0)
+                goto fail;
+        }
+    }
+
+    session_params_create.videoSession = ctx->common.session;
+    ret = vk->CreateVideoSessionParametersKHR(s->hwctx->act_dev, &session_params_create,
+                                              s->hwctx->alloc, &ctx->empty_session_params);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to create empty Vulkan video session parameters: %s!\n",
+               ff_vk_ret2str(ret));
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    ff_vk_decode_flush(avctx);
+
+    av_log(avctx, AV_LOG_VERBOSE, "Vulkan decoder initialization successful\n");
+
+    return 0;
+
+fail:
+    ff_vk_decode_uninit(avctx);
+
+    return err;
+}
diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h
new file mode 100644
index 0000000000000..406fdc6792223
--- /dev/null
+++ b/libavcodec/vulkan_decode.h
@@ -0,0 +1,175 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VULKAN_DECODE_H
+#define AVCODEC_VULKAN_DECODE_H
+
+#include "decode.h"
+#include "hwconfig.h"
+#include "internal.h"
+
+#include "vulkan_video.h"
+
+typedef struct FFVulkanDecodeShared { /* State held behind FFVulkanDecodeContext.shared_ref */
+    FFVulkanContext s;
+    FFVkVideoCommon common;
+    FFVkExecPool exec_pool;
+
+    int dedicated_dpb; /* Oddity #1 - separate DPB images */
+    int layered_dpb; /* Madness #1 - layered DPB images */
+
+    AVBufferRef *dpb_hwfc_ref; /* Only used for dedicated_dpb */
+
+    AVFrame *layered_frame; /* Only used for layered_dpb */
+    VkImageView layered_view;
+    VkImageAspectFlags layered_aspect;
+
+    VkVideoSessionParametersKHR empty_session_params; /* Created at the end of ff_vk_decode_init() */
+
+    VkVideoDecodeCapabilitiesKHR dec_caps; /* This and the profile structs below are filled by vulkan_decode_check_init() */
+    VkVideoDecodeH264ProfileInfoKHR h264_profile;
+    VkVideoDecodeH264ProfileInfoKHR h265_profile; /* NOTE(review): declared with the H.264 struct type although it is filled as an H.265 profile - confirm the intended type */
+    VkVideoDecodeAV1ProfileInfoMESA av1_profile;
+    VkVideoDecodeUsageInfoKHR usage;
+    VkVideoProfileInfoKHR profile;
+    VkVideoProfileListInfoKHR profile_list; /* Attached to the frames context as create_pnext by ff_vk_frame_params() */
+
+    VkSamplerYcbcrConversion yuv_sampler; /* Created in ff_vk_decode_init() */
+    int init; /* Set to 1 once vulkan_decode_check_init() has queried the capabilities */
+} FFVulkanDecodeShared;
+
+typedef struct FFVulkanDecodeContext {
+    AVBufferRef *shared_ref; /* Buffer whose data is the FFVulkanDecodeShared state */
+
+    /* Thread-local state below */
+    AVBufferPool *tmp_pool; /* Pool for temporary data, if needed (HEVC) */
+    size_t tmp_pool_ele_size;
+
+    /* Thread-synchronized data below */
+    AVBufferRef *session_params;
+    int params_changed;
+} FFVulkanDecodeContext;
+
+typedef struct FFVulkanDecodePicture {
+    AVFrame *dpb_frame; /* Only used for out-of-place decoding. */
+
+    VkImageView img_view_ref; /* Image representation view (reference) */
+    VkImageView img_view_out; /* Image representation view (output-only) */
+    VkImageView img_view_dest; /* Set to img_view_out if no layered refs are used */
+    VkImageAspectFlags img_aspect; /* Image plane mask bits */
+    VkImageAspectFlags img_aspect_ref; /* Only used for out-of-place decoding */
+
+    VkSemaphore sem; /* Waited on, together with sem_value, by ff_vk_decode_free_frame() */
+    uint64_t sem_value;
+
+    /* State */
+    int update_params;
+    AVBufferRef *session_params;
+
+    /* Current picture */
+    VkVideoPictureResourceInfoKHR ref;
+    VkVideoReferenceSlotInfoKHR ref_slot;
+
+    /* Picture refs. H264 has the maximum number of refs (36) of any supported codec. */
+    VkVideoPictureResourceInfoKHR refs [36];
+    VkVideoReferenceSlotInfoKHR ref_slots[36];
+
+    /* Main decoding struct */
+    AVBufferRef *params_buf;
+    VkVideoDecodeInfoKHR decode_info;
+
+    /* Slice data */
+    AVBufferRef *slices_buf; /* Ownership passes to the exec buffer when the frame is submitted */
+    size_t slices_size;
+    uint32_t *slice_off;
+    unsigned int slice_off_max;
+    uint32_t nb_slices;
+} FFVulkanDecodePicture;
+
+/**
+ * Initialize decoder.
+ */
+int ff_vk_decode_init(AVCodecContext *avctx);
+
+/**
+ * Synchronize the contexts between 2 threads.
+ */
+int ff_vk_update_thread_context(AVCodecContext *dst, const AVCodecContext *src);
+
+/**
+ * Initialize hw_frames_ctx with the parameters needed to decode the stream
+ * using the parameters from avctx.
+ *
+ * NOTE: if avctx->internal->hwaccel_priv_data exists, will partially initialize
+ * the context.
+ */
+int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx);
+
+/**
+ * Sets FFVulkanDecodeContext.params_changed to 1.
+ */
+int ff_vk_params_changed(AVCodecContext *avctx, int t, const uint8_t *b, uint32_t s);
+
+/**
+ * Prepare a frame, creates the image view, and sets up the dpb fields.
+ */
+int ff_vk_decode_prepare_frame(FFVulkanDecodeShared *ctx, AVFrame *pic,
+                               FFVulkanDecodePicture *vkpic, int is_current,
+                               int alloc_dpb);
+
+/**
+ * Add slice data to frame.
+ */ +int ff_vk_decode_add_slice(AVCodecContext *avctx, FFVulkanDecodePicture *vp, + const uint8_t *data, size_t size, int add_startcode, + uint32_t *nb_slices, const uint32_t **offsets); + +/** + * Decode a frame. + */ +int ff_vk_decode_frame(AVCodecContext *avctx, + AVFrame *pic, FFVulkanDecodePicture *vp, + AVFrame *rpic[], FFVulkanDecodePicture *rvkp[]); + +/** + * Free a frame and its state. + */ +void ff_vk_decode_free_frame(AVHWDeviceContext *dev_ctx, FFVulkanDecodePicture *vp); + +/** + * Get an FFVkBuffer suitable for decoding from. + */ +int ff_vk_get_decode_buffer(FFVulkanDecodeContext *ctx, AVBufferRef **buf, + void *create_pNext, size_t size); + +/** + * Free VkVideoSessionParametersKHR. + */ +void ff_vk_decode_free_params(void *opaque, uint8_t *data); + +/** + * Flush decoder. + */ +void ff_vk_decode_flush(AVCodecContext *avctx); + +/** + * Free decoder. + */ +int ff_vk_decode_uninit(AVCodecContext *avctx); + +#endif /* AVCODEC_VULKAN_DECODE_H */ From 7075fe5bf2c3bb1baf2849bbcb7516993c8ba8cb Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 14 Dec 2022 01:13:01 +0100 Subject: [PATCH 82/98] h264dec: add Vulkan hwaccel Thanks to Dave Airlie for figuring out a lot of the parameters. 
--- configure | 2 + libavcodec/Makefile | 1 + libavcodec/h264_slice.c | 12 +- libavcodec/h264dec.c | 3 + libavcodec/hwaccels.h | 1 + libavcodec/vulkan_h264.c | 534 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 552 insertions(+), 1 deletion(-) create mode 100644 libavcodec/vulkan_h264.c diff --git a/configure b/configure index 859cc4acebbf8..ddff9423bcfee 100755 --- a/configure +++ b/configure @@ -3038,6 +3038,8 @@ h264_vdpau_hwaccel_deps="vdpau" h264_vdpau_hwaccel_select="h264_decoder" h264_videotoolbox_hwaccel_deps="videotoolbox" h264_videotoolbox_hwaccel_select="h264_decoder" +h264_vulkan_hwaccel_deps="vulkan" +h264_vulkan_hwaccel_select="h264_decoder" hevc_d3d11va_hwaccel_deps="d3d11va DXVA_PicParams_HEVC" hevc_d3d11va_hwaccel_select="hevc_decoder" hevc_d3d11va2_hwaccel_deps="d3d11va DXVA_PicParams_HEVC" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 5bc3d6ffabf1c..62b24630bf920 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -998,6 +998,7 @@ OBJS-$(CONFIG_H264_QSV_HWACCEL) += qsvdec.o OBJS-$(CONFIG_H264_VAAPI_HWACCEL) += vaapi_h264.o OBJS-$(CONFIG_H264_VDPAU_HWACCEL) += vdpau_h264.o OBJS-$(CONFIG_H264_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o +OBJS-$(CONFIG_H264_VULKAN_HWACCEL) += vulkan_decode.o vulkan_h264.o OBJS-$(CONFIG_HEVC_D3D11VA_HWACCEL) += dxva2_hevc.o OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL) += dxva2_hevc.o OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL) += nvdec_hevc.o diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c index d715cbb002a83..41bf30eefca6a 100644 --- a/libavcodec/h264_slice.c +++ b/libavcodec/h264_slice.c @@ -781,7 +781,8 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback) CONFIG_H264_NVDEC_HWACCEL + \ CONFIG_H264_VAAPI_HWACCEL + \ CONFIG_H264_VIDEOTOOLBOX_HWACCEL + \ - CONFIG_H264_VDPAU_HWACCEL) + CONFIG_H264_VDPAU_HWACCEL + \ + CONFIG_H264_VULKAN_HWACCEL) enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts; const enum AVPixelFormat *choices = pix_fmts; int i; @@ 
-802,6 +803,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback) #if CONFIG_H264_VIDEOTOOLBOX_HWACCEL if (h->avctx->colorspace != AVCOL_SPC_RGB) *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX; +#endif +#if CONFIG_H264_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; #endif if (CHROMA444(h)) { if (h->avctx->colorspace == AVCOL_SPC_RGB) { @@ -821,6 +825,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback) } break; case 12: +#if CONFIG_H264_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; +#endif if (CHROMA444(h)) { if (h->avctx->colorspace == AVCOL_SPC_RGB) { *fmt++ = AV_PIX_FMT_GBRP12; @@ -846,6 +853,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback) #if CONFIG_H264_VDPAU_HWACCEL *fmt++ = AV_PIX_FMT_VDPAU; #endif +#if CONFIG_H264_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; +#endif #if CONFIG_H264_NVDEC_HWACCEL *fmt++ = AV_PIX_FMT_CUDA; #endif diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c index a10b4bb85cfed..19f8dba131780 100644 --- a/libavcodec/h264dec.c +++ b/libavcodec/h264dec.c @@ -1100,6 +1100,9 @@ const FFCodec ff_h264_decoder = { #endif #if CONFIG_H264_VIDEOTOOLBOX_HWACCEL HWACCEL_VIDEOTOOLBOX(h264), +#endif +#if CONFIG_H264_VULKAN_HWACCEL + HWACCEL_VULKAN(h264), #endif NULL }, diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h index aca55831f32f9..23d0843c76f5f 100644 --- a/libavcodec/hwaccels.h +++ b/libavcodec/hwaccels.h @@ -36,6 +36,7 @@ extern const AVHWAccel ff_h264_nvdec_hwaccel; extern const AVHWAccel ff_h264_vaapi_hwaccel; extern const AVHWAccel ff_h264_vdpau_hwaccel; extern const AVHWAccel ff_h264_videotoolbox_hwaccel; +extern const AVHWAccel ff_h264_vulkan_hwaccel; extern const AVHWAccel ff_hevc_d3d11va_hwaccel; extern const AVHWAccel ff_hevc_d3d11va2_hwaccel; extern const AVHWAccel ff_hevc_dxva2_hwaccel; diff --git a/libavcodec/vulkan_h264.c b/libavcodec/vulkan_h264.c new file mode 100644 index 0000000000000..86234f3ad38e0 --- /dev/null +++ 
b/libavcodec/vulkan_h264.c
@@ -0,0 +1,599 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "h264dec.h"
+#include "h264_ps.h"
+
+#include "vulkan_decode.h"
+
+const VkExtensionProperties ff_vk_dec_h264_ext = {
+    .extensionName = VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME,
+    .specVersion   = VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION,
+};
+
+/* Per-picture hwaccel state; its size is exported as frame_priv_data_size. */
+typedef struct H264VulkanDecodePicture {
+    FFVulkanDecodePicture           vp;
+
+    /* Current picture */
+    StdVideoDecodeH264ReferenceInfo h264_ref;
+    VkVideoDecodeH264DpbSlotInfoKHR vkh264_ref;
+
+    /* Picture refs */
+    H264Picture                    *ref_src    [H264_MAX_PICTURE_COUNT];
+    StdVideoDecodeH264ReferenceInfo h264_refs  [H264_MAX_PICTURE_COUNT];
+    VkVideoDecodeH264DpbSlotInfoKHR vkh264_refs[H264_MAX_PICTURE_COUNT];
+
+    /* Current picture (contd.) */
+    StdVideoDecodeH264PictureInfo    h264pic;
+    VkVideoDecodeH264PictureInfoKHR  h264_pic_info;
+} H264VulkanDecodePicture;
+
+/**
+ * Return the index of pic within h->DPB, or 0 when pic is not one of
+ * the DPB entries.
+ */
+static int vk_h264_dpb_slot(const H264Context *h, const H264Picture *pic)
+{
+    for (int slot = 0; slot < H264_MAX_PICTURE_COUNT; slot++)
+        if (pic == &h->DPB[slot])
+            return slot;
+
+    return 0;
+}
+
+/**
+ * Describe one picture (current or reference) to Vulkan.
+ *
+ * Prepares the frame with ff_vk_decode_prepare_frame() and then fills
+ * *h264_ref, *vkh264_ref, *ref and *ref_slot, linking them so that
+ * ref_slot points at both vkh264_ref (which points at h264_ref) and ref.
+ * When ref_src is non-NULL, pic is also stored through it.
+ *
+ * @return 0 on success, otherwise the negative error code returned by
+ *         ff_vk_decode_prepare_frame()
+ */
+static int vk_h264_fill_pict(AVCodecContext *avctx, H264Picture **ref_src,
+                             VkVideoReferenceSlotInfoKHR *ref_slot,       /* Main structure */
+                             VkVideoPictureResourceInfoKHR *ref,          /* Goes in ^ */
+                             VkVideoDecodeH264DpbSlotInfoKHR *vkh264_ref, /* Goes in ^ */
+                             StdVideoDecodeH264ReferenceInfo *h264_ref,   /* Goes in ^ */
+                             H264Picture *pic, int is_current,
+                             int is_field, int picture_structure,
+                             int dpb_slot_index)
+{
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = (FFVulkanDecodeShared *)dec->shared_ref->data;
+    H264VulkanDecodePicture *hp = pic->hwaccel_picture_private;
+    FFVulkanDecodePicture *vkpic = &hp->vp;
+
+    int err = ff_vk_decode_prepare_frame(ctx, pic->f, vkpic, is_current,
+                                         ctx->dedicated_dpb);
+    if (err < 0)
+        return err;
+
+    *h264_ref = (StdVideoDecodeH264ReferenceInfo) {
+        .FrameNum = pic->long_ref ? pic->pic_id : pic->frame_num,
+        .PicOrderCnt = { pic->field_poc[0], pic->field_poc[1] },
+        .flags = (StdVideoDecodeH264ReferenceInfoFlags) {
+            .top_field_flag    = is_field ? !!(picture_structure & PICT_TOP_FIELD)    : 0,
+            .bottom_field_flag = is_field ? !!(picture_structure & PICT_BOTTOM_FIELD) : 0,
+            .used_for_long_term_reference = pic->reference && pic->long_ref,
+            .is_non_existing = 0,
+        },
+    };
+
+    *vkh264_ref = (VkVideoDecodeH264DpbSlotInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR,
+        .pStdReferenceInfo = h264_ref,
+    };
+
+    *ref = (VkVideoPictureResourceInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
+        .codedOffset = (VkOffset2D){ 0, 0 },
+        .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
+        .baseArrayLayer = ctx->layered_dpb ? dpb_slot_index : 0,
+        .imageViewBinding = vkpic->img_view_ref,
+    };
+
+    *ref_slot = (VkVideoReferenceSlotInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR,
+        .pNext = vkh264_ref,
+        .slotIndex = dpb_slot_index,
+        .pPictureResource = ref,
+    };
+
+    if (ref_src)
+        *ref_src = pic;
+
+    return 0;
+}
+
+/**
+ * Convert an FFmpeg SPS into the Vulkan Std SPS structures.
+ *
+ * *vksps ends up pointing at vksps_scaling, at vksps_vui (which in turn
+ * points at vksps_vui_header) and at sps->offset_for_ref_frame, so all
+ * of them have to stay valid for as long as *vksps is used.
+ */
+static void set_sps(const SPS *sps,
+                    StdVideoH264ScalingLists *vksps_scaling,
+                    StdVideoH264HrdParameters *vksps_vui_header,
+                    StdVideoH264SequenceParameterSetVui *vksps_vui,
+                    StdVideoH264SequenceParameterSet *vksps)
+{
+    *vksps_scaling = (StdVideoH264ScalingLists) {
+        .scaling_list_present_mask = sps->scaling_matrix_present_mask,
+        .use_default_scaling_matrix_mask = 0, /* We already fill in the default matrix */
+    };
+
+    for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS; i++)
+        memcpy(vksps_scaling->ScalingList4x4[i], sps->scaling_matrix4[i],
+               STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**sps->scaling_matrix4));
+
+    for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_8X8_NUM_LISTS; i++)
+        memcpy(vksps_scaling->ScalingList8x8[i], sps->scaling_matrix8[i],
+               STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS * sizeof(**sps->scaling_matrix8));
+
+    *vksps_vui_header = (StdVideoH264HrdParameters) {
+        .cpb_cnt_minus1 = sps->cpb_cnt - 1,
+        .bit_rate_scale = sps->bit_rate_scale,
+        .initial_cpb_removal_delay_length_minus1 = sps->initial_cpb_removal_delay_length - 1,
+        .cpb_removal_delay_length_minus1 = sps->cpb_removal_delay_length - 1,
+        .dpb_output_delay_length_minus1 = sps->dpb_output_delay_length - 1,
+        .time_offset_length = sps->time_offset_length,
+    };
+
+    for (int i = 0; i < sps->cpb_cnt; i++) {
+        vksps_vui_header->bit_rate_value_minus1[i] = sps->bit_rate_value[i] - 1;
+        vksps_vui_header->cpb_size_value_minus1[i] = sps->cpb_size_value[i] - 1;
+        vksps_vui_header->cbr_flag[i] = (sps->cpr_flag >> i) & 0x1;
+    }
+
+    *vksps_vui = (StdVideoH264SequenceParameterSetVui) {
+        .aspect_ratio_idc = sps->vui.aspect_ratio_idc,
+        .sar_width = sps->vui.sar.num,
+        .sar_height = sps->vui.sar.den,
+        .video_format = sps->vui.video_format,
+        .colour_primaries = sps->vui.colour_primaries,
+        .transfer_characteristics = sps->vui.transfer_characteristics,
+        .matrix_coefficients = sps->vui.matrix_coeffs,
+        .num_units_in_tick = sps->num_units_in_tick,
+        .time_scale = sps->time_scale,
+        .pHrdParameters = vksps_vui_header,
+        .max_num_reorder_frames = sps->num_reorder_frames,
+        .max_dec_frame_buffering = sps->max_dec_frame_buffering,
+        .flags = (StdVideoH264SpsVuiFlags) {
+            .aspect_ratio_info_present_flag = sps->vui.aspect_ratio_info_present_flag,
+            .overscan_info_present_flag = sps->vui.overscan_info_present_flag,
+            .overscan_appropriate_flag = sps->vui.overscan_appropriate_flag,
+            .video_signal_type_present_flag = sps->vui.video_signal_type_present_flag,
+            .video_full_range_flag = sps->vui.video_full_range_flag,
+            .color_description_present_flag = sps->vui.colour_description_present_flag,
+            /* Use the presence flag itself: chroma_location is an enum value,
+             * not a flag, and would be truncated to its lowest bit here. */
+            .chroma_loc_info_present_flag = sps->vui.chroma_loc_info_present_flag,
+            .timing_info_present_flag = sps->timing_info_present_flag,
+            .fixed_frame_rate_flag = sps->fixed_frame_rate_flag,
+            .bitstream_restriction_flag = sps->bitstream_restriction_flag,
+            .nal_hrd_parameters_present_flag = sps->nal_hrd_parameters_present_flag,
+            .vcl_hrd_parameters_present_flag = sps->vcl_hrd_parameters_present_flag,
+        },
+    };
+
+    *vksps = (StdVideoH264SequenceParameterSet) {
+        .profile_idc = sps->profile_idc,
+        .level_idc = sps->level_idc,
+        .seq_parameter_set_id = sps->sps_id,
+        .chroma_format_idc = sps->chroma_format_idc,
+        .bit_depth_luma_minus8 = sps->bit_depth_luma - 8,
+        .bit_depth_chroma_minus8 = sps->bit_depth_chroma - 8,
+        .log2_max_frame_num_minus4 = sps->log2_max_frame_num - 4,
+        .pic_order_cnt_type = sps->poc_type,
+        .log2_max_pic_order_cnt_lsb_minus4 = sps->poc_type ? 0 : sps->log2_max_poc_lsb - 4,
+        .offset_for_non_ref_pic = sps->offset_for_non_ref_pic,
+        .offset_for_top_to_bottom_field = sps->offset_for_top_to_bottom_field,
+        .num_ref_frames_in_pic_order_cnt_cycle = sps->poc_cycle_length,
+        .max_num_ref_frames = sps->ref_frame_count,
+        .pic_width_in_mbs_minus1 = sps->mb_width - 1,
+        .pic_height_in_map_units_minus1 = (sps->mb_height/(2 - sps->frame_mbs_only_flag)) - 1,
+        .frame_crop_left_offset = sps->crop_left,
+        .frame_crop_right_offset = sps->crop_right,
+        .frame_crop_top_offset = sps->crop_top,
+        .frame_crop_bottom_offset = sps->crop_bottom,
+        .flags = (StdVideoH264SpsFlags) {
+            .constraint_set0_flag = (sps->constraint_set_flags >> 0) & 0x1,
+            .constraint_set1_flag = (sps->constraint_set_flags >> 1) & 0x1,
+            .constraint_set2_flag = (sps->constraint_set_flags >> 2) & 0x1,
+            .constraint_set3_flag = (sps->constraint_set_flags >> 3) & 0x1,
+            .constraint_set4_flag = (sps->constraint_set_flags >> 4) & 0x1,
+            .constraint_set5_flag = (sps->constraint_set_flags >> 5) & 0x1,
+            .direct_8x8_inference_flag = sps->direct_8x8_inference_flag,
+            .mb_adaptive_frame_field_flag = sps->mb_aff,
+            .frame_mbs_only_flag = sps->frame_mbs_only_flag,
+            .delta_pic_order_always_zero_flag = sps->delta_pic_order_always_zero_flag,
+            .separate_colour_plane_flag = sps->residual_color_transform_flag,
+            .gaps_in_frame_num_value_allowed_flag = sps->gaps_in_frame_num_allowed_flag,
+            .qpprime_y_zero_transform_bypass_flag = sps->transform_bypass,
+            .frame_cropping_flag = sps->crop,
+            .seq_scaling_matrix_present_flag = sps->scaling_matrix_present,
+            .vui_parameters_present_flag = sps->vui_parameters_present_flag,
+        },
+        .pOffsetForRefFrame = sps->offset_for_ref_frame,
+        .pScalingLists = vksps_scaling,
+        .pSequenceParameterSetVui = vksps_vui,
+    };
+}
+
+/**
+ * Convert an FFmpeg PPS into the Vulkan Std PPS structure.
+ *
+ * *vkpps ends up pointing at vkpps_scaling. sps is not used here.
+ */
+static void set_pps(const PPS *pps, const SPS *sps,
+                    StdVideoH264ScalingLists *vkpps_scaling,
+                    StdVideoH264PictureParameterSet *vkpps)
+{
+    *vkpps_scaling = (StdVideoH264ScalingLists) {
+        .scaling_list_present_mask = pps->pic_scaling_matrix_present_mask,
+        .use_default_scaling_matrix_mask = 0, /* We already fill in the default matrix */
+    };
+
+    for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS; i++)
+        memcpy(vkpps_scaling->ScalingList4x4[i], pps->scaling_matrix4[i],
+               STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**pps->scaling_matrix4));
+
+    for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_8X8_NUM_LISTS; i++)
+        memcpy(vkpps_scaling->ScalingList8x8[i], pps->scaling_matrix8[i],
+               STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS * sizeof(**pps->scaling_matrix8));
+
+    *vkpps = (StdVideoH264PictureParameterSet) {
+        .seq_parameter_set_id = pps->sps_id,
+        .pic_parameter_set_id = pps->pps_id,
+        .num_ref_idx_l0_default_active_minus1 = pps->ref_count[0] - 1,
+        .num_ref_idx_l1_default_active_minus1 = pps->ref_count[1] - 1,
+        .weighted_bipred_idc = pps->weighted_bipred_idc,
+        .pic_init_qp_minus26 = pps->init_qp - 26,
+        .pic_init_qs_minus26 = pps->init_qs - 26,
+        .chroma_qp_index_offset = pps->chroma_qp_index_offset[0],
+        .second_chroma_qp_index_offset = pps->chroma_qp_index_offset[1],
+        .flags = (StdVideoH264PpsFlags) {
+            .transform_8x8_mode_flag = pps->transform_8x8_mode,
+            .redundant_pic_cnt_present_flag = pps->redundant_pic_cnt_present,
+            .constrained_intra_pred_flag = pps->constrained_intra_pred,
+            .deblocking_filter_control_present_flag = pps->deblocking_filter_parameters_present,
+            .weighted_pred_flag = pps->weighted_pred,
+            .bottom_field_pic_order_in_frame_present_flag = pps->pic_order_present,
+            .entropy_coding_mode_flag = pps->cabac,
+            .pic_scaling_matrix_present_flag = pps->pic_scaling_matrix_present_flag,
+        },
+        .pScalingLists = vkpps_scaling,
+    };
+}
+
+/**
+ * Create a VkVideoSessionParametersKHR holding every SPS and PPS currently
+ * present in the decoder's parameter set lists.
+ *
+ * On success *buf is set to a new AVBufferRef wrapping the handle, with
+ * ff_vk_decode_free_params() as its free callback.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int vk_h264_create_params(AVCodecContext *avctx, AVBufferRef **buf)
+{
+    VkResult ret;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = (FFVulkanDecodeShared *)dec->shared_ref->data;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+    const H264Context *h = avctx->priv_data;
+
+    /* SPS */
+    StdVideoH264ScalingLists vksps_scaling[MAX_SPS_COUNT];
+    StdVideoH264HrdParameters vksps_vui_header[MAX_SPS_COUNT];
+    StdVideoH264SequenceParameterSetVui vksps_vui[MAX_SPS_COUNT];
+    StdVideoH264SequenceParameterSet vksps[MAX_SPS_COUNT];
+
+    /* PPS */
+    StdVideoH264ScalingLists vkpps_scaling[MAX_PPS_COUNT];
+    StdVideoH264PictureParameterSet vkpps[MAX_PPS_COUNT];
+
+    VkVideoDecodeH264SessionParametersAddInfoKHR h264_params_info = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_ADD_INFO_KHR,
+        .pStdSPSs = vksps,
+        .stdSPSCount = 0,
+        .pStdPPSs = vkpps,
+        .stdPPSCount = 0,
+    };
+    VkVideoDecodeH264SessionParametersCreateInfoKHR h264_params = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_CREATE_INFO_KHR,
+        .pParametersAddInfo = &h264_params_info,
+    };
+    VkVideoSessionParametersCreateInfoKHR session_params_create = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR,
+        .pNext = &h264_params,
+        .videoSession = ctx->common.session,
+        .videoSessionParametersTemplate = NULL,
+    };
+
+    AVBufferRef *tmp;
+    VkVideoSessionParametersKHR *par = av_malloc(sizeof(*par));
+    if (!par)
+        return AVERROR(ENOMEM);
+
+    /* SPS list */
+    for (int i = 0; i < FF_ARRAY_ELEMS(h->ps.sps_list); i++) {
+        if (h->ps.sps_list[i]) {
+            const SPS *sps_l = (const SPS *)h->ps.sps_list[i]->data;
+            int idx = h264_params_info.stdSPSCount;
+            set_sps(sps_l, &vksps_scaling[idx], &vksps_vui_header[idx], &vksps_vui[idx], &vksps[idx]);
+            h264_params_info.stdSPSCount++;
+        }
+    }
+
+    /* PPS list */
+    for (int i = 0; i < FF_ARRAY_ELEMS(h->ps.pps_list); i++) {
+        if (h->ps.pps_list[i]) {
+            const PPS *pps_l = (const PPS *)h->ps.pps_list[i]->data;
+            int idx = h264_params_info.stdPPSCount;
+            set_pps(pps_l, pps_l->sps, &vkpps_scaling[idx], &vkpps[idx]);
+            h264_params_info.stdPPSCount++;
+        }
+    }
+
+    h264_params.maxStdSPSCount = h264_params_info.stdSPSCount;
+    h264_params.maxStdPPSCount = h264_params_info.stdPPSCount;
+
+    /* Create session parameters */
+    ret = vk->CreateVideoSessionParametersKHR(ctx->s.hwctx->act_dev, &session_params_create,
+                                              ctx->s.hwctx->alloc, par);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to create Vulkan video session parameters: %s!\n",
+               ff_vk_ret2str(ret));
+        /* No buffer owns par yet, so it has to be released here. */
+        av_free(par);
+        return AVERROR_EXTERNAL;
+    }
+
+    tmp = av_buffer_create((uint8_t *)par, sizeof(*par), ff_vk_decode_free_params,
+                           ctx, 0);
+    if (!tmp) {
+        ff_vk_decode_free_params(ctx, (uint8_t *)par);
+        return AVERROR(ENOMEM);
+    }
+
+    av_log(avctx, AV_LOG_DEBUG, "Created frame parameters: %i SPS %i PPS\n",
+           h264_params_info.stdSPSCount, h264_params_info.stdPPSCount);
+
+    *buf = tmp;
+
+    return 0;
+}
+
+/**
+ * AVHWAccel.start_frame callback.
+ *
+ * (Re)creates the session parameters when needed, fills in the slot and
+ * reference structures for the current picture and for every short-term
+ * and long-term reference, then initializes the per-picture decode info.
+ * The bitstream arguments are unused.
+ */
+static int vk_h264_start_frame(AVCodecContext          *avctx,
+                               av_unused const uint8_t *buffer,
+                               av_unused uint32_t       size)
+{
+    int err;
+    int dpb_slot_index = 0;
+    int nb_refs;
+    H264Context *h = avctx->priv_data;
+    H264Picture *pic = h->cur_pic_ptr;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    H264VulkanDecodePicture *hp = pic->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &hp->vp;
+
+    if (!dec->session_params || dec->params_changed) {
+        av_buffer_unref(&dec->session_params);
+        err = vk_h264_create_params(avctx, &dec->session_params);
+        if (err < 0)
+            return err;
+        dec->params_changed = 0;
+    }
+
+    /* Fill in main slot */
+    dpb_slot_index = vk_h264_dpb_slot(h, pic);
+    err = vk_h264_fill_pict(avctx, NULL, &vp->ref_slot, &vp->ref,
+                            &hp->vkh264_ref, &hp->h264_ref, pic, 1,
+                            h->DPB[dpb_slot_index].field_picture,
+                            h->DPB[dpb_slot_index].reference,
+                            dpb_slot_index);
+    if (err < 0)
+        return err;
+
+    /* Fill in short-term references */
+    for (nb_refs = 0; nb_refs < h->short_ref_count; nb_refs++) {
+        dpb_slot_index = vk_h264_dpb_slot(h, h->short_ref[nb_refs]);
+        err = vk_h264_fill_pict(avctx, &hp->ref_src[nb_refs], &vp->ref_slots[nb_refs],
+                                &vp->refs[nb_refs], &hp->vkh264_refs[nb_refs],
+                                &hp->h264_refs[nb_refs], h->short_ref[nb_refs], 0,
+                                h->DPB[dpb_slot_index].field_picture,
+                                h->DPB[dpb_slot_index].reference,
+                                dpb_slot_index);
+        if (err < 0)
+            return err;
+    }
+
+    /*
+     * Fill in long-term refs. The DPB slot has to be looked up for the very
+     * entry that is filled in, h->long_ref[r]; nb_refs is only the output
+     * index. NULL entries are skipped instead of being dereferenced.
+     * NOTE(review): assumes h->long_ref[] can have holes and that
+     * long_ref_count is the number of non-NULL entries - confirm.
+     */
+    for (int r = 0; r < FF_ARRAY_ELEMS(h->long_ref) &&
+         nb_refs < h->short_ref_count + h->long_ref_count; r++) {
+        if (!h->long_ref[r])
+            continue;
+
+        dpb_slot_index = vk_h264_dpb_slot(h, h->long_ref[r]);
+        err = vk_h264_fill_pict(avctx, &hp->ref_src[nb_refs], &vp->ref_slots[nb_refs],
+                                &vp->refs[nb_refs], &hp->vkh264_refs[nb_refs],
+                                &hp->h264_refs[nb_refs], h->long_ref[r], 0,
+                                h->DPB[dpb_slot_index].field_picture,
+                                h->DPB[dpb_slot_index].reference,
+                                dpb_slot_index);
+        if (err < 0)
+            return err;
+        nb_refs++;
+    }
+
+    hp->h264pic = (StdVideoDecodeH264PictureInfo) {
+        .seq_parameter_set_id = pic->pps->sps_id,
+        .pic_parameter_set_id = pic->pps->pps_id,
+        .frame_num = 0,  /* Set later */
+        .idr_pic_id = 0, /* Set later */
+        .PicOrderCnt[0] = pic->field_poc[0],
+        .PicOrderCnt[1] = pic->field_poc[1],
+        .flags = (StdVideoDecodeH264PictureInfoFlags) {
+            .field_pic_flag = FIELD_PICTURE(h),
+            .is_intra = 1, /* Set later */
+            .IdrPicFlag = h->picture_idr,
+            .bottom_field_flag = h->picture_structure != PICT_FRAME &&
+                                 h->picture_structure & PICT_BOTTOM_FIELD,
+            .is_reference = h->nal_ref_idc != 0,
+            .complementary_field_pair = h->first_field && FIELD_PICTURE(h),
+        },
+    };
+
+    hp->h264_pic_info = (VkVideoDecodeH264PictureInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PICTURE_INFO_KHR,
+        .pStdPictureInfo = &hp->h264pic,
+        .sliceCount = 0,
+    };
+
+    vp->decode_info = (VkVideoDecodeInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR,
+        .pNext = &hp->h264_pic_info,
+        .flags = 0x0,
+        .pSetupReferenceSlot = &vp->ref_slot,
+        .referenceSlotCount = nb_refs, /* only the slots actually filled in */
+        .pReferenceSlots = vp->ref_slots,
+        .dstPictureResource = (VkVideoPictureResourceInfoKHR) {
+            .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
+            .codedOffset = (VkOffset2D){ 0, 0 },
+            .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
+            .baseArrayLayer = 0,
+            .imageViewBinding = vp->img_view_out,
+        },
+    };
+
+    return 0;
+}
+
+/**
+ * AVHWAccel.decode_slice callback: append one slice to the picture via
+ * ff_vk_decode_add_slice() and update the picture info from slice context 0.
+ */
+static int vk_h264_decode_slice(AVCodecContext *avctx,
+                                const uint8_t  *data,
+                                uint32_t        size)
+{
+    const H264Context *h = avctx->priv_data;
+    const H264SliceContext *sl = &h->slice_ctx[0];
+    H264VulkanDecodePicture *hp = h->cur_pic_ptr->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &hp->vp;
+
+    int err = ff_vk_decode_add_slice(avctx, vp, data, size, 1,
+                                     &hp->h264_pic_info.sliceCount,
+                                     &hp->h264_pic_info.pSliceOffsets);
+    if (err < 0)
+        return err;
+
+    hp->h264pic.frame_num = sl->frame_num;
+    hp->h264pic.idr_pic_id = sl->idr_pic_id;
+
+    /* Frame is only intra if all slices are marked as intra */
+    if (sl->slice_type != AV_PICTURE_TYPE_I && sl->slice_type != AV_PICTURE_TYPE_SI)
+        hp->h264pic.flags.is_intra = 0;
+
+    return 0;
+}
+
+/**
+ * AVHWAccel.end_frame callback: collect the frames and Vulkan pictures of
+ * all references set up by vk_h264_start_frame() and submit the decode.
+ */
+static int vk_h264_end_frame(AVCodecContext *avctx)
+{
+    const H264Context *h = avctx->priv_data;
+    H264Picture *pic = h->cur_pic_ptr;
+    H264VulkanDecodePicture *hp = pic->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &hp->vp;
+    FFVulkanDecodePicture *rvp[H264_MAX_PICTURE_COUNT] = { 0 };
+    AVFrame *rav[H264_MAX_PICTURE_COUNT] = { 0 };
+
+    for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) {
+        H264Picture *rp = hp->ref_src[i];
+        H264VulkanDecodePicture *rhp = rp->hwaccel_picture_private;
+
+        rvp[i] = &rhp->vp;
+        rav[i] = hp->ref_src[i]->f;
+    }
+
+    /* NOTE(review): %lu assumes vp->slices_size is an unsigned long;
+     * if it is a size_t the format should be adjusted - confirm. */
+    av_log(avctx, AV_LOG_VERBOSE, "Decoding frame, %lu bytes, %i slices\n",
+           vp->slices_size, hp->h264_pic_info.sliceCount);
+
+    return ff_vk_decode_frame(avctx, pic->f, vp, rav, rvp);
+}
+
+/**
+ * AVHWAccel.free_frame_priv callback: release the picture's resources via
+ * ff_vk_decode_free_frame() and then the H264VulkanDecodePicture itself.
+ */
+static void vk_h264_free_frame_priv(void *_hwctx, uint8_t *data)
+{
+    AVHWDeviceContext *hwctx = _hwctx;
+    H264VulkanDecodePicture *hp = (H264VulkanDecodePicture *)data;
+
+    /* Free frame resources, this also destroys the session parameters. */
+    ff_vk_decode_free_frame(hwctx, &hp->vp);
+
+    /* Free frame context */
+    av_free(hp);
+}
+
+const AVHWAccel ff_h264_vulkan_hwaccel = {
+    .name                  = "h264_vulkan",
+    .type                  = AVMEDIA_TYPE_VIDEO,
+    .id                    = AV_CODEC_ID_H264,
+    .pix_fmt               = AV_PIX_FMT_VULKAN,
+    .start_frame           = &vk_h264_start_frame,
+    .decode_slice          = &vk_h264_decode_slice,
+    .end_frame             = &vk_h264_end_frame,
+    .free_frame_priv       = &vk_h264_free_frame_priv,
+    .frame_priv_data_size  = sizeof(H264VulkanDecodePicture),
+    .init                  = &ff_vk_decode_init,
+    .update_thread_context = &ff_vk_update_thread_context,
+    .decode_params         = &ff_vk_params_changed,
+    .flush                 = &ff_vk_decode_flush,
+    .uninit                = &ff_vk_decode_uninit,
+    .frame_params          = &ff_vk_frame_params,
+    .priv_data_size        = sizeof(FFVulkanDecodeContext),
+    .caps_internal         = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE,
+};

From b878ed629703997a951fa60ff5e2f61b76cb01bd Mon Sep 17 00:00:00 2001
From: Lynne
Date: Wed, 14 Dec 2022 08:27:18 +0100
Subject: [PATCH 83/98] hevcdec: add Vulkan hwaccel

Thanks to Dave Airlie for figuring out a lot of the parameters.
--- configure | 2 + libavcodec/Makefile | 1 + libavcodec/hevcdec.c | 25 +- libavcodec/hwaccels.h | 1 + libavcodec/vulkan_hevc.c | 930 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 958 insertions(+), 1 deletion(-) create mode 100644 libavcodec/vulkan_hevc.c diff --git a/configure b/configure index ddff9423bcfee..b3d2730b1b76f 100755 --- a/configure +++ b/configure @@ -3054,6 +3054,8 @@ hevc_vdpau_hwaccel_deps="vdpau VdpPictureInfoHEVC" hevc_vdpau_hwaccel_select="hevc_decoder" hevc_videotoolbox_hwaccel_deps="videotoolbox" hevc_videotoolbox_hwaccel_select="hevc_decoder" +hevc_vulkan_hwaccel_deps="vulkan" +hevc_vulkan_hwaccel_select="hevc_decoder" mjpeg_nvdec_hwaccel_deps="nvdec" mjpeg_nvdec_hwaccel_select="mjpeg_decoder" mjpeg_vaapi_hwaccel_deps="vaapi" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 62b24630bf920..d421844903136 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -1005,6 +1005,7 @@ OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL) += nvdec_hevc.o OBJS-$(CONFIG_HEVC_QSV_HWACCEL) += qsvdec.o OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL) += vaapi_hevc.o h265_profile_level.o OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL) += vdpau_hevc.o h265_profile_level.o +OBJS-$(CONFIG_HEVC_VULKAN_HWACCEL) += vulkan_decode.o vulkan_hevc.o OBJS-$(CONFIG_MJPEG_NVDEC_HWACCEL) += nvdec_mjpeg.o OBJS-$(CONFIG_MJPEG_VAAPI_HWACCEL) += vaapi_mjpeg.o OBJS-$(CONFIG_MPEG1_NVDEC_HWACCEL) += nvdec_mpeg12.o diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c index 8764e0bd83ef4..eee77ec4dbb2f 100644 --- a/libavcodec/hevcdec.c +++ b/libavcodec/hevcdec.c @@ -405,7 +405,8 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) CONFIG_HEVC_NVDEC_HWACCEL + \ CONFIG_HEVC_VAAPI_HWACCEL + \ CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \ - CONFIG_HEVC_VDPAU_HWACCEL) + CONFIG_HEVC_VDPAU_HWACCEL + \ + CONFIG_HEVC_VULKAN_HWACCEL) enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts; switch (sps->pix_fmt) { @@ -429,6 +430,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, 
const HEVCSPS *sps) #endif #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX; +#endif +#if CONFIG_HEVC_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; #endif break; case AV_PIX_FMT_YUV420P10: @@ -445,6 +449,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX; #endif +#if CONFIG_HEVC_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; +#endif #if CONFIG_HEVC_VDPAU_HWACCEL *fmt++ = AV_PIX_FMT_VDPAU; #endif @@ -464,6 +471,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) #endif #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX; +#endif +#if CONFIG_HEVC_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; #endif break; case AV_PIX_FMT_YUV422P: @@ -473,12 +483,16 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) #endif #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX; +#endif +#if CONFIG_HEVC_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; #endif break; case AV_PIX_FMT_YUV444P10: #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX; #endif + /* NOTE: fallthrough */ case AV_PIX_FMT_YUV420P12: case AV_PIX_FMT_YUV444P12: #if CONFIG_HEVC_VAAPI_HWACCEL @@ -487,6 +501,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) #if CONFIG_HEVC_VDPAU_HWACCEL *fmt++ = AV_PIX_FMT_VDPAU; #endif +#if CONFIG_HEVC_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; +#endif #if CONFIG_HEVC_NVDEC_HWACCEL *fmt++ = AV_PIX_FMT_CUDA; #endif @@ -494,6 +511,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) case AV_PIX_FMT_YUV422P12: #if CONFIG_HEVC_VAAPI_HWACCEL *fmt++ = AV_PIX_FMT_VAAPI; +#endif +#if CONFIG_HEVC_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; #endif break; } @@ -3769,6 +3789,9 @@ const FFCodec ff_hevc_decoder = { #endif #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL HWACCEL_VIDEOTOOLBOX(hevc), +#endif +#if CONFIG_HEVC_VULKAN_HWACCEL + 
HWACCEL_VULKAN(hevc), #endif NULL }, diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h index 23d0843c76f5f..a7c74d07cbd35 100644 --- a/libavcodec/hwaccels.h +++ b/libavcodec/hwaccels.h @@ -44,6 +44,7 @@ extern const AVHWAccel ff_hevc_nvdec_hwaccel; extern const AVHWAccel ff_hevc_vaapi_hwaccel; extern const AVHWAccel ff_hevc_vdpau_hwaccel; extern const AVHWAccel ff_hevc_videotoolbox_hwaccel; +extern const AVHWAccel ff_hevc_vulkan_hwaccel; extern const AVHWAccel ff_mjpeg_nvdec_hwaccel; extern const AVHWAccel ff_mjpeg_vaapi_hwaccel; extern const AVHWAccel ff_mpeg1_nvdec_hwaccel; diff --git a/libavcodec/vulkan_hevc.c b/libavcodec/vulkan_hevc.c new file mode 100644 index 0000000000000..53071626196f3 --- /dev/null +++ b/libavcodec/vulkan_hevc.c @@ -0,0 +1,930 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "hevcdec.h"
+#include "hevc_ps.h"
+
+#include "vulkan_decode.h"
+
+const VkExtensionProperties ff_vk_dec_hevc_ext = {
+    .extensionName = VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME,
+    .specVersion = VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION,
+};
+
+/* Side structures that one StdVideoH265SequenceParameterSet points into. */
+typedef struct HEVCHeaderSPS {
+    StdVideoH265ScalingLists scaling;
+    StdVideoH265HrdParameters vui_header;
+    StdVideoH265SequenceParameterSetVui vui;
+    StdVideoH265ProfileTierLevel ptl;
+    StdVideoH265DecPicBufMgr dpbm;
+    StdVideoH265PredictorPaletteEntries pal;
+    StdVideoH265SubLayerHrdParameters nal_hdr[HEVC_MAX_SUB_LAYERS];
+    StdVideoH265SubLayerHrdParameters vcl_hdr[HEVC_MAX_SUB_LAYERS];
+    StdVideoH265ShortTermRefPicSet str[HEVC_MAX_SHORT_TERM_REF_PIC_SETS];
+    StdVideoH265LongTermRefPicsSps ltr;
+} HEVCHeaderSPS;
+
+/* Side structures that one StdVideoH265PictureParameterSet points into. */
+typedef struct HEVCHeaderPPS {
+    StdVideoH265ScalingLists scaling;
+    StdVideoH265PredictorPaletteEntries pal;
+} HEVCHeaderPPS;
+
+/* Per-HRD-parameter-set sub-layer arrays of a VPS. */
+typedef struct HEVCHeaderVPSSet {
+    StdVideoH265SubLayerHrdParameters nal_hdr[HEVC_MAX_SUB_LAYERS];
+    StdVideoH265SubLayerHrdParameters vcl_hdr[HEVC_MAX_SUB_LAYERS];
+} HEVCHeaderVPSSet;
+
+/* Side structures that one StdVideoH265VideoParameterSet points into.
+ * sls is set by get_data_set_buf() to point into the same buffer. */
+typedef struct HEVCHeaderVPS {
+    StdVideoH265ProfileTierLevel ptl;
+    StdVideoH265DecPicBufMgr dpbm;
+    StdVideoH265HrdParameters hdr[HEVC_MAX_LAYER_SETS];
+    HEVCHeaderVPSSet *sls;
+} HEVCHeaderVPS;
+
+/* Fixed-size head of the scratch buffer handed to Vulkan when creating
+ * session parameters. hvps is set by get_data_set_buf() to point at the
+ * variable-size tail that follows this struct in the same buffer. */
+typedef struct HEVCHeaderSet {
+    StdVideoH265SequenceParameterSet sps[HEVC_MAX_SPS_COUNT];
+    HEVCHeaderSPS hsps[HEVC_MAX_SPS_COUNT];
+
+    StdVideoH265PictureParameterSet pps[HEVC_MAX_PPS_COUNT];
+    HEVCHeaderPPS hpps[HEVC_MAX_PPS_COUNT];
+
+    /* One entry per possible VPS, matching vps_list[HEVC_MAX_VPS_COUNT]. */
+    StdVideoH265VideoParameterSet vps[HEVC_MAX_VPS_COUNT];
+    HEVCHeaderVPS *hvps;
+} HEVCHeaderSet;
+
+/**
+ * Get a scratch buffer from the decoder's temporary pool and set up the
+ * internal pointers of the HEVCHeaderSet stored in it.
+ *
+ * Buffer layout:
+ *   [HEVCHeaderSet][HEVCHeaderVPS * nb_vps][HEVCHeaderVPSSet * sum(vps_num_hrd_parameters)]
+ *
+ * The pool is recreated whenever the required size exceeds the size it was
+ * created with.
+ *
+ * @param s        decode context owning tmp_pool / tmp_pool_ele_size
+ * @param data_buf receives the buffer reference on success
+ * @param nb_vps   number of leading valid entries in vps_list
+ * @param vps_list buffers whose data is a HEVCVPS
+ * @return 0 on success, AVERROR(ENOMEM) if the pool or buffer cannot be allocated
+ */
+static int get_data_set_buf(FFVulkanDecodeContext *s, AVBufferRef **data_buf,
+                            int nb_vps, AVBufferRef * const vps_list[HEVC_MAX_VPS_COUNT])
+{
+    uint8_t *data_ptr;
+    HEVCHeaderSet *hdr;
+
+    /* Size the fixed regions with sizeof() on the structs themselves, so
+     * padding and array lengths always agree with how hdr and hdr->hvps[]
+     * are addressed below. Summing member sizes by hand does not. */
+    const size_t base_size = sizeof(HEVCHeaderSet);
+    const size_t vps_size = sizeof(HEVCHeaderVPS);
+
+    size_t buf_size = base_size + vps_size*nb_vps;
+
+    for (int i = 0; i < nb_vps; i++) {
+        const HEVCVPS *vps = (const HEVCVPS *)vps_list[i]->data;
+        buf_size += sizeof(HEVCHeaderVPSSet)*vps->vps_num_hrd_parameters;
+    }
+
+    if (buf_size > s->tmp_pool_ele_size) {
+        av_buffer_pool_uninit(&s->tmp_pool);
+        s->tmp_pool_ele_size = 0;
+        s->tmp_pool = av_buffer_pool_init(buf_size, NULL);
+        if (!s->tmp_pool)
+            return AVERROR(ENOMEM);
+        s->tmp_pool_ele_size = buf_size;
+    }
+
+    *data_buf = av_buffer_pool_get(s->tmp_pool);
+    if (!(*data_buf))
+        return AVERROR(ENOMEM);
+
+    /* Setup pointers */
+    data_ptr = (*data_buf)->data;
+    hdr = (HEVCHeaderSet *)data_ptr;
+    hdr->hvps = (HEVCHeaderVPS *)(data_ptr + base_size);
+    data_ptr += base_size + vps_size*nb_vps;
+    for (int i = 0; i < nb_vps; i++) {
+        const HEVCVPS *vps = (const HEVCVPS *)vps_list[i]->data;
+        hdr->hvps[i].sls = (HEVCHeaderVPSSet *)data_ptr;
+        data_ptr += sizeof(HEVCHeaderVPSSet)*vps->vps_num_hrd_parameters;
+    }
+
+    return 0;
+}
+
+typedef struct HEVCVulkanDecodePicture {
+    FFVulkanDecodePicture vp;
+
+    /* Current picture */
+    StdVideoDecodeH265ReferenceInfo h265_ref;
+    VkVideoDecodeH265DpbSlotInfoKHR vkh265_ref;
+
+    /* Picture refs */
+    HEVCFrame *ref_src [HEVC_MAX_REFS];
+    StdVideoDecodeH265ReferenceInfo h265_refs [HEVC_MAX_REFS];
+    VkVideoDecodeH265DpbSlotInfoKHR vkh265_refs[HEVC_MAX_REFS];
+
+    /* Current picture (contd.)
*/
+    StdVideoDecodeH265PictureInfo h265pic;
+    VkVideoDecodeH265PictureInfoKHR h265_pic_info;
+} HEVCVulkanDecodePicture;
+
+/**
+ * Prepare one picture (the current one or a reference) for decoding and
+ * fill the chain of Vulkan structures describing its DPB slot.
+ *
+ * ref_slot points at ref and vkh265_ref, and vkh265_ref points at h265_ref,
+ * so all four must stay valid for as long as ref_slot is used.
+ *
+ * @param ref_src    if non-NULL, receives pic
+ * @param is_current forwarded to ff_vk_decode_prepare_frame()
+ * @param pic_id     DPB slot index; also the array layer when the DPB is layered
+ * @return 0 on success, or the negative error from ff_vk_decode_prepare_frame()
+ */
+static int vk_hevc_fill_pict(AVCodecContext *avctx, HEVCFrame **ref_src,
+                             VkVideoReferenceSlotInfoKHR *ref_slot, /* Main structure */
+                             VkVideoPictureResourceInfoKHR *ref, /* Goes in ^ */
+                             VkVideoDecodeH265DpbSlotInfoKHR *vkh265_ref, /* Goes in ^ */
+                             StdVideoDecodeH265ReferenceInfo *h265_ref, /* Goes in ^ */
+                             HEVCFrame *pic, int is_current, int pic_id)
+{
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = (FFVulkanDecodeShared *)dec->shared_ref->data;
+    HEVCVulkanDecodePicture *hp = pic->hwaccel_picture_private;
+    FFVulkanDecodePicture *vkpic = &hp->vp;
+
+    int err = ff_vk_decode_prepare_frame(ctx, pic->frame, vkpic, is_current,
+                                         ctx->dedicated_dpb);
+    if (err < 0)
+        return err;
+
+    *h265_ref = (StdVideoDecodeH265ReferenceInfo) {
+        .flags = (StdVideoDecodeH265ReferenceInfoFlags) {
+            /* Normalise to 0/1: the masked value keeps the flag's bit
+             * position, and would be truncated to 0 if stored in a
+             * single-bit field while the flag is not bit 0. */
+            .used_for_long_term_reference = !!(pic->flags & HEVC_FRAME_FLAG_LONG_REF),
+            .unused_for_reference = 0,
+        },
+        .PicOrderCntVal = pic->poc,
+    };
+
+    *vkh265_ref = (VkVideoDecodeH265DpbSlotInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_DPB_SLOT_INFO_KHR,
+        .pStdReferenceInfo = h265_ref,
+    };
+
+    *ref = (VkVideoPictureResourceInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
+        .codedOffset = (VkOffset2D){ 0, 0 },
+        .codedExtent = (VkExtent2D){ pic->frame->width, pic->frame->height },
+        .baseArrayLayer = ctx->layered_dpb ?
pic_id : 0,
+        .imageViewBinding = vkpic->img_view_ref,
+    };
+
+    *ref_slot = (VkVideoReferenceSlotInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR,
+        .pNext = vkh265_ref,
+        .slotIndex = pic_id,
+        .pPictureResource = ref,
+    };
+
+    if (ref_src)
+        *ref_src = pic;
+
+    return 0;
+}
+
+/**
+ * Convert a parsed HEVCSPS into the Vulkan Std SPS representation.
+ *
+ * All output pointers are caller-provided storage. *vksps and the
+ * structures reachable from it keep pointing into that storage, so it
+ * must outlive every use of *vksps.
+ *
+ * @param sps_idx value stored as sps_seq_parameter_set_id
+ */
+static void set_sps(const HEVCSPS *sps, int sps_idx,
+                    StdVideoH265ScalingLists *vksps_scaling,
+                    StdVideoH265HrdParameters *vksps_vui_header,
+                    StdVideoH265SequenceParameterSetVui *vksps_vui,
+                    StdVideoH265SequenceParameterSet *vksps,
+                    StdVideoH265SubLayerHrdParameters *slhdrnal,
+                    StdVideoH265SubLayerHrdParameters *slhdrvcl,
+                    StdVideoH265ProfileTierLevel *ptl,
+                    StdVideoH265DecPicBufMgr *dpbm,
+                    StdVideoH265PredictorPaletteEntries *pal,
+                    StdVideoH265ShortTermRefPicSet *str,
+                    StdVideoH265LongTermRefPicsSps *ltr)
+{
+    for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS; i++)
+        memcpy(vksps_scaling->ScalingList4x4[i], sps->scaling_list.sl[0][i],
+               STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**vksps_scaling->ScalingList4x4));
+
+    for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS; i++)
+        memcpy(vksps_scaling->ScalingList8x8[i], sps->scaling_list.sl[1][i],
+               STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS * sizeof(**vksps_scaling->ScalingList8x8));
+
+    /* Each list is copied with the element count of its own size class. */
+    for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS; i++)
+        memcpy(vksps_scaling->ScalingList16x16[i], sps->scaling_list.sl[2][i],
+               STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS * sizeof(**vksps_scaling->ScalingList16x16));
+
+    /* NOTE(review): confirm that the 32x32 matrices really live at
+     * sl[3][0] and sl[3][1] in the parser's storage - TODO verify against
+     * the scaling list parser. */
+    for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; i++)
+        memcpy(vksps_scaling->ScalingList32x32[i], sps->scaling_list.sl[3][i],
+               STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS * sizeof(**vksps_scaling->ScalingList32x32));
+
+    /* There is one DC coefficient per list, so these copies are sized by
+     * the number of lists, not by the number of elements in a list. */
+    memcpy(vksps_scaling->ScalingListDCCoef16x16, sps->scaling_list.sl_dc[0],
+           STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS * sizeof(*vksps_scaling->ScalingListDCCoef16x16));
+
+    memcpy(vksps_scaling->ScalingListDCCoef32x32, sps->scaling_list.sl_dc[1],
+           STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS * sizeof(*vksps_scaling->ScalingListDCCoef32x32));
+
+    *vksps_vui_header = (StdVideoH265HrdParameters) {
+        .flags = (StdVideoH265HrdFlags) {
+            .nal_hrd_parameters_present_flag = sps->hdr.flags.nal_hrd_parameters_present_flag,
+            .vcl_hrd_parameters_present_flag = sps->hdr.flags.vcl_hrd_parameters_present_flag,
+            .sub_pic_hrd_params_present_flag = sps->hdr.flags.sub_pic_hrd_params_present_flag,
+            .sub_pic_cpb_params_in_pic_timing_sei_flag = sps->hdr.flags.sub_pic_cpb_params_in_pic_timing_sei_flag,
+            .fixed_pic_rate_general_flag = sps->hdr.flags.fixed_pic_rate_general_flag,
+            .fixed_pic_rate_within_cvs_flag = sps->hdr.flags.fixed_pic_rate_within_cvs_flag,
+            .low_delay_hrd_flag = sps->hdr.flags.low_delay_hrd_flag,
+        },
+        .tick_divisor_minus2 = sps->hdr.tick_divisor_minus2,
+        .du_cpb_removal_delay_increment_length_minus1 = sps->hdr.du_cpb_removal_delay_increment_length_minus1,
+        .dpb_output_delay_du_length_minus1 = sps->hdr.dpb_output_delay_du_length_minus1,
+        .bit_rate_scale = sps->hdr.bit_rate_scale,
+        .cpb_size_scale = sps->hdr.cpb_size_scale,
+        .cpb_size_du_scale = sps->hdr.cpb_size_du_scale,
+        .initial_cpb_removal_delay_length_minus1 = sps->hdr.initial_cpb_removal_delay_length_minus1,
+        .au_cpb_removal_delay_length_minus1 = sps->hdr.au_cpb_removal_delay_length_minus1,
+        .dpb_output_delay_length_minus1 = sps->hdr.dpb_output_delay_length_minus1,
+        /* Reserved - 3*16 bits */
+        .pSubLayerHrdParametersNal = slhdrnal,
+        /* Was a second initialiser of ...Nal, which left the VCL pointer
+         * unset and pointed the NAL pointer at the VCL data. */
+        .pSubLayerHrdParametersVcl = slhdrvcl,
+    };
+
+    memcpy(vksps_vui_header->cpb_cnt_minus1, sps->hdr.cpb_cnt_minus1,
+           STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*vksps_vui_header->cpb_cnt_minus1));
+    memcpy(vksps_vui_header->elemental_duration_in_tc_minus1, sps->hdr.elemental_duration_in_tc_minus1,
+           STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*vksps_vui_header->elemental_duration_in_tc_minus1));
+
+    memcpy(slhdrnal, sps->hdr.nal_params,
HEVC_MAX_SUB_LAYERS*sizeof(*slhdrnal)); + memcpy(slhdrvcl, sps->hdr.vcl_params, HEVC_MAX_SUB_LAYERS*sizeof(*slhdrvcl)); + + *vksps_vui = (StdVideoH265SequenceParameterSetVui) { + .flags = (StdVideoH265SpsVuiFlags) { + .aspect_ratio_info_present_flag = sps->vui.common.aspect_ratio_info_present_flag, + .overscan_info_present_flag = sps->vui.common.overscan_info_present_flag, + .overscan_appropriate_flag = sps->vui.common.overscan_appropriate_flag, + .video_signal_type_present_flag = sps->vui.common.video_signal_type_present_flag, + .video_full_range_flag = sps->vui.common.video_full_range_flag, + .colour_description_present_flag = sps->vui.common.colour_description_present_flag, + .chroma_loc_info_present_flag = sps->vui.common.chroma_loc_info_present_flag, + .neutral_chroma_indication_flag = sps->vui.neutra_chroma_indication_flag, + .field_seq_flag = sps->vui.field_seq_flag, + .frame_field_info_present_flag = sps->vui.frame_field_info_present_flag, + .default_display_window_flag = sps->vui.default_display_window_flag, + .vui_timing_info_present_flag = sps->vui.vui_timing_info_present_flag, + .vui_poc_proportional_to_timing_flag = sps->vui.vui_poc_proportional_to_timing_flag, + .vui_hrd_parameters_present_flag = sps->vui.vui_hrd_parameters_present_flag, + .bitstream_restriction_flag = sps->vui.bitstream_restriction_flag, + .tiles_fixed_structure_flag = sps->vui.tiles_fixed_structure_flag, + .motion_vectors_over_pic_boundaries_flag = sps->vui.motion_vectors_over_pic_boundaries_flag, + .restricted_ref_pic_lists_flag = sps->vui.restricted_ref_pic_lists_flag, + }, + .aspect_ratio_idc = sps->vui.common.aspect_ratio_idc, + .sar_width = sps->vui.common.sar.num, + .sar_height = sps->vui.common.sar.den, + .video_format = sps->vui.common.video_format, + .colour_primaries = sps->vui.common.colour_primaries, + .transfer_characteristics = sps->vui.common.transfer_characteristics, + .matrix_coeffs = sps->vui.common.matrix_coeffs, + .chroma_sample_loc_type_top_field = 
sps->vui.common.chroma_sample_loc_type_top_field, + .chroma_sample_loc_type_bottom_field = sps->vui.common.chroma_sample_loc_type_bottom_field, + /* Reserved */ + /* Reserved */ + .def_disp_win_left_offset = sps->vui.def_disp_win.left_offset, + .def_disp_win_right_offset = sps->vui.def_disp_win.right_offset, + .def_disp_win_top_offset = sps->vui.def_disp_win.top_offset, + .def_disp_win_bottom_offset = sps->vui.def_disp_win.bottom_offset, + .vui_num_units_in_tick = sps->vui.vui_num_units_in_tick, + .vui_time_scale = sps->vui.vui_time_scale, + .vui_num_ticks_poc_diff_one_minus1 = sps->vui.vui_num_ticks_poc_diff_one_minus1, + .min_spatial_segmentation_idc = sps->vui.min_spatial_segmentation_idc, + .max_bytes_per_pic_denom = sps->vui.max_bytes_per_pic_denom, + .max_bits_per_min_cu_denom = sps->vui.max_bits_per_min_cu_denom, + .log2_max_mv_length_horizontal = sps->vui.log2_max_mv_length_horizontal, + .log2_max_mv_length_vertical = sps->vui.log2_max_mv_length_vertical, + .pHrdParameters = vksps_vui_header, + }; + + *ptl = (StdVideoH265ProfileTierLevel) { + .flags = (StdVideoH265ProfileTierLevelFlags) { + .general_tier_flag = sps->ptl.general_ptl.tier_flag, + .general_progressive_source_flag = sps->ptl.general_ptl.progressive_source_flag, + .general_interlaced_source_flag = sps->ptl.general_ptl.interlaced_source_flag, + .general_non_packed_constraint_flag = sps->ptl.general_ptl.non_packed_constraint_flag, + .general_frame_only_constraint_flag = sps->ptl.general_ptl.frame_only_constraint_flag, + }, + .general_profile_idc = sps->ptl.general_ptl.profile_idc, + .general_level_idc = sps->ptl.general_ptl.level_idc, + }; + + for (int i = 0; i < sps->max_sub_layers; i++) { + dpbm->max_latency_increase_plus1[i] = sps->temporal_layer[i].max_latency_increase + 1; + dpbm->max_dec_pic_buffering_minus1[i] = sps->temporal_layer[i].max_dec_pic_buffering - 1; + dpbm->max_num_reorder_pics[i] = sps->temporal_layer[i].num_reorder_pics; + } + + for (int i = 0; i < (sps->chroma_format_idc ? 
3 : 1); i++) + for (int j = 0; j < sps->sps_num_palette_predictor_initializers; j++) + pal->PredictorPaletteEntries[i][j] = sps->sps_palette_predictor_initializer[i][j]; + + for (int i = 0; i < sps->nb_st_rps; i++) { + str[i] = (StdVideoH265ShortTermRefPicSet) { + .flags = (StdVideoH265ShortTermRefPicSetFlags) { + .inter_ref_pic_set_prediction_flag = sps->st_rps[i].rps_predict, + .delta_rps_sign = sps->st_rps[i].delta_rps_sign, + }, + .delta_idx_minus1 = sps->st_rps[i].delta_idx - 1, + .use_delta_flag = sps->st_rps[i].use_delta_flag, + .abs_delta_rps_minus1 = sps->st_rps[i].abs_delta_rps - 1, + .used_by_curr_pic_flag = 0x0, + .used_by_curr_pic_s0_flag = 0x0, + .used_by_curr_pic_s1_flag = 0x0, + /* Reserved */ + /* Reserved */ + /* Reserved */ + .num_negative_pics = sps->st_rps[i].num_negative_pics, + .num_positive_pics = sps->st_rps[i].num_delta_pocs - sps->st_rps[i].num_negative_pics, + }; + + /* NOTE: This is the predicted, and *reordered* version. + * Probably incorrect, but the spec doesn't say which version to use. 
*/
+        for (int j = 0; j < sps->st_rps[i].num_delta_pocs; j++)
+            str[i].used_by_curr_pic_flag |= sps->st_rps[i].used[j] << j;
+
+        for (int j = 0; j < str[i].num_negative_pics; j++) {
+            str[i].delta_poc_s0_minus1[j] = sps->st_rps[i].delta_poc_s0[j] - 1;
+            str[i].used_by_curr_pic_s0_flag |= sps->st_rps[i].used[j] << j;
+        }
+
+        /* The positive (S1) entries are read from used[] after the negative
+         * ones and must be collected in the S1 mask; they were previously
+         * OR-ed into the S0 mask, leaving the S1 mask at zero. */
+        for (int j = 0; j < str[i].num_positive_pics; j++) {
+            str[i].delta_poc_s1_minus1[j] = sps->st_rps[i].delta_poc_s1[j] - 1;
+            str[i].used_by_curr_pic_s1_flag |= sps->st_rps[i].used[str[i].num_negative_pics + j] << j;
+        }
+    }
+
+    *ltr = (StdVideoH265LongTermRefPicsSps) {
+        .used_by_curr_pic_lt_sps_flag = 0x0,
+    };
+
+    /* Pack the per-picture "used by current picture" flags into a bitmask. */
+    for (int i = 0; i < sps->num_long_term_ref_pics_sps; i++) {
+        ltr->used_by_curr_pic_lt_sps_flag |= sps->used_by_curr_pic_lt_sps_flag[i] << i;
+        ltr->lt_ref_pic_poc_lsb_sps[i] = sps->lt_ref_pic_poc_lsb_sps[i];
+    }
+
+    *vksps = (StdVideoH265SequenceParameterSet) {
+        .flags = (StdVideoH265SpsFlags) {
+            .sps_temporal_id_nesting_flag = sps->temporal_id_nesting_flag,
+            .separate_colour_plane_flag = sps->separate_colour_plane_flag,
+            .conformance_window_flag = sps->conformance_window_flag,
+            .sps_sub_layer_ordering_info_present_flag = sps->sublayer_ordering_info_flag,
+            .scaling_list_enabled_flag = sps->scaling_list_enable_flag,
+            .sps_scaling_list_data_present_flag = sps->scaling_list_enable_flag,
+            .amp_enabled_flag = sps->amp_enabled_flag,
+            .sample_adaptive_offset_enabled_flag = sps->sao_enabled,
+            .pcm_enabled_flag = sps->pcm_enabled_flag,
+            .pcm_loop_filter_disabled_flag = sps->pcm.loop_filter_disable_flag,
+            .long_term_ref_pics_present_flag = sps->long_term_ref_pics_present_flag,
+            .sps_temporal_mvp_enabled_flag = sps->sps_temporal_mvp_enabled_flag,
+            .strong_intra_smoothing_enabled_flag = sps->sps_strong_intra_smoothing_enable_flag,
+            .vui_parameters_present_flag = sps->vui_present,
+            .sps_extension_present_flag = sps->sps_extension_present_flag,
+            .sps_range_extension_flag = sps->sps_range_extension_flag,
+
.transform_skip_rotation_enabled_flag = sps->transform_skip_rotation_enabled_flag, + .transform_skip_context_enabled_flag = sps->transform_skip_context_enabled_flag, + .implicit_rdpcm_enabled_flag = sps->implicit_rdpcm_enabled_flag, + .explicit_rdpcm_enabled_flag = sps->explicit_rdpcm_enabled_flag, + .extended_precision_processing_flag = sps->extended_precision_processing_flag, + .intra_smoothing_disabled_flag = sps->intra_smoothing_disabled_flag, + .high_precision_offsets_enabled_flag = sps->high_precision_offsets_enabled_flag, + .persistent_rice_adaptation_enabled_flag = sps->persistent_rice_adaptation_enabled_flag, + .cabac_bypass_alignment_enabled_flag = sps->cabac_bypass_alignment_enabled_flag, + .sps_scc_extension_flag = sps->sps_scc_extension_flag, + .sps_curr_pic_ref_enabled_flag = sps->sps_curr_pic_ref_enabled_flag, + .palette_mode_enabled_flag = sps->palette_mode_enabled_flag, + .sps_palette_predictor_initializers_present_flag = sps->sps_palette_predictor_initializers_present_flag, + .intra_boundary_filtering_disabled_flag = sps->intra_boundary_filtering_disabled_flag, + }, + .chroma_format_idc = sps->chroma_format_idc, + .pic_width_in_luma_samples = sps->width, + .pic_height_in_luma_samples = sps->height, + .sps_video_parameter_set_id = sps->vps_id, + .sps_max_sub_layers_minus1 = sps->max_sub_layers - 1, + .sps_seq_parameter_set_id = sps_idx, + .bit_depth_luma_minus8 = sps->bit_depth - 8, + .bit_depth_chroma_minus8 = sps->bit_depth_chroma - 8, + .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_poc_lsb - 4, + .log2_min_luma_coding_block_size_minus3 = sps->log2_min_cb_size - 3, + .log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_coding_block_size, + .log2_min_luma_transform_block_size_minus2 = sps->log2_min_tb_size - 2, + .log2_diff_max_min_luma_transform_block_size = sps->log2_diff_max_min_transform_block_size, + .max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter, + .max_transform_hierarchy_depth_intra = 
sps->max_transform_hierarchy_depth_intra, + .num_short_term_ref_pic_sets = sps->nb_st_rps, + .num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps, + .pcm_sample_bit_depth_luma_minus1 = sps->pcm.bit_depth - 1, + .pcm_sample_bit_depth_chroma_minus1 = sps->pcm.bit_depth_chroma - 1, + .log2_min_pcm_luma_coding_block_size_minus3 = sps->pcm.log2_min_pcm_cb_size - 3, + .log2_diff_max_min_pcm_luma_coding_block_size = sps->pcm.log2_max_pcm_cb_size - sps->pcm.log2_min_pcm_cb_size, + /* Reserved */ + /* Reserved */ + .palette_max_size = sps->palette_max_size, + .delta_palette_max_predictor_size = sps->delta_palette_max_predictor_size, + .motion_vector_resolution_control_idc = sps->motion_vector_resolution_control_idc, + .sps_num_palette_predictor_initializers_minus1 = sps->sps_num_palette_predictor_initializers - 1, + .conf_win_left_offset = sps->pic_conf_win.left_offset, + .conf_win_right_offset = sps->pic_conf_win.right_offset, + .conf_win_top_offset = sps->pic_conf_win.top_offset, + .conf_win_bottom_offset = sps->pic_conf_win.bottom_offset, + .pProfileTierLevel = ptl, + .pDecPicBufMgr = dpbm, + .pScalingLists = vksps_scaling, + .pShortTermRefPicSet = str, + .pLongTermRefPicsSps = ltr, + .pSequenceParameterSetVui = vksps_vui, + .pPredictorPaletteEntries = pal, + }; +} + +static void set_pps(const HEVCPPS *pps, const HEVCSPS *sps, + StdVideoH265ScalingLists *vkpps_scaling, + StdVideoH265PictureParameterSet *vkpps, + StdVideoH265PredictorPaletteEntries *pal) +{ + for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS; i++) + memcpy(vkpps_scaling->ScalingList4x4[i], pps->scaling_list.sl[0][i], + STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**vkpps_scaling->ScalingList4x4)); + + for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS; i++) + memcpy(vkpps_scaling->ScalingList8x8[i], pps->scaling_list.sl[1][i], + STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS * sizeof(**vkpps_scaling->ScalingList8x8)); + + for (int i = 0; i < 
STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS; i++)
+        /* Copy with the 16x16 element count, not the 4x4 one. */
+        memcpy(vkpps_scaling->ScalingList16x16[i], pps->scaling_list.sl[2][i],
+               STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS * sizeof(**vkpps_scaling->ScalingList16x16));
+
+    /* NOTE(review): confirm that the 32x32 matrices really live at
+     * sl[3][0] and sl[3][1] in the parser's storage - TODO verify against
+     * the scaling list parser. */
+    for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; i++)
+        memcpy(vkpps_scaling->ScalingList32x32[i], pps->scaling_list.sl[3][i],
+               STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS * sizeof(**vkpps_scaling->ScalingList32x32));
+
+    /* There is one DC coefficient per list, so these copies are sized by
+     * the number of lists, not by the number of elements in a list. */
+    memcpy(vkpps_scaling->ScalingListDCCoef16x16, pps->scaling_list.sl_dc[0],
+           STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS * sizeof(*vkpps_scaling->ScalingListDCCoef16x16));
+
+    memcpy(vkpps_scaling->ScalingListDCCoef32x32, pps->scaling_list.sl_dc[1],
+           STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS * sizeof(*vkpps_scaling->ScalingListDCCoef32x32));
+
+    *vkpps = (StdVideoH265PictureParameterSet) {
+        .flags = (StdVideoH265PpsFlags) {
+            .dependent_slice_segments_enabled_flag = pps->dependent_slice_segments_enabled_flag,
+            .output_flag_present_flag = pps->output_flag_present_flag,
+            .sign_data_hiding_enabled_flag = pps->sign_data_hiding_flag,
+            .cabac_init_present_flag = pps->cabac_init_present_flag,
+            .constrained_intra_pred_flag = pps->constrained_intra_pred_flag,
+            .transform_skip_enabled_flag = pps->transform_skip_enabled_flag,
+            .cu_qp_delta_enabled_flag = pps->cu_qp_delta_enabled_flag,
+            .pps_slice_chroma_qp_offsets_present_flag = pps->pic_slice_level_chroma_qp_offsets_present_flag,
+            .weighted_pred_flag = pps->weighted_pred_flag,
+            .weighted_bipred_flag = pps->weighted_bipred_flag,
+            .transquant_bypass_enabled_flag = pps->transquant_bypass_enable_flag,
+            .tiles_enabled_flag = pps->tiles_enabled_flag,
+            .entropy_coding_sync_enabled_flag = pps->entropy_coding_sync_enabled_flag,
+            .uniform_spacing_flag = pps->uniform_spacing_flag,
+            .loop_filter_across_tiles_enabled_flag = pps->loop_filter_across_tiles_enabled_flag,
+            .pps_loop_filter_across_slices_enabled_flag = pps->seq_loop_filter_across_slices_enabled_flag,
+
.deblocking_filter_control_present_flag = pps->deblocking_filter_control_present_flag, + .deblocking_filter_override_enabled_flag = pps->deblocking_filter_override_enabled_flag, + .pps_deblocking_filter_disabled_flag = pps->disable_dbf, + .pps_scaling_list_data_present_flag = pps->scaling_list_data_present_flag, + .lists_modification_present_flag = pps->lists_modification_present_flag, + .slice_segment_header_extension_present_flag = pps->slice_header_extension_present_flag, + .pps_extension_present_flag = pps->pps_extension_present_flag, + .cross_component_prediction_enabled_flag = pps->cross_component_prediction_enabled_flag, + .chroma_qp_offset_list_enabled_flag = pps->chroma_qp_offset_list_enabled_flag, + .pps_curr_pic_ref_enabled_flag = pps->pps_curr_pic_ref_enabled_flag, + .residual_adaptive_colour_transform_enabled_flag = pps->residual_adaptive_colour_transform_enabled_flag, + .pps_slice_act_qp_offsets_present_flag = pps->pps_slice_act_qp_offsets_present_flag, + .pps_palette_predictor_initializers_present_flag = pps->pps_palette_predictor_initializers_present_flag, + .monochrome_palette_flag = pps->monochrome_palette_flag, + .pps_range_extension_flag = pps->pps_range_extensions_flag, + }, + .pps_pic_parameter_set_id = pps->pps_id, + .pps_seq_parameter_set_id = pps->sps_id, + .sps_video_parameter_set_id = sps->vps_id, + .num_extra_slice_header_bits = pps->num_extra_slice_header_bits, + .num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active - 1, + .num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active - 1, + .init_qp_minus26 = pps->pic_init_qp_minus26, + .diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth, + .pps_cb_qp_offset = pps->cb_qp_offset, + .pps_cr_qp_offset = pps->cr_qp_offset, + .pps_beta_offset_div2 = pps->beta_offset >> 1, + .pps_tc_offset_div2 = pps->tc_offset >> 1, + .log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level - 2, + .log2_max_transform_skip_block_size_minus2 = 
pps->log2_max_transform_skip_block_size - 2, + .diff_cu_chroma_qp_offset_depth = pps->diff_cu_chroma_qp_offset_depth, + .chroma_qp_offset_list_len_minus1 = pps->chroma_qp_offset_list_len_minus1, + .log2_sao_offset_scale_luma = pps->log2_sao_offset_scale_luma, + .log2_sao_offset_scale_chroma = pps->log2_sao_offset_scale_chroma, + .pps_act_y_qp_offset_plus5 = pps->pps_act_y_qp_offset + 5, + .pps_act_cb_qp_offset_plus5 = pps->pps_act_cb_qp_offset + 5, + .pps_act_cr_qp_offset_plus3 = pps->pps_act_cr_qp_offset + 3, + .pps_num_palette_predictor_initializers = pps->pps_num_palette_predictor_initializers, + .luma_bit_depth_entry_minus8 = pps->luma_bit_depth_entry - 8, + .chroma_bit_depth_entry_minus8 = pps->chroma_bit_depth_entry - 8, + .num_tile_columns_minus1 = pps->num_tile_columns - 1, + .num_tile_rows_minus1 = pps->num_tile_rows - 1, + .pScalingLists = vkpps_scaling, + .pPredictorPaletteEntries = pal, + }; + + for (int i = 0; i < (pps->monochrome_palette_flag ? 1 : 3); i++) { + for (int j = 0; j < pps->pps_num_palette_predictor_initializers; j++) + pal->PredictorPaletteEntries[i][j] = pps->pps_palette_predictor_initializer[i][j]; + } + + for (int i = 0; i < pps->num_tile_columns - 1; i++) + vkpps->column_width_minus1[i] = pps->column_width[i] - 1; + + for (int i = 0; i < pps->num_tile_rows - 1; i++) + vkpps->row_height_minus1[i] = pps->row_height[i] - 1; + + for (int i = 0; i <= pps->chroma_qp_offset_list_len_minus1; i++) { + vkpps->cb_qp_offset_list[i] = pps->cb_qp_offset_list[i]; + vkpps->cr_qp_offset_list[i] = pps->cr_qp_offset_list[i]; + } +} + +static void set_vps(const HEVCVPS *vps, + StdVideoH265VideoParameterSet *vkvps, + StdVideoH265ProfileTierLevel *ptl, + StdVideoH265DecPicBufMgr *dpbm, + StdVideoH265HrdParameters *sls_hdr, + HEVCHeaderVPSSet sls[]) +{ + for (int i = 0; i < vps->vps_num_hrd_parameters; i++) { + const HEVCHdrParams *src = &vps->hdr[i]; + + sls_hdr[i] = (StdVideoH265HrdParameters) { + .flags = (StdVideoH265HrdFlags) { + 
.nal_hrd_parameters_present_flag = src->flags.nal_hrd_parameters_present_flag,
+                .vcl_hrd_parameters_present_flag = src->flags.vcl_hrd_parameters_present_flag,
+                .sub_pic_hrd_params_present_flag = src->flags.sub_pic_hrd_params_present_flag,
+                .sub_pic_cpb_params_in_pic_timing_sei_flag = src->flags.sub_pic_cpb_params_in_pic_timing_sei_flag,
+                .fixed_pic_rate_general_flag = src->flags.fixed_pic_rate_general_flag,
+                .fixed_pic_rate_within_cvs_flag = src->flags.fixed_pic_rate_within_cvs_flag,
+                .low_delay_hrd_flag = src->flags.low_delay_hrd_flag,
+            },
+            .tick_divisor_minus2 = src->tick_divisor_minus2,
+            .du_cpb_removal_delay_increment_length_minus1 = src->du_cpb_removal_delay_increment_length_minus1,
+            .dpb_output_delay_du_length_minus1 = src->dpb_output_delay_du_length_minus1,
+            .bit_rate_scale = src->bit_rate_scale,
+            .cpb_size_scale = src->cpb_size_scale,
+            .cpb_size_du_scale = src->cpb_size_du_scale,
+            .initial_cpb_removal_delay_length_minus1 = src->initial_cpb_removal_delay_length_minus1,
+            .au_cpb_removal_delay_length_minus1 = src->au_cpb_removal_delay_length_minus1,
+            .dpb_output_delay_length_minus1 = src->dpb_output_delay_length_minus1,
+            /* Reserved - 3*16 bits */
+            .pSubLayerHrdParametersNal = sls[i].nal_hdr,
+            /* Was a second initialiser of ...Nal, which left the VCL pointer
+             * unset and pointed the NAL pointer at the VCL data. */
+            .pSubLayerHrdParametersVcl = sls[i].vcl_hdr,
+        };
+
+        memcpy(sls_hdr[i].cpb_cnt_minus1, src->cpb_cnt_minus1,
+               STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*sls_hdr[i].cpb_cnt_minus1));
+        memcpy(sls_hdr[i].elemental_duration_in_tc_minus1, src->elemental_duration_in_tc_minus1,
+               STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*sls_hdr[i].elemental_duration_in_tc_minus1));
+
+        /* Fill the sub-layer arrays that the two pointers above refer to. */
+        memcpy(sls[i].nal_hdr, src->nal_params, HEVC_MAX_SUB_LAYERS*sizeof(*sls[i].nal_hdr));
+        memcpy(sls[i].vcl_hdr, src->vcl_params, HEVC_MAX_SUB_LAYERS*sizeof(*sls[i].vcl_hdr));
+    }
+
+    *ptl = (StdVideoH265ProfileTierLevel) {
+        .flags = (StdVideoH265ProfileTierLevelFlags) {
+            .general_tier_flag = vps->ptl.general_ptl.tier_flag,
+            .general_progressive_source_flag =
vps->ptl.general_ptl.progressive_source_flag, + .general_interlaced_source_flag = vps->ptl.general_ptl.interlaced_source_flag, + .general_non_packed_constraint_flag = vps->ptl.general_ptl.non_packed_constraint_flag, + .general_frame_only_constraint_flag = vps->ptl.general_ptl.frame_only_constraint_flag, + }, + .general_profile_idc = vps->ptl.general_ptl.profile_idc, + .general_level_idc = vps->ptl.general_ptl.level_idc, + }; + + for (int i = 0; i < vps->vps_max_sub_layers; i++) { + dpbm->max_latency_increase_plus1[i] = vps->vps_max_latency_increase[i] + 1; + dpbm->max_dec_pic_buffering_minus1[i] = vps->vps_max_dec_pic_buffering[i] - 1; + dpbm->max_num_reorder_pics[i] = vps->vps_num_reorder_pics[i]; + } + + *vkvps = (StdVideoH265VideoParameterSet) { + .flags = (StdVideoH265VpsFlags) { + .vps_temporal_id_nesting_flag = vps->vps_temporal_id_nesting_flag, + .vps_sub_layer_ordering_info_present_flag = vps->vps_sub_layer_ordering_info_present_flag, + .vps_timing_info_present_flag = vps->vps_timing_info_present_flag, + .vps_poc_proportional_to_timing_flag = vps->vps_poc_proportional_to_timing_flag, + }, + .vps_video_parameter_set_id = vps->vps_id, + .vps_max_sub_layers_minus1 = vps->vps_max_sub_layers - 1, + /* Reserved */ + /* Reserved */ + .vps_num_units_in_tick = vps->vps_num_units_in_tick, + .vps_time_scale = vps->vps_time_scale, + .vps_num_ticks_poc_diff_one_minus1 = vps->vps_num_ticks_poc_diff_one - 1, + /* Reserved */ + .pDecPicBufMgr = dpbm, + .pHrdParameters = sls_hdr, + .pProfileTierLevel = ptl, + }; +} + +static int vk_hevc_create_params(AVCodecContext *avctx, AVBufferRef **buf) +{ + int err; + VkResult ret; + const HEVCContext *h = avctx->priv_data; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = (FFVulkanDecodeShared *)dec->shared_ref->data; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + VkVideoDecodeH265SessionParametersAddInfoKHR h265_params_info = { + .sType = 
VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_ADD_INFO_KHR,
+        .stdSPSCount = 0,
+        .stdPPSCount = 0,
+        .stdVPSCount = 0,
+    };
+    VkVideoDecodeH265SessionParametersCreateInfoKHR h265_params = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_CREATE_INFO_KHR,
+        .pParametersAddInfo = &h265_params_info,
+    };
+    VkVideoSessionParametersCreateInfoKHR session_params_create = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR,
+        .pNext = &h265_params,
+        .videoSession = ctx->common.session,
+        .videoSessionParametersTemplate = NULL,
+    };
+
+    int nb_vps = 0;
+    AVBufferRef *data_set;
+    HEVCHeaderSet *hdr;
+
+    AVBufferRef *tmp;
+    VkVideoSessionParametersKHR *par = av_malloc(sizeof(*par));
+    if (!par)
+        return AVERROR(ENOMEM);
+
+    /* Count the leading run of present VPS entries. The scan is bounded so
+     * that a completely full list cannot be read past its end. */
+    for (int i = 0; i < HEVC_MAX_VPS_COUNT && h->ps.vps_list[i]; i++)
+        nb_vps++;
+
+    err = get_data_set_buf(dec, &data_set, nb_vps, h->ps.vps_list);
+    if (err < 0) {
+        av_free(par); /* not yet owned by any buffer */
+        return err;
+    }
+
+    hdr = (HEVCHeaderSet *)data_set->data;
+
+    h265_params_info.pStdSPSs = hdr->sps;
+    h265_params_info.pStdPPSs = hdr->pps;
+    h265_params_info.pStdVPSs = hdr->vps;
+
+    /* SPS list (bounded by the size of hdr->sps[] / hdr->hsps[]) */
+    for (int i = 0; i < HEVC_MAX_SPS_COUNT && h->ps.sps_list[i]; i++) {
+        const HEVCSPS *sps_l = (const HEVCSPS *)h->ps.sps_list[i]->data;
+        set_sps(sps_l, i, &hdr->hsps[i].scaling, &hdr->hsps[i].vui_header,
+                &hdr->hsps[i].vui, &hdr->sps[i], hdr->hsps[i].nal_hdr,
+                hdr->hsps[i].vcl_hdr, &hdr->hsps[i].ptl, &hdr->hsps[i].dpbm,
+                &hdr->hsps[i].pal, hdr->hsps[i].str, &hdr->hsps[i].ltr);
+        h265_params_info.stdSPSCount++;
+    }
+
+    /* PPS list (bounded by the size of hdr->pps[] / hdr->hpps[]) */
+    for (int i = 0; i < HEVC_MAX_PPS_COUNT && h->ps.pps_list[i]; i++) {
+        const HEVCPPS *pps_l = (const HEVCPPS *)h->ps.pps_list[i]->data;
+        /* NOTE(review): assumes the SPS referenced by every stored PPS is
+         * still present in sps_list - confirm against the parser. */
+        const HEVCSPS *sps_l = (const HEVCSPS *)h->ps.sps_list[pps_l->sps_id]->data;
+        set_pps(pps_l, sps_l, &hdr->hpps[i].scaling, &hdr->pps[i], &hdr->hpps[i].pal);
+        h265_params_info.stdPPSCount++;
+    }
+
+    /* VPS list */
+    for (int i = 0; i < nb_vps; i++) {
+        const HEVCVPS *vps_l = (const HEVCVPS *)h->ps.vps_list[i]->data;
+        set_vps(vps_l, &hdr->vps[i], &hdr->hvps[i].ptl, &hdr->hvps[i].dpbm,
+                hdr->hvps[i].hdr, hdr->hvps[i].sls);
+        h265_params_info.stdVPSCount++;
+    }
+
+    h265_params.maxStdSPSCount = h265_params_info.stdSPSCount;
+    h265_params.maxStdPPSCount = h265_params_info.stdPPSCount;
+    h265_params.maxStdVPSCount = h265_params_info.stdVPSCount;
+
+    /* Create session parameters */
+    ret = vk->CreateVideoSessionParametersKHR(ctx->s.hwctx->act_dev, &session_params_create,
+                                              ctx->s.hwctx->alloc, par);
+    /* The scratch parameter-set data is released right after the call. */
+    av_buffer_unref(&data_set);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to create Vulkan video session parameters: %s!\n",
+               ff_vk_ret2str(ret));
+        av_free(par); /* creation failed, so only the holder itself is freed */
+        return AVERROR_EXTERNAL;
+    }
+
+    /* From here on par is released through ff_vk_decode_free_params(). */
+    tmp = av_buffer_create((uint8_t *)par, sizeof(*par), ff_vk_decode_free_params,
+                           ctx, 0);
+    if (!tmp) {
+        ff_vk_decode_free_params(ctx, (uint8_t *)par);
+        return AVERROR(ENOMEM);
+    }
+
+    av_log(avctx, AV_LOG_DEBUG, "Created frame parameters: %i SPS %i PPS %i VPS\n",
+           h265_params_info.stdSPSCount, h265_params_info.stdPPSCount,
+           h265_params_info.stdVPSCount);
+
+    *buf = tmp;
+
+    return 0;
+}
+
+static int vk_hevc_start_frame(AVCodecContext *avctx,
+                               av_unused const uint8_t *buffer,
+                               av_unused uint32_t size)
+{
+    int err;
+    HEVCContext *h = avctx->priv_data;
+    HEVCFrame *pic = h->ref;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    HEVCVulkanDecodePicture *hp = pic->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &hp->vp;
+    const HEVCSPS *sps = h->ps.sps;
+    const HEVCPPS *pps = h->ps.pps;
+    int nb_refs = 0;
+
+    if (!dec->session_params || dec->params_changed) {
+        av_buffer_unref(&dec->session_params);
+        err = vk_hevc_create_params(avctx, &dec->session_params);
+        if (err < 0)
+            return err;
+        dec->params_changed = 0;
+    }
+
+    hp->h265pic = (StdVideoDecodeH265PictureInfo) {
+        .flags = (StdVideoDecodeH265PictureInfoFlags) {
+            .IrapPicFlag = IS_IRAP(h),
+            .IdrPicFlag = IS_IDR(h),
+            .IsReference = h->nal_unit_type < 16 ?
h->nal_unit_type & 1 : 1, + .short_term_ref_pic_set_sps_flag = h->sh.short_term_ref_pic_set_sps_flag, + }, + .sps_video_parameter_set_id = sps->vps_id, + .pps_seq_parameter_set_id = pps->sps_id, + .pps_pic_parameter_set_id = pps->pps_id, + .NumDeltaPocsOfRefRpsIdx = h->sh.short_term_rps ? h->sh.short_term_rps->rps_idx_num_delta_pocs : 0, + .PicOrderCntVal = h->poc, + .NumBitsForSTRefPicSetInSlice = !h->sh.short_term_ref_pic_set_sps_flag ? + h->sh.bits_used_for_short_term_rps : 0, + }; + + /* Fill in references */ + for (int i = 0; i < FF_ARRAY_ELEMS(h->DPB); i++) { + const HEVCFrame *ref = &h->DPB[i]; + int idx = nb_refs; + + if (!(ref->flags & (HEVC_FRAME_FLAG_SHORT_REF | HEVC_FRAME_FLAG_LONG_REF))) + continue; + + if (ref == pic) { + err = vk_hevc_fill_pict(avctx, NULL, &vp->ref_slot, &vp->ref, + &hp->vkh265_ref, &hp->h265_ref, pic, 1, i); + if (err < 0) + return err; + + continue; + } + + err = vk_hevc_fill_pict(avctx, &hp->ref_src[idx], &vp->ref_slots[idx], + &vp->refs[idx], &hp->vkh265_refs[idx], + &hp->h265_refs[idx], (HEVCFrame *)ref, 0, i); + if (err < 0) + return err; + + nb_refs++; + } + + memset(hp->h265pic.RefPicSetStCurrBefore, 0xff, 8); + for (int i = 0; i < h->rps[ST_CURR_BEF].nb_refs; i++) { + HEVCFrame *frame = h->rps[ST_CURR_BEF].ref[i]; + for (int j = 0; j < FF_ARRAY_ELEMS(h->DPB); j++) { + const HEVCFrame *ref = &h->DPB[j]; + if (ref == frame) { + hp->h265pic.RefPicSetStCurrBefore[i] = j; + break; + } + } + } + memset(hp->h265pic.RefPicSetStCurrAfter, 0xff, 8); + for (int i = 0; i < h->rps[ST_CURR_AFT].nb_refs; i++) { + HEVCFrame *frame = h->rps[ST_CURR_AFT].ref[i]; + for (int j = 0; j < FF_ARRAY_ELEMS(h->DPB); j++) { + const HEVCFrame *ref = &h->DPB[j]; + if (ref == frame) { + hp->h265pic.RefPicSetStCurrAfter[i] = j; + break; + } + } + } + memset(hp->h265pic.RefPicSetLtCurr, 0xff, 8); + for (int i = 0; i < h->rps[LT_CURR].nb_refs; i++) { + HEVCFrame *frame = h->rps[LT_CURR].ref[i]; + for (int j = 0; j < FF_ARRAY_ELEMS(h->DPB); j++) { + const 
HEVCFrame *ref = &h->DPB[j]; + if (ref == frame) { + hp->h265pic.RefPicSetLtCurr[i] = j; + break; + } + } + } + + hp->h265_pic_info = (VkVideoDecodeH265PictureInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PICTURE_INFO_KHR, + .pStdPictureInfo = &hp->h265pic, + .sliceSegmentCount = 0, + .pSliceSegmentOffsets = vp->slice_off, + }; + + vp->decode_info = (VkVideoDecodeInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR, + .pNext = &hp->h265_pic_info, + .flags = 0x0, + .pSetupReferenceSlot = &vp->ref_slot, + .referenceSlotCount = nb_refs, + .pReferenceSlots = vp->ref_slots, + .dstPictureResource = (VkVideoPictureResourceInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .codedOffset = (VkOffset2D){ 0, 0 }, + .codedExtent = (VkExtent2D){ pic->frame->width, pic->frame->height }, + .baseArrayLayer = 0, + .imageViewBinding = vp->img_view_out, + }, + }; + + return 0; +} + +static int vk_hevc_decode_slice(AVCodecContext *avctx, + const uint8_t *data, + uint32_t size) +{ + const HEVCContext *h = avctx->priv_data; + HEVCVulkanDecodePicture *hp = h->ref->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &hp->vp; + + int err = ff_vk_decode_add_slice(avctx, vp, data, size, 1, + &hp->h265_pic_info.sliceSegmentCount, + &hp->h265_pic_info.pSliceSegmentOffsets); + if (err < 0) + return err; + + return 0; +} + +static int vk_hevc_end_frame(AVCodecContext *avctx) +{ + const HEVCContext *h = avctx->priv_data; + HEVCFrame *pic = h->ref; + HEVCVulkanDecodePicture *hp = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &hp->vp; + FFVulkanDecodePicture *rvp[HEVC_MAX_REFS] = { 0 }; + AVFrame *rav[HEVC_MAX_REFS] = { 0 }; + + for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) { + HEVCVulkanDecodePicture *rfhp = hp->ref_src[i]->hwaccel_picture_private; + rav[i] = hp->ref_src[i]->frame; + rvp[i] = &rfhp->vp; + } + + av_log(avctx, AV_LOG_VERBOSE, "Decoding frame, %lu bytes, %i slices\n", + vp->slices_size, 
hp->h265_pic_info.sliceSegmentCount); + + return ff_vk_decode_frame(avctx, pic->frame, vp, rav, rvp); +} + +static void vk_hevc_free_frame_priv(void *_hwctx, uint8_t *data) +{ + AVHWDeviceContext *hwctx = _hwctx; + HEVCVulkanDecodePicture *hp = (HEVCVulkanDecodePicture *)data; + + /* Free frame resources */ + ff_vk_decode_free_frame(hwctx, &hp->vp); + + /* Free frame context */ + av_free(hp); +} + +const AVHWAccel ff_hevc_vulkan_hwaccel = { + .name = "hevc_vulkan", + .type = AVMEDIA_TYPE_VIDEO, + .id = AV_CODEC_ID_HEVC, + .pix_fmt = AV_PIX_FMT_VULKAN, + .start_frame = &vk_hevc_start_frame, + .decode_slice = &vk_hevc_decode_slice, + .end_frame = &vk_hevc_end_frame, + .free_frame_priv = &vk_hevc_free_frame_priv, + .frame_priv_data_size = sizeof(HEVCVulkanDecodePicture), + .init = &ff_vk_decode_init, + .update_thread_context = &ff_vk_update_thread_context, + .decode_params = &ff_vk_params_changed, + .flush = &ff_vk_decode_flush, + .uninit = &ff_vk_decode_uninit, + .frame_params = &ff_vk_frame_params, + .priv_data_size = sizeof(FFVulkanDecodeContext), + .caps_internal = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE, +}; From 41ca03a24a855dcc28d625ef2fc4fbcb3c430349 Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 28 Dec 2022 06:31:11 +0100 Subject: [PATCH 84/98] hwcontext_vulkan: add PREP_MODE_ENCODING --- libavutil/hwcontext_vulkan.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 93b6c6de4b5b6..34ea9a8d2a5c9 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -1834,6 +1834,7 @@ enum PrepMode { PREP_MODE_EXTERNAL_IMPORT, PREP_MODE_DECODING_DST, PREP_MODE_DECODING_DPB, + PREP_MODE_ENCODING_DPB, }; static int prepare_frame(AVHWFramesContext *hwfc, FFVkExecPool *ectx, @@ -1895,6 +1896,10 @@ static int prepare_frame(AVHWFramesContext *hwfc, FFVkExecPool *ectx, new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR; new_access = VK_ACCESS_TRANSFER_READ_BIT | 
VK_ACCESS_TRANSFER_WRITE_BIT; break; + case PREP_MODE_ENCODING_DPB: + new_layout = VK_IMAGE_LAYOUT_VIDEO_ENCODE_DPB_KHR; + new_access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; + break; } ff_vk_frame_barrier(&p->vkctx, exec, &tmp_frame, img_bar, &nb_img_bar, @@ -2153,6 +2158,8 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size) err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_DECODING_DPB); else if (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR) err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_DECODING_DST); + else if (hwctx->usage & VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR) + err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_ENCODING_DPB); else err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_WRITE); if (err) From 4577bc1a08f8ff2bdf5bb13412193822293dbf1c Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 11 Jan 2023 09:14:55 +0100 Subject: [PATCH 85/98] hwcontext_vulkan: add encoding functions --- configure | 2 ++ libavutil/hwcontext_vulkan.c | 7 +++++++ libavutil/vulkan_functions.h | 6 ++++++ libavutil/vulkan_loader.h | 7 +++++++ 4 files changed, 22 insertions(+) diff --git a/configure b/configure index b3d2730b1b76f..38cfc0316ab68 100755 --- a/configure +++ b/configure @@ -354,6 +354,7 @@ External library support: --disable-vdpau disable Nvidia Video Decode and Presentation API for Unix code [autodetect] --disable-videotoolbox disable VideoToolbox code [autodetect] --disable-vulkan disable Vulkan code [autodetect] + --enable-vulkan-encode enable Vulkan encoding code [no] Toolchain options: --arch=ARCH select architecture [$arch] @@ -1915,6 +1916,7 @@ HWACCEL_LIBRARY_LIST=" mmal omx opencl + vulkan_encode " DOCUMENT_LIST=" diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 34ea9a8d2a5c9..7ebab02d9f8b3 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -415,8 +415,15 @@ static const VulkanOptExtension optional_device_exts[] = { /* 
Video encoding/decoding */ { VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_QUEUE }, + { VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_QUEUE }, { VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_QUEUE }, +#if CONFIG_VULKAN_ENCODE + { VK_EXT_VIDEO_ENCODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H264 }, +#endif { VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H264 }, +#if CONFIG_VULKAN_ENCODE + { VK_EXT_VIDEO_ENCODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H265 }, +#endif { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 }, }; diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h index c81e12f27ec06..20096b1506a3f 100644 --- a/libavutil/vulkan_functions.h +++ b/libavutil/vulkan_functions.h @@ -43,6 +43,9 @@ typedef enum FFVulkanExtensions { FF_VK_EXT_VIDEO_DECODE_QUEUE = 1ULL << 11, /* VK_KHR_video_decode_queue */ FF_VK_EXT_VIDEO_DECODE_H264 = 1ULL << 12, /* VK_EXT_video_decode_h264 */ FF_VK_EXT_VIDEO_DECODE_H265 = 1ULL << 13, /* VK_EXT_video_decode_h265 */ + FF_VK_EXT_VIDEO_ENCODE_QUEUE = 1ULL << 14, /* VK_KHR_video_encode_queue */ + FF_VK_EXT_VIDEO_ENCODE_H264 = 1ULL << 15, /* VK_EXT_video_encode_h264 */ + FF_VK_EXT_VIDEO_ENCODE_H265 = 1ULL << 16, /* VK_EXT_video_encode_h265 */ FF_VK_EXT_NO_FLAG = 1ULL << 31, } FFVulkanExtensions; @@ -189,6 +192,9 @@ typedef enum FFVulkanExtensions { \ /* Video decoding */ \ MACRO(1, 1, FF_VK_EXT_VIDEO_DECODE_QUEUE, CmdDecodeVideoKHR) \ + \ + /* Video encoding */ \ + MACRO(1, 1, FF_VK_EXT_VIDEO_ENCODE_QUEUE, CmdEncodeVideoKHR) \ \ /* Pipeline */ \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreatePipelineLayout) \ diff --git a/libavutil/vulkan_loader.h b/libavutil/vulkan_loader.h index 5380e2130300d..76da10d477364 100644 --- a/libavutil/vulkan_loader.h +++ b/libavutil/vulkan_loader.h @@ -51,8 +51,15 @@ static inline uint64_t ff_vk_extensions_to_mask(const char * const *extensions, #endif { VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, 
FF_VK_EXT_DESCRIPTOR_BUFFER, }, { VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_QUEUE }, + { VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_QUEUE }, { VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_QUEUE }, +#if CONFIG_VULKAN_ENCODE + { VK_EXT_VIDEO_ENCODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H264 }, +#endif { VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H264 }, +#if CONFIG_VULKAN_ENCODE + { VK_EXT_VIDEO_ENCODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H265 }, +#endif { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 }, }; From f2b594690d3209299d20d0f2f635c3c09afb06bd Mon Sep 17 00:00:00 2001 From: Lynne Date: Sun, 18 Dec 2022 08:32:58 +0100 Subject: [PATCH 86/98] libavcodec: add Vulkan common encoding code --- configure | 2 + libavcodec/Makefile | 3 +- libavcodec/vulkan_encode.c | 989 +++++++++++++++++++++++++++++++++++++ libavcodec/vulkan_encode.h | 258 ++++++++++ 4 files changed, 1251 insertions(+), 1 deletion(-) create mode 100644 libavcodec/vulkan_encode.c create mode 100644 libavcodec/vulkan_encode.h diff --git a/configure b/configure index 38cfc0316ab68..212254148748d 100755 --- a/configure +++ b/configure @@ -2516,6 +2516,7 @@ CONFIG_EXTRA=" vp56dsp vp8dsp wma_freqs + vulkan_encode wmv2dsp " @@ -3139,6 +3140,7 @@ qsvenc_select="qsv" qsvvpp_select="qsv" vaapi_encode_deps="vaapi" v4l2_m2m_deps="linux_videodev2_h sem_timedwait" +vulkan_encode_deps="vulkan" bilateral_cuda_filter_deps="ffnvcodec" bilateral_cuda_filter_deps_any="cuda_nvcc cuda_llvm" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index d421844903136..4b3587e9703de 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -170,6 +170,7 @@ OBJS-$(CONFIG_VP3DSP) += vp3dsp.o OBJS-$(CONFIG_VP56DSP) += vp56dsp.o OBJS-$(CONFIG_VP8DSP) += vp8dsp.o OBJS-$(CONFIG_V4L2_M2M) += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o +OBJS-$(CONFIG_VULKAN_ENCODE) += vulkan_encode.o OBJS-$(CONFIG_WMA_FREQS) += 
wma_freqs.o OBJS-$(CONFIG_WMV2DSP) += wmv2dsp.o @@ -1293,7 +1294,7 @@ SKIPHEADERS-$(CONFIG_XVMC) += xvmc.h SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_decode.h vaapi_hevc.h vaapi_encode.h SKIPHEADERS-$(CONFIG_VDPAU) += vdpau.h vdpau_internal.h SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.h vt_internal.h -SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_video.h vulkan_decode.h +SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_video.h vulkan_encode.h vulkan_decode.h SKIPHEADERS-$(CONFIG_V4L2_M2M) += v4l2_buffers.h v4l2_context.h v4l2_m2m.h SKIPHEADERS-$(CONFIG_ZLIB) += zlib_wrapper.h diff --git a/libavcodec/vulkan_encode.c b/libavcodec/vulkan_encode.c new file mode 100644 index 0000000000000..c832fbf4e7539 --- /dev/null +++ b/libavcodec/vulkan_encode.c @@ -0,0 +1,989 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "vulkan_encode.h"
+
+/* NOTE(review): the header name of the #include below is missing in this copy
+ * of the patch - restore it before applying. */
+#include // TODO: REMOVE WHEN THE HEADERS BUG IS FIXED
+
+/* Per-codec Vulkan "std" video header name and version, indexed by AVCodecID. */
+const VkExtensionProperties ff_vk_enc_ext[AV_CODEC_ID_FIRST_AUDIO] = {
+    [AV_CODEC_ID_H264] = (VkExtensionProperties) {
+        .extensionName = VK_STD_VULKAN_VIDEO_CODEC_H264_ENCODE_EXTENSION_NAME,
+        .specVersion = VK_STD_VULKAN_VIDEO_CODEC_H264_ENCODE_SPEC_VERSION,
+    },
+    [AV_CODEC_ID_HEVC] = (VkExtensionProperties) {
+        .extensionName = VK_STD_VULKAN_VIDEO_CODEC_H265_ENCODE_EXTENSION_NAME,
+        .specVersion = VK_STD_VULKAN_VIDEO_CODEC_H265_ENCODE_SPEC_VERSION,
+    },
+};
+
+/* NULL-terminated list of hardware configurations exposed by the encoders. */
+const AVCodecHWConfigInternal *const ff_vulkan_encode_hw_configs[] = {
+    HW_CONFIG_ENCODER_FRAMES(VULKAN, VULKAN),
+    NULL,
+};
+
+/**
+ * Release everything owned by a Vulkan encode context.
+ *
+ * Frees the execution pool, the layered DPB frame and the DPB frames
+ * context, destroys the YCbCr sampler conversion and the session parameters
+ * when present, tears down the common video state and the Vulkan context,
+ * and finally frees the DPB layer occupancy table and the picture array.
+ *
+ * @param ctx encode context to tear down
+ */
+av_cold void ff_vulkan_encode_uninit(FFVulkanEncodeContext *ctx)
+{
+    FFVulkanContext *s = &ctx->s;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    /* Wait on and free execution pool */
+    ff_vk_exec_pool_free(s, &ctx->enc_pool);
+
+    /* This also frees all references from this pool */
+    av_frame_free(&ctx->layered_frame);
+    av_buffer_unref(&ctx->dpb_hwfc_ref);
+
+    /* Destroy YUV sampler */
+    if (ctx->yuv_sampler)
+        vk->DestroySamplerYcbcrConversion(s->hwctx->act_dev, ctx->yuv_sampler,
+                                          s->hwctx->alloc);
+
+    /* Destroy parameters */
+    if (ctx->session_params)
+        vk->DestroyVideoSessionParametersKHR(s->hwctx->act_dev, ctx->session_params,
+                                             s->hwctx->alloc);
+
+    ff_vk_video_common_uninit(s, &ctx->common);
+
+    ff_vk_uninit(s);
+
+    /* The occupancy table is allocated with av_mallocz() during init and was
+     * previously never released. av_freep() also clears the pointers so a
+     * second teardown cannot double-free. */
+    av_freep(&ctx->dpb_layer_taken);
+    av_freep(&ctx->pic);
+}
+
+int ff_vk_encode_create_view(FFVulkanEncodeContext *ctx, VkImageView *dst_view,
+                             VkImageAspectFlags *aspect, AVVkFrame *src, int layer)
+{
+    VkResult ret;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+    VkImageAspectFlags aspect_mask = ff_vk_aspect_bits_from_vkfmt(ctx->pic_format);
+
+    VkSamplerYcbcrConversionInfo
yuv_sampler_info = {
+        .sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO,
+        .conversion = ctx->yuv_sampler,
+    };
+    VkImageViewCreateInfo img_view_create_info = {
+        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+        .pNext = &yuv_sampler_info,
+        .viewType = VK_IMAGE_VIEW_TYPE_2D,
+        .format = ctx->pic_format,
+        .image = src->img[0],
+        .components = (VkComponentMapping) {
+            .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+            .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+            .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+            .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+        },
+        .subresourceRange = (VkImageSubresourceRange) {
+            .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+            .baseArrayLayer = layer,
+            .layerCount = 1,
+            .levelCount = 1,
+        },
+    };
+
+    ret = vk->CreateImageView(ctx->s.hwctx->act_dev, &img_view_create_info,
+                              ctx->s.hwctx->alloc, dst_view);
+    if (ret != VK_SUCCESS)
+        return AVERROR_EXTERNAL;
+
+    *aspect = aspect_mask;
+
+    return 0;
+}
+
+/**
+ * Allocate a frame backed by a buffer from the DPB frames pool.
+ *
+ * The returned frame holds a new reference to the DPB frames context in
+ * hw_frames_ctx, and buf[0]/data[0] refer to a buffer taken from that
+ * context's pool.
+ *
+ * @param ctx encode context; ctx->dpb_hwfc_ref must already be set
+ * @return the new frame, or NULL if any allocation failed
+ */
+static AVFrame *vk_get_dpb_pool(FFVulkanEncodeContext *ctx)
+{
+    AVHWFramesContext *dpb_frames = (AVHWFramesContext *)ctx->dpb_hwfc_ref->data;
+    AVFrame *avf = av_frame_alloc();
+    if (!avf)
+        return NULL;
+
+    avf->hw_frames_ctx = av_buffer_ref(ctx->dpb_hwfc_ref);
+    if (!avf->hw_frames_ctx)
+        goto fail;
+
+    avf->buf[0] = av_buffer_pool_get(dpb_frames->pool);
+    if (!avf->buf[0])
+        goto fail;
+
+    avf->data[0] = avf->buf[0]->data;
+
+    return avf;
+
+fail:
+    /* av_frame_free() resets avf to NULL, so the frame must not be touched
+     * after this point; previously execution fell through and dereferenced it. */
+    av_frame_free(&avf);
+    return NULL;
+}
+
+av_cold int ff_vulkan_encode_init(AVCodecContext *avctx, FFVulkanEncodeContext *ctx,
+                                  void *codec_profile, void *caps,
+                                  const FFVulkanEncoder *enc,
+                                  int output_delay, int decode_delay)
+{
+    int i, err, qf;
+    VkResult ret;
+    int cxpos = 0, cypos = 0;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+    FFVulkanContext *s = &ctx->s;
+    FFVulkanExtensions extensions;
+    const struct FFVkCodecMap *vk_codec = &ff_vk_codec_map[avctx->codec_id];
+    const AVPixFmtDescriptor *desc;
+
+    AVHWFramesContext *dpb_frames;
+    AVVulkanFramesContext *dpb_hwfc;
+
+    VkVideoFormatPropertiesKHR *ret_info;
+    uint32_t nb_out_fmts = 0;
+
+
VkQueryPoolVideoEncodeFeedbackCreateInfoKHR query_create = { + .sType = VK_STRUCTURE_TYPE_QUERY_POOL_VIDEO_ENCODE_FEEDBACK_CREATE_INFO_KHR, + .encodeFeedbackFlags = VK_VIDEO_ENCODE_FEEDBACK_BITSTREAM_BUFFER_OFFSET_BIT_KHR | + VK_VIDEO_ENCODE_FEEDBACK_BITSTREAM_BYTES_WRITTEN_BIT_KHR, + }; + VkVideoSessionCreateInfoKHR session_create = { + .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_CREATE_INFO_KHR, + }; + VkPhysicalDeviceVideoFormatInfoKHR fmt_info = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_FORMAT_INFO_KHR, + .pNext = &ctx->profile_list, + }; + VkSamplerYcbcrConversionCreateInfo yuv_sampler_info = { + .sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO, + .components = ff_comp_identity_map, + .ycbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY, + .ycbcrRange = avctx->color_range == AVCOL_RANGE_MPEG, /* Ignored */ + }; + + if (!avctx->hw_frames_ctx) { + av_log(avctx, AV_LOG_ERROR, "A hardware frames reference is " + "required to associate the encoding device.\n"); + return AVERROR(EINVAL); + } + + s->frames_ref = av_buffer_ref(avctx->hw_frames_ctx); + s->frames = (AVHWFramesContext *)s->frames_ref->data; + s->hwfc = s->frames->hwctx; + + s->device = (AVHWDeviceContext *)s->frames->device_ref->data; + s->hwctx = s->device->hwctx; + + desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt); + if (!desc) + return AVERROR(EINVAL); + + ctx->enc = enc; + ctx->output_delay = output_delay; + ctx->decode_delay = decode_delay; + + extensions = ff_vk_extensions_to_mask(s->hwctx->enabled_dev_extensions, + s->hwctx->nb_enabled_dev_extensions); + + if (!(extensions & FF_VK_EXT_VIDEO_ENCODE_QUEUE)) { + av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n", + VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME); + return AVERROR(ENOSYS); + } else if (!(vk_codec->encode_extension & extensions)) { + av_log(avctx, AV_LOG_ERROR, "Device does not support decoding %s!\n", + avcodec_get_name(avctx->codec_id)); + return AVERROR(ENOSYS); + } + + /* Load functions 
*/ + err = ff_vk_load_functions(s->device, vk, extensions, 1, 1); + if (err < 0) + return err; + + /* Load all properties */ + err = ff_vk_load_props(s); + if (err < 0) + return err; + + /* Create queue context */ + qf = ff_vk_qf_init(s, &ctx->qf_enc, VK_QUEUE_VIDEO_ENCODE_BIT_KHR); + + /* Check for support */ + if (!(s->video_props[qf].videoCodecOperations & vk_codec->encode_op)) { + av_log(avctx, AV_LOG_ERROR, "Encoding %s not supported on the given " + "queue family %i!\n", avcodec_get_name(avctx->codec_id), qf); + return AVERROR(EINVAL); + } + + /* Set tuning */ + ctx->usage_info = (VkVideoEncodeUsageInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_USAGE_INFO_KHR, + .pNext = codec_profile, + .videoUsageHints = ctx->opts.usage, + .videoContentHints = ctx->opts.content, + .tuningMode = ctx->opts.tune, + }; + + /* Load up the profile now, we need it to create a query pool */ + ctx->profile.sType = VK_STRUCTURE_TYPE_VIDEO_PROFILE_INFO_KHR; + ctx->profile.pNext = &ctx->usage_info; + ctx->profile.videoCodecOperation = vk_codec->encode_op; + ctx->profile.chromaSubsampling = ff_vk_subsampling_from_av_desc(desc); + ctx->profile.lumaBitDepth = ff_vk_depth_from_av_depth(desc->comp[0].depth); + ctx->profile.chromaBitDepth = ctx->profile.lumaBitDepth; + + query_create.pNext = &ctx->profile; + + /* Create command and query pool. + * 18.12. Video Encode Bitstream Buffer Range Queries: + * Two values are written, the buffer offset, and the number of bytes written. 
*/ + err = ff_vk_exec_pool_init(s, &ctx->qf_enc, &ctx->enc_pool, 1, + 1, VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR, 0, + &query_create); + if (err < 0) + return err; + + ctx->profile_list.sType = VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR; + ctx->profile_list.profileCount = 1; + ctx->profile_list.pProfiles = &ctx->profile; + + /* Get the capabilities of the decoder for the given profile */ + ctx->common.caps.sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR; + ctx->common.caps.pNext = &ctx->enc_caps; + ctx->enc_caps.sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_CAPABILITIES_KHR; + ctx->enc_caps.pNext = caps; + + ret = vk->GetPhysicalDeviceVideoCapabilitiesKHR(s->hwctx->phys_dev, + &ctx->profile, + &ctx->common.caps); + if (ret == VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR) { + av_log(avctx, AV_LOG_ERROR, "Unable to initialize encoding: " + "%s profile \"%s\" not supported!\n", + avcodec_get_name(avctx->codec_id), + avcodec_profile_name(avctx->codec_id, ctx->opts.profile)); + return AVERROR(EINVAL); + } else if (ret == VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR) { + av_log(avctx, AV_LOG_ERROR, "Unable to initialize encoding: " + "format (%s) not supported!\n", + av_get_pix_fmt_name(avctx->sw_pix_fmt)); + return AVERROR(EINVAL); + } else if (ret == VK_ERROR_FEATURE_NOT_PRESENT || + ret == VK_ERROR_FORMAT_NOT_SUPPORTED) { + return AVERROR(EINVAL); + } else if (ret != VK_SUCCESS) { + return AVERROR_EXTERNAL; + } + + av_log(avctx, AV_LOG_VERBOSE, "encoder capabilities for %s profile \"%s\":\n", + avcodec_get_name(avctx->codec_id), + avcodec_profile_name(avctx->codec_id, ctx->opts.profile)); + av_log(avctx, AV_LOG_VERBOSE, " Width: from %i to %i\n", + ctx->common.caps.minCodedExtent.width, ctx->common.caps.maxCodedExtent.width); + av_log(avctx, AV_LOG_VERBOSE, " Height: from %i to %i\n", + ctx->common.caps.minCodedExtent.height, ctx->common.caps.maxCodedExtent.height); + av_log(avctx, AV_LOG_VERBOSE, " Width alignment: %i\n", + 
ctx->common.caps.pictureAccessGranularity.width); + av_log(avctx, AV_LOG_VERBOSE, " Height alignment: %i\n", + ctx->common.caps.pictureAccessGranularity.height); + av_log(avctx, AV_LOG_VERBOSE, " Bitstream offset alignment: %"PRIu64"\n", + ctx->common.caps.minBitstreamBufferOffsetAlignment); + av_log(avctx, AV_LOG_VERBOSE, " Bitstream size alignment: %"PRIu64"\n", + ctx->common.caps.minBitstreamBufferSizeAlignment); + av_log(avctx, AV_LOG_VERBOSE, " Maximum references: %u\n", + ctx->common.caps.maxDpbSlots); + av_log(avctx, AV_LOG_VERBOSE, " Maximum active references: %u\n", + ctx->common.caps.maxActiveReferencePictures); + av_log(avctx, AV_LOG_VERBOSE, " Codec header version: %i.%i.%i (driver), %i.%i.%i (compiled)\n", + CODEC_VER(ctx->common.caps.stdHeaderVersion.specVersion), + CODEC_VER(ff_vk_enc_ext[avctx->codec_id].specVersion)); + av_log(avctx, AV_LOG_VERBOSE, " encode quality levels: %i\n", + ctx->enc_caps.maxQualityLevels); + av_log(avctx, AV_LOG_VERBOSE, " encode image width alignment: %i\n", + ctx->enc_caps.inputImageDataFillAlignment.width); + av_log(avctx, AV_LOG_VERBOSE, " encode image height alignment: %i\n", + ctx->enc_caps.inputImageDataFillAlignment.height); + av_log(avctx, AV_LOG_VERBOSE, " Capability flags:%s%s%s\n", + ctx->common.caps.flags ? "" : + " none", + ctx->common.caps.flags & VK_VIDEO_CAPABILITY_PROTECTED_CONTENT_BIT_KHR ? + " protected" : "", + ctx->common.caps.flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR ? 
+ " separate_references" : ""); + + /* Check if decoding is possible with the given parameters */ + if (avctx->coded_width < ctx->common.caps.minCodedExtent.width || + avctx->coded_height < ctx->common.caps.minCodedExtent.height || + avctx->coded_width > ctx->common.caps.maxCodedExtent.width || + avctx->coded_height > ctx->common.caps.maxCodedExtent.height) + return AVERROR(EINVAL); + + fmt_info.imageUsage = VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR | + VK_IMAGE_USAGE_VIDEO_ENCODE_DST_BIT_KHR; + + ctx->layered_dpb = !(ctx->common.caps.flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR); + + /* Get the supported image formats */ + ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(s->hwctx->phys_dev, + &fmt_info, + &nb_out_fmts, NULL); + if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED || + (!nb_out_fmts && ret == VK_SUCCESS)) { + return AVERROR(EINVAL); + } else if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + ret_info = av_mallocz(sizeof(*ret_info)*nb_out_fmts); + if (!ret_info) + return AVERROR(ENOMEM); + + for (int i = 0; i < nb_out_fmts; i++) + ret_info[i].sType = VK_STRUCTURE_TYPE_VIDEO_FORMAT_PROPERTIES_KHR; + + ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(s->hwctx->phys_dev, + &fmt_info, + &nb_out_fmts, ret_info); + if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED || + (!nb_out_fmts && ret == VK_SUCCESS)) { + av_free(ret_info); + return AVERROR(EINVAL); + } else if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n", + ff_vk_ret2str(ret)); + av_free(ret_info); + return AVERROR_EXTERNAL; + } + + av_log(avctx, AV_LOG_VERBOSE, "Supported input formats:\n"); + for (i = 0; i < nb_out_fmts; i++) + av_log(avctx, AV_LOG_VERBOSE, " %i: %i\n", i, ret_info[i].format); + + for (i = 0; i < nb_out_fmts; i++) { + if (ff_vk_pix_fmt_from_vkfmt(ret_info[i].format) == s->frames->sw_format) { + ctx->pic_format = 
ret_info[i].format; + break; + } + } + + av_free(ret_info); + + if (i == nb_out_fmts) { + av_log(avctx, AV_LOG_ERROR, "Pixel format %s of input frames not supported!\n", + av_get_pix_fmt_name(s->frames->sw_format)); + return AVERROR(EINVAL); + } + + session_create.pVideoProfile = &ctx->profile; + session_create.flags = 0x0; + session_create.queueFamilyIndex = s->hwctx->queue_family_encode_index; + session_create.maxCodedExtent = ctx->common.caps.maxCodedExtent; + session_create.maxDpbSlots = ctx->common.caps.maxDpbSlots; + session_create.maxActiveReferencePictures = ctx->common.caps.maxActiveReferencePictures; + session_create.pictureFormat = ctx->pic_format; + session_create.referencePictureFormat = session_create.pictureFormat; + session_create.pStdHeaderVersion = &ff_vk_enc_ext[avctx->codec_id]; + + err = ff_vk_video_common_init(avctx, s, &ctx->common, &session_create); + if (err < 0) + return err; + + /* Get sampler */ + av_chroma_location_enum_to_pos(&cxpos, &cypos, avctx->chroma_sample_location); + yuv_sampler_info.xChromaOffset = cxpos >> 7; + yuv_sampler_info.yChromaOffset = cypos >> 7; + yuv_sampler_info.format = ctx->pic_format; + ret = vk->CreateSamplerYcbcrConversion(s->hwctx->act_dev, &yuv_sampler_info, + s->hwctx->alloc, &ctx->yuv_sampler); + if (ret != VK_SUCCESS) + return AVERROR_EXTERNAL; + + ctx->dpb_hwfc_ref = av_hwframe_ctx_alloc(s->frames->device_ref); + if (!ctx->dpb_hwfc_ref) + return AVERROR(ENOMEM); + + dpb_frames = (AVHWFramesContext *)ctx->dpb_hwfc_ref->data; + dpb_frames->format = s->frames->format; + dpb_frames->sw_format = s->frames->sw_format; + dpb_frames->width = s->frames->width; + dpb_frames->height = s->frames->height; + + dpb_hwfc = dpb_frames->hwctx; + dpb_hwfc->create_pnext = &ctx->profile_list; + dpb_hwfc->tiling = VK_IMAGE_TILING_OPTIMAL; + dpb_hwfc->format[0] = ctx->pic_format; + dpb_hwfc->usage = VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR | + VK_IMAGE_USAGE_SAMPLED_BIT; /* Shuts validator up. 
*/ + + if (ctx->layered_dpb) { + ctx->dpb_layers = ctx->common.caps.maxDpbSlots; + dpb_hwfc->nb_layers = ctx->dpb_layers; + } + + err = av_hwframe_ctx_init(ctx->dpb_hwfc_ref); + if (err < 0) + return err; + + if (dpb_hwfc->nb_layers) { + ctx->dpb_layer_taken = av_mallocz(ctx->dpb_layers*sizeof(*ctx->dpb_layer_taken)); + if (!ctx->dpb_layer_taken) + return AVERROR(ENOMEM); + } + + if (ctx->layered_dpb) { + ctx->layered_frame = vk_get_dpb_pool(ctx); + if (!ctx->layered_frame) + return AVERROR(ENOMEM); + } + + ctx->pic = av_mallocz(3*sizeof(*ctx->pic)); + if (!ctx->pic) + return AVERROR(ENOMEM); + + return 0; +} + +static void vkctx_frame_free(FFVulkanEncodeContext *ctx, + FFVulkanEncodePicture *pic) +{ + FFVulkanFunctions *vk = &ctx->s.vkfn; + + if (pic->view) + vk->DestroyImageView(ctx->s.hwctx->act_dev, pic->view, + ctx->s.hwctx->alloc); + + /* TODO: keep these */ + if (pic->dpb_view) + vk->DestroyImageView(ctx->s.hwctx->act_dev, pic->dpb_view, + ctx->s.hwctx->alloc); + + av_frame_free(&pic->dpb_frame); + av_buffer_unref(&pic->pkt_buf); + av_freep(&pic->priv_data); + + ctx->dpb_layer_taken[pic->dpb_layer] = 0; +} + +static int vkctx_frame_init(FFVulkanEncodeContext *ctx, + FFVulkanEncodePicture *pic, AVFrame *src) +{ + int err; + AVVkFrame *vkf; + + if (!src) { + ctx->end_of_stream = 1; + + /* Fix timestamps if we hit end-of-stream before the initial decode + * delay has elapsed. 
*/ + if (ctx->input_order < ctx->decode_delay) + ctx->dts_pts_diff = ctx->pic_end->pts - ctx->first_pts; + + return AVERROR_EOF; + } + + vkf = (AVVkFrame *)src->buf[0]->data; + + /* Create image view for the input frame */ + err = ff_vk_encode_create_view(ctx, &pic->view, &pic->aspect, vkf, 0); + if (err < 0) + goto fail; + + /* Create private data for input frame TODO remove this */ + pic->priv_data = av_mallocz(ctx->enc->pic_priv_data_size); + if (!pic->priv_data) { + err = AVERROR(ENOMEM); + goto fail; + } + + /* Allocate a DPB buffer */ + if (!pic->dpb_frame) { + int layer_id = 0; + if (ctx->layered_dpb) { + for (int layer_id = 0; layer_id < ctx->dpb_layers; layer_id++) + if (!ctx->dpb_layer_taken[layer_id]) + break; + + /* No free DPB slots */ + if (layer_id == ctx->dpb_layers) + return AVERROR(EAGAIN); + + pic->dpb_frame = av_frame_clone(ctx->layered_frame); + } else { + pic->dpb_frame = vk_get_dpb_pool(ctx); + } + + if (!pic->dpb_frame) { + err = AVERROR(ENOMEM); + goto fail; + } + + /* Create image view for the DPB */ + err = ff_vk_encode_create_view(ctx, &pic->dpb_view, &pic->dpb_aspect, + (AVVkFrame *)pic->dpb_frame->data[0], layer_id); + if (err < 0) + goto fail; + + ctx->dpb_layer_taken[layer_id] = 1; + pic->dpb_layer = layer_id; + } + + if (ctx->input_order == 0 || src->pict_type == AV_PICTURE_TYPE_I) + pic->force_idr = 1; + + pic->pts = src->pts; + pic->duration = src->duration; + pic->time_base = src->time_base; + + if (ctx->input_order == 0) + ctx->first_pts = pic->pts; + if (ctx->input_order == ctx->decode_delay) + ctx->dts_pts_diff = pic->pts - ctx->first_pts; + if (ctx->output_delay > 0) + ctx->dts_ring[ctx->input_order % + (3 * ctx->output_delay + ctx->opts.async_depth)] = pic->pts; + + pic->display_order = ctx->input_order; + ctx->input_order++; + + if (ctx->pic_start) { + ctx->pic_end->next = pic; + ctx->pic_end = pic; + } else { + ctx->pic_start = pic; + ctx->pic_end = pic; + } + + return 0; + +fail: + vkctx_frame_free(ctx, pic); + return 
err; +} + +static int vulkan_encode_issue(AVCodecContext *avctx, + FFVulkanEncodeContext *ctx, + FFVulkanEncodePicture *pic, + AVFrame *src) +{ + int err, max_pkt_size; + const size_t size_align = ctx->common.caps.minBitstreamBufferSizeAlignment; + const int layered_dpb = ctx->layered_dpb; + + FFVulkanFunctions *vk = &ctx->s.vkfn; + AVVkFrame *vkf = (AVVkFrame *)src->buf[0]->data; + + FFVkVideoBuffer *sd_buf; + + FFVkExecContext *exec; + VkCommandBuffer cmd_buf; + VkImageMemoryBarrier2 img_bar[37]; + int nb_img_bar = 0; + + /* Coding start/end */ + VkVideoBeginCodingInfoKHR encode_start = { + .sType = VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR, + .videoSession = ctx->common.session, + .videoSessionParameters = ctx->session_params, + .referenceSlotCount = 0, + .pReferenceSlots = NULL, + }; + VkVideoEndCodingInfoKHR encode_end = { + .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR, + }; + + VkVideoEncodeRateControlLayerInfoKHR rc_layer; + VkVideoEncodeRateControlInfoKHR rc_info; + VkVideoCodingControlInfoKHR encode_ctrl; + VkVideoPictureResourceInfoKHR dpb_pic; + VkVideoReferenceSlotInfoKHR dpb_slot; + VkVideoPictureResourceInfoKHR ref_pic[37]; + VkVideoReferenceSlotInfoKHR ref_slot[37]; + VkVideoEncodeInfoKHR encode_info; + + /* Initialize all codec-specific headers */ + err = ctx->enc->init_pic_headers(avctx, pic); + if (err < 0) + return err; + + /* Create packet data buffer, TODO make this smarter */ + max_pkt_size = (src->width * src->height)*2; + + err = ff_vk_video_get_buffer(&ctx->s, &ctx->common, &pic->pkt_buf, + VK_BUFFER_USAGE_VIDEO_ENCODE_DST_BIT_KHR, + &ctx->profile_list, max_pkt_size); + if (err < 0) + goto fail; + + sd_buf = (FFVkVideoBuffer *)pic->pkt_buf->data; + + /* Rate control */ + rc_layer = (VkVideoEncodeRateControlLayerInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_RATE_CONTROL_LAYER_INFO_KHR, + .pNext = pic->codec_rc_layer, + }; + rc_info = (VkVideoEncodeRateControlInfoKHR) { + .sType = 
VK_STRUCTURE_TYPE_VIDEO_ENCODE_RATE_CONTROL_INFO_KHR, + .pNext = NULL, + .rateControlMode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR, + .layerCount = 1, /* Required to be >= 1 */ + .pLayers = &rc_layer, + }; + encode_ctrl = (VkVideoCodingControlInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_CODING_CONTROL_INFO_KHR, + .pNext = &rc_info, + .flags = VK_VIDEO_CODING_CONTROL_ENCODE_RATE_CONTROL_BIT_KHR, + }; + + /* Current picture */ + dpb_pic = (VkVideoPictureResourceInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .pNext = NULL, + .codedOffset = { 0 }, + .codedExtent = (VkExtent2D){ src->width, src->height }, + .baseArrayLayer = 0, + .imageViewBinding = pic->dpb_view, + }; + dpb_slot = (VkVideoReferenceSlotInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR, + .pNext = NULL, + .slotIndex = pic->slot, + .pPictureResource = &dpb_pic, + }; + + /* References for current picture */ + for (int i = 0; i < pic->nb_refs; i++) { + FFVulkanEncodePicture *ref = pic->refs[i]; + + ref_pic[i] = (VkVideoPictureResourceInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .pNext = NULL, + .codedOffset = { 0 }, + .codedExtent = (VkExtent2D){ src->width, src->height }, + .baseArrayLayer = 0, + .imageViewBinding = ref->dpb_view, + }; + ref_slot[i] = (VkVideoReferenceSlotInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR, + .pNext = NULL, + .slotIndex = ref->slot, + .pPictureResource = &ref_pic[i], + }; + } + + encode_info = (VkVideoEncodeInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_INFO_KHR, + .pNext = pic->codec_info, + .flags = 0x0, + .qualityLevel = 0, + .srcPictureResource = (VkVideoPictureResourceInfoKHR) { // SPEC: this should be separate + .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .pNext = NULL, + .codedOffset = 0, + .codedExtent = (VkExtent2D){ src->width, src->height }, + .baseArrayLayer = 0, + .imageViewBinding = pic->view, + }, + .pSetupReferenceSlot = 
&dpb_slot, /* pic->is_reference ? &ref_slot : NULL */ + .referenceSlotCount = pic->nb_refs, + .pReferenceSlots = ref_slot, + .dstBuffer = sd_buf->buf.buf, + .dstBufferOffset = 0, + .dstBufferRange = sd_buf->buf.size, + .precedingExternallyEncodedBytes = 0, + }; + + /* Reset encoder for the very first frame and on every keyframe */ + if (pic->display_order == 0 || pic->type == FF_VK_FRAME_KEY) + encode_ctrl.flags |= VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR; + + /* Write header */ + if (pic->type == FF_VK_FRAME_KEY && ctx->enc->write_stream_headers) { + uint8_t *hdr_dst = sd_buf->mem + encode_info.dstBufferOffset; + size_t data_size = encode_info.dstBufferRange; + err = ctx->enc->write_stream_headers(avctx, hdr_dst, &data_size); + if (err < 0) + goto fail; + encode_info.dstBufferOffset += data_size; + encode_info.dstBufferRange -= data_size; + } + + /* Write extra units */ + if (ctx->enc->write_extra_headers) { + uint8_t *hdr_dst = sd_buf->mem + encode_info.dstBufferOffset; + size_t data_size = encode_info.dstBufferRange; + err = ctx->enc->write_extra_headers(avctx, pic, hdr_dst, &data_size); + if (err < 0) + goto fail; + encode_info.dstBufferOffset += data_size; + encode_info.dstBufferRange -= data_size; + } + + /* Align buffer offset to the required value with filler units */ + if (ctx->enc->write_filler) { + uint8_t *hdr_dst = sd_buf->mem + encode_info.dstBufferOffset; + size_t data_size = encode_info.dstBufferRange; + + uint32_t offset = encode_info.dstBufferOffset; + size_t offset_align = ctx->common.caps.minBitstreamBufferOffsetAlignment; + + uint32_t filler_data = FFALIGN(offset, offset_align) - offset; + + if (filler_data) { + while (filler_data < ctx->enc->filler_header_size) + filler_data += offset_align; + + filler_data -= ctx->enc->filler_header_size; + + err = ctx->enc->write_filler(avctx, filler_data, + hdr_dst, &data_size); + if (err < 0) + goto fail; + encode_info.dstBufferOffset += data_size; + encode_info.dstBufferRange -= data_size; + } + } + + 
pic->pkt_buf_offset = encode_info.dstBufferOffset; + + /* Align buffer size to the nearest lower alignment requirement. */ + encode_info.dstBufferRange -= size_align; + encode_info.dstBufferRange = FFALIGN(encode_info.dstBufferRange, + size_align); + + /* Start command buffer recording */ + exec = ff_vk_exec_get(&ctx->enc_pool); + ff_vk_exec_start(&ctx->s, exec); + cmd_buf = exec->buf; + + /* Output packet buffer */ + err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &pic->pkt_buf, 1, 1); + if (err < 0) + goto fail; + + /* Source image - change encode to compute once we have analysis */ + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, src, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR); + if (err < 0) + goto fail; + + /* Source image layout conversion */ + img_bar[nb_img_bar] = (VkImageMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, + .pNext = NULL, + .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + .srcAccessMask = vkf->access[0], + .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR, + .dstAccessMask = VK_ACCESS_2_VIDEO_ENCODE_READ_BIT_KHR, + .oldLayout = vkf->layout[0], + .newLayout = VK_IMAGE_LAYOUT_VIDEO_ENCODE_SRC_KHR, + .srcQueueFamilyIndex = vkf->queue_family[0], + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = vkf->img[0], + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = pic->aspect, + .layerCount = 1, + .levelCount = 1, + }, + }; + ff_vk_exec_update_frame(&ctx->s, exec, src, + &img_bar[nb_img_bar], &nb_img_bar); + + /* Source image's DPB */ + if (encode_info.pSetupReferenceSlot) { + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, pic->dpb_frame, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR); + if (err < 0) + return err; + } + + /* Reference frame DPBs */ + if (!layered_dpb) { + for (int i = 0; i < pic->nb_refs; i++) { + FFVulkanEncodePicture *ref = pic->refs[i]; + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ref->dpb_frame, + 
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR); + if (err < 0) + return err; + } + } + + /* Change image layout */ + vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + /* Start, use parameters */ + vk->CmdBeginVideoCodingKHR(cmd_buf, &encode_start); + + /* Send control data */ + vk->CmdControlVideoCodingKHR(cmd_buf, &encode_ctrl); + + /* encode and fetch the status. + * SPEC: cannot insert AUD units between slices! */ + vk->CmdBeginQuery(cmd_buf, ctx->enc_pool.query_pool, exec->query_idx + 0, 0); + vk->CmdEncodeVideoKHR(cmd_buf, &encode_info); + vk->CmdEndQuery(cmd_buf, ctx->enc_pool.query_pool, exec->query_idx + 0); + + /* End encoding */ + vk->CmdEndVideoCodingKHR(cmd_buf, &encode_end); + + /* End recording and submit for execution */ + ff_vk_exec_submit(&ctx->s, exec); + + pic->encode_issued = 1; + pic->exec = exec; + + return 0; + +fail: + vkctx_frame_free(ctx, pic); + return err; +} + +static int vulkan_encode_output(AVCodecContext *avctx, + FFVulkanEncodeContext *ctx, + AVPacket *pkt, + FFVulkanEncodePicture *pic) +{ + int err; + VkResult ret; + int64_t qstatus = 0; + uint32_t pkt_size, *query_data; + FFVulkanFunctions *vk = &ctx->s.vkfn; + FFVkVideoBuffer *sd_buf = (FFVkVideoBuffer *)pic->pkt_buf->data; + + /* TODO: replace this with a semaphore wait, maybe? */ + ff_vk_exec_wait(&ctx->s, pic->exec); + + /* Get status */ + ret = ff_vk_exec_get_query(&ctx->s, pic->exec, (void **)&query_data, &qstatus); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Error querying results from encoder: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + // SPEC: signal if there hasn't been enough buffer with a unique return code */ + + if (qstatus < 0) { + av_log(ctx, AV_LOG_ERROR, "Error while encoding: %li!\n", qstatus); + return AVERROR(EINVAL); + } + + // SPEC: why? 
+ query_data[0] += pic->pkt_buf_offset; + + pkt_size = query_data[0] /* Buffer offset */ + + query_data[1] /* Data written */; + + av_log(ctx, AV_LOG_VERBOSE, "Received a packet, %u bytes large (%u off, %u data), " + "status = %i\n", pkt_size, query_data[0], query_data[1], err); + + if (!(sd_buf->buf.flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { + VkMappedMemoryRange invalidate_buf = { + .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, + .memory = sd_buf->buf.mem, + .offset = query_data[0], /* Should be already aligned */ + .size = FFALIGN(query_data[1], + ctx->s.props.properties.limits.nonCoherentAtomSize), + }; + + vk->FlushMappedMemoryRanges(ctx->s.hwctx->act_dev, 1, &invalidate_buf); + } + + /* Transfer data buffer ref */ + pkt->pts = pkt->dts = pic->pts; +// pkt->duration = pic->duration; + pkt->time_base = pic->time_base; + pkt->buf = pic->pkt_buf; + pkt->data = sd_buf->mem; + pkt->size = pkt_size; + pkt->flags = ( pic->type == FF_VK_FRAME_KEY ? AV_PKT_FLAG_KEY : 0x0) | + (!pic->is_reference ? AV_PKT_FLAG_DISPOSABLE : 0x0); + + if (ctx->output_delay == 0) { + pkt->dts = pkt->pts; + } else if (pic->encode_order < ctx->decode_delay) { + if (ctx->dts_ring[pic->encode_order] < INT64_MIN + ctx->dts_pts_diff) + pkt->dts = INT64_MIN; + else + pkt->dts = ctx->dts_ring[pic->encode_order] - ctx->dts_pts_diff; + } else { + pkt->dts = ctx->dts_ring[(pic->encode_order - ctx->decode_delay) % + (3 * ctx->output_delay + ctx->opts.async_depth)]; + } + + pic->encode_complete = 1; + pic->encode_size = pkt_size; + pic->pkt_buf = NULL; + + return 0; +} + +int ff_vulkan_encode_receive_packet(AVCodecContext *avctx, FFVulkanEncodeContext *ctx, + AVPacket *pkt) +{ + int err; + AVFrame *frame; + FFVulkanEncodePicture *cur_pic = &ctx->pic[avctx->frame_num % ctx->gop_size]; + int ft = !(avctx->frame_num % ctx->gop_size) ? 
FF_VK_FRAME_KEY : FF_VK_FRAME_P; + + { + /* TODO: remove this per-frame alloc */ + frame = av_frame_alloc(); + if (!frame) + return AVERROR(ENOMEM); + + err = ff_encode_get_frame(avctx, frame); + if (err < 0 && err != AVERROR_EOF) { + av_frame_free(&frame); + return err; + } + + if (err == AVERROR_EOF) { + av_frame_free(&frame); + return err; + } + + if (ft == FF_VK_FRAME_P) + cur_pic->prev = &ctx->pic[(avctx->frame_num % ctx->gop_size) - 1]; + + err = vkctx_frame_init(ctx, cur_pic, frame); + if (err < 0) { + av_frame_free(&frame); + return err; + } + + cur_pic->refs[0] = !(avctx->frame_num % ctx->gop_size) ? NULL : &ctx->pic[0]; + cur_pic->slot = avctx->frame_num % ctx->gop_size; + cur_pic->nb_refs = !!(avctx->frame_num % ctx->gop_size); + cur_pic->is_reference = ft == FF_VK_FRAME_KEY; + cur_pic->type = ft; + cur_pic->qp = 30; + cur_pic->encode_order = ctx->encode_order++; + + err = vulkan_encode_issue(avctx, ctx, cur_pic, frame); + av_frame_free(&frame); + if (err < 0) + return err; + } + + err = vulkan_encode_output(avctx, ctx, pkt, cur_pic); + if (err < 0) + return err; + + if (!((avctx->frame_num + 1) % ctx->gop_size)) { + vkctx_frame_free(ctx, &ctx->pic[0]); + vkctx_frame_free(ctx, &ctx->pic[1]); + vkctx_frame_free(ctx, &ctx->pic[2]); + } + + return pkt->size; +} diff --git a/libavcodec/vulkan_encode.h b/libavcodec/vulkan_encode.h new file mode 100644 index 0000000000000..1a98ae4680c88 --- /dev/null +++ b/libavcodec/vulkan_encode.h @@ -0,0 +1,258 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VULKAN_ENCODE_H +#define AVCODEC_VULKAN_ENCODE_H + +#include "encode.h" +#include "hwconfig.h" + +#include "vulkan_video.h" + +#define MAX_REORDER_DELAY 16 +#define MAX_ASYNC_DEPTH 64 +#define MAX_PICTURE_REFERENCES 2 + +enum FFVkFrameType { + FF_VK_FRAME_KEY, /* IDR in mpeg-ese */ + FF_VK_FRAME_I, /* mpeg-only */ + + FF_VK_FRAME_P, + FF_VK_FRAME_B, /* mpeg-only */ + + FF_VK_FRAME_S, /* av1-only */ +}; + +typedef struct FFVkEncodeCommonOptions { + int profile; + int async_depth; + VkVideoEncodeUsageFlagBitsKHR usage; + VkVideoEncodeContentFlagBitsKHR content; + VkVideoEncodeTuningModeKHR tune; +} FFVkEncodeCommonOptions; + +typedef struct FFVulkanEncodePicture { + int qp; + enum FFVkFrameType type; + int64_t display_order; + int64_t encode_order; + + int slot; + int64_t pts; + int64_t duration; + AVRational time_base; + int64_t reordered_opaque; + + FFVkExecContext *exec; + int encode_issued; + int encode_complete; + size_t encode_size; + + int force_idr; + int b_depth; + + int is_reference; /* Set if picture is used as a ref */ + + void *priv_data; + + void *codec_info; + void *codec_rc_layer; + + /* Input frame */ + VkImageView view; + VkImageAspectFlags aspect; + + /* DPB */ + AVFrame *dpb_frame; + VkImageView dpb_view; + VkImageAspectFlags dpb_aspect; + int dpb_layer; + + /* Packet buffer - contains an FFVkVideoBuffer struct */ + size_t pkt_buf_offset; // SPEC: MAKE UP YOUR MIND JAMMIT + AVBufferRef *pkt_buf; + + /* The previous reference picture in encode order. Must be in at least + * one of the reference list and DPB list. 
*/ + struct FFVulkanEncodePicture *prev; + struct FFVulkanEncodePicture *next; + + /* The contents of the DPB after this picture has been decoded. + * This will contain the picture itself if it is a reference picture, + * but not if it isn't. */ + struct FFVulkanEncodePicture *dpb[16]; + int nb_dpb_pics; + + /* The reference pictures used in decoding this picture. If they are + * used by later pictures they will also appear in the DPB. */ + struct FFVulkanEncodePicture *refs[MAX_PICTURE_REFERENCES]; + int nb_refs; + + /* Reference count for other pictures referring to this one through + * the above pointers, directly from incomplete pictures and indirectly + * through completed pictures. */ + int ref_count[2]; + int ref_removed[2]; +} FFVulkanEncodePicture; + +/** + * Callback for writing stream-level headers. + */ +typedef int (*vkenc_cb_write_stream_headers)(AVCodecContext *avctx, + uint8_t *data, size_t *data_len); + +/** + * Callback for initializing codec-specific picture headers. + */ +typedef int (*vkenc_cb_init_pic_headers)(AVCodecContext *avctx, + FFVulkanEncodePicture *pic); + +/** + * Callback for writing alignment data. + * Align is the value to align offset to. + */ +typedef int (*vkenc_cb_write_filler)(AVCodecContext *avctx, uint32_t filler, + uint8_t *data, size_t *data_len); + +/** + * Callback for writing any extra units requested. data_len must be set + * to the available size, and its value will be overwritten by the #bytes written + * to the output buffer. 
+ */ +typedef int (*vkenc_cb_write_extra_headers)(AVCodecContext *avctx, + FFVulkanEncodePicture *pic, + uint8_t *data, size_t *data_len); + +typedef struct FFVulkanEncoder { + size_t pic_priv_data_size; + uint32_t filler_header_size; + + vkenc_cb_write_stream_headers write_stream_headers; + vkenc_cb_init_pic_headers init_pic_headers; + vkenc_cb_write_filler write_filler; + vkenc_cb_write_extra_headers write_extra_headers; +} FFVulkanEncoder; + +typedef struct FFVulkanEncodeContext { + FFVulkanContext s; + FFVkVideoCommon common; + const FFVulkanEncoder *enc; + + int64_t input_order; + int64_t encode_order; + + FFVulkanEncodePicture *pic; + + int gop_size; + int64_t bitrate; + + int64_t first_pts; + int output_delay; + int decode_delay; + int end_of_stream; + int64_t dts_pts_diff; + + int64_t dts_ring[MAX_REORDER_DELAY * 3 + MAX_ASYNC_DEPTH]; + + /* Current encoding window, in display (input) order. */ + FFVulkanEncodePicture *pic_start, *pic_end; + /* The next picture to use as the previous reference picture in + * encoding order. 
*/ + FFVulkanEncodePicture *next_prev; + + int frame_num; + + /* DPB */ + AVBufferRef *dpb_hwfc_ref; + AVFrame *layered_frame; + int layered_dpb; + int *dpb_layer_taken; + int dpb_layers; + + VkVideoSessionParametersKHR session_params; + + VkSamplerYcbcrConversion yuv_sampler; + VkFormat pic_format; + + FFVkEncodeCommonOptions opts; + + VkVideoProfileInfoKHR profile; + VkVideoProfileListInfoKHR profile_list; + VkVideoEncodeCapabilitiesKHR enc_caps; + VkVideoEncodeUsageInfoKHR usage_info; + + FFVkQueueFamilyCtx qf_enc; + FFVkExecPool enc_pool; +} FFVulkanEncodeContext; + +#define FF_VK_ENCODE_COMMON_OPTS \ + { "tune", "Select tuning type", OFFSET(vkenc.opts.tune), AV_OPT_TYPE_INT, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_DEFAULT_KHR }, 0, INT_MAX, FLAGS, "tune" }, \ + { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_DEFAULT_KHR }, INT_MIN, INT_MAX, FLAGS, "tune" }, \ + { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_HIGH_QUALITY_KHR }, INT_MIN, INT_MAX, FLAGS, "tune" }, \ + { "ll", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_LOW_LATENCY_KHR }, INT_MIN, INT_MAX, FLAGS, "tune" }, \ + { "ull", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_ULTRA_LOW_LATENCY_KHR }, INT_MIN, INT_MAX, FLAGS, "tune" }, \ + { "lossless", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_LOSSLESS_KHR }, INT_MIN, INT_MAX, FLAGS, "tune" }, \ + { "usage", "Select usage type", OFFSET(vkenc.opts.usage), AV_OPT_TYPE_FLAGS, { .i64 = VK_VIDEO_DECODE_USAGE_DEFAULT_KHR }, 0, INT_MAX, FLAGS, "usage" }, \ + { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_DECODE_USAGE_DEFAULT_KHR }, INT_MIN, INT_MAX, FLAGS, "usage" }, \ + { "transcode", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_TRANSCODING_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "usage" }, \ + { "stream", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_STREAMING_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "usage" }, \ + { 
"record", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_RECORDING_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "usage" }, \ + { "conference", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_CONFERENCING_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "usage" }, \ + { "content", "Select content type", OFFSET(vkenc.opts.content), AV_OPT_TYPE_FLAGS, { .i64 = VK_VIDEO_ENCODE_CONTENT_DEFAULT_KHR }, 0, INT_MAX, FLAGS, "content" }, \ + { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_CONTENT_DEFAULT_KHR }, INT_MIN, INT_MAX, FLAGS, "content" }, \ + { "camera", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_CONTENT_CAMERA_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "content" }, \ + { "desktop", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_CONTENT_DESKTOP_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "content" }, \ + { "rendered", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_CONTENT_RENDERED_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "content" }, \ + { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(vkenc.opts.async_depth), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, MAX_ASYNC_DEPTH, FLAGS }, \ + +/** + * Paperwork. + */ +extern const AVCodecHWConfigInternal *const ff_vulkan_encode_hw_configs[]; + +/** + * Extension name and version. + */ +extern const VkExtensionProperties ff_vk_enc_ext[AV_CODEC_ID_FIRST_AUDIO]; + +/** + * Create image view for a frame. + */ +int ff_vk_encode_create_view(FFVulkanEncodeContext *ctx, VkImageView *dst_view, + VkImageAspectFlags *aspect, AVVkFrame *src, int layer); + +/** + * Initialize encoder. + */ +int ff_vulkan_encode_init(AVCodecContext *avctx, FFVulkanEncodeContext *ctx, + void *codec_profile, void *caps, + const FFVulkanEncoder *enc, + int output_delay, int decode_delay); + +/** + * Uninitialize encoder. + */ +void ff_vulkan_encode_uninit(FFVulkanEncodeContext *ctx); + +/** + * Encode. 
+ */ +int ff_vulkan_encode_receive_packet(AVCodecContext *avctx, FFVulkanEncodeContext *ctx, + AVPacket *pkt); + +#endif /* AVCODEC_VULKAN_ENCODE_H */ From c10b73dda915402d2a81e02c64b1e88655219450 Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 11 Jan 2023 09:11:23 +0100 Subject: [PATCH 87/98] lavc: add h264_vulkan encoder --- configure | 1 + libavcodec/Makefile | 2 + libavcodec/allcodecs.c | 1 + libavcodec/vulkan_encode_h264.c | 1124 +++++++++++++++++++++++++++++++ 4 files changed, 1128 insertions(+) create mode 100644 libavcodec/vulkan_encode_h264.c diff --git a/configure b/configure index 212254148748d..e9ce71ec285c6 100755 --- a/configure +++ b/configure @@ -3193,6 +3193,7 @@ h264_qsv_encoder_select="atsc_a53 qsvenc" h264_rkmpp_decoder_deps="rkmpp" h264_rkmpp_decoder_select="h264_mp4toannexb_bsf" h264_vaapi_encoder_select="atsc_a53 cbs_h264 vaapi_encode" +h264_vulkan_encoder_select="cbs_h264 vulkan_encode" h264_v4l2m2m_decoder_deps="v4l2_m2m h264_v4l2_m2m" h264_v4l2m2m_decoder_select="h264_mp4toannexb_bsf" h264_v4l2m2m_encoder_deps="v4l2_m2m h264_v4l2_m2m" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 4b3587e9703de..5664f692beb3a 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -420,6 +420,8 @@ OBJS-$(CONFIG_H264_VAAPI_ENCODER) += vaapi_encode_h264.o h264_levels.o \ OBJS-$(CONFIG_H264_VIDEOTOOLBOX_ENCODER) += videotoolboxenc.o OBJS-$(CONFIG_H264_V4L2M2M_DECODER) += v4l2_m2m_dec.o OBJS-$(CONFIG_H264_V4L2M2M_ENCODER) += v4l2_m2m_enc.o +OBJS-$(CONFIG_H264_VULKAN_ENCODER) += vulkan_encode_h264.o h264_levels.o \ + h2645data.o OBJS-$(CONFIG_HAP_DECODER) += hapdec.o hap.o OBJS-$(CONFIG_HAP_ENCODER) += hapenc.o hap.o OBJS-$(CONFIG_HCA_DECODER) += hcadec.o diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index 184bb8521f043..84203027fb2a6 100644 --- a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -852,6 +852,7 @@ extern const FFCodec ff_h264_qsv_encoder; extern const FFCodec ff_h264_v4l2m2m_encoder; extern const FFCodec 
ff_h264_vaapi_encoder; extern const FFCodec ff_h264_videotoolbox_encoder; +extern const FFCodec ff_h264_vulkan_encoder; extern const FFCodec ff_hevc_amf_encoder; extern const FFCodec ff_hevc_cuvid_decoder; extern const FFCodec ff_hevc_mediacodec_decoder; diff --git a/libavcodec/vulkan_encode_h264.c b/libavcodec/vulkan_encode_h264.c new file mode 100644 index 0000000000000..d5a44e14b5ff8 --- /dev/null +++ b/libavcodec/vulkan_encode_h264.c @@ -0,0 +1,1124 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/opt.h" + +#include "cbs.h" +#include "cbs_h264.h" +#include "h264_levels.h" +#include "h2645data.h" +#include "codec_internal.h" +#include "version.h" + +#include "vulkan_encode.h" + +enum UnitElems { + UNIT_AUD = 1 << 0, + UNIT_TIMING = 1 << 1, + UNIT_IDENTIFIER = 1 << 2, + UNIT_RECOVERY = 1 << 3, +}; + +/* Random (version 4) ISO 11578 UUID. 
 */
static const uint8_t vulkan_encode_h264_sei_identifier_uuid[16] = {
    0x03, 0xfd, 0xf2, 0x0a, 0x5d, 0x4c, 0x05, 0x48,
    0x20, 0x98, 0xca, 0x6b, 0x0c, 0x95, 0x30, 0x1c,
};

/**
 * Private context of the H.264 Vulkan encoder.
 *
 * The common FFVulkanEncodeContext is the first member, named vkenc.
 */
typedef struct VulkanEncodeH264Context {
    FFVulkanEncodeContext vkenc;
    VkVideoEncodeH264ProfileInfoEXT profile;
    VkVideoEncodeH264CapabilitiesEXT caps;

    int bit_rate;
    int output_delay;
    int decode_delay;
    int gop_size;
    int b_per_p;
    int dpb_frames;
    int max_b_depth;
    int hrd_initial_buffer_fullness;
    int hrd_buffer_size;

    /* Picture size in 16x16 macroblocks; the SPS setup multiplies these by 16
     * to compare against the coded width/height. */
    int mb_width;
    int mb_height;

    /* Options */
    enum UnitElems insert_units; /* bitmask of UNIT_* values */
    int coder;
    int desired_b_depth;

    /* State */
    enum UnitElems write_units; /* bitmask of UNIT_* values */

    /* SPS structs: the raw_* member is the coded bitstream (CBS) form, the
     * vk* members are the Vulkan Std video forms filled from it. */
    H264RawSPS raw_sps;
    StdVideoH264ScalingLists vksps_scaling;
    StdVideoH264HrdParameters vksps_vui_header;
    StdVideoH264SequenceParameterSetVui vksps_vui;
    StdVideoH264SequenceParameterSet vksps;

    /* PPS structs */
    H264RawPPS raw_pps;
    StdVideoH264ScalingLists vkpps_scaling;
    StdVideoH264PictureParameterSet vkpps;

    /* Structs needed for the coded bitstream (CBS) context "cbc" */
    H264RawAUD raw_aud;

    CodedBitstreamContext *cbc;
    CodedBitstreamFragment current_access_unit;
    SEIRawUserDataUnregistered sei_identifier;
    H264RawSEIBufferingPeriod sei_buffering_period;
    H264RawSEIPicTiming sei_pic_timing;
    H264RawSEIRecoveryPoint sei_recovery_point;
    char *sei_identifier_string;
} VulkanEncodeH264Context;

/* Per-picture H.264 state.
 * NOTE(review): presumably allocated as FFVulkanEncodePicture.priv_data with
 * FFVulkanEncoder.pic_priv_data_size bytes - confirm against the encoder
 * definition. */
typedef struct VulkanEncodeH264Picture {
    uint64_t frame_num;
    int64_t last_idr_frame;
    uint16_t idr_pic_id;
    uint16_t pic_order_cnt;

    StdVideoEncodeH264WeightTable slice_wt;
    StdVideoEncodeH264SliceHeader slice_hdr;
    VkVideoEncodeH264NaluSliceInfoEXT vkslice;
    StdVideoEncodeH264PictureInfo h264pic_info;
    VkVideoEncodeH264VclFrameInfoEXT vkh264pic_info;
    VkVideoEncodeH264RateControlLayerInfoEXT vkrc_layer_info;

    StdVideoEncodeH264ReferenceListsInfo final_list;
    VkVideoEncodeH264DpbSlotInfoEXT l0refs[37];
    VkVideoEncodeH264DpbSlotInfoEXT l1refs[37];
StdVideoEncodeH264ReferenceInfo l0ref_info[37]; + StdVideoEncodeH264ReferenceInfo l1ref_info[37]; + + StdVideoEncodeH264ReferenceListsInfo ref_list_info; + StdVideoEncodeH264RefListModEntry l0mods[37]; + StdVideoEncodeH264RefListModEntry l1mods[37]; + StdVideoEncodeH264RefPicMarkingEntry marks[37]; +} VulkanEncodeH264Picture; + +static av_cold int vulkan_encode_h264_init_seq_params(AVCodecContext *avctx) +{ + VulkanEncodeH264Context *enc = avctx->priv_data; + + H264RawSPS *sps = &enc->raw_sps; + H264RawHRD *hrd = &sps->vui.nal_hrd_parameters; + StdVideoH264ScalingLists *vksps_scaling = &enc->vksps_scaling; + StdVideoH264HrdParameters *vksps_vui_header = &enc->vksps_vui_header; + StdVideoH264SequenceParameterSetVui *vksps_vui = &enc->vksps_vui; + StdVideoH264SequenceParameterSet *vksps = &enc->vksps; + + H264RawPPS *pps = &enc->raw_pps; + StdVideoH264ScalingLists *vkpps_scaling = &enc->vkpps_scaling; + StdVideoH264PictureParameterSet *vkpps = &enc->vkpps; + + memset(sps, 0, sizeof(*sps)); + memset(pps, 0, sizeof(*pps)); + + sps->nal_unit_header.nal_ref_idc = 3; + sps->nal_unit_header.nal_unit_type = H264_NAL_SPS; + + sps->profile_idc = enc->vkenc.opts.profile & 0xff; + + if (sps->profile_idc == FF_PROFILE_H264_MAIN) + sps->constraint_set1_flag = 1; + + if (sps->profile_idc == FF_PROFILE_H264_HIGH) + sps->constraint_set3_flag = enc->gop_size == 1; + + if (sps->profile_idc == FF_PROFILE_H264_MAIN || + sps->profile_idc == FF_PROFILE_H264_HIGH) { + sps->constraint_set4_flag = 1; + sps->constraint_set5_flag = enc->b_per_p == 0; + } + + if (avctx->gop_size == 1) + enc->dpb_frames = 0; + else + enc->dpb_frames = 1 + enc->max_b_depth; + + if (avctx->level != FF_LEVEL_UNKNOWN) { + sps->level_idc = avctx->level; + } else { + const H264LevelDescriptor *level; + int framerate; + + if (avctx->framerate.num > 0 && avctx->framerate.den > 0) + framerate = avctx->framerate.num / avctx->framerate.den; + else + framerate = 0; + + level = ff_h264_guess_level(sps->profile_idc, + 
avctx->bit_rate, + framerate, + enc->mb_width * 16, + enc->mb_height * 16, + enc->dpb_frames); + if (level) { + av_log(avctx, AV_LOG_VERBOSE, "Using level %s.\n", level->name); + if (level->constraint_set3_flag) + sps->constraint_set3_flag = 1; + sps->level_idc = level->level_idc; + } else { + av_log(avctx, AV_LOG_WARNING, "Stream will not conform " + "to any level: using level 6.2.\n"); + sps->level_idc = 62; + } + } + + sps->seq_parameter_set_id = 0; + sps->chroma_format_idc = 1; + + sps->log2_max_frame_num_minus4 = 4; + sps->pic_order_cnt_type = enc->max_b_depth ? 0 : 2; + if (!sps->pic_order_cnt_type) + sps->log2_max_pic_order_cnt_lsb_minus4 = 4; + + sps->max_num_ref_frames = enc->dpb_frames; + + sps->pic_width_in_mbs_minus1 = enc->mb_width - 1; + sps->pic_height_in_map_units_minus1 = enc->mb_height - 1; + + sps->frame_mbs_only_flag = 1; + sps->direct_8x8_inference_flag = 1; + + if (avctx->width != 16 * enc->mb_width || + avctx->height != 16 * enc->mb_height) { + sps->frame_cropping_flag = 1; + + sps->frame_crop_left_offset = 0; + sps->frame_crop_right_offset = (16 * enc->mb_width - avctx->width) / 2; + sps->frame_crop_top_offset = 0; + sps->frame_crop_bottom_offset = (16 * enc->mb_height - avctx->height) / 2; + } else { + sps->frame_cropping_flag = 0; + } + + sps->vui_parameters_present_flag = 1; + + if (avctx->sample_aspect_ratio.num != 0 && + avctx->sample_aspect_ratio.den != 0) { + int num, den, i; + av_reduce(&num, &den, avctx->sample_aspect_ratio.num, + avctx->sample_aspect_ratio.den, 65535); + for (i = 0; i < FF_ARRAY_ELEMS(ff_h2645_pixel_aspect); i++) { + if (num == ff_h2645_pixel_aspect[i].num && + den == ff_h2645_pixel_aspect[i].den) { + sps->vui.aspect_ratio_idc = i; + break; + } + } + if (i >= FF_ARRAY_ELEMS(ff_h2645_pixel_aspect)) { + sps->vui.aspect_ratio_idc = 255; + sps->vui.sar_width = num; + sps->vui.sar_height = den; + } + sps->vui.aspect_ratio_info_present_flag = 1; + } + + /* Unspecified video format, from table E-2. 
*/ + sps->vui.video_format = 5; + sps->vui.video_full_range_flag = avctx->color_range == AVCOL_RANGE_JPEG; + sps->vui.colour_primaries = avctx->color_primaries; + sps->vui.transfer_characteristics = avctx->color_trc; + sps->vui.matrix_coefficients = avctx->colorspace; + if (avctx->color_primaries != AVCOL_PRI_UNSPECIFIED || + avctx->color_trc != AVCOL_TRC_UNSPECIFIED || + avctx->colorspace != AVCOL_SPC_UNSPECIFIED) + sps->vui.colour_description_present_flag = 1; + if (avctx->color_range != AVCOL_RANGE_UNSPECIFIED || + sps->vui.colour_description_present_flag) + sps->vui.video_signal_type_present_flag = 1; + + if (avctx->chroma_sample_location != AVCHROMA_LOC_UNSPECIFIED) { + sps->vui.chroma_loc_info_present_flag = 1; + sps->vui.chroma_sample_loc_type_top_field = + sps->vui.chroma_sample_loc_type_bottom_field = + avctx->chroma_sample_location - 1; + } + + sps->vui.timing_info_present_flag = 1; + if (avctx->framerate.num > 0 && avctx->framerate.den > 0) { + sps->vui.num_units_in_tick = avctx->framerate.den; + sps->vui.time_scale = 2 * avctx->framerate.num; + sps->vui.fixed_frame_rate_flag = 1; + } else { + sps->vui.num_units_in_tick = avctx->time_base.num; + sps->vui.time_scale = 2 * avctx->time_base.den; + sps->vui.fixed_frame_rate_flag = 0; + } + + if (enc->insert_units & UNIT_TIMING) { + H264RawHRD *hrd = &sps->vui.nal_hrd_parameters; + H264RawSEIBufferingPeriod *bp = &enc->sei_buffering_period; + + sps->vui.nal_hrd_parameters_present_flag = 1; + + hrd->cpb_cnt_minus1 = 0; + + /* Try to scale these to a sensible range so that the + * golomb encode of the value is not overlong. 
*/ + hrd->bit_rate_scale = av_clip_uintp2(av_log2(enc->bit_rate) - 15 - 6, 4); + hrd->bit_rate_value_minus1[0] = (enc->bit_rate >> hrd->bit_rate_scale + 6) - 1; + + hrd->cpb_size_scale = av_clip_uintp2(av_log2(enc->hrd_buffer_size) - 15 - 4, 4); + hrd->cpb_size_value_minus1[0] = (enc->hrd_buffer_size >> hrd->cpb_size_scale + 4) - 1; + + /* CBR mode as defined for the HRD cannot be achieved without filler + * data */ + hrd->cbr_flag[0] = 0; + + hrd->initial_cpb_removal_delay_length_minus1 = 23; + hrd->cpb_removal_delay_length_minus1 = 23; + hrd->dpb_output_delay_length_minus1 = 7; + hrd->time_offset_length = 0; + + bp->seq_parameter_set_id = sps->seq_parameter_set_id; + + // This calculation can easily overflow 32 bits. + bp->nal.initial_cpb_removal_delay[0] = 90000 * + (uint64_t)enc->hrd_initial_buffer_fullness / enc->hrd_buffer_size; + bp->nal.initial_cpb_removal_delay_offset[0] = 0; + } else { + sps->vui.nal_hrd_parameters_present_flag = 0; + sps->vui.low_delay_hrd_flag = 1 - sps->vui.fixed_frame_rate_flag; + } + + sps->vui.bitstream_restriction_flag = 1; + sps->vui.motion_vectors_over_pic_boundaries_flag = 1; + sps->vui.log2_max_mv_length_horizontal = 15; + sps->vui.log2_max_mv_length_vertical = 15; + sps->vui.max_num_reorder_frames = enc->max_b_depth; + sps->vui.max_dec_frame_buffering = enc->max_b_depth + 1; + + pps->nal_unit_header.nal_ref_idc = 3; + pps->nal_unit_header.nal_unit_type = H264_NAL_PPS; + + pps->pic_parameter_set_id = 0; + pps->seq_parameter_set_id = 0; + + pps->entropy_coding_mode_flag = + !(sps->profile_idc == FF_PROFILE_H264_BASELINE || + sps->profile_idc == FF_PROFILE_H264_EXTENDED || + sps->profile_idc == FF_PROFILE_H264_CAVLC_444); + if (!enc->coder && pps->entropy_coding_mode_flag) + pps->entropy_coding_mode_flag = 0; + + pps->num_ref_idx_l0_default_active_minus1 = 0; + pps->num_ref_idx_l1_default_active_minus1 = 0; + + pps->pic_init_qp_minus26 = 0; // TODO - fix, I have no idea + + if (sps->profile_idc == FF_PROFILE_H264_BASELINE || + 
sps->profile_idc == FF_PROFILE_H264_EXTENDED || + sps->profile_idc == FF_PROFILE_H264_MAIN) { + pps->more_rbsp_data = 0; + } else { + pps->more_rbsp_data = 1; + + pps->transform_8x8_mode_flag = 1; + } + + *vksps_scaling = (StdVideoH264ScalingLists) { + .scaling_list_present_mask = sps->seq_scaling_matrix_present_flag, + .use_default_scaling_matrix_mask = 1, + }; + + *vksps_vui_header = (StdVideoH264HrdParameters) { + .cpb_cnt_minus1 = hrd->cpb_cnt_minus1, + .bit_rate_scale = hrd->bit_rate_scale, + .initial_cpb_removal_delay_length_minus1 = hrd->initial_cpb_removal_delay_length_minus1, + .cpb_removal_delay_length_minus1 = hrd->cpb_removal_delay_length_minus1, + .dpb_output_delay_length_minus1 = hrd->dpb_output_delay_length_minus1, + .time_offset_length = hrd->time_offset_length, + }; + + for (int i = 0; i < H264_MAX_CPB_CNT; i++) { + vksps_vui_header->bit_rate_value_minus1[i] = hrd->bit_rate_value_minus1[i]; + vksps_vui_header->cpb_size_value_minus1[i] = hrd->cpb_size_value_minus1[i]; + vksps_vui_header->cbr_flag[i] = hrd->cbr_flag[i]; + } + + *vksps_vui = (StdVideoH264SequenceParameterSetVui) { + .aspect_ratio_idc = sps->vui.aspect_ratio_idc, + .sar_width = sps->vui.sar_width, + .sar_height = sps->vui.sar_height, + .video_format = sps->vui.video_format, + .colour_primaries = sps->vui.colour_primaries, + .transfer_characteristics = sps->vui.transfer_characteristics, + .matrix_coefficients = sps->vui.matrix_coefficients, + .num_units_in_tick = sps->vui.num_units_in_tick, + .time_scale = sps->vui.time_scale, + .pHrdParameters = vksps_vui_header, + .max_num_reorder_frames = sps->vui.max_num_reorder_frames, + .max_dec_frame_buffering = sps->vui.max_dec_frame_buffering, + .flags = (StdVideoH264SpsVuiFlags) { + .aspect_ratio_info_present_flag = sps->vui.aspect_ratio_info_present_flag, + .overscan_info_present_flag = sps->vui.overscan_info_present_flag, + .overscan_appropriate_flag = sps->vui.overscan_appropriate_flag, + .video_signal_type_present_flag = 
sps->vui.video_signal_type_present_flag, + .video_full_range_flag = sps->vui.video_full_range_flag, + .color_description_present_flag = sps->vui.colour_description_present_flag, + .chroma_loc_info_present_flag = sps->vui.chroma_loc_info_present_flag, + .timing_info_present_flag = sps->vui.timing_info_present_flag, + .fixed_frame_rate_flag = sps->vui.fixed_frame_rate_flag, + .bitstream_restriction_flag = sps->vui.bitstream_restriction_flag, + .nal_hrd_parameters_present_flag = sps->vui.nal_hrd_parameters_present_flag, + .vcl_hrd_parameters_present_flag = sps->vui.vcl_hrd_parameters_present_flag, + }, + }; + + *vksps = (StdVideoH264SequenceParameterSet) { + .profile_idc = sps->profile_idc, + .level_idc = sps->level_idc, + .seq_parameter_set_id = sps->seq_parameter_set_id, + .chroma_format_idc = sps->chroma_format_idc, + .bit_depth_luma_minus8 = sps->bit_depth_luma_minus8, + .bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8, + .log2_max_frame_num_minus4 = sps->log2_max_frame_num_minus4, + .pic_order_cnt_type = sps->pic_order_cnt_type, + .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4, + .offset_for_non_ref_pic = sps->offset_for_non_ref_pic, + .offset_for_top_to_bottom_field = sps->offset_for_top_to_bottom_field, + .num_ref_frames_in_pic_order_cnt_cycle = sps->num_ref_frames_in_pic_order_cnt_cycle, + .max_num_ref_frames = sps->max_num_ref_frames, + .pic_width_in_mbs_minus1 = sps->pic_width_in_mbs_minus1, + .pic_height_in_map_units_minus1 = sps->pic_height_in_map_units_minus1, + .frame_crop_left_offset = sps->frame_crop_left_offset, + .frame_crop_right_offset = sps->frame_crop_right_offset, + .frame_crop_top_offset = sps->frame_crop_top_offset, + .frame_crop_bottom_offset = sps->frame_crop_bottom_offset, + .flags = (StdVideoH264SpsFlags) { + .constraint_set0_flag = sps->constraint_set0_flag, + .constraint_set1_flag = sps->constraint_set1_flag, + .constraint_set2_flag = sps->constraint_set2_flag, + .constraint_set3_flag = 
sps->constraint_set3_flag, + .constraint_set4_flag = sps->constraint_set4_flag, + .constraint_set5_flag = sps->constraint_set5_flag, + .direct_8x8_inference_flag = sps->direct_8x8_inference_flag, + .mb_adaptive_frame_field_flag = sps->mb_adaptive_frame_field_flag, + .frame_mbs_only_flag = sps->frame_mbs_only_flag, + .delta_pic_order_always_zero_flag = sps->delta_pic_order_always_zero_flag, + .separate_colour_plane_flag = sps->separate_colour_plane_flag, + .gaps_in_frame_num_value_allowed_flag = sps->gaps_in_frame_num_allowed_flag, + .qpprime_y_zero_transform_bypass_flag = sps->qpprime_y_zero_transform_bypass_flag, + .frame_cropping_flag = sps->frame_cropping_flag, + .seq_scaling_matrix_present_flag = sps->seq_scaling_matrix_present_flag, + .vui_parameters_present_flag = sps->vui_parameters_present_flag, + }, + .pOffsetForRefFrame = sps->offset_for_ref_frame, + .pSequenceParameterSetVui = vksps_vui, + }; + + *vkpps_scaling = (StdVideoH264ScalingLists) { + .scaling_list_present_mask = pps->pic_scaling_matrix_present_flag, + .use_default_scaling_matrix_mask = 1, + }; + + *vkpps = (StdVideoH264PictureParameterSet) { + .seq_parameter_set_id = pps->seq_parameter_set_id, + .pic_parameter_set_id = pps->pic_parameter_set_id, + .num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active_minus1, + .num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active_minus1, + .weighted_bipred_idc = pps->weighted_bipred_idc, + .pic_init_qp_minus26 = pps->pic_init_qp_minus26, + .pic_init_qs_minus26 = pps->pic_init_qs_minus26, + .chroma_qp_index_offset = pps->chroma_qp_index_offset, + .second_chroma_qp_index_offset = pps->second_chroma_qp_index_offset, + .flags = (StdVideoH264PpsFlags) { + .transform_8x8_mode_flag = pps->transform_8x8_mode_flag, + .redundant_pic_cnt_present_flag = pps->redundant_pic_cnt_present_flag, + .constrained_intra_pred_flag = pps->constrained_intra_pred_flag, + .deblocking_filter_control_present_flag = 
pps->deblocking_filter_control_present_flag, + .weighted_pred_flag = pps->weighted_pred_flag, + .bottom_field_pic_order_in_frame_present_flag = pps->bottom_field_pic_order_in_frame_present_flag, + .entropy_coding_mode_flag = pps->entropy_coding_mode_flag, + .pic_scaling_matrix_present_flag = pps->pic_scaling_matrix_present_flag, + }, + }; + + return 0; +} + +static int vulkan_encode_h264_add_nal(AVCodecContext *avctx, + CodedBitstreamFragment *au, + void *nal_unit) +{ + H264RawNALUnitHeader *header = nal_unit; + + int err = ff_cbs_insert_unit_content(au, -1, + header->nal_unit_type, nal_unit, NULL); + if (err < 0) + av_log(avctx, AV_LOG_ERROR, "Failed to add NAL unit: " + "type = %d.\n", header->nal_unit_type); + + return err; +} + +static int vulkan_encode_h264_write_access_unit(AVCodecContext *avctx, + uint8_t *data, size_t *data_len, + CodedBitstreamFragment *au) +{ + VulkanEncodeH264Context *enc = avctx->priv_data; + + int err = ff_cbs_write_fragment_data(enc->cbc, au); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to write packed header.\n"); + return err; + } + + if (*data_len < au->data_size) { + av_log(avctx, AV_LOG_ERROR, "Access unit too large: %zu < %zu.\n", + *data_len, au->data_size); + return AVERROR(ENOSPC); + } + + memcpy(data, au->data, au->data_size); + *data_len = au->data_size; + + return 0; +} + +static int vulkan_encode_h264_write_sequence_header(AVCodecContext *avctx, + uint8_t *data, size_t *data_len) +{ + int err; + VulkanEncodeH264Context *enc = avctx->priv_data; + CodedBitstreamFragment *au = &enc->current_access_unit; + + if (enc->write_units & UNIT_AUD) { + err = vulkan_encode_h264_add_nal(avctx, au, &enc->raw_aud); + if (err < 0) + goto fail; + } + + err = vulkan_encode_h264_add_nal(avctx, au, &enc->raw_sps); + if (err < 0) + goto fail; + + err = vulkan_encode_h264_add_nal(avctx, au, &enc->raw_pps); + if (err < 0) + goto fail; + + err = vulkan_encode_h264_write_access_unit(avctx, data, data_len, au); +fail: + 
ff_cbs_fragment_reset(au); + return err; +} + +static int vulkan_encode_h264_write_filler(AVCodecContext *avctx, uint32_t filler, + uint8_t *data, size_t *data_len) +{ + int err; + VulkanEncodeH264Context *enc = avctx->priv_data; + CodedBitstreamFragment *au = &enc->current_access_unit; + + H264RawFiller raw_filler = { + .nal_unit_header = { + .nal_unit_type = H264_NAL_FILLER_DATA, + }, + .filler_size = filler, + }; + + err = vulkan_encode_h264_add_nal(avctx, au, &raw_filler); + if (err < 0) + goto fail; + + err = vulkan_encode_h264_write_access_unit(avctx, data, data_len, au); +fail: + ff_cbs_fragment_reset(au); + return err; +} + +static av_cold int vulkan_encode_h264_create_session(AVCodecContext *avctx) +{ + VkResult ret; + VulkanEncodeH264Context *enc = avctx->priv_data; + FFVulkanFunctions *vk = &enc->vkenc.s.vkfn; + + VkVideoEncodeH264SessionParametersAddInfoEXT h264_params_info; + VkVideoEncodeH264SessionParametersCreateInfoEXT h264_params; + VkVideoSessionParametersCreateInfoKHR session_params_create; + + h264_params_info = (VkVideoEncodeH264SessionParametersAddInfoEXT) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_SESSION_PARAMETERS_ADD_INFO_EXT, + .pStdSPSs = &enc->vksps, + .stdSPSCount = 1, + .pStdPPSs = &enc->vkpps, + .stdPPSCount = 1, + }; + h264_params = (VkVideoEncodeH264SessionParametersCreateInfoEXT) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_SESSION_PARAMETERS_CREATE_INFO_EXT, + .maxStdSPSCount = 1, + .maxStdPPSCount = 1, + .pParametersAddInfo = &h264_params_info, + }; + session_params_create = (VkVideoSessionParametersCreateInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR, + .pNext = &h264_params, + .videoSession = enc->vkenc.common.session, + .videoSessionParametersTemplate = NULL, + }; + + /* Create session parameters */ + ret = vk->CreateVideoSessionParametersKHR(enc->vkenc.s.hwctx->act_dev, &session_params_create, + enc->vkenc.s.hwctx->alloc, &enc->vkenc.session_params); + if (ret != VK_SUCCESS) { + 
av_log(avctx, AV_LOG_ERROR, "Unable to create Vulkan video session parameters: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + return 0; +} + +static int vulkan_encode_h264_init_pic_headers(AVCodecContext *avctx, + FFVulkanEncodePicture *pic) +{ + VulkanEncodeH264Context *enc = avctx->priv_data; + VulkanEncodeH264Picture *hpic = pic->priv_data; + FFVulkanEncodePicture *prev = pic->prev; + VulkanEncodeH264Picture *hprev = prev ? prev->priv_data : NULL; + + int qp = pic->qp; + int cpb_delay; + int dpb_delay; + int primary_pic_type; + int slice_type; + + if (pic->type == FF_VK_FRAME_KEY) { + av_assert0(pic->display_order == pic->encode_order); + + hpic->frame_num = 0; + hpic->last_idr_frame = pic->display_order; + hpic->idr_pic_id = hprev ? hprev->idr_pic_id + 1 : 0; + + primary_pic_type = 0; + slice_type = 7; // SPEC: add slice types above 5 + } else { + av_assert0(prev); + + hpic->frame_num = hprev->frame_num + prev->is_reference; + hpic->last_idr_frame = hprev->last_idr_frame; + hpic->idr_pic_id = hprev->idr_pic_id; + + /* SPEC: missing StdVideoH264PictureType entries */ + if (pic->type == FF_VK_FRAME_I) { + slice_type = 7; + primary_pic_type = 0; + } else if (pic->type == FF_VK_FRAME_P) { + slice_type = 5; + primary_pic_type = 1; + } else { + slice_type = 6; + primary_pic_type = 2; + } + } + + hpic->pic_order_cnt = pic->display_order - hpic->last_idr_frame; + if (enc->raw_sps.pic_order_cnt_type == 2) + hpic->pic_order_cnt *= 2; + + dpb_delay = pic->display_order - pic->encode_order + enc->max_b_depth; + cpb_delay = pic->encode_order - hpic->last_idr_frame; + + enc->write_units = 0x0; + + if (pic->display_order == 0 && enc->insert_units & UNIT_IDENTIFIER) + enc->write_units |= UNIT_IDENTIFIER; + + if (enc->insert_units & UNIT_AUD) { + enc->raw_aud = (H264RawAUD) { + .nal_unit_header = { + .nal_unit_type = H264_NAL_AUD, + }, + .primary_pic_type = primary_pic_type, + }; + enc->write_units |= UNIT_AUD; + } + if (enc->insert_units & UNIT_TIMING) { + 
enc->sei_pic_timing = (H264RawSEIPicTiming) { + .cpb_removal_delay = 2 * cpb_delay, + .dpb_output_delay = 2 * dpb_delay, + }; + enc->write_units |= UNIT_TIMING; + } + if (enc->insert_units & UNIT_RECOVERY && pic->type == FF_VK_FRAME_I) { + enc->sei_recovery_point = (H264RawSEIRecoveryPoint) { + .recovery_frame_cnt = 0, + .exact_match_flag = 1, + .broken_link_flag = enc->b_per_p > 0, + }; + enc->write_units |= UNIT_RECOVERY; + } + + hpic->slice_wt = (StdVideoEncodeH264WeightTable) { + .flags = (StdVideoEncodeH264WeightTableFlags) { + .luma_weight_l0_flag = 0, + .chroma_weight_l0_flag = 0, + .luma_weight_l1_flag = 0, + .chroma_weight_l1_flag = 0, + }, + .luma_log2_weight_denom = 0, + .chroma_log2_weight_denom = 0, + .luma_weight_l0 = { 0 }, + .luma_offset_l0 = { 0 }, + .chroma_weight_l0 = { { 0 } }, + .chroma_offset_l0 = { { 0 } }, + .luma_weight_l1 = { 0 }, + .luma_offset_l1 = { 0 }, + .chroma_weight_l1 = { { 0 } }, + .chroma_offset_l1 = { { 0 } }, + }; + + hpic->slice_hdr = (StdVideoEncodeH264SliceHeader) { + .flags = (StdVideoEncodeH264SliceHeaderFlags) { + .direct_spatial_mv_pred_flag = 0, + .num_ref_idx_active_override_flag = 0, + .no_output_of_prior_pics_flag = 0, + .adaptive_ref_pic_marking_mode_flag = 0, + .no_prior_references_available_flag = 0, + }, + .first_mb_in_slice = 0, + .slice_type = slice_type, + .idr_pic_id = hpic->idr_pic_id, + .num_ref_idx_l0_active_minus1 = 0, + .num_ref_idx_l1_active_minus1 = 0, + .cabac_init_idc = 0, + .disable_deblocking_filter_idc = 1, + .slice_alpha_c0_offset_div2 = 0, + .slice_beta_offset_div2 = 0, + .pWeightTable = &hpic->slice_wt, + }; + + hpic->vkslice = (VkVideoEncodeH264NaluSliceInfoEXT) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_NALU_SLICE_INFO_EXT, + .pNext = NULL, + .mbCount = enc->mb_width * enc->mb_height, + .pStdReferenceFinalLists = NULL, + .pStdSliceHeader = &hpic->slice_hdr, + }; + + hpic->h264pic_info = (StdVideoEncodeH264PictureInfo) { + .flags = (StdVideoEncodeH264PictureInfoFlags) { + .idr_flag = 
pic->type == FF_VK_FRAME_KEY, + .is_reference_flag = pic->is_reference, + .used_for_long_term_reference = 0, + }, + .seq_parameter_set_id = enc->raw_sps.seq_parameter_set_id, + .pic_parameter_set_id = enc->raw_pps.pic_parameter_set_id, + .pictureType = pic->type == FF_VK_FRAME_P ? STD_VIDEO_H264_PICTURE_TYPE_P : + pic->type == FF_VK_FRAME_B ? STD_VIDEO_H264_PICTURE_TYPE_B : + pic->type == FF_VK_FRAME_I ? STD_VIDEO_H264_PICTURE_TYPE_I : + STD_VIDEO_H264_PICTURE_TYPE_IDR, + .frame_num = hpic->frame_num, + .PicOrderCnt = hpic->pic_order_cnt, + }; + +#if 0 + hpic->ref_list_info = (StdVideoEncodeH264ReferenceListsInfo) { + .flags = (StdVideoEncodeH264ReferenceListsInfoFlags) { + .ref_pic_list_modification_flag_l0 = 0, + .ref_pic_list_modification_flag_l1 = 0, + }, + .pRefList0ModOperations = hpic->l0mods, + .refList0ModOpCount = 0, + .pRefList1ModOperations = hpic->l1mods, + .refList1ModOpCount = 0, + .pRefPicMarkingOperations = hpic->marks, + .refPicMarkingOpCount = 0, + }; + + for (int i = 0; i < pic->nb_refs; i++) { + FFVulkanEncodePicture *ref = pic->refs[i]; + VulkanEncodeH264Picture *href = ref->priv_data; + + hpic->l0ref_info[0] = (StdVideoEncodeH264ReferenceInfo) { + .flags = (StdVideoEncodeH264ReferenceInfoFlags) { + .used_for_long_term_reference = 0, + }, + .FrameNum = href->frame_num, + .PicOrderCnt = href->pic_order_cnt, + .long_term_pic_num = 0, + .long_term_frame_idx = 0, + }; + + hpic->l0refs[i] = (VkVideoEncodeH264DpbSlotInfoEXT) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_DPB_SLOT_INFO_EXT, + .pStdReferenceInfo = &hpic->l0ref_info[i], + }; + } + + hpic->final_list = (StdVideoEncodeH264ReferenceListsInfo) { + .pReferenceList0Entries = hpic->l0refs, + .referenceList0EntryCount = pic->nb_refs, + .pReferenceList1Entries = hpic->l1refs, + .pReferenceList1Entries = 0, + .pMemMgmtCtrlOperations = &hpic->mem_mgmt, + }; +#endif + + hpic->vkh264pic_info = (VkVideoEncodeH264VclFrameInfoEXT) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_VCL_FRAME_INFO_EXT, 
+ .pNext = NULL, + .pStdReferenceFinalLists = &hpic->final_list, + .naluSliceEntryCount = 1, + .pNaluSliceEntries = &hpic->vkslice, + .pStdPictureInfo = &hpic->h264pic_info, + }; + + hpic->vkrc_layer_info = (VkVideoEncodeH264RateControlLayerInfoEXT) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_RATE_CONTROL_LAYER_INFO_EXT, + .minQp = (VkVideoEncodeH264QpEXT){ qp, qp, qp }, + .maxQp = (VkVideoEncodeH264QpEXT){ qp, qp, qp }, + .useMinQp = 1, + .useMaxQp = 1, + }; + + pic->codec_info = &hpic->vkh264pic_info; + pic->codec_rc_layer = &hpic->vkrc_layer_info; + + return 0; +} + +static int vulkan_encode_h264_write_extra_headers(AVCodecContext *avctx, + FFVulkanEncodePicture *pic, + uint8_t *data, size_t *data_len) +{ + int err; + VulkanEncodeH264Context *enc = avctx->priv_data; + CodedBitstreamFragment *au = &enc->current_access_unit; + + if (enc->write_units) { + if (enc->write_units & UNIT_AUD) { + err = vulkan_encode_h264_add_nal(avctx, au, &enc->raw_aud); + if (err < 0) + goto fail; + } + + if (enc->write_units & UNIT_IDENTIFIER) { + err = ff_cbs_sei_add_message(enc->cbc, au, 1, + SEI_TYPE_USER_DATA_UNREGISTERED, + &enc->sei_identifier, NULL); + if (err < 0) + goto fail; + } + if (enc->write_units & UNIT_TIMING) { + if (pic->type == FF_VK_FRAME_KEY) { + err = ff_cbs_sei_add_message(enc->cbc, au, 1, + SEI_TYPE_BUFFERING_PERIOD, + &enc->sei_buffering_period, NULL); + if (err < 0) + goto fail; + } + err = ff_cbs_sei_add_message(enc->cbc, au, 1, + SEI_TYPE_PIC_TIMING, + &enc->sei_pic_timing, NULL); + if (err < 0) + goto fail; + } + if (enc->write_units & UNIT_RECOVERY) { + err = ff_cbs_sei_add_message(enc->cbc, au, 1, + SEI_TYPE_RECOVERY_POINT, + &enc->sei_recovery_point, NULL); + if (err < 0) + goto fail; + } + + err = vulkan_encode_h264_write_access_unit(avctx, data, data_len, au); + if (err < 0) + goto fail; + + ff_cbs_fragment_reset(au); + + return 0; + } + +fail: + ff_cbs_fragment_reset(au); + return err; +} + +static const FFVulkanEncoder encoder = { + 
.pic_priv_data_size = sizeof(VulkanEncodeH264Picture), + .write_stream_headers = vulkan_encode_h264_write_sequence_header, + .init_pic_headers = vulkan_encode_h264_init_pic_headers, + .write_filler = vulkan_encode_h264_write_filler, + .filler_header_size = 6, + .write_extra_headers = vulkan_encode_h264_write_extra_headers, +}; + +static av_cold int vulkan_encode_h264_init(AVCodecContext *avctx) +{ + int err; + VulkanEncodeH264Context *enc = avctx->priv_data; + + enc->profile = (VkVideoEncodeH264ProfileInfoEXT) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_PROFILE_INFO_EXT, + .stdProfileIdc = enc->vkenc.opts.profile, + }; + + enc->caps = (VkVideoEncodeH264CapabilitiesEXT) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_CAPABILITIES_EXT, + }; + + err = ff_cbs_init(&enc->cbc, AV_CODEC_ID_H264, avctx); + if (err < 0) + return err; + + enc->mb_width = FFALIGN(avctx->width, 16) / 16; + enc->mb_height = FFALIGN(avctx->height, 16) / 16; + + enc->bit_rate = avctx->bit_rate; + enc->gop_size = 3; /* avctx->gop_size; */ + enc->b_per_p = 0; /* avctx->max_b_frames; */ + enc->max_b_depth = 0; /* FFMIN(enc->desired_b_depth, + av_log2(enc->b_per_p) + 1); */ + + enc->vkenc.gop_size = enc->gop_size; + enc->vkenc.bitrate =enc->bit_rate; + + err = ff_vulkan_encode_init(avctx, &enc->vkenc, &enc->profile, &enc->caps, + &encoder, enc->b_per_p, enc->max_b_depth); + if (err < 0) + return err; + + av_log(avctx, AV_LOG_VERBOSE, "H264 encoder capabilities:\n"); + av_log(avctx, AV_LOG_VERBOSE, " Capability flags:\n"); + av_log(avctx, AV_LOG_VERBOSE, " 8x8_inference:%s%s\n", + enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_DIRECT_8X8_INFERENCE_ENABLED_BIT_EXT ? + " enabling" : "", + enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_DIRECT_8X8_INFERENCE_DISABLED_BIT_EXT ? 
+ " disabling" : ""); + av_log(avctx, AV_LOG_VERBOSE, " separate_color_plane: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_SEPARATE_COLOUR_PLANE_BIT_EXT)); + av_log(avctx, AV_LOG_VERBOSE, " qprime_y_zero_transform_bypass: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_QPPRIME_Y_ZERO_TRANSFORM_BYPASS_BIT_EXT)); + av_log(avctx, AV_LOG_VERBOSE, " scaling_lists: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_SCALING_LISTS_BIT_EXT)); + av_log(avctx, AV_LOG_VERBOSE, " hrd_compliance: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_HRD_COMPLIANCE_BIT_EXT)); + av_log(avctx, AV_LOG_VERBOSE, " chroma_qp_offset: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_CHROMA_QP_OFFSET_BIT_EXT)); + av_log(avctx, AV_LOG_VERBOSE, " second_chroma_qp_offset: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_SECOND_CHROMA_QP_OFFSET_BIT_EXT)); + av_log(avctx, AV_LOG_VERBOSE, " pic_init_qp: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_PIC_INIT_QP_MINUS26_BIT_EXT)); + av_log(avctx, AV_LOG_VERBOSE, " weighted:%s%s%s%s\n", + enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_WEIGHTED_PRED_BIT_EXT ? + " pred" : "", + enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_WEIGHTED_BIPRED_EXPLICIT_BIT_EXT ? + " bipred_explicit" : "", + enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_WEIGHTED_BIPRED_IMPLICIT_BIT_EXT ? + " bipred_implicit" : "", + enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_WEIGHTED_PRED_NO_TABLE_BIT_EXT ? + " pred_no_table" : ""); + av_log(avctx, AV_LOG_VERBOSE, " 8x8_transforms: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_TRANSFORM_8X8_BIT_EXT)); + av_log(avctx, AV_LOG_VERBOSE, " coder:%s%s\n", + enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_CABAC_BIT_EXT ? + " cabac" : "", + enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_CAVLC_BIT_EXT ? 
+ " cavlc" : ""); + av_log(avctx, AV_LOG_VERBOSE, " deblock:%s%s%s\n", + enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_DEBLOCKING_FILTER_DISABLED_BIT_EXT ? + " filter_disabling" : "", + enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_DEBLOCKING_FILTER_ENABLED_BIT_EXT ? + " filter_enabling" : "", + enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_DEBLOCKING_FILTER_PARTIAL_BIT_EXT ? + " filter_partial" : ""); + av_log(avctx, AV_LOG_VERBOSE, " disable_direct_spatial_mv_pred: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_DISABLE_DIRECT_SPATIAL_MV_PRED_BIT_EXT)); + av_log(avctx, AV_LOG_VERBOSE, " multiple_slices: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_MULTIPLE_SLICE_PER_FRAME_BIT_EXT)); + av_log(avctx, AV_LOG_VERBOSE, " slice_mb_count: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_SLICE_MB_COUNT_BIT_EXT)); + av_log(avctx, AV_LOG_VERBOSE, " row_unaligned_slice: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_ROW_UNALIGNED_SLICE_BIT_EXT)); + av_log(avctx, AV_LOG_VERBOSE, " different_slice_type: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_DIFFERENT_SLICE_TYPE_BIT_EXT)); + av_log(avctx, AV_LOG_VERBOSE, " b_frame_in_l1_list: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_B_FRAME_IN_L1_LIST_BIT_EXT)); + + av_log(avctx, AV_LOG_VERBOSE, " L0_refs: %i P's; %i B's\n", + enc->caps.maxPPictureL0ReferenceCount, + enc->caps.maxBPictureL0ReferenceCount); + + av_log(avctx, AV_LOG_VERBOSE, " L1_refs: %i\n", + enc->caps.maxL1ReferenceCount); + + av_log(avctx, AV_LOG_VERBOSE, " bitstream_restriction:\n"); + av_log(avctx, AV_LOG_VERBOSE, " mvs_over_pic_boundaries: %i\n", + enc->caps.motionVectorsOverPicBoundariesFlag); + av_log(avctx, AV_LOG_VERBOSE, " max_bytes_per_pic_denom: %i\n", + enc->caps.maxBytesPerPicDenom); + av_log(avctx, AV_LOG_VERBOSE, " max_bits_per_mb_denom: %i\n", + enc->caps.maxBitsPerMbDenom); + av_log(avctx, AV_LOG_VERBOSE, " log2_max_mv_length_hor: %i\n", + 
enc->caps.log2MaxMvLengthHorizontal); + av_log(avctx, AV_LOG_VERBOSE, " log2_max_mv_length_ver: %i\n", + enc->caps.log2MaxMvLengthVertical); + + if (enc->insert_units & UNIT_IDENTIFIER) { + int len; + + memcpy(enc->sei_identifier.uuid_iso_iec_11578, + vulkan_encode_h264_sei_identifier_uuid, + sizeof(enc->sei_identifier.uuid_iso_iec_11578)); + + len = snprintf(NULL, 0, + "%s / Vulkan video %i.%i.%i / %s %i.%i.%i / %s", + LIBAVCODEC_IDENT, + CODEC_VER(ff_vk_enc_ext[avctx->codec_id].specVersion), + enc->vkenc.s.driver_props.driverName, + CODEC_VER(enc->vkenc.s.props.properties.driverVersion), + enc->vkenc.s.props.properties.deviceName); + + if (len >= 0) { + enc->sei_identifier_string = av_malloc(len + 1); + if (!enc->sei_identifier_string) + return AVERROR(ENOMEM); + + len = snprintf(enc->sei_identifier_string, len + 1, + "%s / Vulkan video %i.%i.%i / %s %i.%i.%i / %s", + LIBAVCODEC_IDENT, + CODEC_VER(ff_vk_enc_ext[avctx->codec_id].specVersion), + enc->vkenc.s.driver_props.driverName, + CODEC_VER(enc->vkenc.s.props.properties.driverVersion), + enc->vkenc.s.props.properties.deviceName); + + enc->sei_identifier.data = enc->sei_identifier_string; + enc->sei_identifier.data_length = len + 1; + } + } + + err = vulkan_encode_h264_init_seq_params(avctx); + if (err < 0) + return err; + + err = vulkan_encode_h264_create_session(avctx); + if (err < 0) + return err; + + if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) { + uint8_t data[4096]; + size_t data_len = sizeof(data); + + err = vulkan_encode_h264_write_sequence_header(avctx, data, &data_len); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to write sequence header " + "for extradata: %d.\n", err); + return err; + } else { + avctx->extradata_size = data_len; + avctx->extradata = av_mallocz(avctx->extradata_size + + AV_INPUT_BUFFER_PADDING_SIZE); + if (!avctx->extradata) { + err = AVERROR(ENOMEM); + return err; + } + memcpy(avctx->extradata, data, avctx->extradata_size); + } + } + + return 0; +} + +static av_cold int 
vulkan_encode_h264_close(AVCodecContext *avctx) +{ + VulkanEncodeH264Context *enc = avctx->priv_data; + + ff_vulkan_encode_uninit(&enc->vkenc); + + /* Release what vulkan_encode_h264_init() and the header writers set up + * in this context: the access-unit fragment, the CBS context and the + * SEI identifier string. All three calls accept never-initialised + * (zeroed/NULL) state, so this is safe after a failed init as well. */ + ff_cbs_fragment_free(&enc->current_access_unit); + ff_cbs_close(&enc->cbc); + av_freep(&enc->sei_identifier_string); + + return 0; +} + +static int video_encode_h264_receive_packet(AVCodecContext *avctx, AVPacket *pkt) +{ + VulkanEncodeH264Context *enc = avctx->priv_data; + return ff_vulkan_encode_receive_packet(avctx, &enc->vkenc, pkt); +} + +static void vulkan_encode_h264_flush(AVCodecContext *avctx) +{ + +} + +#define OFFSET(x) offsetof(VulkanEncodeH264Context, x) +#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM) +static const AVOption vulkan_encode_h264_options[] = { + { "profile", "Select profile", OFFSET(vkenc.opts.profile), AV_OPT_TYPE_INT, { .i64 = FF_PROFILE_H264_MAIN }, 0, INT_MAX, FLAGS, "profile" }, + { "baseline", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_H264_BASELINE }, INT_MIN, INT_MAX, FLAGS, "profile" }, + { "main", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_H264_MAIN }, INT_MIN, INT_MAX, FLAGS, "profile" }, + { "high", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_H264_HIGH }, INT_MIN, INT_MAX, FLAGS, "profile" }, + { "high444p", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_H264_HIGH_444_PREDICTIVE }, INT_MIN, INT_MAX, FLAGS, "profile" }, + + { "b_depth", "Maximum B-frame reference depth", OFFSET(desired_b_depth), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, INT_MAX, FLAGS }, + + { "coder", "Entropy coder type", OFFSET(coder), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, FLAGS, "coder" }, + { "cabac", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, INT_MIN, INT_MAX, FLAGS, "coder" }, + { "vlc", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, INT_MIN, INT_MAX, FLAGS, "coder" }, + + { "units", "Set units to include", OFFSET(insert_units), AV_OPT_TYPE_FLAGS, { .i64 = UNIT_IDENTIFIER | UNIT_AUD | UNIT_RECOVERY }, 0, INT_MAX, FLAGS, "units" }, + { "identifier", "Include encoder version identifier", 0, AV_OPT_TYPE_CONST, { .i64 = UNIT_IDENTIFIER }, INT_MIN, INT_MAX, FLAGS, "units" }, + { "aud", "Include AUD units", 0,
AV_OPT_TYPE_CONST, { .i64 = UNIT_AUD }, INT_MIN, INT_MAX, FLAGS, "units" }, + { "timing", "Include timing parameters (buffering_period and pic_timing)", 0, AV_OPT_TYPE_CONST, { .i64 = UNIT_TIMING }, INT_MIN, INT_MAX, FLAGS, "units" }, + { "recovery", "Include recovery points where appropriate", 0, AV_OPT_TYPE_CONST, { .i64 = UNIT_RECOVERY }, INT_MIN, INT_MAX, FLAGS, "units" }, + + FF_VK_ENCODE_COMMON_OPTS + + { NULL }, +}; + +static const FFCodecDefault vulkan_encode_h264_defaults[] = { + { "b", "0" }, + { "g", "120" }, + { NULL }, +}; + +static const AVClass vulkan_encode_h264_class = { + .class_name = "h264_vulkan", + .item_name = av_default_item_name, + .option = vulkan_encode_h264_options, + .version = LIBAVUTIL_VERSION_INT, +}; + +const FFCodec ff_h264_vulkan_encoder = { + .p.name = "h264_vulkan", + CODEC_LONG_NAME("H.264/AVC (Vulkan)"), + .p.type = AVMEDIA_TYPE_VIDEO, + .p.id = AV_CODEC_ID_H264, + .priv_data_size = sizeof(VulkanEncodeH264Context), + .init = &vulkan_encode_h264_init, + FF_CODEC_RECEIVE_PACKET_CB(&video_encode_h264_receive_packet), + .flush = &vulkan_encode_h264_flush, + .close = &vulkan_encode_h264_close, + .p.priv_class = &vulkan_encode_h264_class, + .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE | + AV_CODEC_CAP_DR1 | + AV_CODEC_CAP_ENCODER_FLUSH /* | AV_CODEC_CAP_EXPERIMENTAL */, + .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, + .defaults = vulkan_encode_h264_defaults, + .p.pix_fmts = (const enum AVPixelFormat[]) { + AV_PIX_FMT_VULKAN, + AV_PIX_FMT_NONE, + }, + .hw_configs = ff_vulkan_encode_hw_configs, + .p.wrapper_name = "vulkan", +}; From ac8b71bc8054be41218c4e2fd6bd7b36215c81dd Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 27 Feb 2023 11:57:54 +1000 Subject: [PATCH 88/98] av1: set skip mode frames properly There are circumstances where the flag isn't set but the skip mode frames are. So don't use the inferred bit which has other inputs when deciding to pass the skip mode frames to the device. 
This fixes some decoding bugs on intel av1 --- libavcodec/av1dec.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/libavcodec/av1dec.c b/libavcodec/av1dec.c index aaa66ee7ea661..47ef0bb67ed49 100644 --- a/libavcodec/av1dec.c +++ b/libavcodec/av1dec.c @@ -269,8 +269,10 @@ static void skip_mode_params(AV1DecContext *s) int second_forward_idx, second_forward_hint; int ref_hint, dist, i; - if (!header->skip_mode_present) - return; + if (header->frame_type == AV1_FRAME_KEY || + header->frame_type == AV1_FRAME_INTRA_ONLY || + !header->reference_select || !seq->enable_order_hint) + return; forward_idx = -1; backward_idx = -1; From a6e4bcf93c58f6e0d065c822aefbfee0142b0616 Mon Sep 17 00:00:00 2001 From: Lynne Date: Thu, 23 Feb 2023 02:46:13 +0100 Subject: [PATCH 89/98] cbs_av1: expose tile col/row starts in SBs --- libavcodec/cbs_av1.h | 2 ++ libavcodec/cbs_av1_syntax_template.c | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/libavcodec/cbs_av1.h b/libavcodec/cbs_av1.h index 36848d4410c03..64dfdce9c4eb5 100644 --- a/libavcodec/cbs_av1.h +++ b/libavcodec/cbs_av1.h @@ -215,6 +215,8 @@ typedef struct AV1RawFrameHeader { uint8_t uniform_tile_spacing_flag; uint8_t tile_cols_log2; uint8_t tile_rows_log2; + uint8_t tile_start_col_sb[AV1_MAX_TILE_COLS]; + uint8_t tile_start_row_sb[AV1_MAX_TILE_COLS]; uint8_t width_in_sbs_minus_1[AV1_MAX_TILE_COLS]; uint8_t height_in_sbs_minus_1[AV1_MAX_TILE_ROWS]; uint16_t context_update_tile_id; diff --git a/libavcodec/cbs_av1_syntax_template.c b/libavcodec/cbs_av1_syntax_template.c index 8f4640d1af82f..a747e17784f71 100644 --- a/libavcodec/cbs_av1_syntax_template.c +++ b/libavcodec/cbs_av1_syntax_template.c @@ -626,6 +626,10 @@ static int FUNC(tile_info)(CodedBitstreamContext *ctx, RWContext *rw, tile_width_sb = (sb_cols + (1 << current->tile_cols_log2) - 1) >> current->tile_cols_log2; + + for (int off = 0, i = 0; off < sb_cols; off += tile_width_sb) + current->tile_start_col_sb[i++] = off; + 
current->tile_cols = (sb_cols + tile_width_sb - 1) / tile_width_sb; min_log2_tile_rows = FFMAX(min_log2_tiles - current->tile_cols_log2, 0); @@ -634,6 +638,10 @@ static int FUNC(tile_info)(CodedBitstreamContext *ctx, RWContext *rw, tile_height_sb = (sb_rows + (1 << current->tile_rows_log2) - 1) >> current->tile_rows_log2; + + for (int off = 0, i = 0; off < sb_rows; off += tile_height_sb) + current->tile_start_row_sb[i++] = off; + current->tile_rows = (sb_rows + tile_height_sb - 1) / tile_height_sb; for (i = 0; i < current->tile_cols - 1; i++) @@ -652,6 +660,7 @@ static int FUNC(tile_info)(CodedBitstreamContext *ctx, RWContext *rw, start_sb = 0; for (i = 0; start_sb < sb_cols && i < AV1_MAX_TILE_COLS; i++) { + current->tile_start_col_sb[i] = start_sb; max_width = FFMIN(sb_cols - start_sb, max_tile_width_sb); ns(max_width, width_in_sbs_minus_1[i], 1, i); size_sb = current->width_in_sbs_minus_1[i] + 1; @@ -669,6 +678,7 @@ static int FUNC(tile_info)(CodedBitstreamContext *ctx, RWContext *rw, start_sb = 0; for (i = 0; start_sb < sb_rows && i < AV1_MAX_TILE_ROWS; i++) { + current->tile_start_row_sb[i] = start_sb; max_height = FFMIN(sb_rows - start_sb, max_tile_height_sb); ns(max_height, height_in_sbs_minus_1[i], 1, i); size_sb = current->height_in_sbs_minus_1[i] + 1; From 0a2c71cad2a9c7cb7decc5e3fd0dd4d3110d5966 Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 17 Feb 2023 04:09:16 +0100 Subject: [PATCH 90/98] av1dec: add Vulkan hwaccel --- configure | 2 + libavcodec/Makefile | 3 +- libavcodec/av1dec.c | 47 +- libavcodec/hwaccels.h | 1 + libavcodec/vulkan_av1.c | 598 ++++++++++++++++++ libavcodec/vulkan_decode.c | 29 +- libavcodec/vulkan_decode.h | 2 + libavcodec/vulkan_video.c | 6 + libavcodec/vulkan_video.h | 2 + libavcodec/vulkan_video_codec_av1std.h | 394 ++++++++++++ libavcodec/vulkan_video_codec_av1std_decode.h | 27 + libavutil/hwcontext_vulkan.c | 1 + libavutil/vulkan_functions.h | 1 + libavutil/vulkan_loader.h | 1 + 14 files changed, 1111 insertions(+), 3 
deletions(-) create mode 100644 libavcodec/vulkan_av1.c create mode 100644 libavcodec/vulkan_video_codec_av1std.h create mode 100644 libavcodec/vulkan_video_codec_av1std_decode.h diff --git a/configure b/configure index e9ce71ec285c6..817752ecf9b23 100755 --- a/configure +++ b/configure @@ -3023,6 +3023,8 @@ av1_vaapi_hwaccel_deps="vaapi VADecPictureParameterBufferAV1_bit_depth_idx" av1_vaapi_hwaccel_select="av1_decoder" av1_vdpau_hwaccel_deps="vdpau VdpPictureInfoAV1" av1_vdpau_hwaccel_select="av1_decoder" +av1_vulkan_hwaccel_deps="vulkan" +av1_vulkan_hwaccel_select="av1_decoder" h263_vaapi_hwaccel_deps="vaapi" h263_vaapi_hwaccel_select="h263_decoder" h263_videotoolbox_hwaccel_deps="videotoolbox" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 5664f692beb3a..f841512620bcf 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -992,6 +992,7 @@ OBJS-$(CONFIG_AV1_DXVA2_HWACCEL) += dxva2_av1.o OBJS-$(CONFIG_AV1_NVDEC_HWACCEL) += nvdec_av1.o OBJS-$(CONFIG_AV1_VAAPI_HWACCEL) += vaapi_av1.o OBJS-$(CONFIG_AV1_VDPAU_HWACCEL) += vdpau_av1.o +OBJS-$(CONFIG_AV1_VULKAN_HWACCEL) += vulkan_av1.o OBJS-$(CONFIG_H263_VAAPI_HWACCEL) += vaapi_mpeg4.o OBJS-$(CONFIG_H263_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o OBJS-$(CONFIG_H264_D3D11VA_HWACCEL) += dxva2_h264.o @@ -1296,7 +1297,7 @@ SKIPHEADERS-$(CONFIG_XVMC) += xvmc.h SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_decode.h vaapi_hevc.h vaapi_encode.h SKIPHEADERS-$(CONFIG_VDPAU) += vdpau.h vdpau_internal.h SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.h vt_internal.h -SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_video.h vulkan_encode.h vulkan_decode.h +SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_video.h vulkan_encode.h vulkan_decode.h vulkan_video_codec_av1std.h vulkan_video_codec_av1std_decode.h SKIPHEADERS-$(CONFIG_V4L2_M2M) += v4l2_buffers.h v4l2_context.h v4l2_m2m.h SKIPHEADERS-$(CONFIG_ZLIB) += zlib_wrapper.h diff --git a/libavcodec/av1dec.c b/libavcodec/av1dec.c index 47ef0bb67ed49..a4c56e8446e03 100644 --- 
a/libavcodec/av1dec.c +++ b/libavcodec/av1dec.c @@ -449,7 +449,8 @@ static int get_pixel_format(AVCodecContext *avctx) CONFIG_AV1_D3D11VA_HWACCEL * 2 + \ CONFIG_AV1_NVDEC_HWACCEL + \ CONFIG_AV1_VAAPI_HWACCEL + \ - CONFIG_AV1_VDPAU_HWACCEL) + CONFIG_AV1_VDPAU_HWACCEL + \ + CONFIG_AV1_VULKAN_HWACCEL) enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts; if (seq->seq_profile == 2 && seq->color_config.high_bitdepth) @@ -529,6 +530,9 @@ static int get_pixel_format(AVCodecContext *avctx) #endif #if CONFIG_AV1_VDPAU_HWACCEL *fmtp++ = AV_PIX_FMT_VDPAU; +#endif +#if CONFIG_AV1_VULKAN_HWACCEL + *fmtp++ = AV_PIX_FMT_VULKAN; #endif break; case AV_PIX_FMT_YUV420P10: @@ -547,6 +551,44 @@ static int get_pixel_format(AVCodecContext *avctx) #endif #if CONFIG_AV1_VDPAU_HWACCEL *fmtp++ = AV_PIX_FMT_VDPAU; +#endif +#if CONFIG_AV1_VULKAN_HWACCEL + *fmtp++ = AV_PIX_FMT_VULKAN; +#endif + break; + case AV_PIX_FMT_YUV420P12: +#if CONFIG_AV1_VULKAN_HWACCEL + *fmtp++ = AV_PIX_FMT_VULKAN; +#endif + break; + case AV_PIX_FMT_YUV422P: +#if CONFIG_AV1_VULKAN_HWACCEL + *fmtp++ = AV_PIX_FMT_VULKAN; +#endif + break; + case AV_PIX_FMT_YUV422P10: +#if CONFIG_AV1_VULKAN_HWACCEL + *fmtp++ = AV_PIX_FMT_VULKAN; +#endif + break; + case AV_PIX_FMT_YUV422P12: +#if CONFIG_AV1_VULKAN_HWACCEL + *fmtp++ = AV_PIX_FMT_VULKAN; +#endif + break; + case AV_PIX_FMT_YUV444P: +#if CONFIG_AV1_VULKAN_HWACCEL + *fmtp++ = AV_PIX_FMT_VULKAN; +#endif + break; + case AV_PIX_FMT_YUV444P10: +#if CONFIG_AV1_VULKAN_HWACCEL + *fmtp++ = AV_PIX_FMT_VULKAN; +#endif + break; + case AV_PIX_FMT_YUV444P12: +#if CONFIG_AV1_VULKAN_HWACCEL + *fmtp++ = AV_PIX_FMT_VULKAN; #endif break; case AV_PIX_FMT_GRAY8: @@ -1469,6 +1511,9 @@ const FFCodec ff_av1_decoder = { #if CONFIG_AV1_VDPAU_HWACCEL HWACCEL_VDPAU(av1), #endif +#if CONFIG_AV1_VULKAN_HWACCEL + HWACCEL_VULKAN(av1), +#endif NULL }, diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h index a7c74d07cbd35..48dfc17f72b5e 100644 --- a/libavcodec/hwaccels.h +++ 
b/libavcodec/hwaccels.h @@ -27,6 +27,7 @@ extern const AVHWAccel ff_av1_dxva2_hwaccel; extern const AVHWAccel ff_av1_nvdec_hwaccel; extern const AVHWAccel ff_av1_vaapi_hwaccel; extern const AVHWAccel ff_av1_vdpau_hwaccel; +extern const AVHWAccel ff_av1_vulkan_hwaccel; extern const AVHWAccel ff_h263_vaapi_hwaccel; extern const AVHWAccel ff_h263_videotoolbox_hwaccel; extern const AVHWAccel ff_h264_d3d11va_hwaccel; diff --git a/libavcodec/vulkan_av1.c b/libavcodec/vulkan_av1.c new file mode 100644 index 0000000000000..19bfd22b606a3 --- /dev/null +++ b/libavcodec/vulkan_av1.c @@ -0,0 +1,598 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "av1dec.h" + +#include "vulkan_decode.h" + +/* Maximum number of tiles specified by any defined level */ +#define MAX_TILES 256 + +const VkExtensionProperties ff_vk_dec_av1_ext = { + .extensionName = VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME, + .specVersion = VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION, +}; + +typedef struct AV1VulkanDecodePicture { + FFVulkanDecodePicture vp; + + /* Workaround for a spec issue. + *Can be removed once no longer needed, and threading can be enabled. 
*/ + FFVulkanDecodeContext *dec; + + StdVideoAV1MESATile tiles[MAX_TILES]; + StdVideoAV1MESATileList tile_list; + const uint32_t *tile_offsets; + + /* Current picture */ + VkVideoDecodeAV1DpbSlotInfoMESA vkav1_ref; + StdVideoAV1MESAFrameHeader av1_frame_header; + VkVideoDecodeAV1PictureInfoMESA av1_pic_info; + + /* Picture refs */ + const AV1Frame *ref_src [AV1_NUM_REF_FRAMES]; + VkVideoDecodeAV1DpbSlotInfoMESA vkav1_refs[AV1_NUM_REF_FRAMES]; + + uint8_t frame_id_set; + uint8_t frame_id; +} AV1VulkanDecodePicture; + +static int vk_av1_fill_pict(AVCodecContext *avctx, const AV1Frame **ref_src, + VkVideoReferenceSlotInfoKHR *ref_slot, /* Main structure */ + VkVideoPictureResourceInfoKHR *ref, /* Goes in ^ */ + VkVideoDecodeAV1DpbSlotInfoMESA *vkav1_ref, /* Goes in ^ */ + const AV1Frame *pic, int is_current, int has_grain, + int dpb_slot_index) +{ + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = (FFVulkanDecodeShared *)dec->shared_ref->data; + AV1VulkanDecodePicture *hp = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vkpic = &hp->vp; + + int err = ff_vk_decode_prepare_frame(ctx, pic->f, vkpic, is_current, + has_grain || ctx->dedicated_dpb); + if (err < 0) + return err; + + *vkav1_ref = (VkVideoDecodeAV1DpbSlotInfoMESA) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_DPB_SLOT_INFO_MESA, + .frameIdx = hp->frame_id, + }; + + for (unsigned i = 0; i < 7; i++) { + const int idx = pic->raw_frame_header->ref_frame_idx[i]; + vkav1_ref->ref_order_hint[i] = pic->raw_frame_header->ref_order_hint[idx]; + } + + vkav1_ref->disable_frame_end_update_cdf = pic->raw_frame_header->disable_frame_end_update_cdf; + + *ref = (VkVideoPictureResourceInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .codedOffset = (VkOffset2D){ 0, 0 }, + .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height }, + .baseArrayLayer = ((has_grain || ctx->dedicated_dpb) && ctx->layered_dpb) ? 
+ dpb_slot_index : 0, + .imageViewBinding = vkpic->img_view_ref, + }; + + *ref_slot = (VkVideoReferenceSlotInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR, + .pNext = vkav1_ref, + .slotIndex = dpb_slot_index, + .pPictureResource = ref, + }; + + if (ref_src) + *ref_src = pic; + + return 0; +} + +static int vk_av1_create_params(AVCodecContext *avctx, AVBufferRef **buf) +{ + VkResult ret; + + const AV1DecContext *s = avctx->priv_data; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = (FFVulkanDecodeShared *)dec->shared_ref->data; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + const AV1RawSequenceHeader *seq = s->raw_seq; + + StdVideoAV1MESASequenceHeader av1_sequence_header; + VkVideoDecodeAV1SessionParametersAddInfoMESA av1_params_info; + VkVideoDecodeAV1SessionParametersCreateInfoMESA av1_params; + VkVideoSessionParametersCreateInfoKHR session_params_create; + + AVBufferRef *tmp; + VkVideoSessionParametersKHR *par = av_malloc(sizeof(*par)); + if (!par) + return AVERROR(ENOMEM); + + av1_sequence_header = (StdVideoAV1MESASequenceHeader) { + .flags = (StdVideoAV1MESASequenceHeaderFlags) { + .still_picture = seq->still_picture, + .reduced_still_picture_header = seq->reduced_still_picture_header, + .use_128x128_superblock = seq->use_128x128_superblock, + .enable_filter_intra = seq->enable_filter_intra, + .enable_intra_edge_filter = seq->enable_intra_edge_filter, + .enable_interintra_compound = seq->enable_interintra_compound, + .enable_masked_compound = seq->enable_masked_compound, + .enable_warped_motion = seq->enable_warped_motion, + .enable_dual_filter = seq->enable_dual_filter, + .enable_order_hint = seq->enable_order_hint, + .enable_jnt_comp = seq->enable_jnt_comp, + .enable_ref_frame_mvs = seq->enable_ref_frame_mvs, + .frame_id_numbers_present_flag = seq->frame_id_numbers_present_flag, + .enable_superres = seq->enable_superres, + .enable_cdef = seq->enable_cdef, + .enable_restoration = 
seq->enable_restoration, + .film_grain_params_present = seq->film_grain_params_present, + .timing_info_present_flag = seq->timing_info_present_flag, + .initial_display_delay_present_flag = seq->initial_display_delay_present_flag, + }, + .seq_profile = seq->seq_profile, + .frame_width_bits_minus_1 = seq->frame_width_bits_minus_1, + .frame_height_bits_minus_1 = seq->frame_height_bits_minus_1, + .max_frame_width_minus_1 = seq->max_frame_width_minus_1, + .max_frame_height_minus_1 = seq->max_frame_height_minus_1, + .delta_frame_id_length_minus_2 = seq->delta_frame_id_length_minus_2, + .additional_frame_id_length_minus_1 = seq->additional_frame_id_length_minus_1, + .order_hint_bits_minus_1 = seq->order_hint_bits_minus_1, + .timing_info = (StdVideoAV1MESATimingInfo) { + .flags = (StdVideoAV1MESATimingInfoFlags) { + .equal_picture_interval = seq->timing_info.equal_picture_interval, + }, + .num_units_in_display_tick = seq->timing_info.num_units_in_display_tick, + .time_scale = seq->timing_info.time_scale, + .num_ticks_per_picture_minus_1 = seq->timing_info.num_ticks_per_picture_minus_1, + }, + .color_config = (StdVideoAV1MESAColorConfig) { + .flags = (StdVideoAV1MESAColorConfigFlags) { + .mono_chrome = seq->color_config.mono_chrome, + .color_range = seq->color_config.color_range, + .separate_uv_delta_q = seq->color_config.separate_uv_delta_q, + }, + .bit_depth = seq->color_config.twelve_bit ? 12 : + seq->color_config.high_bitdepth ? 
10 : 8, + .subsampling_x = seq->color_config.subsampling_x, + .subsampling_y = seq->color_config.subsampling_y, + }, + }; + + av1_params_info = (VkVideoDecodeAV1SessionParametersAddInfoMESA) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_SESSION_PARAMETERS_ADD_INFO_MESA, + .sequence_header = &av1_sequence_header, + }; + av1_params = (VkVideoDecodeAV1SessionParametersCreateInfoMESA) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_SESSION_PARAMETERS_CREATE_INFO_MESA, + .pParametersAddInfo = &av1_params_info, + }; + session_params_create = (VkVideoSessionParametersCreateInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR, + .pNext = &av1_params, + .videoSession = ctx->common.session, + .videoSessionParametersTemplate = NULL, + }; + + /* Create session parameters */ + ret = vk->CreateVideoSessionParametersKHR(ctx->s.hwctx->act_dev, &session_params_create, + ctx->s.hwctx->alloc, par); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to create Vulkan video session parameters: %s!\n", + ff_vk_ret2str(ret)); + /* par is not yet owned by any AVBufferRef, so it must be released here */ + av_free(par); + return AVERROR_EXTERNAL; + } + + /* From here on the buffer owns par; ff_vk_decode_free_params releases it */ + tmp = av_buffer_create((uint8_t *)par, sizeof(*par), ff_vk_decode_free_params, + ctx, 0); + if (!tmp) { + ff_vk_decode_free_params(ctx, (uint8_t *)par); + return AVERROR(ENOMEM); + } + + av_log(avctx, AV_LOG_DEBUG, "Created frame parameters\n"); + + *buf = tmp; + + return 0; +} + +static int vk_av1_start_frame(AVCodecContext *avctx, + av_unused const uint8_t *buffer, + av_unused uint32_t size) +{ + int err; + int ref_count = 0; + AV1DecContext *s = avctx->priv_data; + const AV1Frame *pic = &s->cur_frame; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + AV1VulkanDecodePicture *ap = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &ap->vp; + + const AV1RawFrameHeader *frame_header = s->raw_frame_header; + const AV1RawFilmGrainParams *film_grain = &s->cur_frame.film_grain; + const int apply_grain = !(avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN) && 
+ film_grain->apply_grain; + + if (!dec->session_params || dec->params_changed) { + av_buffer_unref(&dec->session_params); + err = vk_av1_create_params(avctx, &dec->session_params); + if (err < 0) + return err; + dec->params_changed = 0; + } + + if (!ap->frame_id_set) { + unsigned slot_idx = 0; + for (unsigned i = 0; i < 32; i++) { + /* unsigned constant: shifting a signed 1 by 31 is undefined */ + if (!(dec->frame_id_alloc_mask & (1U << i))) { + slot_idx = i; + break; + } + } + ap->frame_id = slot_idx; + ap->frame_id_set = 1; + dec->frame_id_alloc_mask |= (1U << slot_idx); + /* vk_av1_free_frame_priv() dereferences ap->dec whenever frame_id_set + * is set, so record the owner before any of the error returns below */ + ap->dec = dec; + } + + /* Fill in references. Entries are packed densely (indexed by ref_count): + * referenceSlotCount below and the loop in vk_av1_end_frame() both treat + * the first ref_count entries as the valid ones. The DPB slot index + * passed to vk_av1_fill_pict() remains i. */ + for (int i = 0; i < AV1_NUM_REF_FRAMES; i++) { + const AV1Frame *ref_frame = &s->ref[i]; + if (s->ref[i].f->pict_type == AV_PICTURE_TYPE_NONE) + continue; + + err = vk_av1_fill_pict(avctx, &ap->ref_src[ref_count], &vp->ref_slots[ref_count], + &vp->refs[ref_count], &ap->vkav1_refs[ref_count], + ref_frame, 0, 0, i); + if (err < 0) + return err; + + ref_count++; + } + + err = vk_av1_fill_pict(avctx, NULL, &vp->ref_slot, &vp->ref, + &ap->vkav1_ref, + pic, 1, apply_grain, 8); + if (err < 0) + return err; + + ap->tile_list.nb_tiles = 0; + ap->tile_list.tile_list = ap->tiles; + + ap->av1_pic_info = (VkVideoDecodeAV1PictureInfoMESA) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_PICTURE_INFO_MESA, + .frame_header = &ap->av1_frame_header, + .tile_list = &ap->tile_list, + }; + + vp->decode_info = (VkVideoDecodeInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR, + .pNext = &ap->av1_pic_info, + .flags = 0x0, + .pSetupReferenceSlot = &vp->ref_slot, + .referenceSlotCount = ref_count, + .pReferenceSlots = vp->ref_slots, + .dstPictureResource = (VkVideoPictureResourceInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .codedOffset = (VkOffset2D){ 0, 0 }, + .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height }, + .baseArrayLayer = 0, + .imageViewBinding = vp->img_view_out, + }, + }; + + /* Setup frame header */ + ap->av1_frame_header = (StdVideoAV1MESAFrameHeader) { + .flags = (StdVideoAV1MESAFrameHeaderFlags) { + 
.error_resilient_mode = frame_header->error_resilient_mode, + .disable_cdf_update = frame_header->disable_cdf_update, + .use_superres = frame_header->use_superres, + .render_and_frame_size_different = frame_header->render_and_frame_size_different, + .allow_screen_content_tools = frame_header->allow_screen_content_tools, + .is_filter_switchable = frame_header->is_filter_switchable, + .force_integer_mv = frame_header->force_integer_mv, + .frame_size_override_flag = frame_header->frame_size_override_flag, + .buffer_removal_time_present_flag = frame_header->buffer_removal_time_present_flag, + .allow_intrabc = frame_header->allow_intrabc, + .frame_refs_short_signaling = frame_header->frame_refs_short_signaling, + .allow_high_precision_mv = frame_header->allow_high_precision_mv, + .is_motion_mode_switchable = frame_header->is_motion_mode_switchable, + .use_ref_frame_mvs = frame_header->use_ref_frame_mvs, + .disable_frame_end_update_cdf = frame_header->disable_frame_end_update_cdf, + .allow_warped_motion = frame_header->allow_warped_motion, + .reduced_tx_set = frame_header->reduced_tx_set, + .reference_select = frame_header->reference_select, + .skip_mode_present = frame_header->skip_mode_present, + .delta_q_present = frame_header->delta_q_present, + }, + .frame_to_show_map_idx = frame_header->frame_to_show_map_idx, + .frame_presentation_time = frame_header->frame_presentation_time, + .display_frame_id = frame_header->display_frame_id, + .frame_type = frame_header->frame_type, + .current_frame_id = frame_header->current_frame_id, + .order_hint = frame_header->order_hint, + .primary_ref_frame = frame_header->primary_ref_frame, + .frame_width_minus_1 = frame_header->frame_width_minus_1, + .frame_height_minus_1 = frame_header->frame_height_minus_1, + .coded_denom = frame_header->coded_denom, + .render_width_minus_1 = frame_header->render_width_minus_1, + .render_height_minus_1 = frame_header->render_height_minus_1, + .refresh_frame_flags = frame_header->refresh_frame_flags, 
+ .interpolation_filter = frame_header->interpolation_filter, + .tx_mode = frame_header->tx_mode, + .tiling = (StdVideoAV1MESATileInfo) { + .flags = (StdVideoAV1MESATileInfoFlags) { + .uniform_tile_spacing_flag = frame_header->uniform_tile_spacing_flag, + }, + .tile_cols = frame_header->tile_cols, + .tile_rows = frame_header->tile_rows, + .context_update_tile_id = frame_header->context_update_tile_id, + .tile_size_bytes_minus1 = frame_header->tile_size_bytes_minus1, + }, + .quantization = (StdVideoAV1MESAQuantization) { + .flags.using_qmatrix = frame_header->using_qmatrix, + .base_q_idx = frame_header->base_q_idx, + .delta_q_y_dc = frame_header->delta_q_y_dc, + .diff_uv_delta = frame_header->diff_uv_delta, + .delta_q_u_dc = frame_header->delta_q_u_dc, + .delta_q_u_ac = frame_header->delta_q_u_ac, + .delta_q_v_dc = frame_header->delta_q_v_dc, + .delta_q_v_ac = frame_header->delta_q_v_ac, + .qm_y = frame_header->qm_y, + .qm_u = frame_header->qm_u, + .qm_v = frame_header->qm_v, + }, + .delta_q = (StdVideoAV1MESADeltaQ) { + .flags = (StdVideoAV1MESADeltaQFlags) { + .delta_lf_present = frame_header->delta_lf_present, + .delta_lf_multi = frame_header->delta_lf_multi, + }, + .delta_q_res = frame_header->delta_q_res, + .delta_lf_res = frame_header->delta_lf_res, + }, + .loop_filter = (StdVideoAV1MESALoopFilter) { + .flags = (StdVideoAV1MESALoopFilterFlags) { + .delta_enabled = frame_header->loop_filter_delta_enabled, + .delta_update = frame_header->loop_filter_delta_update, + }, + .level = { + frame_header->loop_filter_level[0], frame_header->loop_filter_level[1], + frame_header->loop_filter_level[2], frame_header->loop_filter_level[3], + }, + .sharpness = frame_header->loop_filter_sharpness, + .mode_deltas = { + frame_header->loop_filter_mode_deltas[0], frame_header->loop_filter_mode_deltas[1], + }, + }, + .cdef = (StdVideoAV1MESACDEF) { + .damping_minus_3 = frame_header->cdef_damping_minus_3, + .bits = frame_header->cdef_bits, + }, + .lr = 
(StdVideoAV1MESALoopRestoration) { + .lr_unit_shift = frame_header->lr_unit_shift, + .lr_uv_shift = frame_header->lr_uv_shift, + .lr_type = { frame_header->lr_type[0], frame_header->lr_type[1], frame_header->lr_type[2] }, + }, + .segmentation = (StdVideoAV1MESASegmentation) { + .flags = (StdVideoAV1MESASegmentationFlags) { + .enabled = frame_header->segmentation_enabled, + .update_map = frame_header->segmentation_update_map, + .temporal_update = frame_header->segmentation_temporal_update, + .update_data = frame_header->segmentation_update_data, + }, + }, + .film_grain = (StdVideoAV1MESAFilmGrainParameters) { + .flags = (StdVideoAV1MESAFilmGrainFlags) { + .apply_grain = apply_grain, + .chroma_scaling_from_luma = film_grain->chroma_scaling_from_luma, + .overlap_flag = film_grain->overlap_flag, + .clip_to_restricted_range = film_grain->clip_to_restricted_range, + }, + .grain_scaling_minus_8 = film_grain->grain_scaling_minus_8, + .ar_coeff_lag = film_grain->ar_coeff_lag, + .ar_coeff_shift_minus_6 = film_grain->ar_coeff_shift_minus_6, + .grain_scale_shift = film_grain->grain_scale_shift, + .grain_seed = film_grain->grain_seed, + .num_y_points = film_grain->num_y_points, + .num_cb_points = film_grain->num_cb_points, + .num_cr_points = film_grain->num_cr_points, + .cb_mult = film_grain->cb_mult, + .cb_luma_mult = film_grain->cb_luma_mult, + .cb_offset = film_grain->cb_offset, + .cr_mult = film_grain->cr_mult, + .cr_luma_mult = film_grain->cr_luma_mult, + .cr_offset = film_grain->cr_offset, + }, + }; + + for (int i = 0; i < 64; i++) { + ap->av1_frame_header.tiling.width_in_sbs_minus_1[i] = frame_header->width_in_sbs_minus_1[i]; + ap->av1_frame_header.tiling.height_in_sbs_minus_1[i] = frame_header->height_in_sbs_minus_1[i]; + ap->av1_frame_header.tiling.tile_start_col_sb[i] = frame_header->tile_start_col_sb[i]; + ap->av1_frame_header.tiling.tile_start_row_sb[i] = frame_header->tile_start_row_sb[i]; + } + + for (int i = 0; i < 8; i++) { + 
ap->av1_frame_header.segmentation.feature_enabled_bits[i] = 0; + for (int j = 0; j < 8; j++) { + ap->av1_frame_header.segmentation.feature_enabled_bits[i] |= (frame_header->feature_enabled[i][j] << j); + ap->av1_frame_header.segmentation.feature_data[i][j] = frame_header->feature_value[i][j]; + } + + ap->av1_frame_header.loop_filter.ref_deltas[i] = frame_header->loop_filter_ref_deltas[i]; + + ap->av1_frame_header.cdef.y_pri_strength[i] = frame_header->cdef_y_pri_strength[i]; + ap->av1_frame_header.cdef.y_sec_strength[i] = frame_header->cdef_y_sec_strength[i]; + ap->av1_frame_header.cdef.uv_pri_strength[i] = frame_header->cdef_uv_pri_strength[i]; + ap->av1_frame_header.cdef.uv_sec_strength[i] = frame_header->cdef_uv_sec_strength[i]; + + ap->av1_frame_header.ref_order_hint[i] = frame_header->ref_order_hint[i]; + ap->av1_frame_header.global_motion[i] = (StdVideoAV1MESAGlobalMotion) { + .flags = (StdVideoAV1MESAGlobalMotionFlags) { + .gm_invalid = s->cur_frame.gm_invalid[i], + }, + .gm_type = s->cur_frame.gm_type[i], + .gm_params = { + frame_header->gm_params[i][0], frame_header->gm_params[i][1], + frame_header->gm_params[i][2], frame_header->gm_params[i][3], + frame_header->gm_params[i][4], frame_header->gm_params[i][5], + }, + }; + } + + for (int i = 0; i < 7; i++) { + ap->av1_frame_header.ref_frame_idx[i] = frame_header->ref_frame_idx[i]; + ap->av1_frame_header.delta_frame_id_minus1[i] = frame_header->delta_frame_id_minus1[i]; + } + + ap->av1_pic_info.skip_mode_frame_idx[0] = s->cur_frame.skip_mode_frame_idx[0]; + ap->av1_pic_info.skip_mode_frame_idx[1] = s->cur_frame.skip_mode_frame_idx[1]; + + if (apply_grain) { + for (int i = 0; i < 14; i++) { + ap->av1_frame_header.film_grain.point_y_value[i] = film_grain->point_y_value[i]; + ap->av1_frame_header.film_grain.point_y_scaling[i] = film_grain->point_y_scaling[i]; + } + + for (int i = 0; i < 10; i++) { + ap->av1_frame_header.film_grain.point_cb_value[i] = film_grain->point_cb_value[i]; + 
ap->av1_frame_header.film_grain.point_cb_scaling[i] = film_grain->point_cb_scaling[i]; + ap->av1_frame_header.film_grain.point_cr_value[i] = film_grain->point_cr_value[i]; + ap->av1_frame_header.film_grain.point_cr_scaling[i] = film_grain->point_cr_scaling[i]; + } + + for (int i = 0; i < 24; i++) { + ap->av1_frame_header.film_grain.ar_coeffs_y_plus_128[i] = film_grain->ar_coeffs_y_plus_128[i]; + ap->av1_frame_header.film_grain.ar_coeffs_cb_plus_128[i] = film_grain->ar_coeffs_cb_plus_128[i]; + ap->av1_frame_header.film_grain.ar_coeffs_cr_plus_128[i] = film_grain->ar_coeffs_cr_plus_128[i]; + } + + ap->av1_frame_header.film_grain.ar_coeffs_cb_plus_128[24] = film_grain->ar_coeffs_cb_plus_128[24]; + ap->av1_frame_header.film_grain.ar_coeffs_cr_plus_128[24] = film_grain->ar_coeffs_cr_plus_128[24]; + } + + av_log(avctx, AV_LOG_DEBUG, "Created frame parameters\n"); + + /* Workaround for a spec issue. */ + ap->dec = dec; + + return 0; +} + +/* Record one tile descriptor per tile of the current tile group + * (s->tg_start..s->tg_end) and hand the tile group data to the common + * Vulkan slice buffer. Returns 0 on success, a negative AVERROR on failure. */ +static int vk_av1_decode_slice(AVCodecContext *avctx, + const uint8_t *data, + uint32_t size) +{ + int err; + const AV1DecContext *s = avctx->priv_data; + AV1VulkanDecodePicture *ap = s->cur_frame.hwaccel_picture_private; + FFVulkanDecodePicture *vp = &ap->vp; + + for (int i = s->tg_start; i <= s->tg_end; i++) { + /* ap->tiles holds MAX_TILES entries; the tile count comes from the + * bitstream, so refuse frames that would overflow the array */ + if (ap->tile_list.nb_tiles >= MAX_TILES) { + av_log(avctx, AV_LOG_ERROR, "Too many tiles in frame, at most %d are supported\n", + MAX_TILES); + return AVERROR(ENOSYS); + } + + ap->tiles[ap->tile_list.nb_tiles] = (StdVideoAV1MESATile) { + .size = s->tile_group_info[i].tile_size, + .offset = s->tile_group_info[i].tile_offset, + .row = s->tile_group_info[i].tile_row, + .column = s->tile_group_info[i].tile_column, + .tg_start = s->tg_start, + .tg_end = s->tg_end, + }; + + err = ff_vk_decode_add_slice(avctx, vp, data, size, 0, + &ap->tile_list.nb_tiles, + &ap->tile_offsets); + if (err < 0) + return err; + +// ap->tiles[ap->tile_list.nb_tiles - 1].offset = ap->tile_offsets[ap->tile_list.nb_tiles - 1]; + } + + return 0; +} + +static int vk_av1_end_frame(AVCodecContext *avctx) +{ + const AV1DecContext *s = avctx->priv_data; + const AV1Frame *pic = &s->cur_frame; + AV1VulkanDecodePicture *ap = 
pic->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &ap->vp; + FFVulkanDecodePicture *rvp[AV1_NUM_REF_FRAMES] = { 0 }; + AVFrame *rav[AV1_NUM_REF_FRAMES] = { 0 }; + + for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) { + const AV1Frame *rp = ap->ref_src[i]; + AV1VulkanDecodePicture *rhp = rp->hwaccel_picture_private; + + rvp[i] = &rhp->vp; + rav[i] = ap->ref_src[i]->f; + } + + /* Cast so the argument always matches %lu, whatever the width of slices_size */ + av_log(avctx, AV_LOG_VERBOSE, "Decoding frame, %lu bytes, %i tiles\n", + (unsigned long)vp->slices_size, ap->tile_list.nb_tiles); + + return ff_vk_decode_frame(avctx, pic->f, vp, rav, rvp); +} + +/* Release the per-frame hwaccel data: return the frame id to the decoder's + * allocation mask, free the Vulkan frame resources, then the struct itself. */ +static void vk_av1_free_frame_priv(void *_hwctx, uint8_t *data) +{ + AVHWDeviceContext *hwctx = _hwctx; + AV1VulkanDecodePicture *ap = (AV1VulkanDecodePicture *)data; + + /* Workaround for a spec issue. Unsigned constant: shifting a signed 1 + * by 31 is undefined. */ + if (ap->frame_id_set) + ap->dec->frame_id_alloc_mask &= ~(1U << ap->frame_id); + + /* Free frame resources, this also destroys the session parameters. */ + ff_vk_decode_free_frame(hwctx, &ap->vp); + + /* Free frame context */ + av_free(ap); +} + +const AVHWAccel ff_av1_vulkan_hwaccel = { + .name = "av1_vulkan", + .type = AVMEDIA_TYPE_VIDEO, + .id = AV_CODEC_ID_AV1, + .pix_fmt = AV_PIX_FMT_VULKAN, + .start_frame = &vk_av1_start_frame, + .decode_slice = &vk_av1_decode_slice, + .end_frame = &vk_av1_end_frame, + .free_frame_priv = &vk_av1_free_frame_priv, + .frame_priv_data_size = sizeof(AV1VulkanDecodePicture), + .init = &ff_vk_decode_init, + .update_thread_context = &ff_vk_update_thread_context, + .decode_params = &ff_vk_params_changed, + .flush = &ff_vk_decode_flush, + .uninit = &ff_vk_decode_uninit, + .frame_params = &ff_vk_frame_params, + .priv_data_size = sizeof(FFVulkanDecodeContext), + + /* NOTE: Threading is intentionally disabled here. Due to the design of Vulkan, + * where frames are opaque to users, and mostly opaque for driver developers, + * there's an issue with current hardware accelerator implementations of AV1, + * where they require an internal index. 
With regular hwaccel APIs, this index + * is given to users as an opaque handle directly. With Vulkan, due to increased + * flexibility, this index cannot be present anywhere. + * The current implementation tracks the index for the driver and submits it + * as necessary information. Due to needing to modify the decoding context, + * which is not thread-safe, on frame free, threading is disabled. + * In the future, once this is fixed in the spec, the workarounds may be removed + * and threading enabled. */ + .caps_internal = HWACCEL_CAP_ASYNC_SAFE, +}; diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c index 9a82d6ad50130..30c4bbf0ce1f7 100644 --- a/libavcodec/vulkan_decode.c +++ b/libavcodec/vulkan_decode.c @@ -26,6 +26,9 @@ extern const VkExtensionProperties ff_vk_dec_h264_ext; #if CONFIG_HEVC_VULKAN_HWACCEL extern const VkExtensionProperties ff_vk_dec_hevc_ext; #endif +#if CONFIG_AV1_VULKAN_HWACCEL +extern const VkExtensionProperties ff_vk_dec_av1_ext; +#endif static const VkExtensionProperties *dec_ext[] = { #if CONFIG_H264_VULKAN_HWACCEL @@ -34,6 +37,9 @@ static const VkExtensionProperties *dec_ext[] = { #if CONFIG_HEVC_VULKAN_HWACCEL [AV_CODEC_ID_HEVC] = &ff_vk_dec_hevc_ext, #endif +#if CONFIG_AV1_VULKAN_HWACCEL + [AV_CODEC_ID_AV1] = &ff_vk_dec_av1_ext, +#endif }; int ff_vk_update_thread_context(AVCodecContext *dst, const AVCodecContext *src) @@ -622,6 +628,7 @@ static int vulkan_decode_check_init(AVCodecContext *avctx, AVBufferRef *frames_r VkVideoDecodeCapabilitiesKHR *dec_caps = &ctx->dec_caps; VkVideoDecodeH264ProfileInfoKHR *h264_profile = &ctx->h264_profile; VkVideoDecodeH264ProfileInfoKHR *h265_profile = &ctx->h265_profile; + VkVideoDecodeAV1ProfileInfoMESA *av1_profile = &ctx->av1_profile; VkVideoDecodeUsageInfoKHR *usage = &ctx->usage; VkVideoProfileInfoKHR *profile = &ctx->profile; VkVideoProfileListInfoKHR *profile_list = &ctx->profile_list; @@ -636,6 +643,9 @@ static int vulkan_decode_check_init(AVCodecContext *avctx, AVBufferRef 
*frames_r VkVideoDecodeH265CapabilitiesKHR h265_caps = { .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_CAPABILITIES_KHR, }; + VkVideoDecodeAV1CapabilitiesMESA av1_caps = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_CAPABILITIES_MESA, + }; VkVideoFormatPropertiesKHR *ret_info; uint32_t nb_out_fmts = 0; @@ -687,6 +697,12 @@ static int vulkan_decode_check_init(AVCodecContext *avctx, AVBufferRef *frames_r usage->pNext = h265_profile; h265_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_INFO_KHR; h265_profile->stdProfileIdc = cur_profile; + } else if (avctx->codec_id == AV_CODEC_ID_AV1) { + base_profile = STD_VIDEO_AV1_MESA_PROFILE_MAIN; + dec_caps->pNext = &av1_caps; + usage->pNext = av1_profile; + av1_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_PROFILE_INFO_MESA; + av1_profile->stdProfileIdc = cur_profile; } usage->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_USAGE_INFO_KHR; @@ -741,6 +757,7 @@ static int vulkan_decode_check_init(AVCodecContext *avctx, AVBufferRef *frames_r max_level = avctx->codec_id == AV_CODEC_ID_H264 ? h264_caps.maxLevelIdc : avctx->codec_id == AV_CODEC_ID_H265 ? h265_caps.maxLevelIdc : + avctx->codec_id == AV_CODEC_ID_AV1 ? 
av1_caps.maxLevelIdc : 0; if (ctx) { @@ -810,6 +827,11 @@ static int vulkan_decode_check_init(AVCodecContext *avctx, AVBufferRef *frames_r "VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR set " "but VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR is unset!\n"); return AVERROR_EXTERNAL; + } else if (!(dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR) && + avctx->codec_id == AV_CODEC_ID_AV1) { + av_log(avctx, AV_LOG_ERROR, "Cannot initialize Vulkan decoding session, buggy driver: " + "codec is AV1, but VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR isn't set!\n"); + return AVERROR_EXTERNAL; } /* TODO: make dedicated_dpb tunable */ @@ -819,6 +841,7 @@ static int vulkan_decode_check_init(AVCodecContext *avctx, AVBufferRef *frames_r if (ctx) { ctx->dedicated_dpb = dedicated_dpb; ctx->layered_dpb = layered_dpb; + ctx->external_fg = av1_caps.flags & VK_VIDEO_DECODE_AV1_CAPABILITY_EXTERNAL_FILM_GRAIN_MESA; ctx->init = 1; } @@ -983,10 +1006,14 @@ int ff_vk_decode_init(AVCodecContext *avctx) VkVideoDecodeH265SessionParametersCreateInfoKHR h265_params = { .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_CREATE_INFO_KHR, }; + VkVideoDecodeAV1SessionParametersCreateInfoMESA av1_params = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_SESSION_PARAMETERS_CREATE_INFO_MESA, + }; VkVideoSessionParametersCreateInfoKHR session_params_create = { .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR, .pNext = avctx->codec_id == AV_CODEC_ID_H264 ? (void *)&h264_params : avctx->codec_id == AV_CODEC_ID_HEVC ? (void *)&h265_params : + avctx->codec_id == AV_CODEC_ID_AV1 ? 
(void *)&av1_params : NULL, }; VkVideoSessionCreateInfoKHR session_create = { @@ -1078,7 +1105,7 @@ int ff_vk_decode_init(AVCodecContext *avctx) } /* If doing an out-of-place decoding, create a DPB pool */ - if (ctx->dedicated_dpb) { + if (ctx->dedicated_dpb || avctx->codec_id == AV_CODEC_ID_AV1) { AVHWFramesContext *dpb_frames; AVVulkanFramesContext *dpb_hwfc; diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h index 406fdc6792223..b6472311e43eb 100644 --- a/libavcodec/vulkan_decode.h +++ b/libavcodec/vulkan_decode.h @@ -32,6 +32,7 @@ typedef struct FFVulkanDecodeShared { int dedicated_dpb; /* Oddity #1 - separate DPB images */ int layered_dpb; /* Madness #1 - layered DPB images */ + int external_fg; /* Oddity #2 - hardware can't apply film grain */ AVBufferRef *dpb_hwfc_ref; /* Only used for dedicated_dpb */ @@ -63,6 +64,7 @@ typedef struct FFVulkanDecodeContext { /* Thread-synchronized data below */ AVBufferRef *session_params; int params_changed; + uint32_t frame_id_alloc_mask; /* For AV1 only */ } FFVulkanDecodeContext; typedef struct FFVulkanDecodePicture { diff --git a/libavcodec/vulkan_video.c b/libavcodec/vulkan_video.c index e4624864ab709..f33bb33a21676 100644 --- a/libavcodec/vulkan_video.c +++ b/libavcodec/vulkan_video.c @@ -43,6 +43,12 @@ const FFVkCodecMap ff_vk_codec_map[AV_CODEC_ID_FIRST_AUDIO] = { FF_VK_EXT_VIDEO_DECODE_H265, VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR }, + [AV_CODEC_ID_AV1] = { + 0, + 0, + FF_VK_EXT_VIDEO_DECODE_AV1, + 0x01000000 /* TODO fix this */ + }, }; #define ASPECT_2PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT) diff --git a/libavcodec/vulkan_video.h b/libavcodec/vulkan_video.h index c10fcdcca1ba7..35c0f90d28aa7 100644 --- a/libavcodec/vulkan_video.h +++ b/libavcodec/vulkan_video.h @@ -23,6 +23,8 @@ #include "vulkan.h" #include +#include "vulkan_video_codec_av1std.h" +#include "vulkan_video_codec_av1std_decode.h" #define CODEC_VER_MAJ(ver) (ver >> 22) #define CODEC_VER_MIN(ver) ((ver >> 
12) & ((1 << 10) - 1)) diff --git a/libavcodec/vulkan_video_codec_av1std.h b/libavcodec/vulkan_video_codec_av1std.h new file mode 100644 index 0000000000000..ee64b15cb7a7d --- /dev/null +++ b/libavcodec/vulkan_video_codec_av1std.h @@ -0,0 +1,394 @@ +#ifndef VULKAN_VIDEO_CODEC_AV1STD_H_ +#define VULKAN_VIDEO_CODEC_AV1STD_H_ 1 + + +/* +** Copyright 2015-2022 The Khronos Group Inc. +** +** SPDX-License-Identifier: Apache-2.0 +*/ + +/* +** This header is NOT YET generated from the Khronos Vulkan XML API Registry. +** +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#define vulkan_video_codec_av1std 1 + +#define VK_MAKE_VIDEO_STD_VERSION(major, minor, patch) \ + ((((uint32_t)(major)) << 22) | (((uint32_t)(minor)) << 12) | ((uint32_t)(patch))) +#define VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_API_VERSION_0_1_0 VK_MAKE_VIDEO_STD_VERSION(0, 1, 0) +#define VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_API_VERSION_0_1_0 +#define VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME "VK_STD_vulkan_video_codec_av1_decode" + +typedef enum StdVideoAV1MESAProfile { + STD_VIDEO_AV1_MESA_PROFILE_MAIN = 0, + STD_VIDEO_AV1_MESA_PROFILE_HIGH = 1, + STD_VIDEO_AV1_MESA_PROFILE_PROFESSIONAL = 2, +} StdVideoAV1MESAProfile; + +typedef enum StdVideoAV1MESALevel { + STD_VIDEO_AV1_MESA_LEVEL_2_0 = 0, + STD_VIDEO_AV1_MESA_LEVEL_2_1 = 1, + STD_VIDEO_AV1_MESA_LEVEL_2_2 = 2, + STD_VIDEO_AV1_MESA_LEVEL_2_3 = 3, + STD_VIDEO_AV1_MESA_LEVEL_3_0 = 4, + STD_VIDEO_AV1_MESA_LEVEL_3_1 = 5, + STD_VIDEO_AV1_MESA_LEVEL_3_2 = 6, + STD_VIDEO_AV1_MESA_LEVEL_3_3 = 7, + STD_VIDEO_AV1_MESA_LEVEL_4_0 = 8, + STD_VIDEO_AV1_MESA_LEVEL_4_1 = 9, + STD_VIDEO_AV1_MESA_LEVEL_4_2 = 10, + STD_VIDEO_AV1_MESA_LEVEL_4_3 = 11, + STD_VIDEO_AV1_MESA_LEVEL_5_0 = 12, + STD_VIDEO_AV1_MESA_LEVEL_5_1 = 13, + STD_VIDEO_AV1_MESA_LEVEL_5_2 = 14, + STD_VIDEO_AV1_MESA_LEVEL_5_3 = 15, + STD_VIDEO_AV1_MESA_LEVEL_6_0 = 16, + STD_VIDEO_AV1_MESA_LEVEL_6_1 = 17, + STD_VIDEO_AV1_MESA_LEVEL_6_2 = 18, + 
STD_VIDEO_AV1_MESA_LEVEL_6_3 = 19, + STD_VIDEO_AV1_MESA_LEVEL_7_0 = 20, + STD_VIDEO_AV1_MESA_LEVEL_7_1 = 21, + STD_VIDEO_AV1_MESA_LEVEL_7_2 = 22, + STD_VIDEO_AV1_MESA_LEVEL_7_3 = 23, + STD_VIDEO_AV1_MESA_LEVEL_MAX = 31, +} StdVideoAV1MESALevel; + +typedef struct StdVideoAV1MESAFilmGrainFlags { + uint8_t apply_grain; + uint8_t chroma_scaling_from_luma; + uint8_t overlap_flag; + uint8_t clip_to_restricted_range; +} StdVideoAV1MESAFilmGrainFlags; + +typedef struct StdVideoAV1MESAFilmGrainParameters { + StdVideoAV1MESAFilmGrainFlags flags; + uint32_t grain_scaling_minus_8; + uint32_t ar_coeff_lag; + uint32_t ar_coeff_shift_minus_6; + uint32_t grain_scale_shift; + + uint16_t grain_seed; + uint8_t num_y_points; + uint8_t point_y_value[14]; + uint8_t point_y_scaling[14]; + + uint8_t num_cb_points; + uint8_t point_cb_value[10]; + uint8_t point_cb_scaling[10]; + + uint8_t num_cr_points; + uint8_t point_cr_value[10]; + uint8_t point_cr_scaling[10]; + + int8_t ar_coeffs_y_plus_128[24]; + int8_t ar_coeffs_cb_plus_128[25]; + int8_t ar_coeffs_cr_plus_128[25]; + uint8_t cb_mult; + uint8_t cb_luma_mult; + uint16_t cb_offset; + uint8_t cr_mult; + uint8_t cr_luma_mult; + uint16_t cr_offset; +} StdVideoAV1MESAFilmGrainParameters; + +typedef struct StdVideoAV1MESAGlobalMotionFlags { + uint8_t gm_invalid; +} StdVideoAV1MESAGlobalMotionFlags; + +typedef struct StdVideoAV1MESAGlobalMotion { + StdVideoAV1MESAGlobalMotionFlags flags; + uint8_t gm_type; + uint32_t gm_params[6]; +} StdVideoAV1MESAGlobalMotion; + +typedef struct StdVideoAV1MESALoopRestoration { + uint8_t lr_type[3]; + uint8_t lr_unit_shift; + uint8_t lr_uv_shift; +} StdVideoAV1MESALoopRestoration; + +typedef struct StdVideoAV1MESATileInfoFlags { + uint8_t uniform_tile_spacing_flag; +} StdVideoAV1MESATileInfoFlags; + +typedef struct StdVideoAV1MESATileInfo { + StdVideoAV1MESATileInfoFlags flags; + uint8_t tile_cols; + uint8_t tile_rows; + uint8_t tile_start_col_sb[64]; + uint8_t tile_start_row_sb[64]; + uint8_t 
width_in_sbs_minus_1[64]; + uint8_t height_in_sbs_minus_1[64]; + uint16_t context_update_tile_id; + uint8_t tile_size_bytes_minus1; +} StdVideoAV1MESATileInfo; + +typedef struct StdVideoAV1MESAQuantizationFlags { + uint8_t using_qmatrix; +} StdVideoAV1MESAQuantizationFlags; + +typedef struct StdVideoAV1MESAQuantization { + StdVideoAV1MESAQuantizationFlags flags; + uint8_t base_q_idx; + int8_t delta_q_y_dc; + uint8_t diff_uv_delta; + int8_t delta_q_u_dc; + int8_t delta_q_u_ac; + int8_t delta_q_v_dc; + int8_t delta_q_v_ac; + uint8_t qm_y; + uint8_t qm_u; + uint8_t qm_v; +} StdVideoAV1MESAQuantization; + +typedef struct StdVideoAV1MESACDEF { + uint8_t damping_minus_3; + uint8_t bits; + uint8_t y_pri_strength[8]; + uint8_t y_sec_strength[8]; + uint8_t uv_pri_strength[8]; + uint8_t uv_sec_strength[8]; +} StdVideoAV1MESACDEF; + +typedef struct StdVideoAV1MESADeltaQFlags { + uint8_t delta_lf_present; + uint8_t delta_lf_multi; +} StdVideoAV1MESADeltaQFlags; + +typedef struct StdVideoAV1MESADeltaQ { + StdVideoAV1MESADeltaQFlags flags; + uint8_t delta_q_res; + uint8_t delta_lf_res; +} StdVideoAV1MESADeltaQ; + +typedef struct StdVideoAV1MESASegmentationFlags { + uint8_t enabled; + uint8_t update_map; + uint8_t temporal_update; + uint8_t update_data; +} StdVideoAV1MESASegmentationFlags; + +typedef struct StdVideoAV1MESASegmentation { + StdVideoAV1MESASegmentationFlags flags; + uint8_t feature_enabled_bits[8]; + int16_t feature_data[8][8]; +} StdVideoAV1MESASegmentation; + +typedef struct StdVideoAV1MESALoopFilterFlags { + uint8_t delta_enabled; + uint8_t delta_update; +} StdVideoAV1MESALoopFilterFlags; + +typedef struct StdVideoAV1MESALoopFilter { + StdVideoAV1MESALoopFilterFlags flags; + uint8_t level[4]; + uint8_t sharpness; + int8_t ref_deltas[8]; + int8_t mode_deltas[2]; +} StdVideoAV1MESALoopFilter; + +typedef struct StdVideoAV1MESAFrameHeaderFlags { + uint8_t error_resilient_mode; + uint8_t disable_cdf_update; + uint8_t use_superres; + uint8_t 
render_and_frame_size_different; + uint8_t allow_screen_content_tools; + uint8_t is_filter_switchable; + uint8_t force_integer_mv; + uint8_t frame_size_override_flag; + uint8_t buffer_removal_time_present_flag; + uint8_t allow_intrabc; + uint8_t frame_refs_short_signaling; + uint8_t allow_high_precision_mv; + uint8_t is_motion_mode_switchable; + uint8_t use_ref_frame_mvs; + uint8_t disable_frame_end_update_cdf; + uint8_t allow_warped_motion; + uint8_t reduced_tx_set; + uint8_t reference_select; + uint8_t skip_mode_present; + uint8_t delta_q_present; + uint8_t UsesLr; +} StdVideoAV1MESAFrameHeaderFlags; + +typedef struct StdVideoAV1MESAFrameHeader { + StdVideoAV1MESAFrameHeaderFlags flags; + + uint32_t frame_presentation_time; + uint32_t display_frame_id; + uint32_t current_frame_id; + uint8_t frame_to_show_map_idx; + uint8_t frame_type; + uint8_t order_hint; + uint8_t primary_ref_frame; + uint16_t frame_width_minus_1; + uint16_t frame_height_minus_1; + uint16_t render_width_minus_1; + uint16_t render_height_minus_1; + uint8_t coded_denom; + + uint8_t refresh_frame_flags; + uint8_t ref_order_hint[8]; + int8_t ref_frame_idx[7]; + uint32_t delta_frame_id_minus1[7]; + + uint8_t interpolation_filter; + uint8_t tx_mode; + + StdVideoAV1MESATileInfo tiling; + StdVideoAV1MESAQuantization quantization; + StdVideoAV1MESASegmentation segmentation; + StdVideoAV1MESADeltaQ delta_q; + StdVideoAV1MESALoopFilter loop_filter; + StdVideoAV1MESACDEF cdef; + StdVideoAV1MESALoopRestoration lr; + StdVideoAV1MESAGlobalMotion global_motion[8]; // One per ref frame + StdVideoAV1MESAFilmGrainParameters film_grain; +} StdVideoAV1MESAFrameHeader; + +typedef struct StdVideoAV1MESAScreenCoding { + uint8_t seq_force_screen_content_tools; +} StdVideoAV1MESAScreenCoding; + +typedef struct StdVideoAV1MESATimingInfoFlags { + uint8_t equal_picture_interval; +} StdVideoAV1MESATimingInfoFlags; + +typedef struct StdVideoAV1MESATimingInfo { + StdVideoAV1MESATimingInfoFlags flags; + uint32_t 
num_units_in_display_tick; + uint32_t time_scale; + uint32_t num_ticks_per_picture_minus_1; +} StdVideoAV1MESATimingInfo; + +typedef struct StdVideoAV1MESAColorConfigFlags { + uint8_t mono_chrome; + uint8_t color_range; + uint8_t separate_uv_delta_q; +} StdVideoAV1MESAColorConfigFlags; + +typedef struct StdVideoAV1MESAColorConfig { + StdVideoAV1MESAColorConfigFlags flags; + uint8_t bit_depth; + uint8_t subsampling_x; + uint8_t subsampling_y; +} StdVideoAV1MESAColorConfig; + +typedef struct StdVideoAV1MESASequenceHeaderFlags { + uint8_t still_picture; + uint8_t reduced_still_picture_header; + uint8_t use_128x128_superblock; + uint8_t enable_filter_intra; + uint8_t enable_intra_edge_filter; + uint8_t enable_interintra_compound; + uint8_t enable_masked_compound; + uint8_t enable_warped_motion; + uint8_t enable_dual_filter; + uint8_t enable_order_hint; + uint8_t enable_jnt_comp; + uint8_t enable_ref_frame_mvs; + uint8_t frame_id_numbers_present_flag; + uint8_t enable_superres; + uint8_t enable_cdef; + uint8_t enable_restoration; + uint8_t film_grain_params_present; + uint8_t timing_info_present_flag; + uint8_t initial_display_delay_present_flag; +} StdVideoAV1MESASequenceHeaderFlags; + +typedef struct StdVideoAV1MESASequenceHeader { + StdVideoAV1MESASequenceHeaderFlags flags; + + StdVideoAV1MESAProfile seq_profile; + uint8_t frame_width_bits_minus_1; + uint8_t frame_height_bits_minus_1; + uint16_t max_frame_width_minus_1; + uint16_t max_frame_height_minus_1; + uint8_t delta_frame_id_length_minus_2; + uint8_t additional_frame_id_length_minus_1; + uint8_t order_hint_bits_minus_1; + uint8_t seq_choose_integer_mv; + uint8_t seq_force_integer_mv; + + StdVideoAV1MESATimingInfo timing_info; + StdVideoAV1MESAColorConfig color_config; +} StdVideoAV1MESASequenceHeader; + +typedef struct StdVideoAV1MESATile { + uint16_t tg_start; + uint16_t tg_end; + uint16_t row; + uint16_t column; + uint32_t size; + uint32_t offset; +} StdVideoAV1MESATile; + +typedef struct 
StdVideoAV1MESATileList { + StdVideoAV1MESATile *tile_list; + uint32_t nb_tiles; +} StdVideoAV1MESATileList; + +typedef struct VkVideoDecodeAV1PictureInfoMESA { + VkStructureType sType; + const void *pNext; + StdVideoAV1MESAFrameHeader *frame_header; + StdVideoAV1MESATileList *tile_list; + uint8_t skip_mode_frame_idx[2]; +} VkVideoDecodeAV1PictureInfoMESA; + +typedef struct VkVideoDecodeAV1DpbSlotInfoMESA { + VkStructureType sType; + const void *pNext; + uint8_t frameIdx; + uint8_t ref_order_hint[7]; + uint8_t disable_frame_end_update_cdf; +} VkVideoDecodeAV1DpbSlotInfoMESA; + +typedef struct VkVideoDecodeAV1SessionParametersAddInfoMESA { + VkStructureType sType; + const void *pNext; + StdVideoAV1MESASequenceHeader *sequence_header; +} VkVideoDecodeAV1SessionParametersAddInfoMESA; + +typedef struct VkVideoDecodeAV1SessionParametersCreateInfoMESA { + VkStructureType sType; + const void *pNext; + const VkVideoDecodeAV1SessionParametersAddInfoMESA *pParametersAddInfo; +} VkVideoDecodeAV1SessionParametersCreateInfoMESA; + +typedef struct VkVideoDecodeAV1ProfileInfoMESA { + VkStructureType sType; + const void *pNext; + StdVideoAV1MESAProfile stdProfileIdc; +} VkVideoDecodeAV1ProfileInfoMESA; + +typedef enum VkVideoDecodeAV1CapabilityFlagBitsMESA { + VK_VIDEO_DECODE_AV1_CAPABILITY_EXTERNAL_FILM_GRAIN_MESA = 0x00000001, + VK_VIDEO_DECODE_AV1_CAPABILITY_FLAG_BITS_MAX_ENUM_MESA = 0x7FFFFFFF +} VkVideoDecodeAV1CapabilityFlagBitsMESA; +typedef VkFlags VkVideoDecodeAV1CapabilityFlagsMESA; + +typedef struct VkVideoDecodeAV1CapabilitiesMESA { + VkStructureType sType; + const void *pNext; + VkVideoDecodeAV1CapabilityFlagsMESA flags; + StdVideoAV1MESALevel maxLevelIdc; +} VkVideoDecodeAV1CapabilitiesMESA; + +#define VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_PICTURE_INFO_MESA 1000509000 +#define VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_SESSION_PARAMETERS_CREATE_INFO_MESA 1000509001 +#define VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_SESSION_PARAMETERS_ADD_INFO_MESA 1000509002 +#define 
VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_DPB_SLOT_INFO_MESA 1000509003 +#define VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_CAPABILITIES_MESA 1000509004 +#define VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_PROFILE_INFO_MESA 1000509005 + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/libavcodec/vulkan_video_codec_av1std_decode.h b/libavcodec/vulkan_video_codec_av1std_decode.h new file mode 100644 index 0000000000000..fba46b63b1093 --- /dev/null +++ b/libavcodec/vulkan_video_codec_av1std_decode.h @@ -0,0 +1,27 @@ +#ifndef VULKAN_VIDEO_CODEC_AV1STD_DECODE_H_ +#define VULKAN_VIDEO_CODEC_AV1STD_DECODE_H_ 1 + + +/* +** Copyright 2015-2022 The Khronos Group Inc. +** +** SPDX-License-Identifier: Apache-2.0 +*/ + +/* +** This header is NOT YET generated from the Khronos Vulkan XML API Registry. +** +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#define vulkan_video_codec_av1std_decode 1 + + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 7ebab02d9f8b3..e804126e113bf 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -425,6 +425,7 @@ static const VulkanOptExtension optional_device_exts[] = { { VK_EXT_VIDEO_ENCODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H265 }, #endif { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 }, + { "VK_MESA_video_decode_av1", FF_VK_EXT_VIDEO_DECODE_AV1 }, }; static VkBool32 VKAPI_CALL vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h index 20096b1506a3f..1f96671eddcd3 100644 --- a/libavutil/vulkan_functions.h +++ b/libavutil/vulkan_functions.h @@ -46,6 +46,7 @@ typedef enum FFVulkanExtensions { FF_VK_EXT_VIDEO_ENCODE_QUEUE = 1ULL << 14, /* VK_KHR_video_encode_queue */ FF_VK_EXT_VIDEO_ENCODE_H264 = 1ULL << 15, /* VK_EXT_video_encode_h264 */ FF_VK_EXT_VIDEO_ENCODE_H265 = 1ULL << 16, /* VK_EXT_video_encode_h265 */ + FF_VK_EXT_VIDEO_DECODE_AV1 = 1ULL << 17, 
/* VK_MESA_video_decode_av1 */ FF_VK_EXT_NO_FLAG = 1ULL << 31, } FFVulkanExtensions; diff --git a/libavutil/vulkan_loader.h b/libavutil/vulkan_loader.h index 76da10d477364..78c190ebb06a0 100644 --- a/libavutil/vulkan_loader.h +++ b/libavutil/vulkan_loader.h @@ -61,6 +61,7 @@ static inline uint64_t ff_vk_extensions_to_mask(const char * const *extensions, { VK_EXT_VIDEO_ENCODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H265 }, #endif { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 }, + { "VK_MESA_video_decode_av1", FF_VK_EXT_VIDEO_DECODE_AV1 }, }; FFVulkanExtensions mask = 0x0; From b42939e00a127daee1359c75903a6cc2edd3c801 Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 22 Feb 2023 22:21:43 +0100 Subject: [PATCH 91/98] lavfi: add color_vulkan filter --- configure | 1 + libavfilter/Makefile | 1 + libavfilter/allfilters.c | 1 + libavfilter/vsrc_testsrc_vulkan.c | 373 ++++++++++++++++++++++++++++++ 4 files changed, 376 insertions(+) create mode 100644 libavfilter/vsrc_testsrc_vulkan.c diff --git a/configure b/configure index 817752ecf9b23..d709b83a617ad 100755 --- a/configure +++ b/configure @@ -3653,6 +3653,7 @@ boxblur_opencl_filter_deps="opencl gpl" bs2b_filter_deps="libbs2b" bwdif_vulkan_filter_deps="vulkan spirv_compiler" chromaber_vulkan_filter_deps="vulkan spirv_compiler" +color_vulkan_filter_deps="vulkan spirv_compiler" colorkey_opencl_filter_deps="opencl" colormatrix_filter_deps="gpl" convolution_opencl_filter_deps="opencl" diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 4bc30c37f88c2..aa1d9c0497614 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -592,6 +592,7 @@ OBJS-$(CONFIG_RGBTESTSRC_FILTER) += vsrc_testsrc.o OBJS-$(CONFIG_SIERPINSKI_FILTER) += vsrc_sierpinski.o OBJS-$(CONFIG_SMPTEBARS_FILTER) += vsrc_testsrc.o OBJS-$(CONFIG_SMPTEHDBARS_FILTER) += vsrc_testsrc.o +OBJS-$(CONFIG_COLOR_VULKAN_FILTER) += vsrc_testsrc_vulkan.o vulkan.o vulkan_filter.o OBJS-$(CONFIG_TESTSRC_FILTER) += vsrc_testsrc.o 
OBJS-$(CONFIG_TESTSRC2_FILTER) += vsrc_testsrc.o OBJS-$(CONFIG_YUVTESTSRC_FILTER) += vsrc_testsrc.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index 8f88c1443f228..30a8830f68659 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -538,6 +538,7 @@ extern const AVFilter ff_vsrc_allrgb; extern const AVFilter ff_vsrc_allyuv; extern const AVFilter ff_vsrc_cellauto; extern const AVFilter ff_vsrc_color; +extern const AVFilter ff_vsrc_color_vulkan; extern const AVFilter ff_vsrc_colorchart; extern const AVFilter ff_vsrc_colorspectrum; extern const AVFilter ff_vsrc_coreimagesrc; diff --git a/libavfilter/vsrc_testsrc_vulkan.c b/libavfilter/vsrc_testsrc_vulkan.c new file mode 100644 index 0000000000000..52b6df39f5b50 --- /dev/null +++ b/libavfilter/vsrc_testsrc_vulkan.c @@ -0,0 +1,373 @@ +/* + * Copyright (c) Lynne + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/random_seed.h"
+#include "libavutil/csp.h"
+#include "libavutil/opt.h"
+#include "vulkan_filter.h"
+#include "vulkan_spirv.h"
+#include "internal.h"
+#include "filters.h"
+#include "colorspace.h"
+
+enum TestSrcVulkanMode {
+    TESTSRC_COLOR,
+};
+
+/* Push constants handed to the compute shader; must match the GLSL
+ * pushConstants block emitted in init_filter(). */
+typedef struct TestSrcVulkanPushData {
+    float color_comp[4];
+} TestSrcVulkanPushData;
+
+typedef struct TestSrcVulkanContext {
+    FFVulkanContext vkctx;
+
+    int initialized;
+    FFVulkanPipeline pl;
+    FFVkExecPool e;
+    FFVkQueueFamilyCtx qf;
+    FFVkSPIRVShader shd;
+
+    /* Only used by color_vulkan */
+    uint8_t color_rgba[4];
+
+    TestSrcVulkanPushData opts;
+
+    int w, h;
+    int pw, ph;
+    char *out_format_string;
+    enum AVColorRange out_range;
+    unsigned int nb_frame;
+    AVRational time_base, frame_rate;
+    int64_t pts;
+    int64_t duration;           ///< duration expressed in microseconds
+    AVRational sar;             ///< sample aspect ratio
+    int draw_once;              ///< draw only the first frame, always put out the same picture
+    int draw_once_reset;        ///< draw only the first frame or in case of reset
+    AVFrame *picref;            ///< cached reference containing the painted picture
+} TestSrcVulkanContext;
+
+/**
+ * Generate, compile and register the compute pipeline that paints frames.
+ *
+ * The shader receives the colour as a vec4 push constant and stores it into
+ * every plane of the output image. The push constant values are computed
+ * here from the "color" option and kept in s->opts for later dispatches.
+ *
+ * @param ctx  filter context; ctx->priv is a TestSrcVulkanContext
+ * @param mode source to generate (only TESTSRC_COLOR exists)
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static av_cold int init_filter(AVFilterContext *ctx, enum TestSrcVulkanMode mode)
+{
+    int err;
+    uint8_t *spv_data;
+    size_t spv_len;
+    /* Must start out NULL: the cleanup code below tests it even when the
+     * shader was never compiled. */
+    void *spv_opaque = NULL;
+    TestSrcVulkanContext *s = ctx->priv;
+    FFVulkanContext *vkctx = &s->vkctx;
+    const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+    FFVkSPIRVShader *shd = &s->shd;
+    FFVkSPIRVCompiler *spv;
+    FFVulkanDescriptorSetBinding *desc_set;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->vkctx.output_format);
+
+    spv = ff_vk_spirv_init();
+    if (!spv) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+    RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+    RET(ff_vk_shader_init(&s->pl, &s->shd, "testsrc_compute",
+                          VK_SHADER_STAGE_COMPUTE_BIT, 0));
+
+    ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1);
+
+    /* Layout mirrors TestSrcVulkanPushData (four floats) */
+    GLSLC(0, layout(push_constant, std430) uniform pushConstants {        );
+    GLSLC(1,    vec4 color_comp;                                          );
+    GLSLC(0, };                                                           );
+    GLSLC(0,                                                              );
+
+    ff_vk_add_push_constant(&s->pl, 0, sizeof(s->opts),
+                            VK_SHADER_STAGE_COMPUTE_BIT);
+
+    desc_set = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name       = "output_img",
+            .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
+            .mem_quali  = "writeonly",
+            .dimensions = 2,
+            .elems      = planes,
+            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+    };
+
+    RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc_set, 1, 0, 0));
+
+    GLSLC(0, void main()                                                  );
+    GLSLC(0, {                                                            );
+    GLSLC(1,     ivec2 pos = ivec2(gl_GlobalInvocationID.xy);             );
+    if (mode == TESTSRC_COLOR) {
+        double rgb2yuv[3][3];
+        double rgbad[4];
+        double yuvad[4];
+        const int is_rgb = !!(desc->flags & AV_PIX_FMT_FLAG_RGB);
+
+        /* The conversion matrix is only consumed for non-RGB output, so it
+         * is only built (and luma coefficients only required) in that case. */
+        if (!is_rgb) {
+            const AVLumaCoefficients *luma =
+                av_csp_luma_coeffs_from_avcsp(AVCOL_SPC_SMPTE170M);
+            if (!luma) {
+                err = AVERROR(EINVAL);
+                goto fail;
+            }
+            ff_fill_rgb2yuv_table(luma, rgb2yuv);
+        }
+
+        /* 8-bit option value -> normalized 0..1 */
+        for (int i = 0; i < 4; i++)
+            rgbad[i] = s->color_rgba[i] / 255.0;
+
+        if (!is_rgb)
+            ff_matrix_mul_3x3_vec(yuvad, rgbad, rgb2yuv);
+        else
+            memcpy(yuvad, rgbad, sizeof(rgbad));
+
+        yuvad[3] = rgbad[3];
+
+        for (int i = 0; i < 3; i++) {
+            int chroma = (!is_rgb && i > 0);
+            if (s->out_range == AVCOL_RANGE_MPEG) {
+                yuvad[i] *= (chroma ? 224.0 : 219.0) / 255.0;
+                yuvad[i] += (chroma ? 128.0 :  16.0) / 255.0;
+            } else if (chroma) {
+                yuvad[i] += 0.5;
+            }
+        }
+
+        /* Ensure we place the alpha appropriately for gray formats */
+        if (desc->nb_components <= 2)
+            yuvad[1] = yuvad[3];
+
+        /* yuvad[] is already normalized; dividing by 255 a second time
+         * would squash every colour to (almost) zero. */
+        for (int i = 0; i < 4; i++)
+            s->opts.color_comp[i] = yuvad[i];
+
+        GLSLC(1,     vec4 r;                                              );
+        for (int i = 0; i < planes; i++) {
+            for (int c = 0; c < desc->nb_components; c++) {
+                if (desc->comp[c].plane == i) {
+                    /* Position of the component inside the plane's texel */
+                    int off = desc->comp[c].offset / (FFALIGN(desc->comp[c].depth, 8)/8);
+                    /* color_comp[] is indexed by component, not by plane */
+                    GLSLF(1, r[%i] = color_comp[%i];                   ,off, c);
+                    GLSLC(0,                                              );
+                }
+            }
+            GLSLF(1, imageStore(output_img[%i], pos, r);                ,i);
+            GLSLC(0,                                                      );
+        }
+    }
+    GLSLC(0, }                                                            );
+
+    RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main",
+                            &spv_opaque));
+    RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
+
+    RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd));
+    RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl));
+
+    s->initialized = 1;
+    err = 0;
+
+fail:
+    /* spv is a local that is never stored in the context, so it has to be
+     * released before returning on the success path as well. */
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+    if (spv)
+        spv->uninit(&spv);
+
+    return err;
+}
+
+/**
+ * Produce one output frame when the output link wants one.
+ *
+ * Lazily initializes the pipeline on first use, signals EOF once the
+ * configured duration has elapsed, and otherwise allocates a hardware frame,
+ * paints it with the compute shader and sends it downstream. With draw_once
+ * set, a single painted frame is cached in s->picref and cloned.
+ *
+ * @return the result of ff_filter_frame(), 0 after signalling EOF,
+ *         FFERROR_NOT_READY if no frame is wanted, or a negative AVERROR code
+ */
+static int testsrc_vulkan_activate(AVFilterContext *ctx)
+{
+    int err;
+    AVFilterLink *outlink = ctx->outputs[0];
+    TestSrcVulkanContext *s = ctx->priv;
+    AVFrame *frame;
+
+    if (!s->initialized) {
+        enum TestSrcVulkanMode mode = TESTSRC_COLOR;
+        err = init_filter(ctx, mode);
+        if (err < 0)
+            return err;
+    }
+
+    if (!ff_outlink_frame_wanted(outlink))
+        return FFERROR_NOT_READY;
+    if (s->duration >= 0 &&
+        av_rescale_q(s->pts, s->time_base, AV_TIME_BASE_Q) >= s->duration) {
+        ff_outlink_set_status(outlink, AVERROR_EOF, s->pts);
+        return 0;
+    }
+
+    if (s->draw_once) {
+        if (s->draw_once_reset) {
+            av_frame_free(&s->picref);
+            s->draw_once_reset = 0;
+        }
+        if (!s->picref) {
+            s->picref = ff_get_video_buffer(outlink, s->w, s->h);
+            if (!s->picref)
+                return AVERROR(ENOMEM);
+
+            /* Paint the cached picture itself; "frame" is not set yet. */
+            err = ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, s->picref, NULL,
+                                              NULL, &s->opts, sizeof(s->opts));
+            if (err < 0) {
+                /* Do not keep an unpainted picture around for later reuse */
+                av_frame_free(&s->picref);
+                return err;
+            }
+        }
+        frame = av_frame_clone(s->picref);
+    } else {
+        frame = ff_get_video_buffer(outlink, s->w, s->h);
+    }
+
+    if (!frame)
+        return AVERROR(ENOMEM);
+
+    frame->pts                 = s->pts;
+    frame->duration            = 1;
+    frame->key_frame           = 1;
+    frame->interlaced_frame    = 0;
+    frame->pict_type           = AV_PICTURE_TYPE_I;
+    frame->sample_aspect_ratio = s->sar;
+    if (!s->draw_once) {
+        err = ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, frame, NULL,
+                                          NULL, &s->opts, sizeof(s->opts));
+        if (err < 0) {
+            av_frame_free(&frame);
+            return err;
+        }
+    }
+
+    s->pts++;
+    s->nb_frame++;
+
+    return ff_filter_frame(outlink, frame);
+}
+
+/**
+ * Configure the output link: pick the software format of the hardware
+ * frames (yuv444p when unset; any other explicit value is rejected), set up
+ * the Vulkan frames context and copy size, SAR, frame rate and time base
+ * from the options to the link.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int testsrc_vulkan_config_props(AVFilterLink *outlink)
+{
+    int err;
+    TestSrcVulkanContext *s = outlink->src->priv;
+    FFVulkanContext *vkctx = &s->vkctx;
+
+    if (!s->out_format_string) {
+        vkctx->output_format = AV_PIX_FMT_YUV444P;
+    } else {
+        vkctx->output_format = av_get_pix_fmt(s->out_format_string);
+        if (vkctx->output_format == AV_PIX_FMT_NONE) {
+            av_log(vkctx, AV_LOG_ERROR, "Invalid output format.\n");
+            return AVERROR(EINVAL);
+        }
+
+        if (vkctx->output_format != AV_PIX_FMT_YUV444P)
+            return AVERROR(EINVAL);
+    }
+
+    err = ff_vk_filter_init_context(outlink->src, vkctx, NULL,
+                                    s->w, s->h, vkctx->output_format);
+    if (err < 0)
+        return err;
+
+    outlink->hw_frames_ctx = av_buffer_ref(vkctx->frames_ref);
+    if (!outlink->hw_frames_ctx)
+        return AVERROR(ENOMEM);
+
+    /* One tick per frame: pts is simply a frame counter */
+    s->time_base = av_inv_q(s->frame_rate);
+    s->nb_frame = 0;
+    s->pts = 0;
+
+    outlink->w = s->w;
+    outlink->h = s->h;
+    outlink->sample_aspect_ratio = s->sar;
+    outlink->frame_rate = s->frame_rate;
+    outlink->time_base = s->time_base;
+
+    return 0;
+}
+
+/**
+ * Release the cached picture, the execution pool, the pipeline, the shader
+ * and the Vulkan context, and mark the filter as uninitialized.
+ */
+static void testsrc_vulkan_uninit(AVFilterContext *avctx)
+{
+    TestSrcVulkanContext *s = avctx->priv;
+    FFVulkanContext *vkctx = &s->vkctx;
+
+    av_frame_free(&s->picref);
+
+    ff_vk_exec_pool_free(vkctx, &s->e);
+    ff_vk_pipeline_free(vkctx, &s->pl);
+    ff_vk_shader_free(vkctx, &s->shd);
+
+    ff_vk_uninit(&s->vkctx);
+
+    s->initialized = 0;
+}
+
+#define OFFSET(x) offsetof(TestSrcVulkanContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) + +#define COMMON_OPTS \ + { "size", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, { .str = "1920x1080" }, 0, 0, FLAGS }, \ + { "s", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, { .str = "1920x1080" }, 0, 0, FLAGS }, \ + \ + { "rate", "set video rate", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, { .str = "60" }, 0, INT_MAX, FLAGS }, \ + { "r", "set video rate", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, { .str = "60" }, 0, INT_MAX, FLAGS }, \ + \ + { "duration", "set video duration", OFFSET(duration), AV_OPT_TYPE_DURATION, { .i64 = -1 }, -1, INT64_MAX, FLAGS }, \ + { "d", "set video duration", OFFSET(duration), AV_OPT_TYPE_DURATION, { .i64 = -1 }, -1, INT64_MAX, FLAGS }, \ + \ + { "sar", "set video sample aspect ratio", OFFSET(sar), AV_OPT_TYPE_RATIONAL, { .dbl = 1 }, 0, INT_MAX, FLAGS }, \ + \ + { "format", "Output video format (software format of hardware frames)", OFFSET(out_format_string), AV_OPT_TYPE_STRING, .flags = FLAGS }, + +static const AVOption color_vulkan_options[] = { + { "color", "set color", OFFSET(color_rgba), AV_OPT_TYPE_COLOR, {.str = "black"}, 0, 0, FLAGS }, + { "c", "set color", OFFSET(color_rgba), AV_OPT_TYPE_COLOR, {.str = "black"}, 0, 0, FLAGS }, + COMMON_OPTS + { "out_range", "Output colour range (from 0 to 2) (default 0)", OFFSET(out_range), AV_OPT_TYPE_INT, {.i64 = AVCOL_RANGE_UNSPECIFIED}, AVCOL_RANGE_UNSPECIFIED, AVCOL_RANGE_JPEG, .flags = FLAGS, "range" }, + { "full", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, + { "limited", "Limited range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, + { "jpeg", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, + { "mpeg", "Limited range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, + { "tv", "Limited range", 0, 
AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, + { "pc", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, + { NULL }, +}; + +AVFILTER_DEFINE_CLASS(color_vulkan); + +static const AVFilterPad testsrc_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = testsrc_vulkan_config_props, + }, +}; + +const AVFilter ff_vsrc_color_vulkan = { + .name = "color_vulkan", + .description = NULL_IF_CONFIG_SMALL("Generate a constant color (Vulkan)"), + .priv_size = sizeof(TestSrcVulkanContext), + .init = &ff_vk_filter_init, + .uninit = &testsrc_vulkan_uninit, + .inputs = NULL, + .activate = testsrc_vulkan_activate, + FILTER_OUTPUTS(testsrc_vulkan_outputs), + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), + .priv_class = &color_vulkan_class, + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; From c1181005955cf87ff894079e52b9a6250e63e4f8 Mon Sep 17 00:00:00 2001 From: Lynne Date: Sat, 18 Mar 2023 19:15:41 +0100 Subject: [PATCH 92/98] tools/cl2c: change to tools/source2c and allow non-OpenCL source files --- libavfilter/Makefile | 11 +++++++++-- libavfilter/opencl_source.h | 28 ++++++++++++++-------------- libavfilter/vf_avgblur_opencl.c | 2 +- libavfilter/vf_colorkey_opencl.c | 2 +- libavfilter/vf_convolution_opencl.c | 2 +- libavfilter/vf_deshake_opencl.c | 2 +- libavfilter/vf_neighbor_opencl.c | 2 +- libavfilter/vf_nlmeans_opencl.c | 2 +- libavfilter/vf_overlay_opencl.c | 2 +- libavfilter/vf_pad_opencl.c | 2 +- libavfilter/vf_remap_opencl.c | 2 +- libavfilter/vf_tonemap_opencl.c | 4 ++-- libavfilter/vf_transpose_opencl.c | 2 +- libavfilter/vf_unsharp_opencl.c | 2 +- libavfilter/vf_xfade_opencl.c | 2 +- tools/{cl2c => source2c} | 10 ++++------ 16 files changed, 41 insertions(+), 36 deletions(-) rename tools/{cl2c => source2c} (78%) diff --git a/libavfilter/Makefile b/libavfilter/Makefile index aa1d9c0497614..01c083e77dc75 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ 
-651,10 +651,17 @@ TESTPROGS = drawutils filtfmts formats integral
 TOOLS-$(CONFIG_LIBZMQ) += zmqsend
 
 clean::
-	$(RM) $(CLEANSUFFIXES:%=libavfilter/dnn/%) $(CLEANSUFFIXES:%=libavfilter/opencl/%)
+	$(RM) $(CLEANSUFFIXES:%=libavfilter/dnn/%) $(CLEANSUFFIXES:%=libavfilter/opencl/%) \
+           $(CLEANSUFFIXES:%=libavfilter/vulkan/%)
 
 OPENCL = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavfilter/opencl/*.cl))
 .SECONDARY: $(OPENCL:.cl=.c)
 libavfilter/opencl/%.c: TAG = OPENCL
 libavfilter/opencl/%.c: $(SRC_PATH)/libavfilter/opencl/%.cl
-	$(M)$(SRC_PATH)/tools/cl2c $< $@
+	$(M)$(SRC_PATH)/tools/source2c $< $@
+
+# Vulkan compute shaders (*.comp) are embedded as C strings the same way as
+# the OpenCL kernels above; the tag only labels the step in the build output.
+VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavfilter/vulkan/*.comp))
+.SECONDARY: $(VULKAN:.comp=.c)
+libavfilter/vulkan/%.c: TAG = VULKAN
+libavfilter/vulkan/%.c: $(SRC_PATH)/libavfilter/vulkan/%.comp
+	$(M)$(SRC_PATH)/tools/source2c $< $@
diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h
index 9eac2dc516a81..b6930fb686da9 100644
--- a/libavfilter/opencl_source.h
+++ b/libavfilter/opencl_source.h
@@ -19,19 +19,19 @@
 #ifndef AVFILTER_OPENCL_SOURCE_H
 #define AVFILTER_OPENCL_SOURCE_H
 
-extern const char *ff_opencl_source_avgblur;
-extern const char *ff_opencl_source_colorkey;
-extern const char *ff_opencl_source_colorspace_common;
-extern const char *ff_opencl_source_convolution;
-extern const char *ff_opencl_source_deshake;
-extern const char *ff_opencl_source_neighbor;
-extern const char *ff_opencl_source_nlmeans;
-extern const char *ff_opencl_source_overlay;
-extern const char *ff_opencl_source_pad;
-extern const char *ff_opencl_source_remap;
-extern const char *ff_opencl_source_tonemap;
-extern const char *ff_opencl_source_transpose;
-extern const char *ff_opencl_source_unsharp;
-extern const char *ff_opencl_source_xfade;
+extern const char *ff_source_avgblur_cl;
+extern const char *ff_source_colorkey_cl;
+extern const char *ff_source_colorspace_common_cl;
+extern const char *ff_source_convolution_cl;
+extern const char
*ff_source_deshake_cl; +extern const char *ff_source_neighbor_cl; +extern const char *ff_source_nlmeans_cl; +extern const char *ff_source_overlay_cl; +extern const char *ff_source_pad_cl; +extern const char *ff_source_remap_cl; +extern const char *ff_source_tonemap_cl; +extern const char *ff_source_transpose_cl; +extern const char *ff_source_unsharp_cl; +extern const char *ff_source_xfade_cl; #endif /* AVFILTER_OPENCL_SOURCE_H */ diff --git a/libavfilter/vf_avgblur_opencl.c b/libavfilter/vf_avgblur_opencl.c index 68f3a63249231..c00d2f6363c82 100644 --- a/libavfilter/vf_avgblur_opencl.c +++ b/libavfilter/vf_avgblur_opencl.c @@ -59,7 +59,7 @@ static int avgblur_opencl_init(AVFilterContext *avctx) cl_int cle; int err; - err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_avgblur, 1); + err = ff_opencl_filter_load_program(avctx, &ff_source_avgblur_cl, 1); if (err < 0) goto fail; diff --git a/libavfilter/vf_colorkey_opencl.c b/libavfilter/vf_colorkey_opencl.c index 2b019b290c16b..94361df88fed6 100644 --- a/libavfilter/vf_colorkey_opencl.c +++ b/libavfilter/vf_colorkey_opencl.c @@ -52,7 +52,7 @@ static int colorkey_opencl_init(AVFilterContext *avctx) cl_int cle; int err; - err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_colorkey, 1); + err = ff_opencl_filter_load_program(avctx, &ff_source_colorkey_cl, 1); if (err < 0) goto fail; diff --git a/libavfilter/vf_convolution_opencl.c b/libavfilter/vf_convolution_opencl.c index bf721a7416ba3..0eff9f40d3b21 100644 --- a/libavfilter/vf_convolution_opencl.c +++ b/libavfilter/vf_convolution_opencl.c @@ -62,7 +62,7 @@ static int convolution_opencl_init(AVFilterContext *avctx) cl_int cle; int err; - err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_convolution, 1); + err = ff_opencl_filter_load_program(avctx, &ff_source_convolution_cl, 1); if (err < 0) goto fail; diff --git a/libavfilter/vf_deshake_opencl.c b/libavfilter/vf_deshake_opencl.c index e670a4cc2349e..8db59767bdc71 100644 --- 
a/libavfilter/vf_deshake_opencl.c +++ b/libavfilter/vf_deshake_opencl.c @@ -1251,7 +1251,7 @@ static int deshake_opencl_init(AVFilterContext *avctx) } ctx->sw_format = hw_frames_ctx->sw_format; - err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_deshake, 1); + err = ff_opencl_filter_load_program(avctx, &ff_source_deshake_cl, 1); if (err < 0) goto fail; diff --git a/libavfilter/vf_neighbor_opencl.c b/libavfilter/vf_neighbor_opencl.c index d2d93cd2405a7..b2939f841a280 100644 --- a/libavfilter/vf_neighbor_opencl.c +++ b/libavfilter/vf_neighbor_opencl.c @@ -55,7 +55,7 @@ static int neighbor_opencl_init(AVFilterContext *avctx) cl_int cle; int err; - err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_neighbor, 1); + err = ff_opencl_filter_load_program(avctx, &ff_source_neighbor_cl, 1); if (err < 0) goto fail; diff --git a/libavfilter/vf_nlmeans_opencl.c b/libavfilter/vf_nlmeans_opencl.c index ca3ec45d7a328..5149be02ca4c2 100644 --- a/libavfilter/vf_nlmeans_opencl.c +++ b/libavfilter/vf_nlmeans_opencl.c @@ -98,7 +98,7 @@ static int nlmeans_opencl_init(AVFilterContext *avctx, int width, int height) if (!ctx->patch_size_uv) ctx->patch_size_uv = ctx->patch_size; - err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_nlmeans, 1); + err = ff_opencl_filter_load_program(avctx, &ff_source_nlmeans_cl, 1); if (err < 0) goto fail; diff --git a/libavfilter/vf_overlay_opencl.c b/libavfilter/vf_overlay_opencl.c index 38a3fc8795a50..9beb09f05a1ba 100644 --- a/libavfilter/vf_overlay_opencl.c +++ b/libavfilter/vf_overlay_opencl.c @@ -51,7 +51,7 @@ static int overlay_opencl_load(AVFilterContext *avctx, { OverlayOpenCLContext *ctx = avctx->priv; cl_int cle; - const char *source = ff_opencl_source_overlay; + const char *source = ff_source_overlay_cl; const char *kernel; const AVPixFmtDescriptor *main_desc, *overlay_desc; int err, i, main_planes, overlay_planes; diff --git a/libavfilter/vf_pad_opencl.c b/libavfilter/vf_pad_opencl.c index 
d6b71765eeb3a..b4b10397a4de4 100644 --- a/libavfilter/vf_pad_opencl.c +++ b/libavfilter/vf_pad_opencl.c @@ -93,7 +93,7 @@ static int pad_opencl_init(AVFilterContext *avctx, AVFrame *input_frame) ctx->hsub = desc->log2_chroma_w; ctx->vsub = desc->log2_chroma_h; - err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_pad, 1); + err = ff_opencl_filter_load_program(avctx, &ff_source_pad_cl, 1); if (err < 0) goto fail; diff --git a/libavfilter/vf_remap_opencl.c b/libavfilter/vf_remap_opencl.c index eeb1eb5d69d32..89d47426c0179 100644 --- a/libavfilter/vf_remap_opencl.c +++ b/libavfilter/vf_remap_opencl.c @@ -73,7 +73,7 @@ static int remap_opencl_load(AVFilterContext *avctx, { RemapOpenCLContext *ctx = avctx->priv; cl_int cle; - const char *source = ff_opencl_source_remap; + const char *source = ff_source_remap_cl; const char *kernel = kernels[ctx->interp]; const AVPixFmtDescriptor *main_desc; int err, main_planes; diff --git a/libavfilter/vf_tonemap_opencl.c b/libavfilter/vf_tonemap_opencl.c index 883eb043427f2..84bf394e75a69 100644 --- a/libavfilter/vf_tonemap_opencl.c +++ b/libavfilter/vf_tonemap_opencl.c @@ -240,8 +240,8 @@ static int tonemap_opencl_init(AVFilterContext *avctx) av_log(avctx, AV_LOG_DEBUG, "Generated OpenCL header:\n%s\n", header.str); opencl_sources[0] = header.str; - opencl_sources[1] = ff_opencl_source_tonemap; - opencl_sources[2] = ff_opencl_source_colorspace_common; + opencl_sources[1] = ff_source_tonemap_cl; + opencl_sources[2] = ff_source_colorspace_common_cl; err = ff_opencl_filter_load_program(avctx, opencl_sources, OPENCL_SOURCE_NB); av_bprint_finalize(&header, NULL); diff --git a/libavfilter/vf_transpose_opencl.c b/libavfilter/vf_transpose_opencl.c index 56d34d193bcff..b2128049537fa 100644 --- a/libavfilter/vf_transpose_opencl.c +++ b/libavfilter/vf_transpose_opencl.c @@ -44,7 +44,7 @@ static int transpose_opencl_init(AVFilterContext *avctx) cl_int cle; int err; - err = ff_opencl_filter_load_program(avctx, 
&ff_opencl_source_transpose, 1); + err = ff_opencl_filter_load_program(avctx, &ff_source_transpose_cl, 1); if (err < 0) goto fail; diff --git a/libavfilter/vf_unsharp_opencl.c b/libavfilter/vf_unsharp_opencl.c index 2c3ac14050d07..09398464ca39b 100644 --- a/libavfilter/vf_unsharp_opencl.c +++ b/libavfilter/vf_unsharp_opencl.c @@ -69,7 +69,7 @@ static int unsharp_opencl_init(AVFilterContext *avctx) cl_int cle; int err; - err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_unsharp, 1); + err = ff_opencl_filter_load_program(avctx, &ff_source_unsharp_cl, 1); if (err < 0) goto fail; diff --git a/libavfilter/vf_xfade_opencl.c b/libavfilter/vf_xfade_opencl.c index 415cf7ac35d8e..fb567aa7fd83e 100644 --- a/libavfilter/vf_xfade_opencl.c +++ b/libavfilter/vf_xfade_opencl.c @@ -93,7 +93,7 @@ static int xfade_opencl_load(AVFilterContext *avctx, if (ctx->transition == CUSTOM) { err = ff_opencl_filter_load_program_from_file(avctx, ctx->source_file); } else { - err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_xfade, 1); + err = ff_opencl_filter_load_program(avctx, &ff_source_xfade_cl, 1); } if (err < 0) return err; diff --git a/tools/cl2c b/tools/source2c similarity index 78% rename from tools/cl2c rename to tools/source2c index e3f92bab1c6e9..6e5f123144aa9 100755 --- a/tools/cl2c +++ b/tools/source2c @@ -1,7 +1,6 @@ #!/bin/sh -# Convert an OpenCL source file into a C source file containing the -# OpenCL source as a C string. Also adds a #line directive so that -# compiler messages are useful. +# Convert a source file into a C source file containing the +# source code as a C string. # This file is part of FFmpeg. 
# @@ -22,12 +21,11 @@ input="$1" output="$2" -name=$(basename "$input" | sed 's/.cl$//') +name=$(basename "$input" | sed 's/\./_/') cat >$output < Date: Thu, 13 Apr 2023 12:18:12 +0200 Subject: [PATCH 93/98] vulkan: add support for the atomic float ops extension --- libavutil/hwcontext_vulkan.c | 15 ++++++++++++++- libavutil/vulkan.c | 13 +++++++++++++ libavutil/vulkan.h | 4 ++++ libavutil/vulkan_functions.h | 1 + libavutil/vulkan_loader.h | 1 + 5 files changed, 33 insertions(+), 1 deletion(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index e804126e113bf..648bdcb3985ef 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -96,6 +96,7 @@ typedef struct VulkanDevicePriv { VkPhysicalDeviceVulkan12Features device_features_1_2; VkPhysicalDeviceVulkan13Features device_features_1_3; VkPhysicalDeviceDescriptorBufferFeaturesEXT desc_buf_features; + VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float_features; /* Queues */ pthread_mutex_t **qf_mutex; @@ -401,6 +402,7 @@ static const VulkanOptExtension optional_device_exts[] = { { VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, { VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, FF_VK_EXT_DESCRIPTOR_BUFFER, }, { VK_EXT_PHYSICAL_DEVICE_DRM_EXTENSION_NAME, FF_VK_EXT_DEVICE_DRM }, + { VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME, FF_VK_EXT_ATOMIC_FLOAT }, /* Imports/exports */ { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_MEMORY }, @@ -1202,9 +1204,13 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, VkPhysicalDeviceTimelineSemaphoreFeatures timeline_features = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES, }; + VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float_features = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT, + .pNext = &timeline_features, + }; VkPhysicalDeviceDescriptorBufferFeaturesEXT desc_buf_features = { .sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT, - .pNext = &timeline_features, + .pNext = &atomic_float_features, }; VkPhysicalDeviceVulkan13Features dev_features_1_3 = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES, @@ -1236,6 +1242,10 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, p->device_features_1_3.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES; p->device_features_1_3.pNext = &p->desc_buf_features; p->desc_buf_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT; + p->desc_buf_features.pNext = &p->atomic_float_features; + p->atomic_float_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT; + p->atomic_float_features.pNext = NULL; + ctx->free = vulkan_device_free; /* Create an instance if not given one */ @@ -1290,6 +1300,9 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, p->desc_buf_features.descriptorBuffer = desc_buf_features.descriptorBuffer; p->desc_buf_features.descriptorBufferPushDescriptors = desc_buf_features.descriptorBufferPushDescriptors; + p->atomic_float_features.shaderBufferFloat32Atomics = atomic_float_features.shaderBufferFloat32Atomics; + p->atomic_float_features.shaderBufferFloat32AtomicAdd = atomic_float_features.shaderBufferFloat32AtomicAdd; + dev_info.pNext = &hwctx->device_features; /* Setup queue family */ diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 6ca361e054275..4ebd24c38902d 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -107,8 +107,21 @@ int ff_vk_load_props(FFVulkanContext *s) .pNext = &s->driver_props, }; + s->atomic_float_feats = (VkPhysicalDeviceShaderAtomicFloatFeaturesEXT) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT, + }; + s->feats_12 = (VkPhysicalDeviceVulkan12Features) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES, + .pNext = &s->atomic_float_feats, + }; + s->feats = 
(VkPhysicalDeviceFeatures2) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, + .pNext = &s->feats_12, + }; + vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props); vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops); + vk->GetPhysicalDeviceFeatures2(s->hwctx->phys_dev, &s->feats); if (s->qf_props) return 0; diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index ec03ba8b717b8..58da720a1cff4 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -240,6 +240,10 @@ typedef struct FFVulkanContext { VkQueueFamilyProperties2 *qf_props; int tot_nb_qfs; + VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float_feats; + VkPhysicalDeviceVulkan12Features feats_12; + VkPhysicalDeviceFeatures2 feats; + AVHWDeviceContext *device; AVVulkanDeviceContext *hwctx; diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h index 1f96671eddcd3..ac636ab99b586 100644 --- a/libavutil/vulkan_functions.h +++ b/libavutil/vulkan_functions.h @@ -47,6 +47,7 @@ typedef enum FFVulkanExtensions { FF_VK_EXT_VIDEO_ENCODE_H264 = 1ULL << 15, /* VK_EXT_video_encode_h264 */ FF_VK_EXT_VIDEO_ENCODE_H265 = 1ULL << 16, /* VK_EXT_video_encode_h265 */ FF_VK_EXT_VIDEO_DECODE_AV1 = 1ULL << 17, /* VK_MESA_video_decode_av1 */ + FF_VK_EXT_ATOMIC_FLOAT = 1ULL << 18, /* VK_EXT_shader_atomic_float */ FF_VK_EXT_NO_FLAG = 1ULL << 31, } FFVulkanExtensions; diff --git a/libavutil/vulkan_loader.h b/libavutil/vulkan_loader.h index 78c190ebb06a0..717e684fb9562 100644 --- a/libavutil/vulkan_loader.h +++ b/libavutil/vulkan_loader.h @@ -45,6 +45,7 @@ static inline uint64_t ff_vk_extensions_to_mask(const char * const *extensions, { VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_HOST_MEMORY }, { VK_EXT_DEBUG_UTILS_EXTENSION_NAME, FF_VK_EXT_DEBUG_UTILS }, { VK_EXT_PHYSICAL_DEVICE_DRM_EXTENSION_NAME, FF_VK_EXT_DEVICE_DRM }, + { VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME, FF_VK_EXT_ATOMIC_FLOAT }, #ifdef _WIN32 { 
VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_MEMORY }, { VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM }, From e54149669dcc9a929859fd53e8a08df0ab9cb52f Mon Sep 17 00:00:00 2001 From: Lynne Date: Thu, 13 Apr 2023 12:15:13 +0200 Subject: [PATCH 94/98] lavfi: add nlmeans_vulkan filter --- configure | 1 + libavfilter/Makefile | 2 + libavfilter/allfilters.c | 1 + libavfilter/vf_nlmeans_vulkan.c | 1114 ++++++++++++++++++++++++++++ libavfilter/vulkan/prefix_sum.comp | 151 ++++ libavutil/vulkan_functions.h | 1 + 6 files changed, 1270 insertions(+) create mode 100644 libavfilter/vf_nlmeans_vulkan.c create mode 100644 libavfilter/vulkan/prefix_sum.comp diff --git a/configure b/configure index d709b83a617ad..d7db6cd326f1f 100755 --- a/configure +++ b/configure @@ -3710,6 +3710,7 @@ minterpolate_filter_select="scene_sad" mptestsrc_filter_deps="gpl" negate_filter_deps="lut_filter" nlmeans_opencl_filter_deps="opencl" +nlmeans_vulkan_filter_deps="vulkan spirv_compiler" nnedi_filter_deps="gpl" ocr_filter_deps="libtesseract" ocv_filter_deps="libopencv" diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 01c083e77dc75..18935b16169e0 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -390,6 +390,8 @@ OBJS-$(CONFIG_MULTIPLY_FILTER) += vf_multiply.o OBJS-$(CONFIG_NEGATE_FILTER) += vf_negate.o OBJS-$(CONFIG_NLMEANS_FILTER) += vf_nlmeans.o OBJS-$(CONFIG_NLMEANS_OPENCL_FILTER) += vf_nlmeans_opencl.o opencl.o opencl/nlmeans.o +OBJS-$(CONFIG_NLMEANS_VULKAN_FILTER) += vf_nlmeans_vulkan.o vulkan.o vulkan_filter.o \ + vulkan/prefix_sum.o OBJS-$(CONFIG_NNEDI_FILTER) += vf_nnedi.o OBJS-$(CONFIG_NOFORMAT_FILTER) += vf_format.o OBJS-$(CONFIG_NOISE_FILTER) += vf_noise.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index 30a8830f68659..f1f781101bee5 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -368,6 +368,7 @@ extern const AVFilter ff_vf_multiply; extern const AVFilter 
ff_vf_negate; extern const AVFilter ff_vf_nlmeans; extern const AVFilter ff_vf_nlmeans_opencl; +extern const AVFilter ff_vf_nlmeans_vulkan; extern const AVFilter ff_vf_nnedi; extern const AVFilter ff_vf_noformat; extern const AVFilter ff_vf_noise; diff --git a/libavfilter/vf_nlmeans_vulkan.c b/libavfilter/vf_nlmeans_vulkan.c new file mode 100644 index 0000000000000..2907875e31e02 --- /dev/null +++ b/libavfilter/vf_nlmeans_vulkan.c @@ -0,0 +1,1114 @@ +/* + * Copyright (c) Lynne + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/random_seed.h" +#include "libavutil/opt.h" +#include "vulkan_filter.h" +#include "vulkan_spirv.h" +#include "internal.h" + +#define TYPE_NAME "vec4" +#define TYPE_ELEMS 4 +#define TYPE_SIZE (TYPE_ELEMS*4) + +typedef struct NLMeansVulkanContext { + FFVulkanContext vkctx; + + int initialized; + FFVkExecPool e; + FFVkQueueFamilyCtx qf; + VkSampler sampler; + + AVBufferPool *integral_buf_pool; + AVBufferPool *state_buf_pool; + AVBufferPool *ws_buf_pool; + + int pl_weights_rows; + FFVulkanPipeline pl_weights; + FFVkSPIRVShader shd_weights; + + FFVulkanPipeline pl_denoise; + FFVkSPIRVShader shd_denoise; + + int *xoffsets; + int *yoffsets; + int nb_offsets; + float strength[4]; + int patch[4]; + + struct nlmeans_opts { + int r; + double s; + double sc[4]; + int p; + int pc[4]; + int t; + } opts; +} NLMeansVulkanContext; + +extern const char *ff_source_prefix_sum_comp; + +static void insert_first(FFVkSPIRVShader *shd, int r, int horiz, int plane, int comp) +{ + GLSLF(2, s1 = texture(input_img[%i], ivec2(x + %i, y + %i))[%i]; + ,plane, horiz ? r : 0, !horiz ? r : 0, comp); + + if (TYPE_ELEMS == 4) { + GLSLF(2, s2[0] = texture(input_img[%i], ivec2(x + %i + xoffs[0], y + %i + yoffs[0]))[%i]; + ,plane, horiz ? r : 0, !horiz ? r : 0, comp); + GLSLF(2, s2[1] = texture(input_img[%i], ivec2(x + %i + xoffs[1], y + %i + yoffs[1]))[%i]; + ,plane, horiz ? r : 0, !horiz ? r : 0, comp); + GLSLF(2, s2[2] = texture(input_img[%i], ivec2(x + %i + xoffs[2], y + %i + yoffs[2]))[%i]; + ,plane, horiz ? r : 0, !horiz ? r : 0, comp); + GLSLF(2, s2[3] = texture(input_img[%i], ivec2(x + %i + xoffs[3], y + %i + yoffs[3]))[%i]; + ,plane, horiz ? r : 0, !horiz ? 
r : 0, comp); + } else { + for (int i = 0; i < 16; i++) { + GLSLF(2, s2[%i][%i] = texture(input_img[%i], ivec2(x + %i + xoffs[%i], y + %i + yoffs[%i]))[%i]; + ,i / 4, i % 4, plane, horiz ? r : 0, i, !horiz ? r : 0, i, comp); + } + } + + GLSLC(2, s2 = (s1 - s2) * (s1 - s2); ); +} + +static void insert_horizontal_pass(FFVkSPIRVShader *shd, int nb_rows, int first, int plane, int comp) +{ + GLSLF(1, x = int(gl_GlobalInvocationID.x) * %i; ,nb_rows); + if (!first) { + GLSLC(1, controlBarrier(gl_ScopeWorkgroup, gl_ScopeWorkgroup, + gl_StorageSemanticsBuffer, + gl_SemanticsAcquireRelease | + gl_SemanticsMakeAvailable | + gl_SemanticsMakeVisible); ); + } + GLSLC(1, for (y = 0; y < height[0]; y++) { ); + GLSLC(2, offset = uint64_t(int_stride)*y*T_ALIGN; ); + GLSLC(2, dst = DataBuffer(uint64_t(integral_data) + offset); ); + GLSLC(0, ); + if (first) { + for (int r = 0; r < nb_rows; r++) { + insert_first(shd, r, 1, plane, comp); + GLSLF(2, dst.v[x + %i] = s2; ,r); + GLSLC(0, ); + } + } + GLSLC(2, barrier(); ); + GLSLC(2, prefix_sum(dst, 1, dst, 1); ); + GLSLC(1, } ); + GLSLC(0, ); +} + +static void insert_vertical_pass(FFVkSPIRVShader *shd, int nb_rows, int first, int plane, int comp) +{ + GLSLF(1, y = int(gl_GlobalInvocationID.x) * %i; ,nb_rows); + if (!first) { + GLSLC(1, controlBarrier(gl_ScopeWorkgroup, gl_ScopeWorkgroup, + gl_StorageSemanticsBuffer, + gl_SemanticsAcquireRelease | + gl_SemanticsMakeAvailable | + gl_SemanticsMakeVisible); ); + } + GLSLC(1, for (x = 0; x < width[0]; x++) { ); + GLSLC(2, dst = DataBuffer(uint64_t(integral_data) + x*T_ALIGN); ); + + for (int r = 0; r < nb_rows; r++) { + if (first) { + insert_first(shd, r, 0, plane, comp); + GLSLF(2, integral_data.v[(y + %i)*int_stride + x] = s2; ,r); + GLSLC(0, ); + } + } + + GLSLC(2, barrier(); ); + GLSLC(2, prefix_sum(dst, int_stride, dst, int_stride); ); + GLSLC(1, } ); + GLSLC(0, ); +} + +static void insert_weights_pass(FFVkSPIRVShader *shd, int nb_rows, int vert, + int t, int dst_comp, int plane, int comp) 
+{ + GLSLF(1, p = patch_size[%i]; ,dst_comp); + GLSLC(0, ); + GLSLC(1, controlBarrier(gl_ScopeWorkgroup, gl_ScopeWorkgroup, + gl_StorageSemanticsBuffer, + gl_SemanticsAcquireRelease | + gl_SemanticsMakeAvailable | + gl_SemanticsMakeVisible); ); + GLSLC(1, barrier(); ); + if (!vert) { + GLSLC(1, for (y = 0; y < height[0]; y++) { ); + GLSLF(2, if (gl_GlobalInvocationID.x*%i >= width[%i]) ,nb_rows, plane); + GLSLC(3, break; ); + GLSLF(2, for (r = 0; r < %i; r++) { ,nb_rows); + GLSLF(3, x = int(gl_GlobalInvocationID.x) * %i + r; ,nb_rows); + } else { + GLSLC(1, for (x = 0; x < width[0]; x++) { ); + GLSLF(2, if (gl_GlobalInvocationID.x*%i >= height[%i]) ,nb_rows, plane); + GLSLC(3, break; ); + GLSLF(2, for (r = 0; r < %i; r++) { ,nb_rows); + GLSLF(3, y = int(gl_GlobalInvocationID.x) * %i + r; ,nb_rows); + } + GLSLC(0, ); + GLSLC(3, a = DTYPE(0); ); + GLSLC(3, b = DTYPE(0); ); + GLSLC(3, c = DTYPE(0); ); + GLSLC(3, d = DTYPE(0); ); + GLSLC(0, ); + GLSLC(3, lt = ((x - p) < 0) || ((y - p) < 0); ); + GLSLC(0, ); + if (TYPE_ELEMS == 4) { + GLSLF(3, src[0] = texture(input_img[%i], ivec2(x + xoffs[0], y + yoffs[0]))[%i]; ,plane, comp); + GLSLF(3, src[1] = texture(input_img[%i], ivec2(x + xoffs[1], y + yoffs[1]))[%i]; ,plane, comp); + GLSLF(3, src[2] = texture(input_img[%i], ivec2(x + xoffs[2], y + yoffs[2]))[%i]; ,plane, comp); + GLSLF(3, src[3] = texture(input_img[%i], ivec2(x + xoffs[3], y + yoffs[3]))[%i]; ,plane, comp); + } else { + for (int i = 0; i < 16; i++) + GLSLF(3, src[%i][%i] = texture(input_img[%i], ivec2(x + xoffs[%i], y + yoffs[%i]))[%i]; + ,i / 4, i % 4, plane, i, i, comp); + + } + GLSLC(0, ); + GLSLC(3, if (lt == false) { ); + GLSLC(4, a = integral_data.v[(y - p)*int_stride + x - p]; ); + GLSLC(4, c = integral_data.v[(y - p)*int_stride + x + p]; ); + GLSLC(4, b = integral_data.v[(y + p)*int_stride + x - p]; ); + GLSLC(4, d = integral_data.v[(y + p)*int_stride + x + p]; ); + GLSLC(3, } ); + GLSLC(0, ); + GLSLC(3, patch_diff = d + a - b - c; ); + if (TYPE_ELEMS 
== 4) { + GLSLF(3, w = exp(patch_diff * strength[%i]); ,dst_comp); + GLSLC(3, w_sum = w[0] + w[1] + w[2] + w[3]; ); + GLSLC(3, sum = dot(w, src*255); ); + } else { + for (int i = 0; i < 4; i++) + GLSLF(3, w[%i] = exp(patch_diff[%i] * strength[%i]); ,i,i,dst_comp); + for (int i = 0; i < 4; i++) + GLSLF(3, w_sum %s w[%i][0] + w[%i][1] + w[%i][2] + w[%i][3]; + ,!i ? "=" : "+=", i, i, i, i); + for (int i = 0; i < 4; i++) + GLSLF(3, sum %s dot(w[%i], src[%i]*255); + ,!i ? "=" : "+=", i, i); + } + GLSLC(0, ); + if (t > 1) { + GLSLF(3, atomicAdd(weights_%i[y*ws_stride[%i] + x], w_sum); ,dst_comp, dst_comp); + GLSLF(3, atomicAdd(sums_%i[y*ws_stride[%i] + x], sum); ,dst_comp, dst_comp); + } else { + GLSLF(3, weights_%i[y*ws_stride[%i] + x] += w_sum; ,dst_comp, dst_comp); + GLSLF(3, sums_%i[y*ws_stride[%i] + x] += sum; ,dst_comp, dst_comp); + } + GLSLC(2, } ); + GLSLC(1, } ); +} + +typedef struct HorizontalPushData { + VkDeviceAddress integral_data; + VkDeviceAddress state_data; + int32_t xoffs[TYPE_ELEMS]; + int32_t yoffs[TYPE_ELEMS]; + uint32_t width[4]; + uint32_t height[4]; + uint32_t ws_stride[4]; + int32_t patch_size[4]; + float strength[4]; + uint32_t int_stride; +} HorizontalPushData; + +static av_cold int init_weights_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec, + FFVulkanPipeline *pl, FFVkSPIRVShader *shd, + VkSampler sampler, FFVkSPIRVCompiler *spv, + int width, int height, int t, + const AVPixFmtDescriptor *desc, + int planes, int *nb_rows) +{ + int err; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; /* tested at fail:, which is reachable before compile_shader() sets it */ + FFVulkanDescriptorSetBinding *desc_set; + int max_dim = FFMAX(width, height); + uint32_t max_wg = vkctx->props.properties.limits.maxComputeWorkGroupSize[0]; + int max_shm = vkctx->props.properties.limits.maxComputeSharedMemorySize; + int wg_size = FFMIN(max_wg, max_dim); + int wg_rows = 1; + + RET(ff_vk_shader_init(pl, shd, "nlmeans_weights", VK_SHADER_STAGE_COMPUTE_BIT, 0)); + + /* If not true, wg_size is set to max_wg */ + while
(wg_size*wg_rows < max_dim) + wg_rows++; + + /* Make sure there's enough shared memory */ + while ((wg_size * TYPE_SIZE + TYPE_SIZE + 2*4) > max_shm) { + wg_size >>= 1; + wg_rows++; + } + + ff_vk_shader_set_compute_sizes(shd, wg_size, 1, 1); + *nb_rows = wg_rows; + + if (t > 1) + GLSLC(0, #extension GL_EXT_shader_atomic_float : require ); + GLSLC(0, #extension GL_ARB_gpu_shader_int64 : require ); + GLSLC(0, #pragma use_vulkan_memory_model ); + GLSLC(0, #extension GL_KHR_memory_scope_semantics : enable ); + GLSLC(0, ); + GLSLF(0, #define N_ROWS %i ,*nb_rows); + GLSLC(0, #define WG_SIZE (gl_WorkGroupSize.x) ); + GLSLF(0, #define LG_WG_SIZE %i ,ff_log2(shd->local_size[0])); + GLSLC(0, #define PARTITION_SIZE (N_ROWS*WG_SIZE) ); + GLSLF(0, #define DTYPE %s ,TYPE_NAME); + GLSLF(0, #define T_ALIGN %i ,TYPE_SIZE); + GLSLC(0, ); + GLSLC(0, layout(buffer_reference, buffer_reference_align = T_ALIGN) coherent buffer DataBuffer { ); + GLSLC(1, DTYPE v[]; ); + GLSLC(0, }; ); + GLSLC(0, ); + GLSLC(0, layout(buffer_reference) buffer StateData; ); + GLSLC(0, ); + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, coherent DataBuffer integral_data; ); + GLSLC(1, StateData state; ); + GLSLF(1, uint xoffs[%i]; ,TYPE_ELEMS); + GLSLF(1, uint yoffs[%i]; ,TYPE_ELEMS); + GLSLC(1, uvec4 width; ); + GLSLC(1, uvec4 height; ); + GLSLC(1, uvec4 ws_stride; ); + GLSLC(1, ivec4 patch_size; ); + GLSLC(1, vec4 strength; ); + GLSLC(1, uint int_stride; ); + GLSLC(0, }; ); + GLSLC(0, ); + + ff_vk_add_push_constant(pl, 0, sizeof(HorizontalPushData), VK_SHADER_STAGE_COMPUTE_BIT); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "input_img", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(sampler), + }, + { + .name = "weights_buffer_0", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float 
weights_0[];", + }, + { + .name = "sums_buffer_0", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float sums_0[];", + }, + { + .name = "weights_buffer_1", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float weights_1[];", + }, + { + .name = "sums_buffer_1", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float sums_1[];", + }, + { + .name = "weights_buffer_2", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float weights_2[];", + }, + { + .name = "sums_buffer_2", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float sums_2[];", + }, + { + .name = "weights_buffer_3", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float weights_3[];", + }, + { + .name = "sums_buffer_3", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float sums_3[];", + }, + }; + RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc_set, 1 + 2*desc->nb_components, 0, 0)); + + GLSLD( ff_source_prefix_sum_comp ); + GLSLC(0, ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, uint64_t offset; ); + GLSLC(1, DataBuffer dst; ); + GLSLC(1, float s1; ); + GLSLC(1, DTYPE s2; ); + GLSLC(1, int r; ); + GLSLC(1, int x; ); + GLSLC(1, int y; ); + GLSLC(1, int p; ); + GLSLC(0, ); + GLSLC(1, DTYPE a; ); + GLSLC(1, DTYPE b; ); + GLSLC(1, DTYPE c; ); + GLSLC(1, DTYPE d; ); + GLSLC(0, ); + GLSLC(1, DTYPE patch_diff; ); + if (TYPE_ELEMS == 4) { + GLSLC(1, vec4 src; ); + GLSLC(1, vec4 w; ); + } else { + GLSLC(1, vec4 src[4]; ); + GLSLC(1, vec4 w[4]; ); + } + GLSLC(1, float w_sum; ); + GLSLC(1, float sum; ); + GLSLC(0, ); + GLSLC(1, bool lt; ); + GLSLC(1, bool gt; ); + GLSLC(0, ); + + for (int i = 0; i < desc->nb_components; 
i++) { + int off = desc->comp[i].offset / (FFALIGN(desc->comp[i].depth, 8)/8); + if (width > height) { + insert_horizontal_pass(shd, *nb_rows, 1, desc->comp[i].plane, off); + insert_vertical_pass(shd, *nb_rows, 0, desc->comp[i].plane, off); + insert_weights_pass(shd, *nb_rows, 0, t, i, desc->comp[i].plane, off); + } else { + insert_vertical_pass(shd, *nb_rows, 1, desc->comp[i].plane, off); + insert_horizontal_pass(shd, *nb_rows, 0, desc->comp[i].plane, off); + insert_weights_pass(shd, *nb_rows, 1, t, i, desc->comp[i].plane, off); + } + } + + GLSLC(0, } ); + + RET(spv->compile_shader(spv, vkctx, shd, &spv_data, &spv_len, "main", &spv_opaque)); + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); + + RET(ff_vk_init_compute_pipeline(vkctx, pl, shd)); + RET(ff_vk_exec_pipeline_register(vkctx, exec, pl)); + + return 0; + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + + return err; +} + +typedef struct DenoisePushData { + uint32_t ws_stride[4]; +} DenoisePushData; + +static av_cold int init_denoise_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec, + FFVulkanPipeline *pl, FFVkSPIRVShader *shd, + VkSampler sampler, FFVkSPIRVCompiler *spv, + const AVPixFmtDescriptor *desc, int planes) +{ + int err; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; /* tested at fail:, which is reachable before compile_shader() sets it */ + FFVulkanDescriptorSetBinding *desc_set; + + RET(ff_vk_shader_init(pl, shd, "nlmeans_denoise", + VK_SHADER_STAGE_COMPUTE_BIT, 0)); + + ff_vk_shader_set_compute_sizes(shd, 32, 32, 1); + + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, uvec4 ws_stride; ); + GLSLC(0, }; ); + + ff_vk_add_push_constant(pl, 0, sizeof(DenoisePushData), VK_SHADER_STAGE_COMPUTE_BIT); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "input_img", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(sampler), + }, + { + .name = "output_img", + .type =
VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(vkctx->output_format), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + { + .name = "weights_buffer_0", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float weights_0[];", + }, + { + .name = "sums_buffer_0", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float sums_0[];", + }, + { + .name = "weights_buffer_1", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float weights_1[];", + }, + { + .name = "sums_buffer_1", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float sums_1[];", + }, + { + .name = "weights_buffer_2", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float weights_2[];", + }, + { + .name = "sums_buffer_2", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float sums_2[];", + }, + { + .name = "weights_buffer_3", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float weights_3[];", + }, + { + .name = "sums_buffer_3", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float sums_3[];", + }, + }; + RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc_set, 2 + 2*desc->nb_components, 0, 0)); + + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + GLSLC(0, ); + GLSLC(1, float w_sum; ); + 
GLSLC(1, float sum; ); + GLSLC(1, vec4 src; ); + GLSLC(1, vec4 r; ); + GLSLC(0, ); + + for (int i = 0; i < planes; i++) { + GLSLF(1, src = texture(input_img[%i], pos); ,i); + for (int c = 0; c < desc->nb_components; c++) { + if (desc->comp[c].plane == i) { + int off = desc->comp[c].offset / (FFALIGN(desc->comp[c].depth, 8)/8); + GLSLF(1, w_sum = weights_%i[pos.y*ws_stride[%i] + pos.x]; ,c, c); + GLSLF(1, sum = sums_%i[pos.y*ws_stride[%i] + pos.x]; ,c, c); + GLSLF(1, r[%i] = (sum + src[%i]*255) / (1.0 + w_sum) / 255; ,off, off); + GLSLC(0, ); + } + } + GLSLF(1, imageStore(output_img[%i], pos, r); ,i); + GLSLC(0, ); + } + + GLSLC(0, } ); + + RET(spv->compile_shader(spv, vkctx, shd, &spv_data, &spv_len, "main", &spv_opaque)); + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); + + RET(ff_vk_init_compute_pipeline(vkctx, pl, shd)); + RET(ff_vk_exec_pipeline_register(vkctx, exec, pl)); + + return 0; + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + + return err; +} + +static av_cold int init_filter(AVFilterContext *ctx) +{ + int rad, err; + int xcnt = 0, ycnt = 0; + NLMeansVulkanContext *s = ctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + FFVkSPIRVCompiler *spv = NULL; /* tested at fail: */ + + const AVPixFmtDescriptor *desc; + desc = av_pix_fmt_desc_get(vkctx->output_format); + if (!desc) + return AVERROR(EINVAL); + + if (!(s->opts.r & 1)) { + s->opts.r |= 1; + av_log(ctx, AV_LOG_WARNING, "Research size should be odd, setting to %i\n", + s->opts.r); + } + + if (!(s->opts.p & 1)) { + s->opts.p |= 1; + av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i\n", + s->opts.p); + } + + for (int i = 0; i < 4; i++) { + double str = (s->opts.sc[i] > 1.0) ? s->opts.sc[i] : s->opts.s; + int ps = (s->opts.pc[i] ?
s->opts.pc[i] : s->opts.p); + str = 10.0f*str; + str *= -str; + str = 255.0*255.0 / str; + s->strength[i] = str; + if (!(ps & 1)) { + ps |= 1; + av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i", + ps); + } + s->patch[i] = ps / 2; + } + + rad = s->opts.r/2; + s->nb_offsets = (2*rad + 1)*(2*rad + 1) - 1; + s->xoffsets = av_malloc(s->nb_offsets*sizeof(*s->xoffsets)); + s->yoffsets = av_malloc(s->nb_offsets*sizeof(*s->yoffsets)); + s->nb_offsets = 0; + + for (int x = -rad; x <= rad; x++) { + for (int y = -rad; y <= rad; y++) { + if (!x && !y) + continue; + + s->xoffsets[xcnt++] = x; + s->yoffsets[ycnt++] = y; + s->nb_offsets++; + } + } + + s->opts.t = FFMIN(s->opts.t, (FFALIGN(s->nb_offsets, TYPE_ELEMS) / TYPE_ELEMS)); + if (!vkctx->atomic_float_feats.shaderBufferFloat32AtomicAdd) { + av_log(ctx, AV_LOG_WARNING, "Device doesn't support atomic float adds, " + "disabling dispatch parallelism\n"); + s->opts.t = 1; + } + + if (!vkctx->feats_12.vulkanMemoryModel) { + av_log(ctx, AV_LOG_ERROR, "Device doesn't support the Vulkan memory model!"); + return AVERROR(EINVAL);; + } + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, 1, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST)); + + RET(init_weights_pipeline(vkctx, &s->e, &s->pl_weights, &s->shd_weights, s->sampler, + spv, s->vkctx.output_width, s->vkctx.output_height, + s->opts.t, desc, planes, &s->pl_weights_rows)); + + RET(init_denoise_pipeline(vkctx, &s->e, &s->pl_denoise, &s->shd_denoise, s->sampler, + spv, desc, planes)); + + av_log(ctx, AV_LOG_VERBOSE, "Filter initialized, %i x/y offsets, %i dispatches, %i parallel\n", + s->nb_offsets, (FFALIGN(s->nb_offsets, TYPE_ELEMS) / TYPE_ELEMS) + 1, s->opts.t); + + s->initialized = 1; + + return 0; + +fail: + if (spv) + 
spv->uninit(&spv); + + return err; +} + +static int denoise_pass(NLMeansVulkanContext *s, FFVkExecContext *exec, + FFVkBuffer *ws_vk, uint32_t ws_stride[4]) +{ + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + VkBufferMemoryBarrier2 buf_bar[8]; + int nb_buf_bar = 0; + + /* Denoise pass pipeline */ + ff_vk_exec_bind_pipeline(vkctx, exec, &s->pl_denoise); + + /* Push data */ + ff_vk_update_push_exec(vkctx, exec, &s->pl_denoise, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(DenoisePushData), &(DenoisePushData) { + { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] }, + }); + + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = ws_vk->stage, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = ws_vk->access, + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = ws_vk->buf, + .size = ws_vk->size, + .offset = 0, + }; + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pBufferMemoryBarriers = buf_bar, + .bufferMemoryBarrierCount = nb_buf_bar, + }); + ws_vk->stage = buf_bar[0].dstStageMask; + ws_vk->access = buf_bar[0].dstAccessMask; + + /* End of denoise pass */ + vk->CmdDispatch(exec->buf, + FFALIGN(vkctx->output_width, s->pl_denoise.wg_size[0])/s->pl_denoise.wg_size[0], + FFALIGN(vkctx->output_height, s->pl_denoise.wg_size[1])/s->pl_denoise.wg_size[1], + 1); + + return 0; +} + +static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) +{ + int err; + AVFrame *out = NULL; + AVFilterContext *ctx = link->dst; + NLMeansVulkanContext *s = ctx->priv; + AVFilterLink *outlink = ctx->outputs[0]; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + + const AVPixFmtDescriptor *desc; + int plane_widths[4]; + int plane_heights[4]; + + /* Integral */ 
+ AVBufferRef *state_buf; + FFVkBuffer *state_vk; + AVBufferRef *integral_buf; + FFVkBuffer *integral_vk; + uint32_t int_stride; + size_t int_size; + size_t state_size; + int t_offset = 0; + + /* Weights/sums */ + AVBufferRef *ws_buf; + FFVkBuffer *ws_vk; + VkDeviceAddress weights_addr[4]; + VkDeviceAddress sums_addr[4]; + uint32_t ws_stride[4]; + size_t ws_size[4]; + size_t ws_total_size = 0; + + FFVkExecContext *exec; + VkImageView in_views[AV_NUM_DATA_POINTERS]; + VkImageView out_views[AV_NUM_DATA_POINTERS]; + VkImageMemoryBarrier2 img_bar[8]; + int nb_img_bar = 0; + VkBufferMemoryBarrier2 buf_bar[8]; + int nb_buf_bar = 0; + + if (!s->initialized) + RET(init_filter(ctx)); + + desc = av_pix_fmt_desc_get(vkctx->output_format); + if (!desc) + return AVERROR(EINVAL); + + /* Integral image */ + int_stride = s->pl_weights.wg_size[0]*s->pl_weights_rows; + int_size = int_stride * int_stride * TYPE_SIZE; + state_size = int_stride * 3 *TYPE_SIZE; + + /* Plane dimensions */ + for (int i = 0; i < desc->nb_components; i++) { + plane_widths[i] = !i || (i == 3) ? vkctx->output_width : AV_CEIL_RSHIFT(vkctx->output_width, desc->log2_chroma_w); + plane_heights[i] = !i || (i == 3) ? 
vkctx->output_height : AV_CEIL_RSHIFT(vkctx->output_height, desc->log2_chroma_h); /* vertical subsampling shift for heights */ + plane_widths[i] = FFALIGN(plane_widths[i], s->pl_denoise.wg_size[0]); + plane_heights[i] = FFALIGN(plane_heights[i], s->pl_denoise.wg_size[1]); + + ws_stride[i] = plane_widths[i]; + ws_size[i] = ws_stride[i] * plane_heights[i] * sizeof(float); + ws_total_size += ws_size[i]; + } + + /* Buffers */ + err = ff_vk_get_pooled_buffer(&s->vkctx, &s->integral_buf_pool, &integral_buf, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + NULL, + s->opts.t * int_size, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + if (err < 0) + return err; + integral_vk = (FFVkBuffer *)integral_buf->data; + + err = ff_vk_get_pooled_buffer(&s->vkctx, &s->state_buf_pool, &state_buf, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + NULL, + s->opts.t * state_size, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + if (err < 0) + return err; + state_vk = (FFVkBuffer *)state_buf->data; + + err = ff_vk_get_pooled_buffer(&s->vkctx, &s->ws_buf_pool, &ws_buf, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + NULL, + ws_total_size * 2, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + if (err < 0) + return err; + ws_vk = (FFVkBuffer *)ws_buf->data; + + weights_addr[0] = ws_vk->address; + sums_addr[0] = ws_vk->address + ws_total_size; + for (int i = 1; i < desc->nb_components; i++) { + weights_addr[i] = weights_addr[i - 1] + ws_size[i - 1]; + sums_addr[i] = sums_addr[i - 1] + ws_size[i - 1]; + } + + /* Output frame */ + out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) { + err = AVERROR(ENOMEM); + goto fail; + } + + /* Execution context */ + exec = ff_vk_exec_get(&s->e); + ff_vk_exec_start(vkctx, exec); + + /* Dependencies */ + RET(ff_vk_exec_add_dep_frame(vkctx, exec, in, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + 
RET(ff_vk_exec_add_dep_frame(vkctx, exec, out, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_exec_add_dep_buf(vkctx, exec, &integral_buf, 1, 0)); + RET(ff_vk_exec_add_dep_buf(vkctx, exec, &state_buf, 1, 0)); + RET(ff_vk_exec_add_dep_buf(vkctx, exec, &ws_buf, 1, 0)); + + /* Input frame prep */ + RET(ff_vk_create_imageviews(vkctx, exec, in_views, in)); + ff_vk_update_descriptor_img_array(vkctx, &s->pl_weights, exec, in, in_views, 0, 0, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + s->sampler); + ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_QUEUE_FAMILY_IGNORED); + + /* Output frame prep */ + RET(ff_vk_create_imageviews(vkctx, exec, out_views, out)); + ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = ws_vk->stage, + .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .srcAccessMask = ws_vk->access, + .dstAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = ws_vk->buf, + .size = ws_vk->size, + .offset = 0, + }; + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + .pBufferMemoryBarriers = buf_bar, + .bufferMemoryBarrierCount = nb_buf_bar, + }); + ws_vk->stage = buf_bar[0].dstStageMask; + ws_vk->access = buf_bar[0].dstAccessMask; + + /* Weights/sums buffer zeroing */ + vk->CmdFillBuffer(exec->buf, ws_vk->buf, 0, 
ws_vk->size, 0x0); + nb_buf_bar = 0; /* restart the list: only the post-fill barrier is recorded, and buf_bar[0] below refers to it */ + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = ws_vk->stage, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = ws_vk->access, + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = ws_vk->buf, + .size = ws_vk->size, + .offset = 0, + }; + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pBufferMemoryBarriers = buf_bar, + .bufferMemoryBarrierCount = nb_buf_bar, + }); + ws_vk->stage = buf_bar[0].dstStageMask; + ws_vk->access = buf_bar[0].dstAccessMask; + + /* Update weights descriptors */ + ff_vk_update_descriptor_img_array(vkctx, &s->pl_weights, exec, in, in_views, 0, 0, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + s->sampler); + for (int i = 0; i < desc->nb_components; i++) { + RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_weights, exec, 0, 1 + i*2 + 0, 0, + weights_addr[i], ws_size[i], + VK_FORMAT_UNDEFINED)); + RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_weights, exec, 0, 1 + i*2 + 1, 0, + sums_addr[i], ws_size[i], + VK_FORMAT_UNDEFINED)); + } + + /* Update denoise descriptors */ + ff_vk_update_descriptor_img_array(vkctx, &s->pl_denoise, exec, in, in_views, 0, 0, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + s->sampler); + ff_vk_update_descriptor_img_array(vkctx, &s->pl_denoise, exec, out, out_views, 0, 1, + VK_IMAGE_LAYOUT_GENERAL, s->sampler); + for (int i = 0; i < desc->nb_components; i++) { + RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_denoise, exec, 0, 2 + i*2 + 0, 0, + weights_addr[i], ws_size[i], + VK_FORMAT_UNDEFINED)); + RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_denoise, exec, 0, 2 + i*2 + 1, 0, + sums_addr[i], ws_size[i], + VK_FORMAT_UNDEFINED)); + } + + /* Weights pipeline */ + 
ff_vk_exec_bind_pipeline(vkctx, exec, &s->pl_weights); + + for (int i = 0; i < s->nb_offsets; i += TYPE_ELEMS) { + int *xoffs = s->xoffsets + i; + int *yoffs = s->yoffsets + i; + HorizontalPushData pd = { + integral_vk->address + t_offset*int_size, + state_vk->address + t_offset*state_size, + { 0 }, + { 0 }, + { plane_widths[0], plane_widths[1], plane_widths[2], plane_widths[3] }, + { plane_heights[0], plane_heights[1], plane_heights[2], plane_heights[3] }, + { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] }, + { s->patch[0], s->patch[1], s->patch[2], s->patch[3] }, + { s->strength[0], s->strength[1], s->strength[2], s->strength[3], }, /* one strength per component, as for patch[] above */ + int_stride, + }; + + memcpy(pd.xoffs, xoffs, sizeof(pd.xoffs)); + memcpy(pd.yoffs, yoffs, sizeof(pd.yoffs)); + + /* Put a barrier once we run out of parallelism buffers */ + if (!t_offset) { + nb_buf_bar = 0; + /* Buffer prep/sync */ + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = integral_vk->stage, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = integral_vk->access, + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = integral_vk->buf, + .size = integral_vk->size, + .offset = 0, + }; + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = state_vk->stage, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = state_vk->access, + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = state_vk->buf, + .size = state_vk->size, + .offset = 0, + }; + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = 
VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pBufferMemoryBarriers = buf_bar, + .bufferMemoryBarrierCount = nb_buf_bar, + }); + integral_vk->stage = buf_bar[0].dstStageMask; + integral_vk->access = buf_bar[0].dstAccessMask; + state_vk->stage = buf_bar[1].dstStageMask; + state_vk->access = buf_bar[1].dstAccessMask; + } + t_offset = (t_offset + 1) % s->opts.t; + + /* Push data */ + ff_vk_update_push_exec(vkctx, exec, &s->pl_weights, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(pd), &pd); + + /* End of horizontal pass */ + vk->CmdDispatch(exec->buf, 1, 1, 1); + } + + RET(denoise_pass(s, exec, ws_vk, ws_stride)); + + err = ff_vk_exec_submit(vkctx, exec); + if (err < 0) + return err; + + err = av_frame_copy_props(out, in); + if (err < 0) + goto fail; + + av_frame_free(&in); + + return ff_filter_frame(outlink, out); + +fail: + av_frame_free(&in); + av_frame_free(&out); + return err; +} + +static void nlmeans_vulkan_uninit(AVFilterContext *avctx) +{ + NLMeansVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl_weights); + ff_vk_shader_free(vkctx, &s->shd_weights); + ff_vk_pipeline_free(vkctx, &s->pl_denoise); + ff_vk_shader_free(vkctx, &s->shd_denoise); + + av_buffer_pool_uninit(&s->integral_buf_pool); + av_buffer_pool_uninit(&s->state_buf_pool); + av_buffer_pool_uninit(&s->ws_buf_pool); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); + + ff_vk_uninit(&s->vkctx); + + s->initialized = 0; +} + +#define OFFSET(x) offsetof(NLMeansVulkanContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption nlmeans_vulkan_options[] = { + { "s", "denoising strength for all components", OFFSET(opts.s), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS }, + { "p", "patch size for all components", OFFSET(opts.p), AV_OPT_TYPE_INT, { .i64 = 3*2+1 }, 0, 99, FLAGS }, + { "r", 
"research window radius", OFFSET(opts.r), AV_OPT_TYPE_INT, { .i64 = 7*2+1 }, 0, 99, FLAGS }, + { "t", "parallelism", OFFSET(opts.t), AV_OPT_TYPE_INT, { .i64 = 36 }, 1, 168, FLAGS }, + + { "s1", "denoising strength for component 1", OFFSET(opts.sc[0]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS }, + { "s2", "denoising strength for component 2", OFFSET(opts.sc[1]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS }, + { "s3", "denoising strength for component 3", OFFSET(opts.sc[2]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS }, + { "s4", "denoising strength for component 4", OFFSET(opts.sc[3]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS }, + + { "p1", "patch size for component 1", OFFSET(opts.pc[0]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS }, + { "p2", "patch size for component 2", OFFSET(opts.pc[1]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS }, + { "p3", "patch size for component 3", OFFSET(opts.pc[2]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS }, + { "p4", "patch size for component 4", OFFSET(opts.pc[3]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS }, + + { NULL } +}; + +AVFILTER_DEFINE_CLASS(nlmeans_vulkan); + +static const AVFilterPad nlmeans_vulkan_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = &nlmeans_vulkan_filter_frame, + .config_props = &ff_vk_filter_config_input, + }, +}; + +static const AVFilterPad nlmeans_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_vk_filter_config_output, + }, +}; + +const AVFilter ff_vf_nlmeans_vulkan = { + .name = "nlmeans_vulkan", + .description = NULL_IF_CONFIG_SMALL("Non-local means denoiser (Vulkan)"), + .priv_size = sizeof(NLMeansVulkanContext), + .init = &ff_vk_filter_init, + .uninit = &nlmeans_vulkan_uninit, + FILTER_INPUTS(nlmeans_vulkan_inputs), + FILTER_OUTPUTS(nlmeans_vulkan_outputs), + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), + .priv_class = &nlmeans_vulkan_class, + .flags_internal = 
FF_FILTER_FLAG_HWFRAME_AWARE, +}; diff --git a/libavfilter/vulkan/prefix_sum.comp b/libavfilter/vulkan/prefix_sum.comp new file mode 100644 index 0000000000000..9147cd82fbd90 --- /dev/null +++ b/libavfilter/vulkan/prefix_sum.comp @@ -0,0 +1,151 @@ +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference2 : require + +#define ACQUIRE gl_StorageSemanticsBuffer, gl_SemanticsAcquire +#define RELEASE gl_StorageSemanticsBuffer, gl_SemanticsRelease + +// These correspond to X, A, P respectively in the prefix sum paper. +#define FLAG_NOT_READY 0u +#define FLAG_AGGREGATE_READY 1u +#define FLAG_PREFIX_READY 2u + +layout(buffer_reference, buffer_reference_align = T_ALIGN) nonprivate buffer StateData { + DTYPE aggregate; + DTYPE prefix; + uint flag; +}; + +shared DTYPE sh_scratch[WG_SIZE]; +shared DTYPE sh_prefix; +shared uint sh_part_ix; +shared uint sh_flag; + +void prefix_sum(DataBuffer dst, uint dst_stride, DataBuffer src, uint src_stride) +{ + DTYPE local[N_ROWS]; + // Determine partition to process by atomic counter (described in Section 4.4 of prefix sum paper). + if (gl_GlobalInvocationID.x == 0) + sh_part_ix = gl_WorkGroupID.x; +// sh_part_ix = atomicAdd(part_counter, 1); + + barrier(); + uint part_ix = sh_part_ix; + + uint ix = part_ix * PARTITION_SIZE + gl_LocalInvocationID.x * N_ROWS; + + // TODO: gate buffer read? 
(evaluate whether shader check or CPU-side padding is better) + local[0] = src.v[ix*src_stride]; + for (uint i = 1; i < N_ROWS; i++) + local[i] = local[i - 1] + src.v[(ix + i)*src_stride]; + + DTYPE agg = local[N_ROWS - 1]; + sh_scratch[gl_LocalInvocationID.x] = agg; + for (uint i = 0; i < LG_WG_SIZE; i++) { + barrier(); + if (gl_LocalInvocationID.x >= (1u << i)) + agg += sh_scratch[gl_LocalInvocationID.x - (1u << i)]; + barrier(); + + sh_scratch[gl_LocalInvocationID.x] = agg; + } + + // Publish aggregate for this partition + if (gl_LocalInvocationID.x == WG_SIZE - 1) { + state[part_ix].aggregate = agg; + if (part_ix == 0) + state[0].prefix = agg; + } + + // Write flag with release semantics + if (gl_LocalInvocationID.x == WG_SIZE - 1) { + uint flag = part_ix == 0 ? FLAG_PREFIX_READY : FLAG_AGGREGATE_READY; + atomicStore(state[part_ix].flag, flag, gl_ScopeDevice, RELEASE); + } + + DTYPE exclusive = DTYPE(0); + if (part_ix != 0) { + // step 4 of paper: decoupled lookback + uint look_back_ix = part_ix - 1; + + DTYPE their_agg; + uint their_ix = 0; + while (true) { + // Read flag with acquire semantics. + if (gl_LocalInvocationID.x == WG_SIZE - 1) + sh_flag = atomicLoad(state[look_back_ix].flag, gl_ScopeDevice, ACQUIRE); + + // The flag load is done only in the last thread. However, because the + // translation of memoryBarrierBuffer to Metal requires uniform control + // flow, we broadcast it to all threads. 
+ barrier(); + + uint flag = sh_flag; + barrier(); + + if (flag == FLAG_PREFIX_READY) { + if (gl_LocalInvocationID.x == WG_SIZE - 1) { + DTYPE their_prefix = state[look_back_ix].prefix; + exclusive = their_prefix + exclusive; + } + break; + } else if (flag == FLAG_AGGREGATE_READY) { + if (gl_LocalInvocationID.x == WG_SIZE - 1) { + their_agg = state[look_back_ix].aggregate; + exclusive = their_agg + exclusive; + } + look_back_ix--; + their_ix = 0; + continue; + } // else spins + + if (gl_LocalInvocationID.x == WG_SIZE - 1) { + // Unfortunately there's no guarantee of forward progress of other + // workgroups, so compute a bit of the aggregate before trying again. + // In the worst case, spinning stops when the aggregate is complete. + DTYPE m = src.v[(look_back_ix * PARTITION_SIZE + their_ix)*src_stride]; + if (their_ix == 0) + their_agg = m; + else + their_agg += m; + + their_ix++; + if (their_ix == PARTITION_SIZE) { + exclusive = their_agg + exclusive; + if (look_back_ix == 0) { + sh_flag = FLAG_PREFIX_READY; + } else { + look_back_ix--; + their_ix = 0; + } + } + } + barrier(); + flag = sh_flag; + barrier(); + if (flag == FLAG_PREFIX_READY) + break; + } + + // step 5 of paper: compute inclusive prefix + if (gl_LocalInvocationID.x == WG_SIZE - 1) { + DTYPE inclusive_prefix = exclusive + agg; + sh_prefix = exclusive; + state[part_ix].prefix = inclusive_prefix; + } + + if (gl_LocalInvocationID.x == WG_SIZE - 1) + atomicStore(state[part_ix].flag, FLAG_PREFIX_READY, gl_ScopeDevice, RELEASE); + } + + barrier(); + if (part_ix != 0) + exclusive = sh_prefix; + + DTYPE row = exclusive; + if (gl_LocalInvocationID.x > 0) + row += sh_scratch[gl_LocalInvocationID.x - 1]; + + // note - may overwrite + for (uint i = 0; i < N_ROWS; i++) + dst.v[(ix + i)*dst_stride] = row + local[i]; +} diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h index ac636ab99b586..195428eb79214 100644 --- a/libavutil/vulkan_functions.h +++ b/libavutil/vulkan_functions.h @@ -136,6 
+136,7 @@ typedef enum FFVulkanExtensions { MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateBuffer) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, BindBufferMemory) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetBufferDeviceAddress) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdFillBuffer) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyBuffer) \ \ /* Image */ \ From a0c78dbed806ee9142f5d091ae39c02fc8d19faf Mon Sep 17 00:00:00 2001 From: Lynne Date: Sat, 20 May 2023 15:49:01 +0200 Subject: [PATCH 95/98] vulkan_h264: reject end_frame being called without start_frame Happens for both VAAPI and Vulkan. Could be an issue elsewhere, hence the individual commit. --- libavcodec/vulkan_h264.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libavcodec/vulkan_h264.c b/libavcodec/vulkan_h264.c index 86234f3ad38e0..3cd89c504e0ca 100644 --- a/libavcodec/vulkan_h264.c +++ b/libavcodec/vulkan_h264.c @@ -483,10 +483,14 @@ static int vk_h264_end_frame(AVCodecContext *avctx) const H264Context *h = avctx->priv_data; H264Picture *pic = h->cur_pic_ptr; H264VulkanDecodePicture *hp = pic->hwaccel_picture_private; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; FFVulkanDecodePicture *vp = &hp->vp; FFVulkanDecodePicture *rvp[H264_MAX_PICTURE_COUNT] = { 0 }; AVFrame *rav[H264_MAX_PICTURE_COUNT] = { 0 }; + if (!dec->session_params) + return AVERROR(EINVAL); + for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) { H264Picture *rp = hp->ref_src[i]; H264VulkanDecodePicture *rhp = rp->hwaccel_picture_private; From 13b1baa2f31f1d92540e967b9e39346031dce9fb Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Mon, 30 Jan 2023 14:18:34 +0100 Subject: [PATCH 96/98] avfilter/vf_libplacebo: forward queue locking primitives For thread safety. 
--- libavfilter/vf_libplacebo.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/libavfilter/vf_libplacebo.c b/libavfilter/vf_libplacebo.c index 65fe6ef746dc3..e1a179e648e40 100644 --- a/libavfilter/vf_libplacebo.c +++ b/libavfilter/vf_libplacebo.c @@ -544,6 +544,30 @@ static int libplacebo_init(AVFilterContext *avctx) return err; } +#if PL_API_VER >= 201 +# if PL_API_VER >= 278 +static void lock_queue(void *priv, uint32_t qf, uint32_t qidx) +# else +static void lock_queue(void *priv, int qf, int qidx) +# endif +{ + AVHWDeviceContext *avhwctx = priv; + const AVVulkanDeviceContext *hwctx = avhwctx->hwctx; + hwctx->lock_queue(avhwctx, qf, qidx); +} + +# if PL_API_VER >= 278 +static void unlock_queue(void *priv, uint32_t qf, uint32_t qidx) +# else +static void unlock_queue(void *priv, int qf, int qidx) +# endif +{ + AVHWDeviceContext *avhwctx = priv; + const AVVulkanDeviceContext *hwctx = avhwctx->hwctx; + hwctx->unlock_queue(avhwctx, qf, qidx); +} +#endif + static int init_vulkan(AVFilterContext *avctx, const AVVulkanDeviceContext *hwctx) { int err = 0; @@ -561,6 +585,11 @@ static int init_vulkan(AVFilterContext *avctx, const AVVulkanDeviceContext *hwct .extensions = hwctx->enabled_dev_extensions, .num_extensions = hwctx->nb_enabled_dev_extensions, .features = &hwctx->device_features, +#if PL_API_VER >= 201 + .lock_queue = lock_queue, + .unlock_queue = unlock_queue, + .queue_ctx = avctx->hw_device_ctx->data, +#endif .queue_graphics = { .index = hwctx->queue_family_index, .count = hwctx->nb_graphics_queues, From 16855d376d34fa8719949329830ad0ee4ab73d2d Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Mon, 24 Apr 2023 13:22:23 +0200 Subject: [PATCH 97/98] avfilter/vf_libplacebo: bump max vk version For two reasons: 1. We now create a vulkan 1.3 device 2. 
libplacebo master currently requires a vulkan 1.3 device --- libavfilter/vf_libplacebo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavfilter/vf_libplacebo.c b/libavfilter/vf_libplacebo.c index e1a179e648e40..2c327a3a12fc7 100644 --- a/libavfilter/vf_libplacebo.c +++ b/libavfilter/vf_libplacebo.c @@ -603,7 +603,7 @@ static int init_vulkan(AVFilterContext *avctx, const AVVulkanDeviceContext *hwct .count = hwctx->nb_tx_queues, }, /* This is the highest version created by hwcontext_vulkan.c */ - .max_api_version = VK_API_VERSION_1_2, + .max_api_version = VK_API_VERSION_1_3, )); } else { s->vulkan = pl_vulkan_create(s->log, pl_vulkan_params( From 2da8e59cc3c637f4c27181de3be689d4cdf9601b Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Mon, 24 Apr 2023 17:31:21 +0200 Subject: [PATCH 98/98] avutil/hwcontext_vulkan: add libplacebo required features For compatibility with vf_libplacebo --- libavutil/hwcontext_vulkan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 648bdcb3985ef..80d86cde5751f 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -1282,6 +1282,7 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, p->device_features_1_2.timelineSemaphore = 1; p->device_features_1_2.bufferDeviceAddress = dev_features_1_2.bufferDeviceAddress; + p->device_features_1_2.hostQueryReset = dev_features_1_2.hostQueryReset; p->device_features_1_2.storagePushConstant8 = dev_features_1_2.storagePushConstant8; p->device_features_1_2.shaderInt8 = dev_features_1_2.shaderInt8; p->device_features_1_2.storageBuffer8BitAccess = dev_features_1_2.storageBuffer8BitAccess; @@ -1292,6 +1293,8 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, p->device_features_1_2.vulkanMemoryModelDeviceScope = dev_features_1_2.vulkanMemoryModelDeviceScope; p->device_features_1_2.hostQueryReset = dev_features_1_2.hostQueryReset; + 
p->device_features_1_3.dynamicRendering = dev_features_1_3.dynamicRendering; + p->device_features_1_3.maintenance4 = dev_features_1_3.maintenance4; p->device_features_1_3.synchronization2 = dev_features_1_3.synchronization2; p->device_features_1_3.computeFullSubgroups = dev_features_1_3.computeFullSubgroups; p->device_features_1_3.shaderZeroInitializeWorkgroupMemory = dev_features_1_3.shaderZeroInitializeWorkgroupMemory;