diff --git a/configure b/configure index a54398c57fa50..d7db6cd326f1f 100755 --- a/configure +++ b/configure @@ -327,7 +327,6 @@ External library support: --disable-securetransport disable Secure Transport, needed for TLS support on OSX if openssl and gnutls are not used [autodetect] --enable-vapoursynth enable VapourSynth demuxer [no] - --disable-vulkan disable Vulkan code [autodetect] --disable-xlib disable xlib [autodetect] --disable-zlib disable zlib [autodetect] @@ -354,6 +353,8 @@ External library support: --disable-vaapi disable Video Acceleration API (mainly Unix/Intel) code [autodetect] --disable-vdpau disable Nvidia Video Decode and Presentation API for Unix code [autodetect] --disable-videotoolbox disable VideoToolbox code [autodetect] + --disable-vulkan disable Vulkan code [autodetect] + --enable-vulkan-encode enable Vulkan encoding code [no] Toolchain options: --arch=ARCH select architecture [$arch] @@ -1915,6 +1916,7 @@ HWACCEL_LIBRARY_LIST=" mmal omx opencl + vulkan_encode " DOCUMENT_LIST=" @@ -2514,6 +2516,7 @@ CONFIG_EXTRA=" vp56dsp vp8dsp wma_freqs + vulkan_encode wmv2dsp " @@ -3020,6 +3023,8 @@ av1_vaapi_hwaccel_deps="vaapi VADecPictureParameterBufferAV1_bit_depth_idx" av1_vaapi_hwaccel_select="av1_decoder" av1_vdpau_hwaccel_deps="vdpau VdpPictureInfoAV1" av1_vdpau_hwaccel_select="av1_decoder" +av1_vulkan_hwaccel_deps="vulkan" +av1_vulkan_hwaccel_select="av1_decoder" h263_vaapi_hwaccel_deps="vaapi" h263_vaapi_hwaccel_select="h263_decoder" h263_videotoolbox_hwaccel_deps="videotoolbox" @@ -3038,6 +3043,8 @@ h264_vdpau_hwaccel_deps="vdpau" h264_vdpau_hwaccel_select="h264_decoder" h264_videotoolbox_hwaccel_deps="videotoolbox" h264_videotoolbox_hwaccel_select="h264_decoder" +h264_vulkan_hwaccel_deps="vulkan" +h264_vulkan_hwaccel_select="h264_decoder" hevc_d3d11va_hwaccel_deps="d3d11va DXVA_PicParams_HEVC" hevc_d3d11va_hwaccel_select="hevc_decoder" hevc_d3d11va2_hwaccel_deps="d3d11va DXVA_PicParams_HEVC" @@ -3052,6 +3059,8 @@ hevc_vdpau_hwaccel_deps="vdpau VdpPictureInfoHEVC" hevc_vdpau_hwaccel_select="hevc_decoder" hevc_videotoolbox_hwaccel_deps="videotoolbox" hevc_videotoolbox_hwaccel_select="hevc_decoder" +hevc_vulkan_hwaccel_deps="vulkan" +hevc_vulkan_hwaccel_select="hevc_decoder" mjpeg_nvdec_hwaccel_deps="nvdec" mjpeg_nvdec_hwaccel_select="mjpeg_decoder" mjpeg_vaapi_hwaccel_deps="vaapi" @@ -3133,6 +3142,7 @@ qsvenc_select="qsv" qsvvpp_select="qsv" vaapi_encode_deps="vaapi" v4l2_m2m_deps="linux_videodev2_h sem_timedwait" +vulkan_encode_deps="vulkan" bilateral_cuda_filter_deps="ffnvcodec" bilateral_cuda_filter_deps_any="cuda_nvcc cuda_llvm" @@ -3185,6 +3195,7 @@ h264_qsv_encoder_select="atsc_a53 qsvenc" h264_rkmpp_decoder_deps="rkmpp" h264_rkmpp_decoder_select="h264_mp4toannexb_bsf" h264_vaapi_encoder_select="atsc_a53 cbs_h264 vaapi_encode" +h264_vulkan_encoder_select="cbs_h264 vulkan_encode" h264_v4l2m2m_decoder_deps="v4l2_m2m h264_v4l2_m2m" h264_v4l2m2m_decoder_select="h264_mp4toannexb_bsf" h264_v4l2m2m_encoder_deps="v4l2_m2m h264_v4l2_m2m" @@ -3640,7 +3651,9 @@ blend_vulkan_filter_deps="vulkan spirv_compiler" boxblur_filter_deps="gpl" boxblur_opencl_filter_deps="opencl gpl" bs2b_filter_deps="libbs2b" +bwdif_vulkan_filter_deps="vulkan spirv_compiler" chromaber_vulkan_filter_deps="vulkan spirv_compiler" +color_vulkan_filter_deps="vulkan spirv_compiler" colorkey_opencl_filter_deps="opencl" colormatrix_filter_deps="gpl" convolution_opencl_filter_deps="opencl" @@ -3697,6 +3710,7 @@ minterpolate_filter_select="scene_sad" mptestsrc_filter_deps="gpl" negate_filter_deps="lut_filter" nlmeans_opencl_filter_deps="opencl" +nlmeans_vulkan_filter_deps="vulkan spirv_compiler" nnedi_filter_deps="gpl" ocr_filter_deps="libtesseract" ocv_filter_deps="libopencv" @@ -7040,8 +7054,8 @@ enabled crystalhd && check_lib crystalhd "stdint.h libcrystalhd/libcrystalhd_if. "in maintaining it." if enabled vulkan; then - check_pkg_config_header_only vulkan "vulkan >= 1.2.189" "vulkan/vulkan.h" "defined VK_VERSION_1_2" || - check_cpp_condition vulkan "vulkan/vulkan.h" "defined(VK_VERSION_1_3) || (defined(VK_VERSION_1_2) && VK_HEADER_VERSION >= 189)" + check_pkg_config_header_only vulkan "vulkan >= 1.3.238" "vulkan/vulkan.h" "defined VK_VERSION_1_3" || + check_cpp_condition vulkan "vulkan/vulkan.h" "defined(VK_VERSION_1_4) || (defined(VK_VERSION_1_3) && VK_HEADER_VERSION >= 238)" fi if enabled x86; then diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 9c38240025ce7..f841512620bcf 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -170,6 +170,7 @@ OBJS-$(CONFIG_VP3DSP) += vp3dsp.o OBJS-$(CONFIG_VP56DSP) += vp56dsp.o OBJS-$(CONFIG_VP8DSP) += vp8dsp.o OBJS-$(CONFIG_V4L2_M2M) += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o +OBJS-$(CONFIG_VULKAN_ENCODE) += vulkan_encode.o OBJS-$(CONFIG_WMA_FREQS) += wma_freqs.o OBJS-$(CONFIG_WMV2DSP) += wmv2dsp.o @@ -419,6 +420,8 @@ OBJS-$(CONFIG_H264_VAAPI_ENCODER) += vaapi_encode_h264.o h264_levels.o \ OBJS-$(CONFIG_H264_VIDEOTOOLBOX_ENCODER) += videotoolboxenc.o OBJS-$(CONFIG_H264_V4L2M2M_DECODER) += v4l2_m2m_dec.o OBJS-$(CONFIG_H264_V4L2M2M_ENCODER) += v4l2_m2m_enc.o +OBJS-$(CONFIG_H264_VULKAN_ENCODER) += vulkan_encode_h264.o h264_levels.o \ + h2645data.o OBJS-$(CONFIG_HAP_DECODER) += hapdec.o hap.o OBJS-$(CONFIG_HAP_ENCODER) += hapenc.o hap.o OBJS-$(CONFIG_HCA_DECODER) += hcadec.o @@ -982,12 +985,14 @@ OBJS-$(CONFIG_NVDEC) += nvdec.o OBJS-$(CONFIG_VAAPI) += vaapi_decode.o OBJS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.o OBJS-$(CONFIG_VDPAU) += vdpau.o +OBJS-$(CONFIG_VULKAN) += vulkan.o vulkan_video.o OBJS-$(CONFIG_AV1_D3D11VA_HWACCEL) += dxva2_av1.o OBJS-$(CONFIG_AV1_DXVA2_HWACCEL) += dxva2_av1.o OBJS-$(CONFIG_AV1_NVDEC_HWACCEL) += nvdec_av1.o OBJS-$(CONFIG_AV1_VAAPI_HWACCEL) += vaapi_av1.o OBJS-$(CONFIG_AV1_VDPAU_HWACCEL) += vdpau_av1.o +OBJS-$(CONFIG_AV1_VULKAN_HWACCEL) += vulkan_av1.o OBJS-$(CONFIG_H263_VAAPI_HWACCEL) += vaapi_mpeg4.o OBJS-$(CONFIG_H263_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o OBJS-$(CONFIG_H264_D3D11VA_HWACCEL) += dxva2_h264.o @@ -997,12 +1002,14 @@ OBJS-$(CONFIG_H264_QSV_HWACCEL) += qsvdec.o OBJS-$(CONFIG_H264_VAAPI_HWACCEL) += vaapi_h264.o OBJS-$(CONFIG_H264_VDPAU_HWACCEL) += vdpau_h264.o OBJS-$(CONFIG_H264_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o +OBJS-$(CONFIG_H264_VULKAN_HWACCEL) += vulkan_decode.o vulkan_h264.o OBJS-$(CONFIG_HEVC_D3D11VA_HWACCEL) += dxva2_hevc.o OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL) += dxva2_hevc.o OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL) += nvdec_hevc.o OBJS-$(CONFIG_HEVC_QSV_HWACCEL) += qsvdec.o OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL) += vaapi_hevc.o h265_profile_level.o OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL) += vdpau_hevc.o h265_profile_level.o +OBJS-$(CONFIG_HEVC_VULKAN_HWACCEL) += vulkan_decode.o vulkan_hevc.o OBJS-$(CONFIG_MJPEG_NVDEC_HWACCEL) += nvdec_mjpeg.o OBJS-$(CONFIG_MJPEG_VAAPI_HWACCEL) += vaapi_mjpeg.o OBJS-$(CONFIG_MPEG1_NVDEC_HWACCEL) += nvdec_mpeg12.o @@ -1290,6 +1297,7 @@ SKIPHEADERS-$(CONFIG_XVMC) += xvmc.h SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_decode.h vaapi_hevc.h vaapi_encode.h SKIPHEADERS-$(CONFIG_VDPAU) += vdpau.h vdpau_internal.h SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.h vt_internal.h +SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_video.h vulkan_encode.h vulkan_decode.h vulkan_video_codec_av1std.h vulkan_video_codec_av1std_decode.h SKIPHEADERS-$(CONFIG_V4L2_M2M) += v4l2_buffers.h v4l2_context.h v4l2_m2m.h SKIPHEADERS-$(CONFIG_ZLIB) += zlib_wrapper.h diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index 184bb8521f043..84203027fb2a6 100644 --- a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -852,6 +852,7 @@ extern const FFCodec ff_h264_qsv_encoder; extern const FFCodec ff_h264_v4l2m2m_encoder; extern const FFCodec ff_h264_vaapi_encoder; extern const FFCodec ff_h264_videotoolbox_encoder; +extern const FFCodec ff_h264_vulkan_encoder; extern const FFCodec ff_hevc_amf_encoder; extern const FFCodec ff_hevc_cuvid_decoder; extern const FFCodec ff_hevc_mediacodec_decoder; diff --git a/libavcodec/av1dec.c b/libavcodec/av1dec.c index d46ee48335f56..a4c56e8446e03 100644 --- a/libavcodec/av1dec.c +++ b/libavcodec/av1dec.c @@ -27,6 +27,7 @@ #include "libavutil/opt.h" #include "avcodec.h" #include "av1_parse.h" +#include "decode.h" #include "av1dec.h" #include "atsc_a53.h" #include "bytestream.h" @@ -268,8 +269,10 @@ static void skip_mode_params(AV1DecContext *s) int second_forward_idx, second_forward_hint; int ref_hint, dist, i; - if (!header->skip_mode_present) - return; + if (header->frame_type == AV1_FRAME_KEY || + header->frame_type == AV1_FRAME_INTRA_ONLY || + !header->reference_select || !seq->enable_order_hint) + return; forward_idx = -1; backward_idx = -1; @@ -446,7 +449,8 @@ static int get_pixel_format(AVCodecContext *avctx) CONFIG_AV1_D3D11VA_HWACCEL * 2 + \ CONFIG_AV1_NVDEC_HWACCEL + \ CONFIG_AV1_VAAPI_HWACCEL + \ - CONFIG_AV1_VDPAU_HWACCEL) + CONFIG_AV1_VDPAU_HWACCEL + \ + CONFIG_AV1_VULKAN_HWACCEL) enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts; if (seq->seq_profile == 2 && seq->color_config.high_bitdepth) @@ -526,6 +530,9 @@ static int get_pixel_format(AVCodecContext *avctx) #endif #if CONFIG_AV1_VDPAU_HWACCEL *fmtp++ = AV_PIX_FMT_VDPAU; +#endif +#if CONFIG_AV1_VULKAN_HWACCEL + *fmtp++ = AV_PIX_FMT_VULKAN; #endif break; case AV_PIX_FMT_YUV420P10: @@ -544,6 +551,44 @@ static int get_pixel_format(AVCodecContext *avctx) #endif #if CONFIG_AV1_VDPAU_HWACCEL *fmtp++ = AV_PIX_FMT_VDPAU; +#endif +#if CONFIG_AV1_VULKAN_HWACCEL + *fmtp++ = AV_PIX_FMT_VULKAN; +#endif + break; + case AV_PIX_FMT_YUV420P12: +#if CONFIG_AV1_VULKAN_HWACCEL + *fmtp++ = AV_PIX_FMT_VULKAN; +#endif + break; + case AV_PIX_FMT_YUV422P: +#if CONFIG_AV1_VULKAN_HWACCEL + *fmtp++ = AV_PIX_FMT_VULKAN; +#endif + break; + case AV_PIX_FMT_YUV422P10: +#if CONFIG_AV1_VULKAN_HWACCEL + *fmtp++ = AV_PIX_FMT_VULKAN; +#endif + break; + case AV_PIX_FMT_YUV422P12: +#if CONFIG_AV1_VULKAN_HWACCEL + *fmtp++ = AV_PIX_FMT_VULKAN; +#endif + break; + case AV_PIX_FMT_YUV444P: +#if CONFIG_AV1_VULKAN_HWACCEL + *fmtp++ = AV_PIX_FMT_VULKAN; +#endif + break; + case AV_PIX_FMT_YUV444P10: +#if CONFIG_AV1_VULKAN_HWACCEL + *fmtp++ = AV_PIX_FMT_VULKAN; +#endif + break; + case AV_PIX_FMT_YUV444P12: +#if CONFIG_AV1_VULKAN_HWACCEL + *fmtp++ = AV_PIX_FMT_VULKAN; #endif break; case AV_PIX_FMT_GRAY8: @@ -863,8 +908,7 @@ static int av1_frame_alloc(AVCodecContext *avctx, AV1Frame *f) if (avctx->hwaccel) { const AVHWAccel *hwaccel = avctx->hwaccel; if (hwaccel->frame_priv_data_size) { - f->hwaccel_priv_buf = - av_buffer_allocz(hwaccel->frame_priv_data_size); + f->hwaccel_priv_buf = ff_hwaccel_frame_priv_alloc(avctx, hwaccel); if (!f->hwaccel_priv_buf) { ret = AVERROR(ENOMEM); goto fail; @@ -1412,6 +1456,9 @@ static void av1_decode_flush(AVCodecContext *avctx) av_buffer_unref(&itut_t35.payload_ref); ff_cbs_flush(s->cbc); + + if (avctx->hwaccel && avctx->hwaccel->flush) + avctx->hwaccel->flush(avctx); } #define OFFSET(x) offsetof(AV1DecContext, x) @@ -1464,6 +1511,9 @@ const FFCodec ff_av1_decoder = { #if CONFIG_AV1_VDPAU_HWACCEL HWACCEL_VDPAU(av1), #endif +#if CONFIG_AV1_VULKAN_HWACCEL + HWACCEL_VULKAN(av1), +#endif NULL }, diff --git a/libavcodec/avcodec.c b/libavcodec/avcodec.c index 5a96899d505e8..db8226f9b3d59 100644 --- a/libavcodec/avcodec.c +++ b/libavcodec/avcodec.c @@ -39,6 +39,7 @@ #include "decode.h" #include "encode.h" #include "frame_thread_encoder.h" +#include "hwconfig.h" #include "internal.h" #include "thread.h" @@ -459,9 +460,7 @@ av_cold int avcodec_close(AVCodecContext *avctx) av_buffer_unref(&avci->pool); - if (avctx->hwaccel && avctx->hwaccel->uninit) - avctx->hwaccel->uninit(avctx); - av_freep(&avci->hwaccel_priv_data); + ff_hwaccel_uninit(avctx); av_bsf_free(&avci->bsf); diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 06b1a120abed9..84e29a02dbca6 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -2253,6 +2253,25 @@ typedef struct AVHWAccel { * that avctx->hwaccel_priv_data is invalid. */ int (*frame_params)(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx); + + /** + * Copy necessary context variables from a previous thread context to the current one. + * For thread-safe hwaccels only. + */ + int (*update_thread_context)(AVCodecContext *dst, const AVCodecContext *src); + + /** + * Callback to free the hwaccel-specific frame data. + * + * @param hwctx a pointer to an AVHWDeviceContext. + * @param data the per-frame hardware accelerator private data to be freed. + */ + void (*free_frame_priv)(void *hwctx, uint8_t *data); + + /** + * Callback to flush the hwaccel state. + */ + void (*flush)(AVCodecContext *avctx); } AVHWAccel; /** diff --git a/libavcodec/cbs_av1.h b/libavcodec/cbs_av1.h index 36848d4410c03..64dfdce9c4eb5 100644 --- a/libavcodec/cbs_av1.h +++ b/libavcodec/cbs_av1.h @@ -215,6 +215,8 @@ typedef struct AV1RawFrameHeader { uint8_t uniform_tile_spacing_flag; uint8_t tile_cols_log2; uint8_t tile_rows_log2; + uint8_t tile_start_col_sb[AV1_MAX_TILE_COLS]; + uint8_t tile_start_row_sb[AV1_MAX_TILE_COLS]; uint8_t width_in_sbs_minus_1[AV1_MAX_TILE_COLS]; uint8_t height_in_sbs_minus_1[AV1_MAX_TILE_ROWS]; uint16_t context_update_tile_id; diff --git a/libavcodec/cbs_av1_syntax_template.c b/libavcodec/cbs_av1_syntax_template.c index 8f4640d1af82f..a747e17784f71 100644 --- a/libavcodec/cbs_av1_syntax_template.c +++ b/libavcodec/cbs_av1_syntax_template.c @@ -626,6 +626,10 @@ static int FUNC(tile_info)(CodedBitstreamContext *ctx, RWContext *rw, tile_width_sb = (sb_cols + (1 << current->tile_cols_log2) - 1) >> current->tile_cols_log2; + + for (int off = 0, i = 0; off < sb_cols; off += tile_width_sb) + current->tile_start_col_sb[i++] = off; + current->tile_cols = (sb_cols + tile_width_sb - 1) / tile_width_sb; min_log2_tile_rows = FFMAX(min_log2_tiles - current->tile_cols_log2, 0); @@ -634,6 +638,10 @@ static int FUNC(tile_info)(CodedBitstreamContext *ctx, RWContext *rw, tile_height_sb = (sb_rows + (1 << current->tile_rows_log2) - 1) >> current->tile_rows_log2; + + for (int off = 0, i = 0; off < sb_rows; off += tile_height_sb) + current->tile_start_row_sb[i++] = off; + current->tile_rows = (sb_rows + tile_height_sb - 1) / tile_height_sb; for (i = 0; i < current->tile_cols - 1; i++) @@ -652,6 +660,7 @@ static int FUNC(tile_info)(CodedBitstreamContext *ctx, RWContext *rw, start_sb = 0; for (i = 0; start_sb < sb_cols && i < AV1_MAX_TILE_COLS; i++) { + current->tile_start_col_sb[i] = start_sb; max_width = FFMIN(sb_cols - start_sb, max_tile_width_sb); ns(max_width, width_in_sbs_minus_1[i], 1, i); size_sb = current->width_in_sbs_minus_1[i] + 1; @@ -669,6 +678,7 @@ static int FUNC(tile_info)(CodedBitstreamContext *ctx, RWContext *rw, start_sb = 0; for (i = 0; start_sb < sb_rows && i < AV1_MAX_TILE_ROWS; i++) { + current->tile_start_row_sb[i] = start_sb; max_height = FFMIN(sb_rows - start_sb, max_tile_height_sb); ns(max_height, height_in_sbs_minus_1[i], 1, i); size_sb = current->height_in_sbs_minus_1[i] + 1; diff --git a/libavcodec/decode.c b/libavcodec/decode.c index 360837a0adb95..a7c130207c17b 100644 --- a/libavcodec/decode.c +++ b/libavcodec/decode.c @@ -1087,6 +1087,15 @@ int avcodec_get_hw_frames_parameters(AVCodecContext *avctx, if (!frames_ref) return AVERROR(ENOMEM); + if (!avctx->internal->hwaccel_priv_data) { + avctx->internal->hwaccel_priv_data = + av_mallocz(hwa->priv_data_size); + if (!avctx->internal->hwaccel_priv_data) { + av_buffer_unref(&frames_ref); + return AVERROR(ENOMEM); + } + } + ret = hwa->frame_params(avctx, frames_ref); if (ret >= 0) { AVHWFramesContext *frames_ctx = (AVHWFramesContext*)frames_ref->data; @@ -1111,12 +1120,10 @@ int avcodec_get_hw_frames_parameters(AVCodecContext *avctx, } static int hwaccel_init(AVCodecContext *avctx, - const AVCodecHWConfigInternal *hw_config) + const AVHWAccel *hwaccel) { - const AVHWAccel *hwaccel; int err; - hwaccel = hw_config->hwaccel; if (hwaccel->capabilities & AV_HWACCEL_CODEC_CAP_EXPERIMENTAL && avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) { av_log(avctx, AV_LOG_WARNING, "Ignoring experimental hwaccel: %s\n", @@ -1124,7 +1131,7 @@ static int hwaccel_init(AVCodecContext *avctx, return AVERROR_PATCHWELCOME; } - if (hwaccel->priv_data_size) { + if (!avctx->internal->hwaccel_priv_data && hwaccel->priv_data_size) { avctx->internal->hwaccel_priv_data = av_mallocz(hwaccel->priv_data_size); if (!avctx->internal->hwaccel_priv_data) @@ -1137,7 +1144,7 @@ static int hwaccel_init(AVCodecContext *avctx, if (err < 0) { av_log(avctx, AV_LOG_ERROR, "Failed setup for format %s: " "hwaccel initialisation returned error.\n", - av_get_pix_fmt_name(hw_config->public.pix_fmt)); + av_get_pix_fmt_name(hwaccel->pix_fmt)); av_freep(&avctx->internal->hwaccel_priv_data); avctx->hwaccel = NULL; return err; @@ -1147,7 +1154,7 @@ static int hwaccel_init(AVCodecContext *avctx, return 0; } -static void hwaccel_uninit(AVCodecContext *avctx) +void ff_hwaccel_uninit(AVCodecContext *avctx) { if (avctx->hwaccel && avctx->hwaccel->uninit) avctx->hwaccel->uninit(avctx); @@ -1186,7 +1193,7 @@ int ff_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt) for (;;) { // Remove the previous hwaccel, if there was one. - hwaccel_uninit(avctx); + ff_hwaccel_uninit(avctx); user_choice = avctx->get_format(avctx, choices); if (user_choice == AV_PIX_FMT_NONE) { @@ -1271,7 +1278,7 @@ int ff_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt) if (hw_config->hwaccel) { av_log(avctx, AV_LOG_DEBUG, "Format %s requires hwaccel " "initialisation.\n", desc->name); - err = hwaccel_init(avctx, hw_config); + err = hwaccel_init(avctx, hw_config->hwaccel); if (err < 0) goto try_again; } @@ -1290,6 +1297,9 @@ int ff_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt) --n; } + if (ret < 0) + ff_hwaccel_uninit(avctx); + av_freep(&choices); return ret; } @@ -1708,3 +1718,23 @@ int ff_copy_palette(void *dst, const AVPacket *src, void *logctx) } return 0; } + +AVBufferRef *ff_hwaccel_frame_priv_alloc(AVCodecContext *avctx, + const AVHWAccel *hwaccel) +{ + AVBufferRef *ref; + AVHWFramesContext *frames_ctx = (AVHWFramesContext *)avctx->hw_frames_ctx->data; + uint8_t *data = av_mallocz(hwaccel->frame_priv_data_size); + if (!data) + return NULL; + + ref = av_buffer_create(data, hwaccel->frame_priv_data_size, + hwaccel->free_frame_priv, + frames_ctx->device_ctx, 0); + if (!ref) { + av_free(data); + return NULL; + } + + return ref; +} diff --git a/libavcodec/decode.h b/libavcodec/decode.h index 8430ffbd66484..aaa29bc7f528a 100644 --- a/libavcodec/decode.h +++ b/libavcodec/decode.h @@ -150,4 +150,15 @@ int ff_reget_buffer(AVCodecContext *avctx, AVFrame *frame, int flags); int ff_side_data_update_matrix_encoding(AVFrame *frame, enum AVMatrixEncoding matrix_encoding); +/** + * Allocate a hwaccel frame private data and create an AVBufferRef + * from it. + * + * @param avctx The codec context which to attach as an opaque value + * @param hwaccel The hwaccel for which to allocate + * @return The allocated buffer + */ +AVBufferRef *ff_hwaccel_frame_priv_alloc(AVCodecContext *avctx, + const AVHWAccel *hwaccel); + #endif /* AVCODEC_DECODE_H */ diff --git a/libavcodec/h2645_vui.c b/libavcodec/h2645_vui.c index 0633fcbddd2b9..e5c7bf46f9b3d 100644 --- a/libavcodec/h2645_vui.c +++ b/libavcodec/h2645_vui.c @@ -36,21 +36,19 @@ void ff_h2645_decode_common_vui_params(GetBitContext *gb, H2645VUI *vui, void *logctx) { - int aspect_ratio_info_present_flag; - av_log(logctx, AV_LOG_DEBUG, "Decoding VUI\n"); - aspect_ratio_info_present_flag = get_bits1(gb); - if (aspect_ratio_info_present_flag) { - uint8_t aspect_ratio_idc = get_bits(gb, 8); - if (aspect_ratio_idc < FF_ARRAY_ELEMS(ff_h2645_pixel_aspect)) - vui->sar = ff_h2645_pixel_aspect[aspect_ratio_idc]; - else if (aspect_ratio_idc == EXTENDED_SAR) { + vui->aspect_ratio_info_present_flag = get_bits1(gb); + if (vui->aspect_ratio_info_present_flag) { + vui->aspect_ratio_idc = get_bits(gb, 8); + if (vui->aspect_ratio_idc < FF_ARRAY_ELEMS(ff_h2645_pixel_aspect)) + vui->sar = ff_h2645_pixel_aspect[vui->aspect_ratio_idc]; + else if (vui->aspect_ratio_idc == EXTENDED_SAR) { vui->sar.num = get_bits(gb, 16); vui->sar.den = get_bits(gb, 16); } else av_log(logctx, AV_LOG_WARNING, - "Unknown SAR index: %u.\n", aspect_ratio_idc); + "Unknown SAR index: %u.\n", vui->aspect_ratio_idc); } else vui->sar = (AVRational){ 0, 1 }; diff --git a/libavcodec/h2645_vui.h b/libavcodec/h2645_vui.h index 638da7c36672e..2c839f4b01557 100644 --- a/libavcodec/h2645_vui.h +++ b/libavcodec/h2645_vui.h @@ -26,6 +26,8 @@ typedef struct H2645VUI { AVRational sar; + int aspect_ratio_idc; + int aspect_ratio_info_present_flag; int overscan_info_present_flag; int overscan_appropriate_flag; diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c index d0d1e6590398e..53446e9aabeb5 100644 --- a/libavcodec/h264_ps.c +++ b/libavcodec/h264_ps.c @@ -113,12 +113,13 @@ static inline int decode_hrd_parameters(GetBitContext *gb, void *logctx, return AVERROR_INVALIDDATA; } - get_bits(gb, 4); /* bit_rate_scale */ + sps->cpr_flag = 0x0; + sps->bit_rate_scale = get_bits(gb, 4); get_bits(gb, 4); /* cpb_size_scale */ for (i = 0; i < cpb_count; i++) { - get_ue_golomb_long(gb); /* bit_rate_value_minus1 */ - get_ue_golomb_long(gb); /* cpb_size_value_minus1 */ - get_bits1(gb); /* cbr_flag */ + sps->bit_rate_value[i] = get_ue_golomb_long(gb) + 1; /* bit_rate_value_minus1 + 1 */ + sps->cpb_size_value[i] = get_ue_golomb_long(gb) + 1; /* cpb_size_value_minus1 + 1 */ + sps->cpr_flag |= get_bits1(gb) << i; } sps->initial_cpb_removal_delay_length = get_bits(gb, 5) + 1; sps->cpb_removal_delay_length = get_bits(gb, 5) + 1; @@ -176,7 +177,7 @@ static inline int decode_vui_parameters(GetBitContext *gb, void *logctx, get_ue_golomb_31(gb); /* log2_max_mv_length_horizontal */ get_ue_golomb_31(gb); /* log2_max_mv_length_vertical */ sps->num_reorder_frames = get_ue_golomb_31(gb); - get_ue_golomb_31(gb); /*max_dec_frame_buffering*/ + sps->max_dec_frame_buffering = get_ue_golomb_31(gb); if (get_bits_left(gb) < 0) { sps->num_reorder_frames = 0; @@ -197,12 +198,14 @@ static inline int decode_vui_parameters(GetBitContext *gb, void *logctx, } static int decode_scaling_list(GetBitContext *gb, uint8_t *factors, int size, - const uint8_t *jvt_list, - const uint8_t *fallback_list) + const uint8_t *jvt_list, const uint8_t *fallback_list, + uint16_t *mask, int pos) { int i, last = 8, next = 8; const uint8_t *scan = size == 16 ? ff_zigzag_scan : ff_zigzag_direct; - if (!get_bits1(gb)) /* matrix not written, we use the predicted one */ + uint16_t seq_scaling_list_present_flag = get_bits1(gb); + *mask |= (seq_scaling_list_present_flag << pos); + if (!seq_scaling_list_present_flag) /* matrix not written, we use the predicted one */ memcpy(factors, fallback_list, size * sizeof(uint8_t)); else for (i = 0; i < size; i++) { @@ -226,6 +229,7 @@ static int decode_scaling_list(GetBitContext *gb, uint8_t *factors, int size, /* returns non zero if the provided SPS scaling matrix has been filled */ static int decode_scaling_matrices(GetBitContext *gb, const SPS *sps, const PPS *pps, int is_sps, + int present_flag, uint16_t *mask, uint8_t(*scaling_matrix4)[16], uint8_t(*scaling_matrix8)[64]) { @@ -237,21 +241,22 @@ static int decode_scaling_matrices(GetBitContext *gb, const SPS *sps, fallback_sps ? sps->scaling_matrix8[3] : default_scaling8[1] }; int ret = 0; - if (get_bits1(gb)) { - ret |= decode_scaling_list(gb, scaling_matrix4[0], 16, default_scaling4[0], fallback[0]); // Intra, Y - ret |= decode_scaling_list(gb, scaling_matrix4[1], 16, default_scaling4[0], scaling_matrix4[0]); // Intra, Cr - ret |= decode_scaling_list(gb, scaling_matrix4[2], 16, default_scaling4[0], scaling_matrix4[1]); // Intra, Cb - ret |= decode_scaling_list(gb, scaling_matrix4[3], 16, default_scaling4[1], fallback[1]); // Inter, Y - ret |= decode_scaling_list(gb, scaling_matrix4[4], 16, default_scaling4[1], scaling_matrix4[3]); // Inter, Cr - ret |= decode_scaling_list(gb, scaling_matrix4[5], 16, default_scaling4[1], scaling_matrix4[4]); // Inter, Cb + *mask = 0x0; + if (present_flag) { + ret |= decode_scaling_list(gb, scaling_matrix4[0], 16, default_scaling4[0], fallback[0], mask, 0); // Intra, Y + ret |= decode_scaling_list(gb, scaling_matrix4[1], 16, default_scaling4[0], scaling_matrix4[0], mask, 1); // Intra, Cr + ret |= decode_scaling_list(gb, scaling_matrix4[2], 16, default_scaling4[0], scaling_matrix4[1], mask, 2); // Intra, Cb + ret |= decode_scaling_list(gb, scaling_matrix4[3], 16, default_scaling4[1], fallback[1], mask, 3); // Inter, Y + ret |= decode_scaling_list(gb, scaling_matrix4[4], 16, default_scaling4[1], scaling_matrix4[3], mask, 4); // Inter, Cr + ret |= decode_scaling_list(gb, scaling_matrix4[5], 16, default_scaling4[1], scaling_matrix4[4], mask, 5); // Inter, Cb if (is_sps || pps->transform_8x8_mode) { - ret |= decode_scaling_list(gb, scaling_matrix8[0], 64, default_scaling8[0], fallback[2]); // Intra, Y - ret |= decode_scaling_list(gb, scaling_matrix8[3], 64, default_scaling8[1], fallback[3]); // Inter, Y + ret |= decode_scaling_list(gb, scaling_matrix8[0], 64, default_scaling8[0], fallback[2], mask, 6); // Intra, Y + ret |= decode_scaling_list(gb, scaling_matrix8[3], 64, default_scaling8[1], fallback[3], mask, 7); // Inter, Y if (sps->chroma_format_idc == 3) { - ret |= decode_scaling_list(gb, scaling_matrix8[1], 64, default_scaling8[0], scaling_matrix8[0]); // Intra, Cr - ret |= decode_scaling_list(gb, scaling_matrix8[4], 64, default_scaling8[1], scaling_matrix8[3]); // Inter, Cr - ret |= decode_scaling_list(gb, scaling_matrix8[2], 64, default_scaling8[0], scaling_matrix8[1]); // Intra, Cb - ret |= decode_scaling_list(gb, scaling_matrix8[5], 64, default_scaling8[1], scaling_matrix8[4]); // Inter, Cb + ret |= decode_scaling_list(gb, scaling_matrix8[1], 64, default_scaling8[0], scaling_matrix8[0], mask, 8); // Intra, Cr + ret |= decode_scaling_list(gb, scaling_matrix8[4], 64, default_scaling8[1], scaling_matrix8[3], mask, 9); // Inter, Cr + ret |= decode_scaling_list(gb, scaling_matrix8[2], 64, default_scaling8[0], scaling_matrix8[1], mask, 10); // Intra, Cb + ret |= decode_scaling_list(gb, scaling_matrix8[5], 64, default_scaling8[1], scaling_matrix8[4], mask, 11); // Inter, Cb } } if (!ret) @@ -368,7 +373,8 @@ int ff_h264_decode_seq_parameter_set(GetBitContext *gb, AVCodecContext *avctx, goto fail; } sps->transform_bypass = get_bits1(gb); - ret = decode_scaling_matrices(gb, sps, NULL, 1, + ret = decode_scaling_matrices(gb, sps, NULL, 1, get_bits1(gb), + &sps->scaling_matrix_present_mask, sps->scaling_matrix4, sps->scaling_matrix8); if (ret < 0) goto fail; @@ -731,6 +737,7 @@ int ff_h264_decode_picture_parameter_set(GetBitContext *gb, AVCodecContext *avct if (!(bit_length & 7) && pps->data_size < sizeof(pps->data)) pps->data[pps->data_size++] = 0x80; + pps->pps_id = pps_id; pps->sps_id = get_ue_golomb_31(gb); if ((unsigned)pps->sps_id >= MAX_SPS_COUNT || !ps->sps_list[pps->sps_id]) { @@ -802,7 +809,10 @@ int ff_h264_decode_picture_parameter_set(GetBitContext *gb, AVCodecContext *avct bits_left = bit_length - get_bits_count(gb); if (bits_left > 0 && more_rbsp_data_in_pps(sps, avctx)) { pps->transform_8x8_mode = get_bits1(gb); + pps->pic_scaling_matrix_present_flag = get_bits1(gb); ret = decode_scaling_matrices(gb, sps, pps, 0, + pps->pic_scaling_matrix_present_flag, + &pps->pic_scaling_matrix_present_mask, pps->scaling_matrix4, pps->scaling_matrix8); if (ret < 0) goto fail; diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h index 5c35761fbc844..e6756196352b2 100644 --- a/libavcodec/h264_ps.h +++ b/libavcodec/h264_ps.h @@ -80,7 +80,9 @@ typedef struct SPS { int32_t offset_for_ref_frame[256]; int bitstream_restriction_flag; int num_reorder_frames; + int max_dec_frame_buffering; int scaling_matrix_present; + uint16_t scaling_matrix_present_mask; uint8_t scaling_matrix4[6][16]; uint8_t scaling_matrix8[6][64]; int nal_hrd_parameters_present_flag; @@ -88,6 +90,10 @@ typedef struct SPS { int pic_struct_present_flag; int time_offset_length; int cpb_cnt; ///< See H.264 E.1.2 + int bit_rate_scale; + uint32_t bit_rate_value[32]; ///< bit_rate_value_minus1 + 1 + uint32_t cpb_size_value[32]; ///< cpb_size_value_minus1 + 1 + uint32_t cpr_flag; int initial_cpb_removal_delay_length; ///< initial_cpb_removal_delay_length_minus1 + 1 int cpb_removal_delay_length; ///< cpb_removal_delay_length_minus1 + 1 int dpb_output_delay_length; ///< dpb_output_delay_length_minus1 + 1 @@ -103,9 +109,10 @@ typedef struct SPS { * Picture parameter set */ typedef struct PPS { + unsigned int pps_id; unsigned int sps_id; int cabac; ///< entropy_coding_mode_flag - int pic_order_present; ///< pic_order_present_flag + int pic_order_present; ///< bottom_field_pic_order_in_frame_present_flag int slice_group_count; ///< num_slice_groups_minus1 + 1 int mb_slice_group_map_type; unsigned int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1 @@ -118,6 +125,8 @@ typedef struct PPS { int constrained_intra_pred; ///< constrained_intra_pred_flag int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag int transform_8x8_mode; ///< transform_8x8_mode_flag + int pic_scaling_matrix_present_flag; + uint16_t pic_scaling_matrix_present_mask; uint8_t scaling_matrix4[6][16]; uint8_t scaling_matrix8[6][64]; uint8_t chroma_qp_table[2][QP_MAX_NUM+1]; ///< pre-scaled (with chroma_qp_index_offset) version of qp_table diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c index be7a8e0b5abe3..41bf30eefca6a 100644 --- a/libavcodec/h264_slice.c +++ b/libavcodec/h264_slice.c @@ -33,6 +33,7 @@ #include "libavutil/pixdesc.h" #include "libavutil/timecode.h" #include "internal.h" +#include "decode.h" #include "cabac.h" #include "cabac_functions.h" #include "decode.h" @@ -212,7 +213,7 @@ static int alloc_picture(H264Context *h, H264Picture *pic) const AVHWAccel *hwaccel = h->avctx->hwaccel; av_assert0(!pic->hwaccel_picture_private); if (hwaccel->frame_priv_data_size) { - pic->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size); + pic->hwaccel_priv_buf = ff_hwaccel_frame_priv_alloc(h->avctx, hwaccel); if (!pic->hwaccel_priv_buf) return AVERROR(ENOMEM); pic->hwaccel_picture_private = pic->hwaccel_priv_buf->data; @@ -780,7 +781,8 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback) CONFIG_H264_NVDEC_HWACCEL + \ CONFIG_H264_VAAPI_HWACCEL + \ CONFIG_H264_VIDEOTOOLBOX_HWACCEL + \ - CONFIG_H264_VDPAU_HWACCEL) + CONFIG_H264_VDPAU_HWACCEL + \ + CONFIG_H264_VULKAN_HWACCEL) enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts; const enum AVPixelFormat *choices = pix_fmts; int i; @@ -801,6 +803,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback) #if CONFIG_H264_VIDEOTOOLBOX_HWACCEL if (h->avctx->colorspace != AVCOL_SPC_RGB) *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX; +#endif +#if CONFIG_H264_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; #endif if (CHROMA444(h)) { if (h->avctx->colorspace == AVCOL_SPC_RGB) { @@ -820,6 +825,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback) } break; case 12: +#if CONFIG_H264_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; +#endif if (CHROMA444(h)) { if (h->avctx->colorspace == AVCOL_SPC_RGB) { *fmt++ = AV_PIX_FMT_GBRP12; @@ -845,6 +853,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback) #if CONFIG_H264_VDPAU_HWACCEL *fmt++ = AV_PIX_FMT_VDPAU; #endif +#if CONFIG_H264_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; +#endif #if CONFIG_H264_NVDEC_HWACCEL *fmt++ = AV_PIX_FMT_CUDA; #endif diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c index 521b1e2235d83..19f8dba131780 100644 --- a/libavcodec/h264dec.c +++ b/libavcodec/h264dec.c @@ -484,6 +484,9 @@ static void h264_decode_flush(AVCodecContext *avctx) ff_h264_free_tables(h); h->context_initialized = 0; + + if (avctx->hwaccel && avctx->hwaccel->flush) + avctx->hwaccel->flush(avctx); } static int get_last_needed_nal(H264Context *h) @@ -1097,6 +1100,9 @@ const FFCodec ff_h264_decoder = { #endif #if CONFIG_H264_VIDEOTOOLBOX_HWACCEL HWACCEL_VIDEOTOOLBOX(h264), +#endif +#if CONFIG_H264_VULKAN_HWACCEL + HWACCEL_VULKAN(h264), #endif NULL }, diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c index 043e1bf30854d..01b11ed42aa3b 100644 --- a/libavcodec/hevc_ps.c +++ b/libavcodec/hevc_ps.c @@ -100,51 +100,50 @@ static void remove_vps(HEVCParamSets *s, int id) int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, ShortTermRPS *rps, const HEVCSPS *sps, int is_slice_header) { - uint8_t rps_predict = 0; int delta_poc; int k0 = 0; int k = 0; int i; + rps->rps_predict = 0; + if (rps != sps->st_rps && sps->nb_st_rps) - rps_predict = get_bits1(gb); + rps->rps_predict = get_bits1(gb); - if (rps_predict) { + if (rps->rps_predict) { const ShortTermRPS *rps_ridx; int delta_rps; - unsigned abs_delta_rps; - uint8_t use_delta_flag = 0; - uint8_t delta_rps_sign; if (is_slice_header) { - unsigned int delta_idx = get_ue_golomb_long(gb) + 1; - if (delta_idx > sps->nb_st_rps) { + rps->delta_idx = get_ue_golomb_long(gb) + 1; + if (rps->delta_idx > sps->nb_st_rps) { av_log(avctx, AV_LOG_ERROR, "Invalid value of delta_idx in slice header RPS: %d > %d.\n", - delta_idx, sps->nb_st_rps); + rps->delta_idx, sps->nb_st_rps); return AVERROR_INVALIDDATA; } - rps_ridx = &sps->st_rps[sps->nb_st_rps - delta_idx]; + rps_ridx = &sps->st_rps[sps->nb_st_rps - rps->delta_idx]; rps->rps_idx_num_delta_pocs = rps_ridx->num_delta_pocs; } else rps_ridx = &sps->st_rps[rps - sps->st_rps - 1]; - delta_rps_sign = get_bits1(gb); - abs_delta_rps = get_ue_golomb_long(gb) + 1; - if (abs_delta_rps < 1 || abs_delta_rps > 32768) { + rps->delta_rps_sign = get_bits1(gb); + rps->abs_delta_rps = get_ue_golomb_long(gb) + 1; + if (rps->abs_delta_rps > 32768) { av_log(avctx, AV_LOG_ERROR, "Invalid value of abs_delta_rps: %d\n", - abs_delta_rps); + rps->abs_delta_rps); return AVERROR_INVALIDDATA; } - delta_rps = (1 - (delta_rps_sign << 1)) * abs_delta_rps; + delta_rps = (1 - (rps->delta_rps_sign << 1)) * rps->abs_delta_rps; for (i = 0; i <= rps_ridx->num_delta_pocs; i++) { int used = rps->used[k] = get_bits1(gb); + rps->use_delta_flag = 0; if (!used) - use_delta_flag = get_bits1(gb); + rps->use_delta_flag = get_bits1(gb); - if (used || use_delta_flag) { + if (used || rps->use_delta_flag) { if (i < rps_ridx->num_delta_pocs) delta_poc = delta_rps + rps_ridx->delta_poc[i]; else @@ -210,7 +209,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, if (rps->num_delta_pocs) { prev = 0; for (i = 0; i < rps->num_negative_pics; i++) { - delta_poc = get_ue_golomb_long(gb) + 1; + delta_poc = rps->delta_poc_s0[i] = get_ue_golomb_long(gb) + 1; if (delta_poc < 1 || delta_poc > 32768) { av_log(avctx, AV_LOG_ERROR, "Invalid value of delta_poc: %d\n", @@ -223,7 +222,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, } prev = 0; for (i = 0; i < nb_positive_pics; i++) { - delta_poc = get_ue_golomb_long(gb) + 1; + delta_poc = rps->delta_poc_s1[i] = get_ue_golomb_long(gb) + 1; if (delta_poc < 1 || delta_poc > 32768) { av_log(avctx, AV_LOG_ERROR, "Invalid value of delta_poc: %d\n", @@ -357,81 +356,84 @@ static int parse_ptl(GetBitContext *gb, AVCodecContext *avctx, } static void decode_sublayer_hrd(GetBitContext *gb, unsigned int nb_cpb, - int subpic_params_present) + HEVCSublayerHdrParams *par, int subpic_params_present) { int i; for (i = 0; i < nb_cpb; i++) { - get_ue_golomb_long(gb); // bit_rate_value_minus1 - get_ue_golomb_long(gb); // cpb_size_value_minus1 + par->bit_rate_value_minus1[i] = get_ue_golomb_long(gb); + par->cpb_size_value_minus1[i] = get_ue_golomb_long(gb); if (subpic_params_present) { - get_ue_golomb_long(gb); // cpb_size_du_value_minus1 - get_ue_golomb_long(gb); // bit_rate_du_value_minus1 + par->cpb_size_du_value_minus1[i] = get_ue_golomb_long(gb); + par->bit_rate_du_value_minus1[i] = get_ue_golomb_long(gb); } - skip_bits1(gb); // cbr_flag + + par->cbr_flag = get_bits1(gb); } } static int decode_hrd(GetBitContext *gb, int common_inf_present, - int max_sublayers) + HEVCHdrParams *hdr, int max_sublayers) { - int nal_params_present = 0, vcl_params_present = 0; - int subpic_params_present = 0; - int i; - if (common_inf_present) { - nal_params_present = get_bits1(gb); - vcl_params_present = get_bits1(gb); - - if (nal_params_present || vcl_params_present) { - subpic_params_present = get_bits1(gb); - - if (subpic_params_present) { - skip_bits(gb, 8); // tick_divisor_minus2 - skip_bits(gb, 5); // du_cpb_removal_delay_increment_length_minus1 - skip_bits(gb, 1); // sub_pic_cpb_params_in_pic_timing_sei_flag - skip_bits(gb, 5); // dpb_output_delay_du_length_minus1 + hdr->flags.nal_hrd_parameters_present_flag = get_bits1(gb); + hdr->flags.vcl_hrd_parameters_present_flag = get_bits1(gb); + + if (hdr->flags.nal_hrd_parameters_present_flag || + hdr->flags.vcl_hrd_parameters_present_flag) { + hdr->flags.sub_pic_hrd_params_present_flag = get_bits1(gb); + + if (hdr->flags.sub_pic_hrd_params_present_flag) { + hdr->tick_divisor_minus2 = get_bits(gb, 8); + hdr->du_cpb_removal_delay_increment_length_minus1 = get_bits(gb, 5); + hdr->flags.sub_pic_cpb_params_in_pic_timing_sei_flag = get_bits1(gb); + hdr->dpb_output_delay_du_length_minus1 = get_bits(gb, 5); } - skip_bits(gb, 4); // bit_rate_scale - skip_bits(gb, 4); // cpb_size_scale + hdr->bit_rate_scale = get_bits(gb, 4); + hdr->cpb_size_scale = get_bits(gb, 4); - if (subpic_params_present) - skip_bits(gb, 4); // cpb_size_du_scale + if (hdr->flags.sub_pic_hrd_params_present_flag) + hdr->cpb_size_du_scale = get_bits(gb, 4); - skip_bits(gb, 5); // initial_cpb_removal_delay_length_minus1 - skip_bits(gb, 5); // au_cpb_removal_delay_length_minus1 - skip_bits(gb, 5); // dpb_output_delay_length_minus1 + hdr->initial_cpb_removal_delay_length_minus1 = get_bits(gb, 5); + hdr->au_cpb_removal_delay_length_minus1 = get_bits(gb, 5); + hdr->dpb_output_delay_length_minus1 = get_bits(gb, 5); } } - for (i = 0; i < max_sublayers; i++) { - int low_delay = 0; - unsigned int nb_cpb = 1; - int fixed_rate = get_bits1(gb); + for (int i = 0; i < max_sublayers; i++) { + hdr->flags.fixed_pic_rate_general_flag = get_bits1(gb); + + hdr->cpb_cnt_minus1[i] = 1; - if (!fixed_rate) - fixed_rate = get_bits1(gb); + if (!hdr->flags.fixed_pic_rate_general_flag) + hdr->flags.fixed_pic_rate_within_cvs_flag = get_bits1(gb); - if (fixed_rate) - get_ue_golomb_long(gb); // elemental_duration_in_tc_minus1 + if (hdr->flags.fixed_pic_rate_within_cvs_flag) + hdr->elemental_duration_in_tc_minus1[i] = get_ue_golomb_long(gb); else - low_delay = get_bits1(gb); + hdr->flags.low_delay_hrd_flag = get_bits1(gb); - if (!low_delay) { - nb_cpb = get_ue_golomb_long(gb) + 1; - if (nb_cpb < 1 || nb_cpb > 32) { - av_log(NULL, AV_LOG_ERROR, "nb_cpb %d invalid\n", nb_cpb); + if (!hdr->flags.low_delay_hrd_flag) { + hdr->cpb_cnt_minus1[i] = get_ue_golomb_long(gb); + if (hdr->cpb_cnt_minus1[i] > 31) { + av_log(NULL, AV_LOG_ERROR, "nb_cpb %d invalid\n", + hdr->cpb_cnt_minus1[i]); return AVERROR_INVALIDDATA; } } - if (nal_params_present) - decode_sublayer_hrd(gb, nb_cpb, subpic_params_present); - if (vcl_params_present) - decode_sublayer_hrd(gb, nb_cpb, subpic_params_present); + if (hdr->flags.nal_hrd_parameters_present_flag) + decode_sublayer_hrd(gb, hdr->cpb_cnt_minus1[i], &hdr->nal_params[i], + hdr->flags.sub_pic_hrd_params_present_flag); + + if (hdr->flags.vcl_hrd_parameters_present_flag) + decode_sublayer_hrd(gb, hdr->cpb_cnt_minus1[i], &hdr->vcl_params[i], + hdr->flags.sub_pic_hrd_params_present_flag); } + return 0; } @@ -461,7 +463,7 @@ int ff_hevc_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx, } memcpy(vps->data, gb->buffer, vps->data_size); - vps_id = get_bits(gb, 4); + vps_id = vps->vps_id = get_bits(gb, 4); if (get_bits(gb, 2) != 3) { // vps_reserved_three_2bits av_log(avctx, AV_LOG_ERROR, "vps_reserved_three_2bits is not three\n"); @@ -538,7 +540,8 @@ int ff_hevc_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx, get_ue_golomb_long(gb); // hrd_layer_set_idx if (i) common_inf_present = get_bits1(gb); - decode_hrd(gb, common_inf_present, vps->vps_max_sub_layers); + decode_hrd(gb, common_inf_present, &vps->hdr[i], + vps->vps_max_sub_layers); } } get_bits1(gb); /* vps_extension_flag */ @@ -657,7 +660,7 @@ static void decode_vui(GetBitContext *gb, AVCodecContext *avctx, vui->vui_num_ticks_poc_diff_one_minus1 = get_ue_golomb_long(gb); vui->vui_hrd_parameters_present_flag = get_bits1(gb); if (vui->vui_hrd_parameters_present_flag) - decode_hrd(gb, 1, sps->max_sub_layers); + decode_hrd(gb, 1, &sps->hdr, sps->max_sub_layers); } vui->bitstream_restriction_flag = get_bits1(gb); @@ -852,9 +855,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, { HEVCWindow *ow; int ret = 0; - int log2_diff_max_min_transform_block_size; - int bit_depth_chroma, start, vui_present, sublayer_ordering_info, num_comps; - int i; + int bit_depth_chroma, start, num_comps; + int i, j; // Coded parameters @@ -902,7 +904,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, sps->height, 0, avctx)) < 0) return ret; - if (get_bits1(gb)) { // pic_conformance_flag + sps->conformance_window_flag = get_bits1(gb); + if (sps->conformance_window_flag) { int vert_mult = hevc_sub_height_c[sps->chroma_format_idc]; int horiz_mult = hevc_sub_width_c[sps->chroma_format_idc]; sps->pic_conf_win.left_offset = get_ue_golomb_long(gb) * horiz_mult; @@ -959,8 +962,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, return AVERROR_INVALIDDATA; } - sublayer_ordering_info = get_bits1(gb); - start = sublayer_ordering_info ? 0 : sps->max_sub_layers - 1; + sps->sublayer_ordering_info_flag = get_bits1(gb); + start = sps->sublayer_ordering_info_flag ? 0 : sps->max_sub_layers - 1; for (i = start; i < sps->max_sub_layers; i++) { sps->temporal_layer[i].max_dec_pic_buffering = get_ue_golomb_long(gb) + 1; sps->temporal_layer[i].num_reorder_pics = get_ue_golomb_long(gb); @@ -981,7 +984,7 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, } } - if (!sublayer_ordering_info) { + if (!sps->sublayer_ordering_info_flag) { for (i = 0; i < start; i++) { sps->temporal_layer[i].max_dec_pic_buffering = sps->temporal_layer[start].max_dec_pic_buffering; sps->temporal_layer[i].num_reorder_pics = sps->temporal_layer[start].num_reorder_pics; @@ -989,12 +992,12 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, } } - sps->log2_min_cb_size = get_ue_golomb_long(gb) + 3; - sps->log2_diff_max_min_coding_block_size = get_ue_golomb_long(gb); - sps->log2_min_tb_size = get_ue_golomb_long(gb) + 2; - log2_diff_max_min_transform_block_size = get_ue_golomb_long(gb); - sps->log2_max_trafo_size = log2_diff_max_min_transform_block_size + - sps->log2_min_tb_size; + sps->log2_min_cb_size = get_ue_golomb_long(gb) + 3; + sps->log2_diff_max_min_coding_block_size = get_ue_golomb_long(gb); + sps->log2_min_tb_size = get_ue_golomb_long(gb) + 2; + sps->log2_diff_max_min_transform_block_size = get_ue_golomb_long(gb); + sps->log2_max_trafo_size = sps->log2_diff_max_min_transform_block_size + + sps->log2_min_tb_size; if (sps->log2_min_cb_size < 3 || sps->log2_min_cb_size > 30) { av_log(avctx, AV_LOG_ERROR, "Invalid value %d for log2_min_cb_size", sps->log2_min_cb_size); @@ -1011,8 +1014,9 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, return AVERROR_INVALIDDATA; } - if (log2_diff_max_min_transform_block_size < 0 || log2_diff_max_min_transform_block_size > 30) { - av_log(avctx, AV_LOG_ERROR, "Invalid value %d for log2_diff_max_min_transform_block_size", log2_diff_max_min_transform_block_size); + if (sps->log2_diff_max_min_transform_block_size > 30) { + av_log(avctx, AV_LOG_ERROR, "Invalid value %d for log2_diff_max_min_transform_block_size", + sps->log2_diff_max_min_transform_block_size); return AVERROR_INVALIDDATA; } @@ -1079,11 +1083,12 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, sps->sps_temporal_mvp_enabled_flag = get_bits1(gb); sps->sps_strong_intra_smoothing_enable_flag = get_bits1(gb); sps->vui.common.sar = (AVRational){0, 1}; - vui_present = get_bits1(gb); - if (vui_present) + sps->vui_present = get_bits1(gb); + if (sps->vui_present) decode_vui(gb, avctx, apply_defdispwin, sps); - if (get_bits1(gb)) { // sps_extension_flag + sps->sps_extension_present_flag = get_bits1(gb); + if (sps->sps_extension_present_flag) { sps->sps_range_extension_flag = get_bits1(gb); sps->sps_multilayer_extension_flag = get_bits1(gb); sps->sps_3d_extension_flag = get_bits1(gb); @@ -1772,7 +1777,7 @@ int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx, pps->log2_max_transform_skip_block_size = 2; // Coded parameters - pps_id = get_ue_golomb_long(gb); + pps_id = pps->pps_id = get_ue_golomb_long(gb); if (pps_id >= HEVC_MAX_PPS_COUNT) { av_log(avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", pps_id); ret = AVERROR_INVALIDDATA; @@ -1952,7 +1957,8 @@ int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx, pps->slice_header_extension_present_flag = get_bits1(gb); - if (get_bits1(gb)) { // pps_extension_present_flag + pps->pps_extension_present_flag = get_bits1(gb); + if (pps->pps_extension_present_flag) { pps->pps_range_extensions_flag = get_bits1(gb); pps->pps_multilayer_extension_flag = get_bits1(gb); pps->pps_3d_extension_flag = get_bits1(gb); diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h index 2124deb953d9d..ef11e51ee72f3 100644 --- a/libavcodec/hevc_ps.h +++ b/libavcodec/hevc_ps.h @@ -32,10 +32,54 @@ #include "h2645_vui.h" #include "hevc.h" +typedef struct HEVCSublayerHdrParams { + uint32_t bit_rate_value_minus1[HEVC_MAX_CPB_CNT]; + uint32_t cpb_size_value_minus1[HEVC_MAX_CPB_CNT]; + uint32_t cpb_size_du_value_minus1[HEVC_MAX_CPB_CNT]; + uint32_t bit_rate_du_value_minus1[HEVC_MAX_CPB_CNT]; + uint32_t cbr_flag; +} HEVCSublayerHdrParams; + +typedef struct HEVCHdrFlagParams { + uint32_t nal_hrd_parameters_present_flag; + uint32_t vcl_hrd_parameters_present_flag; + uint32_t sub_pic_hrd_params_present_flag; + uint32_t sub_pic_cpb_params_in_pic_timing_sei_flag; + uint32_t fixed_pic_rate_general_flag; + uint32_t fixed_pic_rate_within_cvs_flag; + uint32_t low_delay_hrd_flag; +} HEVCHdrFlagParams; + +typedef struct HEVCHdrParams { + HEVCHdrFlagParams flags; + + uint8_t tick_divisor_minus2; + uint8_t du_cpb_removal_delay_increment_length_minus1; + uint8_t dpb_output_delay_du_length_minus1; + uint8_t bit_rate_scale; + uint8_t cpb_size_scale; + uint8_t cpb_size_du_scale; + uint8_t initial_cpb_removal_delay_length_minus1; + uint8_t au_cpb_removal_delay_length_minus1; + uint8_t dpb_output_delay_length_minus1; + uint8_t cpb_cnt_minus1[HEVC_MAX_SUB_LAYERS]; + uint16_t elemental_duration_in_tc_minus1[HEVC_MAX_SUB_LAYERS]; + + HEVCSublayerHdrParams nal_params[HEVC_MAX_SUB_LAYERS]; + HEVCSublayerHdrParams vcl_params[HEVC_MAX_SUB_LAYERS]; +} HEVCHdrParams; + typedef struct ShortTermRPS { + uint8_t rps_predict; + unsigned int delta_idx; + uint8_t use_delta_flag; + uint8_t delta_rps_sign; + unsigned int abs_delta_rps; unsigned int num_negative_pics; int num_delta_pocs; int rps_idx_num_delta_pocs; + int32_t delta_poc_s0[32]; + int32_t delta_poc_s1[32]; int32_t delta_poc[32]; uint8_t used[32]; } ShortTermRPS; @@ -108,6 +152,9 @@ typedef struct PTL { } PTL; typedef struct HEVCVPS { + unsigned int vps_id; + HEVCHdrParams hdr[HEVC_MAX_LAYER_SETS]; + uint8_t vps_temporal_id_nesting_flag; int vps_max_layers; int vps_max_sub_layers; ///< vps_max_temporal_layers_minus1 + 1 @@ -144,8 +191,11 @@ typedef struct HEVCSPS { HEVCWindow output_window; + uint8_t conformance_window_flag; HEVCWindow pic_conf_win; + HEVCHdrParams hdr; + int bit_depth; int bit_depth_chroma; int pixel_shift; @@ -154,6 +204,7 @@ typedef struct HEVCSPS { unsigned int log2_max_poc_lsb; int pcm_enabled_flag; + uint8_t sublayer_ordering_info_flag; int max_sub_layers; struct { int max_dec_pic_buffering; @@ -162,9 +213,11 @@ typedef struct HEVCSPS { } temporal_layer[HEVC_MAX_SUB_LAYERS]; uint8_t temporal_id_nesting_flag; + int vui_present; VUI vui; PTL ptl; + uint8_t sps_extension_present_flag; uint8_t scaling_list_enable_flag; ScalingList scaling_list; @@ -195,6 +248,7 @@ typedef struct HEVCSPS { unsigned int log2_max_trafo_size; unsigned int log2_ctb_size; unsigned int log2_min_pu_size; + unsigned int log2_diff_max_min_transform_block_size; int max_transform_hierarchy_depth_inter; int max_transform_hierarchy_depth_intra; @@ -248,6 +302,7 @@ typedef struct HEVCSPS { } HEVCSPS; typedef struct HEVCPPS { + unsigned int pps_id; unsigned int sps_id; ///< seq_parameter_set_id uint8_t sign_data_hiding_flag; @@ -297,6 +352,7 @@ typedef struct HEVCPPS { int num_extra_slice_header_bits; uint8_t slice_header_extension_present_flag; uint8_t log2_max_transform_skip_block_size; + uint8_t pps_extension_present_flag; uint8_t pps_range_extensions_flag; uint8_t pps_multilayer_extension_flag; uint8_t pps_3d_extension_flag; diff --git a/libavcodec/hevc_refs.c b/libavcodec/hevc_refs.c index e9be02c489232..a4af6ca65682b 100644 --- a/libavcodec/hevc_refs.c +++ b/libavcodec/hevc_refs.c @@ -23,6 +23,7 @@ #include "libavutil/avassert.h" +#include "decode.h" #include "thread.h" #include "hevc.h" #include "hevcdec.h" @@ -121,7 +122,7 @@ static HEVCFrame *alloc_frame(HEVCContext *s) const AVHWAccel *hwaccel = s->avctx->hwaccel; av_assert0(!frame->hwaccel_picture_private); if (hwaccel->frame_priv_data_size) { - frame->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size); + frame->hwaccel_priv_buf = ff_hwaccel_frame_priv_alloc(s->avctx, hwaccel); if (!frame->hwaccel_priv_buf) goto fail; frame->hwaccel_picture_private = frame->hwaccel_priv_buf->data; diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c index 7e1bf4e9157d4..eee77ec4dbb2f 100644 --- a/libavcodec/hevcdec.c +++ b/libavcodec/hevcdec.c @@ -405,7 +405,8 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) CONFIG_HEVC_NVDEC_HWACCEL + \ CONFIG_HEVC_VAAPI_HWACCEL + \ CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \ - CONFIG_HEVC_VDPAU_HWACCEL) + CONFIG_HEVC_VDPAU_HWACCEL + \ + CONFIG_HEVC_VULKAN_HWACCEL) enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts; switch (sps->pix_fmt) { @@ -429,6 +430,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) #endif #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX; +#endif +#if CONFIG_HEVC_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; #endif break; case AV_PIX_FMT_YUV420P10: @@ -445,6 +449,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX; #endif +#if CONFIG_HEVC_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; +#endif #if CONFIG_HEVC_VDPAU_HWACCEL *fmt++ = AV_PIX_FMT_VDPAU; #endif @@ -464,6 +471,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) #endif #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX; +#endif +#if CONFIG_HEVC_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; #endif break; case AV_PIX_FMT_YUV422P: @@ -473,12 +483,16 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) #endif #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX; +#endif +#if CONFIG_HEVC_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; #endif break; case AV_PIX_FMT_YUV444P10: #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX; #endif + /* NOTE: fallthrough */ case AV_PIX_FMT_YUV420P12: case AV_PIX_FMT_YUV444P12: #if CONFIG_HEVC_VAAPI_HWACCEL @@ -487,6 +501,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) #if CONFIG_HEVC_VDPAU_HWACCEL *fmt++ = AV_PIX_FMT_VDPAU; #endif +#if CONFIG_HEVC_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; +#endif #if CONFIG_HEVC_NVDEC_HWACCEL *fmt++ = AV_PIX_FMT_CUDA; #endif @@ -494,6 +511,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) case AV_PIX_FMT_YUV422P12: #if CONFIG_HEVC_VAAPI_HWACCEL *fmt++ = AV_PIX_FMT_VAAPI; +#endif +#if CONFIG_HEVC_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; #endif break; } @@ -703,6 +723,7 @@ static int hls_slice_header(HEVCContext *s) if (ret < 0) return ret; + sh->bits_used_for_short_term_rps = pos - get_bits_left(gb); sh->short_term_rps = &sh->slice_rps; } else { int numbits, rps_idx; @@ -3707,6 +3728,9 @@ static void hevc_decode_flush(AVCodecContext *avctx) av_buffer_unref(&s->rpu_buf); s->max_ra = INT_MAX; s->eos = 1; + + if (avctx->hwaccel && avctx->hwaccel->flush) + avctx->hwaccel->flush(avctx); } #define OFFSET(x) offsetof(HEVCContext, x) @@ -3765,6 +3789,9 @@ const FFCodec ff_hevc_decoder = { #endif #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL HWACCEL_VIDEOTOOLBOX(hevc), +#endif +#if CONFIG_HEVC_VULKAN_HWACCEL + HWACCEL_VULKAN(hevc), #endif NULL }, diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h index 94609e46993d1..04ec25d540b8a 100644 --- a/libavcodec/hevcdec.h +++ b/libavcodec/hevcdec.h @@ -268,6 +268,7 @@ typedef struct SliceHeader { ///< RPS coded in the slice header itself is stored here int short_term_ref_pic_set_sps_flag; + int bits_used_for_short_term_rps; int short_term_ref_pic_set_size; ShortTermRPS slice_rps; const ShortTermRPS *short_term_rps; diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h index aca55831f32f9..48dfc17f72b5e 100644 --- a/libavcodec/hwaccels.h +++ b/libavcodec/hwaccels.h @@ -27,6 +27,7 @@ extern const AVHWAccel ff_av1_dxva2_hwaccel; extern const AVHWAccel ff_av1_nvdec_hwaccel; extern const AVHWAccel ff_av1_vaapi_hwaccel; extern const AVHWAccel ff_av1_vdpau_hwaccel; +extern const AVHWAccel ff_av1_vulkan_hwaccel; extern const AVHWAccel ff_h263_vaapi_hwaccel; extern const AVHWAccel ff_h263_videotoolbox_hwaccel; extern const AVHWAccel ff_h264_d3d11va_hwaccel; @@ -36,6 +37,7 @@ extern const AVHWAccel ff_h264_nvdec_hwaccel; extern const AVHWAccel ff_h264_vaapi_hwaccel; extern const AVHWAccel ff_h264_vdpau_hwaccel; extern const AVHWAccel ff_h264_videotoolbox_hwaccel; +extern const AVHWAccel ff_h264_vulkan_hwaccel; extern const AVHWAccel ff_hevc_d3d11va_hwaccel; extern const AVHWAccel ff_hevc_d3d11va2_hwaccel; extern const AVHWAccel ff_hevc_dxva2_hwaccel; @@ -43,6 +45,7 @@ extern const AVHWAccel ff_hevc_nvdec_hwaccel; extern const AVHWAccel ff_hevc_vaapi_hwaccel; extern const AVHWAccel ff_hevc_vdpau_hwaccel; extern const AVHWAccel ff_hevc_videotoolbox_hwaccel; +extern const AVHWAccel ff_hevc_vulkan_hwaccel; extern const AVHWAccel ff_mjpeg_nvdec_hwaccel; extern const AVHWAccel ff_mjpeg_vaapi_hwaccel; extern const AVHWAccel ff_mpeg1_nvdec_hwaccel; diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h index 721424912c46d..e8c6186151d7e 100644 --- a/libavcodec/hwconfig.h +++ b/libavcodec/hwconfig.h @@ -24,6 +24,7 @@ #define HWACCEL_CAP_ASYNC_SAFE (1 << 0) +#define HWACCEL_CAP_THREAD_SAFE (1 << 1) typedef struct AVCodecHWConfigInternal { @@ -39,6 +40,7 @@ typedef struct AVCodecHWConfigInternal { const AVHWAccel *hwaccel; } AVCodecHWConfigInternal; +void ff_hwaccel_uninit(AVCodecContext *avctx); // These macros are used to simplify AVCodecHWConfigInternal definitions. @@ -76,6 +78,8 @@ typedef struct AVCodecHWConfigInternal { HW_CONFIG_HWACCEL(1, 1, 1, VDPAU, VDPAU, ff_ ## codec ## _vdpau_hwaccel) #define HWACCEL_VIDEOTOOLBOX(codec) \ HW_CONFIG_HWACCEL(1, 1, 1, VIDEOTOOLBOX, VIDEOTOOLBOX, ff_ ## codec ## _videotoolbox_hwaccel) +#define HWACCEL_VULKAN(codec) \ + HW_CONFIG_HWACCEL(1, 1, 1, VULKAN, VULKAN, ff_ ## codec ## _vulkan_hwaccel) #define HWACCEL_D3D11VA(codec) \ HW_CONFIG_HWACCEL(0, 0, 1, D3D11VA_VLD, NONE, ff_ ## codec ## _d3d11va_hwaccel) diff --git a/libavcodec/mpegpicture.c b/libavcodec/mpegpicture.c index 3204a70578cb6..71c7a3fd706e8 100644 --- a/libavcodec/mpegpicture.c +++ b/libavcodec/mpegpicture.c @@ -27,6 +27,8 @@ #include "avcodec.h" #include "encode.h" +#include "internal.h" +#include "decode.h" #include "motion_est.h" #include "mpegpicture.h" #include "mpegutils.h" @@ -172,7 +174,7 @@ static int alloc_frame_buffer(AVCodecContext *avctx, Picture *pic, if (avctx->hwaccel) { assert(!pic->hwaccel_picture_private); if (avctx->hwaccel->frame_priv_data_size) { - pic->hwaccel_priv_buf = av_buffer_allocz(avctx->hwaccel->frame_priv_data_size); + pic->hwaccel_priv_buf = ff_hwaccel_frame_priv_alloc(avctx, avctx->hwaccel); if (!pic->hwaccel_priv_buf) { av_log(avctx, AV_LOG_ERROR, "alloc_frame_buffer() failed (hwaccel private data allocation)\n"); return -1; diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c index 773e78ae3458c..28335231fd742 100644 --- a/libavcodec/pthread_frame.c +++ b/libavcodec/pthread_frame.c @@ -104,6 +104,12 @@ typedef struct PerThreadContext { int hwaccel_serializing; int async_serializing; + // set to 1 in ff_thread_finish_setup() when a threadsafe hwaccel is used; + // cannot check hwaccel caps directly, because + // worked threads clear hwaccel state for thread-unsafe hwaccels + // after each decode call + int hwaccel_threadsafe; + atomic_int debug_threads; ///< Set if the FF_DEBUG_THREADS option is set. } PerThreadContext; @@ -117,8 +123,8 @@ typedef struct FrameThreadContext { unsigned pthread_init_cnt; ///< Number of successfully initialized mutexes/conditions pthread_mutex_t buffer_mutex; ///< Mutex used to protect get/release_buffer(). /** - * This lock is used for ensuring threads run in serial when hwaccel - * is used. + * This lock is used for ensuring threads run in serial when thread-unsafe + * hwaccel is used. */ pthread_mutex_t hwaccel_mutex; pthread_mutex_t async_mutex; @@ -133,13 +139,19 @@ typedef struct FrameThreadContext { * While it is set, ff_thread_en/decode_frame won't return any results. */ - /* hwaccel state is temporarily stored here in order to transfer its ownership - * to the next decoding thread without the need for extra synchronization */ + /* hwaccel state for thread-unsafe hwaccels is temporarily stored here in + * order to transfer its ownership to the next decoding thread without the + * need for extra synchronization */ const AVHWAccel *stash_hwaccel; void *stash_hwaccel_context; void *stash_hwaccel_priv; } FrameThreadContext; +static int hwaccel_serial(const AVCodecContext *avctx) +{ + return avctx->hwaccel && !(avctx->hwaccel->caps_internal & HWACCEL_CAP_THREAD_SAFE); +} + static void async_lock(FrameThreadContext *fctx) { pthread_mutex_lock(&fctx->async_mutex); @@ -202,9 +214,9 @@ static attribute_align_arg void *frame_worker_thread(void *arg) * cannot be true here. */ av_assert0(!p->hwaccel_serializing); - /* if the previous thread uses hwaccel then we take the lock to ensure - * the threads don't run concurrently */ - if (avctx->hwaccel) { + /* if the previous thread uses thread-unsafe hwaccel then we take the + * lock to ensure the threads don't run concurrently */ + if (hwaccel_serial(avctx)) { pthread_mutex_lock(&p->parent->hwaccel_mutex); p->hwaccel_serializing = 1; } @@ -220,7 +232,8 @@ static attribute_align_arg void *frame_worker_thread(void *arg) ff_thread_finish_setup(avctx); if (p->hwaccel_serializing) { - /* wipe hwaccel state to avoid stale pointers lying around; + /* wipe hwaccel state for thread-unsafe hwaccels to avoid stale + * pointers lying around; * the state was transferred to FrameThreadContext in * ff_thread_finish_setup(), so nothing is leaked */ avctx->hwaccel = NULL; @@ -230,7 +243,8 @@ static attribute_align_arg void *frame_worker_thread(void *arg) p->hwaccel_serializing = 0; pthread_mutex_unlock(&p->parent->hwaccel_mutex); } - av_assert0(!avctx->hwaccel); + av_assert0(!avctx->hwaccel || + (avctx->hwaccel->caps_internal & HWACCEL_CAP_THREAD_SAFE)); if (p->async_serializing) { p->async_serializing = 0; @@ -332,8 +346,49 @@ FF_ENABLE_DEPRECATION_WARNINGS if (codec->update_thread_context_for_user) err = codec->update_thread_context_for_user(dst, src); } else { - if (codec->update_thread_context) + const PerThreadContext *p_src = src->internal->thread_ctx; + PerThreadContext *p_dst = dst->internal->thread_ctx; + + if (codec->update_thread_context) { err = codec->update_thread_context(dst, src); + if (err < 0) + return err; + } + + // reset dst hwaccel state if needed + av_assert0(p_dst->hwaccel_threadsafe || + (!dst->hwaccel && !dst->internal->hwaccel_priv_data)); + if (p_dst->hwaccel_threadsafe && + (!p_src->hwaccel_threadsafe || dst->hwaccel != src->hwaccel)) { + ff_hwaccel_uninit(dst); + p_dst->hwaccel_threadsafe = 0; + } + + // propagate hwaccel state for threadsafe hwaccels + if (p_src->hwaccel_threadsafe) { + if (!dst->hwaccel) { + if (src->hwaccel->priv_data_size) { + av_assert0(src->hwaccel->update_thread_context); + + dst->internal->hwaccel_priv_data = + av_mallocz(src->hwaccel->priv_data_size); + if (!dst->internal->hwaccel_priv_data) + return AVERROR(ENOMEM); + } + dst->hwaccel = src->hwaccel; + } + av_assert0(dst->hwaccel == src->hwaccel); + + if (src->hwaccel->update_thread_context) { + err = src->hwaccel->update_thread_context(dst, src); + if (err < 0) { + av_log(dst, AV_LOG_ERROR, "Error propagating hwaccel state\n"); + ff_hwaccel_uninit(dst); + return err; + } + } + p_dst->hwaccel_threadsafe = 1; + } } return err; @@ -441,10 +496,12 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx, } /* transfer the stashed hwaccel state, if any */ - av_assert0(!p->avctx->hwaccel); - FFSWAP(const AVHWAccel*, p->avctx->hwaccel, fctx->stash_hwaccel); - FFSWAP(void*, p->avctx->hwaccel_context, fctx->stash_hwaccel_context); - FFSWAP(void*, p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv); + av_assert0(!p->avctx->hwaccel || p->hwaccel_threadsafe); + if (!p->hwaccel_threadsafe) { + FFSWAP(const AVHWAccel*, p->avctx->hwaccel, fctx->stash_hwaccel); + FFSWAP(void*, p->avctx->hwaccel_context, fctx->stash_hwaccel_context); + FFSWAP(void*, p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv); + } av_packet_unref(p->avpkt); ret = av_packet_ref(p->avpkt, avpkt); @@ -598,7 +655,10 @@ void ff_thread_finish_setup(AVCodecContext *avctx) { if (!(avctx->active_thread_type&FF_THREAD_FRAME)) return; - if (avctx->hwaccel && !p->hwaccel_serializing) { + p->hwaccel_threadsafe = avctx->hwaccel && + (avctx->hwaccel->caps_internal & HWACCEL_CAP_THREAD_SAFE); + + if (hwaccel_serial(avctx) && !p->hwaccel_serializing) { pthread_mutex_lock(&p->parent->hwaccel_mutex); p->hwaccel_serializing = 1; } @@ -611,13 +671,16 @@ void ff_thread_finish_setup(AVCodecContext *avctx) { async_lock(p->parent); } - /* save hwaccel state for passing to the next thread; + /* thread-unsafe hwaccels share a single private data instance, so we + * save hwaccel state for passing to the next thread; * this is done here so that this worker thread can wipe its own hwaccel * state after decoding, without requiring synchronization */ av_assert0(!p->parent->stash_hwaccel); - p->parent->stash_hwaccel = avctx->hwaccel; - p->parent->stash_hwaccel_context = avctx->hwaccel_context; - p->parent->stash_hwaccel_priv = avctx->internal->hwaccel_priv_data; + if (hwaccel_serial(avctx)) { + p->parent->stash_hwaccel = avctx->hwaccel; + p->parent->stash_hwaccel_context = avctx->hwaccel_context; + p->parent->stash_hwaccel_priv = avctx->internal->hwaccel_priv_data; + } pthread_mutex_lock(&p->progress_mutex); if(atomic_load(&p->state) == STATE_SETUP_FINISHED){ diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c index 2ab06c82939f6..50afe19b7a5bb 100644 --- a/libavcodec/vp8.c +++ b/libavcodec/vp8.c @@ -109,7 +109,7 @@ static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref) if (s->avctx->hwaccel) { const AVHWAccel *hwaccel = s->avctx->hwaccel; if (hwaccel->frame_priv_data_size) { - f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size); + f->hwaccel_priv_buf = ff_hwaccel_frame_priv_alloc(s->avctx, hwaccel); if (!f->hwaccel_priv_buf) goto fail; f->hwaccel_picture_private = f->hwaccel_priv_buf->data; @@ -167,6 +167,9 @@ static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem) if (free_mem) free_buffers(s); + + if (avctx->hwaccel && avctx->hwaccel->flush) + avctx->hwaccel->flush(avctx); } static void vp8_decode_flush(AVCodecContext *avctx) diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c index d8a31507fa50d..4f704ec0dd12f 100644 --- a/libavcodec/vp9.c +++ b/libavcodec/vp9.c @@ -136,7 +136,7 @@ static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f) const AVHWAccel *hwaccel = avctx->hwaccel; av_assert0(!f->hwaccel_picture_private); if (hwaccel->frame_priv_data_size) { - f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size); + f->hwaccel_priv_buf = ff_hwaccel_frame_priv_alloc(avctx, hwaccel); if (!f->hwaccel_priv_buf) goto fail; f->hwaccel_picture_private = f->hwaccel_priv_buf->data; @@ -1801,6 +1801,9 @@ static void vp9_decode_flush(AVCodecContext *avctx) vp9_frame_unref(avctx, &s->s.frames[i]); for (i = 0; i < 8; i++) ff_thread_release_ext_buffer(avctx, &s->s.refs[i]); + + if (avctx->hwaccel && avctx->hwaccel->flush) + avctx->hwaccel->flush(avctx); } static av_cold int vp9_decode_init(AVCodecContext *avctx) diff --git a/libavcodec/vulkan.c b/libavcodec/vulkan.c new file mode 100644 index 0000000000000..fc8a1fa47bad4 --- /dev/null +++ b/libavcodec/vulkan.c @@ -0,0 +1,19 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/vulkan.c" diff --git a/libavcodec/vulkan.h b/libavcodec/vulkan.h new file mode 100644 index 0000000000000..b15efd4addb13 --- /dev/null +++ b/libavcodec/vulkan.h @@ -0,0 +1,24 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VULKAN_H +#define AVCODEC_VULKAN_H + +#include "libavutil/vulkan.h" + +#endif /* AVCODEC_VULKAN_H */ diff --git a/libavcodec/vulkan_av1.c b/libavcodec/vulkan_av1.c new file mode 100644 index 0000000000000..19bfd22b606a3 --- /dev/null +++ b/libavcodec/vulkan_av1.c @@ -0,0 +1,598 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "av1dec.h" + +#include "vulkan_decode.h" + +/* Maximum number of tiles specified by any defined level */ +#define MAX_TILES 256 + +const VkExtensionProperties ff_vk_dec_av1_ext = { + .extensionName = VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME, + .specVersion = VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION, +}; + +typedef struct AV1VulkanDecodePicture { + FFVulkanDecodePicture vp; + + /* Workaround for a spec issue. + *Can be removed once no longer needed, and threading can be enabled. */ + FFVulkanDecodeContext *dec; + + StdVideoAV1MESATile tiles[MAX_TILES]; + StdVideoAV1MESATileList tile_list; + const uint32_t *tile_offsets; + + /* Current picture */ + VkVideoDecodeAV1DpbSlotInfoMESA vkav1_ref; + StdVideoAV1MESAFrameHeader av1_frame_header; + VkVideoDecodeAV1PictureInfoMESA av1_pic_info; + + /* Picture refs */ + const AV1Frame *ref_src [AV1_NUM_REF_FRAMES]; + VkVideoDecodeAV1DpbSlotInfoMESA vkav1_refs[AV1_NUM_REF_FRAMES]; + + uint8_t frame_id_set; + uint8_t frame_id; +} AV1VulkanDecodePicture; + +static int vk_av1_fill_pict(AVCodecContext *avctx, const AV1Frame **ref_src, + VkVideoReferenceSlotInfoKHR *ref_slot, /* Main structure */ + VkVideoPictureResourceInfoKHR *ref, /* Goes in ^ */ + VkVideoDecodeAV1DpbSlotInfoMESA *vkav1_ref, /* Goes in ^ */ + const AV1Frame *pic, int is_current, int has_grain, + int dpb_slot_index) +{ + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = (FFVulkanDecodeShared *)dec->shared_ref->data; + AV1VulkanDecodePicture *hp = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vkpic = &hp->vp; + + int err = ff_vk_decode_prepare_frame(ctx, pic->f, vkpic, is_current, + has_grain || ctx->dedicated_dpb); + if (err < 0) + return err; + + *vkav1_ref = (VkVideoDecodeAV1DpbSlotInfoMESA) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_DPB_SLOT_INFO_MESA, + .frameIdx = hp->frame_id, + }; + + for (unsigned i = 0; i < 7; i++) { + const int idx = pic->raw_frame_header->ref_frame_idx[i]; + vkav1_ref->ref_order_hint[i] = pic->raw_frame_header->ref_order_hint[idx]; + } + + vkav1_ref->disable_frame_end_update_cdf = pic->raw_frame_header->disable_frame_end_update_cdf; + + *ref = (VkVideoPictureResourceInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .codedOffset = (VkOffset2D){ 0, 0 }, + .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height }, + .baseArrayLayer = ((has_grain || ctx->dedicated_dpb) && ctx->layered_dpb) ? + dpb_slot_index : 0, + .imageViewBinding = vkpic->img_view_ref, + }; + + *ref_slot = (VkVideoReferenceSlotInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR, + .pNext = vkav1_ref, + .slotIndex = dpb_slot_index, + .pPictureResource = ref, + }; + + if (ref_src) + *ref_src = pic; + + return 0; +} + +static int vk_av1_create_params(AVCodecContext *avctx, AVBufferRef **buf) +{ + VkResult ret; + + const AV1DecContext *s = avctx->priv_data; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = (FFVulkanDecodeShared *)dec->shared_ref->data; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + const AV1RawSequenceHeader *seq = s->raw_seq; + + StdVideoAV1MESASequenceHeader av1_sequence_header; + VkVideoDecodeAV1SessionParametersAddInfoMESA av1_params_info; + VkVideoDecodeAV1SessionParametersCreateInfoMESA av1_params; + VkVideoSessionParametersCreateInfoKHR session_params_create; + + AVBufferRef *tmp; + VkVideoSessionParametersKHR *par = av_malloc(sizeof(*par)); + if (!par) + return AVERROR(ENOMEM); + + av1_sequence_header = (StdVideoAV1MESASequenceHeader) { + .flags = (StdVideoAV1MESASequenceHeaderFlags) { + .still_picture = seq->still_picture, + .reduced_still_picture_header = seq->reduced_still_picture_header, + .use_128x128_superblock = seq->use_128x128_superblock, + .enable_filter_intra = seq->enable_filter_intra, + .enable_intra_edge_filter = seq->enable_intra_edge_filter, + .enable_interintra_compound = seq->enable_interintra_compound, + .enable_masked_compound = seq->enable_masked_compound, + .enable_warped_motion = seq->enable_warped_motion, + .enable_dual_filter = seq->enable_dual_filter, + .enable_order_hint = seq->enable_order_hint, + .enable_jnt_comp = seq->enable_jnt_comp, + .enable_ref_frame_mvs = seq->enable_ref_frame_mvs, + .frame_id_numbers_present_flag = seq->frame_id_numbers_present_flag, + .enable_superres = seq->enable_superres, + .enable_cdef = seq->enable_cdef, + .enable_restoration = seq->enable_restoration, + .film_grain_params_present = seq->film_grain_params_present, + .timing_info_present_flag = seq->timing_info_present_flag, + .initial_display_delay_present_flag = seq->initial_display_delay_present_flag, + }, + .seq_profile = seq->seq_profile, + .frame_width_bits_minus_1 = seq->frame_width_bits_minus_1, + .frame_height_bits_minus_1 = seq->frame_height_bits_minus_1, + .max_frame_width_minus_1 = seq->max_frame_width_minus_1, + .max_frame_height_minus_1 = seq->max_frame_height_minus_1, + .delta_frame_id_length_minus_2 = seq->delta_frame_id_length_minus_2, + .additional_frame_id_length_minus_1 = seq->additional_frame_id_length_minus_1, + .order_hint_bits_minus_1 = seq->order_hint_bits_minus_1, + .timing_info = (StdVideoAV1MESATimingInfo) { + .flags = (StdVideoAV1MESATimingInfoFlags) { + .equal_picture_interval = seq->timing_info.equal_picture_interval, + }, + .num_units_in_display_tick = seq->timing_info.num_units_in_display_tick, + .time_scale = seq->timing_info.time_scale, + .num_ticks_per_picture_minus_1 = seq->timing_info.num_ticks_per_picture_minus_1, + }, + .color_config = (StdVideoAV1MESAColorConfig) { + .flags = (StdVideoAV1MESAColorConfigFlags) { + .mono_chrome = seq->color_config.mono_chrome, + .color_range = seq->color_config.color_range, + .separate_uv_delta_q = seq->color_config.separate_uv_delta_q, + }, + .bit_depth = seq->color_config.twelve_bit ? 12 : + seq->color_config.high_bitdepth ? 10 : 8, + .subsampling_x = seq->color_config.subsampling_x, + .subsampling_y = seq->color_config.subsampling_y, + }, + }; + + av1_params_info = (VkVideoDecodeAV1SessionParametersAddInfoMESA) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_SESSION_PARAMETERS_ADD_INFO_MESA, + .sequence_header = &av1_sequence_header, + }; + av1_params = (VkVideoDecodeAV1SessionParametersCreateInfoMESA) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_SESSION_PARAMETERS_CREATE_INFO_MESA, + .pParametersAddInfo = &av1_params_info, + }; + session_params_create = (VkVideoSessionParametersCreateInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR, + .pNext = &av1_params, + .videoSession = ctx->common.session, + .videoSessionParametersTemplate = NULL, + }; + + /* Create session parameters */ + ret = vk->CreateVideoSessionParametersKHR(ctx->s.hwctx->act_dev, &session_params_create, + ctx->s.hwctx->alloc, par); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to create Vulkan video session parameters: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + tmp = av_buffer_create((uint8_t *)par, sizeof(*par), ff_vk_decode_free_params, + ctx, 0); + if (!tmp) { + ff_vk_decode_free_params(ctx, (uint8_t *)par); + return AVERROR(ENOMEM); + } + + av_log(avctx, AV_LOG_DEBUG, "Created frame parameters\n"); + + *buf = tmp; + + return 0; +} + +static int vk_av1_start_frame(AVCodecContext *avctx, + av_unused const uint8_t *buffer, + av_unused uint32_t size) +{ + int err; + int ref_count = 0; + AV1DecContext *s = avctx->priv_data; + const AV1Frame *pic = &s->cur_frame; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + AV1VulkanDecodePicture *ap = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &ap->vp; + + const AV1RawFrameHeader *frame_header = s->raw_frame_header; + const AV1RawFilmGrainParams *film_grain = &s->cur_frame.film_grain; + const int apply_grain = !(avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN) && + film_grain->apply_grain; + + if (!dec->session_params || dec->params_changed) { + av_buffer_unref(&dec->session_params); + err = vk_av1_create_params(avctx, &dec->session_params); + if (err < 0) + return err; + dec->params_changed = 0; + } + + if (!ap->frame_id_set) { + unsigned slot_idx = 0; + for (unsigned i = 0; i < 32; i++) { + if (!(dec->frame_id_alloc_mask & (1 << i))) { + slot_idx = i; + break; + } + } + ap->frame_id = slot_idx; + ap->frame_id_set = 1; + dec->frame_id_alloc_mask |= (1 << slot_idx); + } + + /* Fill in references */ + for (int i = 0; i < AV1_NUM_REF_FRAMES; i++) { + const AV1Frame *ref_frame = &s->ref[i]; + if (s->ref[i].f->pict_type == AV_PICTURE_TYPE_NONE) + continue; + + err = vk_av1_fill_pict(avctx, &ap->ref_src[i], &vp->ref_slots[i], + &vp->refs[i], &ap->vkav1_refs[i], + ref_frame, 0, 0, i); + if (err < 0) + return err; + + ref_count++; + } + + err = vk_av1_fill_pict(avctx, NULL, &vp->ref_slot, &vp->ref, + &ap->vkav1_ref, + pic, 1, apply_grain, 8); + if (err < 0) + return err; + + ap->tile_list.nb_tiles = 0; + ap->tile_list.tile_list = ap->tiles; + + ap->av1_pic_info = (VkVideoDecodeAV1PictureInfoMESA) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_PICTURE_INFO_MESA, + .frame_header = &ap->av1_frame_header, + .tile_list = &ap->tile_list, + }; + + vp->decode_info = (VkVideoDecodeInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR, + .pNext = &ap->av1_pic_info, + .flags = 0x0, + .pSetupReferenceSlot = &vp->ref_slot, + .referenceSlotCount = ref_count, + .pReferenceSlots = vp->ref_slots, + .dstPictureResource = (VkVideoPictureResourceInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .codedOffset = (VkOffset2D){ 0, 0 }, + .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height }, + .baseArrayLayer = 0, + .imageViewBinding = vp->img_view_out, + }, + }; + + /* Setup frame header */ + ap->av1_frame_header = (StdVideoAV1MESAFrameHeader) { + .flags = (StdVideoAV1MESAFrameHeaderFlags) { + .error_resilient_mode = frame_header->error_resilient_mode, + .disable_cdf_update = frame_header->disable_cdf_update, + .use_superres = frame_header->use_superres, + .render_and_frame_size_different = frame_header->render_and_frame_size_different, + .allow_screen_content_tools = frame_header->allow_screen_content_tools, + .is_filter_switchable = frame_header->is_filter_switchable, + .force_integer_mv = frame_header->force_integer_mv, + .frame_size_override_flag = frame_header->frame_size_override_flag, + .buffer_removal_time_present_flag = frame_header->buffer_removal_time_present_flag, + .allow_intrabc = frame_header->allow_intrabc, + .frame_refs_short_signaling = frame_header->frame_refs_short_signaling, + .allow_high_precision_mv = frame_header->allow_high_precision_mv, + .is_motion_mode_switchable = frame_header->is_motion_mode_switchable, + .use_ref_frame_mvs = frame_header->use_ref_frame_mvs, + .disable_frame_end_update_cdf = frame_header->disable_frame_end_update_cdf, + .allow_warped_motion = frame_header->allow_warped_motion, + .reduced_tx_set = frame_header->reduced_tx_set, + .reference_select = frame_header->reference_select, + .skip_mode_present = frame_header->skip_mode_present, + .delta_q_present = frame_header->delta_q_present, + }, + .frame_to_show_map_idx = frame_header->frame_to_show_map_idx, + .frame_presentation_time = frame_header->frame_presentation_time, + .display_frame_id = frame_header->display_frame_id, + .frame_type = frame_header->frame_type, + .current_frame_id = frame_header->current_frame_id, + .order_hint = frame_header->order_hint, + .primary_ref_frame = frame_header->primary_ref_frame, + .frame_width_minus_1 = frame_header->frame_width_minus_1, + .frame_height_minus_1 = frame_header->frame_height_minus_1, + .coded_denom = frame_header->coded_denom, + .render_width_minus_1 = frame_header->render_width_minus_1, + .render_height_minus_1 = frame_header->render_height_minus_1, + .refresh_frame_flags = frame_header->refresh_frame_flags, + .interpolation_filter = frame_header->interpolation_filter, + .tx_mode = frame_header->tx_mode, + .tiling = (StdVideoAV1MESATileInfo) { + .flags = (StdVideoAV1MESATileInfoFlags) { + .uniform_tile_spacing_flag = frame_header->uniform_tile_spacing_flag, + }, + .tile_cols = frame_header->tile_cols, + .tile_rows = frame_header->tile_rows, + .context_update_tile_id = frame_header->context_update_tile_id, + .tile_size_bytes_minus1 = frame_header->tile_size_bytes_minus1, + }, + .quantization = (StdVideoAV1MESAQuantization) { + .flags.using_qmatrix = frame_header->using_qmatrix, + .base_q_idx = frame_header->base_q_idx, + .delta_q_y_dc = frame_header->delta_q_y_dc, + .diff_uv_delta = frame_header->diff_uv_delta, + .delta_q_u_dc = frame_header->delta_q_u_dc, + .delta_q_u_ac = frame_header->delta_q_u_ac, + .delta_q_v_dc = frame_header->delta_q_v_dc, + .delta_q_v_ac = frame_header->delta_q_v_ac, + .qm_y = frame_header->qm_y, + .qm_u = frame_header->qm_u, + .qm_v = frame_header->qm_v, + }, + .delta_q = (StdVideoAV1MESADeltaQ) { + .flags = (StdVideoAV1MESADeltaQFlags) { + .delta_lf_present = frame_header->delta_lf_present, + .delta_lf_multi = frame_header->delta_lf_multi, + }, + .delta_q_res = frame_header->delta_q_res, + .delta_lf_res = frame_header->delta_lf_res, + }, + .loop_filter = (StdVideoAV1MESALoopFilter) { + .flags = (StdVideoAV1MESALoopFilterFlags) { + .delta_enabled = frame_header->loop_filter_delta_enabled, + .delta_update = frame_header->loop_filter_delta_update, + }, + .level = { + frame_header->loop_filter_level[0], frame_header->loop_filter_level[1], + frame_header->loop_filter_level[2], frame_header->loop_filter_level[3], + }, + .sharpness = frame_header->loop_filter_sharpness, + .mode_deltas = { + frame_header->loop_filter_mode_deltas[0], frame_header->loop_filter_mode_deltas[1], + }, + }, + .cdef = (StdVideoAV1MESACDEF) { + .damping_minus_3 = frame_header->cdef_damping_minus_3, + .bits = frame_header->cdef_bits, + }, + .lr = (StdVideoAV1MESALoopRestoration) { + .lr_unit_shift = frame_header->lr_unit_shift, + .lr_uv_shift = frame_header->lr_uv_shift, + .lr_type = { frame_header->lr_type[0], frame_header->lr_type[1], frame_header->lr_type[2] }, + }, + .segmentation = (StdVideoAV1MESASegmentation) { + .flags = (StdVideoAV1MESASegmentationFlags) { + .enabled = frame_header->segmentation_enabled, + .update_map = frame_header->segmentation_update_map, + .temporal_update = frame_header->segmentation_temporal_update, + .update_data = frame_header->segmentation_update_data, + }, + }, + .film_grain = (StdVideoAV1MESAFilmGrainParameters) { + .flags = (StdVideoAV1MESAFilmGrainFlags) { + .apply_grain = apply_grain, + .chroma_scaling_from_luma = film_grain->chroma_scaling_from_luma, + .overlap_flag = film_grain->overlap_flag, + .clip_to_restricted_range = film_grain->clip_to_restricted_range, + }, + .grain_scaling_minus_8 = film_grain->grain_scaling_minus_8, + .ar_coeff_lag = film_grain->ar_coeff_lag, + .ar_coeff_shift_minus_6 = film_grain->ar_coeff_shift_minus_6, + .grain_scale_shift = film_grain->grain_scale_shift, + .grain_seed = film_grain->grain_seed, + .num_y_points = film_grain->num_y_points, + .num_cb_points = film_grain->num_cb_points, + .num_cr_points = film_grain->num_cr_points, + .cb_mult = film_grain->cb_mult, + .cb_luma_mult = film_grain->cb_luma_mult, + .cb_offset = film_grain->cb_offset, + .cr_mult = film_grain->cr_mult, + .cr_luma_mult = film_grain->cr_luma_mult, + .cr_offset = film_grain->cr_offset, + }, + }; + + for (int i = 0; i < 64; i++) { + ap->av1_frame_header.tiling.width_in_sbs_minus_1[i] = frame_header->width_in_sbs_minus_1[i]; + ap->av1_frame_header.tiling.height_in_sbs_minus_1[i] = frame_header->height_in_sbs_minus_1[i]; + ap->av1_frame_header.tiling.tile_start_col_sb[i] = frame_header->tile_start_col_sb[i]; + ap->av1_frame_header.tiling.tile_start_row_sb[i] = frame_header->tile_start_row_sb[i]; + } + + for (int i = 0; i < 8; i++) { + ap->av1_frame_header.segmentation.feature_enabled_bits[i] = 0; + for (int j = 0; j < 8; j++) { + ap->av1_frame_header.segmentation.feature_enabled_bits[i] |= (frame_header->feature_enabled[i][j] << j); + ap->av1_frame_header.segmentation.feature_data[i][j] = frame_header->feature_value[i][j]; + } + + ap->av1_frame_header.loop_filter.ref_deltas[i] = frame_header->loop_filter_ref_deltas[i]; + + ap->av1_frame_header.cdef.y_pri_strength[i] = frame_header->cdef_y_pri_strength[i]; + ap->av1_frame_header.cdef.y_sec_strength[i] = frame_header->cdef_y_sec_strength[i]; + ap->av1_frame_header.cdef.uv_pri_strength[i] = frame_header->cdef_uv_pri_strength[i]; + ap->av1_frame_header.cdef.uv_sec_strength[i] = frame_header->cdef_uv_sec_strength[i]; + + ap->av1_frame_header.ref_order_hint[i] = frame_header->ref_order_hint[i]; + ap->av1_frame_header.global_motion[i] = (StdVideoAV1MESAGlobalMotion) { + .flags = (StdVideoAV1MESAGlobalMotionFlags) { + .gm_invalid = s->cur_frame.gm_invalid[i], + }, + .gm_type = s->cur_frame.gm_type[i], + .gm_params = { + frame_header->gm_params[i][0], frame_header->gm_params[i][1], + frame_header->gm_params[i][2], frame_header->gm_params[i][3], + frame_header->gm_params[i][4], frame_header->gm_params[i][5], + }, + }; + } + + for (int i = 0; i < 7; i++) { + ap->av1_frame_header.ref_frame_idx[i] = frame_header->ref_frame_idx[i]; + ap->av1_frame_header.delta_frame_id_minus1[i] = frame_header->delta_frame_id_minus1[i]; + } + + ap->av1_pic_info.skip_mode_frame_idx[0] = s->cur_frame.skip_mode_frame_idx[0]; + ap->av1_pic_info.skip_mode_frame_idx[1] = s->cur_frame.skip_mode_frame_idx[1]; + + if (apply_grain) { + for (int i = 0; i < 14; i++) { + ap->av1_frame_header.film_grain.point_y_value[i] = film_grain->point_y_value[i]; + ap->av1_frame_header.film_grain.point_y_scaling[i] = film_grain->point_y_scaling[i]; + } + + for (int i = 0; i < 10; i++) { + ap->av1_frame_header.film_grain.point_cb_value[i] = film_grain->point_cb_value[i]; + ap->av1_frame_header.film_grain.point_cb_scaling[i] = film_grain->point_cb_scaling[i]; + ap->av1_frame_header.film_grain.point_cr_value[i] = film_grain->point_cr_value[i]; + ap->av1_frame_header.film_grain.point_cr_scaling[i] = film_grain->point_cr_scaling[i]; + } + + for (int i = 0; i < 24; i++) { + ap->av1_frame_header.film_grain.ar_coeffs_y_plus_128[i] = film_grain->ar_coeffs_y_plus_128[i]; + ap->av1_frame_header.film_grain.ar_coeffs_cb_plus_128[i] = film_grain->ar_coeffs_cb_plus_128[i]; + ap->av1_frame_header.film_grain.ar_coeffs_cr_plus_128[i] = film_grain->ar_coeffs_cr_plus_128[i]; + } + + ap->av1_frame_header.film_grain.ar_coeffs_cb_plus_128[24] = film_grain->ar_coeffs_cb_plus_128[24]; + ap->av1_frame_header.film_grain.ar_coeffs_cr_plus_128[24] = film_grain->ar_coeffs_cr_plus_128[24]; + } + + av_log(avctx, AV_LOG_DEBUG, "Created frame parameters"); + + /* Workaround for a spec issue. */ + ap->dec = dec; + + return 0; +} + +static int vk_av1_decode_slice(AVCodecContext *avctx, + const uint8_t *data, + uint32_t size) +{ + int err; + const AV1DecContext *s = avctx->priv_data; + AV1VulkanDecodePicture *ap = s->cur_frame.hwaccel_picture_private; + FFVulkanDecodePicture *vp = &ap->vp; + + for (int i = s->tg_start; i <= s->tg_end; i++) { + ap->tiles[ap->tile_list.nb_tiles] = (StdVideoAV1MESATile) { + .size = s->tile_group_info[i].tile_size, + .offset = s->tile_group_info[i].tile_offset, + .row = s->tile_group_info[i].tile_row, + .column = s->tile_group_info[i].tile_column, + .tg_start = s->tg_start, + .tg_end = s->tg_end, + }; + + err = ff_vk_decode_add_slice(avctx, vp, data, size, 0, + &ap->tile_list.nb_tiles, + &ap->tile_offsets); + if (err < 0) + return err; + +// ap->tiles[ap->tile_list.nb_tiles - 1].offset = ap->tile_offsets[ap->tile_list.nb_tiles - 1]; + } + + return 0; +} + +static int vk_av1_end_frame(AVCodecContext *avctx) +{ + const AV1DecContext *s = avctx->priv_data; + const AV1Frame *pic = &s->cur_frame; + AV1VulkanDecodePicture *ap = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &ap->vp; + FFVulkanDecodePicture *rvp[AV1_NUM_REF_FRAMES] = { 0 }; + AVFrame *rav[AV1_NUM_REF_FRAMES] = { 0 }; + + for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) { + const AV1Frame *rp = ap->ref_src[i]; + AV1VulkanDecodePicture *rhp = rp->hwaccel_picture_private; + + rvp[i] = &rhp->vp; + rav[i] = ap->ref_src[i]->f; + } + + av_log(avctx, AV_LOG_VERBOSE, "Decoding frame, %lu bytes, %i tiles\n", + vp->slices_size, ap->tile_list.nb_tiles); + + return ff_vk_decode_frame(avctx, pic->f, vp, rav, rvp); +} + +static void vk_av1_free_frame_priv(void *_hwctx, uint8_t *data) +{ + AVHWDeviceContext *hwctx = _hwctx; + AV1VulkanDecodePicture *ap = (AV1VulkanDecodePicture *)data; + + /* Workaround for a spec issue. */ + if (ap->frame_id_set) + ap->dec->frame_id_alloc_mask &= ~(1 << ap->frame_id); + + /* Free frame resources, this also destroys the session parameters. */ + ff_vk_decode_free_frame(hwctx, &ap->vp); + + /* Free frame context */ + av_free(ap); +} + +const AVHWAccel ff_av1_vulkan_hwaccel = { + .name = "av1_vulkan", + .type = AVMEDIA_TYPE_VIDEO, + .id = AV_CODEC_ID_AV1, + .pix_fmt = AV_PIX_FMT_VULKAN, + .start_frame = &vk_av1_start_frame, + .decode_slice = &vk_av1_decode_slice, + .end_frame = &vk_av1_end_frame, + .free_frame_priv = &vk_av1_free_frame_priv, + .frame_priv_data_size = sizeof(AV1VulkanDecodePicture), + .init = &ff_vk_decode_init, + .update_thread_context = &ff_vk_update_thread_context, + .decode_params = &ff_vk_params_changed, + .flush = &ff_vk_decode_flush, + .uninit = &ff_vk_decode_uninit, + .frame_params = &ff_vk_frame_params, + .priv_data_size = sizeof(FFVulkanDecodeContext), + + /* NOTE: Threading is intentionally disabled here. Due to the design of Vulkan, + * where frames are opaque to users, and mostly opaque for driver developers, + * there's an issue with current hardware accelerator implementations of AV1, + * where they require an internal index. With regular hwaccel APIs, this index + * is given to users as an opaque handle directly. With Vulkan, due to increased + * flexibility, this index cannot be present anywhere. + * The current implementation tracks the index for the driver and submits it + * as necessary information. Due to needing to modify the decoding context, + * which is not thread-safe, on frame free, threading is disabled. + * In the future, once this is fixed in the spec, the workarounds may be removed + * and threading enabled. */ + .caps_internal = HWACCEL_CAP_ASYNC_SAFE, +}; diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c new file mode 100644 index 0000000000000..30c4bbf0ce1f7 --- /dev/null +++ b/libavcodec/vulkan_decode.c @@ -0,0 +1,1172 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "vulkan_video.h" +#include "vulkan_decode.h" +#include "config_components.h" + +#if CONFIG_H264_VULKAN_HWACCEL +extern const VkExtensionProperties ff_vk_dec_h264_ext; +#endif +#if CONFIG_HEVC_VULKAN_HWACCEL +extern const VkExtensionProperties ff_vk_dec_hevc_ext; +#endif +#if CONFIG_AV1_VULKAN_HWACCEL +extern const VkExtensionProperties ff_vk_dec_av1_ext; +#endif + +static const VkExtensionProperties *dec_ext[] = { +#if CONFIG_H264_VULKAN_HWACCEL + [AV_CODEC_ID_H264] = &ff_vk_dec_h264_ext, +#endif +#if CONFIG_HEVC_VULKAN_HWACCEL + [AV_CODEC_ID_HEVC] = &ff_vk_dec_hevc_ext, +#endif +#if CONFIG_AV1_VULKAN_HWACCEL + [AV_CODEC_ID_AV1] = &ff_vk_dec_av1_ext, +#endif +}; + +int ff_vk_update_thread_context(AVCodecContext *dst, const AVCodecContext *src) +{ + int err; + FFVulkanDecodeContext *src_ctx = src->internal->hwaccel_priv_data; + FFVulkanDecodeContext *dst_ctx = dst->internal->hwaccel_priv_data; + + err = av_buffer_replace(&dst_ctx->shared_ref, src_ctx->shared_ref); + if (err < 0) + return err; + + if (src_ctx->session_params) { + err = av_buffer_replace(&dst_ctx->session_params, src_ctx->session_params); + if (err < 0) + return err; + } + + dst_ctx->frame_id_alloc_mask = src_ctx->frame_id_alloc_mask; + + return 0; +} + +int ff_vk_params_changed(AVCodecContext *avctx, int t, const uint8_t *b, uint32_t s) +{ + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + dec->params_changed = 1; + return 0; +} + +static int vk_decode_create_view(FFVulkanDecodeShared *ctx, VkImageView *dst_view, + VkImageAspectFlags *aspect, AVVkFrame *src, + VkFormat vkf) +{ + VkResult ret; + FFVulkanFunctions *vk = &ctx->s.vkfn; + VkImageAspectFlags aspect_mask = ff_vk_aspect_bits_from_vkfmt(vkf); + + VkSamplerYcbcrConversionInfo yuv_sampler_info = { + .sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO, + .conversion = ctx->yuv_sampler, + }; + VkImageViewCreateInfo img_view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = &yuv_sampler_info, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = vkf, + .image = src->img[0], + .components = (VkComponentMapping) { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + .levelCount = 1, + }, + }; + + ret = vk->CreateImageView(ctx->s.hwctx->act_dev, &img_view_create_info, + ctx->s.hwctx->alloc, dst_view); + if (ret != VK_SUCCESS) + return AVERROR_EXTERNAL; + + *aspect = aspect_mask; + + return 0; +} + +static AVFrame *vk_get_dpb_pool(FFVulkanDecodeShared *ctx) +{ + int err; + AVFrame *avf = av_frame_alloc(); + if (!avf) + return NULL; + + err = av_hwframe_get_buffer(ctx->dpb_hwfc_ref, avf, 0x0); + if (err < 0) + av_frame_free(&avf); + + return avf; +} + +int ff_vk_decode_prepare_frame(FFVulkanDecodeShared *ctx, AVFrame *pic, + FFVulkanDecodePicture *vkpic, int is_current, + int alloc_dpb) +{ + int err; + + vkpic->nb_slices = 0; + vkpic->slices_size = 0; + + /* If the decoder made a blank frame to make up for a missing ref, or the + * frame is the current frame so it's missing one, create a re-representation */ + if (vkpic->img_view_ref) + return 0; + + vkpic->dpb_frame = NULL; + vkpic->img_view_ref = NULL; + vkpic->img_view_out = NULL; + vkpic->img_view_dest = NULL; + + if (ctx->layered_dpb && alloc_dpb) { + vkpic->img_view_ref = ctx->layered_view; + vkpic->img_aspect_ref = ctx->layered_aspect; + } else if (alloc_dpb) { + AVHWFramesContext *dpb_frames = (AVHWFramesContext *)ctx->dpb_hwfc_ref->data; + AVVulkanFramesContext *dpb_hwfc = dpb_frames->hwctx; + + vkpic->dpb_frame = vk_get_dpb_pool(ctx); + if (!vkpic->dpb_frame) + return AVERROR(ENOMEM); + + err = vk_decode_create_view(ctx, &vkpic->img_view_ref, + &vkpic->img_aspect_ref, + (AVVkFrame *)vkpic->dpb_frame->data[0], + dpb_hwfc->format[0]); + if (err < 0) + return err; + + vkpic->img_view_dest = vkpic->img_view_ref; + } + + if (!alloc_dpb || is_current) { + AVHWFramesContext *frames = (AVHWFramesContext *)pic->hw_frames_ctx->data; + AVVulkanFramesContext *hwfc = frames->hwctx; + + err = vk_decode_create_view(ctx, &vkpic->img_view_out, + &vkpic->img_aspect, + (AVVkFrame *)pic->data[0], + hwfc->format[0]); + if (err < 0) + return err; + + if (!alloc_dpb) { + vkpic->img_view_ref = vkpic->img_view_out; + vkpic->img_aspect_ref = vkpic->img_aspect; + } + } + + return 0; +} + +int ff_vk_decode_add_slice(AVCodecContext *avctx, FFVulkanDecodePicture *vp, + const uint8_t *data, size_t size, int add_startcode, + uint32_t *nb_slices, const uint32_t **offsets) +{ + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = (FFVulkanDecodeShared *)dec->shared_ref->data; + + static const uint8_t startcode_prefix[3] = { 0x0, 0x0, 0x1 }; + const size_t startcode_len = add_startcode ? sizeof(startcode_prefix) : 0; + const int nb = *nb_slices; + uint8_t *slices; + uint32_t *slice_off; + FFVkVideoBuffer *vkbuf; + + size_t new_size = vp->slices_size + startcode_len + size + + ctx->common.caps.minBitstreamBufferSizeAlignment; + + slice_off = av_fast_realloc(vp->slice_off, &vp->slice_off_max, + (nb + 1)*sizeof(slice_off)); + if (!slice_off) + return AVERROR(ENOMEM); + + *offsets = vp->slice_off = slice_off; + slice_off[nb] = vp->slices_size; + + vkbuf = vp->slices_buf ? (FFVkVideoBuffer *)vp->slices_buf->data : NULL; + if (!vkbuf || vkbuf->buf.size < new_size) { + int err; + AVBufferRef *new_ref; + FFVkVideoBuffer *new_buf; + err = ff_vk_video_get_buffer(&ctx->s, &ctx->common, &new_ref, + VK_BUFFER_USAGE_VIDEO_DECODE_SRC_BIT_KHR, + ctx->s.hwfc->create_pnext, new_size); + if (err < 0) + return err; + + new_buf = (FFVkVideoBuffer *)new_ref->data; + + /* Copy data from the old buffer */ + if (vkbuf) { + memcpy(new_buf->mem, vkbuf->mem, vp->slices_size); + av_buffer_unref(&vp->slices_buf); + } + + vp->slices_buf = new_ref; + vkbuf = new_buf; + } + slices = vkbuf->mem; + + /* Startcode */ + memcpy(slices + vp->slices_size, startcode_prefix, startcode_len); + + /* Slice data */ + memcpy(slices + vp->slices_size + startcode_len, data, size); + + *nb_slices = nb + 1; + vp->nb_slices++; + vp->slices_size += startcode_len + size; + + return 0; +} + +void ff_vk_decode_flush(AVCodecContext *avctx) +{ + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = (FFVulkanDecodeShared *)dec->shared_ref->data; + + FFVulkanFunctions *vk = &ctx->s.vkfn; + VkVideoBeginCodingInfoKHR decode_start = { + .sType = VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR, + .videoSession = ctx->common.session, + .videoSessionParameters = ctx->empty_session_params, + }; + VkVideoCodingControlInfoKHR decode_ctrl = { + .sType = VK_STRUCTURE_TYPE_VIDEO_CODING_CONTROL_INFO_KHR, + .flags = VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR, + }; + VkVideoEndCodingInfoKHR decode_end = { + .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR, + }; + + VkCommandBuffer cmd_buf; + FFVkExecContext *exec = ff_vk_exec_get(&ctx->exec_pool); + ff_vk_exec_start(&ctx->s, exec); + cmd_buf = exec->buf; + + vk->CmdBeginVideoCodingKHR(cmd_buf, &decode_start); + vk->CmdControlVideoCodingKHR(cmd_buf, &decode_ctrl); + vk->CmdEndVideoCodingKHR(cmd_buf, &decode_end); + ff_vk_exec_submit(&ctx->s, exec); +} + +int ff_vk_decode_frame(AVCodecContext *avctx, + AVFrame *pic, FFVulkanDecodePicture *vp, + AVFrame *rpic[], FFVulkanDecodePicture *rvkp[]) +{ + int err; + VkResult ret; + VkCommandBuffer cmd_buf; + FFVkVideoBuffer *sd_buf; + + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = (FFVulkanDecodeShared *)dec->shared_ref->data; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + /* Output */ + AVVkFrame *vkf = (AVVkFrame *)pic->buf[0]->data; + + /* Quirks */ + const int layered_dpb = ctx->layered_dpb; + + VkVideoSessionParametersKHR *par = (VkVideoSessionParametersKHR *)dec->session_params->data; + VkVideoBeginCodingInfoKHR decode_start = { + .sType = VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR, + .videoSession = ctx->common.session, + .videoSessionParameters = *par, + .referenceSlotCount = vp->decode_info.referenceSlotCount, + .pReferenceSlots = vp->decode_info.pReferenceSlots, + }; + VkVideoEndCodingInfoKHR decode_end = { + .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR, + }; + + VkImageMemoryBarrier2 img_bar[37]; + int nb_img_bar = 0; + size_t data_size = FFALIGN(vp->slices_size, + ctx->common.caps.minBitstreamBufferSizeAlignment); + + FFVkExecContext *exec = ff_vk_exec_get(&ctx->exec_pool); + + /* The current decoding reference has to be bound as an inactive reference */ + VkVideoReferenceSlotInfoKHR *cur_vk_ref; + cur_vk_ref = (void *)&decode_start.pReferenceSlots[decode_start.referenceSlotCount]; + cur_vk_ref[0] = vp->ref_slot; + cur_vk_ref[0].slotIndex = -1; + decode_start.referenceSlotCount++; + + if (ctx->exec_pool.nb_queries) { + int64_t prev_sub_res = 0; + ff_vk_exec_wait(&ctx->s, exec); + ret = ff_vk_exec_get_query(&ctx->s, exec, NULL, &prev_sub_res); + if (ret != VK_NOT_READY && ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to perform query: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + if (ret == VK_SUCCESS) + av_log(avctx, prev_sub_res < 0 ? AV_LOG_ERROR : AV_LOG_DEBUG, + "Result of previous frame decoding: %li\n", prev_sub_res); + } + + sd_buf = (FFVkVideoBuffer *)vp->slices_buf->data; + + /* Flush if needed */ + if (!(sd_buf->buf.flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { + VkMappedMemoryRange flush_buf = { + .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, + .memory = sd_buf->buf.mem, + .offset = 0, + .size = FFALIGN(vp->slices_size, + ctx->s.props.properties.limits.nonCoherentAtomSize), + }; + + ret = vk->FlushMappedMemoryRanges(ctx->s.hwctx->act_dev, 1, &flush_buf); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to flush memory: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + } + + vp->decode_info.srcBuffer = sd_buf->buf.buf; + vp->decode_info.srcBufferOffset = 0; + vp->decode_info.srcBufferRange = data_size; + + /* Start command buffer recording */ + err = ff_vk_exec_start(&ctx->s, exec); + if (err < 0) + return err; + cmd_buf = exec->buf; + + /* Slices */ + err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->slices_buf, 1, 0); + if (err < 0) + return err; + vp->slices_buf = NULL; /* Owned by the exec buffer from now on */ + + /* Parameters */ + err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &dec->session_params, 1, 1); + if (err < 0) + return err; + + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, pic, + VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR); + if (err < 0) + return err; + + err = ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value, + pic); + if (err < 0) + return err; + + /* Output image - change layout, as it comes from a pool */ + img_bar[nb_img_bar] = (VkImageMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, + .pNext = NULL, + .srcStageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + .srcAccessMask = VK_ACCESS_2_NONE, + .dstAccessMask = VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR, + .oldLayout = vkf->layout[0], + .newLayout = vp->dpb_frame ? VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR : + VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR, /* Spec, 07252 utter madness */ + .srcQueueFamilyIndex = vkf->queue_family[0], + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = vkf->img[0], + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = vp->img_aspect, + .layerCount = 1, + .levelCount = 1, + }, + }; + ff_vk_exec_update_frame(&ctx->s, exec, pic, + &img_bar[nb_img_bar], &nb_img_bar); + + /* Reference for the current image, if existing and not layered */ + if (vp->dpb_frame) { + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, vp->dpb_frame, + VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR); + if (err < 0) + return err; + } + + if (!layered_dpb) { + /* All references (apart from the current) for non-layered refs */ + + for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) { + AVFrame *ref_frame = rpic[i]; + FFVulkanDecodePicture *rvp = rvkp[i]; + AVFrame *ref = rvp->dpb_frame ? rvp->dpb_frame : ref_frame; + + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ref, + VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR); + if (err < 0) + return err; + + if (err == 0) { + err = ff_vk_exec_mirror_sem_value(&ctx->s, exec, + &rvp->sem, &rvp->sem_value, + ref); + if (err < 0) + return err; + } + + if (!rvp->dpb_frame) { + AVVkFrame *rvkf = (AVVkFrame *)ref->data[0]; + + img_bar[nb_img_bar] = (VkImageMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, + .pNext = NULL, + .srcStageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + .srcAccessMask = VK_ACCESS_2_NONE, + .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + .dstAccessMask = VK_ACCESS_2_VIDEO_DECODE_READ_BIT_KHR | + VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR, + .oldLayout = rvkf->layout[0], + .newLayout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR, + .srcQueueFamilyIndex = rvkf->queue_family[0], + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = rvkf->img[0], + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = rvp->img_aspect_ref, + .layerCount = 1, + .levelCount = 1, + }, + }; + ff_vk_exec_update_frame(&ctx->s, exec, ref, + &img_bar[nb_img_bar], &nb_img_bar); + } + } + } else if (vp->decode_info.referenceSlotCount || + vp->img_view_out != vp->img_view_ref) { + /* Single barrier for a single layered ref */ + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ctx->layered_frame, + VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR); + if (err < 0) + return err; + } + + /* Change image layout */ + vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + /* Start, use parameters, decode and end decoding */ + vk->CmdBeginVideoCodingKHR(cmd_buf, &decode_start); + + /* Start status query */ + if (ctx->exec_pool.nb_queries) + vk->CmdBeginQuery(cmd_buf, ctx->exec_pool.query_pool, exec->query_idx + 0, 0); + + vk->CmdDecodeVideoKHR(cmd_buf, &vp->decode_info); + + /* End status query */ + if (ctx->exec_pool.nb_queries) + vk->CmdEndQuery(cmd_buf, ctx->exec_pool.query_pool, exec->query_idx + 0); + + vk->CmdEndVideoCodingKHR(cmd_buf, &decode_end); + + /* End recording and submit for execution */ + return ff_vk_exec_submit(&ctx->s, exec); +} + +void ff_vk_decode_free_frame(AVHWDeviceContext *dev_ctx, FFVulkanDecodePicture *vp) +{ + AVVulkanDeviceContext *hwctx = dev_ctx->hwctx; + PFN_vkGetDeviceProcAddr device_proc_addr; + PFN_vkWaitSemaphores wait_semaphores; + PFN_vkDestroyImageView destroy_image_view; + + VkSemaphoreWaitInfo sem_wait = (VkSemaphoreWaitInfo) { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, + .pSemaphores = &vp->sem, + .pValues = &vp->sem_value, + .semaphoreCount = 1, + }; + + /* Guaranteed to exist */ + device_proc_addr = (PFN_vkGetDeviceProcAddr)hwctx->get_proc_addr(hwctx->inst, "vkGetDeviceProcAddr"); + destroy_image_view = (PFN_vkDestroyImageView)device_proc_addr(hwctx->act_dev, "vkDestroyImageView"); + wait_semaphores = (PFN_vkWaitSemaphores)device_proc_addr(hwctx->act_dev, "vkWaitSemaphores"); + + /* We do not have to lock the frame here because we're not interested + * in the actual current semaphore value, but only that it's later than + * the time we submitted the image for decoding. */ + if (vp->sem) + wait_semaphores(hwctx->act_dev, &sem_wait, UINT64_MAX); + + /* Free slices data */ + av_buffer_unref(&vp->slices_buf); + + /* TODO: use a pool in the decode context instead to avoid per-frame allocs. */ + av_freep(&vp->slice_off); + + /* Destroy image view (out) */ + if (vp->img_view_out && vp->img_view_out != vp->img_view_dest) + destroy_image_view(hwctx->act_dev, vp->img_view_out, hwctx->alloc); + + /* Destroy image view (ref, unlayered) */ + if (vp->img_view_dest) + destroy_image_view(hwctx->act_dev, vp->img_view_dest, hwctx->alloc); + + av_frame_free(&vp->dpb_frame); +} + +static void free_common(void *opaque, uint8_t *data) +{ + FFVulkanDecodeShared *ctx = (FFVulkanDecodeShared *)data; + FFVulkanContext *s = &ctx->s; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + /* Wait on and free execution pool */ + ff_vk_exec_pool_free(s, &ctx->exec_pool); + + /* Destroy layered view */ + if (ctx->layered_view) + vk->DestroyImageView(s->hwctx->act_dev, ctx->layered_view, s->hwctx->alloc); + + /* This also frees all references from this pool */ + av_frame_free(&ctx->layered_frame); + av_buffer_unref(&ctx->dpb_hwfc_ref); + + /* Destroy parameters */ + if (ctx->empty_session_params) + vk->DestroyVideoSessionParametersKHR(s->hwctx->act_dev, + ctx->empty_session_params, + s->hwctx->alloc); + + ff_vk_video_common_uninit(s, &ctx->common); + + vk->DestroySamplerYcbcrConversion(s->hwctx->act_dev, ctx->yuv_sampler, + s->hwctx->alloc); + + ff_vk_uninit(s); +} + +/* Since to even get decoder capabilities, we have to initialize quite a lot, + * this function does initialization and saves it to hwaccel_priv_data if + * available. */ +static int vulkan_decode_check_init(AVCodecContext *avctx, AVBufferRef *frames_ref, + int *width_align, int *height_align, + enum AVPixelFormat *pix_fmt, VkFormat *vk_fmt, + int *dpb_dedicate) +{ + VkResult ret; + int err, max_level; + const struct FFVkCodecMap *vk_codec = &ff_vk_codec_map[avctx->codec_id]; + AVHWFramesContext *frames = (AVHWFramesContext *)frames_ref->data; + AVHWDeviceContext *device = (AVHWDeviceContext *)frames->device_ref->data; + AVVulkanDeviceContext *hwctx = device->hwctx; + enum AVPixelFormat source_format; + enum AVPixelFormat best_format; + VkFormat best_vkfmt; + int base_profile, cur_profile = avctx->profile; + + int dedicated_dpb; + int layered_dpb; + + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx; + + if (!dec->shared_ref) { + ctx = av_mallocz(sizeof(*ctx)); + if (!ctx) + return AVERROR(ENOMEM); + + dec->shared_ref = av_buffer_create((uint8_t *)ctx, sizeof(*ctx), + free_common, NULL, 0); + if (!dec->shared_ref) { + av_free(ctx); + return AVERROR(ENOMEM); + } + } + + ctx = (FFVulkanDecodeShared *)dec->shared_ref->data; + + FFVulkanExtensions *extensions = &ctx->s.extensions; + FFVulkanFunctions *vk = &ctx->s.vkfn; + VkVideoCapabilitiesKHR *caps = &ctx->common.caps; + + VkVideoDecodeCapabilitiesKHR *dec_caps = &ctx->dec_caps; + VkVideoDecodeH264ProfileInfoKHR *h264_profile = &ctx->h264_profile; + VkVideoDecodeH264ProfileInfoKHR *h265_profile = &ctx->h265_profile; + VkVideoDecodeAV1ProfileInfoMESA *av1_profile = &ctx->av1_profile; + VkVideoDecodeUsageInfoKHR *usage = &ctx->usage; + VkVideoProfileInfoKHR *profile = &ctx->profile; + VkVideoProfileListInfoKHR *profile_list = &ctx->profile_list; + + VkPhysicalDeviceVideoFormatInfoKHR fmt_info = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_FORMAT_INFO_KHR, + .pNext = profile_list, + }; + VkVideoDecodeH264CapabilitiesKHR h264_caps = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_CAPABILITIES_KHR, + }; + VkVideoDecodeH265CapabilitiesKHR h265_caps = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_CAPABILITIES_KHR, + }; + VkVideoDecodeAV1CapabilitiesMESA av1_caps = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_CAPABILITIES_MESA, + }; + VkVideoFormatPropertiesKHR *ret_info; + uint32_t nb_out_fmts = 0; + + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt); + if (!desc) + return AVERROR(EINVAL); + + if (ctx->init) + return 0; + + if (!vk_codec->decode_op) + return AVERROR(EINVAL); + + *extensions = ff_vk_extensions_to_mask(hwctx->enabled_dev_extensions, + hwctx->nb_enabled_dev_extensions); + + if (!(*extensions & FF_VK_EXT_VIDEO_DECODE_QUEUE)) { + av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n", + VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME); + return AVERROR(ENOSYS); + } else if (!vk_codec->decode_extension) { + av_log(avctx, AV_LOG_ERROR, "Unsupported codec for Vulkan decoding: %s!\n", + avcodec_get_name(avctx->codec_id)); + return AVERROR(ENOSYS); + } else if (!(vk_codec->decode_extension & *extensions)) { + av_log(avctx, AV_LOG_ERROR, "Device does not support decoding %s!\n", + avcodec_get_name(avctx->codec_id)); + return AVERROR(ENOSYS); + } + + err = ff_vk_load_functions(device, vk, *extensions, 1, 1); + if (err < 0) + return err; + +repeat: + if (avctx->codec_id == AV_CODEC_ID_H264) { + base_profile = FF_PROFILE_H264_CONSTRAINED_BASELINE; + dec_caps->pNext = &h264_caps; + usage->pNext = h264_profile; + h264_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PROFILE_INFO_KHR; + h264_profile->stdProfileIdc = cur_profile; + h264_profile->pictureLayout = avctx->field_order == AV_FIELD_UNKNOWN || + avctx->field_order == AV_FIELD_PROGRESSIVE ? + VK_VIDEO_DECODE_H264_PICTURE_LAYOUT_PROGRESSIVE_KHR : + VK_VIDEO_DECODE_H264_PICTURE_LAYOUT_INTERLACED_INTERLEAVED_LINES_BIT_KHR; + } else if (avctx->codec_id == AV_CODEC_ID_H265) { + base_profile = FF_PROFILE_HEVC_MAIN; + dec_caps->pNext = &h265_caps; + usage->pNext = h265_profile; + h265_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_INFO_KHR; + h265_profile->stdProfileIdc = cur_profile; + } else if (avctx->codec_id == AV_CODEC_ID_AV1) { + base_profile = STD_VIDEO_AV1_MESA_PROFILE_MAIN; + dec_caps->pNext = &av1_caps; + usage->pNext = av1_profile; + av1_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_PROFILE_INFO_MESA; + av1_profile->stdProfileIdc = cur_profile; + } + + usage->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_USAGE_INFO_KHR; + usage->videoUsageHints = VK_VIDEO_DECODE_USAGE_DEFAULT_KHR; + + profile->sType = VK_STRUCTURE_TYPE_VIDEO_PROFILE_INFO_KHR; + profile->pNext = usage; + profile->videoCodecOperation = vk_codec->decode_op; + profile->chromaSubsampling = ff_vk_subsampling_from_av_desc(desc); + profile->lumaBitDepth = ff_vk_depth_from_av_depth(desc->comp[0].depth); + profile->chromaBitDepth = profile->lumaBitDepth; + + profile_list->sType = VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR; + profile_list->profileCount = 1; + profile_list->pProfiles = profile; + + /* Get the capabilities of the decoder for the given profile */ + caps->sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR; + caps->pNext = dec_caps; + dec_caps->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_CAPABILITIES_KHR; + /* dec_caps->pNext already filled in */ + + ret = vk->GetPhysicalDeviceVideoCapabilitiesKHR(hwctx->phys_dev, profile, + caps); + if (ret == VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR && + avctx->flags & AV_HWACCEL_FLAG_ALLOW_PROFILE_MISMATCH && + cur_profile != base_profile) { + cur_profile = base_profile; + av_log(avctx, AV_LOG_VERBOSE, "%s profile %s not supported, attempting " + "again with profile %s\n", + avcodec_get_name(avctx->codec_id), + avcodec_profile_name(avctx->codec_id, avctx->profile), + avcodec_profile_name(avctx->codec_id, base_profile)); + goto repeat; + } else if (ret == VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR) { + av_log(avctx, AV_LOG_VERBOSE, "Unable to initialize video session: " + "%s profile \"%s\" not supported!\n", + avcodec_get_name(avctx->codec_id), + avcodec_profile_name(avctx->codec_id, cur_profile)); + return AVERROR(EINVAL); + } else if (ret == VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR) { + av_log(avctx, AV_LOG_VERBOSE, "Unable to initialize video session: " + "format (%s) not supported!\n", + av_get_pix_fmt_name(avctx->sw_pix_fmt)); + return AVERROR(EINVAL); + } else if (ret == VK_ERROR_FEATURE_NOT_PRESENT || + ret == VK_ERROR_FORMAT_NOT_SUPPORTED) { + return AVERROR(EINVAL); + } else if (ret != VK_SUCCESS) { + return AVERROR_EXTERNAL; + } + + max_level = avctx->codec_id == AV_CODEC_ID_H264 ? h264_caps.maxLevelIdc : + avctx->codec_id == AV_CODEC_ID_H265 ? h265_caps.maxLevelIdc : + avctx->codec_id == AV_CODEC_ID_AV1 ? av1_caps.maxLevelIdc : + 0; + + if (ctx) { + av_log(avctx, AV_LOG_VERBOSE, "Decoder capabilities for %s profile \"%s\":\n", + avcodec_get_name(avctx->codec_id), + avcodec_profile_name(avctx->codec_id, avctx->profile)); + av_log(avctx, AV_LOG_VERBOSE, " Maximum level: %i\n", + max_level); + av_log(avctx, AV_LOG_VERBOSE, " Width: from %i to %i\n", + caps->minCodedExtent.width, caps->maxCodedExtent.width); + av_log(avctx, AV_LOG_VERBOSE, " Height: from %i to %i\n", + caps->minCodedExtent.height, caps->maxCodedExtent.height); + av_log(avctx, AV_LOG_VERBOSE, " Width alignment: %i\n", + caps->pictureAccessGranularity.width); + av_log(avctx, AV_LOG_VERBOSE, " Height alignment: %i\n", + caps->pictureAccessGranularity.height); + av_log(avctx, AV_LOG_VERBOSE, " Bitstream offset alignment: %"PRIu64"\n", + caps->minBitstreamBufferOffsetAlignment); + av_log(avctx, AV_LOG_VERBOSE, " Bitstream size alignment: %"PRIu64"\n", + caps->minBitstreamBufferSizeAlignment); + av_log(avctx, AV_LOG_VERBOSE, " Maximum references: %u\n", + caps->maxDpbSlots); + av_log(avctx, AV_LOG_VERBOSE, " Maximum active references: %u\n", + caps->maxActiveReferencePictures); + av_log(avctx, AV_LOG_VERBOSE, " Codec header version: %i.%i.%i (driver), %i.%i.%i (compiled)\n", + CODEC_VER(caps->stdHeaderVersion.specVersion), + CODEC_VER(dec_ext[avctx->codec_id]->specVersion)); + av_log(avctx, AV_LOG_VERBOSE, " Decode modes:%s%s%s\n", + dec_caps->flags ? "" : + " invalid", + dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR ? + " reuse_dst_dpb" : "", + dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR ? + " dedicated_dpb" : ""); + av_log(avctx, AV_LOG_VERBOSE, " Capability flags:%s%s%s\n", + caps->flags ? "" : + " none", + caps->flags & VK_VIDEO_CAPABILITY_PROTECTED_CONTENT_BIT_KHR ? + " protected" : "", + caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR ? + " separate_references" : ""); + } + + /* Check if decoding is possible with the given parameters */ + if (avctx->coded_width < caps->minCodedExtent.width || + avctx->coded_height < caps->minCodedExtent.height || + avctx->coded_width > caps->maxCodedExtent.width || + avctx->coded_height > caps->maxCodedExtent.height) + return AVERROR(EINVAL); + + if (!(avctx->hwaccel_flags & AV_HWACCEL_FLAG_IGNORE_LEVEL) && + avctx->level > max_level) + return AVERROR(EINVAL); + + /* Some basic sanity checking */ + if (!(dec_caps->flags & (VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR | + VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR))) { + av_log(avctx, AV_LOG_ERROR, "Buggy driver signals invalid decoding mode: neither " + "VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR nor " + "VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR are set!\n"); + return AVERROR_EXTERNAL; + } else if ((dec_caps->flags & (VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR | + VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR) == + VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR) && + !(caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR)) { + av_log(avctx, AV_LOG_ERROR, "Cannot initialize Vulkan decoding session, buggy driver: " + "VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR set " + "but VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR is unset!\n"); + return AVERROR_EXTERNAL; + } else if (!(dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR) && + avctx->codec_id == AV_CODEC_ID_AV1) { + av_log(avctx, AV_LOG_ERROR, "Cannot initialize Vulkan decoding session, buggy driver: " + "codec is AV1, but VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR isn't set!\n"); + return AVERROR_EXTERNAL; + } + + /* TODO: make dedicated_dpb tunable */ + dedicated_dpb = !(dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR); + layered_dpb = !(caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR); + + if (ctx) { + ctx->dedicated_dpb = dedicated_dpb; + ctx->layered_dpb = layered_dpb; + ctx->external_fg = av1_caps.flags & VK_VIDEO_DECODE_AV1_CAPABILITY_EXTERNAL_FILM_GRAIN_MESA; + ctx->init = 1; + } + + if (!pix_fmt) + return 0; + + if (dedicated_dpb) { + fmt_info.imageUsage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR; + } else { + fmt_info.imageUsage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | + VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT; + } + + /* Get the format of the images necessary */ + ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(hwctx->phys_dev, + &fmt_info, + &nb_out_fmts, NULL); + if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED || + (!nb_out_fmts && ret == VK_SUCCESS)) { + return AVERROR(EINVAL); + } else if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + ret_info = av_mallocz(sizeof(*ret_info)*nb_out_fmts); + if (!ret_info) + return AVERROR(ENOMEM); + + for (int i = 0; i < nb_out_fmts; i++) + ret_info[i].sType = VK_STRUCTURE_TYPE_VIDEO_FORMAT_PROPERTIES_KHR; + + ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(hwctx->phys_dev, + &fmt_info, + &nb_out_fmts, ret_info); + if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED || + (!nb_out_fmts && ret == VK_SUCCESS)) { + av_free(ret_info); + return AVERROR(EINVAL); + } else if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n", + ff_vk_ret2str(ret)); + av_free(ret_info); + return AVERROR_EXTERNAL; + } + + /* Find a format to use */ + *pix_fmt = best_format = AV_PIX_FMT_NONE; + *vk_fmt = best_vkfmt = VK_FORMAT_UNDEFINED; + source_format = avctx->sw_pix_fmt; + + av_log(avctx, AV_LOG_DEBUG, "Choosing best pixel format for decoding from %i:\n", nb_out_fmts); + for (int i = 0; i < nb_out_fmts; i++) { + enum AVPixelFormat tmp = ff_vk_pix_fmt_from_vkfmt(ret_info[i].format); + if (tmp == AV_PIX_FMT_NONE) { + av_log(avctx, AV_LOG_WARNING, "Invalid/unknown Vulkan format %i!\n", ret_info[i].format); + continue; + } + + best_format = av_find_best_pix_fmt_of_2(tmp, best_format, source_format, 0, NULL); + if (tmp == best_format) + best_vkfmt = ret_info[i].format; + + av_log(avctx, AV_LOG_DEBUG, " %s%s (Vulkan ID: %i)\n", + av_get_pix_fmt_name(tmp), tmp == best_format ? "*" : "", + ret_info[i].format); + } + + av_free(ret_info); + + if (best_format == AV_PIX_FMT_NONE) { + av_log(avctx, AV_LOG_ERROR, "No valid/compatible pixel format found for decoding!\n"); + return AVERROR(EINVAL); + } else { + av_log(avctx, AV_LOG_VERBOSE, "Chosen frame pixfmt: %s (Vulkan ID: %i)\n", + av_get_pix_fmt_name(best_format), best_vkfmt); + } + + *pix_fmt = best_format; + *vk_fmt = best_vkfmt; + + *width_align = caps->pictureAccessGranularity.width; + *height_align = caps->pictureAccessGranularity.height; + *dpb_dedicate = dedicated_dpb; + + return 0; +} + +int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx) +{ + VkFormat vkfmt; + int err, width_align, height_align, dedicated_dpb; + AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data; + AVVulkanFramesContext *hwfc = frames_ctx->hwctx; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx; + + frames_ctx->sw_format = AV_PIX_FMT_NONE; + + err = vulkan_decode_check_init(avctx, hw_frames_ctx, + &width_align, &height_align, + &frames_ctx->sw_format, &vkfmt, + &dedicated_dpb); + if (err < 0) + return err; + + ctx = (FFVulkanDecodeShared *)dec->shared_ref->data; + + frames_ctx->width = FFALIGN(avctx->coded_width, width_align); + frames_ctx->height = FFALIGN(avctx->coded_height, height_align); + frames_ctx->format = AV_PIX_FMT_VULKAN; + + hwfc->format[0] = vkfmt; + hwfc->create_pnext = &ctx->profile_list; + hwfc->tiling = VK_IMAGE_TILING_OPTIMAL; + hwfc->usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR; + + if (!dedicated_dpb) + hwfc->usage |= VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR; + + return err; +} + +void ff_vk_decode_free_params(void *opaque, uint8_t *data) +{ + FFVulkanDecodeShared *ctx = opaque; + FFVulkanFunctions *vk = &ctx->s.vkfn; + VkVideoSessionParametersKHR *par = (VkVideoSessionParametersKHR *)data; + vk->DestroyVideoSessionParametersKHR(ctx->s.hwctx->act_dev, *par, + ctx->s.hwctx->alloc); + av_free(par); +} + +int ff_vk_decode_uninit(AVCodecContext *avctx) +{ + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + av_buffer_pool_uninit(&dec->tmp_pool); + av_buffer_unref(&dec->session_params); + av_buffer_unref(&dec->shared_ref); + return 0; +} + +int ff_vk_decode_init(AVCodecContext *avctx) +{ + int err, qf, cxpos = 0, cypos = 0, nb_q = 0; + VkResult ret; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx; + FFVulkanContext *s; + FFVulkanFunctions *vk; + const VkVideoProfileListInfoKHR *profile_list; + FFVkQueueFamilyCtx qf_dec; + + VkVideoDecodeH264SessionParametersCreateInfoKHR h264_params = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_CREATE_INFO_KHR, + }; + VkVideoDecodeH265SessionParametersCreateInfoKHR h265_params = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_CREATE_INFO_KHR, + }; + VkVideoDecodeAV1SessionParametersCreateInfoMESA av1_params = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_SESSION_PARAMETERS_CREATE_INFO_MESA, + }; + VkVideoSessionParametersCreateInfoKHR session_params_create = { + .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR, + .pNext = avctx->codec_id == AV_CODEC_ID_H264 ? (void *)&h264_params : + avctx->codec_id == AV_CODEC_ID_HEVC ? (void *)&h265_params : + avctx->codec_id == AV_CODEC_ID_AV1 ? (void *)&av1_params : + NULL, + }; + VkVideoSessionCreateInfoKHR session_create = { + .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_CREATE_INFO_KHR, + }; + VkSamplerYcbcrConversionCreateInfo yuv_sampler_info = { + .sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO, + .components = ff_comp_identity_map, + .ycbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY, + .ycbcrRange = avctx->color_range == AVCOL_RANGE_MPEG, /* Ignored */ + }; + + err = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_VULKAN); + if (err < 0) + return err; + + /* Get parameters, capabilities and final pixel/vulkan format */ + err = vulkan_decode_check_init(avctx, avctx->hw_frames_ctx, + NULL, NULL, NULL, NULL, NULL); + if (err < 0) + goto fail; + + ctx = (FFVulkanDecodeShared *)dec->shared_ref->data; + s = &ctx->s; + vk = &ctx->s.vkfn; + + s->frames_ref = av_buffer_ref(avctx->hw_frames_ctx); + s->frames = (AVHWFramesContext *)s->frames_ref->data; + s->hwfc = s->frames->hwctx; + + s->device = (AVHWDeviceContext *)s->frames->device_ref->data; + s->hwctx = s->device->hwctx; + + /* Load all properties */ + err = ff_vk_load_props(s); + if (err < 0) + goto fail; + + /* Create queue context */ + qf = ff_vk_qf_init(s, &qf_dec, VK_QUEUE_VIDEO_DECODE_BIT_KHR); + + /* Check for support */ + if (!(s->video_props[qf].videoCodecOperations & + ff_vk_codec_map[avctx->codec_id].decode_op)) { + av_log(avctx, AV_LOG_ERROR, "Decoding %s not supported on the given " + "queue family %i!\n", avcodec_get_name(avctx->codec_id), qf); + return AVERROR(EINVAL); + } + + /* Enable queries if supported */ + if (s->query_props[qf].queryResultStatusSupport) + nb_q = 1; + + profile_list = ff_vk_find_struct(s->hwfc->create_pnext, + VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR); + + session_create.flags = 0x0; + session_create.queueFamilyIndex = s->hwctx->queue_family_decode_index; + session_create.maxCodedExtent = ctx->common.caps.maxCodedExtent; + session_create.maxDpbSlots = ctx->common.caps.maxDpbSlots; + session_create.maxActiveReferencePictures = ctx->common.caps.maxActiveReferencePictures; + session_create.pictureFormat = s->hwfc->format[0]; + session_create.referencePictureFormat = session_create.pictureFormat; + session_create.pStdHeaderVersion = dec_ext[avctx->codec_id]; + session_create.pVideoProfile = &profile_list->pProfiles[0]; + + /* Create decode exec context. + * 4 async contexts per thread seems like a good number. */ + err = ff_vk_exec_pool_init(s, &qf_dec, &ctx->exec_pool, 4*avctx->thread_count, + nb_q, VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR, 0, + session_create.pVideoProfile); + if (err < 0) + goto fail; + + err = ff_vk_video_common_init(avctx, s, &ctx->common, &session_create); + if (err < 0) + goto fail; + + /* Get sampler */ + av_chroma_location_enum_to_pos(&cxpos, &cypos, avctx->chroma_sample_location); + yuv_sampler_info.xChromaOffset = cxpos >> 7; + yuv_sampler_info.yChromaOffset = cypos >> 7; + yuv_sampler_info.format = s->hwfc->format[0]; + ret = vk->CreateSamplerYcbcrConversion(s->hwctx->act_dev, &yuv_sampler_info, + s->hwctx->alloc, &ctx->yuv_sampler); + if (ret != VK_SUCCESS) { + err = AVERROR_EXTERNAL; + goto fail; + } + + /* If doing an out-of-place decoding, create a DPB pool */ + if (ctx->dedicated_dpb || avctx->codec_id == AV_CODEC_ID_AV1) { + AVHWFramesContext *dpb_frames; + AVVulkanFramesContext *dpb_hwfc; + + ctx->dpb_hwfc_ref = av_hwframe_ctx_alloc(s->frames->device_ref); + if (!ctx->dpb_hwfc_ref) { + err = AVERROR(ENOMEM); + goto fail; + } + + dpb_frames = (AVHWFramesContext *)ctx->dpb_hwfc_ref->data; + dpb_frames->format = s->frames->format; + dpb_frames->sw_format = s->frames->sw_format; + dpb_frames->width = s->frames->width; + dpb_frames->height = s->frames->height; + + dpb_hwfc = dpb_frames->hwctx; + dpb_hwfc->create_pnext = (void *)profile_list; + dpb_hwfc->format[0] = s->hwfc->format[0]; + dpb_hwfc->tiling = VK_IMAGE_TILING_OPTIMAL; + dpb_hwfc->usage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | + VK_IMAGE_USAGE_SAMPLED_BIT; /* Shuts validator up. */ + + if (ctx->layered_dpb) + dpb_hwfc->nb_layers = ctx->common.caps.maxDpbSlots; + + err = av_hwframe_ctx_init(ctx->dpb_hwfc_ref); + if (err < 0) + goto fail; + + if (ctx->layered_dpb) { + ctx->layered_frame = vk_get_dpb_pool(ctx); + if (!ctx->layered_frame) { + err = AVERROR(ENOMEM); + goto fail; + } + + err = vk_decode_create_view(ctx, &ctx->layered_view, &ctx->layered_aspect, + (AVVkFrame *)ctx->layered_frame->data[0], + s->hwfc->format[0]); + if (err < 0) + goto fail; + } + } + + session_params_create.videoSession = ctx->common.session; + ret = vk->CreateVideoSessionParametersKHR(s->hwctx->act_dev, &session_params_create, + s->hwctx->alloc, &ctx->empty_session_params); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to create empty Vulkan video session parameters: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + ff_vk_decode_flush(avctx); + + av_log(avctx, AV_LOG_VERBOSE, "Vulkan decoder initialization sucessful\n"); + + return 0; + +fail: + ff_vk_decode_uninit(avctx); + + return err; +} diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h new file mode 100644 index 0000000000000..b6472311e43eb --- /dev/null +++ b/libavcodec/vulkan_decode.h @@ -0,0 +1,177 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VULKAN_DECODE_H +#define AVCODEC_VULKAN_DECODE_H + +#include "decode.h" +#include "hwconfig.h" +#include "internal.h" + +#include "vulkan_video.h" + +typedef struct FFVulkanDecodeShared { + FFVulkanContext s; + FFVkVideoCommon common; + FFVkExecPool exec_pool; + + int dedicated_dpb; /* Oddity #1 - separate DPB images */ + int layered_dpb; /* Madness #1 - layered DPB images */ + int external_fg; /* Oddity #2 - hardware can't apply film grain */ + + AVBufferRef *dpb_hwfc_ref; /* Only used for dedicated_dpb */ + + AVFrame *layered_frame; /* Only used for layered_dpb */ + VkImageView layered_view; + VkImageAspectFlags layered_aspect; + + VkVideoSessionParametersKHR empty_session_params; + + VkVideoDecodeCapabilitiesKHR dec_caps; + VkVideoDecodeH264ProfileInfoKHR h264_profile; + VkVideoDecodeH264ProfileInfoKHR h265_profile; + VkVideoDecodeAV1ProfileInfoMESA av1_profile; + VkVideoDecodeUsageInfoKHR usage; + VkVideoProfileInfoKHR profile; + VkVideoProfileListInfoKHR profile_list; + + VkSamplerYcbcrConversion yuv_sampler; + int init; +} FFVulkanDecodeShared; + +typedef struct FFVulkanDecodeContext { + AVBufferRef *shared_ref; + + /* Thread-local state below */ + AVBufferPool *tmp_pool; /* Pool for temporary data, if needed (HEVC) */ + size_t tmp_pool_ele_size; + + /* Thread-synchronized data below */ + AVBufferRef *session_params; + int params_changed; + uint32_t frame_id_alloc_mask; /* For AV1 only */ +} FFVulkanDecodeContext; + +typedef struct FFVulkanDecodePicture { + AVFrame *dpb_frame; /* Only used for out-of-place decoding. */ + + VkImageView img_view_ref; /* Image representation view (reference) */ + VkImageView img_view_out; /* Image representation view (output-only) */ + VkImageView img_view_dest; /* Set to img_view_out if no layered refs are used */ + VkImageAspectFlags img_aspect; /* Image plane mask bits */ + VkImageAspectFlags img_aspect_ref; /* Only used for out-of-place decoding */ + + VkSemaphore sem; + uint64_t sem_value; + + /* State */ + int update_params; + AVBufferRef *session_params; + + /* Current picture */ + VkVideoPictureResourceInfoKHR ref; + VkVideoReferenceSlotInfoKHR ref_slot; + + /* Picture refs. H264 has the maximum number of refs (36) of any supported codec. */ + VkVideoPictureResourceInfoKHR refs [36]; + VkVideoReferenceSlotInfoKHR ref_slots[36]; + + /* Main decoding struct */ + AVBufferRef *params_buf; + VkVideoDecodeInfoKHR decode_info; + + /* Slice data */ + AVBufferRef *slices_buf; + size_t slices_size; + uint32_t *slice_off; + unsigned int slice_off_max; + uint32_t nb_slices; +} FFVulkanDecodePicture; + +/** + * Initialize decoder. + */ +int ff_vk_decode_init(AVCodecContext *avctx); + +/** + * Synchronize the contexts between 2 threads. + */ +int ff_vk_update_thread_context(AVCodecContext *dst, const AVCodecContext *src); + +/** + * Initialize hw_frames_ctx with the parameters needed to decode the stream + * using the parameters from avctx. + * + * NOTE: if avctx->internal->hwaccel_priv_data exists, will partially initialize + * the context. + */ +int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx); + +/** + * Sets FFVulkanDecodeContext.params_changed to 1. + */ +int ff_vk_params_changed(AVCodecContext *avctx, int t, const uint8_t *b, uint32_t s); + +/** + * Prepare a frame, creates the image view, and sets up the dpb fields. + */ +int ff_vk_decode_prepare_frame(FFVulkanDecodeShared *ctx, AVFrame *pic, + FFVulkanDecodePicture *vkpic, int is_current, + int alloc_dpb); + +/** + * Add slice data to frame. + */ +int ff_vk_decode_add_slice(AVCodecContext *avctx, FFVulkanDecodePicture *vp, + const uint8_t *data, size_t size, int add_startcode, + uint32_t *nb_slices, const uint32_t **offsets); + +/** + * Decode a frame. + */ +int ff_vk_decode_frame(AVCodecContext *avctx, + AVFrame *pic, FFVulkanDecodePicture *vp, + AVFrame *rpic[], FFVulkanDecodePicture *rvkp[]); + +/** + * Free a frame and its state. + */ +void ff_vk_decode_free_frame(AVHWDeviceContext *dev_ctx, FFVulkanDecodePicture *vp); + +/** + * Get an FFVkBuffer suitable for decoding from. + */ +int ff_vk_get_decode_buffer(FFVulkanDecodeContext *ctx, AVBufferRef **buf, + void *create_pNext, size_t size); + +/** + * Free VkVideoSessionParametersKHR. + */ +void ff_vk_decode_free_params(void *opaque, uint8_t *data); + +/** + * Flush decoder. + */ +void ff_vk_decode_flush(AVCodecContext *avctx); + +/** + * Free decoder. + */ +int ff_vk_decode_uninit(AVCodecContext *avctx); + +#endif /* AVCODEC_VULKAN_DECODE_H */ diff --git a/libavcodec/vulkan_encode.c b/libavcodec/vulkan_encode.c new file mode 100644 index 0000000000000..c832fbf4e7539 --- /dev/null +++ b/libavcodec/vulkan_encode.c @@ -0,0 +1,989 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "vulkan_encode.h" + +#include // TODO: REMOVE WHEN THE HEADERS BUG IS FIXED + +const VkExtensionProperties ff_vk_enc_ext[AV_CODEC_ID_FIRST_AUDIO] = { + [AV_CODEC_ID_H264] = (VkExtensionProperties) { + .extensionName = VK_STD_VULKAN_VIDEO_CODEC_H264_ENCODE_EXTENSION_NAME, + .specVersion = VK_STD_VULKAN_VIDEO_CODEC_H264_ENCODE_SPEC_VERSION, + }, + [AV_CODEC_ID_HEVC] = (VkExtensionProperties) { + .extensionName = VK_STD_VULKAN_VIDEO_CODEC_H265_ENCODE_EXTENSION_NAME, + .specVersion = VK_STD_VULKAN_VIDEO_CODEC_H265_ENCODE_SPEC_VERSION, + }, +}; + +const AVCodecHWConfigInternal *const ff_vulkan_encode_hw_configs[] = { + HW_CONFIG_ENCODER_FRAMES(VULKAN, VULKAN), + NULL, +}; + +av_cold void ff_vulkan_encode_uninit(FFVulkanEncodeContext *ctx) +{ + FFVulkanContext *s = &ctx->s; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + /* Wait on and free execution pool */ + ff_vk_exec_pool_free(s, &ctx->enc_pool); + + /* This also frees all references from this pool */ + av_frame_free(&ctx->layered_frame); + av_buffer_unref(&ctx->dpb_hwfc_ref); + + /* Destroy YUV sampler */ + if (ctx->yuv_sampler) + vk->DestroySamplerYcbcrConversion(s->hwctx->act_dev, ctx->yuv_sampler, + s->hwctx->alloc); + + /* Destroy parameters */ + if (ctx->session_params) + vk->DestroyVideoSessionParametersKHR(s->hwctx->act_dev, ctx->session_params, + s->hwctx->alloc); + + ff_vk_video_common_uninit(s, &ctx->common); + + ff_vk_uninit(s); + + av_free(ctx->pic); +} + +int ff_vk_encode_create_view(FFVulkanEncodeContext *ctx, VkImageView *dst_view, + VkImageAspectFlags *aspect, AVVkFrame *src, int layer) +{ + VkResult ret; + FFVulkanFunctions *vk = &ctx->s.vkfn; + VkImageAspectFlags aspect_mask = ff_vk_aspect_bits_from_vkfmt(ctx->pic_format); + + VkSamplerYcbcrConversionInfo yuv_sampler_info = { + .sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO, + .conversion = ctx->yuv_sampler, + }; + VkImageViewCreateInfo img_view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = &yuv_sampler_info, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = ctx->pic_format, + .image = src->img[0], + .components = (VkComponentMapping) { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseArrayLayer = layer, + .layerCount = 1, + .levelCount = 1, + }, + }; + + ret = vk->CreateImageView(ctx->s.hwctx->act_dev, &img_view_create_info, + ctx->s.hwctx->alloc, dst_view); + if (ret != VK_SUCCESS) + return AVERROR_EXTERNAL; + + *aspect = aspect_mask; + + return 0; +} + +static AVFrame *vk_get_dpb_pool(FFVulkanEncodeContext *ctx) +{ + AVFrame *avf = av_frame_alloc(); + AVHWFramesContext *dpb_frames = (AVHWFramesContext *)ctx->dpb_hwfc_ref->data; + if (!avf) + return NULL; + + avf->hw_frames_ctx = av_buffer_ref(ctx->dpb_hwfc_ref); + if (!avf->hw_frames_ctx) + av_frame_free(&avf); + avf->buf[0] = av_buffer_pool_get(dpb_frames->pool); + if (!avf->buf[0]) + av_frame_free(&avf); + avf->data[0] = avf->buf[0]->data; + + return avf; +} + +av_cold int ff_vulkan_encode_init(AVCodecContext *avctx, FFVulkanEncodeContext *ctx, + void *codec_profile, void *caps, + const FFVulkanEncoder *enc, + int output_delay, int decode_delay) +{ + int i, err, qf; + VkResult ret; + int cxpos = 0, cypos = 0; + FFVulkanFunctions *vk = &ctx->s.vkfn; + FFVulkanContext *s = &ctx->s; + FFVulkanExtensions extensions; + const struct FFVkCodecMap *vk_codec = &ff_vk_codec_map[avctx->codec_id]; + const AVPixFmtDescriptor *desc; + + AVHWFramesContext *dpb_frames; + AVVulkanFramesContext *dpb_hwfc; + + VkVideoFormatPropertiesKHR *ret_info; + uint32_t nb_out_fmts = 0; + + VkQueryPoolVideoEncodeFeedbackCreateInfoKHR query_create = { + .sType = VK_STRUCTURE_TYPE_QUERY_POOL_VIDEO_ENCODE_FEEDBACK_CREATE_INFO_KHR, + .encodeFeedbackFlags = VK_VIDEO_ENCODE_FEEDBACK_BITSTREAM_BUFFER_OFFSET_BIT_KHR | + VK_VIDEO_ENCODE_FEEDBACK_BITSTREAM_BYTES_WRITTEN_BIT_KHR, + }; + VkVideoSessionCreateInfoKHR session_create = { + .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_CREATE_INFO_KHR, + }; + VkPhysicalDeviceVideoFormatInfoKHR fmt_info = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_FORMAT_INFO_KHR, + .pNext = &ctx->profile_list, + }; + VkSamplerYcbcrConversionCreateInfo yuv_sampler_info = { + .sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO, + .components = ff_comp_identity_map, + .ycbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY, + .ycbcrRange = avctx->color_range == AVCOL_RANGE_MPEG, /* Ignored */ + }; + + if (!avctx->hw_frames_ctx) { + av_log(avctx, AV_LOG_ERROR, "A hardware frames reference is " + "required to associate the encoding device.\n"); + return AVERROR(EINVAL); + } + + s->frames_ref = av_buffer_ref(avctx->hw_frames_ctx); + s->frames = (AVHWFramesContext *)s->frames_ref->data; + s->hwfc = s->frames->hwctx; + + s->device = (AVHWDeviceContext *)s->frames->device_ref->data; + s->hwctx = s->device->hwctx; + + desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt); + if (!desc) + return AVERROR(EINVAL); + + ctx->enc = enc; + ctx->output_delay = output_delay; + ctx->decode_delay = decode_delay; + + extensions = ff_vk_extensions_to_mask(s->hwctx->enabled_dev_extensions, + s->hwctx->nb_enabled_dev_extensions); + + if (!(extensions & FF_VK_EXT_VIDEO_ENCODE_QUEUE)) { + av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n", + VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME); + return AVERROR(ENOSYS); + } else if (!(vk_codec->encode_extension & extensions)) { + av_log(avctx, AV_LOG_ERROR, "Device does not support decoding %s!\n", + avcodec_get_name(avctx->codec_id)); + return AVERROR(ENOSYS); + } + + /* Load functions */ + err = ff_vk_load_functions(s->device, vk, extensions, 1, 1); + if (err < 0) + return err; + + /* Load all properties */ + err = ff_vk_load_props(s); + if (err < 0) + return err; + + /* Create queue context */ + qf = ff_vk_qf_init(s, &ctx->qf_enc, VK_QUEUE_VIDEO_ENCODE_BIT_KHR); + + /* Check for support */ + if (!(s->video_props[qf].videoCodecOperations & vk_codec->encode_op)) { + av_log(avctx, AV_LOG_ERROR, "Encoding %s not supported on the given " + "queue family %i!\n", avcodec_get_name(avctx->codec_id), qf); + return AVERROR(EINVAL); + } + + /* Set tuning */ + ctx->usage_info = (VkVideoEncodeUsageInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_USAGE_INFO_KHR, + .pNext = codec_profile, + .videoUsageHints = ctx->opts.usage, + .videoContentHints = ctx->opts.content, + .tuningMode = ctx->opts.tune, + }; + + /* Load up the profile now, we need it to create a query pool */ + ctx->profile.sType = VK_STRUCTURE_TYPE_VIDEO_PROFILE_INFO_KHR; + ctx->profile.pNext = &ctx->usage_info; + ctx->profile.videoCodecOperation = vk_codec->encode_op; + ctx->profile.chromaSubsampling = ff_vk_subsampling_from_av_desc(desc); + ctx->profile.lumaBitDepth = ff_vk_depth_from_av_depth(desc->comp[0].depth); + ctx->profile.chromaBitDepth = ctx->profile.lumaBitDepth; + + query_create.pNext = &ctx->profile; + + /* Create command and query pool. + * 18.12. Video Encode Bitstream Buffer Range Queries: + * Two values are written, the buffer offset, and the number of bytes written. */ + err = ff_vk_exec_pool_init(s, &ctx->qf_enc, &ctx->enc_pool, 1, + 1, VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR, 0, + &query_create); + if (err < 0) + return err; + + ctx->profile_list.sType = VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR; + ctx->profile_list.profileCount = 1; + ctx->profile_list.pProfiles = &ctx->profile; + + /* Get the capabilities of the decoder for the given profile */ + ctx->common.caps.sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR; + ctx->common.caps.pNext = &ctx->enc_caps; + ctx->enc_caps.sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_CAPABILITIES_KHR; + ctx->enc_caps.pNext = caps; + + ret = vk->GetPhysicalDeviceVideoCapabilitiesKHR(s->hwctx->phys_dev, + &ctx->profile, + &ctx->common.caps); + if (ret == VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR) { + av_log(avctx, AV_LOG_ERROR, "Unable to initialize encoding: " + "%s profile \"%s\" not supported!\n", + avcodec_get_name(avctx->codec_id), + avcodec_profile_name(avctx->codec_id, ctx->opts.profile)); + return AVERROR(EINVAL); + } else if (ret == VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR) { + av_log(avctx, AV_LOG_ERROR, "Unable to initialize encoding: " + "format (%s) not supported!\n", + av_get_pix_fmt_name(avctx->sw_pix_fmt)); + return AVERROR(EINVAL); + } else if (ret == VK_ERROR_FEATURE_NOT_PRESENT || + ret == VK_ERROR_FORMAT_NOT_SUPPORTED) { + return AVERROR(EINVAL); + } else if (ret != VK_SUCCESS) { + return AVERROR_EXTERNAL; + } + + av_log(avctx, AV_LOG_VERBOSE, "encoder capabilities for %s profile \"%s\":\n", + avcodec_get_name(avctx->codec_id), + avcodec_profile_name(avctx->codec_id, ctx->opts.profile)); + av_log(avctx, AV_LOG_VERBOSE, " Width: from %i to %i\n", + ctx->common.caps.minCodedExtent.width, ctx->common.caps.maxCodedExtent.width); + av_log(avctx, AV_LOG_VERBOSE, " Height: from %i to %i\n", + ctx->common.caps.minCodedExtent.height, ctx->common.caps.maxCodedExtent.height); + av_log(avctx, AV_LOG_VERBOSE, " Width alignment: %i\n", + ctx->common.caps.pictureAccessGranularity.width); + av_log(avctx, AV_LOG_VERBOSE, " Height alignment: %i\n", + ctx->common.caps.pictureAccessGranularity.height); + av_log(avctx, AV_LOG_VERBOSE, " Bitstream offset alignment: %"PRIu64"\n", + ctx->common.caps.minBitstreamBufferOffsetAlignment); + av_log(avctx, AV_LOG_VERBOSE, " Bitstream size alignment: %"PRIu64"\n", + ctx->common.caps.minBitstreamBufferSizeAlignment); + av_log(avctx, AV_LOG_VERBOSE, " Maximum references: %u\n", + ctx->common.caps.maxDpbSlots); + av_log(avctx, AV_LOG_VERBOSE, " Maximum active references: %u\n", + ctx->common.caps.maxActiveReferencePictures); + av_log(avctx, AV_LOG_VERBOSE, " Codec header version: %i.%i.%i (driver), %i.%i.%i (compiled)\n", + CODEC_VER(ctx->common.caps.stdHeaderVersion.specVersion), + CODEC_VER(ff_vk_enc_ext[avctx->codec_id].specVersion)); + av_log(avctx, AV_LOG_VERBOSE, " encode quality levels: %i\n", + ctx->enc_caps.maxQualityLevels); + av_log(avctx, AV_LOG_VERBOSE, " encode image width alignment: %i\n", + ctx->enc_caps.inputImageDataFillAlignment.width); + av_log(avctx, AV_LOG_VERBOSE, " encode image height alignment: %i\n", + ctx->enc_caps.inputImageDataFillAlignment.height); + av_log(avctx, AV_LOG_VERBOSE, " Capability flags:%s%s%s\n", + ctx->common.caps.flags ? "" : + " none", + ctx->common.caps.flags & VK_VIDEO_CAPABILITY_PROTECTED_CONTENT_BIT_KHR ? + " protected" : "", + ctx->common.caps.flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR ? + " separate_references" : ""); + + /* Check if decoding is possible with the given parameters */ + if (avctx->coded_width < ctx->common.caps.minCodedExtent.width || + avctx->coded_height < ctx->common.caps.minCodedExtent.height || + avctx->coded_width > ctx->common.caps.maxCodedExtent.width || + avctx->coded_height > ctx->common.caps.maxCodedExtent.height) + return AVERROR(EINVAL); + + fmt_info.imageUsage = VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR | + VK_IMAGE_USAGE_VIDEO_ENCODE_DST_BIT_KHR; + + ctx->layered_dpb = !(ctx->common.caps.flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR); + + /* Get the supported image formats */ + ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(s->hwctx->phys_dev, + &fmt_info, + &nb_out_fmts, NULL); + if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED || + (!nb_out_fmts && ret == VK_SUCCESS)) { + return AVERROR(EINVAL); + } else if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + ret_info = av_mallocz(sizeof(*ret_info)*nb_out_fmts); + if (!ret_info) + return AVERROR(ENOMEM); + + for (int i = 0; i < nb_out_fmts; i++) + ret_info[i].sType = VK_STRUCTURE_TYPE_VIDEO_FORMAT_PROPERTIES_KHR; + + ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(s->hwctx->phys_dev, + &fmt_info, + &nb_out_fmts, ret_info); + if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED || + (!nb_out_fmts && ret == VK_SUCCESS)) { + av_free(ret_info); + return AVERROR(EINVAL); + } else if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n", + ff_vk_ret2str(ret)); + av_free(ret_info); + return AVERROR_EXTERNAL; + } + + av_log(avctx, AV_LOG_VERBOSE, "Supported input formats:\n"); + for (i = 0; i < nb_out_fmts; i++) + av_log(avctx, AV_LOG_VERBOSE, " %i: %i\n", i, ret_info[i].format); + + for (i = 0; i < nb_out_fmts; i++) { + if (ff_vk_pix_fmt_from_vkfmt(ret_info[i].format) == s->frames->sw_format) { + ctx->pic_format = ret_info[i].format; + break; + } + } + + av_free(ret_info); + + if (i == nb_out_fmts) { + av_log(avctx, AV_LOG_ERROR, "Pixel format %s of input frames not supported!\n", + av_get_pix_fmt_name(s->frames->sw_format)); + return AVERROR(EINVAL); + } + + session_create.pVideoProfile = &ctx->profile; + session_create.flags = 0x0; + session_create.queueFamilyIndex = s->hwctx->queue_family_encode_index; + session_create.maxCodedExtent = ctx->common.caps.maxCodedExtent; + session_create.maxDpbSlots = ctx->common.caps.maxDpbSlots; + session_create.maxActiveReferencePictures = ctx->common.caps.maxActiveReferencePictures; + session_create.pictureFormat = ctx->pic_format; + session_create.referencePictureFormat = session_create.pictureFormat; + session_create.pStdHeaderVersion = &ff_vk_enc_ext[avctx->codec_id]; + + err = ff_vk_video_common_init(avctx, s, &ctx->common, &session_create); + if (err < 0) + return err; + + /* Get sampler */ + av_chroma_location_enum_to_pos(&cxpos, &cypos, avctx->chroma_sample_location); + yuv_sampler_info.xChromaOffset = cxpos >> 7; + yuv_sampler_info.yChromaOffset = cypos >> 7; + yuv_sampler_info.format = ctx->pic_format; + ret = vk->CreateSamplerYcbcrConversion(s->hwctx->act_dev, &yuv_sampler_info, + s->hwctx->alloc, &ctx->yuv_sampler); + if (ret != VK_SUCCESS) + return AVERROR_EXTERNAL; + + ctx->dpb_hwfc_ref = av_hwframe_ctx_alloc(s->frames->device_ref); + if (!ctx->dpb_hwfc_ref) + return AVERROR(ENOMEM); + + dpb_frames = (AVHWFramesContext *)ctx->dpb_hwfc_ref->data; + dpb_frames->format = s->frames->format; + dpb_frames->sw_format = s->frames->sw_format; + dpb_frames->width = s->frames->width; + dpb_frames->height = s->frames->height; + + dpb_hwfc = dpb_frames->hwctx; + dpb_hwfc->create_pnext = &ctx->profile_list; + dpb_hwfc->tiling = VK_IMAGE_TILING_OPTIMAL; + dpb_hwfc->format[0] = ctx->pic_format; + dpb_hwfc->usage = VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR | + VK_IMAGE_USAGE_SAMPLED_BIT; /* Shuts validator up. */ + + if (ctx->layered_dpb) { + ctx->dpb_layers = ctx->common.caps.maxDpbSlots; + dpb_hwfc->nb_layers = ctx->dpb_layers; + } + + err = av_hwframe_ctx_init(ctx->dpb_hwfc_ref); + if (err < 0) + return err; + + if (dpb_hwfc->nb_layers) { + ctx->dpb_layer_taken = av_mallocz(ctx->dpb_layers*sizeof(*ctx->dpb_layer_taken)); + if (!ctx->dpb_layer_taken) + return AVERROR(ENOMEM); + } + + if (ctx->layered_dpb) { + ctx->layered_frame = vk_get_dpb_pool(ctx); + if (!ctx->layered_frame) + return AVERROR(ENOMEM); + } + + ctx->pic = av_mallocz(3*sizeof(*ctx->pic)); + if (!ctx->pic) + return AVERROR(ENOMEM); + + return 0; +} + +static void vkctx_frame_free(FFVulkanEncodeContext *ctx, + FFVulkanEncodePicture *pic) +{ + FFVulkanFunctions *vk = &ctx->s.vkfn; + + if (pic->view) + vk->DestroyImageView(ctx->s.hwctx->act_dev, pic->view, + ctx->s.hwctx->alloc); + + /* TODO: keep these */ + if (pic->dpb_view) + vk->DestroyImageView(ctx->s.hwctx->act_dev, pic->dpb_view, + ctx->s.hwctx->alloc); + + av_frame_free(&pic->dpb_frame); + av_buffer_unref(&pic->pkt_buf); + av_freep(&pic->priv_data); + + ctx->dpb_layer_taken[pic->dpb_layer] = 0; +} + +static int vkctx_frame_init(FFVulkanEncodeContext *ctx, + FFVulkanEncodePicture *pic, AVFrame *src) +{ + int err; + AVVkFrame *vkf; + + if (!src) { + ctx->end_of_stream = 1; + + /* Fix timestamps if we hit end-of-stream before the initial decode + * delay has elapsed. */ + if (ctx->input_order < ctx->decode_delay) + ctx->dts_pts_diff = ctx->pic_end->pts - ctx->first_pts; + + return AVERROR_EOF; + } + + vkf = (AVVkFrame *)src->buf[0]->data; + + /* Create image view for the input frame */ + err = ff_vk_encode_create_view(ctx, &pic->view, &pic->aspect, vkf, 0); + if (err < 0) + goto fail; + + /* Create private data for input frame TODO remove this */ + pic->priv_data = av_mallocz(ctx->enc->pic_priv_data_size); + if (!pic->priv_data) { + err = AVERROR(ENOMEM); + goto fail; + } + + /* Allocate a DPB buffer */ + if (!pic->dpb_frame) { + int layer_id = 0; + if (ctx->layered_dpb) { + for (int layer_id = 0; layer_id < ctx->dpb_layers; layer_id++) + if (!ctx->dpb_layer_taken[layer_id]) + break; + + /* No free DPB slots */ + if (layer_id == ctx->dpb_layers) + return AVERROR(EAGAIN); + + pic->dpb_frame = av_frame_clone(ctx->layered_frame); + } else { + pic->dpb_frame = vk_get_dpb_pool(ctx); + } + + if (!pic->dpb_frame) { + err = AVERROR(ENOMEM); + goto fail; + } + + /* Create image view for the DPB */ + err = ff_vk_encode_create_view(ctx, &pic->dpb_view, &pic->dpb_aspect, + (AVVkFrame *)pic->dpb_frame->data[0], layer_id); + if (err < 0) + goto fail; + + ctx->dpb_layer_taken[layer_id] = 1; + pic->dpb_layer = layer_id; + } + + if (ctx->input_order == 0 || src->pict_type == AV_PICTURE_TYPE_I) + pic->force_idr = 1; + + pic->pts = src->pts; + pic->duration = src->duration; + pic->time_base = src->time_base; + + if (ctx->input_order == 0) + ctx->first_pts = pic->pts; + if (ctx->input_order == ctx->decode_delay) + ctx->dts_pts_diff = pic->pts - ctx->first_pts; + if (ctx->output_delay > 0) + ctx->dts_ring[ctx->input_order % + (3 * ctx->output_delay + ctx->opts.async_depth)] = pic->pts; + + pic->display_order = ctx->input_order; + ctx->input_order++; + + if (ctx->pic_start) { + ctx->pic_end->next = pic; + ctx->pic_end = pic; + } else { + ctx->pic_start = pic; + ctx->pic_end = pic; + } + + return 0; + +fail: + vkctx_frame_free(ctx, pic); + return err; +} + +static int vulkan_encode_issue(AVCodecContext *avctx, + FFVulkanEncodeContext *ctx, + FFVulkanEncodePicture *pic, + AVFrame *src) +{ + int err, max_pkt_size; + const size_t size_align = ctx->common.caps.minBitstreamBufferSizeAlignment; + const int layered_dpb = ctx->layered_dpb; + + FFVulkanFunctions *vk = &ctx->s.vkfn; + AVVkFrame *vkf = (AVVkFrame *)src->buf[0]->data; + + FFVkVideoBuffer *sd_buf; + + FFVkExecContext *exec; + VkCommandBuffer cmd_buf; + VkImageMemoryBarrier2 img_bar[37]; + int nb_img_bar = 0; + + /* Coding start/end */ + VkVideoBeginCodingInfoKHR encode_start = { + .sType = VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR, + .videoSession = ctx->common.session, + .videoSessionParameters = ctx->session_params, + .referenceSlotCount = 0, + .pReferenceSlots = NULL, + }; + VkVideoEndCodingInfoKHR encode_end = { + .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR, + }; + + VkVideoEncodeRateControlLayerInfoKHR rc_layer; + VkVideoEncodeRateControlInfoKHR rc_info; + VkVideoCodingControlInfoKHR encode_ctrl; + VkVideoPictureResourceInfoKHR dpb_pic; + VkVideoReferenceSlotInfoKHR dpb_slot; + VkVideoPictureResourceInfoKHR ref_pic[37]; + VkVideoReferenceSlotInfoKHR ref_slot[37]; + VkVideoEncodeInfoKHR encode_info; + + /* Initialize all codec-specific headers */ + err = ctx->enc->init_pic_headers(avctx, pic); + if (err < 0) + return err; + + /* Create packet data buffer, TODO make this smarter */ + max_pkt_size = (src->width * src->height)*2; + + err = ff_vk_video_get_buffer(&ctx->s, &ctx->common, &pic->pkt_buf, + VK_BUFFER_USAGE_VIDEO_ENCODE_DST_BIT_KHR, + &ctx->profile_list, max_pkt_size); + if (err < 0) + goto fail; + + sd_buf = (FFVkVideoBuffer *)pic->pkt_buf->data; + + /* Rate control */ + rc_layer = (VkVideoEncodeRateControlLayerInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_RATE_CONTROL_LAYER_INFO_KHR, + .pNext = pic->codec_rc_layer, + }; + rc_info = (VkVideoEncodeRateControlInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_RATE_CONTROL_INFO_KHR, + .pNext = NULL, + .rateControlMode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR, + .layerCount = 1, /* Required to be >= 1 */ + .pLayers = &rc_layer, + }; + encode_ctrl = (VkVideoCodingControlInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_CODING_CONTROL_INFO_KHR, + .pNext = &rc_info, + .flags = VK_VIDEO_CODING_CONTROL_ENCODE_RATE_CONTROL_BIT_KHR, + }; + + /* Current picture */ + dpb_pic = (VkVideoPictureResourceInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .pNext = NULL, + .codedOffset = { 0 }, + .codedExtent = (VkExtent2D){ src->width, src->height }, + .baseArrayLayer = 0, + .imageViewBinding = pic->dpb_view, + }; + dpb_slot = (VkVideoReferenceSlotInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR, + .pNext = NULL, + .slotIndex = pic->slot, + .pPictureResource = &dpb_pic, + }; + + /* References for current picture */ + for (int i = 0; i < pic->nb_refs; i++) { + FFVulkanEncodePicture *ref = pic->refs[i]; + + ref_pic[i] = (VkVideoPictureResourceInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .pNext = NULL, + .codedOffset = { 0 }, + .codedExtent = (VkExtent2D){ src->width, src->height }, + .baseArrayLayer = 0, + .imageViewBinding = ref->dpb_view, + }; + ref_slot[i] = (VkVideoReferenceSlotInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR, + .pNext = NULL, + .slotIndex = ref->slot, + .pPictureResource = &ref_pic[i], + }; + } + + encode_info = (VkVideoEncodeInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_INFO_KHR, + .pNext = pic->codec_info, + .flags = 0x0, + .qualityLevel = 0, + .srcPictureResource = (VkVideoPictureResourceInfoKHR) { // SPEC: this should be separate + .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .pNext = NULL, + .codedOffset = 0, + .codedExtent = (VkExtent2D){ src->width, src->height }, + .baseArrayLayer = 0, + .imageViewBinding = pic->view, + }, + .pSetupReferenceSlot = &dpb_slot, /* pic->is_reference ? &ref_slot : NULL */ + .referenceSlotCount = pic->nb_refs, + .pReferenceSlots = ref_slot, + .dstBuffer = sd_buf->buf.buf, + .dstBufferOffset = 0, + .dstBufferRange = sd_buf->buf.size, + .precedingExternallyEncodedBytes = 0, + }; + + /* Reset encoder for the very first frame and on every keyframe */ + if (pic->display_order == 0 || pic->type == FF_VK_FRAME_KEY) + encode_ctrl.flags |= VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR; + + /* Write header */ + if (pic->type == FF_VK_FRAME_KEY && ctx->enc->write_stream_headers) { + uint8_t *hdr_dst = sd_buf->mem + encode_info.dstBufferOffset; + size_t data_size = encode_info.dstBufferRange; + err = ctx->enc->write_stream_headers(avctx, hdr_dst, &data_size); + if (err < 0) + goto fail; + encode_info.dstBufferOffset += data_size; + encode_info.dstBufferRange -= data_size; + } + + /* Write extra units */ + if (ctx->enc->write_extra_headers) { + uint8_t *hdr_dst = sd_buf->mem + encode_info.dstBufferOffset; + size_t data_size = encode_info.dstBufferRange; + err = ctx->enc->write_extra_headers(avctx, pic, hdr_dst, &data_size); + if (err < 0) + goto fail; + encode_info.dstBufferOffset += data_size; + encode_info.dstBufferRange -= data_size; + } + + /* Align buffer offset to the required value with filler units */ + if (ctx->enc->write_filler) { + uint8_t *hdr_dst = sd_buf->mem + encode_info.dstBufferOffset; + size_t data_size = encode_info.dstBufferRange; + + uint32_t offset = encode_info.dstBufferOffset; + size_t offset_align = ctx->common.caps.minBitstreamBufferOffsetAlignment; + + uint32_t filler_data = FFALIGN(offset, offset_align) - offset; + + if (filler_data) { + while (filler_data < ctx->enc->filler_header_size) + filler_data += offset_align; + + filler_data -= ctx->enc->filler_header_size; + + err = ctx->enc->write_filler(avctx, filler_data, + hdr_dst, &data_size); + if (err < 0) + goto fail; + encode_info.dstBufferOffset += data_size; + encode_info.dstBufferRange -= data_size; + } + } + + pic->pkt_buf_offset = encode_info.dstBufferOffset; + + /* Align buffer size to the nearest lower alignment requirement. */ + encode_info.dstBufferRange -= size_align; + encode_info.dstBufferRange = FFALIGN(encode_info.dstBufferRange, + size_align); + + /* Start command buffer recording */ + exec = ff_vk_exec_get(&ctx->enc_pool); + ff_vk_exec_start(&ctx->s, exec); + cmd_buf = exec->buf; + + /* Output packet buffer */ + err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &pic->pkt_buf, 1, 1); + if (err < 0) + goto fail; + + /* Source image - change encode to compute once we have analysis */ + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, src, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR); + if (err < 0) + goto fail; + + /* Source image layout conversion */ + img_bar[nb_img_bar] = (VkImageMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, + .pNext = NULL, + .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + .srcAccessMask = vkf->access[0], + .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR, + .dstAccessMask = VK_ACCESS_2_VIDEO_ENCODE_READ_BIT_KHR, + .oldLayout = vkf->layout[0], + .newLayout = VK_IMAGE_LAYOUT_VIDEO_ENCODE_SRC_KHR, + .srcQueueFamilyIndex = vkf->queue_family[0], + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = vkf->img[0], + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = pic->aspect, + .layerCount = 1, + .levelCount = 1, + }, + }; + ff_vk_exec_update_frame(&ctx->s, exec, src, + &img_bar[nb_img_bar], &nb_img_bar); + + /* Source image's DPB */ + if (encode_info.pSetupReferenceSlot) { + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, pic->dpb_frame, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR); + if (err < 0) + return err; + } + + /* Reference frame DPBs */ + if (!layered_dpb) { + for (int i = 0; i < pic->nb_refs; i++) { + FFVulkanEncodePicture *ref = pic->refs[i]; + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ref->dpb_frame, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR); + if (err < 0) + return err; + } + } + + /* Change image layout */ + vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + /* Start, use parameters */ + vk->CmdBeginVideoCodingKHR(cmd_buf, &encode_start); + + /* Send control data */ + vk->CmdControlVideoCodingKHR(cmd_buf, &encode_ctrl); + + /* encode and fetch the status. + * SPEC: cannot insert AUD units between slices! */ + vk->CmdBeginQuery(cmd_buf, ctx->enc_pool.query_pool, exec->query_idx + 0, 0); + vk->CmdEncodeVideoKHR(cmd_buf, &encode_info); + vk->CmdEndQuery(cmd_buf, ctx->enc_pool.query_pool, exec->query_idx + 0); + + /* End encoding */ + vk->CmdEndVideoCodingKHR(cmd_buf, &encode_end); + + /* End recording and submit for execution */ + ff_vk_exec_submit(&ctx->s, exec); + + pic->encode_issued = 1; + pic->exec = exec; + + return 0; + +fail: + vkctx_frame_free(ctx, pic); + return err; +} + +static int vulkan_encode_output(AVCodecContext *avctx, + FFVulkanEncodeContext *ctx, + AVPacket *pkt, + FFVulkanEncodePicture *pic) +{ + int err; + VkResult ret; + int64_t qstatus = 0; + uint32_t pkt_size, *query_data; + FFVulkanFunctions *vk = &ctx->s.vkfn; + FFVkVideoBuffer *sd_buf = (FFVkVideoBuffer *)pic->pkt_buf->data; + + /* TODO: replace this with a semaphore wait, maybe? */ + ff_vk_exec_wait(&ctx->s, pic->exec); + + /* Get status */ + ret = ff_vk_exec_get_query(&ctx->s, pic->exec, (void **)&query_data, &qstatus); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Error querying results from encoder: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + // SPEC: signal if there hasn't been enough buffer with a unique return code */ + + if (qstatus < 0) { + av_log(ctx, AV_LOG_ERROR, "Error while encoding: %li!\n", qstatus); + return AVERROR(EINVAL); + } + + // SPEC: why? + query_data[0] += pic->pkt_buf_offset; + + pkt_size = query_data[0] /* Buffer offset */ + + query_data[1] /* Data written */; + + av_log(ctx, AV_LOG_VERBOSE, "Received a packet, %u bytes large (%u off, %u data), " + "status = %i\n", pkt_size, query_data[0], query_data[1], err); + + if (!(sd_buf->buf.flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { + VkMappedMemoryRange invalidate_buf = { + .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, + .memory = sd_buf->buf.mem, + .offset = query_data[0], /* Should be already aligned */ + .size = FFALIGN(query_data[1], + ctx->s.props.properties.limits.nonCoherentAtomSize), + }; + + vk->FlushMappedMemoryRanges(ctx->s.hwctx->act_dev, 1, &invalidate_buf); + } + + /* Transfer data buffer ref */ + pkt->pts = pkt->dts = pic->pts; +// pkt->duration = pic->duration; + pkt->time_base = pic->time_base; + pkt->buf = pic->pkt_buf; + pkt->data = sd_buf->mem; + pkt->size = pkt_size; + pkt->flags = ( pic->type == FF_VK_FRAME_KEY ? AV_PKT_FLAG_KEY : 0x0) | + (!pic->is_reference ? AV_PKT_FLAG_DISPOSABLE : 0x0); + + if (ctx->output_delay == 0) { + pkt->dts = pkt->pts; + } else if (pic->encode_order < ctx->decode_delay) { + if (ctx->dts_ring[pic->encode_order] < INT64_MIN + ctx->dts_pts_diff) + pkt->dts = INT64_MIN; + else + pkt->dts = ctx->dts_ring[pic->encode_order] - ctx->dts_pts_diff; + } else { + pkt->dts = ctx->dts_ring[(pic->encode_order - ctx->decode_delay) % + (3 * ctx->output_delay + ctx->opts.async_depth)]; + } + + pic->encode_complete = 1; + pic->encode_size = pkt_size; + pic->pkt_buf = NULL; + + return 0; +} + +int ff_vulkan_encode_receive_packet(AVCodecContext *avctx, FFVulkanEncodeContext *ctx, + AVPacket *pkt) +{ + int err; + AVFrame *frame; + FFVulkanEncodePicture *cur_pic = &ctx->pic[avctx->frame_num % ctx->gop_size]; + int ft = !(avctx->frame_num % ctx->gop_size) ? FF_VK_FRAME_KEY : FF_VK_FRAME_P; + + { + /* TODO: remove this per-frame alloc */ + frame = av_frame_alloc(); + if (!frame) + return AVERROR(ENOMEM); + + err = ff_encode_get_frame(avctx, frame); + if (err < 0 && err != AVERROR_EOF) { + av_frame_free(&frame); + return err; + } + + if (err == AVERROR_EOF) { + av_frame_free(&frame); + return err; + } + + if (ft == FF_VK_FRAME_P) + cur_pic->prev = &ctx->pic[(avctx->frame_num % ctx->gop_size) - 1]; + + err = vkctx_frame_init(ctx, cur_pic, frame); + if (err < 0) { + av_frame_free(&frame); + return err; + } + + cur_pic->refs[0] = !(avctx->frame_num % ctx->gop_size) ? NULL : &ctx->pic[0]; + cur_pic->slot = avctx->frame_num % ctx->gop_size; + cur_pic->nb_refs = !!(avctx->frame_num % ctx->gop_size); + cur_pic->is_reference = ft == FF_VK_FRAME_KEY; + cur_pic->type = ft; + cur_pic->qp = 30; + cur_pic->encode_order = ctx->encode_order++; + + err = vulkan_encode_issue(avctx, ctx, cur_pic, frame); + av_frame_free(&frame); + if (err < 0) + return err; + } + + err = vulkan_encode_output(avctx, ctx, pkt, cur_pic); + if (err < 0) + return err; + + if (!((avctx->frame_num + 1) % ctx->gop_size)) { + vkctx_frame_free(ctx, &ctx->pic[0]); + vkctx_frame_free(ctx, &ctx->pic[1]); + vkctx_frame_free(ctx, &ctx->pic[2]); + } + + return pkt->size; +} diff --git a/libavcodec/vulkan_encode.h b/libavcodec/vulkan_encode.h new file mode 100644 index 0000000000000..1a98ae4680c88 --- /dev/null +++ b/libavcodec/vulkan_encode.h @@ -0,0 +1,258 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VULKAN_ENCODE_H +#define AVCODEC_VULKAN_ENCODE_H + +#include "encode.h" +#include "hwconfig.h" + +#include "vulkan_video.h" + +#define MAX_REORDER_DELAY 16 +#define MAX_ASYNC_DEPTH 64 +#define MAX_PICTURE_REFERENCES 2 + +enum FFVkFrameType { + FF_VK_FRAME_KEY, /* IDR in mpeg-ese */ + FF_VK_FRAME_I, /* mpeg-only */ + + FF_VK_FRAME_P, + FF_VK_FRAME_B, /* mpeg-only */ + + FF_VK_FRAME_S, /* av1-only */ +}; + +typedef struct FFVkEncodeCommonOptions { + int profile; + int async_depth; + VkVideoEncodeUsageFlagBitsKHR usage; + VkVideoEncodeContentFlagBitsKHR content; + VkVideoEncodeTuningModeKHR tune; +} FFVkEncodeCommonOptions; + +typedef struct FFVulkanEncodePicture { + int qp; + enum FFVkFrameType type; + int64_t display_order; + int64_t encode_order; + + int slot; + int64_t pts; + int64_t duration; + AVRational time_base; + int64_t reordered_opaque; + + FFVkExecContext *exec; + int encode_issued; + int encode_complete; + size_t encode_size; + + int force_idr; + int b_depth; + + int is_reference; /* Set if picture is used as a ref */ + + void *priv_data; + + void *codec_info; + void *codec_rc_layer; + + /* Input frame */ + VkImageView view; + VkImageAspectFlags aspect; + + /* DPB */ + AVFrame *dpb_frame; + VkImageView dpb_view; + VkImageAspectFlags dpb_aspect; + int dpb_layer; + + /* Packet buffer - contains an FFVkVideoBuffer struct */ + size_t pkt_buf_offset; // SPEC: MAKE UP YOUR MIND JAMMIT + AVBufferRef *pkt_buf; + + /* The previous reference picture in encode order. Must be in at least + * one of the reference list and DPB list. */ + struct FFVulkanEncodePicture *prev; + struct FFVulkanEncodePicture *next; + + /* The contents of the DPB after this picture has been decoded. + * This will contain the picture itself if it is a reference picture, + * but not if it isn't. */ + struct FFVulkanEncodePicture *dpb[16]; + int nb_dpb_pics; + + /* The reference pictures used in decoding this picture. If they are + * used by later pictures they will also appear in the DPB. */ + struct FFVulkanEncodePicture *refs[MAX_PICTURE_REFERENCES]; + int nb_refs; + + /* Reference count for other pictures referring to this one through + * the above pointers, directly from incomplete pictures and indirectly + * through completed pictures. */ + int ref_count[2]; + int ref_removed[2]; +} FFVulkanEncodePicture; + +/** + * Callback for writing stream-level headers. + */ +typedef int (*vkenc_cb_write_stream_headers)(AVCodecContext *avctx, + uint8_t *data, size_t *data_len); + +/** + * Callback for initializing codec-specific picture headers. + */ +typedef int (*vkenc_cb_init_pic_headers)(AVCodecContext *avctx, + FFVulkanEncodePicture *pic); + +/** + * Callback for writing alignment data. + * Align is the value to align offset to. + */ +typedef int (*vkenc_cb_write_filler)(AVCodecContext *avctx, uint32_t filler, + uint8_t *data, size_t *data_len); + +/** + * Callback for writing any extra units requested. data_len must be set + * to the available size, and its value will be overwritten by the #bytes written + * to the output buffer. + */ +typedef int (*vkenc_cb_write_extra_headers)(AVCodecContext *avctx, + FFVulkanEncodePicture *pic, + uint8_t *data, size_t *data_len); + +typedef struct FFVulkanEncoder { + size_t pic_priv_data_size; + uint32_t filler_header_size; + + vkenc_cb_write_stream_headers write_stream_headers; + vkenc_cb_init_pic_headers init_pic_headers; + vkenc_cb_write_filler write_filler; + vkenc_cb_write_extra_headers write_extra_headers; +} FFVulkanEncoder; + +typedef struct FFVulkanEncodeContext { + FFVulkanContext s; + FFVkVideoCommon common; + const FFVulkanEncoder *enc; + + int64_t input_order; + int64_t encode_order; + + FFVulkanEncodePicture *pic; + + int gop_size; + int64_t bitrate; + + int64_t first_pts; + int output_delay; + int decode_delay; + int end_of_stream; + int64_t dts_pts_diff; + + int64_t dts_ring[MAX_REORDER_DELAY * 3 + MAX_ASYNC_DEPTH]; + + /* Current encoding window, in display (input) order. */ + FFVulkanEncodePicture *pic_start, *pic_end; + /* The next picture to use as the previous reference picture in + * encoding order. */ + FFVulkanEncodePicture *next_prev; + + int frame_num; + + /* DPB */ + AVBufferRef *dpb_hwfc_ref; + AVFrame *layered_frame; + int layered_dpb; + int *dpb_layer_taken; + int dpb_layers; + + VkVideoSessionParametersKHR session_params; + + VkSamplerYcbcrConversion yuv_sampler; + VkFormat pic_format; + + FFVkEncodeCommonOptions opts; + + VkVideoProfileInfoKHR profile; + VkVideoProfileListInfoKHR profile_list; + VkVideoEncodeCapabilitiesKHR enc_caps; + VkVideoEncodeUsageInfoKHR usage_info; + + FFVkQueueFamilyCtx qf_enc; + FFVkExecPool enc_pool; +} FFVulkanEncodeContext; + +#define FF_VK_ENCODE_COMMON_OPTS \ + { "tune", "Select tuning type", OFFSET(vkenc.opts.tune), AV_OPT_TYPE_INT, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_DEFAULT_KHR }, 0, INT_MAX, FLAGS, "tune" }, \ + { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_DEFAULT_KHR }, INT_MIN, INT_MAX, FLAGS, "tune" }, \ + { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_HIGH_QUALITY_KHR }, INT_MIN, INT_MAX, FLAGS, "tune" }, \ + { "ll", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_LOW_LATENCY_KHR }, INT_MIN, INT_MAX, FLAGS, "tune" }, \ + { "ull", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_ULTRA_LOW_LATENCY_KHR }, INT_MIN, INT_MAX, FLAGS, "tune" }, \ + { "lossless", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_LOSSLESS_KHR }, INT_MIN, INT_MAX, FLAGS, "tune" }, \ + { "usage", "Select usage type", OFFSET(vkenc.opts.usage), AV_OPT_TYPE_FLAGS, { .i64 = VK_VIDEO_DECODE_USAGE_DEFAULT_KHR }, 0, INT_MAX, FLAGS, "usage" }, \ + { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_DECODE_USAGE_DEFAULT_KHR }, INT_MIN, INT_MAX, FLAGS, "usage" }, \ + { "transcode", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_TRANSCODING_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "usage" }, \ + { "stream", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_STREAMING_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "usage" }, \ + { "record", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_RECORDING_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "usage" }, \ + { "conference", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_CONFERENCING_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "usage" }, \ + { "content", "Select content type", OFFSET(vkenc.opts.content), AV_OPT_TYPE_FLAGS, { .i64 = VK_VIDEO_ENCODE_CONTENT_DEFAULT_KHR }, 0, INT_MAX, FLAGS, "content" }, \ + { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_CONTENT_DEFAULT_KHR }, INT_MIN, INT_MAX, FLAGS, "content" }, \ + { "camera", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_CONTENT_CAMERA_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "content" }, \ + { "desktop", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_CONTENT_DESKTOP_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "content" }, \ + { "rendered", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_CONTENT_RENDERED_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "content" }, \ + { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(vkenc.opts.async_depth), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, MAX_ASYNC_DEPTH, FLAGS }, \ + +/** + * Paperwork. + */ +extern const AVCodecHWConfigInternal *const ff_vulkan_encode_hw_configs[]; + +/** + * Extension name and version. + */ +extern const VkExtensionProperties ff_vk_enc_ext[AV_CODEC_ID_FIRST_AUDIO]; + +/** + * Create image view for a frame. + */ +int ff_vk_encode_create_view(FFVulkanEncodeContext *ctx, VkImageView *dst_view, + VkImageAspectFlags *aspect, AVVkFrame *src, int layer); + +/** + * Initialize encoder. + */ +int ff_vulkan_encode_init(AVCodecContext *avctx, FFVulkanEncodeContext *ctx, + void *codec_profile, void *caps, + const FFVulkanEncoder *enc, + int output_delay, int decode_delay); + +/** + * Uninitialize encoder. + */ +void ff_vulkan_encode_uninit(FFVulkanEncodeContext *ctx); + +/** + * Encode. + */ +int ff_vulkan_encode_receive_packet(AVCodecContext *avctx, FFVulkanEncodeContext *ctx, + AVPacket *pkt); + +#endif /* AVCODEC_VULKAN_ENCODE_H */ diff --git a/libavcodec/vulkan_encode_h264.c b/libavcodec/vulkan_encode_h264.c new file mode 100644 index 0000000000000..d5a44e14b5ff8 --- /dev/null +++ b/libavcodec/vulkan_encode_h264.c @@ -0,0 +1,1124 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/opt.h" + +#include "cbs.h" +#include "cbs_h264.h" +#include "h264_levels.h" +#include "h2645data.h" +#include "codec_internal.h" +#include "version.h" + +#include "vulkan_encode.h" + +enum UnitElems { + UNIT_AUD = 1 << 0, + UNIT_TIMING = 1 << 1, + UNIT_IDENTIFIER = 1 << 2, + UNIT_RECOVERY = 1 << 3, +}; + +/* Random (version 4) ISO 11578 UUID. */ +static const uint8_t vulkan_encode_h264_sei_identifier_uuid[16] = { + 0x03, 0xfd, 0xf2, 0x0a, 0x5d, 0x4c, 0x05, 0x48, + 0x20, 0x98, 0xca, 0x6b, 0x0c, 0x95, 0x30, 0x1c, +}; + +typedef struct VulkanEncodeH264Context { + FFVulkanEncodeContext vkenc; + VkVideoEncodeH264ProfileInfoEXT profile; + VkVideoEncodeH264CapabilitiesEXT caps; + + int bit_rate; + int output_delay; + int decode_delay; + int gop_size; + int b_per_p; + int dpb_frames; + int max_b_depth; + int hrd_initial_buffer_fullness; + int hrd_buffer_size; + + int mb_width; + int mb_height; + + /* Options */ + enum UnitElems insert_units; + int coder; + int desired_b_depth; + + /* State */ + enum UnitElems write_units; + + /* SPS structs */ + H264RawSPS raw_sps; + StdVideoH264ScalingLists vksps_scaling; + StdVideoH264HrdParameters vksps_vui_header; + StdVideoH264SequenceParameterSetVui vksps_vui; + StdVideoH264SequenceParameterSet vksps; + + /* PPS structs */ + H264RawPPS raw_pps; + StdVideoH264ScalingLists vkpps_scaling; + StdVideoH264PictureParameterSet vkpps; + + /* Structs needed for CBC */ + H264RawAUD raw_aud; + + CodedBitstreamContext *cbc; + CodedBitstreamFragment current_access_unit; + SEIRawUserDataUnregistered sei_identifier; + H264RawSEIBufferingPeriod sei_buffering_period; + H264RawSEIPicTiming sei_pic_timing; + H264RawSEIRecoveryPoint sei_recovery_point; + char *sei_identifier_string; +} VulkanEncodeH264Context; + +typedef struct VulkanEncodeH264Picture { + uint64_t frame_num; + int64_t last_idr_frame; + uint16_t idr_pic_id; + uint16_t pic_order_cnt; + + StdVideoEncodeH264WeightTable slice_wt; + StdVideoEncodeH264SliceHeader slice_hdr; + VkVideoEncodeH264NaluSliceInfoEXT vkslice; + StdVideoEncodeH264PictureInfo h264pic_info; + VkVideoEncodeH264VclFrameInfoEXT vkh264pic_info; + VkVideoEncodeH264RateControlLayerInfoEXT vkrc_layer_info; + + StdVideoEncodeH264ReferenceListsInfo final_list; + VkVideoEncodeH264DpbSlotInfoEXT l0refs[37]; + VkVideoEncodeH264DpbSlotInfoEXT l1refs[37]; + StdVideoEncodeH264ReferenceInfo l0ref_info[37]; + StdVideoEncodeH264ReferenceInfo l1ref_info[37]; + + StdVideoEncodeH264ReferenceListsInfo ref_list_info; + StdVideoEncodeH264RefListModEntry l0mods[37]; + StdVideoEncodeH264RefListModEntry l1mods[37]; + StdVideoEncodeH264RefPicMarkingEntry marks[37]; +} VulkanEncodeH264Picture; + +static av_cold int vulkan_encode_h264_init_seq_params(AVCodecContext *avctx) +{ + VulkanEncodeH264Context *enc = avctx->priv_data; + + H264RawSPS *sps = &enc->raw_sps; + H264RawHRD *hrd = &sps->vui.nal_hrd_parameters; + StdVideoH264ScalingLists *vksps_scaling = &enc->vksps_scaling; + StdVideoH264HrdParameters *vksps_vui_header = &enc->vksps_vui_header; + StdVideoH264SequenceParameterSetVui *vksps_vui = &enc->vksps_vui; + StdVideoH264SequenceParameterSet *vksps = &enc->vksps; + + H264RawPPS *pps = &enc->raw_pps; + StdVideoH264ScalingLists *vkpps_scaling = &enc->vkpps_scaling; + StdVideoH264PictureParameterSet *vkpps = &enc->vkpps; + + memset(sps, 0, sizeof(*sps)); + memset(pps, 0, sizeof(*pps)); + + sps->nal_unit_header.nal_ref_idc = 3; + sps->nal_unit_header.nal_unit_type = H264_NAL_SPS; + + sps->profile_idc = enc->vkenc.opts.profile & 0xff; + + if (sps->profile_idc == FF_PROFILE_H264_MAIN) + sps->constraint_set1_flag = 1; + + if (sps->profile_idc == FF_PROFILE_H264_HIGH) + sps->constraint_set3_flag = enc->gop_size == 1; + + if (sps->profile_idc == FF_PROFILE_H264_MAIN || + sps->profile_idc == FF_PROFILE_H264_HIGH) { + sps->constraint_set4_flag = 1; + sps->constraint_set5_flag = enc->b_per_p == 0; + } + + if (avctx->gop_size == 1) + enc->dpb_frames = 0; + else + enc->dpb_frames = 1 + enc->max_b_depth; + + if (avctx->level != FF_LEVEL_UNKNOWN) { + sps->level_idc = avctx->level; + } else { + const H264LevelDescriptor *level; + int framerate; + + if (avctx->framerate.num > 0 && avctx->framerate.den > 0) + framerate = avctx->framerate.num / avctx->framerate.den; + else + framerate = 0; + + level = ff_h264_guess_level(sps->profile_idc, + avctx->bit_rate, + framerate, + enc->mb_width * 16, + enc->mb_height * 16, + enc->dpb_frames); + if (level) { + av_log(avctx, AV_LOG_VERBOSE, "Using level %s.\n", level->name); + if (level->constraint_set3_flag) + sps->constraint_set3_flag = 1; + sps->level_idc = level->level_idc; + } else { + av_log(avctx, AV_LOG_WARNING, "Stream will not conform " + "to any level: using level 6.2.\n"); + sps->level_idc = 62; + } + } + + sps->seq_parameter_set_id = 0; + sps->chroma_format_idc = 1; + + sps->log2_max_frame_num_minus4 = 4; + sps->pic_order_cnt_type = enc->max_b_depth ? 0 : 2; + if (!sps->pic_order_cnt_type) + sps->log2_max_pic_order_cnt_lsb_minus4 = 4; + + sps->max_num_ref_frames = enc->dpb_frames; + + sps->pic_width_in_mbs_minus1 = enc->mb_width - 1; + sps->pic_height_in_map_units_minus1 = enc->mb_height - 1; + + sps->frame_mbs_only_flag = 1; + sps->direct_8x8_inference_flag = 1; + + if (avctx->width != 16 * enc->mb_width || + avctx->height != 16 * enc->mb_height) { + sps->frame_cropping_flag = 1; + + sps->frame_crop_left_offset = 0; + sps->frame_crop_right_offset = (16 * enc->mb_width - avctx->width) / 2; + sps->frame_crop_top_offset = 0; + sps->frame_crop_bottom_offset = (16 * enc->mb_height - avctx->height) / 2; + } else { + sps->frame_cropping_flag = 0; + } + + sps->vui_parameters_present_flag = 1; + + if (avctx->sample_aspect_ratio.num != 0 && + avctx->sample_aspect_ratio.den != 0) { + int num, den, i; + av_reduce(&num, &den, avctx->sample_aspect_ratio.num, + avctx->sample_aspect_ratio.den, 65535); + for (i = 0; i < FF_ARRAY_ELEMS(ff_h2645_pixel_aspect); i++) { + if (num == ff_h2645_pixel_aspect[i].num && + den == ff_h2645_pixel_aspect[i].den) { + sps->vui.aspect_ratio_idc = i; + break; + } + } + if (i >= FF_ARRAY_ELEMS(ff_h2645_pixel_aspect)) { + sps->vui.aspect_ratio_idc = 255; + sps->vui.sar_width = num; + sps->vui.sar_height = den; + } + sps->vui.aspect_ratio_info_present_flag = 1; + } + + /* Unspecified video format, from table E-2. */ + sps->vui.video_format = 5; + sps->vui.video_full_range_flag = avctx->color_range == AVCOL_RANGE_JPEG; + sps->vui.colour_primaries = avctx->color_primaries; + sps->vui.transfer_characteristics = avctx->color_trc; + sps->vui.matrix_coefficients = avctx->colorspace; + if (avctx->color_primaries != AVCOL_PRI_UNSPECIFIED || + avctx->color_trc != AVCOL_TRC_UNSPECIFIED || + avctx->colorspace != AVCOL_SPC_UNSPECIFIED) + sps->vui.colour_description_present_flag = 1; + if (avctx->color_range != AVCOL_RANGE_UNSPECIFIED || + sps->vui.colour_description_present_flag) + sps->vui.video_signal_type_present_flag = 1; + + if (avctx->chroma_sample_location != AVCHROMA_LOC_UNSPECIFIED) { + sps->vui.chroma_loc_info_present_flag = 1; + sps->vui.chroma_sample_loc_type_top_field = + sps->vui.chroma_sample_loc_type_bottom_field = + avctx->chroma_sample_location - 1; + } + + sps->vui.timing_info_present_flag = 1; + if (avctx->framerate.num > 0 && avctx->framerate.den > 0) { + sps->vui.num_units_in_tick = avctx->framerate.den; + sps->vui.time_scale = 2 * avctx->framerate.num; + sps->vui.fixed_frame_rate_flag = 1; + } else { + sps->vui.num_units_in_tick = avctx->time_base.num; + sps->vui.time_scale = 2 * avctx->time_base.den; + sps->vui.fixed_frame_rate_flag = 0; + } + + if (enc->insert_units & UNIT_TIMING) { + H264RawHRD *hrd = &sps->vui.nal_hrd_parameters; + H264RawSEIBufferingPeriod *bp = &enc->sei_buffering_period; + + sps->vui.nal_hrd_parameters_present_flag = 1; + + hrd->cpb_cnt_minus1 = 0; + + /* Try to scale these to a sensible range so that the + * golomb encode of the value is not overlong. */ + hrd->bit_rate_scale = av_clip_uintp2(av_log2(enc->bit_rate) - 15 - 6, 4); + hrd->bit_rate_value_minus1[0] = (enc->bit_rate >> hrd->bit_rate_scale + 6) - 1; + + hrd->cpb_size_scale = av_clip_uintp2(av_log2(enc->hrd_buffer_size) - 15 - 4, 4); + hrd->cpb_size_value_minus1[0] = (enc->hrd_buffer_size >> hrd->cpb_size_scale + 4) - 1; + + /* CBR mode as defined for the HRD cannot be achieved without filler + * data */ + hrd->cbr_flag[0] = 0; + + hrd->initial_cpb_removal_delay_length_minus1 = 23; + hrd->cpb_removal_delay_length_minus1 = 23; + hrd->dpb_output_delay_length_minus1 = 7; + hrd->time_offset_length = 0; + + bp->seq_parameter_set_id = sps->seq_parameter_set_id; + + // This calculation can easily overflow 32 bits. + bp->nal.initial_cpb_removal_delay[0] = 90000 * + (uint64_t)enc->hrd_initial_buffer_fullness / enc->hrd_buffer_size; + bp->nal.initial_cpb_removal_delay_offset[0] = 0; + } else { + sps->vui.nal_hrd_parameters_present_flag = 0; + sps->vui.low_delay_hrd_flag = 1 - sps->vui.fixed_frame_rate_flag; + } + + sps->vui.bitstream_restriction_flag = 1; + sps->vui.motion_vectors_over_pic_boundaries_flag = 1; + sps->vui.log2_max_mv_length_horizontal = 15; + sps->vui.log2_max_mv_length_vertical = 15; + sps->vui.max_num_reorder_frames = enc->max_b_depth; + sps->vui.max_dec_frame_buffering = enc->max_b_depth + 1; + + pps->nal_unit_header.nal_ref_idc = 3; + pps->nal_unit_header.nal_unit_type = H264_NAL_PPS; + + pps->pic_parameter_set_id = 0; + pps->seq_parameter_set_id = 0; + + pps->entropy_coding_mode_flag = + !(sps->profile_idc == FF_PROFILE_H264_BASELINE || + sps->profile_idc == FF_PROFILE_H264_EXTENDED || + sps->profile_idc == FF_PROFILE_H264_CAVLC_444); + if (!enc->coder && pps->entropy_coding_mode_flag) + pps->entropy_coding_mode_flag = 0; + + pps->num_ref_idx_l0_default_active_minus1 = 0; + pps->num_ref_idx_l1_default_active_minus1 = 0; + + pps->pic_init_qp_minus26 = 0; // TODO - fix, I have no idea + + if (sps->profile_idc == FF_PROFILE_H264_BASELINE || + sps->profile_idc == FF_PROFILE_H264_EXTENDED || + sps->profile_idc == FF_PROFILE_H264_MAIN) { + pps->more_rbsp_data = 0; + } else { + pps->more_rbsp_data = 1; + + pps->transform_8x8_mode_flag = 1; + } + + *vksps_scaling = (StdVideoH264ScalingLists) { + .scaling_list_present_mask = sps->seq_scaling_matrix_present_flag, + .use_default_scaling_matrix_mask = 1, + }; + + *vksps_vui_header = (StdVideoH264HrdParameters) { + .cpb_cnt_minus1 = hrd->cpb_cnt_minus1, + .bit_rate_scale = hrd->bit_rate_scale, + .initial_cpb_removal_delay_length_minus1 = hrd->initial_cpb_removal_delay_length_minus1, + .cpb_removal_delay_length_minus1 = hrd->cpb_removal_delay_length_minus1, + .dpb_output_delay_length_minus1 = hrd->dpb_output_delay_length_minus1, + .time_offset_length = hrd->time_offset_length, + }; + + for (int i = 0; i < H264_MAX_CPB_CNT; i++) { + vksps_vui_header->bit_rate_value_minus1[i] = hrd->bit_rate_value_minus1[i]; + vksps_vui_header->cpb_size_value_minus1[i] = hrd->cpb_size_value_minus1[i]; + vksps_vui_header->cbr_flag[i] = hrd->cbr_flag[i]; + } + + *vksps_vui = (StdVideoH264SequenceParameterSetVui) { + .aspect_ratio_idc = sps->vui.aspect_ratio_idc, + .sar_width = sps->vui.sar_width, + .sar_height = sps->vui.sar_height, + .video_format = sps->vui.video_format, + .colour_primaries = sps->vui.colour_primaries, + .transfer_characteristics = sps->vui.transfer_characteristics, + .matrix_coefficients = sps->vui.matrix_coefficients, + .num_units_in_tick = sps->vui.num_units_in_tick, + .time_scale = sps->vui.time_scale, + .pHrdParameters = vksps_vui_header, + .max_num_reorder_frames = sps->vui.max_num_reorder_frames, + .max_dec_frame_buffering = sps->vui.max_dec_frame_buffering, + .flags = (StdVideoH264SpsVuiFlags) { + .aspect_ratio_info_present_flag = sps->vui.aspect_ratio_info_present_flag, + .overscan_info_present_flag = sps->vui.overscan_info_present_flag, + .overscan_appropriate_flag = sps->vui.overscan_appropriate_flag, + .video_signal_type_present_flag = sps->vui.video_signal_type_present_flag, + .video_full_range_flag = sps->vui.video_full_range_flag, + .color_description_present_flag = sps->vui.colour_description_present_flag, + .chroma_loc_info_present_flag = sps->vui.chroma_loc_info_present_flag, + .timing_info_present_flag = sps->vui.timing_info_present_flag, + .fixed_frame_rate_flag = sps->vui.fixed_frame_rate_flag, + .bitstream_restriction_flag = sps->vui.bitstream_restriction_flag, + .nal_hrd_parameters_present_flag = sps->vui.nal_hrd_parameters_present_flag, + .vcl_hrd_parameters_present_flag = sps->vui.vcl_hrd_parameters_present_flag, + }, + }; + + *vksps = (StdVideoH264SequenceParameterSet) { + .profile_idc = sps->profile_idc, + .level_idc = sps->level_idc, + .seq_parameter_set_id = sps->seq_parameter_set_id, + .chroma_format_idc = sps->chroma_format_idc, + .bit_depth_luma_minus8 = sps->bit_depth_luma_minus8, + .bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8, + .log2_max_frame_num_minus4 = sps->log2_max_frame_num_minus4, + .pic_order_cnt_type = sps->pic_order_cnt_type, + .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4, + .offset_for_non_ref_pic = sps->offset_for_non_ref_pic, + .offset_for_top_to_bottom_field = sps->offset_for_top_to_bottom_field, + .num_ref_frames_in_pic_order_cnt_cycle = sps->num_ref_frames_in_pic_order_cnt_cycle, + .max_num_ref_frames = sps->max_num_ref_frames, + .pic_width_in_mbs_minus1 = sps->pic_width_in_mbs_minus1, + .pic_height_in_map_units_minus1 = sps->pic_height_in_map_units_minus1, + .frame_crop_left_offset = sps->frame_crop_left_offset, + .frame_crop_right_offset = sps->frame_crop_right_offset, + .frame_crop_top_offset = sps->frame_crop_top_offset, + .frame_crop_bottom_offset = sps->frame_crop_bottom_offset, + .flags = (StdVideoH264SpsFlags) { + .constraint_set0_flag = sps->constraint_set0_flag, + .constraint_set1_flag = sps->constraint_set1_flag, + .constraint_set2_flag = sps->constraint_set2_flag, + .constraint_set3_flag = sps->constraint_set3_flag, + .constraint_set4_flag = sps->constraint_set4_flag, + .constraint_set5_flag = sps->constraint_set5_flag, + .direct_8x8_inference_flag = sps->direct_8x8_inference_flag, + .mb_adaptive_frame_field_flag = sps->mb_adaptive_frame_field_flag, + .frame_mbs_only_flag = sps->frame_mbs_only_flag, + .delta_pic_order_always_zero_flag = sps->delta_pic_order_always_zero_flag, + .separate_colour_plane_flag = sps->separate_colour_plane_flag, + .gaps_in_frame_num_value_allowed_flag = sps->gaps_in_frame_num_allowed_flag, + .qpprime_y_zero_transform_bypass_flag = sps->qpprime_y_zero_transform_bypass_flag, + .frame_cropping_flag = sps->frame_cropping_flag, + .seq_scaling_matrix_present_flag = sps->seq_scaling_matrix_present_flag, + .vui_parameters_present_flag = sps->vui_parameters_present_flag, + }, + .pOffsetForRefFrame = sps->offset_for_ref_frame, + .pSequenceParameterSetVui = vksps_vui, + }; + + *vkpps_scaling = (StdVideoH264ScalingLists) { + .scaling_list_present_mask = pps->pic_scaling_matrix_present_flag, + .use_default_scaling_matrix_mask = 1, + }; + + *vkpps = (StdVideoH264PictureParameterSet) { + .seq_parameter_set_id = pps->seq_parameter_set_id, + .pic_parameter_set_id = pps->pic_parameter_set_id, + .num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active_minus1, + .num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active_minus1, + .weighted_bipred_idc = pps->weighted_bipred_idc, + .pic_init_qp_minus26 = pps->pic_init_qp_minus26, + .pic_init_qs_minus26 = pps->pic_init_qs_minus26, + .chroma_qp_index_offset = pps->chroma_qp_index_offset, + .second_chroma_qp_index_offset = pps->second_chroma_qp_index_offset, + .flags = (StdVideoH264PpsFlags) { + .transform_8x8_mode_flag = pps->transform_8x8_mode_flag, + .redundant_pic_cnt_present_flag = pps->redundant_pic_cnt_present_flag, + .constrained_intra_pred_flag = pps->constrained_intra_pred_flag, + .deblocking_filter_control_present_flag = pps->deblocking_filter_control_present_flag, + .weighted_pred_flag = pps->weighted_pred_flag, + .bottom_field_pic_order_in_frame_present_flag = pps->bottom_field_pic_order_in_frame_present_flag, + .entropy_coding_mode_flag = pps->entropy_coding_mode_flag, + .pic_scaling_matrix_present_flag = pps->pic_scaling_matrix_present_flag, + }, + }; + + return 0; +} + +static int vulkan_encode_h264_add_nal(AVCodecContext *avctx, + CodedBitstreamFragment *au, + void *nal_unit) +{ + H264RawNALUnitHeader *header = nal_unit; + + int err = ff_cbs_insert_unit_content(au, -1, + header->nal_unit_type, nal_unit, NULL); + if (err < 0) + av_log(avctx, AV_LOG_ERROR, "Failed to add NAL unit: " + "type = %d.\n", header->nal_unit_type); + + return err; +} + +static int vulkan_encode_h264_write_access_unit(AVCodecContext *avctx, + uint8_t *data, size_t *data_len, + CodedBitstreamFragment *au) +{ + VulkanEncodeH264Context *enc = avctx->priv_data; + + int err = ff_cbs_write_fragment_data(enc->cbc, au); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to write packed header.\n"); + return err; + } + + if (*data_len < au->data_size) { + av_log(avctx, AV_LOG_ERROR, "Access unit too large: %zu < %zu.\n", + *data_len, au->data_size); + return AVERROR(ENOSPC); + } + + memcpy(data, au->data, au->data_size); + *data_len = au->data_size; + + return 0; +} + +static int vulkan_encode_h264_write_sequence_header(AVCodecContext *avctx, + uint8_t *data, size_t *data_len) +{ + int err; + VulkanEncodeH264Context *enc = avctx->priv_data; + CodedBitstreamFragment *au = &enc->current_access_unit; + + if (enc->write_units & UNIT_AUD) { + err = vulkan_encode_h264_add_nal(avctx, au, &enc->raw_aud); + if (err < 0) + goto fail; + } + + err = vulkan_encode_h264_add_nal(avctx, au, &enc->raw_sps); + if (err < 0) + goto fail; + + err = vulkan_encode_h264_add_nal(avctx, au, &enc->raw_pps); + if (err < 0) + goto fail; + + err = vulkan_encode_h264_write_access_unit(avctx, data, data_len, au); +fail: + ff_cbs_fragment_reset(au); + return err; +} + +static int vulkan_encode_h264_write_filler(AVCodecContext *avctx, uint32_t filler, + uint8_t *data, size_t *data_len) +{ + int err; + VulkanEncodeH264Context *enc = avctx->priv_data; + CodedBitstreamFragment *au = &enc->current_access_unit; + + H264RawFiller raw_filler = { + .nal_unit_header = { + .nal_unit_type = H264_NAL_FILLER_DATA, + }, + .filler_size = filler, + }; + + err = vulkan_encode_h264_add_nal(avctx, au, &raw_filler); + if (err < 0) + goto fail; + + err = vulkan_encode_h264_write_access_unit(avctx, data, data_len, au); +fail: + ff_cbs_fragment_reset(au); + return err; +} + +static av_cold int vulkan_encode_h264_create_session(AVCodecContext *avctx) +{ + VkResult ret; + VulkanEncodeH264Context *enc = avctx->priv_data; + FFVulkanFunctions *vk = &enc->vkenc.s.vkfn; + + VkVideoEncodeH264SessionParametersAddInfoEXT h264_params_info; + VkVideoEncodeH264SessionParametersCreateInfoEXT h264_params; + VkVideoSessionParametersCreateInfoKHR session_params_create; + + h264_params_info = (VkVideoEncodeH264SessionParametersAddInfoEXT) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_SESSION_PARAMETERS_ADD_INFO_EXT, + .pStdSPSs = &enc->vksps, + .stdSPSCount = 1, + .pStdPPSs = &enc->vkpps, + .stdPPSCount = 1, + }; + h264_params = (VkVideoEncodeH264SessionParametersCreateInfoEXT) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_SESSION_PARAMETERS_CREATE_INFO_EXT, + .maxStdSPSCount = 1, + .maxStdPPSCount = 1, + .pParametersAddInfo = &h264_params_info, + }; + session_params_create = (VkVideoSessionParametersCreateInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR, + .pNext = &h264_params, + .videoSession = enc->vkenc.common.session, + .videoSessionParametersTemplate = NULL, + }; + + /* Create session parameters */ + ret = vk->CreateVideoSessionParametersKHR(enc->vkenc.s.hwctx->act_dev, &session_params_create, + enc->vkenc.s.hwctx->alloc, &enc->vkenc.session_params); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to create Vulkan video session parameters: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + return 0; +} + +static int vulkan_encode_h264_init_pic_headers(AVCodecContext *avctx, + FFVulkanEncodePicture *pic) +{ + VulkanEncodeH264Context *enc = avctx->priv_data; + VulkanEncodeH264Picture *hpic = pic->priv_data; + FFVulkanEncodePicture *prev = pic->prev; + VulkanEncodeH264Picture *hprev = prev ? prev->priv_data : NULL; + + int qp = pic->qp; + int cpb_delay; + int dpb_delay; + int primary_pic_type; + int slice_type; + + if (pic->type == FF_VK_FRAME_KEY) { + av_assert0(pic->display_order == pic->encode_order); + + hpic->frame_num = 0; + hpic->last_idr_frame = pic->display_order; + hpic->idr_pic_id = hprev ? hprev->idr_pic_id + 1 : 0; + + primary_pic_type = 0; + slice_type = 7; // SPEC: add slice types above 5 + } else { + av_assert0(prev); + + hpic->frame_num = hprev->frame_num + prev->is_reference; + hpic->last_idr_frame = hprev->last_idr_frame; + hpic->idr_pic_id = hprev->idr_pic_id; + + /* SPEC: missing StdVideoH264PictureType entries */ + if (pic->type == FF_VK_FRAME_I) { + slice_type = 7; + primary_pic_type = 0; + } else if (pic->type == FF_VK_FRAME_P) { + slice_type = 5; + primary_pic_type = 1; + } else { + slice_type = 6; + primary_pic_type = 2; + } + } + + hpic->pic_order_cnt = pic->display_order - hpic->last_idr_frame; + if (enc->raw_sps.pic_order_cnt_type == 2) + hpic->pic_order_cnt *= 2; + + dpb_delay = pic->display_order - pic->encode_order + enc->max_b_depth; + cpb_delay = pic->encode_order - hpic->last_idr_frame; + + enc->write_units = 0x0; + + if (pic->display_order == 0 && enc->insert_units & UNIT_IDENTIFIER) + enc->write_units |= UNIT_IDENTIFIER; + + if (enc->insert_units & UNIT_AUD) { + enc->raw_aud = (H264RawAUD) { + .nal_unit_header = { + .nal_unit_type = H264_NAL_AUD, + }, + .primary_pic_type = primary_pic_type, + }; + enc->write_units |= UNIT_AUD; + } + if (enc->insert_units & UNIT_TIMING) { + enc->sei_pic_timing = (H264RawSEIPicTiming) { + .cpb_removal_delay = 2 * cpb_delay, + .dpb_output_delay = 2 * dpb_delay, + }; + enc->write_units |= UNIT_TIMING; + } + if (enc->insert_units & UNIT_RECOVERY && pic->type == FF_VK_FRAME_I) { + enc->sei_recovery_point = (H264RawSEIRecoveryPoint) { + .recovery_frame_cnt = 0, + .exact_match_flag = 1, + .broken_link_flag = enc->b_per_p > 0, + }; + enc->write_units |= UNIT_RECOVERY; + } + + hpic->slice_wt = (StdVideoEncodeH264WeightTable) { + .flags = (StdVideoEncodeH264WeightTableFlags) { + .luma_weight_l0_flag = 0, + .chroma_weight_l0_flag = 0, + .luma_weight_l1_flag = 0, + .chroma_weight_l1_flag = 0, + }, + .luma_log2_weight_denom = 0, + .chroma_log2_weight_denom = 0, + .luma_weight_l0 = { 0 }, + .luma_offset_l0 = { 0 }, + .chroma_weight_l0 = { { 0 } }, + .chroma_offset_l0 = { { 0 } }, + .luma_weight_l1 = { 0 }, + .luma_offset_l1 = { 0 }, + .chroma_weight_l1 = { { 0 } }, + .chroma_offset_l1 = { { 0 } }, + }; + + hpic->slice_hdr = (StdVideoEncodeH264SliceHeader) { + .flags = (StdVideoEncodeH264SliceHeaderFlags) { + .direct_spatial_mv_pred_flag = 0, + .num_ref_idx_active_override_flag = 0, + .no_output_of_prior_pics_flag = 0, + .adaptive_ref_pic_marking_mode_flag = 0, + .no_prior_references_available_flag = 0, + }, + .first_mb_in_slice = 0, + .slice_type = slice_type, + .idr_pic_id = hpic->idr_pic_id, + .num_ref_idx_l0_active_minus1 = 0, + .num_ref_idx_l1_active_minus1 = 0, + .cabac_init_idc = 0, + .disable_deblocking_filter_idc = 1, + .slice_alpha_c0_offset_div2 = 0, + .slice_beta_offset_div2 = 0, + .pWeightTable = &hpic->slice_wt, + }; + + hpic->vkslice = (VkVideoEncodeH264NaluSliceInfoEXT) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_NALU_SLICE_INFO_EXT, + .pNext = NULL, + .mbCount = enc->mb_width * enc->mb_height, + .pStdReferenceFinalLists = NULL, + .pStdSliceHeader = &hpic->slice_hdr, + }; + + hpic->h264pic_info = (StdVideoEncodeH264PictureInfo) { + .flags = (StdVideoEncodeH264PictureInfoFlags) { + .idr_flag = pic->type == FF_VK_FRAME_KEY, + .is_reference_flag = pic->is_reference, + .used_for_long_term_reference = 0, + }, + .seq_parameter_set_id = enc->raw_sps.seq_parameter_set_id, + .pic_parameter_set_id = enc->raw_pps.pic_parameter_set_id, + .pictureType = pic->type == FF_VK_FRAME_P ? STD_VIDEO_H264_PICTURE_TYPE_P : + pic->type == FF_VK_FRAME_B ? STD_VIDEO_H264_PICTURE_TYPE_B : + pic->type == FF_VK_FRAME_I ? STD_VIDEO_H264_PICTURE_TYPE_I : + STD_VIDEO_H264_PICTURE_TYPE_IDR, + .frame_num = hpic->frame_num, + .PicOrderCnt = hpic->pic_order_cnt, + }; + +#if 0 + hpic->ref_list_info = (StdVideoEncodeH264ReferenceListsInfo) { + .flags = (StdVideoEncodeH264ReferenceListsInfoFlags) { + .ref_pic_list_modification_flag_l0 = 0, + .ref_pic_list_modification_flag_l1 = 0, + }, + .pRefList0ModOperations = hpic->l0mods, + .refList0ModOpCount = 0, + .pRefList1ModOperations = hpic->l1mods, + .refList1ModOpCount = 0, + .pRefPicMarkingOperations = hpic->marks, + .refPicMarkingOpCount = 0, + }; + + for (int i = 0; i < pic->nb_refs; i++) { + FFVulkanEncodePicture *ref = pic->refs[i]; + VulkanEncodeH264Picture *href = ref->priv_data; + + hpic->l0ref_info[0] = (StdVideoEncodeH264ReferenceInfo) { + .flags = (StdVideoEncodeH264ReferenceInfoFlags) { + .used_for_long_term_reference = 0, + }, + .FrameNum = href->frame_num, + .PicOrderCnt = href->pic_order_cnt, + .long_term_pic_num = 0, + .long_term_frame_idx = 0, + }; + + hpic->l0refs[i] = (VkVideoEncodeH264DpbSlotInfoEXT) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_DPB_SLOT_INFO_EXT, + .pStdReferenceInfo = &hpic->l0ref_info[i], + }; + } + + hpic->final_list = (StdVideoEncodeH264ReferenceListsInfo) { + .pReferenceList0Entries = hpic->l0refs, + .referenceList0EntryCount = pic->nb_refs, + .pReferenceList1Entries = hpic->l1refs, + .pReferenceList1Entries = 0, + .pMemMgmtCtrlOperations = &hpic->mem_mgmt, + }; +#endif + + hpic->vkh264pic_info = (VkVideoEncodeH264VclFrameInfoEXT) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_VCL_FRAME_INFO_EXT, + .pNext = NULL, + .pStdReferenceFinalLists = &hpic->final_list, + .naluSliceEntryCount = 1, + .pNaluSliceEntries = &hpic->vkslice, + .pStdPictureInfo = &hpic->h264pic_info, + }; + + hpic->vkrc_layer_info = (VkVideoEncodeH264RateControlLayerInfoEXT) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_RATE_CONTROL_LAYER_INFO_EXT, + .minQp = (VkVideoEncodeH264QpEXT){ qp, qp, qp }, + .maxQp = (VkVideoEncodeH264QpEXT){ qp, qp, qp }, + .useMinQp = 1, + .useMaxQp = 1, + }; + + pic->codec_info = &hpic->vkh264pic_info; + pic->codec_rc_layer = &hpic->vkrc_layer_info; + + return 0; +} + +static int vulkan_encode_h264_write_extra_headers(AVCodecContext *avctx, + FFVulkanEncodePicture *pic, + uint8_t *data, size_t *data_len) +{ + int err; + VulkanEncodeH264Context *enc = avctx->priv_data; + CodedBitstreamFragment *au = &enc->current_access_unit; + + if (enc->write_units) { + if (enc->write_units & UNIT_AUD) { + err = vulkan_encode_h264_add_nal(avctx, au, &enc->raw_aud); + if (err < 0) + goto fail; + } + + if (enc->write_units & UNIT_IDENTIFIER) { + err = ff_cbs_sei_add_message(enc->cbc, au, 1, + SEI_TYPE_USER_DATA_UNREGISTERED, + &enc->sei_identifier, NULL); + if (err < 0) + goto fail; + } + if (enc->write_units & UNIT_TIMING) { + if (pic->type == FF_VK_FRAME_KEY) { + err = ff_cbs_sei_add_message(enc->cbc, au, 1, + SEI_TYPE_BUFFERING_PERIOD, + &enc->sei_buffering_period, NULL); + if (err < 0) + goto fail; + } + err = ff_cbs_sei_add_message(enc->cbc, au, 1, + SEI_TYPE_PIC_TIMING, + &enc->sei_pic_timing, NULL); + if (err < 0) + goto fail; + } + if (enc->write_units & UNIT_RECOVERY) { + err = ff_cbs_sei_add_message(enc->cbc, au, 1, + SEI_TYPE_RECOVERY_POINT, + &enc->sei_recovery_point, NULL); + if (err < 0) + goto fail; + } + + err = vulkan_encode_h264_write_access_unit(avctx, data, data_len, au); + if (err < 0) + goto fail; + + ff_cbs_fragment_reset(au); + + return 0; + } + +fail: + ff_cbs_fragment_reset(au); + return err; +} + +static const FFVulkanEncoder encoder = { + .pic_priv_data_size = sizeof(VulkanEncodeH264Picture), + .write_stream_headers = vulkan_encode_h264_write_sequence_header, + .init_pic_headers = vulkan_encode_h264_init_pic_headers, + .write_filler = vulkan_encode_h264_write_filler, + .filler_header_size = 6, + .write_extra_headers = vulkan_encode_h264_write_extra_headers, +}; + +static av_cold int vulkan_encode_h264_init(AVCodecContext *avctx) +{ + int err; + VulkanEncodeH264Context *enc = avctx->priv_data; + + enc->profile = (VkVideoEncodeH264ProfileInfoEXT) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_PROFILE_INFO_EXT, + .stdProfileIdc = enc->vkenc.opts.profile, + }; + + enc->caps = (VkVideoEncodeH264CapabilitiesEXT) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_CAPABILITIES_EXT, + }; + + err = ff_cbs_init(&enc->cbc, AV_CODEC_ID_H264, avctx); + if (err < 0) + return err; + + enc->mb_width = FFALIGN(avctx->width, 16) / 16; + enc->mb_height = FFALIGN(avctx->height, 16) / 16; + + enc->bit_rate = avctx->bit_rate; + enc->gop_size = 3; /* avctx->gop_size; */ + enc->b_per_p = 0; /* avctx->max_b_frames; */ + enc->max_b_depth = 0; /* FFMIN(enc->desired_b_depth, + av_log2(enc->b_per_p) + 1); */ + + enc->vkenc.gop_size = enc->gop_size; + enc->vkenc.bitrate =enc->bit_rate; + + err = ff_vulkan_encode_init(avctx, &enc->vkenc, &enc->profile, &enc->caps, + &encoder, enc->b_per_p, enc->max_b_depth); + if (err < 0) + return err; + + av_log(avctx, AV_LOG_VERBOSE, "H264 encoder capabilities:\n"); + av_log(avctx, AV_LOG_VERBOSE, " Capability flags:\n"); + av_log(avctx, AV_LOG_VERBOSE, " 8x8_inference:%s%s\n", + enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_DIRECT_8X8_INFERENCE_ENABLED_BIT_EXT ? + " enabling" : "", + enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_DIRECT_8X8_INFERENCE_DISABLED_BIT_EXT ? + " disabling" : ""); + av_log(avctx, AV_LOG_VERBOSE, " separate_color_plane: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_SEPARATE_COLOUR_PLANE_BIT_EXT)); + av_log(avctx, AV_LOG_VERBOSE, " qprime_y_zero_transform_bypass: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_QPPRIME_Y_ZERO_TRANSFORM_BYPASS_BIT_EXT)); + av_log(avctx, AV_LOG_VERBOSE, " scaling_lists: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_SCALING_LISTS_BIT_EXT)); + av_log(avctx, AV_LOG_VERBOSE, " hrd_compliance: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_HRD_COMPLIANCE_BIT_EXT)); + av_log(avctx, AV_LOG_VERBOSE, " chroma_qp_offset: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_CHROMA_QP_OFFSET_BIT_EXT)); + av_log(avctx, AV_LOG_VERBOSE, " second_chroma_qp_offset: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_SECOND_CHROMA_QP_OFFSET_BIT_EXT)); + av_log(avctx, AV_LOG_VERBOSE, " pic_init_qp: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_PIC_INIT_QP_MINUS26_BIT_EXT)); + av_log(avctx, AV_LOG_VERBOSE, " weighted:%s%s%s%s\n", + enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_WEIGHTED_PRED_BIT_EXT ? + " pred" : "", + enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_WEIGHTED_BIPRED_EXPLICIT_BIT_EXT ? + " bipred_explicit" : "", + enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_WEIGHTED_BIPRED_IMPLICIT_BIT_EXT ? + " bipred_implicit" : "", + enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_WEIGHTED_PRED_NO_TABLE_BIT_EXT ? + " pred_no_table" : ""); + av_log(avctx, AV_LOG_VERBOSE, " 8x8_transforms: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_TRANSFORM_8X8_BIT_EXT)); + av_log(avctx, AV_LOG_VERBOSE, " coder:%s%s\n", + enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_CABAC_BIT_EXT ? + " cabac" : "", + enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_CAVLC_BIT_EXT ? + " cavlc" : ""); + av_log(avctx, AV_LOG_VERBOSE, " deblock:%s%s%s\n", + enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_DEBLOCKING_FILTER_DISABLED_BIT_EXT ? + " filter_disabling" : "", + enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_DEBLOCKING_FILTER_ENABLED_BIT_EXT ? + " filter_enabling" : "", + enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_DEBLOCKING_FILTER_PARTIAL_BIT_EXT ? + " filter_partial" : ""); + av_log(avctx, AV_LOG_VERBOSE, " disable_direct_spatial_mv_pred: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_DISABLE_DIRECT_SPATIAL_MV_PRED_BIT_EXT)); + av_log(avctx, AV_LOG_VERBOSE, " multiple_slices: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_MULTIPLE_SLICE_PER_FRAME_BIT_EXT)); + av_log(avctx, AV_LOG_VERBOSE, " slice_mb_count: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_SLICE_MB_COUNT_BIT_EXT)); + av_log(avctx, AV_LOG_VERBOSE, " row_unaligned_slice: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_ROW_UNALIGNED_SLICE_BIT_EXT)); + av_log(avctx, AV_LOG_VERBOSE, " different_slice_type: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_DIFFERENT_SLICE_TYPE_BIT_EXT)); + av_log(avctx, AV_LOG_VERBOSE, " b_frame_in_l1_list: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_B_FRAME_IN_L1_LIST_BIT_EXT)); + + av_log(avctx, AV_LOG_VERBOSE, " L0_refs: %i P's; %i B's\n", + enc->caps.maxPPictureL0ReferenceCount, + enc->caps.maxBPictureL0ReferenceCount); + + av_log(avctx, AV_LOG_VERBOSE, " L1_refs: %i\n", + enc->caps.maxL1ReferenceCount); + + av_log(avctx, AV_LOG_VERBOSE, " bitstream_restriction:\n"); + av_log(avctx, AV_LOG_VERBOSE, " mvs_over_pic_boundaries: %i\n", + enc->caps.motionVectorsOverPicBoundariesFlag); + av_log(avctx, AV_LOG_VERBOSE, " max_bytes_per_pic_denom: %i\n", + enc->caps.maxBytesPerPicDenom); + av_log(avctx, AV_LOG_VERBOSE, " max_bits_per_mb_denom: %i\n", + enc->caps.maxBitsPerMbDenom); + av_log(avctx, AV_LOG_VERBOSE, " log2_max_mv_length_hor: %i\n", + enc->caps.log2MaxMvLengthHorizontal); + av_log(avctx, AV_LOG_VERBOSE, " log2_max_mv_length_ver: %i\n", + enc->caps.log2MaxMvLengthVertical); + + if (enc->insert_units & UNIT_IDENTIFIER) { + int len; + + memcpy(enc->sei_identifier.uuid_iso_iec_11578, + vulkan_encode_h264_sei_identifier_uuid, + sizeof(enc->sei_identifier.uuid_iso_iec_11578)); + + len = snprintf(NULL, 0, + "%s / Vulkan video %i.%i.%i / %s %i.%i.%i / %s", + LIBAVCODEC_IDENT, + CODEC_VER(ff_vk_enc_ext[avctx->codec_id].specVersion), + enc->vkenc.s.driver_props.driverName, + CODEC_VER(enc->vkenc.s.props.properties.driverVersion), + enc->vkenc.s.props.properties.deviceName); + + if (len >= 0) { + enc->sei_identifier_string = av_malloc(len + 1); + if (!enc->sei_identifier_string) + return AVERROR(ENOMEM); + + len = snprintf(enc->sei_identifier_string, len + 1, + "%s / Vulkan video %i.%i.%i / %s %i.%i.%i / %s", + LIBAVCODEC_IDENT, + CODEC_VER(ff_vk_enc_ext[avctx->codec_id].specVersion), + enc->vkenc.s.driver_props.driverName, + CODEC_VER(enc->vkenc.s.props.properties.driverVersion), + enc->vkenc.s.props.properties.deviceName); + + enc->sei_identifier.data = enc->sei_identifier_string; + enc->sei_identifier.data_length = len + 1; + } + } + + err = vulkan_encode_h264_init_seq_params(avctx); + if (err < 0) + return err; + + err = vulkan_encode_h264_create_session(avctx); + if (err < 0) + return err; + + if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) { + uint8_t data[4096]; + size_t data_len = sizeof(data); + + err = vulkan_encode_h264_write_sequence_header(avctx, data, &data_len); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to write sequence header " + "for extradata: %d.\n", err); + return err; + } else { + avctx->extradata_size = data_len; + avctx->extradata = av_mallocz(avctx->extradata_size + + AV_INPUT_BUFFER_PADDING_SIZE); + if (!avctx->extradata) { + err = AVERROR(ENOMEM); + return err; + } + memcpy(avctx->extradata, data, avctx->extradata_size); + } + } + + return 0; +} + +static av_cold int vulkan_encode_h264_close(AVCodecContext *avctx) +{ + VulkanEncodeH264Context *enc = avctx->priv_data; + ff_vulkan_encode_uninit(&enc->vkenc); + return 0; +} + +static int video_encode_h264_receive_packet(AVCodecContext *avctx, AVPacket *pkt) +{ + VulkanEncodeH264Context *enc = avctx->priv_data; + return ff_vulkan_encode_receive_packet(avctx, &enc->vkenc, pkt); +} + +static void vulkan_encode_h264_flush(AVCodecContext *avctx) +{ + +} + +#define OFFSET(x) offsetof(VulkanEncodeH264Context, x) +#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM) +static const AVOption vulkan_encode_h264_options[] = { + { "profile", "Select profile", OFFSET(vkenc.opts.profile), AV_OPT_TYPE_INT, { .i64 = FF_PROFILE_H264_MAIN }, 0, INT_MAX, FLAGS, "profile" }, + { "baseline", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_H264_BASELINE }, INT_MIN, INT_MAX, FLAGS, "profile" }, + { "main", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_H264_MAIN }, INT_MIN, INT_MAX, FLAGS, "profile" }, + { "high", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_H264_HIGH }, INT_MIN, INT_MAX, FLAGS, "profile" }, + { "high444p", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_H264_HIGH_444_PREDICTIVE }, INT_MIN, INT_MAX, FLAGS, "profile" }, + + { "b_depth", "Maximum B-frame reference depth", OFFSET(desired_b_depth), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, INT_MAX, FLAGS }, + + { "coder", "Entropy coder type", OFFSET(coder), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, FLAGS, "coder" }, + { "cabac", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, INT_MIN, INT_MAX, FLAGS, "coder" }, + { "vlc", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, INT_MIN, INT_MAX, FLAGS, "coder" }, + + { "units", "Set units to include", OFFSET(insert_units), AV_OPT_TYPE_FLAGS, { .i64 = UNIT_IDENTIFIER | UNIT_AUD | UNIT_RECOVERY }, 0, INT_MAX, FLAGS, "units" }, + { "identifier", "Include encoder version identifier", 0, AV_OPT_TYPE_CONST, { .i64 = UNIT_IDENTIFIER }, INT_MIN, INT_MAX, FLAGS, "units" }, + { "aud", "Include AUD units", 0, AV_OPT_TYPE_CONST, { .i64 = UNIT_AUD }, INT_MIN, INT_MAX, FLAGS, "units" }, + { "timing", "Include timing parameters (buffering_period and pic_timing)", 0, AV_OPT_TYPE_CONST, { .i64 = UNIT_TIMING }, INT_MIN, INT_MAX, FLAGS, "units" }, + { "recovery", "Include recovery points where appropriate", 0, AV_OPT_TYPE_CONST, { .i64 = UNIT_RECOVERY }, INT_MIN, INT_MAX, FLAGS, "units" }, + + FF_VK_ENCODE_COMMON_OPTS + + { NULL }, +}; + +static const FFCodecDefault vulkan_encode_h264_defaults[] = { + { "b", "0" }, + { "g", "120" }, + { NULL }, +}; + +static const AVClass vulkan_encode_h264_class = { + .class_name = "h264_vulkan", + .item_name = av_default_item_name, + .option = vulkan_encode_h264_options, + .version = LIBAVUTIL_VERSION_INT, +}; + +const FFCodec ff_h264_vulkan_encoder = { + .p.name = "h264_vulkan", + CODEC_LONG_NAME("H.264/AVC (Vulkan)"), + .p.type = AVMEDIA_TYPE_VIDEO, + .p.id = AV_CODEC_ID_H264, + .priv_data_size = sizeof(VulkanEncodeH264Context), + .init = &vulkan_encode_h264_init, + FF_CODEC_RECEIVE_PACKET_CB(&video_encode_h264_receive_packet), + .flush = &vulkan_encode_h264_flush, + .close = &vulkan_encode_h264_close, + .p.priv_class = &vulkan_encode_h264_class, + .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE | + AV_CODEC_CAP_DR1 | + AV_CODEC_CAP_ENCODER_FLUSH /* | AV_CODEC_CAP_EXPERIMENTAL */, + .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, + .defaults = vulkan_encode_h264_defaults, + .p.pix_fmts = (const enum AVPixelFormat[]) { + AV_PIX_FMT_VULKAN, + AV_PIX_FMT_NONE, + }, + .hw_configs = ff_vulkan_encode_hw_configs, + .p.wrapper_name = "vulkan", +}; diff --git a/libavcodec/vulkan_h264.c b/libavcodec/vulkan_h264.c new file mode 100644 index 0000000000000..3cd89c504e0ca --- /dev/null +++ b/libavcodec/vulkan_h264.c @@ -0,0 +1,538 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "h264dec.h" +#include "h264_ps.h" + +#include "vulkan_decode.h" + +const VkExtensionProperties ff_vk_dec_h264_ext = { + .extensionName = VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME, + .specVersion = VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION, +}; + +typedef struct H264VulkanDecodePicture { + FFVulkanDecodePicture vp; + + /* Current picture */ + StdVideoDecodeH264ReferenceInfo h264_ref; + VkVideoDecodeH264DpbSlotInfoKHR vkh264_ref; + + /* Picture refs */ + H264Picture *ref_src [H264_MAX_PICTURE_COUNT]; + StdVideoDecodeH264ReferenceInfo h264_refs [H264_MAX_PICTURE_COUNT]; + VkVideoDecodeH264DpbSlotInfoKHR vkh264_refs[H264_MAX_PICTURE_COUNT]; + + /* Current picture (contd.) */ + StdVideoDecodeH264PictureInfo h264pic; + VkVideoDecodeH264PictureInfoKHR h264_pic_info; +} H264VulkanDecodePicture; + +static int vk_h264_fill_pict(AVCodecContext *avctx, H264Picture **ref_src, + VkVideoReferenceSlotInfoKHR *ref_slot, /* Main structure */ + VkVideoPictureResourceInfoKHR *ref, /* Goes in ^ */ + VkVideoDecodeH264DpbSlotInfoKHR *vkh264_ref, /* Goes in ^ */ + StdVideoDecodeH264ReferenceInfo *h264_ref, /* Goes in ^ */ + H264Picture *pic, int is_current, + int is_field, int picture_structure, + int dpb_slot_index) +{ + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = (FFVulkanDecodeShared *)dec->shared_ref->data; + H264VulkanDecodePicture *hp = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vkpic = &hp->vp; + + int err = ff_vk_decode_prepare_frame(ctx, pic->f, vkpic, is_current, + ctx->dedicated_dpb); + if (err < 0) + return err; + + *h264_ref = (StdVideoDecodeH264ReferenceInfo) { + .FrameNum = pic->long_ref ? pic->pic_id : pic->frame_num, + .PicOrderCnt = { pic->field_poc[0], pic->field_poc[1] }, + .flags = (StdVideoDecodeH264ReferenceInfoFlags) { + .top_field_flag = is_field ? !!(picture_structure & PICT_TOP_FIELD) : 0, + .bottom_field_flag = is_field ? !!(picture_structure & PICT_BOTTOM_FIELD) : 0, + .used_for_long_term_reference = pic->reference && pic->long_ref, + .is_non_existing = 0, + }, + }; + + *vkh264_ref = (VkVideoDecodeH264DpbSlotInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR, + .pStdReferenceInfo = h264_ref, + }; + + *ref = (VkVideoPictureResourceInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .codedOffset = (VkOffset2D){ 0, 0 }, + .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height }, + .baseArrayLayer = ctx->layered_dpb ? dpb_slot_index : 0, + .imageViewBinding = vkpic->img_view_ref, + }; + + *ref_slot = (VkVideoReferenceSlotInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR, + .pNext = vkh264_ref, + .slotIndex = dpb_slot_index, + .pPictureResource = ref, + }; + + if (ref_src) + *ref_src = pic; + + return 0; +} + +static void set_sps(const SPS *sps, + StdVideoH264ScalingLists *vksps_scaling, + StdVideoH264HrdParameters *vksps_vui_header, + StdVideoH264SequenceParameterSetVui *vksps_vui, + StdVideoH264SequenceParameterSet *vksps) +{ + *vksps_scaling = (StdVideoH264ScalingLists) { + .scaling_list_present_mask = sps->scaling_matrix_present_mask, + .use_default_scaling_matrix_mask = 0, /* We already fill in the default matrix */ + }; + + for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS; i++) + memcpy(vksps_scaling->ScalingList4x4[i], sps->scaling_matrix4[i], + STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**sps->scaling_matrix4)); + + for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_8X8_NUM_LISTS; i++) + memcpy(vksps_scaling->ScalingList8x8[i], sps->scaling_matrix8[i], + STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS * sizeof(**sps->scaling_matrix8)); + + *vksps_vui_header = (StdVideoH264HrdParameters) { + .cpb_cnt_minus1 = sps->cpb_cnt - 1, + .bit_rate_scale = sps->bit_rate_scale, + .initial_cpb_removal_delay_length_minus1 = sps->initial_cpb_removal_delay_length - 1, + .cpb_removal_delay_length_minus1 = sps->cpb_removal_delay_length - 1, + .dpb_output_delay_length_minus1 = sps->dpb_output_delay_length - 1, + .time_offset_length = sps->time_offset_length, + }; + + for (int i = 0; i < sps->cpb_cnt; i++) { + vksps_vui_header->bit_rate_value_minus1[i] = sps->bit_rate_value[i] - 1; + vksps_vui_header->cpb_size_value_minus1[i] = sps->cpb_size_value[i] - 1; + vksps_vui_header->cbr_flag[i] = (sps->cpr_flag >> i) & 0x1; + } + + *vksps_vui = (StdVideoH264SequenceParameterSetVui) { + .aspect_ratio_idc = sps->vui.aspect_ratio_idc, + .sar_width = sps->vui.sar.num, + .sar_height = sps->vui.sar.den, + .video_format = sps->vui.video_format, + .colour_primaries = sps->vui.colour_primaries, + .transfer_characteristics = sps->vui.transfer_characteristics, + .matrix_coefficients = sps->vui.matrix_coeffs, + .num_units_in_tick = sps->num_units_in_tick, + .time_scale = sps->time_scale, + .pHrdParameters = vksps_vui_header, + .max_num_reorder_frames = sps->num_reorder_frames, + .max_dec_frame_buffering = sps->max_dec_frame_buffering, + .flags = (StdVideoH264SpsVuiFlags) { + .aspect_ratio_info_present_flag = sps->vui.aspect_ratio_info_present_flag, + .overscan_info_present_flag = sps->vui.overscan_info_present_flag, + .overscan_appropriate_flag = sps->vui.overscan_appropriate_flag, + .video_signal_type_present_flag = sps->vui.video_signal_type_present_flag, + .video_full_range_flag = sps->vui.video_full_range_flag, + .color_description_present_flag = sps->vui.colour_description_present_flag, + .chroma_loc_info_present_flag = sps->vui.chroma_location, + .timing_info_present_flag = sps->timing_info_present_flag, + .fixed_frame_rate_flag = sps->fixed_frame_rate_flag, + .bitstream_restriction_flag = sps->bitstream_restriction_flag, + .nal_hrd_parameters_present_flag = sps->nal_hrd_parameters_present_flag, + .vcl_hrd_parameters_present_flag = sps->vcl_hrd_parameters_present_flag, + }, + }; + + *vksps = (StdVideoH264SequenceParameterSet) { + .profile_idc = sps->profile_idc, + .level_idc = sps->level_idc, + .seq_parameter_set_id = sps->sps_id, + .chroma_format_idc = sps->chroma_format_idc, + .bit_depth_luma_minus8 = sps->bit_depth_luma - 8, + .bit_depth_chroma_minus8 = sps->bit_depth_chroma - 8, + .log2_max_frame_num_minus4 = sps->log2_max_frame_num - 4, + .pic_order_cnt_type = sps->poc_type, + .log2_max_pic_order_cnt_lsb_minus4 = sps->poc_type ? 0 : sps->log2_max_poc_lsb - 4, + .offset_for_non_ref_pic = sps->offset_for_non_ref_pic, + .offset_for_top_to_bottom_field = sps->offset_for_top_to_bottom_field, + .num_ref_frames_in_pic_order_cnt_cycle = sps->poc_cycle_length, + .max_num_ref_frames = sps->ref_frame_count, + .pic_width_in_mbs_minus1 = sps->mb_width - 1, + .pic_height_in_map_units_minus1 = (sps->mb_height/(2 - sps->frame_mbs_only_flag)) - 1, + .frame_crop_left_offset = sps->crop_left, + .frame_crop_right_offset = sps->crop_right, + .frame_crop_top_offset = sps->crop_top, + .frame_crop_bottom_offset = sps->crop_bottom, + .flags = (StdVideoH264SpsFlags) { + .constraint_set0_flag = (sps->constraint_set_flags >> 0) & 0x1, + .constraint_set1_flag = (sps->constraint_set_flags >> 1) & 0x1, + .constraint_set2_flag = (sps->constraint_set_flags >> 2) & 0x1, + .constraint_set3_flag = (sps->constraint_set_flags >> 3) & 0x1, + .constraint_set4_flag = (sps->constraint_set_flags >> 4) & 0x1, + .constraint_set5_flag = (sps->constraint_set_flags >> 5) & 0x1, + .direct_8x8_inference_flag = sps->direct_8x8_inference_flag, + .mb_adaptive_frame_field_flag = sps->mb_aff, + .frame_mbs_only_flag = sps->frame_mbs_only_flag, + .delta_pic_order_always_zero_flag = sps->delta_pic_order_always_zero_flag, + .separate_colour_plane_flag = sps->residual_color_transform_flag, + .gaps_in_frame_num_value_allowed_flag = sps->gaps_in_frame_num_allowed_flag, + .qpprime_y_zero_transform_bypass_flag = sps->transform_bypass, + .frame_cropping_flag = sps->crop, + .seq_scaling_matrix_present_flag = sps->scaling_matrix_present, + .vui_parameters_present_flag = sps->vui_parameters_present_flag, + }, + .pOffsetForRefFrame = sps->offset_for_ref_frame, + .pScalingLists = vksps_scaling, + .pSequenceParameterSetVui = vksps_vui, + }; +} + +static void set_pps(const PPS *pps, const SPS *sps, + StdVideoH264ScalingLists *vkpps_scaling, + StdVideoH264PictureParameterSet *vkpps) +{ + *vkpps_scaling = (StdVideoH264ScalingLists) { + .scaling_list_present_mask = pps->pic_scaling_matrix_present_mask, + .use_default_scaling_matrix_mask = 0, /* We already fill in the default matrix */ + }; + + for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS; i++) + memcpy(vkpps_scaling->ScalingList4x4[i], pps->scaling_matrix4[i], + STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**pps->scaling_matrix4)); + + for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_8X8_NUM_LISTS; i++) + memcpy(vkpps_scaling->ScalingList8x8[i], pps->scaling_matrix8[i], + STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS * sizeof(**pps->scaling_matrix8)); + + *vkpps = (StdVideoH264PictureParameterSet) { + .seq_parameter_set_id = pps->sps_id, + .pic_parameter_set_id = pps->pps_id, + .num_ref_idx_l0_default_active_minus1 = pps->ref_count[0] - 1, + .num_ref_idx_l1_default_active_minus1 = pps->ref_count[1] - 1, + .weighted_bipred_idc = pps->weighted_bipred_idc, + .pic_init_qp_minus26 = pps->init_qp - 26, + .pic_init_qs_minus26 = pps->init_qs - 26, + .chroma_qp_index_offset = pps->chroma_qp_index_offset[0], + .second_chroma_qp_index_offset = pps->chroma_qp_index_offset[1], + .flags = (StdVideoH264PpsFlags) { + .transform_8x8_mode_flag = pps->transform_8x8_mode, + .redundant_pic_cnt_present_flag = pps->redundant_pic_cnt_present, + .constrained_intra_pred_flag = pps->constrained_intra_pred, + .deblocking_filter_control_present_flag = pps->deblocking_filter_parameters_present, + .weighted_pred_flag = pps->weighted_pred, + .bottom_field_pic_order_in_frame_present_flag = pps->pic_order_present, + .entropy_coding_mode_flag = pps->cabac, + .pic_scaling_matrix_present_flag = pps->pic_scaling_matrix_present_flag, + }, + .pScalingLists = vkpps_scaling, + }; +} + +static int vk_h264_create_params(AVCodecContext *avctx, AVBufferRef **buf) +{ + VkResult ret; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = (FFVulkanDecodeShared *)dec->shared_ref->data; + FFVulkanFunctions *vk = &ctx->s.vkfn; + const H264Context *h = avctx->priv_data; + + /* SPS */ + StdVideoH264ScalingLists vksps_scaling[MAX_SPS_COUNT]; + StdVideoH264HrdParameters vksps_vui_header[MAX_SPS_COUNT]; + StdVideoH264SequenceParameterSetVui vksps_vui[MAX_SPS_COUNT]; + StdVideoH264SequenceParameterSet vksps[MAX_SPS_COUNT]; + + /* PPS */ + StdVideoH264ScalingLists vkpps_scaling[MAX_PPS_COUNT]; + StdVideoH264PictureParameterSet vkpps[MAX_PPS_COUNT]; + + VkVideoDecodeH264SessionParametersAddInfoKHR h264_params_info = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_ADD_INFO_KHR, + .pStdSPSs = vksps, + .stdSPSCount = 0, + .pStdPPSs = vkpps, + .stdPPSCount = 0, + }; + VkVideoDecodeH264SessionParametersCreateInfoKHR h264_params = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_CREATE_INFO_KHR, + .pParametersAddInfo = &h264_params_info, + }; + VkVideoSessionParametersCreateInfoKHR session_params_create = { + .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR, + .pNext = &h264_params, + .videoSession = ctx->common.session, + .videoSessionParametersTemplate = NULL, + }; + + AVBufferRef *tmp; + VkVideoSessionParametersKHR *par = av_malloc(sizeof(*par)); + if (!par) + return AVERROR(ENOMEM); + + /* SPS list */ + for (int i = 0; i < FF_ARRAY_ELEMS(h->ps.sps_list); i++) { + if (h->ps.sps_list[i]) { + const SPS *sps_l = (const SPS *)h->ps.sps_list[i]->data; + int idx = h264_params_info.stdSPSCount; + set_sps(sps_l, &vksps_scaling[idx], &vksps_vui_header[idx], &vksps_vui[idx], &vksps[idx]); + h264_params_info.stdSPSCount++; + } + } + + /* PPS list */ + for (int i = 0; i < FF_ARRAY_ELEMS(h->ps.pps_list); i++) { + if (h->ps.pps_list[i]) { + const PPS *pps_l = (const PPS *)h->ps.pps_list[i]->data; + int idx = h264_params_info.stdPPSCount; + set_pps(pps_l, pps_l->sps, &vkpps_scaling[idx], &vkpps[idx]); + h264_params_info.stdPPSCount++; + } + } + + h264_params.maxStdSPSCount = h264_params_info.stdSPSCount; + h264_params.maxStdPPSCount = h264_params_info.stdPPSCount; + + /* Create session parameters */ + ret = vk->CreateVideoSessionParametersKHR(ctx->s.hwctx->act_dev, &session_params_create, + ctx->s.hwctx->alloc, par); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to create Vulkan video session parameters: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + tmp = av_buffer_create((uint8_t *)par, sizeof(*par), ff_vk_decode_free_params, + ctx, 0); + if (!tmp) { + ff_vk_decode_free_params(ctx, (uint8_t *)par); + return AVERROR(ENOMEM); + } + + av_log(avctx, AV_LOG_DEBUG, "Created frame parameters: %i SPS %i PPS\n", + h264_params_info.stdSPSCount, h264_params_info.stdPPSCount); + + *buf = tmp; + + return 0; +} + +static int vk_h264_start_frame(AVCodecContext *avctx, + av_unused const uint8_t *buffer, + av_unused uint32_t size) +{ + int err; + int dpb_slot_index = 0; + H264Context *h = avctx->priv_data; + H264Picture *pic = h->cur_pic_ptr; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + H264VulkanDecodePicture *hp = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &hp->vp; + + if (!dec->session_params || dec->params_changed) { + av_buffer_unref(&dec->session_params); + err = vk_h264_create_params(avctx, &dec->session_params); + if (err < 0) + return err; + dec->params_changed = 0; + } + + /* Fill in main slot */ + dpb_slot_index = 0; + for (unsigned slot = 0; slot < H264_MAX_PICTURE_COUNT; slot++) { + if (pic == &h->DPB[slot]) { + dpb_slot_index = slot; + break; + } + } + + err = vk_h264_fill_pict(avctx, NULL, &vp->ref_slot, &vp->ref, + &hp->vkh264_ref, &hp->h264_ref, pic, 1, + h->DPB[dpb_slot_index].field_picture, + h->DPB[dpb_slot_index].reference, + dpb_slot_index); + if (err < 0) + return err; + + /* Fill in short-term references */ + for (int i = 0; i < h->short_ref_count; i++) { + dpb_slot_index = 0; + for (unsigned slot = 0; slot < H264_MAX_PICTURE_COUNT; slot++) { + if (h->short_ref[i] == &h->DPB[slot]) { + dpb_slot_index = slot; + break; + } + } + err = vk_h264_fill_pict(avctx, &hp->ref_src[i], &vp->ref_slots[i], + &vp->refs[i], &hp->vkh264_refs[i], + &hp->h264_refs[i], h->short_ref[i], 0, + h->DPB[dpb_slot_index].field_picture, + h->DPB[dpb_slot_index].reference, + dpb_slot_index); + if (err < 0) + return err; + } + + /* Fill in long-term refs */ + for (int r = 0, i = h->short_ref_count; i < h->short_ref_count + h->long_ref_count; i++, r++) { + dpb_slot_index = 0; + for (unsigned slot = 0; slot < H264_MAX_PICTURE_COUNT; slot++) { + if (h->long_ref[i] == &h->DPB[slot]) { + dpb_slot_index = slot; + break; + } + } + err = vk_h264_fill_pict(avctx, &hp->ref_src[i], &vp->ref_slots[i], + &vp->refs[i], &hp->vkh264_refs[i], + &hp->h264_refs[i], h->long_ref[r], 0, + h->DPB[dpb_slot_index].field_picture, + h->DPB[dpb_slot_index].reference, + dpb_slot_index); + if (err < 0) + return err; + } + + hp->h264pic = (StdVideoDecodeH264PictureInfo) { + .seq_parameter_set_id = pic->pps->sps_id, + .pic_parameter_set_id = pic->pps->pps_id, + .frame_num = 0, /* Set later */ + .idr_pic_id = 0, /* Set later */ + .PicOrderCnt[0] = pic->field_poc[0], + .PicOrderCnt[1] = pic->field_poc[1], + .flags = (StdVideoDecodeH264PictureInfoFlags) { + .field_pic_flag = FIELD_PICTURE(h), + .is_intra = 1, /* Set later */ + .IdrPicFlag = h->picture_idr, + .bottom_field_flag = h->picture_structure != PICT_FRAME && + h->picture_structure & PICT_BOTTOM_FIELD, + .is_reference = h->nal_ref_idc != 0, + .complementary_field_pair = h->first_field && FIELD_PICTURE(h), + }, + }; + + hp->h264_pic_info = (VkVideoDecodeH264PictureInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PICTURE_INFO_KHR, + .pStdPictureInfo = &hp->h264pic, + .sliceCount = 0, + }; + + vp->decode_info = (VkVideoDecodeInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR, + .pNext = &hp->h264_pic_info, + .flags = 0x0, + .pSetupReferenceSlot = &vp->ref_slot, + .referenceSlotCount = h->short_ref_count + h->long_ref_count, + .pReferenceSlots = vp->ref_slots, + .dstPictureResource = (VkVideoPictureResourceInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .codedOffset = (VkOffset2D){ 0, 0 }, + .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height }, + .baseArrayLayer = 0, + .imageViewBinding = vp->img_view_out, + }, + }; + + return 0; +} + +static int vk_h264_decode_slice(AVCodecContext *avctx, + const uint8_t *data, + uint32_t size) +{ + const H264Context *h = avctx->priv_data; + const H264SliceContext *sl = &h->slice_ctx[0]; + H264VulkanDecodePicture *hp = h->cur_pic_ptr->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &hp->vp; + + int err = ff_vk_decode_add_slice(avctx, vp, data, size, 1, + &hp->h264_pic_info.sliceCount, + &hp->h264_pic_info.pSliceOffsets); + if (err < 0) + return err; + + hp->h264pic.frame_num = sl->frame_num; + hp->h264pic.idr_pic_id = sl->idr_pic_id; + + /* Frame is only intra of all slices are marked as intra */ + if (sl->slice_type != AV_PICTURE_TYPE_I && sl->slice_type != AV_PICTURE_TYPE_SI) + hp->h264pic.flags.is_intra = 0; + + return 0; +} + +static int vk_h264_end_frame(AVCodecContext *avctx) +{ + const H264Context *h = avctx->priv_data; + H264Picture *pic = h->cur_pic_ptr; + H264VulkanDecodePicture *hp = pic->hwaccel_picture_private; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodePicture *vp = &hp->vp; + FFVulkanDecodePicture *rvp[H264_MAX_PICTURE_COUNT] = { 0 }; + AVFrame *rav[H264_MAX_PICTURE_COUNT] = { 0 }; + + if (!dec->session_params) + return AVERROR(EINVAL); + + for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) { + H264Picture *rp = hp->ref_src[i]; + H264VulkanDecodePicture *rhp = rp->hwaccel_picture_private; + + rvp[i] = &rhp->vp; + rav[i] = hp->ref_src[i]->f; + } + + av_log(avctx, AV_LOG_VERBOSE, "Decoding frame, %lu bytes, %i slices\n", + vp->slices_size, hp->h264_pic_info.sliceCount); + + return ff_vk_decode_frame(avctx, pic->f, vp, rav, rvp); +} + +static void vk_h264_free_frame_priv(void *_hwctx, uint8_t *data) +{ + AVHWDeviceContext *hwctx = _hwctx; + H264VulkanDecodePicture *hp = (H264VulkanDecodePicture *)data; + + /* Free frame resources, this also destroys the session parameters. */ + ff_vk_decode_free_frame(hwctx, &hp->vp); + + /* Free frame context */ + av_free(hp); +} + +const AVHWAccel ff_h264_vulkan_hwaccel = { + .name = "h264_vulkan", + .type = AVMEDIA_TYPE_VIDEO, + .id = AV_CODEC_ID_H264, + .pix_fmt = AV_PIX_FMT_VULKAN, + .start_frame = &vk_h264_start_frame, + .decode_slice = &vk_h264_decode_slice, + .end_frame = &vk_h264_end_frame, + .free_frame_priv = &vk_h264_free_frame_priv, + .frame_priv_data_size = sizeof(H264VulkanDecodePicture), + .init = &ff_vk_decode_init, + .update_thread_context = &ff_vk_update_thread_context, + .decode_params = &ff_vk_params_changed, + .flush = &ff_vk_decode_flush, + .uninit = &ff_vk_decode_uninit, + .frame_params = &ff_vk_frame_params, + .priv_data_size = sizeof(FFVulkanDecodeContext), + .caps_internal = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE, +}; diff --git a/libavcodec/vulkan_hevc.c b/libavcodec/vulkan_hevc.c new file mode 100644 index 0000000000000..53071626196f3 --- /dev/null +++ b/libavcodec/vulkan_hevc.c @@ -0,0 +1,930 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "hevcdec.h" +#include "hevc_ps.h" + +#include "vulkan_decode.h" + +const VkExtensionProperties ff_vk_dec_hevc_ext = { + .extensionName = VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME, + .specVersion = VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION, +}; + +typedef struct HEVCHeaderSPS { + StdVideoH265ScalingLists scaling; + StdVideoH265HrdParameters vui_header; + StdVideoH265SequenceParameterSetVui vui; + StdVideoH265ProfileTierLevel ptl; + StdVideoH265DecPicBufMgr dpbm; + StdVideoH265PredictorPaletteEntries pal; + StdVideoH265SubLayerHrdParameters nal_hdr[HEVC_MAX_SUB_LAYERS]; + StdVideoH265SubLayerHrdParameters vcl_hdr[HEVC_MAX_SUB_LAYERS]; + StdVideoH265ShortTermRefPicSet str[HEVC_MAX_SHORT_TERM_REF_PIC_SETS]; + StdVideoH265LongTermRefPicsSps ltr; +} HEVCHeaderSPS; + +typedef struct HEVCHeaderPPS { + StdVideoH265ScalingLists scaling; + StdVideoH265PredictorPaletteEntries pal; +} HEVCHeaderPPS; + +typedef struct HEVCHeaderVPSSet { + StdVideoH265SubLayerHrdParameters nal_hdr[HEVC_MAX_SUB_LAYERS]; + StdVideoH265SubLayerHrdParameters vcl_hdr[HEVC_MAX_SUB_LAYERS]; +} HEVCHeaderVPSSet; + +typedef struct HEVCHeaderVPS { + StdVideoH265ProfileTierLevel ptl; + StdVideoH265DecPicBufMgr dpbm; + StdVideoH265HrdParameters hdr[HEVC_MAX_LAYER_SETS]; + HEVCHeaderVPSSet *sls; +} HEVCHeaderVPS; + +typedef struct HEVCHeaderSet { + StdVideoH265SequenceParameterSet sps[HEVC_MAX_SPS_COUNT]; + HEVCHeaderSPS hsps[HEVC_MAX_SPS_COUNT]; + + StdVideoH265PictureParameterSet pps[HEVC_MAX_PPS_COUNT]; + HEVCHeaderPPS hpps[HEVC_MAX_PPS_COUNT]; + + StdVideoH265VideoParameterSet vps[HEVC_MAX_PPS_COUNT]; + HEVCHeaderVPS *hvps; +} HEVCHeaderSet; + +static int get_data_set_buf(FFVulkanDecodeContext *s, AVBufferRef **data_buf, + int nb_vps, AVBufferRef * const vps_list[HEVC_MAX_VPS_COUNT]) +{ + uint8_t *data_ptr; + HEVCHeaderSet *hdr; + + size_t base_size = sizeof(StdVideoH265SequenceParameterSet)*HEVC_MAX_SPS_COUNT + + sizeof(HEVCHeaderSPS)*HEVC_MAX_SPS_COUNT + + sizeof(StdVideoH265PictureParameterSet)*HEVC_MAX_PPS_COUNT + + sizeof(HEVCHeaderPPS)*HEVC_MAX_PPS_COUNT + + sizeof(StdVideoH265VideoParameterSet)*HEVC_MAX_VPS_COUNT + + sizeof(HEVCHeaderVPS *); + + size_t vps_size = sizeof(StdVideoH265ProfileTierLevel) + + sizeof(StdVideoH265DecPicBufMgr) + + sizeof(StdVideoH265HrdParameters)*HEVC_MAX_LAYER_SETS + + sizeof(HEVCHeaderVPSSet *); + + size_t buf_size = base_size + vps_size*nb_vps; + + for (int i = 0; i < nb_vps; i++) { + const HEVCVPS *vps = (const HEVCVPS *)vps_list[i]->data; + buf_size += sizeof(HEVCHeaderVPSSet)*vps->vps_num_hrd_parameters; + } + + if (buf_size > s->tmp_pool_ele_size) { + av_buffer_pool_uninit(&s->tmp_pool); + s->tmp_pool_ele_size = 0; + s->tmp_pool = av_buffer_pool_init(buf_size, NULL); + if (!s->tmp_pool) + return AVERROR(ENOMEM); + s->tmp_pool_ele_size = buf_size; + } + + *data_buf = av_buffer_pool_get(s->tmp_pool); + if (!(*data_buf)) + return AVERROR(ENOMEM); + + /* Setup pointers */ + data_ptr = (*data_buf)->data; + hdr = (HEVCHeaderSet *)data_ptr; + hdr->hvps = (HEVCHeaderVPS *)(data_ptr + base_size); + data_ptr += base_size + vps_size*nb_vps; + for (int i = 0; i < nb_vps; i++) { + const HEVCVPS *vps = (const HEVCVPS *)vps_list[i]->data; + hdr->hvps[i].sls = (HEVCHeaderVPSSet *)data_ptr; + data_ptr += sizeof(HEVCHeaderVPSSet)*vps->vps_num_hrd_parameters; + } + + return 0; +} + +typedef struct HEVCVulkanDecodePicture { + FFVulkanDecodePicture vp; + + /* Current picture */ + StdVideoDecodeH265ReferenceInfo h265_ref; + VkVideoDecodeH265DpbSlotInfoKHR vkh265_ref; + + /* Picture refs */ + HEVCFrame *ref_src [HEVC_MAX_REFS]; + StdVideoDecodeH265ReferenceInfo h265_refs [HEVC_MAX_REFS]; + VkVideoDecodeH265DpbSlotInfoKHR vkh265_refs[HEVC_MAX_REFS]; + + /* Current picture (contd.) */ + StdVideoDecodeH265PictureInfo h265pic; + VkVideoDecodeH265PictureInfoKHR h265_pic_info; +} HEVCVulkanDecodePicture; + +static int vk_hevc_fill_pict(AVCodecContext *avctx, HEVCFrame **ref_src, + VkVideoReferenceSlotInfoKHR *ref_slot, /* Main structure */ + VkVideoPictureResourceInfoKHR *ref, /* Goes in ^ */ + VkVideoDecodeH265DpbSlotInfoKHR *vkh265_ref, /* Goes in ^ */ + StdVideoDecodeH265ReferenceInfo *h265_ref, /* Goes in ^ */ + HEVCFrame *pic, int is_current, int pic_id) +{ + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = (FFVulkanDecodeShared *)dec->shared_ref->data; + HEVCVulkanDecodePicture *hp = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vkpic = &hp->vp; + + int err = ff_vk_decode_prepare_frame(ctx, pic->frame, vkpic, is_current, + ctx->dedicated_dpb); + if (err < 0) + return err; + + *h265_ref = (StdVideoDecodeH265ReferenceInfo) { + .flags = (StdVideoDecodeH265ReferenceInfoFlags) { + .used_for_long_term_reference = pic->flags & HEVC_FRAME_FLAG_LONG_REF, + .unused_for_reference = 0, + }, + .PicOrderCntVal = pic->poc, + }; + + *vkh265_ref = (VkVideoDecodeH265DpbSlotInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_DPB_SLOT_INFO_KHR, + .pStdReferenceInfo = h265_ref, + }; + + *ref = (VkVideoPictureResourceInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .codedOffset = (VkOffset2D){ 0, 0 }, + .codedExtent = (VkExtent2D){ pic->frame->width, pic->frame->height }, + .baseArrayLayer = ctx->layered_dpb ? pic_id : 0, + .imageViewBinding = vkpic->img_view_ref, + }; + + *ref_slot = (VkVideoReferenceSlotInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR, + .pNext = vkh265_ref, + .slotIndex = pic_id, + .pPictureResource = ref, + }; + + if (ref_src) + *ref_src = pic; + + return 0; +} + +static void set_sps(const HEVCSPS *sps, int sps_idx, + StdVideoH265ScalingLists *vksps_scaling, + StdVideoH265HrdParameters *vksps_vui_header, + StdVideoH265SequenceParameterSetVui *vksps_vui, + StdVideoH265SequenceParameterSet *vksps, + StdVideoH265SubLayerHrdParameters *slhdrnal, + StdVideoH265SubLayerHrdParameters *slhdrvcl, + StdVideoH265ProfileTierLevel *ptl, + StdVideoH265DecPicBufMgr *dpbm, + StdVideoH265PredictorPaletteEntries *pal, + StdVideoH265ShortTermRefPicSet *str, + StdVideoH265LongTermRefPicsSps *ltr) +{ + for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS; i++) + memcpy(vksps_scaling->ScalingList4x4[i], sps->scaling_list.sl[0][i], + STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**vksps_scaling->ScalingList4x4)); + + for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS; i++) + memcpy(vksps_scaling->ScalingList8x8[i], sps->scaling_list.sl[1][i], + STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS * sizeof(**vksps_scaling->ScalingList8x8)); + + for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS; i++) + memcpy(vksps_scaling->ScalingList16x16[i], sps->scaling_list.sl[2][i], + STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**vksps_scaling->ScalingList16x16)); + + for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; i++) + memcpy(vksps_scaling->ScalingList32x32[i], sps->scaling_list.sl[3][i], + STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS * sizeof(**vksps_scaling->ScalingList32x32)); + + memcpy(vksps_scaling->ScalingListDCCoef16x16, sps->scaling_list.sl_dc[0], + STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(*vksps_scaling->ScalingListDCCoef16x16)); + + memcpy(vksps_scaling->ScalingListDCCoef32x32, sps->scaling_list.sl_dc[1], + STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS * sizeof(*vksps_scaling->ScalingListDCCoef32x32)); + + *vksps_vui_header = (StdVideoH265HrdParameters) { + .flags = (StdVideoH265HrdFlags) { + .nal_hrd_parameters_present_flag = sps->hdr.flags.nal_hrd_parameters_present_flag, + .vcl_hrd_parameters_present_flag = sps->hdr.flags.vcl_hrd_parameters_present_flag, + .sub_pic_hrd_params_present_flag = sps->hdr.flags.sub_pic_hrd_params_present_flag, + .sub_pic_cpb_params_in_pic_timing_sei_flag = sps->hdr.flags.sub_pic_cpb_params_in_pic_timing_sei_flag, + .fixed_pic_rate_general_flag = sps->hdr.flags.fixed_pic_rate_general_flag, + .fixed_pic_rate_within_cvs_flag = sps->hdr.flags.fixed_pic_rate_within_cvs_flag, + .low_delay_hrd_flag = sps->hdr.flags.low_delay_hrd_flag, + }, + .tick_divisor_minus2 = sps->hdr.tick_divisor_minus2, + .du_cpb_removal_delay_increment_length_minus1 = sps->hdr.du_cpb_removal_delay_increment_length_minus1, + .dpb_output_delay_du_length_minus1 = sps->hdr.dpb_output_delay_du_length_minus1, + .bit_rate_scale = sps->hdr.bit_rate_scale, + .cpb_size_scale = sps->hdr.cpb_size_scale, + .cpb_size_du_scale = sps->hdr.cpb_size_du_scale, + .initial_cpb_removal_delay_length_minus1 = sps->hdr.initial_cpb_removal_delay_length_minus1, + .au_cpb_removal_delay_length_minus1 = sps->hdr.au_cpb_removal_delay_length_minus1, + .dpb_output_delay_length_minus1 = sps->hdr.dpb_output_delay_length_minus1, + /* Reserved - 3*16 bits */ + .pSubLayerHrdParametersNal = slhdrnal, + .pSubLayerHrdParametersNal = slhdrvcl, + }; + + memcpy(vksps_vui_header->cpb_cnt_minus1, sps->hdr.cpb_cnt_minus1, + STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*vksps_vui_header->cpb_cnt_minus1)); + memcpy(vksps_vui_header->elemental_duration_in_tc_minus1, sps->hdr.elemental_duration_in_tc_minus1, + STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*vksps_vui_header->elemental_duration_in_tc_minus1)); + + memcpy(slhdrnal, sps->hdr.nal_params, HEVC_MAX_SUB_LAYERS*sizeof(*slhdrnal)); + memcpy(slhdrvcl, sps->hdr.vcl_params, HEVC_MAX_SUB_LAYERS*sizeof(*slhdrvcl)); + + *vksps_vui = (StdVideoH265SequenceParameterSetVui) { + .flags = (StdVideoH265SpsVuiFlags) { + .aspect_ratio_info_present_flag = sps->vui.common.aspect_ratio_info_present_flag, + .overscan_info_present_flag = sps->vui.common.overscan_info_present_flag, + .overscan_appropriate_flag = sps->vui.common.overscan_appropriate_flag, + .video_signal_type_present_flag = sps->vui.common.video_signal_type_present_flag, + .video_full_range_flag = sps->vui.common.video_full_range_flag, + .colour_description_present_flag = sps->vui.common.colour_description_present_flag, + .chroma_loc_info_present_flag = sps->vui.common.chroma_loc_info_present_flag, + .neutral_chroma_indication_flag = sps->vui.neutra_chroma_indication_flag, + .field_seq_flag = sps->vui.field_seq_flag, + .frame_field_info_present_flag = sps->vui.frame_field_info_present_flag, + .default_display_window_flag = sps->vui.default_display_window_flag, + .vui_timing_info_present_flag = sps->vui.vui_timing_info_present_flag, + .vui_poc_proportional_to_timing_flag = sps->vui.vui_poc_proportional_to_timing_flag, + .vui_hrd_parameters_present_flag = sps->vui.vui_hrd_parameters_present_flag, + .bitstream_restriction_flag = sps->vui.bitstream_restriction_flag, + .tiles_fixed_structure_flag = sps->vui.tiles_fixed_structure_flag, + .motion_vectors_over_pic_boundaries_flag = sps->vui.motion_vectors_over_pic_boundaries_flag, + .restricted_ref_pic_lists_flag = sps->vui.restricted_ref_pic_lists_flag, + }, + .aspect_ratio_idc = sps->vui.common.aspect_ratio_idc, + .sar_width = sps->vui.common.sar.num, + .sar_height = sps->vui.common.sar.den, + .video_format = sps->vui.common.video_format, + .colour_primaries = sps->vui.common.colour_primaries, + .transfer_characteristics = sps->vui.common.transfer_characteristics, + .matrix_coeffs = sps->vui.common.matrix_coeffs, + .chroma_sample_loc_type_top_field = sps->vui.common.chroma_sample_loc_type_top_field, + .chroma_sample_loc_type_bottom_field = sps->vui.common.chroma_sample_loc_type_bottom_field, + /* Reserved */ + /* Reserved */ + .def_disp_win_left_offset = sps->vui.def_disp_win.left_offset, + .def_disp_win_right_offset = sps->vui.def_disp_win.right_offset, + .def_disp_win_top_offset = sps->vui.def_disp_win.top_offset, + .def_disp_win_bottom_offset = sps->vui.def_disp_win.bottom_offset, + .vui_num_units_in_tick = sps->vui.vui_num_units_in_tick, + .vui_time_scale = sps->vui.vui_time_scale, + .vui_num_ticks_poc_diff_one_minus1 = sps->vui.vui_num_ticks_poc_diff_one_minus1, + .min_spatial_segmentation_idc = sps->vui.min_spatial_segmentation_idc, + .max_bytes_per_pic_denom = sps->vui.max_bytes_per_pic_denom, + .max_bits_per_min_cu_denom = sps->vui.max_bits_per_min_cu_denom, + .log2_max_mv_length_horizontal = sps->vui.log2_max_mv_length_horizontal, + .log2_max_mv_length_vertical = sps->vui.log2_max_mv_length_vertical, + .pHrdParameters = vksps_vui_header, + }; + + *ptl = (StdVideoH265ProfileTierLevel) { + .flags = (StdVideoH265ProfileTierLevelFlags) { + .general_tier_flag = sps->ptl.general_ptl.tier_flag, + .general_progressive_source_flag = sps->ptl.general_ptl.progressive_source_flag, + .general_interlaced_source_flag = sps->ptl.general_ptl.interlaced_source_flag, + .general_non_packed_constraint_flag = sps->ptl.general_ptl.non_packed_constraint_flag, + .general_frame_only_constraint_flag = sps->ptl.general_ptl.frame_only_constraint_flag, + }, + .general_profile_idc = sps->ptl.general_ptl.profile_idc, + .general_level_idc = sps->ptl.general_ptl.level_idc, + }; + + for (int i = 0; i < sps->max_sub_layers; i++) { + dpbm->max_latency_increase_plus1[i] = sps->temporal_layer[i].max_latency_increase + 1; + dpbm->max_dec_pic_buffering_minus1[i] = sps->temporal_layer[i].max_dec_pic_buffering - 1; + dpbm->max_num_reorder_pics[i] = sps->temporal_layer[i].num_reorder_pics; + } + + for (int i = 0; i < (sps->chroma_format_idc ? 3 : 1); i++) + for (int j = 0; j < sps->sps_num_palette_predictor_initializers; j++) + pal->PredictorPaletteEntries[i][j] = sps->sps_palette_predictor_initializer[i][j]; + + for (int i = 0; i < sps->nb_st_rps; i++) { + str[i] = (StdVideoH265ShortTermRefPicSet) { + .flags = (StdVideoH265ShortTermRefPicSetFlags) { + .inter_ref_pic_set_prediction_flag = sps->st_rps[i].rps_predict, + .delta_rps_sign = sps->st_rps[i].delta_rps_sign, + }, + .delta_idx_minus1 = sps->st_rps[i].delta_idx - 1, + .use_delta_flag = sps->st_rps[i].use_delta_flag, + .abs_delta_rps_minus1 = sps->st_rps[i].abs_delta_rps - 1, + .used_by_curr_pic_flag = 0x0, + .used_by_curr_pic_s0_flag = 0x0, + .used_by_curr_pic_s1_flag = 0x0, + /* Reserved */ + /* Reserved */ + /* Reserved */ + .num_negative_pics = sps->st_rps[i].num_negative_pics, + .num_positive_pics = sps->st_rps[i].num_delta_pocs - sps->st_rps[i].num_negative_pics, + }; + + /* NOTE: This is the predicted, and *reordered* version. + * Probably incorrect, but the spec doesn't say which version to use. */ + for (int j = 0; j < sps->st_rps[i].num_delta_pocs; j++) + str[i].used_by_curr_pic_flag |= sps->st_rps[i].used[j] << j; + + for (int j = 0; j < str[i].num_negative_pics; j++) { + str[i].delta_poc_s0_minus1[j] = sps->st_rps[i].delta_poc_s0[j] - 1; + str[i].used_by_curr_pic_s0_flag |= sps->st_rps[i].used[j] << j; + } + + for (int j = 0; j < str[i].num_positive_pics; j++) { + str[i].delta_poc_s1_minus1[j] = sps->st_rps[i].delta_poc_s1[j] - 1; + str[i].used_by_curr_pic_s0_flag |= sps->st_rps[i].used[str[i].num_negative_pics + j] << j; + } + } + + *ltr = (StdVideoH265LongTermRefPicsSps) { + .used_by_curr_pic_lt_sps_flag = 0x0, + }; + + for (int i = 0; i < sps->num_long_term_ref_pics_sps; i++) { + ltr->used_by_curr_pic_lt_sps_flag |= sps->used_by_curr_pic_lt_sps_flag[i] << i; + ltr->lt_ref_pic_poc_lsb_sps[i] = sps->lt_ref_pic_poc_lsb_sps[i]; + } + + *vksps = (StdVideoH265SequenceParameterSet) { + .flags = (StdVideoH265SpsFlags) { + .sps_temporal_id_nesting_flag = sps->temporal_id_nesting_flag, + .separate_colour_plane_flag = sps->separate_colour_plane_flag, + .conformance_window_flag = sps->conformance_window_flag, + .sps_sub_layer_ordering_info_present_flag = sps->sublayer_ordering_info_flag, + .scaling_list_enabled_flag = sps->scaling_list_enable_flag, + .sps_scaling_list_data_present_flag = sps->scaling_list_enable_flag, + .amp_enabled_flag = sps->amp_enabled_flag, + .sample_adaptive_offset_enabled_flag = sps->sao_enabled, + .pcm_enabled_flag = sps->pcm_enabled_flag, + .pcm_loop_filter_disabled_flag = sps->pcm.loop_filter_disable_flag, + .long_term_ref_pics_present_flag = sps->long_term_ref_pics_present_flag, + .sps_temporal_mvp_enabled_flag = sps->sps_temporal_mvp_enabled_flag, + .strong_intra_smoothing_enabled_flag = sps->sps_strong_intra_smoothing_enable_flag, + .vui_parameters_present_flag = sps->vui_present, + .sps_extension_present_flag = sps->sps_extension_present_flag, + .sps_range_extension_flag = sps->sps_range_extension_flag, + .transform_skip_rotation_enabled_flag = sps->transform_skip_rotation_enabled_flag, + .transform_skip_context_enabled_flag = sps->transform_skip_context_enabled_flag, + .implicit_rdpcm_enabled_flag = sps->implicit_rdpcm_enabled_flag, + .explicit_rdpcm_enabled_flag = sps->explicit_rdpcm_enabled_flag, + .extended_precision_processing_flag = sps->extended_precision_processing_flag, + .intra_smoothing_disabled_flag = sps->intra_smoothing_disabled_flag, + .high_precision_offsets_enabled_flag = sps->high_precision_offsets_enabled_flag, + .persistent_rice_adaptation_enabled_flag = sps->persistent_rice_adaptation_enabled_flag, + .cabac_bypass_alignment_enabled_flag = sps->cabac_bypass_alignment_enabled_flag, + .sps_scc_extension_flag = sps->sps_scc_extension_flag, + .sps_curr_pic_ref_enabled_flag = sps->sps_curr_pic_ref_enabled_flag, + .palette_mode_enabled_flag = sps->palette_mode_enabled_flag, + .sps_palette_predictor_initializers_present_flag = sps->sps_palette_predictor_initializers_present_flag, + .intra_boundary_filtering_disabled_flag = sps->intra_boundary_filtering_disabled_flag, + }, + .chroma_format_idc = sps->chroma_format_idc, + .pic_width_in_luma_samples = sps->width, + .pic_height_in_luma_samples = sps->height, + .sps_video_parameter_set_id = sps->vps_id, + .sps_max_sub_layers_minus1 = sps->max_sub_layers - 1, + .sps_seq_parameter_set_id = sps_idx, + .bit_depth_luma_minus8 = sps->bit_depth - 8, + .bit_depth_chroma_minus8 = sps->bit_depth_chroma - 8, + .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_poc_lsb - 4, + .log2_min_luma_coding_block_size_minus3 = sps->log2_min_cb_size - 3, + .log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_coding_block_size, + .log2_min_luma_transform_block_size_minus2 = sps->log2_min_tb_size - 2, + .log2_diff_max_min_luma_transform_block_size = sps->log2_diff_max_min_transform_block_size, + .max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter, + .max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra, + .num_short_term_ref_pic_sets = sps->nb_st_rps, + .num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps, + .pcm_sample_bit_depth_luma_minus1 = sps->pcm.bit_depth - 1, + .pcm_sample_bit_depth_chroma_minus1 = sps->pcm.bit_depth_chroma - 1, + .log2_min_pcm_luma_coding_block_size_minus3 = sps->pcm.log2_min_pcm_cb_size - 3, + .log2_diff_max_min_pcm_luma_coding_block_size = sps->pcm.log2_max_pcm_cb_size - sps->pcm.log2_min_pcm_cb_size, + /* Reserved */ + /* Reserved */ + .palette_max_size = sps->palette_max_size, + .delta_palette_max_predictor_size = sps->delta_palette_max_predictor_size, + .motion_vector_resolution_control_idc = sps->motion_vector_resolution_control_idc, + .sps_num_palette_predictor_initializers_minus1 = sps->sps_num_palette_predictor_initializers - 1, + .conf_win_left_offset = sps->pic_conf_win.left_offset, + .conf_win_right_offset = sps->pic_conf_win.right_offset, + .conf_win_top_offset = sps->pic_conf_win.top_offset, + .conf_win_bottom_offset = sps->pic_conf_win.bottom_offset, + .pProfileTierLevel = ptl, + .pDecPicBufMgr = dpbm, + .pScalingLists = vksps_scaling, + .pShortTermRefPicSet = str, + .pLongTermRefPicsSps = ltr, + .pSequenceParameterSetVui = vksps_vui, + .pPredictorPaletteEntries = pal, + }; +} + +static void set_pps(const HEVCPPS *pps, const HEVCSPS *sps, + StdVideoH265ScalingLists *vkpps_scaling, + StdVideoH265PictureParameterSet *vkpps, + StdVideoH265PredictorPaletteEntries *pal) +{ + for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS; i++) + memcpy(vkpps_scaling->ScalingList4x4[i], pps->scaling_list.sl[0][i], + STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**vkpps_scaling->ScalingList4x4)); + + for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS; i++) + memcpy(vkpps_scaling->ScalingList8x8[i], pps->scaling_list.sl[1][i], + STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS * sizeof(**vkpps_scaling->ScalingList8x8)); + + for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS; i++) + memcpy(vkpps_scaling->ScalingList16x16[i], pps->scaling_list.sl[2][i], + STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**vkpps_scaling->ScalingList16x16)); + + for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; i++) + memcpy(vkpps_scaling->ScalingList32x32[i], pps->scaling_list.sl[3][i], + STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS * sizeof(**vkpps_scaling->ScalingList32x32)); + + memcpy(vkpps_scaling->ScalingListDCCoef16x16, pps->scaling_list.sl_dc[0], + STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(*vkpps_scaling->ScalingListDCCoef16x16)); + + memcpy(vkpps_scaling->ScalingListDCCoef32x32, pps->scaling_list.sl_dc[1], + STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS * sizeof(*vkpps_scaling->ScalingListDCCoef32x32)); + + *vkpps = (StdVideoH265PictureParameterSet) { + .flags = (StdVideoH265PpsFlags) { + .dependent_slice_segments_enabled_flag = pps->dependent_slice_segments_enabled_flag, + .output_flag_present_flag = pps->output_flag_present_flag, + .sign_data_hiding_enabled_flag = pps->sign_data_hiding_flag, + .cabac_init_present_flag = pps->cabac_init_present_flag, + .constrained_intra_pred_flag = pps->constrained_intra_pred_flag, + .transform_skip_enabled_flag = pps->transform_skip_enabled_flag, + .cu_qp_delta_enabled_flag = pps->cu_qp_delta_enabled_flag, + .pps_slice_chroma_qp_offsets_present_flag = pps->pic_slice_level_chroma_qp_offsets_present_flag, + .weighted_pred_flag = pps->weighted_pred_flag, + .weighted_bipred_flag = pps->weighted_bipred_flag, + .transquant_bypass_enabled_flag = pps->transquant_bypass_enable_flag, + .tiles_enabled_flag = pps->tiles_enabled_flag, + .entropy_coding_sync_enabled_flag = pps->entropy_coding_sync_enabled_flag, + .uniform_spacing_flag = pps->uniform_spacing_flag, + .loop_filter_across_tiles_enabled_flag = pps->loop_filter_across_tiles_enabled_flag, + .pps_loop_filter_across_slices_enabled_flag = pps->seq_loop_filter_across_slices_enabled_flag, + .deblocking_filter_control_present_flag = pps->deblocking_filter_control_present_flag, + .deblocking_filter_override_enabled_flag = pps->deblocking_filter_override_enabled_flag, + .pps_deblocking_filter_disabled_flag = pps->disable_dbf, + .pps_scaling_list_data_present_flag = pps->scaling_list_data_present_flag, + .lists_modification_present_flag = pps->lists_modification_present_flag, + .slice_segment_header_extension_present_flag = pps->slice_header_extension_present_flag, + .pps_extension_present_flag = pps->pps_extension_present_flag, + .cross_component_prediction_enabled_flag = pps->cross_component_prediction_enabled_flag, + .chroma_qp_offset_list_enabled_flag = pps->chroma_qp_offset_list_enabled_flag, + .pps_curr_pic_ref_enabled_flag = pps->pps_curr_pic_ref_enabled_flag, + .residual_adaptive_colour_transform_enabled_flag = pps->residual_adaptive_colour_transform_enabled_flag, + .pps_slice_act_qp_offsets_present_flag = pps->pps_slice_act_qp_offsets_present_flag, + .pps_palette_predictor_initializers_present_flag = pps->pps_palette_predictor_initializers_present_flag, + .monochrome_palette_flag = pps->monochrome_palette_flag, + .pps_range_extension_flag = pps->pps_range_extensions_flag, + }, + .pps_pic_parameter_set_id = pps->pps_id, + .pps_seq_parameter_set_id = pps->sps_id, + .sps_video_parameter_set_id = sps->vps_id, + .num_extra_slice_header_bits = pps->num_extra_slice_header_bits, + .num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active - 1, + .num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active - 1, + .init_qp_minus26 = pps->pic_init_qp_minus26, + .diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth, + .pps_cb_qp_offset = pps->cb_qp_offset, + .pps_cr_qp_offset = pps->cr_qp_offset, + .pps_beta_offset_div2 = pps->beta_offset >> 1, + .pps_tc_offset_div2 = pps->tc_offset >> 1, + .log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level - 2, + .log2_max_transform_skip_block_size_minus2 = pps->log2_max_transform_skip_block_size - 2, + .diff_cu_chroma_qp_offset_depth = pps->diff_cu_chroma_qp_offset_depth, + .chroma_qp_offset_list_len_minus1 = pps->chroma_qp_offset_list_len_minus1, + .log2_sao_offset_scale_luma = pps->log2_sao_offset_scale_luma, + .log2_sao_offset_scale_chroma = pps->log2_sao_offset_scale_chroma, + .pps_act_y_qp_offset_plus5 = pps->pps_act_y_qp_offset + 5, + .pps_act_cb_qp_offset_plus5 = pps->pps_act_cb_qp_offset + 5, + .pps_act_cr_qp_offset_plus3 = pps->pps_act_cr_qp_offset + 3, + .pps_num_palette_predictor_initializers = pps->pps_num_palette_predictor_initializers, + .luma_bit_depth_entry_minus8 = pps->luma_bit_depth_entry - 8, + .chroma_bit_depth_entry_minus8 = pps->chroma_bit_depth_entry - 8, + .num_tile_columns_minus1 = pps->num_tile_columns - 1, + .num_tile_rows_minus1 = pps->num_tile_rows - 1, + .pScalingLists = vkpps_scaling, + .pPredictorPaletteEntries = pal, + }; + + for (int i = 0; i < (pps->monochrome_palette_flag ? 1 : 3); i++) { + for (int j = 0; j < pps->pps_num_palette_predictor_initializers; j++) + pal->PredictorPaletteEntries[i][j] = pps->pps_palette_predictor_initializer[i][j]; + } + + for (int i = 0; i < pps->num_tile_columns - 1; i++) + vkpps->column_width_minus1[i] = pps->column_width[i] - 1; + + for (int i = 0; i < pps->num_tile_rows - 1; i++) + vkpps->row_height_minus1[i] = pps->row_height[i] - 1; + + for (int i = 0; i <= pps->chroma_qp_offset_list_len_minus1; i++) { + vkpps->cb_qp_offset_list[i] = pps->cb_qp_offset_list[i]; + vkpps->cr_qp_offset_list[i] = pps->cr_qp_offset_list[i]; + } +} + +static void set_vps(const HEVCVPS *vps, + StdVideoH265VideoParameterSet *vkvps, + StdVideoH265ProfileTierLevel *ptl, + StdVideoH265DecPicBufMgr *dpbm, + StdVideoH265HrdParameters *sls_hdr, + HEVCHeaderVPSSet sls[]) +{ + for (int i = 0; i < vps->vps_num_hrd_parameters; i++) { + const HEVCHdrParams *src = &vps->hdr[i]; + + sls_hdr[i] = (StdVideoH265HrdParameters) { + .flags = (StdVideoH265HrdFlags) { + .nal_hrd_parameters_present_flag = src->flags.nal_hrd_parameters_present_flag, + .vcl_hrd_parameters_present_flag = src->flags.vcl_hrd_parameters_present_flag, + .sub_pic_hrd_params_present_flag = src->flags.sub_pic_hrd_params_present_flag, + .sub_pic_cpb_params_in_pic_timing_sei_flag = src->flags.sub_pic_cpb_params_in_pic_timing_sei_flag, + .fixed_pic_rate_general_flag = src->flags.fixed_pic_rate_general_flag, + .fixed_pic_rate_within_cvs_flag = src->flags.fixed_pic_rate_within_cvs_flag, + .low_delay_hrd_flag = src->flags.low_delay_hrd_flag, + }, + .tick_divisor_minus2 = src->tick_divisor_minus2, + .du_cpb_removal_delay_increment_length_minus1 = src->du_cpb_removal_delay_increment_length_minus1, + .dpb_output_delay_du_length_minus1 = src->dpb_output_delay_du_length_minus1, + .bit_rate_scale = src->bit_rate_scale, + .cpb_size_scale = src->cpb_size_scale, + .cpb_size_du_scale = src->cpb_size_du_scale, + .initial_cpb_removal_delay_length_minus1 = src->initial_cpb_removal_delay_length_minus1, + .au_cpb_removal_delay_length_minus1 = src->au_cpb_removal_delay_length_minus1, + .dpb_output_delay_length_minus1 = src->dpb_output_delay_length_minus1, + /* Reserved - 3*16 bits */ + .pSubLayerHrdParametersNal = sls[i].nal_hdr, + .pSubLayerHrdParametersNal = sls[i].vcl_hdr, + }; + + memcpy(sls_hdr[i].cpb_cnt_minus1, src->cpb_cnt_minus1, + STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*sls_hdr[i].cpb_cnt_minus1)); + memcpy(sls_hdr[i].elemental_duration_in_tc_minus1, src->elemental_duration_in_tc_minus1, + STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*sls_hdr[i].elemental_duration_in_tc_minus1)); + + memcpy(sls[i].nal_hdr, src->nal_params, HEVC_MAX_SUB_LAYERS*sizeof(*sls[i].nal_hdr)); + memcpy(sls[i].vcl_hdr, src->vcl_params, HEVC_MAX_SUB_LAYERS*sizeof(*sls[i].vcl_hdr)); + } + + *ptl = (StdVideoH265ProfileTierLevel) { + .flags = (StdVideoH265ProfileTierLevelFlags) { + .general_tier_flag = vps->ptl.general_ptl.tier_flag, + .general_progressive_source_flag = vps->ptl.general_ptl.progressive_source_flag, + .general_interlaced_source_flag = vps->ptl.general_ptl.interlaced_source_flag, + .general_non_packed_constraint_flag = vps->ptl.general_ptl.non_packed_constraint_flag, + .general_frame_only_constraint_flag = vps->ptl.general_ptl.frame_only_constraint_flag, + }, + .general_profile_idc = vps->ptl.general_ptl.profile_idc, + .general_level_idc = vps->ptl.general_ptl.level_idc, + }; + + for (int i = 0; i < vps->vps_max_sub_layers; i++) { + dpbm->max_latency_increase_plus1[i] = vps->vps_max_latency_increase[i] + 1; + dpbm->max_dec_pic_buffering_minus1[i] = vps->vps_max_dec_pic_buffering[i] - 1; + dpbm->max_num_reorder_pics[i] = vps->vps_num_reorder_pics[i]; + } + + *vkvps = (StdVideoH265VideoParameterSet) { + .flags = (StdVideoH265VpsFlags) { + .vps_temporal_id_nesting_flag = vps->vps_temporal_id_nesting_flag, + .vps_sub_layer_ordering_info_present_flag = vps->vps_sub_layer_ordering_info_present_flag, + .vps_timing_info_present_flag = vps->vps_timing_info_present_flag, + .vps_poc_proportional_to_timing_flag = vps->vps_poc_proportional_to_timing_flag, + }, + .vps_video_parameter_set_id = vps->vps_id, + .vps_max_sub_layers_minus1 = vps->vps_max_sub_layers - 1, + /* Reserved */ + /* Reserved */ + .vps_num_units_in_tick = vps->vps_num_units_in_tick, + .vps_time_scale = vps->vps_time_scale, + .vps_num_ticks_poc_diff_one_minus1 = vps->vps_num_ticks_poc_diff_one - 1, + /* Reserved */ + .pDecPicBufMgr = dpbm, + .pHrdParameters = sls_hdr, + .pProfileTierLevel = ptl, + }; +} + +static int vk_hevc_create_params(AVCodecContext *avctx, AVBufferRef **buf) +{ + int err; + VkResult ret; + const HEVCContext *h = avctx->priv_data; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = (FFVulkanDecodeShared *)dec->shared_ref->data; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + VkVideoDecodeH265SessionParametersAddInfoKHR h265_params_info = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_ADD_INFO_KHR, + .stdSPSCount = 0, + .stdPPSCount = 0, + .stdVPSCount = 0, + }; + VkVideoDecodeH265SessionParametersCreateInfoKHR h265_params = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_CREATE_INFO_KHR, + .pParametersAddInfo = &h265_params_info, + }; + VkVideoSessionParametersCreateInfoKHR session_params_create = { + .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR, + .pNext = &h265_params, + .videoSession = ctx->common.session, + .videoSessionParametersTemplate = NULL, + }; + + int nb_vps = 0; + AVBufferRef *data_set; + HEVCHeaderSet *hdr; + + AVBufferRef *tmp; + VkVideoSessionParametersKHR *par = av_malloc(sizeof(*par)); + if (!par) + return AVERROR(ENOMEM); + + for (int i = 0; h->ps.vps_list[i]; i++) + nb_vps++; + + err = get_data_set_buf(dec, &data_set, nb_vps, h->ps.vps_list); + if (err < 0) + return err; + + hdr = (HEVCHeaderSet *)data_set->data; + + h265_params_info.pStdSPSs = hdr->sps; + h265_params_info.pStdPPSs = hdr->pps; + h265_params_info.pStdVPSs = hdr->vps; + + /* SPS list */ + for (int i = 0; h->ps.sps_list[i]; i++) { + const HEVCSPS *sps_l = (const HEVCSPS *)h->ps.sps_list[i]->data; + set_sps(sps_l, i, &hdr->hsps[i].scaling, &hdr->hsps[i].vui_header, + &hdr->hsps[i].vui, &hdr->sps[i], hdr->hsps[i].nal_hdr, + hdr->hsps[i].vcl_hdr, &hdr->hsps[i].ptl, &hdr->hsps[i].dpbm, + &hdr->hsps[i].pal, hdr->hsps[i].str, &hdr->hsps[i].ltr); + h265_params_info.stdSPSCount++; + } + + /* PPS list */ + for (int i = 0; h->ps.pps_list[i]; i++) { + const HEVCPPS *pps_l = (const HEVCPPS *)h->ps.pps_list[i]->data; + const HEVCSPS *sps_l = (const HEVCSPS *)h->ps.sps_list[pps_l->sps_id]->data; + set_pps(pps_l, sps_l, &hdr->hpps[i].scaling, &hdr->pps[i], &hdr->hpps[i].pal); + h265_params_info.stdPPSCount++; + } + + /* VPS list */ + for (int i = 0; i < nb_vps; i++) { + const HEVCVPS *vps_l = (const HEVCVPS *)h->ps.vps_list[i]->data; + set_vps(vps_l, &hdr->vps[i], &hdr->hvps[i].ptl, &hdr->hvps[i].dpbm, + hdr->hvps[i].hdr, hdr->hvps[i].sls); + h265_params_info.stdVPSCount++; + } + + h265_params.maxStdSPSCount = h265_params_info.stdSPSCount; + h265_params.maxStdPPSCount = h265_params_info.stdPPSCount; + h265_params.maxStdVPSCount = h265_params_info.stdVPSCount; + + /* Create session parameters */ + ret = vk->CreateVideoSessionParametersKHR(ctx->s.hwctx->act_dev, &session_params_create, + ctx->s.hwctx->alloc, par); + av_buffer_unref(&data_set); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to create Vulkan video session parameters: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + tmp = av_buffer_create((uint8_t *)par, sizeof(*par), ff_vk_decode_free_params, + ctx, 0); + if (!tmp) { + ff_vk_decode_free_params(ctx, (uint8_t *)par); + return AVERROR(ENOMEM); + } + + av_log(avctx, AV_LOG_DEBUG, "Created frame parameters: %i SPS %i PPS %i VPS\n", + h265_params_info.stdSPSCount, h265_params_info.stdPPSCount, + h265_params_info.stdVPSCount); + + *buf = tmp; + + return 0; +} + +static int vk_hevc_start_frame(AVCodecContext *avctx, + av_unused const uint8_t *buffer, + av_unused uint32_t size) +{ + int err; + HEVCContext *h = avctx->priv_data; + HEVCFrame *pic = h->ref; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + HEVCVulkanDecodePicture *hp = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &hp->vp; + const HEVCSPS *sps = h->ps.sps; + const HEVCPPS *pps = h->ps.pps; + int nb_refs = 0; + + if (!dec->session_params || dec->params_changed) { + av_buffer_unref(&dec->session_params); + err = vk_hevc_create_params(avctx, &dec->session_params); + if (err < 0) + return err; + dec->params_changed = 0; + } + + hp->h265pic = (StdVideoDecodeH265PictureInfo) { + .flags = (StdVideoDecodeH265PictureInfoFlags) { + .IrapPicFlag = IS_IRAP(h), + .IdrPicFlag = IS_IDR(h), + .IsReference = h->nal_unit_type < 16 ? h->nal_unit_type & 1 : 1, + .short_term_ref_pic_set_sps_flag = h->sh.short_term_ref_pic_set_sps_flag, + }, + .sps_video_parameter_set_id = sps->vps_id, + .pps_seq_parameter_set_id = pps->sps_id, + .pps_pic_parameter_set_id = pps->pps_id, + .NumDeltaPocsOfRefRpsIdx = h->sh.short_term_rps ? h->sh.short_term_rps->rps_idx_num_delta_pocs : 0, + .PicOrderCntVal = h->poc, + .NumBitsForSTRefPicSetInSlice = !h->sh.short_term_ref_pic_set_sps_flag ? + h->sh.bits_used_for_short_term_rps : 0, + }; + + /* Fill in references */ + for (int i = 0; i < FF_ARRAY_ELEMS(h->DPB); i++) { + const HEVCFrame *ref = &h->DPB[i]; + int idx = nb_refs; + + if (!(ref->flags & (HEVC_FRAME_FLAG_SHORT_REF | HEVC_FRAME_FLAG_LONG_REF))) + continue; + + if (ref == pic) { + err = vk_hevc_fill_pict(avctx, NULL, &vp->ref_slot, &vp->ref, + &hp->vkh265_ref, &hp->h265_ref, pic, 1, i); + if (err < 0) + return err; + + continue; + } + + err = vk_hevc_fill_pict(avctx, &hp->ref_src[idx], &vp->ref_slots[idx], + &vp->refs[idx], &hp->vkh265_refs[idx], + &hp->h265_refs[idx], (HEVCFrame *)ref, 0, i); + if (err < 0) + return err; + + nb_refs++; + } + + memset(hp->h265pic.RefPicSetStCurrBefore, 0xff, 8); + for (int i = 0; i < h->rps[ST_CURR_BEF].nb_refs; i++) { + HEVCFrame *frame = h->rps[ST_CURR_BEF].ref[i]; + for (int j = 0; j < FF_ARRAY_ELEMS(h->DPB); j++) { + const HEVCFrame *ref = &h->DPB[j]; + if (ref == frame) { + hp->h265pic.RefPicSetStCurrBefore[i] = j; + break; + } + } + } + memset(hp->h265pic.RefPicSetStCurrAfter, 0xff, 8); + for (int i = 0; i < h->rps[ST_CURR_AFT].nb_refs; i++) { + HEVCFrame *frame = h->rps[ST_CURR_AFT].ref[i]; + for (int j = 0; j < FF_ARRAY_ELEMS(h->DPB); j++) { + const HEVCFrame *ref = &h->DPB[j]; + if (ref == frame) { + hp->h265pic.RefPicSetStCurrAfter[i] = j; + break; + } + } + } + memset(hp->h265pic.RefPicSetLtCurr, 0xff, 8); + for (int i = 0; i < h->rps[LT_CURR].nb_refs; i++) { + HEVCFrame *frame = h->rps[LT_CURR].ref[i]; + for (int j = 0; j < FF_ARRAY_ELEMS(h->DPB); j++) { + const HEVCFrame *ref = &h->DPB[j]; + if (ref == frame) { + hp->h265pic.RefPicSetLtCurr[i] = j; + break; + } + } + } + + hp->h265_pic_info = (VkVideoDecodeH265PictureInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PICTURE_INFO_KHR, + .pStdPictureInfo = &hp->h265pic, + .sliceSegmentCount = 0, + .pSliceSegmentOffsets = vp->slice_off, + }; + + vp->decode_info = (VkVideoDecodeInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR, + .pNext = &hp->h265_pic_info, + .flags = 0x0, + .pSetupReferenceSlot = &vp->ref_slot, + .referenceSlotCount = nb_refs, + .pReferenceSlots = vp->ref_slots, + .dstPictureResource = (VkVideoPictureResourceInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .codedOffset = (VkOffset2D){ 0, 0 }, + .codedExtent = (VkExtent2D){ pic->frame->width, pic->frame->height }, + .baseArrayLayer = 0, + .imageViewBinding = vp->img_view_out, + }, + }; + + return 0; +} + +static int vk_hevc_decode_slice(AVCodecContext *avctx, + const uint8_t *data, + uint32_t size) +{ + const HEVCContext *h = avctx->priv_data; + HEVCVulkanDecodePicture *hp = h->ref->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &hp->vp; + + int err = ff_vk_decode_add_slice(avctx, vp, data, size, 1, + &hp->h265_pic_info.sliceSegmentCount, + &hp->h265_pic_info.pSliceSegmentOffsets); + if (err < 0) + return err; + + return 0; +} + +static int vk_hevc_end_frame(AVCodecContext *avctx) +{ + const HEVCContext *h = avctx->priv_data; + HEVCFrame *pic = h->ref; + HEVCVulkanDecodePicture *hp = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &hp->vp; + FFVulkanDecodePicture *rvp[HEVC_MAX_REFS] = { 0 }; + AVFrame *rav[HEVC_MAX_REFS] = { 0 }; + + for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) { + HEVCVulkanDecodePicture *rfhp = hp->ref_src[i]->hwaccel_picture_private; + rav[i] = hp->ref_src[i]->frame; + rvp[i] = &rfhp->vp; + } + + av_log(avctx, AV_LOG_VERBOSE, "Decoding frame, %lu bytes, %i slices\n", + vp->slices_size, hp->h265_pic_info.sliceSegmentCount); + + return ff_vk_decode_frame(avctx, pic->frame, vp, rav, rvp); +} + +static void vk_hevc_free_frame_priv(void *_hwctx, uint8_t *data) +{ + AVHWDeviceContext *hwctx = _hwctx; + HEVCVulkanDecodePicture *hp = (HEVCVulkanDecodePicture *)data; + + /* Free frame resources */ + ff_vk_decode_free_frame(hwctx, &hp->vp); + + /* Free frame context */ + av_free(hp); +} + +const AVHWAccel ff_hevc_vulkan_hwaccel = { + .name = "hevc_vulkan", + .type = AVMEDIA_TYPE_VIDEO, + .id = AV_CODEC_ID_HEVC, + .pix_fmt = AV_PIX_FMT_VULKAN, + .start_frame = &vk_hevc_start_frame, + .decode_slice = &vk_hevc_decode_slice, + .end_frame = &vk_hevc_end_frame, + .free_frame_priv = &vk_hevc_free_frame_priv, + .frame_priv_data_size = sizeof(HEVCVulkanDecodePicture), + .init = &ff_vk_decode_init, + .update_thread_context = &ff_vk_update_thread_context, + .decode_params = &ff_vk_params_changed, + .flush = &ff_vk_decode_flush, + .uninit = &ff_vk_decode_uninit, + .frame_params = &ff_vk_frame_params, + .priv_data_size = sizeof(FFVulkanDecodeContext), + .caps_internal = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE, +}; diff --git a/libavcodec/vulkan_video.c b/libavcodec/vulkan_video.c new file mode 100644 index 0000000000000..f33bb33a21676 --- /dev/null +++ b/libavcodec/vulkan_video.c @@ -0,0 +1,378 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "codec_id.h" + +#include "vulkan_video.h" + +const FFVkCodecMap ff_vk_codec_map[AV_CODEC_ID_FIRST_AUDIO] = { + [AV_CODEC_ID_H264] = { +#if CONFIG_VULKAN_ENCODE + FF_VK_EXT_VIDEO_ENCODE_H264, + VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_EXT, +#else + 0, + 0, +#endif + FF_VK_EXT_VIDEO_DECODE_H264, + VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR, + }, + [AV_CODEC_ID_HEVC] = { +#if CONFIG_VULKAN_ENCODE + FF_VK_EXT_VIDEO_ENCODE_H265, + VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_EXT, +#else + 0, + 0, +#endif + FF_VK_EXT_VIDEO_DECODE_H265, + VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR + }, + [AV_CODEC_ID_AV1] = { + 0, + 0, + FF_VK_EXT_VIDEO_DECODE_AV1, + 0x01000000 /* TODO fix this */ + }, +}; + +#define ASPECT_2PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT) +#define ASPECT_3PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT) + +static const struct FFVkFormatMapEntry { + VkFormat vkf; + enum AVPixelFormat pixfmt; + VkImageAspectFlags aspect; +} vk_format_map[] = { + /* Gray formats */ + { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GRAY8, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GRAY16, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GRAYF32, VK_IMAGE_ASPECT_COLOR_BIT }, + + /* RGB formats */ + { VK_FORMAT_R16G16B16A16_UNORM, AV_PIX_FMT_XV36, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_BGRA, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_R8G8B8A8_UNORM, AV_PIX_FMT_RGBA, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_R8G8B8_UNORM, AV_PIX_FMT_RGB24, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_B8G8R8_UNORM, AV_PIX_FMT_BGR24, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_R16G16B16_UNORM, AV_PIX_FMT_RGB48, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_R16G16B16A16_UNORM, AV_PIX_FMT_RGBA64, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_R5G6B5_UNORM_PACK16, AV_PIX_FMT_RGB565, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_B5G6R5_UNORM_PACK16, AV_PIX_FMT_BGR565, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_BGR0, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_R8G8B8A8_UNORM, AV_PIX_FMT_RGB0, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_A2R10G10B10_UNORM_PACK32, AV_PIX_FMT_X2RGB10, VK_IMAGE_ASPECT_COLOR_BIT }, + + /* Planar RGB */ + { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GBRAP, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRAP16, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRPF32, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRAPF32, VK_IMAGE_ASPECT_COLOR_BIT }, + + /* Two-plane 420 YUV at 8, 10, 12 and 16 bits */ + { VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, AV_PIX_FMT_NV12, ASPECT_2PLANE }, + { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P010, ASPECT_2PLANE }, + { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P012, ASPECT_2PLANE }, + { VK_FORMAT_G16_B16R16_2PLANE_420_UNORM, AV_PIX_FMT_P016, ASPECT_2PLANE }, + + /* Two-plane 422 YUV at 8, 10 and 16 bits */ + { VK_FORMAT_G8_B8R8_2PLANE_422_UNORM, AV_PIX_FMT_NV16, ASPECT_2PLANE }, + { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P210, ASPECT_2PLANE }, + { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P212, ASPECT_2PLANE }, + { VK_FORMAT_G16_B16R16_2PLANE_422_UNORM, AV_PIX_FMT_P216, ASPECT_2PLANE }, + + /* Two-plane 444 YUV at 8, 10 and 16 bits */ + { VK_FORMAT_G8_B8R8_2PLANE_444_UNORM, AV_PIX_FMT_NV24, ASPECT_2PLANE }, + { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P410, ASPECT_2PLANE }, + { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P412, ASPECT_2PLANE }, + { VK_FORMAT_G16_B16R16_2PLANE_444_UNORM, AV_PIX_FMT_P416, ASPECT_2PLANE }, + + /* Three-plane 420, 422, 444 at 8, 10, 12 and 16 bits */ + { VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P, ASPECT_3PLANE }, + { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P10, ASPECT_3PLANE }, + { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P12, ASPECT_3PLANE }, + { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P16, ASPECT_3PLANE }, + { VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P, ASPECT_3PLANE }, + { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P10, ASPECT_3PLANE }, + { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P12, ASPECT_3PLANE }, + { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P16, ASPECT_3PLANE }, + { VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P, ASPECT_3PLANE }, + { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P10, ASPECT_3PLANE }, + { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P12, ASPECT_3PLANE }, + { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P16, ASPECT_3PLANE }, + + /* Single plane 422 at 8, 10 and 12 bits */ + { VK_FORMAT_G8B8G8R8_422_UNORM, AV_PIX_FMT_YUYV422, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_B8G8R8G8_422_UNORM, AV_PIX_FMT_UYVY422, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16, AV_PIX_FMT_Y210, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16, AV_PIX_FMT_Y212, VK_IMAGE_ASPECT_COLOR_BIT }, +}; +static const int nb_vk_format_map = FF_ARRAY_ELEMS(vk_format_map); + +enum AVPixelFormat ff_vk_pix_fmt_from_vkfmt(VkFormat vkf) +{ + for (int i = 0; i < nb_vk_format_map; i++) + if (vk_format_map[i].vkf == vkf) + return vk_format_map[i].pixfmt; + return AV_PIX_FMT_NONE; +} + +VkImageAspectFlags ff_vk_aspect_bits_from_vkfmt(VkFormat vkf) +{ + for (int i = 0; i < nb_vk_format_map; i++) + if (vk_format_map[i].vkf == vkf) + return vk_format_map[i].aspect; + return VK_IMAGE_ASPECT_NONE; +} + +VkVideoChromaSubsamplingFlagBitsKHR ff_vk_subsampling_from_av_desc(const AVPixFmtDescriptor *desc) +{ + if (desc->nb_components == 1) + return VK_VIDEO_CHROMA_SUBSAMPLING_MONOCHROME_BIT_KHR; + else if (!desc->log2_chroma_w && !desc->log2_chroma_h) + return VK_VIDEO_CHROMA_SUBSAMPLING_444_BIT_KHR; + else if (!desc->log2_chroma_w && desc->log2_chroma_h == 1) + return VK_VIDEO_CHROMA_SUBSAMPLING_422_BIT_KHR; + else if (desc->log2_chroma_w == 1 && desc->log2_chroma_h == 1) + return VK_VIDEO_CHROMA_SUBSAMPLING_420_BIT_KHR; + return VK_VIDEO_CHROMA_SUBSAMPLING_INVALID_KHR; +} + +VkVideoComponentBitDepthFlagBitsKHR ff_vk_depth_from_av_depth(int depth) +{ + switch (depth) { + case 8: return VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR; + case 10: return VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR; + case 12: return VK_VIDEO_COMPONENT_BIT_DEPTH_12_BIT_KHR; + default: break; + } + return VK_VIDEO_COMPONENT_BIT_DEPTH_INVALID_KHR; +} + +static void free_data_buf(void *opaque, uint8_t *data) +{ + FFVulkanContext *ctx = opaque; + FFVkVideoBuffer *buf = (FFVkVideoBuffer *)data; + ff_vk_unmap_buffer(ctx, &buf->buf, 0); + ff_vk_free_buf(ctx, &buf->buf); + av_free(data); +} + +static AVBufferRef *alloc_data_buf(void *opaque, size_t size) +{ + AVBufferRef *ref; + uint8_t *buf = av_mallocz(size); + if (!buf) + return NULL; + + ref = av_buffer_create(buf, size, free_data_buf, opaque, 0); + if (!ref) + av_free(buf); + return ref; +} + +int ff_vk_video_get_buffer(FFVulkanContext *ctx, FFVkVideoCommon *s, + AVBufferRef **buf, VkBufferUsageFlags usage, + void *create_pNext, size_t size) +{ + int err; + AVBufferRef *ref; + FFVkVideoBuffer *data; + + if (!s->buf_pool) { + s->buf_pool = av_buffer_pool_init2(sizeof(FFVkVideoBuffer), ctx, + alloc_data_buf, NULL); + if (!s->buf_pool) + return AVERROR(ENOMEM); + } + + *buf = ref = av_buffer_pool_get(s->buf_pool); + if (!ref) + return AVERROR(ENOMEM); + + data = (FFVkVideoBuffer *)ref->data; + + if (data->buf.size >= size) + return 0; + + /* No point in requesting anything smaller. */ + size = FFMAX(size, 1024*1024); + size = FFALIGN(size, s->caps.minBitstreamBufferSizeAlignment); + + /* Align buffer to nearest power of two. Makes fragmentation management + * easier, and gives us ample headroom. */ + size--; + size |= size >> 1; + size |= size >> 2; + size |= size >> 4; + size |= size >> 8; + size |= size >> 16; + size++; + + ff_vk_free_buf(ctx, &data->buf); + memset(data, 0, sizeof(FFVkVideoBuffer)); + + err = ff_vk_create_buf(ctx, &data->buf, size, + create_pNext, NULL, usage, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); + if (err < 0) { + av_buffer_unref(&ref); + return err; + } + + /* Map the buffer */ + err = ff_vk_map_buffer(ctx, &data->buf, &data->mem, 0); + if (err < 0) { + av_buffer_unref(&ref); + return err; + } + + return 0; +} + +av_cold void ff_vk_video_common_uninit(FFVulkanContext *s, + FFVkVideoCommon *common) +{ + FFVulkanFunctions *vk = &s->vkfn; + + if (common->session) { + vk->DestroyVideoSessionKHR(s->hwctx->act_dev, common->session, + s->hwctx->alloc); + common->session = NULL; + } + + if (common->nb_mem && common->mem) + for (int i = 0; i < common->nb_mem; i++) + vk->FreeMemory(s->hwctx->act_dev, common->mem[i], s->hwctx->alloc); + + av_freep(&common->mem); + + av_buffer_pool_uninit(&common->buf_pool); +} + +av_cold int ff_vk_video_common_init(void *log, FFVulkanContext *s, + FFVkVideoCommon *common, + VkVideoSessionCreateInfoKHR *session_create) +{ + int err; + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + VkMemoryRequirements2 *mem_req = NULL; + VkVideoSessionMemoryRequirementsKHR *mem = NULL; + VkBindVideoSessionMemoryInfoKHR *bind_mem = NULL; + + /* Create session */ + ret = vk->CreateVideoSessionKHR(s->hwctx->act_dev, session_create, + s->hwctx->alloc, &common->session); + if (ret != VK_SUCCESS) + return AVERROR_EXTERNAL; + + /* Get memory requirements */ + ret = vk->GetVideoSessionMemoryRequirementsKHR(s->hwctx->act_dev, + common->session, + &common->nb_mem, + NULL); + if (ret != VK_SUCCESS) { + err = AVERROR_EXTERNAL; + goto fail; + } + + /* Allocate all memory needed to actually allocate memory */ + common->mem = av_mallocz(sizeof(*common->mem)*common->nb_mem); + if (!common->mem) { + err = AVERROR(ENOMEM); + goto fail; + } + mem = av_mallocz(sizeof(*mem)*common->nb_mem); + if (!mem) { + err = AVERROR(ENOMEM); + goto fail; + } + mem_req = av_mallocz(sizeof(*mem_req)*common->nb_mem); + if (!mem_req) { + err = AVERROR(ENOMEM); + goto fail; + } + bind_mem = av_mallocz(sizeof(*bind_mem)*common->nb_mem); + if (!bind_mem) { + err = AVERROR(ENOMEM); + goto fail; + } + + /* Set the needed fields to get the memory requirements */ + for (int i = 0; i < common->nb_mem; i++) { + mem_req[i] = (VkMemoryRequirements2) { + .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, + }; + mem[i] = (VkVideoSessionMemoryRequirementsKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_MEMORY_REQUIREMENTS_KHR, + .memoryRequirements = mem_req[i].memoryRequirements, + }; + } + + /* Finally get the memory requirements */ + ret = vk->GetVideoSessionMemoryRequirementsKHR(s->hwctx->act_dev, + common->session, &common->nb_mem, + mem); + if (ret != VK_SUCCESS) { + err = AVERROR_EXTERNAL; + goto fail; + } + + /* Now allocate each requested memory. + * For ricing, could pool together memory that ends up in the same index. */ + for (int i = 0; i < common->nb_mem; i++) { + err = ff_vk_alloc_mem(s, &mem[i].memoryRequirements, + UINT32_MAX, NULL, NULL, &common->mem[i]); + if (err < 0) + goto fail; + + bind_mem[i] = (VkBindVideoSessionMemoryInfoKHR) { + .sType = VK_STRUCTURE_TYPE_BIND_VIDEO_SESSION_MEMORY_INFO_KHR, + .memory = common->mem[i], + .memoryBindIndex = mem[i].memoryBindIndex, + .memoryOffset = 0, + .memorySize = mem[i].memoryRequirements.size, + }; + + av_log(log, AV_LOG_VERBOSE, "Allocating %lu bytes in bind index %i for video session\n", + bind_mem[i].memorySize, bind_mem[i].memoryBindIndex); + } + + /* Bind the allocated memory */ + ret = vk->BindVideoSessionMemoryKHR(s->hwctx->act_dev, common->session, + common->nb_mem, bind_mem); + if (ret != VK_SUCCESS) { + err = AVERROR_EXTERNAL; + goto fail; + } + + av_freep(&mem); + av_freep(&mem_req); + av_freep(&bind_mem); + + return 0; + +fail: + av_freep(&mem); + av_freep(&mem_req); + av_freep(&bind_mem); + + ff_vk_video_common_uninit(s, common); + return err; +} diff --git a/libavcodec/vulkan_video.h b/libavcodec/vulkan_video.h new file mode 100644 index 0000000000000..35c0f90d28aa7 --- /dev/null +++ b/libavcodec/vulkan_video.h @@ -0,0 +1,100 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VULKAN_VIDEO_H +#define AVCODEC_VULKAN_VIDEO_H + +#include "codec_id.h" +#include "vulkan.h" + +#include +#include "vulkan_video_codec_av1std.h" +#include "vulkan_video_codec_av1std_decode.h" + +#define CODEC_VER_MAJ(ver) (ver >> 22) +#define CODEC_VER_MIN(ver) ((ver >> 12) & ((1 << 10) - 1)) +#define CODEC_VER_PAT(ver) (ver & ((1 << 12) - 1)) +#define CODEC_VER(ver) CODEC_VER_MAJ(ver), CODEC_VER_MIN(ver), CODEC_VER_PAT(ver) + +typedef struct FFVkCodecMap { + FFVulkanExtensions encode_extension; + VkVideoCodecOperationFlagBitsKHR encode_op; + FFVulkanExtensions decode_extension; + VkVideoCodecOperationFlagBitsKHR decode_op; +} FFVkCodecMap; + +typedef struct FFVkVideoSession { + VkVideoSessionKHR session; + VkDeviceMemory *mem; + uint32_t nb_mem; + VkVideoCapabilitiesKHR caps; + + AVBufferPool *buf_pool; +} FFVkVideoCommon; + +/** + * Index is codec_id. + */ +extern const FFVkCodecMap ff_vk_codec_map[AV_CODEC_ID_FIRST_AUDIO]; + +/** + * Get pixfmt from a Vulkan format. + */ +enum AVPixelFormat ff_vk_pix_fmt_from_vkfmt(VkFormat vkf); + +/** + * Get aspect bits which include all planes from a VkFormat. + */ +VkImageAspectFlags ff_vk_aspect_bits_from_vkfmt(VkFormat vkf); + +/** + * Get Vulkan's chroma subsampling from a pixfmt descriptor. + */ +VkVideoChromaSubsamplingFlagBitsKHR ff_vk_subsampling_from_av_desc(const AVPixFmtDescriptor *desc); + +/** + * Get Vulkan's bit depth from an [8:12] integer. + */ +VkVideoComponentBitDepthFlagBitsKHR ff_vk_depth_from_av_depth(int depth); + +typedef struct FFVkVideoBuffer { + FFVkBuffer buf; + uint8_t *mem; +} FFVkVideoBuffer; + +/** + * Get a mapped FFVkPooledBuffer with a specific guaranteed minimum size + * from a pool. + */ +int ff_vk_video_get_buffer(FFVulkanContext *ctx, FFVkVideoCommon *s, + AVBufferRef **buf, VkBufferUsageFlags usage, + void *create_pNext, size_t size); + +/** + * Initialize video session, allocating and binding necessary memory. + */ +int ff_vk_video_common_init(void *log, FFVulkanContext *s, + FFVkVideoCommon *common, + VkVideoSessionCreateInfoKHR *session_create); + +/** + * Free video session and required resources. + */ +void ff_vk_video_common_uninit(FFVulkanContext *s, FFVkVideoCommon *common); + +#endif /* AVCODEC_VULKAN_VIDEO_H */ diff --git a/libavcodec/vulkan_video_codec_av1std.h b/libavcodec/vulkan_video_codec_av1std.h new file mode 100644 index 0000000000000..ee64b15cb7a7d --- /dev/null +++ b/libavcodec/vulkan_video_codec_av1std.h @@ -0,0 +1,394 @@ +#ifndef VULKAN_VIDEO_CODEC_AV1STD_H_ +#define VULKAN_VIDEO_CODEC_AV1STD_H_ 1 + + +/* +** Copyright 2015-2022 The Khronos Group Inc. +** +** SPDX-License-Identifier: Apache-2.0 +*/ + +/* +** This header is NOT YET generated from the Khronos Vulkan XML API Registry. +** +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#define vulkan_video_codec_av1std 1 + +#define VK_MAKE_VIDEO_STD_VERSION(major, minor, patch) \ + ((((uint32_t)(major)) << 22) | (((uint32_t)(minor)) << 12) | ((uint32_t)(patch))) +#define VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_API_VERSION_0_1_0 VK_MAKE_VIDEO_STD_VERSION(0, 1, 0) +#define VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_API_VERSION_0_1_0 +#define VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME "VK_STD_vulkan_video_codec_av1_decode" + +typedef enum StdVideoAV1MESAProfile { + STD_VIDEO_AV1_MESA_PROFILE_MAIN = 0, + STD_VIDEO_AV1_MESA_PROFILE_HIGH = 1, + STD_VIDEO_AV1_MESA_PROFILE_PROFESSIONAL = 2, +} StdVideoAV1MESAProfile; + +typedef enum StdVideoAV1MESALevel { + STD_VIDEO_AV1_MESA_LEVEL_2_0 = 0, + STD_VIDEO_AV1_MESA_LEVEL_2_1 = 1, + STD_VIDEO_AV1_MESA_LEVEL_2_2 = 2, + STD_VIDEO_AV1_MESA_LEVEL_2_3 = 3, + STD_VIDEO_AV1_MESA_LEVEL_3_0 = 4, + STD_VIDEO_AV1_MESA_LEVEL_3_1 = 5, + STD_VIDEO_AV1_MESA_LEVEL_3_2 = 6, + STD_VIDEO_AV1_MESA_LEVEL_3_3 = 7, + STD_VIDEO_AV1_MESA_LEVEL_4_0 = 8, + STD_VIDEO_AV1_MESA_LEVEL_4_1 = 9, + STD_VIDEO_AV1_MESA_LEVEL_4_2 = 10, + STD_VIDEO_AV1_MESA_LEVEL_4_3 = 11, + STD_VIDEO_AV1_MESA_LEVEL_5_0 = 12, + STD_VIDEO_AV1_MESA_LEVEL_5_1 = 13, + STD_VIDEO_AV1_MESA_LEVEL_5_2 = 14, + STD_VIDEO_AV1_MESA_LEVEL_5_3 = 15, + STD_VIDEO_AV1_MESA_LEVEL_6_0 = 16, + STD_VIDEO_AV1_MESA_LEVEL_6_1 = 17, + STD_VIDEO_AV1_MESA_LEVEL_6_2 = 18, + STD_VIDEO_AV1_MESA_LEVEL_6_3 = 19, + STD_VIDEO_AV1_MESA_LEVEL_7_0 = 20, + STD_VIDEO_AV1_MESA_LEVEL_7_1 = 21, + STD_VIDEO_AV1_MESA_LEVEL_7_2 = 22, + STD_VIDEO_AV1_MESA_LEVEL_7_3 = 23, + STD_VIDEO_AV1_MESA_LEVEL_MAX = 31, +} StdVideoAV1MESALevel; + +typedef struct StdVideoAV1MESAFilmGrainFlags { + uint8_t apply_grain; + uint8_t chroma_scaling_from_luma; + uint8_t overlap_flag; + uint8_t clip_to_restricted_range; +} StdVideoAV1MESAFilmGrainFlags; + +typedef struct StdVideoAV1MESAFilmGrainParameters { + StdVideoAV1MESAFilmGrainFlags flags; + uint32_t grain_scaling_minus_8; + uint32_t ar_coeff_lag; + uint32_t ar_coeff_shift_minus_6; + uint32_t grain_scale_shift; + + uint16_t grain_seed; + uint8_t num_y_points; + uint8_t point_y_value[14]; + uint8_t point_y_scaling[14]; + + uint8_t num_cb_points; + uint8_t point_cb_value[10]; + uint8_t point_cb_scaling[10]; + + uint8_t num_cr_points; + uint8_t point_cr_value[10]; + uint8_t point_cr_scaling[10]; + + int8_t ar_coeffs_y_plus_128[24]; + int8_t ar_coeffs_cb_plus_128[25]; + int8_t ar_coeffs_cr_plus_128[25]; + uint8_t cb_mult; + uint8_t cb_luma_mult; + uint16_t cb_offset; + uint8_t cr_mult; + uint8_t cr_luma_mult; + uint16_t cr_offset; +} StdVideoAV1MESAFilmGrainParameters; + +typedef struct StdVideoAV1MESAGlobalMotionFlags { + uint8_t gm_invalid; +} StdVideoAV1MESAGlobalMotionFlags; + +typedef struct StdVideoAV1MESAGlobalMotion { + StdVideoAV1MESAGlobalMotionFlags flags; + uint8_t gm_type; + uint32_t gm_params[6]; +} StdVideoAV1MESAGlobalMotion; + +typedef struct StdVideoAV1MESALoopRestoration { + uint8_t lr_type[3]; + uint8_t lr_unit_shift; + uint8_t lr_uv_shift; +} StdVideoAV1MESALoopRestoration; + +typedef struct StdVideoAV1MESATileInfoFlags { + uint8_t uniform_tile_spacing_flag; +} StdVideoAV1MESATileInfoFlags; + +typedef struct StdVideoAV1MESATileInfo { + StdVideoAV1MESATileInfoFlags flags; + uint8_t tile_cols; + uint8_t tile_rows; + uint8_t tile_start_col_sb[64]; + uint8_t tile_start_row_sb[64]; + uint8_t width_in_sbs_minus_1[64]; + uint8_t height_in_sbs_minus_1[64]; + uint16_t context_update_tile_id; + uint8_t tile_size_bytes_minus1; +} StdVideoAV1MESATileInfo; + +typedef struct StdVideoAV1MESAQuantizationFlags { + uint8_t using_qmatrix; +} StdVideoAV1MESAQuantizationFlags; + +typedef struct StdVideoAV1MESAQuantization { + StdVideoAV1MESAQuantizationFlags flags; + uint8_t base_q_idx; + int8_t delta_q_y_dc; + uint8_t diff_uv_delta; + int8_t delta_q_u_dc; + int8_t delta_q_u_ac; + int8_t delta_q_v_dc; + int8_t delta_q_v_ac; + uint8_t qm_y; + uint8_t qm_u; + uint8_t qm_v; +} StdVideoAV1MESAQuantization; + +typedef struct StdVideoAV1MESACDEF { + uint8_t damping_minus_3; + uint8_t bits; + uint8_t y_pri_strength[8]; + uint8_t y_sec_strength[8]; + uint8_t uv_pri_strength[8]; + uint8_t uv_sec_strength[8]; +} StdVideoAV1MESACDEF; + +typedef struct StdVideoAV1MESADeltaQFlags { + uint8_t delta_lf_present; + uint8_t delta_lf_multi; +} StdVideoAV1MESADeltaQFlags; + +typedef struct StdVideoAV1MESADeltaQ { + StdVideoAV1MESADeltaQFlags flags; + uint8_t delta_q_res; + uint8_t delta_lf_res; +} StdVideoAV1MESADeltaQ; + +typedef struct StdVideoAV1MESASegmentationFlags { + uint8_t enabled; + uint8_t update_map; + uint8_t temporal_update; + uint8_t update_data; +} StdVideoAV1MESASegmentationFlags; + +typedef struct StdVideoAV1MESASegmentation { + StdVideoAV1MESASegmentationFlags flags; + uint8_t feature_enabled_bits[8]; + int16_t feature_data[8][8]; +} StdVideoAV1MESASegmentation; + +typedef struct StdVideoAV1MESALoopFilterFlags { + uint8_t delta_enabled; + uint8_t delta_update; +} StdVideoAV1MESALoopFilterFlags; + +typedef struct StdVideoAV1MESALoopFilter { + StdVideoAV1MESALoopFilterFlags flags; + uint8_t level[4]; + uint8_t sharpness; + int8_t ref_deltas[8]; + int8_t mode_deltas[2]; +} StdVideoAV1MESALoopFilter; + +typedef struct StdVideoAV1MESAFrameHeaderFlags { + uint8_t error_resilient_mode; + uint8_t disable_cdf_update; + uint8_t use_superres; + uint8_t render_and_frame_size_different; + uint8_t allow_screen_content_tools; + uint8_t is_filter_switchable; + uint8_t force_integer_mv; + uint8_t frame_size_override_flag; + uint8_t buffer_removal_time_present_flag; + uint8_t allow_intrabc; + uint8_t frame_refs_short_signaling; + uint8_t allow_high_precision_mv; + uint8_t is_motion_mode_switchable; + uint8_t use_ref_frame_mvs; + uint8_t disable_frame_end_update_cdf; + uint8_t allow_warped_motion; + uint8_t reduced_tx_set; + uint8_t reference_select; + uint8_t skip_mode_present; + uint8_t delta_q_present; + uint8_t UsesLr; +} StdVideoAV1MESAFrameHeaderFlags; + +typedef struct StdVideoAV1MESAFrameHeader { + StdVideoAV1MESAFrameHeaderFlags flags; + + uint32_t frame_presentation_time; + uint32_t display_frame_id; + uint32_t current_frame_id; + uint8_t frame_to_show_map_idx; + uint8_t frame_type; + uint8_t order_hint; + uint8_t primary_ref_frame; + uint16_t frame_width_minus_1; + uint16_t frame_height_minus_1; + uint16_t render_width_minus_1; + uint16_t render_height_minus_1; + uint8_t coded_denom; + + uint8_t refresh_frame_flags; + uint8_t ref_order_hint[8]; + int8_t ref_frame_idx[7]; + uint32_t delta_frame_id_minus1[7]; + + uint8_t interpolation_filter; + uint8_t tx_mode; + + StdVideoAV1MESATileInfo tiling; + StdVideoAV1MESAQuantization quantization; + StdVideoAV1MESASegmentation segmentation; + StdVideoAV1MESADeltaQ delta_q; + StdVideoAV1MESALoopFilter loop_filter; + StdVideoAV1MESACDEF cdef; + StdVideoAV1MESALoopRestoration lr; + StdVideoAV1MESAGlobalMotion global_motion[8]; // One per ref frame + StdVideoAV1MESAFilmGrainParameters film_grain; +} StdVideoAV1MESAFrameHeader; + +typedef struct StdVideoAV1MESAScreenCoding { + uint8_t seq_force_screen_content_tools; +} StdVideoAV1MESAScreenCoding; + +typedef struct StdVideoAV1MESATimingInfoFlags { + uint8_t equal_picture_interval; +} StdVideoAV1MESATimingInfoFlags; + +typedef struct StdVideoAV1MESATimingInfo { + StdVideoAV1MESATimingInfoFlags flags; + uint32_t num_units_in_display_tick; + uint32_t time_scale; + uint32_t num_ticks_per_picture_minus_1; +} StdVideoAV1MESATimingInfo; + +typedef struct StdVideoAV1MESAColorConfigFlags { + uint8_t mono_chrome; + uint8_t color_range; + uint8_t separate_uv_delta_q; +} StdVideoAV1MESAColorConfigFlags; + +typedef struct StdVideoAV1MESAColorConfig { + StdVideoAV1MESAColorConfigFlags flags; + uint8_t bit_depth; + uint8_t subsampling_x; + uint8_t subsampling_y; +} StdVideoAV1MESAColorConfig; + +typedef struct StdVideoAV1MESASequenceHeaderFlags { + uint8_t still_picture; + uint8_t reduced_still_picture_header; + uint8_t use_128x128_superblock; + uint8_t enable_filter_intra; + uint8_t enable_intra_edge_filter; + uint8_t enable_interintra_compound; + uint8_t enable_masked_compound; + uint8_t enable_warped_motion; + uint8_t enable_dual_filter; + uint8_t enable_order_hint; + uint8_t enable_jnt_comp; + uint8_t enable_ref_frame_mvs; + uint8_t frame_id_numbers_present_flag; + uint8_t enable_superres; + uint8_t enable_cdef; + uint8_t enable_restoration; + uint8_t film_grain_params_present; + uint8_t timing_info_present_flag; + uint8_t initial_display_delay_present_flag; +} StdVideoAV1MESASequenceHeaderFlags; + +typedef struct StdVideoAV1MESASequenceHeader { + StdVideoAV1MESASequenceHeaderFlags flags; + + StdVideoAV1MESAProfile seq_profile; + uint8_t frame_width_bits_minus_1; + uint8_t frame_height_bits_minus_1; + uint16_t max_frame_width_minus_1; + uint16_t max_frame_height_minus_1; + uint8_t delta_frame_id_length_minus_2; + uint8_t additional_frame_id_length_minus_1; + uint8_t order_hint_bits_minus_1; + uint8_t seq_choose_integer_mv; + uint8_t seq_force_integer_mv; + + StdVideoAV1MESATimingInfo timing_info; + StdVideoAV1MESAColorConfig color_config; +} StdVideoAV1MESASequenceHeader; + +typedef struct StdVideoAV1MESATile { + uint16_t tg_start; + uint16_t tg_end; + uint16_t row; + uint16_t column; + uint32_t size; + uint32_t offset; +} StdVideoAV1MESATile; + +typedef struct StdVideoAV1MESATileList { + StdVideoAV1MESATile *tile_list; + uint32_t nb_tiles; +} StdVideoAV1MESATileList; + +typedef struct VkVideoDecodeAV1PictureInfoMESA { + VkStructureType sType; + const void *pNext; + StdVideoAV1MESAFrameHeader *frame_header; + StdVideoAV1MESATileList *tile_list; + uint8_t skip_mode_frame_idx[2]; +} VkVideoDecodeAV1PictureInfoMESA; + +typedef struct VkVideoDecodeAV1DpbSlotInfoMESA { + VkStructureType sType; + const void *pNext; + uint8_t frameIdx; + uint8_t ref_order_hint[7]; + uint8_t disable_frame_end_update_cdf; +} VkVideoDecodeAV1DpbSlotInfoMESA; + +typedef struct VkVideoDecodeAV1SessionParametersAddInfoMESA { + VkStructureType sType; + const void *pNext; + StdVideoAV1MESASequenceHeader *sequence_header; +} VkVideoDecodeAV1SessionParametersAddInfoMESA; + +typedef struct VkVideoDecodeAV1SessionParametersCreateInfoMESA { + VkStructureType sType; + const void *pNext; + const VkVideoDecodeAV1SessionParametersAddInfoMESA *pParametersAddInfo; +} VkVideoDecodeAV1SessionParametersCreateInfoMESA; + +typedef struct VkVideoDecodeAV1ProfileInfoMESA { + VkStructureType sType; + const void *pNext; + StdVideoAV1MESAProfile stdProfileIdc; +} VkVideoDecodeAV1ProfileInfoMESA; + +typedef enum VkVideoDecodeAV1CapabilityFlagBitsMESA { + VK_VIDEO_DECODE_AV1_CAPABILITY_EXTERNAL_FILM_GRAIN_MESA = 0x00000001, + VK_VIDEO_DECODE_AV1_CAPABILITY_FLAG_BITS_MAX_ENUM_MESA = 0x7FFFFFFF +} VkVideoDecodeAV1CapabilityFlagBitsMESA; +typedef VkFlags VkVideoDecodeAV1CapabilityFlagsMESA; + +typedef struct VkVideoDecodeAV1CapabilitiesMESA { + VkStructureType sType; + const void *pNext; + VkVideoDecodeAV1CapabilityFlagsMESA flags; + StdVideoAV1MESALevel maxLevelIdc; +} VkVideoDecodeAV1CapabilitiesMESA; + +#define VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_PICTURE_INFO_MESA 1000509000 +#define VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_SESSION_PARAMETERS_CREATE_INFO_MESA 1000509001 +#define VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_SESSION_PARAMETERS_ADD_INFO_MESA 1000509002 +#define VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_DPB_SLOT_INFO_MESA 1000509003 +#define VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_CAPABILITIES_MESA 1000509004 +#define VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_PROFILE_INFO_MESA 1000509005 + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/libavcodec/vulkan_video_codec_av1std_decode.h b/libavcodec/vulkan_video_codec_av1std_decode.h new file mode 100644 index 0000000000000..fba46b63b1093 --- /dev/null +++ b/libavcodec/vulkan_video_codec_av1std_decode.h @@ -0,0 +1,27 @@ +#ifndef VULKAN_VIDEO_CODEC_AV1STD_DECODE_H_ +#define VULKAN_VIDEO_CODEC_AV1STD_DECODE_H_ 1 + + +/* +** Copyright 2015-2022 The Khronos Group Inc. +** +** SPDX-License-Identifier: Apache-2.0 +*/ + +/* +** This header is NOT YET generated from the Khronos Vulkan XML API Registry. +** +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#define vulkan_video_codec_av1std_decode 1 + + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 19283a71de193..18935b16169e0 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -213,6 +213,7 @@ OBJS-$(CONFIG_BOXBLUR_FILTER) += vf_boxblur.o boxblur.o OBJS-$(CONFIG_BOXBLUR_OPENCL_FILTER) += vf_avgblur_opencl.o opencl.o \ opencl/avgblur.o boxblur.o OBJS-$(CONFIG_BWDIF_FILTER) += vf_bwdif.o yadif_common.o +OBJS-$(CONFIG_BWDIF_VULKAN_FILTER) += vf_bwdif_vulkan.o yadif_common.o vulkan.o vulkan_filter.o OBJS-$(CONFIG_CAS_FILTER) += vf_cas.o OBJS-$(CONFIG_CCREPACK_FILTER) += vf_ccrepack.o OBJS-$(CONFIG_CHROMABER_VULKAN_FILTER) += vf_chromaber_vulkan.o vulkan.o vulkan_filter.o @@ -389,6 +390,8 @@ OBJS-$(CONFIG_MULTIPLY_FILTER) += vf_multiply.o OBJS-$(CONFIG_NEGATE_FILTER) += vf_negate.o OBJS-$(CONFIG_NLMEANS_FILTER) += vf_nlmeans.o OBJS-$(CONFIG_NLMEANS_OPENCL_FILTER) += vf_nlmeans_opencl.o opencl.o opencl/nlmeans.o +OBJS-$(CONFIG_NLMEANS_VULKAN_FILTER) += vf_nlmeans_vulkan.o vulkan.o vulkan_filter.o \ + vulkan/prefix_sum.o OBJS-$(CONFIG_NNEDI_FILTER) += vf_nnedi.o OBJS-$(CONFIG_NOFORMAT_FILTER) += vf_format.o OBJS-$(CONFIG_NOISE_FILTER) += vf_noise.o @@ -591,6 +594,7 @@ OBJS-$(CONFIG_RGBTESTSRC_FILTER) += vsrc_testsrc.o OBJS-$(CONFIG_SIERPINSKI_FILTER) += vsrc_sierpinski.o OBJS-$(CONFIG_SMPTEBARS_FILTER) += vsrc_testsrc.o OBJS-$(CONFIG_SMPTEHDBARS_FILTER) += vsrc_testsrc.o +OBJS-$(CONFIG_COLOR_VULKAN_FILTER) += vsrc_testsrc_vulkan.o vulkan.o vulkan_filter.o OBJS-$(CONFIG_TESTSRC_FILTER) += vsrc_testsrc.o OBJS-$(CONFIG_TESTSRC2_FILTER) += vsrc_testsrc.o OBJS-$(CONFIG_YUVTESTSRC_FILTER) += vsrc_testsrc.o @@ -623,6 +627,10 @@ OBJS-$(CONFIG_AVSYNCTEST_FILTER) += src_avsynctest.o OBJS-$(CONFIG_AMOVIE_FILTER) += src_movie.o OBJS-$(CONFIG_MOVIE_FILTER) += src_movie.o +# vulkan libs +OBJS-$(CONFIG_LIBGLSLANG) += vulkan_glslang.o +OBJS-$(CONFIG_LIBSHADERC) += vulkan_shaderc.o + # Objects duplicated from other libraries for shared builds SHLIBOBJS += log2_tab.o @@ -636,6 +644,8 @@ SKIPHEADERS-$(CONFIG_QSVVPP) += qsvvpp.h stack_internal.h SKIPHEADERS-$(CONFIG_OPENCL) += opencl.h SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_vpp.h stack_internal.h SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_filter.h +SKIPHEADERS-$(CONFIG_LIBSHADERC) += vulkan_spirv.h +SKIPHEADERS-$(CONFIG_LIBGLSLANG) += vulkan_spirv.h TOOLS = graph2dot TESTPROGS = drawutils filtfmts formats integral @@ -643,10 +653,17 @@ TESTPROGS = drawutils filtfmts formats integral TOOLS-$(CONFIG_LIBZMQ) += zmqsend clean:: - $(RM) $(CLEANSUFFIXES:%=libavfilter/dnn/%) $(CLEANSUFFIXES:%=libavfilter/opencl/%) + $(RM) $(CLEANSUFFIXES:%=libavfilter/dnn/%) $(CLEANSUFFIXES:%=libavfilter/opencl/%) \ + $(CLEANSUFFIXES:%=libavfilter/vulkan/%) OPENCL = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavfilter/opencl/*.cl)) .SECONDARY: $(OPENCL:.cl=.c) libavfilter/opencl/%.c: TAG = OPENCL libavfilter/opencl/%.c: $(SRC_PATH)/libavfilter/opencl/%.cl - $(M)$(SRC_PATH)/tools/cl2c $< $@ + $(M)$(SRC_PATH)/tools/source2c $< $@ + +VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavfilter/vulkan/*.comp)) +.SECONDARY: $(VULKAN:.comp=.c) +libavfilter/vulkan/%.c: TAG = OPENCL +libavfilter/vulkan/%.c: $(SRC_PATH)/libavfilter/vulkan/%.comp + $(M)$(SRC_PATH)/tools/source2c $< $@ diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index 30447a11c8da1..f1f781101bee5 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -197,6 +197,7 @@ extern const AVFilter ff_vf_bm3d; extern const AVFilter ff_vf_boxblur; extern const AVFilter ff_vf_boxblur_opencl; extern const AVFilter ff_vf_bwdif; +extern const AVFilter ff_vf_bwdif_vulkan; extern const AVFilter ff_vf_cas; extern const AVFilter ff_vf_ccrepack; extern const AVFilter ff_vf_chromaber_vulkan; @@ -367,6 +368,7 @@ extern const AVFilter ff_vf_multiply; extern const AVFilter ff_vf_negate; extern const AVFilter ff_vf_nlmeans; extern const AVFilter ff_vf_nlmeans_opencl; +extern const AVFilter ff_vf_nlmeans_vulkan; extern const AVFilter ff_vf_nnedi; extern const AVFilter ff_vf_noformat; extern const AVFilter ff_vf_noise; @@ -537,6 +539,7 @@ extern const AVFilter ff_vsrc_allrgb; extern const AVFilter ff_vsrc_allyuv; extern const AVFilter ff_vsrc_cellauto; extern const AVFilter ff_vsrc_color; +extern const AVFilter ff_vsrc_color_vulkan; extern const AVFilter ff_vsrc_colorchart; extern const AVFilter ff_vsrc_colorspectrum; extern const AVFilter ff_vsrc_coreimagesrc; diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h index 9eac2dc516a81..b6930fb686da9 100644 --- a/libavfilter/opencl_source.h +++ b/libavfilter/opencl_source.h @@ -19,19 +19,19 @@ #ifndef AVFILTER_OPENCL_SOURCE_H #define AVFILTER_OPENCL_SOURCE_H -extern const char *ff_opencl_source_avgblur; -extern const char *ff_opencl_source_colorkey; -extern const char *ff_opencl_source_colorspace_common; -extern const char *ff_opencl_source_convolution; -extern const char *ff_opencl_source_deshake; -extern const char *ff_opencl_source_neighbor; -extern const char *ff_opencl_source_nlmeans; -extern const char *ff_opencl_source_overlay; -extern const char *ff_opencl_source_pad; -extern const char *ff_opencl_source_remap; -extern const char *ff_opencl_source_tonemap; -extern const char *ff_opencl_source_transpose; -extern const char *ff_opencl_source_unsharp; -extern const char *ff_opencl_source_xfade; +extern const char *ff_source_avgblur_cl; +extern const char *ff_source_colorkey_cl; +extern const char *ff_source_colorspace_common_cl; +extern const char *ff_source_convolution_cl; +extern const char *ff_source_deshake_cl; +extern const char *ff_source_neighbor_cl; +extern const char *ff_source_nlmeans_cl; +extern const char *ff_source_overlay_cl; +extern const char *ff_source_pad_cl; +extern const char *ff_source_remap_cl; +extern const char *ff_source_tonemap_cl; +extern const char *ff_source_transpose_cl; +extern const char *ff_source_unsharp_cl; +extern const char *ff_source_xfade_cl; #endif /* AVFILTER_OPENCL_SOURCE_H */ diff --git a/libavfilter/vf_avgblur_opencl.c b/libavfilter/vf_avgblur_opencl.c index 68f3a63249231..c00d2f6363c82 100644 --- a/libavfilter/vf_avgblur_opencl.c +++ b/libavfilter/vf_avgblur_opencl.c @@ -59,7 +59,7 @@ static int avgblur_opencl_init(AVFilterContext *avctx) cl_int cle; int err; - err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_avgblur, 1); + err = ff_opencl_filter_load_program(avctx, &ff_source_avgblur_cl, 1); if (err < 0) goto fail; diff --git a/libavfilter/vf_avgblur_vulkan.c b/libavfilter/vf_avgblur_vulkan.c index 6a54d158ce1fe..4873824c70476 100644 --- a/libavfilter/vf_avgblur_vulkan.c +++ b/libavfilter/vf_avgblur_vulkan.c @@ -1,4 +1,6 @@ /* + * Copyright (c) Lynne + * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -19,23 +21,24 @@ #include "libavutil/random_seed.h" #include "libavutil/opt.h" #include "vulkan_filter.h" +#include "vulkan_spirv.h" #include "internal.h" -#define CGS 32 - typedef struct AvgBlurVulkanContext { FFVulkanContext vkctx; int initialized; + FFVkExecPool e; FFVkQueueFamilyCtx qf; - FFVkExecContext *exec; - FFVulkanPipeline *pl_hor; - FFVulkanPipeline *pl_ver; + VkSampler sampler; + FFVulkanPipeline pl; + FFVkSPIRVShader shd; - /* Shader updators, must be in the main filter struct */ - VkDescriptorImageInfo input_images[3]; - VkDescriptorImageInfo tmp_images[3]; - VkDescriptorImageInfo output_images[3]; + /* Push constants / options */ + struct { + float filter_norm[4]; + int32_t filter_len[2]; + } opts; int size_x; int size_y; @@ -43,46 +46,53 @@ typedef struct AvgBlurVulkanContext { } AvgBlurVulkanContext; static const char blur_kernel[] = { - C(0, shared vec4 cache[DIR(gl_WorkGroupSize) + FILTER_RADIUS*2 + 1]; ) - C(0, ) - C(0, void distort(const ivec2 pos, const int idx) ) - C(0, { ) - C(1, const uint cp = DIR(gl_LocalInvocationID) + FILTER_RADIUS; ) - C(0, ) - C(1, cache[cp] = texture(input_img[idx], pos); ) - C(0, ) - C(1, const ivec2 loc_l = pos - INC(FILTER_RADIUS); ) - C(1, cache[cp - FILTER_RADIUS] = texture(input_img[idx], loc_l); ) - C(0, ) - C(1, const ivec2 loc_h = pos + INC(DIR(gl_WorkGroupSize)); ) - C(1, cache[cp + DIR(gl_WorkGroupSize)] = texture(input_img[idx], loc_h); ) - C(0, ) - C(1, barrier(); ) - C(0, ) - C(1, vec4 sum = vec4(0); ) - C(1, for (int p = -FILTER_RADIUS; p <= FILTER_RADIUS; p++) ) - C(2, sum += cache[cp + p]; ) - C(0, ) - C(1, sum /= vec4(FILTER_RADIUS*2 + 1); ) - C(1, imageStore(output_img[idx], pos, sum); ) - C(0, } ) + C(0, void distort(const ivec2 pos, const int idx) ) + C(0, { ) + C(1, vec4 sum = vec4(0); ) + C(1, for (int y = -filter_len.y; y <= filter_len.y; y++) ) + C(1, for (int x = -filter_len.x; x <= filter_len.x; x++) ) + C(2, sum += texture(input_img[idx], pos + ivec2(x, y)); ) + C(0, ) + C(1, imageStore(output_img[idx], pos, sum * filter_norm); ) + C(0, } ) }; static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) { int err; - FFVkSPIRVShader *shd; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque; AvgBlurVulkanContext *s = ctx->priv; FFVulkanContext *vkctx = &s->vkctx; const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + FFVkSPIRVShader *shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } - FFVulkanDescriptorSetBinding desc_i[2] = { + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_LINEAR)); + RET(ff_vk_shader_init(&s->pl, &s->shd, "avgblur_compute", + VK_SHADER_STAGE_COMPUTE_BIT, 0)); + shd = &s->shd; + + ff_vk_shader_set_compute_sizes(shd, 32, 1, 1); + + desc = (FFVulkanDescriptorSetBinding []) { { .name = "input_img", .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .dimensions = 2, .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), }, { .name = "output_img", @@ -95,244 +105,68 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) }, }; - ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0); + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0)); - desc_i[0].sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_LINEAR); - if (!desc_i[0].sampler) - return AVERROR_EXTERNAL; + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, vec4 filter_norm; ); + GLSLC(1, ivec2 filter_len; ); + GLSLC(0, }; ); + GLSLC(0, ); - { /* Create shader for the horizontal pass */ - desc_i[0].updater = s->input_images; - desc_i[1].updater = s->tmp_images; - - s->pl_hor = ff_vk_create_pipeline(vkctx, &s->qf); - if (!s->pl_hor) - return AVERROR(ENOMEM); - - shd = ff_vk_init_shader(s->pl_hor, "avgblur_compute_hor", - VK_SHADER_STAGE_COMPUTE_BIT); - if (!shd) - return AVERROR(ENOMEM); - - ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 }); - - RET(ff_vk_add_descriptor_set(vkctx, s->pl_hor, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0)); - - GLSLF(0, #define FILTER_RADIUS (%i) ,s->size_x - 1); - GLSLC(0, #define INC(x) (ivec2(x, 0)) ); - GLSLC(0, #define DIR(var) (var.x) ); - GLSLD( blur_kernel ); - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 size; ); - GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - for (int i = 0; i < planes; i++) { - GLSLC(0, ); - GLSLF(1, size = imageSize(output_img[%i]); ,i); - GLSLC(1, if (IS_WITHIN(pos, size)) { ); - if (s->planes & (1 << i)) { - GLSLF(2, distort(pos, %i); ,i); - } else { - GLSLF(2, vec4 res = texture(input_img[%i], pos); ,i); - GLSLF(2, imageStore(output_img[%i], pos, res); ,i); - } - GLSLC(1, } ); - } - GLSLC(0, } ); - - RET(ff_vk_compile_shader(vkctx, shd, "main")); - - RET(ff_vk_init_pipeline_layout(vkctx, s->pl_hor)); - RET(ff_vk_init_compute_pipeline(vkctx, s->pl_hor)); - } + ff_vk_add_push_constant(&s->pl, 0, sizeof(s->opts), + VK_SHADER_STAGE_COMPUTE_BIT); - { /* Create shader for the vertical pass */ - desc_i[0].updater = s->tmp_images; - desc_i[1].updater = s->output_images; - - s->pl_ver = ff_vk_create_pipeline(vkctx, &s->qf); - if (!s->pl_ver) - return AVERROR(ENOMEM); - - shd = ff_vk_init_shader(s->pl_ver, "avgblur_compute_ver", - VK_SHADER_STAGE_COMPUTE_BIT); - if (!shd) - return AVERROR(ENOMEM); - - ff_vk_set_compute_shader_sizes(shd, (int [3]){ 1, CGS, 1 }); - - RET(ff_vk_add_descriptor_set(vkctx, s->pl_ver, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0)); - - GLSLF(0, #define FILTER_RADIUS (%i) ,s->size_y - 1); - GLSLC(0, #define INC(x) (ivec2(0, x)) ); - GLSLC(0, #define DIR(var) (var.y) ); - GLSLD( blur_kernel ); - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 size; ); - GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - for (int i = 0; i < planes; i++) { - GLSLC(0, ); - GLSLF(1, size = imageSize(output_img[%i]); ,i); - GLSLC(1, if (IS_WITHIN(pos, size)) { ); - if (s->planes & (1 << i)) { - GLSLF(2, distort(pos, %i); ,i); - } else { - GLSLF(2, vec4 res = texture(input_img[%i], pos); ,i); - GLSLF(2, imageStore(output_img[%i], pos, res); ,i); - } - GLSLC(1, } ); + GLSLD( blur_kernel ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + for (int i = 0; i < planes; i++) { + GLSLC(0, ); + GLSLF(1, size = imageSize(output_img[%i]); ,i); + GLSLC(1, if (!IS_WITHIN(pos, size)) ); + GLSLC(2, return; ); + if (s->planes & (1 << i)) { + GLSLF(1, distort(pos, %i); ,i); + } else { + GLSLF(1, vec4 res = texture(input_img[%i], pos); ,i); + GLSLF(1, imageStore(output_img[%i], pos, res); ,i); } - GLSLC(0, } ); - - RET(ff_vk_compile_shader(vkctx, shd, "main")); - - RET(ff_vk_init_pipeline_layout(vkctx, s->pl_ver)); - RET(ff_vk_init_compute_pipeline(vkctx, s->pl_ver)); } + GLSLC(0, } ); + + RET(spv->compile_shader(spv, ctx, &s->shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_create(vkctx, &s->shd, spv_data, spv_len, "main")); - /* Execution context */ - RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf)); + RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, &s->shd)); + RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl)); s->initialized = 1; + s->opts.filter_len[0] = s->size_x - 1; + s->opts.filter_len[1] = s->size_y - 1; + + s->opts.filter_norm[0] = s->opts.filter_len[0]*2 + 1; + s->opts.filter_norm[0] = 1.0/(s->opts.filter_norm[0]*s->opts.filter_norm[0]); + s->opts.filter_norm[1] = s->opts.filter_norm[0]; + s->opts.filter_norm[2] = s->opts.filter_norm[0]; + s->opts.filter_norm[3] = s->opts.filter_norm[0]; return 0; fail: - return err; -} - -static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *tmp_f, AVFrame *in_f) -{ - int err; - VkCommandBuffer cmd_buf; - AvgBlurVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - FFVulkanFunctions *vk = &vkctx->vkfn; - AVVkFrame *in = (AVVkFrame *)in_f->data[0]; - AVVkFrame *tmp = (AVVkFrame *)tmp_f->data[0]; - AVVkFrame *out = (AVVkFrame *)out_f->data[0]; - - const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format); - const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format); - - int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - - /* Update descriptors and init the exec context */ - ff_vk_start_exec_recording(vkctx, s->exec); - cmd_buf = ff_vk_get_exec_buf(s->exec); - - for (int i = 0; i < planes; i++) { - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->input_images[i].imageView, in->img[i], - input_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->tmp_images[i].imageView, tmp->img[i], - output_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->output_images[i].imageView, out->img[i], - output_formats[i], - ff_comp_identity_map)); - - s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->tmp_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - } - - ff_vk_update_descriptor_set(vkctx, s->pl_hor, 0); - ff_vk_update_descriptor_set(vkctx, s->pl_ver, 0); - - for (int i = 0; i < planes; i++) { - VkImageMemoryBarrier bar[] = { - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = in->layout[i], - .newLayout = s->input_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = in->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT, - .oldLayout = tmp->layout[i], - .newLayout = s->tmp_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = tmp->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .oldLayout = out->layout[i], - .newLayout = s->output_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = out->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - }; - - vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, - 0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar); - - in->layout[i] = bar[0].newLayout; - in->access[i] = bar[0].dstAccessMask; - - tmp->layout[i] = bar[1].newLayout; - tmp->access[i] = bar[1].dstAccessMask; - - out->layout[i] = bar[2].newLayout; - out->access[i] = bar[2].dstAccessMask; - } - - ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl_hor); - - vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS, - s->vkctx.output_height, 1); - - ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl_ver); - - vk->CmdDispatch(cmd_buf, s->vkctx.output_width, - FFALIGN(s->vkctx.output_height, CGS)/CGS, 1); - - ff_vk_add_exec_dep(vkctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(vkctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - - err = ff_vk_submit_exec_queue(vkctx,s->exec); - if (err) - return err; - - ff_vk_qf_rotate(&s->qf); + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); return err; - -fail: - ff_vk_discard_exec_deps(s->exec); - return err; } static int avgblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) { int err; - AVFrame *tmp = NULL, *out = NULL; + AVFrame *out = NULL; AVFilterContext *ctx = link->dst; AvgBlurVulkanContext *s = ctx->priv; AVFilterLink *outlink = ctx->outputs[0]; @@ -343,29 +177,22 @@ static int avgblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) goto fail; } - tmp = ff_get_video_buffer(outlink, outlink->w, outlink->h); - if (!tmp) { - err = AVERROR(ENOMEM); - goto fail; - } - if (!s->initialized) RET(init_filter(ctx, in)); - RET(process_frames(ctx, out, tmp, in)); + RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, + out, in, s->sampler, &s->opts, sizeof(s->opts))); err = av_frame_copy_props(out, in); if (err < 0) goto fail; av_frame_free(&in); - av_frame_free(&tmp); return ff_filter_frame(outlink, out); fail: av_frame_free(&in); - av_frame_free(&tmp); av_frame_free(&out); return err; } @@ -373,6 +200,16 @@ static int avgblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) static void avgblur_vulkan_uninit(AVFilterContext *avctx) { AvgBlurVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl); + ff_vk_shader_free(vkctx, &s->shd); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); ff_vk_uninit(&s->vkctx); @@ -382,9 +219,9 @@ static void avgblur_vulkan_uninit(AVFilterContext *avctx) #define OFFSET(x) offsetof(AvgBlurVulkanContext, x) #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) static const AVOption avgblur_vulkan_options[] = { - { "sizeX", "Set horizontal radius", OFFSET(size_x), AV_OPT_TYPE_INT, {.i64 = 3}, 1, 32, .flags = FLAGS }, + { "sizeX", "Set horizontal radius", OFFSET(size_x), AV_OPT_TYPE_INT, { .i64 = 3 }, 1, 32, .flags = FLAGS }, + { "sizeY", "Set vertical radius", OFFSET(size_y), AV_OPT_TYPE_INT, { .i64 = 3 }, 1, 32, .flags = FLAGS }, { "planes", "Set planes to filter (bitmask)", OFFSET(planes), AV_OPT_TYPE_INT, {.i64 = 0xF}, 0, 0xF, .flags = FLAGS }, - { "sizeY", "Set vertical radius", OFFSET(size_y), AV_OPT_TYPE_INT, {.i64 = 3}, 1, 32, .flags = FLAGS }, { NULL }, }; diff --git a/libavfilter/vf_blend_vulkan.c b/libavfilter/vf_blend_vulkan.c index 4cee688a22fcf..170992c3ef1ff 100644 --- a/libavfilter/vf_blend_vulkan.c +++ b/libavfilter/vf_blend_vulkan.c @@ -1,5 +1,7 @@ /* * copyright (c) 2021-2022 Wu Jianhua + * Copyright (c) Lynne + * * The blend modes are based on the blend.c. * * This file is part of FFmpeg. @@ -22,12 +24,11 @@ #include "libavutil/random_seed.h" #include "libavutil/opt.h" #include "vulkan_filter.h" +#include "vulkan_spirv.h" #include "internal.h" #include "framesync.h" #include "blend.h" -#define CGS 32 - #define IN_TOP 0 #define IN_BOTTOM 1 @@ -40,20 +41,18 @@ typedef struct FilterParamsVulkan { typedef struct BlendVulkanContext { FFVulkanContext vkctx; - FFVkQueueFamilyCtx qf; - FFVkExecContext *exec; - FFVulkanPipeline *pl; FFFrameSync fs; - VkDescriptorImageInfo top_images[3]; - VkDescriptorImageInfo bottom_images[3]; - VkDescriptorImageInfo output_images[3]; + int initialized; + FFVulkanPipeline pl; + FFVkExecPool e; + FFVkQueueFamilyCtx qf; + FFVkSPIRVShader shd; + VkSampler sampler; FilterParamsVulkan params[4]; double all_opacity; enum BlendMode all_mode; - - int initialized; } BlendVulkanContext; #define DEFINE_BLEND_MODE(MODE, EXPR) \ @@ -125,223 +124,103 @@ static int process_command(AVFilterContext *ctx, const char *cmd, const char *ar static av_cold int init_filter(AVFilterContext *avctx) { int err = 0; - FFVkSampler *sampler; - FFVkSPIRVShader *shd; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque; BlendVulkanContext *s = avctx->priv; FFVulkanContext *vkctx = &s->vkctx; const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + FFVkSPIRVShader *shd = &s->shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; - ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0); - - sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_LINEAR); - if (!sampler) + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); return AVERROR_EXTERNAL; - - s->pl = ff_vk_create_pipeline(vkctx, &s->qf); - if (!s->pl) - return AVERROR(ENOMEM); - - { - FFVulkanDescriptorSetBinding image_descs[] = { - { - .name = "top_images", - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->top_images, - .sampler = sampler, - }, - { - .name = "bottom_images", - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->bottom_images, - .sampler = sampler, - }, - { - .name = "output_images", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), - .mem_quali = "writeonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->output_images, - }, - }; - - shd = ff_vk_init_shader(s->pl, "blend_compute", image_descs[0].stages); - if (!shd) - return AVERROR(ENOMEM); - - ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, CGS, 1 }); - RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0)); - - for (int i = 0, j = 0; i < planes; i++) { - for (j = 0; j < i; j++) - if (s->params[i].blend_func == s->params[j].blend_func) - break; - /* note: the bracket is needed, for GLSLD is a macro with multiple statements. */ - if (j == i) { - GLSLD(s->params[i].blend_func); - } - } - - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 size; ); - GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - for (int i = 0; i < planes; i++) { - GLSLC(0, ); - GLSLF(1, size = imageSize(output_images[%i]); ,i); - GLSLC(1, if (IS_WITHIN(pos, size)) { ); - GLSLF(2, const vec4 top = texture(top_images[%i], pos); ,i); - GLSLF(2, const vec4 bottom = texture(bottom_images[%i], pos); ,i); - GLSLF(2, const float opacity = %f; ,s->params[i].opacity); - GLSLF(2, vec4 dst = %s(top, bottom, opacity); ,s->params[i].blend); - GLSLC(0, ); - GLSLF(2, imageStore(output_images[%i], pos, dst); ,i); - GLSLC(1, } ); - } - GLSLC(0, } ); - - RET(ff_vk_compile_shader(vkctx, shd, "main")); - RET(ff_vk_init_pipeline_layout(vkctx, s->pl)); - RET(ff_vk_init_compute_pipeline(vkctx, s->pl)); } - RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf)); - - s->initialized = 1; - -fail: - return err; -} - -static int process_frames(AVFilterContext *avctx, AVFrame *out_frame, AVFrame *top_frame, AVFrame *bottom_frame) -{ - int err = 0; - VkCommandBuffer cmd_buf; - BlendVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - FFVulkanFunctions *vk = &s->vkctx.vkfn; - const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - - AVVkFrame *out = (AVVkFrame *)out_frame->data[0]; - AVVkFrame *top = (AVVkFrame *)top_frame->data[0]; - AVVkFrame *bottom = (AVVkFrame *)bottom_frame->data[0]; - - AVHWFramesContext *top_fc = (AVHWFramesContext*)top_frame->hw_frames_ctx->data; - AVHWFramesContext *bottom_fc = (AVHWFramesContext*)bottom_frame->hw_frames_ctx->data; - - const VkFormat *top_formats = av_vkfmt_from_pixfmt(top_fc->sw_format); - const VkFormat *bottom_formats = av_vkfmt_from_pixfmt(bottom_fc->sw_format); - const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format); - - ff_vk_start_exec_recording(vkctx, s->exec); - cmd_buf = ff_vk_get_exec_buf(s->exec); - - for (int i = 0; i < planes; i++) { - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->top_images[i].imageView, top->img[i], - top_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->bottom_images[i].imageView, bottom->img[i], - bottom_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->output_images[i].imageView, out->img[i], - output_formats[i], - ff_comp_identity_map)); - - s->top_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->bottom_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST)); + RET(ff_vk_shader_init(&s->pl, &s->shd, "blend_compute", + VK_SHADER_STAGE_COMPUTE_BIT, 0)); + + ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1); + + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "top_images", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "bottom_images", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "output_images", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 3, 0, 0)); + + for (int i = 0, j = 0; i < planes; i++) { + for (j = 0; j < i; j++) + if (s->params[i].blend_func == s->params[j].blend_func) + break; + /* note: the bracket is needed, for GLSLD is a macro with multiple statements. */ + if (j == i) { + GLSLD(s->params[i].blend_func); + } } - ff_vk_update_descriptor_set(vkctx, s->pl, 0); - + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); for (int i = 0; i < planes; i++) { - VkImageMemoryBarrier barriers[] = { - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = top->layout[i], - .newLayout = s->top_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = top->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = bottom->layout[i], - .newLayout = s->bottom_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = bottom->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .oldLayout = out->layout[i], - .newLayout = s->output_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = out->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - }; - - vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, - 0, NULL, 0, NULL, FF_ARRAY_ELEMS(barriers), barriers); - - top->layout[i] = barriers[0].newLayout; - top->access[i] = barriers[0].dstAccessMask; - - bottom->layout[i] = barriers[1].newLayout; - bottom->access[i] = barriers[1].dstAccessMask; - - out->layout[i] = barriers[2].newLayout; - out->access[i] = barriers[2].dstAccessMask; + GLSLC(0, ); + GLSLF(1, size = imageSize(output_images[%i]); ,i); + GLSLC(1, if (IS_WITHIN(pos, size)) { ); + GLSLF(2, const vec4 top = texture(top_images[%i], pos); ,i); + GLSLF(2, const vec4 bottom = texture(bottom_images[%i], pos); ,i); + GLSLF(2, const float opacity = %f; ,s->params[i].opacity); + GLSLF(2, vec4 dst = %s(top, bottom, opacity); ,s->params[i].blend); + GLSLC(0, ); + GLSLF(2, imageStore(output_images[%i], pos, dst); ,i); + GLSLC(1, } ); } + GLSLC(0, } ); - ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl); - vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS) / CGS, - FFALIGN(s->vkctx.output_height, CGS) / CGS, 1); - - ff_vk_add_exec_dep(vkctx, s->exec, top_frame, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(vkctx, s->exec, bottom_frame, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(vkctx, s->exec, out_frame, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + RET(spv->compile_shader(spv, avctx, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); - err = ff_vk_submit_exec_queue(vkctx, s->exec); - if (err) - return err; + RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd)); + RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl)); - ff_vk_qf_rotate(&s->qf); - - return 0; + s->initialized = 1; fail: - ff_vk_discard_exec_deps(s->exec); + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); + return err; } @@ -375,7 +254,9 @@ static int blend_frame(FFFrameSync *fs) RET(init_filter(avctx)); } - RET(process_frames(avctx, out, top, bottom)); + RET(ff_vk_filter_process_Nin(&s->vkctx, &s->e, &s->pl, + out, (AVFrame *[]){ top, bottom }, 2, + s->sampler, NULL, 0)); return ff_filter_frame(outlink, out); @@ -396,10 +277,19 @@ static av_cold int init(AVFilterContext *avctx) static av_cold void uninit(AVFilterContext *avctx) { BlendVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; - ff_framesync_uninit(&s->fs); + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl); + ff_vk_shader_free(vkctx, &s->shd); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); ff_vk_uninit(&s->vkctx); + ff_framesync_uninit(&s->fs); s->initialized = 0; } diff --git a/libavfilter/vf_bwdif_vulkan.c b/libavfilter/vf_bwdif_vulkan.c new file mode 100644 index 0000000000000..126e852e96d4f --- /dev/null +++ b/libavfilter/vf_bwdif_vulkan.c @@ -0,0 +1,416 @@ +/* + * Copyright (c) Lynne + * Copyright (C) 2018 Philip Langdale + * Copyright (C) 2016 Thomas Mundt + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/random_seed.h" +#include "libavutil/opt.h" +#include "vulkan_filter.h" +#include "vulkan_spirv.h" +#include "yadif.h" +#include "internal.h" + +typedef struct BWDIFVulkanContext { + YADIFContext yadif; + FFVulkanContext vkctx; + + int initialized; + FFVkExecPool e; + FFVkQueueFamilyCtx qf; + VkSampler sampler; + FFVulkanPipeline pl; + FFVkSPIRVShader shd; +} BWDIFVulkanContext; + +typedef struct BWDIFParameters { + int parity; + int tff; + int current_field; +} BWDIFParameters; + +static const char filter_fn[] = { + "const vec4 coef_lf[2] = { vec4(4309), vec4(213), };\n" + "const vec4 coef_hf[3] = { vec4(5570), vec4(3801), vec4(1016) };\n" + "const vec4 coef_sp[2] = { vec4(5077), vec4(981), };\n" + C(0, ) + C(0, vec4 process_intra(vec4 cur[4]) ) + C(0, { ) + C(1, return (coef_sp[0]*(cur[1] + cur[2]) - coef_sp[1]*(cur[0] + cur[3])) / (1 << 13); ) + C(0, } ) + C(0, ) + C(0, vec4 process_line(vec4 prev2[5], vec4 prev1[2], vec4 cur[4], vec4 next1[2], vec4 next2[5]) ) + C(0, { ) + C(1, vec4 fc = cur[1]; ) + C(1, vec4 fe = cur[2]; ) + C(1, vec4 fs = prev2[2] + next2[2]; ) + C(1, vec4 fd = fs / 2; ) + C(0, ) + C(1, vec4 temp_diff[3]; ) + C(1, temp_diff[0] = abs(prev2[2] - next2[2]); ) + C(1, temp_diff[1] = (abs(prev1[0] - fc) + abs(prev1[1] - fe)) / 2; ) + C(1, temp_diff[1] = (abs(next1[0] - fc) + abs(next1[1] - fe)) / 2; ) + C(1, vec4 diff = max(temp_diff[0] / 2, max(temp_diff[1], temp_diff[2])); ) + C(1, bvec4 diff_mask = equal(diff, vec4(0)); ) + C(0, ) + C(1, vec4 fbs = prev2[1] + next2[1]; ) + C(1, vec4 ffs = prev2[3] + next2[3]; ) + C(1, vec4 fb = (fbs / 2) - fc; ) + C(1, vec4 ff = (ffs / 2) - fe; ) + C(1, vec4 dc = fd - fc; ) + C(1, vec4 de = fd - fe; ) + C(1, vec4 mmax = max(de, max(dc, min(fb, ff))); ) + C(1, vec4 mmin = min(de, min(dc, max(fb, ff))); ) + C(1, diff = max(diff, max(mmin, -mmax)); ) + C(0, ) +" vec4 interpolate_all = (((coef_hf[0]*(fs) - coef_hf[1]*(fbs + ffs) +\n" +" coef_hf[2]*(prev2[0] + next2[0] + prev2[4] + next2[4])) / 4) +\n" +" coef_lf[0]*(fc + fe) - coef_lf[1]*(cur[0] + cur[3])) / (1 << 13);\n" +" vec4 interpolate_cur = (coef_sp[0]*(fc + fe) - coef_sp[1]*(cur[0] + cur[3])) / (1 << 13);\n" + C(0, ) + C(1, bvec4 interpolate_cnd1 = greaterThan(abs(fc - fe), temp_diff[0]); ) + C(1, vec4 dst = mix(interpolate_cur, interpolate_all, interpolate_cnd1); ) + C(1, return mix(dst, fd, diff_mask); ) + C(0, } ) +}; + +static av_cold int init_filter(AVFilterContext *ctx) +{ + int err; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque; + BWDIFVulkanContext *s = ctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + FFVkSPIRVShader *shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST)); + RET(ff_vk_shader_init(&s->pl, &s->shd, "bwdif_compute", + VK_SHADER_STAGE_COMPUTE_BIT, 0)); + shd = &s->shd; + + ff_vk_shader_set_compute_sizes(shd, 1, 64, 1); + + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "prev", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "cur", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "next", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "dst", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 4, 0, 0)); + + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, int parity; ); + GLSLC(1, int tff; ); + GLSLC(1, int current_field; ); + GLSLC(0, }; ); + + ff_vk_add_push_constant(&s->pl, 0, sizeof(BWDIFParameters), + VK_SHADER_STAGE_COMPUTE_BIT); + + GLSLD( filter_fn ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, vec4 res; ); + GLSLC(1, ivec2 size; ); + GLSLC(1, vec4 dcur[4]; ); + GLSLC(1, vec4 prev1[2]; ); + GLSLC(1, vec4 next1[2]; ); + GLSLC(1, vec4 prev2[5]; ); + GLSLC(1, vec4 next2[5]; ); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + GLSLC(1, bool filter_field = ((pos.y ^ parity) & 1) == 1; ); + GLSLF(1, bool is_intra = filter_field && (current_field == %i); ,YADIF_FIELD_END); + GLSLC(1, bool field_parity = (parity ^ tff) != 0; ); + GLSLC(0, ); + + for (int i = 0; i < planes; i++) { + GLSLC(0, ); + GLSLF(1, size = imageSize(dst[%i]); ,i); + GLSLC(1, if (!IS_WITHIN(pos, size)) { ); + GLSLC(2, return; ); + GLSLC(1, } else if (is_intra) { ); + GLSLF(2, dcur[0] = texture(cur[%i], pos - ivec2(0, 3)); ,i); + GLSLF(2, dcur[1] = texture(cur[%i], pos - ivec2(0, 1)); ,i); + GLSLF(2, dcur[2] = texture(cur[%i], pos + ivec2(0, 1)); ,i); + GLSLF(2, dcur[3] = texture(cur[%i], pos + ivec2(0, 3)); ,i); + GLSLC(0, ); + GLSLC(2, res = process_intra(dcur); ); + GLSLF(2, imageStore(dst[%i], pos, res); ,i); + GLSLC(1, } else if (filter_field) { ); + GLSLF(2, dcur[0] = texture(cur[%i], pos - ivec2(0, 3)); ,i); + GLSLF(2, dcur[1] = texture(cur[%i], pos - ivec2(0, 1)); ,i); + GLSLF(2, dcur[2] = texture(cur[%i], pos + ivec2(0, 1)); ,i); + GLSLF(2, dcur[3] = texture(cur[%i], pos + ivec2(0, 3)); ,i); + GLSLC(0, ); + GLSLF(2, prev1[0] = texture(prev[%i], pos - ivec2(0, 1)); ,i); + GLSLF(2, prev1[1] = texture(prev[%i], pos + ivec2(0, 1)); ,i); + GLSLC(0, ); + GLSLF(2, next1[0] = texture(next[%i], pos - ivec2(0, 1)); ,i); + GLSLF(2, next1[1] = texture(next[%i], pos + ivec2(0, 1)); ,i); + GLSLC(0, ); + GLSLC(2, if (field_parity) { ); + GLSLF(3, prev2[0] = texture(prev[%i], pos - ivec2(0, 4)); ,i); + GLSLF(3, prev2[1] = texture(prev[%i], pos - ivec2(0, 2)); ,i); + GLSLF(3, prev2[2] = texture(prev[%i], pos); ,i); + GLSLF(3, prev2[3] = texture(prev[%i], pos + ivec2(0, 2)); ,i); + GLSLF(3, prev2[4] = texture(prev[%i], pos + ivec2(0, 4)); ,i); + GLSLC(0, ); + GLSLF(3, next2[0] = texture(cur[%i], pos - ivec2(0, 4)); ,i); + GLSLF(3, next2[1] = texture(cur[%i], pos - ivec2(0, 2)); ,i); + GLSLF(3, next2[2] = texture(cur[%i], pos); ,i); + GLSLF(3, next2[3] = texture(cur[%i], pos + ivec2(0, 2)); ,i); + GLSLF(3, next2[4] = texture(cur[%i], pos + ivec2(0, 4)); ,i); + GLSLC(2, } else { ); + GLSLF(3, prev2[0] = texture(cur[%i], pos - ivec2(0, 4)); ,i); + GLSLF(3, prev2[1] = texture(cur[%i], pos - ivec2(0, 2)); ,i); + GLSLF(3, prev2[2] = texture(cur[%i], pos); ,i); + GLSLF(3, prev2[3] = texture(cur[%i], pos + ivec2(0, 2)); ,i); + GLSLF(3, prev2[4] = texture(cur[%i], pos + ivec2(0, 4)); ,i); + GLSLC(0, ); + GLSLF(3, next2[0] = texture(next[%i], pos - ivec2(0, 4)); ,i); + GLSLF(3, next2[1] = texture(next[%i], pos - ivec2(0, 2)); ,i); + GLSLF(3, next2[2] = texture(next[%i], pos); ,i); + GLSLF(3, next2[3] = texture(next[%i], pos + ivec2(0, 2)); ,i); + GLSLF(3, next2[4] = texture(next[%i], pos + ivec2(0, 4)); ,i); + GLSLC(2, } ); + GLSLC(0, ); + GLSLC(2, res = process_line(prev2, prev1, dcur, next1, next2); ); + GLSLF(2, imageStore(dst[%i], pos, res); ,i); + GLSLC(1, } else { ); + GLSLF(2, res = texture(cur[%i], pos); ,i); + GLSLF(2, imageStore(dst[%i], pos, res); ,i); + GLSLC(1, } ); + } + + GLSLC(0, } ); + + RET(spv->compile_shader(spv, ctx, &s->shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_create(vkctx, &s->shd, spv_data, spv_len, "main")); + + RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, &s->shd)); + RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl)); + + s->initialized = 1; + + return 0; + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); + + return err; +} + +static void bwdif_vulkan_filter_frame(AVFilterContext *ctx, AVFrame *dst, + int parity, int tff) +{ + BWDIFVulkanContext *s = ctx->priv; + YADIFContext *y = &s->yadif; + BWDIFParameters params = { + .parity = parity, + .tff = tff, + .current_field = y->current_field, + }; + + ff_vk_filter_process_Nin(&s->vkctx, &s->e, &s->pl, dst, + (AVFrame *[]){ y->prev, y->cur, y->next }, 3, + s->sampler, ¶ms, sizeof(params)); + + if (y->current_field == YADIF_FIELD_END) + y->current_field = YADIF_FIELD_NORMAL; +} + +static void bwdif_vulkan_uninit(AVFilterContext *avctx) +{ + BWDIFVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl); + ff_vk_shader_free(vkctx, &s->shd); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); + + ff_vk_uninit(&s->vkctx); + + s->initialized = 0; +} + +static int bwdif_vulkan_config_input(AVFilterLink *inlink) +{ + AVHWFramesContext *input_frames; + AVFilterContext *avctx = inlink->dst; + BWDIFVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + + if (!inlink->hw_frames_ctx) { + av_log(inlink->dst, AV_LOG_ERROR, "Vulkan filtering requires a " + "hardware frames context on the input.\n"); + return AVERROR(EINVAL); + } + + input_frames = (AVHWFramesContext *)inlink->hw_frames_ctx->data; + if (input_frames->format != AV_PIX_FMT_VULKAN) + return AVERROR(EINVAL); + + /* Extract the device and default output format from the first input. */ + if (avctx->inputs[0] != inlink) + return 0; + + /* Save the ref, without reffing it */ + vkctx->input_frames_ref = inlink->hw_frames_ctx; + + /* Defaults */ + vkctx->output_format = input_frames->sw_format; + vkctx->output_width = input_frames->width; + vkctx->output_height = input_frames->height; + + return 0; +} + +static int bwdif_vulkan_config_output(AVFilterLink *outlink) +{ + int err; + AVFilterContext *avctx = outlink->src; + BWDIFVulkanContext *s = avctx->priv; + YADIFContext *y = &s->yadif; + FFVulkanContext *vkctx = &s->vkctx; + + av_buffer_unref(&outlink->hw_frames_ctx); + + err = ff_vk_filter_init_context(avctx, vkctx, vkctx->input_frames_ref, + vkctx->output_width, vkctx->output_height, + vkctx->output_format); + if (err < 0) + return err; + + /* For logging */ + vkctx->class = y->class; + + outlink->hw_frames_ctx = av_buffer_ref(vkctx->frames_ref); + if (!outlink->hw_frames_ctx) + return AVERROR(ENOMEM); + + outlink->time_base = av_mul_q(avctx->inputs[0]->time_base, (AVRational){1, 2}); + outlink->w = vkctx->output_width; + outlink->h = vkctx->output_height; + + if (y->mode & 1) + outlink->frame_rate = av_mul_q(avctx->inputs[0]->frame_rate, + (AVRational){2, 1}); + + if (outlink->w < 4 || outlink->h < 4) { + av_log(avctx, AV_LOG_ERROR, "Video of less than 4 columns or lines is not " + "supported\n"); + return AVERROR(EINVAL); + } + + y->csp = av_pix_fmt_desc_get(vkctx->frames->sw_format); + y->filter = bwdif_vulkan_filter_frame; + + return init_filter(avctx); +} + +static const AVClass bwdif_vulkan_class = { + .class_name = "bwdif_vulkan", + .item_name = av_default_item_name, + .option = ff_yadif_options, + .version = LIBAVUTIL_VERSION_INT, + .category = AV_CLASS_CATEGORY_FILTER, +}; + +static const AVFilterPad bwdif_vulkan_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = ff_yadif_filter_frame, + .config_props = &bwdif_vulkan_config_input, + }, +}; + +static const AVFilterPad bwdif_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .request_frame = ff_yadif_request_frame, + .config_props = &bwdif_vulkan_config_output, + }, +}; + +const AVFilter ff_vf_bwdif_vulkan = { + .name = "bwdif_vulkan", + .description = NULL_IF_CONFIG_SMALL("Deinterlace Vulkan frames via bwdif"), + .priv_size = sizeof(BWDIFVulkanContext), + .init = &ff_vk_filter_init, + .uninit = &bwdif_vulkan_uninit, + FILTER_INPUTS(bwdif_vulkan_inputs), + FILTER_OUTPUTS(bwdif_vulkan_outputs), + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), + .priv_class = &bwdif_vulkan_class, + .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL, + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; diff --git a/libavfilter/vf_chromaber_vulkan.c b/libavfilter/vf_chromaber_vulkan.c index 62b99cc4d9190..dcce64304efa2 100644 --- a/libavfilter/vf_chromaber_vulkan.c +++ b/libavfilter/vf_chromaber_vulkan.c @@ -1,4 +1,6 @@ /* + * Copyright (c) Lynne + * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -19,21 +21,18 @@ #include "libavutil/random_seed.h" #include "libavutil/opt.h" #include "vulkan_filter.h" +#include "vulkan_spirv.h" #include "internal.h" -#define CGROUPS (int [3]){ 32, 32, 1 } - typedef struct ChromaticAberrationVulkanContext { FFVulkanContext vkctx; int initialized; + FFVulkanPipeline pl; + FFVkExecPool e; FFVkQueueFamilyCtx qf; - FFVkExecContext *exec; - FFVulkanPipeline *pl; - - /* Shader updators, must be in the main filter struct */ - VkDescriptorImageInfo input_images[3]; - VkDescriptorImageInfo output_images[3]; + FFVkSPIRVShader shd; + VkSampler sampler; /* Push constants / options */ struct { @@ -59,7 +58,7 @@ static const char distort_chroma_kernel[] = { C(0, { ) C(1, vec2 p = ((vec2(pos)/vec2(size)) - 0.5f)*2.0f; ) C(1, float d = sqrt(p.x*p.x + p.y*p.y); ) - C(1, p *= d / (d* dist); ) + C(1, p *= d / (d*dist); ) C(1, vec4 res = texture(input_img[idx], (p/2.0f) + 0.5f); ) C(1, imageStore(output_img[idx], pos, res); ) C(0, } ) @@ -68,205 +67,102 @@ static const char distort_chroma_kernel[] = { static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) { int err; - FFVkSampler *sampler; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque; ChromaticAberrationVulkanContext *s = ctx->priv; FFVulkanContext *vkctx = &s->vkctx; const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - - ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0); - - /* Create a sampler */ - sampler = ff_vk_init_sampler(vkctx, 0, VK_FILTER_LINEAR); - if (!sampler) - return AVERROR_EXTERNAL; - - s->pl = ff_vk_create_pipeline(vkctx, &s->qf); - if (!s->pl) - return AVERROR(ENOMEM); + FFVkSPIRVShader *shd = &s->shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; /* Normalize options */ s->opts.dist[0] = (s->opts.dist[0] / 100.0f) + 1.0f; s->opts.dist[1] = (s->opts.dist[1] / 100.0f) + 1.0f; - { /* Create the shader */ - FFVulkanDescriptorSetBinding desc_i[2] = { - { - .name = "input_img", - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->input_images, - .sampler = sampler, - }, - { - .name = "output_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), - .mem_quali = "writeonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->output_images, - }, - }; - - FFVkSPIRVShader *shd = ff_vk_init_shader(s->pl, "chromaber_compute", - VK_SHADER_STAGE_COMPUTE_BIT); - if (!shd) - return AVERROR(ENOMEM); - - ff_vk_set_compute_shader_sizes(shd, CGROUPS); - - GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); - GLSLC(1, vec2 dist; ); - GLSLC(0, }; ); - GLSLC(0, ); - - ff_vk_add_push_constant(s->pl, 0, sizeof(s->opts), - VK_SHADER_STAGE_COMPUTE_BIT); - - RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0)); /* set 0 */ - - GLSLD( distort_chroma_kernel ); - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - if (planes == 1) { - GLSLC(1, distort_rgb(imageSize(output_img[0]), pos); ); - } else { - GLSLC(1, ivec2 size = imageSize(output_img[0]); ); - GLSLC(1, vec2 npos = vec2(pos)/vec2(size); ); - GLSLC(1, vec4 res = texture(input_img[0], npos); ); - GLSLC(1, imageStore(output_img[0], pos, res); ); - for (int i = 1; i < planes; i++) { - GLSLC(0, ); - GLSLF(1, size = imageSize(output_img[%i]); ,i); - GLSLC(1, if (IS_WITHIN(pos, size)) { ); - GLSLF(2, distort_chroma(%i, size, pos); ,i); - GLSLC(1, } else { ); - GLSLC(2, npos = vec2(pos)/vec2(size); ); - GLSLF(2, res = texture(input_img[%i], npos); ,i); - GLSLF(2, imageStore(output_img[%i], pos, res); ,i); - GLSLC(1, } ); - } - } - GLSLC(0, } ); + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } - RET(ff_vk_compile_shader(vkctx, shd, "main")); + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 0, VK_FILTER_LINEAR)); + RET(ff_vk_shader_init(&s->pl, &s->shd, "chromaber_compute", + VK_SHADER_STAGE_COMPUTE_BIT, 0)); + + ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1); + + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, vec2 dist; ); + GLSLC(0, }; ); + GLSLC(0, ); + + ff_vk_add_push_constant(&s->pl, 0, sizeof(s->opts), + VK_SHADER_STAGE_COMPUTE_BIT); + + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "input_img", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "output_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0)); + + GLSLD( distort_chroma_kernel ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + if (planes == 1) { + GLSLC(1, distort_rgb(imageSize(output_img[0]), pos); ); + } else { + GLSLC(1, ivec2 size = imageSize(output_img[0]); ); + GLSLC(1, vec2 npos = vec2(pos)/vec2(size); ); + GLSLC(1, vec4 res = texture(input_img[0], npos); ); + GLSLC(1, imageStore(output_img[0], pos, res); ); + for (int i = 1; i < planes; i++) { + GLSLC(0, ); + GLSLF(1, size = imageSize(output_img[%i]); ,i); + GLSLC(1, if (!IS_WITHIN(pos, size)) ); + GLSLC(2, return; ); + GLSLF(1, distort_chroma(%i, size, pos); ,i); + } } + GLSLC(0, } ); - RET(ff_vk_init_pipeline_layout(vkctx, s->pl)); - RET(ff_vk_init_compute_pipeline(vkctx, s->pl)); + RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); - /* Execution context */ - RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf)); + RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd)); + RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl)); s->initialized = 1; return 0; fail: - return err; -} - -static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f) -{ - int err = 0; - VkCommandBuffer cmd_buf; - ChromaticAberrationVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - FFVulkanFunctions *vk = &vkctx->vkfn; - AVVkFrame *in = (AVVkFrame *)in_f->data[0]; - AVVkFrame *out = (AVVkFrame *)out_f->data[0]; - int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format); - const VkFormat *ouput_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format); - - /* Update descriptors and init the exec context */ - ff_vk_start_exec_recording(vkctx, s->exec); - cmd_buf = ff_vk_get_exec_buf(s->exec); - - for (int i = 0; i < planes; i++) { - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->input_images[i].imageView, in->img[i], - input_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->output_images[i].imageView, out->img[i], - ouput_formats[i], - ff_comp_identity_map)); - - s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - } - - ff_vk_update_descriptor_set(vkctx, s->pl, 0); - - for (int i = 0; i < planes; i++) { - VkImageMemoryBarrier bar[2] = { - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = in->layout[i], - .newLayout = s->input_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = in->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .oldLayout = out->layout[i], - .newLayout = s->output_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = out->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - }; - - vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, - 0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar); - - in->layout[i] = bar[0].newLayout; - in->access[i] = bar[0].dstAccessMask; - - out->layout[i] = bar[1].newLayout; - out->access[i] = bar[1].dstAccessMask; - } - - ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl); - - ff_vk_update_push_exec(vkctx, s->exec, VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(s->opts), &s->opts); - - vk->CmdDispatch(cmd_buf, - FFALIGN(s->vkctx.output_width, CGROUPS[0])/CGROUPS[0], - FFALIGN(s->vkctx.output_height, CGROUPS[1])/CGROUPS[1], 1); - - ff_vk_add_exec_dep(vkctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(vkctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - - err = ff_vk_submit_exec_queue(vkctx, s->exec); - if (err) - return err; + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); - ff_vk_qf_rotate(&s->qf); - - return err; - -fail: - ff_vk_discard_exec_deps(s->exec); return err; } @@ -286,7 +182,8 @@ static int chromaber_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) if (!s->initialized) RET(init_filter(ctx, in)); - RET(process_frames(ctx, out, in)); + RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, out, in, + s->sampler, &s->opts, sizeof(s->opts))); err = av_frame_copy_props(out, in); if (err < 0) @@ -305,6 +202,16 @@ static int chromaber_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) static void chromaber_vulkan_uninit(AVFilterContext *avctx) { ChromaticAberrationVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl); + ff_vk_shader_free(vkctx, &s->shd); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); ff_vk_uninit(&s->vkctx); diff --git a/libavfilter/vf_colorkey_opencl.c b/libavfilter/vf_colorkey_opencl.c index 2b019b290c16b..94361df88fed6 100644 --- a/libavfilter/vf_colorkey_opencl.c +++ b/libavfilter/vf_colorkey_opencl.c @@ -52,7 +52,7 @@ static int colorkey_opencl_init(AVFilterContext *avctx) cl_int cle; int err; - err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_colorkey, 1); + err = ff_opencl_filter_load_program(avctx, &ff_source_colorkey_cl, 1); if (err < 0) goto fail; diff --git a/libavfilter/vf_convolution_opencl.c b/libavfilter/vf_convolution_opencl.c index bf721a7416ba3..0eff9f40d3b21 100644 --- a/libavfilter/vf_convolution_opencl.c +++ b/libavfilter/vf_convolution_opencl.c @@ -62,7 +62,7 @@ static int convolution_opencl_init(AVFilterContext *avctx) cl_int cle; int err; - err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_convolution, 1); + err = ff_opencl_filter_load_program(avctx, &ff_source_convolution_cl, 1); if (err < 0) goto fail; diff --git a/libavfilter/vf_deshake_opencl.c b/libavfilter/vf_deshake_opencl.c index e670a4cc2349e..8db59767bdc71 100644 --- a/libavfilter/vf_deshake_opencl.c +++ b/libavfilter/vf_deshake_opencl.c @@ -1251,7 +1251,7 @@ static int deshake_opencl_init(AVFilterContext *avctx) } ctx->sw_format = hw_frames_ctx->sw_format; - err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_deshake, 1); + err = ff_opencl_filter_load_program(avctx, &ff_source_deshake_cl, 1); if (err < 0) goto fail; diff --git a/libavfilter/vf_flip_vulkan.c b/libavfilter/vf_flip_vulkan.c index 6868e39ee6321..4279dd2123ab1 100644 --- a/libavfilter/vf_flip_vulkan.c +++ b/libavfilter/vf_flip_vulkan.c @@ -1,5 +1,7 @@ /* * copyright (c) 2021 Wu Jianhua + * Copyright (c) Lynne + * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -20,10 +22,9 @@ #include "libavutil/random_seed.h" #include "libavutil/opt.h" #include "vulkan_filter.h" +#include "vulkan_spirv.h" #include "internal.h" -#define CGS 32 - enum FlipType { FLIP_VERTICAL, FLIP_HORIZONTAL, @@ -32,32 +33,50 @@ enum FlipType { typedef struct FlipVulkanContext { FFVulkanContext vkctx; - FFVkQueueFamilyCtx qf; - FFVkExecContext *exec; - FFVulkanPipeline *pl; - - VkDescriptorImageInfo input_images[3]; - VkDescriptorImageInfo output_images[3]; int initialized; + FFVulkanPipeline pl; + FFVkExecPool e; + FFVkQueueFamilyCtx qf; + FFVkSPIRVShader shd; + VkSampler sampler; } FlipVulkanContext; static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in, enum FlipType type) { int err = 0; - FFVkSPIRVShader *shd; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque; FlipVulkanContext *s = ctx->priv; FFVulkanContext *vkctx = &s->vkctx; const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + FFVkSPIRVShader *shd = &s->shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_LINEAR)); + RET(ff_vk_shader_init(&s->pl, &s->shd, "flip_compute", + VK_SHADER_STAGE_COMPUTE_BIT, 0)); + + ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1); - FFVulkanDescriptorSetBinding image_descs[] = { + desc = (FFVulkanDescriptorSetBinding []) { { .name = "input_image", .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .dimensions = 2, .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->input_images, + .samplers = DUP_SAMPLER(s->sampler), }, { .name = "output_image", @@ -67,167 +86,75 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in, enum FlipType .dimensions = 2, .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->output_images, }, }; - image_descs[0].sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_LINEAR); - if (!image_descs[0].sampler) - return AVERROR_EXTERNAL; + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0)); - ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0); - - { - s->pl = ff_vk_create_pipeline(vkctx, &s->qf); - if (!s->pl) - return AVERROR(ENOMEM); - - shd = ff_vk_init_shader(s->pl, "flip_compute", image_descs[0].stages); - if (!shd) - return AVERROR(ENOMEM); - - ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 }); - RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0)); - - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 size; ); - GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - for (int i = 0; i < planes; i++) { - GLSLC(0, ); - GLSLF(1, size = imageSize(output_image[%i]); ,i); - GLSLC(1, if (IS_WITHIN(pos, size)) { ); - switch (type) - { - case FLIP_HORIZONTAL: - GLSLF(2, vec4 res = texture(input_image[%i], ivec2(size.x - pos.x, pos.y)); ,i); - break; - case FLIP_VERTICAL: - GLSLF(2, vec4 res = texture(input_image[%i], ivec2(pos.x, size.y - pos.y)); ,i); - break; - case FLIP_BOTH: - GLSLF(2, vec4 res = texture(input_image[%i], ivec2(size.xy - pos.xy));, i); - break; - default: - GLSLF(2, vec4 res = texture(input_image[%i], pos); ,i); - break; - } - GLSLF(2, imageStore(output_image[%i], pos, res); ,i); - GLSLC(1, } ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + for (int i = 0; i < planes; i++) { + GLSLC(0, ); + GLSLF(1, size = imageSize(output_image[%i]); ,i); + GLSLC(1, if (IS_WITHIN(pos, size)) { ); + switch (type) + { + case FLIP_HORIZONTAL: + GLSLF(2, vec4 res = texture(input_image[%i], ivec2(size.x - pos.x, pos.y)); ,i); + break; + case FLIP_VERTICAL: + GLSLF(2, vec4 res = texture(input_image[%i], ivec2(pos.x, size.y - pos.y)); ,i); + break; + case FLIP_BOTH: + GLSLF(2, vec4 res = texture(input_image[%i], ivec2(size.xy - pos.xy));, i); + break; + default: + GLSLF(2, vec4 res = texture(input_image[%i], pos); ,i); + break; } - GLSLC(0, } ); - - RET(ff_vk_compile_shader(vkctx, shd, "main")); - RET(ff_vk_init_pipeline_layout(vkctx, s->pl)); - RET(ff_vk_init_compute_pipeline(vkctx, s->pl)); + GLSLF(2, imageStore(output_image[%i], pos, res); ,i); + GLSLC(1, } ); } + GLSLC(0, } ); + + RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); + + RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd)); + RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl)); - RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf)); s->initialized = 1; fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); + return err; } static av_cold void flip_vulkan_uninit(AVFilterContext *avctx) { FlipVulkanContext *s = avctx->priv; - ff_vk_uninit(&s->vkctx); - s->initialized = 0; -} - -static int process_frames(AVFilterContext *avctx, AVFrame *outframe, AVFrame *inframe) -{ - int err = 0; - VkCommandBuffer cmd_buf; - FlipVulkanContext *s = avctx->priv; FFVulkanContext *vkctx = &s->vkctx; - FFVulkanFunctions *vk = &s->vkctx.vkfn; - AVVkFrame *in = (AVVkFrame *)inframe->data[0]; - AVVkFrame *out = (AVVkFrame *)outframe->data[0]; - const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format); - const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format); - - ff_vk_start_exec_recording(vkctx, s->exec); - cmd_buf = ff_vk_get_exec_buf(s->exec); - - for (int i = 0; i < planes; i++) { - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->input_images[i].imageView, in->img[i], - input_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->output_images[i].imageView, out->img[i], - output_formats[i], - ff_comp_identity_map)); - - s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - } + FFVulkanFunctions *vk = &vkctx->vkfn; - ff_vk_update_descriptor_set(vkctx, s->pl, 0); + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl); + ff_vk_shader_free(vkctx, &s->shd); - for (int i = 0; i < planes; i++) { - VkImageMemoryBarrier barriers[] = { - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = in->layout[i], - .newLayout = s->input_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = in->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .oldLayout = out->layout[i], - .newLayout = s->output_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = out->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - }; - - vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, - 0, NULL, 0, NULL, FF_ARRAY_ELEMS(barriers), barriers); - - in->layout[i] = barriers[0].newLayout; - in->access[i] = barriers[0].dstAccessMask; - - out->layout[i] = barriers[1].newLayout; - out->access[i] = barriers[1].dstAccessMask; - } - - ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl); - vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS, - s->vkctx.output_height, 1); - - ff_vk_add_exec_dep(vkctx, s->exec, inframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(vkctx, s->exec, outframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - - err = ff_vk_submit_exec_queue(vkctx, s->exec); - if (err) - return err; + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); - ff_vk_qf_rotate(&s->qf); + ff_vk_uninit(&s->vkctx); - return 0; -fail: - ff_vk_discard_exec_deps(s->exec); - return err; + s->initialized = 0; } static int filter_frame(AVFilterLink *link, AVFrame *in, enum FlipType type) @@ -247,7 +174,8 @@ static int filter_frame(AVFilterLink *link, AVFrame *in, enum FlipType type) if (!s->initialized) RET(init_filter(ctx, in, type)); - RET(process_frames(ctx, out, in)); + RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, out, in, + s->sampler, NULL, 0)); RET(av_frame_copy_props(out, in)); diff --git a/libavfilter/vf_gblur_vulkan.c b/libavfilter/vf_gblur_vulkan.c index e6ffc8c07359e..0f0f5dff43243 100644 --- a/libavfilter/vf_gblur_vulkan.c +++ b/libavfilter/vf_gblur_vulkan.c @@ -1,5 +1,7 @@ /* * copyright (c) 2021-2022 Wu Jianhua + * Copyright (c) Lynne + * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -20,6 +22,7 @@ #include "libavutil/random_seed.h" #include "libavutil/opt.h" #include "vulkan_filter.h" +#include "vulkan_spirv.h" #include "internal.h" #define CGS 32 @@ -27,26 +30,23 @@ typedef struct GBlurVulkanContext { FFVulkanContext vkctx; - FFVkQueueFamilyCtx qf; - FFVkExecContext *exec; - FFVulkanPipeline *pl_hor; - FFVulkanPipeline *pl_ver; - FFVkBuffer params_buf_hor; - FFVkBuffer params_buf_ver; - - VkDescriptorImageInfo input_images[3]; - VkDescriptorImageInfo tmp_images[3]; - VkDescriptorImageInfo output_images[3]; - VkDescriptorBufferInfo params_desc_hor; - VkDescriptorBufferInfo params_desc_ver; int initialized; + FFVkExecPool e; + FFVkQueueFamilyCtx qf; + VkSampler sampler; + FFVulkanPipeline pl_hor; + FFVkSPIRVShader shd_hor; + FFVkBuffer params_hor; + FFVulkanPipeline pl_ver; + FFVkSPIRVShader shd_ver; + FFVkBuffer params_ver; + int size; int sizeV; int planes; float sigma; float sigmaV; - AVFrame *tmpframe; } GBlurVulkanContext; static const char gblur_func[] = { @@ -118,16 +118,17 @@ static av_cold void init_gaussian_params(GBlurVulkanContext *s) s->sizeV = s->size; else init_kernel_size(s, &s->sizeV); - - s->tmpframe = NULL; } -static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVkSPIRVShader *shd, - FFVkBuffer *params_buf, VkDescriptorBufferInfo *params_desc, - int ksize, float sigma) +static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, + FFVkSPIRVShader *shd, FFVkBuffer *params_buf, + int ksize, float sigma, FFVkSPIRVCompiler *spv) { int err = 0; uint8_t *kernel_mapped; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque; const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); @@ -137,7 +138,6 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVk .mem_quali = "readonly", .mem_layout = "std430", .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = NULL, .buf_content = NULL, }; @@ -145,10 +145,9 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVk if (!kernel_def) return AVERROR(ENOMEM); - buf_desc.updater = params_desc; buf_desc.buf_content = kernel_def; - RET(ff_vk_add_descriptor_set(&s->vkctx, pl, shd, &buf_desc, 1, 0)); + RET(ff_vk_pipeline_descriptor_set_add(&s->vkctx, pl, shd, &buf_desc, 1, 1, 0)); GLSLD( gblur_func ); GLSLC(0, void main() ); @@ -157,38 +156,43 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVk GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); for (int i = 0; i < planes; i++) { GLSLC(0, ); - GLSLF(1, size = imageSize(output_images[%i]); ,i); - GLSLC(1, if (IS_WITHIN(pos, size)) { ); + GLSLF(1, size = imageSize(output_images[%i]); ,i); + GLSLC(1, if (!IS_WITHIN(pos, size)) ); + GLSLC(2, return; ); if (s->planes & (1 << i)) { - GLSLF(2, gblur(pos, %i); ,i); + GLSLF(1, gblur(pos, %i); ,i); } else { - GLSLF(2, vec4 res = texture(input_images[%i], pos); ,i); - GLSLF(2, imageStore(output_images[%i], pos, res); ,i); + GLSLF(1, vec4 res = texture(input_images[%i], pos); ,i); + GLSLF(1, imageStore(output_images[%i], pos, res); ,i); } - GLSLC(1, } ); } GLSLC(0, } ); - RET(ff_vk_compile_shader(&s->vkctx, shd, "main")); + RET(spv->compile_shader(spv, s, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_create(&s->vkctx, shd, spv_data, spv_len, "main")); - RET(ff_vk_init_pipeline_layout(&s->vkctx, pl)); - RET(ff_vk_init_compute_pipeline(&s->vkctx, pl)); + RET(ff_vk_init_compute_pipeline(&s->vkctx, pl, shd)); + RET(ff_vk_exec_pipeline_register(&s->vkctx, &s->e, pl)); - RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize, - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); - RET(ff_vk_map_buffers(&s->vkctx, params_buf, &kernel_mapped, 1, 0)); + RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize, NULL, NULL, + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); + RET(ff_vk_map_buffer(&s->vkctx, params_buf, &kernel_mapped, 0)); init_gaussian_kernel((float *)kernel_mapped, sigma, ksize); - RET(ff_vk_unmap_buffers(&s->vkctx, params_buf, 1, 1)); - - params_desc->buffer = params_buf->buf; - params_desc->range = VK_WHOLE_SIZE; + RET(ff_vk_unmap_buffer(&s->vkctx, params_buf, 1)); - ff_vk_update_descriptor_set(&s->vkctx, pl, 1); + RET(ff_vk_set_descriptor_buffer(&s->vkctx, pl, NULL, 1, 0, 0, + params_buf->address, params_buf->size, + VK_FORMAT_UNDEFINED)); fail: av_free(kernel_def); + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); return err; } @@ -196,16 +200,35 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) { int err = 0; GBlurVulkanContext *s = ctx->priv; - FFVkSPIRVShader *shd; + FFVulkanContext *vkctx = &s->vkctx; const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - FFVulkanDescriptorSetBinding image_descs[] = { + FFVkSPIRVShader *shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_LINEAR)); + RET(ff_vk_shader_init(&s->pl_hor, &s->shd_hor, "gblur_hor_compute", + VK_SHADER_STAGE_COMPUTE_BIT, 0)); + RET(ff_vk_shader_init(&s->pl_ver, &s->shd_ver, "gblur_ver_compute", + VK_SHADER_STAGE_COMPUTE_BIT, 0)); + + desc = (FFVulkanDescriptorSetBinding []) { { .name = "input_images", .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .dimensions = 2, .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), }, { .name = "output_images", @@ -218,215 +241,64 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) }, }; - image_descs[0].sampler = ff_vk_init_sampler(&s->vkctx, 1, VK_FILTER_LINEAR); - if (!image_descs[0].sampler) - return AVERROR_EXTERNAL; - init_gaussian_params(s); - ff_vk_qf_init(&s->vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0); - { - /* Create shader for the horizontal pass */ - image_descs[0].updater = s->input_images; - image_descs[1].updater = s->tmp_images; - - s->pl_hor = ff_vk_create_pipeline(&s->vkctx, &s->qf); - if (!s->pl_hor) { - err = AVERROR(ENOMEM); - goto fail; - } - - shd = ff_vk_init_shader(s->pl_hor, "gblur_compute_hor", image_descs[0].stages); - if (!shd) { - err = AVERROR(ENOMEM); - goto fail; - } + shd = &s->shd_hor; + ff_vk_shader_set_compute_sizes(shd, 32, 1, 1); - ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 }); - RET(ff_vk_add_descriptor_set(&s->vkctx, s->pl_hor, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0)); + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl_hor, shd, desc, 2, 0, 0)); GLSLC(0, #define OFFSET (vec2(i, 0.0))); - RET(init_gblur_pipeline(s, s->pl_hor, shd, &s->params_buf_hor, &s->params_desc_hor, - s->size, s->sigma)); + RET(init_gblur_pipeline(s, &s->pl_hor, shd, &s->params_hor, s->size, s->sigma, spv)); } { - /* Create shader for the vertical pass */ - image_descs[0].updater = s->tmp_images; - image_descs[1].updater = s->output_images; - - s->pl_ver = ff_vk_create_pipeline(&s->vkctx, &s->qf); - if (!s->pl_ver) { - err = AVERROR(ENOMEM); - goto fail; - } + shd = &s->shd_ver; + ff_vk_shader_set_compute_sizes(shd, 1, 32, 1); - shd = ff_vk_init_shader(s->pl_ver, "gblur_compute_ver", image_descs[0].stages); - if (!shd) { - err = AVERROR(ENOMEM); - goto fail; - } - - ff_vk_set_compute_shader_sizes(shd, (int [3]){ 1, CGS, 1 }); - RET(ff_vk_add_descriptor_set(&s->vkctx, s->pl_ver, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0)); + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl_ver, shd, desc, 2, 0, 0)); GLSLC(0, #define OFFSET (vec2(0.0, i))); - RET(init_gblur_pipeline(s, s->pl_ver, shd, &s->params_buf_ver, &s->params_desc_ver, - s->sizeV, s->sigmaV)); + RET(init_gblur_pipeline(s, &s->pl_ver, shd, &s->params_ver, s->sizeV, s->sigmaV, spv)); } - RET(ff_vk_create_exec_ctx(&s->vkctx, &s->exec, &s->qf)); - s->initialized = 1; fail: + if (spv) + spv->uninit(&spv); + return err; } static av_cold void gblur_vulkan_uninit(AVFilterContext *avctx) { GBlurVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; - av_frame_free(&s->tmpframe); + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl_hor); + ff_vk_pipeline_free(vkctx, &s->pl_ver); + ff_vk_shader_free(vkctx, &s->shd_hor); + ff_vk_shader_free(vkctx, &s->shd_ver); + ff_vk_free_buf(vkctx, &s->params_hor); + ff_vk_free_buf(vkctx, &s->params_ver); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); - ff_vk_free_buf(&s->vkctx, &s->params_buf_hor); - ff_vk_free_buf(&s->vkctx, &s->params_buf_ver); ff_vk_uninit(&s->vkctx); s->initialized = 0; } -static int process_frames(AVFilterContext *avctx, AVFrame *outframe, AVFrame *inframe) -{ - int err; - VkCommandBuffer cmd_buf; - GBlurVulkanContext *s = avctx->priv; - FFVulkanFunctions *vk = &s->vkctx.vkfn; - - const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - - AVVkFrame *in = (AVVkFrame *)inframe->data[0]; - AVVkFrame *out = (AVVkFrame *)outframe->data[0]; - AVVkFrame *tmp = (AVVkFrame *)s->tmpframe->data[0]; - - const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format); - const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format); - - ff_vk_start_exec_recording(&s->vkctx, s->exec); - cmd_buf = ff_vk_get_exec_buf(s->exec); - - for (int i = 0; i < planes; i++) { - RET(ff_vk_create_imageview(&s->vkctx, s->exec, &s->input_images[i].imageView, - in->img[i], - input_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(&s->vkctx, s->exec, &s->tmp_images[i].imageView, - tmp->img[i], - output_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(&s->vkctx, s->exec, &s->output_images[i].imageView, - out->img[i], - output_formats[i], - ff_comp_identity_map)); - - s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->tmp_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - } - - ff_vk_update_descriptor_set(&s->vkctx, s->pl_hor, 0); - ff_vk_update_descriptor_set(&s->vkctx, s->pl_ver, 0); - - for (int i = 0; i < planes; i++) { - VkImageMemoryBarrier barriers[] = { - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = in->layout[i], - .newLayout = s->input_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = in->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT, - .oldLayout = tmp->layout[i], - .newLayout = s->tmp_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = tmp->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .oldLayout = out->layout[i], - .newLayout = s->output_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = out->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - }; - - vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, - 0, NULL, 0, NULL, FF_ARRAY_ELEMS(barriers), barriers); - - in->layout[i] = barriers[0].newLayout; - in->access[i] = barriers[0].dstAccessMask; - - tmp->layout[i] = barriers[1].newLayout; - tmp->access[i] = barriers[1].dstAccessMask; - - out->layout[i] = barriers[2].newLayout; - out->access[i] = barriers[2].dstAccessMask; - } - - ff_vk_bind_pipeline_exec(&s->vkctx, s->exec, s->pl_hor); - - vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS, - s->vkctx.output_height, 1); - - ff_vk_bind_pipeline_exec(&s->vkctx, s->exec, s->pl_ver); - - vk->CmdDispatch(cmd_buf,s->vkctx.output_width, - FFALIGN(s->vkctx.output_height, CGS)/CGS, 1); - - ff_vk_add_exec_dep(&s->vkctx, s->exec, inframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(&s->vkctx, s->exec, outframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - - err = ff_vk_submit_exec_queue(&s->vkctx, s->exec); - if (err) - return err; - - ff_vk_qf_rotate(&s->qf); - - return 0; - -fail: - ff_vk_discard_exec_deps(s->exec); - return err; -} - static int gblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) { int err; - AVFrame *out = NULL; + AVFrame *tmp = NULL, *out = NULL; AVFilterContext *ctx = link->dst; GBlurVulkanContext *s = ctx->priv; AVFilterLink *outlink = ctx->outputs[0]; @@ -437,28 +309,32 @@ static int gblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) goto fail; } - if (!s->initialized) { - RET(init_filter(ctx, in)); - s->tmpframe = ff_get_video_buffer(outlink, outlink->w, outlink->h); - if (!s->tmpframe) { - err = AVERROR(ENOMEM); - goto fail; - } + tmp = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!tmp) { + err = AVERROR(ENOMEM); + goto fail; } - RET(process_frames(ctx, out, in)); + if (!s->initialized) + RET(init_filter(ctx, in)); - RET(av_frame_copy_props(out, in)); + RET(ff_vk_filter_process_2pass(&s->vkctx, &s->e, + (FFVulkanPipeline *[2]){ &s->pl_hor, &s->pl_ver }, + out, tmp, in, s->sampler, NULL, 0)); + + err = av_frame_copy_props(out, in); + if (err < 0) + goto fail; av_frame_free(&in); + av_frame_free(&tmp); return ff_filter_frame(outlink, out); fail: av_frame_free(&in); + av_frame_free(&tmp); av_frame_free(&out); - av_frame_free(&s->tmpframe); - return err; } diff --git a/libavfilter/vf_libplacebo.c b/libavfilter/vf_libplacebo.c index 65fe6ef746dc3..2c327a3a12fc7 100644 --- a/libavfilter/vf_libplacebo.c +++ b/libavfilter/vf_libplacebo.c @@ -544,6 +544,30 @@ static int libplacebo_init(AVFilterContext *avctx) return err; } +#if PL_API_VER >= 201 +# if PL_API_VER >= 278 +static void lock_queue(void *priv, uint32_t qf, uint32_t qidx) +# else +static void lock_queue(void *priv, int qf, int qidx) +# endif +{ + AVHWDeviceContext *avhwctx = priv; + const AVVulkanDeviceContext *hwctx = avhwctx->hwctx; + hwctx->lock_queue(avhwctx, qf, qidx); +} + +# if PL_API_VER >= 278 +static void unlock_queue(void *priv, uint32_t qf, uint32_t qidx) +# else +static void unlock_queue(void *priv, int qf, int qidx) +# endif +{ + AVHWDeviceContext *avhwctx = priv; + const AVVulkanDeviceContext *hwctx = avhwctx->hwctx; + hwctx->unlock_queue(avhwctx, qf, qidx); +} +#endif + static int init_vulkan(AVFilterContext *avctx, const AVVulkanDeviceContext *hwctx) { int err = 0; @@ -561,6 +585,11 @@ static int init_vulkan(AVFilterContext *avctx, const AVVulkanDeviceContext *hwct .extensions = hwctx->enabled_dev_extensions, .num_extensions = hwctx->nb_enabled_dev_extensions, .features = &hwctx->device_features, +#if PL_API_VER >= 201 + .lock_queue = lock_queue, + .unlock_queue = unlock_queue, + .queue_ctx = avctx->hw_device_ctx->data, +#endif .queue_graphics = { .index = hwctx->queue_family_index, .count = hwctx->nb_graphics_queues, @@ -574,7 +603,7 @@ static int init_vulkan(AVFilterContext *avctx, const AVVulkanDeviceContext *hwct .count = hwctx->nb_tx_queues, }, /* This is the highest version created by hwcontext_vulkan.c */ - .max_api_version = VK_API_VERSION_1_2, + .max_api_version = VK_API_VERSION_1_3, )); } else { s->vulkan = pl_vulkan_create(s->log, pl_vulkan_params( diff --git a/libavfilter/vf_neighbor_opencl.c b/libavfilter/vf_neighbor_opencl.c index d2d93cd2405a7..b2939f841a280 100644 --- a/libavfilter/vf_neighbor_opencl.c +++ b/libavfilter/vf_neighbor_opencl.c @@ -55,7 +55,7 @@ static int neighbor_opencl_init(AVFilterContext *avctx) cl_int cle; int err; - err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_neighbor, 1); + err = ff_opencl_filter_load_program(avctx, &ff_source_neighbor_cl, 1); if (err < 0) goto fail; diff --git a/libavfilter/vf_nlmeans_opencl.c b/libavfilter/vf_nlmeans_opencl.c index ca3ec45d7a328..5149be02ca4c2 100644 --- a/libavfilter/vf_nlmeans_opencl.c +++ b/libavfilter/vf_nlmeans_opencl.c @@ -98,7 +98,7 @@ static int nlmeans_opencl_init(AVFilterContext *avctx, int width, int height) if (!ctx->patch_size_uv) ctx->patch_size_uv = ctx->patch_size; - err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_nlmeans, 1); + err = ff_opencl_filter_load_program(avctx, &ff_source_nlmeans_cl, 1); if (err < 0) goto fail; diff --git a/libavfilter/vf_nlmeans_vulkan.c b/libavfilter/vf_nlmeans_vulkan.c new file mode 100644 index 0000000000000..2907875e31e02 --- /dev/null +++ b/libavfilter/vf_nlmeans_vulkan.c @@ -0,0 +1,1114 @@ +/* + * Copyright (c) Lynne + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/random_seed.h" +#include "libavutil/opt.h" +#include "vulkan_filter.h" +#include "vulkan_spirv.h" +#include "internal.h" + +#define TYPE_NAME "vec4" +#define TYPE_ELEMS 4 +#define TYPE_SIZE (TYPE_ELEMS*4) + +typedef struct NLMeansVulkanContext { + FFVulkanContext vkctx; + + int initialized; + FFVkExecPool e; + FFVkQueueFamilyCtx qf; + VkSampler sampler; + + AVBufferPool *integral_buf_pool; + AVBufferPool *state_buf_pool; + AVBufferPool *ws_buf_pool; + + int pl_weights_rows; + FFVulkanPipeline pl_weights; + FFVkSPIRVShader shd_weights; + + FFVulkanPipeline pl_denoise; + FFVkSPIRVShader shd_denoise; + + int *xoffsets; + int *yoffsets; + int nb_offsets; + float strength[4]; + int patch[4]; + + struct nlmeans_opts { + int r; + double s; + double sc[4]; + int p; + int pc[4]; + int t; + } opts; +} NLMeansVulkanContext; + +extern const char *ff_source_prefix_sum_comp; + +static void insert_first(FFVkSPIRVShader *shd, int r, int horiz, int plane, int comp) +{ + GLSLF(2, s1 = texture(input_img[%i], ivec2(x + %i, y + %i))[%i]; + ,plane, horiz ? r : 0, !horiz ? r : 0, comp); + + if (TYPE_ELEMS == 4) { + GLSLF(2, s2[0] = texture(input_img[%i], ivec2(x + %i + xoffs[0], y + %i + yoffs[0]))[%i]; + ,plane, horiz ? r : 0, !horiz ? r : 0, comp); + GLSLF(2, s2[1] = texture(input_img[%i], ivec2(x + %i + xoffs[1], y + %i + yoffs[1]))[%i]; + ,plane, horiz ? r : 0, !horiz ? r : 0, comp); + GLSLF(2, s2[2] = texture(input_img[%i], ivec2(x + %i + xoffs[2], y + %i + yoffs[2]))[%i]; + ,plane, horiz ? r : 0, !horiz ? r : 0, comp); + GLSLF(2, s2[3] = texture(input_img[%i], ivec2(x + %i + xoffs[3], y + %i + yoffs[3]))[%i]; + ,plane, horiz ? r : 0, !horiz ? r : 0, comp); + } else { + for (int i = 0; i < 16; i++) { + GLSLF(2, s2[%i][%i] = texture(input_img[%i], ivec2(x + %i + xoffs[%i], y + %i + yoffs[%i]))[%i]; + ,i / 4, i % 4, plane, horiz ? r : 0, i, !horiz ? r : 0, i, comp); + } + } + + GLSLC(2, s2 = (s1 - s2) * (s1 - s2); ); +} + +static void insert_horizontal_pass(FFVkSPIRVShader *shd, int nb_rows, int first, int plane, int comp) +{ + GLSLF(1, x = int(gl_GlobalInvocationID.x) * %i; ,nb_rows); + if (!first) { + GLSLC(1, controlBarrier(gl_ScopeWorkgroup, gl_ScopeWorkgroup, + gl_StorageSemanticsBuffer, + gl_SemanticsAcquireRelease | + gl_SemanticsMakeAvailable | + gl_SemanticsMakeVisible); ); + } + GLSLC(1, for (y = 0; y < height[0]; y++) { ); + GLSLC(2, offset = uint64_t(int_stride)*y*T_ALIGN; ); + GLSLC(2, dst = DataBuffer(uint64_t(integral_data) + offset); ); + GLSLC(0, ); + if (first) { + for (int r = 0; r < nb_rows; r++) { + insert_first(shd, r, 1, plane, comp); + GLSLF(2, dst.v[x + %i] = s2; ,r); + GLSLC(0, ); + } + } + GLSLC(2, barrier(); ); + GLSLC(2, prefix_sum(dst, 1, dst, 1); ); + GLSLC(1, } ); + GLSLC(0, ); +} + +static void insert_vertical_pass(FFVkSPIRVShader *shd, int nb_rows, int first, int plane, int comp) +{ + GLSLF(1, y = int(gl_GlobalInvocationID.x) * %i; ,nb_rows); + if (!first) { + GLSLC(1, controlBarrier(gl_ScopeWorkgroup, gl_ScopeWorkgroup, + gl_StorageSemanticsBuffer, + gl_SemanticsAcquireRelease | + gl_SemanticsMakeAvailable | + gl_SemanticsMakeVisible); ); + } + GLSLC(1, for (x = 0; x < width[0]; x++) { ); + GLSLC(2, dst = DataBuffer(uint64_t(integral_data) + x*T_ALIGN); ); + + for (int r = 0; r < nb_rows; r++) { + if (first) { + insert_first(shd, r, 0, plane, comp); + GLSLF(2, integral_data.v[(y + %i)*int_stride + x] = s2; ,r); + GLSLC(0, ); + } + } + + GLSLC(2, barrier(); ); + GLSLC(2, prefix_sum(dst, int_stride, dst, int_stride); ); + GLSLC(1, } ); + GLSLC(0, ); +} + +static void insert_weights_pass(FFVkSPIRVShader *shd, int nb_rows, int vert, + int t, int dst_comp, int plane, int comp) +{ + GLSLF(1, p = patch_size[%i]; ,dst_comp); + GLSLC(0, ); + GLSLC(1, controlBarrier(gl_ScopeWorkgroup, gl_ScopeWorkgroup, + gl_StorageSemanticsBuffer, + gl_SemanticsAcquireRelease | + gl_SemanticsMakeAvailable | + gl_SemanticsMakeVisible); ); + GLSLC(1, barrier(); ); + if (!vert) { + GLSLC(1, for (y = 0; y < height[0]; y++) { ); + GLSLF(2, if (gl_GlobalInvocationID.x*%i >= width[%i]) ,nb_rows, plane); + GLSLC(3, break; ); + GLSLF(2, for (r = 0; r < %i; r++) { ,nb_rows); + GLSLF(3, x = int(gl_GlobalInvocationID.x) * %i + r; ,nb_rows); + } else { + GLSLC(1, for (x = 0; x < width[0]; x++) { ); + GLSLF(2, if (gl_GlobalInvocationID.x*%i >= height[%i]) ,nb_rows, plane); + GLSLC(3, break; ); + GLSLF(2, for (r = 0; r < %i; r++) { ,nb_rows); + GLSLF(3, y = int(gl_GlobalInvocationID.x) * %i + r; ,nb_rows); + } + GLSLC(0, ); + GLSLC(3, a = DTYPE(0); ); + GLSLC(3, b = DTYPE(0); ); + GLSLC(3, c = DTYPE(0); ); + GLSLC(3, d = DTYPE(0); ); + GLSLC(0, ); + GLSLC(3, lt = ((x - p) < 0) || ((y - p) < 0); ); + GLSLC(0, ); + if (TYPE_ELEMS == 4) { + GLSLF(3, src[0] = texture(input_img[%i], ivec2(x + xoffs[0], y + yoffs[0]))[%i]; ,plane, comp); + GLSLF(3, src[1] = texture(input_img[%i], ivec2(x + xoffs[1], y + yoffs[1]))[%i]; ,plane, comp); + GLSLF(3, src[2] = texture(input_img[%i], ivec2(x + xoffs[2], y + yoffs[2]))[%i]; ,plane, comp); + GLSLF(3, src[3] = texture(input_img[%i], ivec2(x + xoffs[3], y + yoffs[3]))[%i]; ,plane, comp); + } else { + for (int i = 0; i < 16; i++) + GLSLF(3, src[%i][%i] = texture(input_img[%i], ivec2(x + xoffs[%i], y + yoffs[%i]))[%i]; + ,i / 4, i % 4, plane, i, i, comp); + + } + GLSLC(0, ); + GLSLC(3, if (lt == false) { ); + GLSLC(4, a = integral_data.v[(y - p)*int_stride + x - p]; ); + GLSLC(4, c = integral_data.v[(y - p)*int_stride + x + p]; ); + GLSLC(4, b = integral_data.v[(y + p)*int_stride + x - p]; ); + GLSLC(4, d = integral_data.v[(y + p)*int_stride + x + p]; ); + GLSLC(3, } ); + GLSLC(0, ); + GLSLC(3, patch_diff = d + a - b - c; ); + if (TYPE_ELEMS == 4) { + GLSLF(3, w = exp(patch_diff * strength[%i]); ,dst_comp); + GLSLC(3, w_sum = w[0] + w[1] + w[2] + w[3]; ); + GLSLC(3, sum = dot(w, src*255); ); + } else { + for (int i = 0; i < 4; i++) + GLSLF(3, w[%i] = exp(patch_diff[%i] * strength[%i]); ,i,i,dst_comp); + for (int i = 0; i < 4; i++) + GLSLF(3, w_sum %s w[%i][0] + w[%i][1] + w[%i][2] + w[%i][3]; + ,!i ? "=" : "+=", i, i, i, i); + for (int i = 0; i < 4; i++) + GLSLF(3, sum %s dot(w[%i], src[%i]*255); + ,!i ? "=" : "+=", i, i); + } + GLSLC(0, ); + if (t > 1) { + GLSLF(3, atomicAdd(weights_%i[y*ws_stride[%i] + x], w_sum); ,dst_comp, dst_comp); + GLSLF(3, atomicAdd(sums_%i[y*ws_stride[%i] + x], sum); ,dst_comp, dst_comp); + } else { + GLSLF(3, weights_%i[y*ws_stride[%i] + x] += w_sum; ,dst_comp, dst_comp); + GLSLF(3, sums_%i[y*ws_stride[%i] + x] += sum; ,dst_comp, dst_comp); + } + GLSLC(2, } ); + GLSLC(1, } ); +} + +typedef struct HorizontalPushData { + VkDeviceAddress integral_data; + VkDeviceAddress state_data; + int32_t xoffs[TYPE_ELEMS]; + int32_t yoffs[TYPE_ELEMS]; + uint32_t width[4]; + uint32_t height[4]; + uint32_t ws_stride[4]; + int32_t patch_size[4]; + float strength[4]; + uint32_t int_stride; +} HorizontalPushData; + +static av_cold int init_weights_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec, + FFVulkanPipeline *pl, FFVkSPIRVShader *shd, + VkSampler sampler, FFVkSPIRVCompiler *spv, + int width, int height, int t, + const AVPixFmtDescriptor *desc, + int planes, int *nb_rows) +{ + int err; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque; + FFVulkanDescriptorSetBinding *desc_set; + int max_dim = FFMAX(width, height); + uint32_t max_wg = vkctx->props.properties.limits.maxComputeWorkGroupSize[0]; + int max_shm = vkctx->props.properties.limits.maxComputeSharedMemorySize; + int wg_size = FFMIN(max_wg, max_dim); + int wg_rows = 1; + + RET(ff_vk_shader_init(pl, shd, "nlmeans_weights", VK_SHADER_STAGE_COMPUTE_BIT, 0)); + + /* If not true, wg_size is set to max_wg */ + while (wg_size*wg_rows < max_dim) + wg_rows++; + + /* Make sure there's enough shared memory */ + while ((wg_size * TYPE_SIZE + TYPE_SIZE + 2*4) > max_shm) { + wg_size >>= 1; + wg_rows++; + } + + ff_vk_shader_set_compute_sizes(shd, wg_size, 1, 1); + *nb_rows = wg_rows; + + if (t > 1) + GLSLC(0, #extension GL_EXT_shader_atomic_float : require ); + GLSLC(0, #extension GL_ARB_gpu_shader_int64 : require ); + GLSLC(0, #pragma use_vulkan_memory_model ); + GLSLC(0, #extension GL_KHR_memory_scope_semantics : enable ); + GLSLC(0, ); + GLSLF(0, #define N_ROWS %i ,*nb_rows); + GLSLC(0, #define WG_SIZE (gl_WorkGroupSize.x) ); + GLSLF(0, #define LG_WG_SIZE %i ,ff_log2(shd->local_size[0])); + GLSLC(0, #define PARTITION_SIZE (N_ROWS*WG_SIZE) ); + GLSLF(0, #define DTYPE %s ,TYPE_NAME); + GLSLF(0, #define T_ALIGN %i ,TYPE_SIZE); + GLSLC(0, ); + GLSLC(0, layout(buffer_reference, buffer_reference_align = T_ALIGN) coherent buffer DataBuffer { ); + GLSLC(1, DTYPE v[]; ); + GLSLC(0, }; ); + GLSLC(0, ); + GLSLC(0, layout(buffer_reference) buffer StateData; ); + GLSLC(0, ); + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, coherent DataBuffer integral_data; ); + GLSLC(1, StateData state; ); + GLSLF(1, uint xoffs[%i]; ,TYPE_ELEMS); + GLSLF(1, uint yoffs[%i]; ,TYPE_ELEMS); + GLSLC(1, uvec4 width; ); + GLSLC(1, uvec4 height; ); + GLSLC(1, uvec4 ws_stride; ); + GLSLC(1, ivec4 patch_size; ); + GLSLC(1, vec4 strength; ); + GLSLC(1, uint int_stride; ); + GLSLC(0, }; ); + GLSLC(0, ); + + ff_vk_add_push_constant(pl, 0, sizeof(HorizontalPushData), VK_SHADER_STAGE_COMPUTE_BIT); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "input_img", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(sampler), + }, + { + .name = "weights_buffer_0", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float weights_0[];", + }, + { + .name = "sums_buffer_0", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float sums_0[];", + }, + { + .name = "weights_buffer_1", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float weights_1[];", + }, + { + .name = "sums_buffer_1", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float sums_1[];", + }, + { + .name = "weights_buffer_2", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float weights_2[];", + }, + { + .name = "sums_buffer_2", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float sums_2[];", + }, + { + .name = "weights_buffer_3", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float weights_3[];", + }, + { + .name = "sums_buffer_3", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float sums_3[];", + }, + }; + RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc_set, 1 + 2*desc->nb_components, 0, 0)); + + GLSLD( ff_source_prefix_sum_comp ); + GLSLC(0, ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, uint64_t offset; ); + GLSLC(1, DataBuffer dst; ); + GLSLC(1, float s1; ); + GLSLC(1, DTYPE s2; ); + GLSLC(1, int r; ); + GLSLC(1, int x; ); + GLSLC(1, int y; ); + GLSLC(1, int p; ); + GLSLC(0, ); + GLSLC(1, DTYPE a; ); + GLSLC(1, DTYPE b; ); + GLSLC(1, DTYPE c; ); + GLSLC(1, DTYPE d; ); + GLSLC(0, ); + GLSLC(1, DTYPE patch_diff; ); + if (TYPE_ELEMS == 4) { + GLSLC(1, vec4 src; ); + GLSLC(1, vec4 w; ); + } else { + GLSLC(1, vec4 src[4]; ); + GLSLC(1, vec4 w[4]; ); + } + GLSLC(1, float w_sum; ); + GLSLC(1, float sum; ); + GLSLC(0, ); + GLSLC(1, bool lt; ); + GLSLC(1, bool gt; ); + GLSLC(0, ); + + for (int i = 0; i < desc->nb_components; i++) { + int off = desc->comp[i].offset / (FFALIGN(desc->comp[i].depth, 8)/8); + if (width > height) { + insert_horizontal_pass(shd, *nb_rows, 1, desc->comp[i].plane, off); + insert_vertical_pass(shd, *nb_rows, 0, desc->comp[i].plane, off); + insert_weights_pass(shd, *nb_rows, 0, t, i, desc->comp[i].plane, off); + } else { + insert_vertical_pass(shd, *nb_rows, 1, desc->comp[i].plane, off); + insert_horizontal_pass(shd, *nb_rows, 0, desc->comp[i].plane, off); + insert_weights_pass(shd, *nb_rows, 1, t, i, desc->comp[i].plane, off); + } + } + + GLSLC(0, } ); + + RET(spv->compile_shader(spv, vkctx, shd, &spv_data, &spv_len, "main", &spv_opaque)); + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); + + RET(ff_vk_init_compute_pipeline(vkctx, pl, shd)); + RET(ff_vk_exec_pipeline_register(vkctx, exec, pl)); + + return 0; + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + + return err; +} + +typedef struct DenoisePushData { + uint32_t ws_stride[4]; +} DenoisePushData; + +static av_cold int init_denoise_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec, + FFVulkanPipeline *pl, FFVkSPIRVShader *shd, + VkSampler sampler, FFVkSPIRVCompiler *spv, + const AVPixFmtDescriptor *desc, int planes) +{ + int err; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque; + FFVulkanDescriptorSetBinding *desc_set; + + RET(ff_vk_shader_init(pl, shd, "nlmeans_denoise", + VK_SHADER_STAGE_COMPUTE_BIT, 0)); + + ff_vk_shader_set_compute_sizes(shd, 32, 32, 1); + + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, uvec4 ws_stride; ); + GLSLC(0, }; ); + + ff_vk_add_push_constant(pl, 0, sizeof(DenoisePushData), VK_SHADER_STAGE_COMPUTE_BIT); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "input_img", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(sampler), + }, + { + .name = "output_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(vkctx->output_format), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + { + .name = "weights_buffer_0", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float weights_0[];", + }, + { + .name = "sums_buffer_0", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float sums_0[];", + }, + { + .name = "weights_buffer_1", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float weights_1[];", + }, + { + .name = "sums_buffer_1", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float sums_1[];", + }, + { + .name = "weights_buffer_2", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float weights_2[];", + }, + { + .name = "sums_buffer_2", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float sums_2[];", + }, + { + .name = "weights_buffer_3", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float weights_3[];", + }, + { + .name = "sums_buffer_3", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float sums_3[];", + }, + }; + RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc_set, 2 + 2*desc->nb_components, 0, 0)); + + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + GLSLC(0, ); + GLSLC(1, float w_sum; ); + GLSLC(1, float sum; ); + GLSLC(1, vec4 src; ); + GLSLC(1, vec4 r; ); + GLSLC(0, ); + + for (int i = 0; i < planes; i++) { + GLSLF(1, src = texture(input_img[%i], pos); ,i); + for (int c = 0; c < desc->nb_components; c++) { + if (desc->comp[c].plane == i) { + int off = desc->comp[c].offset / (FFALIGN(desc->comp[c].depth, 8)/8); + GLSLF(1, w_sum = weights_%i[pos.y*ws_stride[%i] + pos.x]; ,c, c); + GLSLF(1, sum = sums_%i[pos.y*ws_stride[%i] + pos.x]; ,c, c); + GLSLF(1, r[%i] = (sum + src[%i]*255) / (1.0 + w_sum) / 255; ,off, off); + GLSLC(0, ); + } + } + GLSLF(1, imageStore(output_img[%i], pos, r); ,i); + GLSLC(0, ); + } + + GLSLC(0, } ); + + RET(spv->compile_shader(spv, vkctx, shd, &spv_data, &spv_len, "main", &spv_opaque)); + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); + + RET(ff_vk_init_compute_pipeline(vkctx, pl, shd)); + RET(ff_vk_exec_pipeline_register(vkctx, exec, pl)); + + return 0; + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + + return err; +} + +static av_cold int init_filter(AVFilterContext *ctx) +{ + int rad, err; + int xcnt = 0, ycnt = 0; + NLMeansVulkanContext *s = ctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + FFVkSPIRVCompiler *spv; + + const AVPixFmtDescriptor *desc; + desc = av_pix_fmt_desc_get(vkctx->output_format); + if (!desc) + return AVERROR(EINVAL); + + if (!(s->opts.r & 1)) { + s->opts.r |= 1; + av_log(ctx, AV_LOG_WARNING, "Research size should be odd, setting to %i", + s->opts.r); + } + + if (!(s->opts.p & 1)) { + s->opts.p |= 1; + av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i", + s->opts.p); + } + + for (int i = 0; i < 4; i++) { + double str = (s->opts.sc[i] > 1.0) ? s->opts.sc[i] : s->opts.s; + int ps = (s->opts.pc[i] ? s->opts.pc[i] : s->opts.p); + str = 10.0f*str; + str *= -str; + str = 255.0*255.0 / str; + s->strength[i] = str; + if (!(ps & 1)) { + ps |= 1; + av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i", + ps); + } + s->patch[i] = ps / 2; + } + + rad = s->opts.r/2; + s->nb_offsets = (2*rad + 1)*(2*rad + 1) - 1; + s->xoffsets = av_malloc(s->nb_offsets*sizeof(*s->xoffsets)); + s->yoffsets = av_malloc(s->nb_offsets*sizeof(*s->yoffsets)); + s->nb_offsets = 0; + + for (int x = -rad; x <= rad; x++) { + for (int y = -rad; y <= rad; y++) { + if (!x && !y) + continue; + + s->xoffsets[xcnt++] = x; + s->yoffsets[ycnt++] = y; + s->nb_offsets++; + } + } + + s->opts.t = FFMIN(s->opts.t, (FFALIGN(s->nb_offsets, TYPE_ELEMS) / TYPE_ELEMS)); + if (!vkctx->atomic_float_feats.shaderBufferFloat32AtomicAdd) { + av_log(ctx, AV_LOG_WARNING, "Device doesn't support atomic float adds, " + "disabling dispatch parallelism\n"); + s->opts.t = 1; + } + + if (!vkctx->feats_12.vulkanMemoryModel) { + av_log(ctx, AV_LOG_ERROR, "Device doesn't support the Vulkan memory model!"); + return AVERROR(EINVAL);; + } + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, 1, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST)); + + RET(init_weights_pipeline(vkctx, &s->e, &s->pl_weights, &s->shd_weights, s->sampler, + spv, s->vkctx.output_width, s->vkctx.output_height, + s->opts.t, desc, planes, &s->pl_weights_rows)); + + RET(init_denoise_pipeline(vkctx, &s->e, &s->pl_denoise, &s->shd_denoise, s->sampler, + spv, desc, planes)); + + av_log(ctx, AV_LOG_VERBOSE, "Filter initialized, %i x/y offsets, %i dispatches, %i parallel\n", + s->nb_offsets, (FFALIGN(s->nb_offsets, TYPE_ELEMS) / TYPE_ELEMS) + 1, s->opts.t); + + s->initialized = 1; + + return 0; + +fail: + if (spv) + spv->uninit(&spv); + + return err; +} + +static int denoise_pass(NLMeansVulkanContext *s, FFVkExecContext *exec, + FFVkBuffer *ws_vk, uint32_t ws_stride[4]) +{ + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + VkBufferMemoryBarrier2 buf_bar[8]; + int nb_buf_bar = 0; + + /* Denoise pass pipeline */ + ff_vk_exec_bind_pipeline(vkctx, exec, &s->pl_denoise); + + /* Push data */ + ff_vk_update_push_exec(vkctx, exec, &s->pl_denoise, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(DenoisePushData), &(DenoisePushData) { + { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] }, + }); + + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = ws_vk->stage, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = ws_vk->access, + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = ws_vk->buf, + .size = ws_vk->size, + .offset = 0, + }; + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pBufferMemoryBarriers = buf_bar, + .bufferMemoryBarrierCount = nb_buf_bar, + }); + ws_vk->stage = buf_bar[0].dstStageMask; + ws_vk->access = buf_bar[0].dstAccessMask; + + /* End of denoise pass */ + vk->CmdDispatch(exec->buf, + FFALIGN(vkctx->output_width, s->pl_denoise.wg_size[0])/s->pl_denoise.wg_size[0], + FFALIGN(vkctx->output_height, s->pl_denoise.wg_size[1])/s->pl_denoise.wg_size[1], + 1); + + return 0; +} + +static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) +{ + int err; + AVFrame *out = NULL; + AVFilterContext *ctx = link->dst; + NLMeansVulkanContext *s = ctx->priv; + AVFilterLink *outlink = ctx->outputs[0]; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + + const AVPixFmtDescriptor *desc; + int plane_widths[4]; + int plane_heights[4]; + + /* Integral */ + AVBufferRef *state_buf; + FFVkBuffer *state_vk; + AVBufferRef *integral_buf; + FFVkBuffer *integral_vk; + uint32_t int_stride; + size_t int_size; + size_t state_size; + int t_offset = 0; + + /* Weights/sums */ + AVBufferRef *ws_buf; + FFVkBuffer *ws_vk; + VkDeviceAddress weights_addr[4]; + VkDeviceAddress sums_addr[4]; + uint32_t ws_stride[4]; + size_t ws_size[4]; + size_t ws_total_size = 0; + + FFVkExecContext *exec; + VkImageView in_views[AV_NUM_DATA_POINTERS]; + VkImageView out_views[AV_NUM_DATA_POINTERS]; + VkImageMemoryBarrier2 img_bar[8]; + int nb_img_bar = 0; + VkBufferMemoryBarrier2 buf_bar[8]; + int nb_buf_bar = 0; + + if (!s->initialized) + RET(init_filter(ctx)); + + desc = av_pix_fmt_desc_get(vkctx->output_format); + if (!desc) + return AVERROR(EINVAL); + + /* Integral image */ + int_stride = s->pl_weights.wg_size[0]*s->pl_weights_rows; + int_size = int_stride * int_stride * TYPE_SIZE; + state_size = int_stride * 3 *TYPE_SIZE; + + /* Plane dimensions */ + for (int i = 0; i < desc->nb_components; i++) { + plane_widths[i] = !i || (i == 3) ? vkctx->output_width : AV_CEIL_RSHIFT(vkctx->output_width, desc->log2_chroma_w); + plane_heights[i] = !i || (i == 3) ? vkctx->output_height : AV_CEIL_RSHIFT(vkctx->output_height, desc->log2_chroma_w); + plane_widths[i] = FFALIGN(plane_widths[i], s->pl_denoise.wg_size[0]); + plane_heights[i] = FFALIGN(plane_heights[i], s->pl_denoise.wg_size[1]); + + ws_stride[i] = plane_widths[i]; + ws_size[i] = ws_stride[i] * plane_heights[i] * sizeof(float); + ws_total_size += ws_size[i]; + } + + /* Buffers */ + err = ff_vk_get_pooled_buffer(&s->vkctx, &s->integral_buf_pool, &integral_buf, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + NULL, + s->opts.t * int_size, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + if (err < 0) + return err; + integral_vk = (FFVkBuffer *)integral_buf->data; + + err = ff_vk_get_pooled_buffer(&s->vkctx, &s->state_buf_pool, &state_buf, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + NULL, + s->opts.t * state_size, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + if (err < 0) + return err; + state_vk = (FFVkBuffer *)state_buf->data; + + err = ff_vk_get_pooled_buffer(&s->vkctx, &s->ws_buf_pool, &ws_buf, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + NULL, + ws_total_size * 2, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + if (err < 0) + return err; + ws_vk = (FFVkBuffer *)ws_buf->data; + + weights_addr[0] = ws_vk->address; + sums_addr[0] = ws_vk->address + ws_total_size; + for (int i = 1; i < desc->nb_components; i++) { + weights_addr[i] = weights_addr[i - 1] + ws_size[i - 1]; + sums_addr[i] = sums_addr[i - 1] + ws_size[i - 1]; + } + + /* Output frame */ + out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) { + err = AVERROR(ENOMEM); + goto fail; + } + + /* Execution context */ + exec = ff_vk_exec_get(&s->e); + ff_vk_exec_start(vkctx, exec); + + /* Dependencies */ + RET(ff_vk_exec_add_dep_frame(vkctx, exec, in, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_exec_add_dep_frame(vkctx, exec, out, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_exec_add_dep_buf(vkctx, exec, &integral_buf, 1, 0)); + RET(ff_vk_exec_add_dep_buf(vkctx, exec, &state_buf, 1, 0)); + RET(ff_vk_exec_add_dep_buf(vkctx, exec, &ws_buf, 1, 0)); + + /* Input frame prep */ + RET(ff_vk_create_imageviews(vkctx, exec, in_views, in)); + ff_vk_update_descriptor_img_array(vkctx, &s->pl_weights, exec, in, in_views, 0, 0, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + s->sampler); + ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_QUEUE_FAMILY_IGNORED); + + /* Output frame prep */ + RET(ff_vk_create_imageviews(vkctx, exec, out_views, out)); + ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = ws_vk->stage, + .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .srcAccessMask = ws_vk->access, + .dstAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = ws_vk->buf, + .size = ws_vk->size, + .offset = 0, + }; + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + .pBufferMemoryBarriers = buf_bar, + .bufferMemoryBarrierCount = nb_buf_bar, + }); + ws_vk->stage = buf_bar[0].dstStageMask; + ws_vk->access = buf_bar[0].dstAccessMask; + + /* Weights/sums buffer zeroing */ + vk->CmdFillBuffer(exec->buf, ws_vk->buf, 0, ws_vk->size, 0x0); + + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = ws_vk->stage, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = ws_vk->access, + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = ws_vk->buf, + .size = ws_vk->size, + .offset = 0, + }; + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pBufferMemoryBarriers = buf_bar, + .bufferMemoryBarrierCount = nb_buf_bar, + }); + ws_vk->stage = buf_bar[0].dstStageMask; + ws_vk->access = buf_bar[0].dstAccessMask; + + /* Update weights descriptors */ + ff_vk_update_descriptor_img_array(vkctx, &s->pl_weights, exec, in, in_views, 0, 0, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + s->sampler); + for (int i = 0; i < desc->nb_components; i++) { + RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_weights, exec, 0, 1 + i*2 + 0, 0, + weights_addr[i], ws_size[i], + VK_FORMAT_UNDEFINED)); + RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_weights, exec, 0, 1 + i*2 + 1, 0, + sums_addr[i], ws_size[i], + VK_FORMAT_UNDEFINED)); + } + + /* Update denoise descriptors */ + ff_vk_update_descriptor_img_array(vkctx, &s->pl_denoise, exec, in, in_views, 0, 0, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + s->sampler); + ff_vk_update_descriptor_img_array(vkctx, &s->pl_denoise, exec, out, out_views, 0, 1, + VK_IMAGE_LAYOUT_GENERAL, s->sampler); + for (int i = 0; i < desc->nb_components; i++) { + RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_denoise, exec, 0, 2 + i*2 + 0, 0, + weights_addr[i], ws_size[i], + VK_FORMAT_UNDEFINED)); + RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_denoise, exec, 0, 2 + i*2 + 1, 0, + sums_addr[i], ws_size[i], + VK_FORMAT_UNDEFINED)); + } + + /* Weights pipeline */ + ff_vk_exec_bind_pipeline(vkctx, exec, &s->pl_weights); + + for (int i = 0; i < s->nb_offsets; i += TYPE_ELEMS) { + int *xoffs = s->xoffsets + i; + int *yoffs = s->yoffsets + i; + HorizontalPushData pd = { + integral_vk->address + t_offset*int_size, + state_vk->address + t_offset*state_size, + { 0 }, + { 0 }, + { plane_widths[0], plane_widths[1], plane_widths[2], plane_widths[3] }, + { plane_heights[0], plane_heights[1], plane_heights[2], plane_heights[3] }, + { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] }, + { s->patch[0], s->patch[1], s->patch[2], s->patch[3] }, + { s->strength[0], s->strength[1], s->strength[2], s->strength[2], }, + int_stride, + }; + + memcpy(pd.xoffs, xoffs, sizeof(pd.xoffs)); + memcpy(pd.yoffs, yoffs, sizeof(pd.yoffs)); + + /* Put a barrier once we run out of parallelism buffers */ + if (!t_offset) { + nb_buf_bar = 0; + /* Buffer prep/sync */ + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = integral_vk->stage, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = integral_vk->access, + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = integral_vk->buf, + .size = integral_vk->size, + .offset = 0, + }; + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = state_vk->stage, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = state_vk->access, + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = state_vk->buf, + .size = state_vk->size, + .offset = 0, + }; + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pBufferMemoryBarriers = buf_bar, + .bufferMemoryBarrierCount = nb_buf_bar, + }); + integral_vk->stage = buf_bar[0].dstStageMask; + integral_vk->access = buf_bar[0].dstAccessMask; + state_vk->stage = buf_bar[1].dstStageMask; + state_vk->access = buf_bar[1].dstAccessMask; + } + t_offset = (t_offset + 1) % s->opts.t; + + /* Push data */ + ff_vk_update_push_exec(vkctx, exec, &s->pl_weights, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(pd), &pd); + + /* End of horizontal pass */ + vk->CmdDispatch(exec->buf, 1, 1, 1); + } + + RET(denoise_pass(s, exec, ws_vk, ws_stride)); + + err = ff_vk_exec_submit(vkctx, exec); + if (err < 0) + return err; + + err = av_frame_copy_props(out, in); + if (err < 0) + goto fail; + + av_frame_free(&in); + + return ff_filter_frame(outlink, out); + +fail: + av_frame_free(&in); + av_frame_free(&out); + return err; +} + +static void nlmeans_vulkan_uninit(AVFilterContext *avctx) +{ + NLMeansVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl_weights); + ff_vk_shader_free(vkctx, &s->shd_weights); + ff_vk_pipeline_free(vkctx, &s->pl_denoise); + ff_vk_shader_free(vkctx, &s->shd_denoise); + + av_buffer_pool_uninit(&s->integral_buf_pool); + av_buffer_pool_uninit(&s->state_buf_pool); + av_buffer_pool_uninit(&s->ws_buf_pool); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); + + ff_vk_uninit(&s->vkctx); + + s->initialized = 0; +} + +#define OFFSET(x) offsetof(NLMeansVulkanContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption nlmeans_vulkan_options[] = { + { "s", "denoising strength for all components", OFFSET(opts.s), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS }, + { "p", "patch size for all components", OFFSET(opts.p), AV_OPT_TYPE_INT, { .i64 = 3*2+1 }, 0, 99, FLAGS }, + { "r", "research window radius", OFFSET(opts.r), AV_OPT_TYPE_INT, { .i64 = 7*2+1 }, 0, 99, FLAGS }, + { "t", "parallelism", OFFSET(opts.t), AV_OPT_TYPE_INT, { .i64 = 36 }, 1, 168, FLAGS }, + + { "s1", "denoising strength for component 1", OFFSET(opts.sc[0]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS }, + { "s2", "denoising strength for component 2", OFFSET(opts.sc[1]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS }, + { "s3", "denoising strength for component 3", OFFSET(opts.sc[2]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS }, + { "s4", "denoising strength for component 4", OFFSET(opts.sc[3]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS }, + + { "p1", "patch size for component 1", OFFSET(opts.pc[0]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS }, + { "p2", "patch size for component 2", OFFSET(opts.pc[1]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS }, + { "p3", "patch size for component 3", OFFSET(opts.pc[2]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS }, + { "p4", "patch size for component 4", OFFSET(opts.pc[3]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS }, + + { NULL } +}; + +AVFILTER_DEFINE_CLASS(nlmeans_vulkan); + +static const AVFilterPad nlmeans_vulkan_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = &nlmeans_vulkan_filter_frame, + .config_props = &ff_vk_filter_config_input, + }, +}; + +static const AVFilterPad nlmeans_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_vk_filter_config_output, + }, +}; + +const AVFilter ff_vf_nlmeans_vulkan = { + .name = "nlmeans_vulkan", + .description = NULL_IF_CONFIG_SMALL("Non-local means denoiser (Vulkan)"), + .priv_size = sizeof(NLMeansVulkanContext), + .init = &ff_vk_filter_init, + .uninit = &nlmeans_vulkan_uninit, + FILTER_INPUTS(nlmeans_vulkan_inputs), + FILTER_OUTPUTS(nlmeans_vulkan_outputs), + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), + .priv_class = &nlmeans_vulkan_class, + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; diff --git a/libavfilter/vf_overlay_opencl.c b/libavfilter/vf_overlay_opencl.c index 38a3fc8795a50..9beb09f05a1ba 100644 --- a/libavfilter/vf_overlay_opencl.c +++ b/libavfilter/vf_overlay_opencl.c @@ -51,7 +51,7 @@ static int overlay_opencl_load(AVFilterContext *avctx, { OverlayOpenCLContext *ctx = avctx->priv; cl_int cle; - const char *source = ff_opencl_source_overlay; + const char *source = ff_source_overlay_cl; const char *kernel; const AVPixFmtDescriptor *main_desc, *overlay_desc; int err, i, main_planes, overlay_planes; diff --git a/libavfilter/vf_overlay_vulkan.c b/libavfilter/vf_overlay_vulkan.c index 6db7baddfddc0..a05d9155be133 100644 --- a/libavfilter/vf_overlay_vulkan.c +++ b/libavfilter/vf_overlay_vulkan.c @@ -1,4 +1,6 @@ /* + * Copyright (c) Lynne + * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -19,26 +21,26 @@ #include "libavutil/random_seed.h" #include "libavutil/opt.h" #include "vulkan_filter.h" +#include "vulkan_spirv.h" #include "internal.h" #include "framesync.h" -#define CGROUPS (int [3]){ 32, 32, 1 } - typedef struct OverlayVulkanContext { FFVulkanContext vkctx; + FFFrameSync fs; int initialized; + FFVulkanPipeline pl; + FFVkExecPool e; FFVkQueueFamilyCtx qf; - FFVkExecContext *exec; - FFVulkanPipeline *pl; - FFFrameSync fs; - FFVkBuffer params_buf; + FFVkSPIRVShader shd; + VkSampler sampler; - /* Shader updators, must be in the main filter struct */ - VkDescriptorImageInfo main_images[3]; - VkDescriptorImageInfo overlay_images[3]; - VkDescriptorImageInfo output_images[3]; - VkDescriptorBufferInfo params_desc; + /* Push constants / options */ + struct { + int32_t o_offset[2*3]; + int32_t o_size[2*3]; + } opts; int overlay_x; int overlay_y; @@ -80,279 +82,114 @@ static const char overlay_alpha[] = { static av_cold int init_filter(AVFilterContext *ctx) { int err; - FFVkSampler *sampler; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque; OverlayVulkanContext *s = ctx->priv; FFVulkanContext *vkctx = &s->vkctx; const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - - ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0); - - sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_NEAREST); - if (!sampler) + const int ialpha = av_pix_fmt_desc_get(s->vkctx.input_format)->flags & AV_PIX_FMT_FLAG_ALPHA; + const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(s->vkctx.output_format); + FFVkSPIRVShader *shd = &s->shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); return AVERROR_EXTERNAL; - - s->pl = ff_vk_create_pipeline(vkctx, &s->qf); - if (!s->pl) - return AVERROR(ENOMEM); - - { /* Create the shader */ - const int ialpha = av_pix_fmt_desc_get(s->vkctx.input_format)->flags & AV_PIX_FMT_FLAG_ALPHA; - - FFVulkanDescriptorSetBinding desc_i[3] = { - { - .name = "main_img", - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->main_images, - .sampler = sampler, - }, - { - .name = "overlay_img", - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->overlay_images, - .sampler = sampler, - }, - { - .name = "output_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), - .mem_quali = "writeonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->output_images, - }, - }; - - FFVulkanDescriptorSetBinding desc_b = { - .name = "params", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .mem_quali = "readonly", - .mem_layout = "std430", - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = &s->params_desc, - .buf_content = "ivec2 o_offset[3], o_size[3];", - }; - - FFVkSPIRVShader *shd = ff_vk_init_shader(s->pl, "overlay_compute", - VK_SHADER_STAGE_COMPUTE_BIT); - if (!shd) - return AVERROR(ENOMEM); - - ff_vk_set_compute_shader_sizes(shd, CGROUPS); - - RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0)); /* set 0 */ - RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, &desc_b, 1, 0)); /* set 1 */ - - GLSLD( overlay_noalpha ); - GLSLD( overlay_alpha ); - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - GLSLF(1, int planes = %i; ,planes); - GLSLC(1, for (int i = 0; i < planes; i++) { ); - if (ialpha) - GLSLC(2, overlay_alpha_opaque(i, pos); ); - else - GLSLC(2, overlay_noalpha(i, pos); ); - GLSLC(1, } ); - GLSLC(0, } ); - - RET(ff_vk_compile_shader(vkctx, shd, "main")); - } - - RET(ff_vk_init_pipeline_layout(vkctx, s->pl)); - RET(ff_vk_init_compute_pipeline(vkctx, s->pl)); - - { /* Create and update buffer */ - const AVPixFmtDescriptor *desc; - - /* NOTE: std430 requires the same identical struct layout, padding and - * alignment as C, so we're allowed to do this, as this will map - * exactly to what the shader recieves */ - struct { - int32_t o_offset[2*3]; - int32_t o_size[2*3]; - } *par; - - err = ff_vk_create_buf(vkctx, &s->params_buf, - sizeof(*par), - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); - if (err) - return err; - - err = ff_vk_map_buffers(vkctx, &s->params_buf, (uint8_t **)&par, 1, 0); - if (err) - return err; - - desc = av_pix_fmt_desc_get(s->vkctx.output_format); - - par->o_offset[0] = s->overlay_x; - par->o_offset[1] = s->overlay_y; - par->o_offset[2] = par->o_offset[0] >> desc->log2_chroma_w; - par->o_offset[3] = par->o_offset[1] >> desc->log2_chroma_h; - par->o_offset[4] = par->o_offset[0] >> desc->log2_chroma_w; - par->o_offset[5] = par->o_offset[1] >> desc->log2_chroma_h; - - par->o_size[0] = s->overlay_w; - par->o_size[1] = s->overlay_h; - par->o_size[2] = par->o_size[0] >> desc->log2_chroma_w; - par->o_size[3] = par->o_size[1] >> desc->log2_chroma_h; - par->o_size[4] = par->o_size[0] >> desc->log2_chroma_w; - par->o_size[5] = par->o_size[1] >> desc->log2_chroma_h; - - err = ff_vk_unmap_buffers(vkctx, &s->params_buf, 1, 1); - if (err) - return err; - - s->params_desc.buffer = s->params_buf.buf; - s->params_desc.range = VK_WHOLE_SIZE; - - ff_vk_update_descriptor_set(vkctx, s->pl, 1); } - /* Execution context */ - RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf)); + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST)); + RET(ff_vk_shader_init(&s->pl, &s->shd, "overlay_compute", + VK_SHADER_STAGE_COMPUTE_BIT, 0)); + + ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1); + + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, ivec2 o_offset[3]; ); + GLSLC(1, ivec2 o_size[3]; ); + GLSLC(0, }; ); + GLSLC(0, ); + + ff_vk_add_push_constant(&s->pl, 0, sizeof(s->opts), + VK_SHADER_STAGE_COMPUTE_BIT); + + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "main_img", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "overlay_img", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "output_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 3, 0, 0)); + + GLSLD( overlay_noalpha ); + GLSLD( overlay_alpha ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + GLSLF(1, int planes = %i; ,planes); + GLSLC(1, for (int i = 0; i < planes; i++) { ); + if (ialpha) + GLSLC(2, overlay_alpha_opaque(i, pos); ); + else + GLSLC(2, overlay_noalpha(i, pos); ); + GLSLC(1, } ); + GLSLC(0, } ); + + RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); + + RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd)); + RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl)); + + s->opts.o_offset[0] = s->overlay_x; + s->opts.o_offset[1] = s->overlay_y; + s->opts.o_offset[2] = s->opts.o_offset[0] >> pix_desc->log2_chroma_w; + s->opts.o_offset[3] = s->opts.o_offset[1] >> pix_desc->log2_chroma_h; + s->opts.o_offset[4] = s->opts.o_offset[0] >> pix_desc->log2_chroma_w; + s->opts.o_offset[5] = s->opts.o_offset[1] >> pix_desc->log2_chroma_h; + + s->opts.o_size[0] = s->overlay_w; + s->opts.o_size[1] = s->overlay_h; + s->opts.o_size[2] = s->opts.o_size[0] >> pix_desc->log2_chroma_w; + s->opts.o_size[3] = s->opts.o_size[1] >> pix_desc->log2_chroma_h; + s->opts.o_size[4] = s->opts.o_size[0] >> pix_desc->log2_chroma_w; + s->opts.o_size[5] = s->opts.o_size[1] >> pix_desc->log2_chroma_h; s->initialized = 1; - return 0; - fail: - return err; -} - -static int process_frames(AVFilterContext *avctx, AVFrame *out_f, - AVFrame *main_f, AVFrame *overlay_f) -{ - int err; - VkCommandBuffer cmd_buf; - OverlayVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - FFVulkanFunctions *vk = &vkctx->vkfn; - int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - - AVVkFrame *out = (AVVkFrame *)out_f->data[0]; - AVVkFrame *main = (AVVkFrame *)main_f->data[0]; - AVVkFrame *overlay = (AVVkFrame *)overlay_f->data[0]; - - AVHWFramesContext *main_fc = (AVHWFramesContext*)main_f->hw_frames_ctx->data; - AVHWFramesContext *overlay_fc = (AVHWFramesContext*)overlay_f->hw_frames_ctx->data; - - const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format); - const VkFormat *main_sw_formats = av_vkfmt_from_pixfmt(main_fc->sw_format); - const VkFormat *overlay_sw_formats = av_vkfmt_from_pixfmt(overlay_fc->sw_format); - - /* Update descriptors and init the exec context */ - ff_vk_start_exec_recording(vkctx, s->exec); - cmd_buf = ff_vk_get_exec_buf(s->exec); - - for (int i = 0; i < planes; i++) { - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->main_images[i].imageView, main->img[i], - main_sw_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->overlay_images[i].imageView, overlay->img[i], - overlay_sw_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->output_images[i].imageView, out->img[i], - output_formats[i], - ff_comp_identity_map)); - - s->main_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->overlay_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - } - - ff_vk_update_descriptor_set(vkctx, s->pl, 0); - - for (int i = 0; i < planes; i++) { - VkImageMemoryBarrier bar[3] = { - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = main->layout[i], - .newLayout = s->main_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = main->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = overlay->layout[i], - .newLayout = s->overlay_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = overlay->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .oldLayout = out->layout[i], - .newLayout = s->output_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = out->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - }; - - vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, - 0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar); - - main->layout[i] = bar[0].newLayout; - main->access[i] = bar[0].dstAccessMask; - - overlay->layout[i] = bar[1].newLayout; - overlay->access[i] = bar[1].dstAccessMask; - - out->layout[i] = bar[2].newLayout; - out->access[i] = bar[2].dstAccessMask; - } - - ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl); - - vk->CmdDispatch(cmd_buf, - FFALIGN(s->vkctx.output_width, CGROUPS[0])/CGROUPS[0], - FFALIGN(s->vkctx.output_height, CGROUPS[1])/CGROUPS[1], 1); - - ff_vk_add_exec_dep(vkctx, s->exec, main_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(vkctx, s->exec, overlay_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(vkctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - - err = ff_vk_submit_exec_queue(vkctx, s->exec); - if (err) - return err; + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); - ff_vk_qf_rotate(&s->qf); - - return err; - -fail: - ff_vk_discard_exec_deps(s->exec); return err; } @@ -394,7 +231,9 @@ static int overlay_vulkan_blend(FFFrameSync *fs) goto fail; } - RET(process_frames(ctx, out, input_main, input_overlay)); + RET(ff_vk_filter_process_Nin(&s->vkctx, &s->e, &s->pl, + out, (AVFrame *[]){ input_main, input_overlay }, 2, + s->sampler, &s->opts, sizeof(s->opts))); err = av_frame_copy_props(out, input_main); if (err < 0) @@ -443,8 +282,17 @@ static av_cold int overlay_vulkan_init(AVFilterContext *avctx) static void overlay_vulkan_uninit(AVFilterContext *avctx) { OverlayVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl); + ff_vk_shader_free(vkctx, &s->shd); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); - ff_vk_free_buf(&s->vkctx, &s->params_buf); ff_vk_uninit(&s->vkctx); ff_framesync_uninit(&s->fs); diff --git a/libavfilter/vf_pad_opencl.c b/libavfilter/vf_pad_opencl.c index d6b71765eeb3a..b4b10397a4de4 100644 --- a/libavfilter/vf_pad_opencl.c +++ b/libavfilter/vf_pad_opencl.c @@ -93,7 +93,7 @@ static int pad_opencl_init(AVFilterContext *avctx, AVFrame *input_frame) ctx->hsub = desc->log2_chroma_w; ctx->vsub = desc->log2_chroma_h; - err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_pad, 1); + err = ff_opencl_filter_load_program(avctx, &ff_source_pad_cl, 1); if (err < 0) goto fail; diff --git a/libavfilter/vf_remap_opencl.c b/libavfilter/vf_remap_opencl.c index eeb1eb5d69d32..89d47426c0179 100644 --- a/libavfilter/vf_remap_opencl.c +++ b/libavfilter/vf_remap_opencl.c @@ -73,7 +73,7 @@ static int remap_opencl_load(AVFilterContext *avctx, { RemapOpenCLContext *ctx = avctx->priv; cl_int cle; - const char *source = ff_opencl_source_remap; + const char *source = ff_source_remap_cl; const char *kernel = kernels[ctx->interp]; const AVPixFmtDescriptor *main_desc; int err, main_planes; diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c index 3b09f0dcc120d..64f5e79afb64a 100644 --- a/libavfilter/vf_scale_vulkan.c +++ b/libavfilter/vf_scale_vulkan.c @@ -1,4 +1,6 @@ /* + * Copyright (c) Lynne + * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -19,12 +21,11 @@ #include "libavutil/random_seed.h" #include "libavutil/opt.h" #include "vulkan_filter.h" +#include "vulkan_spirv.h" #include "scale_eval.h" #include "internal.h" #include "colorspace.h" -#define CGROUPS (int [3]){ 32, 32, 1 } - enum ScalerFunc { F_BILINEAR = 0, F_NEAREST, @@ -35,15 +36,17 @@ enum ScalerFunc { typedef struct ScaleVulkanContext { FFVulkanContext vkctx; + int initialized; + FFVulkanPipeline pl; + FFVkExecPool e; FFVkQueueFamilyCtx qf; - FFVkExecContext *exec; - FFVulkanPipeline *pl; - FFVkBuffer params_buf; + FFVkSPIRVShader shd; + VkSampler sampler; - /* Shader updators, must be in the main filter struct */ - VkDescriptorImageInfo input_images[3]; - VkDescriptorImageInfo output_images[3]; - VkDescriptorBufferInfo params_desc; + /* Push constants / options */ + struct { + float yuv_matrix[4][4]; + } opts; char *out_format_string; char *w_expr; @@ -51,8 +54,6 @@ typedef struct ScaleVulkanContext { enum ScalerFunc scaler; enum AVColorRange out_range; - - int initialized; } ScaleVulkanContext; static const char scale_bilinear[] = { @@ -110,10 +111,15 @@ static const char write_444[] = { static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) { int err; - FFVkSampler *sampler; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque; VkFilter sampler_mode; ScaleVulkanContext *s = ctx->priv; FFVulkanContext *vkctx = &s->vkctx; + FFVkSPIRVShader *shd = &s->shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; int crop_x = in->crop_left; int crop_y = in->crop_top; @@ -121,8 +127,6 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) int crop_h = in->height - (in->crop_top + in->crop_bottom); int in_planes = av_pix_fmt_count_planes(s->vkctx.input_format); - ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0); - switch (s->scaler) { case F_NEAREST: sampler_mode = VK_FILTER_NEAREST; @@ -132,264 +136,134 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) break; }; - /* Create a sampler */ - sampler = ff_vk_init_sampler(vkctx, 0, sampler_mode); - if (!sampler) + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); return AVERROR_EXTERNAL; + } - s->pl = ff_vk_create_pipeline(vkctx, &s->qf); - if (!s->pl) - return AVERROR(ENOMEM); - - { /* Create the shader */ - FFVulkanDescriptorSetBinding desc_i[2] = { - { - .name = "input_img", - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .dimensions = 2, - .elems = in_planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->input_images, - .sampler = sampler, - }, - { - .name = "output_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), - .mem_quali = "writeonly", - .dimensions = 2, - .elems = av_pix_fmt_count_planes(s->vkctx.output_format), - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->output_images, - }, - }; - - FFVulkanDescriptorSetBinding desc_b = { - .name = "params", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .mem_quali = "readonly", - .mem_layout = "std430", - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = &s->params_desc, - .buf_content = "mat4 yuv_matrix;", - }; - - FFVkSPIRVShader *shd = ff_vk_init_shader(s->pl, "scale_compute", - VK_SHADER_STAGE_COMPUTE_BIT); - if (!shd) - return AVERROR(ENOMEM); - - ff_vk_set_compute_shader_sizes(shd, CGROUPS); - - RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0)); /* set 0 */ - RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, &desc_b, 1, 0)); /* set 1 */ - - GLSLD( scale_bilinear ); - - if (s->vkctx.output_format != s->vkctx.input_format) { - GLSLD( rgb2yuv ); - } + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 0, sampler_mode)); + RET(ff_vk_shader_init(&s->pl, &s->shd, "scale_compute", + VK_SHADER_STAGE_COMPUTE_BIT, 0)); + + ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1); + + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, mat4 yuv_matrix; ); + GLSLC(0, }; ); + GLSLC(0, ); + + ff_vk_add_push_constant(&s->pl, 0, sizeof(s->opts), + VK_SHADER_STAGE_COMPUTE_BIT); + + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "input_img", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = in_planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "output_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = av_pix_fmt_count_planes(s->vkctx.output_format), + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; - switch (s->vkctx.output_format) { - case AV_PIX_FMT_NV12: GLSLD(write_nv12); break; - case AV_PIX_FMT_YUV420P: GLSLD( write_420); break; - case AV_PIX_FMT_YUV444P: GLSLD( write_444); break; - default: break; - } + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0)); - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 size; ); - GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - GLSLF(1, vec2 in_d = vec2(%i, %i); ,in->width, in->height); - GLSLF(1, vec2 c_r = vec2(%i, %i) / in_d; ,crop_w, crop_h); - GLSLF(1, vec2 c_o = vec2(%i, %i) / in_d; ,crop_x,crop_y); - GLSLC(0, ); - - if (s->vkctx.output_format == s->vkctx.input_format) { - for (int i = 0; i < desc_i[1].elems; i++) { - GLSLF(1, size = imageSize(output_img[%i]); ,i); - GLSLC(1, if (IS_WITHIN(pos, size)) { ); - switch (s->scaler) { - case F_NEAREST: - case F_BILINEAR: - GLSLF(2, vec4 res = scale_bilinear(%i, pos, c_r, c_o); ,i); - GLSLF(2, imageStore(output_img[%i], pos, res); ,i); - break; - }; - GLSLC(1, } ); - } - } else { - GLSLC(1, vec4 res = scale_bilinear(0, pos, c_r, c_o); ); - GLSLF(1, res = rgb2yuv(res, %i); ,s->out_range == AVCOL_RANGE_JPEG); - switch (s->vkctx.output_format) { - case AV_PIX_FMT_NV12: GLSLC(1, write_nv12(res, pos); ); break; - case AV_PIX_FMT_YUV420P: GLSLC(1, write_420(res, pos); ); break; - case AV_PIX_FMT_YUV444P: GLSLC(1, write_444(res, pos); ); break; - default: return AVERROR(EINVAL); - } - } + GLSLD( scale_bilinear ); + + if (s->vkctx.output_format != s->vkctx.input_format) { + GLSLD( rgb2yuv ); + } - GLSLC(0, } ); + switch (s->vkctx.output_format) { + case AV_PIX_FMT_NV12: GLSLD(write_nv12); break; + case AV_PIX_FMT_YUV420P: GLSLD( write_420); break; + case AV_PIX_FMT_YUV444P: GLSLD( write_444); break; + default: break; + } - RET(ff_vk_compile_shader(vkctx, shd, "main")); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + GLSLF(1, vec2 in_d = vec2(%i, %i); ,in->width, in->height); + GLSLF(1, vec2 c_r = vec2(%i, %i) / in_d; ,crop_w, crop_h); + GLSLF(1, vec2 c_o = vec2(%i, %i) / in_d; ,crop_x,crop_y); + GLSLC(0, ); + + if (s->vkctx.output_format == s->vkctx.input_format) { + for (int i = 0; i < desc[i].elems; i++) { + GLSLF(1, size = imageSize(output_img[%i]); ,i); + GLSLC(1, if (IS_WITHIN(pos, size)) { ); + switch (s->scaler) { + case F_NEAREST: + case F_BILINEAR: + GLSLF(2, vec4 res = scale_bilinear(%i, pos, c_r, c_o); ,i); + GLSLF(2, imageStore(output_img[%i], pos, res); ,i); + break; + }; + GLSLC(1, } ); + } + } else { + GLSLC(1, vec4 res = scale_bilinear(0, pos, c_r, c_o); ); + GLSLF(1, res = rgb2yuv(res, %i); ,s->out_range == AVCOL_RANGE_JPEG); + switch (s->vkctx.output_format) { + case AV_PIX_FMT_NV12: GLSLC(1, write_nv12(res, pos); ); break; + case AV_PIX_FMT_YUV420P: GLSLC(1, write_420(res, pos); ); break; + case AV_PIX_FMT_YUV444P: GLSLC(1, write_444(res, pos); ); break; + default: return AVERROR(EINVAL); + } } - RET(ff_vk_init_pipeline_layout(vkctx, s->pl)); - RET(ff_vk_init_compute_pipeline(vkctx, s->pl)); + GLSLC(0, } ); if (s->vkctx.output_format != s->vkctx.input_format) { const AVLumaCoefficients *lcoeffs; double tmp_mat[3][3]; - struct { - float yuv_matrix[4][4]; - } *par; - lcoeffs = av_csp_luma_coeffs_from_avcsp(in->colorspace); if (!lcoeffs) { av_log(ctx, AV_LOG_ERROR, "Unsupported colorspace\n"); return AVERROR(EINVAL); } - RET(ff_vk_create_buf(vkctx, &s->params_buf, - sizeof(*par), - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); - - RET(ff_vk_map_buffers(vkctx, &s->params_buf, (uint8_t **)&par, 1, 0)); - ff_fill_rgb2yuv_table(lcoeffs, tmp_mat); - memset(par, 0, sizeof(*par)); - for (int y = 0; y < 3; y++) for (int x = 0; x < 3; x++) - par->yuv_matrix[x][y] = tmp_mat[x][y]; - - par->yuv_matrix[3][3] = 1.0; - - RET(ff_vk_unmap_buffers(vkctx, &s->params_buf, 1, 1)); - - s->params_desc.buffer = s->params_buf.buf; - s->params_desc.range = VK_WHOLE_SIZE; - - ff_vk_update_descriptor_set(vkctx, s->pl, 1); + s->opts.yuv_matrix[x][y] = tmp_mat[x][y]; + s->opts.yuv_matrix[3][3] = 1.0; } - /* Execution context */ - RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf)); + RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); + + RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd)); + RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl)); s->initialized = 1; return 0; fail: - return err; -} - -static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f) -{ - int err = 0; - VkCommandBuffer cmd_buf; - ScaleVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - FFVulkanFunctions *vk = &vkctx->vkfn; - AVVkFrame *in = (AVVkFrame *)in_f->data[0]; - AVVkFrame *out = (AVVkFrame *)out_f->data[0]; - VkImageMemoryBarrier barriers[AV_NUM_DATA_POINTERS*2]; - int barrier_count = 0; - const int planes = av_pix_fmt_count_planes(s->vkctx.input_format); - const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format); - const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format); - - /* Update descriptors and init the exec context */ - ff_vk_start_exec_recording(vkctx, s->exec); - cmd_buf = ff_vk_get_exec_buf(s->exec); - - for (int i = 0; i < planes; i++) { - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->input_images[i].imageView, in->img[i], - input_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->output_images[i].imageView, out->img[i], - output_formats[i], - ff_comp_identity_map)); - - s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - } - - ff_vk_update_descriptor_set(vkctx, s->pl, 0); - - for (int i = 0; i < planes; i++) { - VkImageMemoryBarrier bar = { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = in->layout[i], - .newLayout = s->input_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = in->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }; - - memcpy(&barriers[barrier_count++], &bar, sizeof(VkImageMemoryBarrier)); - - in->layout[i] = bar.newLayout; - in->access[i] = bar.dstAccessMask; - } - - for (int i = 0; i < av_pix_fmt_count_planes(s->vkctx.output_format); i++) { - VkImageMemoryBarrier bar = { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .oldLayout = out->layout[i], - .newLayout = s->output_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = out->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }; - - memcpy(&barriers[barrier_count++], &bar, sizeof(VkImageMemoryBarrier)); - - out->layout[i] = bar.newLayout; - out->access[i] = bar.dstAccessMask; - } - - vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, - 0, NULL, 0, NULL, barrier_count, barriers); - - ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl); - - vk->CmdDispatch(cmd_buf, - FFALIGN(vkctx->output_width, CGROUPS[0])/CGROUPS[0], - FFALIGN(vkctx->output_height, CGROUPS[1])/CGROUPS[1], 1); - - ff_vk_add_exec_dep(vkctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(vkctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - - err = ff_vk_submit_exec_queue(vkctx, s->exec); - if (err) - return err; - - ff_vk_qf_rotate(&s->qf); + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); return err; - -fail: - ff_vk_discard_exec_deps(s->exec); - return err; } static int scale_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) @@ -408,7 +282,8 @@ static int scale_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) if (!s->initialized) RET(init_filter(ctx, in)); - RET(process_frames(ctx, out, in)); + RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, out, in, + s->sampler, &s->opts, sizeof(s->opts))); err = av_frame_copy_props(out, in); if (err < 0) @@ -475,8 +350,17 @@ static int scale_vulkan_config_output(AVFilterLink *outlink) static void scale_vulkan_uninit(AVFilterContext *avctx) { ScaleVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl); + ff_vk_shader_free(vkctx, &s->shd); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); - ff_vk_free_buf(&s->vkctx, &s->params_buf); ff_vk_uninit(&s->vkctx); s->initialized = 0; diff --git a/libavfilter/vf_tonemap_opencl.c b/libavfilter/vf_tonemap_opencl.c index 883eb043427f2..84bf394e75a69 100644 --- a/libavfilter/vf_tonemap_opencl.c +++ b/libavfilter/vf_tonemap_opencl.c @@ -240,8 +240,8 @@ static int tonemap_opencl_init(AVFilterContext *avctx) av_log(avctx, AV_LOG_DEBUG, "Generated OpenCL header:\n%s\n", header.str); opencl_sources[0] = header.str; - opencl_sources[1] = ff_opencl_source_tonemap; - opencl_sources[2] = ff_opencl_source_colorspace_common; + opencl_sources[1] = ff_source_tonemap_cl; + opencl_sources[2] = ff_source_colorspace_common_cl; err = ff_opencl_filter_load_program(avctx, opencl_sources, OPENCL_SOURCE_NB); av_bprint_finalize(&header, NULL); diff --git a/libavfilter/vf_transpose_opencl.c b/libavfilter/vf_transpose_opencl.c index 56d34d193bcff..b2128049537fa 100644 --- a/libavfilter/vf_transpose_opencl.c +++ b/libavfilter/vf_transpose_opencl.c @@ -44,7 +44,7 @@ static int transpose_opencl_init(AVFilterContext *avctx) cl_int cle; int err; - err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_transpose, 1); + err = ff_opencl_filter_load_program(avctx, &ff_source_transpose_cl, 1); if (err < 0) goto fail; diff --git a/libavfilter/vf_transpose_vulkan.c b/libavfilter/vf_transpose_vulkan.c index 3b2ce4fb693ee..f9c0dd928d73c 100644 --- a/libavfilter/vf_transpose_vulkan.c +++ b/libavfilter/vf_transpose_vulkan.c @@ -1,5 +1,7 @@ /* * copyright (c) 2021 Wu Jianhua + * Copyright (c) Lynne + * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -20,41 +22,60 @@ #include "libavutil/random_seed.h" #include "libavutil/opt.h" #include "vulkan_filter.h" +#include "vulkan_spirv.h" #include "internal.h" #include "transpose.h" -#define CGS 32 - typedef struct TransposeVulkanContext { FFVulkanContext vkctx; - FFVkQueueFamilyCtx qf; - FFVkExecContext *exec; - FFVulkanPipeline *pl; - VkDescriptorImageInfo input_images[3]; - VkDescriptorImageInfo output_images[3]; + int initialized; + FFVulkanPipeline pl; + FFVkExecPool e; + FFVkQueueFamilyCtx qf; + FFVkSPIRVShader shd; + VkSampler sampler; int dir; int passthrough; - int initialized; } TransposeVulkanContext; static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) { - int err = 0; - FFVkSPIRVShader *shd; + int err; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque; TransposeVulkanContext *s = ctx->priv; FFVulkanContext *vkctx = &s->vkctx; + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + FFVkSPIRVShader *shd = &s->shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_LINEAR)); + RET(ff_vk_shader_init(&s->pl, &s->shd, "transpose_compute", + VK_SHADER_STAGE_COMPUTE_BIT, 0)); - FFVulkanDescriptorSetBinding image_descs[] = { + ff_vk_shader_set_compute_sizes(&s->shd, 32, 1, 1); + + desc = (FFVulkanDescriptorSetBinding []) { { .name = "input_images", .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .dimensions = 2, .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->input_images, + .samplers = DUP_SAMPLER(s->sampler), }, { .name = "output_images", @@ -64,154 +85,49 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) .dimensions = 2, .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->output_images, }, }; - image_descs[0].sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_LINEAR); - if (!image_descs[0].sampler) - return AVERROR_EXTERNAL; - - ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0); - - { - s->pl = ff_vk_create_pipeline(vkctx, &s->qf); - if (!s->pl) - return AVERROR(ENOMEM); - - shd = ff_vk_init_shader(s->pl, "transpose_compute", image_descs[0].stages); - if (!shd) - return AVERROR(ENOMEM); - - ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 }); - RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0)); - - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 size; ); - GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - for (int i = 0; i < planes; i++) { - GLSLC(0, ); - GLSLF(1, size = imageSize(output_images[%i]); ,i); - GLSLC(1, if (IS_WITHIN(pos, size)) { ); - if (s->dir == TRANSPOSE_CCLOCK) - GLSLF(2, vec4 res = texture(input_images[%i], ivec2(size.y - pos.y, pos.x)); ,i); - else if (s->dir == TRANSPOSE_CLOCK_FLIP || s->dir == TRANSPOSE_CLOCK) { - GLSLF(2, vec4 res = texture(input_images[%i], ivec2(size.yx - pos.yx)); ,i); - if (s->dir == TRANSPOSE_CLOCK) - GLSLC(2, pos = ivec2(pos.x, size.y - pos.y); ); - } else - GLSLF(2, vec4 res = texture(input_images[%i], pos.yx); ,i); - GLSLF(2, imageStore(output_images[%i], pos, res); ,i); - GLSLC(1, } ); - } - GLSLC(0, } ); - - RET(ff_vk_compile_shader(vkctx, shd, "main")); - RET(ff_vk_init_pipeline_layout(vkctx, s->pl)); - RET(ff_vk_init_compute_pipeline(vkctx, s->pl)); - } - - RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf)); - s->initialized = 1; - -fail: - return err; -} - -static int process_frames(AVFilterContext *avctx, AVFrame *outframe, AVFrame *inframe) -{ - int err = 0; - VkCommandBuffer cmd_buf; - TransposeVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - FFVulkanFunctions *vk = &s->vkctx.vkfn; - const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - - AVVkFrame *in = (AVVkFrame *)inframe->data[0]; - AVVkFrame *out = (AVVkFrame *)outframe->data[0]; - - const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format); - const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format); - - ff_vk_start_exec_recording(vkctx, s->exec); - cmd_buf = ff_vk_get_exec_buf(s->exec); - - for (int i = 0; i < planes; i++) { - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->input_images[i].imageView, in->img[i], - input_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->output_images[i].imageView, out->img[i], - output_formats[i], - ff_comp_identity_map)); - - s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - } - - ff_vk_update_descriptor_set(vkctx, s->pl, 0); + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0)); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); for (int i = 0; i < planes; i++) { - VkImageMemoryBarrier barriers[] = { - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = in->layout[i], - .newLayout = s->input_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = in->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .oldLayout = out->layout[i], - .newLayout = s->output_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = out->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - }; - - vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, - 0, NULL, 0, NULL, FF_ARRAY_ELEMS(barriers), barriers); - - in->layout[i] = barriers[0].newLayout; - in->access[i] = barriers[0].dstAccessMask; - - out->layout[i] = barriers[1].newLayout; - out->access[i] = barriers[1].dstAccessMask; + GLSLC(0, ); + GLSLF(1, size = imageSize(output_images[%i]); ,i); + GLSLC(1, if (IS_WITHIN(pos, size)) { ); + if (s->dir == TRANSPOSE_CCLOCK) + GLSLF(2, vec4 res = texture(input_images[%i], ivec2(size.y - pos.y, pos.x)); ,i); + else if (s->dir == TRANSPOSE_CLOCK_FLIP || s->dir == TRANSPOSE_CLOCK) { + GLSLF(2, vec4 res = texture(input_images[%i], ivec2(size.yx - pos.yx)); ,i); + if (s->dir == TRANSPOSE_CLOCK) + GLSLC(2, pos = ivec2(pos.x, size.y - pos.y); ); + } else + GLSLF(2, vec4 res = texture(input_images[%i], pos.yx); ,i); + GLSLF(2, imageStore(output_images[%i], pos, res); ,i); + GLSLC(1, } ); } + GLSLC(0, } ); - ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl); - vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS, - s->vkctx.output_height, 1); + RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); - ff_vk_add_exec_dep(vkctx, s->exec, inframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(vkctx, s->exec, outframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd)); + RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl)); - err = ff_vk_submit_exec_queue(vkctx, s->exec); - if (err) - return err; - - ff_vk_qf_rotate(&s->qf); + s->initialized = 1; return 0; fail: - ff_vk_discard_exec_deps(s->exec); + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); + return err; } @@ -235,7 +151,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) if (!s->initialized) RET(init_filter(ctx, in)); - RET(process_frames(ctx, out, in)); + RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, out, in, + s->sampler, NULL, 0)); RET(av_frame_copy_props(out, in)); @@ -259,6 +176,17 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) static av_cold void transpose_vulkan_uninit(AVFilterContext *avctx) { TransposeVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl); + ff_vk_shader_free(vkctx, &s->shd); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); + ff_vk_uninit(&s->vkctx); s->initialized = 0; diff --git a/libavfilter/vf_unsharp_opencl.c b/libavfilter/vf_unsharp_opencl.c index 2c3ac14050d07..09398464ca39b 100644 --- a/libavfilter/vf_unsharp_opencl.c +++ b/libavfilter/vf_unsharp_opencl.c @@ -69,7 +69,7 @@ static int unsharp_opencl_init(AVFilterContext *avctx) cl_int cle; int err; - err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_unsharp, 1); + err = ff_opencl_filter_load_program(avctx, &ff_source_unsharp_cl, 1); if (err < 0) goto fail; diff --git a/libavfilter/vf_xfade_opencl.c b/libavfilter/vf_xfade_opencl.c index 415cf7ac35d8e..fb567aa7fd83e 100644 --- a/libavfilter/vf_xfade_opencl.c +++ b/libavfilter/vf_xfade_opencl.c @@ -93,7 +93,7 @@ static int xfade_opencl_load(AVFilterContext *avctx, if (ctx->transition == CUSTOM) { err = ff_opencl_filter_load_program_from_file(avctx, ctx->source_file); } else { - err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_xfade, 1); + err = ff_opencl_filter_load_program(avctx, &ff_source_xfade_cl, 1); } if (err < 0) return err; diff --git a/libavfilter/vsrc_testsrc_vulkan.c b/libavfilter/vsrc_testsrc_vulkan.c new file mode 100644 index 0000000000000..52b6df39f5b50 --- /dev/null +++ b/libavfilter/vsrc_testsrc_vulkan.c @@ -0,0 +1,373 @@ +/* + * Copyright (c) Lynne + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/random_seed.h" +#include "libavutil/csp.h" +#include "libavutil/opt.h" +#include "vulkan_filter.h" +#include "vulkan_spirv.h" +#include "internal.h" +#include "filters.h" +#include "colorspace.h" + +enum TestSrcVulkanMode { + TESTSRC_COLOR, +}; + +typedef struct TestSrcVulkanPushData { + float color_comp[4]; +} TestSrcVulkanPushData; + +typedef struct TestSrcVulkanContext { + FFVulkanContext vkctx; + + int initialized; + FFVulkanPipeline pl; + FFVkExecPool e; + FFVkQueueFamilyCtx qf; + FFVkSPIRVShader shd; + + /* Only used by color_vulkan */ + uint8_t color_rgba[4]; + + TestSrcVulkanPushData opts; + + int w, h; + int pw, ph; + char *out_format_string; + enum AVColorRange out_range; + unsigned int nb_frame; + AVRational time_base, frame_rate; + int64_t pts; + int64_t duration; ///< duration expressed in microseconds + AVRational sar; ///< sample aspect ratio + int draw_once; ///< draw only the first frame, always put out the same picture + int draw_once_reset; ///< draw only the first frame or in case of reset + AVFrame *picref; ///< cached reference containing the painted picture +} TestSrcVulkanContext; + +static av_cold int init_filter(AVFilterContext *ctx, enum TestSrcVulkanMode mode) +{ + int err; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque; + TestSrcVulkanContext *s = ctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + FFVkSPIRVShader *shd = &s->shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc_set; + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->vkctx.output_format); + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL)); + RET(ff_vk_shader_init(&s->pl, &s->shd, "testsrc_compute", + VK_SHADER_STAGE_COMPUTE_BIT, 0)); + + ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1); + + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, vec2 dist; ); + GLSLC(0, }; ); + GLSLC(0, ); + + ff_vk_add_push_constant(&s->pl, 0, sizeof(s->opts), + VK_SHADER_STAGE_COMPUTE_BIT); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "output_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc_set, 1, 0, 0)); + + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + if (mode == TESTSRC_COLOR) { + double rgb2yuv[3][3]; + double rgbad[4]; + double yuvad[4]; + + enum AVColorSpace csp; + const AVLumaCoefficients *luma = NULL; + if (desc->flags & AV_PIX_FMT_FLAG_RGB) + csp = AVCOL_SPC_RGB; + else + csp = AVCOL_SPC_SMPTE170M; + + if (!(desc->flags & AV_PIX_FMT_FLAG_RGB) && !(luma = av_csp_luma_coeffs_from_avcsp(csp))) + return AVERROR(EINVAL); + else + ff_fill_rgb2yuv_table(luma, rgb2yuv); + + for (int i = 0; i < 4; i++) + rgbad[i] = s->color_rgba[i] / 255.0; + + if (!(desc->flags & AV_PIX_FMT_FLAG_RGB)) + ff_matrix_mul_3x3_vec(yuvad, rgbad, rgb2yuv); + else + memcpy(yuvad, rgbad, sizeof(rgbad)); + + yuvad[3] = rgbad[3]; + + for (int i = 0; i < 3; i++) { + int chroma = (!(desc->flags & AV_PIX_FMT_FLAG_RGB) && i > 0); + if (s->out_range == AVCOL_RANGE_MPEG) { + yuvad[i] *= (chroma ? 224.0 : 219.0) / 255.0; + yuvad[i] += (chroma ? 128.0 : 16.0) / 255.0; + } else if (chroma) { + yuvad[i] += 0.5; + } + } + + /* Ensure we place the alpha appropriately for gray formats */ + if (desc->nb_components <= 2) + yuvad[1] = yuvad[3]; + + for (int i = 0; i < 4; i++) + s->opts.color_comp[i] = yuvad[i] / 255.0; + + GLSLC(1, vec4 r; ); + for (int i = 0; i < planes; i++) { + for (int c = 0; c < desc->nb_components; c++) { + if (desc->comp[c].plane == i) { + int off = desc->comp[c].offset / (FFALIGN(desc->comp[c].depth, 8)/8); + GLSLF(1, r[%i] = color_comp[%i]; ,off, i); + GLSLC(0, ); + } + } + GLSLF(1, imageStore(output_img[%i], pos, r); ,i); + GLSLC(0, ); + } + } + GLSLC(0, } ); + + RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); + + RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd)); + RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl)); + + s->initialized = 1; + + return 0; + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); + + return err; +} + +static int testsrc_vulkan_activate(AVFilterContext *ctx) +{ + int err; + AVFilterLink *outlink = ctx->outputs[0]; + TestSrcVulkanContext *s = ctx->priv; + AVFrame *frame; + + if (!s->initialized) { + enum TestSrcVulkanMode mode = TESTSRC_COLOR; + err = init_filter(ctx, mode); + if (err < 0) + return err; + } + + if (!ff_outlink_frame_wanted(outlink)) + return FFERROR_NOT_READY; + if (s->duration >= 0 && + av_rescale_q(s->pts, s->time_base, AV_TIME_BASE_Q) >= s->duration) { + ff_outlink_set_status(outlink, AVERROR_EOF, s->pts); + return 0; + } + + if (s->draw_once) { + if (s->draw_once_reset) { + av_frame_free(&s->picref); + s->draw_once_reset = 0; + } + if (!s->picref) { + s->picref = ff_get_video_buffer(outlink, s->w, s->h); + if (!s->picref) + return AVERROR(ENOMEM); + + err = ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, frame, NULL, + NULL, &s->opts, sizeof(s->opts)); + if (err < 0) + return err; + } + frame = av_frame_clone(s->picref); + } else { + frame = ff_get_video_buffer(outlink, s->w, s->h); + } + + if (!frame) + return AVERROR(ENOMEM); + + frame->pts = s->pts; + frame->duration = 1; + frame->key_frame = 1; + frame->interlaced_frame = 0; + frame->pict_type = AV_PICTURE_TYPE_I; + frame->sample_aspect_ratio = s->sar; + if (!s->draw_once) { + err = ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, frame, NULL, + NULL, &s->opts, sizeof(s->opts)); + if (err < 0) { + av_frame_free(&frame); + return err; + } + } + + s->pts++; + s->nb_frame++; + + return ff_filter_frame(outlink, frame); +} + +static int testsrc_vulkan_config_props(AVFilterLink *outlink) +{ + int err; + TestSrcVulkanContext *s = outlink->src->priv; + FFVulkanContext *vkctx = &s->vkctx; + + if (!s->out_format_string) { + vkctx->output_format = AV_PIX_FMT_YUV444P; + } else { + vkctx->output_format = av_get_pix_fmt(s->out_format_string); + if (vkctx->output_format == AV_PIX_FMT_NONE) { + av_log(vkctx, AV_LOG_ERROR, "Invalid output format.\n"); + return AVERROR(EINVAL); + } + + if (vkctx->output_format != AV_PIX_FMT_YUV444P) + return AVERROR(EINVAL); + } + + err = ff_vk_filter_init_context(outlink->src, vkctx, NULL, + s->w, s->h, vkctx->output_format); + if (err < 0) + return err; + + outlink->hw_frames_ctx = av_buffer_ref(vkctx->frames_ref); + if (!outlink->hw_frames_ctx) + return AVERROR(ENOMEM); + + s->time_base = av_inv_q(s->frame_rate); + s->nb_frame = 0; + s->pts = 0; + + outlink->w = s->w; + outlink->h = s->h; + outlink->sample_aspect_ratio = s->sar; + outlink->frame_rate = s->frame_rate; + outlink->time_base = s->time_base; + + return 0; +} + +static void testsrc_vulkan_uninit(AVFilterContext *avctx) +{ + TestSrcVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + + av_frame_free(&s->picref); + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl); + ff_vk_shader_free(vkctx, &s->shd); + + ff_vk_uninit(&s->vkctx); + + s->initialized = 0; +} + +#define OFFSET(x) offsetof(TestSrcVulkanContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) + +#define COMMON_OPTS \ + { "size", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, { .str = "1920x1080" }, 0, 0, FLAGS }, \ + { "s", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, { .str = "1920x1080" }, 0, 0, FLAGS }, \ + \ + { "rate", "set video rate", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, { .str = "60" }, 0, INT_MAX, FLAGS }, \ + { "r", "set video rate", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, { .str = "60" }, 0, INT_MAX, FLAGS }, \ + \ + { "duration", "set video duration", OFFSET(duration), AV_OPT_TYPE_DURATION, { .i64 = -1 }, -1, INT64_MAX, FLAGS }, \ + { "d", "set video duration", OFFSET(duration), AV_OPT_TYPE_DURATION, { .i64 = -1 }, -1, INT64_MAX, FLAGS }, \ + \ + { "sar", "set video sample aspect ratio", OFFSET(sar), AV_OPT_TYPE_RATIONAL, { .dbl = 1 }, 0, INT_MAX, FLAGS }, \ + \ + { "format", "Output video format (software format of hardware frames)", OFFSET(out_format_string), AV_OPT_TYPE_STRING, .flags = FLAGS }, + +static const AVOption color_vulkan_options[] = { + { "color", "set color", OFFSET(color_rgba), AV_OPT_TYPE_COLOR, {.str = "black"}, 0, 0, FLAGS }, + { "c", "set color", OFFSET(color_rgba), AV_OPT_TYPE_COLOR, {.str = "black"}, 0, 0, FLAGS }, + COMMON_OPTS + { "out_range", "Output colour range (from 0 to 2) (default 0)", OFFSET(out_range), AV_OPT_TYPE_INT, {.i64 = AVCOL_RANGE_UNSPECIFIED}, AVCOL_RANGE_UNSPECIFIED, AVCOL_RANGE_JPEG, .flags = FLAGS, "range" }, + { "full", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, + { "limited", "Limited range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, + { "jpeg", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, + { "mpeg", "Limited range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, + { "tv", "Limited range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, + { "pc", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, + { NULL }, +}; + +AVFILTER_DEFINE_CLASS(color_vulkan); + +static const AVFilterPad testsrc_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = testsrc_vulkan_config_props, + }, +}; + +const AVFilter ff_vsrc_color_vulkan = { + .name = "color_vulkan", + .description = NULL_IF_CONFIG_SMALL("Generate a constant color (Vulkan)"), + .priv_size = sizeof(TestSrcVulkanContext), + .init = &ff_vk_filter_init, + .uninit = &testsrc_vulkan_uninit, + .inputs = NULL, + .activate = testsrc_vulkan_activate, + FILTER_OUTPUTS(testsrc_vulkan_outputs), + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), + .priv_class = &color_vulkan_class, + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; diff --git a/libavfilter/vulkan/prefix_sum.comp b/libavfilter/vulkan/prefix_sum.comp new file mode 100644 index 0000000000000..9147cd82fbd90 --- /dev/null +++ b/libavfilter/vulkan/prefix_sum.comp @@ -0,0 +1,151 @@ +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference2 : require + +#define ACQUIRE gl_StorageSemanticsBuffer, gl_SemanticsAcquire +#define RELEASE gl_StorageSemanticsBuffer, gl_SemanticsRelease + +// These correspond to X, A, P respectively in the prefix sum paper. +#define FLAG_NOT_READY 0u +#define FLAG_AGGREGATE_READY 1u +#define FLAG_PREFIX_READY 2u + +layout(buffer_reference, buffer_reference_align = T_ALIGN) nonprivate buffer StateData { + DTYPE aggregate; + DTYPE prefix; + uint flag; +}; + +shared DTYPE sh_scratch[WG_SIZE]; +shared DTYPE sh_prefix; +shared uint sh_part_ix; +shared uint sh_flag; + +void prefix_sum(DataBuffer dst, uint dst_stride, DataBuffer src, uint src_stride) +{ + DTYPE local[N_ROWS]; + // Determine partition to process by atomic counter (described in Section 4.4 of prefix sum paper). + if (gl_GlobalInvocationID.x == 0) + sh_part_ix = gl_WorkGroupID.x; +// sh_part_ix = atomicAdd(part_counter, 1); + + barrier(); + uint part_ix = sh_part_ix; + + uint ix = part_ix * PARTITION_SIZE + gl_LocalInvocationID.x * N_ROWS; + + // TODO: gate buffer read? (evaluate whether shader check or CPU-side padding is better) + local[0] = src.v[ix*src_stride]; + for (uint i = 1; i < N_ROWS; i++) + local[i] = local[i - 1] + src.v[(ix + i)*src_stride]; + + DTYPE agg = local[N_ROWS - 1]; + sh_scratch[gl_LocalInvocationID.x] = agg; + for (uint i = 0; i < LG_WG_SIZE; i++) { + barrier(); + if (gl_LocalInvocationID.x >= (1u << i)) + agg += sh_scratch[gl_LocalInvocationID.x - (1u << i)]; + barrier(); + + sh_scratch[gl_LocalInvocationID.x] = agg; + } + + // Publish aggregate for this partition + if (gl_LocalInvocationID.x == WG_SIZE - 1) { + state[part_ix].aggregate = agg; + if (part_ix == 0) + state[0].prefix = agg; + } + + // Write flag with release semantics + if (gl_LocalInvocationID.x == WG_SIZE - 1) { + uint flag = part_ix == 0 ? FLAG_PREFIX_READY : FLAG_AGGREGATE_READY; + atomicStore(state[part_ix].flag, flag, gl_ScopeDevice, RELEASE); + } + + DTYPE exclusive = DTYPE(0); + if (part_ix != 0) { + // step 4 of paper: decoupled lookback + uint look_back_ix = part_ix - 1; + + DTYPE their_agg; + uint their_ix = 0; + while (true) { + // Read flag with acquire semantics. + if (gl_LocalInvocationID.x == WG_SIZE - 1) + sh_flag = atomicLoad(state[look_back_ix].flag, gl_ScopeDevice, ACQUIRE); + + // The flag load is done only in the last thread. However, because the + // translation of memoryBarrierBuffer to Metal requires uniform control + // flow, we broadcast it to all threads. + barrier(); + + uint flag = sh_flag; + barrier(); + + if (flag == FLAG_PREFIX_READY) { + if (gl_LocalInvocationID.x == WG_SIZE - 1) { + DTYPE their_prefix = state[look_back_ix].prefix; + exclusive = their_prefix + exclusive; + } + break; + } else if (flag == FLAG_AGGREGATE_READY) { + if (gl_LocalInvocationID.x == WG_SIZE - 1) { + their_agg = state[look_back_ix].aggregate; + exclusive = their_agg + exclusive; + } + look_back_ix--; + their_ix = 0; + continue; + } // else spins + + if (gl_LocalInvocationID.x == WG_SIZE - 1) { + // Unfortunately there's no guarantee of forward progress of other + // workgroups, so compute a bit of the aggregate before trying again. + // In the worst case, spinning stops when the aggregate is complete. + DTYPE m = src.v[(look_back_ix * PARTITION_SIZE + their_ix)*src_stride]; + if (their_ix == 0) + their_agg = m; + else + their_agg += m; + + their_ix++; + if (their_ix == PARTITION_SIZE) { + exclusive = their_agg + exclusive; + if (look_back_ix == 0) { + sh_flag = FLAG_PREFIX_READY; + } else { + look_back_ix--; + their_ix = 0; + } + } + } + barrier(); + flag = sh_flag; + barrier(); + if (flag == FLAG_PREFIX_READY) + break; + } + + // step 5 of paper: compute inclusive prefix + if (gl_LocalInvocationID.x == WG_SIZE - 1) { + DTYPE inclusive_prefix = exclusive + agg; + sh_prefix = exclusive; + state[part_ix].prefix = inclusive_prefix; + } + + if (gl_LocalInvocationID.x == WG_SIZE - 1) + atomicStore(state[part_ix].flag, FLAG_PREFIX_READY, gl_ScopeDevice, RELEASE); + } + + barrier(); + if (part_ix != 0) + exclusive = sh_prefix; + + DTYPE row = exclusive; + if (gl_LocalInvocationID.x > 0) + row += sh_scratch[gl_LocalInvocationID.x - 1]; + + // note - may overwrite + for (uint i = 0; i < N_ROWS; i++) + dst.v[(ix + i)*dst_stride] = row + local[i]; +} diff --git a/libavfilter/vulkan_filter.c b/libavfilter/vulkan_filter.c index e22541bd23a78..b4d8f952b538e 100644 --- a/libavfilter/vulkan_filter.c +++ b/libavfilter/vulkan_filter.c @@ -1,4 +1,6 @@ /* + * Copyright (c) Lynne + * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -18,107 +20,186 @@ #include "vulkan_filter.h" -static int vulkan_filter_set_device(AVFilterContext *avctx, - AVBufferRef *device) +int ff_vk_filter_init_context(AVFilterContext *avctx, FFVulkanContext *s, + AVBufferRef *frames_ref, + int width, int height, enum AVPixelFormat sw_format) { - FFVulkanContext *s = avctx->priv; + int err; + AVHWFramesContext *frames_ctx; + AVHWDeviceContext *device_ctx; + AVVulkanFramesContext *vk_frames; + AVVulkanDeviceContext *vk_dev; + AVBufferRef *device_ref = avctx->hw_device_ctx; + + /* Check if context is reusable as-is */ + if (frames_ref) { + int no_storage = 0; + FFVulkanFunctions *vk; + const VkFormat *sub = av_vkfmt_from_pixfmt(sw_format); + + frames_ctx = (AVHWFramesContext *)frames_ref->data; + device_ctx = (AVHWDeviceContext *)frames_ctx->device_ref->data; + vk_frames = frames_ctx->hwctx; + vk_dev = device_ctx->hwctx; + + /* Basic format validation */ + if (width != frames_ctx->width || + height != frames_ctx->height || + sw_format != frames_ctx->sw_format || + (vk_frames->tiling != VK_IMAGE_TILING_LINEAR && + vk_frames->tiling != VK_IMAGE_TILING_OPTIMAL) || + !(vk_frames->usage & VK_IMAGE_USAGE_SAMPLED_BIT)) { + goto skip; + } - av_buffer_unref(&s->device_ref); + if (vk_frames->usage & VK_IMAGE_USAGE_STORAGE_BIT) + goto accept; - s->device_ref = av_buffer_ref(device); - if (!s->device_ref) - return AVERROR(ENOMEM); + s->extensions = ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions, + vk_dev->nb_enabled_dev_extensions); + err = ff_vk_load_functions(device_ctx, &s->vkfn, s->extensions, 1, 1); + if (err < 0) + return err; + vk = &s->vkfn; + + /* Check if the subformats can do storage */ + for (int i = 0; sub[i] != VK_FORMAT_UNDEFINED; i++) { + VkFormatProperties2 prop = { + .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, + }; + vk->GetPhysicalDeviceFormatProperties2(vk_dev->phys_dev, sub[i], + &prop); + + if (vk_frames->tiling == VK_IMAGE_TILING_LINEAR) { + no_storage |= !(prop.formatProperties.linearTilingFeatures & + VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT); + } else { + no_storage |= !(prop.formatProperties.optimalTilingFeatures & + VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT); + } + } - s->device = (AVHWDeviceContext*)s->device_ref->data; - s->hwctx = s->device->hwctx; + /* Check if it's usable */ + if (no_storage) { +skip: + device_ref = frames_ctx->device_ref; + frames_ref = NULL; + } else { +accept: + frames_ref = av_buffer_ref(frames_ref); + if (!frames_ref) + return AVERROR(ENOMEM); + } + } - return 0; -} + if (!frames_ref) { + if (!device_ref) { + av_log(avctx, AV_LOG_ERROR, + "Vulkan filtering requires a device context!\n"); + return AVERROR(EINVAL); + } -static int vulkan_filter_set_frames(AVFilterContext *avctx, - AVBufferRef *frames) -{ - FFVulkanContext *s = avctx->priv; + frames_ref = av_hwframe_ctx_alloc(device_ref); - av_buffer_unref(&s->frames_ref); + frames_ctx = (AVHWFramesContext *)frames_ref->data; + frames_ctx->format = AV_PIX_FMT_VULKAN; + frames_ctx->sw_format = sw_format; + frames_ctx->width = width; + frames_ctx->height = height; - s->frames_ref = av_buffer_ref(frames); - if (!s->frames_ref) - return AVERROR(ENOMEM); + vk_frames = frames_ctx->hwctx; + vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL; + vk_frames->usage = VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT; - return 0; + err = av_hwframe_ctx_init(frames_ref); + if (err < 0) { + av_buffer_unref(&frames_ref); + return err; + } + + device_ctx = (AVHWDeviceContext *)frames_ctx->device_ref->data; + vk_dev = device_ctx->hwctx; + } + + s->extensions = ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions, + vk_dev->nb_enabled_dev_extensions); + + /** + * libplacebo does not use descriptor buffers. + */ + if (!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) && + strcmp(avctx->filter->name, "libplacebo")) { + av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires that " + "the %s extension is supported!\n", + VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME); + av_buffer_unref(&frames_ref); + return AVERROR(EINVAL); + } + + err = ff_vk_load_functions(device_ctx, &s->vkfn, s->extensions, 1, 1); + if (err < 0) { + av_buffer_unref(&frames_ref); + return err; + } + + s->frames_ref = frames_ref; + s->frames = frames_ctx; + s->hwfc = vk_frames; + s->device = device_ctx; + s->hwctx = device_ctx->hwctx; + + err = ff_vk_load_props(s); + if (err < 0) + av_buffer_unref(&s->frames_ref); + + return err; } int ff_vk_filter_config_input(AVFilterLink *inlink) { - int err; - AVFilterContext *avctx = inlink->dst; - FFVulkanContext *s = avctx->priv; - FFVulkanFunctions *vk = &s->vkfn; AVHWFramesContext *input_frames; + AVFilterContext *avctx = inlink->dst; + FFVulkanContext *s = inlink->dst->priv; if (!inlink->hw_frames_ctx) { - av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a " + av_log(inlink->dst, AV_LOG_ERROR, "Vulkan filtering requires a " "hardware frames context on the input.\n"); return AVERROR(EINVAL); } - /* Extract the device and default output format from the first input. */ - if (avctx->inputs[0] != inlink) - return 0; - input_frames = (AVHWFramesContext *)inlink->hw_frames_ctx->data; if (input_frames->format != AV_PIX_FMT_VULKAN) return AVERROR(EINVAL); - err = vulkan_filter_set_device(avctx, input_frames->device_ref); - if (err < 0) - return err; - err = vulkan_filter_set_frames(avctx, inlink->hw_frames_ctx); - if (err < 0) - return err; - - s->extensions = ff_vk_extensions_to_mask(s->hwctx->enabled_dev_extensions, - s->hwctx->nb_enabled_dev_extensions); - - err = ff_vk_load_functions(s->device, &s->vkfn, s->extensions, 1, 1); - if (err < 0) - return err; + /* Extract the device and default output format from the first input. */ + if (avctx->inputs[0] != inlink) + return 0; - vk->GetPhysicalDeviceProperties(s->hwctx->phys_dev, &s->props); - vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops); + /* Save the ref, without reffing it */ + s->input_frames_ref = inlink->hw_frames_ctx; - /* Default output parameters match input parameters. */ - s->input_format = input_frames->sw_format; - if (s->output_format == AV_PIX_FMT_NONE) - s->output_format = input_frames->sw_format; - if (!s->output_width) - s->output_width = inlink->w; - if (!s->output_height) - s->output_height = inlink->h; + /* Defaults */ + s->output_format = input_frames->sw_format; + s->output_width = inlink->w; + s->output_height = inlink->h; return 0; } -int ff_vk_filter_config_output_inplace(AVFilterLink *outlink) +int ff_vk_filter_config_output(AVFilterLink *outlink) { int err; - AVFilterContext *avctx = outlink->src; - FFVulkanContext *s = avctx->priv; + FFVulkanContext *s = outlink->src->priv; av_buffer_unref(&outlink->hw_frames_ctx); - if (!s->device_ref) { - if (!avctx->hw_device_ctx) { - av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a " - "Vulkan device.\n"); - return AVERROR(EINVAL); - } - - err = vulkan_filter_set_device(avctx, avctx->hw_device_ctx); - if (err < 0) - return err; - } + err = ff_vk_filter_init_context(outlink->src, s, s->input_frames_ref, + s->output_width, s->output_height, + s->output_format); + if (err < 0) + return err; outlink->hw_frames_ctx = av_buffer_ref(s->frames_ref); if (!outlink->hw_frames_ctx) @@ -127,65 +208,246 @@ int ff_vk_filter_config_output_inplace(AVFilterLink *outlink) outlink->w = s->output_width; outlink->h = s->output_height; - return 0; + return err; } -int ff_vk_filter_config_output(AVFilterLink *outlink) +int ff_vk_filter_init(AVFilterContext *avctx) { - int err; - AVFilterContext *avctx = outlink->src; FFVulkanContext *s = avctx->priv; - AVBufferRef *output_frames_ref; - AVHWFramesContext *output_frames; - - av_buffer_unref(&outlink->hw_frames_ctx); - if (!s->device_ref) { - if (!avctx->hw_device_ctx) { - av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a " - "Vulkan device.\n"); - return AVERROR(EINVAL); - } + s->output_format = AV_PIX_FMT_NONE; - err = vulkan_filter_set_device(avctx, avctx->hw_device_ctx); - if (err < 0) - return err; - } + return 0; +} - output_frames_ref = av_hwframe_ctx_alloc(s->device_ref); - if (!output_frames_ref) { - err = AVERROR(ENOMEM); - goto fail; +int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanPipeline *pl, AVFrame *out_f, AVFrame *in_f, + VkSampler sampler, void *push_src, size_t push_size) +{ + int err = 0; + FFVulkanFunctions *vk = &vkctx->vkfn; + VkImageView in_views[AV_NUM_DATA_POINTERS]; + VkImageView out_views[AV_NUM_DATA_POINTERS]; + VkImageMemoryBarrier2 img_bar[37]; + int nb_img_bar = 0; + + /* Update descriptors and init the exec context */ + FFVkExecContext *exec = ff_vk_exec_get(e); + ff_vk_exec_start(vkctx, exec); + + ff_vk_exec_bind_pipeline(vkctx, exec, pl); + + if (push_src) + ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT, + 0, push_size, push_src); + + if (in_f) { + RET(ff_vk_exec_add_dep_frame(vkctx, exec, in_f, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(vkctx, exec, in_views, in_f)); + ff_vk_update_descriptor_img_array(vkctx, pl, exec, in_f, in_views, 0, 0, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + sampler); + ff_vk_frame_barrier(vkctx, exec, in_f, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_QUEUE_FAMILY_IGNORED); } - output_frames = (AVHWFramesContext*)output_frames_ref->data; - output_frames->format = AV_PIX_FMT_VULKAN; - output_frames->sw_format = s->output_format; - output_frames->width = s->output_width; - output_frames->height = s->output_height; + RET(ff_vk_exec_add_dep_frame(vkctx, exec, out_f, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(vkctx, exec, out_views, out_f)); + ff_vk_update_descriptor_img_array(vkctx, pl, exec, out_f, out_views, 0, !!in_f, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + ff_vk_frame_barrier(vkctx, exec, out_f, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + vk->CmdDispatch(exec->buf, + FFALIGN(vkctx->output_width, pl->wg_size[0])/pl->wg_size[0], + FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1], + pl->wg_size[2]); + + return ff_vk_exec_submit(vkctx, exec); +fail: + ff_vk_exec_discard_deps(vkctx, exec); + return err; +} - err = av_hwframe_ctx_init(output_frames_ref); - if (err < 0) { - av_log(avctx, AV_LOG_ERROR, "Failed to initialise output " - "frames: %d.\n", err); - goto fail; +int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanPipeline *pls[2], + AVFrame *out, AVFrame *tmp, AVFrame *in, + VkSampler sampler, void *push_src, size_t push_size) +{ + int err = 0; + FFVulkanFunctions *vk = &vkctx->vkfn; + VkImageView in_views[AV_NUM_DATA_POINTERS]; + VkImageView tmp_views[AV_NUM_DATA_POINTERS]; + VkImageView out_views[AV_NUM_DATA_POINTERS]; + VkImageMemoryBarrier2 img_bar[37]; + int nb_img_bar = 0; + + /* Update descriptors and init the exec context */ + FFVkExecContext *exec = ff_vk_exec_get(e); + ff_vk_exec_start(vkctx, exec); + + RET(ff_vk_exec_add_dep_frame(vkctx, exec, in, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_exec_add_dep_frame(vkctx, exec, tmp, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_exec_add_dep_frame(vkctx, exec, out, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + + RET(ff_vk_create_imageviews(vkctx, exec, in_views, in)); + RET(ff_vk_create_imageviews(vkctx, exec, tmp_views, tmp)); + RET(ff_vk_create_imageviews(vkctx, exec, out_views, out)); + + ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_QUEUE_FAMILY_IGNORED); + ff_vk_frame_barrier(vkctx, exec, tmp, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + for (int i = 0; i < 2; i++) { + FFVulkanPipeline *pl = pls[i]; + AVFrame *src_f = !i ? in : tmp; + AVFrame *dst_f = !i ? tmp : out; + VkImageView *src_views = !i ? in_views : tmp_views; + VkImageView *dst_views = !i ? tmp_views : out_views; + + ff_vk_exec_bind_pipeline(vkctx, exec, pl); + + if (push_src) + ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT, + 0, push_size, push_src); + + ff_vk_update_descriptor_img_array(vkctx, pl, exec, src_f, src_views, 0, 0, + !i ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : + VK_IMAGE_LAYOUT_GENERAL, + sampler); + ff_vk_update_descriptor_img_array(vkctx, pl, exec, dst_f, dst_views, 0, 1, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + + vk->CmdDispatch(exec->buf, + FFALIGN(vkctx->output_width, pl->wg_size[0])/pl->wg_size[0], + FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1], + pl->wg_size[2]); } - outlink->hw_frames_ctx = output_frames_ref; - outlink->w = s->output_width; - outlink->h = s->output_height; - - return 0; + return ff_vk_exec_submit(vkctx, exec); fail: - av_buffer_unref(&output_frames_ref); + ff_vk_exec_discard_deps(vkctx, exec); return err; } -int ff_vk_filter_init(AVFilterContext *avctx) +int ff_vk_filter_process_Nin(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanPipeline *pl, + AVFrame *out, AVFrame *in[], int nb_in, + VkSampler sampler, void *push_src, size_t push_size) { - FFVulkanContext *s = avctx->priv; - - s->output_format = AV_PIX_FMT_NONE; + int err = 0; + FFVulkanFunctions *vk = &vkctx->vkfn; + VkImageView in_views[16][AV_NUM_DATA_POINTERS]; + VkImageView out_views[AV_NUM_DATA_POINTERS]; + VkImageMemoryBarrier2 img_bar[128]; + int nb_img_bar = 0; + + /* Update descriptors and init the exec context */ + FFVkExecContext *exec = ff_vk_exec_get(e); + ff_vk_exec_start(vkctx, exec); + + /* Inputs */ + for (int i = 0; i < nb_in; i++) { + RET(ff_vk_exec_add_dep_frame(vkctx, exec, in[i], + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(vkctx, exec, in_views[i], in[i])); + + ff_vk_frame_barrier(vkctx, exec, in[i], img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_QUEUE_FAMILY_IGNORED); + } - return 0; + /* Output */ + RET(ff_vk_exec_add_dep_frame(vkctx, exec, out, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(vkctx, exec, out_views, out)); + ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + ff_vk_exec_bind_pipeline(vkctx, exec, pl); + + if (push_src) + ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT, + 0, push_size, push_src); + + for (int i = 0; i < nb_in; i++) + ff_vk_update_descriptor_img_array(vkctx, pl, exec, in[i], in_views[i], 0, i, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + sampler); + + ff_vk_update_descriptor_img_array(vkctx, pl, exec, out, out_views, 0, nb_in, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + + vk->CmdDispatch(exec->buf, + FFALIGN(vkctx->output_width, pl->wg_size[0])/pl->wg_size[0], + FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1], + pl->wg_size[2]); + + return ff_vk_exec_submit(vkctx, exec); +fail: + ff_vk_exec_discard_deps(vkctx, exec); + return err; } diff --git a/libavfilter/vulkan_filter.h b/libavfilter/vulkan_filter.h index bfdb9b2d7d84e..d2c14601d94de 100644 --- a/libavfilter/vulkan_filter.h +++ b/libavfilter/vulkan_filter.h @@ -1,4 +1,6 @@ /* + * Copyright (c) Lynne + * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -26,9 +28,38 @@ /** * General lavfi IO functions */ -int ff_vk_filter_init (AVFilterContext *avctx); -int ff_vk_filter_config_input (AVFilterLink *inlink); -int ff_vk_filter_config_output (AVFilterLink *outlink); -int ff_vk_filter_config_output_inplace(AVFilterLink *outlink); +int ff_vk_filter_init (AVFilterContext *avctx); +int ff_vk_filter_config_input (AVFilterLink *inlink); +int ff_vk_filter_config_output(AVFilterLink *outlink); + +/** + * Can be called manually, if not using ff_vk_filter_config_output. + */ +int ff_vk_filter_init_context(AVFilterContext *avctx, FFVulkanContext *s, + AVBufferRef *frames_ref, + int width, int height, enum AVPixelFormat sw_format); + +/** + * Submit a compute shader with a zero/one input and single out for execution. + */ +int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanPipeline *pl, AVFrame *out_f, AVFrame *in_f, + VkSampler sampler, void *push_src, size_t push_size); + +/** + * Submit a compute shader with a single in and single out with 2 stages. + */ +int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanPipeline *pls[2], + AVFrame *out, AVFrame *tmp, AVFrame *in, + VkSampler sampler, void *push_src, size_t push_size); + +/** + * Up to 16 inputs, one output + */ +int ff_vk_filter_process_Nin(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanPipeline *pl, + AVFrame *out, AVFrame *in[], int nb_in, + VkSampler sampler, void *push_src, size_t push_size); #endif /* AVFILTER_VULKAN_FILTER_H */ diff --git a/libavutil/vulkan_glslang.c b/libavfilter/vulkan_glslang.c similarity index 95% rename from libavutil/vulkan_glslang.c rename to libavfilter/vulkan_glslang.c index e7785f6d4055a..845a530ee0d8d 100644 --- a/libavutil/vulkan_glslang.c +++ b/libavfilter/vulkan_glslang.c @@ -21,8 +21,9 @@ #include #include -#include "mem.h" -#include "avassert.h" +#include "vulkan_spirv.h" +#include "libavutil/mem.h" +#include "libavutil/avassert.h" static pthread_mutex_t glslc_mutex = PTHREAD_MUTEX_INITIALIZER; static int glslc_refcount = 0; @@ -176,11 +177,13 @@ static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx, av_assert0(glslc_refcount); + *opaque = NULL; + if (!(glslc_shader = glslang_shader_create(&glslc_input))) return AVERROR(ENOMEM); if (!glslang_shader_preprocess(glslc_shader, &glslc_input)) { - ff_vk_print_shader(avctx, shd, AV_LOG_WARNING); + ff_vk_shader_print(avctx, shd, AV_LOG_WARNING); av_log(avctx, AV_LOG_ERROR, "Unable to preprocess shader: %s (%s)!\n", glslang_shader_get_info_log(glslc_shader), glslang_shader_get_info_debug_log(glslc_shader)); @@ -189,7 +192,7 @@ static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx, } if (!glslang_shader_parse(glslc_shader, &glslc_input)) { - ff_vk_print_shader(avctx, shd, AV_LOG_WARNING); + ff_vk_shader_print(avctx, shd, AV_LOG_WARNING); av_log(avctx, AV_LOG_ERROR, "Unable to parse shader: %s (%s)!\n", glslang_shader_get_info_log(glslc_shader), glslang_shader_get_info_debug_log(glslc_shader)); @@ -206,7 +209,7 @@ static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx, if (!glslang_program_link(glslc_program, GLSLANG_MSG_SPV_RULES_BIT | GLSLANG_MSG_VULKAN_RULES_BIT)) { - ff_vk_print_shader(avctx, shd, AV_LOG_WARNING); + ff_vk_shader_print(avctx, shd, AV_LOG_WARNING); av_log(avctx, AV_LOG_ERROR, "Unable to link shader: %s (%s)!\n", glslang_program_get_info_log(glslc_program), glslang_program_get_info_debug_log(glslc_program)); @@ -219,10 +222,10 @@ static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx, messages = glslang_program_SPIRV_get_messages(glslc_program); if (messages) { - ff_vk_print_shader(avctx, shd, AV_LOG_WARNING); + ff_vk_shader_print(avctx, shd, AV_LOG_WARNING); av_log(avctx, AV_LOG_WARNING, "%s\n", messages); } else { - ff_vk_print_shader(avctx, shd, AV_LOG_VERBOSE); + ff_vk_shader_print(avctx, shd, AV_LOG_VERBOSE); } glslang_shader_delete(glslc_shader); @@ -257,7 +260,7 @@ static void glslc_uninit(FFVkSPIRVCompiler **ctx) av_freep(ctx); } -static FFVkSPIRVCompiler *ff_vk_glslang_init(void) +FFVkSPIRVCompiler *ff_vk_glslang_init(void) { FFVkSPIRVCompiler *ret = av_mallocz(sizeof(*ret)); if (!ret) diff --git a/libavutil/vulkan_shaderc.c b/libavfilter/vulkan_shaderc.c similarity index 96% rename from libavutil/vulkan_shaderc.c rename to libavfilter/vulkan_shaderc.c index bd40edf187671..38be1030ad20d 100644 --- a/libavutil/vulkan_shaderc.c +++ b/libavfilter/vulkan_shaderc.c @@ -18,7 +18,8 @@ #include -#include "mem.h" +#include "libavutil/mem.h" +#include "vulkan_spirv.h" static int shdc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx, FFVkSPIRVShader *shd, uint8_t **data, @@ -43,6 +44,7 @@ static int shdc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx, }; shaderc_compile_options_t opts = shaderc_compile_options_initialize(); + *opaque = NULL; if (!opts) return AVERROR(ENOMEM); @@ -65,7 +67,7 @@ static int shdc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx, loglevel = err ? AV_LOG_ERROR : warn ? AV_LOG_WARNING : AV_LOG_VERBOSE; - ff_vk_print_shader(avctx, shd, loglevel); + ff_vk_shader_print(avctx, shd, loglevel); if (message && (err || warn)) av_log(avctx, loglevel, "%s\n", message); status = ret < FF_ARRAY_ELEMS(shdc_result) ? shdc_result[ret] : "unknown"; @@ -104,7 +106,7 @@ static void shdc_uninit(FFVkSPIRVCompiler **ctx) av_freep(ctx); } -static FFVkSPIRVCompiler *ff_vk_shaderc_init(void) +FFVkSPIRVCompiler *ff_vk_shaderc_init(void) { FFVkSPIRVCompiler *ret = av_mallocz(sizeof(*ret)); if (!ret) diff --git a/libavfilter/vulkan_spirv.h b/libavfilter/vulkan_spirv.h new file mode 100644 index 0000000000000..5638cd9696a0c --- /dev/null +++ b/libavfilter/vulkan_spirv.h @@ -0,0 +1,45 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVFILTER_VULKAN_SPIRV_H +#define AVFILTER_VULKAN_SPIRV_H + +#include "libavutil/vulkan.h" + +#include "vulkan.h" +#include "config.h" + +typedef struct FFVkSPIRVCompiler { + void *priv; + int (*compile_shader)(struct FFVkSPIRVCompiler *ctx, void *avctx, + struct FFVkSPIRVShader *shd, uint8_t **data, + size_t *size, const char *entrypoint, void **opaque); + void (*free_shader)(struct FFVkSPIRVCompiler *ctx, void **opaque); + void (*uninit)(struct FFVkSPIRVCompiler **ctx); +} FFVkSPIRVCompiler; + +#if CONFIG_LIBGLSLANG +FFVkSPIRVCompiler *ff_vk_glslang_init(void); +#define ff_vk_spirv_init ff_vk_glslang_init +#endif +#if CONFIG_LIBSHADERC +FFVkSPIRVCompiler *ff_vk_shaderc_init(void); +#define ff_vk_spirv_init ff_vk_shaderc_init +#endif + +#endif /* AVFILTER_VULKAN_H */ diff --git a/libavutil/Makefile b/libavutil/Makefile index dc9012f9a83a7..bd9c6f9e32778 100644 --- a/libavutil/Makefile +++ b/libavutil/Makefile @@ -195,7 +195,7 @@ OBJS-$(CONFIG_QSV) += hwcontext_qsv.o OBJS-$(CONFIG_VAAPI) += hwcontext_vaapi.o OBJS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.o OBJS-$(CONFIG_VDPAU) += hwcontext_vdpau.o -OBJS-$(CONFIG_VULKAN) += hwcontext_vulkan.o +OBJS-$(CONFIG_VULKAN) += hwcontext_vulkan.o vulkan.o OBJS-$(!CONFIG_VULKAN) += hwcontext_stub.o diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index ffd4f5dec4ec5..80d86cde5751f 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -1,4 +1,6 @@ /* + * Copyright (c) Lynne + * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -27,6 +29,7 @@ #include #endif +#include "thread.h" #include #include "config.h" @@ -41,14 +44,19 @@ #include "vulkan.h" #include "vulkan_loader.h" +#if CONFIG_VAAPI +#include "hwcontext_vaapi.h" +#endif + #if CONFIG_LIBDRM -#include -#include -#include "hwcontext_drm.h" #if CONFIG_VAAPI #include -#include "hwcontext_vaapi.h" #endif +#include +#include +#include +#include +#include "hwcontext_drm.h" #endif #if CONFIG_CUDA @@ -61,25 +69,22 @@ typedef struct VulkanQueueCtx { VkFence fence; VkQueue queue; int was_synchronous; + int qf; + int qidx; /* Buffer dependencies */ AVBufferRef **buf_deps; int nb_buf_deps; - int buf_deps_alloc_size; + unsigned int buf_deps_alloc_size; } VulkanQueueCtx; -typedef struct VulkanExecCtx { - VkCommandPool pool; - VkCommandBuffer *bufs; - VulkanQueueCtx *queues; - int nb_queues; - int cur_queue_idx; -} VulkanExecCtx; - typedef struct VulkanDevicePriv { /* Vulkan library and loader functions */ void *libvulkan; - FFVulkanFunctions vkfn; + + FFVulkanContext vkctx; + FFVkQueueFamilyCtx compute_qf; + FFVkQueueFamilyCtx transfer_qf; /* Properties */ VkPhysicalDeviceProperties2 props; @@ -89,43 +94,47 @@ typedef struct VulkanDevicePriv { /* Features */ VkPhysicalDeviceVulkan11Features device_features_1_1; VkPhysicalDeviceVulkan12Features device_features_1_2; + VkPhysicalDeviceVulkan13Features device_features_1_3; + VkPhysicalDeviceDescriptorBufferFeaturesEXT desc_buf_features; + VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float_features; /* Queues */ - uint32_t qfs[5]; - int num_qfs; + pthread_mutex_t **qf_mutex; + uint32_t nb_tot_qfs; + uint32_t img_qfs[5]; + uint32_t nb_img_qfs; /* Debug callback */ VkDebugUtilsMessengerEXT debug_ctx; - /* Extensions */ - FFVulkanExtensions extensions; - /* Settings */ int use_linear_images; /* Option to allocate all image planes in a single allocation */ int contiguous_planes; + /* Disable multiplane images */ + int disable_multiplane; + /* Nvidia */ int dev_is_nvidia; - - /* Intel */ - int dev_is_intel; } VulkanDevicePriv; typedef struct VulkanFramesPriv { /* Image conversions */ - VulkanExecCtx conv_ctx; + FFVkExecPool compute_exec; /* Image transfers */ - VulkanExecCtx upload_ctx; - VulkanExecCtx download_ctx; + FFVkExecPool upload_exec; + FFVkExecPool download_exec; /* Modifier info list to free at uninit */ VkImageDrmFormatModifierListCreateInfoEXT *modifier_info; } VulkanFramesPriv; typedef struct AVVkFrameInternal { + pthread_mutex_t update_mutex; + #if CONFIG_CUDA /* Importing external memory into cuda is really expensive so we keep the * memory imported all the time */ @@ -141,162 +150,205 @@ typedef struct AVVkFrameInternal { #endif } AVVkFrameInternal; -#define ADD_VAL_TO_LIST(list, count, val) \ - do { \ - list = av_realloc_array(list, sizeof(*list), ++count); \ - if (!list) { \ - err = AVERROR(ENOMEM); \ - goto fail; \ - } \ - list[count - 1] = av_strdup(val); \ - if (!list[count - 1]) { \ - err = AVERROR(ENOMEM); \ - goto fail; \ - } \ - } while(0) - -#define RELEASE_PROPS(props, count) \ - if (props) { \ - for (int i = 0; i < count; i++) \ - av_free((void *)((props)[i])); \ - av_free((void *)props); \ - } +#define ASPECT_2PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT) +#define ASPECT_3PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT) -static const struct { +static const struct FFVkFormatEntry { + VkFormat vkf; enum AVPixelFormat pixfmt; - const VkFormat vkfmts[4]; -} vk_pixfmt_map[] = { - { AV_PIX_FMT_GRAY8, { VK_FORMAT_R8_UNORM } }, - { AV_PIX_FMT_GRAY16, { VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } }, - - { AV_PIX_FMT_NV12, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, - { AV_PIX_FMT_NV21, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, - { AV_PIX_FMT_P010, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, - { AV_PIX_FMT_P012, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, - { AV_PIX_FMT_P016, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, - - { AV_PIX_FMT_NV16, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, - - { AV_PIX_FMT_NV24, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, - { AV_PIX_FMT_NV42, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, - - { AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { AV_PIX_FMT_YUV420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_YUV420P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_YUV420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - - { AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { AV_PIX_FMT_YUV422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_YUV422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_YUV422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - - { AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { AV_PIX_FMT_YUV444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_YUV444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_YUV444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - - { AV_PIX_FMT_YUVA420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { AV_PIX_FMT_YUVA420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - /* There is no AV_PIX_FMT_YUVA420P12 */ - { AV_PIX_FMT_YUVA420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - - { AV_PIX_FMT_YUVA422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { AV_PIX_FMT_YUVA422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_YUVA422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_YUVA422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - - { AV_PIX_FMT_YUVA444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { AV_PIX_FMT_YUVA444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_YUVA444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_YUVA444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - - { AV_PIX_FMT_VUYX, { VK_FORMAT_R8G8B8A8_UNORM } }, - { AV_PIX_FMT_XV36, { VK_FORMAT_R16G16B16A16_UNORM } }, - - { AV_PIX_FMT_BGRA, { VK_FORMAT_B8G8R8A8_UNORM } }, - { AV_PIX_FMT_RGBA, { VK_FORMAT_R8G8B8A8_UNORM } }, - { AV_PIX_FMT_RGB24, { VK_FORMAT_R8G8B8_UNORM } }, - { AV_PIX_FMT_BGR24, { VK_FORMAT_B8G8R8_UNORM } }, - { AV_PIX_FMT_RGB48, { VK_FORMAT_R16G16B16_UNORM } }, - { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } }, - { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } }, - { AV_PIX_FMT_RGB565, { VK_FORMAT_R5G6B5_UNORM_PACK16 } }, - { AV_PIX_FMT_BGR565, { VK_FORMAT_B5G6R5_UNORM_PACK16 } }, - { AV_PIX_FMT_BGR0, { VK_FORMAT_B8G8R8A8_UNORM } }, - { AV_PIX_FMT_RGB0, { VK_FORMAT_R8G8B8A8_UNORM } }, - - /* Lower priority as there's an endianess-dependent overlap between these - * and rgba/bgr0, and PACK32 formats are more limited */ - { AV_PIX_FMT_BGR32, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } }, - { AV_PIX_FMT_0BGR32, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } }, - - { AV_PIX_FMT_X2RGB10, { VK_FORMAT_A2R10G10B10_UNORM_PACK32 } }, - - { AV_PIX_FMT_GBRAP, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { AV_PIX_FMT_GBRAP16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } }, - { AV_PIX_FMT_GBRAPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } }, + VkImageAspectFlags aspect; + int vk_planes; + int nb_images; + int nb_images_fallback; + const VkFormat fallback[5]; +} vk_formats_list[] = { + /* Gray formats */ + { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GRAY8, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GRAY16, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GRAYF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32_SFLOAT } }, + + /* RGB formats */ + { VK_FORMAT_R16G16B16A16_UNORM, AV_PIX_FMT_XV36, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, + { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_BGRA, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } }, + { VK_FORMAT_R8G8B8A8_UNORM, AV_PIX_FMT_RGBA, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } }, + { VK_FORMAT_R8G8B8_UNORM, AV_PIX_FMT_RGB24, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8_UNORM } }, + { VK_FORMAT_B8G8R8_UNORM, AV_PIX_FMT_BGR24, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8_UNORM } }, + { VK_FORMAT_R16G16B16_UNORM, AV_PIX_FMT_RGB48, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16_UNORM } }, + { VK_FORMAT_R16G16B16A16_UNORM, AV_PIX_FMT_RGBA64, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, + { VK_FORMAT_R5G6B5_UNORM_PACK16, AV_PIX_FMT_RGB565, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R5G6B5_UNORM_PACK16 } }, + { VK_FORMAT_B5G6R5_UNORM_PACK16, AV_PIX_FMT_BGR565, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B5G6R5_UNORM_PACK16 } }, + { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_BGR0, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } }, + { VK_FORMAT_R8G8B8A8_UNORM, AV_PIX_FMT_RGB0, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } }, + { VK_FORMAT_A2R10G10B10_UNORM_PACK32, AV_PIX_FMT_X2RGB10, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_A2R10G10B10_UNORM_PACK32 } }, + + /* Planar RGB */ + { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GBRAP, VK_IMAGE_ASPECT_COLOR_BIT, 1, 4, 4, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRAP16, VK_IMAGE_ASPECT_COLOR_BIT, 1, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRPF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 3, 3, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } }, + { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRAPF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 4, 4, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } }, + + /* Two-plane 420 YUV at 8, 10, 12 and 16 bits */ + { VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, AV_PIX_FMT_NV12, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, + { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P010, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P012, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + { VK_FORMAT_G16_B16R16_2PLANE_420_UNORM, AV_PIX_FMT_P016, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + + /* Two-plane 422 YUV at 8, 10 and 16 bits */ + { VK_FORMAT_G8_B8R8_2PLANE_422_UNORM, AV_PIX_FMT_NV16, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, + { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P210, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P212, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + { VK_FORMAT_G16_B16R16_2PLANE_422_UNORM, AV_PIX_FMT_P216, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + + /* Two-plane 444 YUV at 8, 10 and 16 bits */ + { VK_FORMAT_G8_B8R8_2PLANE_444_UNORM, AV_PIX_FMT_NV24, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, + { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P410, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P412, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + { VK_FORMAT_G16_B16R16_2PLANE_444_UNORM, AV_PIX_FMT_P416, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + + /* Three-plane 420, 422, 444 at 8, 10, 12 and 16 bits */ + { VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + + /* Single plane 422 at 8, 10 and 12 bits */ + { VK_FORMAT_G8B8G8R8_422_UNORM, AV_PIX_FMT_YUYV422, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } }, + { VK_FORMAT_B8G8R8G8_422_UNORM, AV_PIX_FMT_UYVY422, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } }, + { VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16, AV_PIX_FMT_Y210, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, + { VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16, AV_PIX_FMT_Y212, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, }; +static const int nb_vk_formats_list = FF_ARRAY_ELEMS(vk_formats_list); const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p) { - for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_map); i++) - if (vk_pixfmt_map[i].pixfmt == p) - return vk_pixfmt_map[i].vkfmts; + for (int i = 0; i < nb_vk_formats_list; i++) + if (vk_formats_list[i].pixfmt == p) + return vk_formats_list[i].fallback; return NULL; } -static const void *vk_find_struct(const void *chain, VkStructureType stype) +static const struct FFVkFormatEntry *vk_find_format_entry(enum AVPixelFormat p) { - const VkBaseInStructure *in = chain; - while (in) { - if (in->sType == stype) - return in; - - in = in->pNext; - } - + for (int i = 0; i < nb_vk_formats_list; i++) + if (vk_formats_list[i].pixfmt == p) + return &vk_formats_list[i]; return NULL; } -static void vk_link_struct(void *chain, void *in) -{ - VkBaseOutStructure *out = chain; - if (!in) - return; - - while (out->pNext) - out = out->pNext; - - out->pNext = in; -} - -static int pixfmt_is_supported(AVHWDeviceContext *dev_ctx, enum AVPixelFormat p, - int linear) +/* Malitia pura, Khronos */ +#define FN_MAP_TO(dst_t, dst_name, src_t, src_name) \ + static av_unused dst_t map_ ##src_name## _to_ ##dst_name(src_t src) \ + { \ + dst_t dst = 0x0; \ + MAP_TO(VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT, \ + VK_IMAGE_USAGE_SAMPLED_BIT); \ + MAP_TO(VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT, \ + VK_IMAGE_USAGE_TRANSFER_SRC_BIT); \ + MAP_TO(VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT, \ + VK_IMAGE_USAGE_TRANSFER_DST_BIT); \ + MAP_TO(VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT, \ + VK_IMAGE_USAGE_STORAGE_BIT); \ + MAP_TO(VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT, \ + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); \ + MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_DECODE_OUTPUT_BIT_KHR, \ + VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR); \ + MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_DECODE_DPB_BIT_KHR, \ + VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR); \ + MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_ENCODE_DPB_BIT_KHR, \ + VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR); \ + MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_ENCODE_INPUT_BIT_KHR, \ + VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR); \ + return dst; \ + } + +#define MAP_TO(flag1, flag2) if (src & flag2) dst |= flag1; +FN_MAP_TO(VkFormatFeatureFlagBits2, feats, VkImageUsageFlags, usage) +#undef MAP_TO +#define MAP_TO(flag1, flag2) if (src & flag1) dst |= flag2; +FN_MAP_TO(VkImageUsageFlags, usage, VkFormatFeatureFlagBits2, feats) +#undef MAP_TO +#undef FN_MAP_TO + +static int vkfmt_from_pixfmt2(AVHWDeviceContext *dev_ctx, enum AVPixelFormat p, + VkImageTiling tiling, + VkFormat fmts[AV_NUM_DATA_POINTERS], + int *nb_images, VkImageAspectFlags *aspect, + VkImageUsageFlags *supported_usage, int disable_multiplane) { AVVulkanDeviceContext *hwctx = dev_ctx->hwctx; VulkanDevicePriv *priv = dev_ctx->internal->priv; - FFVulkanFunctions *vk = &priv->vkfn; - const VkFormat *fmt = av_vkfmt_from_pixfmt(p); - int planes = av_pix_fmt_count_planes(p); + FFVulkanFunctions *vk = &priv->vkctx.vkfn; - if (!fmt) - return 0; + const VkFormatFeatureFlagBits2 basic_flags = VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT | + VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT | + VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT; - for (int i = 0; i < planes; i++) { - VkFormatFeatureFlags flags; - VkFormatProperties2 prop = { - .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, - }; - vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev, fmt[i], &prop); - flags = linear ? prop.formatProperties.linearTilingFeatures : - prop.formatProperties.optimalTilingFeatures; - if (!(flags & FF_VK_DEFAULT_USAGE_FLAGS)) - return 0; + for (int i = 0; i < nb_vk_formats_list; i++) { + if (vk_formats_list[i].pixfmt == p) { + VkFormatProperties2 prop = { + .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, + }; + VkFormatFeatureFlagBits2 feats_primary, feats_secondary; + int basics_primary = 0, basics_secondary = 0; + + vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev, + vk_formats_list[i].vkf, + &prop); + + feats_primary = tiling == VK_IMAGE_TILING_LINEAR ? + prop.formatProperties.linearTilingFeatures : + prop.formatProperties.optimalTilingFeatures; + basics_primary = (feats_primary & basic_flags) == basic_flags; + + if (vk_formats_list[i].vkf != vk_formats_list[i].fallback[0]) { + vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev, + vk_formats_list[i].fallback[0], + &prop); + feats_secondary = tiling == VK_IMAGE_TILING_LINEAR ? + prop.formatProperties.linearTilingFeatures : + prop.formatProperties.optimalTilingFeatures; + basics_secondary = (feats_secondary & basic_flags) == basic_flags; + } else { + basics_secondary = basics_primary; + } + + if (basics_primary && !(disable_multiplane && vk_formats_list[i].vk_planes > 1)) { + if (fmts) + fmts[0] = vk_formats_list[i].vkf; + if (nb_images) + *nb_images = 1; + if (aspect) + *aspect = vk_formats_list[i].aspect; + if (supported_usage) + *supported_usage = map_feats_to_usage(feats_primary); + return 0; + } else if (basics_secondary) { + if (fmts) { + for (int j = 0; j < vk_formats_list[i].nb_images_fallback; j++) + fmts[j] = vk_formats_list[i].fallback[j]; + } + if (nb_images) + *nb_images = vk_formats_list[i].nb_images_fallback; + if (aspect) + *aspect = vk_formats_list[i].aspect; + if (supported_usage) + *supported_usage = map_feats_to_usage(feats_secondary); + return 0; + } else { + return AVERROR(ENOTSUP); + } + } } - return 1; + return AVERROR(EINVAL); } static int load_libvulkan(AVHWDeviceContext *ctx) @@ -339,14 +391,18 @@ typedef struct VulkanOptExtension { } VulkanOptExtension; static const VulkanOptExtension optional_instance_exts[] = { - /* For future use */ + /* Pointless, here avoid zero-sized structs */ + { VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, }; static const VulkanOptExtension optional_device_exts[] = { /* Misc or required by other extensions */ + { VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, { VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, - { VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, + { VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, FF_VK_EXT_DESCRIPTOR_BUFFER, }, + { VK_EXT_PHYSICAL_DEVICE_DRM_EXTENSION_NAME, FF_VK_EXT_DEVICE_DRM }, + { VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME, FF_VK_EXT_ATOMIC_FLOAT }, /* Imports/exports */ { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_MEMORY }, @@ -358,48 +414,21 @@ static const VulkanOptExtension optional_device_exts[] = { { VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_MEMORY }, { VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM }, #endif -}; -/* Converts return values to strings */ -static const char *vk_ret2str(VkResult res) -{ -#define CASE(VAL) case VAL: return #VAL - switch (res) { - CASE(VK_SUCCESS); - CASE(VK_NOT_READY); - CASE(VK_TIMEOUT); - CASE(VK_EVENT_SET); - CASE(VK_EVENT_RESET); - CASE(VK_INCOMPLETE); - CASE(VK_ERROR_OUT_OF_HOST_MEMORY); - CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY); - CASE(VK_ERROR_INITIALIZATION_FAILED); - CASE(VK_ERROR_DEVICE_LOST); - CASE(VK_ERROR_MEMORY_MAP_FAILED); - CASE(VK_ERROR_LAYER_NOT_PRESENT); - CASE(VK_ERROR_EXTENSION_NOT_PRESENT); - CASE(VK_ERROR_FEATURE_NOT_PRESENT); - CASE(VK_ERROR_INCOMPATIBLE_DRIVER); - CASE(VK_ERROR_TOO_MANY_OBJECTS); - CASE(VK_ERROR_FORMAT_NOT_SUPPORTED); - CASE(VK_ERROR_FRAGMENTED_POOL); - CASE(VK_ERROR_SURFACE_LOST_KHR); - CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR); - CASE(VK_SUBOPTIMAL_KHR); - CASE(VK_ERROR_OUT_OF_DATE_KHR); - CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR); - CASE(VK_ERROR_VALIDATION_FAILED_EXT); - CASE(VK_ERROR_INVALID_SHADER_NV); - CASE(VK_ERROR_OUT_OF_POOL_MEMORY); - CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE); - CASE(VK_ERROR_NOT_PERMITTED_EXT); - CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT); - CASE(VK_ERROR_INVALID_DEVICE_ADDRESS_EXT); - CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT); - default: return "Unknown error"; - } -#undef CASE -} + /* Video encoding/decoding */ + { VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_QUEUE }, + { VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_QUEUE }, + { VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_QUEUE }, +#if CONFIG_VULKAN_ENCODE + { VK_EXT_VIDEO_ENCODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H264 }, +#endif + { VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H264 }, +#if CONFIG_VULKAN_ENCODE + { VK_EXT_VIDEO_ENCODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H265 }, +#endif + { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 }, + { "VK_MESA_video_decode_av1", FF_VK_EXT_VIDEO_DECODE_AV1 }, +}; static VkBool32 VKAPI_CALL vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, VkDebugUtilsMessageTypeFlagsEXT messageType, @@ -424,13 +453,34 @@ static VkBool32 VKAPI_CALL vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEX return 0; } +#define ADD_VAL_TO_LIST(list, count, val) \ + do { \ + list = av_realloc_array(list, sizeof(*list), ++count); \ + if (!list) { \ + err = AVERROR(ENOMEM); \ + goto fail; \ + } \ + list[count - 1] = av_strdup(val); \ + if (!list[count - 1]) { \ + err = AVERROR(ENOMEM); \ + goto fail; \ + } \ + } while(0) + +#define RELEASE_PROPS(props, count) \ + if (props) { \ + for (int i = 0; i < count; i++) \ + av_free((void *)((props)[i])); \ + av_free((void *)props); \ + } + static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts, const char * const **dst, uint32_t *num, int debug) { const char *tstr; const char **extension_names = NULL; VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; AVVulkanDeviceContext *hwctx = ctx->hwctx; int err = 0, found, extensions_found = 0; @@ -493,7 +543,7 @@ static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts, continue; av_log(ctx, AV_LOG_VERBOSE, "Using %s extension %s\n", mod, tstr); - p->extensions |= optional_exts[i].flag; + p->vkctx.extensions |= optional_exts[i].flag; ADD_VAL_TO_LIST(extension_names, extensions_found, tstr); } @@ -509,7 +559,7 @@ static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts, if (found) { av_log(ctx, AV_LOG_VERBOSE, "Using %s extension %s\n", mod, tstr); ADD_VAL_TO_LIST(extension_names, extensions_found, tstr); - p->extensions |= FF_VK_EXT_DEBUG_UTILS; + p->vkctx.extensions |= FF_VK_EXT_DEBUG_UTILS; } else { av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n", tstr); @@ -561,7 +611,7 @@ static int check_validation_layers(AVHWDeviceContext *ctx, AVDictionary *opts, int found = 0, err = 0; VulkanDevicePriv *priv = ctx->internal->priv; - FFVulkanFunctions *vk = &priv->vkfn; + FFVulkanFunctions *vk = &priv->vkctx.vkfn; uint32_t sup_layer_count; VkLayerProperties *sup_layers; @@ -668,16 +718,23 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts) int err = 0, debug_mode = 0; VkResult ret; VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; AVVulkanDeviceContext *hwctx = ctx->hwctx; VkApplicationInfo application_info = { .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, + .pApplicationName = "ffmpeg", + .applicationVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR, + LIBAVUTIL_VERSION_MINOR, + LIBAVUTIL_VERSION_MICRO), .pEngineName = "libavutil", - .apiVersion = VK_API_VERSION_1_2, + .apiVersion = VK_API_VERSION_1_3, .engineVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR, LIBAVUTIL_VERSION_MINOR, LIBAVUTIL_VERSION_MICRO), }; + VkValidationFeaturesEXT validation_features = { + .sType = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT, + }; VkInstanceCreateInfo inst_props = { .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, .pApplicationInfo = &application_info, @@ -689,7 +746,7 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts) return err; } - err = ff_vk_load_functions(ctx, vk, p->extensions, 0, 0); + err = ff_vk_load_functions(ctx, vk, p->vkctx.extensions, 0, 0); if (err < 0) { av_log(ctx, AV_LOG_ERROR, "Unable to load instance enumeration functions!\n"); return err; @@ -708,18 +765,29 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts) if (err < 0) goto fail; + if (debug_mode) { + VkValidationFeatureEnableEXT feat_list[] = { + VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT, + VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT, + VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT, + }; + validation_features.pEnabledValidationFeatures = feat_list; + validation_features.enabledValidationFeatureCount = FF_ARRAY_ELEMS(feat_list); + inst_props.pNext = &validation_features; + } + /* Try to create the instance */ ret = vk->CreateInstance(&inst_props, hwctx->alloc, &hwctx->inst); /* Check for errors */ if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Instance creation failure: %s\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); err = AVERROR_EXTERNAL; goto fail; } - err = ff_vk_load_functions(ctx, vk, p->extensions, 1, 0); + err = ff_vk_load_functions(ctx, vk, p->vkctx.extensions, 1, 0); if (err < 0) { av_log(ctx, AV_LOG_ERROR, "Unable to load instance functions!\n"); goto fail; @@ -753,8 +821,11 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts) typedef struct VulkanDeviceSelection { uint8_t uuid[VK_UUID_SIZE]; /* Will use this first unless !has_uuid */ int has_uuid; - const char *name; /* Will use this second unless NULL */ - uint32_t pci_device; /* Will use this third unless 0x0 */ + uint32_t drm_major; /* Will use this second unless !has_drm */ + uint32_t drm_minor; /* Will use this second unless !has_drm */ + uint32_t has_drm; /* has drm node info */ + const char *name; /* Will use this third unless NULL */ + uint32_t pci_device; /* Will use this fourth unless 0x0 */ uint32_t vendor_id; /* Last resort to find something deterministic */ int index; /* Finally fall back to index */ } VulkanDeviceSelection; @@ -777,15 +848,16 @@ static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select) uint32_t num; VkResult ret; VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; VkPhysicalDevice *devices = NULL; VkPhysicalDeviceIDProperties *idp = NULL; VkPhysicalDeviceProperties2 *prop = NULL; + VkPhysicalDeviceDrmPropertiesEXT *drm_prop = NULL; AVVulkanDeviceContext *hwctx = ctx->hwctx; ret = vk->EnumeratePhysicalDevices(hwctx->inst, &num, NULL); if (ret != VK_SUCCESS || !num) { - av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", vk_ret2str(ret)); + av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", ff_vk_ret2str(ret)); return AVERROR(ENODEV); } @@ -796,7 +868,7 @@ static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select) ret = vk->EnumeratePhysicalDevices(hwctx->inst, &num, devices); if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Failed enumerating devices: %s\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); err = AVERROR(ENODEV); goto end; } @@ -813,8 +885,20 @@ static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select) goto end; } + if (p->vkctx.extensions & FF_VK_EXT_DEVICE_DRM) { + drm_prop = av_calloc(num, sizeof(*drm_prop)); + if (!drm_prop) { + err = AVERROR(ENOMEM); + goto end; + } + } + av_log(ctx, AV_LOG_VERBOSE, "GPU listing:\n"); for (int i = 0; i < num; i++) { + if (p->vkctx.extensions & FF_VK_EXT_DEVICE_DRM) { + drm_prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT; + idp[i].pNext = &drm_prop[i]; + } idp[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES; prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; prop[i].pNext = &idp[i]; @@ -836,6 +920,20 @@ static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select) av_log(ctx, AV_LOG_ERROR, "Unable to find device by given UUID!\n"); err = AVERROR(ENODEV); goto end; + } else if ((p->vkctx.extensions & FF_VK_EXT_DEVICE_DRM) && select->has_drm) { + for (int i = 0; i < num; i++) { + if ((select->drm_major == drm_prop[i].primaryMajor && + select->drm_minor == drm_prop[i].primaryMinor) || + (select->drm_major == drm_prop[i].renderMajor && + select->drm_minor == drm_prop[i].renderMinor)) { + choice = i; + goto end; + } + } + av_log(ctx, AV_LOG_ERROR, "Unable to find device by given DRM node numbers %i:%i!\n", + select->drm_major, select->drm_minor); + err = AVERROR(ENODEV); + goto end; } else if (select->name) { av_log(ctx, AV_LOG_VERBOSE, "Requested device: %s\n", select->name); for (int i = 0; i < num; i++) { @@ -895,6 +993,7 @@ static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select) av_free(devices); av_free(prop); av_free(idp); + av_free(drm_prop); return err; } @@ -929,7 +1028,7 @@ static int setup_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd) float *weights; VkQueueFamilyProperties *qf = NULL; VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; AVVulkanDeviceContext *hwctx = ctx->hwctx; int graph_index, comp_index, tx_index, enc_index, dec_index; @@ -1057,237 +1156,10 @@ static int setup_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd) return 0; } -static int create_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd, - int queue_family_index, int num_queues) -{ - VkResult ret; - AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; - VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; - - VkCommandPoolCreateInfo cqueue_create = { - .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, - .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, - .queueFamilyIndex = queue_family_index, - }; - VkCommandBufferAllocateInfo cbuf_create = { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, - .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, - .commandBufferCount = num_queues, - }; - - cmd->nb_queues = num_queues; - - /* Create command pool */ - ret = vk->CreateCommandPool(hwctx->act_dev, &cqueue_create, - hwctx->alloc, &cmd->pool); - if (ret != VK_SUCCESS) { - av_log(hwfc, AV_LOG_ERROR, "Command pool creation failure: %s\n", - vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - cmd->bufs = av_mallocz(num_queues * sizeof(*cmd->bufs)); - if (!cmd->bufs) - return AVERROR(ENOMEM); - - cbuf_create.commandPool = cmd->pool; - - /* Allocate command buffer */ - ret = vk->AllocateCommandBuffers(hwctx->act_dev, &cbuf_create, cmd->bufs); - if (ret != VK_SUCCESS) { - av_log(hwfc, AV_LOG_ERROR, "Command buffer alloc failure: %s\n", - vk_ret2str(ret)); - av_freep(&cmd->bufs); - return AVERROR_EXTERNAL; - } - - cmd->queues = av_mallocz(num_queues * sizeof(*cmd->queues)); - if (!cmd->queues) - return AVERROR(ENOMEM); - - for (int i = 0; i < num_queues; i++) { - VulkanQueueCtx *q = &cmd->queues[i]; - vk->GetDeviceQueue(hwctx->act_dev, queue_family_index, i, &q->queue); - q->was_synchronous = 1; - } - - return 0; -} - -static void free_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd) -{ - AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; - VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; - - if (cmd->queues) { - for (int i = 0; i < cmd->nb_queues; i++) { - VulkanQueueCtx *q = &cmd->queues[i]; - - /* Make sure all queues have finished executing */ - if (q->fence && !q->was_synchronous) { - vk->WaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); - vk->ResetFences(hwctx->act_dev, 1, &q->fence); - } - - /* Free the fence */ - if (q->fence) - vk->DestroyFence(hwctx->act_dev, q->fence, hwctx->alloc); - - /* Free buffer dependencies */ - for (int j = 0; j < q->nb_buf_deps; j++) - av_buffer_unref(&q->buf_deps[j]); - av_free(q->buf_deps); - } - } - - if (cmd->bufs) - vk->FreeCommandBuffers(hwctx->act_dev, cmd->pool, cmd->nb_queues, cmd->bufs); - if (cmd->pool) - vk->DestroyCommandPool(hwctx->act_dev, cmd->pool, hwctx->alloc); - - av_freep(&cmd->queues); - av_freep(&cmd->bufs); - cmd->pool = VK_NULL_HANDLE; -} - -static VkCommandBuffer get_buf_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd) -{ - return cmd->bufs[cmd->cur_queue_idx]; -} - -static void unref_exec_ctx_deps(AVHWFramesContext *hwfc, VulkanExecCtx *cmd) -{ - VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx]; - - for (int j = 0; j < q->nb_buf_deps; j++) - av_buffer_unref(&q->buf_deps[j]); - q->nb_buf_deps = 0; -} - -static int wait_start_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd) -{ - VkResult ret; - AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; - VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx]; - VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; - - VkCommandBufferBeginInfo cmd_start = { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, - }; - - /* Create the fence and don't wait for it initially */ - if (!q->fence) { - VkFenceCreateInfo fence_spawn = { - .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, - }; - ret = vk->CreateFence(hwctx->act_dev, &fence_spawn, hwctx->alloc, - &q->fence); - if (ret != VK_SUCCESS) { - av_log(hwfc, AV_LOG_ERROR, "Failed to queue frame fence: %s\n", - vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } else if (!q->was_synchronous) { - vk->WaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); - vk->ResetFences(hwctx->act_dev, 1, &q->fence); - } - - /* Discard queue dependencies */ - unref_exec_ctx_deps(hwfc, cmd); - - ret = vk->BeginCommandBuffer(cmd->bufs[cmd->cur_queue_idx], &cmd_start); - if (ret != VK_SUCCESS) { - av_log(hwfc, AV_LOG_ERROR, "Unable to init command buffer: %s\n", - vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - return 0; -} - -static int add_buf_dep_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd, - AVBufferRef * const *deps, int nb_deps) -{ - AVBufferRef **dst; - VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx]; - - if (!deps || !nb_deps) - return 0; - - dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size, - (q->nb_buf_deps + nb_deps) * sizeof(*dst)); - if (!dst) - goto err; - - q->buf_deps = dst; - - for (int i = 0; i < nb_deps; i++) { - q->buf_deps[q->nb_buf_deps] = av_buffer_ref(deps[i]); - if (!q->buf_deps[q->nb_buf_deps]) - goto err; - q->nb_buf_deps++; - } - - return 0; - -err: - unref_exec_ctx_deps(hwfc, cmd); - return AVERROR(ENOMEM); -} - -static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd, - VkSubmitInfo *s_info, AVVkFrame *f, int synchronous) -{ - VkResult ret; - VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx]; - VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; - - ret = vk->EndCommandBuffer(cmd->bufs[cmd->cur_queue_idx]); - if (ret != VK_SUCCESS) { - av_log(hwfc, AV_LOG_ERROR, "Unable to finish command buffer: %s\n", - vk_ret2str(ret)); - unref_exec_ctx_deps(hwfc, cmd); - return AVERROR_EXTERNAL; - } - - s_info->pCommandBuffers = &cmd->bufs[cmd->cur_queue_idx]; - s_info->commandBufferCount = 1; - - ret = vk->QueueSubmit(q->queue, 1, s_info, q->fence); - if (ret != VK_SUCCESS) { - av_log(hwfc, AV_LOG_ERROR, "Queue submission failure: %s\n", - vk_ret2str(ret)); - unref_exec_ctx_deps(hwfc, cmd); - return AVERROR_EXTERNAL; - } - - if (f) - for (int i = 0; i < s_info->signalSemaphoreCount; i++) - f->sem_value[i]++; - - q->was_synchronous = synchronous; - - if (synchronous) { - AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; - vk->WaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); - vk->ResetFences(hwctx->act_dev, 1, &q->fence); - unref_exec_ctx_deps(hwfc, cmd); - } else { /* Rotate queues */ - cmd->cur_queue_idx = (cmd->cur_queue_idx + 1) % cmd->nb_queues; - } - - return 0; -} - static void vulkan_device_free(AVHWDeviceContext *ctx) { VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; AVVulkanDeviceContext *hwctx = ctx->hwctx; if (hwctx->act_dev) @@ -1303,6 +1175,12 @@ static void vulkan_device_free(AVHWDeviceContext *ctx) if (p->libvulkan) dlclose(p->libvulkan); + for (uint32_t i = 0; i < p->nb_tot_qfs; i++) { + pthread_mutex_destroy(p->qf_mutex[i]); + av_freep(&p->qf_mutex[i]); + } + av_freep(&p->qf_mutex); + RELEASE_PROPS(hwctx->enabled_inst_extensions, hwctx->nb_enabled_inst_extensions); RELEASE_PROPS(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions); } @@ -1315,7 +1193,7 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, VkResult ret; AVDictionaryEntry *opt_d; VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; AVVulkanDeviceContext *hwctx = ctx->hwctx; /* @@ -1326,9 +1204,21 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, VkPhysicalDeviceTimelineSemaphoreFeatures timeline_features = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES, }; + VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float_features = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT, + .pNext = &timeline_features, + }; + VkPhysicalDeviceDescriptorBufferFeaturesEXT desc_buf_features = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT, + .pNext = &atomic_float_features, + }; + VkPhysicalDeviceVulkan13Features dev_features_1_3 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES, + .pNext = &desc_buf_features, + }; VkPhysicalDeviceVulkan12Features dev_features_1_2 = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES, - .pNext = &timeline_features, + .pNext = &dev_features_1_3, }; VkPhysicalDeviceVulkan11Features dev_features_1_1 = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES, @@ -1340,8 +1230,7 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, }; VkDeviceCreateInfo dev_info = { - .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, - .pNext = &hwctx->device_features, + .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, }; hwctx->device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; @@ -1349,6 +1238,14 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, p->device_features_1_1.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES; p->device_features_1_1.pNext = &p->device_features_1_2; p->device_features_1_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES; + p->device_features_1_2.pNext = &p->device_features_1_3; + p->device_features_1_3.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES; + p->device_features_1_3.pNext = &p->desc_buf_features; + p->desc_buf_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT; + p->desc_buf_features.pNext = &p->atomic_float_features; + p->atomic_float_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT; + p->atomic_float_features.pNext = NULL; + ctx->free = vulkan_device_free; /* Create an instance if not given one */ @@ -1369,6 +1266,8 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, COPY_FEATURE(hwctx->device_features, fragmentStoresAndAtomics) COPY_FEATURE(hwctx->device_features, vertexPipelineStoresAndAtomics) COPY_FEATURE(hwctx->device_features, shaderInt64) + COPY_FEATURE(hwctx->device_features, shaderInt16) + COPY_FEATURE(hwctx->device_features, shaderFloat64) #undef COPY_FEATURE /* We require timeline semaphores */ @@ -1377,7 +1276,37 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, err = AVERROR(ENOSYS); goto end; } + + p->device_features_1_1.samplerYcbcrConversion = dev_features_1_1.samplerYcbcrConversion; + p->device_features_1_1.storagePushConstant16 = dev_features_1_1.storagePushConstant16; + p->device_features_1_2.timelineSemaphore = 1; + p->device_features_1_2.bufferDeviceAddress = dev_features_1_2.bufferDeviceAddress; + p->device_features_1_2.hostQueryReset = dev_features_1_2.hostQueryReset; + p->device_features_1_2.storagePushConstant8 = dev_features_1_2.storagePushConstant8; + p->device_features_1_2.shaderInt8 = dev_features_1_2.shaderInt8; + p->device_features_1_2.storageBuffer8BitAccess = dev_features_1_2.storageBuffer8BitAccess; + p->device_features_1_2.uniformAndStorageBuffer8BitAccess = dev_features_1_2.uniformAndStorageBuffer8BitAccess; + p->device_features_1_2.shaderFloat16 = dev_features_1_2.shaderFloat16; + p->device_features_1_2.shaderSharedInt64Atomics = dev_features_1_2.shaderSharedInt64Atomics; + p->device_features_1_2.vulkanMemoryModel = dev_features_1_2.vulkanMemoryModel; + p->device_features_1_2.vulkanMemoryModelDeviceScope = dev_features_1_2.vulkanMemoryModelDeviceScope; + p->device_features_1_2.hostQueryReset = dev_features_1_2.hostQueryReset; + + p->device_features_1_3.dynamicRendering = dev_features_1_3.dynamicRendering; + p->device_features_1_3.maintenance4 = dev_features_1_3.maintenance4; + p->device_features_1_3.synchronization2 = dev_features_1_3.synchronization2; + p->device_features_1_3.computeFullSubgroups = dev_features_1_3.computeFullSubgroups; + p->device_features_1_3.shaderZeroInitializeWorkgroupMemory = dev_features_1_3.shaderZeroInitializeWorkgroupMemory; + p->device_features_1_3.dynamicRendering = dev_features_1_3.dynamicRendering; + + p->desc_buf_features.descriptorBuffer = desc_buf_features.descriptorBuffer; + p->desc_buf_features.descriptorBufferPushDescriptors = desc_buf_features.descriptorBufferPushDescriptors; + + p->atomic_float_features.shaderBufferFloat32Atomics = atomic_float_features.shaderBufferFloat32Atomics; + p->atomic_float_features.shaderBufferFloat32AtomicAdd = atomic_float_features.shaderBufferFloat32AtomicAdd; + + dev_info.pNext = &hwctx->device_features; /* Setup queue family */ if ((err = setup_queue_families(ctx, &dev_info))) @@ -1400,7 +1329,7 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); for (int i = 0; i < dev_info.enabledExtensionCount; i++) av_free((void *)dev_info.ppEnabledExtensionNames[i]); av_free((void *)dev_info.ppEnabledExtensionNames); @@ -1413,11 +1342,9 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, if (opt_d) p->use_linear_images = strtol(opt_d->value, NULL, 10); - opt_d = av_dict_get(opts, "contiguous_planes", NULL, 0); + opt_d = av_dict_get(opts, "disable_multiplane", NULL, 0); if (opt_d) - p->contiguous_planes = strtol(opt_d->value, NULL, 10); - else - p->contiguous_planes = -1; + p->disable_multiplane = strtol(opt_d->value, NULL, 10); hwctx->enabled_dev_extensions = dev_info.ppEnabledExtensionNames; hwctx->nb_enabled_dev_extensions = dev_info.enabledExtensionCount; @@ -1426,13 +1353,26 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, return err; } +static void lock_queue(AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index) +{ + VulkanDevicePriv *p = ctx->internal->priv; + pthread_mutex_lock(&p->qf_mutex[queue_family][index]); +} + +static void unlock_queue(AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index) +{ + VulkanDevicePriv *p = ctx->internal->priv; + pthread_mutex_unlock(&p->qf_mutex[queue_family][index]); +} + static int vulkan_device_init(AVHWDeviceContext *ctx) { int err; - uint32_t queue_num; + uint32_t qf_num; AVVulkanDeviceContext *hwctx = ctx->hwctx; VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + VkQueueFamilyProperties *qf; int graph_index, comp_index, tx_index, enc_index, dec_index; /* Set device extension flags */ @@ -1440,13 +1380,13 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) for (int j = 0; j < FF_ARRAY_ELEMS(optional_device_exts); j++) { if (!strcmp(hwctx->enabled_dev_extensions[i], optional_device_exts[j].name)) { - p->extensions |= optional_device_exts[j].flag; + p->vkctx.extensions |= optional_device_exts[j].flag; break; } } } - err = ff_vk_load_functions(ctx, vk, p->extensions, 1, 1); + err = ff_vk_load_functions(ctx, vk, p->vkctx.extensions, 1, 1); if (err < 0) { av_log(ctx, AV_LOG_ERROR, "Unable to load functions!\n"); return err; @@ -1464,19 +1404,45 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) p->props.properties.limits.optimalBufferCopyRowPitchAlignment); av_log(ctx, AV_LOG_VERBOSE, " minMemoryMapAlignment: %"SIZE_SPECIFIER"\n", p->props.properties.limits.minMemoryMapAlignment); - if (p->extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) + av_log(ctx, AV_LOG_VERBOSE, " nonCoherentAtomSize: %"PRIu64"\n", + p->props.properties.limits.nonCoherentAtomSize); + if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) av_log(ctx, AV_LOG_VERBOSE, " minImportedHostPointerAlignment: %"PRIu64"\n", p->hprops.minImportedHostPointerAlignment); p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de); - p->dev_is_intel = (p->props.properties.vendorID == 0x8086); - vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL); - if (!queue_num) { + vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, NULL); + if (!qf_num) { av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n"); return AVERROR_EXTERNAL; } + qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties)); + if (!qf) + return AVERROR(ENOMEM); + + vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, qf); + + p->qf_mutex = av_calloc(qf_num, sizeof(*p->qf_mutex)); + if (!p->qf_mutex) + return AVERROR(ENOMEM); + p->nb_tot_qfs = qf_num; + + for (uint32_t i = 0; i < qf_num; i++) { + p->qf_mutex[i] = av_calloc(qf[i].queueCount, sizeof(**p->qf_mutex)); + if (!p->qf_mutex[i]) + return AVERROR(ENOMEM); + for (uint32_t j = 0; j < qf[i].queueCount; j++) { + err = pthread_mutex_init(&p->qf_mutex[i][j], NULL); + if (err != 0) { + av_log(ctx, AV_LOG_ERROR, "pthread_mutex_init failed : %s\n", + av_err2str(err)); + return AVERROR(err); + } + } + } + graph_index = hwctx->queue_family_index; comp_index = hwctx->queue_family_comp_index; tx_index = hwctx->queue_family_tx_index; @@ -1491,9 +1457,9 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) return AVERROR(EINVAL); \ } else if (fidx < 0 || ctx_qf < 0) { \ break; \ - } else if (ctx_qf >= queue_num) { \ + } else if (ctx_qf >= qf_num) { \ av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \ - type, ctx_qf, queue_num); \ + type, ctx_qf, qf_num); \ return AVERROR(EINVAL); \ } \ \ @@ -1510,7 +1476,7 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) tx_index = (ctx_qf == tx_index) ? -1 : tx_index; \ enc_index = (ctx_qf == enc_index) ? -1 : enc_index; \ dec_index = (ctx_qf == dec_index) ? -1 : dec_index; \ - p->qfs[p->num_qfs++] = ctx_qf; \ + p->img_qfs[p->nb_img_qfs++] = ctx_qf; \ } while (0) CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index, hwctx->nb_graphics_queues); @@ -1521,9 +1487,21 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) #undef CHECK_QUEUE + if (!hwctx->lock_queue) + hwctx->lock_queue = lock_queue; + if (!hwctx->unlock_queue) + hwctx->unlock_queue = unlock_queue; + /* Get device capabilities */ vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops); + p->vkctx.device = ctx; + p->vkctx.hwctx = hwctx; + + ff_vk_load_props(&p->vkctx); + ff_vk_qf_init(&p->vkctx, &p->compute_qf, VK_QUEUE_COMPUTE_BIT); + ff_vk_qf_init(&p->vkctx, &p->transfer_qf, VK_QUEUE_TRANSFER_BIT); + return 0; } @@ -1553,7 +1531,6 @@ static int vulkan_device_derive(AVHWDeviceContext *ctx, * by the following checks (e.g. non-PCIe ARM GPU), having an empty * dev_select will mean it'll get picked. */ switch(src_ctx->type) { -#if CONFIG_LIBDRM #if CONFIG_VAAPI case AV_HWDEVICE_TYPE_VAAPI: { AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx; @@ -1564,21 +1541,34 @@ static int vulkan_device_derive(AVHWDeviceContext *ctx, return AVERROR_EXTERNAL; } - if (strstr(vendor, "Intel")) - dev_select.vendor_id = 0x8086; if (strstr(vendor, "AMD")) dev_select.vendor_id = 0x1002; return vulkan_device_create_internal(ctx, &dev_select, opts, flags); } #endif +#if CONFIG_LIBDRM case AV_HWDEVICE_TYPE_DRM: { + int err; + struct stat drm_node_info; + drmDevice *drm_dev_info; AVDRMDeviceContext *src_hwctx = src_ctx->hwctx; - drmDevice *drm_dev_info; - int err = drmGetDevice(src_hwctx->fd, &drm_dev_info); + err = fstat(src_hwctx->fd, &drm_node_info); + if (err) { + av_log(ctx, AV_LOG_ERROR, "Unable to get node info from DRM fd: %s!\n", + av_err2str(AVERROR(errno))); + return AVERROR_EXTERNAL; + } + + dev_select.drm_major = major(drm_node_info.st_dev); + dev_select.drm_minor = minor(drm_node_info.st_dev); + dev_select.has_drm = 1; + + err = drmGetDevice(src_hwctx->fd, &drm_dev_info); if (err) { - av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd!\n"); + av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd: %s!\n", + av_err2str(AVERROR(errno))); return AVERROR_EXTERNAL; } @@ -1621,8 +1611,12 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx, int count = 0; VulkanDevicePriv *p = ctx->internal->priv; - for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++) - count += pixfmt_is_supported(ctx, i, p->use_linear_images); + for (enum AVPixelFormat i = 0; i < nb_vk_formats_list; i++) { + count += vkfmt_from_pixfmt2(ctx, vk_formats_list[i].pixfmt, + p->use_linear_images ? VK_IMAGE_TILING_LINEAR : + VK_IMAGE_TILING_OPTIMAL, + NULL, NULL, NULL, NULL, 0) >= 0; + } #if CONFIG_CUDA if (p->dev_is_nvidia) @@ -1635,9 +1629,14 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx, return AVERROR(ENOMEM); count = 0; - for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++) - if (pixfmt_is_supported(ctx, i, p->use_linear_images)) - constraints->valid_sw_formats[count++] = i; + for (enum AVPixelFormat i = 0; i < nb_vk_formats_list; i++) { + if (vkfmt_from_pixfmt2(ctx, vk_formats_list[i].pixfmt, + p->use_linear_images ? VK_IMAGE_TILING_LINEAR : + VK_IMAGE_TILING_OPTIMAL, + NULL, NULL, NULL, NULL, 0) >= 0) { + constraints->valid_sw_formats[count++] = vk_formats_list[i].pixfmt; + } + } #if CONFIG_CUDA if (p->dev_is_nvidia) @@ -1645,8 +1644,8 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx, #endif constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE; - constraints->min_width = 0; - constraints->min_height = 0; + constraints->min_width = 1; + constraints->min_height = 1; constraints->max_width = p->props.properties.limits.maxImageDimension2D; constraints->max_height = p->props.properties.limits.maxImageDimension2D; @@ -1667,7 +1666,7 @@ static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req, VkResult ret; int index = -1; VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; AVVulkanDeviceContext *dev_hwctx = ctx->hwctx; VkMemoryAllocateInfo alloc_info = { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, @@ -1709,7 +1708,7 @@ static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req, dev_hwctx->alloc, mem); if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); return AVERROR(ENOMEM); } @@ -1720,10 +1719,7 @@ static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req, static void vulkan_free_internal(AVVkFrame *f) { - AVVkFrameInternal *internal = f->internal; - - if (!internal) - return; + av_unused AVVkFrameInternal *internal = f->internal; #if CONFIG_CUDA if (internal->cuda_fc_ref) { @@ -1753,6 +1749,7 @@ static void vulkan_free_internal(AVVkFrame *f) } #endif + pthread_mutex_destroy(&internal->update_mutex); av_freep(&f->internal); } @@ -1762,18 +1759,23 @@ static void vulkan_frame_free(void *opaque, uint8_t *data) AVHWFramesContext *hwfc = opaque; AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; - int planes = av_pix_fmt_count_planes(hwfc->sw_format); + FFVulkanFunctions *vk = &p->vkctx.vkfn; + int nb_images = ff_vk_count_images(f); + + VkSemaphoreWaitInfo sem_wait = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, + .pSemaphores = f->sem, + .pValues = f->sem_value, + .semaphoreCount = nb_images, + }; - /* We could use vkWaitSemaphores, but the validation layer seems to have - * issues tracking command buffer execution state on uninit. */ - vk->DeviceWaitIdle(hwctx->act_dev); + vk->WaitSemaphores(hwctx->act_dev, &sem_wait, UINT64_MAX); vulkan_free_internal(f); - for (int i = 0; i < planes; i++) { - vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); - vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); + for (int i = 0; i < nb_images; i++) { + vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); + vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc); } @@ -1783,30 +1785,24 @@ static void vulkan_frame_free(void *opaque, uint8_t *data) static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f, void *alloc_pnext, size_t alloc_pnext_stride) { - int err; + int img_cnt = 0, err; VkResult ret; AVHWDeviceContext *ctx = hwfc->device_ctx; VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; - AVVulkanFramesContext *hwfctx = hwfc->hwctx; - const int planes = av_pix_fmt_count_planes(hwfc->sw_format); + FFVulkanFunctions *vk = &p->vkctx.vkfn; VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } }; - VkMemoryRequirements cont_memory_requirements = { 0 }; - int cont_mem_size_list[AV_NUM_DATA_POINTERS] = { 0 }; - int cont_mem_size = 0; - AVVulkanDeviceContext *hwctx = ctx->hwctx; - for (int i = 0; i < planes; i++) { + while (f->img[img_cnt]) { int use_ded_mem; VkImageMemoryRequirementsInfo2 req_desc = { .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2, - .image = f->img[i], + .image = f->img[img_cnt], }; VkMemoryDedicatedAllocateInfo ded_alloc = { .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, - .pNext = (void *)(((uint8_t *)alloc_pnext) + i*alloc_pnext_stride), + .pNext = (void *)(((uint8_t *)alloc_pnext) + img_cnt*alloc_pnext_stride), }; VkMemoryDedicatedRequirements ded_req = { .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, @@ -1822,32 +1818,11 @@ static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f, req.memoryRequirements.size = FFALIGN(req.memoryRequirements.size, p->props.properties.limits.minMemoryMapAlignment); - if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY) { - if (ded_req.requiresDedicatedAllocation) { - av_log(hwfc, AV_LOG_ERROR, "Cannot allocate all planes in a single allocation, " - "device requires dedicated image allocation!\n"); - return AVERROR(EINVAL); - } else if (!i) { - cont_memory_requirements = req.memoryRequirements; - } else if (cont_memory_requirements.memoryTypeBits != - req.memoryRequirements.memoryTypeBits) { - av_log(hwfc, AV_LOG_ERROR, "The memory requirements differ between plane 0 " - "and %i, cannot allocate in a single region!\n", - i); - return AVERROR(EINVAL); - } - - cont_mem_size_list[i] = FFALIGN(req.memoryRequirements.size, - req.memoryRequirements.alignment); - cont_mem_size += cont_mem_size_list[i]; - continue; - } - /* In case the implementation prefers/requires dedicated allocation */ use_ded_mem = ded_req.prefersDedicatedAllocation | ded_req.requiresDedicatedAllocation; if (use_ded_mem) - ded_alloc.image = f->img[i]; + ded_alloc.image = f->img[img_cnt]; /* Allocate memory */ if ((err = alloc_mem(ctx, &req.memoryRequirements, @@ -1855,45 +1830,22 @@ static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext, - &f->flags, &f->mem[i]))) - return err; - - f->size[i] = req.memoryRequirements.size; - bind_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO; - bind_info[i].image = f->img[i]; - bind_info[i].memory = f->mem[i]; - } - - if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY) { - cont_memory_requirements.size = cont_mem_size; - - /* Allocate memory */ - if ((err = alloc_mem(ctx, &cont_memory_requirements, - f->tiling == VK_IMAGE_TILING_LINEAR ? - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT : - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - (void *)(((uint8_t *)alloc_pnext)), - &f->flags, &f->mem[0]))) + &f->flags, &f->mem[img_cnt]))) return err; - f->size[0] = cont_memory_requirements.size; - - for (int i = 0, offset = 0; i < planes; i++) { - bind_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO; - bind_info[i].image = f->img[i]; - bind_info[i].memory = f->mem[0]; - bind_info[i].memoryOffset = offset; + f->size[img_cnt] = req.memoryRequirements.size; + bind_info[img_cnt].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO; + bind_info[img_cnt].image = f->img[img_cnt]; + bind_info[img_cnt].memory = f->mem[img_cnt]; - f->offset[i] = bind_info[i].memoryOffset; - offset += cont_mem_size_list[i]; - } + img_cnt++; } /* Bind the allocated memory to the images */ - ret = vk->BindImageMemory2(hwctx->act_dev, planes, bind_info); + ret = vk->BindImageMemory2(hwctx->act_dev, img_cnt, bind_info); if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } @@ -1903,105 +1855,99 @@ static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f, enum PrepMode { PREP_MODE_WRITE, PREP_MODE_EXTERNAL_EXPORT, - PREP_MODE_EXTERNAL_IMPORT + PREP_MODE_EXTERNAL_IMPORT, + PREP_MODE_DECODING_DST, + PREP_MODE_DECODING_DPB, + PREP_MODE_ENCODING_DPB, }; -static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, +static int prepare_frame(AVHWFramesContext *hwfc, FFVkExecPool *ectx, AVVkFrame *frame, enum PrepMode pmode) { int err; - uint32_t src_qf, dst_qf; - VkImageLayout new_layout; - VkAccessFlags new_access; - const int planes = av_pix_fmt_count_planes(hwfc->sw_format); VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; - uint64_t sem_sig_val[AV_NUM_DATA_POINTERS]; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS]; + int nb_img_bar = 0; - VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 }; - - VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = { - .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, - .pSignalSemaphoreValues = sem_sig_val, - .signalSemaphoreValueCount = planes, + uint32_t dst_qf = VK_QUEUE_FAMILY_IGNORED; + VkImageLayout new_layout; + VkAccessFlags2 new_access; + VkPipelineStageFlagBits2 src_stage = VK_PIPELINE_STAGE_2_NONE; + + /* This is dirty - but it works. The vulkan.c dependency system doesn't + * free non-refcounted frames, and non-refcounted hardware frames cannot + * happen anywhere outside of here. */ + AVBufferRef tmp_ref = { + .data = (uint8_t *)hwfc, }; - - VkSubmitInfo s_info = { - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .pNext = &s_timeline_sem_info, - .pSignalSemaphores = frame->sem, - .signalSemaphoreCount = planes, + AVFrame tmp_frame = { + .data[0] = (uint8_t *)frame, + .hw_frames_ctx = &tmp_ref, }; - VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS]; - for (int i = 0; i < planes; i++) { - wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; - sem_sig_val[i] = frame->sem_value[i] + 1; - } + VkCommandBuffer cmd_buf; + FFVkExecContext *exec = ff_vk_exec_get(ectx); + cmd_buf = exec->buf; + ff_vk_exec_start(&p->vkctx, exec); + + err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, &tmp_frame, + VK_PIPELINE_STAGE_2_NONE, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT); + if (err < 0) + return err; switch (pmode) { case PREP_MODE_WRITE: new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; new_access = VK_ACCESS_TRANSFER_WRITE_BIT; - src_qf = VK_QUEUE_FAMILY_IGNORED; - dst_qf = VK_QUEUE_FAMILY_IGNORED; break; case PREP_MODE_EXTERNAL_IMPORT: new_layout = VK_IMAGE_LAYOUT_GENERAL; new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT; - src_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR; - dst_qf = VK_QUEUE_FAMILY_IGNORED; - s_timeline_sem_info.pWaitSemaphoreValues = frame->sem_value; - s_timeline_sem_info.waitSemaphoreValueCount = planes; - s_info.pWaitSemaphores = frame->sem; - s_info.pWaitDstStageMask = wait_st; - s_info.waitSemaphoreCount = planes; break; case PREP_MODE_EXTERNAL_EXPORT: new_layout = VK_IMAGE_LAYOUT_GENERAL; new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT; - src_qf = VK_QUEUE_FAMILY_IGNORED; dst_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR; - s_timeline_sem_info.pWaitSemaphoreValues = frame->sem_value; - s_timeline_sem_info.waitSemaphoreValueCount = planes; - s_info.pWaitSemaphores = frame->sem; - s_info.pWaitDstStageMask = wait_st; - s_info.waitSemaphoreCount = planes; + src_stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; + break; + case PREP_MODE_DECODING_DST: + new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR; + new_access = VK_ACCESS_TRANSFER_WRITE_BIT; + break; + case PREP_MODE_DECODING_DPB: + new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR; + new_access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; + break; + case PREP_MODE_ENCODING_DPB: + new_layout = VK_IMAGE_LAYOUT_VIDEO_ENCODE_DPB_KHR; + new_access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; break; } - if ((err = wait_start_exec_ctx(hwfc, ectx))) - return err; + ff_vk_frame_barrier(&p->vkctx, exec, &tmp_frame, img_bar, &nb_img_bar, + src_stage, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + new_access, new_layout, dst_qf); - /* Change the image layout to something more optimal for writes. - * This also signals the newly created semaphore, making it usable - * for synchronization */ - for (int i = 0; i < planes; i++) { - img_bar[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - img_bar[i].srcAccessMask = 0x0; - img_bar[i].dstAccessMask = new_access; - img_bar[i].oldLayout = frame->layout[i]; - img_bar[i].newLayout = new_layout; - img_bar[i].srcQueueFamilyIndex = src_qf; - img_bar[i].dstQueueFamilyIndex = dst_qf; - img_bar[i].image = frame->img[i]; - img_bar[i].subresourceRange.levelCount = 1; - img_bar[i].subresourceRange.layerCount = 1; - img_bar[i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - - frame->layout[i] = img_bar[i].newLayout; - frame->access[i] = img_bar[i].dstAccessMask; - } + vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + err = ff_vk_exec_submit(&p->vkctx, exec); + if (err < 0) + return err; - vk->CmdPipelineBarrier(get_buf_exec_ctx(hwfc, ectx), - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, 0, NULL, 0, NULL, planes, img_bar); + /* We can do this because there are no real dependencies */ + ff_vk_exec_discard_deps(&p->vkctx, exec); - return submit_exec_ctx(hwfc, ectx, &s_info, frame, 0); + return 0; } -static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format, +static inline void get_plane_wh(uint32_t *w, uint32_t *h, enum AVPixelFormat format, int frame_w, int frame_h, int plane) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format); @@ -2020,17 +1966,16 @@ static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format, static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, VkImageTiling tiling, VkImageUsageFlagBits usage, + VkImageCreateFlags flags, int nb_layers, void *create_pnext) { int err; VkResult ret; + AVVulkanFramesContext *hwfc_vk = hwfc->hwctx; AVHWDeviceContext *ctx = hwfc->device_ctx; VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; AVVulkanDeviceContext *hwctx = ctx->hwctx; - enum AVPixelFormat format = hwfc->sw_format; - const VkFormat *img_fmts = av_vkfmt_from_pixfmt(format); - const int planes = av_pix_fmt_count_planes(format); VkExportSemaphoreCreateInfo ext_sem_info = { .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO, @@ -2046,9 +1991,9 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, VkSemaphoreTypeCreateInfo sem_type_info = { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, #ifdef _WIN32 - .pNext = p->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM ? &ext_sem_info : NULL, + .pNext = p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM ? &ext_sem_info : NULL, #else - .pNext = p->extensions & FF_VK_EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL, + .pNext = p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL, #endif .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE, .initialValue = 0, @@ -2065,35 +2010,37 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, return AVERROR(ENOMEM); } + // TODO: check witdh and height for alignment in case of multiplanar (must be mod-2 if subsampled) + /* Create the images */ - for (int i = 0; i < planes; i++) { + for (int i = 0; (hwfc_vk->format[i] != VK_FORMAT_UNDEFINED); i++) { VkImageCreateInfo create_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .pNext = create_pnext, .imageType = VK_IMAGE_TYPE_2D, - .format = img_fmts[i], + .format = hwfc_vk->format[i], .extent.depth = 1, .mipLevels = 1, - .arrayLayers = 1, - .flags = VK_IMAGE_CREATE_ALIAS_BIT, + .arrayLayers = nb_layers, + .flags = flags, .tiling = tiling, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, .usage = usage, .samples = VK_SAMPLE_COUNT_1_BIT, - .pQueueFamilyIndices = p->qfs, - .queueFamilyIndexCount = p->num_qfs, - .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT : - VK_SHARING_MODE_EXCLUSIVE, + .pQueueFamilyIndices = p->img_qfs, + .queueFamilyIndexCount = p->nb_img_qfs, + .sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT : + VK_SHARING_MODE_EXCLUSIVE, }; get_plane_wh(&create_info.extent.width, &create_info.extent.height, - format, hwfc->width, hwfc->height, i); + hwfc->sw_format, hwfc->width, hwfc->height, i); ret = vk->CreateImage(hwctx->act_dev, &create_info, hwctx->alloc, &f->img[i]); if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); err = AVERROR(EINVAL); goto fail; } @@ -2103,10 +2050,11 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, hwctx->alloc, &f->sem[i]); if (ret != VK_SUCCESS) { av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } + f->queue_family[i] = p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0]; f->layout[i] = create_info.initialLayout; f->access[i] = 0x0; f->sem_value[i] = 0; @@ -2133,11 +2081,11 @@ static void try_export_flags(AVHWFramesContext *hwfc, AVVulkanFramesContext *hwctx = hwfc->hwctx; AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; const VkImageDrmFormatModifierListCreateInfoEXT *drm_mod_info = - vk_find_struct(hwctx->create_pnext, - VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT); + ff_vk_find_struct(hwctx->create_pnext, + VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT); int has_mods = hwctx->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && drm_mod_info; int nb_mods; @@ -2151,10 +2099,10 @@ static void try_export_flags(AVHWFramesContext *hwfc, VkPhysicalDeviceImageDrmFormatModifierInfoEXT phy_dev_mod_info = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT, .pNext = NULL, - .pQueueFamilyIndices = p->qfs, - .queueFamilyIndexCount = p->num_qfs, - .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT : - VK_SHARING_MODE_EXCLUSIVE, + .pQueueFamilyIndices = p->img_qfs, + .queueFamilyIndexCount = p->nb_img_qfs, + .sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT : + VK_SHARING_MODE_EXCLUSIVE, }; VkPhysicalDeviceExternalImageFormatInfo enext = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO, @@ -2195,8 +2143,8 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size) AVVulkanFramesContext *hwctx = hwfc->hwctx; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; VulkanFramesPriv *fp = hwfc->internal->priv; - VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS]; VkExternalMemoryHandleTypeFlags e = 0x0; + VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS]; VkExternalMemoryImageCreateInfo eiinfo = { .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, @@ -2204,18 +2152,14 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size) }; #ifdef _WIN32 - if (p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) + if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) try_export_flags(hwfc, &eiinfo.handleTypes, &e, IsWindows8OrGreater() ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT); #else - if (p->extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) + if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) try_export_flags(hwfc, &eiinfo.handleTypes, &e, VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT); - - if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS)) - try_export_flags(hwfc, &eiinfo.handleTypes, &e, - VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); #endif for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) { @@ -2224,8 +2168,8 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size) eminfo[i].handleTypes = e; } - err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, - eiinfo.handleTypes ? &eiinfo : NULL); + err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, hwctx->img_flags, + hwctx->nb_layers, eiinfo.handleTypes ? &eiinfo : NULL); if (err) return NULL; @@ -2233,7 +2177,15 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size) if (err) goto fail; - err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_WRITE); + if ( (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) && + !(hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)) + err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_DECODING_DPB); + else if (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR) + err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_DECODING_DST); + else if (hwctx->usage & VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR) + err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_ENCODING_DPB); + else + err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_WRITE); if (err) goto fail; @@ -2249,8 +2201,19 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size) return NULL; } +static void lock_frame(AVHWFramesContext *fc, AVVkFrame *vkf) +{ + pthread_mutex_lock(&vkf->internal->update_mutex); +} + +static void unlock_frame(AVHWFramesContext *fc, AVVkFrame *vkf) +{ + pthread_mutex_unlock(&vkf->internal->update_mutex); +} + static void vulkan_frames_uninit(AVHWFramesContext *hwfc) { + VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; VulkanFramesPriv *fp = hwfc->internal->priv; if (fp->modifier_info) { @@ -2259,9 +2222,9 @@ static void vulkan_frames_uninit(AVHWFramesContext *hwfc) av_freep(&fp->modifier_info); } - free_exec_ctx(hwfc, &fp->conv_ctx); - free_exec_ctx(hwfc, &fp->upload_ctx); - free_exec_ctx(hwfc, &fp->download_ctx); + ff_vk_exec_pool_free(&p->vkctx, &fp->compute_exec); + ff_vk_exec_pool_free(&p->vkctx, &fp->upload_exec); + ff_vk_exec_pool_free(&p->vkctx, &fp->download_exec); } static int vulkan_frames_init(AVHWFramesContext *hwfc) @@ -2270,132 +2233,108 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc) AVVkFrame *f; AVVulkanFramesContext *hwctx = hwfc->hwctx; VulkanFramesPriv *fp = hwfc->internal->priv; - AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - const VkImageDrmFormatModifierListCreateInfoEXT *modifier_info; - const int has_modifiers = !!(p->extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS); - - /* Default tiling flags */ - hwctx->tiling = hwctx->tiling ? hwctx->tiling : - has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT : - p->use_linear_images ? VK_IMAGE_TILING_LINEAR : - VK_IMAGE_TILING_OPTIMAL; - - if (!hwctx->usage) - hwctx->usage = FF_VK_DEFAULT_USAGE_FLAGS; - - if (!(hwctx->flags & AV_VK_FRAME_FLAG_NONE)) { - if (p->contiguous_planes == 1 || - ((p->contiguous_planes == -1) && p->dev_is_intel)) - hwctx->flags |= AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY; - } - - modifier_info = vk_find_struct(hwctx->create_pnext, - VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT); - - /* Get the supported modifiers if the user has not given any. */ - if (has_modifiers && !modifier_info) { - const VkFormat *fmt = av_vkfmt_from_pixfmt(hwfc->sw_format); - VkImageDrmFormatModifierListCreateInfoEXT *modifier_info; - FFVulkanFunctions *vk = &p->vkfn; - VkDrmFormatModifierPropertiesEXT *mod_props; - uint64_t *modifiers; - int modifier_count = 0; - - VkDrmFormatModifierPropertiesListEXT mod_props_list = { - .sType = VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT, - .pNext = NULL, - .drmFormatModifierCount = 0, - .pDrmFormatModifierProperties = NULL, - }; - VkFormatProperties2 prop = { - .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, - .pNext = &mod_props_list, - }; - - /* Get all supported modifiers */ - vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->phys_dev, fmt[0], &prop); - - if (!mod_props_list.drmFormatModifierCount) { - av_log(hwfc, AV_LOG_ERROR, "There are no supported modifiers for the given sw_format\n"); - return AVERROR(EINVAL); - } - - /* Createa structure to hold the modifier list info */ - modifier_info = av_mallocz(sizeof(*modifier_info)); - if (!modifier_info) - return AVERROR(ENOMEM); - - modifier_info->pNext = NULL; - modifier_info->sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT; - - /* Add structure to the image creation pNext chain */ - if (!hwctx->create_pnext) - hwctx->create_pnext = modifier_info; - else - vk_link_struct(hwctx->create_pnext, (void *)modifier_info); - - /* Backup the allocated struct to be freed later */ - fp->modifier_info = modifier_info; - - /* Allocate list of modifiers */ - modifiers = av_mallocz(mod_props_list.drmFormatModifierCount * - sizeof(*modifiers)); - if (!modifiers) - return AVERROR(ENOMEM); - - modifier_info->pDrmFormatModifiers = modifiers; + VkImageUsageFlagBits supported_usage; + const struct FFVkFormatEntry *fmt; + int disable_multiplane = p->disable_multiplane || + (hwctx->flags & AV_VK_FRAME_FLAG_DISABLE_MULTIPLANE); - /* Allocate a temporary list to hold all modifiers supported */ - mod_props = av_mallocz(mod_props_list.drmFormatModifierCount * - sizeof(*mod_props)); - if (!mod_props) - return AVERROR(ENOMEM); + /* Defaults */ + if (!hwctx->nb_layers) + hwctx->nb_layers = 1; - mod_props_list.pDrmFormatModifierProperties = mod_props; + /* VK_IMAGE_TILING_OPTIMAL == 0, can't check for it really */ + if (p->use_linear_images && + (hwctx->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)) + hwctx->tiling = VK_IMAGE_TILING_LINEAR; - /* Finally get all modifiers from the device */ - vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->phys_dev, fmt[0], &prop); - /* Reject any modifiers that don't match our requirements */ - for (int i = 0; i < mod_props_list.drmFormatModifierCount; i++) { - if (!(mod_props[i].drmFormatModifierTilingFeatures & hwctx->usage)) - continue; + fmt = vk_find_format_entry(hwfc->sw_format); + if (!fmt) { + av_log(hwfc, AV_LOG_ERROR, "Unsupported pixel format: %s!\n", + av_get_pix_fmt_name(hwfc->sw_format)); + return AVERROR(EINVAL); + } - modifiers[modifier_count++] = mod_props[i].drmFormatModifier; + if (hwctx->format[0] != VK_FORMAT_UNDEFINED) { + if (hwctx->format[0] != fmt->vkf) { + for (int i = 0; i < fmt->nb_images_fallback; i++) { + if (hwctx->format[i] != fmt->fallback[i]) { + av_log(hwfc, AV_LOG_ERROR, "Incompatible Vulkan format given " + "for the current sw_format %s!\n", + av_get_pix_fmt_name(hwfc->sw_format)); + return AVERROR(EINVAL); + } + } } - if (!modifier_count) { - av_log(hwfc, AV_LOG_ERROR, "None of the given modifiers supports" - " the usage flags!\n"); - av_freep(&mod_props); + /* Check if the sw_format itself is supported */ + err = vkfmt_from_pixfmt2(hwfc->device_ctx, hwfc->sw_format, + hwctx->tiling, NULL, + NULL, NULL, &supported_usage, 0); + if (err < 0) { + av_log(hwfc, AV_LOG_ERROR, "Unsupported sw format: %s!\n", + av_get_pix_fmt_name(hwfc->sw_format)); return AVERROR(EINVAL); } + } else { + err = vkfmt_from_pixfmt2(hwfc->device_ctx, hwfc->sw_format, + hwctx->tiling, hwctx->format, NULL, + NULL, &supported_usage, + disable_multiplane); + if (err < 0) + return err; + } - modifier_info->drmFormatModifierCount = modifier_count; - av_freep(&mod_props); + /* Image usage flags */ + if (!hwctx->usage) { + hwctx->usage = supported_usage & (VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR); + } + + /* Image creation flags. + * Only fill them in automatically if the image is not going to be used as + * a DPB-only image, and we have SAMPLED/STORAGE bits set. */ + if (!hwctx->img_flags) { + int is_lone_dpb = (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) && + !(hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR); + int sampleable = hwctx->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT); + if (sampleable && !is_lone_dpb) { + hwctx->img_flags = VK_IMAGE_CREATE_ALIAS_BIT; + if ((fmt->vk_planes > 1) && (hwctx->format[0] == fmt->vkf)) + hwctx->img_flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT | + VK_IMAGE_CREATE_EXTENDED_USAGE_BIT; + } } - err = create_exec_ctx(hwfc, &fp->conv_ctx, - dev_hwctx->queue_family_comp_index, - dev_hwctx->nb_comp_queues); + if (!hwctx->lock_frame) + hwctx->lock_frame = lock_frame; + + if (!hwctx->unlock_frame) + hwctx->unlock_frame = unlock_frame; + + err = ff_vk_exec_pool_init(&p->vkctx, &p->compute_qf, &fp->compute_exec, + p->compute_qf.nb_queues*4, 0, 0, 0, NULL); if (err) return err; - err = create_exec_ctx(hwfc, &fp->upload_ctx, - dev_hwctx->queue_family_tx_index, - dev_hwctx->nb_tx_queues); + err = ff_vk_exec_pool_init(&p->vkctx, &p->transfer_qf, &fp->upload_exec, + p->transfer_qf.nb_queues*4, 0, 0, 0, NULL); if (err) return err; - err = create_exec_ctx(hwfc, &fp->download_ctx, - dev_hwctx->queue_family_tx_index, 1); + err = ff_vk_exec_pool_init(&p->vkctx, &p->transfer_qf, &fp->download_exec, + p->transfer_qf.nb_queues*4, 0, 0, 0, NULL); if (err) return err; /* Test to see if allocation will fail */ - err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, - hwctx->create_pnext); + err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, hwctx->img_flags, + hwctx->nb_layers, hwctx->create_pnext); if (err) return err; @@ -2443,168 +2382,30 @@ static int vulkan_transfer_get_formats(AVHWFramesContext *hwfc, return 0; } -typedef struct VulkanMapping { - AVVkFrame *frame; - int flags; -} VulkanMapping; - -static void vulkan_unmap_frame(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap) -{ - VulkanMapping *map = hwmap->priv; - AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; - const int planes = av_pix_fmt_count_planes(hwfc->sw_format); - VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; - - /* Check if buffer needs flushing */ - if ((map->flags & AV_HWFRAME_MAP_WRITE) && - !(map->frame->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { - VkResult ret; - VkMappedMemoryRange flush_ranges[AV_NUM_DATA_POINTERS] = { { 0 } }; - - for (int i = 0; i < planes; i++) { - flush_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; - flush_ranges[i].memory = map->frame->mem[i]; - flush_ranges[i].size = VK_WHOLE_SIZE; - } - - ret = vk->FlushMappedMemoryRanges(hwctx->act_dev, planes, - flush_ranges); - if (ret != VK_SUCCESS) { - av_log(hwfc, AV_LOG_ERROR, "Failed to flush memory: %s\n", - vk_ret2str(ret)); - } - } - - for (int i = 0; i < planes; i++) - vk->UnmapMemory(hwctx->act_dev, map->frame->mem[i]); - - av_free(map); -} - -static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst, - const AVFrame *src, int flags) -{ - VkResult ret; - int err, mapped_mem_count = 0, mem_planes = 0; - AVVkFrame *f = (AVVkFrame *)src->data[0]; - AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; - AVVulkanFramesContext *hwfctx = hwfc->hwctx; - const int planes = av_pix_fmt_count_planes(hwfc->sw_format); - VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; - - VulkanMapping *map = av_mallocz(sizeof(VulkanMapping)); - if (!map) - return AVERROR(EINVAL); - - if (src->format != AV_PIX_FMT_VULKAN) { - av_log(hwfc, AV_LOG_ERROR, "Cannot map from pixel format %s!\n", - av_get_pix_fmt_name(src->format)); - err = AVERROR(EINVAL); - goto fail; - } - - if (!(f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) || - !(f->tiling == VK_IMAGE_TILING_LINEAR)) { - av_log(hwfc, AV_LOG_ERROR, "Unable to map frame, not host visible " - "and linear!\n"); - err = AVERROR(EINVAL); - goto fail; - } - - dst->width = src->width; - dst->height = src->height; - - mem_planes = hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY ? 1 : planes; - for (int i = 0; i < mem_planes; i++) { - ret = vk->MapMemory(hwctx->act_dev, f->mem[i], 0, - VK_WHOLE_SIZE, 0, (void **)&dst->data[i]); - if (ret != VK_SUCCESS) { - av_log(hwfc, AV_LOG_ERROR, "Failed to map image memory: %s\n", - vk_ret2str(ret)); - err = AVERROR_EXTERNAL; - goto fail; - } - mapped_mem_count++; - } - - if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY) { - for (int i = 0; i < planes; i++) - dst->data[i] = dst->data[0] + f->offset[i]; - } - - /* Check if the memory contents matter */ - if (((flags & AV_HWFRAME_MAP_READ) || !(flags & AV_HWFRAME_MAP_OVERWRITE)) && - !(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { - VkMappedMemoryRange map_mem_ranges[AV_NUM_DATA_POINTERS] = { { 0 } }; - for (int i = 0; i < planes; i++) { - map_mem_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; - map_mem_ranges[i].size = VK_WHOLE_SIZE; - map_mem_ranges[i].memory = f->mem[i]; - } - - ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, planes, - map_mem_ranges); - if (ret != VK_SUCCESS) { - av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate memory: %s\n", - vk_ret2str(ret)); - err = AVERROR_EXTERNAL; - goto fail; - } - } - - for (int i = 0; i < planes; i++) { - VkImageSubresource sub = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - }; - VkSubresourceLayout layout; - vk->GetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout); - dst->linesize[i] = layout.rowPitch; - } - - map->frame = f; - map->flags = flags; - - err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, - &vulkan_unmap_frame, map); - if (err < 0) - goto fail; - - return 0; - -fail: - for (int i = 0; i < mapped_mem_count; i++) - vk->UnmapMemory(hwctx->act_dev, f->mem[i]); - - av_free(map); - return err; -} - #if CONFIG_LIBDRM static void vulkan_unmap_from_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap) { AVVkFrame *f = hwmap->priv; AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; - const int planes = av_pix_fmt_count_planes(hwfc->sw_format); VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + const int nb_images = ff_vk_count_images(f); VkSemaphoreWaitInfo wait_info = { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, .flags = 0x0, .pSemaphores = f->sem, .pValues = f->sem_value, - .semaphoreCount = planes, + .semaphoreCount = nb_images, }; vk->WaitSemaphores(hwctx->act_dev, &wait_info, UINT64_MAX); vulkan_free_internal(f); - for (int i = 0; i < planes; i++) { - vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); - vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); + for (int i = 0; i < nb_images; i++) { + vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); + vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc); } @@ -2654,7 +2455,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f AVHWDeviceContext *ctx = hwfc->device_ctx; AVVulkanDeviceContext *hwctx = ctx->hwctx; VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; VulkanFramesPriv *fp = hwfc->internal->priv; const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0]; VkBindImageMemoryInfo bind_info[AV_DRM_MAX_PLANES]; @@ -2711,16 +2512,16 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f .extent.depth = 1, .mipLevels = 1, .arrayLayers = 1, - .flags = 0x0, /* ALIAS flag is implicit for imported images */ - .tiling = f->tiling, + .flags = 0x0, + .tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */ .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT, .samples = VK_SAMPLE_COUNT_1_BIT, - .pQueueFamilyIndices = p->qfs, - .queueFamilyIndexCount = p->num_qfs, - .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT : - VK_SHARING_MODE_EXCLUSIVE, + .pQueueFamilyIndices = p->img_qfs, + .queueFamilyIndexCount = p->nb_img_qfs, + .sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT : + VK_SHARING_MODE_EXCLUSIVE, }; /* Image format verification */ @@ -2758,7 +2559,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f &fmt_props, &props_ret); if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Cannot map DRM frame to Vulkan: %s\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); err = AVERROR_EXTERNAL; goto fail; } @@ -2781,7 +2582,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f hwctx->alloc, &f->img[i]); if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); err = AVERROR(EINVAL); goto fail; } @@ -2790,7 +2591,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f hwctx->alloc, &f->sem[i]); if (ret != VK_SUCCESS) { av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } @@ -2799,12 +2600,13 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f * offer us anything we could import and sync with, so instead * just signal the semaphore we created. */ + f->queue_family[i] = p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0]; f->layout[i] = create_info.initialLayout; f->access[i] = 0x0; f->sem_value[i] = 0; } - for (int i = 0; i < desc->nb_objects; i++) { + for (int i = 0; i < desc->nb_layers; i++) { /* Memory requirements */ VkImageMemoryRequirementsInfo2 req_desc = { .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2, @@ -2822,9 +2624,13 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f VkMemoryFdPropertiesKHR fdmp = { .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR, }; + /* This assumes that a layer will never be constructed from multiple + * objects. If that was to happen in the real world, this code would + * need to import each plane separately. + */ VkImportMemoryFdInfoKHR idesc = { .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR, - .fd = dup(desc->objects[i].fd), + .fd = dup(desc->objects[desc->layers[i].planes[0].object_index].fd), .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, }; VkMemoryDedicatedAllocateInfo ded_alloc = { @@ -2839,7 +2645,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f idesc.fd, &fdmp); if (ret != VK_SUCCESS) { av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); err = AVERROR_EXTERNAL; close(idesc.fd); goto fail; @@ -2878,7 +2684,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f bind_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO; bind_info[bind_counts].pNext = planes > 1 ? &plane_info[bind_counts] : NULL; bind_info[bind_counts].image = f->img[i]; - bind_info[bind_counts].memory = f->mem[desc->layers[i].planes[j].object_index]; + bind_info[bind_counts].memory = f->mem[i]; /* Offset is already signalled via pPlaneLayouts above */ bind_info[bind_counts].memoryOffset = 0; @@ -2891,12 +2697,12 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f ret = vk->BindImageMemory2(hwctx->act_dev, bind_counts, bind_info); if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); err = AVERROR_EXTERNAL; goto fail; } - err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_EXTERNAL_IMPORT); + err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_EXTERNAL_IMPORT); if (err) goto fail; @@ -2996,7 +2802,7 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc, const int planes = av_pix_fmt_count_planes(hwfc->sw_format); const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format); VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; AVHWFramesContext *cuda_fc = (AVHWFramesContext*)cuda_hwfc->data; AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx; @@ -3007,20 +2813,12 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc, CU_AD_FORMAT_UNSIGNED_INT8; dst_f = (AVVkFrame *)frame->data[0]; - dst_int = dst_f->internal; - if (!dst_int || !dst_int->cuda_fc_ref) { - if (!dst_f->internal) - dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal)); - - if (!dst_int) - return AVERROR(ENOMEM); + if (!dst_int->cuda_fc_ref) { dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc); - if (!dst_int->cuda_fc_ref) { - av_freep(&dst_f->internal); + if (!dst_int->cuda_fc_ref) return AVERROR(ENOMEM); - } for (int i = 0; i < planes; i++) { CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = { @@ -3064,7 +2862,7 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc, &ext_desc.handle.win32.handle); if (ret != VK_SUCCESS) { av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a Win32 Handle: %s!\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); err = AVERROR_EXTERNAL; goto fail; } @@ -3092,7 +2890,7 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc, &ext_desc.handle.fd); if (ret != VK_SUCCESS) { av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD: %s!\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); err = AVERROR_EXTERNAL; goto fail; } @@ -3135,7 +2933,7 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc, #endif if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n", - vk_ret2str(ret)); + ff_vk_ret2str(ret)); err = AVERROR_EXTERNAL; goto fail; } @@ -3183,7 +2981,7 @@ static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc, dst_f = (AVVkFrame *)dst->data[0]; - err = prepare_frame(hwfc, &fp->upload_ctx, dst_f, PREP_MODE_EXTERNAL_EXPORT); + err = prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_EXPORT); if (err < 0) return err; @@ -3243,7 +3041,7 @@ static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc, av_log(hwfc, AV_LOG_VERBOSE, "Transfered CUDA image to Vulkan!\n"); - return err = prepare_frame(hwfc, &fp->upload_ctx, dst_f, PREP_MODE_EXTERNAL_IMPORT); + return err = prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_IMPORT); fail: CHECK_CU(cu->cuCtxPopCurrent(&dummy)); @@ -3263,13 +3061,13 @@ static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst, #if CONFIG_LIBDRM #if CONFIG_VAAPI case AV_PIX_FMT_VAAPI: - if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS)) + if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS) return vulkan_map_from_vaapi(hwfc, dst, src, flags); else return AVERROR(ENOSYS); #endif case AV_PIX_FMT_DRM_PRIME: - if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS)) + if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS) return vulkan_map_from_drm(hwfc, dst, src, flags); else return AVERROR(ENOSYS); @@ -3310,7 +3108,7 @@ static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst, VkResult ret; AVVkFrame *f = (AVVkFrame *)src->data[0]; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; VulkanFramesPriv *fp = hwfc->internal->priv; AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; AVVulkanFramesContext *hwfctx = hwfc->hwctx; @@ -3328,7 +3126,7 @@ static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst, if (!drm_desc) return AVERROR(ENOMEM); - err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_EXTERNAL_EXPORT); + err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_EXTERNAL_EXPORT); if (err < 0) goto end; @@ -3449,44 +3247,22 @@ static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst, switch (dst->format) { #if CONFIG_LIBDRM case AV_PIX_FMT_DRM_PRIME: - if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS)) + if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS) return vulkan_map_to_drm(hwfc, dst, src, flags); else return AVERROR(ENOSYS); #if CONFIG_VAAPI case AV_PIX_FMT_VAAPI: - if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS)) + if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS) return vulkan_map_to_vaapi(hwfc, dst, src, flags); else return AVERROR(ENOSYS); #endif #endif default: - return vulkan_map_frame_to_mem(hwfc, dst, src, flags); + break; } -} - -typedef struct ImageBuffer { - VkBuffer buf; - VkDeviceMemory mem; - VkMemoryPropertyFlagBits flags; - int mapped_mem; -} ImageBuffer; - -static void free_buf(void *opaque, uint8_t *data) -{ - AVHWDeviceContext *ctx = opaque; - AVVulkanDeviceContext *hwctx = ctx->hwctx; - VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; - ImageBuffer *vkbuf = (ImageBuffer *)data; - - if (vkbuf->buf) - vk->DestroyBuffer(hwctx->act_dev, vkbuf->buf, hwctx->alloc); - if (vkbuf->mem) - vk->FreeMemory(hwctx->act_dev, vkbuf->mem, hwctx->alloc); - - av_free(data); + return AVERROR(ENOSYS); } static size_t get_req_buffer_size(VulkanDevicePriv *p, int *stride, int height) @@ -3498,203 +3274,7 @@ static size_t get_req_buffer_size(VulkanDevicePriv *p, int *stride, int height) return size; } -static int create_buf(AVHWDeviceContext *ctx, AVBufferRef **buf, - VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags, - size_t size, uint32_t req_memory_bits, int host_mapped, - void *create_pnext, void *alloc_pnext) -{ - int err; - VkResult ret; - int use_ded_mem; - AVVulkanDeviceContext *hwctx = ctx->hwctx; - VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; - - VkBufferCreateInfo buf_spawn = { - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = create_pnext, - .usage = usage, - .size = size, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - }; - - VkBufferMemoryRequirementsInfo2 req_desc = { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, - }; - VkMemoryDedicatedAllocateInfo ded_alloc = { - .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, - .pNext = alloc_pnext, - }; - VkMemoryDedicatedRequirements ded_req = { - .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, - }; - VkMemoryRequirements2 req = { - .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, - .pNext = &ded_req, - }; - - ImageBuffer *vkbuf = av_mallocz(sizeof(*vkbuf)); - if (!vkbuf) - return AVERROR(ENOMEM); - - vkbuf->mapped_mem = host_mapped; - - ret = vk->CreateBuffer(hwctx->act_dev, &buf_spawn, NULL, &vkbuf->buf); - if (ret != VK_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Failed to create buffer: %s\n", - vk_ret2str(ret)); - err = AVERROR_EXTERNAL; - goto fail; - } - - req_desc.buffer = vkbuf->buf; - - vk->GetBufferMemoryRequirements2(hwctx->act_dev, &req_desc, &req); - - /* In case the implementation prefers/requires dedicated allocation */ - use_ded_mem = ded_req.prefersDedicatedAllocation | - ded_req.requiresDedicatedAllocation; - if (use_ded_mem) - ded_alloc.buffer = vkbuf->buf; - - /* Additional requirements imposed on us */ - if (req_memory_bits) - req.memoryRequirements.memoryTypeBits &= req_memory_bits; - - err = alloc_mem(ctx, &req.memoryRequirements, flags, - use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext, - &vkbuf->flags, &vkbuf->mem); - if (err) - goto fail; - - ret = vk->BindBufferMemory(hwctx->act_dev, vkbuf->buf, vkbuf->mem, 0); - if (ret != VK_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n", - vk_ret2str(ret)); - err = AVERROR_EXTERNAL; - goto fail; - } - - *buf = av_buffer_create((uint8_t *)vkbuf, sizeof(*vkbuf), free_buf, ctx, 0); - if (!(*buf)) { - err = AVERROR(ENOMEM); - goto fail; - } - - return 0; - -fail: - free_buf(ctx, (uint8_t *)vkbuf); - return err; -} - -/* Skips mapping of host mapped buffers but still invalidates them */ -static int map_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs, uint8_t *mem[], - int nb_buffers, int invalidate) -{ - VkResult ret; - AVVulkanDeviceContext *hwctx = ctx->hwctx; - VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; - VkMappedMemoryRange invalidate_ctx[AV_NUM_DATA_POINTERS]; - int invalidate_count = 0; - - for (int i = 0; i < nb_buffers; i++) { - ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data; - if (vkbuf->mapped_mem) - continue; - - ret = vk->MapMemory(hwctx->act_dev, vkbuf->mem, 0, - VK_WHOLE_SIZE, 0, (void **)&mem[i]); - if (ret != VK_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n", - vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } - - if (!invalidate) - return 0; - - for (int i = 0; i < nb_buffers; i++) { - ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data; - const VkMappedMemoryRange ival_buf = { - .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, - .memory = vkbuf->mem, - .size = VK_WHOLE_SIZE, - }; - - /* For host imported memory Vulkan says to use platform-defined - * sync methods, but doesn't really say not to call flush or invalidate - * on original host pointers. It does explicitly allow to do that on - * host-mapped pointers which are then mapped again using vkMapMemory, - * but known implementations return the original pointers when mapped - * again. */ - if (vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) - continue; - - invalidate_ctx[invalidate_count++] = ival_buf; - } - - if (invalidate_count) { - ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, invalidate_count, - invalidate_ctx); - if (ret != VK_SUCCESS) - av_log(ctx, AV_LOG_WARNING, "Failed to invalidate memory: %s\n", - vk_ret2str(ret)); - } - - return 0; -} - -static int unmap_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs, - int nb_buffers, int flush) -{ - int err = 0; - VkResult ret; - AVVulkanDeviceContext *hwctx = ctx->hwctx; - VulkanDevicePriv *p = ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; - VkMappedMemoryRange flush_ctx[AV_NUM_DATA_POINTERS]; - int flush_count = 0; - - if (flush) { - for (int i = 0; i < nb_buffers; i++) { - ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data; - const VkMappedMemoryRange flush_buf = { - .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, - .memory = vkbuf->mem, - .size = VK_WHOLE_SIZE, - }; - - if (vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) - continue; - - flush_ctx[flush_count++] = flush_buf; - } - } - - if (flush_count) { - ret = vk->FlushMappedMemoryRanges(hwctx->act_dev, flush_count, flush_ctx); - if (ret != VK_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Failed to flush memory: %s\n", - vk_ret2str(ret)); - err = AVERROR_EXTERNAL; /* We still want to try to unmap them */ - } - } - - for (int i = 0; i < nb_buffers; i++) { - ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data; - if (vkbuf->mapped_mem) - continue; - - vk->UnmapMemory(hwctx->act_dev, vkbuf->mem); - } - - return err; -} - -static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f, +static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f, AVBufferRef **bufs, size_t *buf_offsets, const int *buf_stride, int w, int h, enum AVPixelFormat pix_fmt, int to_buf) @@ -3703,125 +3283,89 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f, AVVkFrame *frame = (AVVkFrame *)f->data[0]; VulkanFramesPriv *fp = hwfc->internal->priv; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; - - int bar_num = 0; - VkPipelineStageFlagBits sem_wait_dst[AV_NUM_DATA_POINTERS]; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS]; + int nb_img_bar = 0; - const int planes = av_pix_fmt_count_planes(pix_fmt); + const int nb_images = ff_vk_count_images(frame); + int pixfmt_planes = av_pix_fmt_count_planes(pix_fmt); const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); - VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 }; - VulkanExecCtx *ectx = to_buf ? &fp->download_ctx : &fp->upload_ctx; - VkCommandBuffer cmd_buf = get_buf_exec_ctx(hwfc, ectx); - - uint64_t sem_signal_values[AV_NUM_DATA_POINTERS]; - - VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = { - .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, - .pWaitSemaphoreValues = frame->sem_value, - .pSignalSemaphoreValues = sem_signal_values, - .waitSemaphoreValueCount = planes, - .signalSemaphoreValueCount = planes, - }; - - VkSubmitInfo s_info = { - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .pNext = &s_timeline_sem_info, - .pSignalSemaphores = frame->sem, - .pWaitSemaphores = frame->sem, - .pWaitDstStageMask = sem_wait_dst, - .signalSemaphoreCount = planes, - .waitSemaphoreCount = planes, - }; + VkCommandBuffer cmd_buf; + FFVkExecContext *exec = ff_vk_exec_get(to_buf ? &fp->download_exec : + &fp->upload_exec); + cmd_buf = exec->buf; + ff_vk_exec_start(&p->vkctx, exec); - for (int i = 0; i < planes; i++) - sem_signal_values[i] = frame->sem_value[i] + 1; - - if ((err = wait_start_exec_ctx(hwfc, ectx))) + err = ff_vk_exec_add_dep_buf(&p->vkctx, exec, bufs, pixfmt_planes, 1); + if (err < 0) return err; - /* Change the image layout to something more optimal for transfers */ - for (int i = 0; i < planes; i++) { - VkImageLayout new_layout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL : - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - VkAccessFlags new_access = to_buf ? VK_ACCESS_TRANSFER_READ_BIT : - VK_ACCESS_TRANSFER_WRITE_BIT; - - sem_wait_dst[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; - - /* If the layout matches and we have read access skip the barrier */ - if ((frame->layout[i] == new_layout) && (frame->access[i] & new_access)) - continue; - - img_bar[bar_num].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - img_bar[bar_num].srcAccessMask = 0x0; - img_bar[bar_num].dstAccessMask = new_access; - img_bar[bar_num].oldLayout = frame->layout[i]; - img_bar[bar_num].newLayout = new_layout; - img_bar[bar_num].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - img_bar[bar_num].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - img_bar[bar_num].image = frame->img[i]; - img_bar[bar_num].subresourceRange.levelCount = 1; - img_bar[bar_num].subresourceRange.layerCount = 1; - img_bar[bar_num].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - - frame->layout[i] = img_bar[bar_num].newLayout; - frame->access[i] = img_bar[bar_num].dstAccessMask; - - bar_num++; - } + err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, f, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_TRANSFER_BIT); + if (err < 0) + return err; - if (bar_num) - vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, 0, - 0, NULL, 0, NULL, bar_num, img_bar); + ff_vk_frame_barrier(&p->vkctx, exec, f, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, + to_buf ? VK_ACCESS_TRANSFER_READ_BIT : + VK_ACCESS_TRANSFER_WRITE_BIT, + to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL : + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VK_QUEUE_FAMILY_IGNORED); + + vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); /* Schedule a copy for each plane */ - for (int i = 0; i < planes; i++) { - ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data; + for (int i = 0; i < pixfmt_planes; i++) { + int idx = FFMIN(i, nb_images - 1); + VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_ASPECT_PLANE_0_BIT, + VK_IMAGE_ASPECT_PLANE_1_BIT, + VK_IMAGE_ASPECT_PLANE_2_BIT, }; + + FFVkBuffer *vkbuf = (FFVkBuffer *)bufs[i]->data; VkBufferImageCopy buf_reg = { .bufferOffset = buf_offsets[i], .bufferRowLength = buf_stride[i] / desc->comp[i].step, .imageSubresource.layerCount = 1, - .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .imageSubresource.aspectMask = plane_aspect[(pixfmt_planes != nb_images) + + i*(pixfmt_planes != nb_images)], .imageOffset = { 0, 0, 0, }, }; - int p_w, p_h; + uint32_t p_w, p_h; get_plane_wh(&p_w, &p_h, pix_fmt, w, h, i); buf_reg.bufferImageHeight = p_h; buf_reg.imageExtent = (VkExtent3D){ p_w, p_h, 1, }; if (to_buf) - vk->CmdCopyImageToBuffer(cmd_buf, frame->img[i], frame->layout[i], - vkbuf->buf, 1, &buf_reg); + vk->CmdCopyImageToBuffer(cmd_buf, frame->img[idx], + img_bar[0].newLayout, + vkbuf->buf, + 1, &buf_reg); else - vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[i], - frame->layout[i], 1, &buf_reg); + vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[idx], + img_bar[0].newLayout, + 1, &buf_reg); } - /* When uploading, do this asynchronously if the source is refcounted by - * keeping the buffers as a submission dependency. - * The hwcontext is guaranteed to not be freed until all frames are freed - * in the frames_unint function. - * When downloading to buffer, do this synchronously and wait for the - * queue submission to finish executing */ - if (!to_buf) { - int ref; - for (ref = 0; ref < AV_NUM_DATA_POINTERS; ref++) { - if (!f->buf[ref]) - break; - if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1))) - return err; - } - if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, planes))) - return err; - return submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref); - } else { - return submit_exec_ctx(hwfc, ectx, &s_info, frame, 1); - } + err = ff_vk_exec_submit(&p->vkctx, exec); + if (err < 0) + return err; + + /* Wait for the operation to complete when downloading */ + if (to_buf) + ff_vk_exec_wait(&p->vkctx, exec); + + return 0; } static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, @@ -3829,21 +3373,21 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, { int err = 0; VkResult ret; - AVVkFrame *f = (AVVkFrame *)vkf->data[0]; AVHWDeviceContext *dev_ctx = hwfc->device_ctx; AVVulkanDeviceContext *hwctx = dev_ctx->hwctx; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - FFVulkanFunctions *vk = &p->vkfn; + FFVulkanFunctions *vk = &p->vkctx.vkfn; AVFrame tmp; + FFVkBuffer *vkbufs[AV_NUM_DATA_POINTERS]; AVBufferRef *bufs[AV_NUM_DATA_POINTERS] = { 0 }; size_t buf_offsets[AV_NUM_DATA_POINTERS] = { 0 }; - int p_w, p_h; + uint32_t p_w, p_h; const int planes = av_pix_fmt_count_planes(swf->format); int host_mapped[AV_NUM_DATA_POINTERS] = { 0 }; - const int map_host = !!(p->extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY); + const int map_host = !!(p->vkctx.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY); if ((swf->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(swf->format))) { av_log(hwfc, AV_LOG_ERROR, "Unsupported software frame pixel format!\n"); @@ -3853,23 +3397,6 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, if (swf->width > hwfc->width || swf->height > hwfc->height) return AVERROR(EINVAL); - /* For linear, host visiable images */ - if (f->tiling == VK_IMAGE_TILING_LINEAR && - f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { - AVFrame *map = av_frame_alloc(); - if (!map) - return AVERROR(ENOMEM); - map->format = swf->format; - - err = vulkan_map_frame_to_mem(hwfc, map, vkf, AV_HWFRAME_MAP_WRITE); - if (err) - return err; - - err = av_frame_copy((AVFrame *)(from ? swf : map), from ? map : swf); - av_frame_free(&map); - return err; - } - /* Create buffers */ for (int i = 0; i < planes; i++) { size_t req_size; @@ -3907,8 +3434,7 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, import_desc.handleType, import_desc.pHostPointer, &p_props); - - if (ret == VK_SUCCESS) { + if (ret == VK_SUCCESS && p_props.memoryTypeBits) { host_mapped[i] = 1; buf_offsets[i] = offs; } @@ -3917,20 +3443,23 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, if (!host_mapped[i]) req_size = get_req_buffer_size(p, &tmp.linesize[i], p_h); - err = create_buf(dev_ctx, &bufs[i], - from ? VK_BUFFER_USAGE_TRANSFER_DST_BIT : - VK_BUFFER_USAGE_TRANSFER_SRC_BIT, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, - req_size, p_props.memoryTypeBits, host_mapped[i], - host_mapped[i] ? &create_desc : NULL, - host_mapped[i] ? &import_desc : NULL); - if (err) + err = ff_vk_create_avbuf(&p->vkctx, &bufs[i], req_size, + host_mapped[i] ? &create_desc : NULL, + host_mapped[i] ? &import_desc : NULL, + from ? VK_BUFFER_USAGE_TRANSFER_DST_BIT : + VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + (host_mapped[i] ? + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT : 0x0)); + if (err < 0) goto end; + + vkbufs[i] = (FFVkBuffer *)bufs[i]->data; } if (!from) { /* Map, copy image TO buffer (which then goes to the VkImage), unmap */ - if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 0))) + if ((err = ff_vk_map_buffers(&p->vkctx, vkbufs, tmp.data, planes, 0))) goto end; for (int i = 0; i < planes; i++) { @@ -3945,17 +3474,18 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, p_h); } - if ((err = unmap_buffers(dev_ctx, bufs, planes, 1))) + if ((err = ff_vk_unmap_buffers(&p->vkctx, vkbufs, planes, 1))) goto end; } /* Copy buffers into/from image */ - err = transfer_image_buf(hwfc, vkf, bufs, buf_offsets, tmp.linesize, - swf->width, swf->height, swf->format, from); + err = transfer_image_buf(hwfc, (AVFrame *)vkf, bufs, buf_offsets, + tmp.linesize, swf->width, swf->height, swf->format, + from); if (from) { /* Map, copy buffer (which came FROM the VkImage) to the frame, unmap */ - if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 0))) + if ((err = ff_vk_map_buffers(&p->vkctx, vkbufs, tmp.data, planes, 0))) goto end; for (int i = 0; i < planes; i++) { @@ -3970,7 +3500,7 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, p_h); } - if ((err = unmap_buffers(dev_ctx, bufs, planes, 1))) + if ((err = ff_vk_unmap_buffers(&p->vkctx, vkbufs, planes, 1))) goto end; } @@ -3990,11 +3520,11 @@ static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst, #if CONFIG_CUDA case AV_PIX_FMT_CUDA: #ifdef _WIN32 - if ((p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) && - (p->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM)) + if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) && + (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM)) #else - if ((p->extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) && - (p->extensions & FF_VK_EXT_EXTERNAL_FD_SEM)) + if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) && + (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_SEM)) #endif return vulkan_transfer_data_from_cuda(hwfc, dst, src); #endif @@ -4028,7 +3558,7 @@ static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst, dst_f = (AVVkFrame *)src->data[0]; - err = prepare_frame(hwfc, &fp->upload_ctx, dst_f, PREP_MODE_EXTERNAL_EXPORT); + err = prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_EXPORT); if (err < 0) return err; @@ -4088,7 +3618,7 @@ static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst, av_log(hwfc, AV_LOG_VERBOSE, "Transfered Vulkan image to CUDA!\n"); - return prepare_frame(hwfc, &fp->upload_ctx, dst_f, PREP_MODE_EXTERNAL_IMPORT); + return prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_IMPORT); fail: CHECK_CU(cu->cuCtxPopCurrent(&dummy)); @@ -4108,11 +3638,11 @@ static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst, #if CONFIG_CUDA case AV_PIX_FMT_CUDA: #ifdef _WIN32 - if ((p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) && - (p->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM)) + if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) && + (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM)) #else - if ((p->extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) && - (p->extensions & FF_VK_EXT_EXTERNAL_FD_SEM)) + if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) && + (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_SEM)) #endif return vulkan_transfer_data_to_cuda(hwfc, dst, src); #endif @@ -4132,7 +3662,25 @@ static int vulkan_frames_derive_to(AVHWFramesContext *dst_fc, AVVkFrame *av_vk_frame_alloc(void) { - return av_mallocz(sizeof(AVVkFrame)); + int err; + AVVkFrame *f = av_mallocz(sizeof(AVVkFrame)); + if (!f) + return NULL; + + f->internal = av_mallocz(sizeof(*f->internal)); + if (!f->internal) { + av_free(f); + return NULL; + } + + err = pthread_mutex_init(&f->internal->update_mutex, NULL); + if (err != 0) { + av_free(f->internal); + av_free(f); + return NULL; + } + + return f; } const HWContextType ff_hwcontext_type_vulkan = { diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h index df86c85b3ce2e..13c12a5351ee4 100644 --- a/libavutil/hwcontext_vulkan.h +++ b/libavutil/hwcontext_vulkan.h @@ -27,6 +27,8 @@ #include "pixfmt.h" #include "frame.h" +typedef struct AVVkFrame AVVkFrame; + /** * @file * API-specific header for AV_HWDEVICE_TYPE_VULKAN. @@ -53,7 +55,7 @@ typedef struct AVVulkanDeviceContext { PFN_vkGetInstanceProcAddr get_proc_addr; /** - * Vulkan instance. Must be at least version 1.2. + * Vulkan instance. Must be at least version 1.3. */ VkInstance inst; @@ -135,6 +137,19 @@ typedef struct AVVulkanDeviceContext { */ int queue_family_decode_index; int nb_decode_queues; + + /** + * Locks a queue, preventing other threads from submitting any command + * buffers to this queue. + * If set to NULL, will be set to lavu-internal functions that utilize a + * mutex. + */ + void (*lock_queue)(struct AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index); + + /** + * Similar to lock_queue(), unlocks a queue. Must only be called after locking. + */ + void (*unlock_queue)(struct AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index); } AVVulkanDeviceContext; /** @@ -145,10 +160,14 @@ typedef enum AVVkFrameFlags { * device and tiling during av_hwframe_ctx_init(). */ AV_VK_FRAME_FLAG_NONE = (1ULL << 0), - /* Image planes will be allocated in a single VkDeviceMemory, rather - * than as per-plane VkDeviceMemory allocations. Required for exporting - * to VAAPI on Intel devices. */ +#if FF_API_VULKAN_CONTIGUOUS_MEMORY + /* DEPRECATED: does nothing. */ AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY = (1ULL << 1), +#endif + + /* Disables multiplane images. + * This is required to export/import images from CUDA. */ + AV_VK_FRAME_FLAG_DISABLE_MULTIPLANE = (1ULL << 2), } AVVkFrameFlags; /** @@ -156,26 +175,32 @@ typedef enum AVVkFrameFlags { */ typedef struct AVVulkanFramesContext { /** - * Controls the tiling of allocated frames. If left as optimal tiling, - * then during av_hwframe_ctx_init() will decide based on whether the device - * supports DRM modifiers, or if the linear_images flag is set, otherwise - * will allocate optimally-tiled images. + * Controls the tiling of allocated frames. + * If left as VK_IMAGE_TILING_OPTIMAL (0), will use optimal tiling. + * Can be set to VK_IMAGE_TILING_LINEAR to force linear images, + * or VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT to force DMABUF-backed + * images. + * @note Imported frames from other APIs ignore this. */ VkImageTiling tiling; /** - * Defines extra usage of output frames. If left as 0, the following bits - * are set: TRANSFER_SRC, TRANSFER_DST. SAMPLED and STORAGE. + * Defines extra usage of output frames. If non-zero, all flags MUST be + * supported by the VkFormat. Otherwise, will use supported flags amongst: + * - VK_IMAGE_USAGE_SAMPLED_BIT + * - VK_IMAGE_USAGE_STORAGE_BIT + * - VK_IMAGE_USAGE_TRANSFER_SRC_BIT + * - VK_IMAGE_USAGE_TRANSFER_DST_BIT */ VkImageUsageFlagBits usage; /** * Extension data for image creation. - * If VkImageDrmFormatModifierListCreateInfoEXT is present in the chain, - * and the device supports DRM modifiers, then images will be allocated - * with the specific requested DRM modifiers. + * If DRM tiling is used, a VkImageDrmFormatModifierListCreateInfoEXT structure + * can be added to specify the exact modifier to use. + * * Additional structures may be added at av_hwframe_ctx_init() time, - * which will be freed automatically on uninit(), so users need only free + * which will be freed automatically on uninit(), so users must only free * any structures they've allocated themselves. */ void *create_pnext; @@ -195,29 +220,58 @@ typedef struct AVVulkanFramesContext { * av_hwframe_ctx_init(). */ AVVkFrameFlags flags; + + /** + * Flags to set during image creation. If unset, defaults to + * VK_IMAGE_CREATE_ALIAS_BIT. + */ + VkImageCreateFlags img_flags; + + /** + * Vulkan format for each image. MUST be compatible with the pixel format. + * If unset, will be automatically set. + * There are at most two compatible formats for a frame - a multiplane + * format, and a single-plane multi-image format. + */ + VkFormat format[AV_NUM_DATA_POINTERS]; + + /** + * Number of layers each image will have. + */ + int nb_layers; + + /** + * Locks a frame, preventing other threads from changing frame properties. + * If set to NULL, will be set to lavu-internal functions that utilize a + * mutex. + * Users SHOULD only ever lock just before command submission in order + * to get accurate frame properties, and unlock immediately after command + * submission without waiting for it to finish. + * + * If unset, will be set to lavu-internal functions that utilize a mutex. + */ + void (*lock_frame)(struct AVHWFramesContext *fc, AVVkFrame *vkf); + + /** + * Similar to lock_frame(), unlocks a frame. Must only be called after locking. + */ + void (*unlock_frame)(struct AVHWFramesContext *fc, AVVkFrame *vkf); } AVVulkanFramesContext; /* - * Frame structure, the VkFormat of the image will always match - * the pool's sw_format. - * All frames, imported or allocated, will be created with the - * VK_IMAGE_CREATE_ALIAS_BIT flag set, so the memory may be aliased if needed. - * - * If all queue family indices in the device context are the same, - * images will be created with the EXCLUSIVE sharing mode. Otherwise, all images - * will be created using the CONCURRENT sharing mode. + * Frame structure. * * @note the size of this structure is not part of the ABI, to allocate * you must use @av_vk_frame_alloc(). */ -typedef struct AVVkFrame { +struct AVVkFrame { /** * Vulkan images to which the memory is bound to. */ VkImage img[AV_NUM_DATA_POINTERS]; /** - * The same tiling must be used for all images in the frame. + * Tiling for the frame. */ VkImageTiling tiling; @@ -235,13 +289,13 @@ typedef struct AVVkFrame { VkMemoryPropertyFlagBits flags; /** - * Updated after every barrier + * Updated after every barrier. One per VkImage. */ VkAccessFlagBits access[AV_NUM_DATA_POINTERS]; VkImageLayout layout[AV_NUM_DATA_POINTERS]; /** - * Synchronization timeline semaphores, one for each sw_format plane. + * Synchronization timeline semaphores, one for each VkImage. * Must not be freed manually. Must be waited on at every submission using * the value in sem_value, and must be signalled at every submission, * using an incremented value. @@ -250,6 +304,7 @@ typedef struct AVVkFrame { /** * Up to date semaphore value at which each image becomes accessible. + * One per VkImage. * Clients must wait on this value when submitting a command queue, * and increment it when signalling. */ @@ -261,10 +316,18 @@ typedef struct AVVkFrame { struct AVVkFrameInternal *internal; /** - * Describes the binding offset of each plane to the VkDeviceMemory. + * Describes the binding offset of each image to the VkDeviceMemory. + * One per VkImage. */ ptrdiff_t offset[AV_NUM_DATA_POINTERS]; -} AVVkFrame; + + /** + * Queue family of the images. Must be VK_QUEUE_FAMILY_IGNORED if + * the image was allocated with the CONCURRENT concurrency option. + * One per VkImage. + */ + uint32_t queue_family[AV_NUM_DATA_POINTERS]; +}; /** * Allocates a single AVVkFrame and initializes everything as 0. @@ -273,7 +336,7 @@ typedef struct AVVkFrame { AVVkFrame *av_vk_frame_alloc(void); /** - * Returns the format of each image up to the number of planes for a given sw_format. + * Returns the optimal format for a given sw_format, one for each plane. * Returns NULL on unsupported formats. */ const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p); diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c index 62a2ae08d9078..e1e0dd2a9ea10 100644 --- a/libavutil/pixdesc.c +++ b/libavutil/pixdesc.c @@ -2717,6 +2717,54 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = { .flags = AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_FLOAT | AV_PIX_FMT_FLAG_ALPHA, }, + [AV_PIX_FMT_P212BE] = { + .name = "p212be", + .nb_components = 3, + .log2_chroma_w = 1, + .log2_chroma_h = 0, + .comp = { + { 0, 2, 0, 4, 12 }, /* Y */ + { 1, 4, 0, 4, 12 }, /* U */ + { 1, 4, 2, 4, 12 }, /* V */ + }, + .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_BE, + }, + [AV_PIX_FMT_P212LE] = { + .name = "p212le", + .nb_components = 3, + .log2_chroma_w = 1, + .log2_chroma_h = 0, + .comp = { + { 0, 2, 0, 4, 12 }, /* Y */ + { 1, 4, 0, 4, 12 }, /* U */ + { 1, 4, 2, 4, 12 }, /* V */ + }, + .flags = AV_PIX_FMT_FLAG_PLANAR, + }, + [AV_PIX_FMT_P412BE] = { + .name = "p412be", + .nb_components = 3, + .log2_chroma_w = 0, + .log2_chroma_h = 0, + .comp = { + { 0, 2, 0, 4, 12 }, /* Y */ + { 1, 4, 0, 4, 12 }, /* U */ + { 1, 4, 2, 4, 12 }, /* V */ + }, + .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_BE, + }, + [AV_PIX_FMT_P412LE] = { + .name = "p412le", + .nb_components = 3, + .log2_chroma_w = 0, + .log2_chroma_h = 0, + .comp = { + { 0, 2, 0, 4, 12 }, /* Y */ + { 1, 4, 0, 4, 12 }, /* U */ + { 1, 4, 2, 4, 12 }, /* V */ + }, + .flags = AV_PIX_FMT_FLAG_PLANAR, + }, }; static const char * const color_range_names[] = { diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h index 37c2c79e01405..63e07ba64f752 100644 --- a/libavutil/pixfmt.h +++ b/libavutil/pixfmt.h @@ -420,6 +420,12 @@ enum AVPixelFormat { AV_PIX_FMT_RGBAF32BE, ///< IEEE-754 single precision packed RGBA 32:32:32:32, 128bpp, RGBARGBA..., big-endian AV_PIX_FMT_RGBAF32LE, ///< IEEE-754 single precision packed RGBA 32:32:32:32, 128bpp, RGBARGBA..., little-endian + AV_PIX_FMT_P212BE, ///< interleaved chroma YUV 4:2:2, 24bpp, data in the high bits, big-endian + AV_PIX_FMT_P212LE, ///< interleaved chroma YUV 4:2:2, 24bpp, data in the high bits, little-endian + + AV_PIX_FMT_P412BE, ///< interleaved chroma YUV 4:4:4, 36bpp, data in the high bits, big-endian + AV_PIX_FMT_P412LE, ///< interleaved chroma YUV 4:4:4, 36bpp, data in the high bits, little-endian + AV_PIX_FMT_NB ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions }; @@ -518,6 +524,8 @@ enum AVPixelFormat { #define AV_PIX_FMT_P210 AV_PIX_FMT_NE(P210BE, P210LE) #define AV_PIX_FMT_P410 AV_PIX_FMT_NE(P410BE, P410LE) +#define AV_PIX_FMT_P212 AV_PIX_FMT_NE(P212BE, P212LE) +#define AV_PIX_FMT_P412 AV_PIX_FMT_NE(P412BE, P412LE) #define AV_PIX_FMT_P216 AV_PIX_FMT_NE(P216BE, P216LE) #define AV_PIX_FMT_P416 AV_PIX_FMT_NE(P416BE, P416LE) diff --git a/libavutil/version.h b/libavutil/version.h index 8c7ea1a47a6dd..fa53fae768e91 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -118,6 +118,7 @@ #define FF_API_INTERLACED_FRAME (LIBAVUTIL_VERSION_MAJOR < 59) #define FF_API_FRAME_KEY (LIBAVUTIL_VERSION_MAJOR < 59) #define FF_API_PALETTE_HAS_CHANGED (LIBAVUTIL_VERSION_MAJOR < 59) +#define FF_API_VULKAN_CONTIGUOUS_MEMORY (LIBAVUTIL_VERSION_MAJOR < 59) /** * @} diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 403f0b1f27660..4ebd24c38902d 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -1,4 +1,6 @@ /* + * Copyright (c) Lynne + * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -21,33 +23,6 @@ #include "vulkan.h" #include "vulkan_loader.h" -#if CONFIG_LIBGLSLANG -#include "vulkan_glslang.c" -#elif CONFIG_LIBSHADERC -#include "vulkan_shaderc.c" -#endif - -/* Generic macro for creating contexts which need to keep their addresses - * if another context is created. */ -#define FN_CREATING(ctx, type, shortname, array, num) \ -static av_always_inline type *create_ ##shortname(ctx *dctx) \ -{ \ - type **array, *sctx = av_mallocz(sizeof(*sctx)); \ - if (!sctx) \ - return NULL; \ - \ - array = av_realloc_array(dctx->array, sizeof(*dctx->array), dctx->num + 1);\ - if (!array) { \ - av_free(sctx); \ - return NULL; \ - } \ - \ - dctx->array = array; \ - dctx->array[dctx->num++] = sctx; \ - \ - return sctx; \ -} - const VkComponentMapping ff_comp_identity_map = { .r = VK_COMPONENT_SWIZZLE_IDENTITY, .g = VK_COMPONENT_SWIZZLE_IDENTITY, @@ -78,6 +53,12 @@ const char *ff_vk_ret2str(VkResult res) CASE(VK_ERROR_TOO_MANY_OBJECTS); CASE(VK_ERROR_FORMAT_NOT_SUPPORTED); CASE(VK_ERROR_FRAGMENTED_POOL); + CASE(VK_ERROR_UNKNOWN); + CASE(VK_ERROR_OUT_OF_POOL_MEMORY); + CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE); + CASE(VK_ERROR_FRAGMENTATION); + CASE(VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS); + CASE(VK_PIPELINE_COMPILE_REQUIRED); CASE(VK_ERROR_SURFACE_LOST_KHR); CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR); CASE(VK_SUBOPTIMAL_KHR); @@ -85,58 +66,691 @@ const char *ff_vk_ret2str(VkResult res) CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR); CASE(VK_ERROR_VALIDATION_FAILED_EXT); CASE(VK_ERROR_INVALID_SHADER_NV); - CASE(VK_ERROR_OUT_OF_POOL_MEMORY); - CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE); - CASE(VK_ERROR_NOT_PERMITTED_EXT); + CASE(VK_ERROR_VIDEO_PICTURE_LAYOUT_NOT_SUPPORTED_KHR); + CASE(VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR); + CASE(VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR); + CASE(VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR); + CASE(VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR); + CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT); + CASE(VK_ERROR_NOT_PERMITTED_KHR); + CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT); + CASE(VK_THREAD_IDLE_KHR); + CASE(VK_THREAD_DONE_KHR); + CASE(VK_OPERATION_DEFERRED_KHR); + CASE(VK_OPERATION_NOT_DEFERRED_KHR); default: return "Unknown error"; } #undef CASE } -void ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, - VkQueueFlagBits dev_family, int nb_queues) +int ff_vk_load_props(FFVulkanContext *s) +{ + FFVulkanFunctions *vk = &s->vkfn; + + s->hprops = (VkPhysicalDeviceExternalMemoryHostPropertiesEXT) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT, + }; + s->subgroup_props = (VkPhysicalDeviceSubgroupSizeControlProperties) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES, + .pNext = &s->hprops, + }; + s->desc_buf_props = (VkPhysicalDeviceDescriptorBufferPropertiesEXT) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_PROPERTIES_EXT, + .pNext = &s->subgroup_props, + }; + s->driver_props = (VkPhysicalDeviceDriverProperties) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES, + .pNext = &s->desc_buf_props, + }; + s->props = (VkPhysicalDeviceProperties2) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, + .pNext = &s->driver_props, + }; + + s->atomic_float_feats = (VkPhysicalDeviceShaderAtomicFloatFeaturesEXT) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT, + }; + s->feats_12 = (VkPhysicalDeviceVulkan12Features) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES, + .pNext = &s->atomic_float_feats, + }; + s->feats = (VkPhysicalDeviceFeatures2) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, + .pNext = &s->feats_12, + }; + + vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props); + vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops); + vk->GetPhysicalDeviceFeatures2(s->hwctx->phys_dev, &s->feats); + + if (s->qf_props) + return 0; + + vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &s->tot_nb_qfs, NULL); + + s->qf_props = av_calloc(s->tot_nb_qfs, sizeof(*s->qf_props)); + if (!s->qf_props) + return AVERROR(ENOMEM); + + s->query_props = av_calloc(s->tot_nb_qfs, sizeof(*s->query_props)); + if (!s->qf_props) { + av_freep(&s->qf_props); + return AVERROR(ENOMEM); + } + + s->video_props = av_calloc(s->tot_nb_qfs, sizeof(*s->video_props)); + if (!s->video_props) { + av_freep(&s->qf_props); + av_freep(&s->query_props); + return AVERROR(ENOMEM); + } + + for (uint32_t i = 0; i < s->tot_nb_qfs; i++) { + s->query_props[i] = (VkQueueFamilyQueryResultStatusPropertiesKHR) { + .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR, + }; + s->video_props[i] = (VkQueueFamilyVideoPropertiesKHR) { + .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR, + .pNext = &s->query_props[i], + }; + s->qf_props[i] = (VkQueueFamilyProperties2) { + .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2, + .pNext = &s->video_props[i], + }; + } + + vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &s->tot_nb_qfs, s->qf_props); + + return 0; +} + +static int vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb) { + int ret, num; + switch (dev_family) { case VK_QUEUE_GRAPHICS_BIT: - qf->queue_family = s->hwctx->queue_family_index; - qf->actual_queues = s->hwctx->nb_graphics_queues; + ret = s->hwctx->queue_family_index; + num = s->hwctx->nb_graphics_queues; break; case VK_QUEUE_COMPUTE_BIT: - qf->queue_family = s->hwctx->queue_family_comp_index; - qf->actual_queues = s->hwctx->nb_comp_queues; + ret = s->hwctx->queue_family_comp_index; + num = s->hwctx->nb_comp_queues; break; case VK_QUEUE_TRANSFER_BIT: - qf->queue_family = s->hwctx->queue_family_tx_index; - qf->actual_queues = s->hwctx->nb_tx_queues; + ret = s->hwctx->queue_family_tx_index; + num = s->hwctx->nb_tx_queues; break; case VK_QUEUE_VIDEO_ENCODE_BIT_KHR: - qf->queue_family = s->hwctx->queue_family_encode_index; - qf->actual_queues = s->hwctx->nb_encode_queues; + ret = s->hwctx->queue_family_encode_index; + num = s->hwctx->nb_encode_queues; break; case VK_QUEUE_VIDEO_DECODE_BIT_KHR: - qf->queue_family = s->hwctx->queue_family_decode_index; - qf->actual_queues = s->hwctx->nb_decode_queues; + ret = s->hwctx->queue_family_decode_index; + num = s->hwctx->nb_decode_queues; break; default: av_assert0(0); /* Should never happen */ } - if (!nb_queues) - qf->nb_queues = qf->actual_queues; - else - qf->nb_queues = nb_queues; + if (nb) + *nb = num; + + return ret; +} + +int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, + VkQueueFlagBits dev_family) +{ + /* Fill in queue families from context if not done yet */ + if (!s->nb_qfs) { + s->nb_qfs = 0; + + /* Simply fills in all unique queues into s->qfs */ + if (s->hwctx->queue_family_index >= 0) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_index; + if (!s->nb_qfs || s->qfs[0] != s->hwctx->queue_family_tx_index) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_tx_index; + if (!s->nb_qfs || (s->qfs[0] != s->hwctx->queue_family_comp_index && + s->qfs[1] != s->hwctx->queue_family_comp_index)) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_comp_index; + if (s->hwctx->queue_family_decode_index >= 0 && + (s->qfs[0] != s->hwctx->queue_family_decode_index && + s->qfs[1] != s->hwctx->queue_family_decode_index && + s->qfs[2] != s->hwctx->queue_family_decode_index)) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_decode_index; + if (s->hwctx->queue_family_encode_index >= 0 && + (s->qfs[0] != s->hwctx->queue_family_encode_index && + s->qfs[1] != s->hwctx->queue_family_encode_index && + s->qfs[2] != s->hwctx->queue_family_encode_index && + s->qfs[3] != s->hwctx->queue_family_encode_index)) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index; + } + + return (qf->queue_family = vk_qf_get_index(s, dev_family, &qf->nb_queues)); +} + +void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool) +{ + FFVulkanFunctions *vk = &s->vkfn; + + for (int i = 0; i < pool->pool_size; i++) { + FFVkExecContext *e = &pool->contexts[i]; + + if (e->fence) { + vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX); + vk->DestroyFence(s->hwctx->act_dev, e->fence, s->hwctx->alloc); + } + + ff_vk_exec_discard_deps(s, e); + + av_free(e->frame_deps); + av_free(e->buf_deps); + av_free(e->queue_family_dst); + av_free(e->layout_dst); + av_free(e->access_dst); + av_free(e->frame_update); + av_free(e->frame_locked); + av_free(e->sem_sig); + av_free(e->sem_sig_val_dst); + av_free(e->sem_wait); + } + + if (pool->cmd_bufs) + vk->FreeCommandBuffers(s->hwctx->act_dev, pool->cmd_buf_pool, + pool->pool_size, pool->cmd_bufs); + if (pool->cmd_buf_pool) + vk->DestroyCommandPool(s->hwctx->act_dev, pool->cmd_buf_pool, s->hwctx->alloc); + if (pool->query_pool) + vk->DestroyQueryPool(s->hwctx->act_dev, pool->query_pool, s->hwctx->alloc); + + av_free(pool->query_data); + av_free(pool->cmd_bufs); + av_free(pool->contexts); +} + +int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, + FFVkExecPool *pool, int nb_contexts, + int nb_queries, VkQueryType query_type, int query_64bit, + const void *query_create_pnext) +{ + int err; + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + + VkCommandPoolCreateInfo cqueue_create; + VkCommandBufferAllocateInfo cbuf_create; + + atomic_init(&pool->idx, 0); + + /* Create command pool */ + cqueue_create = (VkCommandPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | + VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, + .queueFamilyIndex = qf->queue_family, + }; + ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create, + s->hwctx->alloc, &pool->cmd_buf_pool); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } + + /* Allocate space for command buffers */ + pool->cmd_bufs = av_malloc(nb_contexts*sizeof(*pool->cmd_bufs)); + if (!pool->cmd_bufs) { + err = AVERROR(ENOMEM); + goto fail; + } + + /* Allocate command buffer */ + cbuf_create = (VkCommandBufferAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + .commandPool = pool->cmd_buf_pool, + .commandBufferCount = nb_contexts, + }; + ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, + pool->cmd_bufs); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } + + /* Query pool */ + if (nb_queries) { + VkQueryPoolCreateInfo query_pool_info = { + .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, + .pNext = query_create_pnext, + .queryType = query_type, + .queryCount = nb_queries*nb_contexts, + }; + ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info, + s->hwctx->alloc, &pool->query_pool); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Query pool alloc failure: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } + + pool->nb_queries = nb_queries; + pool->query_status_stride = 2; + pool->query_results = nb_queries; + pool->query_statuses = 0; /* if radv supports it, nb_queries; */ + +#if CONFIG_VULKAN_ENCODE + /* Video encode quieries produce two results per query */ + if (query_type == VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR) { + pool->query_status_stride = 3; /* skip,skip,result,skip,skip,result */ + pool->query_results *= 2; + } else +#endif + if (query_type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) { + pool->query_status_stride = 1; + pool->query_results = 0; + pool->query_statuses = nb_queries; + } + + pool->qd_size = (pool->query_results + pool->query_statuses)*(query_64bit ? 8 : 4); + + /* Allocate space for the query data */ + pool->query_data = av_calloc(nb_contexts, pool->qd_size); + if (!pool->query_data) { + err = AVERROR(ENOMEM); + goto fail; + } + } + + /* Allocate space for the contexts */ + pool->contexts = av_calloc(nb_contexts, sizeof(*pool->contexts)); + if (!pool->contexts) { + err = AVERROR(ENOMEM); + goto fail; + } + + pool->pool_size = nb_contexts; + + /* Init contexts */ + for (int i = 0; i < pool->pool_size; i++) { + FFVkExecContext *e = &pool->contexts[i]; + + /* Fence */ + VkFenceCreateInfo fence_create = { + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + .flags = VK_FENCE_CREATE_SIGNALED_BIT, + }; + ret = vk->CreateFence(s->hwctx->act_dev, &fence_create, s->hwctx->alloc, + &e->fence); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to create submission fence: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + e->idx = i; + e->parent = pool; + + /* Query data */ + e->query_data = ((uint8_t *)pool->query_data) + pool->qd_size*i; + e->query_idx = nb_queries*i; + + /* Command buffer */ + e->buf = pool->cmd_bufs[i]; + + /* Queue index distribution */ + e->qi = i % qf->nb_queues; + e->qf = qf->queue_family; + vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family, + e->qi, &e->queue); + } + + return 0; + +fail: + ff_vk_exec_pool_free(s, pool); + return err; +} + +VkResult ff_vk_exec_get_query(FFVulkanContext *s, FFVkExecContext *e, + void **data, int64_t *status) +{ + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + const FFVkExecPool *pool = e->parent; + + int32_t *res32 = e->query_data; + int64_t *res64 = e->query_data; + int64_t res = 0; + VkQueryResultFlags qf = 0; + + qf |= pool->query_64bit ? + VK_QUERY_RESULT_64_BIT : 0x0; + qf |= pool->query_statuses ? + VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0; + + ret = vk->GetQueryPoolResults(s->hwctx->act_dev, pool->query_pool, + e->query_idx, + pool->nb_queries, + pool->qd_size, e->query_data, + pool->query_64bit ? 8 : 4, qf); + if (ret != VK_SUCCESS) + return ret; + + if (pool->query_statuses && pool->query_64bit) { + for (int i = 0; i < pool->query_statuses; i++) { + res = (res64[i] < res) || (res >= 0 && res64[i] > res) ? + res64[i] : res; + res64 += pool->query_status_stride; + } + } else if (pool->query_statuses) { + for (int i = 0; i < pool->query_statuses; i++) { + res = (res32[i] < res) || (res >= 0 && res32[i] > res) ? + res32[i] : res; + res32 += pool->query_status_stride; + } + } + + if (data) + *data = e->query_data; + if (status) + *status = res; + + return VK_SUCCESS; +} + +FFVkExecContext *ff_vk_exec_get(FFVkExecPool *pool) +{ + int idx = atomic_fetch_add_explicit(&pool->idx, 1, memory_order_relaxed); + idx %= pool->pool_size; + return &pool->contexts[idx]; +} + +void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e) +{ + FFVulkanFunctions *vk = &s->vkfn; + vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX); +} + +int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e) +{ + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + const FFVkExecPool *pool = e->parent; + + VkCommandBufferBeginInfo cmd_start = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + }; + + /* Create the fence and don't wait for it initially */ + vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX); + vk->ResetFences(s->hwctx->act_dev, 1, &e->fence); + + /* Discard queue dependencies */ + ff_vk_exec_discard_deps(s, e); + + ret = vk->BeginCommandBuffer(e->buf, &cmd_start); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to start command recoding: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + if (pool->nb_queries) + vk->CmdResetQueryPool(e->buf, pool->query_pool, + e->query_idx, pool->nb_queries); + + return 0; +} + +void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e) +{ + for (int j = 0; j < e->nb_buf_deps; j++) + av_buffer_unref(&e->buf_deps[j]); + e->nb_buf_deps = 0; + + for (int j = 0; j < e->nb_frame_deps; j++) { + AVFrame *f = e->frame_deps[j]; + if (e->frame_locked[j]) { + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + AVVulkanFramesContext *vkfc = hwfc->hwctx; + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + vkfc->unlock_frame(hwfc, vkf); + e->frame_locked[j] = 0; + } + e->frame_update[j] = 0; + if (f->buf[0]) + av_frame_free(&e->frame_deps[j]); + } + e->nb_frame_deps = 0; + + e->sem_wait_cnt = 0; + e->sem_sig_cnt = 0; + e->sem_sig_val_dst_cnt = 0; +} + +int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e, + AVBufferRef **deps, int nb_deps, int ref) +{ + AVBufferRef **dst = av_fast_realloc(e->buf_deps, &e->buf_deps_alloc_size, + (e->nb_buf_deps + nb_deps) * sizeof(*dst)); + if (!dst) { + ff_vk_exec_discard_deps(s, e); + return AVERROR(ENOMEM); + } + + e->buf_deps = dst; + + for (int i = 0; i < nb_deps; i++) { + e->buf_deps[e->nb_buf_deps] = ref ? av_buffer_ref(deps[i]) : deps[i]; + if (!e->buf_deps[e->nb_buf_deps]) { + ff_vk_exec_discard_deps(s, e); + return AVERROR(ENOMEM); + } + e->nb_buf_deps++; + } + + return 0; +} + +int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, + VkPipelineStageFlagBits2 wait_stage, + VkPipelineStageFlagBits2 signal_stage) +{ + uint8_t *frame_locked; + uint8_t *frame_update; + AVFrame **frame_deps; + VkImageLayout *layout_dst; + uint32_t *queue_family_dst; + VkAccessFlagBits *access_dst; + + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + AVVulkanFramesContext *vkfc = hwfc->hwctx; + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + int nb_images = ff_vk_count_images(vkf); + + /* Don't add duplicates */ + for (int i = 0; i < e->nb_frame_deps; i++) + if (e->frame_deps[i]->data[0] == f->data[0]) + return 1; + +#define ARR_REALLOC(str, arr, alloc_s, cnt) \ + do { \ + arr = av_fast_realloc(str->arr, alloc_s, (cnt + 1)*sizeof(*arr)); \ + if (!arr) { \ + ff_vk_exec_discard_deps(s, e); \ + return AVERROR(ENOMEM); \ + } \ + str->arr = arr; \ + } while (0) + + ARR_REALLOC(e, layout_dst, &e->layout_dst_alloc, e->nb_frame_deps); + ARR_REALLOC(e, queue_family_dst, &e->queue_family_dst_alloc, e->nb_frame_deps); + ARR_REALLOC(e, access_dst, &e->access_dst_alloc, e->nb_frame_deps); + + ARR_REALLOC(e, frame_locked, &e->frame_locked_alloc_size, e->nb_frame_deps); + ARR_REALLOC(e, frame_update, &e->frame_update_alloc_size, e->nb_frame_deps); + ARR_REALLOC(e, frame_deps, &e->frame_deps_alloc_size, e->nb_frame_deps); + + e->frame_deps[e->nb_frame_deps] = f->buf[0] ? av_frame_clone(f) : f; + if (!e->frame_deps[e->nb_frame_deps]) { + ff_vk_exec_discard_deps(s, e); + return AVERROR(ENOMEM); + } + + vkfc->lock_frame(hwfc, vkf); + e->frame_locked[e->nb_frame_deps] = 1; + e->frame_update[e->nb_frame_deps] = 0; + e->nb_frame_deps++; + + for (int i = 0; i < nb_images; i++) { + VkSemaphoreSubmitInfo *sem_wait; + VkSemaphoreSubmitInfo *sem_sig; + uint64_t **sem_sig_val_dst; + + ARR_REALLOC(e, sem_wait, &e->sem_wait_alloc, e->sem_wait_cnt); + ARR_REALLOC(e, sem_sig, &e->sem_sig_alloc, e->sem_sig_cnt); + ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt); + + e->sem_wait[e->sem_wait_cnt++] = (VkSemaphoreSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, + .semaphore = vkf->sem[i], + .value = vkf->sem_value[i], + .stageMask = wait_stage, + }; + + e->sem_sig[e->sem_sig_cnt++] = (VkSemaphoreSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, + .semaphore = vkf->sem[i], + .value = vkf->sem_value[i] + 1, + .stageMask = signal_stage, + }; + + e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = &vkf->sem_value[i]; + e->sem_sig_val_dst_cnt++; + } + + return 0; +} + +void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, + VkImageMemoryBarrier2 *bar, uint32_t *nb_img_bar) +{ + int i; + for (i = 0; i < e->nb_frame_deps; i++) + if (e->frame_deps[i]->data[0] == f->data[0]) + break; + av_assert0(i < e->nb_frame_deps); - return; + /* Don't update duplicates */ + if (nb_img_bar && !e->frame_update[i]) + (*nb_img_bar)++; + + e->queue_family_dst[i] = bar->dstQueueFamilyIndex; + e->access_dst[i] = bar->dstAccessMask; + e->layout_dst[i] = bar->newLayout; + e->frame_update[i] = 1; } -void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf) +int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e, + VkSemaphore *dst, uint64_t *dst_val, + AVFrame *f) { - qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues; + uint64_t **sem_sig_val_dst; + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + + /* Reject unknown frames */ + int i; + for (i = 0; i < e->nb_frame_deps; i++) + if (e->frame_deps[i]->data[0] == f->data[0]) + break; + if (i == e->nb_frame_deps) + return AVERROR(EINVAL); + + ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt); + + *dst = vkf->sem[0]; + *dst_val = vkf->sem_value[0]; + + e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = dst_val; + e->sem_sig_val_dst_cnt++; + + return 0; +} + +int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e) +{ + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + VkCommandBufferSubmitInfo cmd_buf_info = (VkCommandBufferSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, + .commandBuffer = e->buf, + }; + VkSubmitInfo2 submit_info = (VkSubmitInfo2) { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2, + .pCommandBufferInfos = &cmd_buf_info, + .commandBufferInfoCount = 1, + .pWaitSemaphoreInfos = e->sem_wait, + .waitSemaphoreInfoCount = e->sem_wait_cnt, + .pSignalSemaphoreInfos = e->sem_sig, + .signalSemaphoreInfoCount = e->sem_sig_cnt, + }; + + ret = vk->EndCommandBuffer(e->buf); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n", + ff_vk_ret2str(ret)); + ff_vk_exec_discard_deps(s, e); + return AVERROR_EXTERNAL; + } + + s->hwctx->lock_queue(s->device, e->qf, e->qi); + ret = vk->QueueSubmit2(e->queue, 1, &submit_info, e->fence); + s->hwctx->unlock_queue(s->device, e->qf, e->qi); + + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n", + ff_vk_ret2str(ret)); + ff_vk_exec_discard_deps(s, e); + return AVERROR_EXTERNAL; + } + + for (int i = 0; i < e->sem_sig_val_dst_cnt; i++) + *e->sem_sig_val_dst[i] += 1; + + /* Unlock all frames */ + for (int j = 0; j < e->nb_frame_deps; j++) { + if (e->frame_locked[j]) { + AVFrame *f = e->frame_deps[j]; + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + AVVulkanFramesContext *vkfc = hwfc->hwctx; + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + + if (e->frame_update[j]) { + int nb_images = ff_vk_count_images(vkf); + for (int i = 0; i < nb_images; i++) { + vkf->layout[i] = e->layout_dst[j]; + vkf->access[i] = e->access_dst[j]; + vkf->queue_family[i] = e->queue_family_dst[j]; + } + } + vkfc->unlock_frame(hwfc, vkf); + e->frame_locked[j] = 0; + } + } + + return 0; } -static int vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, - VkMemoryPropertyFlagBits req_flags, void *alloc_extension, - VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem) +int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, + VkMemoryPropertyFlagBits req_flags, void *alloc_extension, + VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem) { VkResult ret; int index = -1; @@ -148,8 +762,8 @@ static int vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, }; /* Align if we need to */ - if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) - req->size = FFALIGN(req->size, s->props.limits.minMemoryMapAlignment); + if ((req_flags != UINT32_MAX) && req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + req->size = FFALIGN(req->size, s->props.properties.limits.minMemoryMapAlignment); alloc_info.allocationSize = req->size; @@ -161,7 +775,8 @@ static int vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, continue; /* The memory type flags must include our properties */ - if ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags) + if ((req_flags != UINT32_MAX) && + ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)) continue; /* Found a suitable memory type */ @@ -170,7 +785,7 @@ static int vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, } if (index < 0) { - av_log(s, AV_LOG_ERROR, "No memory type found for flags 0x%x\n", + av_log(s->device, AV_LOG_ERROR, "No memory type found for flags 0x%x\n", req_flags); return AVERROR(EINVAL); } @@ -185,12 +800,14 @@ static int vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, return AVERROR(ENOMEM); } - *mem_flags |= s->mprops.memoryTypes[index].propertyFlags; + if (mem_flags) + *mem_flags |= s->mprops.memoryTypes[index].propertyFlags; return 0; } int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, + void *pNext, void *alloc_pNext, VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags) { int err; @@ -200,19 +817,23 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, VkBufferCreateInfo buf_spawn = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = NULL, + .pNext = pNext, .usage = usage, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .size = size, /* Gets FFALIGNED during alloc if host visible but should be ok */ }; + VkMemoryAllocateFlagsInfo alloc_flags = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, + .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, + }; VkBufferMemoryRequirementsInfo2 req_desc = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, }; VkMemoryDedicatedAllocateInfo ded_alloc = { .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, - .pNext = NULL, + .pNext = alloc_pNext, }; VkMemoryDedicatedRequirements ded_req = { .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, @@ -236,12 +857,19 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, /* In case the implementation prefers/requires dedicated allocation */ use_ded_mem = ded_req.prefersDedicatedAllocation | ded_req.requiresDedicatedAllocation; - if (use_ded_mem) + if (use_ded_mem) { ded_alloc.buffer = buf->buf; + ded_alloc.pNext = alloc_pNext; + alloc_pNext = &ded_alloc; + } - err = vk_alloc_mem(s, &req.memoryRequirements, flags, - use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext, - &buf->flags, &buf->mem); + if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) { + alloc_flags.pNext = alloc_pNext; + alloc_pNext = &alloc_flags; + } + + err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags, alloc_pNext, + &buf->flags, &buf->mem); if (err) return err; @@ -252,25 +880,72 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, return AVERROR_EXTERNAL; } + if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) { + VkBufferDeviceAddressInfo address_info = { + .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, + .buffer = buf->buf, + }; + buf->address = vk->GetBufferDeviceAddress(s->hwctx->act_dev, &address_info); + } + + buf->size = size; + return 0; } -int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[], +static void destroy_avvkbuf(void *opaque, uint8_t *data) +{ + FFVulkanContext *s = opaque; + FFVkBuffer *buf = (FFVkBuffer *)data; + ff_vk_free_buf(s, buf); + av_free(buf); +} + +int ff_vk_create_avbuf(FFVulkanContext *s, AVBufferRef **ref, size_t size, + void *pNext, void *alloc_pNext, + VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags) +{ + int err; + AVBufferRef *buf; + FFVkBuffer *vkb = av_mallocz(sizeof(*vkb)); + if (!vkb) + return AVERROR(ENOMEM); + + err = ff_vk_create_buf(s, vkb, size, pNext, alloc_pNext, usage, flags); + if (err < 0) { + av_free(vkb); + return err; + } + + buf = av_buffer_create((uint8_t *)vkb, sizeof(*vkb), destroy_avvkbuf, s, 0); + if (!buf) { + destroy_avvkbuf(s, (uint8_t *)vkb); + return AVERROR(ENOMEM); + } + + *ref = buf; + + return 0; +} + +int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer **buf, uint8_t *mem[], int nb_buffers, int invalidate) { VkResult ret; FFVulkanFunctions *vk = &s->vkfn; - VkMappedMemoryRange *inval_list = NULL; + VkMappedMemoryRange inval_list[64]; int inval_count = 0; for (int i = 0; i < nb_buffers; i++) { - ret = vk->MapMemory(s->hwctx->act_dev, buf[i].mem, 0, - VK_WHOLE_SIZE, 0, (void **)&mem[i]); + void *dst; + ret = vk->MapMemory(s->hwctx->act_dev, buf[i]->mem, 0, + VK_WHOLE_SIZE, 0, &dst); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Failed to map buffer memory: %s\n", ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } + mem[i] = dst; } if (!invalidate) @@ -279,16 +954,12 @@ int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[], for (int i = 0; i < nb_buffers; i++) { const VkMappedMemoryRange ival_buf = { .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, - .memory = buf[i].mem, + .memory = buf[i]->mem, .size = VK_WHOLE_SIZE, }; - if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + if (buf[i]->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) continue; - inval_list = av_fast_realloc(s->scratch, &s->scratch_size, - (++inval_count)*sizeof(*inval_list)); - if (!inval_list) - return AVERROR(ENOMEM); - inval_list[inval_count - 1] = ival_buf; + inval_list[inval_count++] = ival_buf; } if (inval_count) { @@ -304,29 +975,25 @@ int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[], return 0; } -int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers, +int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer **buf, int nb_buffers, int flush) { int err = 0; VkResult ret; FFVulkanFunctions *vk = &s->vkfn; - VkMappedMemoryRange *flush_list = NULL; + VkMappedMemoryRange flush_list[64]; int flush_count = 0; if (flush) { for (int i = 0; i < nb_buffers; i++) { const VkMappedMemoryRange flush_buf = { .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, - .memory = buf[i].mem, + .memory = buf[i]->mem, .size = VK_WHOLE_SIZE, }; - if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + if (buf[i]->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) continue; - flush_list = av_fast_realloc(s->scratch, &s->scratch_size, - (++flush_count)*sizeof(*flush_list)); - if (!flush_list) - return AVERROR(ENOMEM); - flush_list[flush_count - 1] = flush_buf; + flush_list[flush_count++] = flush_buf; } } @@ -341,7 +1008,7 @@ int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers, } for (int i = 0; i < nb_buffers; i++) - vk->UnmapMemory(s->hwctx->act_dev, buf[i].mem); + vk->UnmapMemory(s->hwctx->act_dev, buf[i]->mem); return err; } @@ -353,322 +1020,109 @@ void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf) if (!buf || !s->hwctx) return; - vk->DeviceWaitIdle(s->hwctx->act_dev); - + if (buf->mapped_mem) + ff_vk_unmap_buffer(s, buf, 0); if (buf->buf != VK_NULL_HANDLE) vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc); if (buf->mem != VK_NULL_HANDLE) vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc); } -int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size, - VkShaderStageFlagBits stage) -{ - VkPushConstantRange *pc; - - pl->push_consts = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts), - pl->push_consts_num + 1); - if (!pl->push_consts) - return AVERROR(ENOMEM); - - pc = &pl->push_consts[pl->push_consts_num++]; - memset(pc, 0, sizeof(*pc)); - - pc->stageFlags = stage; - pc->offset = offset; - pc->size = size; - - return 0; -} - -FN_CREATING(FFVulkanContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num) -int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx, - FFVkQueueFamilyCtx *qf) -{ - VkResult ret; - FFVkExecContext *e; - FFVulkanFunctions *vk = &s->vkfn; - - VkCommandPoolCreateInfo cqueue_create = { - .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, - .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, - .queueFamilyIndex = qf->queue_family, - }; - VkCommandBufferAllocateInfo cbuf_create = { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, - .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, - .commandBufferCount = qf->nb_queues, - }; - - e = create_exec_ctx(s); - if (!e) - return AVERROR(ENOMEM); - - e->qf = qf; - - e->queues = av_mallocz(qf->nb_queues * sizeof(*e->queues)); - if (!e->queues) - return AVERROR(ENOMEM); - - e->bufs = av_mallocz(qf->nb_queues * sizeof(*e->bufs)); - if (!e->bufs) - return AVERROR(ENOMEM); - - /* Create command pool */ - ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create, - s->hwctx->alloc, &e->pool); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - cbuf_create.commandPool = e->pool; - - /* Allocate command buffer */ - ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, e->bufs); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - for (int i = 0; i < qf->nb_queues; i++) { - FFVkQueueCtx *q = &e->queues[i]; - vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family, - i % qf->actual_queues, &q->queue); - } - - *ctx = e; - - return 0; -} - -void ff_vk_discard_exec_deps(FFVkExecContext *e) +static void free_data_buf(void *opaque, uint8_t *data) { - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; - - for (int j = 0; j < q->nb_buf_deps; j++) - av_buffer_unref(&q->buf_deps[j]); - q->nb_buf_deps = 0; - - for (int j = 0; j < q->nb_frame_deps; j++) - av_frame_free(&q->frame_deps[j]); - q->nb_frame_deps = 0; - - e->sem_wait_cnt = 0; - e->sem_sig_cnt = 0; + FFVulkanContext *ctx = opaque; + FFVkBuffer *buf = (FFVkBuffer *)data; + ff_vk_free_buf(ctx, buf); + av_free(data); } -int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e) -{ - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; - - VkCommandBufferBeginInfo cmd_start = { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, - }; - - /* Create the fence and don't wait for it initially */ - if (!q->fence) { - VkFenceCreateInfo fence_spawn = { - .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, - }; - ret = vk->CreateFence(s->hwctx->act_dev, &fence_spawn, s->hwctx->alloc, - &q->fence); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to queue frame fence: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } else { - vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); - vk->ResetFences(s->hwctx->act_dev, 1, &q->fence); - } - - /* Discard queue dependencies */ - ff_vk_discard_exec_deps(e); - - ret = vk->BeginCommandBuffer(e->bufs[e->qf->cur_queue], &cmd_start); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to start command recoding: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - return 0; -} - -VkCommandBuffer ff_vk_get_exec_buf(FFVkExecContext *e) -{ - return e->bufs[e->qf->cur_queue]; -} - -int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame, - VkPipelineStageFlagBits in_wait_dst_flag) -{ - AVFrame **dst; - AVVkFrame *f = (AVVkFrame *)frame->data[0]; - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; - AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data; - int planes = av_pix_fmt_count_planes(fc->sw_format); - - for (int i = 0; i < planes; i++) { - e->sem_wait = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc, - (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait)); - if (!e->sem_wait) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - - e->sem_wait_dst = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc, - (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst)); - if (!e->sem_wait_dst) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - - e->sem_wait_val = av_fast_realloc(e->sem_wait_val, &e->sem_wait_val_alloc, - (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_val)); - if (!e->sem_wait_val) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - - e->sem_sig = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc, - (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig)); - if (!e->sem_sig) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - - e->sem_sig_val = av_fast_realloc(e->sem_sig_val, &e->sem_sig_val_alloc, - (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val)); - if (!e->sem_sig_val) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - - e->sem_sig_val_dst = av_fast_realloc(e->sem_sig_val_dst, &e->sem_sig_val_dst_alloc, - (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val_dst)); - if (!e->sem_sig_val_dst) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - - e->sem_wait[e->sem_wait_cnt] = f->sem[i]; - e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag; - e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i]; - e->sem_wait_cnt++; - - e->sem_sig[e->sem_sig_cnt] = f->sem[i]; - e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i] + 1; - e->sem_sig_val_dst[e->sem_sig_cnt] = &f->sem_value[i]; - e->sem_sig_cnt++; - } - - dst = av_fast_realloc(q->frame_deps, &q->frame_deps_alloc_size, - (q->nb_frame_deps + 1) * sizeof(*dst)); - if (!dst) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - - q->frame_deps = dst; - q->frame_deps[q->nb_frame_deps] = av_frame_clone(frame); - if (!q->frame_deps[q->nb_frame_deps]) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - q->nb_frame_deps++; +static AVBufferRef *alloc_data_buf(void *opaque, size_t size) +{ + AVBufferRef *ref; + uint8_t *buf = av_mallocz(size); + if (!buf) + return NULL; - return 0; + ref = av_buffer_create(buf, size, free_data_buf, opaque, 0); + if (!ref) + av_free(buf); + return ref; } -int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e) +int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool, + AVBufferRef **buf, VkBufferUsageFlags usage, + void *create_pNext, size_t size, + VkMemoryPropertyFlagBits mem_props) { - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; - - VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = { - .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, - .pWaitSemaphoreValues = e->sem_wait_val, - .pSignalSemaphoreValues = e->sem_sig_val, - .waitSemaphoreValueCount = e->sem_wait_cnt, - .signalSemaphoreValueCount = e->sem_sig_cnt, - }; + int err; + AVBufferRef *ref; + FFVkBuffer *data; - VkSubmitInfo s_info = { - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .pNext = &s_timeline_sem_info, + if (!(*buf_pool)) { + *buf_pool = av_buffer_pool_init2(sizeof(FFVkBuffer), ctx, + alloc_data_buf, NULL); + if (!(*buf_pool)) + return AVERROR(ENOMEM); + } - .commandBufferCount = 1, - .pCommandBuffers = &e->bufs[e->qf->cur_queue], + *buf = ref = av_buffer_pool_get(*buf_pool); + if (!ref) + return AVERROR(ENOMEM); - .pWaitSemaphores = e->sem_wait, - .pWaitDstStageMask = e->sem_wait_dst, - .waitSemaphoreCount = e->sem_wait_cnt, + data = (FFVkBuffer *)ref->data; + data->stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; + data->access = VK_ACCESS_2_NONE; - .pSignalSemaphores = e->sem_sig, - .signalSemaphoreCount = e->sem_sig_cnt, - }; + if (data->size >= size) + return 0; - ret = vk->EndCommandBuffer(e->bufs[e->qf->cur_queue]); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } + ff_vk_free_buf(ctx, data); + memset(data, 0, sizeof(*data)); - ret = vk->QueueSubmit(q->queue, 1, &s_info, q->fence); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; + av_log(ctx, AV_LOG_DEBUG, "Allocating buffer of %lu bytes for pool %p\n", + size, *buf_pool); + + err = ff_vk_create_buf(ctx, data, size, + create_pNext, NULL, usage, + mem_props); + if (err < 0) { + av_buffer_unref(&ref); + return err; } - for (int i = 0; i < e->sem_sig_cnt; i++) - *e->sem_sig_val_dst[i] += 1; + if (mem_props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { + err = ff_vk_map_buffer(ctx, data, &data->mapped_mem, 0); + if (err < 0) { + av_buffer_unref(&ref); + return err; + } + } return 0; } -int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e, - AVBufferRef **deps, int nb_deps) +int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size, + VkShaderStageFlagBits stage) { - AVBufferRef **dst; - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; - - if (!deps || !nb_deps) - return 0; + VkPushConstantRange *pc; - dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size, - (q->nb_buf_deps + nb_deps) * sizeof(*dst)); - if (!dst) - goto err; + pl->push_consts = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts), + pl->push_consts_num + 1); + if (!pl->push_consts) + return AVERROR(ENOMEM); - q->buf_deps = dst; + pc = &pl->push_consts[pl->push_consts_num++]; + memset(pc, 0, sizeof(*pc)); - for (int i = 0; i < nb_deps; i++) { - q->buf_deps[q->nb_buf_deps] = deps[i]; - if (!q->buf_deps[q->nb_buf_deps]) - goto err; - q->nb_buf_deps++; - } + pc->stageFlags = stage; + pc->offset = offset; + pc->size = size; return 0; - -err: - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); } -FN_CREATING(FFVulkanContext, FFVkSampler, sampler, samplers, samplers_num) -FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, - int unnorm_coords, VkFilter filt) +int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler, + int unnorm_coords, VkFilter filt) { VkResult ret; FFVulkanFunctions *vk = &s->vkfn; @@ -688,22 +1142,15 @@ FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, .unnormalizedCoordinates = unnorm_coords, }; - FFVkSampler *sctx = create_sampler(s); - if (!sctx) - return NULL; - ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info, - s->hwctx->alloc, &sctx->sampler[0]); + s->hwctx->alloc, sampler); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Unable to init sampler: %s\n", ff_vk_ret2str(ret)); - return NULL; + return AVERROR_EXTERNAL; } - for (int i = 1; i < 4; i++) - sctx->sampler[i] = sctx->sampler[0]; - - return sctx; + return 0; } int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt) @@ -726,105 +1173,174 @@ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt) } typedef struct ImageViewCtx { - VkImageView view; + VkImageView views[AV_NUM_DATA_POINTERS]; + int nb_views; } ImageViewCtx; -static void destroy_imageview(void *opaque, uint8_t *data) +static void destroy_imageviews(void *opaque, uint8_t *data) { FFVulkanContext *s = opaque; FFVulkanFunctions *vk = &s->vkfn; ImageViewCtx *iv = (ImageViewCtx *)data; - vk->DestroyImageView(s->hwctx->act_dev, iv->view, s->hwctx->alloc); + for (int i = 0; i < iv->nb_views; i++) + vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc); + av_free(iv); } -int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e, - VkImageView *v, VkImage img, VkFormat fmt, - const VkComponentMapping map) +int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e, + VkImageView views[AV_NUM_DATA_POINTERS], + AVFrame *f) { int err; + VkResult ret; AVBufferRef *buf; FFVulkanFunctions *vk = &s->vkfn; - - VkImageViewCreateInfo imgview_spawn = { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .pNext = NULL, - .image = img, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = fmt, - .components = map, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }; + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + const VkFormat *rep_fmts = av_vkfmt_from_pixfmt(hwfc->sw_format); + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + const int nb_images = ff_vk_count_images(vkf); + const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format); ImageViewCtx *iv = av_mallocz(sizeof(*iv)); + if (!iv) + return AVERROR(ENOMEM); - VkResult ret = vk->CreateImageView(s->hwctx->act_dev, &imgview_spawn, - s->hwctx->alloc, &iv->view); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; + for (int i = 0; i < nb_planes; i++) { + VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_ASPECT_PLANE_0_BIT, + VK_IMAGE_ASPECT_PLANE_1_BIT, + VK_IMAGE_ASPECT_PLANE_2_BIT, }; + + VkImageViewCreateInfo view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = NULL, + .image = vkf->img[FFMIN(i, nb_images - 1)], + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = rep_fmts[i], + .components = ff_comp_identity_map, + .subresourceRange = { + .aspectMask = plane_aspect[(nb_planes != nb_images) + + i*(nb_planes != nb_images)], + .levelCount = 1, + .layerCount = 1, + }, + }; + + ret = vk->CreateImageView(s->hwctx->act_dev, &view_create_info, + s->hwctx->alloc, &iv->views[i]); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } + + iv->nb_views++; } - buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageview, s, 0); + buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageviews, s, 0); if (!buf) { - destroy_imageview(s, (uint8_t *)iv); - return AVERROR(ENOMEM); + err = AVERROR(ENOMEM); + goto fail; } /* Add to queue dependencies */ - err = ff_vk_add_dep_exec_ctx(s, e, &buf, 1); - if (err) { + err = ff_vk_exec_add_dep_buf(s, e, &buf, 1, 0); + if (err < 0) av_buffer_unref(&buf); - return err; - } - *v = iv->view; + memcpy(views, iv->views, nb_planes*sizeof(*views)); - return 0; + return err; + +fail: + for (int i = 0; i < iv->nb_views; i++) + vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc); + av_free(iv); + return err; } -FN_CREATING(FFVulkanPipeline, FFVkSPIRVShader, shader, shaders, shaders_num) -FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name, - VkShaderStageFlags stage) +void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e, + AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar, + VkPipelineStageFlags src_stage, + VkPipelineStageFlags dst_stage, + VkAccessFlagBits new_access, + VkImageLayout new_layout, + uint32_t new_qf) { - FFVkSPIRVShader *shd = create_shader(pl); - if (!shd) - return NULL; + int i, found; + AVVkFrame *vkf = (AVVkFrame *)pic->data[0]; + const int nb_images = ff_vk_count_images(vkf); + for (i = 0; i < e->nb_frame_deps; i++) + if (e->frame_deps[i]->data[0] == pic->data[0]) + break; + found = (i < e->nb_frame_deps) && (e->frame_update[i]) ? i : -1; + + for (int i = 0; i < nb_images; i++) { + bar[*nb_bar] = (VkImageMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, + .pNext = NULL, + .srcStageMask = src_stage, + .dstStageMask = dst_stage, + .srcAccessMask = found >= 0 ? e->access_dst[found] : vkf->access[i], + .dstAccessMask = new_access, + .oldLayout = found >= 0 ? e->layout_dst[found] : vkf->layout[0], + .newLayout = new_layout, + .srcQueueFamilyIndex = found >= 0 ? e->queue_family_dst[found] : vkf->queue_family[0], + .dstQueueFamilyIndex = new_qf, + .image = vkf->img[i], + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .layerCount = 1, + .levelCount = 1, + }, + }; + *nb_bar += 1; + } + ff_vk_exec_update_frame(s, e, pic, &bar[*nb_bar - nb_images], NULL); +} + +int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name, + VkShaderStageFlags stage, uint32_t required_subgroup_size) +{ av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED); shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; shd->shader.stage = stage; + if (required_subgroup_size) { + shd->shader.flags |= VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT; + shd->shader.pNext = &shd->subgroup_info; + shd->subgroup_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO; + shd->subgroup_info.requiredSubgroupSize = required_subgroup_size; + } + shd->name = name; GLSLF(0, #version %i ,460); GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) ); GLSLC(0, ); + GLSLC(0, #extension GL_EXT_buffer_reference : require ); + GLSLC(0, #extension GL_EXT_buffer_reference2 : require ); - return shd; + return 0; } -void ff_vk_set_compute_shader_sizes(FFVkSPIRVShader *shd, int local_size[3]) +void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int x, int y, int z) { - shd->local_size[0] = local_size[0]; - shd->local_size[1] = local_size[1]; - shd->local_size[2] = local_size[2]; + shd->local_size[0] = x; + shd->local_size[1] = y; + shd->local_size[2] = z; av_bprintf(&shd->src, "layout (local_size_x = %i, " "local_size_y = %i, local_size_z = %i) in;\n\n", shd->local_size[0], shd->local_size[1], shd->local_size[2]); } -void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio) +void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio) { int line = 0; const char *p = shd->src.str; @@ -846,36 +1362,24 @@ void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio) av_bprint_finalize(&buf, NULL); } -int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd, - const char *entrypoint) +void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd) +{ + FFVulkanFunctions *vk = &s->vkfn; + av_bprint_finalize(&shd->src, NULL); + + if (shd->shader.module) + vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module, s->hwctx->alloc); +} + +int ff_vk_shader_create(FFVulkanContext *s, FFVkSPIRVShader *shd, + uint8_t *spirv, size_t spirv_size, const char *entrypoint) { - int err; VkResult ret; FFVulkanFunctions *vk = &s->vkfn; VkShaderModuleCreateInfo shader_create; - uint8_t *spirv; - size_t spirv_size; - void *priv; shd->shader.pName = entrypoint; - if (!s->spirv_compiler) { -#if CONFIG_LIBGLSLANG - s->spirv_compiler = ff_vk_glslang_init(); -#elif CONFIG_LIBSHADERC - s->spirv_compiler = ff_vk_shaderc_init(); -#else - return AVERROR(ENOSYS); -#endif - if (!s->spirv_compiler) - return AVERROR(ENOMEM); - } - - err = s->spirv_compiler->compile_shader(s->spirv_compiler, s, shd, &spirv, - &spirv_size, entrypoint, &priv); - if (err < 0) - return err; - av_log(s, AV_LOG_VERBOSE, "Shader %s compiled! Size: %zu bytes\n", shd->name, spirv_size); @@ -887,11 +1391,8 @@ int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd, ret = vk->CreateShaderModule(s->hwctx->act_dev, &shader_create, NULL, &shd->shader.module); - - s->spirv_compiler->free_shader(s->spirv_compiler, &priv); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to create shader module: %s\n", + av_log(s, AV_LOG_VERBOSE, "Error creating shader module: %s\n", ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } @@ -920,132 +1421,88 @@ static const struct descriptor_props { [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER] = { sizeof(VkBufferView), "imageBuffer", 1, 0, 0, 0, }, }; -int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl, - FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc, - int num, int only_print_to_shader) +int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkSPIRVShader *shd, + FFVulkanDescriptorSetBinding *desc, int nb, + int read_only, int print_to_shader_only) { VkResult ret; - VkDescriptorSetLayout *layout; + int has_sampler = 0; FFVulkanFunctions *vk = &s->vkfn; + FFVulkanDescriptorSet *set; + VkDescriptorSetLayoutCreateInfo desc_create_layout; - if (only_print_to_shader) + if (print_to_shader_only) goto print; - pl->desc_layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout), - pl->desc_layout_num + pl->qf->nb_queues); - if (!pl->desc_layout) + /* Actual layout allocated for the pipeline */ + set = av_realloc_array(pl->desc_set, sizeof(*pl->desc_set), + pl->nb_descriptor_sets + 1); + if (!set) return AVERROR(ENOMEM); + pl->desc_set = set; + set = &set[pl->nb_descriptor_sets]; + memset(set, 0, sizeof(*set)); - pl->desc_set_initialized = av_realloc_array(pl->desc_set_initialized, - sizeof(*pl->desc_set_initialized), - pl->descriptor_sets_num + 1); - if (!pl->desc_set_initialized) + set->binding = av_calloc(nb, sizeof(*set->binding)); + if (!set->binding) return AVERROR(ENOMEM); - pl->desc_set_initialized[pl->descriptor_sets_num] = 0; - layout = &pl->desc_layout[pl->desc_layout_num]; - - { /* Create descriptor set layout descriptions */ - VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 }; - VkDescriptorSetLayoutBinding *desc_binding; - - desc_binding = av_mallocz(sizeof(*desc_binding)*num); - if (!desc_binding) - return AVERROR(ENOMEM); - - for (int i = 0; i < num; i++) { - desc_binding[i].binding = i; - desc_binding[i].descriptorType = desc[i].type; - desc_binding[i].descriptorCount = FFMAX(desc[i].elems, 1); - desc_binding[i].stageFlags = desc[i].stages; - desc_binding[i].pImmutableSamplers = desc[i].sampler ? - desc[i].sampler->sampler : - NULL; - } - - desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - desc_create_layout.pBindings = desc_binding; - desc_create_layout.bindingCount = num; - - for (int i = 0; i < pl->qf->nb_queues; i++) { - ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout, - s->hwctx->alloc, &layout[i]); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to init descriptor set " - "layout: %s\n", ff_vk_ret2str(ret)); - av_free(desc_binding); - return AVERROR_EXTERNAL; - } - } - - av_free(desc_binding); + set->binding_offset = av_calloc(nb, sizeof(*set->binding_offset)); + if (!set->binding_offset) { + av_freep(&set->binding); + return AVERROR(ENOMEM); } - { /* Pool each descriptor by type and update pool counts */ - for (int i = 0; i < num; i++) { - int j; - for (j = 0; j < pl->pool_size_desc_num; j++) - if (pl->pool_size_desc[j].type == desc[i].type) - break; - if (j >= pl->pool_size_desc_num) { - pl->pool_size_desc = av_realloc_array(pl->pool_size_desc, - sizeof(*pl->pool_size_desc), - ++pl->pool_size_desc_num); - if (!pl->pool_size_desc) - return AVERROR(ENOMEM); - memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize)); - } - pl->pool_size_desc[j].type = desc[i].type; - pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1)*pl->qf->nb_queues; - } - } + desc_create_layout = (VkDescriptorSetLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = nb, + .pBindings = set->binding, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT, + }; - { /* Create template creation struct */ - VkDescriptorUpdateTemplateCreateInfo *dt; - VkDescriptorUpdateTemplateEntry *des_entries; + for (int i = 0; i < nb; i++) { + set->binding[i].binding = i; + set->binding[i].descriptorType = desc[i].type; + set->binding[i].descriptorCount = FFMAX(desc[i].elems, 1); + set->binding[i].stageFlags = desc[i].stages; + set->binding[i].pImmutableSamplers = desc[i].samplers; - /* Freed after descriptor set initialization */ - des_entries = av_mallocz(num*sizeof(VkDescriptorUpdateTemplateEntry)); - if (!des_entries) - return AVERROR(ENOMEM); + if (desc[i].type == VK_DESCRIPTOR_TYPE_SAMPLER || + desc[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) + has_sampler |= 1; + } - for (int i = 0; i < num; i++) { - des_entries[i].dstBinding = i; - des_entries[i].descriptorType = desc[i].type; - des_entries[i].descriptorCount = FFMAX(desc[i].elems, 1); - des_entries[i].dstArrayElement = 0; - des_entries[i].offset = ((uint8_t *)desc[i].updater) - (uint8_t *)s; - des_entries[i].stride = descriptor_props[desc[i].type].struct_size; - } + set->usage = VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; + if (has_sampler) + set->usage |= VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT; - pl->desc_template_info = av_realloc_array(pl->desc_template_info, - sizeof(*pl->desc_template_info), - pl->total_descriptor_sets + pl->qf->nb_queues); - if (!pl->desc_template_info) - return AVERROR(ENOMEM); + ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout, + s->hwctx->alloc, &set->layout); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Unable to init descriptor set layout: %s", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } - dt = &pl->desc_template_info[pl->total_descriptor_sets]; - memset(dt, 0, sizeof(*dt)*pl->qf->nb_queues); + vk->GetDescriptorSetLayoutSizeEXT(s->hwctx->act_dev, set->layout, &set->layout_size); - for (int i = 0; i < pl->qf->nb_queues; i++) { - dt[i].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO; - dt[i].templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET; - dt[i].descriptorSetLayout = layout[i]; - dt[i].pDescriptorUpdateEntries = des_entries; - dt[i].descriptorUpdateEntryCount = num; - } - } + set->aligned_size = FFALIGN(set->layout_size, s->desc_buf_props.descriptorBufferOffsetAlignment); - pl->descriptor_sets_num++; + for (int i = 0; i < nb; i++) + vk->GetDescriptorSetLayoutBindingOffsetEXT(s->hwctx->act_dev, set->layout, + i, &set->binding_offset[i]); - pl->desc_layout_num += pl->qf->nb_queues; - pl->total_descriptor_sets += pl->qf->nb_queues; + set->read_only = read_only; + set->nb_bindings = nb; + pl->nb_descriptor_sets++; print: /* Write shader info */ - for (int i = 0; i < num; i++) { + for (int i = 0; i < nb; i++) { const struct descriptor_props *prop = &descriptor_props[desc[i].type]; - GLSLA("layout (set = %i, binding = %i", pl->descriptor_sets_num - 1, i); + GLSLA("layout (set = %i, binding = %i", pl->nb_descriptor_sets - 1, i); if (desc[i].mem_layout) GLSLA(", %s", desc[i].mem_layout); @@ -1070,185 +1527,268 @@ int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl, else if (desc[i].elems > 0) GLSLA("[%i]", desc[i].elems); - GLSLA(";\n"); + GLSLA(";"); + GLSLA("\n"); } GLSLA("\n"); return 0; } -void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl, - int set_id) +int ff_vk_exec_pipeline_register(FFVulkanContext *s, FFVkExecPool *pool, + FFVulkanPipeline *pl) { - FFVulkanFunctions *vk = &s->vkfn; + int err; - /* If a set has never been updated, update all queues' sets. */ - if (!pl->desc_set_initialized[set_id]) { - for (int i = 0; i < pl->qf->nb_queues; i++) { - int idx = set_id*pl->qf->nb_queues + i; - vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev, - pl->desc_set[idx], - pl->desc_template[idx], - s); - } - pl->desc_set_initialized[set_id] = 1; - return; - } + pl->desc_bind = av_calloc(pl->nb_descriptor_sets, sizeof(*pl->desc_bind)); + if (!pl->desc_bind) + return AVERROR(ENOMEM); + + pl->bound_buffer_indices = av_calloc(pl->nb_descriptor_sets, + sizeof(*pl->bound_buffer_indices)); + if (!pl->bound_buffer_indices) + return AVERROR(ENOMEM); + + for (int i = 0; i < pl->nb_descriptor_sets; i++) { + FFVulkanDescriptorSet *set = &pl->desc_set[i]; + int nb = set->read_only ? 1 : pool->pool_size; + + err = ff_vk_create_buf(s, &set->buf, set->aligned_size*nb, + NULL, NULL, set->usage, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + if (err < 0) + return err; + + err = ff_vk_map_buffer(s, &set->buf, &set->desc_mem, 0); + if (err < 0) + return err; + + pl->desc_bind[i] = (VkDescriptorBufferBindingInfoEXT) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT, + .usage = set->usage, + .address = set->buf.address, + }; - set_id = set_id*pl->qf->nb_queues + pl->qf->cur_queue; + pl->bound_buffer_indices[i] = i; + } - vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev, - pl->desc_set[set_id], - pl->desc_template[set_id], - s); + return 0; } -void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e, - VkShaderStageFlagBits stage, int offset, - size_t size, void *src) +static inline void update_set_descriptor(FFVulkanContext *s, FFVkExecContext *e, + FFVulkanDescriptorSet *set, + int bind_idx, int array_idx, + VkDescriptorGetInfoEXT *desc_get_info, + size_t desc_size) { FFVulkanFunctions *vk = &s->vkfn; + const size_t exec_offset = set->read_only ? 0 : set->aligned_size*e->idx; + void *desc = set->desc_mem + /* Base */ + exec_offset + /* Execution context */ + set->binding_offset[bind_idx] + /* Descriptor binding */ + array_idx*desc_size; /* Array position */ - vk->CmdPushConstants(e->bufs[e->qf->cur_queue], e->bound_pl->pipeline_layout, - stage, offset, size, src); + vk->GetDescriptorEXT(s->hwctx->act_dev, desc_get_info, desc_size, desc); } -int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl) +int ff_vk_set_descriptor_sampler(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkSampler *sampler) { - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - - pl->desc_staging = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_staging)); - if (!pl->desc_staging) - return AVERROR(ENOMEM); + FFVulkanDescriptorSet *desc_set = &pl->desc_set[set]; + VkDescriptorGetInfoEXT desc_get_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, + .type = desc_set->binding[bind].descriptorType, + }; - { /* Init descriptor set pool */ - VkDescriptorPoolCreateInfo pool_create_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - .poolSizeCount = pl->pool_size_desc_num, - .pPoolSizes = pl->pool_size_desc, - .maxSets = pl->total_descriptor_sets, - }; + switch (desc_get_info.type) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + desc_get_info.data.pSampler = sampler; + break; + default: + av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n", + set, bind, desc_get_info.type); + return AVERROR(EINVAL); + break; + }; - ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info, - s->hwctx->alloc, &pl->desc_pool); - av_freep(&pl->pool_size_desc); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to init descriptor set " - "pool: %s\n", ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } + update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info, + s->desc_buf_props.samplerDescriptorSize); - { /* Allocate descriptor sets */ - VkDescriptorSetAllocateInfo alloc_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = pl->desc_pool, - .descriptorSetCount = pl->total_descriptor_sets, - .pSetLayouts = pl->desc_layout, - }; + return 0; +} - pl->desc_set = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_set)); - if (!pl->desc_set) - return AVERROR(ENOMEM); +int ff_vk_set_descriptor_image(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkImageView view, VkImageLayout layout, VkSampler sampler) +{ + FFVulkanDescriptorSet *desc_set = &pl->desc_set[set]; + VkDescriptorGetInfoEXT desc_get_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, + .type = desc_set->binding[bind].descriptorType, + }; + VkDescriptorImageInfo desc_img_info = { + .imageView = view, + .sampler = sampler, + .imageLayout = layout, + }; + size_t desc_size; - ret = vk->AllocateDescriptorSets(s->hwctx->act_dev, &alloc_info, - pl->desc_set); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } + switch (desc_get_info.type) { + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + desc_get_info.data.pSampledImage = &desc_img_info; + desc_size = s->desc_buf_props.sampledImageDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + desc_get_info.data.pStorageImage = &desc_img_info; + desc_size = s->desc_buf_props.storageImageDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + desc_get_info.data.pInputAttachmentImage = &desc_img_info; + desc_size = s->desc_buf_props.inputAttachmentDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + desc_get_info.data.pCombinedImageSampler = &desc_img_info; + desc_size = s->desc_buf_props.combinedImageSamplerDescriptorSize; + break; + default: + av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n", + set, bind, desc_get_info.type); + return AVERROR(EINVAL); + break; + }; - { /* Finally create the pipeline layout */ - VkPipelineLayoutCreateInfo spawn_pipeline_layout = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pSetLayouts = (VkDescriptorSetLayout *)pl->desc_staging, - .pushConstantRangeCount = pl->push_consts_num, - .pPushConstantRanges = pl->push_consts, - }; + update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info, desc_size); - for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) - pl->desc_staging[spawn_pipeline_layout.setLayoutCount++] = pl->desc_layout[i]; + return 0; +} - ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout, - s->hwctx->alloc, &pl->pipeline_layout); - av_freep(&pl->push_consts); - pl->push_consts_num = 0; - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } +int ff_vk_set_descriptor_buffer(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkDeviceAddress addr, VkDeviceSize len, VkFormat fmt) +{ + FFVulkanDescriptorSet *desc_set = &pl->desc_set[set]; + VkDescriptorGetInfoEXT desc_get_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, + .type = desc_set->binding[bind].descriptorType, + }; + VkDescriptorAddressInfoEXT desc_buf_info = { + .address = addr, + .range = len, + .format = fmt, + }; + size_t desc_size; - { /* Descriptor template (for tightly packed descriptors) */ - VkDescriptorUpdateTemplateCreateInfo *dt; + switch (desc_get_info.type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + desc_get_info.data.pUniformBuffer = &desc_buf_info; + desc_size = s->desc_buf_props.uniformBufferDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + desc_get_info.data.pStorageBuffer = &desc_buf_info; + desc_size = s->desc_buf_props.storageBufferDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + desc_get_info.data.pUniformTexelBuffer = &desc_buf_info; + desc_size = s->desc_buf_props.uniformTexelBufferDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + desc_get_info.data.pStorageTexelBuffer = &desc_buf_info; + desc_size = s->desc_buf_props.storageTexelBufferDescriptorSize; + break; + default: + av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n", + set, bind, desc_get_info.type); + return AVERROR(EINVAL); + break; + }; - pl->desc_template = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_template)); - if (!pl->desc_template) - return AVERROR(ENOMEM); + update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info, desc_size); - /* Create update templates for the descriptor sets */ - for (int i = 0; i < pl->total_descriptor_sets; i++) { - dt = &pl->desc_template_info[i]; - dt->pipelineLayout = pl->pipeline_layout; - ret = vk->CreateDescriptorUpdateTemplate(s->hwctx->act_dev, - dt, s->hwctx->alloc, - &pl->desc_template[i]); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to init descriptor " - "template: %s\n", ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } + return 0; +} - /* Free the duplicated memory used for the template entries */ - for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) { - dt = &pl->desc_template_info[i]; - av_free((void *)dt->pDescriptorUpdateEntries); - } +void ff_vk_update_descriptor_img_array(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, AVFrame *f, + VkImageView *views, int set, int binding, + VkImageLayout layout, VkSampler sampler) +{ + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format); - av_freep(&pl->desc_template_info); - } + for (int i = 0; i < nb_planes; i++) + ff_vk_set_descriptor_image(s, pl, e, set, binding, i, + views[i], layout, sampler); +} - return 0; +void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e, + FFVulkanPipeline *pl, + VkShaderStageFlagBits stage, + int offset, size_t size, void *src) +{ + FFVulkanFunctions *vk = &s->vkfn; + vk->CmdPushConstants(e->buf, pl->pipeline_layout, + stage, offset, size, src); } -FN_CREATING(FFVulkanContext, FFVulkanPipeline, pipeline, pipelines, pipelines_num) -FFVulkanPipeline *ff_vk_create_pipeline(FFVulkanContext *s, FFVkQueueFamilyCtx *qf) +static int init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl) { - FFVulkanPipeline *pl = create_pipeline(s); - if (pl) - pl->qf = qf; + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + VkPipelineLayoutCreateInfo pipeline_layout_info; + + VkDescriptorSetLayout *desc_layouts = av_malloc(pl->nb_descriptor_sets* + sizeof(desc_layouts)); + if (!desc_layouts) + return AVERROR(ENOMEM); + + for (int i = 0; i < pl->nb_descriptor_sets; i++) + desc_layouts[i] = pl->desc_set[i].layout; - return pl; + /* Finally create the pipeline layout */ + pipeline_layout_info = (VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pSetLayouts = desc_layouts, + .setLayoutCount = pl->nb_descriptor_sets, + .pushConstantRangeCount = pl->push_consts_num, + .pPushConstantRanges = pl->push_consts, + }; + + ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &pipeline_layout_info, + s->hwctx->alloc, &pl->pipeline_layout); + av_free(desc_layouts); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + return 0; } -int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl) +int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkSPIRVShader *shd) { - int i; + int err; VkResult ret; FFVulkanFunctions *vk = &s->vkfn; - VkComputePipelineCreateInfo pipe = { + VkComputePipelineCreateInfo pipeline_create_info; + + err = init_pipeline_layout(s, pl); + if (err < 0) + return err; + + pipeline_create_info = (VkComputePipelineCreateInfo) { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .flags = VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT, .layout = pl->pipeline_layout, + .stage = shd->shader, }; - for (i = 0; i < pl->shaders_num; i++) { - if (pl->shaders[i]->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT) { - pipe.stage = pl->shaders[i]->shader; - break; - } - } - if (i == pl->shaders_num) { - av_log(s, AV_LOG_ERROR, "Can't init compute pipeline, no shader\n"); - return AVERROR(EINVAL); - } - - ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, &pipe, + ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, + &pipeline_create_info, s->hwctx->alloc, &pl->pipeline); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n", @@ -1257,150 +1797,68 @@ int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl) } pl->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE; + pl->wg_size[0] = shd->local_size[0]; + pl->wg_size[1] = shd->local_size[1]; + pl->wg_size[2] = shd->local_size[2]; return 0; } -void ff_vk_bind_pipeline_exec(FFVulkanContext *s, FFVkExecContext *e, +void ff_vk_exec_bind_pipeline(FFVulkanContext *s, FFVkExecContext *e, FFVulkanPipeline *pl) { FFVulkanFunctions *vk = &s->vkfn; + VkDeviceSize offsets[1024]; - vk->CmdBindPipeline(e->bufs[e->qf->cur_queue], pl->bind_point, pl->pipeline); - - for (int i = 0; i < pl->descriptor_sets_num; i++) - pl->desc_staging[i] = pl->desc_set[i*pl->qf->nb_queues + pl->qf->cur_queue]; - - vk->CmdBindDescriptorSets(e->bufs[e->qf->cur_queue], pl->bind_point, - pl->pipeline_layout, 0, - pl->descriptor_sets_num, - (VkDescriptorSet *)pl->desc_staging, - 0, NULL); - - e->bound_pl = pl; -} - -static void free_exec_ctx(FFVulkanContext *s, FFVkExecContext *e) -{ - FFVulkanFunctions *vk = &s->vkfn; - - /* Make sure all queues have finished executing */ - for (int i = 0; i < e->qf->nb_queues; i++) { - FFVkQueueCtx *q = &e->queues[i]; + /* Bind pipeline */ + vk->CmdBindPipeline(e->buf, pl->bind_point, pl->pipeline); - if (q->fence) { - vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); - vk->ResetFences(s->hwctx->act_dev, 1, &q->fence); - } - - /* Free the fence */ - if (q->fence) - vk->DestroyFence(s->hwctx->act_dev, q->fence, s->hwctx->alloc); + if (pl->nb_descriptor_sets) { + for (int i = 0; i < pl->nb_descriptor_sets; i++) + offsets[i] = pl->desc_set[i].read_only ? 0 : pl->desc_set[i].aligned_size*e->idx; - /* Free buffer dependencies */ - for (int j = 0; j < q->nb_buf_deps; j++) - av_buffer_unref(&q->buf_deps[j]); - av_free(q->buf_deps); - - /* Free frame dependencies */ - for (int j = 0; j < q->nb_frame_deps; j++) - av_frame_free(&q->frame_deps[j]); - av_free(q->frame_deps); + /* Bind descriptor buffers */ + vk->CmdBindDescriptorBuffersEXT(e->buf, pl->nb_descriptor_sets, pl->desc_bind); + /* Binding offsets */ + vk->CmdSetDescriptorBufferOffsetsEXT(e->buf, pl->bind_point, pl->pipeline_layout, + 0, pl->nb_descriptor_sets, + pl->bound_buffer_indices, offsets); } - - if (e->bufs) - vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, e->qf->nb_queues, e->bufs); - if (e->pool) - vk->DestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc); - - av_freep(&e->bufs); - av_freep(&e->queues); - av_freep(&e->sem_sig); - av_freep(&e->sem_sig_val); - av_freep(&e->sem_sig_val_dst); - av_freep(&e->sem_wait); - av_freep(&e->sem_wait_dst); - av_freep(&e->sem_wait_val); - av_free(e); } -static void free_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl) +void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl) { FFVulkanFunctions *vk = &s->vkfn; - for (int i = 0; i < pl->shaders_num; i++) { - FFVkSPIRVShader *shd = pl->shaders[i]; - av_bprint_finalize(&shd->src, NULL); - vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module, - s->hwctx->alloc); - av_free(shd); - } - - vk->DestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc); - vk->DestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout, - s->hwctx->alloc); + if (pl->pipeline) + vk->DestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc); + if (pl->pipeline_layout) + vk->DestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout, + s->hwctx->alloc); - for (int i = 0; i < pl->desc_layout_num; i++) { - if (pl->desc_template && pl->desc_template[i]) - vk->DestroyDescriptorUpdateTemplate(s->hwctx->act_dev, pl->desc_template[i], - s->hwctx->alloc); - if (pl->desc_layout && pl->desc_layout[i]) - vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, pl->desc_layout[i], + for (int i = 0; i < pl->nb_descriptor_sets; i++) { + FFVulkanDescriptorSet *set = &pl->desc_set[i]; + if (set->buf.mem) + ff_vk_unmap_buffer(s, &set->buf, 0); + ff_vk_free_buf(s, &set->buf); + if (set->layout) + vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, set->layout, s->hwctx->alloc); + av_free(set->binding); + av_free(set->binding_offset); } - /* Also frees the descriptor sets */ - if (pl->desc_pool) - vk->DestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool, - s->hwctx->alloc); - - av_freep(&pl->desc_staging); av_freep(&pl->desc_set); - av_freep(&pl->shaders); - av_freep(&pl->desc_layout); - av_freep(&pl->desc_template); - av_freep(&pl->desc_set_initialized); + av_freep(&pl->desc_bind); av_freep(&pl->push_consts); pl->push_consts_num = 0; - - /* Only freed in case of failure */ - av_freep(&pl->pool_size_desc); - if (pl->desc_template_info) { - for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) { - VkDescriptorUpdateTemplateCreateInfo *dt = &pl->desc_template_info[i]; - av_free((void *)dt->pDescriptorUpdateEntries); - } - av_freep(&pl->desc_template_info); - } - - av_free(pl); } void ff_vk_uninit(FFVulkanContext *s) { - FFVulkanFunctions *vk = &s->vkfn; - - if (s->spirv_compiler) - s->spirv_compiler->uninit(&s->spirv_compiler); - - for (int i = 0; i < s->exec_ctx_num; i++) - free_exec_ctx(s, s->exec_ctx[i]); - av_freep(&s->exec_ctx); - - for (int i = 0; i < s->samplers_num; i++) { - vk->DestroySampler(s->hwctx->act_dev, s->samplers[i]->sampler[0], - s->hwctx->alloc); - av_free(s->samplers[i]); - } - av_freep(&s->samplers); - - for (int i = 0; i < s->pipelines_num; i++) - free_pipeline(s, s->pipelines[i]); - av_freep(&s->pipelines); - - av_freep(&s->scratch); - s->scratch_size = 0; + av_freep(&s->query_props); + av_freep(&s->qf_props); + av_freep(&s->video_props); - av_buffer_unref(&s->device_ref); av_buffer_unref(&s->frames_ref); } diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index 90922c6cf3de3..58da720a1cff4 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -19,6 +19,10 @@ #ifndef AVUTIL_VULKAN_H #define AVUTIL_VULKAN_H +#define VK_NO_PROTOTYPES + +#include + #include "pixdesc.h" #include "bprint.h" #include "hwcontext.h" @@ -26,11 +30,6 @@ #include "hwcontext_vulkan.h" #include "vulkan_loader.h" -#define FF_VK_DEFAULT_USAGE_FLAGS (VK_IMAGE_USAGE_SAMPLED_BIT | \ - VK_IMAGE_USAGE_STORAGE_BIT | \ - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | \ - VK_IMAGE_USAGE_TRANSFER_DST_BIT) - /* GLSL management macros */ #define INDENT(N) INDENT_##N #define INDENT_0 @@ -41,12 +40,28 @@ #define INDENT_5 INDENT_4 INDENT_1 #define INDENT_6 INDENT_5 INDENT_1 #define C(N, S) INDENT(N) #S "\n" -#define GLSLC(N, S) av_bprintf(&shd->src, C(N, S)) -#define GLSLA(...) av_bprintf(&shd->src, __VA_ARGS__) -#define GLSLF(N, S, ...) av_bprintf(&shd->src, C(N, S), __VA_ARGS__) -#define GLSLD(D) GLSLC(0, ); \ - av_bprint_append_data(&shd->src, D, strlen(D)); \ - GLSLC(0, ) + +#define GLSLC(N, S) \ + do { \ + av_bprintf(&shd->src, C(N, S)); \ + } while (0) + +#define GLSLA(...) \ + do { \ + av_bprintf(&shd->src, __VA_ARGS__); \ + } while (0) + +#define GLSLF(N, S, ...) \ + do { \ + av_bprintf(&shd->src, C(N, S), __VA_ARGS__); \ + } while (0) + +#define GLSLD(D) \ + do { \ + av_bprintf(&shd->src, "\n"); \ + av_bprint_append_data(&shd->src, D, strlen(D)); \ + av_bprintf(&shd->src, "\n"); \ + } while (0) /* Helper, pretty much every Vulkan return value needs to be checked */ #define RET(x) \ @@ -55,26 +70,16 @@ goto fail; \ } while (0) +#define DUP_SAMPLER(x) { x, x, x, x } + typedef struct FFVkSPIRVShader { const char *name; /* Name for id/debugging purposes */ AVBPrint src; int local_size[3]; /* Compute shader workgroup sizes */ VkPipelineShaderStageCreateInfo shader; + VkPipelineShaderStageRequiredSubgroupSizeCreateInfo subgroup_info; } FFVkSPIRVShader; -typedef struct FFVkSPIRVCompiler { - void *priv; - int (*compile_shader)(struct FFVkSPIRVCompiler *ctx, void *avctx, - struct FFVkSPIRVShader *shd, uint8_t **data, - size_t *size, const char *entrypoint, void **opaque); - void (*free_shader)(struct FFVkSPIRVCompiler *ctx, void **opaque); - void (*uninit)(struct FFVkSPIRVCompiler **ctx); -} FFVkSPIRVCompiler; - -typedef struct FFVkSampler { - VkSampler sampler[4]; -} FFVkSampler; - typedef struct FFVulkanDescriptorSetBinding { const char *name; VkDescriptorType type; @@ -84,150 +89,200 @@ typedef struct FFVulkanDescriptorSetBinding { uint32_t dimensions; /* Needed for e.g. sampler%iD */ uint32_t elems; /* 0 - scalar, 1 or more - vector */ VkShaderStageFlags stages; - FFVkSampler *sampler; /* Sampler to use for all elems */ - void *updater; /* Pointer to VkDescriptor*Info */ + VkSampler samplers[4]; /* Sampler to use for all elems */ } FFVulkanDescriptorSetBinding; typedef struct FFVkBuffer { VkBuffer buf; VkDeviceMemory mem; VkMemoryPropertyFlagBits flags; + size_t size; + VkDeviceAddress address; + + /* Local use only */ + VkPipelineStageFlags2 stage; + VkAccessFlags2 access; + + /* Only valid when allocated via ff_vk_get_pooled_buffer with HOST_VISIBLE */ + uint8_t *mapped_mem; } FFVkBuffer; typedef struct FFVkQueueFamilyCtx { int queue_family; int nb_queues; - int cur_queue; - int actual_queues; } FFVkQueueFamilyCtx; -typedef struct FFVulkanPipeline { - FFVkQueueFamilyCtx *qf; +typedef struct FFVulkanDescriptorSet { + VkDescriptorSetLayout layout; + FFVkBuffer buf; + uint8_t *desc_mem; + VkDeviceSize layout_size; + VkDeviceSize aligned_size; /* descriptorBufferOffsetAlignment */ + VkDeviceSize total_size; /* Once registered to an exec context */ + VkBufferUsageFlags usage; + VkDescriptorSetLayoutBinding *binding; + VkDeviceSize *binding_offset; + int nb_bindings; + + int read_only; +} FFVulkanDescriptorSet; + +typedef struct FFVulkanPipeline { VkPipelineBindPoint bind_point; /* Contexts */ VkPipelineLayout pipeline_layout; VkPipeline pipeline; - /* Shaders */ - FFVkSPIRVShader **shaders; - int shaders_num; - /* Push consts */ VkPushConstantRange *push_consts; int push_consts_num; + /* Workgroup */ + int wg_size[3]; + /* Descriptors */ - VkDescriptorSetLayout *desc_layout; - VkDescriptorPool desc_pool; - VkDescriptorSet *desc_set; -#if VK_USE_64_BIT_PTR_DEFINES == 1 - void **desc_staging; -#else - uint64_t *desc_staging; -#endif - VkDescriptorSetLayoutBinding **desc_binding; - VkDescriptorUpdateTemplate *desc_template; - int *desc_set_initialized; - int desc_layout_num; - int descriptor_sets_num; - int total_descriptor_sets; - int pool_size_desc_num; - - /* Temporary, used to store data in between initialization stages */ - VkDescriptorUpdateTemplateCreateInfo *desc_template_info; - VkDescriptorPoolSize *pool_size_desc; + FFVulkanDescriptorSet *desc_set; + VkDescriptorBufferBindingInfoEXT *desc_bind; + uint32_t *bound_buffer_indices; + int nb_descriptor_sets; } FFVulkanPipeline; -typedef struct FFVkQueueCtx { - VkFence fence; +typedef struct FFVkExecContext { + int idx; + const struct FFVkExecPool *parent; + + /* Queue for the execution context */ VkQueue queue; + int qf; + int qi; + + /* Command buffer for the context */ + VkCommandBuffer buf; + + /* Fence for the command buffer */ + VkFence fence; + + void *query_data; + int query_idx; /* Buffer dependencies */ AVBufferRef **buf_deps; int nb_buf_deps; - int buf_deps_alloc_size; + unsigned int buf_deps_alloc_size; /* Frame dependencies */ AVFrame **frame_deps; + unsigned int frame_deps_alloc_size; int nb_frame_deps; - int frame_deps_alloc_size; -} FFVkQueueCtx; - -typedef struct FFVkExecContext { - FFVkQueueFamilyCtx *qf; - - VkCommandPool pool; - VkCommandBuffer *bufs; - FFVkQueueCtx *queues; - AVBufferRef ***deps; - int *nb_deps; - int *dep_alloc_size; + VkSemaphoreSubmitInfo *sem_wait; + unsigned int sem_wait_alloc; + int sem_wait_cnt; - FFVulkanPipeline *bound_pl; + VkSemaphoreSubmitInfo *sem_sig; + unsigned int sem_sig_alloc; + int sem_sig_cnt; - VkSemaphore *sem_wait; - int sem_wait_alloc; /* Allocated sem_wait */ - int sem_wait_cnt; + uint64_t **sem_sig_val_dst; + unsigned int sem_sig_val_dst_alloc; + int sem_sig_val_dst_cnt; - uint64_t *sem_wait_val; - int sem_wait_val_alloc; + uint8_t *frame_locked; + unsigned int frame_locked_alloc_size; - VkPipelineStageFlagBits *sem_wait_dst; - int sem_wait_dst_alloc; /* Allocated sem_wait_dst */ + VkAccessFlagBits *access_dst; + unsigned int access_dst_alloc; - VkSemaphore *sem_sig; - int sem_sig_alloc; /* Allocated sem_sig */ - int sem_sig_cnt; + VkImageLayout *layout_dst; + unsigned int layout_dst_alloc; - uint64_t *sem_sig_val; - int sem_sig_val_alloc; + uint32_t *queue_family_dst; + unsigned int queue_family_dst_alloc; - uint64_t **sem_sig_val_dst; - int sem_sig_val_dst_alloc; + uint8_t *frame_update; + unsigned int frame_update_alloc_size; } FFVkExecContext; +typedef struct FFVkExecPool { + FFVkQueueFamilyCtx *qf; + FFVkExecContext *contexts; + atomic_int_least64_t idx; + + VkCommandPool cmd_buf_pool; + VkCommandBuffer *cmd_bufs; + int pool_size; + + VkQueryPool query_pool; + void *query_data; + int query_results; + int query_statuses; + int query_64bit; + int query_status_stride; + int nb_queries; + size_t qd_size; +} FFVkExecPool; + typedef struct FFVulkanContext { const AVClass *class; /* Filters and encoders use this */ FFVulkanFunctions vkfn; FFVulkanExtensions extensions; - VkPhysicalDeviceProperties props; + VkPhysicalDeviceProperties2 props; + VkPhysicalDeviceDriverProperties driver_props; VkPhysicalDeviceMemoryProperties mprops; + VkPhysicalDeviceExternalMemoryHostPropertiesEXT hprops; + VkPhysicalDeviceDescriptorBufferPropertiesEXT desc_buf_props; + VkPhysicalDeviceSubgroupSizeControlProperties subgroup_props; + VkQueueFamilyQueryResultStatusPropertiesKHR *query_props; + VkQueueFamilyVideoPropertiesKHR *video_props; + VkQueueFamilyProperties2 *qf_props; + int tot_nb_qfs; + + VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float_feats; + VkPhysicalDeviceVulkan12Features feats_12; + VkPhysicalDeviceFeatures2 feats; - AVBufferRef *device_ref; AVHWDeviceContext *device; AVVulkanDeviceContext *hwctx; + AVBufferRef *input_frames_ref; AVBufferRef *frames_ref; AVHWFramesContext *frames; AVVulkanFramesContext *hwfc; - FFVkSPIRVCompiler *spirv_compiler; + uint32_t qfs[5]; + int nb_qfs; /* Properties */ int output_width; int output_height; enum AVPixelFormat output_format; enum AVPixelFormat input_format; +} FFVulkanContext; - /* Samplers */ - FFVkSampler **samplers; - int samplers_num; +static inline int ff_vk_count_images(AVVkFrame *f) +{ + int cnt = 0; + while (f->img[cnt]) + cnt++; - /* Exec contexts */ - FFVkExecContext **exec_ctx; - int exec_ctx_num; + return cnt; +} - /* Pipelines (each can have 1 shader of each type) */ - FFVulkanPipeline **pipelines; - int pipelines_num; +static inline const void *ff_vk_find_struct(const void *chain, VkStructureType stype) +{ + const VkBaseInStructure *in = chain; + while (in) { + if (in->sType == stype) + return in; - void *scratch; /* Scratch memory used only in functions */ - unsigned int scratch_size; -} FFVulkanContext; + in = in->pNext; + } + + return NULL; +} /* Identity mapping - r = r, b = b, g = g, a = a */ extern const VkComponentMapping ff_comp_identity_map; @@ -238,188 +293,207 @@ extern const VkComponentMapping ff_comp_identity_map; const char *ff_vk_ret2str(VkResult res); /** - * Returns 1 if the image is any sort of supported RGB + * Returns 1 if pixfmt is a usable RGB format. */ int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt); /** - * Gets the glsl format string for a pixel format + * Returns the format to use for images in shaders. */ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt); /** - * Initialize a queue family with a specific number of queues. - * If nb_queues == 0, use however many queues the queue family has. + * Loads props/mprops/driver_props */ -void ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, - VkQueueFlagBits dev_family, int nb_queues); +int ff_vk_load_props(FFVulkanContext *s); /** - * Rotate through the queues in a queue family. + * Chooses a QF and loads it into a context. */ -void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf); +int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, + VkQueueFlagBits dev_family); /** - * Create a Vulkan sampler, will be auto-freed in ff_vk_filter_uninit() + * Allocates/frees an execution pool. + * ff_vk_exec_pool_init_desc() MUST be called if ff_vk_exec_descriptor_set_add() + * has been called. */ -FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, int unnorm_coords, - VkFilter filt); +int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, + FFVkExecPool *pool, int nb_contexts, + int nb_queries, VkQueryType query_type, int query_64bit, + const void *query_create_pnext); +void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool); /** - * Create an imageview. - * Guaranteed to remain alive until the queue submission has finished executing, - * and will be destroyed after that. + * Retrieve an execution pool. Threadsafe. */ -int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e, - VkImageView *v, VkImage img, VkFormat fmt, - const VkComponentMapping map); +FFVkExecContext *ff_vk_exec_get(FFVkExecPool *pool); /** - * Define a push constant for a given stage into a pipeline. - * Must be called before the pipeline layout has been initialized. + * Performs nb_queries queries and returns their results and statuses. + * Execution must have been waited on to produce valid results. */ -int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size, - VkShaderStageFlagBits stage); +VkResult ff_vk_exec_get_query(FFVulkanContext *s, FFVkExecContext *e, + void **data, int64_t *status); /** - * Inits a pipeline. Everything in it will be auto-freed when calling - * ff_vk_filter_uninit(). + * Start/submit/wait an execution. + * ff_vk_exec_start() always waits on a submission, so using ff_vk_exec_wait() + * is not necessary (unless using it is just better). */ -FFVulkanPipeline *ff_vk_create_pipeline(FFVulkanContext *s, FFVkQueueFamilyCtx *qf); +int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e); +int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e); +void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e); /** - * Inits a shader for a specific pipeline. Will be auto-freed on uninit. - */ -FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name, - VkShaderStageFlags stage); + * Execution dependency management. + * Can attach buffers to executions that will only be unref'd once the + * buffer has finished executing. + * Adding a frame dep will *lock the frame*, until either the dependencies + * are discarded, the execution is submitted, or a failure happens. + * update_frame will update the frame's properties before it is unlocked, + * only if submission was successful. + */ +int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e, + AVBufferRef **deps, int nb_deps, int ref); +int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, + VkPipelineStageFlagBits2 wait_stage, + VkPipelineStageFlagBits2 signal_stage); +void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, + VkImageMemoryBarrier2 *bar, uint32_t *nb_img_bar); +int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e, + VkSemaphore *dst, uint64_t *dst_val, + AVFrame *f); +void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e); /** - * Writes the workgroup size for a shader. + * Create an imageview and add it as a dependency to an execution. */ -void ff_vk_set_compute_shader_sizes(FFVkSPIRVShader *shd, int local_size[3]); +int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e, + VkImageView views[AV_NUM_DATA_POINTERS], + AVFrame *f); -/** - * Adds a descriptor set to the shader and registers them in the pipeline. - */ -int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl, - FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc, - int num, int only_print_to_shader); +void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e, + AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar, + VkPipelineStageFlags src_stage, + VkPipelineStageFlags dst_stage, + VkAccessFlagBits new_access, + VkImageLayout new_layout, + uint32_t new_qf); /** - * Compiles the shader, entrypoint must be set to "main". + * Memory/buffer/image allocation helpers. */ -int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd, - const char *entrypoint); +int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, + VkMemoryPropertyFlagBits req_flags, void *alloc_extension, + VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem); +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, + void *pNext, void *alloc_pNext, + VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags); +int ff_vk_create_avbuf(FFVulkanContext *s, AVBufferRef **ref, size_t size, + void *pNext, void *alloc_pNext, + VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags); /** - * Pretty print shader, mainly used by shader compilers. + * Buffer management code. */ -void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio); +int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer **buf, uint8_t *mem[], + int nb_buffers, int invalidate); +int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer **buf, int nb_buffers, + int flush); -/** - * Initializes the pipeline layout after all shaders and descriptor sets have - * been finished. - */ -int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl); +static inline int ff_vk_map_buffer(FFVulkanContext *s, FFVkBuffer *buf, uint8_t **mem, + int invalidate) +{ + return ff_vk_map_buffers(s, (FFVkBuffer *[]){ buf }, mem, + 1, invalidate); +} -/** - * Initializes a compute pipeline. Will pick the first shader with the - * COMPUTE flag set. - */ -int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl); +static inline int ff_vk_unmap_buffer(FFVulkanContext *s, FFVkBuffer *buf, int flush) +{ + return ff_vk_unmap_buffers(s, (FFVkBuffer *[]){ buf }, 1, flush); +} -/** - * Updates a descriptor set via the updaters defined. - * Can be called immediately after pipeline creation, but must be called - * at least once before queue submission. - */ -void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl, - int set_id); +void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf); -/** - * Init an execution context for command recording and queue submission. - * WIll be auto-freed on uninit. - */ -int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx, - FFVkQueueFamilyCtx *qf); +/** Initialize a pool and create AVBufferRefs containing FFVkBuffer. + * Threadsafe to use. Buffers are automatically mapped on creation if + * VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT is set in mem_props. Users should + * synchronize access themselvesd. Mainly meant for device-local buffers. */ +int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool, + AVBufferRef **buf, VkBufferUsageFlags usage, + void *create_pNext, size_t size, + VkMemoryPropertyFlagBits mem_props); /** - * Begin recording to the command buffer. Previous execution must have been - * completed, which ff_vk_submit_exec_queue() will ensure. + * Create a sampler. */ -int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e); +int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler, + int unnorm_coords, VkFilter filt); /** - * Add a command to bind the completed pipeline and its descriptor sets. - * Must be called after ff_vk_start_exec_recording() and before submission. + * Shader management. */ -void ff_vk_bind_pipeline_exec(FFVulkanContext *s, FFVkExecContext *e, - FFVulkanPipeline *pl); +int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name, + VkShaderStageFlags stage, uint32_t required_subgroup_size); +void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int x, int y, int z); +void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio); +int ff_vk_shader_create(FFVulkanContext *s, FFVkSPIRVShader *shd, + uint8_t *spirv, size_t spirv_size, const char *entrypoint); +void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd); /** - * Updates push constants. - * Must be called after binding a pipeline if any push constants were defined. + * Add/update push constants for execution. */ +int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size, + VkShaderStageFlagBits stage); void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e, - VkShaderStageFlagBits stage, int offset, - size_t size, void *src); + FFVulkanPipeline *pl, + VkShaderStageFlagBits stage, + int offset, size_t size, void *src); /** - * Gets the command buffer to use for this submission from the exe context. + * Add descriptor to a pipeline. Must be called before pipeline init. */ -VkCommandBuffer ff_vk_get_exec_buf(FFVkExecContext *e); +int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkSPIRVShader *shd, + FFVulkanDescriptorSetBinding *desc, int nb, + int read_only, int print_to_shader_only); -/** - * Adds a generic AVBufferRef as a queue depenency. - */ -int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e, - AVBufferRef **deps, int nb_deps); +/* Initialize/free a pipeline. */ +int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkSPIRVShader *shd); +void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl); /** - * Discards all queue dependencies + * Register a pipeline with an exec pool. + * Pool may be NULL if all descriptor sets are read-only. */ -void ff_vk_discard_exec_deps(FFVkExecContext *e); +int ff_vk_exec_pipeline_register(FFVulkanContext *s, FFVkExecPool *pool, + FFVulkanPipeline *pl); -/** - * Adds a frame as a queue dependency. This also manages semaphore signalling. - * Must be called before submission. - */ -int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame, - VkPipelineStageFlagBits in_wait_dst_flag); - -/** - * Submits a command buffer to the queue for execution. - * Will block until execution has finished in order to simplify resource - * management. - */ -int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e); - -/** - * Create a VkBuffer with the specified parameters. - */ -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, - VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags); - -/** - * Maps the buffer to userspace. Set invalidate to 1 if reading the contents - * is necessary. - */ -int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[], - int nb_buffers, int invalidate); - -/** - * Unmaps the buffer from userspace. Set flush to 1 to write and sync. - */ -int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers, - int flush); +/* Bind pipeline */ +void ff_vk_exec_bind_pipeline(FFVulkanContext *s, FFVkExecContext *e, + FFVulkanPipeline *pl); -/** - * Frees a buffer. - */ -void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf); +/* Update sampler/image/buffer descriptors. e may be NULL for read-only descriptors. */ +int ff_vk_set_descriptor_sampler(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkSampler *sampler); +int ff_vk_set_descriptor_image(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkImageView view, VkImageLayout layout, VkSampler sampler); +int ff_vk_set_descriptor_buffer(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkDeviceAddress addr, VkDeviceSize len, VkFormat fmt); + +void ff_vk_update_descriptor_img_array(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, AVFrame *f, + VkImageView *views, int set, int binding, + VkImageLayout layout, VkSampler sampler); /** - * Frees the main Vulkan context. + * Frees main context. */ void ff_vk_uninit(FFVulkanContext *s); diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h index d15a5d9a425c3..195428eb79214 100644 --- a/libavutil/vulkan_functions.h +++ b/libavutil/vulkan_functions.h @@ -37,6 +37,17 @@ typedef enum FFVulkanExtensions { FF_VK_EXT_EXTERNAL_WIN32_MEMORY = 1ULL << 6, /* VK_KHR_external_memory_win32 */ FF_VK_EXT_EXTERNAL_WIN32_SEM = 1ULL << 7, /* VK_KHR_external_semaphore_win32 */ #endif + FF_VK_EXT_DESCRIPTOR_BUFFER = 1ULL << 8, /* VK_EXT_descriptor_buffer */ + FF_VK_EXT_DEVICE_DRM = 1ULL << 9, /* VK_EXT_physical_device_drm */ + FF_VK_EXT_VIDEO_QUEUE = 1ULL << 10, /* VK_KHR_video_queue */ + FF_VK_EXT_VIDEO_DECODE_QUEUE = 1ULL << 11, /* VK_KHR_video_decode_queue */ + FF_VK_EXT_VIDEO_DECODE_H264 = 1ULL << 12, /* VK_EXT_video_decode_h264 */ + FF_VK_EXT_VIDEO_DECODE_H265 = 1ULL << 13, /* VK_EXT_video_decode_h265 */ + FF_VK_EXT_VIDEO_ENCODE_QUEUE = 1ULL << 14, /* VK_KHR_video_encode_queue */ + FF_VK_EXT_VIDEO_ENCODE_H264 = 1ULL << 15, /* VK_EXT_video_encode_h264 */ + FF_VK_EXT_VIDEO_ENCODE_H265 = 1ULL << 16, /* VK_EXT_video_encode_h265 */ + FF_VK_EXT_VIDEO_DECODE_AV1 = 1ULL << 17, /* VK_MESA_video_decode_av1 */ + FF_VK_EXT_ATOMIC_FLOAT = 1ULL << 18, /* VK_EXT_shader_atomic_float */ FF_VK_EXT_NO_FLAG = 1ULL << 31, } FFVulkanExtensions; @@ -58,6 +69,8 @@ typedef enum FFVulkanExtensions { MACRO(1, 0, FF_VK_EXT_NO_FLAG, CreateDevice) \ MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceFeatures2) \ MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceProperties) \ + MACRO(1, 0, FF_VK_EXT_VIDEO_QUEUE, GetPhysicalDeviceVideoCapabilitiesKHR) \ + MACRO(1, 0, FF_VK_EXT_VIDEO_QUEUE, GetPhysicalDeviceVideoFormatPropertiesKHR) \ MACRO(1, 0, FF_VK_EXT_NO_FLAG, DeviceWaitIdle) \ MACRO(1, 0, FF_VK_EXT_NO_FLAG, DestroyDevice) \ \ @@ -69,6 +82,7 @@ typedef enum FFVulkanExtensions { MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceFormatProperties2) \ MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceImageFormatProperties2) \ MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceQueueFamilyProperties) \ + MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceQueueFamilyProperties2) \ \ /* Command pool */ \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateCommandPool) \ @@ -84,6 +98,7 @@ typedef enum FFVulkanExtensions { /* Queue */ \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetDeviceQueue) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, QueueSubmit) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, QueueSubmit2) \ \ /* Fences */ \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateFence) \ @@ -120,6 +135,8 @@ typedef enum FFVulkanExtensions { MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetBufferMemoryRequirements2) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateBuffer) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, BindBufferMemory) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetBufferDeviceAddress) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdFillBuffer) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyBuffer) \ \ /* Image */ \ @@ -141,10 +158,46 @@ typedef enum FFVulkanExtensions { MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyDescriptorPool) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyDescriptorSetLayout) \ \ + /* Descriptor buffers */ \ + MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, GetDescriptorSetLayoutSizeEXT) \ + MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, GetDescriptorSetLayoutBindingOffsetEXT) \ + MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, GetDescriptorEXT) \ + MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, CmdBindDescriptorBuffersEXT) \ + MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, CmdSetDescriptorBufferOffsetsEXT) \ + \ /* DescriptorUpdateTemplate */ \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, UpdateDescriptorSetWithTemplate) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateDescriptorUpdateTemplate) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyDescriptorUpdateTemplate) \ + \ + /* Queries */ \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateQueryPool) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetQueryPoolResults) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, ResetQueryPool) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdBeginQuery) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdEndQuery) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdResetQueryPool) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyQueryPool) \ + \ + /* sync2 */ \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdPipelineBarrier2) \ + \ + /* Video queue */ \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CreateVideoSessionKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CreateVideoSessionParametersKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, GetVideoSessionMemoryRequirementsKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, BindVideoSessionMemoryKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CmdBeginVideoCodingKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CmdControlVideoCodingKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CmdEndVideoCodingKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, DestroyVideoSessionParametersKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, DestroyVideoSessionKHR) \ + \ + /* Video decoding */ \ + MACRO(1, 1, FF_VK_EXT_VIDEO_DECODE_QUEUE, CmdDecodeVideoKHR) \ + \ + /* Video encoding */ \ + MACRO(1, 1, FF_VK_EXT_VIDEO_ENCODE_QUEUE, CmdEncodeVideoKHR) \ \ /* Pipeline */ \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreatePipelineLayout) \ @@ -155,6 +208,8 @@ typedef enum FFVulkanExtensions { MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyPipeline) \ \ /* Sampler */ \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateSamplerYcbcrConversion) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroySamplerYcbcrConversion) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateSampler) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroySampler) \ \ diff --git a/libavutil/vulkan_loader.h b/libavutil/vulkan_loader.h index 3f1ee6aa4672a..717e684fb9562 100644 --- a/libavutil/vulkan_loader.h +++ b/libavutil/vulkan_loader.h @@ -44,10 +44,25 @@ static inline uint64_t ff_vk_extensions_to_mask(const char * const *extensions, { VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_SEM }, { VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_HOST_MEMORY }, { VK_EXT_DEBUG_UTILS_EXTENSION_NAME, FF_VK_EXT_DEBUG_UTILS }, + { VK_EXT_PHYSICAL_DEVICE_DRM_EXTENSION_NAME, FF_VK_EXT_DEVICE_DRM }, + { VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME, FF_VK_EXT_ATOMIC_FLOAT }, #ifdef _WIN32 { VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_MEMORY }, { VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM }, #endif + { VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, FF_VK_EXT_DESCRIPTOR_BUFFER, }, + { VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_QUEUE }, + { VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_QUEUE }, + { VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_QUEUE }, +#if CONFIG_VULKAN_ENCODE + { VK_EXT_VIDEO_ENCODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H264 }, +#endif + { VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H264 }, +#if CONFIG_VULKAN_ENCODE + { VK_EXT_VIDEO_ENCODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H265 }, +#endif + { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 }, + { "VK_MESA_video_decode_av1", FF_VK_EXT_VIDEO_DECODE_AV1 }, }; FFVulkanExtensions mask = 0x0; diff --git a/libswscale/input.c b/libswscale/input.c index d5676062a298f..41795c636ee73 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -1452,9 +1452,13 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) c->chrToYV12 = p010BEToUV_c; break; case AV_PIX_FMT_P012LE: + case AV_PIX_FMT_P212LE: + case AV_PIX_FMT_P412LE: c->chrToYV12 = p012LEToUV_c; break; case AV_PIX_FMT_P012BE: + case AV_PIX_FMT_P212BE: + case AV_PIX_FMT_P412BE: c->chrToYV12 = p012BEToUV_c; break; case AV_PIX_FMT_P016LE: @@ -1944,9 +1948,13 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) c->lumToYV12 = p010BEToY_c; break; case AV_PIX_FMT_P012LE: + case AV_PIX_FMT_P212LE: + case AV_PIX_FMT_P412LE: c->lumToYV12 = p012LEToY_c; break; case AV_PIX_FMT_P012BE: + case AV_PIX_FMT_P212BE: + case AV_PIX_FMT_P412BE: c->lumToYV12 = p012BEToY_c; break; case AV_PIX_FMT_GRAYF32LE: diff --git a/libswscale/utils.c b/libswscale/utils.c index 925c536bf17c3..a3a7a407509c7 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -248,8 +248,12 @@ static const FormatEntry format_entries[] = { [AV_PIX_FMT_X2BGR10LE] = { 1, 1 }, [AV_PIX_FMT_P210BE] = { 1, 1 }, [AV_PIX_FMT_P210LE] = { 1, 1 }, + [AV_PIX_FMT_P212BE] = { 1, 1 }, + [AV_PIX_FMT_P212LE] = { 1, 1 }, [AV_PIX_FMT_P410BE] = { 1, 1 }, [AV_PIX_FMT_P410LE] = { 1, 1 }, + [AV_PIX_FMT_P412BE] = { 1, 1 }, + [AV_PIX_FMT_P412LE] = { 1, 1 }, [AV_PIX_FMT_P216BE] = { 1, 1 }, [AV_PIX_FMT_P216LE] = { 1, 1 }, [AV_PIX_FMT_P416BE] = { 1, 1 }, diff --git a/tests/ref/fate/imgutils b/tests/ref/fate/imgutils index e79ec7e4b3dc2..02a755f2b7632 100644 --- a/tests/ref/fate/imgutils +++ b/tests/ref/fate/imgutils @@ -262,3 +262,7 @@ rgbf32be planes: 1, linesizes: 768 0 0 0, plane_sizes: 36864 0 rgbf32le planes: 1, linesizes: 768 0 0 0, plane_sizes: 36864 0 0 0, plane_offsets: 0 0 0, total_size: 36864 rgbaf32be planes: 1, linesizes: 1024 0 0 0, plane_sizes: 49152 0 0 0, plane_offsets: 0 0 0, total_size: 49152 rgbaf32le planes: 1, linesizes: 1024 0 0 0, plane_sizes: 49152 0 0 0, plane_offsets: 0 0 0, total_size: 49152 +p212be planes: 2, linesizes: 128 128 0 0, plane_sizes: 6144 6144 0 0, plane_offsets: 6144 0 0, total_size: 12288 +p212le planes: 2, linesizes: 128 128 0 0, plane_sizes: 6144 6144 0 0, plane_offsets: 6144 0 0, total_size: 12288 +p412be planes: 2, linesizes: 128 256 0 0, plane_sizes: 6144 12288 0 0, plane_offsets: 6144 0 0, total_size: 18432 +p412le planes: 2, linesizes: 128 256 0 0, plane_sizes: 6144 12288 0 0, plane_offsets: 6144 0 0, total_size: 18432 diff --git a/tests/ref/fate/sws-pixdesc-query b/tests/ref/fate/sws-pixdesc-query index 14156a383cc39..fd7f2aefc0f18 100644 --- a/tests/ref/fate/sws-pixdesc-query +++ b/tests/ref/fate/sws-pixdesc-query @@ -67,8 +67,12 @@ isNBPS: p012le p210be p210le + p212be + p212le p410be p410le + p412be + p412le x2bgr10be x2bgr10le x2rgb10be @@ -160,8 +164,10 @@ isBE: p012be p016be p210be + p212be p216be p410be + p412be p416be rgb444be rgb48be @@ -226,10 +232,14 @@ isYUV: p016le p210be p210le + p212be + p212le p216be p216le p410be p410le + p412be + p412le p416be p416le uyvy422 @@ -338,10 +348,14 @@ isPlanarYUV: p016le p210be p210le + p212be + p212le p216be p216le p410be p410le + p412be + p412le p416be p416le yuv410p @@ -431,10 +445,14 @@ isSemiPlanarYUV: p016le p210be p210le + p212be + p212le p216be p216le p410be p410le + p412be + p412le p416be p416le @@ -853,10 +871,14 @@ Planar: p016le p210be p210le + p212be + p212le p216be p216le p410be p410le + p412be + p412le p416be p416le yuv410p @@ -1029,8 +1051,12 @@ DataInHighBits: p012le p210be p210le + p212be + p212le p410be p410le + p412be + p412le xv36be xv36le xyz12be diff --git a/tools/cl2c b/tools/source2c similarity index 78% rename from tools/cl2c rename to tools/source2c index e3f92bab1c6e9..6e5f123144aa9 100755 --- a/tools/cl2c +++ b/tools/source2c @@ -1,7 +1,6 @@ #!/bin/sh -# Convert an OpenCL source file into a C source file containing the -# OpenCL source as a C string. Also adds a #line directive so that -# compiler messages are useful. +# Convert a source file into a C source file containing the +# source code as a C string. # This file is part of FFmpeg. # @@ -22,12 +21,11 @@ input="$1" output="$2" -name=$(basename "$input" | sed 's/.cl$//') +name=$(basename "$input" | sed 's/\./_/') cat >$output <