/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ #ifndef TVM_RUNTIME_VULKAN_VULKAN_DEVICE_H_ #define TVM_RUNTIME_VULKAN_VULKAN_DEVICE_H_ #include #include #include #include #include #include #include #include #include "../thread_map.h" #include "vulkan/vulkan_core.h" #include "vulkan_buffer.h" #include "vulkan_stream.h" namespace tvm { namespace runtime { namespace vulkan { class VulkanInstance; class VulkanDevice; struct VulkanDescriptorTemplateKHRFunctions { explicit VulkanDescriptorTemplateKHRFunctions(VkDevice device); PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR{nullptr}; PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR{nullptr}; PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR{nullptr}; PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR{nullptr}; }; struct VulkanGetBufferMemoryRequirements2Functions { explicit VulkanGetBufferMemoryRequirements2Functions(VkDevice device); PFN_vkGetBufferMemoryRequirements2KHR vkGetBufferMemoryRequirements2KHR{nullptr}; }; /*! * \brief Stores the capabilities/limits queried from the physical device. * * The member variables here have a 1-1 mapping to Target parameters, * if target->kind->device_type==kDLVulkan. A separate struct is used * to maintain the boundary between the Vulkan runtime in * libtvm_runtime.so, and the Target object in libtvm.so. */ struct VulkanDeviceProperties { VulkanDeviceProperties() {} VulkanDeviceProperties(const VulkanInstance& instance, const VulkanDevice& device); bool supports_float16{false}; bool supports_float32{true}; bool supports_float64{false}; bool supports_int8{false}; bool supports_int16{false}; bool supports_int32{true}; bool supports_int64{false}; bool supports_8bit_buffer{false}; bool supports_16bit_buffer{false}; bool supports_storage_buffer_storage_class{false}; bool supports_push_descriptor{false}; bool supports_dedicated_allocation{false}; uint32_t supported_subgroup_operations{0}; uint32_t max_num_threads{1}; uint32_t thread_warp_size{1}; uint32_t max_block_size_x{1}; uint32_t max_block_size_y{1}; uint32_t max_block_size_z{1}; uint32_t max_push_constants_size{128}; uint32_t max_uniform_buffer_range{16384}; uint32_t max_storage_buffer_range{1 << 27}; uint32_t max_per_stage_descriptor_storage_buffer{4}; uint32_t max_shared_memory_per_block{16384}; std::string device_type{"unknown_device_type"}; std::string device_name{"unknown_device_name"}; std::string driver_name{"unknown_driver_name"}; uint32_t driver_version{0}; uint32_t vulkan_api_version{VK_API_VERSION_1_0}; uint32_t max_spirv_version{0x10000}; }; /*! \brief Handle to the Vulkan API's VkDevice * * Handles all setup and teardown of the class. The owner of the * VulkanDevice object is responsible for ensuring that it remains * alive as long as any object that accesses that device is used. */ class VulkanDevice { public: VulkanDevice(const VulkanInstance& instance, VkPhysicalDevice phy_dev); ~VulkanDevice(); // Allow move constructor/assignment VulkanDevice(VulkanDevice&&); VulkanDevice& operator=(VulkanDevice&&); // Disable copy constructor/assignment VulkanDevice(const VulkanDevice&) = delete; VulkanDevice& operator=(const VulkanDevice&) = delete; /*! \brief Expose the internal VkDevice * * Allows the managed class to be passed to Vulkan APIs as if it * were the VkDevice handler itself. */ operator VkDevice() const { return device_; } /*! \brief Expose the internal VkPhysicalDevice * * Allows the managed class to be passed to Vulkan APIs as if it * were the VkPhysicalDevice handler itself. */ operator VkPhysicalDevice() const { return physical_device_; } /*! \brief Returns whether this device supports Vulkan compute operations. * * If the device does not support Vulkan compute operations, it * should not be used any further. */ bool SupportsCompute() const; /*! \brief Calls vkQueueSubmit to run work on the GPU * * Currently only supports submitting a single VkSubmitInfo at a * time. Handles mutexing internally, safe to call from multiple * CPU threads. * * \param submit_info The job submission information to be passed to * vkQueueSubmit. * * \param fence Optional fence to be passed to vkQueueSubmit, * signals once the command buffers submitted have completed. */ void QueueSubmit(VkSubmitInfo submit_info, VkFence fence) const; /*! \brief Checks if the device has an extension enabled * * Returns true if the device was initialized with the extension * given. * * \param query The name of the extension to check. */ bool HasExtension(const char* query) const; //! \brief Return the VulkanStream for the current CPU thread VulkanStream& ThreadLocalStream(); //! \brief Return the VulkanStream for the current CPU thread const VulkanStream& ThreadLocalStream() const; /*! \brief Return the staging buffer for the current CPU thread * * This function may re-allocate the staging buffer depending on the * size of the previously allocated buffer. * * \param min_size The size in bytes of the staging buffer to be * returned. The buffer may be larger than requested, depending on * previous use. */ VulkanStagingBuffer& ThreadLocalStagingBuffer(size_t min_size); /*! \brief Allocate the uniform buffer for the current CPU thread * * \param min_size The minimum size in bytes of the uniformn buffer * to be allocated. If a larger uniform buffer has already been * allocated, no allocation is performed. */ void AllocateThreadLocalUniformBuffer(size_t min_size); /*! \brief Return the uniform buffer for the current CPU thread * * Assumes that AllocateThreadLocalUniformBuffer has previously been * called, with a min_size greater than or equal to the min_size of * the current call. If this is not the case, will throw an * exception. * * \param min_size The minimum size in bytes of the uniform buffer to be * returned. */ VulkanUniformBuffer& ThreadLocalUniformBuffer(size_t min_size); // Cached device properties, queried through Vulkan API. VulkanDeviceProperties device_properties{}; // Memory type index for staging. uint32_t staging_mtype_index{0}; // whether staging is coherent bool coherent_staging{false}; std::unique_ptr descriptor_template_khr_functions{nullptr}; std::unique_ptr get_buffer_memory_requirements_2_functions{nullptr}; // Memory type index for compute uint32_t compute_mtype_index{0}; // queue family_index; uint32_t queue_family_index{uint32_t(-1)}; bool UseImmediate() const { return descriptor_template_khr_functions != nullptr; } private: /*! \brief Helper function for move assignment/construction * * Named "do_swap" instead of "swap" because otherwise cpplint.py * thinks that it needs the header include. */ void do_swap(VulkanDevice&& other); /*! \brief Returns a queue family capable of running Vulkan compute * operations */ uint32_t SelectComputeQueueFamily() const; /*! \brief Returns the extensions to be enabled. * * All char* in the returned vector point to static memory * allocations, and do not require cleanup. */ std::vector SelectEnabledExtensions() const; /*! \brief Initialize the VkDevice * * Called during VulkanDevice construction. Assumes that * queue_family_index, device_properties, and enabled_extensions * have been set. */ void CreateVkDevice(const VulkanInstance& instance); //! \brief Handle to the Vulkan API physical device VkPhysicalDevice physical_device_{nullptr}; /*! \brief Extensions enabled for this device * * Based on supported extensions queried from physical_device_ prior * to creating device_. Contains only statically allocated string * literals, no cleanup required. */ std::vector enabled_extensions; //! \brief Handle to the Vulkan API logical device VkDevice device_{nullptr}; //! \brief Mutex to protect access to queue mutable std::mutex queue_mutex; /*! \brief Handle to Vulkan API VkQueue. * * Work can be executed by submitted to this queue using * VulkanDevice::QueueSubmit. */ VkQueue queue{nullptr}; /*! \brief The VulkanStream for each CPU thread. * * To mimic the semantics of cudaSetDevice and cuLaunchKernel, each * CPU thread must have a separate stream of execution. The * ThreadMap is declared mutable so that the streams can be lazily * generated. */ mutable ThreadMap stream_per_thread; //! \brief The VulkanStagingBuffer for each CPU thread. ThreadMap staging_buffer_per_thread; //! \brief The VulkanUniformBuffer for each CPU thread. ThreadMap uniform_buffer_per_thread; }; uint32_t FindMemoryType(const VulkanDevice& device, VkBufferCreateInfo info, VkMemoryPropertyFlags req_prop); VkBufferCreateInfo MakeBufferCreateInfo(size_t nbytes, VkBufferUsageFlags usage); } // namespace vulkan } // namespace runtime } // namespace tvm #endif // TVM_RUNTIME_VULKAN_VULKAN_DEVICE_H_