/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * \file graph_executor.cc
 */
#include "graph_executor.h"

#include <tvm/runtime/container/map.h>
#include <tvm/runtime/container/string.h>
#include <tvm/runtime/device_api.h>
#include <tvm/runtime/ndarray.h>
#include <tvm/runtime/packed_func.h>
#include <tvm/runtime/profiling.h>
#include <tvm/runtime/registry.h>
#include <tvm/runtime/serializer.h>

#include <algorithm>
#include <functional>
#include <memory>
#include <numeric>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>

#include "../file_utils.h"

namespace tvm {
namespace runtime {
namespace details {
/*!
 * \brief Compute the alignment associated with a tensor's element type.
 * \param arr The tensor whose dtype is inspected.
 *
 * \return The size in bytes of one (possibly vectorized) element,
 *  i.e. (bits / 8) * lanes, but never less than kAllocAlignment.
 */
inline size_t GetDataAlignment(const DLTensor& arr) {
  // Integer division: a dtype narrower than 8 bits yields 0 here and is
  // therefore raised to kAllocAlignment below.
  size_t align = (arr.dtype.bits / 8) * arr.dtype.lanes;
  if (align < kAllocAlignment) return kAllocAlignment;
  return align;
}
}  // namespace details

/*!
 * \brief Run all the operations one by one.
 */
void GraphExecutor::Run() {
  // Entries of op_execs_ may be empty callables; those are skipped.
  for (const auto& op_exec : op_execs_) {
    if (op_exec) op_exec();
  }
}

/*!
 * \brief Initialize the graph executor with graph and device.
 * \param graph_json The execution graph.
 * \param module The module containing the compiled functions for the host
 *  processor.
 * \param devs The devices of the host and devices where graph nodes will be
 *  executed on.
 * \param lookup_linked_param_func Linked parameter lookup function. Default is nullptr.
*/ void GraphExecutor::Init(const std::string& graph_json, tvm::runtime::Module module, const std::vector& devs, const PackedFunc lookup_linked_param_func) { std::istringstream is(graph_json); dmlc::JSONReader reader(&is); this->Load(&reader); module_ = module; devices_ = devs; lookup_linked_param_ = lookup_linked_param_func; if (lookup_linked_param_ == nullptr) { lookup_linked_param_ = PackedFunc( [this](TVMArgs args, TVMRetValue* rv) { this->DefaultLookupLinkedParam(args, rv); }); } this->SetupStorage(); this->SetupOpExecs(); for (size_t i = 0; i < input_nodes_.size(); i++) { const uint32_t nid = input_nodes_[i]; std::string& name = nodes_[nid].name; input_map_[name] = i; } for (size_t i = 0; i < outputs_.size(); i++) { const uint32_t nid = outputs_[i].node_id; std::string& name = nodes_[nid].name; output_map_[name] = i; } } /*! * \brief Get the input index given the name of input. * \param name The name of the input. * \return The index of input. */ int GraphExecutor::GetInputIndex(const std::string& name) { auto it = input_map_.find(name); if (it != input_map_.end()) { return it->second; } return -1; } /*! * \brief Get the output index given the name of output. * \param name The name of the output. * \return The index of output. */ int GraphExecutor::GetOutputIndex(const std::string& name) { auto it = output_map_.find(name); if (it != output_map_.end()) { return it->second; } return -1; } /*! * \brief set index-th input to the graph. * \param index The input index. * \param data_in The input data. */ void GraphExecutor::SetInput(int index, DLTensor* data_in) { ICHECK_LT(static_cast(index), input_nodes_.size()); uint32_t eid = this->entry_id(input_nodes_[index], 0); data_entry_[eid].CopyFrom(data_in); } /*! * \brief Get the name of the index-th input. * \param index The input index. * * \return The name of the index-th input. 
*/ std::string GraphExecutor::GetInputName(int index) const { CHECK_LT(static_cast(index), input_nodes_.size()) << "The index is out of range."; return nodes_[input_nodes_[index]].name; } /*! * \brief Get the type of the index-th input. * \param index The input index. * * \return The type of the index-th input. */ std::string GraphExecutor::GetInputType(int index) const { CHECK_LT(static_cast(index), input_nodes_.size()) << "The index is out of range."; uint32_t eid = this->entry_id(input_nodes_[index], 0); return attrs_.dltype[eid]; } /*! * \brief Get the names of weight inputs. * * \return The names of the weight inputs. */ std::vector GraphExecutor::GetWeightNames() const { return weight_names_; } /*! * \brief Check the legality of external DLTensor*. * \param external The external DLTensor*. * \param eid The data_enrty_ index. */ void GraphExecutor::CheckExternalDLTensor(const DLTensor* external, uint32_t eid) const { const DLTensor* internal = data_entry_[eid].operator->(); ICHECK_EQ(data_alignment_[eid], details::GetDataAlignment(*external)); ICHECK_EQ(reinterpret_cast(external->data) % kAllocAlignment, 0); ICHECK_EQ(internal->ndim, static_cast(external->ndim)); ICHECK_EQ(internal->device.device_type, external->device.device_type); ICHECK_EQ(internal->device.device_id, external->device.device_id); for (auto i = 0; i < external->ndim; ++i) { ICHECK_EQ(internal->shape[i], external->shape[i]); } } /*! * \brief set index-th input to the graph without copying the data. * \param index The input index. * \param data_ref The input data that is referred. */ void GraphExecutor::SetInputZeroCopy(int index, DLTensor* data_ref) { ICHECK_LT(static_cast(index), input_nodes_.size()); uint32_t eid = this->entry_id(input_nodes_[index], 0); // check the consistency of input CheckExternalDLTensor(data_ref, eid); // Update the data pointer for each argument of each op for (DLTensor* t : input_dltensors_[eid]) { t->data = data_ref->data; } } /*! 
* \brief set index-th output to the graph without copying the data. * \param index The output index. * \param data_ref The output data that is referred. */ void GraphExecutor::SetOutputZeroCopy(int index, DLTensor* data_ref) { ICHECK_LT(static_cast(index), outputs_.size()); ICHECK_LT(static_cast(index), output_dltensors_.size()); const NodeEntry& output_node = outputs_[index]; uint32_t output_node_eid = this->entry_id(output_node); // check the consistency of output CheckExternalDLTensor(data_ref, output_node_eid); // Update the data pointer for output op for (DLTensor* t : output_dltensors_[output_node_eid]) { t->data = data_ref->data; } // Update the input of the op connected to the output for (DLTensor* t : both_output_opinput_dltensors_[output_node_eid]) { t->data = data_ref->data; } } /*! * \brief Get the number of outputs * * \return The number of outputs from graph. */ int GraphExecutor::NumOutputs() const { return outputs_.size(); } /*! * \brief Get the number of inputs * * \return The number of inputs to the graph. */ int GraphExecutor::NumInputs() const { return input_nodes_.size(); } /*! * \brief Get the type of the index-th output. * \param index The output index. * * \return The type of the index-th output. */ std::string GraphExecutor::GetOutputType(int index) const { CHECK_LT(static_cast(index), outputs_.size()) << "The index is out of range."; uint32_t eid = this->entry_id(outputs_[index]); return attrs_.dltype[eid]; } /*! * \brief Return NDArray for given input index. * \param index The input index. * * \return NDArray corresponding to given input node index. */ NDArray GraphExecutor::GetInput(int index) const { ICHECK_LT(static_cast(index), input_nodes_.size()); uint32_t eid = this->entry_id(input_nodes_[index], 0); return data_entry_[eid]; } /*! * \brief Return NDArray for given output index. * \param index The output index. * * \return NDArray corresponding to given output node index. 
*/ NDArray GraphExecutor::GetOutput(int index) const { ICHECK_LT(static_cast(index), outputs_.size()); uint32_t eid = this->entry_id(outputs_[index]); return data_entry_[eid]; } /*! * \brief Copy index-th output to data_out. * \param index The output index. * \param data_out the output data. */ void GraphExecutor::CopyOutputTo(int index, DLTensor* data_out) { ICHECK_LT(static_cast(index), outputs_.size()); uint32_t eid = this->entry_id(outputs_[index]); // Check the shapes to avoid receiving in different dimension but same size. const NDArray& data = data_entry_[eid]; ICHECK_EQ(data->ndim, data_out->ndim); for (int32_t j = 0; j < data->ndim; ++j) { ICHECK_EQ(data->shape[j], data_out->shape[j]); } data_entry_[eid].CopyTo(data_out); } /*! * \brief Load parameters from parameter blob. * \param param_blob A binary blob of parameter. */ void GraphExecutor::LoadParams(const std::string& param_blob) { dmlc::MemoryStringStream strm(const_cast(¶m_blob)); this->LoadParams(&strm); } void GraphExecutor::LoadParams(dmlc::Stream* strm) { weight_names_.clear(); Map params = ::tvm::runtime::LoadParams(strm); for (auto& p : params) { int in_idx = GetInputIndex(p.first); if (in_idx < 0) continue; uint32_t eid = this->entry_id(input_nodes_[in_idx], 0); data_entry_[eid].CopyFrom(p.second); // neo-ai-tvm: Store weight names. 
weight_names_.push_back(p.first); } } void GraphExecutor::ShareParams(const GraphExecutor& other, dmlc::Stream* strm) { uint64_t header, reserved; ICHECK(strm->Read(&header)) << "Invalid parameters file format"; ICHECK(header == kTVMNDArrayListMagic) << "Invalid parameters file format"; ICHECK(strm->Read(&reserved)) << "Invalid parameters file format"; std::vector names; ICHECK(strm->Read(&names)) << "Invalid parameters file format"; uint64_t sz; strm->Read(&sz); size_t size = static_cast(sz); ICHECK(size == names.size()) << "Invalid parameters file format"; for (size_t i = 0; i < size; ++i) { int in_idx = GetInputIndex(names[i]); if (in_idx < 0) continue; uint32_t eid = this->entry_id(input_nodes_[in_idx], 0); ICHECK_LT(eid, data_entry_.size()); ICHECK_EQ(data_entry_[eid].use_count(), 1); data_entry_[eid] = other.GetInput(GetInputIndex(names[i])); ICHECK_GT(data_entry_[eid].use_count(), 1); const DLTensor* tmp = data_entry_[eid].operator->(); data_alignment_[eid] = details::GetDataAlignment(*tmp); } this->SetupOpExecs(); } void GraphExecutor::LinkedNDArrayDeleter(Object* container) { // container is the NDArray::Container which needs to get deleted. // The data member points to global const memory, so it does not need deleting. delete static_cast(container); } void GraphExecutor::DefaultLookupLinkedParam(TVMArgs args, TVMRetValue* rv) { Module mod = args[0]; int64_t storage_id = args[1]; DLTensor* template_tensor = args[2]; Device dev = args[3]; // Get pre-linked parameter lookup function, if it was generated. When pf == nullptr, no linked // params are present. 
if (!module_lookup_linked_param_valid_) { module_lookup_linked_param_ = mod.GetFunction(::tvm::runtime::symbol::tvm_lookup_linked_param, true); } if (module_lookup_linked_param_ == nullptr) { *rv = nullptr; return; } TVMRetValue opaque_handle = module_lookup_linked_param_(storage_id); if (opaque_handle.type_code() == kTVMNullptr) { *rv = nullptr; return; } std::vector shape_vec{template_tensor->shape, template_tensor->shape + template_tensor->ndim}; auto* container = new NDArray::Container(static_cast(opaque_handle), shape_vec, template_tensor->dtype, dev); container->SetDeleter(GraphExecutor::LinkedNDArrayDeleter); *rv = NDArray(GetObjectPtr