/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * \file adjoint.cc
 * \brief Perform reverse-mode autodiff.
 *  Suppose we have f(x) = g(h1(x), h2(x), ..., hn(x)),
 *  df/dx = \sum_i df/dhi * dhi/dx
 *  We call df/dx the adjoint of x (adjoint(x)), df/dhi the adjoint of hi (adjoint(hi)),
 *  and dhi/dx the Jacobian.
 *  The idea is to first construct the reverse-dependency map {input -> outputs} between tensors,
 *  then, starting from one input,
 *  (1) collect the adjoints from all of its dependencies (outputs),
 *  (2) multiply each by the corresponding Jacobian (PartialAdjoint),
 *  (3) and sum them together to get the adjoint of the input itself.
 *  The three steps are computed recursively.
 */
#include <tvm/runtime/registry.h>
#include <tvm/te/autodiff.h>
#include <tvm/te/operation.h>
#include <tvm/topi/broadcast.h>
#include <tvm/topi/elemwise.h>
#include <tvm/topi/transform.h>

#include <functional>
#include <string>
#include <unordered_map>
#include <vector>

#include "ad_utils.h"

namespace tvm {
namespace te {

// Construct the diagonal identity tensor of shape output->shape + output->shape,
// i.e. the adjoint of `output` with respect to itself.
Tensor Identity(const Tensor& output) {
  Array<PrimExpr> shape = output->shape;
  for (auto e : output->shape) {
    // add extra dimension for Jacobian
    shape.push_back(e);
  }
  auto func = [&output](const Array<Var>& input_indices) {
    PrimExpr res = const_true();
    for (size_t i = 0; i < output->shape.size(); ++i) {
      res = res &&
            (PrimExpr(input_indices[i]) == PrimExpr(input_indices[output->shape.size() + i]));
    }
    return Cast(output->dtype, res);
  };
  return te::compute(shape, func, "identity");
}

// Multiply the adjoint `head` by the Jacobian of `output` w.r.t. `input`,
// contracting over the output axes, then simplify the result.
Tensor VectorJacobianProduct(const Tensor& output, const Tensor& input, const Tensor& head) {
  Tensor jac = Jacobian(output, input);
  Tensor result = topi::tensordot(head, jac, /*axes=*/output->shape.size(),
                                  output->op->name + "." + input->op->name + ".grad");
  result = InlineTensorAccess(result, {jac}, false);
  result = RemoveJacobianAndLiftNonzeroCond(result);
  // inline tail call
  result = InlineTailTensorAccess(result);
  return result;
}
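// Shape sketch for VectorJacobianProduct: writing the adjoint `head` as
//   [p1, ..., pk, o1, ..., om]   (prefix dims + output dims)
// and the Jacobian as
//   [o1, ..., om, i1, ..., in]   (output dims + input dims),
// the tensordot over the m output axes yields
//   [p1, ..., pk, i1, ..., in]   (the adjoint contribution w.r.t. `input`).
// When `head` comes from Identity(output), the prefix dims equal the output dims.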
Array<Tensor> Gradient(const Tensor& output, const Array<Tensor>& inputs,
                       const Tensor& head_or_null) {
  // Diagonal identity tensor
  Tensor head = head_or_null.get() ? head_or_null : Identity(output);

  // This Map{input -> outputs} maps a tensor to the list of tensors
  // immediately depending on it (using it in their bodies)
  std::unordered_map<Tensor, std::vector<Tensor>> reverse_dependencies;
  std::vector<Tensor> stack({output});
  while (!stack.empty()) {
    Tensor tensor = stack.back();
    stack.pop_back();
    for (const Tensor& input : tensor->op->InputTensors()) {
      if (!reverse_dependencies.count(input)) {
        stack.push_back(input);
      }
      reverse_dependencies[input].push_back(tensor);
    }
  }

  // This map maps tensors to the corresponding adjoints (dLoss/dTensor)
  std::unordered_map<Tensor, Tensor> adjoints;
  // head is the adjoint of output by definition
  adjoints[output] = head;

  // This is a recursive function that does all the work. It computes the adjoint for a given
  // tensor, adds it to the map, and returns it.
  std::function<Tensor(const Tensor&)> compute_adjoint;
  compute_adjoint = [&compute_adjoint, &adjoints, &reverse_dependencies, &head,
                     &output](const Tensor& tensor) {
    if (!adjoints.count(tensor)) {
      // Here the adjoint hasn't been computed yet
      Tensor res_adjoint;
      std::vector<Tensor> direct_consumers = reverse_dependencies[tensor];
      if (direct_consumers.empty()) {
        // No reverse dependencies means that the output does not depend on this tensor;
        // return a zero tensor of the appropriate shape
        // (i.e., output shape + tensor shape, aka the shape of the Jacobian)
        Array<PrimExpr> result_shape(head->shape.begin(),
                                     head->shape.end() - output->shape.size());
        for (auto e : tensor->shape) {
          result_shape.push_back(e);
        }
        res_adjoint = topi::full(result_shape, output->dtype, make_zero(output->dtype));
      } else {
        // The new adjoint is computed as a sum of the reverse dependencies' adjoints multiplied
        // by the corresponding "local" Jacobians (dDep/dTensor). The computation of the Jacobian
        // and the multiplication is done in the function VectorJacobianProduct
        for (const Tensor& direct_consumer : direct_consumers) {
          // part = (adjoint of direct_consumer) * Jacobian(direct_consumer, tensor)
          Tensor part =
              VectorJacobianProduct(direct_consumer, tensor, compute_adjoint(direct_consumer));
          res_adjoint = res_adjoint.get() ? topi::add(res_adjoint, part) : part;
        }
      }
      adjoints[tensor] = res_adjoint;
      return res_adjoint;
    } else {
      return adjoints[tensor];
    }
  };

  // Adjoints corresponding to inputs
  Array<Tensor> result;
  // Compute an adjoint for each input
  for (const Tensor& input : inputs) {
    result.push_back(compute_adjoint(input));
  }

  return result;
}

TVM_REGISTER_GLOBAL("te.Gradient").set_body([](TVMArgs args, TVMRetValue* ret) {
  LOG(WARNING) << "te.Gradient is an experimental feature.";
  if (args.size() == 2) {
    *ret = Gradient(args[0], args[1]);
  } else if (args.size() == 3) {
    *ret = Gradient(args[0], args[1], args[2]);
  }
});

}  // namespace te
}  // namespace tvm
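// A minimal usage sketch (illustrative only; the names `n`, `x`, `y`, `grads`
// are hypothetical, and the te calls assume <tvm/te/operation.h>):
//
//   tir::Var n("n");
//   te::Tensor x = te::placeholder({n}, DataType::Float(32), "x");
//   te::Tensor y = te::compute(
//       {n}, [&x](tir::Var i) { return x(i) * x(i); }, "y");
//   // With no head given, the adjoint of y defaults to Identity(y),
//   // so grads[0] = dy/dx has shape {n, n}.
//   Array<te::Tensor> grads = te::Gradient(y, {x});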