# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. import numpy as np import pytest import tvm from tvm import te from tvm import relay from tvm.relay.testing import check_grad, run_infer_type, run_opt_pass, _np_randn_from_type from tvm.relay.transform import gradient import tvm.testing @tvm.testing.uses_gpu def test_clip(): for dtype in ("float32", "float64"): ref = lambda x: np.where( x > 10.0, np.zeros_like(x), np.where(x < 1.0, np.zeros_like(x), np.ones_like(x)) ) x = relay.var("x", relay.TensorType((10, 4), dtype)) y = tvm.relay.clip(x, 1.0, 10.0) data = np.random.rand(10, 4).astype(dtype) * 11.0 ref_grad = ref(data) fwd_func = relay.Function([x], y) fwd_func = run_infer_type(fwd_func) bwd_func = run_infer_type(gradient(fwd_func)) for target, dev in tvm.testing.enabled_targets(): op_res, (op_grad,) = relay.create_executor(device=dev, target=target).evaluate( bwd_func )(data) np.testing.assert_allclose(op_grad.numpy(), ref_grad, rtol=0.01) def verify_transpose_grad(d_shape, axes=None): data = relay.var("data", relay.TensorType(d_shape, "float32")) fwd_func = relay.Function([data], relay.transpose(data, axes=axes)) check_grad(fwd_func) def test_transpose_grad(): verify_transpose_grad((1, 2, 3, 4)) verify_transpose_grad((1, 2, 3, 4), axes=(0, 2, 3, 1)) def test_negative_grad(): data = relay.var("data", relay.TensorType((10, 4), "float32")) fwd_func = relay.Function([data], relay.negative(data)) check_grad(fwd_func) def test_cast_grad(): data = relay.var("data", relay.TensorType((10, 4), "float32")) fwd_func = relay.Function([data], relay.cast(data, "float64")) check_grad(fwd_func) def test_cast_like_grad(): data = relay.var("data", shape=(10, 4), dtype="float32") like = relay.var("like", shape=(1,), dtype="float64") fwd_func = relay.Function([data, like], relay.cast_like(data, like)) check_grad(fwd_func) def test_copy_grad(): data = relay.var("data", relay.TensorType((10, 4), "float64")) fwd_func = relay.Function([data], relay.copy(data)) check_grad(fwd_func) def test_take_grad(): data_dtype = relay.TensorType((3, 4, 5), "float64") data = relay.var("data", data_dtype) indices = relay.var("indices", relay.TensorType((relay.Any(),), "int32")) inputs = [_np_randn_from_type(data_dtype, scale=1e-5), np.array([1, 2], dtype="int32")] test_inputs = [inputs[0]] # take on axis fwd_func = relay.Function([data, indices], relay.take(data, indices, axis=1)) check_grad(fwd_func, inputs=inputs, test_inputs=test_inputs) # take on flattened fwd_func = relay.Function([data, indices], relay.take(data, indices, axis=None)) check_grad(fwd_func, inputs=inputs, test_inputs=test_inputs) def test_stack_grad(): args = [relay.var(c, shape=(2, 3, 4), dtype="float64") for c in "xyz"] fwd_func = relay.Function(args, relay.stack(args, axis=0)) check_grad(fwd_func) def test_squeeze_grad(): data = relay.var("data", shape=(2, 1, 1, 3, 4, 1), dtype="float64") fwd_func = relay.Function([data], relay.squeeze(data)) fwd_func_subset = relay.Function([data], relay.squeeze(data, axis=[1, -1])) check_grad(fwd_func) check_grad(fwd_func_subset) def test_arange_grad(): # TODO: testing arange numerically is strange because two-sided approx can # produce different output shapes dtype = "float64" start = relay.var("start", relay.TensorType((), dtype)) stop = relay.var("stop", relay.TensorType((), dtype)) step = relay.var("step", relay.TensorType((), dtype)) values = [np.array(v, dtype=dtype) for v in [2.5, 9.5, 1.8]] fwd_func = relay.Function([start, stop, step], relay.arange(start, stop, step, dtype)) check_grad(fwd_func, inputs=values) def test_gather_nd_grad(): data = relay.var("data", relay.TensorType((2, 3), "float64")) indices = relay.var("indices", relay.TensorType((2, 4), "int64")) fwd = relay.Function([data, indices], relay.gather_nd(data, indices)) data_np = np.random.rand(2, 3).astype("float64") indices_np = np.array([[0, 1, 1, 0], [0, 1, 0, 0]], dtype="int64") check_grad(fwd, inputs=[data_np, indices_np], test_inputs=[data_np]) def test_reshape_like_grad(): data = relay.var("data", shape=(2, 3, 4), dtype="float32") shape_like = relay.var("shape_like", shape=(6, 2, 2), dtype="float32") fwd_func = relay.Function([data, shape_like], relay.reshape_like(data, shape_like)) check_grad(fwd_func) def test_zeros_ones_grad_const_ints(): # when shape is static (i.e. not an input), there is no gradient at all static_ty = relay.TensorType([2, 3, 4], dtype="float32") expected_ty = relay.TupleType([static_ty, relay.TupleType([])]) for op in [relay.zeros, relay.ones]: fwd_func = relay.Function([], op(static_ty.concrete_shape, static_ty.dtype)) bwd_func = run_infer_type(gradient(run_infer_type(fwd_func))) tvm.ir.assert_structural_equal(bwd_func.ret_type, expected_ty) def test_zeros_ones_grad_const_expr(): # when shape is static (i.e. not an input), there is no gradient at all shape_const = relay.const(np.array([2, 3, 4]), dtype="int32") * relay.const(1, dtype="int32") static_ty = relay.TensorType([2, 3, 4], dtype="float32") dyn_ty = relay.TensorType([relay.Any(), relay.Any(), relay.Any()], dtype="float32") expected_ty_static = relay.TupleType([static_ty, relay.TupleType([])]) expected_ty_dyn = relay.TupleType([dyn_ty, relay.TupleType([])]) for op in [relay.zeros, relay.ones]: # with DynamicToStatic, the shape should be concretized fwd_func = relay.Function([], op(shape_const, static_ty.dtype)) fwd_func = run_opt_pass(fwd_func, relay.transform.DynamicToStatic()) bwd_func = run_infer_type(gradient(run_infer_type(fwd_func))) tvm.ir.assert_structural_equal(bwd_func.ret_type, expected_ty_static) fwd_func = relay.Function([], op(shape_const, static_ty.dtype)) bwd_func = run_infer_type(gradient(run_infer_type(fwd_func))) tvm.ir.assert_structural_equal(bwd_func.ret_type, expected_ty_dyn) def test_zeros_ones_grad_dynamic(): rank = np.random.randint(low=1, high=5, dtype="int32") dyn_shape = np.random.randint(low=1, high=4, size=(rank,), dtype="int32") shape_data = relay.var("shape_data", shape=(rank,), dtype="int32") for op, op_ref in [(relay.zeros, np.zeros), (relay.ones, np.ones)]: fwd_func = relay.Function([shape_data], op(shape_data, dtype="float32")) bwd_func = run_infer_type(gradient(run_infer_type(fwd_func))) for target, dev in tvm.testing.enabled_targets(): res, (grad,) = relay.create_executor(device=dev, target=target).evaluate(bwd_func)( dyn_shape ) tvm.testing.assert_allclose(res.numpy(), op_ref(dyn_shape, dtype="float32")) tvm.testing.assert_allclose(grad.numpy(), np.zeros((rank,), dtype="int32")) if __name__ == "__main__": pytest.main()