# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
import numpy as np
import pytest

import tvm
from tvm import te, topi
from tvm.testing import assert_allclose
from tvm.topi.utils import get_const_tuple


def check_grad(
    out, inputs, args=[], data_range=(-10, 10), desired_grads=None, assert_no_jacobian=True
):
    inputs = inputs if isinstance(inputs, list) else [inputs]

    def check_device(device, host="llvm"):
        dev = tvm.device(device, 0)
        if not tvm.testing.device_enabled(host):
            return

        sout = te.create_schedule(out.op)
        mout = tvm.build(sout, [out] + inputs + args)

        out_shape = get_const_tuple(out.shape)

        l, h = data_range
        input_data = [
            tvm.nd.array(
                np.random.uniform(l, h, size=get_const_tuple(input.shape)).astype(input.dtype)
            )
            for input in inputs
        ]
        arg_vals = [
            tvm.nd.array(np.random.uniform(l, h, size=get_const_tuple(arg.shape)).astype(arg.dtype))
            for arg in args
        ]

        ones = topi.full_like(out, 1.0)
        # we provide head to sum and reduce the output dimension,
        # which is equivalent to grad(out.sum(), inputs)
        grads = te.gradient(out, inputs, head=ones)
        grad_sched = te.create_schedule([grad.op for grad in grads])
        mgrad = tvm.build(grad_sched, list(grads) + inputs + args)
        if assert_no_jacobian:
            # TODO(yzhliu): it is better to visit the expression and do assertion
            lowered_ir = str(tvm.lower(grad_sched, list(grads) + inputs + args, simple_mode=True))
            assert "jacobian" not in lowered_ir, lowered_ir

        grad_data = [tvm.nd.empty(get_const_tuple(i.shape), g.dtype) for i, g in zip(inputs, grads)]

        mgrad(*grad_data, *input_data, *arg_vals)
        g_res = [g.numpy() for g in grad_data]

        if desired_grads:
            assert isinstance(desired_grads, list)
            for actual, desired in zip(g_res, desired_grads):
                assert_allclose(actual, desired, rtol=0.1, atol=1e-2)
        else:

            def forward(*in_data):
                out_data = tvm.nd.empty(out_shape, out.dtype)
                mout(out_data, *[tvm.nd.array(d) for d in list(in_data)])
                return out_data.numpy().sum()

            tvm.testing.check_numerical_grads(
                forward, [d.numpy() for d in input_data + arg_vals], g_res
            )

    check_device("cpu")
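

# A minimal usage sketch of the helper above (A0 and B are hypothetical
# example tensors; everything else is already imported in this file):
#
#   A0 = te.placeholder((4, 4), name="A0")
#   B = te.compute((4, 4), lambda i, j: A0[i, j] * A0[i, j], name="B")
#   check_grad(B, [A0])
#
# check_grad builds and runs B, differentiates it with te.gradient, and checks
# the result with tvm.testing.check_numerical_grads unless desired_grads is
# given explicitly.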


def test_basic_operation():
    np.random.seed(0)
    shape = (10, 10)
    x = te.var("x", dtype="float32")
    k = te.reduce_axis((0, 10), name="k")
    l = te.reduce_axis((0, 10), name="l")
    A0 = te.placeholder(shape, name="A0")
    A1 = te.placeholder(shape, name="A1")
    zeros = np.zeros(shape)

    B = te.compute(shape, lambda i, j: A0[i, j], name="B")
    check_grad(B, [A0])

    B = te.compute(shape, lambda i, j: A0[i, j] + A1[i, j], name="B")
    check_grad(B, [A0, A1])

    B = te.compute(shape, lambda i, j: A0[i, j] + A0[j, i], name="B")
    check_grad(B, A0)

    B = te.compute(shape, lambda i, j: te.floor(A0[i, j]), name="B")
    check_grad(B, A0, desired_grads=[zeros])

    B = te.compute(shape, lambda i, j: te.ceil(A0[i, j]), name="B")
    check_grad(B, A0, desired_grads=[zeros])

    B = te.compute(shape, lambda i, j: te.trunc(A0[i, j]), name="B")
    check_grad(B, A0, desired_grads=[zeros])

    B = te.compute(shape, lambda i, j: te.round(A0[i, j]), name="B")
    check_grad(B, A0, desired_grads=[zeros])

    B = te.compute(shape, lambda i, j: A0[i, j] + te.exp(A0[j, i]), name="B")
    check_grad(B, A0)

    B = te.compute(shape, lambda i, j: te.log(0.1 + te.abs(A0[i, j] + te.exp(A0[j, i]))), name="B")
    check_grad(B, A0)

    B = te.compute(shape, lambda i, j: te.sigmoid(A0[i, j] * A0[i, j] * A0[j, i]), name="B")
    check_grad(B, A0)

    B = te.compute(shape, lambda i, j: te.tanh(A0[i, j] * A0[i, j] * A0[j, i]), name="B")
    check_grad(B, A0)

    B = te.compute(shape, lambda i, j: te.sqrt(A0[i, j] * A0[i, j] * A0[j, i]), name="B")
    check_grad(B, A0, data_range=(0.1, 10))

    B = te.compute(shape, lambda i, j: te.power(te.abs(A0[i, j]), A0[j, i]), name="B")
    check_grad(B, A0, data_range=(-4, 4))

    B = te.compute(shape, lambda i, j: A0[i, j] * A0[j, i], name="B")
    check_grad(B, A0)

    B = te.compute((10,), lambda i: te.sum(A0[i, k] * A0[k, i], axis=k), name="B")
    check_grad(B, A0)

    B = te.compute(shape, lambda i, j: te.sum(A0[i, k] * A0[k, i] + 5, axis=k), name="B")
    check_grad(B, A0)

    B = te.compute(shape, lambda i, j: te.max(A0[i, k] * A0[k, j] + 5, axis=k), name="B")
    check_grad(B, A0)

    B = te.compute(shape, lambda i, j: A0[i, j] * (A1[j, i] + A0[j, i]), name="B")
    check_grad(B, [A0, A1])

    B = te.compute(
        shape, lambda i, j: te.sum(A0[k, k] - A0[te.min(j + k, 9), j] * A0[i, k], axis=k), name="B"
    )
    check_grad(B, A0)

    def fcombine(x, y):
        return x * y

    def fidentity(t0):
        return tvm.tir.const(1, t0)

    prod = te.comm_reducer(fcombine, fidentity, name="prod")
    B = te.compute((10, 10), lambda i, j: prod(A0[i, k] + A0[k, i], axis=k), name="B")
    check_grad(B, A0)

    X = te.placeholder((10,), name="X")
    A = te.compute((10,), lambda i: X[i] + X[9 - i])
    B = te.compute((10,), lambda i: X[i] * X[9 - i])
    Y = topi.tensordot(A, B, 1)
    check_grad(Y, X)

    X = te.placeholder((3, 3), name="X")
    Y = topi.einsum("ii->i", X)
    check_grad(Y, X)


def test_topi():
    X = te.placeholder((1, 2, 4, 4), name="X")
    W = te.placeholder((5, 2, 3, 3), name="W")
    W1 = te.placeholder((2, 5, 3, 3), name="W1")
    W2 = te.placeholder((1,), name="W2")

    R = topi.nn.conv2d(X, W, 1, 1, 1)
    check_grad(R, [X, W])

    R1 = topi.nn.conv2d(topi.nn.relu(R), W1, 1, 0, 1)
    check_grad(R1, [X, W, W1])

    R = topi.broadcast_to(W2, (5, 2, 3, 3))
    check_grad(R, [W2])

    R = topi.nn.conv2d(X, topi.broadcast_to(W2, (5, 2, 3, 3)), 1, 1, 1)
    check_grad(R, [X, W2])

    R = topi.nn.pool2d(X, [2, 2], [1, 1], [2, 2], [0, 0, 0, 0], "avg")
    check_grad(R, X)

    R = topi.nn.pool2d(X, [2, 2], [1, 1], [2, 2], [0, 0, 0, 0], "max")
    check_grad(R, X)

    # reshape target (1, 32) requires 32 input elements, hence (1, 2, 4, 4)
    X = te.placeholder((1, 2, 4, 4), name="X")
    R = topi.reshape(X, (1, 32))
    check_grad(R, [X])

    X = te.placeholder((1, 2, 5, 5), name="X")
    W = te.placeholder((2, 2, 3, 3), name="W")

    S = topi.reshape(X, (1, 50))
    check_grad(S, [X])

    R = X + topi.nn.conv2d(X + topi.nn.conv2d(X, W, 1, 1, 1), W, 1, 1, 1)
    check_grad(R, [X, W])

    S = topi.nn.softmax(topi.reshape(R, (1, 50)))
    check_grad(S, [X, W])

    S = topi.sigmoid(topi.reshape(R, (1, 50)))
    check_grad(S, [X, W])

    S = topi.tanh(topi.reshape(R, (1, 50)))
    check_grad(S, [X, W])

    S = topi.nn.log_softmax(topi.reshape(R, (1, 50)))
    check_grad(S, [X, W])
    check_grad(S, [W], [X])

    X = te.placeholder((1, 2, 3, 5), name="X")
    Y = te.placeholder((1, 2, 7, 5), name="Y")
    S = topi.concatenate((X, Y), 2)
    check_grad(S, [X, Y])

    X = te.placeholder((1, 2, 6, 5), name="X")
    (S, R) = topi.split(X, 2, 2)
    check_grad(S, [X])
    check_grad(R, [X])
    R1 = topi.concatenate((S, R), 2)
    check_grad(R1, [X])
    R2 = topi.concatenate((R, S), 2)
    check_grad(R2, [X])

    X = te.placeholder((4, 5), name="X")
    I = te.placeholder((100,), name="I", dtype="int32")
    R = topi.take(X, topi.abs(I))
    check_grad(R, [X], [I])

    W = te.placeholder((5, 5), name="W")
    exps = topi.exp(topi.nn.dense(X, W))
    sumexps = topi.sum(exps, axis=-1, keepdims=True)
    R = exps / sumexps
    check_grad(R, [X, W], data_range=(-1, 1))
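

# The sweep below checks conv2d gradients for every combination of stride
# (1, 2, 3) and dilation (1, 2, 3) with 1x1, 2x2, and 3x3 kernels, then max
# pooling over a similar grid of window parameters.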


def test_stride_dilation():
    X = te.placeholder((1, 2, 10, 10), name="X")

    W = te.placeholder((2, 2, 1, 1), name="W")
    Y = topi.nn.conv2d(X, W, 1, 0, 1)
    check_grad(Y, [X, W])
    Y = topi.nn.conv2d(X, W, 2, 0, 1)
    check_grad(Y, [X, W])
    Y = topi.nn.conv2d(X, W, 3, 0, 1)
    check_grad(Y, [X, W])
    Y = topi.nn.conv2d(X, W, 1, 0, 2)
    check_grad(Y, [X, W])
    Y = topi.nn.conv2d(X, W, 2, 0, 2)
    check_grad(Y, [X, W])
    Y = topi.nn.conv2d(X, W, 3, 0, 2)
    check_grad(Y, [X, W])
    Y = topi.nn.conv2d(X, W, 1, 0, 3)
    check_grad(Y, [X, W])
    Y = topi.nn.conv2d(X, W, 2, 0, 3)
    check_grad(Y, [X, W])
    Y = topi.nn.conv2d(X, W, 3, 0, 3)
    check_grad(Y, [X, W])

    W = te.placeholder((2, 2, 2, 2), name="W")
    Y = topi.nn.conv2d(X, W, 1, 0, 1)
    check_grad(Y, [X, W])
    Y = topi.nn.conv2d(X, W, 2, 0, 1)
    check_grad(Y, [X, W])
    Y = topi.nn.conv2d(X, W, 3, 0, 1)
    check_grad(Y, [X, W])
    Y = topi.nn.conv2d(X, W, 1, 0, 2)
    check_grad(Y, [X, W])
    Y = topi.nn.conv2d(X, W, 2, 0, 2)
    check_grad(Y, [X, W])
    Y = topi.nn.conv2d(X, W, 3, 0, 2)
    check_grad(Y, [X, W])
    Y = topi.nn.conv2d(X, W, 1, 0, 3)
    check_grad(Y, [X, W])
    Y = topi.nn.conv2d(X, W, 2, 0, 3)
    check_grad(Y, [X, W])
    Y = topi.nn.conv2d(X, W, 3, 0, 3)
    check_grad(Y, [X, W])

    W = te.placeholder((2, 2, 3, 3), name="W")
    Y = topi.nn.conv2d(X, W, 1, 0, 1)
    check_grad(Y, [X, W])
    Y = topi.nn.conv2d(X, W, 2, 0, 1)
    check_grad(Y, [X, W])
    Y = topi.nn.conv2d(X, W, 3, 0, 1)
    check_grad(Y, [X, W])
    Y = topi.nn.conv2d(X, W, 1, 0, 2)
    check_grad(Y, [X, W])
    Y = topi.nn.conv2d(X, W, 2, 0, 2)
    check_grad(Y, [X, W])
    Y = topi.nn.conv2d(X, W, 3, 0, 2)
    check_grad(Y, [X, W])
    Y = topi.nn.conv2d(X, W, 1, 0, 3)
    check_grad(Y, [X, W])
    Y = topi.nn.conv2d(X, W, 2, 0, 3)
    check_grad(Y, [X, W])
    Y = topi.nn.conv2d(X, W, 3, 0, 3)
    check_grad(Y, [X, W])

    Y = topi.nn.pool2d(X, [1, 1], [1, 1], [1, 1], [0, 0, 0, 0], "max")
    check_grad(Y, [X])
    Y = topi.nn.pool2d(X, [1, 1], [1, 1], [2, 2], [0, 0, 0, 0], "max")
    check_grad(Y, [X])
    Y = topi.nn.pool2d(X, [1, 1], [1, 1], [3, 3], [0, 0, 0, 0], "max")
    check_grad(Y, [X])
    Y = topi.nn.pool2d(X, [2, 2], [1, 1], [1, 1], [0, 0, 0, 0], "max")
    check_grad(Y, [X])
    Y = topi.nn.pool2d(X, [2, 2], [1, 1], [2, 2], [0, 0, 0, 0], "max")
    check_grad(Y, [X])
    Y = topi.nn.pool2d(X, [2, 2], [1, 1], [3, 3], [0, 0, 0, 0], "max")
    check_grad(Y, [X])
    Y = topi.nn.pool2d(X, [3, 3], [1, 1], [1, 1], [0, 0, 0, 0], "max")
    check_grad(Y, [X])
    Y = topi.nn.pool2d(X, [3, 3], [1, 1], [2, 2], [0, 0, 0, 0], "max")
    check_grad(Y, [X])
    Y = topi.nn.pool2d(X, [3, 3], [1, 1], [3, 3], [0, 0, 0, 0], "max")
    check_grad(Y, [X])


@pytest.mark.xfail
def test_reduction_init():
    np.random.seed(0)
    shape = (10, 10)
    k = te.reduce_axis((0, 10), name="k")
    A0 = te.placeholder(shape, name="A0")

    B = te.compute((10,), lambda i: te.sum(A0[i, k] * A0[k, i], axis=k, init=0.0), name="B")
    check_grad(B, A0)


if __name__ == "__main__":
    test_basic_operation()
    test_topi()
    test_stride_dilation()