# File content is auto-generated. Do not modify.
# pylint: skip-file
from ._internal import NDArrayBase
from ..base import _Null

def ElementWiseSum(*args, **kwargs):
    r"""Adds all input arguments element-wise.

    .. math::
       add\_n(a_1, a_2, ..., a_n) = a_1 + a_2 + ... + a_n

    ``add_n`` is potentially more efficient than calling ``add`` `n` times.

    The storage type of ``add_n`` output depends on storage types of inputs

    - add_n(row_sparse, row_sparse, ..) = row_sparse
    - add_n(default, csr, default) = default
    - add_n(any input combination of more than 4 inputs (>4) with at least one default type) = default
    - otherwise, ``add_n`` falls back to default storage for all inputs and generates output with default storage

    Defined in src/operator/tensor/elemwise_sum.cc:L156

    Parameters
    ----------
    args : NDArray[]
        Positional input arguments
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)
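# Editor's illustrative sketch (not auto-generated): calling ``add_n`` through
# the public ``mx.nd`` API with two row_sparse inputs, which should keep the
# row_sparse storage type per the rules above. Assumes MXNet 1.x is installed;
# the helper name and values are hypothetical.
def _example_add_n_row_sparse():
    import mxnet as mx
    a = mx.nd.array([[0., 1.], [2., 0.]]).tostype('row_sparse')
    b = mx.nd.array([[0., 0.], [4., 5.]]).tostype('row_sparse')
    out = mx.nd.add_n(a, b)           # row_sparse + row_sparse -> row_sparse
    assert out.stype == 'row_sparse'
    return out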
def Embedding(data=None, weight=None, input_dim=_Null, output_dim=_Null, dtype=_Null, sparse_grad=_Null, out=None, name=None, **kwargs):
    r"""Maps integer indices to vector representations (embeddings).

    This operator maps words to real-valued vectors in a high-dimensional space,
    called word embeddings. These embeddings can capture semantic and syntactic
    properties of the words. For example, it has been noted that in the learned
    embedding spaces, similar words tend to be close to each other and
    dissimilar words far apart.

    For an input array of shape (d1, ..., dK), the shape of an output array is
    (d1, ..., dK, output_dim). All the input values should be integers in the
    range [0, input_dim).

    If the input_dim is ip0 and output_dim is op0, then the shape of the
    embedding weight matrix must be (ip0, op0).

    By default, if any index mentioned is too large, it is replaced by the index
    that addresses the last vector in an embedding matrix.

    Examples::

      input_dim = 4
      output_dim = 5

      // Each row in weight matrix y represents a word. So, y = (w0,w1,w2,w3)
      y = [[  0.,   1.,   2.,   3.,   4.],
           [  5.,   6.,   7.,   8.,   9.],
           [ 10.,  11.,  12.,  13.,  14.],
           [ 15.,  16.,  17.,  18.,  19.]]

      // Input array x represents n-grams (2-gram). So, x = [(w1,w3), (w0,w2)]
      x = [[ 1.,  3.],
           [ 0.,  2.]]

      // Mapped input x to its vector representation y.
      Embedding(x, y, 4, 5) = [[[  5.,   6.,   7.,   8.,   9.],
                                [ 15.,  16.,  17.,  18.,  19.]],
                               [[  0.,   1.,   2.,   3.,   4.],
                                [ 10.,  11.,  12.,  13.,  14.]]]

    The storage type of weight can be either row_sparse or default.

    .. Note::

        If "sparse_grad" is set to True, the storage type of the gradient w.r.t.
        weight will be "row_sparse". Only a subset of optimizers support sparse
        gradients, including SGD, AdaGrad and Adam. Note that lazy update is
        turned on by default, which may perform differently from standard
        updates. For more details, please check the Optimization API at:
        https://mxnet.incubator.apache.org/api/python/optimization/optimization.html

    Defined in src/operator/tensor/indexing_op.cc:L267

    Parameters
    ----------
    data : NDArray
        The input array to the embedding operator.
    weight : NDArray
        The embedding weight matrix.
    input_dim : int, required
        Vocabulary size of the input indices.
    output_dim : int, required
        Dimension of the embedding vectors.
    dtype : {'float16', 'float32', 'float64', 'int32', 'int64', 'int8', 'uint8'}, optional, default='float32'
        Data type of weight.
    sparse_grad : boolean, optional, default=0
        Compute row sparse gradient in the backward calculation. If set to True,
        the grad's storage type is row_sparse.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def FullyConnected(data=None, weight=None, bias=None, num_hidden=_Null, no_bias=_Null, flatten=_Null, out=None, name=None, **kwargs):
    r"""Applies a linear transformation: :math:`Y = XW^T + b`.

    If ``flatten`` is set to be true, then the shapes are:

    - **data**: `(batch_size, x1, x2, ..., xn)`
    - **weight**: `(num_hidden, x1 * x2 * ... * xn)`
    - **bias**: `(num_hidden,)`
    - **out**: `(batch_size, num_hidden)`

    If ``flatten`` is set to be false, then the shapes are:

    - **data**: `(x1, x2, ..., xn, input_dim)`
    - **weight**: `(num_hidden, input_dim)`
    - **bias**: `(num_hidden,)`
    - **out**: `(x1, x2, ..., xn, num_hidden)`

    The learnable parameters include both ``weight`` and ``bias``.

    If ``no_bias`` is set to be true, then the ``bias`` term is ignored.

    .. Note::

        The sparse support for FullyConnected is limited to forward evaluation
        with `row_sparse` weight and bias, where the length of `weight.indices`
        and `bias.indices` must be equal to `num_hidden`. This could be useful
        for model inference with `row_sparse` weights trained with importance
        sampling or noise contrastive estimation.

        To compute the linear transformation with 'csr' sparse data, sparse.dot
        is recommended instead of sparse.FullyConnected.

    Defined in src/operator/nn/fully_connected.cc:L271

    Parameters
    ----------
    data : NDArray
        Input data.
    weight : NDArray
        Weight matrix.
    bias : NDArray
        Bias parameter.
    num_hidden : int, required
        Number of hidden nodes of the output.
    no_bias : boolean, optional, default=0
        Whether to disable bias parameter.
    flatten : boolean, optional, default=1
        Whether to collapse all but the first axis of the input data tensor.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def LinearRegressionOutput(data=None, label=None, grad_scale=_Null, out=None, name=None, **kwargs):
    r"""Computes and optimizes for squared loss during backward propagation.
    Just outputs ``data`` during forward propagation.

    If :math:`\hat{y}_i` is the predicted value of the i-th sample, and
    :math:`y_i` is the corresponding target value, then the squared loss
    estimated over :math:`n` samples is defined as

    :math:`\text{SquaredLoss}(\textbf{Y}, \hat{\textbf{Y}} ) = \frac{1}{n} \sum_{i=0}^{n-1} \lVert \textbf{y}_i - \hat{\textbf{y}}_i \rVert_2`

    .. note::
       Use the LinearRegressionOutput as the final output layer of a net.

    The storage type of ``label`` can be ``default`` or ``csr``

    - LinearRegressionOutput(default, default) = default
    - LinearRegressionOutput(default, csr) = default

    By default, gradients of this loss function are scaled by factor `1/m`,
    where m is the number of regression outputs of a training example. The
    parameter `grad_scale` can be used to change this scale to `grad_scale/m`.

    Defined in src/operator/regression_output.cc:L92

    Parameters
    ----------
    data : NDArray
        Input data to the function.
    label : NDArray
        Input label to the function.
    grad_scale : float, optional, default=1
        Scale the gradient by a float factor
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)
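# Editor's illustrative sketch (not auto-generated): the Embedding call from the
# docstring above, issued through the public ``mx.nd`` API with ``sparse_grad``
# enabled so the weight gradient would be row_sparse. Assumes MXNet 1.x; the
# helper name is hypothetical.
def _example_embedding_sparse_grad():
    import mxnet as mx
    x = mx.nd.array([[1., 3.], [0., 2.]])    # 2-grams of word ids
    y = mx.nd.arange(20).reshape((4, 5))     # weight: (input_dim, output_dim)
    out = mx.nd.Embedding(data=x, weight=y, input_dim=4, output_dim=5,
                          sparse_grad=True)
    return out                               # shape (2, 2, 5)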
def LogisticRegressionOutput(data=None, label=None, grad_scale=_Null, out=None, name=None, **kwargs):
    r"""Applies a logistic function to the input.

    The logistic function, also known as the sigmoid function, is computed as
    :math:`\frac{1}{1+exp(-\textbf{x})}`.

    Commonly, the sigmoid is used to squash the real-valued output of a linear
    model :math:`w^T x + b` into the [0,1] range so that it can be interpreted
    as a probability. It is suitable for binary classification or probability
    prediction tasks.

    .. note::
       Use the LogisticRegressionOutput as the final output layer of a net.

    The storage type of ``label`` can be ``default`` or ``csr``

    - LogisticRegressionOutput(default, default) = default
    - LogisticRegressionOutput(default, csr) = default

    The loss function used is the Binary Cross Entropy Loss:

    :math:`-{(y\log(p) + (1 - y)\log(1 - p))}`

    where `y` is the ground truth probability of positive outcome for a given
    example, and `p` is the probability predicted by the model. By default,
    gradients of this loss function are scaled by factor `1/m`, where m is the
    number of regression outputs of a training example. The parameter
    `grad_scale` can be used to change this scale to `grad_scale/m`.

    Defined in src/operator/regression_output.cc:L152

    Parameters
    ----------
    data : NDArray
        Input data to the function.
    label : NDArray
        Input label to the function.
    grad_scale : float, optional, default=1
        Scale the gradient by a float factor
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def MAERegressionOutput(data=None, label=None, grad_scale=_Null, out=None, name=None, **kwargs):
    r"""Computes mean absolute error of the input.

    MAE is a risk metric corresponding to the expected value of the absolute
    error.

    If :math:`\hat{y}_i` is the predicted value of the i-th sample, and
    :math:`y_i` is the corresponding target value, then the mean absolute error
    (MAE) estimated over :math:`n` samples is defined as

    :math:`\text{MAE}(\textbf{Y}, \hat{\textbf{Y}} ) = \frac{1}{n} \sum_{i=0}^{n-1} \lVert \textbf{y}_i - \hat{\textbf{y}}_i \rVert_1`

    .. note::
       Use the MAERegressionOutput as the final output layer of a net.

    The storage type of ``label`` can be ``default`` or ``csr``

    - MAERegressionOutput(default, default) = default
    - MAERegressionOutput(default, csr) = default

    By default, gradients of this loss function are scaled by factor `1/m`,
    where m is the number of regression outputs of a training example. The
    parameter `grad_scale` can be used to change this scale to `grad_scale/m`.

    Defined in src/operator/regression_output.cc:L120

    Parameters
    ----------
    data : NDArray
        Input data to the function.
    label : NDArray
        Input label to the function.
    grad_scale : float, optional, default=1
        Scale the gradient by a float factor
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def abs(data=None, out=None, name=None, **kwargs):
    r"""Returns element-wise absolute value of the input.

    Example::

       abs([-2, 0, 3]) = [2, 0, 3]

    The storage type of ``abs`` output depends upon the input storage type:

    - abs(default) = default
    - abs(row_sparse) = row_sparse
    - abs(csr) = csr

    Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L660

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)
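# Editor's illustrative sketch (not auto-generated): using
# LogisticRegressionOutput as a final layer. Forward just applies the sigmoid;
# the BCE gradient described above only matters during backward. Assumes
# MXNet 1.x; names and values are hypothetical.
def _example_logistic_regression_output():
    import mxnet as mx
    scores = mx.nd.array([[2.0], [-1.0]])    # raw linear-model outputs
    labels = mx.nd.array([[1.0], [0.0]])     # ground-truth {0, 1} labels
    prob = mx.nd.LogisticRegressionOutput(data=scores, label=labels)
    return prob                              # sigmoid(scores), in [0, 1]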
def adagrad_update(weight=None, grad=None, history=None, lr=_Null, epsilon=_Null, wd=_Null, rescale_grad=_Null, clip_gradient=_Null, out=None, name=None, **kwargs):
    r"""Update function for AdaGrad optimizer.

    Referenced from *Adaptive Subgradient Methods for Online Learning and
    Stochastic Optimization*, available at
    http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf.

    Updates are applied by::

        rescaled_grad = clip(grad * rescale_grad, clip_gradient)
        history = history + square(rescaled_grad)
        w = w - learning_rate * rescaled_grad / sqrt(history + epsilon)

    Note that non-zero values for the weight decay option are not supported.

    Defined in src/operator/optimizer_op.cc:L665

    Parameters
    ----------
    weight : NDArray
        Weight
    grad : NDArray
        Gradient
    history : NDArray
        History
    lr : float, required
        Learning rate
    epsilon : float, optional, default=1e-07
        epsilon
    wd : float, optional, default=0
        weight decay
    rescale_grad : float, optional, default=1
        Rescale gradient to grad = rescale_grad*grad.
    clip_gradient : float, optional, default=-1
        Clip gradient to the range of [-clip_gradient, clip_gradient].
        If clip_gradient <= 0, gradient clipping is turned off.
        grad = max(min(grad, clip_gradient), -clip_gradient).
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def adam_update(weight=None, grad=None, mean=None, var=None, lr=_Null, beta1=_Null, beta2=_Null, epsilon=_Null, wd=_Null, rescale_grad=_Null, clip_gradient=_Null, lazy_update=_Null, out=None, name=None, **kwargs):
    r"""Update function for Adam optimizer. Adam is seen as a generalization of
    AdaGrad.

    Adam update consists of the following steps, where g represents gradient and
    m, v are 1st and 2nd order moment estimates (mean and variance).

    .. math::

     g_t = \nabla J(W_{t-1})\\
     m_t = \beta_1 m_{t-1} + (1 - \beta_1) g_t\\
     v_t = \beta_2 v_{t-1} + (1 - \beta_2) g_t^2\\
     W_t = W_{t-1} - \alpha \frac{ m_t }{ \sqrt{ v_t } + \epsilon }

    It updates the weights using::

     m = beta1*m + (1-beta1)*grad
     v = beta2*v + (1-beta2)*(grad**2)
     w += - learning_rate * m / (sqrt(v) + epsilon)

    However, if grad's storage type is ``row_sparse``, ``lazy_update`` is True
    and the storage type of weight is the same as those of m and v, only the row
    slices whose indices appear in grad.indices are updated (for w, m and v)::

     for row in grad.indices:
         m[row] = beta1*m[row] + (1-beta1)*grad[row]
         v[row] = beta2*v[row] + (1-beta2)*(grad[row]**2)
         w[row] += - learning_rate * m[row] / (sqrt(v[row]) + epsilon)

    Defined in src/operator/optimizer_op.cc:L495

    Parameters
    ----------
    weight : NDArray
        Weight
    grad : NDArray
        Gradient
    mean : NDArray
        Moving mean
    var : NDArray
        Moving variance
    lr : float, required
        Learning rate
    beta1 : float, optional, default=0.9
        The decay rate for the 1st moment estimates.
    beta2 : float, optional, default=0.999
        The decay rate for the 2nd moment estimates.
    epsilon : float, optional, default=1e-08
        A small constant for numerical stability.
    wd : float, optional, default=0
        Weight decay augments the objective function with a regularization term
        that penalizes large weights. The penalty scales with the square of the
        magnitude of each weight.
    rescale_grad : float, optional, default=1
        Rescale gradient to grad = rescale_grad*grad.
    clip_gradient : float, optional, default=-1
        Clip gradient to the range of [-clip_gradient, clip_gradient].
        If clip_gradient <= 0, gradient clipping is turned off.
        grad = max(min(grad, clip_gradient), -clip_gradient).
    lazy_update : boolean, optional, default=1
        If true, lazy updates are applied if gradient's stype is row_sparse and
        all of w, m and v have the same stype
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)
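# Editor's illustrative sketch (not auto-generated): one in-place Adam step with
# a row_sparse gradient and lazy_update=True, so only the rows listed in
# grad.indices are touched. Assumes MXNet 1.x; the helper name and values are
# hypothetical.
def _example_adam_update_lazy():
    import mxnet as mx
    weight = mx.nd.ones((4, 2)).tostype('row_sparse')
    mean = mx.nd.zeros((4, 2)).tostype('row_sparse')
    var = mx.nd.zeros((4, 2)).tostype('row_sparse')
    grad = mx.nd.array([[0., 0.], [1., 1.], [0., 0.], [2., 2.]]).tostype('row_sparse')
    mx.nd.adam_update(weight, grad, mean, var, lr=0.01, beta1=0.9, beta2=0.999,
                      epsilon=1e-8, lazy_update=True, out=weight)
    return weight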
def add_n(*args, **kwargs):
    r"""Adds all input arguments element-wise.

    .. math::
       add\_n(a_1, a_2, ..., a_n) = a_1 + a_2 + ... + a_n

    ``add_n`` is potentially more efficient than calling ``add`` `n` times.

    The storage type of ``add_n`` output depends on storage types of inputs

    - add_n(row_sparse, row_sparse, ..) = row_sparse
    - add_n(default, csr, default) = default
    - add_n(any input combination of more than 4 inputs (>4) with at least one default type) = default
    - otherwise, ``add_n`` falls back to default storage for all inputs and generates output with default storage

    Defined in src/operator/tensor/elemwise_sum.cc:L156

    Parameters
    ----------
    args : NDArray[]
        Positional input arguments
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def arccos(data=None, out=None, name=None, **kwargs):
    r"""Returns element-wise inverse cosine of the input array.

    The input should be in range `[-1, 1]`. The output is in the closed
    interval :math:`[0, \pi]`

    .. math::
       arccos([-1, -.707, 0, .707, 1]) = [\pi, 3\pi/4, \pi/2, \pi/4, 0]

    The storage type of ``arccos`` output is always dense

    Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L123

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def arccosh(data=None, out=None, name=None, **kwargs):
    r"""Returns the inverse hyperbolic cosine of the input array, computed
    element-wise.

    The storage type of ``arccosh`` output is always dense

    Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L264

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def arcsin(data=None, out=None, name=None, **kwargs):
    r"""Returns element-wise inverse sine of the input array.

    The input should be in the range `[-1, 1]`. The output is in the closed
    interval of [:math:`-\pi/2`, :math:`\pi/2`].

    .. math::
       arcsin([-1, -.707, 0, .707, 1]) = [-\pi/2, -\pi/4, 0, \pi/4, \pi/2]

    The storage type of ``arcsin`` output depends upon the input storage type:

    - arcsin(default) = default
    - arcsin(row_sparse) = row_sparse
    - arcsin(csr) = csr

    Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L104

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def arcsinh(data=None, out=None, name=None, **kwargs):
    r"""Returns the inverse hyperbolic sine of the input array, computed
    element-wise.

    The storage type of ``arcsinh`` output depends upon the input storage type:

    - arcsinh(default) = default
    - arcsinh(row_sparse) = row_sparse
    - arcsinh(csr) = csr

    Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L250

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def arctan(data=None, out=None, name=None, **kwargs):
    r"""Returns element-wise inverse tangent of the input array.

    The output is in the closed interval :math:`[-\pi/2, \pi/2]`

    .. math::
       arctan([-1, 0, 1]) = [-\pi/4, 0, \pi/4]

    The storage type of ``arctan`` output depends upon the input storage type:

    - arctan(default) = default
    - arctan(row_sparse) = row_sparse
    - arctan(csr) = csr

    Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L144

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)
def arctanh(data=None, out=None, name=None, **kwargs):
    r"""Returns the inverse hyperbolic tangent of the input array, computed
    element-wise.

    The storage type of ``arctanh`` output depends upon the input storage type:

    - arctanh(default) = default
    - arctanh(row_sparse) = row_sparse
    - arctanh(csr) = csr

    Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L281

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def broadcast_add(lhs=None, rhs=None, out=None, name=None, **kwargs):
    r"""Returns element-wise sum of the input arrays with broadcasting.

    `broadcast_plus` is an alias to the function `broadcast_add`.

    Example::

       x = [[ 1.,  1.,  1.],
            [ 1.,  1.,  1.]]

       y = [[ 0.],
            [ 1.]]

       broadcast_add(x, y) = [[ 1.,  1.,  1.],
                              [ 2.,  2.,  2.]]

       broadcast_plus(x, y) = [[ 1.,  1.,  1.],
                               [ 2.,  2.,  2.]]

    Supported sparse operations:

       broadcast_add(csr, dense(1D)) = dense
       broadcast_add(dense(1D), csr) = dense

    Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L58

    Parameters
    ----------
    lhs : NDArray
        First input to the function
    rhs : NDArray
        Second input to the function
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def broadcast_div(lhs=None, rhs=None, out=None, name=None, **kwargs):
    r"""Returns element-wise division of the input arrays with broadcasting.

    Example::

       x = [[ 6.,  6.,  6.],
            [ 6.,  6.,  6.]]

       y = [[ 2.],
            [ 3.]]

       broadcast_div(x, y) = [[ 3.,  3.,  3.],
                              [ 2.,  2.,  2.]]

    Supported sparse operations:

       broadcast_div(csr, dense(1D)) = csr

    Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L187

    Parameters
    ----------
    lhs : NDArray
        First input to the function
    rhs : NDArray
        Second input to the function
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def broadcast_minus(lhs=None, rhs=None, out=None, name=None, **kwargs):
    r"""Returns element-wise difference of the input arrays with broadcasting.

    `broadcast_minus` is an alias to the function `broadcast_sub`.

    Example::

       x = [[ 1.,  1.,  1.],
            [ 1.,  1.,  1.]]

       y = [[ 0.],
            [ 1.]]

       broadcast_sub(x, y) = [[ 1.,  1.,  1.],
                              [ 0.,  0.,  0.]]

       broadcast_minus(x, y) = [[ 1.,  1.,  1.],
                                [ 0.,  0.,  0.]]

    Supported sparse operations:

       broadcast_sub/minus(csr, dense(1D)) = dense
       broadcast_sub/minus(dense(1D), csr) = dense

    Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L106

    Parameters
    ----------
    lhs : NDArray
        First input to the function
    rhs : NDArray
        Second input to the function
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)
def broadcast_mul(lhs=None, rhs=None, out=None, name=None, **kwargs):
    r"""Returns element-wise product of the input arrays with broadcasting.

    Example::

       x = [[ 1.,  1.,  1.],
            [ 1.,  1.,  1.]]

       y = [[ 0.],
            [ 1.]]

       broadcast_mul(x, y) = [[ 0.,  0.,  0.],
                              [ 1.,  1.,  1.]]

    Supported sparse operations:

       broadcast_mul(csr, dense(1D)) = csr

    Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L146

    Parameters
    ----------
    lhs : NDArray
        First input to the function
    rhs : NDArray
        Second input to the function
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def broadcast_plus(lhs=None, rhs=None, out=None, name=None, **kwargs):
    r"""Returns element-wise sum of the input arrays with broadcasting.

    `broadcast_plus` is an alias to the function `broadcast_add`.

    Example::

       x = [[ 1.,  1.,  1.],
            [ 1.,  1.,  1.]]

       y = [[ 0.],
            [ 1.]]

       broadcast_add(x, y) = [[ 1.,  1.,  1.],
                              [ 2.,  2.,  2.]]

       broadcast_plus(x, y) = [[ 1.,  1.,  1.],
                               [ 2.,  2.,  2.]]

    Supported sparse operations:

       broadcast_add(csr, dense(1D)) = dense
       broadcast_add(dense(1D), csr) = dense

    Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L58

    Parameters
    ----------
    lhs : NDArray
        First input to the function
    rhs : NDArray
        Second input to the function
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def broadcast_sub(lhs=None, rhs=None, out=None, name=None, **kwargs):
    r"""Returns element-wise difference of the input arrays with broadcasting.

    `broadcast_minus` is an alias to the function `broadcast_sub`.

    Example::

       x = [[ 1.,  1.,  1.],
            [ 1.,  1.,  1.]]

       y = [[ 0.],
            [ 1.]]

       broadcast_sub(x, y) = [[ 1.,  1.,  1.],
                              [ 0.,  0.,  0.]]

       broadcast_minus(x, y) = [[ 1.,  1.,  1.],
                                [ 0.,  0.,  0.]]

    Supported sparse operations:

       broadcast_sub/minus(csr, dense(1D)) = dense
       broadcast_sub/minus(dense(1D), csr) = dense

    Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L106

    Parameters
    ----------
    lhs : NDArray
        First input to the function
    rhs : NDArray
        Second input to the function
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def cast_storage(data=None, stype=_Null, out=None, name=None, **kwargs):
    r"""Casts tensor storage type to the new type.

    When an NDArray with default storage type is cast to csr or row_sparse
    storage, the result is compact, which means:

    - for csr, zero values will not be retained
    - for row_sparse, row slices of all zeros will not be retained

    The storage type of ``cast_storage`` output depends on stype parameter:

    - cast_storage(csr, 'default') = default
    - cast_storage(row_sparse, 'default') = default
    - cast_storage(default, 'csr') = csr
    - cast_storage(default, 'row_sparse') = row_sparse
    - cast_storage(csr, 'csr') = csr
    - cast_storage(row_sparse, 'row_sparse') = row_sparse

    Example::

        dense = [[ 0.,  1.,  0.],
                 [ 2.,  0.,  3.],
                 [ 0.,  0.,  0.],
                 [ 0.,  0.,  0.]]

        # cast to row_sparse storage type
        rsp = cast_storage(dense, 'row_sparse')
        rsp.indices = [0, 1]
        rsp.values = [[ 0.,  1.,  0.],
                      [ 2.,  0.,  3.]]

        # cast to csr storage type
        csr = cast_storage(dense, 'csr')
        csr.indices = [1, 0, 2]
        csr.values = [ 1.,  2.,  3.]
        csr.indptr = [0, 1, 3, 3, 3]

    Defined in src/operator/tensor/cast_storage.cc:L71

    Parameters
    ----------
    data : NDArray
        The input.
    stype : {'csr', 'default', 'row_sparse'}, required
        Output storage type.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)
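# Editor's illustrative sketch (not auto-generated): the cast_storage example
# above, run through the public ``mx.nd`` API, then inspecting the CSR fields
# (exposed in Python as .data/.indices/.indptr). Assumes MXNet 1.x.
def _example_cast_storage():
    import mxnet as mx
    dense = mx.nd.array([[0., 1., 0.], [2., 0., 3.], [0., 0., 0.], [0., 0., 0.]])
    rsp = mx.nd.cast_storage(dense, stype='row_sparse')   # keeps rows 0 and 1
    csr = mx.nd.cast_storage(dense, stype='csr')          # keeps values 1, 2, 3
    return rsp.indices, csr.indptr, csr.indices, csr.data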
""" return (0,) def cbrt(data=None, out=None, name=None, **kwargs): r"""Returns element-wise cube-root value of the input. .. math:: cbrt(x) = \sqrt[3]{x} Example:: cbrt([1, 8, -125]) = [1, 2, -5] The storage type of ``cbrt`` output depends upon the input storage type: - cbrt(default) = default - cbrt(row_sparse) = row_sparse - cbrt(csr) = csr Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L881 Parameters ---------- data : NDArray The input array. out : NDArray, optional The output NDArray to hold the result. Returns ------- out : NDArray or list of NDArrays The output of this function. """ return (0,) def ceil(data=None, out=None, name=None, **kwargs): r"""Returns element-wise ceiling of the input. The ceil of the scalar x is the smallest integer i, such that i >= x. Example:: ceil([-2.1, -1.9, 1.5, 1.9, 2.1]) = [-2., -1., 2., 2., 3.] The storage type of ``ceil`` output depends upon the input storage type: - ceil(default) = default - ceil(row_sparse) = row_sparse - ceil(csr) = csr Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L738 Parameters ---------- data : NDArray The input array. out : NDArray, optional The output NDArray to hold the result. Returns ------- out : NDArray or list of NDArrays The output of this function. """ return (0,) def clip(data=None, a_min=_Null, a_max=_Null, out=None, name=None, **kwargs): r"""Clips (limits) the values in an array. Given an interval, values outside the interval are clipped to the interval edges. Clipping ``x`` between `a_min` and `a_x` would be:: clip(x, a_min, a_max) = max(min(x, a_max), a_min)) Example:: x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] clip(x,1,8) = [ 1., 1., 2., 3., 4., 5., 6., 7., 8., 8.] The storage type of ``clip`` output depends on storage types of inputs and the a_min, a_max \ parameter values: - clip(default) = default - clip(row_sparse, a_min <= 0, a_max >= 0) = row_sparse - clip(csr, a_min <= 0, a_max >= 0) = csr - clip(row_sparse, a_min < 0, a_max < 0) = default - clip(row_sparse, a_min > 0, a_max > 0) = default - clip(csr, a_min < 0, a_max < 0) = csr - clip(csr, a_min > 0, a_max > 0) = csr Defined in src/operator/tensor/matrix_op.cc:L619 Parameters ---------- data : NDArray Input array. a_min : float, required Minimum value a_max : float, required Maximum value out : NDArray, optional The output NDArray to hold the result. Returns ------- out : NDArray or list of NDArrays The output of this function. """ return (0,) def concat(*data, **kwargs): r"""Joins input arrays along a given axis. .. note:: `Concat` is deprecated. Use `concat` instead. The dimensions of the input arrays should be the same except the axis along which they will be concatenated. The dimension of the output array along the concatenated axis will be equal to the sum of the corresponding dimensions of the input arrays. The storage type of ``concat`` output depends on storage types of inputs - concat(csr, csr, ..., csr, dim=0) = csr - otherwise, ``concat`` generates output with default storage Example:: x = [[1,1],[2,2]] y = [[3,3],[4,4],[5,5]] z = [[6,6], [7,7],[8,8]] concat(x,y,z,dim=0) = [[ 1., 1.], [ 2., 2.], [ 3., 3.], [ 4., 4.], [ 5., 5.], [ 6., 6.], [ 7., 7.], [ 8., 8.]] Note that you cannot concat x,y,z along dimension 1 since dimension 0 is not the same for all the input arrays. concat(y,z,dim=1) = [[ 3., 3., 6., 6.], [ 4., 4., 7., 7.], [ 5., 5., 8., 8.]] Defined in src/operator/nn/concat.cc:L365 Parameters ---------- data : NDArray[] List of arrays to concatenate dim : int, optional, default='1' the dimension to be concated. 
def cos(data=None, out=None, name=None, **kwargs):
    r"""Computes the element-wise cosine of the input array.

    The input should be in radians (:math:`2\pi` rad equals 360 degrees).

    .. math::
       cos([0, \pi/4, \pi/2]) = [1, 0.707, 0]

    The storage type of ``cos`` output is always dense

    Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L63

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def cosh(data=None, out=None, name=None, **kwargs):
    r"""Returns the hyperbolic cosine of the input array, computed element-wise.

    .. math::
       cosh(x) = 0.5\times(exp(x) + exp(-x))

    The storage type of ``cosh`` output is always dense

    Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L216

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def degrees(data=None, out=None, name=None, **kwargs):
    r"""Converts each element of the input array from radians to degrees.

    .. math::
       degrees([0, \pi/2, \pi, 3\pi/2, 2\pi]) = [0, 90, 180, 270, 360]

    The storage type of ``degrees`` output depends upon the input storage type:

    - degrees(default) = default
    - degrees(row_sparse) = row_sparse
    - degrees(csr) = csr

    Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L163

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)
def dot(lhs=None, rhs=None, transpose_a=_Null, transpose_b=_Null, forward_stype=_Null, out=None, name=None, **kwargs):
    r"""Dot product of two arrays.

    ``dot``'s behavior depends on the input array dimensions:

    - 1-D arrays: inner product of vectors
    - 2-D arrays: matrix multiplication
    - N-D arrays: a sum product over the last axis of the first input and the
      first axis of the second input

      For example, given 3-D ``x`` with shape `(n,m,k)` and ``y`` with shape
      `(k,r,s)`, the result array will have shape `(n,m,r,s)`. It is computed
      by::

        dot(x,y)[i,j,a,b] = sum(x[i,j,:]*y[:,a,b])

      Example::

        x = reshape([0,1,2,3,4,5,6,7], shape=(2,2,2))
        y = reshape([7,6,5,4,3,2,1,0], shape=(2,2,2))
        dot(x,y)[0,0,1,1] = 0
        sum(x[0,0,:]*y[:,1,1]) = 0

    The storage type of ``dot`` output depends on storage types of inputs,
    transpose option and forward_stype option for output storage type.
    Implemented sparse operations include:

    - dot(default, default, transpose_a=True/False, transpose_b=True/False) = default
    - dot(csr, default, transpose_a=True) = default
    - dot(csr, default, transpose_a=True) = row_sparse
    - dot(csr, default) = default
    - dot(csr, row_sparse) = default
    - dot(default, csr) = csr (CPU only)
    - dot(default, csr, forward_stype='default') = default
    - dot(default, csr, transpose_b=True, forward_stype='default') = default

    If the combination of input storage types and forward_stype does not match
    any of the above patterns, ``dot`` will fall back and generate output with
    default storage.

    .. Note::

        If the storage type of the lhs is "csr", the storage type of gradient
        w.r.t rhs will be "row_sparse". Only a subset of optimizers support
        sparse gradients, including SGD, AdaGrad and Adam. Note that lazy update
        is turned on by default, which may perform differently from standard
        updates. For more details, please check the Optimization API at:
        https://mxnet.incubator.apache.org/api/python/optimization/optimization.html

    Defined in src/operator/tensor/dot.cc:L77

    Parameters
    ----------
    lhs : NDArray
        The first input
    rhs : NDArray
        The second input
    transpose_a : boolean, optional, default=0
        If true then transpose the first input before dot.
    transpose_b : boolean, optional, default=0
        If true then transpose the second input before dot.
    forward_stype : {None, 'csr', 'default', 'row_sparse'}, optional, default='None'
        The desired storage type of the forward output given by user; if the
        combination of input storage types and this hint does not match any
        implemented ones, the dot operator will perform a fallback operation
        and still produce an output of the desired storage type.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def elemwise_add(lhs=None, rhs=None, out=None, name=None, **kwargs):
    r"""Adds arguments element-wise.

    The storage type of ``elemwise_add`` output depends on storage types of
    inputs

    - elemwise_add(row_sparse, row_sparse) = row_sparse
    - elemwise_add(csr, csr) = csr
    - elemwise_add(default, csr) = default
    - elemwise_add(csr, default) = default
    - elemwise_add(default, rsp) = default
    - elemwise_add(rsp, default) = default
    - otherwise, ``elemwise_add`` generates output with default storage

    Parameters
    ----------
    lhs : NDArray
        first input
    rhs : NDArray
        second input
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def elemwise_div(lhs=None, rhs=None, out=None, name=None, **kwargs):
    r"""Divides arguments element-wise.

    The storage type of ``elemwise_div`` output is always dense

    Parameters
    ----------
    lhs : NDArray
        first input
    rhs : NDArray
        second input
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def elemwise_mul(lhs=None, rhs=None, out=None, name=None, **kwargs):
    r"""Multiplies arguments element-wise.

    The storage type of ``elemwise_mul`` output depends on storage types of
    inputs

    - elemwise_mul(default, default) = default
    - elemwise_mul(row_sparse, row_sparse) = row_sparse
    - elemwise_mul(default, row_sparse) = row_sparse
    - elemwise_mul(row_sparse, default) = row_sparse
    - elemwise_mul(csr, csr) = csr
    - otherwise, ``elemwise_mul`` generates output with default storage

    Parameters
    ----------
    lhs : NDArray
        first input
    rhs : NDArray
        second input
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def elemwise_sub(lhs=None, rhs=None, out=None, name=None, **kwargs):
    r"""Subtracts arguments element-wise.

    The storage type of ``elemwise_sub`` output depends on storage types of
    inputs

    - elemwise_sub(row_sparse, row_sparse) = row_sparse
    - elemwise_sub(csr, csr) = csr
    - elemwise_sub(default, csr) = default
    - elemwise_sub(csr, default) = default
    - elemwise_sub(default, rsp) = default
    - elemwise_sub(rsp, default) = default
    - otherwise, ``elemwise_sub`` generates output with default storage

    Parameters
    ----------
    lhs : NDArray
        first input
    rhs : NDArray
        second input
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)
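# Editor's illustrative sketch (not auto-generated): two of the sparse dot
# patterns listed above, via the public ``mx.nd`` API. Assumes MXNet 1.x; the
# values are hypothetical.
def _example_sparse_dot():
    import mxnet as mx
    lhs = mx.nd.array([[0., 1.], [2., 0.], [0., 3.]]).tostype('csr')
    rhs = mx.nd.array([[1., 1.], [1., 1.]])
    out = mx.nd.dot(lhs, rhs)                      # dot(csr, default) = default
    out_t = mx.nd.dot(lhs, mx.nd.ones((3, 2)),
                      transpose_a=True)            # dot(csr, default, transpose_a=True)
    return out, out_t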
def exp(data=None, out=None, name=None, **kwargs):
    r"""Returns element-wise exponential value of the input.

    .. math::
       exp(x) = e^x \approx 2.718^x

    Example::

       exp([0, 1, 2]) = [1., 2.71828175, 7.38905621]

    The storage type of ``exp`` output is always dense

    Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L921

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def expm1(data=None, out=None, name=None, **kwargs):
    r"""Returns ``exp(x) - 1`` computed element-wise on the input.

    This function provides greater precision than ``exp(x) - 1`` for small
    values of ``x``.

    The storage type of ``expm1`` output depends upon the input storage type:

    - expm1(default) = default
    - expm1(row_sparse) = row_sparse
    - expm1(csr) = csr

    Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1000

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def fix(data=None, out=None, name=None, **kwargs):
    r"""Returns element-wise rounded value to the nearest integer towards zero
    of the input.

    Example::

       fix([-2.1, -1.9, 1.9, 2.1]) = [-2., -1., 1., 2.]

    The storage type of ``fix`` output depends upon the input storage type:

    - fix(default) = default
    - fix(row_sparse) = row_sparse
    - fix(csr) = csr

    Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L795

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def floor(data=None, out=None, name=None, **kwargs):
    r"""Returns element-wise floor of the input.

    The floor of the scalar x is the largest integer i, such that i <= x.

    Example::

       floor([-2.1, -1.9, 1.5, 1.9, 2.1]) = [-3., -2., 1., 1., 2.]

    The storage type of ``floor`` output depends upon the input storage type:

    - floor(default) = default
    - floor(row_sparse) = row_sparse
    - floor(csr) = csr

    Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L757

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def ftrl_update(weight=None, grad=None, z=None, n=None, lr=_Null, lamda1=_Null, beta=_Null, wd=_Null, rescale_grad=_Null, clip_gradient=_Null, out=None, name=None, **kwargs):
    r"""Update function for Ftrl optimizer.

    Referenced from *Ad Click Prediction: a View from the Trenches*, available
    at http://dl.acm.org/citation.cfm?id=2488200.

    It updates the weights using::

     rescaled_grad = clip(grad * rescale_grad, clip_gradient)
     z += rescaled_grad - (sqrt(n + rescaled_grad**2) - sqrt(n)) * weight / learning_rate
     n += rescaled_grad**2
     w = (sign(z) * lamda1 - z) / ((beta + sqrt(n)) / learning_rate + wd) * (abs(z) > lamda1)

    If w, z and n are all of ``row_sparse`` storage type, only the row slices
    whose indices appear in grad.indices are updated (for w, z and n)::

     for row in grad.indices:
         rescaled_grad[row] = clip(grad[row] * rescale_grad, clip_gradient)
         z[row] += rescaled_grad[row] - (sqrt(n[row] + rescaled_grad[row]**2) - sqrt(n[row])) * weight[row] / learning_rate
         n[row] += rescaled_grad[row]**2
         w[row] = (sign(z[row]) * lamda1 - z[row]) / ((beta + sqrt(n[row])) / learning_rate + wd) * (abs(z[row]) > lamda1)

    Defined in src/operator/optimizer_op.cc:L632

    Parameters
    ----------
    weight : NDArray
        Weight
    grad : NDArray
        Gradient
    z : NDArray
        z
    n : NDArray
        Square of grad
    lr : float, required
        Learning rate
    lamda1 : float, optional, default=0.01
        The L1 regularization coefficient.
    beta : float, optional, default=1
        Per-Coordinate Learning Rate beta.
    wd : float, optional, default=0
        Weight decay augments the objective function with a regularization term
        that penalizes large weights. The penalty scales with the square of the
        magnitude of each weight.
    rescale_grad : float, optional, default=1
        Rescale gradient to grad = rescale_grad*grad.
    clip_gradient : float, optional, default=-1
        Clip gradient to the range of [-clip_gradient, clip_gradient].
        If clip_gradient <= 0, gradient clipping is turned off.
        grad = max(min(grad, clip_gradient), -clip_gradient).
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)
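# Editor's illustrative sketch (not auto-generated): one in-place FTRL step with
# dense state, following the update rule quoted above. Assumes MXNet 1.x; the
# values are hypothetical.
def _example_ftrl_update():
    import mxnet as mx
    weight = mx.nd.ones((2, 2))
    grad = mx.nd.array([[0.1, 0.2], [0.3, 0.4]])
    z = mx.nd.zeros((2, 2))        # accumulated z statistic
    n = mx.nd.zeros((2, 2))        # accumulated squared gradients
    mx.nd.ftrl_update(weight, grad, z, n, lr=0.1, lamda1=0.01, beta=1.0,
                      out=weight)
    return weight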
def gamma(data=None, out=None, name=None, **kwargs):
    r"""Returns the gamma function (extension of the factorial function to the
    reals), computed element-wise on the input array.

    The storage type of ``gamma`` output is always dense

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def gammaln(data=None, out=None, name=None, **kwargs):
    r"""Returns element-wise log of the absolute value of the gamma function of
    the input.

    The storage type of ``gammaln`` output is always dense

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def log(data=None, out=None, name=None, **kwargs):
    r"""Returns element-wise natural logarithmic value of the input.

    The natural logarithm is the logarithm in base *e*, so that
    ``log(exp(x)) = x``

    The storage type of ``log`` output is always dense

    Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L933

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def log10(data=None, out=None, name=None, **kwargs):
    r"""Returns element-wise base-10 logarithmic value of the input.

    ``10**log10(x) = x``

    The storage type of ``log10`` output is always dense

    Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L945

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)
def log1p(data=None, out=None, name=None, **kwargs):
    r"""Returns element-wise ``log(1 + x)`` value of the input.

    This function is more accurate than ``log(1 + x)`` for small ``x`` so that
    :math:`1+x\approx 1`

    The storage type of ``log1p`` output depends upon the input storage type:

    - log1p(default) = default
    - log1p(row_sparse) = row_sparse
    - log1p(csr) = csr

    Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L982

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def log2(data=None, out=None, name=None, **kwargs):
    r"""Returns element-wise base-2 logarithmic value of the input.

    ``2**log2(x) = x``

    The storage type of ``log2`` output is always dense

    Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L957

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def make_loss(data=None, out=None, name=None, **kwargs):
    r"""Make your own loss function in network construction.

    This operator accepts a customized loss function symbol as a terminal loss
    and the symbol should be an operator with no backward dependency. The output
    of this function is the gradient of loss with respect to the input data.

    For example, if you are making a cross entropy loss function. Assume ``out``
    is the predicted output and ``label`` is the true label, then the cross
    entropy can be defined as::

      cross_entropy = label * log(out) + (1 - label) * log(1 - out)
      loss = make_loss(cross_entropy)

    We will need to use ``make_loss`` when we are creating our own loss
    function or we want to combine multiple loss functions. Also we may want to
    stop some variables' gradients from backpropagation. See more detail in
    ``BlockGrad`` or ``stop_gradient``.

    The storage type of ``make_loss`` output depends upon the input storage
    type:

    - make_loss(default) = default
    - make_loss(row_sparse) = row_sparse

    Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L300

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def mean(data=None, axis=_Null, keepdims=_Null, exclude=_Null, out=None, name=None, **kwargs):
    r"""Computes the mean of array elements over given axes.

    Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L132

    Parameters
    ----------
    data : NDArray
        The input
    axis : Shape or None, optional, default=None
        The axis or axes along which to perform the reduction.

        The default, `axis=()`, will compute over all elements into a scalar
        array with shape `(1,)`.

        If `axis` is int, a reduction is performed on a particular axis.

        If `axis` is a tuple of ints, a reduction is performed on all the axes
        specified in the tuple.

        If `exclude` is true, reduction will be performed on the axes that are
        NOT in axis instead.

        Negative values means indexing from right to left.
    keepdims : boolean, optional, default=0
        If this is set to `True`, the reduced axes are left in the result as
        dimension with size one.
    exclude : boolean, optional, default=0
        Whether to perform reduction on axis that are NOT in axis instead.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)
def negative(data=None, out=None, name=None, **kwargs):
    r"""Numerical negative of the argument, element-wise.

    The storage type of ``negative`` output depends upon the input storage
    type:

    - negative(default) = default
    - negative(row_sparse) = row_sparse
    - negative(csr) = csr

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def norm(data=None, ord=_Null, axis=_Null, keepdims=_Null, out=None, name=None, **kwargs):
    r"""Computes the norm on an NDArray.

    This operator computes the norm on an NDArray with the specified axis,
    depending on the value of the ord parameter. By default, it computes the
    L2 norm on the entire array. Currently only ord=2 supports sparse ndarrays.

    Examples::

      x = [[[1, 2],
            [3, 4]],
           [[2, 2],
            [5, 6]]]

      norm(x, ord=2, axis=1) = [[3.1622777 4.472136 ]
                                [5.3851647 6.3245554]]

      norm(x, ord=1, axis=1) = [[4., 6.],
                                [7., 8.]]

      rsp = x.cast_storage('row_sparse')

      norm(rsp) = [5.47722578]

      csr = x.cast_storage('csr')

      norm(csr) = [5.47722578]

    Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L350

    Parameters
    ----------
    data : NDArray
        The input
    ord : int, optional, default='2'
        Order of the norm. Currently ord=1 and ord=2 are supported.
    axis : Shape or None, optional, default=None
        The axis or axes along which to perform the reduction.

        The default, `axis=()`, will compute over all elements into a scalar
        array with shape `(1,)`.

        If `axis` is int, a reduction is performed on a particular axis.

        If `axis` is a 2-tuple, it specifies the axes that hold 2-D matrices,
        and the matrix norms of these matrices are computed.
    keepdims : boolean, optional, default=0
        If this is set to `True`, the reduced axis is left in the result as
        dimension with size one.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def radians(data=None, out=None, name=None, **kwargs):
    r"""Converts each element of the input array from degrees to radians.

    .. math::
       radians([0, 90, 180, 270, 360]) = [0, \pi/2, \pi, 3\pi/2, 2\pi]

    The storage type of ``radians`` output depends upon the input storage type:

    - radians(default) = default
    - radians(row_sparse) = row_sparse
    - radians(csr) = csr

    Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L182

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def relu(data=None, out=None, name=None, **kwargs):
    r"""Computes rectified linear.

    .. math::
       max(features, 0)

    The storage type of ``relu`` output depends upon the input storage type:

    - relu(default) = default
    - relu(row_sparse) = row_sparse
    - relu(csr) = csr

    Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L85

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def retain(data=None, indices=None, out=None, name=None, **kwargs):
    r"""Picks rows specified by a user-input index array from a row_sparse
    matrix and saves them in the output sparse matrix.

    Example::

      data = [[1, 2], [3, 4], [5, 6]]
      indices = [0, 1, 3]
      shape = (4, 2)
      rsp_in = row_sparse(data, indices)
      to_retain = [0, 3]
      rsp_out = retain(rsp_in, to_retain)
      rsp_out.values = [[1, 2], [5, 6]]
      rsp_out.indices = [0, 3]

    The storage type of ``retain`` output depends on storage types of inputs

    - retain(row_sparse, default) = row_sparse
    - otherwise, ``retain`` is not supported

    Defined in src/operator/tensor/sparse_retain.cc:L53

    Parameters
    ----------
    data : NDArray
        The input array for sparse_retain operator.
    indices : NDArray
        The index array of row ids that will be retained.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)
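# Editor's illustrative sketch (not auto-generated): the retain example above,
# built with mx.nd.sparse.row_sparse_array and mx.nd.sparse.retain. Assumes
# MXNet 1.x.
def _example_sparse_retain():
    import mxnet as mx
    data = [[1., 2.], [3., 4.], [5., 6.]]
    indices = [0, 1, 3]
    rsp_in = mx.nd.sparse.row_sparse_array((data, indices), shape=(4, 2))
    rsp_out = mx.nd.sparse.retain(rsp_in, mx.nd.array([0, 3]))
    return rsp_out.data, rsp_out.indices    # [[1, 2], [5, 6]], [0, 3]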
def rint(data=None, out=None, name=None, **kwargs):
    r"""Returns element-wise rounded value to the nearest integer of the input.

    .. note::
       - For input ``n.5`` ``rint`` returns ``n`` while ``round`` returns
         ``n+1``.
       - For input ``-n.5`` both ``rint`` and ``round`` return ``-n-1``.

    Example::

       rint([-1.5, 1.5, -1.9, 1.9, 2.1]) = [-2.,  1., -2.,  2.,  2.]

    The storage type of ``rint`` output depends upon the input storage type:

    - rint(default) = default
    - rint(row_sparse) = row_sparse
    - rint(csr) = csr

    Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L719

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def round(data=None, out=None, name=None, **kwargs):
    r"""Returns element-wise rounded value to the nearest integer of the input.

    Example::

       round([-1.5, 1.5, -1.9, 1.9, 2.1]) = [-2.,  2., -2.,  2.,  2.]

    The storage type of ``round`` output depends upon the input storage type:

    - round(default) = default
    - round(row_sparse) = row_sparse
    - round(csr) = csr

    Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L698

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def rsqrt(data=None, out=None, name=None, **kwargs):
    r"""Returns element-wise inverse square-root value of the input.

    .. math::
       rsqrt(x) = 1/\sqrt{x}

    Example::

       rsqrt([4,9,16]) = [0.5, 0.33333334, 0.25]

    The storage type of ``rsqrt`` output is always dense

    Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L858

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def sgd_mom_update(weight=None, grad=None, mom=None, lr=_Null, momentum=_Null, wd=_Null, rescale_grad=_Null, clip_gradient=_Null, lazy_update=_Null, out=None, name=None, **kwargs):
    r"""Momentum update function for Stochastic Gradient Descent (SGD)
    optimizer.

    Momentum update has better convergence rates on neural networks.
    Mathematically it looks like below:

    .. math::

      v_1 = -\alpha * \nabla J(W_0)\\
      v_t = \gamma v_{t-1} - \alpha * \nabla J(W_{t-1})\\
      W_t = W_{t-1} + v_t

    It updates the weights using::

      v = momentum * v - learning_rate * gradient
      weight += v

    Where the parameter ``momentum`` is the decay rate of momentum estimates at
    each epoch.
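    However, see the continuation of this docstring below for the lazy sparse
    update; an editor's illustrative usage sketch follows the function.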
    However, if grad's storage type is ``row_sparse``, ``lazy_update`` is True
    and weight's storage type is the same as momentum's storage type, only the
    row slices whose indices appear in grad.indices are updated (for both
    weight and momentum)::

      for row in gradient.indices:
          v[row] = momentum[row] * v[row] - learning_rate * gradient[row]
          weight[row] += v[row]

    Defined in src/operator/optimizer_op.cc:L372

    Parameters
    ----------
    weight : NDArray
        Weight
    grad : NDArray
        Gradient
    mom : NDArray
        Momentum
    lr : float, required
        Learning rate
    momentum : float, optional, default=0
        The decay rate of momentum estimates at each epoch.
    wd : float, optional, default=0
        Weight decay augments the objective function with a regularization term
        that penalizes large weights. The penalty scales with the square of the
        magnitude of each weight.
    rescale_grad : float, optional, default=1
        Rescale gradient to grad = rescale_grad*grad.
    clip_gradient : float, optional, default=-1
        Clip gradient to the range of [-clip_gradient, clip_gradient].
        If clip_gradient <= 0, gradient clipping is turned off.
        grad = max(min(grad, clip_gradient), -clip_gradient).
    lazy_update : boolean, optional, default=1
        If true, lazy updates are applied if gradient's stype is row_sparse and
        both weight and momentum have the same stype
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def sgd_update(weight=None, grad=None, lr=_Null, wd=_Null, rescale_grad=_Null, clip_gradient=_Null, lazy_update=_Null, out=None, name=None, **kwargs):
    r"""Update function for Stochastic Gradient Descent (SGD) optimizer.

    It updates the weights using::

     weight = weight - learning_rate * (gradient + wd * weight)

    However, if gradient is of ``row_sparse`` storage type and ``lazy_update``
    is True, only the row slices whose indices appear in grad.indices are
    updated::

     for row in gradient.indices:
         weight[row] = weight[row] - learning_rate * (gradient[row] + wd * weight[row])

    Defined in src/operator/optimizer_op.cc:L331

    Parameters
    ----------
    weight : NDArray
        Weight
    grad : NDArray
        Gradient
    lr : float, required
        Learning rate
    wd : float, optional, default=0
        Weight decay augments the objective function with a regularization term
        that penalizes large weights. The penalty scales with the square of the
        magnitude of each weight.
    rescale_grad : float, optional, default=1
        Rescale gradient to grad = rescale_grad*grad.
    clip_gradient : float, optional, default=-1
        Clip gradient to the range of [-clip_gradient, clip_gradient].
        If clip_gradient <= 0, gradient clipping is turned off.
        grad = max(min(grad, clip_gradient), -clip_gradient).
    lazy_update : boolean, optional, default=1
        If true, lazy updates are applied if gradient's stype is row_sparse.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def sigmoid(data=None, out=None, name=None, **kwargs):
    r"""Computes sigmoid of x element-wise.

    .. math::
       y = 1 / (1 + exp(-x))

    The storage type of ``sigmoid`` output is always dense

    Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L101

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)
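# Editor's illustrative sketch (not auto-generated): one in-place SGD-with-
# momentum step on dense tensors, matching the sgd_mom_update pseudo-code
# above. Assumes MXNet 1.x; values are hypothetical.
def _example_sgd_mom_update():
    import mxnet as mx
    weight = mx.nd.ones((2, 2))
    grad = mx.nd.array([[0.5, 0.5], [1.0, 1.0]])
    mom = mx.nd.zeros((2, 2))
    mx.nd.sgd_mom_update(weight, grad, mom, lr=0.1, momentum=0.9, out=weight)
    return weight          # first step: weight - lr * grad (mom was zero)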
def sign(data=None, out=None, name=None, **kwargs):
    r"""Returns element-wise sign of the input.

    Example::

       sign([-2, 0, 3]) = [-1, 0, 1]

    The storage type of ``sign`` output depends upon the input storage type:

    - sign(default) = default
    - sign(row_sparse) = row_sparse
    - sign(csr) = csr

    Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L679

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def sin(data=None, out=None, name=None, **kwargs):
    r"""Computes the element-wise sine of the input array.

    The input should be in radians (:math:`2\pi` rad equals 360 degrees).

    .. math::
       sin([0, \pi/4, \pi/2]) = [0, 0.707, 1]

    The storage type of ``sin`` output depends upon the input storage type:

    - sin(default) = default
    - sin(row_sparse) = row_sparse
    - sin(csr) = csr

    Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L46

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def sinh(data=None, out=None, name=None, **kwargs):
    r"""Returns the hyperbolic sine of the input array, computed element-wise.

    .. math::
       sinh(x) = 0.5\times(exp(x) - exp(-x))

    The storage type of ``sinh`` output depends upon the input storage type:

    - sinh(default) = default
    - sinh(row_sparse) = row_sparse
    - sinh(csr) = csr

    Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L201

    Parameters
    ----------
    data : NDArray
        The input array.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)

def slice(data=None, begin=_Null, end=_Null, step=_Null, out=None, name=None, **kwargs):
    r"""Slices a region of the array.

    .. note:: ``crop`` is deprecated. Use ``slice`` instead.

    This function returns a sliced array between the indices given by `begin`
    and `end` with the corresponding `step`.

    For an input array of ``shape=(d_0, d_1, ..., d_n-1)``, a slice operation
    with ``begin=(b_0, b_1...b_m-1)``, ``end=(e_0, e_1, ..., e_m-1)``, and
    ``step=(s_0, s_1, ..., s_m-1)``, where m <= n, results in an array with the
    shape ``(|e_0-b_0|/|s_0|, ..., |e_m-1-b_m-1|/|s_m-1|, d_m, ..., d_n-1)``.

    The resulting array's *k*-th dimension contains elements from the *k*-th
    dimension of the input array starting from index ``b_k`` (inclusive) with
    step ``s_k`` until reaching ``e_k`` (exclusive).

    If the *k*-th elements are `None` in the sequence of `begin`, `end`, and
    `step`, the following rule will be used to set default values. If `s_k` is
    `None`, set `s_k=1`. If `s_k > 0`, set `b_k=0`, `e_k=d_k`; else, set
    `b_k=d_k-1`, `e_k=-1`.

    The storage type of ``slice`` output depends on storage types of inputs

    - slice(csr) = csr
    - otherwise, ``slice`` generates output with default storage

    .. note:: When input data storage type is csr, it only supports step=(),
       or step=(None,), or step=(1,) to generate a csr output. For other step
       parameter values, it falls back to slicing a dense tensor.

    Example::

      x = [[  1.,   2.,   3.,   4.],
           [  5.,   6.,   7.,   8.],
           [  9.,  10.,  11.,  12.]]

      slice(x, begin=(0,1), end=(2,4)) = [[ 2.,  3.,  4.],
                                          [ 6.,  7.,  8.]]
      slice(x, begin=(None, 0), end=(None, 3), step=(-1, 2)) = [[9., 11.],
                                                                [5., 7.],
                                                                [1., 3.]]

    Defined in src/operator/tensor/matrix_op.cc:L414

    Parameters
    ----------
    data : NDArray
        Source input
    begin : Shape(tuple), required
        starting indices for the slice operation, supports negative indices.
    end : Shape(tuple), required
        ending indices for the slice operation, supports negative indices.
    step : Shape(tuple), optional, default=[]
        step for the slice operation, supports negative values.
    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.
    """
    return (0,)
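# Editor's illustrative sketch (not auto-generated): slicing rows of a CSR
# matrix, which stays CSR per the storage rule above (step left at its
# default). Assumes MXNet 1.x.
def _example_slice_csr():
    import mxnet as mx
    x = mx.nd.array([[1., 2., 0.], [0., 0., 3.], [4., 0., 0.]]).tostype('csr')
    rows = mx.nd.slice(x, begin=(0,), end=(2,))   # first two rows
    assert rows.stype == 'csr'
    return rows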
end : Shape(tuple), required ending indices for the slice operation, supports negative indices. step : Shape(tuple), optional, default=[] step for the slice operation, supports negative values. out : NDArray, optional The output NDArray to hold the result. Returns ------- out : NDArray or list of NDArrays The output of this function. """ return (0,) def sqrt(data=None, out=None, name=None, **kwargs): r"""Returns element-wise square-root value of the input. .. math:: \textrm{sqrt}(x) = \sqrt{x} Example:: sqrt([4, 9, 16]) = [2, 3, 4] The storage type of ``sqrt`` output depends upon the input storage type: - sqrt(default) = default - sqrt(row_sparse) = row_sparse - sqrt(csr) = csr Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L838 Parameters ---------- data : NDArray The input array. out : NDArray, optional The output NDArray to hold the result. Returns ------- out : NDArray or list of NDArrays The output of this function. """ return (0,) def square(data=None, out=None, name=None, **kwargs): r"""Returns element-wise squared value of the input. .. math:: square(x) = x^2 Example:: square([2, 3, 4]) = [4, 9, 16] The storage type of ``square`` output depends upon the input storage type: - square(default) = default - square(row_sparse) = row_sparse - square(csr) = csr Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L815 Parameters ---------- data : NDArray The input array. out : NDArray, optional The output NDArray to hold the result. Returns ------- out : NDArray or list of NDArrays The output of this function. """ return (0,) def stop_gradient(data=None, out=None, name=None, **kwargs): r"""Stops gradient computation. Stops the accumulated gradient of the inputs from flowing through this operator in the backward direction. In other words, this operator prevents the contribution of its inputs from being taken into account when computing gradients. Example:: v1 = [1, 2] v2 = [0, 1] a = Variable('a') b = Variable('b') b_stop_grad = stop_gradient(3 * b) loss = MakeLoss(b_stop_grad + a) executor = loss.simple_bind(ctx=cpu(), a=(1,2), b=(1,2)) executor.forward(is_train=True, a=v1, b=v2) executor.outputs [ 1. 5.] executor.backward() executor.grad_arrays [ 0. 0.] [ 1. 1.] Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L267 Parameters ---------- data : NDArray The input array. out : NDArray, optional The output NDArray to hold the result. Returns ------- out : NDArray or list of NDArrays The output of this function. """ return (0,) def sum(data=None, axis=_Null, keepdims=_Null, exclude=_Null, out=None, name=None, **kwargs): r"""Computes the sum of array elements over given axes. .. Note:: `sum` and `sum_axis` are equivalent. For an ndarray of csr storage type, summation along axis 0 and axis 1 is supported. Setting keepdims or exclude to True will cause a fallback to the dense operator. Example:: data = [[[1, 2], [2, 3], [1, 3]], [[1, 4], [4, 3], [5, 2]], [[7, 1], [7, 2], [7, 3]]] sum(data, axis=1) [[ 4. 8.] [ 10. 9.] [ 21. 6.]] sum(data, axis=[1,2]) [ 12. 19. 27.] data = [[1, 2, 0], [3, 0, 1], [4, 1, 0]] csr = cast_storage(data, 'csr') sum(csr, axis=0) [ 8. 3. 1.] sum(csr, axis=1) [ 3. 4. 5.] Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L116 Parameters ---------- data : NDArray The input axis : Shape or None, optional, default=None The axis or axes along which to perform the reduction. The default, `axis=()`, will compute over all elements into a scalar array with shape `(1,)`. If `axis` is an int, a reduction is performed on that particular axis.
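For example, ``axis=1`` reduces over the second dimension, as the first example above shows.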
If `axis` is a tuple of ints, a reduction is performed on all the axes specified in the tuple. If `exclude` is true, reduction will be performed on the axes that are NOT in axis instead. Negative values mean indexing from right to left. keepdims : boolean, optional, default=0 If this is set to `True`, the reduced axes are left in the result as dimensions with size one. exclude : boolean, optional, default=0 Whether to perform the reduction on the axes that are NOT in `axis` instead. out : NDArray, optional The output NDArray to hold the result. Returns ------- out : NDArray or list of NDArrays The output of this function. """ return (0,) def tan(data=None, out=None, name=None, **kwargs): r"""Computes the element-wise tangent of the input array. The input should be in radians (:math:`2\pi` rad equals 360 degrees). .. math:: tan([0, \pi/4, \pi/2]) = [0, 1, -inf] The storage type of ``tan`` output depends upon the input storage type: - tan(default) = default - tan(row_sparse) = row_sparse - tan(csr) = csr Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L83 Parameters ---------- data : NDArray The input array. out : NDArray, optional The output NDArray to hold the result. Returns ------- out : NDArray or list of NDArrays The output of this function. """ return (0,) def tanh(data=None, out=None, name=None, **kwargs): r"""Returns the hyperbolic tangent of the input array, computed element-wise. .. math:: tanh(x) = sinh(x) / cosh(x) The storage type of ``tanh`` output depends upon the input storage type: - tanh(default) = default - tanh(row_sparse) = row_sparse - tanh(csr) = csr Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L234 Parameters ---------- data : NDArray The input array. out : NDArray, optional The output NDArray to hold the result. Returns ------- out : NDArray or list of NDArrays The output of this function. """ return (0,) def trunc(data=None, out=None, name=None, **kwargs): r"""Return the element-wise truncated value of the input. The truncated value of the scalar x is the nearest integer i which is closer to zero than x is. In short, the fractional part of the signed number x is discarded. Example:: trunc([-2.1, -1.9, 1.5, 1.9, 2.1]) = [-2., -1., 1., 1., 2.] The storage type of ``trunc`` output depends upon the input storage type: - trunc(default) = default - trunc(row_sparse) = row_sparse - trunc(csr) = csr Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L777 Parameters ---------- data : NDArray The input array. out : NDArray, optional The output NDArray to hold the result. Returns ------- out : NDArray or list of NDArrays The output of this function. """ return (0,) def where(condition=None, x=None, y=None, out=None, name=None, **kwargs): r"""Return the elements, either from x or y, depending on the condition. Given three ndarrays, condition, x, and y, return an ndarray with the elements from x or y, depending on whether the elements from condition are true or false. x and y must have the same shape. If condition has the same shape as x, each element in the output array is from x if the corresponding element in the condition is true, and from y if false. If condition does not have the same shape as x, it must be a 1D array whose size is the same as x's first dimension size. Each row of the output array is from x's row if the corresponding element from condition is true, and from y's row if false. Note that all non-zero values are interpreted as ``True`` in condition.
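For instance, with ``x = [[1, 2], [3, 4]]``, ``y = [[5, 6], [7, 8]]`` and a 1-D ``condition = [1, 0]``, the output is ``[[1, 2], [7, 8]]``: row 0 is taken from x and row 1 from y.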
Examples:: x = [[1, 2], [3, 4]] y = [[5, 6], [7, 8]] cond = [[0, 1], [-1, 0]] where(cond, x, y) = [[5, 2], [3, 8]] csr_cond = cast_storage(cond, 'csr') where(csr_cond, x, y) = [[5, 2], [3, 8]] Defined in src/operator/tensor/control_flow_op.cc:L57 Parameters ---------- condition : NDArray condition array x : NDArray y : NDArray out : NDArray, optional The output NDArray to hold the result. Returns ------- out : NDArray or list of NDArrays The output of this function. """ return (0,) def zeros_like(data=None, out=None, name=None, **kwargs): r"""Return an array of zeros with the same shape, type and storage type as the input array. The storage type of ``zeros_like`` output depends on the storage type of the input - zeros_like(row_sparse) = row_sparse - zeros_like(csr) = csr - zeros_like(default) = default Examples:: x = [[ 1., 1., 1.], [ 1., 1., 1.]] zeros_like(x) = [[ 0., 0., 0.], [ 0., 0., 0.]] Parameters ---------- data : NDArray The input out : NDArray, optional The output NDArray to hold the result. Returns ------- out : NDArray or list of NDArrays The output of this function. """ return (0,) __all__ = ['ElementWiseSum', 'Embedding', 'FullyConnected', 'LinearRegressionOutput', 'LogisticRegressionOutput', 'MAERegressionOutput', 'abs', 'adagrad_update', 'adam_update', 'add_n', 'arccos', 'arccosh', 'arcsin', 'arcsinh', 'arctan', 'arctanh', 'broadcast_add', 'broadcast_div', 'broadcast_minus', 'broadcast_mul', 'broadcast_plus', 'broadcast_sub', 'cast_storage', 'cbrt', 'ceil', 'clip', 'concat', 'cos', 'cosh', 'degrees', 'dot', 'elemwise_add', 'elemwise_div', 'elemwise_mul', 'elemwise_sub', 'exp', 'expm1', 'fix', 'floor', 'ftrl_update', 'gamma', 'gammaln', 'log', 'log10', 'log1p', 'log2', 'make_loss', 'mean', 'negative', 'norm', 'radians', 'relu', 'retain', 'rint', 'round', 'rsqrt', 'sgd_mom_update', 'sgd_update', 'sigmoid', 'sign', 'sin', 'sinh', 'slice', 'sqrt', 'square', 'stop_gradient', 'sum', 'tan', 'tanh', 'trunc', 'where', 'zeros_like']
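# ---------------------------------------------------------------------------
# Illustration (not part of the auto-generated API above): a minimal NumPy
# sketch of the lazy ``sgd_update`` and ``sgd_mom_update`` rules documented
# in this module. It mirrors only the docstring pseudocode; the real
# operators act on row_sparse NDArrays and also apply ``rescale_grad``,
# ``clip_gradient`` and ``wd`` as documented. The helper names below are
# hypothetical, not MXNet APIs.
if __name__ == "__main__":
    import numpy as np

    def lazy_sgd_update(weight, grad_indices, grad_values, lr, wd=0.0):
        # Only rows listed in grad_indices are touched, mirroring
        # ``for row in gradient.indices`` in the docstring.
        for row, g in zip(grad_indices, grad_values):
            weight[row] -= lr * (g + wd * weight[row])
        return weight

    def lazy_sgd_mom_update(weight, v, grad_indices, grad_values, lr,
                            momentum=0.0):
        # v holds the momentum state; only the touched rows are decayed.
        for row, g in zip(grad_indices, grad_values):
            v[row] = momentum * v[row] - lr * g
            weight[row] += v[row]
        return weight

    w = np.ones((4, 3))
    rows = np.array([0, 2])              # plays the role of grad.indices
    vals = np.full((2, 3), 2.0)          # the corresponding gradient rows
    lazy_sgd_update(w, rows, vals, lr=0.1)
    print(w)  # rows 0 and 2 become 0.8; rows 1 and 3 are untouched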