# # Copyright (c) 2017 Intel Corporation # SPDX-License-Identifier: BSD-2-Clause # from __future__ import print_function, division, absolute_import import types as pytypes # avoid confusion with numba.types import numpy import operator from numba import ir, analysis, types, config, cgutils, typing from numba.ir_utils import ( mk_unique_var, replace_vars_inner, find_topo_order, dprint_func_ir, get_global_func_typ, guard, require, get_definition, find_callname, find_build_sequence, find_const, is_namedtuple_class, build_definitions) from numba.analysis import (compute_cfg_from_blocks) from numba.typing import npydecl, signature import collections import copy from numba.extending import intrinsic import llvmlite.llvmpy.core as lc import llvmlite UNKNOWN_CLASS = -1 CONST_CLASS = 0 MAP_TYPES = [numpy.ufunc] array_analysis_extensions = {} # declaring call classes array_creation = ['empty', 'zeros', 'ones', 'full'] random_int_args = ['rand', 'randn'] random_1arg_size = ['ranf', 'random_sample', 'sample', 'random', 'standard_normal'] random_2arg_sizelast = ['chisquare', 'weibull', 'power', 'geometric', 'exponential', 'poisson', 'rayleigh'] random_3arg_sizelast = ['normal', 'uniform', 'beta', 'binomial', 'f', 'gamma', 'lognormal', 'laplace'] random_calls = (random_int_args + random_1arg_size + random_2arg_sizelast + random_3arg_sizelast + ['randint', 'triangular']) @intrinsic def wrap_index(typingctx, idx, size): """ Calculate index value "idx" relative to a size "size" value as (idx % size), where "size" is known to be positive. Note that we use the mod(%) operation here instead of (idx < 0 ? idx + size : idx) because we may have situations where idx > size due to the way indices are calculated during slice/range analysis. """ unified_ty = typingctx.unify_types(idx, size) if not unified_ty: raise ValueError("Argument types for wrap_index must match") def codegen(context, builder, sig, args): ll_unified_ty = context.get_data_type(unified_ty) idx = builder.sext(args[0], ll_unified_ty) size = builder.sext(args[1], ll_unified_ty) neg_size = builder.neg(size) zero = llvmlite.ir.Constant(ll_unified_ty, 0) idx_negative = builder.icmp_signed('<', idx, zero) pos_oversize = builder.icmp_signed('>=', idx, size) neg_oversize = builder.icmp_signed('<=', idx, neg_size) pos_res = builder.select(pos_oversize, size, idx) neg_res = builder.select(neg_oversize, zero, builder.add(idx, size)) mod = builder.select(idx_negative, neg_res, pos_res) return mod return signature(unified_ty, idx, size), codegen def wrap_index_literal(idx, size): if idx < 0: if idx <= -size: return 0 else: return idx + size else: if idx >= size: return size else: return idx @intrinsic def assert_equiv(typingctx, *val): """ A function that asserts the inputs are of equivalent size, and throws runtime error when they are not. The input is a vararg that contains an error message, followed by a set of objects of either array, tuple or integer. """ if len(val) > 1: # Make sure argument is a single tuple type. Note that this only # happens when IR containing assert_equiv call is being compiled # (and going through type inference) again. 
        val = (types.Tuple(val),)

    assert(len(val[0]) > 1)
    # Arguments must be either array, tuple, or integer
    assert all(map(lambda a: (isinstance(a, types.ArrayCompatible) or
                              isinstance(a, types.BaseTuple) or
                              isinstance(a, types.SliceType) or
                              isinstance(a, types.Integer)), val[0][1:]))

    def codegen(context, builder, sig, args):
        assert(len(args) == 1)  # it is a vararg tuple
        tup = cgutils.unpack_tuple(builder, args[0])
        tup_type = sig.args[0]
        msg = sig.args[0][0].literal_value

        def unpack_shapes(a, aty):
            if isinstance(aty, types.ArrayCompatible):
                ary = context.make_array(aty)(context, builder, a)
                return cgutils.unpack_tuple(builder, ary.shape)
            elif isinstance(aty, types.BaseTuple):
                return cgutils.unpack_tuple(builder, a)
            else:  # otherwise it is a single integer
                return [a]

        def pairwise(a, aty, b, bty):
            ashapes = unpack_shapes(a, aty)
            bshapes = unpack_shapes(b, bty)
            assert len(ashapes) == len(bshapes)
            for (m, n) in zip(ashapes, bshapes):
                m_eq_n = builder.icmp(lc.ICMP_EQ, m, n)
                with builder.if_else(m_eq_n) as (then, orelse):
                    with then:
                        pass
                    with orelse:
                        context.call_conv.return_user_exc(
                            builder, AssertionError, (msg,))

        for i in range(1, len(tup_type) - 1):
            pairwise(tup[i], tup_type[i], tup[i + 1], tup_type[i + 1])
        r = context.get_constant_generic(builder, types.NoneType, None)
        return r

    return signature(types.none, *val), codegen


class EquivSet(object):
    """EquivSet keeps track of equivalence relations between
    a set of objects.
    """

    def __init__(self, obj_to_ind=None, ind_to_obj=None, next_ind=0):
        """Create a new EquivSet object. Optional keyword arguments are for
        internal use only.
        """
        # obj_to_ind maps an object to its equivalence index (sometimes also
        # called its equivalence class), which is a non-negative number that
        # uniquely identifies a set of objects that are equivalent.
        self.obj_to_ind = obj_to_ind if obj_to_ind else {}
        # ind_to_obj maps equivalence index to a list of objects.
        self.ind_to_obj = ind_to_obj if ind_to_obj else {}
        # next index number that is incremented each time a new equivalence
        # relation is created.
        self.next_ind = next_ind

    def empty(self):
        """Return an empty EquivSet object.
        """
        return EquivSet()

    def clone(self):
        """Return a new copy.
        """
        return EquivSet(obj_to_ind=copy.deepcopy(self.obj_to_ind),
                        ind_to_obj=copy.deepcopy(self.ind_to_obj),
                        next_ind=self.next_ind)

    def __repr__(self):
        return "EquivSet({})".format(self.ind_to_obj)

    def is_empty(self):
        """Return true if the set is empty, or false otherwise.
        """
        return self.obj_to_ind == {}

    def _get_ind(self, x):
        """Return the internal index (greater or equal to 0) of the given
        object, or -1 if not found.
        """
        return self.obj_to_ind.get(x, -1)

    def _get_or_add_ind(self, x):
        """Return the internal index (greater or equal to 0) of the given
        object, or create a new one if not found.
        """
        if x in self.obj_to_ind:
            i = self.obj_to_ind[x]
        else:
            i = self.next_ind
            self.next_ind += 1
        return i

    def _insert(self, objs):
        """Base method that inserts a set of equivalent objects by modifying
        self.
        """
        assert len(objs) > 1

        inds = tuple(self._get_or_add_ind(x) for x in objs)
        ind = min(inds)
        if config.DEBUG_ARRAY_OPT >= 2:
            print("_insert:", objs, inds)

        if not (ind in self.ind_to_obj):
            self.ind_to_obj[ind] = []

        for i, obj in zip(inds, objs):
            if i == ind:
                if not (obj in self.ind_to_obj[ind]):
                    self.ind_to_obj[ind].append(obj)
                    self.obj_to_ind[obj] = ind
            else:
                if i in self.ind_to_obj:
                    # those already existing are reassigned
                    for x in self.ind_to_obj[i]:
                        self.obj_to_ind[x] = ind
                        self.ind_to_obj[ind].append(x)
                    del self.ind_to_obj[i]
                else:
                    # those that are new are assigned.
                    self.obj_to_ind[obj] = ind
                    self.ind_to_obj[ind].append(obj)

    def is_equiv(self, *objs):
        """Try to derive if given objects are equivalent, return true
        if so, or false otherwise.
        """
        inds = [self._get_ind(x) for x in objs]
        ind = max(inds)
        if ind != -1:
            return all(i == ind for i in inds)
        else:
            return all([x == objs[0] for x in objs])

    def get_equiv_const(self, obj):
        """Check if obj is equivalent to some int constant, and return
        the constant if found, or None otherwise.
        """
        ind = self._get_ind(obj)
        if ind >= 0:
            objs = self.ind_to_obj[ind]
            for x in objs:
                if isinstance(x, int):
                    return x
        return None

    def get_equiv_set(self, obj):
        """Return the set of equivalent objects.
        """
        ind = self._get_ind(obj)
        if ind >= 0:
            return set(self.ind_to_obj[ind])
        return set()

    def insert_equiv(self, *objs):
        """Insert a set of equivalent objects by modifying self. This
        method can be overloaded to transform object type before insertion.
        """
        return self._insert(objs)

    def intersect(self, equiv_set):
        """ Return the intersection of self and the given equiv_set,
        without modifying either of them. The result will also keep
        old equivalence indices unchanged.
        """
        new_set = self.empty()
        new_set.next_ind = self.next_ind

        for objs in equiv_set.ind_to_obj.values():
            inds = tuple(self._get_ind(x) for x in objs)
            ind_to_obj = {}

            for i, x in zip(inds, objs):
                if i in ind_to_obj:
                    ind_to_obj[i].append(x)
                elif i >= 0:
                    ind_to_obj[i] = [x]

            for v in ind_to_obj.values():
                if len(v) > 1:
                    new_set._insert(v)

        return new_set


class ShapeEquivSet(EquivSet):
    """Just like EquivSet, except that it accepts only numba IR variables
    and constants as objects, guided by their types. Arrays are considered
    equivalent as long as their shapes are equivalent.
    Scalars are equivalent only when they are equal in value.
    Tuples are equivalent when they are of the same size, and their
    elements are equivalent.
    """

    def __init__(self, typemap, defs=None, ind_to_var=None, obj_to_ind=None,
                 ind_to_obj=None, next_id=0, ind_to_const=None):
        """Create a new ShapeEquivSet object, where typemap is a dictionary
        that maps variable names to their types, and it will not be modified.
        Optional keyword arguments are for internal use only.
        """
        self.typemap = typemap
        # defs maps variable name to an int, where
        # 1 means the variable is defined only once, and numbers greater
        # than 1 mean it is defined more than once.
        self.defs = defs if defs else {}
        # ind_to_var maps index number to a list of variables (of ir.Var type).
        # It is used to retrieve defined shape variables given an equivalence
        # index.
        self.ind_to_var = ind_to_var if ind_to_var else {}
        # ind_to_const maps index number to a constant, if known.
        self.ind_to_const = ind_to_const if ind_to_const else {}

        super(ShapeEquivSet, self).__init__(obj_to_ind, ind_to_obj, next_id)

    def empty(self):
        """Return an empty ShapeEquivSet.
        """
        return ShapeEquivSet(self.typemap, {})

    def clone(self):
        """Return a new copy.
        """
        return ShapeEquivSet(
            self.typemap,
            defs=copy.copy(self.defs),
            ind_to_var=copy.copy(self.ind_to_var),
            obj_to_ind=copy.deepcopy(self.obj_to_ind),
            ind_to_obj=copy.deepcopy(self.ind_to_obj),
            next_id=self.next_ind,
            ind_to_const=copy.deepcopy(self.ind_to_const))

    def __repr__(self):
        return "ShapeEquivSet({}, ind_to_var={}, ind_to_const={})".format(
            self.ind_to_obj, self.ind_to_var, self.ind_to_const)

    def _get_names(self, obj):
        """Return a set of names for the given obj, where array and tuples
        are broken down to their individual shapes or elements. This is
        safe because both Numba array shapes and Python tuples are
        immutable.
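
        For illustration (a sketch, not output of a real run): a 2D array
        variable named "A" is broken down into the per-dimension names
        ("A#0", "A#1"), a constant tuple ir.Const((3, 4)) yields (3, 4),
        and a scalar variable "n" simply yields ("n",).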
""" if isinstance(obj, ir.Var) or isinstance(obj, str): name = obj if isinstance(obj, str) else obj.name if name not in self.typemap: return (name,) typ = self.typemap[name] if (isinstance(typ, types.BaseTuple) or isinstance(typ, types.ArrayCompatible)): ndim = (typ.ndim if isinstance(typ, types.ArrayCompatible) else len(typ)) # Treat 0d array as if it were a scalar. if ndim == 0: return (name,) else: return tuple("{}#{}".format(name, i) for i in range(ndim)) else: return (name,) elif isinstance(obj, ir.Const): if isinstance(obj.value, tuple): return obj.value else: return (obj.value,) elif isinstance(obj, tuple): return tuple(self._get_names(x)[0] for x in obj) elif isinstance(obj, int): return (obj,) else: raise NotImplementedError( "ShapeEquivSet does not support {}".format(obj)) def is_equiv(self, *objs): """Overload EquivSet.is_equiv to handle Numba IR variables and constants. """ assert(len(objs) > 1) obj_names = [self._get_names(x) for x in objs] obj_names = [x for x in obj_names if x != ()] # rule out 0d shape if len(obj_names) <= 1: return False; ndims = [len(names) for names in obj_names] ndim = ndims[0] if not all(ndim == x for x in ndims): if config.DEBUG_ARRAY_OPT >= 1: print("is_equiv: Dimension mismatch for {}".format(objs)) return False for i in range(ndim): names = [obj_name[i] for obj_name in obj_names] if not super(ShapeEquivSet, self).is_equiv(*names): return False return True def get_equiv_const(self, obj): """If the given object is equivalent to a constant scalar, return the scalar value, or None otherwise. """ names = self._get_names(obj) if len(names) > 1: return None return super(ShapeEquivSet, self).get_equiv_const(names[0]) def get_equiv_var(self, obj): """If the given object is equivalent to some defined variable, return the variable, or None otherwise. """ names = self._get_names(obj) if len(names) != 1: return None ind = self._get_ind(names[0]) vs = self.ind_to_var.get(ind, []) return vs[0] if vs != [] else None def get_equiv_set(self, obj): """Return the set of equivalent objects. """ names = self._get_names(obj) if len(names) > 1: return None return super(ShapeEquivSet, self).get_equiv_set(names[0]) def _insert(self, objs): """Overload EquivSet._insert to manage ind_to_var dictionary. """ inds = [] for obj in objs: if obj in self.obj_to_ind: inds.append(self.obj_to_ind[obj]) varlist = [] constval = None names = set() for i in sorted(inds): if i in self.ind_to_var: for x in self.ind_to_var[i]: if not (x.name in names): varlist.append(x) names.add(x.name) if i in self.ind_to_const: assert(constval is None) constval = self.ind_to_const[i] super(ShapeEquivSet, self)._insert(objs) new_ind = self.obj_to_ind[objs[0]] for i in set(inds): if i in self.ind_to_var: del self.ind_to_var[i] self.ind_to_var[new_ind] = varlist if constval is not None: self.ind_to_const[new_ind] = constval def insert_equiv(self, *objs): """Overload EquivSet.insert_equiv to handle Numba IR variables and constants. Input objs are either variable or constant, and at least one of them must be variable. 
""" assert(len(objs) > 1) obj_names = [self._get_names(x) for x in objs] obj_names = [x for x in obj_names if x != ()] # rule out 0d shape if len(obj_names) <= 1: return; names = sum([list(x) for x in obj_names], []) ndims = [len(x) for x in obj_names] ndim = ndims[0] assert all(ndim == x for x in ndims), ( "Dimension mismatch for {}".format(objs)) varlist = [] constlist = [] for obj in objs: if not isinstance(obj, tuple): obj = (obj,) for var in obj: if isinstance(var, ir.Var) and not (var.name in varlist): # favor those already defined, move to front of varlist if var.name in self.defs: varlist.insert(0, var) else: varlist.append(var) if isinstance(var, ir.Const) and not (var.value in constlist): constlist.append(var.value) # try to populate ind_to_var if variables are present for obj in varlist: name = obj.name if name in names and not (name in self.obj_to_ind): self.ind_to_obj[self.next_ind] = [name] self.obj_to_ind[name] = self.next_ind self.ind_to_var[self.next_ind] = [obj] self.next_ind += 1 # create equivalence classes for previously unseen constants for const in constlist: if const in names and not (const in self.obj_to_ind): self.ind_to_obj[self.next_ind] = [const] self.obj_to_ind[const] = self.next_ind self.ind_to_const[self.next_ind] = const self.next_ind += 1 some_change = False for i in range(ndim): names = [obj_name[i] for obj_name in obj_names] ie_res = super(ShapeEquivSet, self).insert_equiv(*names) some_change = some_change or ie_res return some_change def has_shape(self, name): """Return true if the shape of the given variable is available. """ return self.get_shape(name) != None def get_shape(self, name): """Return a tuple of variables that corresponds to the shape of the given array, or None if not found. """ return guard(self._get_shape, name) def _get_shape(self, name): """Return a tuple of variables that corresponds to the shape of the given array, or raise GuardException if not found. """ inds = self.get_shape_classes(name) require (inds != ()) shape = [] for i in inds: require(i in self.ind_to_var) vs = self.ind_to_var[i] if vs != []: shape.append(vs[0]) else: require(i in self.ind_to_const) vs = self.ind_to_const[i] shape.append(vs) return tuple(shape) def get_shape_classes(self, name): """Instead of the shape tuple, return tuple of int, where each int is the corresponding class index of the size object. Unknown shapes are given class index -1. Return empty tuple if the input name is a scalar variable. """ if isinstance(name, ir.Var): name = name.name typ = self.typemap[name] if name in self.typemap else None if not (isinstance(typ, types.BaseTuple) or isinstance(typ, types.SliceType) or isinstance(typ, types.ArrayCompatible)): return [] # Treat 0d arrays like scalars. if isinstance(typ, types.ArrayCompatible) and typ.ndim == 0: return [] names = self._get_names(name) inds = tuple(self._get_ind(name) for name in names) return inds def intersect(self, equiv_set): """Overload the intersect method to handle ind_to_var. 
""" newset = super(ShapeEquivSet, self).intersect(equiv_set) ind_to_var = {} for i, objs in newset.ind_to_obj.items(): assert(len(objs) > 0) obj = objs[0] assert(obj in self.obj_to_ind) assert(obj in equiv_set.obj_to_ind) j = self.obj_to_ind[obj] k = equiv_set.obj_to_ind[obj] assert(j in self.ind_to_var) assert(k in equiv_set.ind_to_var) varlist = [] names = [x.name for x in equiv_set.ind_to_var[k]] for x in self.ind_to_var[j]: if x.name in names: varlist.append(x) ind_to_var[i] = varlist newset.ind_to_var = ind_to_var return newset def define(self, name, redefined): """Increment the internal count of how many times a variable is being defined. Most variables in Numba IR are SSA, i.e., defined only once, but not all of them. When a variable is being re-defined, it must be removed from the equivalence relation and added to the redefined set but only if that redefinition is not known to have the same equivalence classes. Those variables redefined are removed from all the blocks' equivalence sets later. Arrays passed to define() use their whole name but these do not appear in the equivalence sets since they are stored there per dimension. Calling _get_names() here converts array names to dimensional names. This function would previously invalidate if there were any multiple definitions of a variable. However, we realized that this behavior is overly restrictive. You need only invalidate on multiple definitions if they are not known to be equivalent. So, the equivalence insertion functions now return True if some change was made (meaning the definition was not equivalent) and False otherwise. If no change was made, then define() need not be called. For no change to have been made, the variable must already be present. If the new definition of the var has the case where lhs and rhs are in the same equivalence class then again, no change will be made and define() need not be called or the variable invalidated. """ if isinstance(name, ir.Var): name = name.name if name in self.defs: self.defs[name] += 1 name_res = list(self._get_names(name)) for one_name in name_res: # NOTE: variable being redefined, must invalidate previous # equivalences. Believe it is a rare case, and only happens to # scalar accumuators. if one_name in self.obj_to_ind: redefined.add(one_name) # remove this var from all equiv sets i = self.obj_to_ind[one_name] del self.obj_to_ind[one_name] self.ind_to_obj[i].remove(one_name) if self.ind_to_obj[i] == []: del self.ind_to_obj[i] assert(i in self.ind_to_var) names = [x.name for x in self.ind_to_var[i]] if name in names: j = names.index(name) del self.ind_to_var[i][j] if self.ind_to_var[i] == []: del self.ind_to_var[i] # no more size variables, remove equivalence too if i in self.ind_to_obj: for obj in self.ind_to_obj[i]: del self.obj_to_ind[obj] del self.ind_to_obj[i] else: self.defs[name] = 1 def union_defs(self, defs, redefined): """Union with the given defs dictionary. This is meant to handle branch join-point, where a variable may have been defined in more than one branches. """ for k, v in defs.items(): if v > 0: self.define(k, redefined) class SymbolicEquivSet(ShapeEquivSet): """Just like ShapeEquivSet, except that it also reasons about variable equivalence symbolically by using their arithmetic definitions. The goal is to automatically derive the equivalence of array ranges (slicing). For instance, a[1:m] and a[0:m-1] shall be considered size-equivalence. 
""" def __init__(self, typemap, def_by=None, ref_by=None, ext_shapes=None, defs=None, ind_to_var=None, obj_to_ind=None, ind_to_obj=None, next_id=0): """Create a new SymbolicEquivSet object, where typemap is a dictionary that maps variable names to their types, and it will not be modified. Optional keyword arguments are for internal use only. """ # A "defined-by" table that maps A to a tuple of (B, i), which # means A is defined as: A = B + i, where A,B are variable names, # and i is an integer constants. self.def_by = def_by if def_by else {} # A "refered-by" table that maps A to a list of [(B, i), (C, j) ...], # which implies a sequence of definitions: B = A - i, C = A - j, and # so on, where A,B,C,... are variable names, and i,j,... are # integer constants. self.ref_by = ref_by if ref_by else {} # A extended shape table that can map an arbitrary object to a shape, # currently used to remember shapes for SetItem IR node, and wrapped # indices for Slice objects. self.ext_shapes = ext_shapes if ext_shapes else {} # rel_map keeps a map of relative sizes that we have seen so # that if we compute the same relative sizes different times # in different ways we can associate those two instances # of the same relative size to the same equivalence class. self.rel_map = {} # wrap_index() computes the effectual index given a slice and a # dimension's size. We need to be able to know that two wrap_index # calls are equivalent. They are known to be equivalent if the slice # and dimension sizes of the two wrap_index calls are equivalent. # wrap_map maps from a tuple of equivalence class ids for a slice and # a dimension size to some new equivalence class id for the output size. self.wrap_map = {} super(SymbolicEquivSet, self).__init__( typemap, defs, ind_to_var, obj_to_ind, ind_to_obj, next_id) def empty(self): """Return an empty SymbolicEquivSet. """ return SymbolicEquivSet(self.typemap) def __repr__(self): return ("SymbolicEquivSet({}, ind_to_var={}, def_by={}, " "ref_by={}, ext_shapes={})".format(self.ind_to_obj, self.ind_to_var, self.def_by, self.ref_by, self.ext_shapes)) def clone(self): """Return a new copy. """ return SymbolicEquivSet( self.typemap, def_by=copy.copy(self.def_by), ref_by=copy.copy(self.ref_by), ext_shapes=copy.copy(self.ext_shapes), defs=copy.copy(self.defs), ind_to_var=copy.copy(self.ind_to_var), obj_to_ind=copy.deepcopy(self.obj_to_ind), ind_to_obj=copy.deepcopy(self.ind_to_obj), next_id=self.next_ind) def get_rel(self, name): """Retrieve a definition pair for the given variable, or return None if it is not available. """ return guard(self._get_or_set_rel, name) def _get_or_set_rel(self, name, func_ir=None): """Retrieve a definition pair for the given variable, and if it is not already available, try to look it up in the given func_ir, and remember it for future use. 
""" if isinstance(name, ir.Var): name = name.name require(self.defs.get(name, 0) == 1) if name in self.def_by: return self.def_by[name] else: require(func_ir != None) def plus(x, y): x_is_const = isinstance(x, int) y_is_const = isinstance(y, int) if x_is_const: if y_is_const: return x + y else: (var, offset) = y return (var, x + offset) else: (var, offset) = x if y_is_const: return (var, y + offset) else: return None def minus(x, y): if isinstance(y, int): return plus(x, -y) elif (isinstance(x, tuple) and isinstance(y, tuple) and x[0] == y[0]): return minus(x[1], y[1]) else: return None expr = get_definition(func_ir, name) value = (name, 0) # default to its own name if isinstance(expr, ir.Expr): if expr.op == 'call': fname, mod_name = find_callname( func_ir, expr, typemap=self.typemap) if fname == 'wrap_index' and mod_name == 'numba.array_analysis': index = tuple(self.obj_to_ind.get(x.name, -1) for x in expr.args) if -1 in index: return None names = self.ext_shapes.get(index, []) names.append(name) if len(names) > 0: self._insert(names) self.ext_shapes[index] = names elif expr.op == 'binop': lhs = self._get_or_set_rel(expr.lhs, func_ir) rhs = self._get_or_set_rel(expr.rhs, func_ir) if expr.fn == operator.add: value = plus(lhs, rhs) elif expr.fn == operator.sub: value = minus(lhs, rhs) elif isinstance(expr, ir.Const) and isinstance(expr.value, int): value = expr.value require(value != None) # update def_by table self.def_by[name] = value if isinstance(value, int) or (isinstance(value, tuple) and (value[0] != name or value[1] != 0)): # update ref_by table too if isinstance(value, tuple): (var, offset) = value if not (var in self.ref_by): self.ref_by[var] = [] self.ref_by[var].append((name, -offset)) # insert new equivalence if found ind = self._get_ind(var) if ind >= 0: objs = self.ind_to_obj[ind] names = [] for obj in objs: if obj in self.ref_by: names += [ x for (x, i) in self.ref_by[obj] if i == -offset ] if len(names) > 1: super(SymbolicEquivSet, self)._insert(names) return value def define(self, var, redefined, func_ir=None, typ=None): """Besides incrementing the definition count of the given variable name, it will also retrieve and simplify its definition from func_ir, and remember the result for later equivalence comparison. Supported operations are: 1. arithmetic plus and minus with constants 2. wrap_index (relative to some given size) """ if isinstance(var, ir.Var): name = var.name else: name = var super(SymbolicEquivSet, self).define(name, redefined) if (func_ir and self.defs.get(name, 0) == 1 and isinstance(typ, types.Number)): value = guard(self._get_or_set_rel, name, func_ir) # turn constant definition into equivalence if isinstance(value, int): self._insert([name, value]) if isinstance(var, ir.Var): ind = self._get_or_add_ind(name) if not (ind in self.ind_to_obj): self.ind_to_obj[ind] = [name] self.obj_to_ind[name] = ind if ind in self.ind_to_var: self.ind_to_var[ind].append(var) else: self.ind_to_var[ind] = [var] def _insert(self, objs): """Overload _insert method to handle ind changes between relative objects. Returns True if some change is made, false otherwise. 
""" indset = set() uniqs = set() for obj in objs: ind = self._get_ind(obj) if ind == -1: uniqs.add(obj) elif not (ind in indset): uniqs.add(obj) indset.add(ind) if len(uniqs) <= 1: return False uniqs = list(uniqs) super(SymbolicEquivSet, self)._insert(uniqs) objs = self.ind_to_obj[self._get_ind(uniqs[0])] # New equivalence guided by def_by and ref_by offset_dict = {} def get_or_set(d, k): if k in d: v = d[k] else: v = [] d[k] = v return v for obj in objs: if obj in self.def_by: value = self.def_by[obj] if isinstance(value, tuple): (name, offset) = value get_or_set(offset_dict, -offset).append(name) if name in self.ref_by: # relative to name for (v, i) in self.ref_by[name]: get_or_set(offset_dict, -(offset+i)).append(v) if obj in self.ref_by: for (name, offset) in self.ref_by[obj]: get_or_set(offset_dict, offset).append(name) for names in offset_dict.values(): self._insert(names) return True def set_shape_setitem(self, obj, shape): """remember shapes of SetItem IR nodes. """ assert isinstance(obj, ir.StaticSetItem) or isinstance(obj, ir.SetItem) self.ext_shapes[obj] = shape def _get_shape(self, obj): """Overload _get_shape to retrieve the shape of SetItem IR nodes. """ if isinstance(obj, ir.StaticSetItem) or isinstance(obj, ir.SetItem): require(obj in self.ext_shapes) return self.ext_shapes[obj] else: assert(isinstance(obj, ir.Var)) typ = self.typemap[obj.name] # for slice type, return the shape variable itself if isinstance(typ, types.SliceType): return (obj,) else: return super(SymbolicEquivSet, self)._get_shape(obj) class WrapIndexMeta(object): """ Array analysis should be able to analyze all the function calls that it adds to the IR. That way, array analysis can be run as often as needed and you should get the same equivalencies. One modification to the IR that array analysis makes is the insertion of wrap_index calls. Thus, repeated array analysis passes should be able to analyze these wrap_index calls. The difficulty of these calls is that the equivalence class of the left-hand side of the assignment is not present in the arguments to wrap_index in the right-hand side. Instead, the equivalence class of the wrap_index output is a combination of the wrap_index args. The important thing to note is that if the equivalence classes of the slice size and the dimension's size are the same for two wrap index calls then we can be assured of the answer being the same. So, we maintain the wrap_map dict that maps from a tuple of equivalence class ids for the slice and dimension size to some new equivalence class id for the output size. However, when we are analyzing the first such wrap_index call we don't have a variable there to associate to the size since we're in the process of analyzing the instruction that creates that mapping. So, instead we return an object of this special class and analyze_inst will establish the connection between a tuple of the parts of this object below and the left-hand side variable. """ def __init__(self, slice_size, dim_size): self.slice_size = slice_size self.dim_size = dim_size class ArrayAnalysis(object): aa_count = 0 """Analyzes Numpy array computations for properties such as shape/size equivalence, and keeps track of them on a per-block basis. The analysis should only be run once because it modifies the incoming IR by inserting assertion statements that safeguard parfor optimizations. 
""" def __init__(self, context, func_ir, typemap, calltypes): self.context = context self.func_ir = func_ir self.typemap = typemap self.calltypes = calltypes # EquivSet of variables, indexed by block number self.equiv_sets = {} # keep attr calls to arrays like t=A.sum() as {t:('sum',A)} self.array_attr_calls = {} # keep prepended instructions from conditional branch self.prepends = {} # keep track of pruned precessors when branch degenerates to jump self.pruned_predecessors = {} def get_equiv_set(self, block_label): """Return the equiv_set object of an block given its label. """ return self.equiv_sets[block_label] def remove_redefineds(self, redefineds): """Take a set of variables in redefineds and go through all the currently existing equivalence sets (created in topo order) and remove that variable from all of them since it is multiply defined within the function. """ unused = set() for r in redefineds: for eslabel in self.equiv_sets: es = self.equiv_sets[eslabel] es.define(r, unused) def run(self, blocks=None, equiv_set=None): """run array shape analysis on the given IR blocks, resulting in modified IR and finalized EquivSet for each block. """ if blocks == None: blocks = self.func_ir.blocks self.func_ir._definitions = build_definitions(self.func_ir.blocks) if equiv_set == None: init_equiv_set = SymbolicEquivSet(self.typemap) else: init_equiv_set = equiv_set aa_count_save = ArrayAnalysis.aa_count ArrayAnalysis.aa_count += 1 if config.DEBUG_ARRAY_OPT >= 1: print("Starting ArrayAnalysis:", aa_count_save) dprint_func_ir(self.func_ir, "before array analysis", blocks) if config.DEBUG_ARRAY_OPT >= 1: print("ArrayAnalysis variable types: ", sorted(self.typemap.items())) print("ArrayAnalysis call types: ", self.calltypes) cfg = compute_cfg_from_blocks(blocks) topo_order = find_topo_order(blocks, cfg=cfg) # Traverse blocks in topological order for label in topo_order: if config.DEBUG_ARRAY_OPT >= 2: print("Processing block:", label) block = blocks[label] scope = block.scope new_body = [] equiv_set = None # equiv_set is the intersection of predecessors preds = cfg.predecessors(label) # some incoming edge may be pruned due to prior analysis if label in self.pruned_predecessors: pruned = self.pruned_predecessors[label] else: pruned = [] # Go through each incoming edge, process prepended instructions and # calculate beginning equiv_set of current block as an intersection # of incoming ones. if config.DEBUG_ARRAY_OPT >= 2: print("preds:", preds) for (p, q) in preds: if config.DEBUG_ARRAY_OPT >= 2: print("p, q:", p, q) if p in pruned: continue if p in self.equiv_sets: from_set = self.equiv_sets[p].clone() if config.DEBUG_ARRAY_OPT >= 2: print("p in equiv_sets", from_set) if (p, label) in self.prepends: instrs = self.prepends[(p, label)] for inst in instrs: redefined = set() self._analyze_inst(label, scope, from_set, inst, redefined) # Remove anything multiply defined in this block # from every block equivs. self.remove_redefineds(redefined) if equiv_set == None: equiv_set = from_set else: equiv_set = equiv_set.intersect(from_set) redefined = set() equiv_set.union_defs(from_set.defs, redefined) # Remove anything multiply defined in this block # from every block equivs. self.remove_redefineds(redefined) # Start with a new equiv_set if none is computed if equiv_set == None: equiv_set = init_equiv_set self.equiv_sets[label] = equiv_set # Go through instructions in a block, and insert pre/post # instructions as we analyze them. 
for inst in block.body: redefined = set() if isinstance(inst, ir.StaticSetItem): orig_calltype = self.calltypes[inst] inst = ir.SetItem(inst.target, inst.index_var, inst.value, inst.loc) self.calltypes[inst] = orig_calltype pre, post = self._analyze_inst(label, scope, equiv_set, inst, redefined) # Remove anything multiply defined in this block from every block equivs. if len(redefined) > 0: self.remove_redefineds(redefined) for instr in pre: new_body.append(instr) new_body.append(inst) for instr in post: new_body.append(instr) block.body = new_body if config.DEBUG_ARRAY_OPT >= 1: self.dump() print("ArrayAnalysis post variable types: ", sorted(self.typemap.items())) print("ArrayAnalysis post call types: ", self.calltypes) dprint_func_ir(self.func_ir, "after array analysis", blocks) if config.DEBUG_ARRAY_OPT >= 1: print("Ending ArrayAnalysis:", aa_count_save) def dump(self): """dump per-block equivalence sets for debugging purposes. """ print("Array Analysis: ", self.equiv_sets) def _define(self, equiv_set, var, typ, value): self.typemap[var.name] = typ self.func_ir._definitions[var.name] = [value] redefineds = set() equiv_set.define(var, redefineds, self.func_ir, typ) def _analyze_inst(self, label, scope, equiv_set, inst, redefined): pre = [] post = [] if config.DEBUG_ARRAY_OPT >= 2: print("analyze_inst:", inst) if isinstance(inst, ir.Assign): lhs = inst.target typ = self.typemap[lhs.name] shape = None if isinstance(typ, types.ArrayCompatible) and typ.ndim == 0: shape = () elif isinstance(inst.value, ir.Expr): result = self._analyze_expr(scope, equiv_set, inst.value) if result: shape = result[0] pre = result[1] if len(result) > 2: rhs = result[2] inst.value = rhs elif (isinstance(inst.value, ir.Var) or isinstance(inst.value, ir.Const)): shape = inst.value if isinstance(shape, ir.Const): if isinstance(shape.value, tuple): loc = shape.loc shape = tuple(ir.Const(x, loc) for x in shape.value) elif isinstance(shape.value, int): shape = (shape,) else: shape = None elif (isinstance(shape, ir.Var) and isinstance(self.typemap[shape.name], types.Integer)): shape = (shape,) elif isinstance(shape, WrapIndexMeta): """ Here we've got the special WrapIndexMeta object back from analyzing a wrap_index call. We define the lhs and then get it's equivalence class then add the mapping from the tuple of slice size and dimensional size equivalence ids to the lhs equivalence id. """ equiv_set.define(lhs, redefined, self.func_ir, typ) lhs_ind = equiv_set._get_ind(lhs.name) if lhs_ind != -1: equiv_set.wrap_map[(shape.slice_size, shape.dim_size)] = lhs_ind return pre, post if isinstance(typ, types.ArrayCompatible): if (shape is not None and isinstance(shape, ir.Var) and isinstance(self.typemap[shape.name], types.containers.Tuple)): pass elif (shape == None or isinstance(shape, tuple) or (isinstance(shape, ir.Var) and not equiv_set.has_shape(shape))): (shape, post) = self._gen_shape_call(equiv_set, lhs, typ.ndim, shape) elif isinstance(typ, types.UniTuple): if shape and isinstance(typ.dtype, types.Integer): (shape, post) = self._gen_shape_call(equiv_set, lhs, len(typ), shape) """ See the comment on the define() function. We need only call define(), which will invalidate a variable from being in the equivalence sets on multiple definitions, if the variable was not previously defined or if the new definition would be in a conflicting equivalence class to the original equivalence class for the variable. insert_equiv() returns True if either of these conditions are True and then we call define() in those cases. 
If insert_equiv() returns False then no changes were made and all equivalence classes are consistent upon a redefinition so no invalidation is needed and we don't call define(). """ needs_define = True if shape != None: needs_define = equiv_set.insert_equiv(lhs, shape) if needs_define: equiv_set.define(lhs, redefined, self.func_ir, typ) elif isinstance(inst, ir.StaticSetItem) or isinstance(inst, ir.SetItem): index = inst.index if isinstance(inst, ir.SetItem) else inst.index_var result = guard(self._index_to_shape, scope, equiv_set, inst.target, index) if not result: return [], [] if result[0] is not None: assert(isinstance(inst, ir.SetItem)) inst.index = result[0] #inst.index_var = result[0] result = result[1] (target_shape, pre) = result value_shape = equiv_set.get_shape(inst.value) if value_shape == (): # constant equiv_set.set_shape_setitem(inst, target_shape) return pre, [] elif value_shape != None: target_typ = self.typemap[inst.target.name] require(isinstance(target_typ, types.ArrayCompatible)) target_ndim = target_typ.ndim shapes = [target_shape, value_shape] names = [inst.target.name, inst.value.name] shape, asserts = self._broadcast_assert_shapes( scope, equiv_set, inst.loc, shapes, names) n = len(shape) # shape dimension must be within target dimension assert(target_ndim >= n) equiv_set.set_shape_setitem(inst, shape) return pre + asserts, [] else: return pre, [] elif isinstance(inst, ir.Branch): cond_var = inst.cond cond_def = guard(get_definition, self.func_ir, cond_var) if not cond_def: # phi variable has no single definition # We'll use equiv_set to try to find a cond_def instead equivs = equiv_set.get_equiv_set(cond_var) defs = [] for name in equivs: if isinstance(name, str) and name in self.typemap: var_def = guard(get_definition, self.func_ir, name, lhs_only=True) if isinstance(var_def, ir.Var): var_def = var_def.name if var_def: defs.append(var_def) else: defs.append(name) defvars = set(filter(lambda x: isinstance(x, str), defs)) defconsts = set(defs).difference(defvars) if len(defconsts) == 1: cond_def = list(defconsts)[0] elif len(defvars) == 1: cond_def = guard(get_definition, self.func_ir, list(defvars)[0]) if isinstance(cond_def, ir.Expr) and cond_def.op == 'binop': br = None if cond_def.fn == operator.eq: br = inst.truebr otherbr = inst.falsebr cond_val = 1 elif cond_def.fn == operator.ne: br = inst.falsebr otherbr = inst.truebr cond_val = 0 lhs_typ = self.typemap[cond_def.lhs.name] rhs_typ = self.typemap[cond_def.rhs.name] if (br != None and ((isinstance(lhs_typ, types.Integer) and isinstance(rhs_typ, types.Integer)) or (isinstance(lhs_typ, types.BaseTuple) and isinstance(rhs_typ, types.BaseTuple)))): loc = inst.loc args = (cond_def.lhs, cond_def.rhs) asserts = self._make_assert_equiv( scope, loc, equiv_set, args) asserts.append( ir.Assign(ir.Const(cond_val, loc), cond_var, loc)) self.prepends[(label, br)] = asserts self.prepends[(label, otherbr)] = [ ir.Assign(ir.Const(1 - cond_val, loc), cond_var, loc)] else: if isinstance(cond_def, ir.Const): cond_def = cond_def.value if isinstance(cond_def, int) or isinstance(cond_def, bool): # condition is always true/false, prune the outgoing edge pruned_br = inst.falsebr if cond_def else inst.truebr if pruned_br in self.pruned_predecessors: self.pruned_predecessors[pruned_br].append(label) else: self.pruned_predecessors[pruned_br] = [label] elif type(inst) in array_analysis_extensions: # let external calls handle stmt if type matches f = array_analysis_extensions[type(inst)] pre, post = f(inst, equiv_set, self.typemap, self) 
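            # For illustration, a hypothetical registration of such an
            # extension (the node type and handler names below are made up,
            # not part of this module):
            #
            #     def _analyze_my_node(node, equiv_set, typemap, array_analysis):
            #         # derive shape equivalences for the custom IR node here
            #         return [], []  # (pre, post) instruction lists
            #
            #     array_analysis_extensions[MyCustomNode] = _analyze_my_node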
return pre, post def _analyze_expr(self, scope, equiv_set, expr): fname = "_analyze_op_{}".format(expr.op) try: fn = getattr(self, fname) except AttributeError: return None return guard(fn, scope, equiv_set, expr) def _analyze_op_getattr(self, scope, equiv_set, expr): # TODO: getattr of npytypes.Record if expr.attr == 'T' and self._isarray(expr.value.name): return self._analyze_op_call_numpy_transpose(scope, equiv_set, [expr.value], {}) elif expr.attr == 'shape': shape = equiv_set.get_shape(expr.value) return shape, [] return None def _analyze_op_cast(self, scope, equiv_set, expr): return expr.value, [] def _analyze_op_exhaust_iter(self, scope, equiv_set, expr): var = expr.value typ = self.typemap[var.name] if isinstance(typ, types.BaseTuple): require(len(typ) == expr.count) require(equiv_set.has_shape(var)) return var, [] return None def gen_literal_slice_part(self, arg_val, loc, scope, stmts, equiv_set, name="static_literal_slice_part"): # Create var to hold the calculated slice size. static_literal_slice_part_var = ir.Var(scope, mk_unique_var(name), loc) static_literal_slice_part_val = ir.Const(arg_val, loc) static_literal_slice_part_typ = types.IntegerLiteral(arg_val) # We'll prepend this slice size calculation to the get/setitem. stmts.append(ir.Assign(value=static_literal_slice_part_val, target=static_literal_slice_part_var, loc=loc)) self._define(equiv_set, static_literal_slice_part_var, static_literal_slice_part_typ, static_literal_slice_part_val) return static_literal_slice_part_var, static_literal_slice_part_typ def gen_static_slice_size(self, lhs_rel, rhs_rel, loc, scope, stmts, equiv_set): the_var, the_typ = self.gen_literal_slice_part(rhs_rel - lhs_rel, loc, scope, stmts, equiv_set, name="static_slice_size") return the_var def gen_explicit_neg(self, arg, arg_rel, arg_typ, size_typ, loc, scope, dsize, stmts, equiv_set): assert(not isinstance(size_typ, int)) # Create var to hold the calculated slice size. explicit_neg_var = ir.Var(scope, mk_unique_var("explicit_neg"), loc) explicit_neg_val = ir.Expr.binop(operator.add, dsize, arg, loc=loc) # Determine the type of that var. Can be literal if we know the # literal size of the dimension. explicit_neg_typ = types.intp self.calltypes[explicit_neg_val] = signature(explicit_neg_typ, size_typ, arg_typ) # We'll prepend this slice size calculation to the get/setitem. stmts.append(ir.Assign(value=explicit_neg_val, target=explicit_neg_var, loc=loc)) self._define(equiv_set, explicit_neg_var, explicit_neg_typ, explicit_neg_val) return explicit_neg_var, explicit_neg_typ def update_replacement_slice(self, lhs, lhs_typ, lhs_rel, dsize_rel, replacement_slice, slice_index, need_replacement, loc, scope, stmts, equiv_set, size_typ, dsize): # Do compile-time calculation of real index value if both the given # index value and the array length are known at compile time. known = False if isinstance(lhs_rel, int): # If the index and the array size are known then the real index # can be calculated at compile time. if lhs_rel == 0: # Special-case 0 as nothing needing to be done. known = True elif isinstance(dsize_rel, int): known = True # Calculate the real index. wil = wrap_index_literal(lhs_rel, dsize_rel) # If the given index value is between 0 and dsize then # there's no need to rewrite anything. if wil != lhs_rel: if config.DEBUG_ARRAY_OPT >= 2: print("Replacing slice to hard-code known slice size.") # Indicate we will need to replace the slice var. 
need_replacement = True literal_var, literal_typ = self.gen_literal_slice_part( wil, loc, scope, stmts, equiv_set) assert(slice_index == 0 or slice_index == 1) if slice_index == 0: replacement_slice.args = (literal_var, replacement_slice.args[1]) else: replacement_slice.args = (replacement_slice.args[0], literal_var) # Update lhs information with the negative removed. lhs = replacement_slice.args[slice_index] lhs_typ = literal_typ lhs_rel = equiv_set.get_rel(lhs) elif lhs_rel < 0: # Indicate we will need to replace the slice var. need_replacement = True if config.DEBUG_ARRAY_OPT >= 2: print("Replacing slice due to known negative index.") explicit_neg_var, explicit_neg_typ = self.gen_explicit_neg(lhs, lhs_rel, lhs_typ, size_typ, loc, scope, dsize, stmts, equiv_set) if slice_index == 0: replacement_slice.args = (explicit_neg_var, replacement_slice.args[1]) else: replacement_slice.args = (replacement_slice.args[0], explicit_neg_var) # Update lhs information with the negative removed. lhs = replacement_slice.args[slice_index] lhs_typ = explicit_neg_typ lhs_rel = equiv_set.get_rel(lhs) return lhs, lhs_typ, lhs_rel, replacement_slice, need_replacement, known def slice_size(self, index, dsize, equiv_set, scope, stmts): """Reason about the size of a slice represented by the "index" variable, and return a variable that has this size data, or raise GuardException if it cannot reason about it. The computation takes care of negative values used in the slice with respect to the given dimensional size ("dsize"). Extra statments required to produce the result are appended to parent function's stmts list. """ loc = index.loc # Get the definition of the index variable. index_def = get_definition(self.func_ir, index) fname, mod_name = find_callname( self.func_ir, index_def, typemap=self.typemap) require(fname == 'slice' and mod_name in ('__builtin__', 'builtins')) require(len(index_def.args) == 2) lhs = index_def.args[0] rhs = index_def.args[1] size_typ = self.typemap[dsize.name] lhs_typ = self.typemap[lhs.name] rhs_typ = self.typemap[rhs.name] if config.DEBUG_ARRAY_OPT >= 2: print("slice_size", "index=", index, "dsize=", dsize, "index_def=", index_def, "lhs=", lhs, "rhs=", rhs, "size_typ=", size_typ, "lhs_typ=", lhs_typ, "rhs_typ=", rhs_typ) # Make a deepcopy of the original slice to use as the # replacement slice, which we will modify as necessary # below to convert all negative constants in the slice # to be relative to the dimension size. replacement_slice = copy.deepcopy(index_def) need_replacement = False # Fill in the left side of the slice's ":" with 0 if it wasn't specified. if isinstance(lhs_typ, types.NoneType): zero_var = ir.Var(scope, mk_unique_var("zero"), loc) zero = ir.Const(0, loc) stmts.append(ir.Assign(value=zero, target=zero_var, loc=loc)) self._define(equiv_set, zero_var, types.IntegerLiteral(0), zero) lhs = zero_var lhs_typ = types.IntegerLiteral(0) replacement_slice.args = (lhs, replacement_slice.args[1]) need_replacement = True if config.DEBUG_ARRAY_OPT >= 2: print("Replacing slice because lhs is None.") # Fill in the right side of the slice's ":" with the array # length if it wasn't specified. 
if isinstance(rhs_typ, types.NoneType): rhs = dsize rhs_typ = size_typ replacement_slice.args = (replacement_slice.args[0], rhs) need_replacement = True if config.DEBUG_ARRAY_OPT >= 2: print("Replacing slice because lhs is None.") lhs_rel = equiv_set.get_rel(lhs) rhs_rel = equiv_set.get_rel(rhs) dsize_rel = equiv_set.get_rel(dsize) if config.DEBUG_ARRAY_OPT >= 2: print("lhs_rel", lhs_rel, "rhs_rel", rhs_rel, "dsize_rel", dsize_rel) # Update replacement slice with the real index value if we can # compute it at compile time. lhs, lhs_typ, lhs_rel, replacement_slice, need_replacement, lhs_known = self.update_replacement_slice( lhs, lhs_typ, lhs_rel, dsize_rel, replacement_slice, 0, need_replacement, loc, scope, stmts, equiv_set, size_typ, dsize) rhs, rhs_typ, rhs_rel, replacement_slice, need_replacement, rhs_known = self.update_replacement_slice( rhs, rhs_typ, rhs_rel, dsize_rel, replacement_slice, 1, need_replacement, loc, scope, stmts, equiv_set, size_typ, dsize) if config.DEBUG_ARRAY_OPT >= 2: print("lhs_known:", lhs_known) print("rhs_known:", rhs_known) # If neither of the parts of the slice were negative constants # then we don't need to do slice replacement in the IR. if not need_replacement: replacement_slice_var = None else: # Create a new var for the replacement slice. replacement_slice_var = ir.Var(scope, mk_unique_var("replacement_slice"), loc) # Create a deepcopy of slice calltype so that when we change it below # the original isn't changed. Make the types of the parts of the slice # intp. new_arg_typs = (types.intp, types.intp) rs_calltype = self.typemap[index_def.func.name].get_call_type(self.context, new_arg_typs, {}) self.calltypes[replacement_slice] = rs_calltype stmts.append(ir.Assign(value=replacement_slice, target=replacement_slice_var, loc=loc)) # The type of the replacement slice is the same type as the original. 
self.typemap[replacement_slice_var.name] = self.typemap[index.name] if config.DEBUG_ARRAY_OPT >= 2: print("after rewriting negatives", "lhs_rel", lhs_rel, "rhs_rel", rhs_rel) if lhs_known and rhs_known: if config.DEBUG_ARRAY_OPT >= 2: print("lhs and rhs known so return static size") return self.gen_static_slice_size(lhs_rel, rhs_rel, loc, scope, stmts, equiv_set), replacement_slice_var if (lhs_rel == 0 and isinstance(rhs_rel, tuple) and equiv_set.is_equiv(dsize, rhs_rel[0]) and rhs_rel[1] == 0): return dsize, None slice_typ = types.intp orig_slice_typ = slice_typ size_var = ir.Var(scope, mk_unique_var("slice_size"), loc) size_val = ir.Expr.binop(operator.sub, rhs, lhs, loc=loc) self.calltypes[size_val] = signature(slice_typ, rhs_typ, lhs_typ) self._define(equiv_set, size_var, slice_typ, size_val) size_rel = equiv_set.get_rel(size_var) if config.DEBUG_ARRAY_OPT >= 2: print("size_rel", size_rel, type(size_rel)) wrap_var = ir.Var(scope, mk_unique_var("wrap"), loc) wrap_def = ir.Global('wrap_index', wrap_index, loc=loc) fnty = get_global_func_typ(wrap_index) sig = self.context.resolve_function_type(fnty, (orig_slice_typ, size_typ,), {}) self._define(equiv_set, wrap_var, fnty, wrap_def) def gen_wrap_if_not_known(val, val_typ, known): if not known: var = ir.Var(scope, mk_unique_var("var"), loc) var_typ = types.intp new_value = ir.Expr.call(wrap_var, [val, dsize], {}, loc) self._define(equiv_set, var, var_typ, new_value) self.calltypes[new_value] = sig return (var, var_typ, new_value) else: return (val, val_typ, None) var1, var1_typ, value1 = gen_wrap_if_not_known(lhs, lhs_typ, lhs_known) var2, var2_typ, value2 = gen_wrap_if_not_known(rhs, rhs_typ, rhs_known) post_wrap_size_var = ir.Var(scope, mk_unique_var("post_wrap_slice_size"), loc) post_wrap_size_val = ir.Expr.binop(operator.sub, var2, var1, loc=loc) self.calltypes[post_wrap_size_val] = signature(slice_typ, var2_typ, var1_typ) self._define(equiv_set, post_wrap_size_var, slice_typ, post_wrap_size_val) stmts.append(ir.Assign(value=size_val, target=size_var, loc=loc)) stmts.append(ir.Assign(value=wrap_def, target=wrap_var, loc=loc)) if value1 is not None: stmts.append(ir.Assign(value=value1, target=var1, loc=loc)) if value2 is not None: stmts.append(ir.Assign(value=value2, target=var2, loc=loc)) stmts.append(ir.Assign(value=post_wrap_size_val, target=post_wrap_size_var, loc=loc)) # rel_map keeps a map of relative sizes that we have seen so # that if we compute the same relative sizes different times # in different ways we can associate those two instances # of the same relative size to the same equivalence class. if isinstance(size_rel, tuple): if config.DEBUG_ARRAY_OPT >= 2: print("size_rel is tuple", equiv_set.rel_map) rel_map_entry = None for rme, rme_tuple in equiv_set.rel_map.items(): if rme[1] == size_rel[1] and equiv_set.is_equiv(rme[0], size_rel[0]): rel_map_entry = rme_tuple break if rel_map_entry is not None: # We have seen this relative size before so establish # equivalence to the previous variable. if config.DEBUG_ARRAY_OPT >= 2: print("establishing equivalence to", rel_map_entry) equiv_set.insert_equiv(size_var, rel_map_entry[0]) equiv_set.insert_equiv(post_wrap_size_var, rel_map_entry[1]) else: # The first time we've seen this relative size so # remember the variable defining that size. 
equiv_set.rel_map[size_rel] = (size_var, post_wrap_size_var) return post_wrap_size_var, replacement_slice_var def _index_to_shape(self, scope, equiv_set, var, ind_var): """For indexing like var[index] (either write or read), see if the index corresponds to a range/slice shape. Returns a 2-tuple where the first item is either None or a ir.Var to be used to replace the index variable in the outer getitem or setitem instruction. The second item is also a tuple returning the shape and prepending instructions. """ typ = self.typemap[var.name] require(isinstance(typ, types.ArrayCompatible)) ind_typ = self.typemap[ind_var.name] ind_shape = equiv_set._get_shape(ind_var) var_shape = equiv_set._get_shape(var) if isinstance(ind_typ, types.SliceType): seq_typs = (ind_typ,) else: require(isinstance(ind_typ, types.BaseTuple)) seq, op = find_build_sequence(self.func_ir, ind_var) require(op == 'build_tuple') seq_typs = tuple(self.typemap[x.name] for x in seq) require(len(ind_shape)==len(seq_typs)==len(var_shape)) stmts = [] def to_shape(typ, index, dsize): if isinstance(typ, types.SliceType): return self.slice_size(index, dsize, equiv_set, scope, stmts) elif isinstance(typ, types.Number): return None, None else: # unknown dimension size for this index, # so we'll raise GuardException require(False) shape_list = [] index_var_list = [] replace_index = False for (typ, size, dsize) in zip(seq_typs, ind_shape, var_shape): # Convert the given dimension of the get/setitem index expr. shape_part, index_var_part = to_shape(typ, size, dsize) shape_list.append(shape_part) # to_shape will return index_var_part as not None if a # replacement of the slice is required to convert from # negative indices to positive relative indices. if index_var_part is not None: # Remember that we need to replace the build_tuple. replace_index = True index_var_list.append(index_var_part) else: index_var_list.append(size) # If at least one of the dimensions required a new slice variable # then we'll need to replace the build_tuple for this get/setitem. if replace_index: # Multi-dimensional array access needs a replacement tuple built. if len(index_var_list) > 1: # Make a variable to hold the new build_tuple. replacement_build_tuple_var = ir.Var(scope, mk_unique_var("replacement_build_tuple"), ind_shape[0].loc) # Create the build tuple from the accumulated index vars above. new_build_tuple = ir.Expr.build_tuple(index_var_list, ind_shape[0].loc) stmts.append(ir.Assign(value=new_build_tuple, target=replacement_build_tuple_var, loc=ind_shape[0].loc)) # New build_tuple has same type as the original one. 
self.typemap[replacement_build_tuple_var.name] = ind_typ else: replacement_build_tuple_var = index_var_list[0] else: replacement_build_tuple_var = None shape = tuple(shape_list) require(not all(x == None for x in shape)) shape = tuple(x for x in shape if x != None) return (replacement_build_tuple_var, (shape, stmts)) def _analyze_op_getitem(self, scope, equiv_set, expr): result = self._index_to_shape(scope, equiv_set, expr.value, expr.index) if result[0] is not None: expr.index = result[0] return result[1] def _analyze_op_static_getitem(self, scope, equiv_set, expr): var = expr.value typ = self.typemap[var.name] if not isinstance(typ, types.BaseTuple): result = self._index_to_shape(scope, equiv_set, expr.value, expr.index_var) if result[0] is not None: expr.index_var = result[0] return result[1] shape = equiv_set._get_shape(var) require(isinstance(expr.index, int) and expr.index < len(shape)) return shape[expr.index], [] def _analyze_op_unary(self, scope, equiv_set, expr): require(expr.fn in UNARY_MAP_OP) # for scalars, only + operator results in equivalence # for example, if "m = -n", m and n are not equivalent if self._isarray(expr.value.name) or expr.fn == operator.add: return expr.value, [] return None def _analyze_op_binop(self, scope, equiv_set, expr): require(expr.fn in BINARY_MAP_OP) return self._analyze_broadcast(scope, equiv_set, expr.loc, [expr.lhs, expr.rhs]) def _analyze_op_inplace_binop(self, scope, equiv_set, expr): require(expr.fn in INPLACE_BINARY_MAP_OP) return self._analyze_broadcast(scope, equiv_set, expr.loc, [expr.lhs, expr.rhs]) def _analyze_op_arrayexpr(self, scope, equiv_set, expr): return self._analyze_broadcast(scope, equiv_set, expr.loc, expr.list_vars()) def _analyze_op_build_tuple(self, scope, equiv_set, expr): consts = [] for var in expr.items: x = guard(find_const, self.func_ir, var) if x is not None: consts.append(x) else: break else: out = tuple([ir.Const(x, expr.loc) for x in consts]) return out, [], ir.Const(tuple(consts), expr.loc) # default return for non-const return tuple(expr.items), [] def _analyze_op_call(self, scope, equiv_set, expr): from numba.stencil import StencilFunc callee = expr.func callee_def = get_definition(self.func_ir, callee) if (isinstance(callee_def, (ir.Global, ir.FreeVar)) and is_namedtuple_class(callee_def.value)): return tuple(expr.args), [] if (isinstance(callee_def, (ir.Global, ir.FreeVar)) and isinstance(callee_def.value, StencilFunc)): args = expr.args return self._analyze_stencil(scope, equiv_set, callee_def.value, expr.loc, args, dict(expr.kws)) fname, mod_name = find_callname( self.func_ir, expr, typemap=self.typemap) added_mod_name = False # call via attribute (i.e. array.func) if (isinstance(mod_name, ir.Var) and isinstance(self.typemap[mod_name.name], types.ArrayCompatible)): args = [mod_name] + expr.args mod_name = 'numpy' # Remember that args and expr.args don't alias. added_mod_name = True else: args = expr.args fname = "_analyze_op_call_{}_{}".format( mod_name, fname).replace('.', '_') if fname in UFUNC_MAP_OP: # known numpy ufuncs return self._analyze_broadcast(scope, equiv_set, expr.loc, args) else: try: fn = getattr(self, fname) except AttributeError: return None result = guard(fn, scope, equiv_set, args, dict(expr.kws)) # We want the ability for function fn to modify arguments. # If args and expr.args don't alias then we need the extra # step of assigning back into expr.args from the args that # was passed to fn. 
            if added_mod_name:
                expr.args = args[1:]
            return result

    def _analyze_op_call___builtin___len(self, scope, equiv_set, args, kws):
        # python 2 version of len()
        return self._analyze_op_call_builtins_len(scope, equiv_set, args, kws)

    def _analyze_op_call_builtins_len(self, scope, equiv_set, args, kws):
        # python 3 version of len()
        require(len(args) == 1)
        var = args[0]
        typ = self.typemap[var.name]
        require(isinstance(typ, types.ArrayCompatible))
        shape = equiv_set._get_shape(var)
        return shape[0], [], shape[0]

    def _analyze_op_call_numba_array_analysis_assert_equiv(self, scope,
                                                           equiv_set,
                                                           args, kws):
        equiv_set.insert_equiv(*args[1:])
        return None

    def _analyze_op_call_numba_array_analysis_wrap_index(self, scope,
                                                         equiv_set,
                                                         args, kws):
        """ Analyze wrap_index calls added by a previous run of
            Array Analysis
        """
        require(len(args) == 2)
        # Two parts to wrap index, the specified slice size...
        slice_size = args[0].name
        # ...and the size of the dimension.
        dim_size = args[1].name
        # Get the equivalence class ids for both.
        slice_eq = equiv_set._get_or_add_ind(slice_size)
        dim_eq = equiv_set._get_or_add_ind(dim_size)
        # See if a previous wrap_index call we've analyzed maps from
        # the same pair of equivalence class ids for slice and dim size.
        if (slice_eq, dim_eq) in equiv_set.wrap_map:
            wrap_ind = equiv_set.wrap_map[(slice_eq, dim_eq)]
            require(wrap_ind in equiv_set.ind_to_var)
            vs = equiv_set.ind_to_var[wrap_ind]
            require(vs != [])
            # Return the shape of the variable from the previous wrap_index.
            return ((vs[0],), [])
        else:
            # We haven't seen this combination of slice and dim
            # equivalence class ids so return a WrapIndexMeta so that
            # _analyze_inst can establish the connection to the lhs var.
            return (WrapIndexMeta(slice_eq, dim_eq), [])

    def _analyze_numpy_create_array(self, scope, equiv_set, args, kws):
        shape_var = None
        if len(args) > 0:
            shape_var = args[0]
        elif 'shape' in kws:
            shape_var = kws['shape']
        if shape_var:
            return shape_var, []
        raise NotImplementedError("Must specify a shape for array creation")

    def _analyze_op_call_numpy_empty(self, scope, equiv_set, args, kws):
        return self._analyze_numpy_create_array(scope, equiv_set, args, kws)

    def _analyze_op_call_numba_unsafe_ndarray_empty_inferred(self, scope,
                                                             equiv_set,
                                                             args, kws):
        return self._analyze_numpy_create_array(scope, equiv_set, args, kws)

    def _analyze_op_call_numpy_zeros(self, scope, equiv_set, args, kws):
        return self._analyze_numpy_create_array(scope, equiv_set, args, kws)

    def _analyze_op_call_numpy_ones(self, scope, equiv_set, args, kws):
        return self._analyze_numpy_create_array(scope, equiv_set, args, kws)

    def _analyze_op_call_numpy_eye(self, scope, equiv_set, args, kws):
        if len(args) > 0:
            N = args[0]
        elif 'N' in kws:
            N = kws['N']
        else:
            raise NotImplementedError(
                "Expect one argument (or 'N') to eye function")
        if 'M' in kws:
            M = kws['M']
        else:
            M = N
        return (N, M), []

    def _analyze_op_call_numpy_identity(self, scope, equiv_set, args, kws):
        assert len(args) > 0
        N = args[0]
        return (N, N), []
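    # Illustrative note (added commentary, not part of the original pass):
    # for np.eye(N, M) the handler above infers the shape (N, M), with M
    # defaulting to N when absent, and np.identity(N) infers (N, N), so the
    # row and column sizes fall into the same equivalence class.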
    def _analyze_op_call_numpy_diag(self, scope, equiv_set, args, kws):
        # We can only reason about the output shape when the input is 1D or
        # square 2D.
        assert len(args) > 0
        a = args[0]
        assert(isinstance(a, ir.Var))
        atyp = self.typemap[a.name]
        if isinstance(atyp, types.ArrayCompatible):
            if atyp.ndim == 2:
                if 'k' in kws:  # will proceed only when k = 0 or absent
                    k = kws['k']
                    if not equiv_set.is_equiv(k, 0):
                        return None
                (m, n) = equiv_set._get_shape(a)
                if equiv_set.is_equiv(m, n):
                    return (m,), []
            elif atyp.ndim == 1:
                (m,) = equiv_set._get_shape(a)
                return (m, m), []
        return None

    def _analyze_numpy_array_like(self, scope, equiv_set, args, kws):
        assert(len(args) > 0)
        var = args[0]
        typ = self.typemap[var.name]
        if isinstance(typ, types.Integer):
            return (1,), []
        elif (isinstance(typ, types.ArrayCompatible) and
              equiv_set.has_shape(var)):
            return var, []
        return None

    def _analyze_op_call_numpy_ravel(self, scope, equiv_set, args, kws):
        assert(len(args) == 1)
        var = args[0]
        typ = self.typemap[var.name]
        assert isinstance(typ, types.ArrayCompatible)
        # output array is same shape as input if input is 1D
        if typ.ndim == 1 and equiv_set.has_shape(var):
            if typ.layout == 'C':
                # output is the same as input (no copy) for 'C' layout
                # optimize out the call
                return var, [], var
            else:
                return var, []
        # TODO: handle multi-D input arrays (calc array size)
        return None

    def _analyze_op_call_numpy_copy(self, *args):
        return self._analyze_numpy_array_like(*args)

    def _analyze_op_call_numpy_empty_like(self, *args):
        return self._analyze_numpy_array_like(*args)

    def _analyze_op_call_numpy_zeros_like(self, *args):
        return self._analyze_numpy_array_like(*args)

    def _analyze_op_call_numpy_ones_like(self, *args):
        return self._analyze_numpy_array_like(*args)

    def _analyze_op_call_numpy_full_like(self, *args):
        return self._analyze_numpy_array_like(*args)

    def _analyze_op_call_numpy_asfortranarray(self, *args):
        return self._analyze_numpy_array_like(*args)

    def _analyze_op_call_numpy_reshape(self, scope, equiv_set, args, kws):
        n = len(args)
        assert(n > 1)
        if n == 2:
            typ = self.typemap[args[1].name]
            if isinstance(typ, types.BaseTuple):
                return args[1], []
        # Reshape is allowed to take one argument that has the value <0.
        # This means that the size of that dimension should be inferred from
        # the size of the array being reshaped and the other dimensions
        # specified.  Our general approach here is to see if the reshape
        # has any <0 arguments.  If it has more than one then throw a
        # ValueError.  If exactly one <0 argument is found, remember its
        # argument index.
        stmts = []
        neg_one_index = -1
        for arg_index in range(1, len(args)):
            reshape_arg = args[arg_index]
            reshape_arg_def = guard(get_definition, self.func_ir, reshape_arg)
            if isinstance(reshape_arg_def, ir.Const):
                if reshape_arg_def.value < 0:
                    if neg_one_index == -1:
                        neg_one_index = arg_index
                    else:
                        msg = ("The reshape API may only include one negative"
                               " argument. %s" % str(reshape_arg.loc))
                        raise ValueError(msg)
        if neg_one_index >= 0:
            # If exactly one <0 argument to reshape was found, then we are
            # going to insert code to calculate the missing dimension and
            # then replace the negative with the calculated size.  We do this
            # because we can't let array equivalence analysis think that some
            # array has a negative dimension size.
            loc = args[0].loc
            # Create a variable to hold the size of the array being reshaped.
            calc_size_var = ir.Var(scope, mk_unique_var("calc_size_var"), loc)
            self.typemap[calc_size_var.name] = types.intp
            # Assign the size of the array to calc_size_var.
            init_calc_var = ir.Assign(ir.Expr.getattr(args[0], "size", loc),
                                      calc_size_var, loc)
            stmts.append(init_calc_var)
            # For each other dimension, divide the current size by the
            # specified dimension size.
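            # Worked example (illustrative, added commentary): reshaping an
            # array whose size is 12 with arguments (3, -1) starts from 12,
            # divides by the known dimension 3, and replaces the -1 argument
            # with a variable holding 12 // 3 == 4.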
            # Once all such dimensions have been done then what is left is
            # the size of the negative dimension.
            for arg_index in range(1, len(args)):
                # Skip the negative dimension.
                if arg_index == neg_one_index:
                    continue
                div_calc_size_var = ir.Var(scope,
                                           mk_unique_var("calc_size_var"),
                                           loc)
                self.typemap[div_calc_size_var.name] = types.intp
                # Calculate the next size as current size // the current
                # arg's dimension size.
                new_binop = ir.Expr.binop(operator.floordiv, calc_size_var,
                                          args[arg_index], loc)
                div_calc = ir.Assign(new_binop, div_calc_size_var, loc)
                self.calltypes[new_binop] = signature(types.intp, types.intp,
                                                      types.intp)
                stmts.append(div_calc)
                calc_size_var = div_calc_size_var
            # Put the calculated value back into the reshape arguments,
            # replacing the negative.
            args[neg_one_index] = calc_size_var
        return tuple(args[1:]), stmts

    def _analyze_op_call_numpy_transpose(self, scope, equiv_set, args, kws):
        in_arr = args[0]
        typ = self.typemap[in_arr.name]
        assert isinstance(typ, types.ArrayCompatible), \
            "Invalid np.transpose argument"
        shape = equiv_set._get_shape(in_arr)
        if len(args) == 1:
            return tuple(reversed(shape)), []
        axes = [guard(find_const, self.func_ir, a) for a in args[1:]]
        if isinstance(axes[0], tuple):
            axes = list(axes[0])
        if None in axes:
            return None
        ret = [shape[i] for i in axes]
        return tuple(ret), []

    def _analyze_op_call_numpy_random_rand(self, scope, equiv_set, args, kws):
        if len(args) > 0:
            return tuple(args), []
        return None

    def _analyze_op_call_numpy_random_randn(self, *args):
        return self._analyze_op_call_numpy_random_rand(*args)

    def _analyze_op_numpy_random_with_size(self, pos, scope, equiv_set,
                                           args, kws):
        if 'size' in kws:
            return kws['size'], []
        if len(args) > pos:
            return args[pos], []
        return None

    def _analyze_op_call_numpy_random_ranf(self, *args):
        return self._analyze_op_numpy_random_with_size(0, *args)

    def _analyze_op_call_numpy_random_random_sample(self, *args):
        return self._analyze_op_numpy_random_with_size(0, *args)

    def _analyze_op_call_numpy_random_sample(self, *args):
        return self._analyze_op_numpy_random_with_size(0, *args)

    def _analyze_op_call_numpy_random_random(self, *args):
        return self._analyze_op_numpy_random_with_size(0, *args)

    def _analyze_op_call_numpy_random_standard_normal(self, *args):
        return self._analyze_op_numpy_random_with_size(0, *args)

    def _analyze_op_call_numpy_random_chisquare(self, *args):
        return self._analyze_op_numpy_random_with_size(1, *args)

    def _analyze_op_call_numpy_random_weibull(self, *args):
        return self._analyze_op_numpy_random_with_size(1, *args)

    def _analyze_op_call_numpy_random_power(self, *args):
        return self._analyze_op_numpy_random_with_size(1, *args)

    def _analyze_op_call_numpy_random_geometric(self, *args):
        return self._analyze_op_numpy_random_with_size(1, *args)

    def _analyze_op_call_numpy_random_exponential(self, *args):
        return self._analyze_op_numpy_random_with_size(1, *args)

    def _analyze_op_call_numpy_random_poisson(self, *args):
        return self._analyze_op_numpy_random_with_size(1, *args)

    def _analyze_op_call_numpy_random_rayleigh(self, *args):
        return self._analyze_op_numpy_random_with_size(1, *args)

    def _analyze_op_call_numpy_random_normal(self, *args):
        return self._analyze_op_numpy_random_with_size(2, *args)

    def _analyze_op_call_numpy_random_uniform(self, *args):
        return self._analyze_op_numpy_random_with_size(2, *args)

    def _analyze_op_call_numpy_random_beta(self, *args):
        return self._analyze_op_numpy_random_with_size(2, *args)

    def _analyze_op_call_numpy_random_binomial(self, *args):
        return self._analyze_op_numpy_random_with_size(2, *args)
    def _analyze_op_call_numpy_random_f(self, *args):
        return self._analyze_op_numpy_random_with_size(2, *args)

    def _analyze_op_call_numpy_random_gamma(self, *args):
        return self._analyze_op_numpy_random_with_size(2, *args)

    def _analyze_op_call_numpy_random_lognormal(self, *args):
        return self._analyze_op_numpy_random_with_size(2, *args)

    def _analyze_op_call_numpy_random_laplace(self, *args):
        return self._analyze_op_numpy_random_with_size(2, *args)

    def _analyze_op_call_numpy_random_randint(self, *args):
        return self._analyze_op_numpy_random_with_size(2, *args)

    def _analyze_op_call_numpy_random_triangular(self, *args):
        return self._analyze_op_numpy_random_with_size(3, *args)

    def _analyze_op_call_numpy_concatenate(self, scope, equiv_set, args, kws):
        assert(len(args) > 0)
        loc = args[0].loc
        seq, op = find_build_sequence(self.func_ir, args[0])
        n = len(seq)
        require(n > 0)
        axis = 0
        if 'axis' in kws:
            if isinstance(kws['axis'], int):  # internal use only
                axis = kws['axis']
            else:
                axis = find_const(self.func_ir, kws['axis'])
        elif len(args) > 1:
            axis = find_const(self.func_ir, args[1])
        require(isinstance(axis, int))
        require(op == 'build_tuple')
        shapes = [equiv_set._get_shape(x) for x in seq]
        if axis < 0:
            axis = len(shapes[0]) + axis
        require(0 <= axis < len(shapes[0]))
        asserts = []
        new_shape = []
        if n == 1:  # from one array N-dimension to (N-1)-dimension
            shape = shapes[0]
            # first size is the count, pop it out of shapes
            n = equiv_set.get_equiv_const(shapes[0])
            shape.pop(0)
            for i in range(len(shape)):
                if i == axis:
                    m = equiv_set.get_equiv_const(shape[i])
                    size = m * n if (m and n) else None
                else:
                    size = self._sum_size(equiv_set, shapes[0])
                new_shape.append(size)
        else:  # from n arrays N-dimension to N-dimension
            for i in range(len(shapes[0])):
                if i == axis:
                    size = self._sum_size(
                        equiv_set, [shape[i] for shape in shapes])
                else:
                    sizes = [shape[i] for shape in shapes]
                    asserts.append(
                        self._call_assert_equiv(scope, loc, equiv_set, sizes))
                    size = sizes[0]
                new_shape.append(size)
        return tuple(new_shape), sum(asserts, [])

    def _analyze_op_call_numpy_stack(self, scope, equiv_set, args, kws):
        assert(len(args) > 0)
        loc = args[0].loc
        seq, op = find_build_sequence(self.func_ir, args[0])
        n = len(seq)
        require(n > 0)
        axis = 0
        if 'axis' in kws:
            if isinstance(kws['axis'], int):  # internal use only
                axis = kws['axis']
            else:
                axis = find_const(self.func_ir, kws['axis'])
        elif len(args) > 1:
            axis = find_const(self.func_ir, args[1])
        require(isinstance(axis, int))
        # only build_tuple can give reliable count
        require(op == 'build_tuple')
        shapes = [equiv_set._get_shape(x) for x in seq]
        asserts = self._call_assert_equiv(scope, loc, equiv_set, seq)
        shape = shapes[0]
        if axis < 0:
            axis = len(shape) + axis + 1
        require(0 <= axis <= len(shape))
        new_shape = list(shape[0:axis]) + [n] + list(shape[axis:])
        return tuple(new_shape), asserts
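    # Illustrative note (added commentary, not part of the original pass):
    # np.stack inserts the element count as a new dimension at the given
    # axis, so stacking three arrays of shape (a, b) with axis=1 yields the
    # inferred shape (a, 3, b) in the handler above.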
    def _analyze_op_call_numpy_vstack(self, scope, equiv_set, args, kws):
        assert(len(args) == 1)
        seq, op = find_build_sequence(self.func_ir, args[0])
        n = len(seq)
        require(n > 0)
        typ = self.typemap[seq[0].name]
        require(isinstance(typ, types.ArrayCompatible))
        if typ.ndim < 2:
            return self._analyze_op_call_numpy_stack(scope, equiv_set,
                                                     args, kws)
        else:
            kws['axis'] = 0
            return self._analyze_op_call_numpy_concatenate(scope, equiv_set,
                                                           args, kws)

    def _analyze_op_call_numpy_hstack(self, scope, equiv_set, args, kws):
        assert(len(args) == 1)
        seq, op = find_build_sequence(self.func_ir, args[0])
        n = len(seq)
        require(n > 0)
        typ = self.typemap[seq[0].name]
        require(isinstance(typ, types.ArrayCompatible))
        if typ.ndim < 2:
            kws['axis'] = 0
        else:
            kws['axis'] = 1
        return self._analyze_op_call_numpy_concatenate(scope, equiv_set,
                                                       args, kws)

    def _analyze_op_call_numpy_dstack(self, scope, equiv_set, args, kws):
        assert(len(args) == 1)
        seq, op = find_build_sequence(self.func_ir, args[0])
        n = len(seq)
        require(n > 0)
        typ = self.typemap[seq[0].name]
        require(isinstance(typ, types.ArrayCompatible))
        if typ.ndim == 1:
            kws['axis'] = 1
            result = self._analyze_op_call_numpy_stack(
                scope, equiv_set, args, kws)
            require(result)
            (shape, pre) = result
            shape = tuple([1] + list(shape))
            return shape, pre
        elif typ.ndim == 2:
            kws['axis'] = 2
            return self._analyze_op_call_numpy_stack(scope, equiv_set,
                                                     args, kws)
        else:
            kws['axis'] = 2
            return self._analyze_op_call_numpy_concatenate(scope, equiv_set,
                                                           args, kws)

    def _analyze_op_call_numpy_cumsum(self, scope, equiv_set, args, kws):
        # TODO
        return None

    def _analyze_op_call_numpy_cumprod(self, scope, equiv_set, args, kws):
        # TODO
        return None

    def _analyze_op_call_numpy_linspace(self, scope, equiv_set, args, kws):
        n = len(args)
        num = 50
        if n > 2:
            num = args[2]
        elif 'num' in kws:
            num = kws['num']
        return (num,), []

    def _analyze_op_call_numpy_dot(self, scope, equiv_set, args, kws):
        n = len(args)
        assert(n >= 2)
        loc = args[0].loc
        require(all([self._isarray(x.name) for x in args]))
        typs = [self.typemap[x.name] for x in args]
        dims = [ty.ndim for ty in typs]
        require(all(x > 0 for x in dims))
        if dims[0] == 1 and dims[1] == 1:
            return None
        shapes = [equiv_set._get_shape(x) for x in args]
        if dims[0] == 1:
            asserts = self._call_assert_equiv(
                scope, loc, equiv_set, [shapes[0][0], shapes[1][-2]])
            return tuple(shapes[1][0:-2] + shapes[1][-1:]), asserts
        if dims[1] == 1:
            asserts = self._call_assert_equiv(
                scope, loc, equiv_set, [shapes[0][-1], shapes[1][0]])
            return tuple(shapes[0][0:-1]), asserts
        if dims[0] == 2 and dims[1] == 2:
            asserts = self._call_assert_equiv(
                scope, loc, equiv_set, [shapes[0][1], shapes[1][0]])
            return (shapes[0][0], shapes[1][1]), asserts
        if dims[0] > 2:  # TODO: handle higher dimension cases
            pass
        return None

    def _analyze_stencil(self, scope, equiv_set, stencil_func, loc, args,
                         kws):
        # stencil requires that all relatively indexed array arguments are
        # of same size
        std_idx_arrs = stencil_func.options.get('standard_indexing', ())
        kernel_arg_names = stencil_func.kernel_ir.arg_names
        if isinstance(std_idx_arrs, str):
            std_idx_arrs = (std_idx_arrs,)
        rel_idx_arrs = []
        assert(len(args) > 0 and len(args) == len(kernel_arg_names))
        for arg, var in zip(kernel_arg_names, args):
            typ = self.typemap[var.name]
            if (isinstance(typ, types.ArrayCompatible) and
                    not(arg in std_idx_arrs)):
                rel_idx_arrs.append(var)
        n = len(rel_idx_arrs)
        require(n > 0)
        asserts = self._call_assert_equiv(scope, loc, equiv_set, rel_idx_arrs)
        shape = equiv_set.get_shape(rel_idx_arrs[0])
        return shape, asserts

    def _analyze_op_call_numpy_linalg_inv(self, scope, equiv_set, args, kws):
        require(len(args) >= 1)
        return equiv_set._get_shape(args[0]), []

    def _analyze_broadcast(self, scope, equiv_set, loc, args):
        """Infer shape equivalence of arguments based on Numpy broadcast
        rules and return shape of output
        https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html
        """
        arrs = list(filter(lambda a: self._isarray(a.name), args))
        require(len(arrs) > 0)
        names = [x.name for x in arrs]
        dims = [self.typemap[x.name].ndim for x in arrs]
        max_dim = max(dims)
        require(max_dim > 0)
        try:
            shapes = [equiv_set.get_shape(x) for x in arrs]
        except GuardException:
            return arrs[0], self._call_assert_equiv(scope, loc, equiv_set,
                                                    arrs)
        return self._broadcast_assert_shapes(scope, equiv_set, loc,
                                             shapes, names)
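    # Illustrative note (added commentary, not part of the original pass):
    # under the broadcast handling above, input shapes such as (3, 1, 5) and
    # (4, 5) produce the output shape (3, 4, 5); only the trailing 5s need an
    # assert_equiv, since size-1 and missing leading dimensions always
    # broadcast.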
    def _broadcast_assert_shapes(self, scope, equiv_set, loc, shapes, names):
        """Produce assert_equiv for sizes in each dimension, taking into
        account dimension coercion and constant size of 1.
        """
        asserts = []
        new_shape = []
        max_dim = max([len(shape) for shape in shapes])
        const_size_one = None
        for i in range(max_dim):
            sizes = []
            size_names = []
            for name, shape in zip(names, shapes):
                if i < len(shape):
                    size = shape[len(shape) - 1 - i]
                    const_size = equiv_set.get_equiv_const(size)
                    if const_size == 1:
                        const_size_one = size
                    else:
                        sizes.append(size)  # non-1 size to front
                        size_names.append(name)
            if sizes == []:
                assert(const_size_one is not None)
                sizes.append(const_size_one)
                size_names.append("1")
            asserts.append(self._call_assert_equiv(scope, loc, equiv_set,
                                                   sizes, names=size_names))
            new_shape.append(sizes[0])
        return tuple(reversed(new_shape)), sum(asserts, [])

    def _call_assert_equiv(self, scope, loc, equiv_set, args, names=None):
        insts = self._make_assert_equiv(
            scope, loc, equiv_set, args, names=names)
        if len(args) > 1:
            equiv_set.insert_equiv(*args)
        return insts

    def _make_assert_equiv(self, scope, loc, equiv_set, _args, names=None):
        # filter out those that are already equivalent
        if config.DEBUG_ARRAY_OPT >= 2:
            print("make_assert_equiv:", _args, names)
        if names is None:
            names = [x.name for x in _args]
        args = []
        arg_names = []
        for name, x in zip(names, _args):
            if config.DEBUG_ARRAY_OPT >= 2:
                print("name, x:", name, x)
            seen = False
            for y in args:
                if config.DEBUG_ARRAY_OPT >= 2:
                    print("is equiv to?", y, equiv_set.is_equiv(x, y))
                if equiv_set.is_equiv(x, y):
                    seen = True
                    break
            if not seen:
                args.append(x)
                arg_names.append(name)
        # no assertion necessary if there are fewer than two
        if len(args) < 2:
            if config.DEBUG_ARRAY_OPT >= 2:
                print("Will not insert assert_equiv as args are known to "
                      "be equivalent.")
            return []
        msg = "Sizes of {} do not match on {}".format(', '.join(arg_names),
                                                      loc)
        msg_val = ir.Const(msg, loc)
        msg_typ = types.StringLiteral(msg)
        msg_var = ir.Var(scope, mk_unique_var("msg"), loc)
        self.typemap[msg_var.name] = msg_typ
        argtyps = tuple([msg_typ] + [self.typemap[x.name] for x in args])
        # assert_equiv takes vararg, which requires a tuple as argument type
        tup_typ = types.BaseTuple.from_types(argtyps)
        # prepare function variable whose type may vary since it takes vararg
        assert_var = ir.Var(scope, mk_unique_var("assert"), loc)
        assert_def = ir.Global('assert_equiv', assert_equiv, loc=loc)
        fnty = get_global_func_typ(assert_equiv)
        sig = self.context.resolve_function_type(fnty, (tup_typ,), {})
        self._define(equiv_set, assert_var, fnty, assert_def)
        # The return value from assert_equiv is always of none type.
        var = ir.Var(scope, mk_unique_var("ret"), loc)
        value = ir.Expr.call(assert_var, [msg_var] + args, {}, loc=loc)
        self._define(equiv_set, var, types.none, value)
        self.calltypes[value] = sig
        return [ir.Assign(value=msg_val, target=msg_var, loc=loc),
                ir.Assign(value=assert_def, target=assert_var, loc=loc),
                ir.Assign(value=value, target=var, loc=loc),
                ]

    def _gen_shape_call(self, equiv_set, var, ndims, shape):
        out = []
        # attr call: A_sh_attr = getattr(A, shape)
        if isinstance(shape, ir.Var):
            shape = equiv_set.get_shape(shape)
        # already a tuple variable that contains size
        if isinstance(shape, ir.Var):
            attr_var = shape
            shape_attr_call = None
            shape = None
        else:
            shape_attr_call = ir.Expr.getattr(var, "shape", var.loc)
            attr_var = ir.Var(var.scope, mk_unique_var(
                "{}_shape".format(var.name)), var.loc)
            shape_attr_typ = types.containers.UniTuple(types.intp, ndims)
        size_vars = []
        use_attr_var = False
        # trim shape tuple if it is more than ndim
        if shape:
            nshapes = len(shape)
            if ndims < nshapes:
                shape = shape[(nshapes - ndims):]
        for i in range(ndims):
            skip = False
            if shape and shape[i]:
                if isinstance(shape[i], ir.Var):
                    typ = self.typemap[shape[i].name]
                    if (isinstance(typ, types.Number) or
                            isinstance(typ, types.SliceType)):
                        size_var = shape[i]
                        skip = True
                else:
                    if isinstance(shape[i], int):
                        size_val = ir.Const(shape[i], var.loc)
                    else:
                        size_val = shape[i]
                    assert(isinstance(size_val, ir.Const))
                    size_var = ir.Var(var.scope, mk_unique_var(
                        "{}_size{}".format(var.name, i)), var.loc)
                    out.append(ir.Assign(size_val, size_var, var.loc))
                    self._define(equiv_set, size_var, types.intp, size_val)
                    skip = True
            if not skip:
                # get size: Asize0 = A_sh_attr[0]
                size_var = ir.Var(var.scope, mk_unique_var(
                    "{}_size{}".format(var.name, i)), var.loc)
                getitem = ir.Expr.static_getitem(attr_var, i, None, var.loc)
                use_attr_var = True
                self.calltypes[getitem] = None
                out.append(ir.Assign(getitem, size_var, var.loc))
                self._define(equiv_set, size_var, types.intp, getitem)
            size_vars.append(size_var)
        if use_attr_var and shape_attr_call:
            # only insert shape call if there is any getitem call
            out.insert(0, ir.Assign(shape_attr_call, attr_var, var.loc))
            self._define(equiv_set, attr_var, shape_attr_typ, shape_attr_call)
        return tuple(size_vars), out

    def _isarray(self, varname):
        typ = self.typemap[varname]
        return (isinstance(typ, types.npytypes.Array) and
                typ.ndim > 0)

    def _sum_size(self, equiv_set, sizes):
        """Return the sum of the given list of sizes if they are all
        equivalent to some constant, or None otherwise.
        """
        s = 0
        for size in sizes:
            n = equiv_set.get_equiv_const(size)
            if n is None:
                return None
            else:
                s += n
        return s


UNARY_MAP_OP = list(
    npydecl.NumpyRulesUnaryArrayOperator._op_map.keys()) + [operator.pos]
BINARY_MAP_OP = npydecl.NumpyRulesArrayOperator._op_map.keys()
INPLACE_BINARY_MAP_OP = npydecl.NumpyRulesInplaceArrayOperator._op_map.keys()
UFUNC_MAP_OP = [f.__name__ for f in npydecl.supported_ufuncs]
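# Illustrative note (added commentary, not part of the original module): the
# *_MAP_OP lists above drive the require() checks in _analyze_op_unary,
# _analyze_op_binop, _analyze_op_inplace_binop and _analyze_op_call, so only
# operators and ufuncs known to follow Numpy broadcast semantics participate
# in shape equivalence propagation.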