# cython: infer_types=True
# cython: cdivision=True
# cython: boundscheck=False
# cython: wraparound=False
from murmurhash.mrmr cimport hash32

cimport numpy as np
from libc.stdint cimport uint64_t, int32_t, uint32_t

from ..neural._classes.model import Model
from .. import describe
from ..describe import Dimension, Synapses, Biases, Gradient
from ..neural.util import is_cupy_array, is_numpy_array


# Sigh, these break pickling if they're lambdas...
def _get_W_shape(obj):
    return (obj.nO * obj.length,)


def _init_W(W, ops):
    W.fill(0.)


def _get_bias_shape(obj):
    return (obj.nO,)


@describe.attributes(
    nO=Dimension("Output size"),
    length=Dimension("Weights length"),
    W=Synapses("Weights matrix", _get_W_shape, _init_W),
    b=Biases("Biases", _get_bias_shape),
    d_W=Gradient("W"),
    d_b=Gradient("b"),
)
class LinearModel(Model):
    name = 'linear'

    def __init__(self, nO, length=2**18, **kwargs):
        # `length` must be a power of two: the C kernels below mask hashes
        # with (length - 1) to index into the flat weights table.
        Model.__init__(self, **kwargs)
        self.nO = nO
        self.length = length

    def begin_update(self, keys_values_lengths, drop=0.):
        keys, values, lengths = keys_values_lengths
        if is_cupy_array(keys):
            return self._begin_gpu_update(keys, values, lengths, drop=drop)
        else:
            return self._begin_cpu_update(keys, values, lengths, drop=drop)

    def _begin_gpu_update(self, keys, values, lengths, drop):
        # Currently we don't have a GPU-compatible implementation of this
        # function :(. It sucks, but at least we can get the correct result
        # by copying to CPU.
        cpu_keys = keys.get()
        cpu_values = values.get()
        cpu_lengths = lengths.get()
        return self._begin_cpu_update(cpu_keys, cpu_values, cpu_lengths,
                                      drop=drop)

    def _begin_cpu_update(self, uint64_t[::1] keys, np.ndarray values_,
                          long[::1] lengths, drop):
        if drop is not None:
            drop *= self.drop_factor
        mask = self.ops.get_dropout_mask((values_.shape[0],), drop)
        cdef float[::1] values
        if mask is not None:
            values = values_ * mask
        else:
            values = values_
        # One row of class scores per sequence, initialized with the bias.
        cdef float[:, ::1] scores = self.ops.allocate((len(lengths), self.nO)) + self.b
        cdef float[::1] weights = self.W
        set_scoresC(&scores[0, 0],
            &keys[0], &values[0], &lengths[0],
            lengths.shape[0], self.nO,
            &weights[0], self.length)
        return scores, _finish_linear_update(self, keys, values, lengths)


class _finish_linear_update(object):
    """Move this out of a closure, into its own callable object, to avoid
    pickling errors :(."""
    def __init__(self, layer, keys, values, lengths):
        self.layer = layer
        self.keys = keys
        self.values = values
        self.lengths = lengths

    def __call__(self, float[:, ::1] d_scores, sgd=None):
        cdef float[::1] d_weights = self.layer.d_W
        cdef float[::1] d_bias = self.layer.d_b
        cdef uint64_t[::1] keys = self.keys
        cdef float[::1] values = self.values
        cdef long[::1] lengths = self.lengths
        set_gradientC(&d_weights[0],
            &keys[0], &values[0], &lengths[0],
            lengths.shape[0], self.layer.nO,
            &d_scores[0, 0], self.layer.length)
        # The bias gradient is the sum of the score gradients over the batch.
        cdef int i, j
        for i in range(d_scores.shape[0]):
            for j in range(d_scores.shape[1]):
                d_bias[j] += d_scores[i, j]
        if sgd is not None:
            sgd(self.layer._mem.weights, self.layer._mem.gradient,
                key=self.layer.id)
        return None
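
# --- How the kernels below work --------------------------------------------
# Illustrative note: the C functions implement a "hash kernel" (feature
# hashing). Each feature key is hashed twice with MurmurHash, under seeds
# 0 and 1, each hash is masked down to a slot in the flat weights table,
# and both slots contribute to the class scores; using two hashes softens
# the effect of collisions. In rough Python pseudocode for one sequence
# (`hash32(key, seed)` here is a stand-in for the murmurhash call, not a
# function this module exposes):
#
#     scores = b.copy()                            # (nO,) bias row
#     for key, value in zip(keys, values):
#         idx1 = hash32(key, seed=0) & (length - 1)
#         idx2 = hash32(key, seed=1) & (length - 1)
#         scores += (W[idx1:idx1 + nO] + W[idx2:idx2 + nO]) * value
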
cdef void set_scoresC(float* scores,
        const uint64_t* keys, const float* values, const long* lengths,
        int batch_size, int nr_out,
        const float* weights, int nr_weight) nogil:
    cdef uint32_t idx1, idx2
    cdef uint32_t hash1, hash2
    for length in lengths[:batch_size]:
        for i in range(length):
            # Hash each key twice with different seeds, and mask the hashes
            # into the weights table (nr_weight must be a power of two).
            hash1 = hash32(<void*>&keys[i], sizeof(keys[i]), 0)
            hash2 = hash32(<void*>&keys[i], sizeof(keys[i]), 1)
            idx1 = hash1 & (nr_weight-1)
            idx2 = hash2 & (nr_weight-1)
            value = values[i]
            for clas in range(nr_out):
                scores[clas] += weights[idx1 + clas] * value
                scores[clas] += weights[idx2 + clas] * value
        scores += nr_out
        keys += length
        values += length


cdef void set_gradientC(float* d_weights,
        const uint64_t* keys, const float* values, const long* lengths,
        int batch_size, int nr_out,
        const float* d_scores, int nr_weight) nogil:
    cdef uint32_t idx1, idx2
    cdef uint32_t hash1, hash2
    for length in lengths[:batch_size]:
        for i in range(length):
            hash1 = hash32(<void*>&keys[i], sizeof(keys[i]), 0)
            hash2 = hash32(<void*>&keys[i], sizeof(keys[i]), 1)
            idx1 = hash1 & (nr_weight-1)
            idx2 = hash2 & (nr_weight-1)
            value = values[i]
            for clas in range(nr_out):
                # Mirror set_scoresC: both hashed slots receive the gradient.
                d_weights[idx1 + clas] += d_scores[clas] * value
                d_weights[idx2 + clas] += d_scores[clas] * value
        d_scores += nr_out
        keys += length
        # Advance in step with set_scoresC, so each sequence reads its own
        # values rather than re-reading the first sequence's.
        values += length
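
# --- Usage sketch -----------------------------------------------------------
# A minimal, illustrative example of driving this layer directly, assuming
# the layer's weights have been allocated (the exact allocation hooks depend
# on the surrounding thinc version). The input is a (keys, values, lengths)
# triple: hashed feature ids, their values, and the number of features per
# sequence.
#
#     import numpy
#
#     model = LinearModel(nO=3, length=2**18)
#     keys = numpy.asarray([11, 42, 77, 42], dtype='uint64')
#     values = numpy.asarray([1.0, 1.0, 0.5, 2.0], dtype='float32')
#     lengths = numpy.asarray([3, 1], dtype='int64')  # two sequences
#     scores, finish_update = model.begin_update((keys, values, lengths))
#     # scores has shape (2, 3): one row of class scores per sequence.
#     d_scores = numpy.ones((2, 3), dtype='float32')
#     finish_update(d_scores, sgd=None)  # accumulates into d_W and d_b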