# coding: utf8
from __future__ import unicode_literals

from .. import Model
from ...api import layerize
from ... import describe
from ...describe import Dimension, Synapses, Gradient


def inverse(total):
    """Compute 1 / (1 + total) elementwise, returning (output, backprop).

    The backprop callback applies the analytic derivative of 1/(1+t),
    i.e. -1 / (1+t)^2, to the incoming gradient.
    """
    output = 1.0 / (1 + total)

    def backward(d_inverse):
        # d/dt [1/(1+t)] = -1/(1+t)^2
        return d_inverse * (-1 / (total + 1) ** 2)

    return output, backward


def _get_mask(ops, shape, drop):
    # Boolean dropout mask: True where the unit is kept.
    return ops.xp.random.uniform(0.0, 1.0, shape) > drop


def Siamese(layer, similarity):
    """Combine a shared `layer` applied to both halves of paired inputs
    with a `similarity` layer comparing the two encodings.

    `inputs` to the returned model is a sequence of (in1, in2) pairs.
    Both halves of each pair share a single dropout mask (sized for the
    larger half) so corresponding units are dropped together.
    """

    def begin_update(inputs, drop=0.0):
        ops = layer.ops
        if drop not in (None, 0.0):
            dropped = []
            for in1, in2 in inputs:
                # Draw one mask big enough for the larger input, then
                # slice it so the pair shares the same dropped units.
                if in1.size > in2.size:
                    mask = _get_mask(ops, in1.shape, drop)
                else:
                    mask = _get_mask(ops, in2.shape, drop)
                in1 = in1 * mask[: in1.shape[0]]
                in2 = in2 * mask[: in2.shape[0]]
                dropped.append((in1, in2))
            inputs = dropped
        input1, input2 = zip(*inputs)
        vec1, bp_vec1 = layer.begin_update(input1, drop=0.0)
        vec2, bp_vec2 = layer.begin_update(input2, drop=0.0)
        output, bp_output = similarity.begin_update((vec1, vec2), drop=0.0)

        def finish_update(d_output, sgd=None):
            d_vec1, d_vec2 = bp_output(d_output, sgd)
            # Both backprop callbacks update the *same* shared layer.
            # Pass a no-op optimizer to the first call so the weights
            # are only stepped once (on the second call), keeping
            # bp_vec2's view of the weights consistent.
            d_input1 = bp_vec1(d_vec1, lambda *args, **kwargs: None)
            d_input2 = bp_vec2(d_vec2, sgd)
            return (d_input1, d_input2)

        return output, finish_update

    model = layerize(begin_update)
    model._layers.append(layer)
    model._layers.append(similarity)

    def on_data(self, X, y):
        # Hooks only see the first half of each pair; the layer is
        # shared, so one half suffices to size/initialize it.
        input1, _input2 = zip(*X)
        for hook in layer.on_data_hooks:
            hook(layer, input1, y)

    model.on_data_hooks.append(on_data)
    return model


def unit_init(W, ops):
    # Initialize all weights to 1 (per-dimension importance weights).
    W.fill(1)


@describe.attributes(
    nO=Dimension("Output size"),
    W=Synapses("Weights matrix", lambda obj: (obj.nO,), unit_init),
    d_W=Gradient("W"),
)
class CauchySimilarity(Model):
    """Cauchy similarity between vector pairs: 1 / (1 + sum(W * (v1-v2)^2)).

    From Chen (2013). `W` is a per-dimension weight vector of length `nO`.
    """

    def __init__(self, length):
        Model.__init__(self)
        self.nO = length

    def begin_update(self, vec1_vec2, drop=0.0):
        """Compute similarity scores for a batch of (vec1, vec2) pairs.

        Returns (sim, finish_update), where finish_update backpropagates
        d_sim to (d_vec1, d_vec2) and accumulates d_W.
        """
        weights = self.W
        vec1, vec2 = vec1_vec2
        diff = vec1 - vec2
        square_diff = diff ** 2
        total = (weights * square_diff).sum(axis=1)
        sim, bp_sim = inverse(total)
        # Keep a column-vector view so gradients broadcast over features.
        total = total.reshape((vec1.shape[0], 1))

        def finish_update(d_sim, sgd=None):
            d_total = bp_sim(d_sim)
            d_total = d_total.reshape(total.shape)
            self.d_W += (d_total * square_diff).sum(axis=0)
            d_square_diff = weights * d_total
            d_diff = 2 * d_square_diff * diff
            d_vec1 = d_diff
            d_vec2 = -d_diff
            # BUG FIX: sgd defaults to None but was called unconditionally,
            # raising TypeError when no optimizer is supplied. Only step
            # the weights when an optimizer is given (matching the optional
            # treatment of `sgd` elsewhere in this module).
            if sgd is not None:
                sgd(self._mem.weights, self._mem.gradient, key=self.id)
            return (d_vec1, d_vec2)

        return sim, finish_update