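"""rbm.py (from the lisa-lab/DeepLearningTutorials codebase): a Restricted
Boltzmann Machine trained with k-step contrastive divergence (CD-k), written
against the old Theano sandbox API. The demo at the bottom of the file fits
the model to the first 1000 MNIST digits."""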
import numpy
import theano
import theano.tensor as T
from theano.compile.sandbox.sharedvalue import shared
from theano.compile.sandbox.pfunc import pfunc
from theano.compile.sandbox.shared_randomstreams import RandomStreams
from theano.tensor.nnet import sigmoid
class RBM():

    def __init__(self, input=None, vsize=None, hsize=None, bsize=10,
                 lr=1e-1, seed=123):
        """
        RBM constructor. Defines the parameters of the model along with
        basic operations for inferring hidden from visible (and vice-versa),
        as well as for performing CD updates.

        param input: None for a standalone RBM, or a symbolic variable if
                     the RBM is part of a larger graph.
        param vsize: number of visible units
        param hsize: number of hidden units
        param bsize: size of minibatch
        param lr:    unsupervised learning rate
        param seed:  seed for random number generator
        """
        assert vsize and hsize

        self.vsize = vsize
        self.hsize = hsize
        self.lr = shared(lr, 'lr')

        # set up the Theano random number generator
        self.random = RandomStreams(seed)

        #### INITIALIZATION ####

        # initialize input layer for a standalone RBM or layer 0 of a DBN
        self.input = input if input else T.dmatrix('input')
        # initialize biases (visible bias b, hidden bias c)
        self.b = shared(numpy.zeros(vsize), 'b')
        self.c = shared(numpy.zeros(hsize), 'c')
        # initialize weights uniformly in [-ubound, ubound]
        rngseed = numpy.random.RandomState(seed).randint(2**30)
        rng = numpy.random.RandomState(rngseed)
        ubound = 1. / numpy.sqrt(max(self.vsize, self.hsize))
        self.w = shared(rng.uniform(low=-ubound, high=ubound,
                                    size=(hsize, vsize)), 'w')

        #### POSITIVE AND NEGATIVE PHASE ####

        # define graph for the positive phase
        ph, ph_s = self.def_propup(self.input)
        # function which computes p(h|v=x) and a sample drawn from p(h|v=x)
        self.pos_phase = pfunc([self.input], [ph, ph_s])

        # define graph for the negative phase (one step of the Gibbs chain:
        # propdown from the hidden sample, then propup again)
        nv, nv_s = self.def_propdown(ph_s)
        nh, nh_s = self.def_propup(nv_s)
        # function which computes p(v|h=ph_s), a sample drawn from it, and
        # then p(h|v=nv_s) along with a sample
        self.neg_phase = pfunc([ph_s], [nv, nv_s, nh, nh_s])

        # estimate the CD gradient for each parameter: a positive-phase
        # statistic (under the data) minus a negative-phase statistic
        # (after k Gibbs steps)
        db = T.mean(self.input, axis=0) - T.mean(nv, axis=0)
        dc = T.mean(ph, axis=0) - T.mean(nh, axis=0)
        dwp = T.dot(ph.T, self.input) / nv.shape[0]
        dwn = T.dot(nh.T, nv) / nv.shape[0]
        dw = dwp - dwn

        # define dictionary of stochastic gradient update equations; db, dc
        # and dw point in the direction that increases the log-likelihood,
        # so we add them to the parameters (gradient ascent)
        updates = {self.b: self.b + self.lr * db,
                   self.c: self.c + self.lr * dc,
                   self.w: self.w + self.lr * dw}

        # define function which performs one step in the direction of the
        # CD gradient
        self.cd_step = pfunc([self.input, ph, nv, nh], [], updates=updates)
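
        # For reference, the updates above follow the standard CD-k recipe
        # due to Hinton, with the expectations estimated by mini-batch
        # averages:
        #   dw ~ E[h v^T | data] - E[h v^T | k-step sample]
        #   db ~ E[v | data]     - E[v | k-step sample]
        #   dc ~ E[h | data]     - E[h | k-step sample]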

    def def_propup(self, vis):
        """ Symbolic definition of p(hid|vis) """
        # p(h_j = 1 | v) = sigmoid(c_j + sum_i w_ji v_i)
        hid_activation = T.dot(vis, self.w.T) + self.c
        hid = sigmoid(hid_activation)
        # draw a binary sample from p(hid|vis); *1.0 casts it to float
        hid_sample = self.random.binomial(T.shape(hid), 1, hid) * 1.0
        return hid, hid_sample

    def def_propdown(self, hid):
        """ Symbolic definition of p(vis|hid) """
        # p(v_i = 1 | h) = sigmoid(b_i + sum_j w_ji h_j)
        vis_activation = T.dot(hid, self.w) + self.b
        vis = sigmoid(vis_activation)
        # draw a binary sample from p(vis|hid); *1.0 casts it to float
        vis_sample = self.random.binomial(T.shape(vis), 1, vis) * 1.0
        return vis, vis_sample

    def cd(self, x, k=1):
        """ Performs an actual CD-k update on mini-batch x """
        # positive phase: infer hiddens from the data
        ph, ph_s = self.pos_phase(x)
        # negative phase: run the Gibbs chain for k steps
        nh_s = ph_s
        for ki in range(k):
            nv, nv_s, nh, nh_s = self.neg_phase(nh_s)
        # take one step along the estimated CD gradient (note that the
        # sampled reconstruction nv_s is fed in for the nv placeholder)
        self.cd_step(x, ph, nv_s, nh)
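
# A minimal smoke test on random binary data (a hypothetical helper, not part
# of the original tutorial code). It assumes only numpy and checks that a
# single CD-1 update runs end to end on a tiny model.
def toy_example():
    rbm = RBM(vsize=6, hsize=4, bsize=2, lr=0.1)   # tiny RBM
    x = (numpy.random.rand(2, 6) > 0.5) * 1.0      # one random binary mini-batch
    rbm.cd(x, k=1)                                 # single CD-1 update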

from pylearn.datasets import MNIST

if __name__ == '__main__':
    bsize = 10

    # initialize dataset
    dataset = MNIST.first_1k()

    # initialize RBM with 784 visible units and 500 hidden units
    r = RBM(vsize=784, hsize=500, bsize=bsize, lr=0.1)

    # for a fixed number of epochs ...
    for e in range(10):
        print '@epoch %i' % e
        # iterate over all training set mini-batches
        for i in range(len(dataset.train.x) / bsize):
            rng = range(i * bsize, (i + 1) * bsize)  # index range of the next mini-batch
            x = dataset.train.x[rng]                 # next mini-batch
            r.cd(x)                                  # perform a CD-1 update
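
        # (Optional addition, not in the original tutorial.) A rough progress
        # signal: mean squared reconstruction error of the last mini-batch.
        # This is only a heuristic proxy, not the CD objective itself.
        nv, nv_s, nh, nh_s = r.neg_phase(r.pos_phase(x)[1])
        print '  reconstruction error: %f' % numpy.mean((x - nv) ** 2)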