Skip to content

Commit c8fc56d

Browse files
author
Razvan Pascanu
committed
working version of SdA.py
1 parent e495a6c commit c8fc56d

File tree

6 files changed

+48
-19
lines changed

6 files changed

+48
-19
lines changed

code/SdA.py

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -135,14 +135,14 @@ class dA(object):
135135
136136
"""
137137

138-
def __init__(self, n_visible= 784, n_hidden= 500, input= None):
138+
def __init__(self, n_visible= 784, n_hidden= 500, input= None, corruption_level = 0.1):
139139
"""
140140
Initialize the dA class by specifying the number of visible units (the
141141
dimension d of the input ), the number of hidden units ( the dimension
142142
d' of the latent or hidden space ) and by giving a symbolic variable
143143
for the input. Such a symbolic variable is useful when the input is
144144
the result of some computations. For example when dealing with SdAs,
145-
the dA on layer 2 gets as input the output of the DAE on layer 1.
145+
the dA on layer 2 gets as input the output of the dA on layer 1.
146146
This output can be written as a function of the input to the entire
147147
model, and as such can be computed by theano whenever needed.
148148
@@ -152,6 +152,13 @@ def __init__(self, n_visible= 784, n_hidden= 500, input= None):
152152
153153
:param input: a symbolic description of the input or None
154154
155+
:param corruption_level: the corruption mechanism picks up randomly this fraction
156+
of entries of the input and turns them to 0
157+
158+
159+
amount of entries of the input to 0; default
160+
is 0.1, which means 10% of entries are corrupted to 0
161+
155162
"""
156163
self.n_visible = n_visible
157164
self.n_hidden = n_hidden
@@ -198,8 +205,8 @@ def __init__(self, n_visible= 784, n_hidden= 500, input= None):
198205
# third argument is the probability of success of any trial
199206
#
200207
# this will produce an array of 0s and 1s where 1 has a
201-
# probability of 0.9 and 0 if 0.1
202-
self.tilde_x = theano_rng.binomial( self.x.shape, 1, 0.9) * self.x
208+
# probability of 1 - corruption_level and 0 if corruption_level
209+
self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level) * self.x
203210
# Equation (2)
204211
# note : y is stored as an attribute of the class so that it can be
205212
# used later when stacking dAs.
@@ -358,7 +365,9 @@ def shared_dataset(data_xy):
358365
# construct the logistic regression class
359366
classifier = SdA( input=x, n_ins=28*28, \
360367
hidden_layers_sizes = [1000, 1000, 1000], n_outs=10)
361-
368+
369+
370+
start_time = time.clock()
362371
## Pre-train layer-wise
363372
for i in xrange(classifier.n_layers):
364373
cost = classifier.layers[i].cost
@@ -387,7 +396,9 @@ def shared_dataset(data_xy):
387396

388397

389398

399+
end_time = time.clock()
390400

401+
print ('Pretraining took %f minutes' %((end_time-start_time)/60.))
391402
# Fine-tune the entire model
392403
# the cost we minimize during training is the negative log likelihood of
393404
# the model
@@ -435,9 +446,9 @@ def shared_dataset(data_xy):
435446

436447
# early-stopping parameters
437448
patience = 10000 # look as this many examples regardless
438-
patience_increase = 2 # wait this much longer when a new best is
449+
patience_increase = 2. # wait this much longer when a new best is
439450
# found
440-
improvement_threshold = 0.995 # a relative improvement of this much is
451+
improvement_threshold = 0.99 # a relative improvement of this much is
441452
# considered significant
442453
validation_frequency = min(n_train_batches, patience/2)
443454
# go through this many
@@ -450,15 +461,19 @@ def shared_dataset(data_xy):
450461
best_validation_loss = float('inf')
451462
test_score = 0.
452463
start_time = time.clock()
453-
cost_ij = []
454-
for epoch in xrange(training_epochs):
464+
465+
done_looping = False
466+
epoch = 0
467+
468+
while (epoch < training_epochs) and (not done_looping):
469+
epoch = epoch + 1
455470
for minibatch_index in xrange(n_train_batches):
456471

457-
cost_ij += [train_model(minibatch_index)]
472+
cost_ij = train_model(minibatch_index)
458473
iter = epoch * n_train_batches + minibatch_index
459474

460475
if (iter+1) % validation_frequency == 0:
461-
cost_ij = []
476+
462477
validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
463478
this_validation_loss = numpy.mean(validation_losses)
464479
print('epoch %i, minibatch %i/%i, validation error %f %%' % \
@@ -488,6 +503,7 @@ def shared_dataset(data_xy):
488503

489504

490505
if patience <= iter :
506+
done_looping = True
491507
break
492508

493509
end_time = time.clock()

code/convolutional_mlp.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,11 @@ def shared_dataset(data_xy):
313313
test_score = 0.
314314
start_time = time.clock()
315315

316-
for epoch in xrange(n_epochs):
316+
epoch = 0
317+
done_looping = False
318+
319+
while (epoch < n_epochs) and (not done_looping):
320+
epoch = epoch + 1
317321
for minibatch_index in xrange(n_train_batches):
318322

319323
iter = epoch * n_train_batches + minibatch_index
@@ -353,6 +357,7 @@ def shared_dataset(data_xy):
353357
test_score*100.))
354358

355359
if patience <= iter :
360+
done_looping = True
356361
break
357362

358363
end_time = time.clock()

code/logistic_cg.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -136,12 +136,12 @@ def errors(self, y):
136136

137137

138138

139-
def cg_optimization_mnist( n_iter=50, mnist_pkl_gz='mnist.pkl.gz' ):
139+
def cg_optimization_mnist( n_epochs=50, mnist_pkl_gz='mnist.pkl.gz' ):
140140
"""Demonstrate conjugate gradient optimization of a log-linear model
141141
142142
This is demonstrated on MNIST.
143143
144-
:param n_iter: number of iterations ot run the optimizer
144+
:param n_epochs: number of epochs to run the optimizer
145145
146146
:param mnist_pkl_gz: the path of the mnist training file from
147147
http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
@@ -263,7 +263,7 @@ def callback(theta_value):
263263
fprime=train_fn_grad,
264264
callback=callback,
265265
disp=0,
266-
maxiter=n_iter)
266+
maxiter=n_epochs)
267267
end_time = time.clock()
268268
print(('Optimization complete with best validation score of %f %%, with '
269269
'test performance %f %%') %

code/logistic_sgd.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,8 +230,11 @@ def shared_dataset(data_xy):
230230
best_validation_loss = float('inf')
231231
test_score = 0.
232232
start_time = time.clock()
233-
234-
for epoch in xrange(n_epochs):
233+
234+
done_looping = False
235+
epoch = 0
236+
while (epoch < n_epochs) and (not done_looping):
237+
epoch = epoch + 1
235238
for minibatch_index in xrange(n_train_batches):
236239

237240
cost_ij = train_model(minibatch_index)
@@ -266,6 +269,7 @@ def shared_dataset(data_xy):
266269
(epoch, minibatch_index+1, n_train_batches,test_score*100.))
267270

268271
if patience <= iter :
272+
done_looping = True
269273
break
270274

271275
end_time = time.clock()

code/mlp.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,11 @@ def shared_dataset(data_xy):
265265
test_score = 0.
266266
start_time = time.clock()
267267

268-
for epoch in xrange(n_epochs):
268+
epoch = 0
269+
done_looping = False
270+
271+
while (epoch < n_epochs) and (not done_looping):
272+
epoch = epoch + 1
269273
for minibatch_index in xrange(n_train_batches):
270274

271275
cost_ij = train_model(minibatch_index)
@@ -300,6 +304,7 @@ def shared_dataset(data_xy):
300304
(epoch, minibatch_index+1, n_train_batches,test_score*100.))
301305

302306
if patience <= iter :
307+
done_looping = True
303308
break
304309

305310

code/test.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,4 @@ def test_dbn():
1515
def test_rbm():
1616
raise SkipTest('Implementation not finished')
1717
def test_SdA():
18-
raise SkipTest('Implementation not finished')
1918
SdA.sgd_optimization_mnist(pretraining_epochs = 2, n_epochs = 3)

0 commit comments

Comments
 (0)