fix: Examples project uses all data; unit tests use only a small fraction

henon · henon · commit 1ff31db07eb6 · 2019-04-11T16:34:35.000+02:00
diff --git a/test/TensorFlowNET.Examples/LinearRegression.cs b/test/TensorFlowNET.Examples/LinearRegression.cs
@@ -20,7 +20,7 @@ public class LinearRegression : Python, IExample
 
         // Parameters
         float learning_rate = 0.01f;
-        int training_epochs = 1000;
+        public int TrainingEpochs = 1000;
         int display_step = 50;
 
         NDArray train_X, train_Y;
@@ -62,7 +62,7 @@ public bool Run()
                 sess.run(init);
 
                 // Fit all training data
-                for (int epoch = 0; epoch < training_epochs; epoch++)
+                for (int epoch = 0; epoch < TrainingEpochs; epoch++)
                 {
                     foreach (var (x, y) in zip<float>(train_X, train_Y))
                     {
diff --git a/test/TensorFlowNET.Examples/LogisticRegression.cs b/test/TensorFlowNET.Examples/LogisticRegression.cs
@@ -22,8 +22,9 @@ public class LogisticRegression : Python, IExample
 
         private float learning_rate = 0.01f;
         public int TrainingEpochs = 10;
-        public int DataSize = 5000;
-        public int TestSize = 5000;
+        public int? TrainSize = null;
+        public int ValidationSize = 5000;
+        public int? TestSize = null;
         public int BatchSize = 100;
         private int display_step = 1;
 
@@ -98,7 +99,7 @@ public bool Run()
 
         public void PrepareData()
         {
-            mnist = MnistDataSet.read_data_sets("mnist", one_hot: true, validation_size: DataSize, test_size: TestSize);
+            mnist = MnistDataSet.read_data_sets("mnist", one_hot: true, train_size: TrainSize, validation_size: ValidationSize, test_size: TestSize);
         }
 
         public void SaveModel(Session sess)
@@ -141,7 +142,7 @@ public void Predict()
                 if (results.argmax() == (batch_ys[0] as NDArray).argmax())
                     print("predicted OK!");
                 else
-                    throw new ValueError("predict error, maybe 90% accuracy");
+                    throw new ValueError("predict error, should be 90% accuracy");
             });
         }
     }
diff --git a/test/TensorFlowNET.Examples/NearestNeighbor.cs b/test/TensorFlowNET.Examples/NearestNeighbor.cs
@@ -19,8 +19,9 @@ public class NearestNeighbor : Python, IExample
         public string Name => "Nearest Neighbor";
         Datasets mnist;
         NDArray Xtr, Ytr, Xte, Yte;
-        public int DataSize = 5000;
-        public int TestBatchSize = 200;
+        public int? TrainSize = null;
+        public int ValidationSize = 5000;
+        public int? TestSize = null;
 
         public bool Run()
         {
@@ -64,10 +65,10 @@ public bool Run()
 
         public void PrepareData()
         {
-            mnist = MnistDataSet.read_data_sets("mnist", one_hot: true, validation_size: DataSize);
+            mnist = MnistDataSet.read_data_sets("mnist", one_hot: true, train_size: TrainSize, validation_size:ValidationSize, test_size:TestSize);
             // In this example, we limit mnist data
-            (Xtr, Ytr) = mnist.train.next_batch(DataSize); // 5000 for training (nn candidates)
-            (Xte, Yte) = mnist.test.next_batch(TestBatchSize); // 200 for testing
+            (Xtr, Ytr) = mnist.train.next_batch(TrainSize==null ? 5000 : TrainSize.Value / 100); // 5000 for training (nn candidates)
+            (Xte, Yte) = mnist.test.next_batch(TestSize==null ? 200 : TestSize.Value / 100); // 200 for testing
         }
     }
 }
diff --git a/test/TensorFlowNET.Examples/Utility/MnistDataSet.cs b/test/TensorFlowNET.Examples/Utility/MnistDataSet.cs
@@ -15,16 +15,17 @@ public class MnistDataSet
         private const string TRAIN_LABELS = "train-labels-idx1-ubyte.gz";
         private const string TEST_IMAGES = "t10k-images-idx3-ubyte.gz";
         private const string TEST_LABELS = "t10k-labels-idx1-ubyte.gz";
-
         public static Datasets read_data_sets(string train_dir, 
             bool one_hot = false,
             TF_DataType dtype = TF_DataType.TF_FLOAT,
             bool reshape = true,
             int validation_size = 5000,
-            int test_size = 5000,
+            int? train_size = null,
+            int? test_size = null,
             string source_url = DEFAULT_SOURCE_URL)
         {
-            var train_size = validation_size * 2;
+            if (train_size!=null && validation_size >= train_size)
+                throw new ArgumentException("Validation set should be smaller than training set");
 
             Web.Download(source_url + TRAIN_IMAGES, train_dir, TRAIN_IMAGES);
             Compress.ExtractGZip(Path.Join(train_dir, TRAIN_IMAGES), train_dir);
diff --git a/test/TensorFlowNET.UnitTest/ExamplesTests/ExamplesTest.cs b/test/TensorFlowNET.UnitTest/ExamplesTests/ExamplesTest.cs
@@ -51,7 +51,7 @@ public void LinearRegression()
         [TestMethod]
         public void LogisticRegression()
         {
-            new LogisticRegression() { Enabled = true, TrainingEpochs=10, DataSize = 500, TestSize = 500 }.Run();
+            new LogisticRegression() { Enabled = true, TrainingEpochs=10, TrainSize = 500, ValidationSize = 100, TestSize = 100 }.Run();
         }
 
         [Ignore]
@@ -78,7 +78,7 @@ public void NamedEntityRecognition()
         [TestMethod]
         public void NearestNeighbor()
         {
-            new NearestNeighbor() { Enabled = true, DataSize = 500, TestBatchSize = 100 }.Run();
+            new NearestNeighbor() { Enabled = true, TrainSize = 500, ValidationSize = 100, TestSize = 100 }.Run();
         }
 
         [Ignore]

Original file line number	Diff line number	Diff line change
`@@ -20,7 +20,7 @@ public class LinearRegression : Python, IExample`
`20`	`20`
`21`	`21`	`// Parameters`
`22`	`22`	`float learning_rate = 0.01f;`
`23`		`- int training_epochs = 1000;`
	`23`	`+ public int TrainingEpochs = 1000;`
`24`	`24`	`int display_step = 50;`
`25`	`25`
`26`	`26`	`NDArray train_X, train_Y;`
`@@ -62,7 +62,7 @@ public bool Run()`
`62`	`62`	`sess.run(init);`
`63`	`63`
`64`	`64`	`// Fit all training data`
`65`		`- for (int epoch = 0; epoch < training_epochs; epoch++)`
	`65`	`+ for (int epoch = 0; epoch < TrainingEpochs; epoch++)`
`66`	`66`	`{`
`67`	`67`	`foreach (var (x, y) in zip<float>(train_X, train_Y))`
`68`	`68`	`{`
Original file line number	Diff line number	Diff line change
`@@ -22,8 +22,9 @@ public class LogisticRegression : Python, IExample`
`22`	`22`
`23`	`23`	`private float learning_rate = 0.01f;`
`24`	`24`	`public int TrainingEpochs = 10;`
`25`		`- public int DataSize = 5000;`
`26`		`- public int TestSize = 5000;`
	`25`	`+ public int? TrainSize = null;`
	`26`	`+ public int ValidationSize = 5000;`
	`27`	`+ public int? TestSize = null;`
`27`	`28`	`public int BatchSize = 100;`
`28`	`29`	`private int display_step = 1;`
`29`	`30`
`@@ -98,7 +99,7 @@ public bool Run()`
`98`	`99`
`99`	`100`	`public void PrepareData()`
`100`	`101`	`{`
`101`		`- mnist = MnistDataSet.read_data_sets("mnist", one_hot: true, validation_size: DataSize, test_size: TestSize);`
	`102`	`+ mnist = MnistDataSet.read_data_sets("mnist", one_hot: true, train_size: TrainSize, validation_size: ValidationSize, test_size: TestSize);`
`102`	`103`	`}`
`103`	`104`
`104`	`105`	`public void SaveModel(Session sess)`
`@@ -141,7 +142,7 @@ public void Predict()`
`141`	`142`	`if (results.argmax() == (batch_ys[0] as NDArray).argmax())`
`142`	`143`	`print("predicted OK!");`
`143`	`144`	`else`
`144`		`- throw new ValueError("predict error, maybe 90% accuracy");`
	`145`	`+ throw new ValueError("predict error, should be 90% accuracy");`
`145`	`146`	`});`
`146`	`147`	`}`
`147`	`148`	`}`
Original file line number	Diff line number	Diff line change
`@@ -19,8 +19,9 @@ public class NearestNeighbor : Python, IExample`
`19`	`19`	`public string Name => "Nearest Neighbor";`
`20`	`20`	`Datasets mnist;`
`21`	`21`	`NDArray Xtr, Ytr, Xte, Yte;`
`22`		`- public int DataSize = 5000;`
`23`		`- public int TestBatchSize = 200;`
	`22`	`+ public int? TrainSize = null;`
	`23`	`+ public int ValidationSize = 5000;`
	`24`	`+ public int? TestSize = null;`
`24`	`25`
`25`	`26`	`public bool Run()`
`26`	`27`	`{`
`@@ -64,10 +65,10 @@ public bool Run()`
`64`	`65`
`65`	`66`	`public void PrepareData()`
`66`	`67`	`{`
`67`		`- mnist = MnistDataSet.read_data_sets("mnist", one_hot: true, validation_size: DataSize);`
	`68`	`+ mnist = MnistDataSet.read_data_sets("mnist", one_hot: true, train_size: TrainSize, validation_size:ValidationSize, test_size:TestSize);`
`68`	`69`	`// In this example, we limit mnist data`
`69`		`- (Xtr, Ytr) = mnist.train.next_batch(DataSize); // 5000 for training (nn candidates)`
`70`		`- (Xte, Yte) = mnist.test.next_batch(TestBatchSize); // 200 for testing`
	`70`	`+ (Xtr, Ytr) = mnist.train.next_batch(TrainSize==null ? 5000 : TrainSize.Value / 100); // 5000 for training (nn candidates)`
	`71`	`+ (Xte, Yte) = mnist.test.next_batch(TestSize==null ? 200 : TestSize.Value / 100); // 200 for testing`
`71`	`72`	`}`
`72`	`73`	`}`
`73`	`74`	`}`
Original file line number	Diff line number	Diff line change
`@@ -51,7 +51,7 @@ public void LinearRegression()`
`51`	`51`	`[TestMethod]`
`52`	`52`	`public void LogisticRegression()`
`53`	`53`	`{`
`54`		`- new LogisticRegression() { Enabled = true, TrainingEpochs=10, DataSize = 500, TestSize = 500 }.Run();`
	`54`	`+ new LogisticRegression() { Enabled = true, TrainingEpochs=10, TrainSize = 500, ValidationSize = 100, TestSize = 100 }.Run();`
`55`	`55`	`}`
`56`	`56`
`57`	`57`	`[Ignore]`
`@@ -78,7 +78,7 @@ public void NamedEntityRecognition()`
`78`	`78`	`[TestMethod]`
`79`	`79`	`public void NearestNeighbor()`
`80`	`80`	`{`
`81`		`- new NearestNeighbor() { Enabled = true, DataSize = 500, TestBatchSize = 100 }.Run();`
	`81`	`+ new NearestNeighbor() { Enabled = true, TrainSize = 500, ValidationSize = 100, TestSize = 100 }.Run();`
`82`	`82`	`}`
`83`	`83`
`84`	`84`	`[Ignore]`