Add num_partitions arg to the SDCA optimizer for distribution and deprecate container argument.

tensorflower-gardener · tensorflower-gardener · commit cd25808f802f · 2016-08-12T19:47:47.000-07:00
Change: 130165115
diff --git a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py
@@ -18,8 +18,7 @@
 from __future__ import division
 from __future__ import print_function
 
-import uuid
-
+from threading import Thread
 import tensorflow as tf
 
 from tensorflow.contrib.linear_optimizer.python.ops.sdca_ops import _sdca_ops
@@ -31,7 +30,7 @@
 
 _MAX_ITERATIONS = 100
 _SHARD_NUMBERS = [None, 1, 3, 10]
-
+_NUM_PARTITIONS = [2, 4]
 
 def make_example_proto(feature_dict, target, value=1.0):
   e = tf.train.Example()
@@ -146,11 +145,6 @@ def get_binary_predictions_for_hinge(predictions):
       dtype=tf.int32)
 
 
-# Setup the single container shared across all tests. This is testing proper
-# isolation across optimizers instantiated in each of the tests below.
-CONTAINER = uuid.uuid4().hex
-
-
 # TODO(sibyl-Mooth6ku): Add tests that exercise L1 and Shrinking.
 # TODO(sibyl-vie3Poto): Refactor tests to avoid repetition of boilerplate code.
 class SdcaModelTest(TensorFlowTestCase):
@@ -184,8 +178,8 @@ def testSimple(self):
                        symmetric_l1_regularization=0,
                        loss_type='logistic_loss')
 
-        lr = SdcaModel(CONTAINER, examples, variables, options,
-                       num_table_shards=num_shards)
+        lr = SdcaModel(
+            examples, variables, options, num_table_shards=num_shards)
         tf.initialize_all_variables().run()
         unregularized_loss = lr.unregularized_loss(examples)
         loss = lr.regularized_loss(examples)
@@ -211,6 +205,62 @@ def testSimple(self):
                             rtol=1e-2,
                             atol=1e-2)
 
+  def testDistributedSimple(self):
+    # Set up test data: two unit-weight examples, one for each label.
+    example_protos = [
+        make_example_proto({'age': [0],
+                            'gender': [0]}, 0),
+        make_example_proto({'age': [1],
+                            'gender': [1]}, 1),
+    ]
+    example_weights = [1.0, 1.0]
+    for num_shards in _SHARD_NUMBERS:
+      for num_partitions in _NUM_PARTITIONS:
+        with self._single_threaded_test_session():
+          examples = make_example_dict(example_protos, example_weights)
+          variables = make_variable_dict(1, 1)
+          options = dict(
+              symmetric_l2_regularization=1,
+              symmetric_l1_regularization=0,
+              loss_type='logistic_loss',
+              num_partitions=num_partitions)
+
+          lr = SdcaModel(
+              examples, variables, options, num_table_shards=num_shards)
+          tf.initialize_all_variables().run()
+          unregularized_loss = lr.unregularized_loss(examples)
+          loss = lr.regularized_loss(examples)
+          predictions = lr.predictions(examples)
+          self.assertAllClose(0.693147, unregularized_loss.eval())
+          self.assertAllClose(0.693147, loss.eval())
+
+          train_op = lr.minimize()
+
+          def Minimize():
+            with self._single_threaded_test_session():
+              for _ in range(_MAX_ITERATIONS):
+                train_op.run()
+          # Start one minimizer thread per partition, then wait for them all.
+          threads = []
+          for _ in range(num_partitions):
+            threads.append(Thread(target=Minimize))
+            threads[-1].start()
+
+          for t in threads:
+            t.join()
+
+          # The high tolerance in the unregularized_loss comparison is due to
+          # the fact that it is possible to trade off unregularized_loss vs.
+          # regularization and still have a sum that is quite close to the
+          # optimal regularized_loss value.  SDCA's duality gap only ensures
+          # that the regularized_loss is within 0.01 of optimal.
+          # 0.525457 is the optimal regularized_loss.
+          # 0.411608 is the unregularized_loss at that optimum.
+          self.assertAllClose(0.411608, unregularized_loss.eval(), atol=0.05)
+          self.assertAllClose(0.525457, loss.eval(), atol=0.01)
+          predicted_labels = get_binary_predictions_for_logistic(predictions)
+          self.assertAllEqual([0, 1], predicted_labels.eval())
+          self.assertTrue(lr.approximate_duality_gap().eval() < 0.02)
 
   def testSimpleNoL2(self):
     # Same as test above (so comments from above apply) but without an L2.
@@ -233,8 +283,8 @@ def testSimpleNoL2(self):
                        symmetric_l1_regularization=0,
                        loss_type='logistic_loss')
 
-        lr = SdcaModel(CONTAINER, examples, variables, options,
-                       num_table_shards=num_shards)
+        lr = SdcaModel(
+            examples, variables, options, num_table_shards=num_shards)
         tf.initialize_all_variables().run()
         unregularized_loss = lr.unregularized_loss(examples)
         loss = lr.regularized_loss(examples)
@@ -287,8 +337,8 @@ def testSomeUnweightedExamples(self):
                        symmetric_l1_regularization=0,
                        loss_type='logistic_loss')
 
-        lr = SdcaModel(CONTAINER, examples, variables, options,
-                       num_table_shards=num_shards)
+        lr = SdcaModel(
+            examples, variables, options, num_table_shards=num_shards)
         tf.initialize_all_variables().run()
         unregularized_loss = lr.unregularized_loss(examples)
         loss = lr.regularized_loss(examples)
@@ -325,8 +375,8 @@ def testFractionalExampleLabel(self):
                        symmetric_l1_regularization=0,
                        loss_type='logistic_loss')
 
-        lr = SdcaModel(CONTAINER, examples, variables, options,
-                       num_table_shards=num_shards)
+        lr = SdcaModel(
+            examples, variables, options, num_table_shards=num_shards)
         tf.initialize_all_variables().run()
         with self.assertRaisesOpError(
             'Only labels of 0.0 or 1.0 are supported right now.'):
@@ -357,8 +407,8 @@ def testImbalanced(self):
                        symmetric_l1_regularization=0,
                        loss_type='logistic_loss')
 
-        lr = SdcaModel(CONTAINER, examples, variables, options,
-                       num_table_shards=num_shards)
+        lr = SdcaModel(
+            examples, variables, options, num_table_shards=num_shards)
         tf.initialize_all_variables().run()
         unregularized_loss = lr.unregularized_loss(examples)
         loss = lr.regularized_loss(examples)
@@ -397,8 +447,8 @@ def testImbalancedWithExampleWeights(self):
                        symmetric_l1_regularization=0,
                        loss_type='logistic_loss')
 
-        lr = SdcaModel(CONTAINER, examples, variables, options,
-                       num_table_shards=num_shards)
+        lr = SdcaModel(
+            examples, variables, options, num_table_shards=num_shards)
         tf.initialize_all_variables().run()
         unregularized_loss = lr.unregularized_loss(examples)
         loss = lr.regularized_loss(examples)
@@ -435,8 +485,8 @@ def testInstancesOfOneClassOnly(self):
                        symmetric_l1_regularization=0,
                        loss_type='logistic_loss')
 
-        lr = SdcaModel(CONTAINER, examples, variables, options,
-                       num_table_shards=num_shards)
+        lr = SdcaModel(
+            examples, variables, options, num_table_shards=num_shards)
         tf.initialize_all_variables().run()
         unregularized_loss = lr.unregularized_loss(examples)
         loss = lr.regularized_loss(examples)
@@ -478,7 +528,7 @@ def testSimple(self):
                      symmetric_l1_regularization=0,
                      loss_type='squared_loss')
 
-      lr = SdcaModel(CONTAINER, examples, variables, options)
+      lr = SdcaModel(examples, variables, options)
       tf.initialize_all_variables().run()
       predictions = lr.predictions(examples)
       train_op = lr.minimize()
@@ -523,7 +573,7 @@ def testL2Regularization(self):
                      symmetric_l1_regularization=0,
                      loss_type='squared_loss')
 
-      lr = SdcaModel(CONTAINER, examples, variables, options)
+      lr = SdcaModel(examples, variables, options)
       tf.initialize_all_variables().run()
       predictions = lr.predictions(examples)
 
@@ -557,7 +607,7 @@ def testL1Regularization(self):
       options = dict(symmetric_l2_regularization=1.0,
                      symmetric_l1_regularization=4.0,
                      loss_type='squared_loss')
-      lr = SdcaModel(CONTAINER, examples, variables, options)
+      lr = SdcaModel(examples, variables, options)
       tf.initialize_all_variables().run()
       prediction = lr.predictions(examples)
       loss = lr.regularized_loss(examples)
@@ -593,7 +643,7 @@ def testFeatureValues(self):
                      symmetric_l1_regularization=0,
                      loss_type='squared_loss')
 
-      lr = SdcaModel(CONTAINER, examples, variables, options)
+      lr = SdcaModel(examples, variables, options)
       tf.initialize_all_variables().run()
       predictions = lr.predictions(examples)
 
@@ -626,7 +676,7 @@ def testDenseFeaturesWithDefaultWeights(self):
       options = dict(symmetric_l2_regularization=1.0,
                      symmetric_l1_regularization=0,
                      loss_type='squared_loss')
-      lr = SdcaModel(CONTAINER, examples, variables, options)
+      lr = SdcaModel(examples, variables, options)
       tf.initialize_all_variables().run()
       predictions = lr.predictions(examples)
 
@@ -656,7 +706,7 @@ def testDenseFeaturesWithArbitraryWeights(self):
       options = dict(symmetric_l2_regularization=5.0,
                      symmetric_l1_regularization=0,
                      loss_type='squared_loss')
-      lr = SdcaModel(CONTAINER, examples, variables, options)
+      lr = SdcaModel(examples, variables, options)
       tf.initialize_all_variables().run()
       predictions = lr.predictions(examples)
 
@@ -700,7 +750,7 @@ def testSimple(self):
       options = dict(symmetric_l2_regularization=1.0,
                      symmetric_l1_regularization=0,
                      loss_type='hinge_loss')
-      model = SdcaModel(CONTAINER, examples, variables, options)
+      model = SdcaModel(examples, variables, options)
       tf.initialize_all_variables().run()
 
       # Before minimization, the weights default to zero. There is no loss due
@@ -737,7 +787,7 @@ def testDenseFeaturesPerfectlySeparable(self):
           symmetric_l2_regularization=1.0,
           symmetric_l1_regularization=0,
           loss_type='hinge_loss')
-      model = SdcaModel(CONTAINER, examples, variables, options)
+      model = SdcaModel(examples, variables, options)
       tf.initialize_all_variables().run()
       predictions = model.predictions(examples)
       binary_predictions = get_binary_predictions_for_hinge(predictions)
@@ -767,7 +817,7 @@ def testDenseFeaturesSeparableWithinMargins(self):
       options = dict(symmetric_l2_regularization=1.0,
                      symmetric_l1_regularization=0,
                      loss_type='hinge_loss')
-      model = SdcaModel(CONTAINER, examples, variables, options)
+      model = SdcaModel(examples, variables, options)
       tf.initialize_all_variables().run()
       predictions = model.predictions(examples)
       binary_predictions = get_binary_predictions_for_hinge(predictions)
@@ -796,7 +846,7 @@ def testDenseFeaturesWeightedExamples(self):
       options = dict(symmetric_l2_regularization=1.0,
                      symmetric_l1_regularization=0,
                      loss_type='hinge_loss')
-      model = SdcaModel(CONTAINER, examples, variables, options)
+      model = SdcaModel(examples, variables, options)
       tf.initialize_all_variables().run()
       predictions = model.predictions(examples)
       binary_predictions = get_binary_predictions_for_hinge(predictions)
diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
@@ -280,8 +280,7 @@ class SdcaModel(object):
 
     ```python
     # Create a solver with the desired parameters.
-    lr = tf.contrib.linear_optimizer.SdcaModel(
-        container, examples, variables, options)
+    lr = tf.contrib.linear_optimizer.SdcaModel(examples, variables, options)
     opt_op = lr.minimize()
 
     predictions = lr.predictions(examples)
@@ -290,9 +289,6 @@ class SdcaModel(object):
     # Primal loss only
     unregularized_loss = lr.unregularized_loss(examples)
 
-    container: Name of the container (eg a hex-encoded UUID) where internal
-      state of the optimizer can be stored. The container can be safely shared
-      across many models.
     examples: {
       sparse_features: list of SparseFeatureColumn.
       dense_features: list of dense tensors of type float32.
@@ -308,6 +304,9 @@ class SdcaModel(object):
       symmetric_l1_regularization: 0.0
       symmetric_l2_regularization: 1.0
       loss_type: "logistic_loss"
+      num_partitions: 1 (Optional, with default value of 1. Number of
+      partitions of the global loss function: 1 means single machine solver,
+      and >1 when we have more than one optimizer working concurrently.)
     }
     ```
 
@@ -325,13 +324,11 @@ class SdcaModel(object):
   """
 
   def __init__(self,
-               container,
                examples,
                variables,
                options,
                num_table_shards=None):  # pylint: disable=unused-argument
     """Create a new sdca optimizer."""
-    # TODO(andreasst): get rid of obsolete container parameter
 
     if not examples or not variables or not options:
       raise ValueError('examples, variables and options must all be specified.')
@@ -379,6 +376,10 @@ def _symmetric_l2_regularization(self):
     # Algorithmic requirement (for now) is to have minimal l2 of 1.0.
     return max(self._options['symmetric_l2_regularization'], 1.0)
 
+  def _num_partitions(self):
+    # Partitions of the global objective; 1 when the option is not given.
+    return self._options.get('num_partitions', 1)
+
   # TODO(sibyl-Aix6ihai): Use optimizer interface to make use of slot creation logic.
   def _create_slots(self):
     # Make internal variables which have the updates before applying L1
@@ -520,7 +521,7 @@ def minimize(self, global_step=None, name=None):
           loss_type=self._options['loss_type'],
           l1=self._options['symmetric_l1_regularization'],
           l2=self._symmetric_l2_regularization(),
-          num_partitions=1,
+          num_partitions=self._num_partitions(),
           # TODO(sibyl-Aix6ihai): Provide empirical evidence for this. It is better
           # to run more than one iteration on single mini-batch as we want to
           # spend more time in compute. SDCA works better with larger
diff --git a/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py b/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py
@@ -16,8 +16,6 @@
 from __future__ import division
 from __future__ import print_function
 
-import uuid
-
 from tensorflow.contrib import layers
 from tensorflow.contrib.linear_optimizer.python.ops import sdca_ops
 from tensorflow.python.ops import array_ops
@@ -36,6 +34,7 @@ class SDCAOptimizer(object):
     real_feature_column = real_valued_column(...)
     sparse_feature_column = sparse_column_with_hash_bucket(...)
     sdca_optimizer = linear.SDCAOptimizer(example_id_column='example_id',
+                                          num_partitions=1,
                                           symmetric_l2_regularization=2.0)
     classifier = tf.contrib.learn.LinearClassifier(
         feature_columns=[real_feature_column, sparse_feature_column],
@@ -47,13 +46,17 @@ class SDCAOptimizer(object):
   Here the expectation is that the input_fn_* functions passed to train and
   evaluate return a pair (dict, label_tensor) where dict has `example_id_column`
   as `key` whose value is a `Tensor` of shape [batch_size] and dtype string.
+  num_partitions defines the number of partitions of the loss function, which
+  is equivalent to the number of concurrent workers running the train steps.
   """
 
   def __init__(self,
                example_id_column,
+               num_partitions=1,
                symmetric_l1_regularization=0.0,
                symmetric_l2_regularization=1.0):
     self._example_id_column = example_id_column
+    self._num_partitions = num_partitions
     self._symmetric_l1_regularization = symmetric_l1_regularization
     self._symmetric_l2_regularization = symmetric_l2_regularization
 
@@ -157,13 +160,13 @@ def _training_examples_and_variables():
           dense_features_weights=dense_feature_weights)
       return examples, sdca_variables
 
-    options = dict(
-        symmetric_l1_regularization=self._symmetric_l1_regularization,
-        symmetric_l2_regularization=self._symmetric_l2_regularization,
-        loss_type=loss_type)
     training_examples, training_variables = _training_examples_and_variables()
-    sdca_model = sdca_ops.SdcaModel(container=uuid.uuid4().hex,
-                                    examples=training_examples,
-                                    variables=training_variables,
-                                    options=options)
+    sdca_model = sdca_ops.SdcaModel(
+        examples=training_examples,
+        variables=training_variables,
+        options=dict(
+            symmetric_l1_regularization=self._symmetric_l1_regularization,
+            symmetric_l2_regularization=self._symmetric_l2_regularization,
+            num_partitions=self._num_partitions,
+            loss_type=loss_type))
     return sdca_model.minimize(global_step=global_step)