pythonAI
diff --git a/‎tensorflow/contrib/learn/python/learn/estimators/linear.py‎
Lines changed: 73 additions & 18 deletions b/‎tensorflow/contrib/learn/python/learn/estimators/linear.py‎
Lines changed: 73 additions & 18 deletions
diff --git a/‎tensorflow/contrib/learn/python/learn/estimators/svm.py‎
Lines changed: 9 additions & 1 deletion b/‎tensorflow/contrib/learn/python/learn/estimators/svm.py‎
Lines changed: 9 additions & 1 deletion
diff --git a/‎tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py‎
Lines changed: 18 additions & 0 deletions b/‎tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py‎
Lines changed: 18 additions & 0 deletions
@@ -34,7 +34,6 @@
 from tensorflow.contrib.layers.python.layers import target_column
 from tensorflow.contrib.learn.python.learn import evaluable
 from tensorflow.contrib.learn.python.learn import metric_spec
-from tensorflow.contrib.learn.python.learn import session_run_hook
 from tensorflow.contrib.learn.python.learn import trainable
 from tensorflow.contrib.learn.python.learn.estimators import dnn_linear_combined
 from tensorflow.contrib.learn.python.learn.estimators import estimator
@@ -54,6 +53,7 @@
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.training import session_run_hook
 from tensorflow.python.training import training as train
 
 _CLASSES = "classes"
@@ -253,11 +253,26 @@ def _linear_classifier_model_fn(features, targets, mode, params):
 
 
 def sdca_classifier_model_fn(features, targets, mode, params):
-  """Estimator's linear model_fn."""
+  """Estimator's model_fn for the SDCA optimizer.
+
+  Args:
+    features: feature `Tensor` or `dict`. See the Estimator documentation.
+    targets: targets `Tensor` or `dict`. See the Estimator documentation.
+    mode: the mode. See the Estimator documentation.
+    params: a `dict` with entries for "feature_columns", "optimizer",
+      "weight_column_name", "loss_type", and optionally "update_weights_hook".
+
+  Returns:
+    Tuple of predictions, loss, and train_op.
+
+  Raises:
+    ValueError: if the parameters are invalid.
+  """
   feature_columns = params["feature_columns"]
   optimizer = params["optimizer"]
   weight_column_name = params["weight_column_name"]
   loss_type = params["loss_type"]
+  update_weights_hook = params.get("update_weights_hook")
 
   if not isinstance(optimizer, sdca_optimizer.SDCAOptimizer):
     raise ValueError("Optimizer must be of type SDCAOptimizer")
@@ -284,9 +299,12 @@ def sdca_classifier_model_fn(features, targets, mode, params):
   train_op = None
   if mode == estimator.ModeKeys.TRAIN:
     global_step = contrib_variables.get_global_step()
-    train_op = optimizer.get_train_step(
-        columns_to_variables, weight_column_name, loss_type, features,
-        targets, global_step)
+    sdca_model, train_op = optimizer.get_train_step(columns_to_variables,
+                                                    weight_column_name,
+                                                    loss_type, features,
+                                                    targets, global_step)
+    if update_weights_hook is not None:
+      update_weights_hook.set_parameters(sdca_model, train_op)
 
   predictions = {}
   predictions[_LOGISTIC] = math_ops.sigmoid(logits)
@@ -303,6 +321,28 @@ def _get_default_optimizer(feature_columns):
   return train.FtrlOptimizer(learning_rate=learning_rate)
 
 
+class _SdcaUpdateWeightsHook(session_run_hook.SessionRunHook):
+  """SessionRunHook to update and shrink SDCA model weights."""
+
+  def __init__(self):
+    self._sdca_model = self._train_op = self._update_op = None  # Set later.
+
+  def set_parameters(self, sdca_model, train_op):
+    self._sdca_model = sdca_model
+    self._train_op = train_op
+
+  def begin(self):
+    """Construct the update_weights op.
+
+    The op is implicitly added to the default graph.
+    """
+    self._update_op = self._sdca_model.update_weights(self._train_op)
+
+  def before_run(self, run_context):
+    """Return the update_weights op so that it is executed during this run."""
+    return session_run_hook.SessionRunArgs(self._update_op)
+
+
 class LinearClassifier(evaluable.Evaluable, trainable.Trainable):
   """Linear classifier model.
 
@@ -432,15 +472,23 @@ def __init__(self,  # _joint_weight pylint: disable=invalid-name
       self._optimizer = _get_optimizer(optimizer)
     num_ps_replicas = config.num_ps_replicas if config else 0
 
+    chief_hook = None
     if isinstance(optimizer, sdca_optimizer.SDCAOptimizer):
       assert not _joint_weight, ("_joint_weight is incompatible with the"
                                  " SDCAOptimizer")
       model_fn = sdca_classifier_model_fn
+      # We use a hook to perform the weight update and shrink step only on the
+      # chief. Because the SdcaModel is constructed by the estimator within the
+      # call to fit() but we need to pass the hook to fit(), we pass the hook
+      # as a parameter to the model_fn and have that propagate the model to the
+      # hook.
+      chief_hook = _SdcaUpdateWeightsHook()
       params = {
           "feature_columns": feature_columns,
           "optimizer": self._optimizer,
           "weight_column_name": weight_column_name,
           "loss_type": "logistic_loss",
+          "update_weights_hook": chief_hook,
       }
     else:
       model_fn = _linear_classifier_model_fn
@@ -462,29 +510,35 @@ def __init__(self,  # _joint_weight pylint: disable=invalid-name
         params=params,
         feature_engineering_fn=feature_engineering_fn)
 
+    self._additional_run_hook = None
+    if self._estimator.config.is_chief:
+      self._additional_run_hook = chief_hook
+
   def get_estimator(self):
     return self._estimator
 
   def fit(self, x=None, y=None, input_fn=None, steps=None, batch_size=None,
           monitors=None, max_steps=None):
     """See trainable.Trainable."""
     # TODO(roumposg): Remove when deprecated monitors are removed.
-    if monitors is not None:
-      deprecated_monitors = [
-          m for m in monitors
-          if not isinstance(m, session_run_hook.SessionRunHook)
-      ]
-      for monitor in deprecated_monitors:
-        monitor.set_estimator(self)
-        monitor._lock_estimator()  # pylint: disable=protected-access
-
+    # Copy so appending the chief hook never mutates the caller's list.
+    monitors = list(monitors) if monitors is not None else []
+    deprecated_monitors = [
+        m for m in monitors
+        if not isinstance(m, session_run_hook.SessionRunHook)
+    ]
+    for monitor in deprecated_monitors:
+      monitor.set_estimator(self)
+      monitor._lock_estimator()  # pylint: disable=protected-access
+
+    if self._additional_run_hook:
+      monitors.append(self._additional_run_hook)
     result = self._estimator.fit(x=x, y=y, input_fn=input_fn, steps=steps,
                                  batch_size=batch_size, monitors=monitors,
                                  max_steps=max_steps)
 
-    if monitors is not None:
-      for monitor in deprecated_monitors:
-        monitor._unlock_estimator()  # pylint: disable=protected-access
+    for monitor in deprecated_monitors:
+      monitor._unlock_estimator()  # pylint: disable=protected-access
 
     return result
 
@@ -751,9 +805,10 @@ def _get_train_ops(self, features, targets):
                      columns_to_variables)
 
     def _train_op_fn(unused_loss):
-      return  self._linear_optimizer.get_train_step(
+      sdca_model, train_op = self._linear_optimizer.get_train_step(
           columns_to_variables, self._weight_column_name,
           self._loss_type(), features, targets, global_step)
+      return sdca_model.update_weights(train_op)
 
     model_fn_ops = self._head.head_ops(features, targets,
                                        estimator.ModeKeys.TRAIN, _train_op_fn,
 
@@ -55,7 +55,7 @@ class SVM(trainable.Trainable, evaluable.Evaluable):
   method), should be set to (#concurrent train ops per worker) x (#workers). If
   num_loss_partitions is larger or equal to this value, convergence is
   guaranteed but becomes slower as num_loss_partitions increases. If it is set
-  to a smaller value, the optimizer is more agressive in reducing the global
+  to a smaller value, the optimizer is more aggressive in reducing the global
   loss but convergence is not guaranteed. The recommended value in tf.learn
   (where there is one process per worker) is the number of workers running the
   train steps. It defaults to 1 (single machine).
@@ -146,6 +146,7 @@ def __init__(self,
 
     self._feature_columns = feature_columns
     self._model_dir = model_dir or tempfile.mkdtemp()
+    self._chief_hook = linear._SdcaUpdateWeightsHook()  # pylint: disable=protected-access
     self._estimator = estimator.Estimator(
         model_fn=linear.sdca_classifier_model_fn,
         model_dir=self._model_dir,
@@ -155,12 +156,19 @@ def __init__(self,
             "optimizer": self._optimizer,
             "weight_column_name": weight_column_name,
             "loss_type": "hinge_loss",
+            "update_weights_hook": self._chief_hook,
         },
         feature_engineering_fn=feature_engineering_fn)
+    if not self._estimator.config.is_chief:
+      self._chief_hook = None
 
   def fit(self, x=None, y=None, input_fn=None, steps=None, batch_size=None,
           monitors=None, max_steps=None):
     """See trainable.Trainable."""
+    # Copy so appending the chief hook never mutates the caller's list.
+    monitors = list(monitors) if monitors is not None else []
+    if self._chief_hook:
+      monitors.append(self._chief_hook)
     return self._estimator.fit(x=x, y=y, input_fn=input_fn, steps=steps,
                                batch_size=batch_size, monitors=monitors,
                                max_steps=max_steps)
 
@@ -189,6 +189,7 @@ def testSimple(self):
         train_op = lr.minimize()
         for _ in range(_MAX_ITERATIONS):
           train_op.run()
+        lr.update_weights(train_op).run()
         # The high tolerance in unregularized_loss comparisons is due to the
         # fact that it's possible to trade off unregularized_loss vs.
         # regularization and still have a sum that is quite close to the
@@ -248,6 +249,7 @@ def Minimize():
 
           for t in threads:
             t.join()
+          lr.update_weights(train_op).run()
 
           # The high tolerance in unregularized_loss comparisons is due to the
           # fact that it's possible to trade off unregularized_loss vs.
@@ -294,6 +296,7 @@ def testSimpleNoL2(self):
         train_op = lr.minimize()
         for _ in range(_MAX_ITERATIONS):
           train_op.run()
+        lr.update_weights(train_op).run()
 
         # There is neither L1 nor L2 loss, so regularized and unregularized
         # losses should be exactly the same.
@@ -346,6 +349,7 @@ def testSomeUnweightedExamples(self):
         train_op = lr.minimize()
         for _ in range(_MAX_ITERATIONS):
           train_op.run()
+        lr.update_weights(train_op).run()
 
         self.assertAllClose(0.411608, unregularized_loss.eval(), atol=0.05)
         self.assertAllClose(0.525457, loss.eval(), atol=0.01)
@@ -416,6 +420,7 @@ def testImbalanced(self):
         train_op = lr.minimize()
         for _ in range(_MAX_ITERATIONS):
           train_op.run()
+        lr.update_weights(train_op).run()
 
         self.assertAllClose(0.226487 + 0.102902,
                             unregularized_loss.eval(),
@@ -456,6 +461,7 @@ def testImbalancedWithExampleWeights(self):
         train_op = lr.minimize()
         for _ in range(_MAX_ITERATIONS):
           train_op.run()
+        lr.update_weights(train_op).run()
 
         self.assertAllClose(0.284860, unregularized_loss.eval(), atol=0.08)
         self.assertAllClose(0.408044, loss.eval(), atol=0.012)
@@ -494,6 +500,7 @@ def testInstancesOfOneClassOnly(self):
         train_op = lr.minimize()
         for _ in range(_MAX_ITERATIONS):
           train_op.run()
+        lr.update_weights(train_op).run()
         self.assertAllClose(0.411608, unregularized_loss.eval(), atol=0.05)
         self.assertAllClose(0.525457, loss.eval(), atol=0.01)
         predicted_labels = get_binary_predictions_for_logistic(predictions)
@@ -580,6 +587,7 @@ def testSimple(self):
       train_op = lr.minimize()
       for _ in range(_MAX_ITERATIONS):
         train_op.run()
+      lr.update_weights(train_op).run()
 
       # Predictions should be 2/3 of label due to minimizing regularized loss:
       #   (label - 2 * weight)^2 / 2 + L2 * 2 * weight^2
@@ -626,6 +634,7 @@ def testL2Regularization(self):
       train_op = lr.minimize()
       for _ in range(_MAX_ITERATIONS):
         train_op.run()
+      lr.update_weights(train_op).run()
 
       # Predictions should be 1/5 of label due to minimizing regularized loss:
       #   (label - 2 * weight)^2 + L2 * 16 * weight^2
@@ -661,6 +670,7 @@ def testL1Regularization(self):
       train_op = lr.minimize()
       for _ in range(_MAX_ITERATIONS):
         train_op.run()
+      lr.update_weights(train_op).run()
 
       # Predictions should be -4.0, 48/5 due to minimizing regularized loss:
       #   (label - 2 * weight)^2 / 2 + L2 * 2 * weight^2 + L1 * 4 * weight
@@ -696,6 +706,7 @@ def testFeatureValues(self):
       train_op = lr.minimize()
       for _ in range(_MAX_ITERATIONS):
         train_op.run()
+      lr.update_weights(train_op).run()
 
       # There are 4 (sparse) variable weights to be learned. 2 for age and 2 for
       # gender. Let w_1, w_2 be age weights, w_3, w_4 be gender weights, y_1,
@@ -729,6 +740,7 @@ def testDenseFeaturesWithDefaultWeights(self):
       train_op = lr.minimize()
       for _ in range(_MAX_ITERATIONS):
         train_op.run()
+      lr.update_weights(train_op).run()
 
       # The loss function for these particular features is given by:
       # 1/2(label_1-w_1)^2 + 1/2(label_2-w_2)^2 + \lambda/2 (w_1^2 + w_2^2). So,
@@ -759,6 +771,7 @@ def testDenseFeaturesWithArbitraryWeights(self):
       train_op = lr.minimize()
       for _ in range(_MAX_ITERATIONS):
         train_op.run()
+      lr.update_weights(train_op).run()
 
       # The loss function for these particular features is given by:
       # 1/2 s_1 (label_1-w_1)^2 + 1/2 s_2(label_2-w_2)^2 +
@@ -816,6 +829,7 @@ def testSimple(self):
       train_op = model.minimize()
       for _ in range(_MAX_ITERATIONS):
         train_op.run()
+      model.update_weights(train_op).run()
 
       binary_predictions = get_binary_predictions_for_hinge(predictions)
       self.assertAllEqual([-1.0, 1.0], predictions.eval())
@@ -841,6 +855,7 @@ def testDenseFeaturesPerfectlySeparable(self):
       train_op = model.minimize()
       for _ in range(_MAX_ITERATIONS):
         train_op.run()
+      model.update_weights(train_op).run()
 
       self.assertAllClose([1.0, -1.0], predictions.eval(), atol=0.05)
       self.assertAllEqual([1, 0], binary_predictions.eval())
@@ -871,6 +886,7 @@ def testDenseFeaturesSeparableWithinMargins(self):
       train_op = model.minimize()
       for _ in range(_MAX_ITERATIONS):
         train_op.run()
+      model.update_weights(train_op).run()
 
       # (1.0, 0.5) and (1.0, -0.5) are separable by x-axis but the datapoints
       # are within the margins so there is unregularized loss (1/2 per example).
@@ -899,6 +915,7 @@ def testDenseFeaturesWeightedExamples(self):
       train_op = model.minimize()
       for _ in range(_MAX_ITERATIONS):
         train_op.run()
+      model.update_weights(train_op).run()
 
       # Point (1.0, 0.5) has higher weight than (1.0, -0.5) so the model will
       # try to increase the margin from (1.0, 0.5). Due to regularization,
@@ -953,6 +970,7 @@ def testSimple(self):
       train_op = model.minimize()
       for _ in range(_MAX_ITERATIONS):
         train_op.run()
+      model.update_weights(train_op).run()
 
       binary_predictions = get_binary_predictions_for_hinge(predictions)
       self.assertAllClose([-0.67, 0.67], predictions.eval(), atol=0.05)