Defer image/audio/histogram v2 summary preprocessing using LazyTensorCreator (#2899)

nfelt · web-flow · commit 88dbe5529d00 · 2019-11-06T23:58:44.000-08:00
* Add LazyTensorCreator

* Defer summary preprocessing using LazyTensorCreator

* Add guard against reentrant callables to avoid deadlock

* Improve separation of concerns between lazy creation and tensor conversion routine

* Make LazyTensorCreatorTest pass against TF 1.x

* CR: remove unused functools import and nested_call parameter

* Defer registration of tensor conversion function until after module import time
diff --git a/tensorboard/plugins/audio/BUILD b/tensorboard/plugins/audio/BUILD
@@ -92,6 +92,7 @@ py_library(
     deps = [
         ":metadata",
         "//tensorboard/compat",
+        "//tensorboard/util:lazy_tensor_creator",
     ],
 )
 
diff --git a/tensorboard/plugins/audio/summary_v2.py b/tensorboard/plugins/audio/summary_v2.py
@@ -29,6 +29,7 @@
 
 from tensorboard.compat import tf2 as tf
 from tensorboard.plugins.audio import metadata
+from tensorboard.util import lazy_tensor_creator
 
 
 def audio(name,
@@ -91,19 +92,27 @@ def audio(name,
       tf.summary.summary_scope)
   with summary_scope(
       name, 'audio_summary', values=inputs) as (tag, _):
-    tf.debugging.assert_rank(data, 3)
-    tf.debugging.assert_non_negative(max_outputs)
-    limited_audio = data[:max_outputs]
-    encode_fn = functools.partial(audio_ops.encode_wav,
-                                  sample_rate=sample_rate)
-    encoded_audio = tf.map_fn(encode_fn, limited_audio,
-                              dtype=tf.string,
-                              name='encode_each_audio')
-    # Workaround for map_fn returning float dtype for an empty elems input.
-    encoded_audio = tf.cond(
-        tf.shape(input=encoded_audio)[0] > 0,
-        lambda: encoded_audio, lambda: tf.constant([], tf.string))
-    limited_labels = tf.tile([''], tf.shape(input=limited_audio)[:1])
-    tensor = tf.transpose(a=tf.stack([encoded_audio, limited_labels]))
+    # Defer audio encoding preprocessing by passing it as a callable to write(),
+    # wrapped in a LazyTensorCreator for backwards compatibility, so that we
+    # only do this work when summaries are actually written.
+    @lazy_tensor_creator.LazyTensorCreator
+    def lazy_tensor():
+      tf.debugging.assert_rank(data, 3)
+      tf.debugging.assert_non_negative(max_outputs)
+      limited_audio = data[:max_outputs]
+      encode_fn = functools.partial(audio_ops.encode_wav,
+                                    sample_rate=sample_rate)
+      encoded_audio = tf.map_fn(encode_fn, limited_audio,
+                                dtype=tf.string,
+                                name='encode_each_audio')
+      # Workaround for map_fn returning float dtype for an empty elems input.
+      encoded_audio = tf.cond(
+          tf.shape(input=encoded_audio)[0] > 0,
+          lambda: encoded_audio, lambda: tf.constant([], tf.string))
+      limited_labels = tf.tile([''], tf.shape(input=limited_audio)[:1])
+      return tf.transpose(a=tf.stack([encoded_audio, limited_labels]))
+
+    # To ensure that audio encoding logic is only executed when summaries
+    # are written, we pass callable to `tensor` parameter.
     return tf.summary.write(
-        tag=tag, tensor=tensor, step=step, metadata=summary_metadata)
+        tag=tag, tensor=lazy_tensor, step=step, metadata=summary_metadata)
diff --git a/tensorboard/plugins/histogram/BUILD b/tensorboard/plugins/histogram/BUILD
@@ -109,6 +109,7 @@ py_library(
         "//tensorboard:expect_numpy_installed",
         "//tensorboard/compat",
         "//tensorboard/compat/proto:protos_all_py_pb2",
+        "//tensorboard/util:lazy_tensor_creator",
         "//tensorboard/util:tensor_util",
     ],
 )
diff --git a/tensorboard/plugins/histogram/summary_v2.py b/tensorboard/plugins/histogram/summary_v2.py
@@ -34,6 +34,7 @@
 from tensorboard.compat import tf2 as tf
 from tensorboard.compat.proto import summary_pb2
 from tensorboard.plugins.histogram import metadata
+from tensorboard.util import lazy_tensor_creator
 from tensorboard.util import tensor_util
 
 
@@ -76,9 +77,14 @@ def histogram(name, data, step=None, buckets=None, description=None):
   def histogram_summary(data, buckets, histogram_metadata, step):
     with summary_scope(
         name, 'histogram_summary', values=[data, buckets, step]) as (tag, _):
-      tensor = _buckets(data, bucket_count=buckets)
+      # Defer histogram bucketing logic by passing it as a callable to write(),
+      # wrapped in a LazyTensorCreator for backwards compatibility, so that we
+      # only do this work when summaries are actually written.
+      @lazy_tensor_creator.LazyTensorCreator
+      def lazy_tensor():
+        return _buckets(data, buckets)
       return tf.summary.write(
-          tag=tag, tensor=tensor, step=step, metadata=histogram_metadata)
+          tag=tag, tensor=lazy_tensor, step=step, metadata=summary_metadata)
 
   # `_buckets()` has dynamic output shapes which is not supported on TPU's. As so, place
   # the bucketing ops on outside compilation cluster so that the function in executed on CPU.
diff --git a/tensorboard/plugins/image/BUILD b/tensorboard/plugins/image/BUILD
@@ -107,7 +107,7 @@ py_library(
         ":metadata",
         "//tensorboard/compat",
         "//tensorboard/compat/proto:protos_all_py_pb2",
-        "//tensorboard/util:tensor_util",
+        "//tensorboard/util:lazy_tensor_creator",
     ],
 )
 
diff --git a/tensorboard/plugins/image/summary_v2.py b/tensorboard/plugins/image/summary_v2.py
@@ -24,6 +24,7 @@
 
 from tensorboard.compat import tf2 as tf
 from tensorboard.plugins.image import metadata
+from tensorboard.util import lazy_tensor_creator
 
 
 def image(name,
@@ -68,21 +69,29 @@ def image(name,
       tf.summary.summary_scope)
   with summary_scope(
       name, 'image_summary', values=[data, max_outputs, step]) as (tag, _):
-    tf.debugging.assert_rank(data, 4)
-    tf.debugging.assert_non_negative(max_outputs)
-    images = tf.image.convert_image_dtype(data, tf.uint8, saturate=True)
-    limited_images = images[:max_outputs]
-    encoded_images = tf.map_fn(tf.image.encode_png, limited_images,
-                               dtype=tf.string,
-                               name='encode_each_image')
-    # Workaround for map_fn returning float dtype for an empty elems input.
-    encoded_images = tf.cond(
-        tf.shape(input=encoded_images)[0] > 0,
-        lambda: encoded_images, lambda: tf.constant([], tf.string))
-    image_shape = tf.shape(input=images)
-    dimensions = tf.stack([tf.as_string(image_shape[2], name='width'),
-                           tf.as_string(image_shape[1], name='height')],
-                          name='dimensions')
-    tensor = tf.concat([dimensions, encoded_images], axis=0)
+    # Defer image encoding preprocessing by passing it as a callable to write(),
+    # wrapped in a LazyTensorCreator for backwards compatibility, so that we
+    # only do this work when summaries are actually written.
+    @lazy_tensor_creator.LazyTensorCreator
+    def lazy_tensor():
+      tf.debugging.assert_rank(data, 4)
+      tf.debugging.assert_non_negative(max_outputs)
+      images = tf.image.convert_image_dtype(data, tf.uint8, saturate=True)
+      limited_images = images[:max_outputs]
+      encoded_images = tf.map_fn(tf.image.encode_png, limited_images,
+                                 dtype=tf.string,
+                                 name='encode_each_image')
+      # Workaround for map_fn returning float dtype for an empty elems input.
+      encoded_images = tf.cond(
+          tf.shape(input=encoded_images)[0] > 0,
+          lambda: encoded_images, lambda: tf.constant([], tf.string))
+      image_shape = tf.shape(input=images)
+      dimensions = tf.stack([tf.as_string(image_shape[2], name='width'),
+                             tf.as_string(image_shape[1], name='height')],
+                            name='dimensions')
+      return tf.concat([dimensions, encoded_images], axis=0)
+
+    # To ensure that image encoding logic is only executed when summaries
+    # are written, we pass callable to `tensor` parameter.
     return tf.summary.write(
-        tag=tag, tensor=tensor, step=step, metadata=summary_metadata)
+        tag=tag, tensor=lazy_tensor, step=step, metadata=summary_metadata)
diff --git a/tensorboard/util/BUILD b/tensorboard/util/BUILD
@@ -79,9 +79,29 @@ py_test(
 
 tb_proto_library(
     name = "grpc_util_test_proto",
-    has_services = True,
-    srcs = ["grpc_util_test.proto"],
     testonly = True,
+    srcs = ["grpc_util_test.proto"],
+    has_services = True,
+)
+
+py_library(
+    name = "lazy_tensor_creator",
+    srcs = ["lazy_tensor_creator.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorboard/compat",
+    ],
+)
+
+py_test(
+    name = "lazy_tensor_creator_test",
+    size = "small",
+    srcs = ["lazy_tensor_creator_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":lazy_tensor_creator",
+        "//tensorboard:expect_tensorflow_installed",
+    ],
 )
 
 py_library(
diff --git a/tensorboard/util/lazy_tensor_creator.py b/tensorboard/util/lazy_tensor_creator.py
@@ -0,0 +1,107 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Provides a lazy wrapper for deferring Tensor creation."""
+
+import threading
+
+from tensorboard.compat import tf2 as tf
+
+
+# Sentinel used for LazyTensorCreator._tensor to indicate that a value is
+# currently being computed, in order to fail hard on reentrancy.
+_CALL_IN_PROGRESS_SENTINEL = object()
+
+
+class LazyTensorCreator(object):
+  """Lazy auto-converting wrapper for a callable that returns a `tf.Tensor`.
+
+  This class wraps an arbitrary callable that returns a `Tensor` so that it
+  will be automatically converted to a `Tensor` by any logic that calls
+  `tf.convert_to_tensor()`. This also memoizes the callable so that it is
+  called at most once.
+
+  The intended use of this class is to defer the construction of a `Tensor`
+  (e.g. to avoid unnecessary wasted computation, or ensure any new ops are
+  created in a context only available later on in execution), while remaining
+  compatible with APIs that expect to be given an already materialized value
+  that can be converted to a `Tensor`.
+
+  This class is thread-safe.
+  """
+
+  def __init__(self, tensor_callable):
+    """Initializes a LazyTensorCreator object.
+
+    Args:
+      tensor_callable: A callable that returns a `tf.Tensor`.
+    """
+    if not callable(tensor_callable):
+      raise ValueError("Not a callable: %r" % tensor_callable)
+    self._tensor_callable = tensor_callable
+    self._tensor = None
+    self._tensor_lock = threading.RLock()
+    _register_conversion_function_once()
+
+  def __call__(self):
+    if self._tensor is None or self._tensor is _CALL_IN_PROGRESS_SENTINEL:
+      with self._tensor_lock:
+        if self._tensor is _CALL_IN_PROGRESS_SENTINEL:
+          raise RuntimeError("Cannot use LazyTensorCreator with reentrant callable")
+        elif self._tensor is None:
+          self._tensor = _CALL_IN_PROGRESS_SENTINEL
+          self._tensor = self._tensor_callable()
+    return self._tensor
+
+
+def _lazy_tensor_creator_converter(value, dtype=None, name=None, as_ref=False):
+  del name  # ignored
+  if not isinstance(value, LazyTensorCreator):
+    raise RuntimeError("Expected LazyTensorCreator, got %r" % value)
+  if as_ref:
+    raise RuntimeError("Cannot use LazyTensorCreator to create ref tensor")
+  tensor = value()
+  if dtype not in (None, tensor.dtype):
+    raise RuntimeError(
+        "Cannot convert LazyTensorCreator returning dtype %s to dtype %s" % (
+            tensor.dtype, dtype))
+  return tensor
+
+
+# Use module-level bit and lock to ensure that registration of the
+# LazyTensorCreator conversion function happens only once.
+_conversion_registered = False
+_conversion_registered_lock = threading.Lock()
+
+
+def _register_conversion_function_once():
+  """Performs one-time registration of `_lazy_tensor_creator_converter`.
+
+  This helper can be invoked multiple times but only registers the conversion
+  function on the first invocation, making it suitable for calling when
+  constructing a LazyTensorCreator.
+
+  Deferring the registration is necessary because doing it at at module import
+  time would trigger the lazy TensorFlow import to resolve, and that in turn
+  would break the delicate `tf.summary` import cycle avoidance scheme.
+  """
+  global _conversion_registered
+  if not _conversion_registered:
+    with _conversion_registered_lock:
+      if not _conversion_registered:
+        _conversion_registered = True
+        tf.register_tensor_conversion_function(
+            base_type=LazyTensorCreator,
+            conversion_func=_lazy_tensor_creator_converter,
+            priority=0)
diff --git a/tensorboard/util/lazy_tensor_creator_test.py b/tensorboard/util/lazy_tensor_creator_test.py
@@ -0,0 +1,100 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+from tensorboard.util import lazy_tensor_creator
+
+
+tf.compat.v1.enable_eager_execution()
+
+
+class LazyTensorCreatorTest(tf.test.TestCase):
+
+  def assertEqualAsNumpy(self, a, b):
+    # TODO(#2507): Remove after we no longer test against TF 1.x.
+    self.assertEqual(a.numpy(), b.numpy())
+
+  def test_lazy_creation_with_memoization(self):
+    boxed_count = [0]
+    @lazy_tensor_creator.LazyTensorCreator
+    def lazy_tensor():
+      boxed_count[0] = boxed_count[0] + 1
+      return tf.constant(1)
+    self.assertEqual(0, boxed_count[0])
+    real_tensor = lazy_tensor()
+    self.assertEqual(1, boxed_count[0])
+    lazy_tensor()
+    self.assertEqual(1, boxed_count[0])
+
+  def test_conversion_explicit(self):
+    @lazy_tensor_creator.LazyTensorCreator
+    def lazy_tensor():
+      return tf.constant(1)
+    real_tensor = tf.convert_to_tensor(lazy_tensor)
+    self.assertEqualAsNumpy(tf.constant(1), real_tensor)
+
+  def test_conversion_identity(self):
+    @lazy_tensor_creator.LazyTensorCreator
+    def lazy_tensor():
+      return tf.constant(1)
+    real_tensor = tf.identity(lazy_tensor)
+    self.assertEqualAsNumpy(tf.constant(1), real_tensor)
+
+  def test_conversion_implicit(self):
+    @lazy_tensor_creator.LazyTensorCreator
+    def lazy_tensor():
+      return tf.constant(1)
+    real_tensor = lazy_tensor + tf.constant(1)
+    self.assertEqualAsNumpy(tf.constant(2), real_tensor)
+
+  def test_explicit_dtype_okay_if_matches(self):
+    @lazy_tensor_creator.LazyTensorCreator
+    def lazy_tensor():
+      return tf.constant(1, dtype=tf.int32)
+    real_tensor = tf.convert_to_tensor(lazy_tensor, dtype=tf.int32)
+    self.assertEqual(tf.int32, real_tensor.dtype)
+    self.assertEqualAsNumpy(tf.constant(1, dtype=tf.int32), real_tensor)
+
+  def test_explicit_dtype_rejected_if_different(self):
+    @lazy_tensor_creator.LazyTensorCreator
+    def lazy_tensor():
+      return tf.constant(1, dtype=tf.int32)
+    with self.assertRaisesRegex(RuntimeError, "dtype"):
+      tf.convert_to_tensor(lazy_tensor, dtype=tf.int64)
+
+  def test_as_ref_rejected(self):
+    @lazy_tensor_creator.LazyTensorCreator
+    def lazy_tensor():
+      return tf.constant(1, dtype=tf.int32)
+    with self.assertRaisesRegex(RuntimeError, "ref tensor"):
+      # Call conversion routine manually since this isn't actually
+      # exposed as an argument to tf.convert_to_tensor.
+      lazy_tensor_creator._lazy_tensor_creator_converter(
+          lazy_tensor, as_ref=True)
+
+  def test_reentrant_callable_does_not_deadlock(self):
+    @lazy_tensor_creator.LazyTensorCreator
+    def lazy_tensor():
+      return lazy_tensor()
+    with self.assertRaisesRegex(RuntimeError, "reentrant callable"):
+      lazy_tensor()
+
+
+if __name__ == '__main__':
+  tf.test.main()

Original file line number	Diff line number	Diff line change
`@@ -92,6 +92,7 @@ py_library(`
`92`	`92`	`deps = [`
`93`	`93`	`":metadata",`
`94`	`94`	`"//tensorboard/compat",`
	`95`	`+ "//tensorboard/util:lazy_tensor_creator",`
`95`	`96`	`],`
`96`	`97`	`)`
`97`	`98`
Original file line number	Diff line number	Diff line change
`@@ -109,6 +109,7 @@ py_library(`
`109`	`109`	`"//tensorboard:expect_numpy_installed",`
`110`	`110`	`"//tensorboard/compat",`
`111`	`111`	`"//tensorboard/compat/proto:protos_all_py_pb2",`
	`112`	`+ "//tensorboard/util:lazy_tensor_creator",`
`112`	`113`	`"//tensorboard/util:tensor_util",`
`113`	`114`	`],`
`114`	`115`	`)`
Original file line number	Diff line number	Diff line change
`@@ -107,7 +107,7 @@ py_library(`
`107`	`107`	`":metadata",`
`108`	`108`	`"//tensorboard/compat",`
`109`	`109`	`"//tensorboard/compat/proto:protos_all_py_pb2",`
`110`		`- "//tensorboard/util:tensor_util",`
	`110`	`+ "//tensorboard/util:lazy_tensor_creator",`
`111`	`111`	`],`
`112`	`112`	`)`
`113`	`113`