pythonAI
diff --git a/‎tensorflow/core/BUILD‎
Lines changed: 23 additions & 35 deletions b/‎tensorflow/core/BUILD‎
Lines changed: 23 additions & 35 deletions
diff --git a/‎tensorflow/core/common_runtime/direct_session.cc‎
Lines changed: 15 additions & 0 deletions b/‎tensorflow/core/common_runtime/direct_session.cc‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎tensorflow/core/common_runtime/direct_session.h‎
Lines changed: 3 additions & 0 deletions b/‎tensorflow/core/common_runtime/direct_session.h‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎tensorflow/core/common_runtime/gpu/gpu_util.cc‎
Lines changed: 28 additions & 0 deletions b/‎tensorflow/core/common_runtime/gpu/gpu_util.cc‎
Lines changed: 28 additions & 0 deletions
diff --git a/‎tensorflow/core/common_runtime/gpu/gpu_util.h‎
Lines changed: 10 additions & 0 deletions b/‎tensorflow/core/common_runtime/gpu/gpu_util.h‎
Lines changed: 10 additions & 0 deletions
@@ -560,6 +560,8 @@ filegroup(
             "client/**/*.cc",
             "common_runtime/**/*.h",
             "common_runtime/**/*.cc",
+            "debug/**/*.h",
+            "debug/**/*.cc",
             "framework/**/*.h",
             "framework/**/*.cc",
             "graph/**/*.h",
@@ -1085,6 +1087,7 @@ tf_cuda_library(
     linkstatic = 1,
     deps = [
         ":core_cpu_internal",
+        ":debug_graph_utils",
         ":framework",
         ":gpu_tracer",
         ":lib",
@@ -1114,6 +1117,23 @@ tf_cuda_library(
     alwayslink = 1,
 )
 
+tf_cuda_library(
+    name = "debug_graph_utils",  # Depended on by debug/debug_gateway_test.
+    srcs = ["debug/debug_graph_utils.cc"],
+    hdrs = ["debug/debug_graph_utils.h"],  # Included by direct_session.h.
+    copts = tf_copts(),
+    linkstatic = 1,
+    deps = [
+        ":core_cpu_internal",
+        ":framework",
+        ":lib",
+        ":lib_internal",
+        ":proto_text",
+        ":protos_all_cc",
+    ],
+    alwayslink = 1,
+)
+
 cc_library(
     name = "example_parser_configuration",
     srcs = ["example/example_parser_configuration.cc"],
@@ -1580,42 +1600,8 @@ tf_cc_test(
     ],
 )
 
-tf_cc_test(
-    name = "debug/debug_gateway_test",
-    size = "small",
-    linkstatic = tf_kernel_tests_linkstatic(),
-    deps = [
-        ":core",
-        ":core_cpu",
-        ":core_cpu_internal",
-        ":debug_gateway_internal",
-        ":direct_session_internal",
-        ":framework",
-        ":framework_internal",
-        ":lib",
-        ":lib_internal",
-        ":ops",
-        ":protos_all_cc",
-        ":test",
-        ":test_main",
-        ":testlib",
-        "//tensorflow/cc:cc_ops",
-        "//tensorflow/core/kernels:control_flow_ops",
-        "//tensorflow/core/kernels:cwise_op",
-        "//tensorflow/core/kernels:dense_update_ops",
-        "//tensorflow/core/kernels:fifo_queue_op",
-        "//tensorflow/core/kernels:identity_op",
-        "//tensorflow/core/kernels:matmul_op",
-        "//tensorflow/core/kernels:ops_util",
-        "//tensorflow/core/kernels:queue_ops",
-        "//tensorflow/core/kernels:session_ops",
-        "//tensorflow/core/kernels:variable_ops",
-        "//third_party/eigen3",
-    ],
-)
-
 tf_cc_test_gpu(
-    name = "debug/debug_gateway_gpu_test",
+    name = "debug/debug_gateway_test",
     size = "small",
     args = ["--heap_check=local"],
     linkstatic = tf_kernel_tests_linkstatic(),
@@ -1625,6 +1611,7 @@ tf_cc_test_gpu(
         ":core_cpu",
         ":core_cpu_internal",
         ":debug_gateway_internal",
+        ":debug_graph_utils",
         ":direct_session",
         ":direct_session_internal",
         ":framework",
@@ -1637,6 +1624,7 @@ tf_cc_test_gpu(
         ":test_main",
         ":testlib",
         "//tensorflow/cc:cc_ops",
+        "//tensorflow/core/kernels:debug_ops",
         "//tensorflow/core/kernels:ops_util",
     ],
 )
 
@@ -284,6 +284,15 @@ Status DirectSession::Run(const RunOptions& run_options,
   }
   thread::ThreadPool* pool = thread_pools_[run_options.inter_op_thread_pool()];
 
+  // EXPERIMENTAL: Options that allow the client to insert nodes into partition
+  // graphs for debugging.
+  if (!run_options.debug_tensor_watch_opts().empty()) {
+    debug_node_inserter_.reset(
+        new DebugNodeInserter(run_options.debug_tensor_watch_opts()));
+  } else {
+    debug_node_inserter_.reset(nullptr);
+  }
+
   // Check if we already have an executor for these arguments.
   ExecutorsAndKeys* executors_and_keys;
   RunStateArgs run_state_args;
@@ -794,6 +803,12 @@ Status DirectSession::GetOrCreateExecutors(
 
     partition_graph = iter->second.release();
     optimizer.Optimize(lib, device, &partition_graph);
+
+    // EXPERIMENTAL: tfdb inserts debug nodes (i.e., probes) into the graph
+    if (debug_node_inserter_) {
+      TF_RETURN_IF_ERROR(
+          debug_node_inserter_->InsertNodes(partition_graph, params.device));
+    }
     iter->second.reset(partition_graph);
 
     TF_RETURN_IF_ERROR(EnsureMemoryTypes(DeviceType(device->device_type()),
 
@@ -29,6 +29,7 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/executor.h"
 #include "tensorflow/core/common_runtime/rendezvous_mgr.h"
 #include "tensorflow/core/common_runtime/simple_graph_execution_state.h"
+#include "tensorflow/core/debug/debug_graph_utils.h"
 #include "tensorflow/core/framework/cancellation.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/session_state.h"
@@ -253,7 +254,9 @@ class DirectSession : public Session {
 
   TF_DISALLOW_COPY_AND_ASSIGN(DirectSession);
 
+  // EXPERIMENTAL: debugger (tfdb) related
   friend class DebugGateway;
+  std::unique_ptr<DebugNodeInserter> debug_node_inserter_;
 };
 
 }  // end namespace tensorflow
 
@@ -426,4 +426,32 @@ uint64 GPUUtil::Checksum(const Tensor& tensor) {
                 tensor.TotalBytes(), 0);
 }
 
+// static. Copies 'src_gpu_tensor''s bytes into 'dst_gpu_tensor' via ThenMemcpy.
+void GPUUtil::CopyGPUTensorToSameGPU(Device* gpu_device,
+                                     const DeviceContext* device_context,
+                                     const Tensor* src_gpu_tensor,
+                                     Tensor* dst_gpu_tensor,
+                                     StatusCallback done) {
+  VLOG(1) << "CopyGPUTensorToSameGPU";
+  const DeviceBase::GpuDeviceInfo* dev_info = nullptr;
+  gpu::Stream* send_stream = nullptr;
+  Status s = PrepareCopy(gpu_device, device_context, *src_gpu_tensor,
+                         dst_gpu_tensor, &dev_info, &send_stream);
+  if (!s.ok()) {
+    done(s);  // Report the PrepareCopy failure; nothing is copied.
+    return;
+  }
+
+  const int64 total_bytes = src_gpu_tensor->TotalBytes();
+  if (total_bytes > 0) {  // Skip the memcpy entirely for empty tensors.
+    void* src_ptr = GetBase(src_gpu_tensor);
+    DeviceMemoryBase gpu_src_ptr(src_ptr, total_bytes);
+    void* dst_ptr = GetBase(dst_gpu_tensor);
+    DeviceMemoryBase gpu_dst_ptr(dst_ptr, total_bytes);  // src's byte count
+    send_stream->ThenMemcpy(&gpu_dst_ptr, gpu_src_ptr, total_bytes);
+  }
+
+  done(Status::OK());  // NOTE(review): does not wait on send_stream; confirm.
+}
+
 }  // namespace tensorflow
@@ -100,6 +100,16 @@ class GPUUtil {
                                  AllocatorAttributes dst_alloc_attr,
                                  const Tensor* input, Tensor* output,
                                  StatusCallback done);
+
+  // Deep-copies 'src_gpu_tensor' into 'dst_gpu_tensor' on the same device.
+  // Both tensors' backing memory must be on 'gpu_device', and
+  // 'dst_gpu_tensor' must be allocated to be of the same size as
+  // 'src_gpu_tensor'. 'done' is invoked with the resulting status.
+  static void CopyGPUTensorToSameGPU(Device* gpu_device,
+                                     const DeviceContext* device_context,
+                                     const Tensor* src_gpu_tensor,
+                                     Tensor* dst_gpu_tensor,
+                                     StatusCallback done);
 };
 
 }  // namespace tensorflow