
Commit d06f181

malfet authored and facebook-github-bot committed
Fix codegen/cuda gcc-5.4 compilation issues (#43223)
Summary: Most of the fixes are the same old enum-is-not-hashable error. In manager.cpp, use std::unordered_map::emplace rather than `insert` to avoid an error triggered by missed copy elision. This regression was introduced by #43129.

Pull Request resolved: #43223
Reviewed By: albanD, seemethere
Differential Revision: D23198330
Pulled By: malfet
fbshipit-source-id: 576082f7a4454dd29182892c9c4e0b51a967d456
1 parent d5bc2a8 commit d06f181
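
For reference, a minimal sketch of the enum-hash workaround applied in the diffs below. The enum and hasher bodies here are illustrative stand-ins (the real ParallelType and TypeHash live in the CUDA codegen headers): gcc 5.4's libstdc++ does not specialize std::hash for enum types, so an unordered container keyed on an enum needs an explicit hash functor as its third template argument.

#include <cstddef>
#include <unordered_map>

// Illustrative stand-in for the real ParallelType enum.
enum class ParallelType { BIDx, BIDy, TIDx };

// Sketch of a TypeHash-style functor: hash an enum by its integer value.
struct TypeHash {
  template <typename T>
  std::size_t operator()(T t) const {
    return static_cast<std::size_t>(t);
  }
};

int main() {
  // std::unordered_map<ParallelType, int> m;        // fails to compile on gcc 5.4
  std::unordered_map<ParallelType, int, TypeHash> m; // compiles on gcc 5.4 and newer
  m.emplace(ParallelType::BIDx, 0);
  return static_cast<int>(m.size()) - 1;
}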

File tree: 4 files changed, +14 -12 lines changed


torch/csrc/jit/codegen/cuda/lower_thread_predicate.h

Lines changed: 4 additions & 2 deletions
@@ -22,8 +22,10 @@ namespace fuser {
  */
 class TORCH_CUDA_API ThreadPredicateMap {
  public:
-  using SourceMapType =
-      std::unordered_map<ParallelType, std::unordered_set<const TensorView*>>;
+  using SourceMapType = std::unordered_map<
+      ParallelType,
+      std::unordered_set<const TensorView*>,
+      TypeHash>;
   using MapType = std::unordered_map<
       const TensorView*,
       std::pair<ir_utils::ParallelTypeBitmap, SourceMapType>>;

torch/csrc/jit/codegen/cuda/lower_utils.cpp

Lines changed: 8 additions & 8 deletions
@@ -479,13 +479,13 @@ bool isUnrolledFor(const Expr* expr) {
       ParallelType::Unroll;
 }
 
-const std::unordered_map<ParallelType, int> ParallelTypeBitmap::pt_to_offset_{
-    {ParallelType::BIDx, 0},
-    {ParallelType::BIDy, 1},
-    {ParallelType::BIDz, 2},
-    {ParallelType::TIDx, 3},
-    {ParallelType::TIDy, 4},
-    {ParallelType::TIDz, 5}};
+const std::unordered_map<ParallelType, int, TypeHash>
+    ParallelTypeBitmap::pt_to_offset_{{ParallelType::BIDx, 0},
+                                      {ParallelType::BIDy, 1},
+                                      {ParallelType::BIDz, 2},
+                                      {ParallelType::TIDx, 3},
+                                      {ParallelType::TIDy, 4},
+                                      {ParallelType::TIDz, 5}};
 
 const std::unordered_map<int, ParallelType> ParallelTypeBitmap::offset_to_pt_ =
     {{0, ParallelType::BIDx},

@@ -554,7 +554,7 @@ bool ParallelTypeBitmap::operator[](size_t pos) const {
 std::map<ParallelType, bool> ParallelTypeBitmap::getMap() const {
   std::map<ParallelType, bool> map;
   for (const auto& pt_offset : pt_to_offset_) {
-    map.emplace(std::make_pair(pt_offset.first, bitset_[pt_offset.second]));
+    map.emplace(pt_offset.first, bitset_[pt_offset.second]);
   }
   return map;
 }

torch/csrc/jit/codegen/cuda/lower_utils.h

Lines changed: 1 addition & 1 deletion
@@ -125,7 +125,7 @@ class ParallelTypeBitmap {
  private:
   ParallelTypeBitmap(const std::bitset<num_p_type>& bs) : bitset_(bs) {}
   std::bitset<num_p_type> bitset_;
-  const static std::unordered_map<ParallelType, int> pt_to_offset_;
+  const static std::unordered_map<ParallelType, int, TypeHash> pt_to_offset_;
   const static std::unordered_map<int, ParallelType> offset_to_pt_;
 };
torch/csrc/jit/codegen/cuda/manager.cpp

Lines changed: 1 addition & 1 deletion
@@ -91,7 +91,7 @@ class CudaFusionManager {
     int32_t kernel_id = getNextUniqueID();
     graph_cache_ids_[repr] = kernel_id;
     TORCH_CHECK(
-        graph_cache_.insert({kernel_id, std::make_unique<GraphCache>(graph)})
+        graph_cache_.emplace(kernel_id, std::make_unique<GraphCache>(graph))
             .second);
   }
   return graph_cache_ids_[repr];
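
A minimal sketch of why the manager.cpp change matters, using a hypothetical stand-in for GraphCache: `insert` takes a fully-built value_type pair, and on gcc 5.4 the missed copy elision when building that pair turns into a copy of the move-only std::unique_ptr, which does not compile; `emplace` forwards its arguments and constructs the pair in place, so no copy is needed.

#include <memory>
#include <unordered_map>

struct GraphCache {}; // hypothetical stand-in for the real GraphCache

int main() {
  std::unordered_map<int, std::unique_ptr<GraphCache>> graph_cache_;

  // insert({key, value}) builds the pair first; on gcc 5.4 the missed copy
  // elision requires copying the unique_ptr, which fails to compile:
  //   graph_cache_.insert({0, std::make_unique<GraphCache>()});

  // emplace constructs the pair in place from its arguments:
  bool inserted =
      graph_cache_.emplace(0, std::make_unique<GraphCache>()).second;
  return inserted ? 0 : 1;
}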
