
Commit d06f181

malfet authored and facebook-github-bot committed
Fix codegen/cuda gcc-5.4 compilation issues (#43223)
Summary: Most of the fixes are the same old enum-is-not-hashable error. In manager.cpp, use std::unordered_map::emplace rather than `insert` to avoid an error triggered by missed copy elision. This regression was introduced by #43129.

Pull Request resolved: #43223
Reviewed By: albanD, seemethere
Differential Revision: D23198330
Pulled By: malfet
fbshipit-source-id: 576082f7a4454dd29182892c9c4e0b51a967d456
1 parent d5bc2a8 commit d06f181
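
For reference, a minimal sketch of the enum-hash workaround applied in the diffs below. The enum and hasher bodies here are illustrative stand-ins (the real ParallelType and TypeHash live in the CUDA codegen headers): gcc 5.4's libstdc++ does not specialize std::hash for enum types, so an unordered container keyed on an enum needs an explicit hash functor as its third template argument.

#include <cstddef>
#include <unordered_map>

// Illustrative stand-in for the real ParallelType enum.
enum class ParallelType { BIDx, BIDy, TIDx };

// Sketch of a TypeHash-style functor: hash an enum by its integer value.
struct TypeHash {
  template <typename T>
  std::size_t operator()(T t) const {
    return static_cast<std::size_t>(t);
  }
};

int main() {
  // std::unordered_map<ParallelType, int> m;        // fails to compile on gcc 5.4
  std::unordered_map<ParallelType, int, TypeHash> m; // compiles on gcc 5.4 and newer
  m.emplace(ParallelType::BIDx, 0);
  return static_cast<int>(m.size()) - 1;
}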

File tree: 4 files changed, +14 -12 lines changed


torch/csrc/jit/codegen/cuda/lower_thread_predicate.h

Lines changed: 4 additions & 2 deletions
@@ -22,8 +22,10 @@ namespace fuser {
  */
 class TORCH_CUDA_API ThreadPredicateMap {
  public:
-  using SourceMapType =
-      std::unordered_map<ParallelType, std::unordered_set<const TensorView*>>;
+  using SourceMapType = std::unordered_map<
+      ParallelType,
+      std::unordered_set<const TensorView*>,
+      TypeHash>;
   using MapType = std::unordered_map<
       const TensorView*,
       std::pair<ir_utils::ParallelTypeBitmap, SourceMapType>>;

torch/csrc/jit/codegen/cuda/lower_utils.cpp

Lines changed: 8 additions & 8 deletions
@@ -479,13 +479,13 @@ bool isUnrolledFor(const Expr* expr) {
       ParallelType::Unroll;
 }
 
-const std::unordered_map<ParallelType, int> ParallelTypeBitmap::pt_to_offset_{
-    {ParallelType::BIDx, 0},
-    {ParallelType::BIDy, 1},
-    {ParallelType::BIDz, 2},
-    {ParallelType::TIDx, 3},
-    {ParallelType::TIDy, 4},
-    {ParallelType::TIDz, 5}};
+const std::unordered_map<ParallelType, int, TypeHash>
+    ParallelTypeBitmap::pt_to_offset_{{ParallelType::BIDx, 0},
+                                      {ParallelType::BIDy, 1},
+                                      {ParallelType::BIDz, 2},
+                                      {ParallelType::TIDx, 3},
+                                      {ParallelType::TIDy, 4},
+                                      {ParallelType::TIDz, 5}};
 
 const std::unordered_map<int, ParallelType> ParallelTypeBitmap::offset_to_pt_ =
     {{0, ParallelType::BIDx},

@@ -554,7 +554,7 @@ bool ParallelTypeBitmap::operator[](size_t pos) const {
 std::map<ParallelType, bool> ParallelTypeBitmap::getMap() const {
   std::map<ParallelType, bool> map;
   for (const auto& pt_offset : pt_to_offset_) {
-    map.emplace(std::make_pair(pt_offset.first, bitset_[pt_offset.second]));
+    map.emplace(pt_offset.first, bitset_[pt_offset.second]);
   }
   return map;
 }

torch/csrc/jit/codegen/cuda/lower_utils.h

Lines changed: 1 addition & 1 deletion
@@ -125,7 +125,7 @@ class ParallelTypeBitmap {
  private:
   ParallelTypeBitmap(const std::bitset<num_p_type>& bs) : bitset_(bs) {}
   std::bitset<num_p_type> bitset_;
-  const static std::unordered_map<ParallelType, int> pt_to_offset_;
+  const static std::unordered_map<ParallelType, int, TypeHash> pt_to_offset_;
   const static std::unordered_map<int, ParallelType> offset_to_pt_;
 };
torch/csrc/jit/codegen/cuda/manager.cpp

Lines changed: 1 addition & 1 deletion
@@ -91,7 +91,7 @@ class CudaFusionManager {
     int32_t kernel_id = getNextUniqueID();
     graph_cache_ids_[repr] = kernel_id;
     TORCH_CHECK(
-        graph_cache_.insert({kernel_id, std::make_unique<GraphCache>(graph)})
+        graph_cache_.emplace(kernel_id, std::make_unique<GraphCache>(graph))
             .second);
   }
   return graph_cache_ids_[repr];
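
A minimal sketch of why the manager.cpp change matters, using a hypothetical stand-in for GraphCache: `insert` takes a fully-built value_type pair, and on gcc 5.4 the missed copy elision when building that pair turns into a copy of the move-only std::unique_ptr, which does not compile; `emplace` forwards its arguments and constructs the pair in place, so no copy is needed.

#include <memory>
#include <unordered_map>

struct GraphCache {}; // hypothetical stand-in for the real GraphCache

int main() {
  std::unordered_map<int, std::unique_ptr<GraphCache>> graph_cache_;

  // insert({key, value}) builds the pair first; on gcc 5.4 the missed copy
  // elision requires copying the unique_ptr, which fails to compile:
  //   graph_cache_.insert({0, std::make_unique<GraphCache>()});

  // emplace constructs the pair in place from its arguments:
  bool inserted =
      graph_cache_.emplace(0, std::make_unique<GraphCache>()).second;
  return inserted ? 0 : 1;
}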
