@@ -201,7 +201,7 @@ std::string valueName(Value * n) {
   auto s = at::Scalar(t);
   return (s.isIntegral()) ?
     std::to_string(s.toLong()) :
-    std::to_string(s.toDouble());
+    (std::to_string(s.toDouble()) + "f");
 }
 
 const char * scalarTypeName(at::ScalarType type) {
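The new "f" suffix makes floating-point constants come out as float literals in the generated source; without it a bare double literal would silently promote the surrounding expression to double precision. A minimal standalone sketch of the idea (the emitScalar helper is hypothetical, not from this file):

#include <string>

// Mirrors the change above: integral scalars print as-is, floating-point
// scalars gain a "f" suffix so they parse as float literals in the kernel.
std::string emitScalar(bool isIntegral, long l, double d) {
  return isIntegral ? std::to_string(l)
                    : (std::to_string(d) + "f");
}

// In the emitted kernel this is the difference between
//   out[i] = in[i] * 0.500000;   // promotes to double, then truncates
//   out[i] = in[i] * 0.500000f;  // stays in single precision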
@@ -558,10 +558,15 @@ struct CUDAFusionFunction : public CompiledFusionFunction {
 struct TempFile {
   TH_DISALLOW_COPY_AND_ASSIGN(TempFile);
   TempFile(const std::string & t, int suffix) {
+    // mkstemps edits its first argument in place,
+    // so we make a copy of the string here, including the null terminator
     std::vector<char> tt(t.c_str(), t.c_str() + t.size() + 1);
     int fd = mkstemps(tt.data(), suffix);
     JIT_ASSERT(fd != -1);
     file_ = fdopen(fd, "r+");
+
+    // - 1 because tt.size() includes the null terminator,
+    // but std::string does not expect one
     name_ = std::string(tt.begin(), tt.end() - 1);
   }
   const std::string & name() const {
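For context: mkstemps(template, suffixlen) rewrites the run of "XXXXXX" in its buffer in place while leaving the last suffixlen characters untouched, which is why the constructor must copy the string first and why the call sites below pass 3 for ".so" and 4 for ".cpp". A standalone sketch of that contract:

#include <cstdio>
#include <cstdlib>
#include <string>
#include <vector>

int main() {
  std::string tmpl = "/tmp/pytorch_fuserXXXXXX.cpp";
  // mkstemps mutates its buffer, so copy the string plus its null terminator.
  std::vector<char> buf(tmpl.c_str(), tmpl.c_str() + tmpl.size() + 1);
  // suffixlen = 4 keeps ".cpp" intact and randomizes the XXXXXX run.
  int fd = mkstemps(buf.data(), 4);
  if (fd == -1) return 1;
  std::printf("created %s\n", buf.data());  // e.g. /tmp/pytorch_fuserAb3dZ9.cpp
  return 0;
}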
@@ -623,18 +628,29 @@ static const std::string cpp_template = "/tmp/pytorch_fuserXXXXXX.cpp";
 static const std::string compile_string =
   "\"${cxx}\" -O3 -g -march=native -std=c++11 -fPIC -shared \"${cpp_file}\" -o \"${so_file}\"";
 
-static void runCompiler(const std::string & cxx, const std::string & cpp_file, const std::string & so_file) {
+static void runCompiler(FusionCompilerConfig & config, const std::string & cpp_file, const std::string & so_file) {
   TemplateEnv env;
-  env.s("cxx", cxx);
+  env.s("cxx", config.cxx);
   env.s("cpp_file", cpp_file);
   env.s("so_file", so_file);
   std::string result = format(compile_string, env);
   int r = system(result.c_str());
   JIT_ASSERT(r == 0);
 }
 
+
+static const std::string disas_string =
+  "objdump -M intel -d \"${so_file}\"";
+static void disas(const std::string & so_file) {
+  TemplateEnv env;
+  env.s("so_file", so_file);
+  std::string cmd = format(disas_string, env);
+  int r = system(cmd.c_str());
+  JIT_ASSERT(r == 0);
+}
+
 struct CPUFusionFunction : public CompiledFusionFunction {
-  CPUFusionFunction(const std::string & name, AnnotatedGraph & agraph, const std::string & cxx)
+  CPUFusionFunction(const std::string & name, AnnotatedGraph & agraph, FusionCompilerConfig & config)
     : CompiledFusionFunction(name, agraph) {
     TempFile so_file(so_template, 3);
     TempFile cpp_file(cpp_template, 4);
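TemplateEnv and format are PyTorch-internal helpers; roughly, they splice the env values into the ${...} placeholders of compile_string and disas_string before the command is handed to system(). A rough standalone approximation of that substitution (not the actual implementation):

#include <map>
#include <string>

std::string substitute(const std::string & tmpl,
                       const std::map<std::string, std::string> & env) {
  std::string out;
  for (size_t i = 0; i < tmpl.size();) {
    if (tmpl[i] == '$' && i + 1 < tmpl.size() && tmpl[i + 1] == '{') {
      size_t end = tmpl.find('}', i + 2);
      out += env.at(tmpl.substr(i + 2, end - i - 2));  // splice in the value
      i = end + 1;
    } else {
      out += tmpl[i++];
    }
  }
  return out;
}

// substitute(compile_string, {{"cxx","g++"}, {"cpp_file",...}, {"so_file",...}})
// yields the exact shell command passed to system().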
@@ -644,7 +660,11 @@ struct CPUFusionFunction : public CompiledFusionFunction {
     compilation_unit = cu.str();
     cpp_file.write(compilation_unit);
     cpp_file.sync();
-    runCompiler(cxx, cpp_file.name(), so_file.name());
+    runCompiler(config, cpp_file.name(), so_file.name());
+    if (config.debug) {
+      std::cout << compilation_unit << "\n";
+      disas(so_file.name());
+    }
     so_lib.reset(new DynamicLibrary(so_file.name().c_str()));
     kernel = reinterpret_cast<void(*)(uint32_t, void**)>(so_lib->sym(name.c_str()));
   }
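After compiling, the constructor loads the shared object and binds the generated symbol. DynamicLibrary wraps the usual dlopen/dlsym pair; a minimal sketch of that step with raw libdl calls (the loadKernel helper is hypothetical, error handling elided; link with -ldl):

#include <dlfcn.h>
#include <cstdint>

using KernelFn = void (*)(uint32_t numel, void ** args);

KernelFn loadKernel(const char * so_path, const char * sym_name) {
  void * handle = dlopen(so_path, RTLD_NOW);
  if (!handle) return nullptr;
  // The generated entry point takes the element count and an array of raw
  // pointers to tensor data, matching the reinterpret_cast above.
  return reinterpret_cast<KernelFn>(dlsym(handle, sym_name));
}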
@@ -690,7 +710,7 @@ std::shared_ptr<CompiledFusionFunction> FusionCompiler::getOrCompile(AnnotatedGr
 #endif
   } else {
     JIT_ASSERT(canCompileOnCPU());
-    raw_func = new CPUFusionFunction(name, agraph, cxx);
+    raw_func = new CPUFusionFunction(name, agraph, config_);
   }
   it = cache.emplace(key_, std::shared_ptr<CompiledFusionFunction>(raw_func)).first;
 }
@@ -711,15 +731,23 @@ std::shared_ptr<CompiledFusionFunction> FusionCompiler::getOrCompile(Node* fusio
   return getOrCompile(agraph);
 }
 
-void FusionCompiler::debugLaunchGraph(Graph & graph, bool is_cuda, at::ArrayRef<at::Tensor> inputs, at::ArrayRef<at::Tensor> outputs) {
+
+std::shared_ptr<CompiledFusionFunction> FusionCompiler::getOrCompile(Graph & graph,
+                                                                     bool is_cuda,
+                                                                     at::ArrayRef<at::Tensor> inputs,
+                                                                     at::ArrayRef<at::Tensor> outputs) {
   AnnotatedGraph agraph(graph, is_cuda);
   for (auto & i : inputs) {
-    agraph.input_desc.emplace_back(i);
+    agraph.input_desc.emplace_back(i);
   }
   for (auto & i : outputs) {
-    agraph.output_desc.emplace_back(i);
+    agraph.output_desc.emplace_back(i);
   }
-  auto func = getOrCompile(agraph);
+  return getOrCompile(agraph);
+}
+
+void FusionCompiler::debugLaunchGraph(Graph & graph, bool is_cuda, at::ArrayRef<at::Tensor> inputs, at::ArrayRef<at::Tensor> outputs) {
+  auto func = getOrCompile(graph, is_cuda, inputs, outputs);
   func->launch_with_tensors(inputs, outputs);
 }
@@ -736,13 +764,13 @@ static bool programExists(const std::string & program) {
 FusionCompiler::FusionCompiler() {
   const char * cxx_env = getenv("CXX");
   if (cxx_env != nullptr) {
-    cxx = cxx_env;
-  } else {
-    cxx = "g++";
+    config_.cxx = cxx_env;
   }
-  if (!programExists(cxx)) {
-    cxx = "";
+  if (!programExists(config_.cxx)) {
+    config_.cxx = "";
   }
+  const char * debug_env = getenv("PYTORCH_FUSION_DEBUG");
+  config_.debug = debug_env && atoi(debug_env) != 0;
 }
 
 // TODO: thread safety
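The hunks never show FusionCompilerConfig itself; a plausible definition consistent with the constructor above (assumed, not taken from the diff) would be:

// Assumed shape of the config struct; the "g++" default replaces the
// removed else-branch, and "" still marks the compiler as unavailable.
struct FusionCompilerConfig {
  std::string cxx = "g++";  // host compiler; cleared to "" when not found
  bool debug = false;       // dump generated source and objdump the .so
};

With this in place, setting PYTORCH_FUSION_DEBUG to any nonzero integer in the environment enables the source dump and disassembly added above.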