23 changes: 23 additions & 0 deletions test/inductor/aot/cpp/CMakeLists.txt
@@ -0,0 +1,23 @@
cmake_minimum_required(VERSION 3.1 FATAL_ERROR)
project(test)

set(Torch_DIR "../../../../torch/share/cmake/Torch")
find_package(Torch REQUIRED)

add_executable(test test.cpp ${CMAKE_BINARY_DIR}/aot_inductor_output.h)

# Running test.py produces both the generated header and the AOT-compiled
# shared library in the build directory.
add_custom_command(
    OUTPUT ${CMAKE_BINARY_DIR}/aot_inductor_output.h
    COMMAND python ${CMAKE_SOURCE_DIR}/test.py
    DEPENDS ${CMAKE_SOURCE_DIR}/test.py
)
add_custom_target(generate_header ALL
    DEPENDS ${CMAKE_BINARY_DIR}/aot_inductor_output.h)

# The .so is built by test.py rather than by CMake, so treat it as a
# prebuilt import.
add_library(aot_inductor_output SHARED IMPORTED)
set_property(TARGET aot_inductor_output PROPERTY
    IMPORTED_LOCATION ${CMAKE_BINARY_DIR}/aot_inductor_output.so)

target_link_libraries(test "${TORCH_LIBRARIES}" aot_inductor_output)

# CXX_STANDARD requires CMake >= 3.1, hence the minimum version above.
set_property(TARGET test PROPERTY CXX_STANDARD 17)
41 changes: 41 additions & 0 deletions test/inductor/aot/cpp/test.cpp
@@ -0,0 +1,41 @@
//#include <gtest/gtest.h>
#include <cassert>
#include <cstdio>

#include "build/aot_inductor_output.h"

/*
class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.weight = torch.ones(32, 64)

    def forward(self, x):
        x = torch.relu(x + self.weight)
        return x
*/
struct Net : torch::nn::Module {
  Net() {
    weight = register_parameter("weight", torch::ones({32, 64}));
  }
  torch::Tensor forward(torch::Tensor input) {
    return torch::relu(input + weight);
  }
  torch::Tensor weight;
};

int main() {
  torch::Tensor x = torch::randn({32, 64});
  Net net;
  torch::Tensor results_ref = net.forward(x);

  // TODO: we need to provide an API to concatenate args and weights
  std::vector<torch::Tensor> inputs = {x};
  for (const auto& pair : net.named_parameters()) {
    inputs.push_back(pair.value());
  }
  torch::Tensor results_opt = aot_inductor_entry(inputs);

  assert(torch::allclose(results_ref, results_opt));
  printf("PASS\n");
  return 0;
}
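
For orientation, aot_inductor_entry comes from the generated
build/aot_inductor_output.h. Based on what AotCodeCache.compile writes below
(a torch/torch.h include followed by CppWrapperCodeGen.decl_str), the header
plausibly looks like the following sketch; the exact signature is emitted by
the wrapper codegen, and the single-tensor return is an assumption made for
this one-output graph.

// Hypothetical reconstruction of the generated aot_inductor_output.h;
// the real declaration comes from CppWrapperCodeGen.decl_str.
#include <torch/torch.h>

// Takes the flattened inputs (graph args followed by parameters, per the
// TODO in main()) and returns the graph output; single output assumed.
at::Tensor aot_inductor_entry(std::vector<at::Tensor> inputs);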
22 changes: 22 additions & 0 deletions test/inductor/aot/cpp/test.py
@@ -0,0 +1,22 @@
import torch
import torch._dynamo
import torch._inductor
import torch._inductor.config

# Name the generated .so/.h pair; CMakeLists.txt and test.cpp expect
# "aot_inductor_output".
torch._inductor.config.aot_codegen_output_prefix = "aot_inductor_output"


class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.weight = torch.ones(32, 64)

    def forward(self, x):
        x = torch.relu(x + self.weight)
        return x


inp = torch.randn((32, 64), device="cpu")
module, _ = torch._dynamo.export(Net(), inp)
so_path = torch._inductor.aot_compile(module, [inp])
print(so_path)
8 changes: 8 additions & 0 deletions test/inductor/aot/cpp/test.sh
@@ -0,0 +1,8 @@
#!/bin/bash
set -euxo pipefail

mkdir -p build
cd build
cmake ..
make
./test
21 changes: 21 additions & 0 deletions torch/_inductor/__init__.py
@@ -27,6 +27,27 @@ def compile(
return compile_fx(gm, example_inputs, config_patches=options)


def aot_compile(
    gm: torch.fx.GraphModule,
    example_inputs: List[torch.Tensor],
    options: Optional[Dict[str, Any]] = None,
) -> str:
    """
    Ahead-of-time compile a given FX graph with TorchInductor into a shared library.

    Args:
        gm: The FX graph to compile.
        example_inputs: List of tensor inputs.
        options: Optional dict of config options. See `torch._inductor.config`.

    Returns:
        Path to the generated shared library.
    """
    from .compile_fx import compile_fx

    return compile_fx(gm, example_inputs, config_patches=options, aot_mode=True)()


def list_mode_options(mode: str = None) -> Dict[str, Any]:
r"""Returns a dictionary describing the optimizations that each of the available
modes passed to `torch.compile()` performs.
46 changes: 46 additions & 0 deletions torch/_inductor/codecache.py
@@ -534,6 +534,52 @@ def cpp_compile_command(
).strip()


class AotCodeCache:
    cache = dict()
    clear = staticmethod(cache.clear)

    @classmethod
    def compile(cls, source_code):
        from .codegen.wrapper import CppWrapperCodeGen

        # TODO: update cpp_compile_command for different platforms
        picked_vec_isa = pick_vec_isa()
        key, input_path = write(
            source_code,
            "cpp",
            code_hash(repr(cpp_compile_command("i", "o", vec_isa=picked_vec_isa))),
        )
        if key not in cls.cache:
            from filelock import FileLock

            lock_dir = get_lock_dir()
            lock = FileLock(os.path.join(lock_dir, key + ".lock"), timeout=LOCK_TIMEOUT)
            with lock:
                # input_path ends in ".cpp"; swap the extension rather than
                # slicing a fixed character count, which would leave a stray dot.
                output_so = (
                    os.path.join(os.getcwd(), f"{config.aot_codegen_output_prefix}.so")
                    if config.aot_codegen_output_prefix
                    else f"{os.path.splitext(input_path)[0]}.so"
                )

                output_header = f"{os.path.splitext(output_so)[0]}.h"
                with open(output_header, "w") as header_file:
                    header_file.write("#include <torch/torch.h>\n\n")
                    header_file.write(f"{CppWrapperCodeGen.decl_str};\n")

                log.info(f"AOT-Inductor compiles code into: {output_so}")
                if not os.path.exists(output_so):
                    cmd = cpp_compile_command(
                        input=input_path, output=output_so, vec_isa=picked_vec_isa
                    ).split(" ")
                    try:
                        subprocess.check_output(cmd, stderr=subprocess.STDOUT)
                    except subprocess.CalledProcessError as e:
                        raise exc.CppCompileError(cmd, e.output) from e

                cls.cache[key] = output_so

        return cls.cache[key]


class CppCodeCache:
    cache = dict()
    clear = staticmethod(cache.clear)
16 changes: 12 additions & 4 deletions torch/_inductor/codegen/cpp.py
@@ -2050,7 +2050,12 @@ def codegen_define_and_call(self, wrapper):
        )
        if enable_kernel_profile:
            code.writelines(["#include <ATen/record_function.h>"])
        code.writelines([cpp_prefix(), "" f'extern "C" void kernel({arg_defs})'])
        kernel_decl_name = kernel_name if V.graph.aot_mode else "kernel"

        if not V.graph.aot_mode or self.count == 1:
            code.writeline(cpp_prefix())

        code.writeline(f'extern "C" void {kernel_decl_name}({arg_defs})')
        with code.indent():
            if enable_kernel_profile:
                graph_id = V.graph.graph_id
@@ -2065,9 +2070,12 @@ def codegen_define_and_call(self, wrapper):
            code.splice(self.loops_code)

        codecache_def = IndentedBuffer()
        codecache_def.writeline("async_compile.cpp('''")
        codecache_def.splice(code)
        codecache_def.writeline("''')")
        if V.graph.aot_mode:
            codecache_def.splice(code)
        else:
            codecache_def.writeline("async_compile.cpp('''")
            codecache_def.splice(code)
            codecache_def.writeline("''')")

        codecache_str = codecache_def.getvalue()
        # TODO(voz): Ostensibly, we should not need this. But there are cases where C++ codegen does
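
To make the aot_mode branches concrete: in JIT mode each compiled unit holds a
single kernel with the generic name "kernel", wrapped in an
async_compile.cpp('''...''') call; in AOT mode kernels keep their unique names
so many of them can share one translation unit, and cpp_prefix() is emitted
only once, for the first kernel. A minimal sketch of what AOT-mode output
might look like; the kernel name, signature, and body are all illustrative,
not the actual codegen:

// Illustrative AOT-mode kernel, roughly the shape of what
// codegen_define_and_call emits for the test's graph (name and signature
// are assumptions; only the relu is shown, the add is omitted for brevity).
#include <cstddef>

extern "C" void kernel_cpp_0(const float* in_ptr0, float* out_ptr0) {
  for (std::size_t i = 0; i < 2048; ++i) {  // 32 * 64 elements
    out_ptr0[i] = in_ptr0[i] > 0.f ? in_ptr0[i] : 0.f;
  }
}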
1 change: 1 addition & 0 deletions torch/_inductor/codegen/cpp_prefix.h
@@ -5,6 +5,7 @@
#include <limits>
#include <omp.h>

#include <ATen/ATen.h>
#include <ATen/core/PhiloxRNGEngine.h>
#if defined(CPU_CAPABILITY_AVX512) || defined(CPU_CAPABILITY_AVX2)
#include <ATen/cpu/vec/functional.h>