16 changes: 8 additions & 8 deletions test/profiler/test_memory_profiler.py
@@ -1305,14 +1305,14 @@ def step_fn(mark_region):
aten::detach 7 (GRADIENT) -> 7 (GRADIENT)

-- Optimizer --------------------------------------------------------------------------------------------
-aten::clone 7 (GRADIENT) -> 10 (???)
-aten::detach 10 (???) -> 10 (???)
-aten::detach 10 (???) -> 10 (???)
-aten::add_.Tensor 2 (PARAMETER), 10 (???) -> 2 (PARAMETER)
-aten::clone 9 (GRADIENT) -> 11 (???)
-aten::detach 11 (???) -> 11 (???)
-aten::detach 11 (???) -> 11 (???)
-aten::add_.Tensor 3 (PARAMETER), 11 (???) -> 3 (PARAMETER)
+aten::clone 7 (GRADIENT) -> 10 (OPTIMIZER_STATE)
+aten::detach 10 (OPTIMIZER_STATE) -> 10 (OPTIMIZER_STATE)
+aten::detach 10 (OPTIMIZER_STATE) -> 10 (OPTIMIZER_STATE)
+aten::add_.Tensor 2 (PARAMETER), 10 (OPTIMIZER_STATE) -> 2 (PARAMETER)
+aten::clone 9 (GRADIENT) -> 11 (OPTIMIZER_STATE)
+aten::detach 11 (OPTIMIZER_STATE) -> 11 (OPTIMIZER_STATE)
+aten::detach 11 (OPTIMIZER_STATE) -> 11 (OPTIMIZER_STATE)
+aten::add_.Tensor 3 (PARAMETER), 11 (OPTIMIZER_STATE) -> 3 (PARAMETER)
aten::zero_ 7 (GRADIENT) -> 7 (GRADIENT)
aten::zero_ 9 (GRADIENT) -> 9 (GRADIENT)""",
)
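For context, the expected trace above comes from profiling one training step with momentum SGD: on the first step the optimizer clones each gradient into a momentum buffer (aten::clone, aten::detach) and then applies the parameter update (aten::add_.Tensor). A minimal repro sketch, assuming the private _memory_profile() accessor on torch.profiler.profile is available in this version:

import torch
from torch.profiler import profile

# One training step with momentum SGD, so the optimizer materializes
# momentum buffers that this PR now labels OPTIMIZER_STATE.
model = torch.nn.Linear(2, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

with profile(profile_memory=True, record_shapes=True, with_stack=True) as prof:
    model(torch.ones(1, 2)).sum().backward()
    optimizer.step()

# Private API (an assumption; availability varies across versions): builds
# the MemoryProfile whose categorization the expected output above checks.
memory_profile = prof._memory_profile()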
12 changes: 12 additions & 0 deletions torch/profiler/_memory_profiler.py
@@ -1,6 +1,7 @@
import collections
import dataclasses
import enum
+import itertools as it
from typing import (
    Any,
    cast,
@@ -36,6 +37,7 @@ class Category(enum.Enum):
    ACTIVATION = enum.auto()
    GRADIENT = enum.auto()
    PARAMETER = enum.auto()
+    OPTIMIZER_STATE = enum.auto()


@dataclasses.dataclass
@@ -561,6 +563,7 @@ def __init__(self, result: _ProfilerResult) -> None:
        self._set_inputs()
        self._set_parameters_using_data_flow()
        self._set_activations()
+        self._set_optimizer_state()

    def _is_gradient(self, *args, **kwargs) -> bool:
        return self._categories.get(*args, **kwargs) == Category.GRADIENT
@@ -781,3 +784,12 @@ def _set_activations(self) -> None:
            ):
                for i in node.outputs.items():
                    self._categories.setdefault_by_version(*i, Category.ACTIVATION)

+    def _set_optimizer_state(self) -> None:
+        for event in self._op_tree.dfs():
+            if event.typed[0] == _EventType.PyCall and event.typed[1].optimizer:
+                parameters = event.typed[1].optimizer.parameters
+                for _, t in it.chain(*[state for _, _, state in parameters]):
+                    key = TensorKey.from_tensor(t)
+                    if key is not None:
+                        self._categories.set_by_id(key, Category.OPTIMIZER_STATE)
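The nested comprehension in _set_optimizer_state is dense, so here is a toy sketch of the shape it assumes, with stand-in strings rather than real profiler objects (the shape is inferred from the loop itself, not from a documented API): optimizer.parameters yields (parameter, gradient, state) triples, where state is a list of (name, tensor) pairs such as SGD momentum buffers.

import itertools as it

# Hypothetical stand-in for event.typed[1].optimizer.parameters: two
# parameters, each carrying one piece of optimizer state.
parameters = [
    ("param0", "grad0", [("momentum_buffer", "state_tensor_0")]),
    ("param1", "grad1", [("momentum_buffer", "state_tensor_1")]),
]

# Same flattening as the loop above: chain every parameter's state list
# together and keep only the tensors, which the real code then tags as
# OPTIMIZER_STATE by storage id via set_by_id.
for _, t in it.chain(*[state for _, _, state in parameters]):
    print(t)
# Prints:
# state_tensor_0
# state_tensor_1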