@@ -66,26 +66,21 @@ static int THPVariable_traverse(THPVariable *self, visitproc visit, void *arg)
6666{
6767 Py_VISIT (self->data );
6868 Py_VISIT (self->backward_hooks );
69+ // We don't want to traverse the grad_fn, even if the Variable owns it and the
70+ // shared pointer's use count is 1. This is because we would need to treat
71+ // the grad_fn as part of the Python state and hold the GIL sometimes when
72+ // grad_fn's shared_ptr is copied, otherwise a race condition with the Python
73+ // GC could occur. Holding the GIL when the shared_ptr is copied adds
74+ // undesirable complexity/overhead.
75+ //
76+ // When hooks, a Variable, and its grad_fn are involved in a Python reference
77+ // cycle, because we're not traversing the grad_fn, the reference cycle will
78+ // in fact leak.
79+ //
80+ // See https://gist.github.com/zou3519/7ac92b84dd7d206dcc6eae55fee8372c
81+ // for more details about the race condition involving traversing the grad_fn
82+ // and the python GC.
6983 if (self->cdata .defined ()) {
70- // Only visit this if we actually own it (no one else use the shared pointer)
71- auto & grad_fn = self->cdata .grad_fn ();
72- if (grad_fn.use_count () == 1 ) {
73- if (auto fn = dynamic_cast <PyFunction*>(grad_fn.get ())) {
74- Py_VISIT (fn->obj );
75- } else {
76- // visit hooks in C++ implemented autograd functions
77- for (auto & hook : grad_fn->pre_hooks ) {
78- if (auto pyhook = dynamic_cast <PyFunctionPreHook*>(hook.get ())) {
79- Py_VISIT (pyhook->dict );
80- }
81- }
82- for (auto & hook : grad_fn->post_hooks ) {
83- if (auto pyhook = dynamic_cast <PyFunctionPostHook*>(hook.get ())) {
84- Py_VISIT (pyhook->dict );
85- }
86- }
87- }
88- }
8984 for (auto & hook : self->cdata .hooks ()) {
9085 if (auto pyhook = dynamic_cast <PyFunctionPreHook*>(hook.get ())) {
9186 Py_VISIT (pyhook->dict );
0 commit comments