Skip to content

Commit 6875018

Browse files
James Reed authored and facebook-github-bot committed
Record source/line info in SourceRange and report in highlight (#20898)
Summary: Resolves meta-pytorch/lockdown#29 Examples: ``` import torch @torch.jit.script def foobar(x): return torch.blargh(xyz) == RuntimeError: object has no attribute blargh: at compile.py:5:12 @torch.jit.script def foo(x): return torch.blargh(x) ~~~~~~~~~~~~ <--- HERE ``` It also gets the correct column number in the case where the original source file has common leading whitespace in front of the callable: ``` import torch with torch.no_grad(): @torch.jit.script def foo(x): return torch.blargh(x) == RuntimeError: object has no attribute blargh: at compile_leading.py:6:24 @torch.jit.script def foo(x): return torch.blargh(x) ~~~~~~~~~~~~ <--- HERE ``` Pull Request resolved: #20898 Differential Revision: D15552424 Pulled By: jamesr66a fbshipit-source-id: 78d0f0de03f7ccbf3e7ea193a1b4eced57ea5d69
1 parent 57f4f98 commit 6875018

File tree

9 files changed

+224
-103
lines changed

9 files changed

+224
-103
lines changed

test/test_jit.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3680,6 +3680,23 @@ def foo():
36803680
return [[4]] + [[4, 5]]
36813681
self.checkScript(foo, ())
36823682

3683+
def test_file_line_error(self):
3684+
def foobar(xyz):
3685+
return torch.blargh(xyz)
3686+
3687+
_, lineno = inspect.getsourcelines(foobar)
3688+
with self.assertRaisesRegex(RuntimeError, "test_jit.py:{}:20".format(lineno + 1)):
3689+
scripted = torch.jit.script(foobar)
3690+
3691+
def test_file_line_error_class_defn(self):
3692+
class FooBar(object):
3693+
def baz(x):
3694+
return torch.blargh(xyz)
3695+
3696+
_, lineno = inspect.getsourcelines(FooBar)
3697+
with self.assertRaisesRegex(RuntimeError, "test_jit.py:{}:24".format(lineno + 2)):
3698+
torch.jit.script(FooBar)
3699+
36833700
def test_tensor_shape(self):
36843701
x = torch.empty(34, 56, 78)
36853702

torch/csrc/jit/ir.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1230,8 +1230,7 @@ void Node::removeFromList() {
12301230
}
12311231

12321232
inline const SourceRange& fakeRange() {
1233-
static SourceRange range(
1234-
std::make_shared<std::string>("<internally-created-node>"), 0, 1);
1233+
static SourceRange range(std::make_shared<Source>(""), 0, 1);
12351234
return range;
12361235
}
12371236

torch/csrc/jit/script/lexer.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,7 @@ struct Token {
368368

369369
struct Lexer {
370370
explicit Lexer(const std::string& str)
371-
: file(std::make_shared<std::string>(str)),
371+
: source(std::make_shared<Source>(str)),
372372
pos(0),
373373
nesting(0),
374374
indent_stack(),
@@ -485,9 +485,9 @@ struct Lexer {
485485
int kind;
486486
size_t start;
487487
size_t length;
488-
AT_ASSERT(file);
488+
AT_ASSERT(source);
489489
if (!shared.match(
490-
*file,
490+
source->text(),
491491
pos,
492492
nesting > 0,
493493
whitespace_token,
@@ -496,14 +496,15 @@ struct Lexer {
496496
&length)) {
497497
expected(
498498
"a valid token",
499-
Token((*file)[start], SourceRange(file, start, start + 1)));
499+
Token(
500+
(source->text())[start], SourceRange(source, start, start + 1)));
500501
}
501-
auto t = Token(kind, SourceRange(file, start, start + length));
502+
auto t = Token(kind, SourceRange(source, start, start + length));
502503
pos = start + length;
503504
return t;
504505
}
505506

506-
std::shared_ptr<std::string> file;
507+
std::shared_ptr<Source> source;
507508
size_t pos;
508509
size_t nesting; // depth of ( [ { nesting...
509510
std::vector<int> indent_stack; // stack of indentation levels of blocks

torch/csrc/jit/script/python_tree_views.cpp

Lines changed: 30 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,27 +14,44 @@ namespace jit {
1414
namespace script {
1515

1616
struct SourceRangeFactory {
17-
SourceRangeFactory(std::string source)
18-
: source_(std::make_shared<std::string>(std::move(source))) {
19-
size_t pos = 0;
20-
do {
21-
line_len_prefix_sum_.push_back(pos);
22-
pos++;
23-
} while ((pos = source_->find('\n', pos)) != std::string::npos);
24-
}
17+
SourceRangeFactory(
18+
std::string text,
19+
std::string filename,
20+
size_t file_lineno,
21+
size_t leading_whitespace_chars)
22+
: source_(std::make_shared<Source>(
23+
std::move(text),
24+
std::move(filename),
25+
file_lineno)),
26+
leading_whitespace_chars_(leading_whitespace_chars) {}
2527
SourceRange create(int line, int start_col, int end_col) {
28+
size_t start_byte_offset, end_byte_offset;
29+
std::tie(start_byte_offset, end_byte_offset) =
30+
line_col_to_byte_offs(
31+
line,
32+
start_col + leading_whitespace_chars_,
33+
end_col + leading_whitespace_chars_);
34+
return SourceRange(source_, start_byte_offset, end_byte_offset);
35+
}
36+
37+
std::tuple<size_t, size_t> line_col_to_byte_offs(
38+
int line,
39+
int start_col,
40+
int end_col) {
2641
// Python has a weird convention where col_offset points to the column
2742
// *before* the token starts.
2843
start_col++;
2944
end_col++;
3045
// Also, lines are counted from 1.
3146
line--;
32-
auto line_start = line_len_prefix_sum_.at(line);
33-
return SourceRange(source_, line_start + start_col, line_start + end_col);
47+
auto line_start = source_->offset_for_line(line);
48+
return std::make_tuple<size_t, size_t>(
49+
line_start + start_col, line_start + end_col);
3450
}
3551

36-
std::shared_ptr<std::string> source_;
52+
std::shared_ptr<Source> source_;
3753
std::vector<size_t> line_len_prefix_sum_;
54+
size_t leading_whitespace_chars_;
3855
};
3956

4057
template <typename T>
@@ -65,15 +82,15 @@ void initTreeViewBindings(PyObject* module) {
6582
.def_property_readonly("start", &SourceRange::start)
6683
.def_property_readonly("end", &SourceRange::end);
6784
py::class_<SourceRangeFactory>(m, "SourceRangeFactory")
68-
.def(py::init<std::string&&>())
85+
.def(py::init<std::string&&, std::string&&, size_t, size_t>())
6986
.def("make_range", &SourceRangeFactory::create)
7087
.def(
7188
"make_raw_range",
7289
[](const SourceRangeFactory& self, size_t start, size_t end) {
7390
return SourceRange(self.source_, start, end);
7491
})
7592
.def_property_readonly("source", [](const SourceRangeFactory& self) {
76-
return *self.source_;
93+
return self.source_->text();
7794
});
7895

7996
py::class_<TreeView>(m, "TreeView")

torch/csrc/jit/script/tree.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ static SourceRange mergeRanges(SourceRange c, const TreeList& others) {
128128
continue;
129129
size_t s = std::min(c.start(), t->range().start());
130130
size_t e = std::max(c.end(), t->range().end());
131-
c = SourceRange(c.file_ptr(), s, e);
131+
c = SourceRange(c.source(), s, e);
132132
}
133133
return c;
134134
}

torch/csrc/jit/source_range.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,14 @@ namespace jit {
55

66
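// Writes the source text for this range to `out` (with a file:line:column header when the Source has a filename and the range is not the whole text).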
77
C10_EXPORT void SourceRange::highlight(std::ostream& out) const {
8-
if (size() == file_->size()) {
8+
const std::string& str = source_->text();
9+
if (size() == str.size()) {
910
// this is just the entire file, not a subset, so print it out.
1011
// primarily used to print out python stack traces
11-
out << *file_;
12+
out << str;
1213
return;
1314
}
1415

15-
const std::string& str = file();
1616
size_t begin_line = start(); // beginning of line to highlight
1717
size_t end_line = start(); // end of line to highlight
1818
while (begin_line > 0 && str[begin_line - 1] != '\n')
@@ -42,6 +42,14 @@ C10_EXPORT void SourceRange::highlight(std::ostream& out) const {
4242
}
4343
AT_ASSERT(end_highlight == str.size() || str[end_highlight] == '\n');
4444

45+
if (source_->filename()) {
46+
auto lineno = source_->lineno_for_offset(start());
47+
auto col_offset = (int)start() -
48+
(int)source_->offset_for_line(lineno);
49+
out << "at " << *source_->filename() << ":"
50+
<< source_->lineno_to_source_lineno(lineno) << ":" << col_offset
51+
<< "\n";
52+
}
4553
out << str.substr(begin_highlight, end_line - begin_highlight) << "\n";
4654
out << std::string(start() - begin_line, ' ');
4755
size_t len = std::min(size(), end_line - start());

torch/csrc/jit/source_range.h

Lines changed: 87 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,109 @@
11
#pragma once
22
#include <c10/util/Exception.h>
3+
#include <c10/util/Optional.h>
34

45
#include <algorithm>
56
#include <memory>
67
#include <iostream>
78
namespace torch {
89
namespace jit {
910

10-
// a range of a shared string 'file_' with functions to help debug by highlight
11-
// that
12-
// range.
11+
// Source represents a code segment. It keeps track of:
12+
// - text : the text of the code segment
13+
// - filename (optional) : if present, represents the name of the file from
14+
// which the code segment originated.
15+
// - starting_line_no : represents the line in the original file where the
16+
// code segment started.
17+
struct Source {
18+
explicit Source(std::string text)
19+
: text_(std::move(text)), filename_(c10::nullopt) {
20+
calc_line_start_offsets();
21+
}
22+
23+
Source(
24+
std::string text,
25+
c10::optional<std::string> filename,
26+
size_t starting_line_no)
27+
: text_(std::move(text)),
28+
filename_(std::move(filename)),
29+
starting_line_no_(starting_line_no) {
30+
calc_line_start_offsets();
31+
}
32+
33+
// Given a line number (within the code segment), return the byte offset of the
34+
// beginning of that line.
35+
size_t offset_for_line(size_t line) const {
36+
return line_starting_offsets_.at(line);
37+
}
38+
39+
// Calculate the line (within the code segment) on which `offset` resides.
40+
size_t lineno_for_offset(size_t offset) const {
41+
return std::upper_bound(
42+
line_starting_offsets_.begin(),
43+
line_starting_offsets_.end(),
44+
offset) -
45+
line_starting_offsets_.begin() - 1;
46+
}
47+
48+
// Calculate the line (within the original source file, if present) on which
49+
// `lineno` resides.
50+
size_t lineno_to_source_lineno(size_t lineno) const {
51+
if (filename_) {
52+
return lineno + starting_line_no_;
53+
} else {
54+
return lineno;
55+
}
56+
}
57+
58+
const std::string& text() const {
59+
return text_;
60+
}
61+
62+
const c10::optional<std::string>& filename() const {
63+
return filename_;
64+
}
65+
66+
size_t starting_line_no() const {
67+
return starting_line_no_;
68+
}
69+
70+
private:
71+
void calc_line_start_offsets() {
72+
size_t pos = 0;
73+
do {
74+
line_starting_offsets_.push_back(pos);
75+
pos++;
76+
} while ((pos = text_.find('\n', pos)) != std::string::npos);
77+
}
78+
std::string text_;
79+
c10::optional<std::string> filename_;
80+
// Only meaningful when filename_ is present; the text-only constructor leaves it uninitialized.
81+
size_t starting_line_no_;
82+
// Starting offsets for lines into the source. e.g. line 0 starts at
83+
// line_starting_offsets_[0], etc.
84+
std::vector<size_t> line_starting_offsets_;
85+
};
86+
87+
// A SourceRange is a view into a Source, that points to a subset of the source,
88+
// specified by `start` and `end` byte offsets into the source text.
1389
struct CAFFE2_API SourceRange {
14-
SourceRange(std::shared_ptr<std::string> file_, size_t start_, size_t end_)
15-
: file_(std::move(file_)), start_(start_), end_(end_) {}
90+
SourceRange(std::shared_ptr<Source> source_, size_t start_, size_t end_)
91+
: source_(std::move(source_)), start_(start_), end_(end_) {}
1692
explicit SourceRange(std::string string_range)
17-
: file_(std::make_shared<std::string>(std::move(string_range))),
93+
: source_(std::make_shared<Source>(std::move(string_range))),
1894
start_(0),
19-
end_(file_->size()) {}
95+
end_(source_->text().size()) {}
2096

2197
const std::string text() const {
22-
return file().substr(start(), end() - start());
98+
return source_->text().substr(start(), end() - start());
2399
}
24100
size_t size() const {
25101
return end() - start();
26102
}
27103
static const size_t CONTEXT = 10;
28104
void highlight(std::ostream& out) const;
29-
const std::string& file() const {
30-
return *file_;
31-
}
32-
const std::shared_ptr<std::string>& file_ptr() const {
33-
return file_;
105+
const std::shared_ptr<Source>& source() const {
106+
return source_;
34107
}
35108
size_t start() const {
36109
return start_;
@@ -68,7 +141,7 @@ struct CAFFE2_API SourceRange {
68141
}
69142

70143
private:
71-
std::shared_ptr<std::string> file_;
144+
std::shared_ptr<Source> source_;
72145
size_t start_;
73146
size_t end_;
74147
};

0 commit comments

Comments
 (0)