Skip to content

Commit b03bdfa

Browse files
committed
Import Clang ast dump (experimental)
1 parent 84cb5fe commit b03bdfa

13 files changed

Lines changed: 441 additions & 5 deletions

Makefile

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,7 @@ LIBOBJ = $(libcppdir)/analyzerinfo.o \
184184
$(libcppdir)/checkunusedfunctions.o \
185185
$(libcppdir)/checkunusedvar.o \
186186
$(libcppdir)/checkvaarg.o \
187+
$(libcppdir)/clangastdump.o \
187188
$(libcppdir)/cppcheck.o \
188189
$(libcppdir)/ctu.o \
189190
$(libcppdir)/errorlogger.o \
@@ -479,7 +480,10 @@ $(libcppdir)/checkunusedvar.o: lib/checkunusedvar.cpp lib/astutils.h lib/check.h
479480
$(libcppdir)/checkvaarg.o: lib/checkvaarg.cpp lib/astutils.h lib/check.h lib/checkvaarg.h lib/config.h lib/errorlogger.h lib/importproject.h lib/library.h lib/mathlib.h lib/platform.h lib/settings.h lib/standards.h lib/suppressions.h lib/symboldatabase.h lib/templatesimplifier.h lib/timer.h lib/token.h lib/tokenize.h lib/tokenlist.h lib/utils.h lib/valueflow.h
480481
$(CXX) ${INCLUDE_FOR_LIB} $(CPPFLAGS) $(CPPFILESDIR) $(CXXFLAGS) $(UNDEF_STRICT_ANSI) -c -o $(libcppdir)/checkvaarg.o $(libcppdir)/checkvaarg.cpp
481482

482-
$(libcppdir)/cppcheck.o: lib/cppcheck.cpp externals/picojson.h externals/simplecpp/simplecpp.h externals/tinyxml/tinyxml2.h lib/analyzerinfo.h lib/check.h lib/checkunusedfunctions.h lib/config.h lib/cppcheck.h lib/ctu.h lib/errorlogger.h lib/exprengine.h lib/importproject.h lib/library.h lib/mathlib.h lib/path.h lib/platform.h lib/preprocessor.h lib/settings.h lib/standards.h lib/suppressions.h lib/templatesimplifier.h lib/timer.h lib/token.h lib/tokenize.h lib/tokenlist.h lib/utils.h lib/valueflow.h lib/version.h
483+
$(libcppdir)/clangastdump.o: lib/clangastdump.cpp lib/clangastdump.h lib/config.h lib/errorlogger.h lib/library.h lib/mathlib.h lib/platform.h lib/standards.h lib/suppressions.h lib/symboldatabase.h lib/templatesimplifier.h lib/token.h lib/tokenize.h lib/tokenlist.h lib/utils.h lib/valueflow.h
484+
$(CXX) ${INCLUDE_FOR_LIB} $(CPPFLAGS) $(CPPFILESDIR) $(CXXFLAGS) $(UNDEF_STRICT_ANSI) -c -o $(libcppdir)/clangastdump.o $(libcppdir)/clangastdump.cpp
485+
486+
$(libcppdir)/cppcheck.o: lib/cppcheck.cpp externals/picojson.h externals/simplecpp/simplecpp.h externals/tinyxml/tinyxml2.h lib/analyzerinfo.h lib/check.h lib/checkunusedfunctions.h lib/clangastdump.h lib/config.h lib/cppcheck.h lib/ctu.h lib/errorlogger.h lib/exprengine.h lib/importproject.h lib/library.h lib/mathlib.h lib/path.h lib/platform.h lib/preprocessor.h lib/settings.h lib/standards.h lib/suppressions.h lib/templatesimplifier.h lib/timer.h lib/token.h lib/tokenize.h lib/tokenlist.h lib/utils.h lib/valueflow.h lib/version.h
483487
$(CXX) ${INCLUDE_FOR_LIB} $(CPPFLAGS) $(CPPFILESDIR) $(CXXFLAGS) $(UNDEF_STRICT_ANSI) -c -o $(libcppdir)/cppcheck.o $(libcppdir)/cppcheck.cpp
484488

485489
$(libcppdir)/ctu.o: lib/ctu.cpp externals/tinyxml/tinyxml2.h lib/astutils.h lib/check.h lib/config.h lib/ctu.h lib/errorlogger.h lib/importproject.h lib/library.h lib/mathlib.h lib/platform.h lib/settings.h lib/standards.h lib/suppressions.h lib/symboldatabase.h lib/templatesimplifier.h lib/timer.h lib/token.h lib/tokenize.h lib/tokenlist.h lib/utils.h lib/valueflow.h
@@ -488,7 +492,7 @@ $(libcppdir)/ctu.o: lib/ctu.cpp externals/tinyxml/tinyxml2.h lib/astutils.h lib/
488492
$(libcppdir)/errorlogger.o: lib/errorlogger.cpp externals/tinyxml/tinyxml2.h lib/analyzerinfo.h lib/check.h lib/config.h lib/cppcheck.h lib/errorlogger.h lib/importproject.h lib/library.h lib/mathlib.h lib/path.h lib/platform.h lib/settings.h lib/standards.h lib/suppressions.h lib/templatesimplifier.h lib/timer.h lib/token.h lib/tokenize.h lib/tokenlist.h lib/utils.h lib/valueflow.h
489493
$(CXX) ${INCLUDE_FOR_LIB} $(CPPFLAGS) $(CPPFILESDIR) $(CXXFLAGS) $(UNDEF_STRICT_ANSI) -c -o $(libcppdir)/errorlogger.o $(libcppdir)/errorlogger.cpp
490494

491-
$(libcppdir)/exprengine.o: lib/exprengine.cpp lib/astutils.h lib/config.h lib/errorlogger.h lib/exprengine.h lib/importproject.h lib/library.h lib/mathlib.h lib/platform.h lib/settings.h lib/standards.h lib/suppressions.h lib/symboldatabase.h lib/templatesimplifier.h lib/timer.h lib/token.h lib/tokenize.h lib/tokenlist.h lib/utils.h lib/valueflow.h
495+
$(libcppdir)/exprengine.o: lib/exprengine.cpp lib/astutils.h lib/config.h lib/errorlogger.h lib/exprengine.h lib/importproject.h lib/library.h lib/mathlib.h lib/path.h lib/platform.h lib/settings.h lib/standards.h lib/suppressions.h lib/symboldatabase.h lib/templatesimplifier.h lib/timer.h lib/token.h lib/tokenize.h lib/tokenlist.h lib/utils.h lib/valueflow.h
492496
$(CXX) ${INCLUDE_FOR_LIB} $(CPPFLAGS) $(CPPFILESDIR) $(CXXFLAGS) $(UNDEF_STRICT_ANSI) -c -o $(libcppdir)/exprengine.o $(libcppdir)/exprengine.cpp
493497

494498
$(libcppdir)/importproject.o: lib/importproject.cpp externals/picojson.h externals/tinyxml/tinyxml2.h lib/config.h lib/errorlogger.h lib/importproject.h lib/library.h lib/mathlib.h lib/path.h lib/platform.h lib/settings.h lib/standards.h lib/suppressions.h lib/templatesimplifier.h lib/timer.h lib/token.h lib/tokenize.h lib/tokenlist.h lib/utils.h lib/valueflow.h

cli/cmdlineparser.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@ bool CmdLineParser::parseFromArgs(int argc, const char* const argv[])
133133
else if (std::strncmp(argv[i], "--addon=", 8) == 0)
134134
mSettings->addons.emplace_back(argv[i]+8);
135135

136+
else if (std::strcmp(argv[i], "--clang") == 0)
137+
mSettings->clang = true;
138+
136139
else if (std::strncmp(argv[i], "--cppcheck-build-dir=", 21) == 0) {
137140
mSettings->buildDir = Path::fromNativeSeparators(argv[i] + 21);
138141
if (endsWith(mSettings->buildDir, '/'))

lib/clangastdump.cpp

Lines changed: 306 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,306 @@
1+
/*
2+
* Cppcheck - A tool for static C/C++ code analysis
3+
* Copyright (C) 2007-2019 Cppcheck team.
4+
*
5+
* This program is free software: you can redistribute it and/or modify
6+
* it under the terms of the GNU General Public License as published by
7+
* the Free Software Foundation, either version 3 of the License, or
8+
* (at your option) any later version.
9+
*
10+
* This program is distributed in the hope that it will be useful,
11+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
* GNU General Public License for more details.
14+
*
15+
* You should have received a copy of the GNU General Public License
16+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
17+
*/
18+
19+
#include "clangastdump.h"
20+
#include "symboldatabase.h"
21+
#include "tokenize.h"
22+
23+
#include <memory>
24+
#include <vector>
25+
#include <iostream>
26+
27+
// Names of the clang AST node types this importer knows about. They are
// compared against the node type string read from each AST dump line.
static const std::string BinaryOperator{"BinaryOperator"};
static const std::string CallExpr{"CallExpr"};
static const std::string CompoundStmt{"CompoundStmt"};
static const std::string DeclRefExpr{"DeclRefExpr"};
static const std::string FunctionDecl{"FunctionDecl"};
static const std::string ImplicitCastExpr{"ImplicitCastExpr"};
static const std::string IntegerLiteral{"IntegerLiteral"};
static const std::string ParmVarDecl{"ParmVarDecl"};
static const std::string ReturnStmt{"ReturnStmt"};
static const std::string UnaryOperator{"UnaryOperator"};
37+
38+
/**
 * Remove the single quotes surrounding a token from the AST dump.
 *
 * @param s token text, for instance "'int'" or "x"
 * @return s without its first and last character when s both starts and
 *         ends with a single quote; otherwise s unchanged
 */
static std::string unquote(const std::string &s)
{
    // Only strip a matching pair of quotes. Checking just the first character
    // would chop the last character off a token such as "'abc".
    const bool quoted = s.size() >= 2 && s.front() == '\'' && s.back() == '\'';
    return quoted ? s.substr(1, s.size() - 2) : s;
}
42+
43+
/**
 * Split the text that follows the node type on an AST dump line into tokens.
 *
 * Tokens are separated by spaces, with two exceptions that may contain
 * spaces themselves:
 *  - a token starting with '<' extends up to and including the next '>'
 *  - a token starting with a single quote extends up to and including the
 *    next single quote
 *
 * If such a token is not terminated, or the last plain token reaches the end
 * of the line, the rest of the line becomes the final token.
 *
 * @param line text to split
 * @return the tokens in order of appearance; empty if line holds only spaces
 */
static std::vector<std::string> splitString(const std::string &line)
{
    std::vector<std::string> ret;
    std::string::size_type pos1 = line.find_first_not_of(' ');
    while (pos1 != std::string::npos) {
        // Index of the last character that belongs to the current token,
        // or npos when the token runs to the end of the line.
        std::string::size_type last;
        if (line[pos1] == '<')
            last = line.find('>', pos1);
        else if (line[pos1] == '\'')
            last = line.find('\'', pos1 + 1);
        else {
            const std::string::size_type space = line.find(' ', pos1);
            last = (space == std::string::npos) ? std::string::npos : space - 1;
        }
        if (last == std::string::npos) {
            // Handle the end of the line explicitly instead of relying on
            // unsigned wraparound of npos in the length computation.
            ret.push_back(line.substr(pos1));
            break;
        }
        ret.push_back(line.substr(pos1, last + 1 - pos1));
        pos1 = line.find_first_not_of(' ', last + 1);
    }
    return ret;
}
62+
63+
namespace clangastdump {
64+
struct Data {
65+
std::map<std::string, int> varId;
66+
std::map<std::string, Variable *> variableMap;
67+
};
68+
69+
class AstNode {
70+
public:
71+
AstNode(const std::string &nodeType, const std::string &ext, Data *data, SymbolDatabase *symbolDatabase)
72+
: nodeType(nodeType), mExtTokens(splitString(ext)), mData(data), mSymbolDatabase(symbolDatabase)
73+
{}
74+
std::string nodeType;
75+
std::vector<std::shared_ptr<AstNode>> children;
76+
77+
void setLocations(TokenList *tokenList, int file, int line, int col);
78+
79+
void dumpAst(int num = 0, int indent = 0) const;
80+
Token *createTokens(TokenList *tokenList);
81+
private:
82+
Token *addtoken(TokenList *tokenList, const std::string &str);
83+
Token *addTypeTokens(TokenList *tokenList, const std::string &str);
84+
std::string getSpelling() const;
85+
std::string getType() const;
86+
87+
int mFile = 0;
88+
int mLine = 1;
89+
int mCol = 1;
90+
int mVarId = 0;
91+
std::vector<std::string> mExtTokens;
92+
Data *mData;
93+
SymbolDatabase *mSymbolDatabase;
94+
};
95+
96+
typedef std::shared_ptr<AstNode> AstNodePtr;
97+
}
98+
99+
std::string clangastdump::AstNode::getSpelling() const
100+
{
101+
if (nodeType == ParmVarDecl)
102+
return mExtTokens[mExtTokens.size() - 2];
103+
return "";
104+
}
105+
106+
std::string clangastdump::AstNode::getType() const
107+
{
108+
if (nodeType == DeclRefExpr)
109+
return unquote(mExtTokens.back());
110+
if (nodeType == BinaryOperator)
111+
return unquote(mExtTokens[mExtTokens.size() - 2]);
112+
if (nodeType == IntegerLiteral)
113+
return unquote(mExtTokens[mExtTokens.size() - 2]);
114+
return "";
115+
}
116+
117+
void clangastdump::AstNode::dumpAst(int num, int indent) const
118+
{
119+
(void)num;
120+
std::cout << std::string(indent, ' ') << nodeType;
121+
for (auto tok: mExtTokens)
122+
std::cout << " " << tok;
123+
std::cout << std::endl;
124+
for (int c = 0; c < children.size(); ++c)
125+
children[c]->dumpAst(c, indent + 2);
126+
}
127+
128+
/**
 * Determine the source location of this node and all of its descendants.
 *
 * The incoming location is the default; it is overridden by location tokens
 * found on this node's dump line:
 *  - "<col:N..."   sets the column
 *  - "<line:N..."  sets the line
 *  - "<name:..."   sets the file; name is registered via appendFileIfNew()
 * The resulting location is stored in this node and passed on to the children.
 *
 * @param tokenList token list used to register file names
 * @param file      file index inherited from the parent
 * @param line      line number inherited from the parent
 * @param col       column inherited from the parent
 */
void clangastdump::AstNode::setLocations(TokenList *tokenList, int file, int line, int col)
{
    for (const std::string &ext: mExtTokens) {
        if (ext.compare(0, 5, "<col:") == 0)
            col = std::atoi(ext.c_str() + 5);
        else if (ext.compare(0, 6, "<line:") == 0)
            line = std::atoi(ext.c_str() + 6);
        else if (!ext.empty() && ext[0] == '<') {
            // Only treat the token as a file location when it really has a
            // "name:" part. Tokens such as "<<invalid sloc>" contain no ':'
            // and must not be registered as a source file.
            const std::string::size_type colon = ext.find(':');
            if (colon != std::string::npos)
                file = tokenList->appendFileIfNew(ext.substr(1, colon - 1));
        }
    }
    mFile = file;
    mLine = line;
    mCol = col;
    // Bind by reference to avoid copying the shared_ptr for every child.
    for (const AstNodePtr &child: children)
        child->setLocations(tokenList, file, line, col);
}
144+
145+
/**
 * Append one token to the token list, placed at this node's line and file.
 *
 * When this node's type is "int" the new token also gets a signed int
 * value type.
 *
 * @param tokenList list that receives the token
 * @param str       token text
 * @return the token that was appended
 */
Token *clangastdump::AstNode::addtoken(TokenList *tokenList, const std::string &str)
{
    tokenList->addtoken(str, mLine, mFile);
    Token * const newToken = tokenList->back();
    const bool isInt = (getType() == "int");
    if (isInt)
        newToken->setValueType(new ValueType(ValueType::Sign::SIGNED, ValueType::Type::INT, 0));
    return newToken;
}
152+
153+
/**
 * Append a token holding a type name.
 *
 * If the quoted type string contains " (" (as in 'int (int)'), only the text
 * between the leading quote and that " (" is used; otherwise the whole
 * string with its quotes removed is used.
 *
 * @param tokenList list that receives the token
 * @param str       quoted type string from the dump line
 * @return the token that was appended
 */
Token *clangastdump::AstNode::addTypeTokens(TokenList *tokenList, const std::string &str)
{
    const std::string::size_type paren = str.find(" (");
    if (paren != std::string::npos)
        return addtoken(tokenList, str.substr(1, paren - 1));
    return addtoken(tokenList, unquote(str));
}
159+
160+
/**
 * Append the tokens for this node and, recursively, its children to the
 * token list, and connect the AST operands of expression tokens.
 *
 * For FunctionDecl nodes this also adds a function scope, the function and
 * its arguments to the symbol database.
 *
 * A node whose dump line is too short, or which lacks the children its type
 * requires, is treated like an unknown node type: a placeholder token
 * "?<nodeType>?" is emitted for it.
 *
 * @param tokenList list that receives the tokens
 * @return the token representing this node (the operator for unary/binary
 *         operators, "(" for calls, "{" for compound statements, "return"
 *         for return statements, the name or literal otherwise);
 *         nullptr for FunctionDecl
 */
Token *clangastdump::AstNode::createTokens(TokenList *tokenList)
{
    if (nodeType == BinaryOperator && children.size() >= 2 && !mExtTokens.empty()) {
        Token *tok1 = children[0]->createTokens(tokenList);
        Token *binop = addtoken(tokenList, unquote(mExtTokens.back()));
        Token *tok2 = children[1]->createTokens(tokenList);
        binop->astOperand1(tok1);
        binop->astOperand2(tok2);
        return binop;
    }
    if (nodeType == CallExpr && !children.empty()) {
        // children[0] is the callee; the remaining children are the arguments.
        Token *op1 = children[0]->createTokens(tokenList);
        Token *call = addtoken(tokenList, "(");
        call->astOperand1(op1);
        // NOTE(review): every argument is assigned to astOperand2, so with
        // several arguments only the last assignment remains - confirm intent.
        for (auto arg = children.begin() + 1; arg != children.end(); ++arg)
            call->astOperand2((*arg)->createTokens(tokenList));
        call->link(addtoken(tokenList, ")"));
        return call;
    }
    if (nodeType == CompoundStmt) {
        Token *start = addtoken(tokenList, "{");
        for (const AstNodePtr &child: children) {
            child->createTokens(tokenList);
            // The ";" is added through the child so that it gets the
            // child's location.
            child->addtoken(tokenList, ";");
        }
        Token *end = addtoken(tokenList, "}");
        start->link(end);
        return start;
    }
    if (nodeType == DeclRefExpr && mExtTokens.size() >= 3) {
        Token *vartok = addtoken(tokenList, unquote(mExtTokens[mExtTokens.size() - 2]));
        const std::string &addr = mExtTokens[mExtTokens.size() - 3];
        // NOTE(review): operator[] inserts a 0 / nullptr entry when addr has
        // not been registered by a ParmVarDecl - confirm this is acceptable.
        vartok->varId(mData->varId[addr]);
        vartok->variable(mData->variableMap[addr]);
        return vartok;
    }
    if (nodeType == FunctionDecl && mExtTokens.size() >= 2) {
        // Return type and function name.
        addTypeTokens(tokenList, mExtTokens.back());
        Token *nameToken = addtoken(tokenList, mExtTokens[mExtTokens.size() - 2]);

        // Register a scope and a function object in the symbol database.
        Scope &globalScope = mSymbolDatabase->scopeList.front();
        mSymbolDatabase->scopeList.push_back(Scope(nullptr, nullptr, &globalScope));
        Scope &scope = mSymbolDatabase->scopeList.back();
        mSymbolDatabase->functionScopes.push_back(&scope);
        globalScope.functionList.push_back(Function(nameToken));
        scope.function = &globalScope.functionList.back();

        // Parameter list.
        Token *par1 = addtoken(tokenList, "(");
        for (const AstNodePtr &child: children) {
            if (child->nodeType != ParmVarDecl || child->mExtTokens.empty())
                continue;
            if (tokenList->back() != par1)
                addtoken(tokenList, ",");
            addTypeTokens(tokenList, child->mExtTokens.back());
            const std::string spelling = child->getSpelling();
            if (!spelling.empty()) {
                Token *vartok = addtoken(tokenList, spelling);
                const std::string &addr = child->mExtTokens[0];
                // Variable ids are handed out consecutively, starting at 1.
                const int varId = static_cast<int>(mData->varId.size()) + 1;
                mData->varId[addr] = varId;
                vartok->varId(varId);
                scope.function->argumentList.push_back(Variable(vartok, nullptr, nullptr, varId, AccessControl::Argument, nullptr, nullptr, nullptr));
                Variable *var = &scope.function->argumentList.back();
                mData->variableMap[addr] = var;
                vartok->variable(var);
                var->setValueType(ValueType(ValueType::Sign::SIGNED, ValueType::Type::INT, 0));
            }
        }
        Token *par2 = addtoken(tokenList, ")");
        par1->link(par2);

        // Function body. A declaration without a body has no children at all
        // (children.back() would be undefined behaviour) or ends with a
        // non-body child such as a ParmVarDecl, which must not be emitted as
        // if it were the body.
        if (!children.empty() && children.back()->nodeType == CompoundStmt)
            children.back()->createTokens(tokenList);
        if (Token::simpleMatch(par2, ") {")) {
            Token *bodyStart = par2->next();
            scope.bodyStart = bodyStart;
            scope.bodyEnd = bodyStart->link();
        }
        return nullptr;
    }
    if (nodeType == ImplicitCastExpr && !children.empty())
        return children[0]->createTokens(tokenList);
    if (nodeType == IntegerLiteral && !mExtTokens.empty())
        return addtoken(tokenList, mExtTokens.back());
    if (nodeType == ReturnStmt) {
        Token *tok1 = addtoken(tokenList, "return");
        if (!children.empty())
            tok1->astOperand1(children[0]->createTokens(tokenList));
        return tok1;
    }
    if (nodeType == UnaryOperator && !children.empty() && !mExtTokens.empty()) {
        Token *unop = addtoken(tokenList, unquote(mExtTokens.back()));
        unop->astOperand1(children[0]->createTokens(tokenList));
        return unop;
    }
    // Unknown or malformed node: emit a visible placeholder.
    return addtoken(tokenList, "?" + nodeType + "?");
}
253+
254+
/**
 * Import a clang AST dump into the tokenizer.
 *
 * The dump is read line by line. On each line the node type is the text
 * between the first '-' and the following space, and the nesting level is
 * derived from the position of that '-' as (position - 1) / 2. Only
 * FunctionDecl subtrees are imported: a FunctionDecl line starts a new tree,
 * deeper lines are attached to it, and all other lines are skipped. When the
 * next FunctionDecl (or the end of the input) is reached, locations are
 * assigned and tokens are created for the finished tree.
 *
 * @param tokenizer tokenizer whose token list and symbol database are filled
 * @param f         stream containing the AST dump text
 */
void clangastdump::parseClangAstDump(Tokenizer *tokenizer, std::istream &f)
{
    TokenList *tokenList = &tokenizer->list;
    clangastdump::Data data;

    tokenizer->createSymbolDatabase();
    SymbolDatabase *symbolDatabase = const_cast<SymbolDatabase *>(tokenizer->getSymbolDatabase());
    symbolDatabase->scopeList.push_back(Scope(nullptr, nullptr, nullptr));
    symbolDatabase->scopeList.back().type = Scope::ScopeType::eGlobal;

    // tree[n] is the most recently seen node at nesting level n of the
    // function that is currently being read; tree[0] is the FunctionDecl.
    std::vector<AstNodePtr> tree;

    // Turn the function collected so far (if any) into tokens.
    const auto flushTree = [&tree, tokenList]() {
        if (tree.empty())
            return;
        tree[0]->setLocations(tokenList, 0, 1, 1);
        tree[0]->createTokens(tokenList);
    };

    std::string line;
    while (std::getline(f, line)) {
        const std::string::size_type pos1 = line.find('-');
        if (pos1 == std::string::npos)
            continue;
        const std::string::size_type pos2 = line.find(' ', pos1);
        if (pos2 < pos1 + 4 || pos2 == std::string::npos)
            continue;
        const std::string nodeType = line.substr(pos1 + 1, pos2 - pos1 - 1);
        const std::string ext = line.substr(pos2);

        if (nodeType == FunctionDecl) {
            flushTree();
            tree.clear();
            tree.push_back(std::make_shared<AstNode>(nodeType, ext, &data, symbolDatabase));
            continue;
        }

        // pos1 of 1 or 2 is level 0, which is skipped. pos1 == 0 would
        // underflow the unsigned level computation, so skip it as well.
        if (pos1 < 3 || tree.empty())
            continue;
        const std::string::size_type level = (pos1 - 1) / 2;

        // The parent must already exist; a line that jumps more than one
        // level deeper would otherwise index past the end of the tree.
        if (level > tree.size())
            continue;

        AstNodePtr newNode = std::make_shared<AstNode>(nodeType, ext, &data, symbolDatabase);
        tree[level - 1]->children.push_back(newNode);
        // Make newNode the current node of its level and drop stale nodes of
        // deeper levels that belonged to a previous sibling.
        tree.resize(level);
        tree.push_back(newNode);
    }

    flushTree();

    symbolDatabase->clangSetVariables(data.variableMap);
    tokenList->clangSetOrigFiles();
}
306+

lib/clangastdump.h

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*
2+
* Cppcheck - A tool for static C/C++ code analysis
3+
* Copyright (C) 2007-2020 Cppcheck team.
4+
*
5+
* This program is free software: you can redistribute it and/or modify
6+
* it under the terms of the GNU General Public License as published by
7+
* the Free Software Foundation, either version 3 of the License, or
8+
* (at your option) any later version.
9+
*
10+
* This program is distributed in the hope that it will be useful,
11+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
* GNU General Public License for more details.
14+
*
15+
* You should have received a copy of the GNU General Public License
16+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
17+
*/
18+
19+
20+
//---------------------------------------------------------------------------
21+
#ifndef clangastdumpH
22+
#define clangastdumpH
23+
//---------------------------------------------------------------------------
24+
25+
#include <string>
26+
#include <vector>
27+
28+
class SymbolDatabase;
29+
class Tokenizer;
30+
31+
namespace clangastdump {
32+
33+
void parseClangAstDump(Tokenizer *tokenizer, std::istream &f);
34+
35+
}
36+
37+
#endif

0 commit comments

Comments
 (0)