Skip to content

Commit 946e01c

Browse files
committed
Migrate lexing to javac
and complete the migration off ecj. MOE_MIGRATED_REVID=139931395
1 parent ce7852b commit 946e01c

File tree

7 files changed

+292
-106
lines changed

7 files changed

+292
-106
lines changed

core/pom.xml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,6 @@
3939
<groupId>com.google.guava</groupId>
4040
<artifactId>guava</artifactId>
4141
</dependency>
42-
<dependency>
43-
<groupId>org.eclipse.jdt</groupId>
44-
<artifactId>org.eclipse.jdt.core</artifactId>
45-
</dependency>
4642
<dependency>
4743
<groupId>com.google.errorprone</groupId>
4844
<artifactId>javac</artifactId>

core/src/main/java/com/google/googlejavaformat/java/ImportOrderer.java

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,43 +14,34 @@
1414
package com.google.googlejavaformat.java;
1515

1616
import static com.google.common.collect.Iterables.getLast;
17-
import static org.eclipse.jdt.core.compiler.ITerminalSymbols.TokenNameclass;
18-
import static org.eclipse.jdt.core.compiler.ITerminalSymbols.TokenNameenum;
19-
import static org.eclipse.jdt.core.compiler.ITerminalSymbols.TokenNameinterface;
2017

2118
import com.google.common.base.Optional;
2219
import com.google.common.collect.ImmutableList;
2320
import com.google.common.collect.ImmutableSet;
2421
import com.google.common.collect.ImmutableSortedSet;
2522
import com.google.googlejavaformat.Newlines;
2623
import com.google.googlejavaformat.java.JavaInput.Tok;
27-
import org.eclipse.jdt.core.compiler.InvalidInputException;
24+
import com.sun.tools.javac.parser.Tokens.TokenKind;
2825

2926
/** Orders imports in Java source code. */
3027
public class ImportOrderer {
3128
/**
32-
* Reorder the inputs in {@code input}, a complete Java program. On success, another complete Java
29+
* Reorder the imports in {@code text}, a complete Java program. On success, another complete Java
3330
* program is returned, which is the same as the original except the imports are in order.
3431
*
3532
* @throws FormatterException if the input could not be parsed.
3633
*/
3734
public static String reorderImports(String text) throws FormatterException {
38-
ImmutableList<Tok> toks;
39-
try {
40-
toks = JavaInput.buildToks(text, CLASS_START);
41-
} catch (InvalidInputException e) {
42-
// error handling is done during formatting
43-
return text;
44-
}
35+
ImmutableList<Tok> toks = JavaInput.buildToks(text, CLASS_START);
4536
return new ImportOrderer(text, toks).reorderImports();
4637
}
4738

4839
/**
49-
* Eclipse token ids that indicate the start of a type definition. We use this to avoid scanning
40+
* {@link TokenKind}s that indicate the start of a type definition. We use this to avoid scanning
5041
* the whole file, since we know that imports must precede any type definition.
5142
*/
52-
private static final ImmutableSet<Integer> CLASS_START =
53-
ImmutableSet.of(TokenNameclass, TokenNameinterface, TokenNameenum);
43+
private static final ImmutableSet<TokenKind> CLASS_START =
44+
ImmutableSet.of(TokenKind.CLASS, TokenKind.INTERFACE, TokenKind.ENUM);
5445

5546
/**
5647
* We use this set to find the first import, and again to check that there are no imports after

core/src/main/java/com/google/googlejavaformat/java/JavaInput.java

Lines changed: 59 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
import static com.google.common.base.Preconditions.checkNotNull;
1818
import static com.google.common.collect.Iterables.getLast;
19+
import static java.nio.charset.StandardCharsets.UTF_8;
1920

2021
import com.google.common.base.MoreObjects;
2122
import com.google.common.base.Verify;
@@ -31,15 +32,25 @@
3132
import com.google.common.collect.TreeRangeSet;
3233
import com.google.googlejavaformat.Input;
3334
import com.google.googlejavaformat.Newlines;
35+
import com.google.googlejavaformat.java.JavacTokens.RawTok;
36+
import com.sun.tools.javac.file.JavacFileManager;
37+
import com.sun.tools.javac.parser.Tokens.TokenKind;
3438
import com.sun.tools.javac.tree.JCTree.JCCompilationUnit;
39+
import com.sun.tools.javac.util.Context;
40+
import com.sun.tools.javac.util.Log;
41+
import com.sun.tools.javac.util.Log.DeferredDiagnosticHandler;
42+
import java.io.IOException;
43+
import java.net.URI;
3544
import java.util.ArrayList;
3645
import java.util.Collection;
3746
import java.util.Iterator;
3847
import java.util.List;
39-
import org.eclipse.jdt.core.ToolFactory;
40-
import org.eclipse.jdt.core.compiler.IScanner;
41-
import org.eclipse.jdt.core.compiler.ITerminalSymbols;
42-
import org.eclipse.jdt.core.compiler.InvalidInputException;
48+
import javax.tools.Diagnostic;
49+
import javax.tools.DiagnosticCollector;
50+
import javax.tools.DiagnosticListener;
51+
import javax.tools.JavaFileObject;
52+
import javax.tools.JavaFileObject.Kind;
53+
import javax.tools.SimpleJavaFileObject;
4354

4455
/** {@code JavaInput} extends {@link Input} to represent a Java input document. */
4556
public final class JavaInput extends Input {
@@ -63,7 +74,7 @@ static final class Tok implements Input.Tok {
6374
private final int position;
6475
private final int columnI;
6576
private final boolean isToken;
66-
private final int id;
77+
private final TokenKind kind;
6778

6879
/**
6980
* The {@code Tok} constructor.
@@ -74,7 +85,7 @@ static final class Tok implements Input.Tok {
7485
* @param position its {@code 0}-origin position in the input
7586
* @param columnI its {@code 0}-origin column number in the input
7687
* @param isToken whether the {@code Tok} is a token
77-
* @param id the token id as defined by {@link org.eclipse.jdt.core.compiler.ITerminalSymbols}
88+
* @param kind the javac {@link TokenKind} of the token; may be {@code null} (e.g. the EOF tok)
7889
*/
7990
Tok(
8091
int index,
@@ -83,14 +94,14 @@ static final class Tok implements Input.Tok {
8394
int position,
8495
int columnI,
8596
boolean isToken,
86-
int id) {
97+
TokenKind kind) {
8798
this.index = index;
8899
this.originalText = originalText;
89100
this.text = text;
90101
this.position = position;
91102
this.columnI = columnI;
92103
this.isToken = isToken;
93-
this.id = id;
104+
this.kind = kind;
94105
}
95106

96107
@Override
@@ -163,12 +174,8 @@ public String toString() {
163174
.toString();
164175
}
165176

166-
/**
167-
* The token id used by the eclipse scanner. See {@link
168-
* org.eclipse.jdt.core.compiler.ITerminalSymbols} for possible values.
169-
*/
170-
public int id() {
171-
return id;
177+
public TokenKind kind() {
178+
return kind;
172179
}
173180
}
174181

@@ -322,47 +329,54 @@ public ImmutableMap<Integer, Integer> getPositionToColumnMap() {
322329

323330
/** Lex the input and build the list of toks. */
324331
private ImmutableList<Tok> buildToks(String text) throws FormatterException {
325-
try {
326-
ImmutableList<Tok> toks = buildToks(text, ImmutableSet.<Integer>of());
327-
kN = getLast(toks).getIndex();
328-
computeRanges(toks);
329-
return toks;
330-
} catch (InvalidInputException e) {
331-
// jdt's scanner elects not to produce error messages, so we don't either
332-
//
333-
// problems will get caught (again!) and reported (with error messages!)
334-
// during parsing
335-
return ImmutableList.of();
336-
}
332+
ImmutableList<Tok> toks = buildToks(text, ImmutableSet.<TokenKind>of());
333+
kN = getLast(toks).getIndex();
334+
computeRanges(toks);
335+
return toks;
337336
}
338337

339338
/**
340339
* Lex the input and build the list of toks.
341340
*
342341
* @param text the text to be lexed.
343-
* @param stopIds a set of Eclipse token names which should cause lexing to stop. If one of these
344-
* is found, the returned list will include tokens up to but not including that token.
342+
* @param stopTokens a set of tokens which should cause lexing to stop. If one of these is found,
343+
* the returned list will include tokens up to but not including that token.
345344
*/
346-
static ImmutableList<Tok> buildToks(String text, ImmutableSet<Integer> stopIds)
347-
throws InvalidInputException, FormatterException {
348-
stopIds =
349-
ImmutableSet.<Integer>builder().addAll(stopIds).add(ITerminalSymbols.TokenNameEOF).build();
345+
static ImmutableList<Tok> buildToks(String text, ImmutableSet<TokenKind> stopTokens)
346+
throws FormatterException {
347+
stopTokens = ImmutableSet.<TokenKind>builder().addAll(stopTokens).add(TokenKind.EOF).build();
348+
Context context = new Context();
349+
new JavacFileManager(context, true, UTF_8);
350+
DiagnosticCollector<JavaFileObject> diagnosticCollector = new DiagnosticCollector<>();
351+
context.put(DiagnosticListener.class, diagnosticCollector);
352+
Log log = Log.instance(context);
353+
log.useSource(
354+
new SimpleJavaFileObject(URI.create("Source.java"), Kind.SOURCE) {
355+
@Override
356+
public CharSequence getCharContent(boolean ignoreEncodingErrors) throws IOException {
357+
return text;
358+
}
359+
});
360+
DeferredDiagnosticHandler diagnostics = new DeferredDiagnosticHandler(log);
361+
ImmutableList<RawTok> rawToks = JavacTokens.getTokens(text, context, stopTokens);
362+
if (diagnostics.getDiagnostics().stream().anyMatch(d -> d.getKind() == Diagnostic.Kind.ERROR)) {
363+
return ImmutableList.of(new Tok(0, "", "", 0, 0, true, null)); // EOF
364+
}
350365
int kN = 0;
351-
IScanner scanner = ToolFactory.createScanner(true, true, true, "1.8");
352-
scanner.setSource(text.toCharArray());
353-
int textLength = text.length();
354366
List<Tok> toks = new ArrayList<>();
355367
int charI = 0;
356368
int columnI = 0;
357-
while (scanner.getCurrentTokenEndPosition() < textLength - 1) {
358-
int tokenId = scanner.getNextToken();
359-
if (stopIds.contains(tokenId)) {
369+
for (RawTok t : rawToks) {
370+
if (stopTokens.contains(t.kind())) {
360371
break;
361372
}
362-
int charI0 = scanner.getCurrentTokenStartPosition();
373+
int charI0 = t.pos();
363374
// Get string, possibly with Unicode escapes.
364-
String originalTokText = text.substring(charI0, scanner.getCurrentTokenEndPosition() + 1);
365-
String tokText = new String(scanner.getCurrentTokenSource()); // Unicode escapes removed.
375+
String originalTokText = text.substring(charI0, t.endPos());
376+
String tokText =
377+
t.kind() == TokenKind.STRINGLITERAL
378+
? t.stringVal() // Unicode escapes removed.
379+
: originalTokText;
366380
char tokText0 = tokText.charAt(0); // The token's first character.
367381
final boolean isToken; // Is this tok a token?
368382
final boolean isNumbered; // Is this tok numbered? (tokens and comments)
@@ -427,7 +441,7 @@ static ImmutableList<Tok> buildToks(String text, ImmutableSet<Integer> stopIds)
427441
charI,
428442
columnI,
429443
isToken,
430-
tokenId));
444+
t.kind()));
431445
charI += originalTokText.length();
432446
columnI = updateColumn(columnI, originalTokText);
433447

@@ -437,18 +451,18 @@ static ImmutableList<Tok> buildToks(String text, ImmutableSet<Integer> stopIds)
437451
"Unicode escapes not allowed in whitespace or multi-character operators");
438452
}
439453
for (String str : strings) {
440-
toks.add(new Tok(isNumbered ? kN++ : -1, str, str, charI, columnI, isToken, tokenId));
454+
toks.add(new Tok(isNumbered ? kN++ : -1, str, str, charI, columnI, isToken, null));
441455
charI += str.length();
442456
columnI = updateColumn(columnI, originalTokText);
443457
}
444458
}
445459
if (extraNewline != null) {
446-
toks.add(new Tok(-1, extraNewline, extraNewline, charI, columnI, false, tokenId));
460+
toks.add(new Tok(-1, extraNewline, extraNewline, charI, columnI, false, null));
447461
columnI = 0;
448462
charI += extraNewline.length();
449463
}
450464
}
451-
toks.add(new Tok(kN, "", "", charI, columnI, true, ITerminalSymbols.TokenNameEOF)); // EOF tok.
465+
toks.add(new Tok(kN, "", "", charI, columnI, true, null)); // EOF tok.
452466
return ImmutableList.copyOf(toks);
453467
}
454468

0 commit comments

Comments
 (0)