1616
1717import static com .google .common .base .Preconditions .checkNotNull ;
1818import static com .google .common .collect .Iterables .getLast ;
19+ import static java .nio .charset .StandardCharsets .UTF_8 ;
1920
2021import com .google .common .base .MoreObjects ;
2122import com .google .common .base .Verify ;
3132import com .google .common .collect .TreeRangeSet ;
3233import com .google .googlejavaformat .Input ;
3334import com .google .googlejavaformat .Newlines ;
35+ import com .google .googlejavaformat .java .JavacTokens .RawTok ;
36+ import com .sun .tools .javac .file .JavacFileManager ;
37+ import com .sun .tools .javac .parser .Tokens .TokenKind ;
3438import com .sun .tools .javac .tree .JCTree .JCCompilationUnit ;
39+ import com .sun .tools .javac .util .Context ;
40+ import com .sun .tools .javac .util .Log ;
41+ import com .sun .tools .javac .util .Log .DeferredDiagnosticHandler ;
42+ import java .io .IOException ;
43+ import java .net .URI ;
3544import java .util .ArrayList ;
3645import java .util .Collection ;
3746import java .util .Iterator ;
3847import java .util .List ;
39- import org .eclipse .jdt .core .ToolFactory ;
40- import org .eclipse .jdt .core .compiler .IScanner ;
41- import org .eclipse .jdt .core .compiler .ITerminalSymbols ;
42- import org .eclipse .jdt .core .compiler .InvalidInputException ;
48+ import javax .tools .Diagnostic ;
49+ import javax .tools .DiagnosticCollector ;
50+ import javax .tools .DiagnosticListener ;
51+ import javax .tools .JavaFileObject ;
52+ import javax .tools .JavaFileObject .Kind ;
53+ import javax .tools .SimpleJavaFileObject ;
4354
4455/** {@code JavaInput} extends {@link Input} to represent a Java input document. */
4556public final class JavaInput extends Input {
@@ -63,7 +74,7 @@ static final class Tok implements Input.Tok {
6374 private final int position ;
6475 private final int columnI ;
6576 private final boolean isToken ;
66- private final int id ;
77+ private final TokenKind kind ;
6778
6879 /**
6980 * The {@code Tok} constructor.
@@ -74,7 +85,7 @@ static final class Tok implements Input.Tok {
7485 * @param position its {@code 0}-origin position in the input
7586 * @param columnI its {@code 0}-origin column number in the input
7687 * @param isToken whether the {@code Tok} is a token
77- * @param id the token id as defined by {@link org.eclipse.jdt.core.compiler.ITerminalSymbols}
88+ * @param kind the token kind
7889 */
7990 Tok (
8091 int index ,
@@ -83,14 +94,14 @@ static final class Tok implements Input.Tok {
8394 int position ,
8495 int columnI ,
8596 boolean isToken ,
86- int id ) {
97+ TokenKind kind ) {
8798 this .index = index ;
8899 this .originalText = originalText ;
89100 this .text = text ;
90101 this .position = position ;
91102 this .columnI = columnI ;
92103 this .isToken = isToken ;
93- this .id = id ;
104+ this .kind = kind ;
94105 }
95106
96107 @ Override
@@ -163,12 +174,8 @@ public String toString() {
163174 .toString ();
164175 }
165176
166- /**
167- * The token id used by the eclipse scanner. See {@link
168- * org.eclipse.jdt.core.compiler.ITerminalSymbols} for possible values.
169- */
170- public int id () {
171- return id ;
177+ public TokenKind kind () {
178+ return kind ;
172179 }
173180 }
174181
@@ -322,47 +329,54 @@ public ImmutableMap<Integer, Integer> getPositionToColumnMap() {
322329
323330 /** Lex the input and build the list of toks. */
324331 private ImmutableList <Tok > buildToks (String text ) throws FormatterException {
325- try {
326- ImmutableList <Tok > toks = buildToks (text , ImmutableSet .<Integer >of ());
327- kN = getLast (toks ).getIndex ();
328- computeRanges (toks );
329- return toks ;
330- } catch (InvalidInputException e ) {
331- // jdt's scanner elects not to produce error messages, so we don't either
332- //
333- // problems will get caught (again!) and reported (with error messages!)
334- // during parsing
335- return ImmutableList .of ();
336- }
332+ ImmutableList <Tok > toks = buildToks (text , ImmutableSet .<TokenKind >of ());
333+ kN = getLast (toks ).getIndex ();
334+ computeRanges (toks );
335+ return toks ;
337336 }
338337
339338 /**
340339 * Lex the input and build the list of toks.
341340 *
342341 * @param text the text to be lexed.
343- * @param stopIds a set of Eclipse token names which should cause lexing to stop. If one of these
344- * is found, the returned list will include tokens up to but not including that token.
342+ * @param stopTokens a set of tokens which should cause lexing to stop. If one of these is found,
343+ * the returned list will include tokens up to but not including that token.
345344 */
346- static ImmutableList <Tok > buildToks (String text , ImmutableSet <Integer > stopIds )
347- throws InvalidInputException , FormatterException {
348- stopIds =
349- ImmutableSet .<Integer >builder ().addAll (stopIds ).add (ITerminalSymbols .TokenNameEOF ).build ();
345+ static ImmutableList <Tok > buildToks (String text , ImmutableSet <TokenKind > stopTokens )
346+ throws FormatterException {
347+ stopTokens = ImmutableSet .<TokenKind >builder ().addAll (stopTokens ).add (TokenKind .EOF ).build ();
348+ Context context = new Context ();
349+ new JavacFileManager (context , true , UTF_8 );
350+ DiagnosticCollector <JavaFileObject > diagnosticCollector = new DiagnosticCollector <>();
351+ context .put (DiagnosticListener .class , diagnosticCollector );
352+ Log log = Log .instance (context );
353+ log .useSource (
354+ new SimpleJavaFileObject (URI .create ("Source.java" ), Kind .SOURCE ) {
355+ @ Override
356+ public CharSequence getCharContent (boolean ignoreEncodingErrors ) throws IOException {
357+ return text ;
358+ }
359+ });
360+ DeferredDiagnosticHandler diagnostics = new DeferredDiagnosticHandler (log );
361+ ImmutableList <RawTok > rawToks = JavacTokens .getTokens (text , context , stopTokens );
362+ if (diagnostics .getDiagnostics ().stream ().anyMatch (d -> d .getKind () == Diagnostic .Kind .ERROR )) {
363+ return ImmutableList .of (new Tok (0 , "" , "" , 0 , 0 , true , null )); // EOF
364+ }
350365 int kN = 0 ;
351- IScanner scanner = ToolFactory .createScanner (true , true , true , "1.8" );
352- scanner .setSource (text .toCharArray ());
353- int textLength = text .length ();
354366 List <Tok > toks = new ArrayList <>();
355367 int charI = 0 ;
356368 int columnI = 0 ;
357- while (scanner .getCurrentTokenEndPosition () < textLength - 1 ) {
358- int tokenId = scanner .getNextToken ();
359- if (stopIds .contains (tokenId )) {
369+ for (RawTok t : rawToks ) {
370+ if (stopTokens .contains (t .kind ())) {
360371 break ;
361372 }
362- int charI0 = scanner . getCurrentTokenStartPosition ();
373+ int charI0 = t . pos ();
363374 // Get string, possibly with Unicode escapes.
364- String originalTokText = text .substring (charI0 , scanner .getCurrentTokenEndPosition () + 1 );
365- String tokText = new String (scanner .getCurrentTokenSource ()); // Unicode escapes removed.
375+ String originalTokText = text .substring (charI0 , t .endPos ());
376+ String tokText =
377+ t .kind () == TokenKind .STRINGLITERAL
378+ ? t .stringVal () // Unicode escapes removed.
379+ : originalTokText ;
366380 char tokText0 = tokText .charAt (0 ); // The token's first character.
367381 final boolean isToken ; // Is this tok a token?
368382 final boolean isNumbered ; // Is this tok numbered? (tokens and comments)
@@ -427,7 +441,7 @@ static ImmutableList<Tok> buildToks(String text, ImmutableSet<Integer> stopIds)
427441 charI ,
428442 columnI ,
429443 isToken ,
430- tokenId ));
444+ t . kind () ));
431445 charI += originalTokText .length ();
432446 columnI = updateColumn (columnI , originalTokText );
433447
@@ -437,18 +451,18 @@ static ImmutableList<Tok> buildToks(String text, ImmutableSet<Integer> stopIds)
437451 "Unicode escapes not allowed in whitespace or multi-character operators" );
438452 }
439453 for (String str : strings ) {
440- toks .add (new Tok (isNumbered ? kN ++ : -1 , str , str , charI , columnI , isToken , tokenId ));
454+ toks .add (new Tok (isNumbered ? kN ++ : -1 , str , str , charI , columnI , isToken , null ));
441455 charI += str .length ();
442456 columnI = updateColumn (columnI , originalTokText );
443457 }
444458 }
445459 if (extraNewline != null ) {
446- toks .add (new Tok (-1 , extraNewline , extraNewline , charI , columnI , false , tokenId ));
460+ toks .add (new Tok (-1 , extraNewline , extraNewline , charI , columnI , false , null ));
447461 columnI = 0 ;
448462 charI += extraNewline .length ();
449463 }
450464 }
451- toks .add (new Tok (kN , "" , "" , charI , columnI , true , ITerminalSymbols . TokenNameEOF )); // EOF tok.
465+ toks .add (new Tok (kN , "" , "" , charI , columnI , true , null )); // EOF tok.
452466 return ImmutableList .copyOf (toks );
453467 }
454468
0 commit comments