Skip to content

Commit b37f452

Browse files
committed
Initial sketch of a Parser class
1 parent 0ca8d56 commit b37f452

2 files changed

Lines changed: 136 additions & 0 deletions

File tree

libraries/rushell/src/Parser.ts

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
// Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.
2+
// See LICENSE in the project root for license information.
3+
4+
import { ParseError } from './ParseError';
5+
import { Tokenizer, Token, TokenKind } from './Tokenizer';
6+
import {
7+
AstNode,
8+
AstScript,
9+
AstCommand,
10+
AstCompoundWord,
11+
AstText
12+
} from './AstNode';
13+
14+
// tslint:disable:no-constant-condition
15+
16+
/**
 * Builds an AST from the token stream produced by a `Tokenizer`.
 *
 * The grammar currently handled is a single command: a command path (a compound word)
 * followed by zero or more arguments (compound words), with the end of input expected
 * immediately afterwards.  A one-token lookahead buffer is kept so that tokens can be
 * inspected before they are consumed.
 */
export class Parser {
  private readonly _tokenizer: Tokenizer;

  // Lookahead buffer: the token most recently fetched by _peekToken() that has not
  // been consumed yet, or undefined if nothing is buffered.
  private _peekedToken: Token | undefined;

  public constructor(tokenizer: Tokenizer) {
    this._tokenizer = tokenizer;
    this._peekedToken = undefined;
  }

  /**
   * Parses the input as one command and returns it wrapped in an `AstScript`.
   *
   * @returns the script node whose `body` is the parsed command
   * @throws ParseError if no command could be parsed, or if the token following the
   * command is anything other than the end of input
   */
  public parse(): AstScript {
    const result: AstScript = new AstScript();

    // Remembered so that an error can be reported at the position where parsing began
    const firstToken: Token = this._peekToken();

    const parsedCommand: AstCommand | undefined = this._parseCommand();
    if (parsedCommand === undefined) {
      throw new ParseError('Expecting a command', firstToken.range);
    }

    // Nothing is allowed to follow the command
    const trailingToken: Token = this._peekToken();
    if (trailingToken.kind !== TokenKind.EndOfInput) {
      throw new ParseError(`Unexpected token: ${TokenKind[trailingToken.kind]}`, trailingToken.range);
    }

    result.body = parsedCommand;
    return result;
  }

  /**
   * Parses a command: optional leading whitespace, a command path, and then any number
   * of arguments, each of which must be preceded by whitespace.
   *
   * @throws ParseError if no command path is found
   */
  private _parseCommand(): AstCommand | undefined {
    this._skipWhitespace();

    const firstToken: Token = this._peekToken();

    const result: AstCommand = new AstCommand();
    result.commandPath = this._parseCompoundWord();
    if (!result.commandPath) {
      throw new ParseError('Expecting a command path', firstToken.range);
    }

    // _skipWhitespace() also reports true at the end of input; in that case the word
    // parse below comes back empty and ends the loop.
    while (this._skipWhitespace()) {
      const argument: AstCompoundWord | undefined = this._parseCompoundWord();
      if (argument === undefined) {
        break;
      }
      result.arguments.push(argument);
    }

    return result;
  }

  /**
   * Collects consecutive text nodes into a single compound word.
   *
   * @returns the compound word, or undefined if no parts were found at the current position
   */
  private _parseCompoundWord(): AstCompoundWord | undefined {
    const word: AstCompoundWord = new AstCompoundWord();

    let part: AstNode | undefined = this._parseText();
    while (part !== undefined) {
      word.parts.push(part);
      part = this._parseText();
    }

    // An empty parts list means that we didn't parse a word
    return word.parts.length === 0 ? undefined : word;
  }

  /**
   * Consumes the next token if it is a `TokenKind.Text` token and wraps it in an `AstText`.
   *
   * @returns the new node, or undefined (consuming nothing) if the next token is of another kind
   */
  private _parseText(): AstText | undefined {
    const candidate: Token = this._peekToken();

    if (candidate.kind !== TokenKind.Text) {
      return undefined;
    }

    // Consume the token that was just inspected
    this._getToken();

    const textNode: AstText = new AstText();
    textNode.token = candidate;
    return textNode;
  }

  /**
   * Consumes any run of `TokenKind.Spaces` tokens.
   *
   * @returns true if at least one whitespace token was consumed, or if the next token
   * is the end of input; false otherwise
   */
  private _skipWhitespace(): boolean {
    let consumedAny: boolean = false;
    while (this._peekToken().kind === TokenKind.Spaces) {
      this._getToken();
      consumedAny = true;
    }

    // The end of input is reported the same way as whitespace
    const atEndOfInput: boolean = this._peekToken().kind === TokenKind.EndOfInput;
    return consumedAny || atEndOfInput;
  }

  /**
   * Consumes and returns the next token, taking it from the lookahead buffer when one
   * is buffered and from the tokenizer otherwise.
   */
  private _getToken(): Token {
    const buffered: Token | undefined = this._peekedToken;
    if (buffered) {
      this._peekedToken = undefined;
      return buffered;
    }
    return this._tokenizer.getToken();
  }

  /**
   * Returns the next token without consuming it.  The token is fetched from the
   * tokenizer on first use and held in the lookahead buffer until _getToken() takes it.
   */
  private _peekToken(): Token {
    if (this._peekedToken) {
      return this._peekedToken;
    }
    const fetched: Token = this._tokenizer.getToken();
    this._peekedToken = fetched;
    return fetched;
  }
}

libraries/rushell/src/Tokenizer.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,13 @@ import { ParseError } from './ParseError';
77
export enum TokenKind {
88
// One or more spaces/tabs
99
Spaces,
10+
1011
// A single newline sequence such as CRLF or LF
1112
NewLine,
13+
1214
// An unrecognized character
1315
OtherCharacter,
16+
1417
// A sequence of characters that doesn't contain any symbols with special meaning
1518
// Characters can be escaped, in which case the Token.text may differ from the
1619
// Token.range.toString()

0 commit comments

Comments
 (0)