parser: lazy support

This commit is contained in:
2023-10-14 03:28:07 -07:00
parent e96bcd8754
commit 9338b01b48
25 changed files with 159 additions and 118 deletions

View File

@ -6,6 +6,6 @@ import gay.pizza.pork.ast.gen.NodeType
object DiscardNodeAttribution : NodeAttribution {
override fun push(token: Token) {}
override fun <T : Node> adopt(node: T) {}
override fun <T : Node> guarded(type: NodeType?, block: () -> T): T =
override fun <T : Node> produce(type: NodeType, block: () -> T): T =
block()
}

View File

@ -0,0 +1,33 @@
package gay.pizza.pork.parser
/**
 * A [TokenSource] that wraps [source] and hides every token whose type is in [skipping].
 *
 * Tokens are pulled from [source] only when a call needs them and are buffered
 * in an internal queue until consumed by [next].
 */
class LazySkippingTokenSource(val source: TokenSource, val skipping: Set<TokenType>) : ParserAwareTokenSource {
  // Number of tokens handed out by next(); skipped tokens are not counted.
  private var index = 0
  override val currentIndex: Int
    get() = index

  // Non-skipped tokens that were read from the source but not yet consumed.
  private val queue = mutableListOf<Token>()

  /** Consumes and returns the next token whose type is not in [skipping]. */
  override fun next(): Token {
    needs(1)
    val token = queue.removeFirst()
    // Advance only after a token was actually obtained, so a failing read
    // from the underlying source leaves currentIndex untouched.
    index++
    return token
  }

  /** Returns the next non-skipped token without consuming it. */
  override fun peek(): Token {
    needs(1)
    return queue.first()
  }

  /**
   * Returns the type of the non-skipped token [ahead] positions past the next one;
   * `peekTypeAhead(0)` is the type of the token [peek] would return.
   */
  override fun peekTypeAhead(ahead: Int): TokenType {
    needs(ahead + 1)
    return queue[ahead].type
  }

  /**
   * Reads from [source] until at least [count] non-skipped tokens are buffered.
   *
   * NOTE(review): this keeps calling `source.next()` until enough tokens arrive;
   * presumably the source keeps yielding a non-skipped end-of-file token once
   * exhausted - confirm against the TokenSource implementations.
   */
  private fun needs(count: Int) {
    while (queue.size < count) {
      val token = source.next()
      if (!skipping.contains(token.type)) {
        queue.add(token)
      }
    }
  }
}

View File

@ -1,38 +0,0 @@
package gay.pizza.pork.parser
/**
 * A [TokenSource] that reads tokens from [tokenizer] on demand, buffering
 * only the tokens that were peeked but not yet consumed.
 */
class LazyTokenSource(val tokenizer: Tokenizer) : TokenSource {
  // Tokens already read from the tokenizer for peeking, oldest first.
  private val queue = mutableListOf<Token>()

  // Number of times next() has been called.
  private var index = 0
  override val currentIndex: Int
    get() = index

  /** Consumes and returns the next token, preferring already-buffered tokens. */
  override fun next(): Token {
    index++
    if (queue.isNotEmpty()) {
      return queue.removeFirst()
    }
    return tokenizer.next()
  }

  /** Returns the next token without consuming it. */
  override fun peek(): Token {
    if (queue.isNotEmpty()) {
      return queue.first()
    }
    val token = tokenizer.next()
    queue.add(token)
    return token
  }

  /**
   * Returns the type of the token [ahead] positions past the next one;
   * `peekTypeAhead(0)` is the type of the token [peek] would return.
   */
  override fun peekTypeAhead(ahead: Int): TokenType {
    wantAtLeast(ahead + 1)
    return queue[ahead].type
  }

  /**
   * Ensures at least [ahead] tokens are buffered, reading only the missing ones.
   *
   * The earlier version appended [ahead] additional tokens whenever the buffer
   * was short, reading further into the input than the caller asked for.
   */
  private fun wantAtLeast(ahead: Int) {
    while (queue.size < ahead) {
      queue.add(tokenizer.next())
    }
  }
}

View File

@ -6,5 +6,5 @@ import gay.pizza.pork.ast.gen.NodeType
interface NodeAttribution {
fun push(token: Token)
fun <T: Node> adopt(node: T)
fun <T: Node> guarded(type: NodeType?, block: () -> T): T
fun <T: Node> produce(type: NodeType, block: () -> T): T
}

View File

@ -4,7 +4,7 @@ import gay.pizza.pork.ast.gen.*
class Parser(source: TokenSource, attribution: NodeAttribution) :
ParserBase(source, attribution) {
override fun parseArgumentSpec(): ArgumentSpec = guarded(NodeType.ArgumentSpec) {
override fun parseArgumentSpec(): ArgumentSpec = produce(NodeType.ArgumentSpec) {
val symbol = parseSymbol()
ArgumentSpec(symbol, next(TokenType.DotDotDot))
}
@ -47,7 +47,7 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
if (expression is SymbolReference && peek(TokenType.Equals)) {
val symbolReference = expression
expression = guarded(NodeType.SetAssignment) {
expression = produce(NodeType.SetAssignment) {
attribution.adopt(expression)
expect(TokenType.Equals)
val value = parseExpression()
@ -56,7 +56,7 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
}
if (peek(TokenType.LeftBracket)) {
expression = guarded(NodeType.IndexedBy) {
expression = produce(NodeType.IndexedBy) {
attribution.adopt(expression)
expect(TokenType.LeftBracket)
val index = parseExpression()
@ -70,7 +70,7 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
TokenType.Pipe, TokenType.Caret, TokenType.Equality, TokenType.Inequality, TokenType.Mod,
TokenType.Rem, TokenType.Lesser, TokenType.Greater, TokenType.LesserEqual, TokenType.GreaterEqual,
TokenType.And, TokenType.Or)) {
guarded(NodeType.InfixOperation) {
produce(NodeType.InfixOperation) {
val infixToken = next()
val infixOperator = ParserHelpers.convertInfixOperator(infixToken)
InfixOperation(expression, infixOperator, parseExpression())
@ -78,7 +78,7 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
} else expression
}
override fun parseBooleanLiteral(): BooleanLiteral = guarded(NodeType.BooleanLiteral) {
override fun parseBooleanLiteral(): BooleanLiteral = produce(NodeType.BooleanLiteral) {
if (next(TokenType.True)) {
BooleanLiteral(true)
} else if (next(TokenType.False)) {
@ -88,12 +88,12 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
}
}
override fun parseBreak(): Break = guarded(NodeType.Break) {
override fun parseBreak(): Break = produce(NodeType.Break) {
expect(TokenType.Break)
Break()
}
override fun parseCompilationUnit(): CompilationUnit = guarded(NodeType.CompilationUnit) {
override fun parseCompilationUnit(): CompilationUnit = produce(NodeType.CompilationUnit) {
val declarations = mutableListOf<Declaration>()
val definitions = mutableListOf<Definition>()
var declarationAccepted = true
@ -114,7 +114,7 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
CompilationUnit(declarations, definitions)
}
override fun parseContinue(): Continue = guarded(NodeType.Continue) {
override fun parseContinue(): Continue = produce(NodeType.Continue) {
expect(TokenType.Continue)
Continue()
}
@ -187,11 +187,11 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
ForIn(forInItem, value, block)
}
override fun parseForInItem(): ForInItem = guarded(NodeType.ForInItem) {
override fun parseForInItem(): ForInItem = produce(NodeType.ForInItem) {
ForInItem(parseSymbol())
}
override fun parseFunctionCall(): FunctionCall = guarded(NodeType.FunctionCall) {
override fun parseFunctionCall(): FunctionCall = produce(NodeType.FunctionCall) {
val symbol = parseSymbol()
expect(TokenType.LeftParentheses)
val arguments = collect(TokenType.RightParentheses, TokenType.Comma) {
@ -201,7 +201,7 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
FunctionCall(symbol, arguments)
}
override fun parseFunctionDefinition(): FunctionDefinition = guarded(NodeType.FunctionDefinition) {
override fun parseFunctionDefinition(): FunctionDefinition = produce(NodeType.FunctionDefinition) {
val modifiers = parseDefinitionModifiers()
expect(TokenType.Func)
val name = parseSymbol()
@ -236,14 +236,14 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
ImportDeclaration(form, parseImportPath())
}
override fun parseImportPath(): ImportPath = guarded(NodeType.ImportPath) {
override fun parseImportPath(): ImportPath = produce(NodeType.ImportPath) {
val components = oneAndContinuedBy(TokenType.Dot) {
parseSymbol()
}
ImportPath(components)
}
override fun parseIndexedBy(): IndexedBy = guarded(NodeType.IndexedBy) {
override fun parseIndexedBy(): IndexedBy = produce(NodeType.IndexedBy) {
val expression = parseExpression()
expect(TokenType.LeftBracket)
val index = parseExpression()
@ -251,7 +251,7 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
IndexedBy(expression, index)
}
override fun parseInfixOperation(): InfixOperation = guarded(NodeType.InfixOperation) {
override fun parseInfixOperation(): InfixOperation = produce(NodeType.InfixOperation) {
val infixToken = next()
val infixOperator = ParserHelpers.convertInfixOperator(infixToken)
InfixOperation(parseExpression(), infixOperator, parseExpression())
@ -282,7 +282,7 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
LetAssignment(symbol, value)
}
override fun parseLetDefinition(): LetDefinition = guarded(NodeType.LetDefinition) {
override fun parseLetDefinition(): LetDefinition = produce(NodeType.LetDefinition) {
val definitionModifiers = parseDefinitionModifiers()
expect(TokenType.Let)
val name = parseSymbol()
@ -330,14 +330,14 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
PrefixOperation(ParserHelpers.convertPrefixOperator(it), parseExpression())
}
override fun parseSetAssignment(): SetAssignment = guarded(NodeType.SetAssignment) {
override fun parseSetAssignment(): SetAssignment = produce(NodeType.SetAssignment) {
val symbol = parseSymbol()
expect(TokenType.Equals)
val value = parseExpression()
SetAssignment(symbol, value)
}
override fun parseStringLiteral(): StringLiteral = guarded(NodeType.StringLiteral) {
override fun parseStringLiteral(): StringLiteral = produce(NodeType.StringLiteral) {
expect(TokenType.Quote)
val stringLiteralToken = expect(TokenType.StringLiteral)
expect(TokenType.Quote)
@ -345,7 +345,7 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
StringLiteral(content)
}
override fun parseSuffixOperation(): SuffixOperation = guarded(NodeType.SuffixOperation) {
override fun parseSuffixOperation(): SuffixOperation = produce(NodeType.SuffixOperation) {
val reference = parseSymbolReference()
expect(TokenType.PlusPlus, TokenType.MinusMinus) {
SuffixOperation(ParserHelpers.convertSuffixOperator(it), reference)
@ -364,7 +364,7 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
Symbol(it.text)
}
override fun parseSymbolReference(): SymbolReference = guarded(NodeType.SymbolReference) {
override fun parseSymbolReference(): SymbolReference = produce(NodeType.SymbolReference) {
SymbolReference(parseSymbol())
}

View File

@ -0,0 +1,3 @@
package gay.pizza.pork.parser
/**
 * Marker interface: a [TokenSource] that declares no additional members.
 *
 * ParserBase uses a source of this type as-is; any other [TokenSource] is
 * wrapped in a LazySkippingTokenSource configured with TokenType.ParserIgnoredTypes.
 * NOTE(review): implementers are presumably expected to already omit the
 * parser-ignored token types - confirm before implementing this on a new source.
 */
interface ParserAwareTokenSource : TokenSource

View File

@ -5,15 +5,19 @@ import gay.pizza.pork.ast.gen.NodeParser
import gay.pizza.pork.ast.gen.NodeType
abstract class ParserBase(source: TokenSource, val attribution: NodeAttribution) : NodeParser {
val source: TokenSource = source.ignoringParserIgnoredTypes()
val source: TokenSource = if (source is ParserAwareTokenSource) {
source
} else {
LazySkippingTokenSource(source, TokenType.ParserIgnoredTypes)
}
@Suppress("NOTHING_TO_INLINE")
protected inline fun <T: Node> guarded(type: NodeType? = null, noinline block: () -> T): T =
attribution.guarded(type, block)
protected inline fun <T: Node> produce(type: NodeType, noinline block: () -> T): T =
attribution.produce(type, block)
@Suppress("NOTHING_TO_INLINE")
protected inline fun <T: Node> expect(type: NodeType? = null, vararg tokenTypes: TokenType, noinline block: (Token) -> T): T =
guarded(type) {
protected inline fun <T: Node> expect(type: NodeType, vararg tokenTypes: TokenType, noinline block: (Token) -> T): T =
produce(type) {
block(expect(*tokenTypes))
}

View File

@ -23,7 +23,7 @@ open class ParserNodeAttribution : NodeAttribution {
}
}
override fun <T : Node> guarded(type: NodeType?, block: () -> T): T {
override fun <T : Node> produce(type: NodeType, block: () -> T): T {
var store = mutableListOf<Token>()
current = store
stack.add(store)

View File

@ -2,13 +2,13 @@ package gay.pizza.pork.parser
class SourceIndexCharSource(val delegate: CharSource) : CharSource by delegate {
private var currentLineIndex = 1
private var currentLineColumn = 0
private var currentLineColumn = 1
override fun next(): Char {
val char = delegate.next()
if (char == '\n') {
currentLineIndex++
currentLineColumn = 0
currentLineColumn = 1
}
currentLineColumn++
return char

View File

@ -1,7 +1,7 @@
package gay.pizza.pork.parser
object StringCharConsumer : CharConsumer {
override fun consume(type: TokenType, tokenizer: Tokenizer): String? {
override fun consume(type: TokenType, tokenizer: Tokenizer): String {
val buffer = StringBuilder()
var escape = false
while (true) {
@ -15,6 +15,10 @@ object StringCharConsumer : CharConsumer {
break
}
if (escape) {
escape = false
}
buffer.append(tokenizer.source.next())
if (char == '\\') {

View File

@ -2,6 +2,7 @@ package gay.pizza.pork.parser
interface TokenSource : PeekableSource<Token> {
fun peekTypeAhead(ahead: Int): TokenType
fun consumeAllRemainingTokens(): List<Token> {
val tokens = mutableListOf<Token>()
while (true) {
@ -14,9 +15,5 @@ interface TokenSource : PeekableSource<Token> {
return tokens
}
fun streamAllRemainingTokens(): TokenStream =
TokenStream(consumeAllRemainingTokens().filter { !TokenType.ParserIgnoredTypes.contains(it.type) })
fun ignoringParserIgnoredTypes(): TokenSource =
TokenStreamSource(streamAllRemainingTokens())
fun stream(): TokenStream = TokenStream(consumeAllRemainingTokens())
}

View File

@ -107,7 +107,7 @@ enum class TokenType(vararg val properties: TokenTypeProperty) {
val CharMatches = entries.filter { item -> item.charMatch != null }
val CharConsumes = entries.filter { item -> item.charConsume != null }
val ParserIgnoredTypes: Array<TokenType> = arrayOf(
val ParserIgnoredTypes: Set<TokenType> = setOf(
Whitespace,
BlockComment,
LineComment

View File

@ -1,9 +1,40 @@
package gay.pizza.pork.parser
class Tokenizer(source: CharSource) {
val source: SourceIndexCharSource = SourceIndexCharSource(source)
class Tokenizer(source: CharSource) : TokenSource {
internal val source = SourceIndexCharSource(source)
private var startIndex: SourceIndex = SourceIndex.zero()
private var state = TokenizerState.Normal
private var index = 0
override val currentIndex: Int
get() = index
private val queue = mutableListOf<Token>()
/**
 * Consumes and returns the next token.
 *
 * Tokens buffered by peek() or peekTypeAhead() are handed out first, so a
 * peeked token is never skipped and the buffer drains in reading order.
 */
override fun next(): Token {
  val token = if (queue.isNotEmpty()) queue.removeFirst() else readNextToken()
  // Count only tokens that were actually delivered to the caller.
  index++
  return token
}
/**
 * Returns the token at the front of the look-ahead buffer, reading one
 * token into the buffer first when it is empty.
 */
override fun peek(): Token {
  val buffered = queue.firstOrNull()
  if (buffered != null) {
    return buffered
  }
  // Nothing buffered yet: read one token and keep it queued.
  return readNextToken().also { queue.add(it) }
}
/**
 * Returns the type of the buffered token at position [ahead], reading
 * further tokens into the buffer until that position exists.
 */
override fun peekTypeAhead(ahead: Int): TokenType {
  // Grow the buffer one token at a time until index `ahead` is valid.
  while (queue.size <= ahead) {
    queue.add(readNextToken())
  }
  return queue[ahead].type
}
private fun nextTokenOrNull(): Token? {
if (source.peek() == CharSource.EndOfFile) {
@ -80,7 +111,7 @@ class Tokenizer(source: CharSource) {
return null
}
fun next(): Token {
private fun readNextToken(): Token {
val what = source.peek()
val token = nextTokenOrNull()
if (token != null) {
@ -95,18 +126,6 @@ class Tokenizer(source: CharSource) {
throw BadCharacterError(what, source.currentSourceIndex(), state)
}
fun stream(): TokenStream {
val tokens = mutableListOf<Token>()
while (true) {
val token = next()
tokens.add(token)
if (token.type == TokenType.EndOfFile) {
break
}
}
return TokenStream(tokens)
}
internal fun produceToken(type: TokenType, text: String) =
Token(type, startIndex, text)