parser: major refinement of error handling

This commit is contained in:
Alex Zenla 2023-09-18 01:07:28 -07:00
parent 5610326eda
commit 7cb3e02b21
Signed by: alex
GPG Key ID: C0780728420EBFE5
20 changed files with 170 additions and 80 deletions

View File

@ -40,7 +40,7 @@ func drawCells(renderer, cells, swap) {
var ix = 0 var ix = 0
while ix < gridWidth { while ix < gridWidth {
let mask = if swap { 2 } else { 1 } let mask = if swap { 2 } else { 1 }
if (java_util_ArrayList_get(cells, i) & mask) == mask { if (cells[i] & mask) == mask {
let x = ix * cellSize let x = ix * cellSize
let y = iy * cellSize let y = iy * cellSize
SDL_RenderDrawLine(renderer, x, y, x + cellSize, y) SDL_RenderDrawLine(renderer, x, y, x + cellSize, y)
@ -66,7 +66,7 @@ func createCellGrid() {
func getCell(cells, swap, x, y) { func getCell(cells, swap, x, y) {
if (x >= 0) and (y >= 0) and (x < gridWidth) and (y < gridHeight) { if (x >= 0) and (y >= 0) and (x < gridWidth) and (y < gridHeight) {
let mask = if swap { 2 } else { 1 } let mask = if swap { 2 } else { 1 }
(java_util_ArrayList_get(cells, x + y * gridWidth) & mask) != 0 (cells[x + y * gridWidth] & mask) != 0
} else { } else {
false false
} }
@ -76,7 +76,7 @@ func setCell(cells, swap, x, y, state) {
if (x >= 0) and (y >= 0) and (x < gridWidth) and (y < gridHeight) { if (x >= 0) and (y >= 0) and (x < gridWidth) and (y < gridHeight) {
let mask = if swap { 2 } else { 1 } let mask = if swap { 2 } else { 1 }
let idx = x + y * gridWidth let idx = x + y * gridWidth
let value = java_util_ArrayList_get(cells, idx) let value = cells[idx]
if state { java_util_ArrayList_set(cells, idx, value | mask) } if state { java_util_ArrayList_set(cells, idx, value | mask) }
else { java_util_ArrayList_set(cells, idx, value & (~mask)) } else { java_util_ArrayList_set(cells, idx, value & (~mask)) }
} }
@ -122,8 +122,8 @@ func createGosperGun(cells, swap, x, y) {
[25, 7], [35, 3], [36, 3], [35, 4], [36, 4] [25, 7], [35, 3], [36, 3], [35, 4], [36, 4]
] { ] {
setCell(cells, false, setCell(cells, false,
x + java_util_List_get(i, 0), x + i[0],
y + java_util_List_get(i, 1), y + i[1],
true) true)
} }
} }

View File

@ -0,0 +1,5 @@
package gay.pizza.pork.parser
/**
 * Parse error reporting that no token could be produced for a character.
 *
 * @property char the character that could not be turned into a token.
 * @param sourceIndex position that is rendered into the error text.
 */
class BadCharacterError(val char: Char, sourceIndex: SourceIndex) :
  ParseError("Failed to produce token for '${char}' at $sourceIndex")

View File

@ -0,0 +1,28 @@
package gay.pizza.pork.parser
/**
 * Parse error reporting that a token of an unexpected type was encountered.
 *
 * @param got the token that was actually read.
 * @param sourceIndex position reported in the error text.
 * @param expectedTypes the token types that would have been accepted.
 */
class ExpectedTokenError(got: Token, sourceIndex: SourceIndex, vararg expectedTypes: TokenType) : ParseError(
  message(got, sourceIndex, expectedTypes)
) {
  companion object {
    /**
     * Builds the human-readable error text.
     *
     * Each expected type is shown by name, followed by its quoted
     * `simpleWantString` when it has one. Several types are prefixed with
     * "one of"; an empty list is rendered as "unknown". The position is shown
     * as line/column when [SourceIndex.locationReliable] is set, otherwise as
     * a raw index.
     */
    fun message(got: Token, sourceIndex: SourceIndex, expectedTypes: Array<out TokenType>): String {
      val descriptions = expectedTypes.map { type ->
        val wanted = type.simpleWantString
        if (wanted == null) type.name else "${type.name} '${wanted}'"
      }

      val expected = when {
        expectedTypes.size > 1 -> "one of " + descriptions.joinToString(", ")
        else -> descriptions.firstOrNull() ?: "unknown"
      }

      val location = when {
        sourceIndex.locationReliable -> "line ${sourceIndex.line} column ${sourceIndex.column}"
        else -> "index ${sourceIndex.index}"
      }

      return "Expected $expected at $location but got ${got.type} '${got.text}'"
    }
  }
}

View File

@ -1,3 +1,6 @@
package gay.pizza.pork.parser package gay.pizza.pork.parser
open class ParseError(val error: String) : RuntimeException(error) open class ParseError(val error: String) : RuntimeException() {
/**
 * Full error text: the original [error], followed by the parser descent path
 * derived from this error's stack trace when that path is not empty.
 */
override val message: String
  get() {
    val descentPath = ParserStackAnalysis(this).findDescentPath()
    // When the stack holds no matching Parser parse* frames the path is empty;
    // return the bare error instead of a dangling "Descent path: " label.
    return if (descentPath.isEmpty()) {
      error
    } else {
      "${error}\nDescent path: ${descentPath.joinToString(", ")}"
    }
  }
}

View File

@ -1,6 +1,7 @@
package gay.pizza.pork.parser package gay.pizza.pork.parser
import gay.pizza.pork.ast.* import gay.pizza.pork.ast.*
import kotlin.math.exp
class Parser(source: TokenSource, attribution: NodeAttribution) : class Parser(source: TokenSource, attribution: NodeAttribution) :
ParserBase(source, attribution) { ParserBase(source, attribution) {
@ -18,7 +19,7 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
override fun parseExpression(): Expression = guarded { override fun parseExpression(): Expression = guarded {
val token = peek() val token = peek()
val expression = when (token.type) { var expression = when (token.type) {
TokenType.NumberLiteral -> parseNumberLiteral() TokenType.NumberLiteral -> parseNumberLiteral()
TokenType.StringLiteral -> parseStringLiteral() TokenType.StringLiteral -> parseStringLiteral()
TokenType.True, TokenType.False -> parseBooleanLiteral() TokenType.True, TokenType.False -> parseBooleanLiteral()
@ -45,43 +46,35 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
} }
if (expression is SymbolReference && peek(TokenType.Equals)) { if (expression is SymbolReference && peek(TokenType.Equals)) {
return@guarded guarded(NodeType.SetAssignment) { val symbolReference = expression as SymbolReference
expression = guarded(NodeType.SetAssignment) {
attribution.adopt(expression) attribution.adopt(expression)
expect(TokenType.Equals) expect(TokenType.Equals)
val value = parseExpression() val value = parseExpression()
SetAssignment(expression.symbol, value) SetAssignment(symbolReference.symbol, value)
} }
} }
return@guarded if (peek( if (peek(TokenType.LeftBracket)) {
TokenType.Plus, expression = guarded(NodeType.IndexedBy) {
TokenType.Minus, attribution.adopt(expression)
TokenType.Multiply, expect(TokenType.LeftBracket)
TokenType.Divide, val index = parseExpression()
TokenType.Ampersand, expect(TokenType.RightBracket)
TokenType.Pipe, IndexedBy(expression, index)
TokenType.Caret, }
TokenType.Equality, }
TokenType.Inequality,
TokenType.Mod, if (peek(
TokenType.Rem, TokenType.Plus, TokenType.Minus, TokenType.Multiply, TokenType.Divide, TokenType.Ampersand,
TokenType.Lesser, TokenType.Pipe, TokenType.Caret, TokenType.Equality, TokenType.Inequality, TokenType.Mod,
TokenType.Greater, TokenType.Rem, TokenType.Lesser, TokenType.Greater, TokenType.LesserEqual, TokenType.GreaterEqual,
TokenType.LesserEqual, TokenType.And, TokenType.Or)) {
TokenType.GreaterEqual,
TokenType.And,
TokenType.Or
)
) {
guarded(NodeType.InfixOperation) { guarded(NodeType.InfixOperation) {
val infixToken = next() val infixToken = next()
val infixOperator = ParserHelpers.convertInfixOperator(infixToken) val infixOperator = ParserHelpers.convertInfixOperator(infixToken)
InfixOperation(expression, infixOperator, parseExpression()) InfixOperation(expression, infixOperator, parseExpression())
} }
} else if (next(TokenType.LeftBracket)) {
val index = parseExpression()
expect(TokenType.RightBracket)
IndexedBy(expression, index)
} else expression } else expression
} }
@ -91,7 +84,7 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
} else if (next(TokenType.False)) { } else if (next(TokenType.False)) {
BooleanLiteral(false) BooleanLiteral(false)
} else { } else {
throw ParseError("Expected ") expectedTokenError(source.peek(), TokenType.True, TokenType.False)
} }
} }

View File

@ -16,7 +16,7 @@ data class ParserAttributes(val tokens: List<Token>) {
} }
} }
coalescer.visit(node) coalescer.visit(node)
all.sortBy { it.start } all.sortBy { it.sourceIndex.index }
return all return all
} }
} }

View File

@ -5,12 +5,8 @@ import gay.pizza.pork.ast.NodeParser
import gay.pizza.pork.ast.NodeType import gay.pizza.pork.ast.NodeType
abstract class ParserBase(val source: TokenSource, val attribution: NodeAttribution) : NodeParser { abstract class ParserBase(val source: TokenSource, val attribution: NodeAttribution) : NodeParser {
class ExpectedTokenError(got: Token, vararg expectedTypes: TokenType) : ParseError( @Suppress("NOTHING_TO_INLINE")
"Expected one of ${expectedTypes.joinToString(", ")}" + protected inline fun <T: Node> guarded(type: NodeType? = null, noinline block: () -> T): T =
" but got type ${got.type} '${got.text}'"
)
protected fun <T: Node> guarded(type: NodeType? = null, block: () -> T): T =
attribution.guarded(type, block) attribution.guarded(type, block)
protected fun <T> collect( protected fun <T> collect(
@ -56,7 +52,7 @@ abstract class ParserBase(val source: TokenSource, val attribution: NodeAttribut
protected fun expect(vararg types: TokenType): Token { protected fun expect(vararg types: TokenType): Token {
val token = next() val token = next()
if (!types.contains(token.type)) { if (!types.contains(token.type)) {
throw ExpectedTokenError(token, *types) expectedTokenError(token, *types)
} }
return token return token
} }
@ -64,6 +60,10 @@ abstract class ParserBase(val source: TokenSource, val attribution: NodeAttribut
protected fun <T: Node> expect(vararg types: TokenType, consume: (Token) -> T): T = protected fun <T: Node> expect(vararg types: TokenType, consume: (Token) -> T): T =
consume(expect(*types)) consume(expect(*types))
/**
 * Always throws an [ExpectedTokenError] for [token], reporting the token's own
 * source index as the location and [types] as the accepted token types.
 */
protected fun expectedTokenError(token: Token, vararg types: TokenType): Nothing =
  throw ExpectedTokenError(token, token.sourceIndex, *types)
protected fun next(): Token { protected fun next(): Token {
while (true) { while (true) {
val token = source.next() val token = source.next()

View File

@ -0,0 +1,27 @@
package gay.pizza.pork.parser
import gay.pizza.pork.ast.NodeType
/**
 * Reads a stack trace to work out which `parse*` methods of [Parser] were
 * active, expressed as the [NodeType] values whose names match those methods.
 */
class ParserStackAnalysis(private val stack: Array<StackTraceElement>) {
  /** Analyses the stack trace captured by [throwable]. */
  constructor(throwable: Throwable) : this(throwable.stackTrace)

  /**
   * Returns the node types named by the [Parser] frames in the stack, in the
   * reverse of stack-trace order (so the outermost call comes first).
   *
   * Only frames whose class is exactly [Parser] and whose method name starts
   * with "parse" are considered; the rest of the method name must equal a
   * [NodeType] entry name, otherwise the frame is ignored.
   */
  fun findDescentPath(): List<NodeType> {
    val parserClassName = Parser::class.java.name
    return stack
      .filter { frame -> frame.className == parserClassName && frame.methodName.startsWith("parse") }
      .mapNotNull { frame ->
        // Drop the 5-character "parse" prefix to get the candidate node type name.
        val candidate = frame.methodName.substring(5)
        NodeType.entries.firstOrNull { entry -> entry.name == candidate }
      }
      .reversed()
  }
}

View File

@ -0,0 +1,10 @@
package gay.pizza.pork.parser
/**
 * A position in source text.
 *
 * @property index character offset into the source.
 * @property line line number of the position.
 * @property column column of the position within [line].
 * @property locationReliable whether [line] and [column] carry real values;
 *   when false only [index] is meaningful.
 */
data class SourceIndex(val index: Int, val line: Int, val column: Int, val locationReliable: Boolean = true) {
  /** Renders as `line:column` when the location is reliable, otherwise as the bare index. */
  override fun toString(): String = when {
    locationReliable -> "${line}:${column}"
    else -> "$index"
  }

  companion object {
    /** Position at index 0, line 1, column 0. */
    fun zero(): SourceIndex = SourceIndex(index = 0, line = 1, column = 0)

    /** Position that only knows its [index]; line and column are zeroed and flagged unreliable. */
    fun indexOnly(index: Int): SourceIndex =
      SourceIndex(index = index, line = 0, column = 0, locationReliable = false)
  }
}

View File

@ -1,11 +1,13 @@
package gay.pizza.pork.parser package gay.pizza.pork.parser
class Token(val type: TokenType, val start: Int, val text: String) { class Token(val type: TokenType, val sourceIndex: SourceIndex, val text: String) {
override fun toString(): String = override fun toString(): String =
"$start ${type.name} '${text.replace("\n", "\\n")}'" "$sourceIndex ${type.name} '${text.replace("\n", "\\n")}'"
companion object { companion object {
fun endOfFile(size: Int): Token = fun endOfFile(sourceIndex: SourceIndex): Token =
Token(TokenType.EndOfFile, size, "") Token(TokenType.EndOfFile, sourceIndex, "")
} }
/** Returns a new token of [upgradedType] that keeps this token's source index and text. */
fun upgrade(upgradedType: TokenType): Token {
  return Token(type = upgradedType, sourceIndex = sourceIndex, text = text)
}
} }

View File

@ -7,7 +7,7 @@ class TokenStreamSource(val stream: TokenStream) : TokenSource {
override fun next(): Token { override fun next(): Token {
if (index == stream.tokens.size) { if (index == stream.tokens.size) {
return Token.endOfFile(stream.tokens.size) return stream.tokens.last()
} }
val char = stream.tokens[index] val char = stream.tokens[index]
index++ index++
@ -16,7 +16,7 @@ class TokenStreamSource(val stream: TokenStream) : TokenSource {
override fun peek(): Token { override fun peek(): Token {
if (index == stream.tokens.size) { if (index == stream.tokens.size) {
return Token.endOfFile(stream.tokens.size) return stream.tokens.last()
} }
return stream.tokens[index] return stream.tokens[index]
} }

View File

@ -82,6 +82,8 @@ enum class TokenType(vararg properties: TokenTypeProperty) {
val tokenUpgrader: TokenUpgrader? = val tokenUpgrader: TokenUpgrader? =
properties.filterIsInstance<TokenUpgrader>().singleOrNull() properties.filterIsInstance<TokenUpgrader>().singleOrNull()
/**
 * Literal text associated with this token type: the `manyChars` text when
 * present, otherwise the `singleChar` character as a string, or null when
 * neither is available.
 */
val simpleWantString: String? = manyChars?.text ?: singleChar?.char?.toString()
companion object { companion object {
val AnyOf = entries.filter { item -> item.anyOf != null } val AnyOf = entries.filter { item -> item.anyOf != null }
val ManyChars = entries.filter { item -> item.manyChars != null } val ManyChars = entries.filter { item -> item.manyChars != null }

View File

@ -13,15 +13,15 @@ interface TokenTypeProperty {
var upgraded: Token? = null var upgraded: Token? = null
for (item in TokenType.ManyChars) { for (item in TokenType.ManyChars) {
if (item.manyChars != null && token.text == item.manyChars.text) { if (item.manyChars != null && token.text == item.manyChars.text) {
upgraded = Token(item, token.start, token.text) upgraded = token.upgrade(item)
break break
} }
} }
if (upgraded == null) { if (upgraded == null) {
for (item in TokenType.AnyOf) { for (item in TokenType.AnyOf) {
if (item.anyOf != null && item.anyOf.strings.contains(token.text)) { if (item.anyOf != null && item.anyOf.strings.contains(token.text)) {
upgraded = Token(item, token.start, token.text) upgraded = token.upgrade(item)
break break
} }
} }

View File

@ -1,16 +1,18 @@
package gay.pizza.pork.parser package gay.pizza.pork.parser
class Tokenizer(val source: CharSource) { class Tokenizer(val source: CharSource) {
private var tokenStart: Int = 0 private var startIndex: SourceIndex = SourceIndex.zero()
private var currentLineIndex = 1
private var currentLineColumn = 0
private fun readBlockComment(firstChar: Char): Token { private fun readBlockComment(firstChar: Char): Token {
val comment = buildString { val comment = buildString {
append(firstChar) append(firstChar)
var endOfComment = false var endOfComment = false
while (true) { while (true) {
val char = source.next() val char = nextChar()
if (char == CharSource.NullChar) { if (char == CharSource.NullChar) {
throw ParseError("Unterminated block comment") throw UnterminatedTokenError("block comment", currentSourceIndex())
} }
append(char) append(char)
@ -27,7 +29,7 @@ class Tokenizer(val source: CharSource) {
} }
} }
} }
return Token(TokenType.BlockComment, tokenStart, comment) return produceToken(TokenType.BlockComment, comment)
} }
private fun readLineComment(firstChar: Char): Token { private fun readLineComment(firstChar: Char): Token {
@ -38,10 +40,10 @@ class Tokenizer(val source: CharSource) {
if (char == CharSource.NullChar || char == '\n') { if (char == CharSource.NullChar || char == '\n') {
break break
} }
append(source.next()) append(nextChar())
} }
} }
return Token(TokenType.LineComment, tokenStart, comment) return produceToken(TokenType.LineComment, comment)
} }
private fun readStringLiteral(firstChar: Char): Token { private fun readStringLiteral(firstChar: Char): Token {
@ -50,21 +52,21 @@ class Tokenizer(val source: CharSource) {
while (true) { while (true) {
val char = source.peek() val char = source.peek()
if (char == CharSource.NullChar) { if (char == CharSource.NullChar) {
throw ParseError("Unterminated string.") throw UnterminatedTokenError("string", currentSourceIndex())
} }
append(source.next()) append(nextChar())
if (char == '"') { if (char == '"') {
break break
} }
} }
} }
return Token(TokenType.StringLiteral, tokenStart, string) return produceToken(TokenType.StringLiteral, string)
} }
fun next(): Token { fun next(): Token {
while (source.peek() != CharSource.NullChar) { while (source.peek() != CharSource.NullChar) {
tokenStart = source.currentIndex startIndex = currentSourceIndex()
val char = source.next() val char = nextChar()
if (char == '/' && source.peek() == '*') { if (char == '/' && source.peek() == '*') {
return readBlockComment(char) return readBlockComment(char)
@ -89,13 +91,13 @@ class Tokenizer(val source: CharSource) {
if (source.peek() != promotion.nextChar) { if (source.peek() != promotion.nextChar) {
continue continue
} }
val nextChar = source.next() val nextChar = nextChar()
type = promotion.type type = promotion.type
text += nextChar text += nextChar
promoted = true promoted = true
} }
} }
return Token(type, tokenStart, text) return produceToken(type, text)
} }
var index = 0 var index = 0
@ -121,10 +123,10 @@ class Tokenizer(val source: CharSource) {
else else
item.charIndexConsumer!!.isValid(source.peek(), ++index) item.charIndexConsumer!!.isValid(source.peek(), ++index)
) { ) {
append(source.next()) append(nextChar())
} }
} }
var token = Token(item, tokenStart, text) var token = produceToken(item, text)
val tokenUpgrader = item.tokenUpgrader val tokenUpgrader = item.tokenUpgrader
if (tokenUpgrader != null) { if (tokenUpgrader != null) {
token = tokenUpgrader.maybeUpgrade(token) ?: token token = tokenUpgrader.maybeUpgrade(token) ?: token
@ -136,9 +138,9 @@ class Tokenizer(val source: CharSource) {
return readStringLiteral(char) return readStringLiteral(char)
} }
throw ParseError("Failed to parse: (${char}) next ${source.peek()}") throw BadCharacterError(char, startIndex)
} }
return Token.endOfFile(source.currentIndex) return Token.endOfFile(startIndex.copy(index = source.currentIndex))
} }
fun tokenize(): TokenStream { fun tokenize(): TokenStream {
@ -152,4 +154,19 @@ class Tokenizer(val source: CharSource) {
} }
return TokenStream(tokens) return TokenStream(tokens)
} }
/** Builds a token of [type] carrying [text], positioned at the currently recorded `startIndex`. */
private fun produceToken(type: TokenType, text: String): Token {
  return Token(type, startIndex, text)
}
/**
 * Consumes one character from [source] and keeps the line/column counters in
 * step with it.
 *
 * A newline advances the line counter and resets the column to 0; any other
 * character advances the column by one. The newline itself must not also bump
 * the column, otherwise every line after the first would start at column 1
 * while the first line starts at column 0.
 *
 * @return the character that was consumed.
 */
private fun nextChar(): Char {
  val char = source.next()
  if (char == '\n') {
    currentLineIndex++
    currentLineColumn = 0
  } else {
    currentLineColumn++
  }
  return char
}
/** Snapshots the source's current index together with the tracked line and column. */
private fun currentSourceIndex(): SourceIndex {
  return SourceIndex(
    index = source.currentIndex,
    line = currentLineIndex,
    column = currentLineColumn
  )
}
} }

View File

@ -0,0 +1,5 @@
package gay.pizza.pork.parser
/**
 * Parse error reporting that a construct was not terminated.
 *
 * @param what short description of the unterminated construct, inserted into the error text.
 * @param sourceIndex position that is rendered into the error text.
 */
class UnterminatedTokenError(what: String, sourceIndex: SourceIndex) :
  ParseError("Unterminated $what at $sourceIndex")

View File

@ -57,8 +57,8 @@ class PorkLexer : LexerBase() {
try { try {
val currentToken = tokenizer.next() val currentToken = tokenizer.next()
currentTokenType = PorkElementTypes.elementTypeFor(currentToken.type) currentTokenType = PorkElementTypes.elementTypeFor(currentToken.type)
internalTokenStart = currentToken.start internalTokenStart = currentToken.sourceIndex.index
internalTokenEnd = currentToken.start + currentToken.text.length internalTokenEnd = currentToken.sourceIndex.index + currentToken.text.length
} catch (e: ProcessCanceledException) { } catch (e: ProcessCanceledException) {
throw e throw e
} catch (e: Throwable) { } catch (e: Throwable) {

View File

@ -29,12 +29,8 @@ class PsiBuilderMarkAttribution(val builder: PsiBuilder) : ParserNodeAttribution
} }
throw PorkParser.ExitParser() throw PorkParser.ExitParser()
} catch (e: PorkParser.ExitParser) { } catch (e: PorkParser.ExitParser) {
if (e.error != null) { marker.done(PorkElementTypes.FailedToParse)
marker.error(e.error) throw e
} else {
marker.done(PorkElementTypes.FailedToParse)
}
throw PorkParser.ExitParser()
} }
if (map[result] != null) { if (map[result] != null) {
marker.drop() marker.drop()

View File

@ -1,6 +1,7 @@
package gay.pizza.pork.idea package gay.pizza.pork.idea
import com.intellij.lang.PsiBuilder import com.intellij.lang.PsiBuilder
import gay.pizza.pork.parser.SourceIndex
import gay.pizza.pork.parser.Token import gay.pizza.pork.parser.Token
import gay.pizza.pork.parser.TokenSource import gay.pizza.pork.parser.TokenSource
import com.intellij.psi.TokenType as PsiTokenType import com.intellij.psi.TokenType as PsiTokenType
@ -17,15 +18,15 @@ class PsiBuilderTokenSource(val builder: PsiBuilder) : TokenSource {
override fun peek(): Token { override fun peek(): Token {
if (builder.eof()) { if (builder.eof()) {
return Token.endOfFile(builder.currentOffset) return Token.endOfFile(SourceIndex.indexOnly(builder.currentOffset))
} }
val elementType = builder.tokenType!! val elementType = builder.tokenType!!
if (elementType == PsiTokenType.BAD_CHARACTER) { if (elementType == PsiTokenType.BAD_CHARACTER) {
throw BadCharacterError("Invalid character.") throw BadCharacterError("Invalid character")
} }
val tokenType = PorkElementTypes.tokenTypeFor(elementType) ?: val tokenType = PorkElementTypes.tokenTypeFor(elementType) ?:
throw RuntimeException("Lexing failure: ${elementType.debugName}") throw RuntimeException("Lexing failure: ${elementType.debugName}")
return Token(tokenType, builder.currentOffset, builder.tokenText!!) return Token(tokenType, SourceIndex.indexOnly(builder.currentOffset), builder.tokenText!!)
} }
class BadCharacterError(error: String) : RuntimeException(error) class BadCharacterError(error: String) : RuntimeException(error)

View File

@ -13,6 +13,7 @@ dependencies {
application { application {
applicationName = "pork" applicationName = "pork"
mainClass.set("gay.pizza.pork.tool.MainKt") mainClass.set("gay.pizza.pork.tool.MainKt")
applicationDefaultJvmArgs += "-XstartOnFirstThread"
} }
for (task in arrayOf(tasks.shadowDistTar, tasks.shadowDistZip, tasks.shadowJar)) { for (task in arrayOf(tasks.shadowDistTar, tasks.shadowDistZip, tasks.shadowJar)) {

View File

@ -12,7 +12,7 @@ class TokenizeCommand : CliktCommand(help = "Tokenize Compilation Unit", name =
val tool = FileTool(PlatformFsProvider.resolve(path)) val tool = FileTool(PlatformFsProvider.resolve(path))
val tokenStream = tool.tokenize() val tokenStream = tool.tokenize()
for (token in tokenStream.tokens) { for (token in tokenStream.tokens) {
println("${token.start} ${token.type.name} '${sanitize(token.text)}'") println("${token.sourceIndex.index} ${token.type.name} '${sanitize(token.text)}'")
} }
} }