parser: major refinement of error handling

This commit is contained in:
Alex Zenla 2023-09-18 01:07:28 -07:00
parent 5610326eda
commit 7cb3e02b21
Signed by: alex
GPG Key ID: C0780728420EBFE5
20 changed files with 170 additions and 80 deletions

View File

@ -40,7 +40,7 @@ func drawCells(renderer, cells, swap) {
var ix = 0
while ix < gridWidth {
let mask = if swap { 2 } else { 1 }
if (java_util_ArrayList_get(cells, i) & mask) == mask {
if (cells[i] & mask) == mask {
let x = ix * cellSize
let y = iy * cellSize
SDL_RenderDrawLine(renderer, x, y, x + cellSize, y)
@ -66,7 +66,7 @@ func createCellGrid() {
func getCell(cells, swap, x, y) {
if (x >= 0) and (y >= 0) and (x < gridWidth) and (y < gridHeight) {
let mask = if swap { 2 } else { 1 }
(java_util_ArrayList_get(cells, x + y * gridWidth) & mask) != 0
(cells[x + y * gridWidth] & mask) != 0
} else {
false
}
@ -76,7 +76,7 @@ func setCell(cells, swap, x, y, state) {
if (x >= 0) and (y >= 0) and (x < gridWidth) and (y < gridHeight) {
let mask = if swap { 2 } else { 1 }
let idx = x + y * gridWidth
let value = java_util_ArrayList_get(cells, idx)
let value = cells[idx]
if state { java_util_ArrayList_set(cells, idx, value | mask) }
else { java_util_ArrayList_set(cells, idx, value & (~mask)) }
}
@ -122,8 +122,8 @@ func createGosperGun(cells, swap, x, y) {
[25, 7], [35, 3], [36, 3], [35, 4], [36, 4]
] {
setCell(cells, false,
x + java_util_List_get(i, 0),
y + java_util_List_get(i, 1),
x + i[0],
y + i[1],
true)
}
}

View File

@ -0,0 +1,5 @@
package gay.pizza.pork.parser
/**
 * Raised by the tokenizer when a character in the source text cannot be
 * matched to any [TokenType] rule.
 *
 * @param char the offending character, retained for programmatic inspection.
 * @param sourceIndex where in the source the character was encountered;
 *   rendered via its `toString()` (line:column or raw index).
 */
class BadCharacterError(val char: Char, sourceIndex: SourceIndex) : ParseError(
  "Failed to produce token for '${char}' at $sourceIndex"
)

View File

@ -0,0 +1,28 @@
package gay.pizza.pork.parser
/**
 * Raised when the parser encounters a token other than one of the
 * token types it was prepared to accept at the current position.
 */
class ExpectedTokenError(got: Token, sourceIndex: SourceIndex, vararg expectedTypes: TokenType) : ParseError(
  message(got, sourceIndex, expectedTypes)
) {
  companion object {
    /**
     * Builds a human-readable diagnostic naming the acceptable token
     * types, the location (line/column when reliable, raw index
     * otherwise), and the token actually found.
     */
    fun message(got: Token, sourceIndex: SourceIndex, expectedTypes: Array<out TokenType>): String {
      val descriptions = expectedTypes.map { type ->
        // Include the literal spelling when the token type has one.
        val want = type.simpleWantString
        if (want == null) type.name else "${type.name} '$want'"
      }
      val expected = when {
        descriptions.size > 1 -> descriptions.joinToString(separator = ", ", prefix = "one of ")
        else -> descriptions.firstOrNull() ?: "unknown"
      }
      val location = if (sourceIndex.locationReliable)
        "line ${sourceIndex.line} column ${sourceIndex.column}"
      else
        "index ${sourceIndex.index}"
      return "Expected $expected at $location but got ${got.type} '${got.text}'"
    }
  }
}

View File

@ -1,3 +1,6 @@
package gay.pizza.pork.parser
open class ParseError(val error: String) : RuntimeException(error)
/**
 * Base type for all errors produced during parsing.
 *
 * @param error the raw error description, kept separately from [message]
 *   so callers can access the plain text without the descent path suffix.
 */
open class ParseError(val error: String) : RuntimeException() {
  // NOTE(review): the descent path is derived from this throwable's stack
  // trace and recomputed on every access of `message` — acceptable since
  // exception messages are typically read once, but worth confirming.
  override val message: String
    get() = "${error}\nDescent path: ${ParserStackAnalysis(this).findDescentPath().joinToString(", ")}"
}

View File

@ -1,6 +1,7 @@
package gay.pizza.pork.parser
import gay.pizza.pork.ast.*
import kotlin.math.exp
class Parser(source: TokenSource, attribution: NodeAttribution) :
ParserBase(source, attribution) {
@ -18,7 +19,7 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
override fun parseExpression(): Expression = guarded {
val token = peek()
val expression = when (token.type) {
var expression = when (token.type) {
TokenType.NumberLiteral -> parseNumberLiteral()
TokenType.StringLiteral -> parseStringLiteral()
TokenType.True, TokenType.False -> parseBooleanLiteral()
@ -45,43 +46,35 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
}
if (expression is SymbolReference && peek(TokenType.Equals)) {
return@guarded guarded(NodeType.SetAssignment) {
val symbolReference = expression as SymbolReference
expression = guarded(NodeType.SetAssignment) {
attribution.adopt(expression)
expect(TokenType.Equals)
val value = parseExpression()
SetAssignment(expression.symbol, value)
SetAssignment(symbolReference.symbol, value)
}
}
return@guarded if (peek(
TokenType.Plus,
TokenType.Minus,
TokenType.Multiply,
TokenType.Divide,
TokenType.Ampersand,
TokenType.Pipe,
TokenType.Caret,
TokenType.Equality,
TokenType.Inequality,
TokenType.Mod,
TokenType.Rem,
TokenType.Lesser,
TokenType.Greater,
TokenType.LesserEqual,
TokenType.GreaterEqual,
TokenType.And,
TokenType.Or
)
) {
if (peek(TokenType.LeftBracket)) {
expression = guarded(NodeType.IndexedBy) {
attribution.adopt(expression)
expect(TokenType.LeftBracket)
val index = parseExpression()
expect(TokenType.RightBracket)
IndexedBy(expression, index)
}
}
if (peek(
TokenType.Plus, TokenType.Minus, TokenType.Multiply, TokenType.Divide, TokenType.Ampersand,
TokenType.Pipe, TokenType.Caret, TokenType.Equality, TokenType.Inequality, TokenType.Mod,
TokenType.Rem, TokenType.Lesser, TokenType.Greater, TokenType.LesserEqual, TokenType.GreaterEqual,
TokenType.And, TokenType.Or)) {
guarded(NodeType.InfixOperation) {
val infixToken = next()
val infixOperator = ParserHelpers.convertInfixOperator(infixToken)
InfixOperation(expression, infixOperator, parseExpression())
}
} else if (next(TokenType.LeftBracket)) {
val index = parseExpression()
expect(TokenType.RightBracket)
IndexedBy(expression, index)
} else expression
}
@ -91,7 +84,7 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
} else if (next(TokenType.False)) {
BooleanLiteral(false)
} else {
throw ParseError("Expected ")
expectedTokenError(source.peek(), TokenType.True, TokenType.False)
}
}

View File

@ -16,7 +16,7 @@ data class ParserAttributes(val tokens: List<Token>) {
}
}
coalescer.visit(node)
all.sortBy { it.start }
all.sortBy { it.sourceIndex.index }
return all
}
}

View File

@ -5,12 +5,8 @@ import gay.pizza.pork.ast.NodeParser
import gay.pizza.pork.ast.NodeType
abstract class ParserBase(val source: TokenSource, val attribution: NodeAttribution) : NodeParser {
class ExpectedTokenError(got: Token, vararg expectedTypes: TokenType) : ParseError(
"Expected one of ${expectedTypes.joinToString(", ")}" +
" but got type ${got.type} '${got.text}'"
)
protected fun <T: Node> guarded(type: NodeType? = null, block: () -> T): T =
@Suppress("NOTHING_TO_INLINE")
protected inline fun <T: Node> guarded(type: NodeType? = null, noinline block: () -> T): T =
attribution.guarded(type, block)
protected fun <T> collect(
@ -56,7 +52,7 @@ abstract class ParserBase(val source: TokenSource, val attribution: NodeAttribut
protected fun expect(vararg types: TokenType): Token {
val token = next()
if (!types.contains(token.type)) {
throw ExpectedTokenError(token, *types)
expectedTokenError(token, *types)
}
return token
}
@ -64,6 +60,10 @@ abstract class ParserBase(val source: TokenSource, val attribution: NodeAttribut
protected fun <T: Node> expect(vararg types: TokenType, consume: (Token) -> T): T =
consume(expect(*types))
protected fun expectedTokenError(token: Token, vararg types: TokenType): Nothing {
throw ExpectedTokenError(token, token.sourceIndex, *types)
}
protected fun next(): Token {
while (true) {
val token = source.next()

View File

@ -0,0 +1,27 @@
package gay.pizza.pork.parser
import gay.pizza.pork.ast.NodeType
/**
 * Inspects a captured stack trace to reconstruct the parser's descent
 * path — the chain of `parseXxx` methods on [Parser] that were active
 * when an error was raised.
 */
class ParserStackAnalysis(private val stack: Array<StackTraceElement>) {
  constructor(throwable: Throwable) : this(throwable.stackTrace)

  /**
   * Returns the [NodeType]s corresponding to each `parseXxx` frame of
   * [Parser] on the stack, ordered outermost-first.
   */
  fun findDescentPath(): List<NodeType> =
    stack.mapNotNull { frame ->
      if (frame.className != Parser::class.java.name || !frame.methodName.startsWith("parse")) {
        null
      } else {
        // Strip the "parse" prefix and match the remainder to a node type;
        // frames like parseExpression -> NodeType.Expression. Unmatched
        // helper methods are silently dropped.
        val typeName = frame.methodName.substring(5)
        NodeType.entries.firstOrNull { entry -> entry.name == typeName }
      }
    }.reversed()
}

View File

@ -0,0 +1,10 @@
package gay.pizza.pork.parser
/**
 * A position within source text: the absolute character [index] plus a
 * human-readable [line] and [column].
 *
 * @param locationReliable when false, [line] and [column] are placeholders
 *   (see [indexOnly]) and only [index] should be trusted; `toString()` and
 *   error messages fall back to the raw index in that case.
 */
data class SourceIndex(val index: Int, val line: Int, val column: Int, val locationReliable: Boolean = true) {
  companion object {
    // Start-of-input position: index 0, first line, column 0.
    fun zero(): SourceIndex = SourceIndex(0, 1, 0)
    // For token sources that only know a character offset (e.g. IDE
    // lexer integration) — line/column are zeroed and marked unreliable.
    fun indexOnly(index: Int) = SourceIndex(index, 0, 0, locationReliable = false)
  }
  override fun toString(): String = if (locationReliable) "${line}:${column}" else "$index"
}

View File

@ -1,11 +1,13 @@
package gay.pizza.pork.parser
class Token(val type: TokenType, val start: Int, val text: String) {
class Token(val type: TokenType, val sourceIndex: SourceIndex, val text: String) {
override fun toString(): String =
"$start ${type.name} '${text.replace("\n", "\\n")}'"
"$sourceIndex ${type.name} '${text.replace("\n", "\\n")}'"
companion object {
fun endOfFile(size: Int): Token =
Token(TokenType.EndOfFile, size, "")
fun endOfFile(sourceIndex: SourceIndex): Token =
Token(TokenType.EndOfFile, sourceIndex, "")
}
fun upgrade(upgradedType: TokenType): Token = Token(upgradedType, sourceIndex, text)
}

View File

@ -7,7 +7,7 @@ class TokenStreamSource(val stream: TokenStream) : TokenSource {
override fun next(): Token {
if (index == stream.tokens.size) {
return Token.endOfFile(stream.tokens.size)
return stream.tokens.last()
}
val char = stream.tokens[index]
index++
@ -16,7 +16,7 @@ class TokenStreamSource(val stream: TokenStream) : TokenSource {
override fun peek(): Token {
if (index == stream.tokens.size) {
return Token.endOfFile(stream.tokens.size)
return stream.tokens.last()
}
return stream.tokens[index]
}

View File

@ -82,6 +82,8 @@ enum class TokenType(vararg properties: TokenTypeProperty) {
val tokenUpgrader: TokenUpgrader? =
properties.filterIsInstance<TokenUpgrader>().singleOrNull()
val simpleWantString: String? = manyChars?.text ?: singleChar?.char?.toString()
companion object {
val AnyOf = entries.filter { item -> item.anyOf != null }
val ManyChars = entries.filter { item -> item.manyChars != null }

View File

@ -13,7 +13,7 @@ interface TokenTypeProperty {
var upgraded: Token? = null
for (item in TokenType.ManyChars) {
if (item.manyChars != null && token.text == item.manyChars.text) {
upgraded = Token(item, token.start, token.text)
upgraded = token.upgrade(item)
break
}
}
@ -21,7 +21,7 @@ interface TokenTypeProperty {
if (upgraded == null) {
for (item in TokenType.AnyOf) {
if (item.anyOf != null && item.anyOf.strings.contains(token.text)) {
upgraded = Token(item, token.start, token.text)
upgraded = token.upgrade(item)
break
}
}

View File

@ -1,16 +1,18 @@
package gay.pizza.pork.parser
class Tokenizer(val source: CharSource) {
private var tokenStart: Int = 0
private var startIndex: SourceIndex = SourceIndex.zero()
private var currentLineIndex = 1
private var currentLineColumn = 0
private fun readBlockComment(firstChar: Char): Token {
val comment = buildString {
append(firstChar)
var endOfComment = false
while (true) {
val char = source.next()
val char = nextChar()
if (char == CharSource.NullChar) {
throw ParseError("Unterminated block comment")
throw UnterminatedTokenError("block comment", currentSourceIndex())
}
append(char)
@ -27,7 +29,7 @@ class Tokenizer(val source: CharSource) {
}
}
}
return Token(TokenType.BlockComment, tokenStart, comment)
return produceToken(TokenType.BlockComment, comment)
}
private fun readLineComment(firstChar: Char): Token {
@ -38,10 +40,10 @@ class Tokenizer(val source: CharSource) {
if (char == CharSource.NullChar || char == '\n') {
break
}
append(source.next())
append(nextChar())
}
}
return Token(TokenType.LineComment, tokenStart, comment)
return produceToken(TokenType.LineComment, comment)
}
private fun readStringLiteral(firstChar: Char): Token {
@ -50,21 +52,21 @@ class Tokenizer(val source: CharSource) {
while (true) {
val char = source.peek()
if (char == CharSource.NullChar) {
throw ParseError("Unterminated string.")
throw UnterminatedTokenError("string", currentSourceIndex())
}
append(source.next())
append(nextChar())
if (char == '"') {
break
}
}
}
return Token(TokenType.StringLiteral, tokenStart, string)
return produceToken(TokenType.StringLiteral, string)
}
fun next(): Token {
while (source.peek() != CharSource.NullChar) {
tokenStart = source.currentIndex
val char = source.next()
startIndex = currentSourceIndex()
val char = nextChar()
if (char == '/' && source.peek() == '*') {
return readBlockComment(char)
@ -89,13 +91,13 @@ class Tokenizer(val source: CharSource) {
if (source.peek() != promotion.nextChar) {
continue
}
val nextChar = source.next()
val nextChar = nextChar()
type = promotion.type
text += nextChar
promoted = true
}
}
return Token(type, tokenStart, text)
return produceToken(type, text)
}
var index = 0
@ -121,10 +123,10 @@ class Tokenizer(val source: CharSource) {
else
item.charIndexConsumer!!.isValid(source.peek(), ++index)
) {
append(source.next())
append(nextChar())
}
}
var token = Token(item, tokenStart, text)
var token = produceToken(item, text)
val tokenUpgrader = item.tokenUpgrader
if (tokenUpgrader != null) {
token = tokenUpgrader.maybeUpgrade(token) ?: token
@ -136,9 +138,9 @@ class Tokenizer(val source: CharSource) {
return readStringLiteral(char)
}
throw ParseError("Failed to parse: (${char}) next ${source.peek()}")
throw BadCharacterError(char, startIndex)
}
return Token.endOfFile(source.currentIndex)
return Token.endOfFile(startIndex.copy(index = source.currentIndex))
}
fun tokenize(): TokenStream {
@ -152,4 +154,19 @@ class Tokenizer(val source: CharSource) {
}
return TokenStream(tokens)
}
private fun produceToken(type: TokenType, text: String) =
Token(type, startIndex, text)
private fun nextChar(): Char {
val char = source.next()
if (char == '\n') {
currentLineIndex++
currentLineColumn = 0
}
currentLineColumn++
return char
}
private fun currentSourceIndex(): SourceIndex = SourceIndex(source.currentIndex, currentLineIndex, currentLineColumn)
}

View File

@ -0,0 +1,5 @@
package gay.pizza.pork.parser
/**
 * Raised when the tokenizer reaches end-of-input inside a construct that
 * requires a closing delimiter (e.g. a string literal or block comment).
 *
 * @param what a short description of the unterminated construct,
 *   e.g. "string" or "block comment".
 * @param sourceIndex where the tokenizer was when input ran out.
 */
class UnterminatedTokenError(what: String, sourceIndex: SourceIndex) : ParseError(
  "Unterminated $what at $sourceIndex"
)

View File

@ -57,8 +57,8 @@ class PorkLexer : LexerBase() {
try {
val currentToken = tokenizer.next()
currentTokenType = PorkElementTypes.elementTypeFor(currentToken.type)
internalTokenStart = currentToken.start
internalTokenEnd = currentToken.start + currentToken.text.length
internalTokenStart = currentToken.sourceIndex.index
internalTokenEnd = currentToken.sourceIndex.index + currentToken.text.length
} catch (e: ProcessCanceledException) {
throw e
} catch (e: Throwable) {

View File

@ -29,12 +29,8 @@ class PsiBuilderMarkAttribution(val builder: PsiBuilder) : ParserNodeAttribution
}
throw PorkParser.ExitParser()
} catch (e: PorkParser.ExitParser) {
if (e.error != null) {
marker.error(e.error)
} else {
marker.done(PorkElementTypes.FailedToParse)
}
throw PorkParser.ExitParser()
marker.done(PorkElementTypes.FailedToParse)
throw e
}
if (map[result] != null) {
marker.drop()

View File

@ -1,6 +1,7 @@
package gay.pizza.pork.idea
import com.intellij.lang.PsiBuilder
import gay.pizza.pork.parser.SourceIndex
import gay.pizza.pork.parser.Token
import gay.pizza.pork.parser.TokenSource
import com.intellij.psi.TokenType as PsiTokenType
@ -17,15 +18,15 @@ class PsiBuilderTokenSource(val builder: PsiBuilder) : TokenSource {
override fun peek(): Token {
if (builder.eof()) {
return Token.endOfFile(builder.currentOffset)
return Token.endOfFile(SourceIndex.indexOnly(builder.currentOffset))
}
val elementType = builder.tokenType!!
if (elementType == PsiTokenType.BAD_CHARACTER) {
throw BadCharacterError("Invalid character.")
throw BadCharacterError("Invalid character")
}
val tokenType = PorkElementTypes.tokenTypeFor(elementType) ?:
throw RuntimeException("Lexing failure: ${elementType.debugName}")
return Token(tokenType, builder.currentOffset, builder.tokenText!!)
return Token(tokenType, SourceIndex.indexOnly(builder.currentOffset), builder.tokenText!!)
}
class BadCharacterError(error: String) : RuntimeException(error)

View File

@ -13,6 +13,7 @@ dependencies {
application {
applicationName = "pork"
mainClass.set("gay.pizza.pork.tool.MainKt")
applicationDefaultJvmArgs += "-XstartOnFirstThread"
}
for (task in arrayOf(tasks.shadowDistTar, tasks.shadowDistZip, tasks.shadowJar)) {

View File

@ -12,7 +12,7 @@ class TokenizeCommand : CliktCommand(help = "Tokenize Compilation Unit", name =
val tool = FileTool(PlatformFsProvider.resolve(path))
val tokenStream = tool.tokenize()
for (token in tokenStream.tokens) {
println("${token.start} ${token.type.name} '${sanitize(token.text)}'")
println("${token.sourceIndex.index} ${token.type.name} '${sanitize(token.text)}'")
}
}