pork: it's got it all — FFI, a state-machine tokenizer, and better IDE support

This commit is contained in:
2023-10-13 01:04:35 -07:00
parent d355fb3914
commit 5078f38f61
58 changed files with 939 additions and 293 deletions

View File

@ -1,5 +1,5 @@
package gay.pizza.pork.parser
class BadCharacterError(val char: Char, sourceIndex: SourceIndex) : ParseError(
"Failed to produce token for '${char}' at $sourceIndex"
class BadCharacterError(val char: Char, sourceIndex: SourceIndex, state: TokenizerState) : ParseError(
"Failed to produce token for '${char}' at $sourceIndex in state $state"
)

View File

@ -2,5 +2,5 @@ package gay.pizza.pork.parser
open class ParseError(val error: String) : RuntimeException() {
override val message: String
get() = "${error}\nDescent path: ${ParserStackAnalysis(this).findDescentPath().joinToString(", ")}"
get() = "${error}${ParserStackAnalysis(this).buildDescentPathAddendum()}"
}

View File

@ -21,7 +21,7 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
val token = peek()
var expression = when (token.type) {
TokenType.NumberLiteral -> parseNumberLiteral()
TokenType.StringLiteral -> parseStringLiteral()
TokenType.Quote -> parseStringLiteral()
TokenType.True, TokenType.False -> parseBooleanLiteral()
TokenType.LeftBracket -> parseListLiteral()
TokenType.Let -> parseLetAssignment()
@ -233,10 +233,14 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
override fun parseImportDeclaration(): ImportDeclaration = expect(NodeType.ImportDeclaration, TokenType.Import) {
val form = parseSymbol()
ImportDeclaration(form, parseImportPath())
}
override fun parseImportPath(): ImportPath = guarded(NodeType.ImportPath) {
val components = oneAndContinuedBy(TokenType.Dot) {
parseSymbol()
}
ImportDeclaration(form, components)
ImportPath(components)
}
override fun parseIndexedBy(): IndexedBy = guarded(NodeType.IndexedBy) {
@ -302,7 +306,7 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
override fun parseNative(): Native = expect(NodeType.Native, TokenType.Native) {
val form = parseSymbol()
val definitions = mutableListOf<StringLiteral>()
while (peek(TokenType.StringLiteral)) {
while (peek(TokenType.Quote)) {
definitions.add(parseStringLiteral())
}
Native(form, definitions)
@ -333,8 +337,11 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
SetAssignment(symbol, value)
}
override fun parseStringLiteral(): StringLiteral = expect(NodeType.StringLiteral, TokenType.StringLiteral) {
val content = StringEscape.unescape(StringEscape.unquote(it.text))
override fun parseStringLiteral(): StringLiteral = guarded(NodeType.StringLiteral) {
expect(TokenType.Quote)
val stringLiteralToken = expect(TokenType.StringLiteral)
expect(TokenType.Quote)
val content = StringEscape.unescape(stringLiteralToken.text)
StringLiteral(content)
}

View File

@ -24,4 +24,13 @@ class ParserStackAnalysis(private val stack: Array<StackTraceElement>) {
}
return parseDescentPaths.reversed()
}
// Formats the parser descent path as an addendum for an error message.
// Returns an empty string when no descent path was found, so callers can
// append the result unconditionally.
fun buildDescentPathAddendum(): String {
  val path = findDescentPath()
  if (path.isEmpty()) return ""
  return "\nParser descent path: ${path.joinToString(", ")}"
}
}

View File

@ -238,6 +238,10 @@ class Printer(buffer: StringBuilder) : NodeVisitor<Unit> {
append("import ")
visit(node.form)
append(" ")
visit(node.path)
}
override fun visitImportPath(node: ImportPath) {
for ((index, component) in node.components.withIndex()) {
visit(component)
if (index != node.components.size - 1) {

View File

@ -2,12 +2,7 @@ package gay.pizza.pork.parser
object StringCharConsumer : CharConsumer {
override fun consume(type: TokenType, tokenizer: Tokenizer): String? {
if (!tokenizer.peek("\"")) {
return null
}
val buffer = StringBuilder()
buffer.append(tokenizer.source.next())
var escape = false
while (true) {
val char = tokenizer.source.peek()
@ -16,12 +11,14 @@ object StringCharConsumer : CharConsumer {
throw UnterminatedTokenError("String", tokenizer.source.currentSourceIndex())
}
if (char == '"' && !escape) {
break
}
buffer.append(tokenizer.source.next())
if (char == '\\') {
escape = true
} else if (char == '"' && !escape) {
break
}
}
return buffer.toString()

View File

@ -3,5 +3,4 @@ package gay.pizza.pork.parser
// String escape helpers for pork string literals.
// Only the newline escape sequence is currently handled.
object StringEscape {
  // Encode: each literal newline becomes the two characters '\' and 'n'.
  fun escape(input: String): String {
    return input.replace("\n", "\\n")
  }

  // Decode: each '\' 'n' pair becomes a literal newline.
  fun unescape(input: String): String {
    return input.replace("\\n", "\n")
  }

  // Strip the first and last characters (the surrounding quote marks).
  // NOTE(review): assumes input.length >= 2 — shorter input throws.
  fun unquote(input: String): String {
    return input.substring(1, input.length - 1)
  }
}

View File

@ -6,7 +6,7 @@ import gay.pizza.pork.parser.TokenTypeProperty.*
import gay.pizza.pork.parser.TokenFamily.*
import gay.pizza.pork.parser.TokenTypeProperty.AnyOf
enum class TokenType(vararg properties: TokenTypeProperty) {
enum class TokenType(vararg val properties: TokenTypeProperty) {
NumberLiteral(NumericLiteralFamily, CharMatch(CharMatcher.AnyOf(
MatchRange('0'..'9'),
NotAtIndex(0, MatchSingle('.'))
@ -17,7 +17,8 @@ enum class TokenType(vararg properties: TokenTypeProperty) {
MatchRange('0' .. '9'),
MatchSingle('_')
)), KeywordUpgrader),
StringLiteral(StringLiteralFamily, CharConsume(StringCharConsumer)),
Quote(StringLiteralFamily, SingleChar('"'), InsideStates(TokenizerState.Normal, TokenizerState.StringLiteralEnd)),
StringLiteral(StringLiteralFamily, CharConsume(StringCharConsumer), InsideStates(TokenizerState.StringLiteralStart)),
Equality(OperatorFamily),
Inequality(ManyChars("!="), OperatorFamily),
ExclamationPoint(SingleChar('!'), Promotion('=', Inequality)),
@ -91,6 +92,11 @@ enum class TokenType(vararg properties: TokenTypeProperty) {
val charConsume: CharConsume? = properties.filterIsInstance<CharConsume>().singleOrNull()
val tokenUpgrader: TokenUpgrader? =
properties.filterIsInstance<TokenUpgrader>().singleOrNull()
// Tokenizer states in which this token type may be produced. A token type
// declares them via an InsideStates property; with none present it defaults
// to being valid only in the Normal state.
// NOTE(review): `by lazy` likely defers evaluation to avoid TokenType /
// TokenizerState enum initialization ordering issues — confirm.
val validStates: List<TokenizerState> by lazy {
properties
.filterIsInstance<InsideStates>()
.singleOrNull()?.states?.toList() ?: listOf(TokenizerState.Normal)
}
val simpleWantString: String? = manyChars?.text ?: singleChar?.char?.toString()

View File

@ -5,6 +5,7 @@ interface TokenTypeProperty {
class Promotion(val nextChar: Char, val type: TokenType) : TokenTypeProperty
class ManyChars(val text: String) : TokenTypeProperty
class AnyOf(vararg val strings: String): TokenTypeProperty
class InsideStates(vararg val states: TokenizerState) : TokenTypeProperty
open class CharMatch(val matcher: CharMatcher) : TokenTypeProperty
open class CharConsume(val consumer: CharConsumer) : TokenTypeProperty
open class TokenUpgrader(val maybeUpgrade: (Token) -> Token?) : TokenTypeProperty

View File

@ -2,73 +2,100 @@ package gay.pizza.pork.parser
class Tokenizer(source: CharSource) {
val source: SourceIndexCharSource = SourceIndexCharSource(source)
private var startIndex: SourceIndex = SourceIndex.zero()
private var state = TokenizerState.Normal
fun next(): Token {
while (source.peek() != CharSource.EndOfFile) {
startIndex = source.currentSourceIndex()
for (item in TokenType.CharConsumes) {
val text = item.charConsume!!.consumer.consume(item, this)
if (text != null) {
return produceToken(item, text)
}
}
val char = source.next()
for (item in TokenType.SingleChars) {
val itemChar = item.singleChar!!.char
if (itemChar != char) {
continue
}
var type = item
var text = itemChar.toString()
var promoted = true
while (promoted) {
promoted = false
for (promotion in type.promotions) {
if (source.peek() != promotion.nextChar) {
continue
}
val nextChar = source.next()
type = promotion.type
text += nextChar
promoted = true
}
}
return produceToken(type, text)
}
var index = 0
for (item in TokenType.CharMatches) {
if (!item.charMatch!!.matcher.valid(char, index)) {
continue
}
val text = buildString {
append(char)
while (item.charMatch.matcher.valid(source.peek(), ++index)) {
append(source.next())
}
}
var token = produceToken(item, text)
val tokenUpgrader = item.tokenUpgrader
if (tokenUpgrader != null) {
token = tokenUpgrader.maybeUpgrade(token) ?: token
}
return token
}
throw BadCharacterError(char, startIndex)
// Attempts to produce the next token, honoring the current tokenizer state.
// Returns an end-of-file token at EOF, or null when no token type matched
// the upcoming character (the caller turns null into BadCharacterError).
private fun nextTokenOrNull(): Token? {
// At end of input, consume the EOF marker and emit the terminal token.
if (source.peek() == CharSource.EndOfFile) {
source.next()
return Token.endOfFile(source.currentSourceIndex())
}
startIndex = source.currentSourceIndex()
// Pass 1: token types backed by a custom character consumer (e.g. the
// string-literal body). Types not valid in the current state are skipped.
for (item in TokenType.CharConsumes) {
if (!item.validStates.contains(state)) {
continue
}
val text = item.charConsume!!.consumer.consume(item, this)
if (text != null) {
return produceToken(item, text)
}
}
val char = source.next()
// Pass 2: single-character token types, with iterative promotion so that
// e.g. '!' followed by '=' is upgraded to the Inequality token.
for (item in TokenType.SingleChars) {
if (!item.validStates.contains(state)) {
continue
}
val itemChar = item.singleChar!!.char
if (itemChar != char) {
continue
}
var type = item
var text = itemChar.toString()
var promoted = true
// Keep promoting while the next pending character matches a promotion;
// a promoted type may itself declare further promotions.
while (promoted) {
promoted = false
for (promotion in type.promotions) {
if (source.peek() != promotion.nextChar) {
continue
}
val nextChar = source.next()
type = promotion.type
text += nextChar
promoted = true
}
}
return produceToken(type, text)
}
var index = 0
// Pass 3: character-matcher token types; greedily accumulate characters
// while the matcher accepts the character at each successive index.
for (item in TokenType.CharMatches) {
if (!item.validStates.contains(state)) {
continue
}
if (!item.charMatch!!.matcher.valid(char, index)) {
continue
}
val text = buildString {
append(char)
while (item.charMatch.matcher.valid(source.peek(), ++index)) {
append(source.next())
}
}
var token = produceToken(item, text)
// An upgrader may rewrite the produced token (e.g. KeywordUpgrader).
val tokenUpgrader = item.tokenUpgrader
if (tokenUpgrader != null) {
token = tokenUpgrader.maybeUpgrade(token) ?: token
}
return token
}
// Nothing matched the consumed character.
return null
}
fun tokenize(): TokenStream {
// Produces the next token and applies any tokenizer state transition
// triggered by the produced token's type.
fun next(): Token {
// Peek before tokenizing so a failure below reports the character that
// could not be tokenized (nextTokenOrNull may consume input on its way
// to returning null).
val what = source.peek()
val token = nextTokenOrNull()
if (token != null) {
// Follow the first transition of the current state whose trigger token
// type matches what was just produced.
for (transition in state.transitions) {
if (transition.produced == token.type) {
state = transition.enter
break
}
}
return token
}
throw BadCharacterError(what, source.currentSourceIndex(), state)
}
fun stream(): TokenStream {
val tokens = mutableListOf<Token>()
while (true) {
val token = next()

View File

@ -0,0 +1,12 @@
package gay.pizza.pork.parser
// Tokenizer state machine: producing a Quote token moves between normal
// scanning and string-literal scanning, so the literal body between the two
// quotes is consumed by the StringLiteral consumer rather than normal rules.
enum class TokenizerState(vararg val transitions: Transition) {
Normal(Transition({ TokenType.Quote }) { StringLiteralStart }),
StringLiteralStart(Transition({ TokenType.StringLiteral }) { StringLiteralEnd }),
StringLiteralEnd(Transition({ TokenType.Quote }) { Normal });
// Transition takes thunks instead of direct values: enum entries here
// reference constants declared later (Normal -> StringLiteralStart), so
// evaluation is deferred with `by lazy` until initialization completes.
data class Transition(private val producedToken: () -> TokenType, private val nextState: () -> TokenizerState) {
val produced by lazy { producedToken() }
val enter by lazy { nextState() }
}
}