mirror of
https://github.com/GayPizzaSpecifications/pork.git
synced 2025-08-03 21:21:33 +00:00
pork: it's got it all, ffi, state machine tokenizer, and better IDE support
This commit is contained in:
@ -1,5 +1,5 @@
|
||||
package gay.pizza.pork.parser
|
||||
|
||||
class BadCharacterError(val char: Char, sourceIndex: SourceIndex) : ParseError(
|
||||
"Failed to produce token for '${char}' at $sourceIndex"
|
||||
class BadCharacterError(val char: Char, sourceIndex: SourceIndex, state: TokenizerState) : ParseError(
|
||||
"Failed to produce token for '${char}' at $sourceIndex in state $state"
|
||||
)
|
||||
|
@ -2,5 +2,5 @@ package gay.pizza.pork.parser
|
||||
|
||||
open class ParseError(val error: String) : RuntimeException() {
|
||||
override val message: String
|
||||
get() = "${error}\nDescent path: ${ParserStackAnalysis(this).findDescentPath().joinToString(", ")}"
|
||||
get() = "${error}${ParserStackAnalysis(this).buildDescentPathAddendum()}"
|
||||
}
|
||||
|
@ -21,7 +21,7 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
|
||||
val token = peek()
|
||||
var expression = when (token.type) {
|
||||
TokenType.NumberLiteral -> parseNumberLiteral()
|
||||
TokenType.StringLiteral -> parseStringLiteral()
|
||||
TokenType.Quote -> parseStringLiteral()
|
||||
TokenType.True, TokenType.False -> parseBooleanLiteral()
|
||||
TokenType.LeftBracket -> parseListLiteral()
|
||||
TokenType.Let -> parseLetAssignment()
|
||||
@ -233,10 +233,14 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
|
||||
|
||||
override fun parseImportDeclaration(): ImportDeclaration = expect(NodeType.ImportDeclaration, TokenType.Import) {
|
||||
val form = parseSymbol()
|
||||
ImportDeclaration(form, parseImportPath())
|
||||
}
|
||||
|
||||
override fun parseImportPath(): ImportPath = guarded(NodeType.ImportPath) {
|
||||
val components = oneAndContinuedBy(TokenType.Dot) {
|
||||
parseSymbol()
|
||||
}
|
||||
ImportDeclaration(form, components)
|
||||
ImportPath(components)
|
||||
}
|
||||
|
||||
override fun parseIndexedBy(): IndexedBy = guarded(NodeType.IndexedBy) {
|
||||
@ -302,7 +306,7 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
|
||||
override fun parseNative(): Native = expect(NodeType.Native, TokenType.Native) {
|
||||
val form = parseSymbol()
|
||||
val definitions = mutableListOf<StringLiteral>()
|
||||
while (peek(TokenType.StringLiteral)) {
|
||||
while (peek(TokenType.Quote)) {
|
||||
definitions.add(parseStringLiteral())
|
||||
}
|
||||
Native(form, definitions)
|
||||
@ -333,8 +337,11 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
|
||||
SetAssignment(symbol, value)
|
||||
}
|
||||
|
||||
override fun parseStringLiteral(): StringLiteral = expect(NodeType.StringLiteral, TokenType.StringLiteral) {
|
||||
val content = StringEscape.unescape(StringEscape.unquote(it.text))
|
||||
override fun parseStringLiteral(): StringLiteral = guarded(NodeType.StringLiteral) {
|
||||
expect(TokenType.Quote)
|
||||
val stringLiteralToken = expect(TokenType.StringLiteral)
|
||||
expect(TokenType.Quote)
|
||||
val content = StringEscape.unescape(stringLiteralToken.text)
|
||||
StringLiteral(content)
|
||||
}
|
||||
|
||||
|
@ -24,4 +24,13 @@ class ParserStackAnalysis(private val stack: Array<StackTraceElement>) {
|
||||
}
|
||||
return parseDescentPaths.reversed()
|
||||
}
|
||||
|
||||
/**
 * Builds the text appended to a parse error message to describe the parser descent path.
 *
 * @return an empty string when [findDescentPath] yields no entries; otherwise a string that
 * starts with a newline, followed by the `Parser descent path: ` label and the entries
 * separated by `, `.
 */
fun buildDescentPathAddendum(): String {
  val path = findDescentPath()
  return when {
    // Nothing to report: contribute no text at all (not even the leading newline).
    path.isEmpty() -> ""
    else -> path.joinToString(separator = ", ", prefix = "\nParser descent path: ")
  }
}
|
||||
}
|
||||
|
@ -238,6 +238,10 @@ class Printer(buffer: StringBuilder) : NodeVisitor<Unit> {
|
||||
append("import ")
|
||||
visit(node.form)
|
||||
append(" ")
|
||||
visit(node.path)
|
||||
}
|
||||
|
||||
override fun visitImportPath(node: ImportPath) {
|
||||
for ((index, component) in node.components.withIndex()) {
|
||||
visit(component)
|
||||
if (index != node.components.size - 1) {
|
||||
|
@ -2,12 +2,7 @@ package gay.pizza.pork.parser
|
||||
|
||||
object StringCharConsumer : CharConsumer {
|
||||
override fun consume(type: TokenType, tokenizer: Tokenizer): String? {
|
||||
if (!tokenizer.peek("\"")) {
|
||||
return null
|
||||
}
|
||||
|
||||
val buffer = StringBuilder()
|
||||
buffer.append(tokenizer.source.next())
|
||||
var escape = false
|
||||
while (true) {
|
||||
val char = tokenizer.source.peek()
|
||||
@ -16,12 +11,14 @@ object StringCharConsumer : CharConsumer {
|
||||
throw UnterminatedTokenError("String", tokenizer.source.currentSourceIndex())
|
||||
}
|
||||
|
||||
if (char == '"' && !escape) {
|
||||
break
|
||||
}
|
||||
|
||||
buffer.append(tokenizer.source.next())
|
||||
|
||||
if (char == '\\') {
|
||||
escape = true
|
||||
} else if (char == '"' && !escape) {
|
||||
break
|
||||
}
|
||||
}
|
||||
return buffer.toString()
|
||||
|
@ -3,5 +3,4 @@ package gay.pizza.pork.parser
|
||||
/**
 * Conversions between the escaped (source text) and unescaped (runtime value) forms of
 * string literal content.
 *
 * Recognized escape sequences are `\n` (newline), `\\` (backslash) and `\"` (double quote).
 * [escape] and [unescape] are inverses of each other for any input value:
 * `unescape(escape(x)) == x`.
 */
object StringEscape {
  /**
   * Encodes [input] so it can be placed between double quotes in source text.
   *
   * Backslashes are encoded first-class (as `\\`) so that a literal backslash followed by
   * `n` in the value is not later mistaken for a newline escape.
   */
  fun escape(input: String): String = buildString(input.length) {
    for (char in input) {
      when (char) {
        '\\' -> append("\\\\")
        '"' -> append("\\\"")
        '\n' -> append("\\n")
        else -> append(char)
      }
    }
  }

  /**
   * Decodes the escape sequences in [input].
   *
   * The input is scanned left to right so that each backslash is paired with exactly the
   * character that follows it; a plain textual replace would wrongly decode the tail of
   * an escaped backslash (`\\n`) as a newline. Unrecognized sequences and a trailing lone
   * backslash are copied through unchanged.
   */
  fun unescape(input: String): String = buildString(input.length) {
    var index = 0
    while (index < input.length) {
      val char = input[index]
      val following = input.getOrNull(index + 1)
      if (char != '\\' || following == null) {
        append(char)
        index++
        continue
      }

      when (following) {
        'n' -> append('\n')
        '\\' -> append('\\')
        '"' -> append('"')
        else -> {
          // Unknown escape: keep both characters exactly as written.
          append(char)
          append(following)
        }
      }
      index += 2
    }
  }

  /**
   * Removes the first and last character of [input].
   *
   * The characters are not checked to be quotes. Inputs shorter than two characters
   * cause an index-out-of-bounds exception from [String.substring].
   */
  fun unquote(input: String): String = input.substring(1, input.length - 1)
}
|
||||
|
@ -6,7 +6,7 @@ import gay.pizza.pork.parser.TokenTypeProperty.*
|
||||
import gay.pizza.pork.parser.TokenFamily.*
|
||||
import gay.pizza.pork.parser.TokenTypeProperty.AnyOf
|
||||
|
||||
enum class TokenType(vararg properties: TokenTypeProperty) {
|
||||
enum class TokenType(vararg val properties: TokenTypeProperty) {
|
||||
NumberLiteral(NumericLiteralFamily, CharMatch(CharMatcher.AnyOf(
|
||||
MatchRange('0'..'9'),
|
||||
NotAtIndex(0, MatchSingle('.'))
|
||||
@ -17,7 +17,8 @@ enum class TokenType(vararg properties: TokenTypeProperty) {
|
||||
MatchRange('0' .. '9'),
|
||||
MatchSingle('_')
|
||||
)), KeywordUpgrader),
|
||||
StringLiteral(StringLiteralFamily, CharConsume(StringCharConsumer)),
|
||||
Quote(StringLiteralFamily, SingleChar('"'), InsideStates(TokenizerState.Normal, TokenizerState.StringLiteralEnd)),
|
||||
StringLiteral(StringLiteralFamily, CharConsume(StringCharConsumer), InsideStates(TokenizerState.StringLiteralStart)),
|
||||
Equality(OperatorFamily),
|
||||
Inequality(ManyChars("!="), OperatorFamily),
|
||||
ExclamationPoint(SingleChar('!'), Promotion('=', Inequality)),
|
||||
@ -91,6 +92,11 @@ enum class TokenType(vararg properties: TokenTypeProperty) {
|
||||
val charConsume: CharConsume? = properties.filterIsInstance<CharConsume>().singleOrNull()
|
||||
val tokenUpgrader: TokenUpgrader? =
|
||||
properties.filterIsInstance<TokenUpgrader>().singleOrNull()
|
||||
val validStates: List<TokenizerState> by lazy {
|
||||
properties
|
||||
.filterIsInstance<InsideStates>()
|
||||
.singleOrNull()?.states?.toList() ?: listOf(TokenizerState.Normal)
|
||||
}
|
||||
|
||||
val simpleWantString: String? = manyChars?.text ?: singleChar?.char?.toString()
|
||||
|
||||
|
@ -5,6 +5,7 @@ interface TokenTypeProperty {
|
||||
class Promotion(val nextChar: Char, val type: TokenType) : TokenTypeProperty
|
||||
class ManyChars(val text: String) : TokenTypeProperty
|
||||
class AnyOf(vararg val strings: String): TokenTypeProperty
|
||||
class InsideStates(vararg val states: TokenizerState) : TokenTypeProperty
|
||||
open class CharMatch(val matcher: CharMatcher) : TokenTypeProperty
|
||||
open class CharConsume(val consumer: CharConsumer) : TokenTypeProperty
|
||||
open class TokenUpgrader(val maybeUpgrade: (Token) -> Token?) : TokenTypeProperty
|
||||
|
@ -2,73 +2,100 @@ package gay.pizza.pork.parser
|
||||
|
||||
class Tokenizer(source: CharSource) {
|
||||
val source: SourceIndexCharSource = SourceIndexCharSource(source)
|
||||
|
||||
private var startIndex: SourceIndex = SourceIndex.zero()
|
||||
private var state = TokenizerState.Normal
|
||||
|
||||
fun next(): Token {
|
||||
while (source.peek() != CharSource.EndOfFile) {
|
||||
startIndex = source.currentSourceIndex()
|
||||
|
||||
for (item in TokenType.CharConsumes) {
|
||||
val text = item.charConsume!!.consumer.consume(item, this)
|
||||
if (text != null) {
|
||||
return produceToken(item, text)
|
||||
}
|
||||
}
|
||||
|
||||
val char = source.next()
|
||||
|
||||
for (item in TokenType.SingleChars) {
|
||||
val itemChar = item.singleChar!!.char
|
||||
if (itemChar != char) {
|
||||
continue
|
||||
}
|
||||
|
||||
var type = item
|
||||
var text = itemChar.toString()
|
||||
var promoted = true
|
||||
while (promoted) {
|
||||
promoted = false
|
||||
for (promotion in type.promotions) {
|
||||
if (source.peek() != promotion.nextChar) {
|
||||
continue
|
||||
}
|
||||
val nextChar = source.next()
|
||||
type = promotion.type
|
||||
text += nextChar
|
||||
promoted = true
|
||||
}
|
||||
}
|
||||
return produceToken(type, text)
|
||||
}
|
||||
|
||||
var index = 0
|
||||
for (item in TokenType.CharMatches) {
|
||||
if (!item.charMatch!!.matcher.valid(char, index)) {
|
||||
continue
|
||||
}
|
||||
|
||||
val text = buildString {
|
||||
append(char)
|
||||
|
||||
while (item.charMatch.matcher.valid(source.peek(), ++index)) {
|
||||
append(source.next())
|
||||
}
|
||||
}
|
||||
var token = produceToken(item, text)
|
||||
val tokenUpgrader = item.tokenUpgrader
|
||||
if (tokenUpgrader != null) {
|
||||
token = tokenUpgrader.maybeUpgrade(token) ?: token
|
||||
}
|
||||
return token
|
||||
}
|
||||
|
||||
throw BadCharacterError(char, startIndex)
|
||||
private fun nextTokenOrNull(): Token? {
|
||||
if (source.peek() == CharSource.EndOfFile) {
|
||||
source.next()
|
||||
return Token.endOfFile(source.currentSourceIndex())
|
||||
}
|
||||
return Token.endOfFile(startIndex.copy(index = source.currentIndex))
|
||||
|
||||
startIndex = source.currentSourceIndex()
|
||||
|
||||
for (item in TokenType.CharConsumes) {
|
||||
if (!item.validStates.contains(state)) {
|
||||
continue
|
||||
}
|
||||
val text = item.charConsume!!.consumer.consume(item, this)
|
||||
if (text != null) {
|
||||
return produceToken(item, text)
|
||||
}
|
||||
}
|
||||
|
||||
val char = source.next()
|
||||
|
||||
for (item in TokenType.SingleChars) {
|
||||
if (!item.validStates.contains(state)) {
|
||||
continue
|
||||
}
|
||||
|
||||
val itemChar = item.singleChar!!.char
|
||||
if (itemChar != char) {
|
||||
continue
|
||||
}
|
||||
|
||||
var type = item
|
||||
var text = itemChar.toString()
|
||||
var promoted = true
|
||||
while (promoted) {
|
||||
promoted = false
|
||||
for (promotion in type.promotions) {
|
||||
if (source.peek() != promotion.nextChar) {
|
||||
continue
|
||||
}
|
||||
val nextChar = source.next()
|
||||
type = promotion.type
|
||||
text += nextChar
|
||||
promoted = true
|
||||
}
|
||||
}
|
||||
return produceToken(type, text)
|
||||
}
|
||||
|
||||
var index = 0
|
||||
for (item in TokenType.CharMatches) {
|
||||
if (!item.validStates.contains(state)) {
|
||||
continue
|
||||
}
|
||||
|
||||
if (!item.charMatch!!.matcher.valid(char, index)) {
|
||||
continue
|
||||
}
|
||||
|
||||
val text = buildString {
|
||||
append(char)
|
||||
|
||||
while (item.charMatch.matcher.valid(source.peek(), ++index)) {
|
||||
append(source.next())
|
||||
}
|
||||
}
|
||||
var token = produceToken(item, text)
|
||||
val tokenUpgrader = item.tokenUpgrader
|
||||
if (tokenUpgrader != null) {
|
||||
token = tokenUpgrader.maybeUpgrade(token) ?: token
|
||||
}
|
||||
return token
|
||||
}
|
||||
return null
|
||||
}
|
||||
|
||||
fun tokenize(): TokenStream {
|
||||
/**
 * Produces the next token and advances the tokenizer state machine.
 *
 * After a token is produced, the first transition of the current state whose produced
 * token type equals the token's type is applied; if none matches, the state is unchanged.
 *
 * @return the token produced by `nextTokenOrNull`.
 * @throws BadCharacterError when no token can be produced in the current state.
 */
fun next(): Token {
  // Capture the upcoming character before any consumption so it can be reported on failure.
  val upcoming = source.peek()

  // NOTE(review): the source index is read after nextTokenOrNull has run, so it may point
  // past the offending character rather than at it - confirm this is intended.
  val token = nextTokenOrNull()
    ?: throw BadCharacterError(upcoming, source.currentSourceIndex(), state)

  state.transitions
    .firstOrNull { it.produced == token.type }
    ?.let { matched -> state = matched.enter }

  return token
}
|
||||
|
||||
fun stream(): TokenStream {
|
||||
val tokens = mutableListOf<Token>()
|
||||
while (true) {
|
||||
val token = next()
|
||||
|
@ -0,0 +1,12 @@
|
||||
package gay.pizza.pork.parser
|
||||
|
||||
/**
 * States of the tokenizer, each carrying the [transitions] that may leave it.
 *
 * The visible transitions form a cycle: [Normal] moves to [StringLiteralStart] on a
 * `Quote` token, [StringLiteralStart] moves to [StringLiteralEnd] on a `StringLiteral`
 * token, and [StringLiteralEnd] moves back to [Normal] on a `Quote` token.
 */
enum class TokenizerState(vararg val transitions: Transition) {
  Normal(
    Transition(producedToken = { TokenType.Quote }, nextState = { StringLiteralStart })
  ),
  StringLiteralStart(
    Transition(producedToken = { TokenType.StringLiteral }, nextState = { StringLiteralEnd })
  ),
  StringLiteralEnd(
    Transition(producedToken = { TokenType.Quote }, nextState = { Normal })
  );

  /**
   * A single edge of the state machine: when a token of type [produced] is emitted,
   * the tokenizer enters the [enter] state.
   *
   * Both ends are supplied as functions and resolved on first access. Presumably this
   * avoids initialization-order problems between this enum and `TokenType` - verify.
   */
  data class Transition(
    private val producedToken: () -> TokenType,
    private val nextState: () -> TokenizerState
  ) {
    /** Token type that triggers this transition; resolved lazily on first read. */
    val produced: TokenType by lazy(producedToken)

    /** State entered when this transition fires; resolved lazily on first read. */
    val enter: TokenizerState by lazy(nextState)
  }
}
|
Reference in New Issue
Block a user