Meta tokenizer.

This commit is contained in:
Alex Zenla 2023-08-25 23:55:09 -07:00
parent 1c300b6109
commit 7f0b9ca8e3
Signed by: alex
GPG Key ID: C0780728420EBFE5
4 changed files with 39 additions and 63 deletions

View File

@ -5,7 +5,7 @@ class Scope(val parent: Scope? = null) {
/**
 * Binds [name] to [value] in this scope's `variables` map.
 *
 * Only this scope's own map is checked for an existing binding here.
 *
 * @param name the variable name to bind.
 * @param value the value to store under [name].
 * @throws RuntimeException if `variables` already contains [name].
 */
fun define(name: String, value: Any) {
// Refuse to overwrite an existing binding.
if (variables.containsKey(name)) {
throw RuntimeException("Variable '${name}' is already defined.")
// NOTE(review): this second throw is unreachable as written. The two lines differ only in the
// trailing period and look like the before/after sides of a diff; exactly one should remain.
throw RuntimeException("Variable '${name}' is already defined")
}
variables[name] = value
}
@ -24,7 +24,7 @@ class Scope(val parent: Scope? = null) {
/**
 * Resolves [name] through `value(name)` and invokes the result with [arguments].
 *
 * @param name the name to resolve.
 * @param arguments the arguments forwarded to the resolved function.
 * @return whatever the resolved function's `call` returns.
 * @throws RuntimeException if the resolved value is not a [CallableFunction].
 */
fun call(name: String, arguments: Arguments): Any {
val value = value(name)
// The type check also smart-casts `value` for the call below.
if (value !is CallableFunction) {
throw RuntimeException("$value is not callable.")
// NOTE(review): this second throw is unreachable as written. The two lines differ only in the
// trailing period and look like the before/after sides of a diff; exactly one should remain.
throw RuntimeException("$value is not callable")
}
return value.call(arguments)
}
@ -35,7 +35,7 @@ class Scope(val parent: Scope? = null) {
/**
 * Returns the [parent] of this scope.
 *
 * @return the parent scope.
 * @throws RuntimeException if this scope has no parent.
 */
fun leave(): Scope {
if (parent == null) {
throw RuntimeException("Parent context not found.")
// NOTE(review): this second throw is unreachable as written. The two lines differ only in the
// trailing period and look like the before/after sides of a diff; exactly one should remain.
throw RuntimeException("Parent context not found")
}
return parent
}

View File

@ -4,8 +4,8 @@ import gay.pizza.pork.parse.TokenTypeProperty.*
import gay.pizza.pork.parse.TokenFamily.*
enum class TokenType(vararg properties: TokenTypeProperty) {
Symbol(SymbolFamily),
IntLiteral(NumericLiteralFamily),
Symbol(SymbolFamily, CharConsumer { (it in 'a'..'z') || (it in 'A'..'Z') || it == '_' }, KeywordUpgrader),
IntLiteral(NumericLiteralFamily, CharConsumer { it in '0'..'9' }),
StringLiteral(StringLiteralFamily),
Equality(OperatorFamily),
Inequality(OperatorFamily),
@ -28,7 +28,7 @@ enum class TokenType(vararg properties: TokenTypeProperty) {
If(Keyword("if"), KeywordFamily),
Then(Keyword("then"), KeywordFamily),
Else(Keyword("else"), KeywordFamily),
Whitespace,
Whitespace(CharConsumer { it == ' ' || it == '\r' || it == '\n' || it == '\t' }),
BlockComment(CommentFamily),
LineComment(CommentFamily),
EndOfFile;
@ -38,9 +38,12 @@ enum class TokenType(vararg properties: TokenTypeProperty) {
/** The [SingleChar] among this entry's `properties`, or null when there is not exactly one. */
val singleChar: SingleChar? = properties.filterIsInstance<SingleChar>().singleOrNull()
/** The [TokenFamily] among this entry's `properties`; [OtherFamily] when there is not exactly one. */
val family: TokenFamily =
properties.filterIsInstance<TokenFamily>().singleOrNull() ?: OtherFamily
/** The [CharConsumer] among this entry's `properties`, or null when there is not exactly one. */
val charConsumer: CharConsumer? = properties.filterIsInstance<CharConsumer>().singleOrNull()
/** The [TokenUpgrader] among this entry's `properties`, or null when there is not exactly one. */
val tokenUpgrader: TokenUpgrader? = properties.filterIsInstance<TokenUpgrader>().singleOrNull()
companion object {
  /** Every entry whose `keyword` is non-null, in declaration order. */
  val Keywords: List<TokenType> = entries.filter { it.keyword != null }

  /** Every entry whose `singleChar` is non-null, in declaration order. */
  val SingleChars: List<TokenType> = entries.filter { it.singleChar != null }

  /** Every entry whose `charConsumer` is non-null, in declaration order. */
  val CharConsumers: List<TokenType> = entries.filter { it.charConsumer != null }
}
}

View File

@ -4,4 +4,17 @@ interface TokenTypeProperty {
/** Property carrying a single character in [char]. */
class SingleChar(val char: Char) : TokenTypeProperty
/**
 * Property pairing a character ([nextChar]) with a [TokenType] ([type]).
 * NOTE(review): presumably the type to switch to when [nextChar] follows; its use is not visible here, confirm against the tokenizer.
 */
class Promotion(val nextChar: Char, val type: TokenType) : TokenTypeProperty
/** Property carrying a keyword's literal [text]. */
class Keyword(val text: String) : TokenTypeProperty
/** Property carrying a per-character predicate, [isValid]. */
class CharConsumer(val isValid: (Char) -> Boolean) : TokenTypeProperty
/**
 * Property carrying [maybeUpgrade], a function from a [Token] to a replacement [Token] or null.
 * Declared `open` so that concrete upgraders can subclass it.
 */
open class TokenUpgrader(val maybeUpgrade: (Token) -> Token?) : TokenTypeProperty
/**
 * [TokenUpgrader] that re-types a token as a keyword.
 *
 * Yields a new [Token] with the same start and text, typed as the first entry of
 * [TokenType.Keywords] whose keyword text equals the token's text. Yields null when
 * no entry matches.
 */
object KeywordUpgrader : TokenUpgrader({ token ->
  TokenType.Keywords
    .firstOrNull { candidate -> candidate.keyword?.text == token.text }
    ?.let { keywordType -> Token(keywordType, token.start, token.text) }
})
}

View File

@ -3,54 +3,6 @@ package gay.pizza.pork.parse
class Tokenizer(val source: CharSource) {
private var tokenStart: Int = 0
/** Returns true when [c] is an ASCII letter (`a`-`z`, `A`-`Z`) or an underscore. */
private fun isSymbol(c: Char): Boolean = when (c) {
  in 'a'..'z', in 'A'..'Z', '_' -> true
  else -> false
}
/** Returns true when [c] is one of the ASCII digits `0` through `9`. */
private fun isDigit(c: Char): Boolean {
  return '0' <= c && c <= '9'
}
/** Returns true when [c] is a space, carriage return, line feed, or tab. */
private fun isWhitespace(c: Char): Boolean = when (c) {
  ' ', '\r', '\n', '\t' -> true
  else -> false
}
/**
 * Reads the rest of a symbol that starts with [firstChar].
 *
 * Characters are taken from `source` for as long as the next one satisfies `isSymbol`.
 * The token is typed as the last entry of [TokenType.Keywords] whose keyword text
 * equals the collected text, or as [TokenType.Symbol] when none matches.
 *
 * @param firstChar the already-consumed first character of the symbol.
 * @return a token starting at `tokenStart` holding the collected text.
 */
private fun readSymbolOrKeyword(firstChar: Char): Token {
  val collected = StringBuilder()
  collected.append(firstChar)
  while (isSymbol(source.peek())) {
    collected.append(source.next())
  }
  val symbol = collected.toString()
  val type = TokenType.Keywords.lastOrNull { it.keyword?.text == symbol } ?: TokenType.Symbol
  return Token(type, tokenStart, symbol)
}
/**
 * Reads the rest of an integer literal that starts with [firstChar].
 *
 * Characters are taken from `source` for as long as the next one satisfies `isDigit`.
 *
 * @param firstChar the already-consumed first character of the literal.
 * @return a [TokenType.IntLiteral] token starting at `tokenStart` holding the collected text.
 */
private fun readIntLiteral(firstChar: Char): Token {
  val digits = StringBuilder()
  digits.append(firstChar)
  while (isDigit(source.peek())) {
    digits.append(source.next())
  }
  return Token(TokenType.IntLiteral, tokenStart, digits.toString())
}
/**
 * Reads the rest of a whitespace run that starts with [firstChar].
 *
 * Characters are taken from `source` for as long as the next one satisfies `isWhitespace`.
 *
 * @param firstChar the already-consumed first character of the run.
 * @return a [TokenType.Whitespace] token starting at `tokenStart` holding the collected text.
 */
private fun readWhitespace(firstChar: Char): Token {
  val run = StringBuilder()
  run.append(firstChar)
  while (isWhitespace(source.peek())) {
    run.append(source.next())
  }
  return Token(TokenType.Whitespace, tokenStart, run.toString())
}
private fun readBlockComment(firstChar: Char): Token {
val comment = buildString {
append(firstChar)
@ -138,16 +90,24 @@ class Tokenizer(val source: CharSource) {
return Token(type, tokenStart, text)
}
if (isWhitespace(char)) {
return readWhitespace(char)
}
for (item in TokenType.CharConsumers) {
val consumer = item.charConsumer ?: continue
if (!consumer.isValid(char)) {
continue
}
if (isDigit(char)) {
return readIntLiteral(char)
}
if (isSymbol(char)) {
return readSymbolOrKeyword(char)
val text = buildString {
append(char)
while (consumer.isValid(source.peek())) {
append(source.next())
}
}
var token = Token(item, tokenStart, text)
val tokenUpgrader = item.tokenUpgrader
if (tokenUpgrader != null) {
token = tokenUpgrader.maybeUpgrade(token) ?: token
}
return token
}
if (char == '"') {