Tokenizer now always produces whitespace tokens, the parser ignores whitespace, and token types are declared through a property DSL.

This commit is contained in:
2023-08-21 00:14:58 -07:00
parent ccd10343c3
commit ce1e262c05
10 changed files with 110 additions and 68 deletions

View File

@ -4,29 +4,32 @@ import gay.pizza.pork.ast.Program
import gay.pizza.pork.eval.Arguments import gay.pizza.pork.eval.Arguments
import gay.pizza.pork.eval.PorkEvaluator import gay.pizza.pork.eval.PorkEvaluator
import gay.pizza.pork.eval.Scope import gay.pizza.pork.eval.Scope
import gay.pizza.pork.parse.PorkParser import gay.pizza.pork.parse.*
import gay.pizza.pork.parse.PorkTokenizer
import gay.pizza.pork.parse.StringCharSource
import gay.pizza.pork.parse.TokenStreamSource
import kotlin.io.path.Path import kotlin.io.path.Path
import kotlin.io.path.readText import kotlin.io.path.readText
fun main(args: Array<String>) { fun eval(ast: Program) {
fun eval(ast: Program) { val scope = Scope()
val scope = Scope() val evaluator = PorkEvaluator(scope)
val evaluator = PorkEvaluator(scope) evaluator.visit(ast)
evaluator.visit(ast) println("> ${scope.call("main", Arguments.Zero)}")
println("> ${scope.call("main", Arguments.Zero)}") }
}
fun main(args: Array<String>) {
val code = Path(args[0]).readText() val code = Path(args[0]).readText()
val stream = PorkTokenizer(StringCharSource(code)).tokenize() val stream = tokenize(code).excludeAllWhitespace()
println(stream.tokens.joinToString("\n")) println(stream.tokens.joinToString("\n"))
val parser = PorkParser(TokenStreamSource(stream)) val program = parse(stream)
val program = parser.readProgram()
eval(program) eval(program)
val exactStream = PorkTokenizer(StringCharSource(code), preserveWhitespace = true).tokenize() val exactStream = tokenize(code)
val exactCode = exactStream.tokens.joinToString("") { it.text } val exactCode = exactStream.tokens.joinToString("") { it.text }
println(exactCode) println(exactCode)
println(code == exactCode)
} }
/**
 * Tokenizes [input] by wrapping it in a [StringCharSource] and running a
 * [PorkTokenizer] over it.
 *
 * @param input the source text to tokenize.
 * @return the [TokenStream] produced by [PorkTokenizer.tokenize].
 */
fun tokenize(input: String): TokenStream {
    val chars = StringCharSource(input)
    val tokenizer = PorkTokenizer(chars)
    return tokenizer.tokenize()
}
/**
 * Parses [stream] into a [Program] by exposing it through a
 * [TokenStreamSource] and reading a whole program with a [PorkParser].
 *
 * @param stream the tokens to parse.
 * @return the program returned by [PorkParser.readProgram].
 */
fun parse(stream: TokenStream): Program {
    val tokens = TokenStreamSource(stream)
    return PorkParser(tokens).readProgram()
}

View File

@ -2,7 +2,9 @@ package gay.pizza.pork.parse
import gay.pizza.pork.ast.* import gay.pizza.pork.ast.*
class PorkParser(val source: PeekableSource<Token>) { class PorkParser(source: PeekableSource<Token>) {
private val whitespaceIncludedSource = source
private fun readIntLiteral(): IntLiteral { private fun readIntLiteral(): IntLiteral {
val token = expect(TokenType.IntLiteral) val token = expect(TokenType.IntLiteral)
return IntLiteral(token.text.toInt()) return IntLiteral(token.text.toInt())
@ -61,7 +63,7 @@ class PorkParser(val source: PeekableSource<Token>) {
} }
fun readExpression(): Expression { fun readExpression(): Expression {
val token = source.peek() val token = peek()
val expression = when (token.type) { val expression = when (token.type) {
TokenType.IntLiteral -> { TokenType.IntLiteral -> {
readIntLiteral() readIntLiteral()
@ -110,8 +112,13 @@ class PorkParser(val source: PeekableSource<Token>) {
} }
} }
if (peekType(TokenType.Plus, TokenType.Minus, TokenType.Multiply, TokenType.Divide, TokenType.Equality)) { if (peekType(
val infixToken = source.next() TokenType.Plus,
TokenType.Minus,
TokenType.Multiply,
TokenType.Divide,
TokenType.Equality)) {
val infixToken = next()
val infixOperator = convertInfixOperator(infixToken) val infixOperator = convertInfixOperator(infixToken)
return InfixOperation(expression, infixOperator, readExpression()) return InfixOperation(expression, infixOperator, readExpression())
} }
@ -155,15 +162,36 @@ class PorkParser(val source: PeekableSource<Token>) {
} }
private fun peekType(vararg types: TokenType): Boolean { private fun peekType(vararg types: TokenType): Boolean {
val token = source.peek() val token = peek()
return types.contains(token.type) return types.contains(token.type)
} }
private fun expect(type: TokenType): Token { private fun expect(type: TokenType): Token {
val token = source.next() val token = next()
if (token.type != type) { if (token.type != type) {
throw RuntimeException("Expected token type '${type}' but got type ${token.type} '${token.text}'") throw RuntimeException("Expected token type '${type}' but got type ${token.type} '${token.text}'")
} }
return token return token
} }
/**
 * Consumes tokens from the underlying source until one that is not
 * [TokenType.Whitespace] is found, and returns that token.
 */
private fun next(): Token {
    var candidate = whitespaceIncludedSource.next()
    // Whitespace tokens are consumed and discarded.
    while (candidate.type == TokenType.Whitespace) {
        candidate = whitespaceIncludedSource.next()
    }
    return candidate
}
/**
 * Returns the upcoming non-whitespace token without consuming it.
 *
 * Any [TokenType.Whitespace] tokens sitting in front of it ARE consumed from
 * the underlying source, so peeking advances past leading whitespace.
 */
private fun peek(): Token {
    var upcoming = whitespaceIncludedSource.peek()
    while (upcoming.type == TokenType.Whitespace) {
        // Drop the whitespace token, then look at whatever follows it.
        whitespaceIncludedSource.next()
        upcoming = whitespaceIncludedSource.peek()
    }
    return upcoming
}
} }

View File

@ -1,6 +1,6 @@
package gay.pizza.pork.parse package gay.pizza.pork.parse
class PorkTokenizer(val source: CharSource, val preserveWhitespace: Boolean = false) { class PorkTokenizer(val source: CharSource) {
private var tokenStart: Int = 0 private var tokenStart: Int = 0
private fun isSymbol(c: Char): Boolean = private fun isSymbol(c: Char): Boolean =
@ -22,12 +22,12 @@ class PorkTokenizer(val source: CharSource, val preserveWhitespace: Boolean = fa
var type = TokenType.Symbol var type = TokenType.Symbol
for (keyword in TokenType.Keywords) { for (keyword in TokenType.Keywords) {
if (symbol == keyword.keyword) { if (symbol == keyword.keyword?.text) {
type = keyword type = keyword
} }
} }
return Token(type, symbol) return Token(type, tokenStart, symbol)
} }
private fun readIntLiteral(firstChar: Char): Token { private fun readIntLiteral(firstChar: Char): Token {
@ -37,7 +37,7 @@ class PorkTokenizer(val source: CharSource, val preserveWhitespace: Boolean = fa
append(source.next()) append(source.next())
} }
} }
return Token(TokenType.IntLiteral, number) return Token(TokenType.IntLiteral, tokenStart, number)
} }
private fun readWhitespace(firstChar: Char): Token { private fun readWhitespace(firstChar: Char): Token {
@ -48,7 +48,7 @@ class PorkTokenizer(val source: CharSource, val preserveWhitespace: Boolean = fa
append(char) append(char)
} }
} }
return Token(TokenType.Whitespace, whitespace) return Token(TokenType.Whitespace, tokenStart, whitespace)
} }
fun next(): Token { fun next(): Token {
@ -57,12 +57,13 @@ class PorkTokenizer(val source: CharSource, val preserveWhitespace: Boolean = fa
val char = source.next() val char = source.next()
for (item in TokenType.SingleChars) { for (item in TokenType.SingleChars) {
if (item.char != char) { val itemChar = item.singleChar!!.char
if (itemChar != char) {
continue continue
} }
var type = item var type = item
var text = item.char.toString() var text = itemChar.toString()
for (promotion in item.promotions) { for (promotion in item.promotions) {
if (source.peek() != promotion.nextChar) { if (source.peek() != promotion.nextChar) {
continue continue
@ -71,15 +72,11 @@ class PorkTokenizer(val source: CharSource, val preserveWhitespace: Boolean = fa
type = promotion.type type = promotion.type
text += nextChar text += nextChar
} }
return Token(type, text) return Token(type, tokenStart, text)
} }
if (isWhitespace(char)) { if (isWhitespace(char)) {
val whitespace = readWhitespace(char) return readWhitespace(char)
if (preserveWhitespace) {
return whitespace
}
continue
} }
if (isDigit(char)) { if (isDigit(char)) {
@ -91,7 +88,7 @@ class PorkTokenizer(val source: CharSource, val preserveWhitespace: Boolean = fa
} }
throw RuntimeException("Failed to parse: (${char}) next ${source.peek()}") throw RuntimeException("Failed to parse: (${char}) next ${source.peek()}")
} }
return TokenSource.EndOfFile return Token.endOfFile(source.currentIndex)
} }
fun tokenize(): TokenStream { fun tokenize(): TokenStream {

View File

@ -1,5 +1,10 @@
package gay.pizza.pork.parse package gay.pizza.pork.parse
class Token(val type: TokenType, val text: String) { class Token(val type: TokenType, val start: Int, val text: String) {
override fun toString(): String = "${type.name} $text" override fun toString(): String = "${type.name} $text"
companion object {
/**
 * Builds a [TokenType.EndOfFile] token with empty text.
 *
 * @param size the value stored as the token's `start`.
 */
fun endOfFile(size: Int): Token {
    return Token(type = TokenType.EndOfFile, start = size, text = "")
}
}
} }

View File

@ -1,3 +0,0 @@
package gay.pizza.pork.parse
class TokenPromotion(val nextChar: Char, val type: TokenType)

View File

@ -1,7 +1,3 @@
package gay.pizza.pork.parse package gay.pizza.pork.parse
interface TokenSource : PeekableSource<Token> { interface TokenSource : PeekableSource<Token>
companion object {
val EndOfFile = Token(TokenType.EndOfFile, "")
}
}

View File

@ -1,5 +1,8 @@
package gay.pizza.pork.parse package gay.pizza.pork.parse
class TokenStream(val tokens: List<Token>) { class TokenStream(val tokens: List<Token>) {
/**
 * Returns a new [TokenStream] holding the tokens of this stream, in order,
 * except those whose type is [TokenType.Whitespace]. This stream is left
 * unchanged.
 */
fun excludeAllWhitespace(): TokenStream {
    val significant = tokens.filterNot { token -> token.type == TokenType.Whitespace }
    return TokenStream(significant)
}
override fun toString(): String = tokens.toString() override fun toString(): String = tokens.toString()
} }

View File

@ -6,7 +6,7 @@ class TokenStreamSource(val stream: TokenStream) : TokenSource {
override fun next(): Token { override fun next(): Token {
if (index == stream.tokens.size) { if (index == stream.tokens.size) {
return TokenSource.EndOfFile return Token.endOfFile(stream.tokens.size)
} }
val char = stream.tokens[index] val char = stream.tokens[index]
index++ index++
@ -15,7 +15,7 @@ class TokenStreamSource(val stream: TokenStream) : TokenSource {
override fun peek(): Token { override fun peek(): Token {
if (index == stream.tokens.size) { if (index == stream.tokens.size) {
return TokenSource.EndOfFile return Token.endOfFile(stream.tokens.size)
} }
return stream.tokens[index] return stream.tokens[index]
} }

View File

@ -1,33 +1,39 @@
package gay.pizza.pork.parse package gay.pizza.pork.parse
enum class TokenType(val char: Char? = null, val keyword: String? = null, val promotions: List<TokenPromotion> = emptyList()) { import gay.pizza.pork.parse.TokenTypeProperty.*
enum class TokenType(vararg properties: TokenTypeProperty) {
Symbol, Symbol,
IntLiteral, IntLiteral,
Equality, Equality,
Equals(char = '=', promotions = listOf(TokenPromotion('=', Equality))), Equals(SingleChar('='), Promotion('=', Equality)),
Plus(char = '+'), Plus(SingleChar('+')),
Minus(char = '-'), Minus(SingleChar('-')),
Multiply(char = '*'), Multiply(SingleChar('*')),
Divide(char = '/'), Divide(SingleChar('/')),
LeftCurly(char = '{'), LeftCurly(SingleChar('{')),
RightCurly(char = '}'), RightCurly(SingleChar('}')),
LeftBracket(char = '['), LeftBracket(SingleChar('[')),
RightBracket(char = ']'), RightBracket(SingleChar(']')),
LeftParentheses(char = '('), LeftParentheses(SingleChar('(')),
RightParentheses(char = ')'), RightParentheses(SingleChar(')')),
Negation(char = '!'), Negation(SingleChar('!')),
Comma(char = ','), Comma(SingleChar(',')),
False(keyword = "false"), False(Keyword("false")),
True(keyword = "true"), True(Keyword("true")),
In(keyword = "in"), In(Keyword("in")),
If(keyword = "if"), If(Keyword("if")),
Then(keyword = "then"), Then(Keyword("then")),
Else(keyword = "else"), Else(Keyword("else")),
Whitespace, Whitespace,
EndOfFile; EndOfFile;
/** Every [Promotion] property passed to this entry, in declaration order; empty when none were given. */
val promotions: List<Promotion> = properties.filterIsInstance<Promotion>()
/** The entry's [Keyword] property, or null when it declares none (or more than one, since `singleOrNull` is used). */
val keyword: Keyword? = properties.filterIsInstance<Keyword>().singleOrNull()
/** The entry's [SingleChar] property, or null when it declares none (or more than one, since `singleOrNull` is used). */
val singleChar: SingleChar? = properties.filterIsInstance<SingleChar>().singleOrNull()
companion object { companion object {
val Keywords = entries.filter { it.keyword != null } val Keywords = entries.filter { item -> item.keyword != null }
val SingleChars = entries.filter { it.char != null } val SingleChars = entries.filter { item -> item.singleChar != null }
} }
} }

View File

@ -0,0 +1,7 @@
package gay.pizza.pork.parse
/**
 * A property that can be attached to a [TokenType] entry through its vararg
 * constructor. The set of property kinds is closed.
 */
sealed class TokenTypeProperty {
    /** Marks a token type as being spelled by the single character [char]. */
    class SingleChar(
        val char: Char,
    ) : TokenTypeProperty()

    /**
     * Names another token [type] to switch to when [nextChar] is the next
     * character after the owning token's own character.
     */
    class Promotion(
        val nextChar: Char,
        val type: TokenType,
    ) : TokenTypeProperty()

    /** Marks a token type as a keyword whose spelling is [text]. */
    class Keyword(
        val text: String,
    ) : TokenTypeProperty()
}