Tokenizer now always produces whitespace tokens, the parser ignores whitespace, and token types are declared with a property DSL.

This commit is contained in:
Alex Zenla 2023-08-21 00:14:58 -07:00
parent ccd10343c3
commit ce1e262c05
Signed by: alex
GPG Key ID: C0780728420EBFE5
10 changed files with 110 additions and 68 deletions

View File

@ -4,29 +4,32 @@ import gay.pizza.pork.ast.Program
import gay.pizza.pork.eval.Arguments
import gay.pizza.pork.eval.PorkEvaluator
import gay.pizza.pork.eval.Scope
import gay.pizza.pork.parse.PorkParser
import gay.pizza.pork.parse.PorkTokenizer
import gay.pizza.pork.parse.StringCharSource
import gay.pizza.pork.parse.TokenStreamSource
import gay.pizza.pork.parse.*
import kotlin.io.path.Path
import kotlin.io.path.readText
fun main(args: Array<String>) {
fun eval(ast: Program) {
val scope = Scope()
val evaluator = PorkEvaluator(scope)
evaluator.visit(ast)
println("> ${scope.call("main", Arguments.Zero)}")
}
/**
 * Visits [ast] with a [PorkEvaluator] bound to a fresh [Scope], then calls
 * `main` in that scope with [Arguments.Zero] and prints the result prefixed
 * with "> ".
 */
fun eval(ast: Program) {
    val globalScope = Scope()
    PorkEvaluator(globalScope).visit(ast)
    val result = globalScope.call("main", Arguments.Zero)
    println("> ${result}")
}
fun main(args: Array<String>) {
val code = Path(args[0]).readText()
val stream = PorkTokenizer(StringCharSource(code)).tokenize()
val stream = tokenize(code).excludeAllWhitespace()
println(stream.tokens.joinToString("\n"))
val parser = PorkParser(TokenStreamSource(stream))
val program = parser.readProgram()
val program = parse(stream)
eval(program)
val exactStream = PorkTokenizer(StringCharSource(code), preserveWhitespace = true).tokenize()
val exactStream = tokenize(code)
val exactCode = exactStream.tokens.joinToString("") { it.text }
println(exactCode)
println(code == exactCode)
}
/**
 * Runs a [PorkTokenizer] over [input] (wrapped in a [StringCharSource]) and
 * returns the resulting [TokenStream].
 */
fun tokenize(input: String): TokenStream {
    val chars = StringCharSource(input)
    val tokenizer = PorkTokenizer(chars)
    return tokenizer.tokenize()
}
/**
 * Reads a [Program] from [stream] using a [PorkParser] fed by a
 * [TokenStreamSource].
 */
fun parse(stream: TokenStream): Program {
    val tokens = TokenStreamSource(stream)
    val parser = PorkParser(tokens)
    return parser.readProgram()
}

View File

@ -2,7 +2,9 @@ package gay.pizza.pork.parse
import gay.pizza.pork.ast.*
class PorkParser(val source: PeekableSource<Token>) {
class PorkParser(source: PeekableSource<Token>) {
private val whitespaceIncludedSource = source
private fun readIntLiteral(): IntLiteral {
val token = expect(TokenType.IntLiteral)
return IntLiteral(token.text.toInt())
@ -61,7 +63,7 @@ class PorkParser(val source: PeekableSource<Token>) {
}
fun readExpression(): Expression {
val token = source.peek()
val token = peek()
val expression = when (token.type) {
TokenType.IntLiteral -> {
readIntLiteral()
@ -110,8 +112,13 @@ class PorkParser(val source: PeekableSource<Token>) {
}
}
if (peekType(TokenType.Plus, TokenType.Minus, TokenType.Multiply, TokenType.Divide, TokenType.Equality)) {
val infixToken = source.next()
if (peekType(
TokenType.Plus,
TokenType.Minus,
TokenType.Multiply,
TokenType.Divide,
TokenType.Equality)) {
val infixToken = next()
val infixOperator = convertInfixOperator(infixToken)
return InfixOperation(expression, infixOperator, readExpression())
}
@ -155,15 +162,36 @@ class PorkParser(val source: PeekableSource<Token>) {
}
private fun peekType(vararg types: TokenType): Boolean {
val token = source.peek()
val token = peek()
return types.contains(token.type)
}
private fun expect(type: TokenType): Token {
val token = source.next()
val token = next()
if (token.type != type) {
throw RuntimeException("Expected token type '${type}' but got type ${token.type} '${token.text}'")
}
return token
}
/**
 * Consumes tokens from the underlying source until one that is not
 * [TokenType.Whitespace] is found, and returns that token.
 */
private fun next(): Token {
    var candidate = whitespaceIncludedSource.next()
    while (candidate.type == TokenType.Whitespace) {
        candidate = whitespaceIncludedSource.next()
    }
    return candidate
}
/**
 * Returns the upcoming non-whitespace token without consuming it.
 *
 * Any [TokenType.Whitespace] tokens ahead of it are consumed from the
 * underlying source as a side effect.
 */
private fun peek(): Token {
    var upcoming = whitespaceIncludedSource.peek()
    while (upcoming.type == TokenType.Whitespace) {
        // Drop the whitespace token so the following peek sees what comes after it.
        whitespaceIncludedSource.next()
        upcoming = whitespaceIncludedSource.peek()
    }
    return upcoming
}
}

View File

@ -1,6 +1,6 @@
package gay.pizza.pork.parse
class PorkTokenizer(val source: CharSource, val preserveWhitespace: Boolean = false) {
class PorkTokenizer(val source: CharSource) {
private var tokenStart: Int = 0
private fun isSymbol(c: Char): Boolean =
@ -22,12 +22,12 @@ class PorkTokenizer(val source: CharSource, val preserveWhitespace: Boolean = fa
var type = TokenType.Symbol
for (keyword in TokenType.Keywords) {
if (symbol == keyword.keyword) {
if (symbol == keyword.keyword?.text) {
type = keyword
}
}
return Token(type, symbol)
return Token(type, tokenStart, symbol)
}
private fun readIntLiteral(firstChar: Char): Token {
@ -37,7 +37,7 @@ class PorkTokenizer(val source: CharSource, val preserveWhitespace: Boolean = fa
append(source.next())
}
}
return Token(TokenType.IntLiteral, number)
return Token(TokenType.IntLiteral, tokenStart, number)
}
private fun readWhitespace(firstChar: Char): Token {
@ -48,7 +48,7 @@ class PorkTokenizer(val source: CharSource, val preserveWhitespace: Boolean = fa
append(char)
}
}
return Token(TokenType.Whitespace, whitespace)
return Token(TokenType.Whitespace, tokenStart, whitespace)
}
fun next(): Token {
@ -57,12 +57,13 @@ class PorkTokenizer(val source: CharSource, val preserveWhitespace: Boolean = fa
val char = source.next()
for (item in TokenType.SingleChars) {
if (item.char != char) {
val itemChar = item.singleChar!!.char
if (itemChar != char) {
continue
}
var type = item
var text = item.char.toString()
var text = itemChar.toString()
for (promotion in item.promotions) {
if (source.peek() != promotion.nextChar) {
continue
@ -71,15 +72,11 @@ class PorkTokenizer(val source: CharSource, val preserveWhitespace: Boolean = fa
type = promotion.type
text += nextChar
}
return Token(type, text)
return Token(type, tokenStart, text)
}
if (isWhitespace(char)) {
val whitespace = readWhitespace(char)
if (preserveWhitespace) {
return whitespace
}
continue
return readWhitespace(char)
}
if (isDigit(char)) {
@ -91,7 +88,7 @@ class PorkTokenizer(val source: CharSource, val preserveWhitespace: Boolean = fa
}
throw RuntimeException("Failed to parse: (${char}) next ${source.peek()}")
}
return TokenSource.EndOfFile
return Token.endOfFile(source.currentIndex)
}
fun tokenize(): TokenStream {

View File

@ -1,5 +1,10 @@
package gay.pizza.pork.parse
class Token(val type: TokenType, val text: String) {
/**
 * A single token: its [type], the [start] position supplied by whoever
 * created it, and its source [text].
 */
class Token(val type: TokenType, val start: Int, val text: String) {
    /** Renders the token as the type name followed by a space and the text. */
    override fun toString(): String = type.name + " " + text

    companion object {
        /** Creates an [TokenType.EndOfFile] token with empty text whose start is [size]. */
        fun endOfFile(size: Int): Token {
            return Token(type = TokenType.EndOfFile, start = size, text = "")
        }
    }
}

View File

@ -1,3 +0,0 @@
package gay.pizza.pork.parse
class TokenPromotion(val nextChar: Char, val type: TokenType)

View File

@ -1,7 +1,3 @@
package gay.pizza.pork.parse
interface TokenSource : PeekableSource<Token> {
companion object {
val EndOfFile = Token(TokenType.EndOfFile, "")
}
}
/** A [PeekableSource] whose elements are [Token]s; declares no members of its own. */
interface TokenSource : PeekableSource<Token>

View File

@ -1,5 +1,8 @@
package gay.pizza.pork.parse
/** An ordered list of [tokens]. */
class TokenStream(val tokens: List<Token>) {
    /** Returns a new stream containing every token whose type is not [TokenType.Whitespace]. */
    fun excludeAllWhitespace(): TokenStream {
        val significant = tokens.filterNot { token -> token.type == TokenType.Whitespace }
        return TokenStream(significant)
    }

    /** Delegates to the string form of the underlying token list. */
    override fun toString(): String = tokens.toString()
}

View File

@ -6,7 +6,7 @@ class TokenStreamSource(val stream: TokenStream) : TokenSource {
override fun next(): Token {
if (index == stream.tokens.size) {
return TokenSource.EndOfFile
return Token.endOfFile(stream.tokens.size)
}
val char = stream.tokens[index]
index++
@ -15,7 +15,7 @@ class TokenStreamSource(val stream: TokenStream) : TokenSource {
override fun peek(): Token {
if (index == stream.tokens.size) {
return TokenSource.EndOfFile
return Token.endOfFile(stream.tokens.size)
}
return stream.tokens[index]
}

View File

@ -1,33 +1,39 @@
package gay.pizza.pork.parse
enum class TokenType(val char: Char? = null, val keyword: String? = null, val promotions: List<TokenPromotion> = emptyList()) {
import gay.pizza.pork.parse.TokenTypeProperty.*
enum class TokenType(vararg properties: TokenTypeProperty) {
Symbol,
IntLiteral,
Equality,
Equals(char = '=', promotions = listOf(TokenPromotion('=', Equality))),
Plus(char = '+'),
Minus(char = '-'),
Multiply(char = '*'),
Divide(char = '/'),
LeftCurly(char = '{'),
RightCurly(char = '}'),
LeftBracket(char = '['),
RightBracket(char = ']'),
LeftParentheses(char = '('),
RightParentheses(char = ')'),
Negation(char = '!'),
Comma(char = ','),
False(keyword = "false"),
True(keyword = "true"),
In(keyword = "in"),
If(keyword = "if"),
Then(keyword = "then"),
Else(keyword = "else"),
Equals(SingleChar('='), Promotion('=', Equality)),
Plus(SingleChar('+')),
Minus(SingleChar('-')),
Multiply(SingleChar('*')),
Divide(SingleChar('/')),
LeftCurly(SingleChar('{')),
RightCurly(SingleChar('}')),
LeftBracket(SingleChar('[')),
RightBracket(SingleChar(']')),
LeftParentheses(SingleChar('(')),
RightParentheses(SingleChar(')')),
Negation(SingleChar('!')),
Comma(SingleChar(',')),
False(Keyword("false")),
True(Keyword("true")),
In(Keyword("in")),
If(Keyword("if")),
Then(Keyword("then")),
Else(Keyword("else")),
Whitespace,
EndOfFile;
val promotions: List<Promotion> = properties.filterIsInstance<Promotion>()
val keyword: Keyword? = properties.filterIsInstance<Keyword>().singleOrNull()
val singleChar: SingleChar? = properties.filterIsInstance<SingleChar>().singleOrNull()
companion object {
val Keywords = entries.filter { it.keyword != null }
val SingleChars = entries.filter { it.char != null }
val Keywords = entries.filter { item -> item.keyword != null }
val SingleChars = entries.filter { item -> item.singleChar != null }
}
}

View File

@ -0,0 +1,7 @@
package gay.pizza.pork.parse
/**
 * A property that can be passed to a [TokenType] entry's constructor.
 *
 * [TokenType] picks these apart by subclass into its `singleChar`, `promotions`
 * and `keyword` members.
 */
sealed class TokenTypeProperty {
/** The character the tokenizer compares against the character it just read to select this token type. */
class SingleChar(val char: Char) : TokenTypeProperty()
/** When the character following a matched single-char token equals [nextChar], the tokenizer re-types the token as [type]. */
class Promotion(val nextChar: Char, val type: TokenType) : TokenTypeProperty()
/** The [text] a scanned symbol is compared against to classify it as this keyword type. */
class Keyword(val text: String) : TokenTypeProperty()
}