mirror of
https://github.com/GayPizzaSpecifications/pork.git
synced 2025-08-02 12:50:55 +00:00
Tokenizer now always produces whitespace tokens, the parser skips whitespace, and token types are declared through a property DSL.
This commit is contained in:
parent
ccd10343c3
commit
ce1e262c05
@ -4,29 +4,32 @@ import gay.pizza.pork.ast.Program
|
||||
import gay.pizza.pork.eval.Arguments
|
||||
import gay.pizza.pork.eval.PorkEvaluator
|
||||
import gay.pizza.pork.eval.Scope
|
||||
import gay.pizza.pork.parse.PorkParser
|
||||
import gay.pizza.pork.parse.PorkTokenizer
|
||||
import gay.pizza.pork.parse.StringCharSource
|
||||
import gay.pizza.pork.parse.TokenStreamSource
|
||||
import gay.pizza.pork.parse.*
|
||||
import kotlin.io.path.Path
|
||||
import kotlin.io.path.readText
|
||||
|
||||
fun main(args: Array<String>) {
|
||||
fun eval(ast: Program) {
|
||||
val scope = Scope()
|
||||
val evaluator = PorkEvaluator(scope)
|
||||
evaluator.visit(ast)
|
||||
println("> ${scope.call("main", Arguments.Zero)}")
|
||||
}
|
||||
/**
 * Evaluates [ast] in a fresh [Scope] and prints the result of calling `main`.
 *
 * The program is visited by a [PorkEvaluator] bound to the new scope; `main` is then
 * called through that scope with [Arguments.Zero] and the result is printed after "> ".
 */
fun eval(ast: Program) {
    val globals = Scope()
    PorkEvaluator(globals).visit(ast)
    val result = globals.call("main", Arguments.Zero)
    println("> $result")
}
|
||||
|
||||
fun main(args: Array<String>) {
|
||||
val code = Path(args[0]).readText()
|
||||
val stream = PorkTokenizer(StringCharSource(code)).tokenize()
|
||||
val stream = tokenize(code).excludeAllWhitespace()
|
||||
println(stream.tokens.joinToString("\n"))
|
||||
val parser = PorkParser(TokenStreamSource(stream))
|
||||
val program = parser.readProgram()
|
||||
val program = parse(stream)
|
||||
eval(program)
|
||||
|
||||
val exactStream = PorkTokenizer(StringCharSource(code), preserveWhitespace = true).tokenize()
|
||||
val exactStream = tokenize(code)
|
||||
val exactCode = exactStream.tokens.joinToString("") { it.text }
|
||||
println(exactCode)
|
||||
println(code == exactCode)
|
||||
}
|
||||
|
||||
/**
 * Tokenizes [input] into a [TokenStream].
 *
 * Wraps the text in a [StringCharSource] and runs a [PorkTokenizer] over it.
 */
fun tokenize(input: String): TokenStream {
    val chars = StringCharSource(input)
    return PorkTokenizer(chars).tokenize()
}
|
||||
|
||||
/**
 * Parses [stream] into a [Program].
 *
 * Hands the tokens to a [PorkParser] through a [TokenStreamSource] and reads one program.
 */
fun parse(stream: TokenStream): Program {
    val tokens = TokenStreamSource(stream)
    return PorkParser(tokens).readProgram()
}
|
||||
|
@ -2,7 +2,9 @@ package gay.pizza.pork.parse
|
||||
|
||||
import gay.pizza.pork.ast.*
|
||||
|
||||
class PorkParser(val source: PeekableSource<Token>) {
|
||||
class PorkParser(source: PeekableSource<Token>) {
|
||||
private val whitespaceIncludedSource = source
|
||||
|
||||
private fun readIntLiteral(): IntLiteral {
|
||||
val token = expect(TokenType.IntLiteral)
|
||||
return IntLiteral(token.text.toInt())
|
||||
@ -61,7 +63,7 @@ class PorkParser(val source: PeekableSource<Token>) {
|
||||
}
|
||||
|
||||
fun readExpression(): Expression {
|
||||
val token = source.peek()
|
||||
val token = peek()
|
||||
val expression = when (token.type) {
|
||||
TokenType.IntLiteral -> {
|
||||
readIntLiteral()
|
||||
@ -110,8 +112,13 @@ class PorkParser(val source: PeekableSource<Token>) {
|
||||
}
|
||||
}
|
||||
|
||||
if (peekType(TokenType.Plus, TokenType.Minus, TokenType.Multiply, TokenType.Divide, TokenType.Equality)) {
|
||||
val infixToken = source.next()
|
||||
if (peekType(
|
||||
TokenType.Plus,
|
||||
TokenType.Minus,
|
||||
TokenType.Multiply,
|
||||
TokenType.Divide,
|
||||
TokenType.Equality)) {
|
||||
val infixToken = next()
|
||||
val infixOperator = convertInfixOperator(infixToken)
|
||||
return InfixOperation(expression, infixOperator, readExpression())
|
||||
}
|
||||
@ -155,15 +162,36 @@ class PorkParser(val source: PeekableSource<Token>) {
|
||||
}
|
||||
|
||||
private fun peekType(vararg types: TokenType): Boolean {
|
||||
val token = source.peek()
|
||||
val token = peek()
|
||||
return types.contains(token.type)
|
||||
}
|
||||
|
||||
private fun expect(type: TokenType): Token {
|
||||
val token = source.next()
|
||||
val token = next()
|
||||
if (token.type != type) {
|
||||
throw RuntimeException("Expected token type '${type}' but got type ${token.type} '${token.text}'")
|
||||
}
|
||||
return token
|
||||
}
|
||||
|
||||
/**
 * Consumes and returns the next token whose type is not [TokenType.Whitespace].
 *
 * Whitespace tokens met on the way are consumed from the underlying source and dropped.
 */
private fun next(): Token {
    var candidate = whitespaceIncludedSource.next()
    while (candidate.type == TokenType.Whitespace) {
        candidate = whitespaceIncludedSource.next()
    }
    return candidate
}
|
||||
|
||||
/**
 * Returns the next non-whitespace token without consuming it.
 *
 * Side effect: any [TokenType.Whitespace] tokens sitting in front of it are consumed
 * from the underlying source, so the returned token is the one that source yields next.
 */
private fun peek(): Token {
    var upcoming = whitespaceIncludedSource.peek()
    while (upcoming.type == TokenType.Whitespace) {
        // Drop the whitespace token, then look again.
        whitespaceIncludedSource.next()
        upcoming = whitespaceIncludedSource.peek()
    }
    return upcoming
}
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
package gay.pizza.pork.parse
|
||||
|
||||
class PorkTokenizer(val source: CharSource, val preserveWhitespace: Boolean = false) {
|
||||
class PorkTokenizer(val source: CharSource) {
|
||||
private var tokenStart: Int = 0
|
||||
|
||||
private fun isSymbol(c: Char): Boolean =
|
||||
@ -22,12 +22,12 @@ class PorkTokenizer(val source: CharSource, val preserveWhitespace: Boolean = fa
|
||||
|
||||
var type = TokenType.Symbol
|
||||
for (keyword in TokenType.Keywords) {
|
||||
if (symbol == keyword.keyword) {
|
||||
if (symbol == keyword.keyword?.text) {
|
||||
type = keyword
|
||||
}
|
||||
}
|
||||
|
||||
return Token(type, symbol)
|
||||
return Token(type, tokenStart, symbol)
|
||||
}
|
||||
|
||||
private fun readIntLiteral(firstChar: Char): Token {
|
||||
@ -37,7 +37,7 @@ class PorkTokenizer(val source: CharSource, val preserveWhitespace: Boolean = fa
|
||||
append(source.next())
|
||||
}
|
||||
}
|
||||
return Token(TokenType.IntLiteral, number)
|
||||
return Token(TokenType.IntLiteral, tokenStart, number)
|
||||
}
|
||||
|
||||
private fun readWhitespace(firstChar: Char): Token {
|
||||
@ -48,7 +48,7 @@ class PorkTokenizer(val source: CharSource, val preserveWhitespace: Boolean = fa
|
||||
append(char)
|
||||
}
|
||||
}
|
||||
return Token(TokenType.Whitespace, whitespace)
|
||||
return Token(TokenType.Whitespace, tokenStart, whitespace)
|
||||
}
|
||||
|
||||
fun next(): Token {
|
||||
@ -57,12 +57,13 @@ class PorkTokenizer(val source: CharSource, val preserveWhitespace: Boolean = fa
|
||||
val char = source.next()
|
||||
|
||||
for (item in TokenType.SingleChars) {
|
||||
if (item.char != char) {
|
||||
val itemChar = item.singleChar!!.char
|
||||
if (itemChar != char) {
|
||||
continue
|
||||
}
|
||||
|
||||
var type = item
|
||||
var text = item.char.toString()
|
||||
var text = itemChar.toString()
|
||||
for (promotion in item.promotions) {
|
||||
if (source.peek() != promotion.nextChar) {
|
||||
continue
|
||||
@ -71,15 +72,11 @@ class PorkTokenizer(val source: CharSource, val preserveWhitespace: Boolean = fa
|
||||
type = promotion.type
|
||||
text += nextChar
|
||||
}
|
||||
return Token(type, text)
|
||||
return Token(type, tokenStart, text)
|
||||
}
|
||||
|
||||
if (isWhitespace(char)) {
|
||||
val whitespace = readWhitespace(char)
|
||||
if (preserveWhitespace) {
|
||||
return whitespace
|
||||
}
|
||||
continue
|
||||
return readWhitespace(char)
|
||||
}
|
||||
|
||||
if (isDigit(char)) {
|
||||
@ -91,7 +88,7 @@ class PorkTokenizer(val source: CharSource, val preserveWhitespace: Boolean = fa
|
||||
}
|
||||
throw RuntimeException("Failed to parse: (${char}) next ${source.peek()}")
|
||||
}
|
||||
return TokenSource.EndOfFile
|
||||
return Token.endOfFile(source.currentIndex)
|
||||
}
|
||||
|
||||
fun tokenize(): TokenStream {
|
||||
|
@ -1,5 +1,10 @@
|
||||
package gay.pizza.pork.parse
|
||||
|
||||
class Token(val type: TokenType, val text: String) {
|
||||
/**
 * A single token: its [type], a [start] value supplied by whoever creates it
 * (the tokenizer passes its `tokenStart`), and the token's [text].
 */
class Token(val type: TokenType, val start: Int, val text: String) {
    /** Renders the token as the type name, a space, then the text. */
    override fun toString(): String {
        return type.name + " " + text
    }

    companion object {
        /** Creates an [TokenType.EndOfFile] token with empty text whose [start] is [size]. */
        fun endOfFile(size: Int): Token {
            return Token(type = TokenType.EndOfFile, start = size, text = "")
        }
    }
}
|
||||
|
@ -1,3 +0,0 @@
|
||||
package gay.pizza.pork.parse
|
||||
|
||||
class TokenPromotion(val nextChar: Char, val type: TokenType)
|
@ -1,7 +1,3 @@
|
||||
package gay.pizza.pork.parse
|
||||
|
||||
interface TokenSource : PeekableSource<Token> {
|
||||
companion object {
|
||||
val EndOfFile = Token(TokenType.EndOfFile, "")
|
||||
}
|
||||
}
|
||||
/** A [PeekableSource] whose elements are [Token]s; it declares no members of its own. */
interface TokenSource : PeekableSource<Token>
|
||||
|
@ -1,5 +1,8 @@
|
||||
package gay.pizza.pork.parse
|
||||
|
||||
/**
 * An ordered list of [tokens].
 */
class TokenStream(val tokens: List<Token>) {
    /** Returns a new stream holding every token whose type is not [TokenType.Whitespace]. */
    fun excludeAllWhitespace(): TokenStream {
        val significant = tokens.filterNot { token -> token.type == TokenType.Whitespace }
        return TokenStream(significant)
    }

    /** Uses the string form of the underlying token list. */
    override fun toString(): String = "$tokens"
}
|
||||
|
@ -6,7 +6,7 @@ class TokenStreamSource(val stream: TokenStream) : TokenSource {
|
||||
|
||||
override fun next(): Token {
|
||||
if (index == stream.tokens.size) {
|
||||
return TokenSource.EndOfFile
|
||||
return Token.endOfFile(stream.tokens.size)
|
||||
}
|
||||
val char = stream.tokens[index]
|
||||
index++
|
||||
@ -15,7 +15,7 @@ class TokenStreamSource(val stream: TokenStream) : TokenSource {
|
||||
|
||||
override fun peek(): Token {
|
||||
if (index == stream.tokens.size) {
|
||||
return TokenSource.EndOfFile
|
||||
return Token.endOfFile(stream.tokens.size)
|
||||
}
|
||||
return stream.tokens[index]
|
||||
}
|
||||
|
@ -1,33 +1,39 @@
|
||||
package gay.pizza.pork.parse
|
||||
|
||||
enum class TokenType(val char: Char? = null, val keyword: String? = null, val promotions: List<TokenPromotion> = emptyList()) {
|
||||
import gay.pizza.pork.parse.TokenTypeProperty.*
|
||||
|
||||
enum class TokenType(vararg properties: TokenTypeProperty) {
|
||||
Symbol,
|
||||
IntLiteral,
|
||||
Equality,
|
||||
Equals(char = '=', promotions = listOf(TokenPromotion('=', Equality))),
|
||||
Plus(char = '+'),
|
||||
Minus(char = '-'),
|
||||
Multiply(char = '*'),
|
||||
Divide(char = '/'),
|
||||
LeftCurly(char = '{'),
|
||||
RightCurly(char = '}'),
|
||||
LeftBracket(char = '['),
|
||||
RightBracket(char = ']'),
|
||||
LeftParentheses(char = '('),
|
||||
RightParentheses(char = ')'),
|
||||
Negation(char = '!'),
|
||||
Comma(char = ','),
|
||||
False(keyword = "false"),
|
||||
True(keyword = "true"),
|
||||
In(keyword = "in"),
|
||||
If(keyword = "if"),
|
||||
Then(keyword = "then"),
|
||||
Else(keyword = "else"),
|
||||
Equals(SingleChar('='), Promotion('=', Equality)),
|
||||
Plus(SingleChar('+')),
|
||||
Minus(SingleChar('-')),
|
||||
Multiply(SingleChar('*')),
|
||||
Divide(SingleChar('/')),
|
||||
LeftCurly(SingleChar('{')),
|
||||
RightCurly(SingleChar('}')),
|
||||
LeftBracket(SingleChar('[')),
|
||||
RightBracket(SingleChar(']')),
|
||||
LeftParentheses(SingleChar('(')),
|
||||
RightParentheses(SingleChar(')')),
|
||||
Negation(SingleChar('!')),
|
||||
Comma(SingleChar(',')),
|
||||
False(Keyword("false")),
|
||||
True(Keyword("true")),
|
||||
In(Keyword("in")),
|
||||
If(Keyword("if")),
|
||||
Then(Keyword("then")),
|
||||
Else(Keyword("else")),
|
||||
Whitespace,
|
||||
EndOfFile;
|
||||
|
||||
val promotions: List<Promotion> = properties.filterIsInstance<Promotion>()
|
||||
val keyword: Keyword? = properties.filterIsInstance<Keyword>().singleOrNull()
|
||||
val singleChar: SingleChar? = properties.filterIsInstance<SingleChar>().singleOrNull()
|
||||
|
||||
companion object {
|
||||
val Keywords = entries.filter { it.keyword != null }
|
||||
val SingleChars = entries.filter { it.char != null }
|
||||
val Keywords = entries.filter { item -> item.keyword != null }
|
||||
val SingleChars = entries.filter { item -> item.singleChar != null }
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,7 @@
|
||||
package gay.pizza.pork.parse
|
||||
|
||||
/**
 * Properties that can be passed to a [TokenType] entry through its vararg constructor.
 */
sealed class TokenTypeProperty {
    /** Keyword spelling; the tokenizer compares a read symbol against [text]. */
    class Keyword(val text: String) : TokenTypeProperty()

    /** The character [char] that the tokenizer matches against the character it just read. */
    class SingleChar(val char: Char) : TokenTypeProperty()

    /**
     * A follow-up character [nextChar] paired with a token [type]; the tokenizer
     * switches to [type] when the next character in its source equals [nextChar].
     */
    class Promotion(val nextChar: Char, val type: TokenType) : TokenTypeProperty()
}
|
Loading…
Reference in New Issue
Block a user