mirror of
https://github.com/GayPizzaSpecifications/pork.git
synced 2025-08-03 21:21:33 +00:00
Tokenizer now always produces whitespace, parser ignores whitespace, and token DSL.
This commit is contained in:
@ -4,14 +4,10 @@ import gay.pizza.pork.ast.Program
|
|||||||
import gay.pizza.pork.eval.Arguments
|
import gay.pizza.pork.eval.Arguments
|
||||||
import gay.pizza.pork.eval.PorkEvaluator
|
import gay.pizza.pork.eval.PorkEvaluator
|
||||||
import gay.pizza.pork.eval.Scope
|
import gay.pizza.pork.eval.Scope
|
||||||
import gay.pizza.pork.parse.PorkParser
|
import gay.pizza.pork.parse.*
|
||||||
import gay.pizza.pork.parse.PorkTokenizer
|
|
||||||
import gay.pizza.pork.parse.StringCharSource
|
|
||||||
import gay.pizza.pork.parse.TokenStreamSource
|
|
||||||
import kotlin.io.path.Path
|
import kotlin.io.path.Path
|
||||||
import kotlin.io.path.readText
|
import kotlin.io.path.readText
|
||||||
|
|
||||||
fun main(args: Array<String>) {
|
|
||||||
fun eval(ast: Program) {
|
fun eval(ast: Program) {
|
||||||
val scope = Scope()
|
val scope = Scope()
|
||||||
val evaluator = PorkEvaluator(scope)
|
val evaluator = PorkEvaluator(scope)
|
||||||
@ -19,14 +15,21 @@ fun main(args: Array<String>) {
|
|||||||
println("> ${scope.call("main", Arguments.Zero)}")
|
println("> ${scope.call("main", Arguments.Zero)}")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fun main(args: Array<String>) {
|
||||||
val code = Path(args[0]).readText()
|
val code = Path(args[0]).readText()
|
||||||
val stream = PorkTokenizer(StringCharSource(code)).tokenize()
|
val stream = tokenize(code).excludeAllWhitespace()
|
||||||
println(stream.tokens.joinToString("\n"))
|
println(stream.tokens.joinToString("\n"))
|
||||||
val parser = PorkParser(TokenStreamSource(stream))
|
val program = parse(stream)
|
||||||
val program = parser.readProgram()
|
|
||||||
eval(program)
|
eval(program)
|
||||||
|
|
||||||
val exactStream = PorkTokenizer(StringCharSource(code), preserveWhitespace = true).tokenize()
|
val exactStream = tokenize(code)
|
||||||
val exactCode = exactStream.tokens.joinToString("") { it.text }
|
val exactCode = exactStream.tokens.joinToString("") { it.text }
|
||||||
println(exactCode)
|
println(exactCode)
|
||||||
|
println(code == exactCode)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fun tokenize(input: String): TokenStream =
|
||||||
|
PorkTokenizer(StringCharSource(input)).tokenize()
|
||||||
|
|
||||||
|
fun parse(stream: TokenStream): Program =
|
||||||
|
PorkParser(TokenStreamSource(stream)).readProgram()
|
||||||
|
@ -2,7 +2,9 @@ package gay.pizza.pork.parse
|
|||||||
|
|
||||||
import gay.pizza.pork.ast.*
|
import gay.pizza.pork.ast.*
|
||||||
|
|
||||||
class PorkParser(val source: PeekableSource<Token>) {
|
class PorkParser(source: PeekableSource<Token>) {
|
||||||
|
private val whitespaceIncludedSource = source
|
||||||
|
|
||||||
private fun readIntLiteral(): IntLiteral {
|
private fun readIntLiteral(): IntLiteral {
|
||||||
val token = expect(TokenType.IntLiteral)
|
val token = expect(TokenType.IntLiteral)
|
||||||
return IntLiteral(token.text.toInt())
|
return IntLiteral(token.text.toInt())
|
||||||
@ -61,7 +63,7 @@ class PorkParser(val source: PeekableSource<Token>) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fun readExpression(): Expression {
|
fun readExpression(): Expression {
|
||||||
val token = source.peek()
|
val token = peek()
|
||||||
val expression = when (token.type) {
|
val expression = when (token.type) {
|
||||||
TokenType.IntLiteral -> {
|
TokenType.IntLiteral -> {
|
||||||
readIntLiteral()
|
readIntLiteral()
|
||||||
@ -110,8 +112,13 @@ class PorkParser(val source: PeekableSource<Token>) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (peekType(TokenType.Plus, TokenType.Minus, TokenType.Multiply, TokenType.Divide, TokenType.Equality)) {
|
if (peekType(
|
||||||
val infixToken = source.next()
|
TokenType.Plus,
|
||||||
|
TokenType.Minus,
|
||||||
|
TokenType.Multiply,
|
||||||
|
TokenType.Divide,
|
||||||
|
TokenType.Equality)) {
|
||||||
|
val infixToken = next()
|
||||||
val infixOperator = convertInfixOperator(infixToken)
|
val infixOperator = convertInfixOperator(infixToken)
|
||||||
return InfixOperation(expression, infixOperator, readExpression())
|
return InfixOperation(expression, infixOperator, readExpression())
|
||||||
}
|
}
|
||||||
@ -155,15 +162,36 @@ class PorkParser(val source: PeekableSource<Token>) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private fun peekType(vararg types: TokenType): Boolean {
|
private fun peekType(vararg types: TokenType): Boolean {
|
||||||
val token = source.peek()
|
val token = peek()
|
||||||
return types.contains(token.type)
|
return types.contains(token.type)
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun expect(type: TokenType): Token {
|
private fun expect(type: TokenType): Token {
|
||||||
val token = source.next()
|
val token = next()
|
||||||
if (token.type != type) {
|
if (token.type != type) {
|
||||||
throw RuntimeException("Expected token type '${type}' but got type ${token.type} '${token.text}'")
|
throw RuntimeException("Expected token type '${type}' but got type ${token.type} '${token.text}'")
|
||||||
}
|
}
|
||||||
return token
|
return token
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private fun next(): Token {
|
||||||
|
while (true) {
|
||||||
|
val token = whitespaceIncludedSource.next()
|
||||||
|
if (token.type == TokenType.Whitespace) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
return token
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private fun peek(): Token {
|
||||||
|
while (true) {
|
||||||
|
val token = whitespaceIncludedSource.peek()
|
||||||
|
if (token.type == TokenType.Whitespace) {
|
||||||
|
whitespaceIncludedSource.next()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
return token
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
package gay.pizza.pork.parse
|
package gay.pizza.pork.parse
|
||||||
|
|
||||||
class PorkTokenizer(val source: CharSource, val preserveWhitespace: Boolean = false) {
|
class PorkTokenizer(val source: CharSource) {
|
||||||
private var tokenStart: Int = 0
|
private var tokenStart: Int = 0
|
||||||
|
|
||||||
private fun isSymbol(c: Char): Boolean =
|
private fun isSymbol(c: Char): Boolean =
|
||||||
@ -22,12 +22,12 @@ class PorkTokenizer(val source: CharSource, val preserveWhitespace: Boolean = fa
|
|||||||
|
|
||||||
var type = TokenType.Symbol
|
var type = TokenType.Symbol
|
||||||
for (keyword in TokenType.Keywords) {
|
for (keyword in TokenType.Keywords) {
|
||||||
if (symbol == keyword.keyword) {
|
if (symbol == keyword.keyword?.text) {
|
||||||
type = keyword
|
type = keyword
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return Token(type, symbol)
|
return Token(type, tokenStart, symbol)
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun readIntLiteral(firstChar: Char): Token {
|
private fun readIntLiteral(firstChar: Char): Token {
|
||||||
@ -37,7 +37,7 @@ class PorkTokenizer(val source: CharSource, val preserveWhitespace: Boolean = fa
|
|||||||
append(source.next())
|
append(source.next())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return Token(TokenType.IntLiteral, number)
|
return Token(TokenType.IntLiteral, tokenStart, number)
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun readWhitespace(firstChar: Char): Token {
|
private fun readWhitespace(firstChar: Char): Token {
|
||||||
@ -48,7 +48,7 @@ class PorkTokenizer(val source: CharSource, val preserveWhitespace: Boolean = fa
|
|||||||
append(char)
|
append(char)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return Token(TokenType.Whitespace, whitespace)
|
return Token(TokenType.Whitespace, tokenStart, whitespace)
|
||||||
}
|
}
|
||||||
|
|
||||||
fun next(): Token {
|
fun next(): Token {
|
||||||
@ -57,12 +57,13 @@ class PorkTokenizer(val source: CharSource, val preserveWhitespace: Boolean = fa
|
|||||||
val char = source.next()
|
val char = source.next()
|
||||||
|
|
||||||
for (item in TokenType.SingleChars) {
|
for (item in TokenType.SingleChars) {
|
||||||
if (item.char != char) {
|
val itemChar = item.singleChar!!.char
|
||||||
|
if (itemChar != char) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
var type = item
|
var type = item
|
||||||
var text = item.char.toString()
|
var text = itemChar.toString()
|
||||||
for (promotion in item.promotions) {
|
for (promotion in item.promotions) {
|
||||||
if (source.peek() != promotion.nextChar) {
|
if (source.peek() != promotion.nextChar) {
|
||||||
continue
|
continue
|
||||||
@ -71,15 +72,11 @@ class PorkTokenizer(val source: CharSource, val preserveWhitespace: Boolean = fa
|
|||||||
type = promotion.type
|
type = promotion.type
|
||||||
text += nextChar
|
text += nextChar
|
||||||
}
|
}
|
||||||
return Token(type, text)
|
return Token(type, tokenStart, text)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isWhitespace(char)) {
|
if (isWhitespace(char)) {
|
||||||
val whitespace = readWhitespace(char)
|
return readWhitespace(char)
|
||||||
if (preserveWhitespace) {
|
|
||||||
return whitespace
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isDigit(char)) {
|
if (isDigit(char)) {
|
||||||
@ -91,7 +88,7 @@ class PorkTokenizer(val source: CharSource, val preserveWhitespace: Boolean = fa
|
|||||||
}
|
}
|
||||||
throw RuntimeException("Failed to parse: (${char}) next ${source.peek()}")
|
throw RuntimeException("Failed to parse: (${char}) next ${source.peek()}")
|
||||||
}
|
}
|
||||||
return TokenSource.EndOfFile
|
return Token.endOfFile(source.currentIndex)
|
||||||
}
|
}
|
||||||
|
|
||||||
fun tokenize(): TokenStream {
|
fun tokenize(): TokenStream {
|
||||||
|
@ -1,5 +1,10 @@
|
|||||||
package gay.pizza.pork.parse
|
package gay.pizza.pork.parse
|
||||||
|
|
||||||
class Token(val type: TokenType, val text: String) {
|
class Token(val type: TokenType, val start: Int, val text: String) {
|
||||||
override fun toString(): String = "${type.name} $text"
|
override fun toString(): String = "${type.name} $text"
|
||||||
|
|
||||||
|
companion object {
|
||||||
|
fun endOfFile(size: Int): Token =
|
||||||
|
Token(TokenType.EndOfFile, size, "")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,3 +0,0 @@
|
|||||||
package gay.pizza.pork.parse
|
|
||||||
|
|
||||||
class TokenPromotion(val nextChar: Char, val type: TokenType)
|
|
@ -1,7 +1,3 @@
|
|||||||
package gay.pizza.pork.parse
|
package gay.pizza.pork.parse
|
||||||
|
|
||||||
interface TokenSource : PeekableSource<Token> {
|
interface TokenSource : PeekableSource<Token>
|
||||||
companion object {
|
|
||||||
val EndOfFile = Token(TokenType.EndOfFile, "")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
@ -1,5 +1,8 @@
|
|||||||
package gay.pizza.pork.parse
|
package gay.pizza.pork.parse
|
||||||
|
|
||||||
class TokenStream(val tokens: List<Token>) {
|
class TokenStream(val tokens: List<Token>) {
|
||||||
|
fun excludeAllWhitespace(): TokenStream =
|
||||||
|
TokenStream(tokens.filter { it.type != TokenType.Whitespace })
|
||||||
|
|
||||||
override fun toString(): String = tokens.toString()
|
override fun toString(): String = tokens.toString()
|
||||||
}
|
}
|
||||||
|
@ -6,7 +6,7 @@ class TokenStreamSource(val stream: TokenStream) : TokenSource {
|
|||||||
|
|
||||||
override fun next(): Token {
|
override fun next(): Token {
|
||||||
if (index == stream.tokens.size) {
|
if (index == stream.tokens.size) {
|
||||||
return TokenSource.EndOfFile
|
return Token.endOfFile(stream.tokens.size)
|
||||||
}
|
}
|
||||||
val char = stream.tokens[index]
|
val char = stream.tokens[index]
|
||||||
index++
|
index++
|
||||||
@ -15,7 +15,7 @@ class TokenStreamSource(val stream: TokenStream) : TokenSource {
|
|||||||
|
|
||||||
override fun peek(): Token {
|
override fun peek(): Token {
|
||||||
if (index == stream.tokens.size) {
|
if (index == stream.tokens.size) {
|
||||||
return TokenSource.EndOfFile
|
return Token.endOfFile(stream.tokens.size)
|
||||||
}
|
}
|
||||||
return stream.tokens[index]
|
return stream.tokens[index]
|
||||||
}
|
}
|
||||||
|
@ -1,33 +1,39 @@
|
|||||||
package gay.pizza.pork.parse
|
package gay.pizza.pork.parse
|
||||||
|
|
||||||
enum class TokenType(val char: Char? = null, val keyword: String? = null, val promotions: List<TokenPromotion> = emptyList()) {
|
import gay.pizza.pork.parse.TokenTypeProperty.*
|
||||||
|
|
||||||
|
enum class TokenType(vararg properties: TokenTypeProperty) {
|
||||||
Symbol,
|
Symbol,
|
||||||
IntLiteral,
|
IntLiteral,
|
||||||
Equality,
|
Equality,
|
||||||
Equals(char = '=', promotions = listOf(TokenPromotion('=', Equality))),
|
Equals(SingleChar('='), Promotion('=', Equality)),
|
||||||
Plus(char = '+'),
|
Plus(SingleChar('+')),
|
||||||
Minus(char = '-'),
|
Minus(SingleChar('-')),
|
||||||
Multiply(char = '*'),
|
Multiply(SingleChar('*')),
|
||||||
Divide(char = '/'),
|
Divide(SingleChar('/')),
|
||||||
LeftCurly(char = '{'),
|
LeftCurly(SingleChar('{')),
|
||||||
RightCurly(char = '}'),
|
RightCurly(SingleChar('}')),
|
||||||
LeftBracket(char = '['),
|
LeftBracket(SingleChar('[')),
|
||||||
RightBracket(char = ']'),
|
RightBracket(SingleChar(']')),
|
||||||
LeftParentheses(char = '('),
|
LeftParentheses(SingleChar('(')),
|
||||||
RightParentheses(char = ')'),
|
RightParentheses(SingleChar(')')),
|
||||||
Negation(char = '!'),
|
Negation(SingleChar('!')),
|
||||||
Comma(char = ','),
|
Comma(SingleChar(',')),
|
||||||
False(keyword = "false"),
|
False(Keyword("false")),
|
||||||
True(keyword = "true"),
|
True(Keyword("true")),
|
||||||
In(keyword = "in"),
|
In(Keyword("in")),
|
||||||
If(keyword = "if"),
|
If(Keyword("if")),
|
||||||
Then(keyword = "then"),
|
Then(Keyword("then")),
|
||||||
Else(keyword = "else"),
|
Else(Keyword("else")),
|
||||||
Whitespace,
|
Whitespace,
|
||||||
EndOfFile;
|
EndOfFile;
|
||||||
|
|
||||||
|
val promotions: List<Promotion> = properties.filterIsInstance<Promotion>()
|
||||||
|
val keyword: Keyword? = properties.filterIsInstance<Keyword>().singleOrNull()
|
||||||
|
val singleChar: SingleChar? = properties.filterIsInstance<SingleChar>().singleOrNull()
|
||||||
|
|
||||||
companion object {
|
companion object {
|
||||||
val Keywords = entries.filter { it.keyword != null }
|
val Keywords = entries.filter { item -> item.keyword != null }
|
||||||
val SingleChars = entries.filter { it.char != null }
|
val SingleChars = entries.filter { item -> item.singleChar != null }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,7 @@
|
|||||||
|
package gay.pizza.pork.parse
|
||||||
|
|
||||||
|
sealed class TokenTypeProperty {
|
||||||
|
class SingleChar(val char: Char) : TokenTypeProperty()
|
||||||
|
class Promotion(val nextChar: Char, val type: TokenType) : TokenTypeProperty()
|
||||||
|
class Keyword(val text: String) : TokenTypeProperty()
|
||||||
|
}
|
Reference in New Issue
Block a user