Split out all code into modules.

This commit is contained in:
2023-09-04 01:56:24 -07:00
parent d46ea1e307
commit 128f40bcf4
53 changed files with 119 additions and 81 deletions

9
parser/build.gradle.kts Normal file
View File

@ -0,0 +1,9 @@
// Build configuration for the parser module.
plugins {
  // Shared convention plugin applied to all pork modules (defined in buildSrc).
  pork_module
}

dependencies {
  // AST node types appear in the parser's public API (Parser returns Node subtypes),
  // so the :ast dependency must be `api` to be visible to consumers.
  api(project(":ast"))
  // :common is an implementation detail (e.g. IndentPrinter) and is not re-exported.
  implementation(project(":common"))
}

View File

@ -0,0 +1,36 @@
package gay.pizza.pork.parser
/**
 * A [HighlightScheme] that colorizes tokens using ANSI SGR escape sequences,
 * suitable for terminal output. Subclasses may override the per-family
 * color methods to customize the palette.
 */
open class AnsiHighlightScheme : HighlightScheme {
  /**
   * Maps the token's family to an ANSI attribute set and wraps the token
   * text in the corresponding escape sequence. Families without a mapping
   * (e.g. numeric literals) pass through uncolored.
   */
  override fun highlight(token: Token): Highlight {
    val attributes = when (token.type.family) {
      TokenFamily.KeywordFamily -> keyword()
      TokenFamily.OperatorFamily -> operator()
      TokenFamily.SymbolFamily -> symbol()
      TokenFamily.StringLiteralFamily -> string()
      TokenFamily.CommentFamily -> comment()
      else -> null
    }
    return when (attributes) {
      null -> Highlight(token)
      else -> Highlight(token, wrap(attributes, token.text))
    }
  }

  /** Green (SGR 32) for string literals. */
  open fun string(): AnsiAttributes =
    AnsiAttributes("32m")

  /** Yellow (SGR 33) for symbols. */
  open fun symbol(): AnsiAttributes =
    AnsiAttributes("33m")

  /** Blue (SGR 34) for operators. */
  open fun operator(): AnsiAttributes =
    AnsiAttributes("34m")

  /** Magenta (SGR 35) for keywords. */
  open fun keyword(): AnsiAttributes =
    AnsiAttributes("35m")

  /** White (SGR 37) for comments. */
  open fun comment(): AnsiAttributes =
    AnsiAttributes("37m")

  // Surrounds text with the CSI color sequence and a trailing reset (SGR 0).
  private fun wrap(attributes: AnsiAttributes, text: String): String =
    "\u001b[" + attributes.color + text + "\u001b[0m"

  /** Holds the SGR parameter string (including the trailing 'm') for one color. */
  class AnsiAttributes(
    val color: String
  )
}

View File

@ -0,0 +1,8 @@
package gay.pizza.pork.parser
/**
 * A peekable stream of characters consumed by the [Tokenizer].
 *
 * End-of-input is signaled in-band: [PeekableSource.next] and
 * [PeekableSource.peek] return [NullChar] once the source is exhausted,
 * rather than throwing.
 */
interface CharSource : PeekableSource<Char> {
  companion object {
    // Sentinel returned past the end of input; the NUL character ('\u0000').
    @Suppress("ConstPropertyName")
    const val NullChar = 0.toChar()
  }
}

View File

@ -0,0 +1,9 @@
package gay.pizza.pork.parser
import gay.pizza.pork.ast.Node
/**
 * A no-op [NodeAttribution]: tokens pushed during parsing are dropped and
 * nodes pass through [exit] untouched. Use when token-to-node mapping is
 * not needed (cheapest attribution strategy).
 */
object DiscardNodeAttribution : NodeAttribution {
  override fun enter() {}
  override fun push(token: Token) {}
  override fun <T : Node> exit(node: T): T = node
}

View File

@ -0,0 +1,5 @@
package gay.pizza.pork.parser
/**
 * A token paired with optional replacement display text (e.g. the token text
 * wrapped in ANSI color codes). When [text] is null the token is rendered
 * verbatim.
 */
class Highlight(val token: Token, val text: String? = null) {
  // Falls back to the raw token text when no highlighted form was produced.
  override fun toString(): String = text ?: token.text
}

View File

@ -0,0 +1,5 @@
package gay.pizza.pork.parser
/**
 * Strategy for turning a single [Token] into a [Highlight]
 * (token plus optional styled display text).
 */
interface HighlightScheme {
  fun highlight(token: Token): Highlight
}

View File

@ -0,0 +1,6 @@
package gay.pizza.pork.parser
/**
 * Applies a [HighlightScheme] to every token of a [TokenStream],
 * producing one [Highlight] per token in stream order.
 */
class Highlighter(val scheme: HighlightScheme) {
  fun highlight(stream: TokenStream): List<Highlight> {
    val highlights = mutableListOf<Highlight>()
    for (token in stream.tokens) {
      highlights.add(scheme.highlight(token))
    }
    return highlights
  }
}

View File

@ -0,0 +1,9 @@
package gay.pizza.pork.parser
import gay.pizza.pork.ast.Node
/**
 * Callback interface the [Parser] uses to associate consumed tokens with the
 * AST nodes they produced.
 *
 * Protocol: [enter] is called when the parser begins a node, [push] for each
 * token consumed while that node is open (including ignored trivia), and
 * [exit] when the node is complete. Calls nest like a stack, mirroring the
 * recursive descent. [exit] must return the node (possibly decorated).
 */
interface NodeAttribution {
  fun enter()
  fun push(token: Token)
  fun <T: Node> exit(node: T): T
}

View File

@ -0,0 +1,337 @@
package gay.pizza.pork.parser
import gay.pizza.pork.ast.*
/**
 * Recursive-descent parser for the pork language.
 *
 * Reads tokens from [source] and builds AST nodes, reporting every consumed
 * token to [attribution] so callers can map nodes back to their tokens.
 * Comment and whitespace tokens are consumed transparently (see
 * [ignoredByParser]) but still pushed to the attribution.
 *
 * Entry points: [readCompilationUnit], [readExpression], [readDeclaration].
 */
class Parser(source: PeekableSource<Token>, val attribution: NodeAttribution) {
  // Raw token source; "unsanitized" because trivia (comments/whitespace) has
  // not been filtered out — next()/peek() below skip it on the fly.
  private val unsanitizedSource = source

  // Literal: a decimal integer token, e.g. `42`.
  private fun readIntLiteral(): IntLiteral = within {
    expect(TokenType.IntLiteral) { IntLiteral(it.text.toInt()) }
  }

  // Literal: a quoted string; quotes are stripped and escapes resolved.
  private fun readStringLiteral(): StringLiteral = within {
    expect(TokenType.StringLiteral) {
      val content = StringEscape.unescape(StringEscape.unquote(it.text))
      StringLiteral(content)
    }
  }

  // Literal: `true` or `false`.
  private fun readBooleanLiteral(): BooleanLiteral = within {
    expect(TokenType.True, TokenType.False) {
      BooleanLiteral(it.type == TokenType.True)
    }
  }

  // Literal: `[expr, expr, ...]` — trailing comma tolerated by collect().
  private fun readListLiteral(): ListLiteral = within {
    expect(TokenType.LeftBracket)
    val items = collect(TokenType.RightBracket, TokenType.Comma) {
      readExpression()
    }
    expect(TokenType.RightBracket)
    ListLiteral(items)
  }

  // A bare symbol token as a Symbol node (no call/assignment lookahead).
  private fun readSymbolRaw(): Symbol = within {
    expect(TokenType.Symbol) { Symbol(it.text) }
  }

  // Disambiguates a leading symbol by one token of lookahead:
  //   symbol(...)  -> FunctionCall
  //   symbol = ... -> Assignment
  //   symbol       -> SymbolReference
  private fun readSymbolCases(): Expression = within {
    val symbol = readSymbolRaw()
    if (next(TokenType.LeftParentheses)) {
      val arguments = collect(TokenType.RightParentheses, TokenType.Comma) {
        readExpression()
      }
      expect(TokenType.RightParentheses)
      FunctionCall(symbol, arguments)
    } else if (next(TokenType.Equals)) {
      Assignment(symbol, readExpression())
    } else {
      SymbolReference(symbol)
    }
  }

  // Lambda: `{ a, b in expr... }`. Argument list runs until `in`;
  // a missing comma after an argument also ends the list.
  private fun readLambda(): Lambda = within {
    expect(TokenType.LeftCurly)
    val arguments = mutableListOf<Symbol>()
    while (!peek(TokenType.In)) {
      val symbol = readSymbolRaw()
      arguments.add(symbol)
      if (next(TokenType.Comma)) {
        continue
      } else {
        break
      }
    }
    expect(TokenType.In)
    val items = collect(TokenType.RightCurly) {
      readExpression()
    }
    expect(TokenType.RightCurly)
    Lambda(arguments, items)
  }

  // Grouping: `(expr)`.
  private fun readParentheses(): Parentheses = within {
    expect(TokenType.LeftParentheses)
    val expression = readExpression()
    expect(TokenType.RightParentheses)
    Parentheses(expression)
  }

  // Prefix operation: currently only `!expr` (negation).
  private fun readPrefixOperation(): PrefixOperation = within {
    expect(TokenType.Negation) {
      PrefixOperation(PrefixOperator.Negate, readExpression())
    }
  }

  // Conditional: `if cond then expr [else expr]`; else branch optional.
  private fun readIf(): If = within {
    expect(TokenType.If)
    val condition = readExpression()
    expect(TokenType.Then)
    val thenExpression = readExpression()
    var elseExpression: Expression? = null
    if (next(TokenType.Else)) {
      elseExpression = readExpression()
    }
    If(condition, thenExpression, elseExpression)
  }

  /**
   * Parses a single expression. Dispatches on the leading token, then checks
   * for a trailing infix operator; infix chains associate to the right
   * (a + b + c parses as a + (b + c)) with no precedence levels.
   */
  fun readExpression(): Expression {
    val token = peek()
    val expression = when (token.type) {
      TokenType.IntLiteral -> {
        readIntLiteral()
      }
      TokenType.StringLiteral -> {
        readStringLiteral()
      }
      TokenType.True, TokenType.False -> {
        readBooleanLiteral()
      }
      TokenType.LeftBracket -> {
        readListLiteral()
      }
      TokenType.Symbol -> {
        readSymbolCases()
      }
      TokenType.LeftCurly -> {
        readLambda()
      }
      TokenType.LeftParentheses -> {
        readParentheses()
      }
      TokenType.Negation -> {
        readPrefixOperation()
      }
      TokenType.If -> {
        readIf()
      }
      else -> {
        throw RuntimeException(
          "Failed to parse token: ${token.type} '${token.text}' as" +
            " expression (index ${unsanitizedSource.currentIndex})"
        )
      }
    }
    return if (peek(
        TokenType.Plus,
        TokenType.Minus,
        TokenType.Multiply,
        TokenType.Divide,
        TokenType.Equality,
        TokenType.Inequality
      )
    ) {
      within {
        val infixToken = next()
        val infixOperator = convertInfixOperator(infixToken)
        InfixOperation(expression, infixOperator, readExpression())
      }
    } else expression
  }

  // Block: `{ expr... }` — a sequence of expressions, no separators required.
  private fun readBlock(): Block = within {
    expect(TokenType.LeftCurly)
    val items = collect(TokenType.RightCurly) {
      readExpression()
    }
    expect(TokenType.RightCurly)
    Block(items)
  }

  // Declaration: `import "path"`.
  private fun readImportDeclaration(): ImportDeclaration = within {
    expect(TokenType.Import)
    ImportDeclaration(readStringLiteral())
  }

  // Definition: `[export] func name(args...) { ... }`.
  // Leading modifier keywords are consumed in a loop (currently only export).
  private fun readFunctionDeclaration(): FunctionDefinition = within {
    val modifiers = DefinitionModifiers(export = false)
    while (true) {
      val token = peek()
      when (token.type) {
        TokenType.Export -> {
          expect(TokenType.Export)
          modifiers.export = true
        }
        else -> break
      }
    }
    expect(TokenType.Func)
    val name = readSymbolRaw()
    expect(TokenType.LeftParentheses)
    val arguments = collect(TokenType.RightParentheses, TokenType.Comma) { readSymbolRaw() }
    expect(TokenType.RightParentheses)
    FunctionDefinition(modifiers, name, arguments, readBlock())
  }

  // Returns a definition if the next token starts one, otherwise null
  // (used to decide between declarations and definitions at top level).
  private fun maybeReadDefinition(): Definition? {
    val token = peek()
    return when (token.type) {
      TokenType.Export,
      TokenType.Func -> readFunctionDeclaration()
      else -> null
    }
  }

  // Like maybeReadDefinition() but a definition is required here.
  private fun readDefinition(): Definition {
    val definition = maybeReadDefinition()
    if (definition != null) {
      return definition
    }
    val token = peek()
    throw RuntimeException(
      "Failed to parse token: ${token.type} '${token.text}' as" +
        " definition (index ${unsanitizedSource.currentIndex})"
    )
  }

  /** Parses a top-level declaration; only `import` exists today. */
  fun readDeclaration(): Declaration {
    val token = peek()
    return when (token.type) {
      TokenType.Import -> readImportDeclaration()
      else -> throw RuntimeException(
        "Failed to parse token: ${token.type} '${token.text}' as" +
          " declaration (index ${unsanitizedSource.currentIndex})"
      )
    }
  }

  // Maps an operator token to its InfixOperator; caller guarantees the token
  // is one of the six operators peeked in readExpression().
  private fun convertInfixOperator(token: Token): InfixOperator =
    when (token.type) {
      TokenType.Plus -> InfixOperator.Plus
      TokenType.Minus -> InfixOperator.Minus
      TokenType.Multiply -> InfixOperator.Multiply
      TokenType.Divide -> InfixOperator.Divide
      TokenType.Equality -> InfixOperator.Equals
      TokenType.Inequality -> InfixOperator.NotEquals
      else -> throw RuntimeException("Unknown Infix Operator")
    }

  /**
   * Parses a whole file: declarations first, then definitions. Once the first
   * definition is seen, no further declarations are accepted
   * (declarationAccepted latches to false).
   */
  fun readCompilationUnit(): CompilationUnit = within {
    val declarations = mutableListOf<Declaration>()
    val definitions = mutableListOf<Definition>()
    var declarationAccepted = true
    while (!peek(TokenType.EndOfFile)) {
      if (declarationAccepted) {
        val definition = maybeReadDefinition()
        if (definition != null) {
          declarationAccepted = false
          definitions.add(definition)
          continue
        }
        declarations.add(readDeclaration())
      } else {
        definitions.add(readDefinition())
      }
    }
    CompilationUnit(declarations, definitions)
  }

  // Reads items via [read] until [peeking] is next; when [consuming] is set,
  // an optional separator token is swallowed after each item (this tolerates
  // both trailing and missing separators).
  private fun <T> collect(
    peeking: TokenType,
    consuming: TokenType? = null,
    read: () -> T
  ): List<T> {
    val items = mutableListOf<T>()
    while (!peek(peeking)) {
      val expression = read()
      if (consuming != null) {
        next(consuming)
      }
      items.add(expression)
    }
    return items
  }

  // True when the next significant token is one of [types]; does not consume.
  private fun peek(vararg types: TokenType): Boolean {
    val token = peek()
    return types.contains(token.type)
  }

  // Consumes the next token only if it matches [type]; returns whether it did.
  private fun next(type: TokenType): Boolean {
    return if (peek(type)) {
      expect(type)
      true
    } else false
  }

  // Consumes the next token and requires its type to be one of [types].
  private fun expect(vararg types: TokenType): Token {
    val token = next()
    if (!types.contains(token.type)) {
      throw RuntimeException(
        "Expected one of ${types.joinToString(", ")}" +
          " but got type ${token.type} '${token.text}'"
      )
    }
    return token
  }

  // expect() variant that immediately maps the matched token to a node.
  private fun <T: Node> expect(vararg types: TokenType, consume: (Token) -> T): T =
    consume(expect(*types))

  // Consumes and returns the next significant token; trivia is consumed too
  // and pushed to the attribution so it is not lost.
  private fun next(): Token {
    while (true) {
      val token = unsanitizedSource.next()
      attribution.push(token)
      if (ignoredByParser(token.type)) {
        continue
      }
      return token
    }
  }

  // Peeks the next significant token, consuming (and attributing) any trivia
  // in front of it.
  private fun peek(): Token {
    while (true) {
      val token = unsanitizedSource.peek()
      if (ignoredByParser(token.type)) {
        attribution.push(token)
        unsanitizedSource.next()
        continue
      }
      return token
    }
  }

  // Brackets node construction with attribution enter/exit so consumed tokens
  // are attached to the produced node.
  private fun <T: Node> within(block: () -> T): T {
    attribution.enter()
    return attribution.exit(block())
  }

  // Trivia the parser skips: comments and whitespace.
  private fun ignoredByParser(type: TokenType): Boolean = when (type) {
    TokenType.BlockComment -> true
    TokenType.LineComment -> true
    TokenType.Whitespace -> true
    else -> false
  }
}

View File

@ -0,0 +1,7 @@
package gay.pizza.pork.parser
/**
 * A sequential source of items with single-item lookahead.
 *
 * [next] consumes and returns the next item; [peek] returns it without
 * consuming. [currentIndex] is the index of the next item to be consumed.
 * End-of-input behavior is implementation-defined (implementations in this
 * package return an in-band sentinel rather than throwing).
 */
interface PeekableSource<T> {
  val currentIndex: Int
  fun next(): T
  fun peek(): T
}

View File

@ -0,0 +1,197 @@
package gay.pizza.pork.parser
import gay.pizza.pork.ast.*
import gay.pizza.pork.common.IndentPrinter
/**
 * Pretty-prints an AST back to pork source text into [buffer].
 *
 * Indentation is handled lazily: [appendLine] arms [autoIndentState] so the
 * current indent is emitted only when the next visible text is appended,
 * avoiding trailing whitespace on blank lines.
 */
class Printer(buffer: StringBuilder) : NodeVisitor<Unit> {
  private val out = IndentPrinter(buffer)
  // True after a newline, until the next append() emits the pending indent.
  private var autoIndentState = false

  // Appends text, first emitting the deferred indent if one is pending.
  private fun append(text: String) {
    if (autoIndentState) {
      out.emitIndent()
      autoIndentState = false
    }
    out.append(text)
  }

  // Ends the current line and arms lazy indentation for the next append().
  private fun appendLine() {
    out.appendLine()
    autoIndentState = true
  }

  override fun visitIntLiteral(node: IntLiteral) {
    append(node.value.toString())
  }

  // Re-quotes and re-escapes the literal so output is valid source text.
  override fun visitStringLiteral(node: StringLiteral) {
    append("\"")
    append(StringEscape.escape(node.text))
    append("\"")
  }

  override fun visitBooleanLiteral(node: BooleanLiteral) {
    if (node.value) {
      append("true")
    } else {
      append("false")
    }
  }

  // Lists print multi-line: one indented item per line, comma-separated
  // except after the last item; empty lists print as "[]".
  override fun visitListLiteral(node: ListLiteral) {
    append("[")
    if (node.items.isNotEmpty()) {
      out.increaseIndent()
      appendLine()
      for ((index, item) in node.items.withIndex()) {
        visit(item)
        if (index != node.items.size - 1) {
          append(",")
        }
        appendLine()
      }
      out.decreaseIndent()
    }
    append("]")
  }

  override fun visitSymbol(node: Symbol) {
    append(node.id)
  }

  // `name(arg, arg, ...)` — comma-space between arguments.
  override fun visitFunctionCall(node: FunctionCall) {
    visit(node.symbol)
    append("(")
    for ((index, argument) in node.arguments.withIndex()) {
      visit(argument)
      if (index + 1 != node.arguments.size) {
        append(", ")
      }
    }
    append(")")
  }

  // Assignment prints as `symbol = value`.
  override fun visitDefine(node: Assignment) {
    visit(node.symbol)
    append(" = ")
    visit(node.value)
  }

  override fun visitSymbolReference(node: SymbolReference) {
    visit(node.symbol)
  }

  // Lambda prints as `{ a, b in` + indented expressions + `}`.
  // An argument-less lambda still gets a space before `in`.
  override fun visitLambda(node: Lambda) {
    append("{")
    if (node.arguments.isNotEmpty()) {
      append(" ")
      for ((index, argument) in node.arguments.withIndex()) {
        visit(argument)
        if (index + 1 != node.arguments.size) {
          append(",")
        }
        append(" ")
      }
    } else {
      append(" ")
    }
    append("in")
    out.increaseIndent()
    for (expression in node.expressions) {
      appendLine()
      visit(expression)
    }
    if (node.expressions.isNotEmpty()) {
      appendLine()
    }
    out.decreaseIndent()
    append("}")
  }

  override fun visitParentheses(node: Parentheses) {
    append("(")
    visit(node.expression)
    append(")")
  }

  override fun visitPrefixOperation(node: PrefixOperation) {
    append(node.op.token)
    visit(node.expression)
  }

  // `if cond then` + indented then-branch, optionally `else` + indented branch.
  override fun visitIf(node: If) {
    append("if ")
    visit(node.condition)
    append(" then")
    out.increaseIndent()
    appendLine()
    visit(node.thenExpression)
    out.decreaseIndent()
    if (node.elseExpression != null) {
      appendLine()
      append("else")
      out.increaseIndent()
      appendLine()
      // !! is required: elseExpression is a class property, so the null
      // check above does not smart-cast it.
      visit(node.elseExpression!!)
      out.decreaseIndent()
    }
  }

  override fun visitInfixOperation(node: InfixOperation) {
    visit(node.left)
    append(" ")
    append(node.op.token)
    append(" ")
    visit(node.right)
  }

  // NOTE(review): prints the `fn` keyword while the tokenizer recognizes
  // `func` — presumably out of sync; confirm the intended keyword.
  override fun visitFunctionDeclaration(node: FunctionDefinition) {
    append("fn ")
    visit(node.symbol)
    append("(")
    for ((index, argument) in node.arguments.withIndex()) {
      visit(argument)
      if (index + 1 != node.arguments.size) {
        append(", ")
      }
    }
    append(") ")
    visit(node.block)
  }

  // Block prints as `{` + one indented expression per line + `}`;
  // an empty block prints as "{}".
  override fun visitBlock(node: Block) {
    append("{")
    if (node.expressions.isNotEmpty()) {
      out.increaseIndent()
      for (expression in node.expressions) {
        appendLine()
        visit(expression)
      }
      out.decreaseIndent()
      appendLine()
    }
    append("}")
  }

  override fun visitImportDeclaration(node: ImportDeclaration) {
    append("import ")
    visit(node.path)
  }

  // Declarations first (blank line after the group if non-empty),
  // then definitions, one per line.
  override fun visitCompilationUnit(node: CompilationUnit) {
    for (declaration in node.declarations) {
      visit(declaration)
      appendLine()
    }
    if (node.declarations.isNotEmpty()) {
      appendLine()
    }
    for (definition in node.definitions) {
      visit(definition)
      appendLine()
    }
  }
}

View File

@ -0,0 +1,23 @@
package gay.pizza.pork.parser
/**
 * A [CharSource] backed by an in-memory string. Returns
 * [CharSource.NullChar] once the end of [input] is reached.
 */
class StringCharSource(val input: String) : CharSource {
  // Position of the next character to be read.
  private var position = 0

  override val currentIndex: Int
    get() = position

  /** Consumes and returns the next character, or NullChar at end of input. */
  override fun next(): Char {
    if (position >= input.length) {
      return CharSource.NullChar
    }
    val result = input[position]
    position += 1
    return result
  }

  /** Returns the next character without consuming it, or NullChar at end. */
  override fun peek(): Char =
    if (position >= input.length) CharSource.NullChar else input[position]
}

View File

@ -0,0 +1,7 @@
package gay.pizza.pork.parser
/**
 * Escaping utilities for pork string literals.
 *
 * [escape] and [unescape] are inverses: backslash, newline, and double quote
 * are the escapable characters. The previous implementation only handled
 * newline, so strings containing `\` or `"` round-tripped through the
 * printer as ambiguous or invalid literals.
 */
object StringEscape {
  /**
   * Produces literal-safe text: `\` -> `\\`, newline -> `\n`, `"` -> `\"`.
   * Backslash is handled via a single scan so already-escaped output is
   * never double-processed.
   */
  fun escape(input: String): String = buildString {
    for (char in input) {
      when (char) {
        '\\' -> append("\\\\")
        '\n' -> append("\\n")
        '"' -> append("\\\"")
        else -> append(char)
      }
    }
  }

  /**
   * Inverts [escape] with a single left-to-right scan, so `\\n` correctly
   * decodes to backslash + `n` rather than a newline. Unknown escapes and a
   * trailing lone backslash are passed through unchanged (matching the old
   * lenient behavior).
   */
  fun unescape(input: String): String = buildString {
    var index = 0
    while (index < input.length) {
      val char = input[index]
      if (char == '\\' && index + 1 < input.length) {
        val escaped = input[index + 1]
        when (escaped) {
          'n' -> {
            append('\n')
            index += 2
          }
          '\\' -> {
            append('\\')
            index += 2
          }
          '"' -> {
            append('"')
            index += 2
          }
          else -> {
            // Unknown escape: keep the backslash literally.
            append(char)
            index += 1
          }
        }
      } else {
        append(char)
        index += 1
      }
    }
  }

  /** Strips the surrounding quote characters from a quoted literal. */
  fun unquote(input: String): String = input.substring(1, input.length - 1)
}

View File

@ -0,0 +1,10 @@
package gay.pizza.pork.parser
/**
 * A lexed token: its [type], the character offset [start] where it began in
 * the source, and its raw [text].
 */
class Token(val type: TokenType, val start: Int, val text: String) {
  // Debug form, e.g. `12 Symbol 'foo'`; newlines shown escaped for readability.
  override fun toString(): String = "$start ${type.name} '${text.replace("\n", "\\n")}'"

  companion object {
    /** The synthetic end-of-file token, positioned at [size] (input length). */
    fun endOfFile(size: Int): Token =
      Token(TokenType.EndOfFile, size, "")
  }
}

View File

@ -0,0 +1,11 @@
package gay.pizza.pork.parser
/**
 * Coarse classification of token types, used for syntax highlighting.
 * A family is attached to a [TokenType] as a [TokenTypeProperty];
 * types without an explicit family default to [OtherFamily].
 */
enum class TokenFamily : TokenTypeProperty {
  OperatorFamily,
  KeywordFamily,
  SymbolFamily,
  NumericLiteralFamily,
  StringLiteralFamily,
  CommentFamily,
  OtherFamily
}

View File

@ -0,0 +1,44 @@
package gay.pizza.pork.parser
import gay.pizza.pork.ast.NodeCoalescer
import gay.pizza.pork.ast.Node
import java.util.IdentityHashMap
/**
 * A [NodeAttribution] that records, for every AST node produced by the
 * parser, the tokens consumed while that node was being built.
 *
 * Frames mirror the parser's recursion: [enter] opens a frame, [push]
 * records into the innermost open frame, and [exit] closes it and binds the
 * collected tokens to the node. Node identity (not equality) keys the map.
 */
class TokenNodeAttribution : NodeAttribution {
  /** Tokens recorded per node, keyed by identity. */
  val nodes: MutableMap<Node, List<Token>> = IdentityHashMap()
  // One open frame per in-progress node, innermost last.
  private val frames = mutableListOf<MutableList<Token>>()
  // The innermost open frame, or null when no node is in progress.
  private var active: MutableList<Token>? = null

  override fun enter() {
    val frame = mutableListOf<Token>()
    frames.add(frame)
    active = frame
  }

  override fun push(token: Token) {
    val frame = active ?: throw RuntimeException("enter() not called!")
    frame.add(token)
  }

  override fun <T : Node> exit(node: T): T {
    nodes[node] = frames.removeLast()
    active = frames.lastOrNull()
    return node
  }

  /** The tokens recorded for [node], or null if it was never attributed. */
  fun tokensOf(node: Node): List<Token>? = nodes[node]

  /**
   * Gathers the tokens of [node] and all its descendants, deduplicated and
   * ordered by source position.
   */
  fun assembleTokens(node: Node): List<Token> {
    val collected = mutableListOf<Token>()
    val coalescer = NodeCoalescer { child ->
      tokensOf(child)?.let(collected::addAll)
    }
    coalescer.visit(node)
    return collected.distinct().sortedBy { it.start }
  }
}

View File

@ -0,0 +1,3 @@
package gay.pizza.pork.parser
/** A peekable stream of [Token]s; the parser's input abstraction. */
interface TokenSource : PeekableSource<Token>

View File

@ -0,0 +1,5 @@
package gay.pizza.pork.parser
/**
 * An immutable list of tokens produced by [Tokenizer.tokenize];
 * the final token is always EndOfFile.
 */
class TokenStream(val tokens: List<Token>) {
  override fun toString(): String = tokens.toString()
}

View File

@ -0,0 +1,23 @@
package gay.pizza.pork.parser
/**
 * Adapts a pre-lexed [TokenStream] into a [TokenSource]. Once the stream is
 * exhausted, [next] and [peek] keep returning a synthetic end-of-file token
 * positioned at the stream length.
 */
class TokenStreamSource(val stream: TokenStream) : TokenSource {
  // Index of the next token to hand out.
  private var cursor = 0

  override val currentIndex: Int
    get() = cursor

  /** Consumes and returns the next token, or an EndOfFile token when drained. */
  override fun next(): Token {
    if (cursor >= stream.tokens.size) {
      return Token.endOfFile(stream.tokens.size)
    }
    val token = stream.tokens[cursor]
    cursor += 1
    return token
  }

  /** Returns the next token without consuming it. */
  override fun peek(): Token =
    if (cursor >= stream.tokens.size) {
      Token.endOfFile(stream.tokens.size)
    } else {
      stream.tokens[cursor]
    }
}

View File

@ -0,0 +1,52 @@
package gay.pizza.pork.parser
import gay.pizza.pork.parser.TokenTypeProperty.*
import gay.pizza.pork.parser.TokenFamily.*
/**
 * All token types of the pork language. Each entry is configured by
 * [TokenTypeProperty] values: how it is lexed (SingleChar, CharConsumer,
 * Keyword), how it may be promoted by a following character (Promotion),
 * how it is post-processed (TokenUpgrader), and which [TokenFamily] it
 * belongs to for highlighting.
 *
 * NOTE: entry order defines ordinals; do not reorder.
 */
enum class TokenType(vararg properties: TokenTypeProperty) {
  // Identifiers: letters and underscore; upgraded to a keyword type when
  // the full text matches a keyword (see KeywordUpgrader).
  Symbol(SymbolFamily, CharConsumer { (it in 'a'..'z') || (it in 'A'..'Z') || it == '_' }, KeywordUpgrader),
  IntLiteral(NumericLiteralFamily, CharConsumer { it in '0'..'9' }),
  // Lexed by a dedicated path in Tokenizer, not by a CharConsumer.
  StringLiteral(StringLiteralFamily),
  // Equality/Inequality are only reachable via promotion from '='/'!'.
  Equality(OperatorFamily),
  Inequality(OperatorFamily),
  Equals(SingleChar('='), Promotion('=', Equality)),
  Plus(SingleChar('+'), OperatorFamily),
  Minus(SingleChar('-'), OperatorFamily),
  Multiply(SingleChar('*'), OperatorFamily),
  Divide(SingleChar('/'), OperatorFamily),
  LeftCurly(SingleChar('{')),
  RightCurly(SingleChar('}')),
  LeftBracket(SingleChar('[')),
  RightBracket(SingleChar(']')),
  LeftParentheses(SingleChar('(')),
  RightParentheses(SingleChar(')')),
  Negation(SingleChar('!'), Promotion('=', Inequality), OperatorFamily),
  Comma(SingleChar(',')),
  False(Keyword("false"), KeywordFamily),
  True(Keyword("true"), KeywordFamily),
  In(Keyword("in"), KeywordFamily),
  If(Keyword("if"), KeywordFamily),
  Then(Keyword("then"), KeywordFamily),
  Else(Keyword("else"), KeywordFamily),
  Import(Keyword("import"), KeywordFamily),
  Export(Keyword("export"), KeywordFamily),
  Func(Keyword("func"), KeywordFamily),
  Whitespace(CharConsumer { it == ' ' || it == '\r' || it == '\n' || it == '\t' }),
  // Comments are lexed by dedicated paths in Tokenizer.
  BlockComment(CommentFamily),
  LineComment(CommentFamily),
  EndOfFile;

  // Property accessors, extracted once at enum construction time.
  val promotions: List<Promotion> = properties.filterIsInstance<Promotion>()
  val keyword: Keyword? = properties.filterIsInstance<Keyword>().singleOrNull()
  val singleChar: SingleChar? = properties.filterIsInstance<SingleChar>().singleOrNull()
  val family: TokenFamily =
    properties.filterIsInstance<TokenFamily>().singleOrNull() ?: OtherFamily
  val charConsumer: CharConsumer? = properties.filterIsInstance<CharConsumer>().singleOrNull()
  val tokenUpgrader: TokenUpgrader? = properties.filterIsInstance<TokenUpgrader>().singleOrNull()

  companion object {
    // Pre-filtered views used by the tokenizer's dispatch loops.
    val Keywords = entries.filter { item -> item.keyword != null }
    val SingleChars = entries.filter { item -> item.singleChar != null }
    val CharConsumers = entries.filter { item -> item.charConsumer != null }
  }
}

View File

@ -0,0 +1,20 @@
package gay.pizza.pork.parser
/**
 * Marker interface for the configuration values attached to a [TokenType]:
 * how the type is lexed, promoted, and post-processed.
 */
interface TokenTypeProperty {
  /** The type is lexed from exactly this one character. */
  class SingleChar(val char: Char) : TokenTypeProperty

  /** If [nextChar] follows, the token is promoted to [type] (e.g. `=` + `=` -> Equality). */
  class Promotion(val nextChar: Char, val type: TokenType) : TokenTypeProperty

  /** The type corresponds to this exact keyword text. */
  class Keyword(val text: String) : TokenTypeProperty

  /** The type greedily consumes every character accepted by [isValid]. */
  class CharConsumer(val isValid: (Char) -> Boolean) : TokenTypeProperty

  /** Post-lex hook that may replace a token with a different one. */
  open class TokenUpgrader(val maybeUpgrade: (Token) -> Token?) : TokenTypeProperty

  /**
   * Upgrades a Symbol token to the matching keyword token when its text is
   * exactly a keyword; returns null (no upgrade) otherwise.
   */
  object KeywordUpgrader : TokenUpgrader({ token ->
    TokenType.Keywords
      .firstOrNull { candidate -> candidate.keyword?.text == token.text }
      ?.let { match -> Token(match, token.start, token.text) }
  })
}

View File

@ -0,0 +1,133 @@
package gay.pizza.pork.parser
/**
 * Hand-written lexer for the pork language.
 *
 * [next] produces one token per call, driven by the declarative properties
 * on [TokenType] (single characters with promotions, character consumers
 * with optional keyword upgrades) plus dedicated paths for comments and
 * string literals. [tokenize] drains the source into a [TokenStream].
 *
 * Fix: readBlockComment previously looped forever on an unterminated block
 * comment, because [CharSource.next] returns NullChar indefinitely at end
 * of input and the loop never checked for it. It now throws, matching the
 * unterminated-string behavior.
 */
class Tokenizer(val source: CharSource) {
  // Offset where the token currently being lexed started.
  private var tokenStart: Int = 0

  // Reads the remainder of a `/* ... */` comment; firstChar is the already
  // consumed '/'. Throws on end of input instead of spinning forever.
  private fun readBlockComment(firstChar: Char): Token {
    val comment = buildString {
      append(firstChar)
      var endOfComment = false
      while (true) {
        val char = source.next()
        if (char == CharSource.NullChar) {
          // EOF before the closing */ — previously an infinite loop.
          throw RuntimeException("Unterminated block comment.")
        }
        append(char)
        if (endOfComment) {
          // Previous char was '*': a '/' now terminates the comment.
          if (char != '/') {
            endOfComment = false
            continue
          }
          break
        }
        if (char == '*') {
          endOfComment = true
        }
      }
    }
    return Token(TokenType.BlockComment, tokenStart, comment)
  }

  // Reads the remainder of a `// ...` comment up to (not including) the
  // newline or end of input.
  private fun readLineComment(firstChar: Char): Token {
    val comment = buildString {
      append(firstChar)
      while (true) {
        val char = source.peek()
        if (char == CharSource.NullChar || char == '\n') {
          break
        }
        append(source.next())
      }
    }
    return Token(TokenType.LineComment, tokenStart, comment)
  }

  // Reads the remainder of a string literal; firstChar is the opening '"'.
  // The token text keeps both quotes. Throws on end of input.
  // NOTE(review): a '"' preceded by a backslash still terminates the
  // literal — escaped quotes are not supported at the lexer level yet.
  private fun readStringLiteral(firstChar: Char): Token {
    val string = buildString {
      append(firstChar)
      while (true) {
        val char = source.peek()
        if (char == CharSource.NullChar) {
          throw RuntimeException("Unterminated string.")
        }
        append(source.next())
        if (char == '"') {
          break
        }
      }
    }
    return Token(TokenType.StringLiteral, tokenStart, string)
  }

  /**
   * Lexes and returns the next token, or the EndOfFile token once the
   * source is exhausted. Throws RuntimeException on unlexable input.
   */
  fun next(): Token {
    while (source.peek() != CharSource.NullChar) {
      tokenStart = source.currentIndex
      val char = source.next()
      // Comments must be detected before '/' is matched as Divide.
      if (char == '/' && source.peek() == '*') {
        return readBlockComment(char)
      }
      if (char == '/' && source.peek() == '/') {
        return readLineComment(char)
      }
      // Single-character tokens, with optional promotion by the following
      // character (e.g. '=' + '=' -> Equality, '!' + '=' -> Inequality).
      for (item in TokenType.SingleChars) {
        val itemChar = item.singleChar!!.char
        if (itemChar != char) {
          continue
        }
        var type = item
        var text = itemChar.toString()
        for (promotion in item.promotions) {
          if (source.peek() != promotion.nextChar) {
            continue
          }
          val nextChar = source.next()
          type = promotion.type
          text += nextChar
        }
        return Token(type, tokenStart, text)
      }
      // Greedy character-class tokens (symbols, integers, whitespace),
      // optionally upgraded afterwards (symbol -> keyword).
      for (item in TokenType.CharConsumers) {
        val consumer = item.charConsumer ?: continue
        if (!consumer.isValid(char)) {
          continue
        }
        val text = buildString {
          append(char)
          while (consumer.isValid(source.peek())) {
            append(source.next())
          }
        }
        var token = Token(item, tokenStart, text)
        val tokenUpgrader = item.tokenUpgrader
        if (tokenUpgrader != null) {
          token = tokenUpgrader.maybeUpgrade(token) ?: token
        }
        return token
      }
      if (char == '"') {
        return readStringLiteral(char)
      }
      throw RuntimeException("Failed to parse: (${char}) next ${source.peek()}")
    }
    return Token.endOfFile(source.currentIndex)
  }

  /** Lexes the entire source, including the trailing EndOfFile token. */
  fun tokenize(): TokenStream {
    val tokens = mutableListOf<Token>()
    while (true) {
      val token = next()
      tokens.add(token)
      if (token.type == TokenType.EndOfFile) {
        break
      }
    }
    return TokenStream(tokens)
  }
}