build: move parser and tokenizer into separate modules

This commit is contained in:
2023-10-16 21:52:21 -07:00
parent 9338b01b48
commit 15f5f313cc
57 changed files with 92 additions and 61 deletions

View File

@ -4,6 +4,7 @@ plugins {
dependencies {
api(project(":ast"))
api(project(":tokenizer"))
implementation(project(":common"))
}

View File

@ -1,36 +0,0 @@
package gay.pizza.pork.parser
/**
 * A [HighlightScheme] that colorizes tokens using ANSI SGR escape
 * sequences, choosing a color by the token's [TokenFamily].
 */
open class AnsiHighlightScheme : HighlightScheme {
  override fun highlight(token: Token): Highlight {
    val attributes: AnsiAttributes? = when (token.type.family) {
      TokenFamily.StringLiteralFamily -> string()
      TokenFamily.OperatorFamily -> operator()
      TokenFamily.KeywordFamily -> keyword()
      TokenFamily.SymbolFamily -> symbol()
      TokenFamily.CommentFamily -> comment()
      else -> null
    }
    if (attributes == null) {
      // No color configured for this family: pass the token through unstyled.
      return Highlight(token)
    }
    return Highlight(token, ansi(attributes, token.text))
  }

  // Default palette; subclasses may override any of these hooks.
  open fun string(): AnsiAttributes = AnsiAttributes("32m")

  open fun symbol(): AnsiAttributes = AnsiAttributes("33m")

  open fun operator(): AnsiAttributes = AnsiAttributes("34m")

  open fun keyword(): AnsiAttributes = AnsiAttributes("35m")

  open fun comment(): AnsiAttributes = AnsiAttributes("37m")

  // Wraps text in the escape sequence and resets attributes afterwards.
  private fun ansi(attributes: AnsiAttributes, text: String): String =
    "\u001b[${attributes.color}${text}\u001b[0m"

  /** A single ANSI attribute payload, e.g. "32m" for green foreground. */
  class AnsiAttributes(
    val color: String
  )
}

View File

@ -1,5 +0,0 @@
package gay.pizza.pork.parser
/** Raised when the tokenizer cannot produce any token for [char] while in tokenizer state `state`. */
class BadCharacterError(val char: Char, sourceIndex: SourceIndex, state: TokenizerState) : ParseError(
"Failed to produce token for '${char}' at $sourceIndex in state $state"
)

View File

@ -1,5 +0,0 @@
package gay.pizza.pork.parser
/**
 * A pluggable consumer that reads the full text of one token directly from
 * the tokenizer's character stream. Returns null when the upcoming input
 * does not belong to this consumer.
 */
interface CharConsumer {
fun consume(type: TokenType, tokenizer: Tokenizer): String?
}

View File

@ -1,26 +0,0 @@
package gay.pizza.pork.parser
/** A predicate over a character and its index within the token being matched. */
fun interface CharMatcher {
  fun valid(char: Char, index: Int): Boolean

  /** Matches when at least one of [filters] matches. */
  class AnyOf(vararg val filters: CharMatcher) : CharMatcher {
    override fun valid(char: Char, index: Int): Boolean {
      for (filter in filters) {
        if (filter.valid(char, index)) {
          return true
        }
      }
      return false
    }
  }

  /** Matches exactly one character, at any index. */
  class MatchSingle(val char: Char) : CharMatcher {
    override fun valid(char: Char, index: Int): Boolean =
      this.char == char
  }

  /** Matches any character contained in [charRange]. */
  class MatchRange(val charRange: CharRange) : CharMatcher {
    override fun valid(char: Char, index: Int): Boolean =
      char in charRange
  }

  /** Delegates to [matcher], but never matches at position [index]. */
  class NotAtIndex(val index: Int, val matcher: CharMatcher) : CharMatcher {
    override fun valid(char: Char, index: Int): Boolean =
      this.index != index && matcher.valid(char, index)
  }
}

View File

@ -1,10 +0,0 @@
package gay.pizza.pork.parser
/** A peekable stream of characters with arbitrary lookahead via [peek]. */
interface CharSource : PeekableSource<Char> {
/** Peeks [index] characters ahead of the current position (0 = the next character). */
fun peek(index: Int): Char
companion object {
// Sentinel returned when the source is exhausted (the NUL character).
@Suppress("ConstPropertyName")
const val EndOfFile = 0.toChar()
}
}

View File

@ -1,7 +0,0 @@
package gay.pizza.pork.parser
/** Drains this source into a [String], stopping at [CharSource.EndOfFile]. */
fun CharSource.readToString(): String {
  val builder = StringBuilder()
  var char = peek()
  while (char != CharSource.EndOfFile) {
    builder.append(next())
    char = peek()
  }
  return builder.toString()
}

View File

@ -2,6 +2,7 @@ package gay.pizza.pork.parser
import gay.pizza.pork.ast.gen.Node
import gay.pizza.pork.ast.gen.NodeType
import gay.pizza.pork.tokenizer.Token
object DiscardNodeAttribution : NodeAttribution {
override fun push(token: Token) {}

View File

@ -1,28 +0,0 @@
package gay.pizza.pork.parser
/**
 * Raised by the parser when the next token does not match any of the
 * expected token types at the current position.
 */
class ExpectedTokenError(got: Token, sourceIndex: SourceIndex, vararg expectedTypes: TokenType) : ParseError(
  message(got, sourceIndex, expectedTypes)
) {
  companion object {
    /** Builds the human-readable message listing the expected types and actual token. */
    fun message(got: Token, sourceIndex: SourceIndex, expectedTypes: Array<out TokenType>): String {
      val tokenTypeMessages = expectedTypes.map { type ->
        val want = type.simpleWantString
        if (want == null) type.name else "${type.name} '${want}'"
      }
      val expected = when {
        expectedTypes.size > 1 -> "one of " + tokenTypeMessages.joinToString(", ")
        else -> tokenTypeMessages.firstOrNull() ?: "unknown"
      }
      // Fall back to the raw offset when line/column tracking is unreliable.
      val friendlyIndex = when {
        sourceIndex.locationReliable -> "line ${sourceIndex.line} column ${sourceIndex.column}"
        else -> "index ${sourceIndex.index}"
      }
      return "Expected $expected at $friendlyIndex but got ${got.type} '${got.text}'"
    }
  }
}

View File

@ -1,5 +0,0 @@
package gay.pizza.pork.parser
/** A token paired with optional replacement display text; [toString] prefers the styled [text]. */
class Highlight(val token: Token, val text: String? = null) {
override fun toString(): String = text ?: token.text
}

View File

@ -1,5 +0,0 @@
package gay.pizza.pork.parser
/** Maps a token to a [Highlight], e.g. by attaching terminal color codes. */
interface HighlightScheme {
fun highlight(token: Token): Highlight
}

View File

@ -1,6 +0,0 @@
package gay.pizza.pork.parser
/** Applies a [HighlightScheme] to every token in a [TokenStream]. */
class Highlighter(val scheme: HighlightScheme) {
fun highlight(stream: TokenStream): List<Highlight> =
stream.tokens.map { scheme.highlight(it) }
}

View File

@ -1,5 +1,9 @@
package gay.pizza.pork.parser
import gay.pizza.pork.tokenizer.Token
import gay.pizza.pork.tokenizer.TokenSource
import gay.pizza.pork.tokenizer.TokenType
class LazySkippingTokenSource(val source: TokenSource, val skipping: Set<TokenType>) : ParserAwareTokenSource {
private var index = 0
override val currentIndex: Int

View File

@ -1,48 +0,0 @@
package gay.pizza.pork.parser
@Suppress("CanBeParameter")
/**
 * Consumes a region delimited by [start] and [end] (e.g. block comments),
 * honoring nesting: each additional [start] inside the region requires a
 * matching [end] before the token terminates. With
 * [Options.AllowEofTermination] the region may also end at end-of-file
 * (used for line comments terminated by EOF instead of a newline).
 */
class MatchedCharConsumer(
  val start: CharSequence,
  val end: CharSequence,
  vararg val options: Options
) : CharConsumer {
  private val eofTerminationAllowed = options.contains(Options.AllowEofTermination)

  /** Returns the full delimited text (including delimiters), or null if the input does not begin with [start]. */
  override fun consume(type: TokenType, tokenizer: Tokenizer): String? {
    if (!tokenizer.peek(start)) {
      return null
    }
    val buffer = StringBuilder()
    tokenizer.read(start.length, buffer)
    var endsNeededToTerminate = 1
    while (true) {
      if (tokenizer.peek(start)) {
        endsNeededToTerminate++
        tokenizer.read(start.length, buffer)
        continue
      }
      if (tokenizer.peek(end)) {
        endsNeededToTerminate--
        tokenizer.read(end.length, buffer)
        if (endsNeededToTerminate == 0) {
          return buffer.toString()
        }
        // Fix: restart the scan after consuming a nested end marker.
        // Previously control fell through and consumed one extra character
        // as plain content, which could swallow the first character of an
        // immediately-following start/end marker and miss termination.
        continue
      }
      val char = tokenizer.source.next()
      if (char == CharSource.EndOfFile) {
        if (eofTerminationAllowed) {
          return buffer.toString()
        }
        throw UnterminatedTokenError(type.name, tokenizer.source.currentSourceIndex())
      }
      buffer.append(char)
    }
  }

  enum class Options {
    AllowEofTermination
  }
}

View File

@ -2,6 +2,7 @@ package gay.pizza.pork.parser
import gay.pizza.pork.ast.gen.Node
import gay.pizza.pork.ast.gen.NodeType
import gay.pizza.pork.tokenizer.Token
interface NodeAttribution {
fun push(token: Token)

View File

@ -1,6 +1,8 @@
package gay.pizza.pork.parser
import gay.pizza.pork.ast.gen.*
import gay.pizza.pork.tokenizer.TokenSource
import gay.pizza.pork.tokenizer.TokenType
class Parser(source: TokenSource, attribution: NodeAttribution) :
ParserBase(source, attribution) {

View File

@ -4,6 +4,7 @@ import gay.pizza.pork.ast.gen.Node
import gay.pizza.pork.ast.gen.NodeCoalescer
import gay.pizza.pork.ast.gen.data
import gay.pizza.pork.ast.gen.visit
import gay.pizza.pork.tokenizer.Token
data class ParserAttributes(val tokens: List<Token>) {
companion object {

View File

@ -1,3 +1,5 @@
package gay.pizza.pork.parser
import gay.pizza.pork.tokenizer.TokenSource
interface ParserAwareTokenSource : TokenSource

View File

@ -3,6 +3,7 @@ package gay.pizza.pork.parser
import gay.pizza.pork.ast.gen.Node
import gay.pizza.pork.ast.gen.NodeParser
import gay.pizza.pork.ast.gen.NodeType
import gay.pizza.pork.tokenizer.*
abstract class ParserBase(source: TokenSource, val attribution: NodeAttribution) : NodeParser {
val source: TokenSource = if (source is ParserAwareTokenSource) {

View File

@ -3,6 +3,8 @@ package gay.pizza.pork.parser
import gay.pizza.pork.ast.gen.InfixOperator
import gay.pizza.pork.ast.gen.PrefixOperator
import gay.pizza.pork.ast.gen.SuffixOperator
import gay.pizza.pork.tokenizer.Token
import gay.pizza.pork.tokenizer.TokenType
internal object ParserHelpers {
fun convertInfixOperator(token: Token): InfixOperator = when (token.type) {

View File

@ -3,6 +3,7 @@ package gay.pizza.pork.parser
import gay.pizza.pork.ast.gen.Node
import gay.pizza.pork.ast.gen.NodeType
import gay.pizza.pork.ast.gen.data
import gay.pizza.pork.tokenizer.Token
open class ParserNodeAttribution : NodeAttribution {
private val stack = mutableListOf<MutableList<Token>>()

View File

@ -1,7 +0,0 @@
package gay.pizza.pork.parser
/**
 * A sequential source of [T] values supporting one-item lookahead.
 * [peek] must return the same value the following [next] will produce.
 */
interface PeekableSource<T> {
/** Count of items consumed so far. */
val currentIndex: Int
fun next(): T
fun peek(): T
}

View File

@ -1,10 +0,0 @@
package gay.pizza.pork.parser
/**
 * A position within source text. [index] is the absolute character offset;
 * [line] and [column] are the human-readable coordinates. When
 * [locationReliable] is false, only [index] is meaningful.
 */
data class SourceIndex(val index: Int, val line: Int, val column: Int, val locationReliable: Boolean = true) {
  override fun toString(): String = when {
    locationReliable -> "${line}:${column}"
    else -> "$index"
  }

  companion object {
    /** The start of a source: offset 0, line 1, column 0. */
    fun zero(): SourceIndex = SourceIndex(0, 1, 0)

    /** An offset-only position whose line/column are unknown. */
    fun indexOnly(index: Int) = SourceIndex(index, 0, 0, locationReliable = false)
  }
}

View File

@ -1,19 +0,0 @@
package gay.pizza.pork.parser
/**
 * Decorates a [CharSource] with line/column tracking so the tokenizer can
 * report friendly positions. Columns are 1-based and point at the next
 * character to be read.
 */
class SourceIndexCharSource(val delegate: CharSource) : CharSource by delegate {
  private var currentLineIndex = 1
  private var currentLineColumn = 1

  override fun next(): Char {
    val char = delegate.next()
    if (char == '\n') {
      currentLineIndex++
      currentLineColumn = 1
    } else {
      // Fix: only advance the column for non-newline characters. The column
      // was previously incremented unconditionally, so the first character
      // of every new line was reported at column 2 instead of column 1.
      currentLineColumn++
    }
    return char
  }

  /** Snapshot of the current position (offset, line, column). */
  fun currentSourceIndex(): SourceIndex =
    SourceIndex(delegate.currentIndex, currentLineIndex, currentLineColumn)
}

View File

@ -1,30 +0,0 @@
package gay.pizza.pork.parser
/**
 * Consumes the body of a string literal up to (but not including) the
 * closing quote, honoring backslash escapes: an escaped quote does not
 * terminate the literal, and an escaped backslash does not arm an escape
 * for the character after it.
 */
object StringCharConsumer : CharConsumer {
  override fun consume(type: TokenType, tokenizer: Tokenizer): String {
    val buffer = StringBuilder()
    var escape = false
    while (true) {
      val char = tokenizer.source.peek()
      if (char == CharSource.EndOfFile) {
        throw UnterminatedTokenError("String", tokenizer.source.currentSourceIndex())
      }
      if (escape) {
        // Fix: a character following a backslash is consumed literally and
        // clears the escape state — even if it is itself a backslash.
        // Previously "\\" re-armed the escape flag, causing a following
        // closing quote to be swallowed into the literal.
        escape = false
      } else {
        if (char == '"') {
          break
        }
        if (char == '\\') {
          escape = true
        }
      }
      buffer.append(tokenizer.source.next())
    }
    return buffer.toString()
  }
}

View File

@ -1,36 +0,0 @@
package gay.pizza.pork.parser
/**
 * A [CharSource] over a [CharSequence]. [startIndex] is inclusive and
 * [endIndex] is exclusive; the defaults cover the entire input.
 */
class StringCharSource(
  val input: CharSequence,
  val startIndex: Int = 0,
  // Fix: the default was input.length - 1, which — combined with the
  // exclusive end checks below — silently dropped the final character of
  // the input (a one-character input produced nothing at all).
  val endIndex: Int = input.length
) : CharSource {
  private var index = startIndex

  override val currentIndex: Int
    get() = index

  override fun next(): Char {
    if (index >= endIndex) {
      return CharSource.EndOfFile
    }
    val char = input[index]
    index++
    return char
  }

  override fun peek(): Char {
    if (index >= endIndex) {
      return CharSource.EndOfFile
    }
    return input[index]
  }

  /** Peeks [index] characters ahead; returns EndOfFile past the bound. */
  override fun peek(index: Int): Char {
    val target = this.index + index
    if (target >= endIndex) {
      return CharSource.EndOfFile
    }
    return input[target]
  }
}

View File

@ -1,13 +0,0 @@
package gay.pizza.pork.parser
/** A single lexed token: its [type], where it begins ([sourceIndex]), and its raw [text]. */
class Token(val type: TokenType, val sourceIndex: SourceIndex, val text: String) {
  /** Re-types this token (e.g. promoting a Symbol to a keyword) keeping position and text. */
  fun upgrade(upgradedType: TokenType): Token = Token(upgradedType, sourceIndex, text)

  override fun toString(): String =
    "$sourceIndex ${type.name} '${text.replace("\n", "\\n")}'"

  companion object {
    /** Produces the synthetic end-of-file token at [sourceIndex]. */
    fun endOfFile(sourceIndex: SourceIndex): Token =
      Token(TokenType.EndOfFile, sourceIndex, "")
  }
}

View File

@ -1,11 +0,0 @@
package gay.pizza.pork.parser
/**
 * Coarse classification of token types, used for syntax highlighting and
 * for selecting parser behavior by category rather than by exact type.
 */
enum class TokenFamily : TokenTypeProperty {
OperatorFamily,
KeywordFamily,
SymbolFamily,
NumericLiteralFamily,
StringLiteralFamily,
CommentFamily,
// Fallback family for token types with no explicit family property.
OtherFamily
}

View File

@ -1,19 +0,0 @@
package gay.pizza.pork.parser
/** A peekable stream of tokens with type-only lookahead beyond the next token. */
interface TokenSource : PeekableSource<Token> {
  /** Returns the type of the token [ahead] positions past the current one (0 = next). */
  fun peekTypeAhead(ahead: Int): TokenType

  /** Drains this source, returning every token up to and including EndOfFile. */
  fun consumeAllRemainingTokens(): List<Token> {
    val tokens = mutableListOf<Token>()
    do {
      val token = next()
      tokens.add(token)
    } while (token.type != TokenType.EndOfFile)
    return tokens
  }

  /** Drains this source into a materialized [TokenStream]. */
  fun stream(): TokenStream = TokenStream(consumeAllRemainingTokens())
}

View File

@ -1,5 +0,0 @@
package gay.pizza.pork.parser
/** An immutable, fully-materialized list of tokens (the result of draining a TokenSource). */
class TokenStream(val tokens: List<Token>) {
override fun toString(): String = tokens.toString()
}

View File

@ -1,32 +0,0 @@
package gay.pizza.pork.parser
/**
 * Replays a materialized [TokenStream] as a [TokenSource]. Once the stream
 * is exhausted, the final token (expected to be EndOfFile) is returned
 * repeatedly rather than throwing.
 */
class TokenStreamSource(val stream: TokenStream) : TokenSource {
  private var index = 0

  override val currentIndex: Int
    get() = index

  override fun next(): Token {
    val token = peek()
    // Stick at the last token; do not advance past the end.
    if (index < stream.tokens.size) {
      index++
    }
    return token
  }

  override fun peek(): Token {
    if (index >= stream.tokens.size) {
      return stream.tokens.last()
    }
    return stream.tokens[index]
  }

  override fun peekTypeAhead(ahead: Int): TokenType {
    val target = index + ahead
    if (target >= stream.tokens.size) {
      return stream.tokens.last().type
    }
    return stream.tokens[target].type
  }
}

View File

@ -1,120 +0,0 @@
package gay.pizza.pork.parser
import gay.pizza.pork.parser.CharMatcher.*
import gay.pizza.pork.parser.MatchedCharConsumer.Options.AllowEofTermination
import gay.pizza.pork.parser.TokenTypeProperty.*
import gay.pizza.pork.parser.TokenFamily.*
import gay.pizza.pork.parser.TokenTypeProperty.AnyOf
/**
 * The declarative catalog of all token types. Each entry carries
 * [TokenTypeProperty] values (single characters, multi-character strings,
 * character matchers/consumers, promotions, families, state restrictions)
 * that the Tokenizer interprets generically — the enum IS the lexer spec.
 */
enum class TokenType(vararg val properties: TokenTypeProperty) {
// Numbers: digits anywhere, '.' allowed except as the first character.
NumberLiteral(NumericLiteralFamily, CharMatch(CharMatcher.AnyOf(
MatchRange('0'..'9'),
NotAtIndex(0, MatchSingle('.'))
))),
// Identifiers; KeywordUpgrader re-types matches of keyword text.
Symbol(SymbolFamily, CharMatch(CharMatcher.AnyOf(
MatchRange('a'..'z'),
MatchRange('A'..'Z'),
MatchRange('0' .. '9'),
MatchSingle('_')
)), KeywordUpgrader),
// String literals are driven by tokenizer state transitions (see TokenizerState).
Quote(StringLiteralFamily, SingleChar('"'), InsideStates(TokenizerState.Normal, TokenizerState.StringLiteralEnd)),
StringLiteral(StringLiteralFamily, CharConsume(StringCharConsumer), InsideStates(TokenizerState.StringLiteralStart)),
// Operators; Promotion upgrades a single-char token when the next char matches
// (e.g. '!' followed by '=' becomes Inequality).
Equality(OperatorFamily),
Inequality(ManyChars("!="), OperatorFamily),
ExclamationPoint(SingleChar('!'), Promotion('=', Inequality)),
None(ManyChars("None"), KeywordFamily),
Equals(SingleChar('='), Promotion('=', Equality)),
PlusPlus(ManyChars("++"), OperatorFamily),
MinusMinus(ManyChars("--"), OperatorFamily),
Plus(SingleChar('+'), OperatorFamily, Promotion('+', PlusPlus)),
Minus(SingleChar('-'), OperatorFamily, Promotion('-', MinusMinus)),
Multiply(SingleChar('*'), OperatorFamily),
Divide(SingleChar('/'), OperatorFamily),
And(ManyChars("and"), KeywordFamily),
Or(ManyChars("or"), KeywordFamily),
Tilde(SingleChar('~'), OperatorFamily),
Ampersand(SingleChar('&'), OperatorFamily),
Pipe(SingleChar('|'), OperatorFamily),
Caret(SingleChar('^'), OperatorFamily),
LesserEqual(OperatorFamily),
GreaterEqual(OperatorFamily),
Lesser(SingleChar('<'), OperatorFamily, Promotion('=', LesserEqual)),
Greater(SingleChar('>'), OperatorFamily, Promotion('=', GreaterEqual)),
LeftCurly(SingleChar('{')),
RightCurly(SingleChar('}')),
LeftBracket(SingleChar('[')),
RightBracket(SingleChar(']')),
LeftParentheses(SingleChar('(')),
RightParentheses(SingleChar(')')),
Not(ManyChars("not"), KeywordFamily),
Mod(ManyChars("mod"), KeywordFamily),
Rem(ManyChars("rem"), KeywordFamily),
Comma(SingleChar(',')),
DotDotDot(ManyChars("...")),
DotDot(ManyChars(".."), Promotion('.', DotDotDot)),
Dot(SingleChar('.'), Promotion('.', DotDot)),
// Keywords (matched as Symbols, then upgraded via KeywordUpgrader).
False(ManyChars("false"), KeywordFamily),
True(ManyChars("true"), KeywordFamily),
If(ManyChars("if"), KeywordFamily),
Else(ManyChars("else"), KeywordFamily),
While(ManyChars("while"), KeywordFamily),
For(ManyChars("for"), KeywordFamily),
In(ManyChars("in"), KeywordFamily),
Continue(ManyChars("continue"), KeywordFamily),
Break(ManyChars("break"), KeywordFamily),
Import(AnyOf("import", "impork", "porkload"), KeywordFamily),
Export(ManyChars("export"), KeywordFamily),
Func(ManyChars("func"), KeywordFamily),
Native(ManyChars("native"), KeywordFamily),
Let(ManyChars("let"), KeywordFamily),
Var(ManyChars("var"), KeywordFamily),
Whitespace(CharMatch(CharMatcher.AnyOf(
MatchSingle(' '),
MatchSingle('\r'),
MatchSingle('\n'),
MatchSingle('\t')
))),
BlockComment(CharConsume(MatchedCharConsumer("/*", "*/")), CommentFamily),
// Line comments may be terminated by end-of-file as well as newline.
LineComment(CharConsume(MatchedCharConsumer("//", "\n", AllowEofTermination)), CommentFamily),
EndOfFile;
// Cached views over the vararg properties, so the tokenizer's hot loops
// do not re-filter on every token.
val promotions: List<Promotion> =
properties.filterIsInstance<Promotion>()
val manyChars: ManyChars? =
properties.filterIsInstance<ManyChars>().singleOrNull()
val anyOf: AnyOf? =
properties.filterIsInstance<AnyOf>().singleOrNull()
val singleChar: SingleChar? =
properties.filterIsInstance<SingleChar>().singleOrNull()
val family: TokenFamily =
properties.filterIsInstance<TokenFamily>().singleOrNull() ?: OtherFamily
val charMatch: CharMatch? = properties.filterIsInstance<CharMatch>().singleOrNull()
val charConsume: CharConsume? = properties.filterIsInstance<CharConsume>().singleOrNull()
val tokenUpgrader: TokenUpgrader? =
properties.filterIsInstance<TokenUpgrader>().singleOrNull()
// Tokenizer states in which this token may be produced; defaults to Normal.
val validStates: List<TokenizerState> by lazy {
properties
.filterIsInstance<InsideStates>()
.singleOrNull()?.states?.toList() ?: listOf(TokenizerState.Normal)
}
// The literal text this token always has, if it has one (used in error messages).
val simpleWantString: String? = manyChars?.text ?: singleChar?.char?.toString()
companion object {
// Pre-partitioned entry lists, one per matching strategy.
val AnyOf = entries.filter { item -> item.anyOf != null }
val ManyChars = entries.filter { item -> item.manyChars != null }
val SingleChars = entries.filter { item -> item.singleChar != null }
val CharMatches = entries.filter { item -> item.charMatch != null }
val CharConsumes = entries.filter { item -> item.charConsume != null }
// Trivia token types the parser skips entirely.
val ParserIgnoredTypes: Set<TokenType> = setOf(
Whitespace,
BlockComment,
LineComment
)
val DeclarationModifiers: Array<TokenType> = arrayOf(
Export
)
}
}

View File

@ -1,32 +0,0 @@
package gay.pizza.pork.parser
/**
 * Marker hierarchy for the declarative properties attached to TokenType
 * entries; the Tokenizer interprets these to drive lexing.
 */
interface TokenTypeProperty {
/** The token is exactly one character. */
class SingleChar(val char: Char) : TokenTypeProperty
/** If [nextChar] follows, the token is upgraded to [type] (e.g. '=' + '=' -> Equality). */
class Promotion(val nextChar: Char, val type: TokenType) : TokenTypeProperty
/** The token is exactly this string (keywords, multi-char operators). */
class ManyChars(val text: String) : TokenTypeProperty
/** The token is any one of several literal strings. */
class AnyOf(vararg val strings: String): TokenTypeProperty
/** The token may only be produced while the tokenizer is in one of [states]. */
class InsideStates(vararg val states: TokenizerState) : TokenTypeProperty
/** The token's characters are selected by a CharMatcher predicate. */
open class CharMatch(val matcher: CharMatcher) : TokenTypeProperty
/** The token's full text is read by a delegated CharConsumer. */
open class CharConsume(val consumer: CharConsumer) : TokenTypeProperty
/** Post-processes a produced token, optionally re-typing it. */
open class TokenUpgrader(val maybeUpgrade: (Token) -> Token?) : TokenTypeProperty
// Upgrades a Symbol token whose text exactly equals a keyword's literal
// text (ManyChars first, then AnyOf alternatives like import/impork).
object KeywordUpgrader : TokenUpgrader({ token ->
var upgraded: Token? = null
for (item in TokenType.ManyChars) {
if (item.manyChars != null && token.text == item.manyChars.text) {
upgraded = token.upgrade(item)
break
}
}
if (upgraded == null) {
for (item in TokenType.AnyOf) {
if (item.anyOf != null && item.anyOf.strings.contains(token.text)) {
upgraded = token.upgrade(item)
break
}
}
}
upgraded
})
}

View File

@ -1,148 +0,0 @@
package gay.pizza.pork.parser
/**
 * Converts a [CharSource] into a stream of [Token]s, driven by the
 * declarative properties on [TokenType]: delegated char consumers first,
 * then single-character tokens (with promotion chains), then
 * multi-character matches (with optional keyword upgrades).
 */
class Tokenizer(source: CharSource) : TokenSource {
  internal val source = SourceIndexCharSource(source)
  // Source position where the token currently being read began.
  private var startIndex: SourceIndex = SourceIndex.zero()
  private var state = TokenizerState.Normal
  private var index = 0
  override val currentIndex: Int
    get() = index
  // Tokens buffered by peek()/peekTypeAhead(), awaiting consumption by next().
  private val queue = mutableListOf<Token>()

  override fun next(): Token {
    // Fix: serve buffered tokens first. Previously this always read a fresh
    // token from the source, so any token buffered by peek() or
    // peekTypeAhead() was skipped here and later re-surfaced stale and out
    // of order, violating the PeekableSource contract.
    val token = if (queue.isNotEmpty()) {
      queue.removeAt(0)
    } else {
      readNextToken()
    }
    index++
    return token
  }

  override fun peek(): Token {
    if (queue.isEmpty()) {
      queue.add(readNextToken())
    }
    return queue.first()
  }

  override fun peekTypeAhead(ahead: Int): TokenType {
    // Top up the buffer until it holds at least ahead + 1 tokens.
    val needed = ahead - (queue.size - 1)
    if (needed > 0) {
      for (i in 1..needed) {
        queue.add(readNextToken())
      }
    }
    return queue[ahead].type
  }

  private fun nextTokenOrNull(): Token? {
    if (source.peek() == CharSource.EndOfFile) {
      source.next()
      return Token.endOfFile(source.currentSourceIndex())
    }
    startIndex = source.currentSourceIndex()
    // 1. Delegated consumers (string bodies, comments) get first chance.
    for (item in TokenType.CharConsumes) {
      if (!item.validStates.contains(state)) {
        continue
      }
      val text = item.charConsume!!.consumer.consume(item, this)
      if (text != null) {
        return produceToken(item, text)
      }
    }
    val char = source.next()
    // 2. Single-character tokens, following promotion chains greedily
    // (e.g. '=' then '=' becomes Equality, '.' then '.' becomes DotDot).
    for (item in TokenType.SingleChars) {
      if (!item.validStates.contains(state)) {
        continue
      }
      val itemChar = item.singleChar!!.char
      if (itemChar != char) {
        continue
      }
      var type = item
      var text = itemChar.toString()
      var promoted = true
      while (promoted) {
        promoted = false
        for (promotion in type.promotions) {
          if (source.peek() != promotion.nextChar) {
            continue
          }
          val nextChar = source.next()
          type = promotion.type
          text += nextChar
          promoted = true
        }
      }
      return produceToken(type, text)
    }
    // 3. Multi-character matches (identifiers, numbers, whitespace), with
    // optional token upgrade afterwards (e.g. Symbol -> keyword).
    var index = 0
    for (item in TokenType.CharMatches) {
      if (!item.validStates.contains(state)) {
        continue
      }
      if (!item.charMatch!!.matcher.valid(char, index)) {
        continue
      }
      val text = buildString {
        append(char)
        while (item.charMatch.matcher.valid(source.peek(), ++index)) {
          append(source.next())
        }
      }
      var token = produceToken(item, text)
      val tokenUpgrader = item.tokenUpgrader
      if (tokenUpgrader != null) {
        token = tokenUpgrader.maybeUpgrade(token) ?: token
      }
      return token
    }
    // No rule matched the character: caller reports a BadCharacterError.
    return null
  }

  private fun readNextToken(): Token {
    val what = source.peek()
    val token = nextTokenOrNull()
    if (token != null) {
      // Apply any state transition triggered by the produced token type
      // (used to flip in and out of string-literal mode).
      for (transition in state.transitions) {
        if (transition.produced == token.type) {
          state = transition.enter
          break
        }
      }
      return token
    }
    throw BadCharacterError(what, source.currentSourceIndex(), state)
  }

  internal fun produceToken(type: TokenType, text: String) =
    Token(type, startIndex, text)

  /** Returns true when the upcoming characters exactly match [what], consuming nothing. */
  internal fun peek(what: CharSequence): Boolean {
    var current = 0
    for (c in what) {
      if (source.peek(current) != c) {
        return false
      }
      current++
    }
    return true
  }

  /** Reads exactly [count] characters from the source into [buffer]. */
  internal fun read(count: Int, buffer: StringBuilder) {
    for (i in 1..count) {
      buffer.append(source.next())
    }
  }
}

View File

@ -1,12 +0,0 @@
package gay.pizza.pork.parser
/**
 * Lexer modes. Producing the token named in a [Transition] moves the
 * tokenizer into the transition's target state; this implements string
 * literal lexing (Quote enters the literal, StringLiteral fills it,
 * the closing Quote returns to Normal).
 */
enum class TokenizerState(vararg val transitions: Transition) {
Normal(Transition({ TokenType.Quote }) { StringLiteralStart }),
StringLiteralStart(Transition({ TokenType.StringLiteral }) { StringLiteralEnd }),
StringLiteralEnd(Transition({ TokenType.Quote }) { Normal });
// Both sides are deferred lambdas because enum entries and TokenType
// entries reference each other; lazy evaluation avoids init-order issues.
data class Transition(private val producedToken: () -> TokenType, private val nextState: () -> TokenizerState) {
val produced by lazy { producedToken() }
val enter by lazy { nextState() }
}
}

View File

@ -1,5 +0,0 @@
package gay.pizza.pork.parser
/** Raised when end-of-file is reached inside a token that requires a closing delimiter (e.g. a string or block comment). */
class UnterminatedTokenError(what: String, sourceIndex: SourceIndex) : ParseError(
"Unterminated $what at $sourceIndex"
)