mirror of
https://github.com/GayPizzaSpecifications/pork.git
synced 2025-08-03 21:21:33 +00:00
build: move parser and tokenizer into separate modules
This commit is contained in:
7
tokenizer/build.gradle.kts
Normal file
7
tokenizer/build.gradle.kts
Normal file
@ -0,0 +1,7 @@
|
||||
// Build script for the tokenizer module, split out of the main project.
plugins {
  // Project-local plugin id; presumably a shared convention plugin — confirm in buildSrc.
  id("gay.pizza.pork.module")
}

dependencies {
  implementation(project(":common"))
}
|
@ -0,0 +1,36 @@
|
||||
package gay.pizza.pork.tokenizer

/**
 * A [HighlightScheme] that wraps token text in ANSI SGR escape sequences,
 * picking a color from the token's [TokenFamily]. Subclasses may override
 * the per-family attribute functions to customize colors.
 */
open class AnsiHighlightScheme : HighlightScheme {
  override fun highlight(token: Token): Highlight {
    val attributes = when (token.type.family) {
      TokenFamily.StringLiteralFamily -> string()
      TokenFamily.OperatorFamily -> operator()
      TokenFamily.KeywordFamily -> keyword()
      TokenFamily.SymbolFamily -> symbol()
      TokenFamily.CommentFamily -> comment()
      else -> null
    }
    // Families without attributes pass through undecorated.
    return attributes?.let { found -> Highlight(token, ansi(found, token.text)) } ?: Highlight(token)
  }

  /** Green foreground. */
  open fun string(): AnsiAttributes = AnsiAttributes("32m")

  /** Yellow foreground. */
  open fun symbol(): AnsiAttributes = AnsiAttributes("33m")

  /** Blue foreground. */
  open fun operator(): AnsiAttributes = AnsiAttributes("34m")

  /** Magenta foreground. */
  open fun keyword(): AnsiAttributes = AnsiAttributes("35m")

  /** White foreground. */
  open fun comment(): AnsiAttributes = AnsiAttributes("37m")

  // Wraps text in the attribute's escape code and resets afterwards.
  private fun ansi(attributes: AnsiAttributes, text: String): String =
    "\u001b[${attributes.color}${text}\u001b[0m"

  /** An SGR fragment such as "32m"; the CSI prefix is supplied by [ansi]. */
  class AnsiAttributes(
    val color: String
  )
}
|
@ -0,0 +1,5 @@
|
||||
package gay.pizza.pork.tokenizer

/**
 * Thrown when no token type is able to consume [char] at [sourceIndex]
 * while the tokenizer is in [state].
 */
class BadCharacterError(char: Char, sourceIndex: SourceIndex, state: TokenizerState) : TokenizeError(
  "Failed to produce token for '${char}' at $sourceIndex in state $state"
)
|
@ -0,0 +1,5 @@
|
||||
package gay.pizza.pork.tokenizer

/**
 * Strategy for consuming the text of a multi-character token.
 */
interface CharConsumer {
  /**
   * Attempts to consume a token of [type] from [tokenizer]'s source.
   * Returns the consumed text, or null when the upcoming input does not match.
   */
  fun consume(type: TokenType, tokenizer: Tokenizer): String?
}
|
@ -0,0 +1,26 @@
|
||||
package gay.pizza.pork.tokenizer

/**
 * Predicate over a single character at a given index within the token
 * currently being matched.
 */
fun interface CharMatcher {
  fun valid(char: Char, index: Int): Boolean

  /** Matches when any one of the wrapped [filters] matches. */
  class AnyOf(vararg val filters: CharMatcher) : CharMatcher {
    override fun valid(char: Char, index: Int): Boolean =
      filters.any { filter -> filter.valid(char, index) }
  }

  /** Matches exactly one specific character. */
  class MatchSingle(val char: Char) : CharMatcher {
    override fun valid(char: Char, index: Int): Boolean =
      this.char == char
  }

  /** Matches any character inside [charRange]. */
  class MatchRange(val charRange: CharRange) : CharMatcher {
    override fun valid(char: Char, index: Int): Boolean =
      char in charRange
  }

  /** Delegates to [matcher] but never matches at position [index]. */
  class NotAtIndex(val index: Int, val matcher: CharMatcher) : CharMatcher {
    override fun valid(char: Char, index: Int): Boolean =
      index != this.index && matcher.valid(char, index)
  }
}
|
@ -0,0 +1,10 @@
|
||||
package gay.pizza.pork.tokenizer

/**
 * A [PeekableSource] of characters that supports arbitrary lookahead.
 */
interface CharSource : PeekableSource<Char> {
  /** Peeks the character [index] positions ahead (0 behaves like [peek]). */
  fun peek(index: Int): Char

  companion object {
    // NUL is used as the in-band end-of-input sentinel.
    @Suppress("ConstPropertyName")
    const val EndOfFile = 0.toChar()
  }
}
|
@ -0,0 +1,7 @@
|
||||
package gay.pizza.pork.tokenizer

/**
 * Drains this source into a string, stopping at [CharSource.EndOfFile].
 */
fun CharSource.readToString(): String {
  val builder = StringBuilder()
  var upcoming = peek()
  while (upcoming != CharSource.EndOfFile) {
    builder.append(next())
    upcoming = peek()
  }
  return builder.toString()
}
|
@ -0,0 +1,28 @@
|
||||
package gay.pizza.pork.tokenizer

/**
 * Thrown when the token [got] was read at [sourceIndex] but one of
 * [expectedTypes] was required.
 */
class ExpectedTokenError(got: Token, sourceIndex: SourceIndex, vararg expectedTypes: TokenType) : TokenizeError(
  message(got, sourceIndex, expectedTypes)
) {
  companion object {
    /** Builds a human-readable description of the mismatch. */
    fun message(got: Token, sourceIndex: SourceIndex, expectedTypes: Array<out TokenType>): String {
      // Describe each acceptable type, including its literal text when it has one.
      val descriptions = expectedTypes.map { type ->
        val want = type.simpleWantString
        if (want == null) type.name else "${type.name} '$want'"
      }

      val expected = when {
        expectedTypes.size > 1 -> "one of " + descriptions.joinToString(", ")
        else -> descriptions.firstOrNull() ?: "unknown"
      }

      // Prefer line/column when the index carries reliable location data.
      val friendlyIndex = when {
        sourceIndex.locationReliable -> "line ${sourceIndex.line} column ${sourceIndex.column}"
        else -> "index ${sourceIndex.index}"
      }

      return "Expected $expected at $friendlyIndex but got ${got.type} '${got.text}'"
    }
  }
}
|
@ -0,0 +1,5 @@
|
||||
package gay.pizza.pork.tokenizer

/**
 * A token paired with optional replacement [text] (e.g. ANSI-decorated);
 * rendering falls back to the token's own text when no replacement is set.
 */
class Highlight(val token: Token, val text: String? = null) {
  override fun toString(): String {
    val rendered = text
    return if (rendered != null) rendered else token.text
  }
}
|
@ -0,0 +1,5 @@
|
||||
package gay.pizza.pork.tokenizer

/**
 * Maps a token to a [Highlight] describing how it should be rendered.
 */
interface HighlightScheme {
  fun highlight(token: Token): Highlight
}
|
@ -0,0 +1,6 @@
|
||||
package gay.pizza.pork.tokenizer

/**
 * Applies a [HighlightScheme] to every token of a stream.
 */
class Highlighter(val scheme: HighlightScheme) {
  fun highlight(stream: TokenStream): List<Highlight> {
    return stream.tokens.map { token -> scheme.highlight(token) }
  }
}
|
@ -0,0 +1,48 @@
|
||||
package gay.pizza.pork.tokenizer

/**
 * A [CharConsumer] that consumes everything from a [start] delimiter through
 * a matching [end] delimiter, delimiters included. Additional occurrences of
 * [start] inside the body increase the nesting depth, so nested constructs
 * (e.g. block comments inside block comments) are consumed as one token.
 */
@Suppress("CanBeParameter")
class MatchedCharConsumer(
  val start: CharSequence,
  val end: CharSequence,
  vararg val options: Options
) : CharConsumer {
  // When set, hitting end-of-input terminates the token instead of raising;
  // used for line comments, which may legitimately end at EOF without a newline.
  private val eofTerminationAllowed = options.contains(Options.AllowEofTermination)

  override fun consume(type: TokenType, tokenizer: Tokenizer): String? {
    // Not our token unless the input begins with the start delimiter.
    if (!tokenizer.peek(start)) {
      return null
    }
    val buffer = StringBuilder()
    tokenizer.read(start.length, buffer)
    var endsNeededToTerminate = 1
    while (true) {
      // Another start delimiter: one more nesting level must be closed.
      // NOTE(review): this also counts start sequences occurring inside a
      // line comment's body (e.g. "//" within a URL) — confirm intended.
      if (tokenizer.peek(start)) {
        endsNeededToTerminate++
        tokenizer.read(start.length, buffer)
        continue
      }

      if (tokenizer.peek(end)) {
        endsNeededToTerminate--
        tokenizer.read(end.length, buffer)
      }

      if (endsNeededToTerminate == 0) {
        return buffer.toString()
      }

      // NOTE(review): after consuming a nested end delimiter above, this
      // unconditionally consumes one more character without re-checking for
      // delimiters — two adjacent end delimiters may be mis-handled; verify.
      val char = tokenizer.source.next()
      if (char == CharSource.EndOfFile) {
        if (eofTerminationAllowed) {
          return buffer.toString()
        }
        throw UnterminatedTokenError(type.name, tokenizer.source.currentSourceIndex())
      }
      buffer.append(char)
    }
  }

  enum class Options {
    AllowEofTermination
  }
}
|
@ -0,0 +1,7 @@
|
||||
package gay.pizza.pork.tokenizer

/**
 * A pull-based source of values with single-item lookahead.
 */
interface PeekableSource<T> {
  /** Current position within the source. */
  val currentIndex: Int

  /** Consumes and returns the next item. */
  fun next(): T

  /** Returns the next item without consuming it. */
  fun peek(): T
}
|
@ -0,0 +1,10 @@
|
||||
package gay.pizza.pork.tokenizer

/**
 * A position inside source text: character [index] plus [line]/[column].
 * When [locationReliable] is false, only [index] is meaningful.
 */
data class SourceIndex(val index: Int, val line: Int, val column: Int, val locationReliable: Boolean = true) {
  override fun toString(): String = when {
    locationReliable -> "$line:$column"
    else -> "$index"
  }

  companion object {
    /** The start-of-input position. */
    fun zero(): SourceIndex = SourceIndex(0, 1, 0)

    /** A position with an index but no trustworthy line/column. */
    fun indexOnly(index: Int) = SourceIndex(index, 0, 0, locationReliable = false)
  }
}
|
@ -0,0 +1,19 @@
|
||||
package gay.pizza.pork.tokenizer

/**
 * Wraps a [CharSource] and tracks line/column positions as characters are
 * consumed. [currentSourceIndex] reports the position of the next character
 * to be read.
 */
class SourceIndexCharSource(val delegate: CharSource) : CharSource by delegate {
  private var currentLineIndex = 1
  private var currentLineColumn = 1

  override fun next(): Char {
    val char = delegate.next()
    if (char == '\n') {
      currentLineIndex++
      // BUG FIX: the column was previously reset to 1 and then incremented
      // unconditionally, so the first character of every line reported
      // column 2. A newline leaves the next character at column 1.
      currentLineColumn = 1
    } else {
      currentLineColumn++
    }
    return char
  }

  /** The position (index, line, column) of the next unread character. */
  fun currentSourceIndex(): SourceIndex =
    SourceIndex(delegate.currentIndex, currentLineIndex, currentLineColumn)
}
|
@ -0,0 +1,30 @@
|
||||
package gay.pizza.pork.tokenizer

/**
 * Consumes the body of a string literal: everything up to (but not including)
 * the next unescaped double quote. Backslash escapes are passed through
 * verbatim; escape decoding happens elsewhere.
 *
 * @throws UnterminatedTokenError if end-of-input is reached before the
 *   closing quote.
 */
object StringCharConsumer : CharConsumer {
  override fun consume(type: TokenType, tokenizer: Tokenizer): String {
    val buffer = StringBuilder()
    var escape = false
    while (true) {
      val char = tokenizer.source.peek()

      if (char == CharSource.EndOfFile) {
        throw UnterminatedTokenError("String", tokenizer.source.currentSourceIndex())
      }

      // An unescaped quote ends the literal body; the quote itself is left
      // for the Quote token.
      if (char == '"' && !escape) {
        break
      }

      // BUG FIX: a backslash begins an escape only when it is not itself
      // escaped. Previously an escaped backslash ("\\") re-armed the escape
      // flag, which made a closing quote after it appear escaped.
      escape = !escape && char == '\\'

      buffer.append(tokenizer.source.next())
    }
    return buffer.toString()
  }
}
|
@ -0,0 +1,36 @@
|
||||
package gay.pizza.pork.tokenizer

/**
 * A [CharSource] backed by an in-memory [CharSequence].
 *
 * [startIndex] is the first readable index and [endIndex] is the last
 * readable index (inclusive), defaulting to the final character of [input].
 */
class StringCharSource(
  val input: CharSequence,
  val startIndex: Int = 0,
  val endIndex: Int = input.length - 1
) : CharSource {
  private var index = startIndex

  override val currentIndex: Int
    get() = index

  override fun next(): Char {
    // BUG FIX: the bound checks previously used `index == endIndex` /
    // `target >= endIndex`, which made the character at endIndex itself
    // (by default the last character of the input) unreachable.
    if (index > endIndex) {
      return CharSource.EndOfFile
    }
    val char = input[index]
    index++
    return char
  }

  override fun peek(): Char {
    if (index > endIndex) {
      return CharSource.EndOfFile
    }
    return input[index]
  }

  override fun peek(index: Int): Char {
    val target = this.index + index
    if (target > endIndex) {
      return CharSource.EndOfFile
    }
    return input[target]
  }
}
|
13
tokenizer/src/main/kotlin/gay/pizza/pork/tokenizer/Token.kt
Normal file
13
tokenizer/src/main/kotlin/gay/pizza/pork/tokenizer/Token.kt
Normal file
@ -0,0 +1,13 @@
|
||||
package gay.pizza.pork.tokenizer

/**
 * A single lexed token: its [type], the [sourceIndex] where it started,
 * and its raw [text].
 */
class Token(val type: TokenType, val sourceIndex: SourceIndex, val text: String) {
  /** Re-types this token (e.g. a Symbol becoming a keyword) keeping position and text. */
  fun upgrade(upgradedType: TokenType): Token = Token(upgradedType, sourceIndex, text)

  override fun toString(): String =
    "$sourceIndex ${type.name} '${text.replace("\n", "\\n")}'"

  companion object {
    /** An empty end-of-file marker token at [sourceIndex]. */
    fun endOfFile(sourceIndex: SourceIndex): Token =
      Token(TokenType.EndOfFile, sourceIndex, "")
  }
}
|
@ -0,0 +1,11 @@
|
||||
package gay.pizza.pork.tokenizer

/**
 * Broad classification of token types; attached to a [TokenType] as a
 * [TokenTypeProperty] and used e.g. for highlighting.
 */
enum class TokenFamily : TokenTypeProperty {
  OperatorFamily,
  KeywordFamily,
  SymbolFamily,
  NumericLiteralFamily,
  StringLiteralFamily,
  CommentFamily,
  // Fallback for token types that declare no family.
  OtherFamily
}
|
@ -0,0 +1,19 @@
|
||||
package gay.pizza.pork.tokenizer

/**
 * A [PeekableSource] of tokens with type-level lookahead.
 */
interface TokenSource : PeekableSource<Token> {
  /** Returns the type of the token [ahead] positions past the current position. */
  fun peekTypeAhead(ahead: Int): TokenType

  /** Reads every remaining token, up to and including the end-of-file token. */
  fun consumeAllRemainingTokens(): List<Token> {
    val collected = mutableListOf<Token>()
    do {
      val token = next()
      collected.add(token)
    } while (token.type != TokenType.EndOfFile)
    return collected
  }

  /** Drains this source into a materialized [TokenStream]. */
  fun stream(): TokenStream = TokenStream(consumeAllRemainingTokens())
}
|
@ -0,0 +1,5 @@
|
||||
package gay.pizza.pork.tokenizer

/**
 * A fully-materialized, immutable list of tokens.
 */
class TokenStream(val tokens: List<Token>) {
  override fun toString(): String = tokens.toString()
}
|
@ -0,0 +1,32 @@
|
||||
package gay.pizza.pork.tokenizer

/**
 * A [TokenSource] that replays an already-materialized [TokenStream].
 * Once exhausted it keeps returning the stream's final token.
 */
class TokenStreamSource(val stream: TokenStream) : TokenSource {
  private var index = 0

  override val currentIndex: Int
    get() = index

  override fun next(): Token {
    if (index >= stream.tokens.size) {
      return stream.tokens.last()
    }
    val token = stream.tokens[index]
    index++
    return token
  }

  override fun peek(): Token =
    stream.tokens.getOrElse(index) { stream.tokens.last() }

  override fun peekTypeAhead(ahead: Int): TokenType =
    stream.tokens.getOrElse(index + ahead) { stream.tokens.last() }.type
}
|
120
tokenizer/src/main/kotlin/gay/pizza/pork/tokenizer/TokenType.kt
Normal file
120
tokenizer/src/main/kotlin/gay/pizza/pork/tokenizer/TokenType.kt
Normal file
@ -0,0 +1,120 @@
|
||||
package gay.pizza.pork.tokenizer

import gay.pizza.pork.tokenizer.CharMatcher.*
import gay.pizza.pork.tokenizer.MatchedCharConsumer.Options.AllowEofTermination
import gay.pizza.pork.tokenizer.TokenTypeProperty.*
import gay.pizza.pork.tokenizer.TokenFamily.*
import gay.pizza.pork.tokenizer.TokenTypeProperty.AnyOf

/**
 * Every token type the tokenizer can produce. Each entry carries a set of
 * [TokenTypeProperty] values describing how it is matched (single char,
 * exact text, char matcher, or consumer), its family, promotions to longer
 * tokens, and the tokenizer states in which it is valid.
 *
 * NOTE: entry order matters — the companion lists below preserve declaration
 * order, and the tokenizer tries candidates in that order.
 */
enum class TokenType(vararg val properties: TokenTypeProperty) {
  // Digits anywhere, '.' allowed except as the first character.
  NumberLiteral(NumericLiteralFamily, CharMatch(CharMatcher.AnyOf(
    MatchRange('0'..'9'),
    NotAtIndex(0, MatchSingle('.'))
  ))),
  // Identifiers; KeywordUpgrader re-types them when the text is a keyword.
  Symbol(SymbolFamily, CharMatch(CharMatcher.AnyOf(
    MatchRange('a'..'z'),
    MatchRange('A'..'Z'),
    MatchRange('0' .. '9'),
    MatchSingle('_')
  )), KeywordUpgrader),
  // Quotes are valid outside a literal and immediately after one.
  Quote(StringLiteralFamily, SingleChar('"'), InsideStates(TokenizerState.Normal, TokenizerState.StringLiteralEnd)),
  // The literal body is only produced between the opening and closing quotes.
  StringLiteral(StringLiteralFamily, CharConsume(StringCharConsumer), InsideStates(TokenizerState.StringLiteralStart)),
  // Never matched directly; produced by promotion from Equals.
  Equality(OperatorFamily),
  Inequality(ManyChars("!="), OperatorFamily),
  ExclamationPoint(SingleChar('!'), Promotion('=', Inequality)),
  None(ManyChars("None"), KeywordFamily),
  Equals(SingleChar('='), Promotion('=', Equality)),
  PlusPlus(ManyChars("++"), OperatorFamily),
  MinusMinus(ManyChars("--"), OperatorFamily),
  Plus(SingleChar('+'), OperatorFamily, Promotion('+', PlusPlus)),
  Minus(SingleChar('-'), OperatorFamily, Promotion('-', MinusMinus)),
  Multiply(SingleChar('*'), OperatorFamily),
  Divide(SingleChar('/'), OperatorFamily),
  And(ManyChars("and"), KeywordFamily),
  Or(ManyChars("or"), KeywordFamily),
  Tilde(SingleChar('~'), OperatorFamily),
  Ampersand(SingleChar('&'), OperatorFamily),
  Pipe(SingleChar('|'), OperatorFamily),
  Caret(SingleChar('^'), OperatorFamily),
  // Produced only by promotion from Lesser / Greater.
  LesserEqual(OperatorFamily),
  GreaterEqual(OperatorFamily),
  Lesser(SingleChar('<'), OperatorFamily, Promotion('=', LesserEqual)),
  Greater(SingleChar('>'), OperatorFamily, Promotion('=', GreaterEqual)),
  LeftCurly(SingleChar('{')),
  RightCurly(SingleChar('}')),
  LeftBracket(SingleChar('[')),
  RightBracket(SingleChar(']')),
  LeftParentheses(SingleChar('(')),
  RightParentheses(SingleChar(')')),
  Not(ManyChars("not"), KeywordFamily),
  Mod(ManyChars("mod"), KeywordFamily),
  Rem(ManyChars("rem"), KeywordFamily),
  Comma(SingleChar(',')),
  DotDotDot(ManyChars("...")),
  DotDot(ManyChars(".."), Promotion('.', DotDotDot)),
  Dot(SingleChar('.'), Promotion('.', DotDot)),
  False(ManyChars("false"), KeywordFamily),
  True(ManyChars("true"), KeywordFamily),
  If(ManyChars("if"), KeywordFamily),
  Else(ManyChars("else"), KeywordFamily),
  While(ManyChars("while"), KeywordFamily),
  For(ManyChars("for"), KeywordFamily),
  In(ManyChars("in"), KeywordFamily),
  Continue(ManyChars("continue"), KeywordFamily),
  Break(ManyChars("break"), KeywordFamily),
  // "import" has playful aliases.
  Import(AnyOf("import", "impork", "porkload"), KeywordFamily),
  Export(ManyChars("export"), KeywordFamily),
  Func(ManyChars("func"), KeywordFamily),
  Native(ManyChars("native"), KeywordFamily),
  Let(ManyChars("let"), KeywordFamily),
  Var(ManyChars("var"), KeywordFamily),
  Whitespace(CharMatch(CharMatcher.AnyOf(
    MatchSingle(' '),
    MatchSingle('\r'),
    MatchSingle('\n'),
    MatchSingle('\t')
  ))),
  BlockComment(CharConsume(MatchedCharConsumer("/*", "*/")), CommentFamily),
  LineComment(CharConsume(MatchedCharConsumer("//", "\n", AllowEofTermination)), CommentFamily),
  // Synthetic; produced when the source is exhausted.
  EndOfFile;

  // Convenience views over the declared properties.
  val promotions: List<Promotion> =
    properties.filterIsInstance<Promotion>()
  val manyChars: ManyChars? =
    properties.filterIsInstance<ManyChars>().singleOrNull()
  val anyOf: AnyOf? =
    properties.filterIsInstance<AnyOf>().singleOrNull()
  val singleChar: SingleChar? =
    properties.filterIsInstance<SingleChar>().singleOrNull()
  val family: TokenFamily =
    properties.filterIsInstance<TokenFamily>().singleOrNull() ?: OtherFamily
  val charMatch: CharMatch? = properties.filterIsInstance<CharMatch>().singleOrNull()
  val charConsume: CharConsume? = properties.filterIsInstance<CharConsume>().singleOrNull()
  val tokenUpgrader: TokenUpgrader? =
    properties.filterIsInstance<TokenUpgrader>().singleOrNull()
  // Lazy because TokenizerState also references TokenType during init.
  val validStates: List<TokenizerState> by lazy {
    properties
      .filterIsInstance<InsideStates>()
      .singleOrNull()?.states?.toList() ?: listOf(TokenizerState.Normal)
  }

  // The literal text of this token when it has exactly one, for error messages.
  val simpleWantString: String? = manyChars?.text ?: singleChar?.char?.toString()

  companion object {
    // Declaration-ordered candidate lists consulted by the tokenizer.
    val AnyOf = entries.filter { item -> item.anyOf != null }
    val ManyChars = entries.filter { item -> item.manyChars != null }
    val SingleChars = entries.filter { item -> item.singleChar != null }
    val CharMatches = entries.filter { item -> item.charMatch != null }
    val CharConsumes = entries.filter { item -> item.charConsume != null }

    // Token types the parser skips entirely.
    val ParserIgnoredTypes: Set<TokenType> = setOf(
      Whitespace,
      BlockComment,
      LineComment
    )

    val DeclarationModifiers: Array<TokenType> = arrayOf(
      Export
    )
  }
}
|
@ -0,0 +1,32 @@
|
||||
package gay.pizza.pork.tokenizer

/**
 * Marker interface for metadata attached to [TokenType] entries.
 */
interface TokenTypeProperty {
  /** The token is exactly this one character. */
  class SingleChar(val char: Char) : TokenTypeProperty

  /** If [nextChar] follows, the token is promoted to [type]. */
  class Promotion(val nextChar: Char, val type: TokenType) : TokenTypeProperty

  /** The token is this exact text. */
  class ManyChars(val text: String) : TokenTypeProperty

  /** The token is any one of these exact texts. */
  class AnyOf(vararg val strings: String): TokenTypeProperty

  /** The token may only be produced while the tokenizer is in one of [states]. */
  class InsideStates(vararg val states: TokenizerState) : TokenTypeProperty

  /** The token's characters are matched one at a time by [matcher]. */
  open class CharMatch(val matcher: CharMatcher) : TokenTypeProperty

  /** The token's text is produced wholesale by [consumer]. */
  open class CharConsume(val consumer: CharConsumer) : TokenTypeProperty

  /** Optionally re-types a freshly produced token. */
  open class TokenUpgrader(val maybeUpgrade: (Token) -> Token?) : TokenTypeProperty

  /**
   * Upgrades a token whose text exactly matches a ManyChars or AnyOf token
   * type's text to that type (e.g. the symbol "if" becomes [TokenType.If]).
   */
  object KeywordUpgrader : TokenUpgrader({ token ->
    val match = TokenType.ManyChars.firstOrNull { it.manyChars?.text == token.text }
      ?: TokenType.AnyOf.firstOrNull { it.anyOf?.strings?.contains(token.text) == true }
    match?.let { token.upgrade(it) }
  })
}
|
@ -0,0 +1,3 @@
|
||||
package gay.pizza.pork.tokenizer

/**
 * Base type for all tokenization failures.
 */
abstract class TokenizeError(message: String) : RuntimeException(message)
|
148
tokenizer/src/main/kotlin/gay/pizza/pork/tokenizer/Tokenizer.kt
Normal file
148
tokenizer/src/main/kotlin/gay/pizza/pork/tokenizer/Tokenizer.kt
Normal file
@ -0,0 +1,148 @@
|
||||
package gay.pizza.pork.tokenizer

/**
 * Converts a stream of characters into a stream of [Token]s.
 *
 * Matching proceeds per token in three phases: char-consumer types (strings,
 * comments), single-char types with promotions (e.g. '=' + '=' -> Equality),
 * then char-matcher types (numbers, symbols). Producing certain tokens
 * transitions the tokenizer between [TokenizerState]s (string literal modes).
 */
class Tokenizer(source: CharSource) : TokenSource {
  internal val source = SourceIndexCharSource(source)

  // Position where the token currently being matched began.
  private var startIndex: SourceIndex = SourceIndex.zero()
  private var state = TokenizerState.Normal
  private var index = 0
  override val currentIndex: Int
    get() = index

  // Tokens produced by peek()/peekTypeAhead() but not yet consumed by next().
  private val queue = mutableListOf<Token>()

  override fun next(): Token {
    // BUG FIX: previously this ignored the lookahead queue and re-read from
    // the source, so calling peek() followed by next() skipped a token.
    val token = if (queue.isNotEmpty()) queue.removeAt(0) else readNextToken()
    index++
    return token
  }

  override fun peek(): Token {
    if (queue.isEmpty()) {
      queue.add(readNextToken())
    }
    return queue.first()
  }

  override fun peekTypeAhead(ahead: Int): TokenType {
    // Fill the queue until the requested lookahead position exists.
    val needed = ahead - (queue.size - 1)
    if (needed > 0) {
      for (i in 1..needed) {
        queue.add(readNextToken())
      }
    }
    return queue[ahead].type
  }

  // Attempts to produce one token; null means no type matched the input.
  private fun nextTokenOrNull(): Token? {
    if (source.peek() == CharSource.EndOfFile) {
      source.next()
      return Token.endOfFile(source.currentSourceIndex())
    }

    startIndex = source.currentSourceIndex()

    // Phase 1: consumer-driven types (string literals, comments).
    for (item in TokenType.CharConsumes) {
      if (!item.validStates.contains(state)) {
        continue
      }
      val text = item.charConsume!!.consumer.consume(item, this)
      if (text != null) {
        return produceToken(item, text)
      }
    }

    val char = source.next()

    // Phase 2: single-character types, greedily promoted to longer operators.
    for (item in TokenType.SingleChars) {
      if (!item.validStates.contains(state)) {
        continue
      }

      val itemChar = item.singleChar!!.char
      if (itemChar != char) {
        continue
      }

      var type = item
      var text = itemChar.toString()
      var promoted = true
      while (promoted) {
        promoted = false
        for (promotion in type.promotions) {
          if (source.peek() != promotion.nextChar) {
            continue
          }
          val nextChar = source.next()
          type = promotion.type
          text += nextChar
          promoted = true
        }
      }
      return produceToken(type, text)
    }

    // Phase 3: matcher-driven types (numbers, symbols, whitespace).
    var matchIndex = 0
    for (item in TokenType.CharMatches) {
      if (!item.validStates.contains(state)) {
        continue
      }

      if (!item.charMatch!!.matcher.valid(char, matchIndex)) {
        continue
      }

      val text = buildString {
        append(char)

        while (item.charMatch.matcher.valid(source.peek(), ++matchIndex)) {
          append(source.next())
        }
      }
      var token = produceToken(item, text)
      // E.g. a Symbol whose text is a keyword gets re-typed here.
      val tokenUpgrader = item.tokenUpgrader
      if (tokenUpgrader != null) {
        token = tokenUpgrader.maybeUpgrade(token) ?: token
      }
      return token
    }
    return null
  }

  // Produces a token and applies any state transition it triggers.
  private fun readNextToken(): Token {
    val what = source.peek()
    val token = nextTokenOrNull()
    if (token != null) {
      for (transition in state.transitions) {
        if (transition.produced == token.type) {
          state = transition.enter
          break
        }
      }
      return token
    }
    throw BadCharacterError(what, source.currentSourceIndex(), state)
  }

  internal fun produceToken(type: TokenType, text: String) =
    Token(type, startIndex, text)

  /** True when the upcoming characters are exactly [what], without consuming. */
  internal fun peek(what: CharSequence): Boolean {
    var current = 0
    for (c in what) {
      if (source.peek(current) != c) {
        return false
      }
      current++
    }
    return true
  }

  /** Consumes [count] characters into [buffer]. */
  internal fun read(count: Int, buffer: StringBuilder) {
    for (i in 1..count) {
      buffer.append(source.next())
    }
  }
}
|
@ -0,0 +1,12 @@
|
||||
package gay.pizza.pork.tokenizer

/**
 * Modes the tokenizer moves through while lexing; each state lists the
 * transitions that fire when a given token type is produced. The cycle is
 * Normal -> StringLiteralStart (on Quote) -> StringLiteralEnd (on
 * StringLiteral) -> Normal (on Quote).
 */
enum class TokenizerState(vararg val transitions: Transition) {
  Normal(Transition({ TokenType.Quote }) { StringLiteralStart }),
  StringLiteralStart(Transition({ TokenType.StringLiteral }) { StringLiteralEnd }),
  StringLiteralEnd(Transition({ TokenType.Quote }) { Normal });

  // Both sides are lazy thunks because enum entries reference each other
  // (and TokenType) before initialization completes.
  data class Transition(private val producedToken: () -> TokenType, private val nextState: () -> TokenizerState) {
    val produced by lazy { producedToken() }
    val enter by lazy { nextState() }
  }
}
|
@ -0,0 +1,5 @@
|
||||
package gay.pizza.pork.tokenizer
|
||||
|
||||
class UnterminatedTokenError(what: String, sourceIndex: SourceIndex) : TokenizeError(
|
||||
"Unterminated $what at $sourceIndex"
|
||||
)
|
Reference in New Issue
Block a user