Mirror of https://github.com/GayPizzaSpecifications/pork.git (synced 2025-08-03 21:21:33 +00:00)
parser: switch to char matcher interface
This commit is contained in:
26
parser/src/main/kotlin/gay/pizza/pork/parser/CharMatcher.kt
Normal file
26
parser/src/main/kotlin/gay/pizza/pork/parser/CharMatcher.kt
Normal file
@ -0,0 +1,26 @@
|
||||
package gay.pizza.pork.parser
|
||||
|
||||
/**
 * Predicate that decides whether a character is acceptable at a given position.
 *
 * Declared as a `fun interface`, so a matcher can also be written as a lambda
 * taking `(char, index)`.
 */
fun interface CharMatcher {
  /**
   * Reports whether [char] is accepted at position [index].
   *
   * @param char the character being tested.
   * @param index the position supplied by the caller; the matchers declared here
   *   either ignore it or compare it against a fixed index.
   * @return `true` when the character is accepted, `false` otherwise.
   */
  fun valid(char: Char, index: Int): Boolean

  /**
   * Accepts a character when at least one of [filters] accepts it.
   * With no filters, nothing is accepted.
   */
  class AnyOf(vararg val filters: CharMatcher) : CharMatcher {
    override fun valid(char: Char, index: Int): Boolean =
      filters.any { it.valid(char, index) }
  }

  /** Accepts exactly the character [char], regardless of index. */
  class MatchSingle(val char: Char) : CharMatcher {
    // The parameter shadows the property, so the property is reached through `this`.
    override fun valid(char: Char, index: Int): Boolean =
      char == this.char
  }

  /** Accepts any character contained in [charRange], regardless of index. */
  class MatchRange(val charRange: CharRange) : CharMatcher {
    override fun valid(char: Char, index: Int): Boolean =
      char in charRange
  }

  /**
   * Delegates to [matcher] everywhere except at position [index], where every
   * character is rejected.
   */
  class NotAtIndex(val index: Int, val matcher: CharMatcher) : CharMatcher {
    // The index comparison comes first, so the wrapped matcher is not consulted at the excluded position.
    override fun valid(char: Char, index: Int): Boolean =
      this.index != index && matcher.valid(char, index)
  }
}
|
@ -1,16 +1,21 @@
|
||||
package gay.pizza.pork.parser
|
||||
|
||||
import gay.pizza.pork.parser.CharMatcher.*
|
||||
import gay.pizza.pork.parser.TokenTypeProperty.*
|
||||
import gay.pizza.pork.parser.TokenFamily.*
|
||||
import gay.pizza.pork.parser.TokenTypeProperty.AnyOf
|
||||
|
||||
enum class TokenType(vararg properties: TokenTypeProperty) {
|
||||
NumberLiteral(NumericLiteralFamily, CharIndexConsumer { it, index ->
|
||||
(it in '0'..'9') || (index > 0 && it == '.') }),
|
||||
Symbol(SymbolFamily, CharConsumer {
|
||||
(it in 'a'..'z') ||
|
||||
(it in 'A'..'Z') ||
|
||||
(it == '_') ||
|
||||
(it in '0' .. '9')}, KeywordUpgrader),
|
||||
NumberLiteral(NumericLiteralFamily, CharConsumer(CharMatcher.AnyOf(
|
||||
MatchRange('0'..'9'),
|
||||
NotAtIndex(0, MatchSingle('.'))
|
||||
))),
|
||||
Symbol(SymbolFamily, CharConsumer(CharMatcher.AnyOf(
|
||||
MatchRange('a'..'z'),
|
||||
MatchRange('A'..'Z'),
|
||||
MatchRange('0' .. '9'),
|
||||
MatchSingle('_')
|
||||
)), KeywordUpgrader),
|
||||
StringLiteral(StringLiteralFamily),
|
||||
Equality(OperatorFamily),
|
||||
Inequality(ManyChars("!="), OperatorFamily),
|
||||
@ -61,7 +66,12 @@ enum class TokenType(vararg properties: TokenTypeProperty) {
|
||||
Native(ManyChars("native"), KeywordFamily),
|
||||
Let(ManyChars("let"), KeywordFamily),
|
||||
Var(ManyChars("var"), KeywordFamily),
|
||||
Whitespace(CharConsumer { it == ' ' || it == '\r' || it == '\n' || it == '\t' }),
|
||||
Whitespace(CharConsumer(CharMatcher.AnyOf(
|
||||
MatchSingle(' '),
|
||||
MatchSingle('\r'),
|
||||
MatchSingle('\n'),
|
||||
MatchSingle('\t')
|
||||
))),
|
||||
BlockComment(CommentFamily),
|
||||
LineComment(CommentFamily),
|
||||
EndOfFile;
|
||||
@ -77,8 +87,6 @@ enum class TokenType(vararg properties: TokenTypeProperty) {
|
||||
val family: TokenFamily =
|
||||
properties.filterIsInstance<TokenFamily>().singleOrNull() ?: OtherFamily
|
||||
val charConsumer: CharConsumer? = properties.filterIsInstance<CharConsumer>().singleOrNull()
|
||||
val charIndexConsumer: CharIndexConsumer? =
|
||||
properties.filterIsInstance<CharIndexConsumer>().singleOrNull()
|
||||
val tokenUpgrader: TokenUpgrader? =
|
||||
properties.filterIsInstance<TokenUpgrader>().singleOrNull()
|
||||
|
||||
@ -89,7 +97,7 @@ enum class TokenType(vararg properties: TokenTypeProperty) {
|
||||
val ManyChars = entries.filter { item -> item.manyChars != null }
|
||||
val SingleChars = entries.filter { item -> item.singleChar != null }
|
||||
val CharConsumers = entries.filter { item ->
|
||||
item.charConsumer != null || item.charIndexConsumer != null }
|
||||
item.charConsumer != null }
|
||||
|
||||
val ParserIgnoredTypes: Array<TokenType> = arrayOf(
|
||||
Whitespace,
|
||||
|
@ -5,8 +5,7 @@ interface TokenTypeProperty {
|
||||
class Promotion(val nextChar: Char, val type: TokenType) : TokenTypeProperty
|
||||
class ManyChars(val text: String) : TokenTypeProperty
|
||||
class AnyOf(vararg val strings: String): TokenTypeProperty
|
||||
class CharConsumer(val isValid: (Char) -> Boolean) : TokenTypeProperty
|
||||
class CharIndexConsumer(val isValid: (Char, Int) -> Boolean) : TokenTypeProperty
|
||||
open class CharConsumer(val matcher: CharMatcher) : TokenTypeProperty
|
||||
open class TokenUpgrader(val maybeUpgrade: (Token) -> Token?) : TokenTypeProperty
|
||||
|
||||
object KeywordUpgrader : TokenUpgrader({ token ->
|
||||
|
@ -102,27 +102,14 @@ class Tokenizer(val source: CharSource) {
|
||||
|
||||
var index = 0
|
||||
for (item in TokenType.CharConsumers) {
|
||||
if (item.charConsumer != null) {
|
||||
if (!item.charConsumer.isValid(char)) {
|
||||
continue
|
||||
}
|
||||
} else if (item.charIndexConsumer != null) {
|
||||
if (!item.charIndexConsumer.isValid(char, index)) {
|
||||
continue
|
||||
}
|
||||
} else {
|
||||
throw ParseError("Unknown Char Consumer")
|
||||
if (!item.charConsumer!!.matcher.valid(char, index)) {
|
||||
continue
|
||||
}
|
||||
|
||||
val text = buildString {
|
||||
append(char)
|
||||
|
||||
while (
|
||||
if (item.charConsumer != null)
|
||||
item.charConsumer.isValid(source.peek())
|
||||
else
|
||||
item.charIndexConsumer!!.isValid(source.peek(), ++index)
|
||||
) {
|
||||
while (item.charConsumer.matcher.valid(source.peek(), ++index)) {
|
||||
append(nextChar())
|
||||
}
|
||||
}
|
||||
@ -168,5 +155,6 @@ class Tokenizer(val source: CharSource) {
|
||||
return char
|
||||
}
|
||||
|
||||
private fun currentSourceIndex(): SourceIndex = SourceIndex(source.currentIndex, currentLineIndex, currentLineColumn)
|
||||
private fun currentSourceIndex(): SourceIndex =
|
||||
SourceIndex(source.currentIndex, currentLineIndex, currentLineColumn)
|
||||
}
|
||||
|
Reference in New Issue
Block a user