parser: lazy support

This commit is contained in:
Alex Zenla 2023-10-14 03:28:07 -07:00
parent e96bcd8754
commit 9338b01b48
Signed by: alex
GPG Key ID: C0780728420EBFE5
25 changed files with 159 additions and 118 deletions

View File

@ -1,5 +1,5 @@
import std ffi.printf
export func main() {
printf("Hello World: %s\n", "Jolk")
printf("Hello World: \n", "Jolk")
}

View File

@ -4,7 +4,6 @@ import gay.pizza.pork.ast.gen.CompilationUnit
import gay.pizza.pork.ast.gen.ImportDeclaration
import gay.pizza.pork.parser.DiscardNodeAttribution
import gay.pizza.pork.parser.Parser
import gay.pizza.pork.parser.TokenStreamSource
import gay.pizza.pork.parser.Tokenizer
class World(val importSource: ImportSource) {
@ -23,8 +22,7 @@ class World(val importSource: ImportSource) {
}
val charSource = contentSource.loadAsCharSource(importLocator.path)
val tokenizer = Tokenizer(charSource)
val tokenStream = tokenizer.stream()
val parser = Parser(TokenStreamSource(tokenStream), DiscardNodeAttribution)
val parser = Parser(tokenizer, DiscardNodeAttribution)
val unit = parser.parseCompilationUnit()
internalUnits[stableKey] = unit
return unit

View File

@ -22,14 +22,14 @@ abstract class Tool {
val rootImportLocator: ImportLocator
get() = ImportLocator("local", rootFilePath())
fun tokenize(): LazyTokenSource =
LazyTokenSource(Tokenizer(createCharSource()))
fun tokenize(): Tokenizer =
Tokenizer(createCharSource())
fun parse(attribution: NodeAttribution = DiscardNodeAttribution): CompilationUnit =
Parser(TokenStreamSource(tokenize().streamAllRemainingTokens()), attribution).parseCompilationUnit()
Parser(tokenize(), attribution).parseCompilationUnit()
fun highlight(scheme: HighlightScheme): List<Highlight> =
Highlighter(scheme).highlight(tokenize().streamAllRemainingTokens())
Highlighter(scheme).highlight(tokenize().stream())
fun reprint(): String = buildString { visit(Printer(this)) }

View File

@ -6,6 +6,6 @@ import gay.pizza.pork.ast.gen.NodeType
object DiscardNodeAttribution : NodeAttribution {
override fun push(token: Token) {}
override fun <T : Node> adopt(node: T) {}
override fun <T : Node> guarded(type: NodeType?, block: () -> T): T =
override fun <T : Node> produce(type: NodeType, block: () -> T): T =
block()
}

View File

@ -0,0 +1,33 @@
package gay.pizza.pork.parser
/**
 * A token source that reads from [source] on demand and drops every token
 * whose type is contained in [skipping].
 *
 * Tokens are pulled from [source] only as far as the caller has asked to
 * consume or peek; tokens that were read ahead are buffered until consumed.
 */
class LazySkippingTokenSource(val source: TokenSource, val skipping: Set<TokenType>) : ParserAwareTokenSource {
  private var index = 0

  /** Number of tokens handed out by [next] so far; skipped tokens are not counted. */
  override val currentIndex: Int
    get() = index

  // Non-skipped tokens already read from [source] but not yet consumed.
  // A deque keeps removal from the front cheap while still allowing indexed look-ahead.
  private val queue = ArrayDeque<Token>()

  /** Consumes and returns the next non-skipped token. */
  override fun next(): Token {
    needs(1)
    index++
    return queue.removeFirst()
  }

  /** Returns the next non-skipped token without consuming it. */
  override fun peek(): Token {
    needs(1)
    return queue.first()
  }

  /**
   * Returns the type of the non-skipped token [ahead] positions past the next one
   * (`0` is the token [peek] would return) without consuming anything.
   */
  override fun peekTypeAhead(ahead: Int): TokenType {
    needs(ahead + 1)
    return queue[ahead].type
  }

  /** Reads from [source] until at least [count] non-skipped tokens are buffered. */
  private fun needs(count: Int) {
    while (queue.size < count) {
      val token = source.next()
      if (token.type !in skipping) {
        queue.addLast(token)
      }
    }
  }
}

View File

@ -1,38 +0,0 @@
package gay.pizza.pork.parser
/**
 * A [TokenSource] that reads tokens from [tokenizer] on demand, buffering
 * only the tokens that have been peeked but not yet consumed.
 */
class LazyTokenSource(val tokenizer: Tokenizer) : TokenSource {
  // Tokens already read from the tokenizer but not yet consumed by next().
  private val queue = mutableListOf<Token>()
  private var index = 0

  /** Number of calls made to [next] so far. */
  override val currentIndex: Int
    get() = index

  /** Consumes and returns the next token, preferring already-buffered tokens. */
  override fun next(): Token {
    index++
    wantAtLeast(1)
    return queue.removeFirst()
  }

  /** Returns the next token without consuming it. */
  override fun peek(): Token {
    wantAtLeast(1)
    return queue.first()
  }

  /**
   * Returns the type of the token [ahead] positions past the next one
   * (`0` is the token [peek] would return) without consuming anything.
   */
  override fun peekTypeAhead(ahead: Int): TokenType {
    wantAtLeast(ahead + 1)
    return queue[ahead].type
  }

  /**
   * Ensures at least [count] tokens are buffered, reading only the tokens
   * that are actually missing so the tokenizer is never advanced further
   * than the caller asked for.
   */
  private fun wantAtLeast(count: Int) {
    while (queue.size < count) {
      queue.add(tokenizer.next())
    }
  }
}

View File

@ -6,5 +6,5 @@ import gay.pizza.pork.ast.gen.NodeType
interface NodeAttribution {
fun push(token: Token)
fun <T: Node> adopt(node: T)
fun <T: Node> guarded(type: NodeType?, block: () -> T): T
fun <T: Node> produce(type: NodeType, block: () -> T): T
}

View File

@ -4,7 +4,7 @@ import gay.pizza.pork.ast.gen.*
class Parser(source: TokenSource, attribution: NodeAttribution) :
ParserBase(source, attribution) {
override fun parseArgumentSpec(): ArgumentSpec = guarded(NodeType.ArgumentSpec) {
override fun parseArgumentSpec(): ArgumentSpec = produce(NodeType.ArgumentSpec) {
val symbol = parseSymbol()
ArgumentSpec(symbol, next(TokenType.DotDotDot))
}
@ -47,7 +47,7 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
if (expression is SymbolReference && peek(TokenType.Equals)) {
val symbolReference = expression
expression = guarded(NodeType.SetAssignment) {
expression = produce(NodeType.SetAssignment) {
attribution.adopt(expression)
expect(TokenType.Equals)
val value = parseExpression()
@ -56,7 +56,7 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
}
if (peek(TokenType.LeftBracket)) {
expression = guarded(NodeType.IndexedBy) {
expression = produce(NodeType.IndexedBy) {
attribution.adopt(expression)
expect(TokenType.LeftBracket)
val index = parseExpression()
@ -70,7 +70,7 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
TokenType.Pipe, TokenType.Caret, TokenType.Equality, TokenType.Inequality, TokenType.Mod,
TokenType.Rem, TokenType.Lesser, TokenType.Greater, TokenType.LesserEqual, TokenType.GreaterEqual,
TokenType.And, TokenType.Or)) {
guarded(NodeType.InfixOperation) {
produce(NodeType.InfixOperation) {
val infixToken = next()
val infixOperator = ParserHelpers.convertInfixOperator(infixToken)
InfixOperation(expression, infixOperator, parseExpression())
@ -78,7 +78,7 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
} else expression
}
override fun parseBooleanLiteral(): BooleanLiteral = guarded(NodeType.BooleanLiteral) {
override fun parseBooleanLiteral(): BooleanLiteral = produce(NodeType.BooleanLiteral) {
if (next(TokenType.True)) {
BooleanLiteral(true)
} else if (next(TokenType.False)) {
@ -88,12 +88,12 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
}
}
override fun parseBreak(): Break = guarded(NodeType.Break) {
override fun parseBreak(): Break = produce(NodeType.Break) {
expect(TokenType.Break)
Break()
}
override fun parseCompilationUnit(): CompilationUnit = guarded(NodeType.CompilationUnit) {
override fun parseCompilationUnit(): CompilationUnit = produce(NodeType.CompilationUnit) {
val declarations = mutableListOf<Declaration>()
val definitions = mutableListOf<Definition>()
var declarationAccepted = true
@ -114,7 +114,7 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
CompilationUnit(declarations, definitions)
}
override fun parseContinue(): Continue = guarded(NodeType.Continue) {
override fun parseContinue(): Continue = produce(NodeType.Continue) {
expect(TokenType.Continue)
Continue()
}
@ -187,11 +187,11 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
ForIn(forInItem, value, block)
}
override fun parseForInItem(): ForInItem = guarded(NodeType.ForInItem) {
override fun parseForInItem(): ForInItem = produce(NodeType.ForInItem) {
ForInItem(parseSymbol())
}
override fun parseFunctionCall(): FunctionCall = guarded(NodeType.FunctionCall) {
override fun parseFunctionCall(): FunctionCall = produce(NodeType.FunctionCall) {
val symbol = parseSymbol()
expect(TokenType.LeftParentheses)
val arguments = collect(TokenType.RightParentheses, TokenType.Comma) {
@ -201,7 +201,7 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
FunctionCall(symbol, arguments)
}
override fun parseFunctionDefinition(): FunctionDefinition = guarded(NodeType.FunctionDefinition) {
override fun parseFunctionDefinition(): FunctionDefinition = produce(NodeType.FunctionDefinition) {
val modifiers = parseDefinitionModifiers()
expect(TokenType.Func)
val name = parseSymbol()
@ -236,14 +236,14 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
ImportDeclaration(form, parseImportPath())
}
override fun parseImportPath(): ImportPath = guarded(NodeType.ImportPath) {
override fun parseImportPath(): ImportPath = produce(NodeType.ImportPath) {
val components = oneAndContinuedBy(TokenType.Dot) {
parseSymbol()
}
ImportPath(components)
}
override fun parseIndexedBy(): IndexedBy = guarded(NodeType.IndexedBy) {
override fun parseIndexedBy(): IndexedBy = produce(NodeType.IndexedBy) {
val expression = parseExpression()
expect(TokenType.LeftBracket)
val index = parseExpression()
@ -251,7 +251,7 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
IndexedBy(expression, index)
}
override fun parseInfixOperation(): InfixOperation = guarded(NodeType.InfixOperation) {
override fun parseInfixOperation(): InfixOperation = produce(NodeType.InfixOperation) {
val infixToken = next()
val infixOperator = ParserHelpers.convertInfixOperator(infixToken)
InfixOperation(parseExpression(), infixOperator, parseExpression())
@ -282,7 +282,7 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
LetAssignment(symbol, value)
}
override fun parseLetDefinition(): LetDefinition = guarded(NodeType.LetDefinition) {
override fun parseLetDefinition(): LetDefinition = produce(NodeType.LetDefinition) {
val definitionModifiers = parseDefinitionModifiers()
expect(TokenType.Let)
val name = parseSymbol()
@ -330,14 +330,14 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
PrefixOperation(ParserHelpers.convertPrefixOperator(it), parseExpression())
}
override fun parseSetAssignment(): SetAssignment = guarded(NodeType.SetAssignment) {
override fun parseSetAssignment(): SetAssignment = produce(NodeType.SetAssignment) {
val symbol = parseSymbol()
expect(TokenType.Equals)
val value = parseExpression()
SetAssignment(symbol, value)
}
override fun parseStringLiteral(): StringLiteral = guarded(NodeType.StringLiteral) {
override fun parseStringLiteral(): StringLiteral = produce(NodeType.StringLiteral) {
expect(TokenType.Quote)
val stringLiteralToken = expect(TokenType.StringLiteral)
expect(TokenType.Quote)
@ -345,7 +345,7 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
StringLiteral(content)
}
override fun parseSuffixOperation(): SuffixOperation = guarded(NodeType.SuffixOperation) {
override fun parseSuffixOperation(): SuffixOperation = produce(NodeType.SuffixOperation) {
val reference = parseSymbolReference()
expect(TokenType.PlusPlus, TokenType.MinusMinus) {
SuffixOperation(ParserHelpers.convertSuffixOperator(it), reference)
@ -364,7 +364,7 @@ class Parser(source: TokenSource, attribution: NodeAttribution) :
Symbol(it.text)
}
override fun parseSymbolReference(): SymbolReference = guarded(NodeType.SymbolReference) {
override fun parseSymbolReference(): SymbolReference = produce(NodeType.SymbolReference) {
SymbolReference(parseSymbol())
}

View File

@ -0,0 +1,3 @@
package gay.pizza.pork.parser
/**
 * Marker interface for a [TokenSource] that the parser consumes directly.
 *
 * ParserBase uses a source of this type as-is; any other [TokenSource] is
 * wrapped in a LazySkippingTokenSource that skips TokenType.ParserIgnoredTypes.
 */
interface ParserAwareTokenSource : TokenSource

View File

@ -5,15 +5,19 @@ import gay.pizza.pork.ast.gen.NodeParser
import gay.pizza.pork.ast.gen.NodeType
abstract class ParserBase(source: TokenSource, val attribution: NodeAttribution) : NodeParser {
val source: TokenSource = source.ignoringParserIgnoredTypes()
val source: TokenSource = if (source is ParserAwareTokenSource) {
source
} else {
LazySkippingTokenSource(source, TokenType.ParserIgnoredTypes)
}
@Suppress("NOTHING_TO_INLINE")
protected inline fun <T: Node> guarded(type: NodeType? = null, noinline block: () -> T): T =
attribution.guarded(type, block)
protected inline fun <T: Node> produce(type: NodeType, noinline block: () -> T): T =
attribution.produce(type, block)
@Suppress("NOTHING_TO_INLINE")
protected inline fun <T: Node> expect(type: NodeType? = null, vararg tokenTypes: TokenType, noinline block: (Token) -> T): T =
guarded(type) {
protected inline fun <T: Node> expect(type: NodeType, vararg tokenTypes: TokenType, noinline block: (Token) -> T): T =
produce(type) {
block(expect(*tokenTypes))
}

View File

@ -23,7 +23,7 @@ open class ParserNodeAttribution : NodeAttribution {
}
}
override fun <T : Node> guarded(type: NodeType?, block: () -> T): T {
override fun <T : Node> produce(type: NodeType, block: () -> T): T {
var store = mutableListOf<Token>()
current = store
stack.add(store)

View File

@ -2,13 +2,13 @@ package gay.pizza.pork.parser
class SourceIndexCharSource(val delegate: CharSource) : CharSource by delegate {
private var currentLineIndex = 1
private var currentLineColumn = 0
private var currentLineColumn = 1
override fun next(): Char {
val char = delegate.next()
if (char == '\n') {
currentLineIndex++
currentLineColumn = 0
currentLineColumn = 1
}
currentLineColumn++
return char

View File

@ -1,7 +1,7 @@
package gay.pizza.pork.parser
object StringCharConsumer : CharConsumer {
override fun consume(type: TokenType, tokenizer: Tokenizer): String? {
override fun consume(type: TokenType, tokenizer: Tokenizer): String {
val buffer = StringBuilder()
var escape = false
while (true) {
@ -15,6 +15,10 @@ object StringCharConsumer : CharConsumer {
break
}
if (escape) {
escape = false
}
buffer.append(tokenizer.source.next())
if (char == '\\') {

View File

@ -2,6 +2,7 @@ package gay.pizza.pork.parser
interface TokenSource : PeekableSource<Token> {
fun peekTypeAhead(ahead: Int): TokenType
fun consumeAllRemainingTokens(): List<Token> {
val tokens = mutableListOf<Token>()
while (true) {
@ -14,9 +15,5 @@ interface TokenSource : PeekableSource<Token> {
return tokens
}
fun streamAllRemainingTokens(): TokenStream =
TokenStream(consumeAllRemainingTokens().filter { !TokenType.ParserIgnoredTypes.contains(it.type) })
fun ignoringParserIgnoredTypes(): TokenSource =
TokenStreamSource(streamAllRemainingTokens())
fun stream(): TokenStream = TokenStream(consumeAllRemainingTokens())
}

View File

@ -107,7 +107,7 @@ enum class TokenType(vararg val properties: TokenTypeProperty) {
val CharMatches = entries.filter { item -> item.charMatch != null }
val CharConsumes = entries.filter { item -> item.charConsume != null }
val ParserIgnoredTypes: Array<TokenType> = arrayOf(
val ParserIgnoredTypes: Set<TokenType> = setOf(
Whitespace,
BlockComment,
LineComment

View File

@ -1,9 +1,40 @@
package gay.pizza.pork.parser
class Tokenizer(source: CharSource) {
val source: SourceIndexCharSource = SourceIndexCharSource(source)
class Tokenizer(source: CharSource) : TokenSource {
internal val source = SourceIndexCharSource(source)
private var startIndex: SourceIndex = SourceIndex.zero()
private var state = TokenizerState.Normal
private var index = 0
override val currentIndex: Int
get() = index
private val queue = mutableListOf<Token>()
/**
 * Consumes and returns the next token.
 *
 * Tokens already buffered by [peek] or [peekTypeAhead] are handed out first,
 * so a peeked token is neither skipped nor returned out of order; the
 * underlying character source is only advanced when the buffer is empty.
 */
override fun next(): Token {
  val token = if (queue.isNotEmpty()) queue.removeFirst() else readNextToken()
  index++
  return token
}
/**
 * Returns the token at the front of the look-ahead buffer, reading one token
 * into the buffer first when it is empty.
 */
override fun peek(): Token =
  queue.firstOrNull() ?: readNextToken().also { fresh -> queue.add(fresh) }
/**
 * Returns the type of the buffered token at position [ahead], reading
 * further tokens into the look-ahead buffer until that position exists.
 */
override fun peekTypeAhead(ahead: Int): TokenType {
  // Each pass appends exactly one token, so this stops once index `ahead` is valid.
  while (queue.size <= ahead) {
    queue.add(readNextToken())
  }
  return queue[ahead].type
}
private fun nextTokenOrNull(): Token? {
if (source.peek() == CharSource.EndOfFile) {
@ -80,7 +111,7 @@ class Tokenizer(source: CharSource) {
return null
}
fun next(): Token {
private fun readNextToken(): Token {
val what = source.peek()
val token = nextTokenOrNull()
if (token != null) {
@ -95,18 +126,6 @@ class Tokenizer(source: CharSource) {
throw BadCharacterError(what, source.currentSourceIndex(), state)
}
fun stream(): TokenStream {
val tokens = mutableListOf<Token>()
while (true) {
val token = next()
tokens.add(token)
if (token.type == TokenType.EndOfFile) {
break
}
}
return TokenStream(tokens)
}
internal fun produceToken(type: TokenType, text: String) =
Token(type, startIndex, text)

View File

@ -10,8 +10,4 @@ class PorkFile(viewProvider: FileViewProvider) : PsiFileBase(viewProvider, PorkL
}
override fun toString(): String = "Pork"
override fun isPhysical(): Boolean {
return super.isPhysical()
}
}

View File

@ -1,15 +1,15 @@
package gay.pizza.pork.idea
import com.intellij.lexer.LexerBase
import com.intellij.openapi.diagnostic.Logger
import com.intellij.openapi.progress.ProcessCanceledException
import com.intellij.psi.tree.IElementType
import gay.pizza.pork.parser.*
import gay.pizza.pork.parser.BadCharacterError
import gay.pizza.pork.parser.StringCharSource
import gay.pizza.pork.parser.Tokenizer
import gay.pizza.pork.parser.UnterminatedTokenError
import com.intellij.psi.TokenType as PsiTokenType
class PorkLexer : LexerBase() {
private val log: Logger = Logger.getInstance(PorkLexer::class.java)
private lateinit var source: StringCharSource
private lateinit var tokenizer: Tokenizer
private var internalTokenStart: Int = 0

View File

@ -0,0 +1,27 @@
package gay.pizza.pork.idea
import com.intellij.codeInspection.ProblemHighlightType
import com.intellij.lang.annotation.AnnotationHolder
import com.intellij.lang.annotation.Annotator
import com.intellij.lang.annotation.HighlightSeverity
import com.intellij.psi.PsiElement
import gay.pizza.pork.idea.psi.gen.PorkElement
/**
 * Annotator that marks a [PorkElement] as an error when the element exposes
 * a reference and that reference does not resolve.
 */
class PorkReferenceAnnotator : Annotator {
  override fun annotate(element: PsiElement, holder: AnnotationHolder) {
    // Only Pork elements that carry a reference are of interest.
    val porkElement = element as? PorkElement ?: return
    val target = porkElement.reference ?: return

    // A reference that resolves needs no annotation.
    if (target.resolve() != null) return

    val annotation = holder.newAnnotation(HighlightSeverity.ERROR, "Unresolved reference")
    annotation
      .range(porkElement.textRange)
      .highlightType(ProblemHighlightType.LIKE_UNKNOWN_SYMBOL)
      .create()
  }
}

View File

@ -7,10 +7,10 @@ import gay.pizza.pork.parser.ParseError
import gay.pizza.pork.parser.ParserNodeAttribution
class PsiBuilderMarkAttribution(val builder: PsiBuilder) : ParserNodeAttribution() {
override fun <T : Node> guarded(type: NodeType?, block: () -> T): T {
override fun <T : Node> produce(type: NodeType, block: () -> T): T {
val marker = builder.mark()
val result = try {
val item = super.guarded(type, block)
val item = super.produce(type, block)
marker.done(PorkElementTypes.elementTypeFor(item.type))
item
} catch (e: PsiBuilderTokenSource.BadCharacterError) {

View File

@ -1,14 +1,11 @@
package gay.pizza.pork.idea
import com.intellij.lang.PsiBuilder
import gay.pizza.pork.parser.SourceIndex
import gay.pizza.pork.parser.Token
import gay.pizza.pork.parser.TokenSource
import gay.pizza.pork.parser.TokenType
import gay.pizza.pork.parser.*
import com.intellij.psi.TokenType as PsiTokenType
@Suppress("UnstableApiUsage")
class PsiBuilderTokenSource(val builder: PsiBuilder) : TokenSource {
class PsiBuilderTokenSource(val builder: PsiBuilder) : ParserAwareTokenSource {
override val currentIndex: Int = 0
override fun next(): Token {
@ -41,7 +38,5 @@ class PsiBuilderTokenSource(val builder: PsiBuilder) : TokenSource {
return PorkElementTypes.tokenTypeFor(elementType) ?: TokenType.EndOfFile
}
override fun ignoringParserIgnoredTypes(): TokenSource = this
class BadCharacterError(error: String) : RuntimeException(error)
}

View File

@ -3,6 +3,7 @@ package gay.pizza.pork.idea.psi
import com.intellij.lang.ASTNode
import com.intellij.model.Symbol
import com.intellij.navigation.ItemPresentation
import com.intellij.openapi.util.TextRange
import com.intellij.psi.PsiElement
import com.intellij.psi.PsiFileFactory
import com.intellij.psi.PsiReference
@ -51,7 +52,7 @@ object PorkElementHelpers {
unused(type)
if (element is ImportPathElement) {
return PorkFileReference(element, element.textRange)
return PorkFileReference(element, TextRange(0, element.textLength))
}
val symbols = element.childrenOfType<SymbolElement>()
@ -77,7 +78,7 @@ object PorkElementHelpers {
}
if (element is LetAssignmentElement || element is VarAssignmentElement) {
return PorkPresentable(element.name, icon)
return PorkPresentable(element.name, icon, element.containingFile.virtualFile?.name)
}
return null

View File

@ -6,5 +6,5 @@ import javax.swing.Icon
class PorkPresentable(val porkText: String?, val porkIcon: Icon? = null, val porkLocation: String? = null) : ItemPresentation {
override fun getPresentableText(): String? = porkText
override fun getIcon(unused: Boolean): Icon? = porkIcon
override fun getLocationString(): String? = porkLocation
override fun getLocationString(): String? = porkLocation
}

View File

@ -32,6 +32,9 @@
<lang.quoteHandler
language="Pork"
implementationClass="gay.pizza.pork.idea.PorkQuoteHandler"/>
<annotator
language="Pork"
implementationClass="gay.pizza.pork.idea.PorkReferenceAnnotator"/>
<!-- <codeInsight.parameterInfo
language="Pork"
implementationClass="gay.pizza.pork.idea.PorkParameterInfoHandler"/>-->

View File

@ -15,7 +15,6 @@ class TokenizeCommand : CliktCommand(help = "Tokenize Compilation Unit", name =
while (true) {
val token = tokenSource.next()
println("${token.sourceIndex} ${token.type.name} '${sanitize(token.text)}'")
tokenSource.peekTypeAhead(5)
if (token.type == TokenType.EndOfFile) {
break
}