ceph-csi/vendor/github.com/antlr4-go/antlr/v4/lexer.go

427 lines
11 KiB
Go
Raw Permalink Normal View History

// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved.
// Use of this file is governed by the BSD 3-clause license that
// can be found in the LICENSE.txt file in the project root.
package antlr
import (
"fmt"
"strconv"
)
// A lexer is recognizer that draws input symbols from a character stream.
// lexer grammars result in a subclass of this object. A Lexer object
// uses simplified Match() and error recovery mechanisms in the interest
// of speed.
///
type Lexer interface {
TokenSource
Recognizer
Emit() Token
SetChannel(int)
PushMode(int)
PopMode() int
SetType(int)
SetMode(int)
}
type BaseLexer struct {
*BaseRecognizer
Interpreter ILexerATNSimulator
TokenStartCharIndex int
TokenStartLine int
TokenStartColumn int
ActionType int
Virt Lexer // The most derived lexer implementation. Allows virtual method calls.
input CharStream
factory TokenFactory
tokenFactorySourcePair *TokenSourceCharStreamPair
token Token
hitEOF bool
channel int
thetype int
modeStack IntStack
mode int
text string
}
func NewBaseLexer(input CharStream) *BaseLexer {
lexer := new(BaseLexer)
lexer.BaseRecognizer = NewBaseRecognizer()
lexer.input = input
lexer.factory = CommonTokenFactoryDEFAULT
lexer.tokenFactorySourcePair = &TokenSourceCharStreamPair{lexer, input}
lexer.Virt = lexer
lexer.Interpreter = nil // child classes must populate it
// The goal of all lexer rules/methods is to create a token object.
// l is an instance variable as multiple rules may collaborate to
// create a single token. NextToken will return l object after
// Matching lexer rule(s). If you subclass to allow multiple token
// emissions, then set l to the last token to be Matched or
// something non nil so that the auto token emit mechanism will not
// emit another token.
lexer.token = nil
// What character index in the stream did the current token start at?
// Needed, for example, to get the text for current token. Set at
// the start of NextToken.
lexer.TokenStartCharIndex = -1
// The line on which the first character of the token resides///
lexer.TokenStartLine = -1
// The character position of first character within the line///
lexer.TokenStartColumn = -1
// Once we see EOF on char stream, next token will be EOF.
// If you have DONE : EOF then you see DONE EOF.
lexer.hitEOF = false
// The channel number for the current token///
lexer.channel = TokenDefaultChannel
// The token type for the current token///
lexer.thetype = TokenInvalidType
lexer.modeStack = make([]int, 0)
lexer.mode = LexerDefaultMode
// You can set the text for the current token to override what is in
// the input char buffer. Use setText() or can set l instance var.
// /
lexer.text = ""
return lexer
}
const (
LexerDefaultMode = 0
LexerMore = -2
LexerSkip = -3
)
//goland:noinspection GoUnusedConst
const (
LexerDefaultTokenChannel = TokenDefaultChannel
LexerHidden = TokenHiddenChannel
LexerMinCharValue = 0x0000
LexerMaxCharValue = 0x10FFFF
)
func (b *BaseLexer) Reset() {
// wack Lexer state variables
if b.input != nil {
b.input.Seek(0) // rewind the input
}
b.token = nil
b.thetype = TokenInvalidType
b.channel = TokenDefaultChannel
b.TokenStartCharIndex = -1
b.TokenStartColumn = -1
b.TokenStartLine = -1
b.text = ""
b.hitEOF = false
b.mode = LexerDefaultMode
b.modeStack = make([]int, 0)
b.Interpreter.reset()
}
func (b *BaseLexer) GetInterpreter() ILexerATNSimulator {
return b.Interpreter
}
func (b *BaseLexer) GetInputStream() CharStream {
return b.input
}
func (b *BaseLexer) GetSourceName() string {
return b.GrammarFileName
}
func (b *BaseLexer) SetChannel(v int) {
b.channel = v
}
func (b *BaseLexer) GetTokenFactory() TokenFactory {
return b.factory
}
func (b *BaseLexer) setTokenFactory(f TokenFactory) {
b.factory = f
}
func (b *BaseLexer) safeMatch() (ret int) {
defer func() {
if e := recover(); e != nil {
if re, ok := e.(RecognitionException); ok {
b.notifyListeners(re) // Report error
b.Recover(re)
ret = LexerSkip // default
}
}
}()
return b.Interpreter.Match(b.input, b.mode)
}
// NextToken returns a token from the lexer input source i.e., Match a token on the source char stream.
func (b *BaseLexer) NextToken() Token {
if b.input == nil {
panic("NextToken requires a non-nil input stream.")
}
tokenStartMarker := b.input.Mark()
// previously in finally block
defer func() {
// make sure we release marker after Match or
// unbuffered char stream will keep buffering
b.input.Release(tokenStartMarker)
}()
for {
if b.hitEOF {
b.EmitEOF()
return b.token
}
b.token = nil
b.channel = TokenDefaultChannel
b.TokenStartCharIndex = b.input.Index()
b.TokenStartColumn = b.Interpreter.GetCharPositionInLine()
b.TokenStartLine = b.Interpreter.GetLine()
b.text = ""
continueOuter := false
for {
b.thetype = TokenInvalidType
ttype := b.safeMatch()
if b.input.LA(1) == TokenEOF {
b.hitEOF = true
}
if b.thetype == TokenInvalidType {
b.thetype = ttype
}
if b.thetype == LexerSkip {
continueOuter = true
break
}
if b.thetype != LexerMore {
break
}
}
if continueOuter {
continue
}
if b.token == nil {
b.Virt.Emit()
}
return b.token
}
}
// Skip instructs the lexer to Skip creating a token for current lexer rule
// and look for another token. [NextToken] knows to keep looking when
// a lexer rule finishes with token set to [SKIPTOKEN]. Recall that
// if token==nil at end of any token rule, it creates one for you
// and emits it.
func (b *BaseLexer) Skip() {
b.thetype = LexerSkip
}
func (b *BaseLexer) More() {
b.thetype = LexerMore
}
// SetMode changes the lexer to a new mode. The lexer will use this mode from hereon in and the rules for that mode
// will be in force.
func (b *BaseLexer) SetMode(m int) {
b.mode = m
}
// PushMode saves the current lexer mode so that it can be restored later. See [PopMode], then sets the
// current lexer mode to the supplied mode m.
func (b *BaseLexer) PushMode(m int) {
if runtimeConfig.lexerATNSimulatorDebug {
fmt.Println("pushMode " + strconv.Itoa(m))
}
b.modeStack.Push(b.mode)
b.mode = m
}
// PopMode restores the lexer mode saved by a call to [PushMode]. It is a panic error if there is no saved mode to
// return to.
func (b *BaseLexer) PopMode() int {
if len(b.modeStack) == 0 {
panic("Empty Stack")
}
if runtimeConfig.lexerATNSimulatorDebug {
fmt.Println("popMode back to " + fmt.Sprint(b.modeStack[0:len(b.modeStack)-1]))
}
i, _ := b.modeStack.Pop()
b.mode = i
return b.mode
}
func (b *BaseLexer) inputStream() CharStream {
return b.input
}
// SetInputStream resets the lexer input stream and associated lexer state.
func (b *BaseLexer) SetInputStream(input CharStream) {
b.input = nil
b.tokenFactorySourcePair = &TokenSourceCharStreamPair{b, b.input}
b.Reset()
b.input = input
b.tokenFactorySourcePair = &TokenSourceCharStreamPair{b, b.input}
}
func (b *BaseLexer) GetTokenSourceCharStreamPair() *TokenSourceCharStreamPair {
return b.tokenFactorySourcePair
}
// EmitToken by default does not support multiple emits per [NextToken] invocation
// for efficiency reasons. Subclass and override this func, [NextToken],
// and [GetToken] (to push tokens into a list and pull from that list
// rather than a single variable as this implementation does).
func (b *BaseLexer) EmitToken(token Token) {
b.token = token
}
// Emit is the standard method called to automatically emit a token at the
// outermost lexical rule. The token object should point into the
// char buffer start..stop. If there is a text override in 'text',
// use that to set the token's text. Override this method to emit
// custom [Token] objects or provide a new factory.
// /
func (b *BaseLexer) Emit() Token {
t := b.factory.Create(b.tokenFactorySourcePair, b.thetype, b.text, b.channel, b.TokenStartCharIndex, b.GetCharIndex()-1, b.TokenStartLine, b.TokenStartColumn)
b.EmitToken(t)
return t
}
// EmitEOF emits an EOF token. By default, this is the last token emitted
func (b *BaseLexer) EmitEOF() Token {
cpos := b.GetCharPositionInLine()
lpos := b.GetLine()
eof := b.factory.Create(b.tokenFactorySourcePair, TokenEOF, "", TokenDefaultChannel, b.input.Index(), b.input.Index()-1, lpos, cpos)
b.EmitToken(eof)
return eof
}
// GetCharPositionInLine returns the current position in the current line as far as the lexer is concerned.
func (b *BaseLexer) GetCharPositionInLine() int {
return b.Interpreter.GetCharPositionInLine()
}
func (b *BaseLexer) GetLine() int {
return b.Interpreter.GetLine()
}
func (b *BaseLexer) GetType() int {
return b.thetype
}
func (b *BaseLexer) SetType(t int) {
b.thetype = t
}
// GetCharIndex returns the index of the current character of lookahead
func (b *BaseLexer) GetCharIndex() int {
return b.input.Index()
}
// GetText returns the text Matched so far for the current token or any text override.
func (b *BaseLexer) GetText() string {
if b.text != "" {
return b.text
}
return b.Interpreter.GetText(b.input)
}
// SetText sets the complete text of this token; it wipes any previous changes to the text.
func (b *BaseLexer) SetText(text string) {
b.text = text
}
// GetATN returns the ATN used by the lexer.
func (b *BaseLexer) GetATN() *ATN {
return b.Interpreter.ATN()
}
// GetAllTokens returns a list of all [Token] objects in input char stream.
// Forces a load of all tokens that can be made from the input char stream.
//
// Does not include EOF token.
func (b *BaseLexer) GetAllTokens() []Token {
vl := b.Virt
tokens := make([]Token, 0)
t := vl.NextToken()
for t.GetTokenType() != TokenEOF {
tokens = append(tokens, t)
t = vl.NextToken()
}
return tokens
}
func (b *BaseLexer) notifyListeners(e RecognitionException) {
start := b.TokenStartCharIndex
stop := b.input.Index()
text := b.input.GetTextFromInterval(NewInterval(start, stop))
msg := "token recognition error at: '" + text + "'"
listener := b.GetErrorListenerDispatch()
listener.SyntaxError(b, nil, b.TokenStartLine, b.TokenStartColumn, msg, e)
}
func (b *BaseLexer) getErrorDisplayForChar(c rune) string {
if c == TokenEOF {
return "<EOF>"
} else if c == '\n' {
return "\\n"
} else if c == '\t' {
return "\\t"
} else if c == '\r' {
return "\\r"
} else {
return string(c)
}
}
func (b *BaseLexer) getCharErrorDisplay(c rune) string {
return "'" + b.getErrorDisplayForChar(c) + "'"
}
// Recover can normally Match any char in its vocabulary after Matching
// a token, so here we do the easy thing and just kill a character and hope
// it all works out. You can instead use the rule invocation stack
// to do sophisticated error recovery if you are in a fragment rule.
//
// In general, lexers should not need to recover and should have rules that cover any eventuality, such as
// a character that makes no sense to the recognizer.
func (b *BaseLexer) Recover(re RecognitionException) {
if b.input.LA(1) != TokenEOF {
if _, ok := re.(*LexerNoViableAltException); ok {
// Skip a char and try again
b.Interpreter.Consume(b.input)
} else {
// TODO: Do we lose character or line position information?
b.input.Consume()
}
}
}