21
0
xls/format/lexer.go

151 lines
3.4 KiB
Go

package format
import (
"strconv"
"strings"
"unicode/utf8"
)
const (
DATEFORMAT = iota
DECIMALFORMAT = iota
)
// LexToken holds is a (type, value) array.
type LexToken [3]string
// EOF character
var EOF string = "+++EOF+++"
// lexerState represents the state of the scanner
// as a function that returns the next state.
type lexerState func(*lexer) lexerState
// run lexes the input by executing state functions until
// the state is nil.
func (l *lexer) Run() {
for state := l.initialState; state != nil; {
state = state(l)
}
}
// Lexer creates a new scanner for the input string.
func Lexer(input string) (*lexer, []LexToken) {
l := &lexer{
input: input,
tokens: make([]LexToken, 0),
lineno: 1,
}
l.initialState = initLexerState
l.Run()
return l, l.tokens
}
// lexer holds the state of the scanner.
type lexer struct {
input string // the string being scanned.
start int // start position of this item.
pos int // current position in the input.
width int // width of last rune read from input.
tokens []LexToken // scanned items.
initialState lexerState
typ int
lineno int
}
// next returns the next rune in the input.
func (l *lexer) next() string {
var r rune
if l.pos >= len(l.input) {
l.width = 0
return EOF
}
r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
l.pos += l.width
return string(r)
}
// ignore skips over the pending input before this point.
func (l *lexer) ignore() {
l.start = l.pos
}
// backup steps back one rune.
// Can be called only once per call of next.
func (l *lexer) backup() {
l.pos -= l.width
}
// acceptRun consumes a run of runes from the valid set.
func (l *lexer) acceptRun(valid string) {
for strings.Index(valid, l.next()) >= 0 {
}
l.backup()
}
// acceptRun consumes a run of runes from the valid set.
func (l *lexer) acceptUntil(marker string) {
for r := l.next(); r != EOF && strings.Index(marker, r) < 0; r = l.next() {
}
}
// emit passes an item back to the client.
func (l *lexer) emit(t string) {
l.tokens = append(l.tokens, LexToken{t, l.input[l.start:l.pos], strconv.Itoa(l.lineno)})
l.start = l.pos
}
// emit passes an item back to the client.
func (l *lexer) emitRaw() {
if l.pos-l.start > 1 {
l.tokens = append(l.tokens, LexToken{T_RAW_MARK, l.input[l.start : l.pos-1], strconv.Itoa(l.lineno)})
l.start = l.pos - 1
}
}
// emit passes an item back to the client.
func (l *lexer) emitWithoutEnd(t string) {
if l.pos-l.start > 1 {
l.tokens = append(l.tokens, LexToken{t, l.input[l.start : l.pos-1], strconv.Itoa(l.lineno)})
l.start = l.pos
}
}
// initialState is the starting point for the
// scanner. It scans through each character and decides
// which state to create for the lexer. lexerState == nil
// is exit scanner.
func initLexerState(l *lexer) lexerState {
for r := l.next(); r != EOF; r = l.next() {
if r == "y" {
l.emitRaw()
l.acceptRun("y")
l.emit(T_YEAR_MARK)
} else if r == "m" {
l.emitRaw()
l.acceptRun("m")
l.emit(T_MONTH_MARK)
} else if r == "d" {
l.emitRaw()
l.acceptRun("d")
l.emit(T_DAY_MARK)
} else if r == "\"" {
l.emitRaw()
l.ignore()
l.acceptUntil("\"")
l.emitWithoutEnd(T_STRING_MARK)
} else if r == "#" {
l.emitRaw()
l.acceptRun("#,")
l.emit(T_COMMA_MARK)
} else if r == "0" {
l.emitRaw()
l.acceptRun("0123456789.")
l.emit(T_DECIMAL_MARK)
}
}
l.emit(T_EOF)
return nil
}