package main

import (
	"fmt"
	"strings"
	"unicode/utf8"
)

// stateFunc is one state of the lexer's state machine. It consumes input,
// emits tokens, and returns the next state, or nil to terminate lexing.
type stateFunc func(*lexer) stateFunc

// lexer holds the scanning state for a single input string. Tokens are
// delivered to the consumer over tokenStream.
type lexer struct {
	input       string     // the string being scanned
	start       int        // start position of the token currently being built
	pos         int        // current read position in the input
	width       int        // byte width of the last rune read, for backup
	tokenStream chan Token // scanned tokens are sent on this channel
}

// run drives the state machine starting at lexCommand until a state returns
// nil, then closes the token stream to signal the consumer.
func (l *lexer) run() {
	for state := lexCommand; state != nil; {
		state = state(l)
	}
	close(l.tokenStream)
}

// emit sends the text accumulated since start as a token of type t and
// advances start past it.
func (l *lexer) emit(t TokenType) {
	l.tokenStream <- Token{
		typ: t,
		val: l.input[l.start:l.pos],
	}
	l.start = l.pos
}

// errorf emits a TokenErr carrying the formatted message and returns nil so
// the state machine terminates.
func (l *lexer) errorf(format string, args ...interface{}) stateFunc {
	l.tokenStream <- Token{
		typ: TokenErr,
		val: fmt.Sprintf(format, args...),
	}
	return nil
}

// eof is the sentinel rune returned by next when the input is exhausted.
const eof rune = -1

// next consumes and returns the next rune in the input, or eof when the
// input is exhausted. It records the rune's byte width so backup can undo
// exactly one call.
func (l *lexer) next() rune {
	if l.pos >= len(l.input) {
		// Zero width so a subsequent backup at eof is a no-op.
		l.width = 0
		return eof
	}
	var r rune
	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
	l.pos += l.width
	return r
}

// backup steps back one rune. It is only valid once per call to next.
func (l *lexer) backup() {
	l.pos -= l.width
}

// ignore discards the input scanned since the last token.
func (l *lexer) ignore() {
	l.start = l.pos
}

// reset rewinds the read position to the start of the current token.
func (l *lexer) reset() {
	l.pos = l.start
}

// peek returns the next rune without consuming it. The saved width is
// restored so peek does not disturb a pending backup.
func (l *lexer) peek() rune {
	w := l.width
	r := l.next()
	l.backup()
	l.width = w
	return r
}

// accept consumes the next rune if it appears in valid and reports whether
// it did so.
func (l *lexer) accept(valid string) bool {
	if strings.IndexRune(valid, l.next()) >= 0 {
		return true
	}
	l.backup()
	return false
}

// acceptAll consumes a (possibly empty) run of runes from valid.
func (l *lexer) acceptAll(valid string) {
	for strings.IndexRune(valid, l.next()) >= 0 {
	}
	l.backup()
}

// acceptPassing consumes the next rune if valid returns true for it and
// reports whether it did so.
func (l *lexer) acceptPassing(valid func(rune) bool) bool {
	if valid(l.next()) {
		return true
	}
	l.backup()
	return false
}

// acceptAllPassing consumes a (possibly empty) run of runes for which valid
// returns true.
func (l *lexer) acceptAllPassing(valid func(rune) bool) {
	for valid(l.next()) {
	}
	l.backup()
}

// TokenType identifies the kind of a lexed Token.
type TokenType int

const (
	TokenErr                 TokenType = iota // Lexing error
	TokenEOF                                  // end of file
	TokenSemicolon                            // ;
	TokenLParen                               // (
	TokenRParen                               // )
	TokenLBrace                               // {
	TokenRBrace                               // }
	TokenLBrack                               // [
	TokenRBrack                               // ]
	TokenCommand                              // A command character
	TokenHash                                 // #
	TokenAt                                   // @
	TokenDot                                  // .
	TokenAst                                  // *
	TokenBar                                  // |
	TokenQuestion                             // ?
	TokenPatternStringIndex                   // A string index in a pattern
	TokenPatternIntegerIndex                  // An integer index in a pattern
)

// Token is a single lexical unit: its type plus the literal text matched.
type Token struct {
	typ TokenType
	val string
}

// String renders the token for debugging, truncating long values.
func (t Token) String() string {
	switch t.typ {
	case TokenEOF:
		return "EOF"
	case TokenErr:
		return t.val
	}
	if len(t.val) > 10 {
		return fmt.Sprintf("%.10q...", t.val)
	}
	return fmt.Sprintf("%q", t.val)
}

// Lex starts lexing input in a new goroutine and returns the channel on
// which tokens will be delivered. The channel is closed when lexing ends
// (after TokenEOF or a TokenErr).
func Lex(input string) chan Token {
	l := &lexer{
		input:       input,
		tokenStream: make(chan Token),
	}
	go l.run()
	return l.tokenStream
}

const (
	whitespace         string = " \t"
	whitespaceNewlines string = " \t\r\n"
)

// isAlpha reports whether r is an ASCII letter.
// Fixed: the original used r < 'z', which wrongly excluded 'z'.
func isAlpha(r rune) bool {
	return ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z')
}

// isDigit reports whether r is an ASCII decimal digit.
func isDigit(r rune) bool {
	return '0' <= r && r <= '9'
}

// isAlphaNumeric reports whether r is an ASCII letter or digit.
func isAlphaNumeric(r rune) bool {
	return isAlpha(r) || isDigit(r)
}

// isStringIndexChar reports whether r may appear in a #string-index.
func isStringIndexChar(r rune) bool {
	return isAlphaNumeric(r) || r == '_' || r == '-'
}

// lexCommand is the start state: it skips whitespace, then lexes a single
// punctuation token or a one-letter command, dispatching to the pattern
// states after '#' and '@'.
func lexCommand(l *lexer) stateFunc {
	l.acceptAll(whitespace)
	l.ignore()
	if l.peek() == eof {
		l.emit(TokenEOF)
		return nil
	}
	r := l.next()
	switch r {
	case '#':
		l.emit(TokenHash)
		return lexPatternStringIndex
	case '@':
		l.emit(TokenAt)
		return lexPatternIntegerIndex
	case '.':
		l.emit(TokenDot)
		return lexCommand
	case '*':
		l.emit(TokenAst)
		return lexCommand
	case '|':
		l.emit(TokenBar)
		return lexCommand
	case '(':
		l.emit(TokenLParen)
		return lexCommand
	case ')':
		l.emit(TokenRParen)
		return lexCommand
	case '?':
		l.emit(TokenQuestion)
		return lexCommand
	case '{':
		l.emit(TokenLBrace)
		return lexCommand
	case '}':
		l.emit(TokenRBrace)
		return lexCommandEnd
	}
	if isAlpha(r) {
		l.emit(TokenCommand)
		return lexCommandEnd
	}
	return l.errorf("Expected command found something else")
}

// lexPatternStringIndex lexes the identifier following '#'.
func lexPatternStringIndex(l *lexer) stateFunc {
	l.acceptAllPassing(isStringIndexChar)
	l.emit(TokenPatternStringIndex)
	return lexCommand
}

// lexPatternIntegerIndex lexes the digit run following '@'.
func lexPatternIntegerIndex(l *lexer) stateFunc {
	l.acceptAllPassing(isDigit)
	l.emit(TokenPatternIntegerIndex)
	return lexCommand
}

// lexCommandEnd expects either end of input or the ';' terminating a
// command.
func lexCommandEnd(l *lexer) stateFunc {
	if l.peek() == eof {
		l.emit(TokenEOF)
		return nil
	}
	if l.accept(";") {
		l.emit(TokenSemicolon)
		return lexCommand
	}
	return l.errorf("Expected ; found something else")
}