diff options
author | Charlie Stanton <charlie@shtanton.xyz> | 2022-08-26 11:51:46 +0100 |
---|---|---|
committer | Charlie Stanton <charlie@shtanton.xyz> | 2022-08-26 11:51:46 +0100 |
commit | ce5c224211a94bfd4c898b51d15febdf2ed9d6f2 (patch) | |
tree | 8d1c9db463d9c1793bd3aad2b6875a22d4add90c /main | |
parent | ececdecdaf6c6f6295d31a92f0663d703e7760dd (diff) | |
download | stred-go-ce5c224211a94bfd4c898b51d15febdf2ed9d6f2.tar |
Refactors commands and filters to operate on a list of walk items, and adds lexing and parsing of the program argument
Diffstat (limited to 'main')
-rw-r--r-- | main/command.go | 84 | ||||
-rw-r--r-- | main/filter.go | 52 | ||||
-rw-r--r-- | main/json.go | 5 | ||||
-rw-r--r-- | main/lex.go | 224 | ||||
-rw-r--r-- | main/main.go | 119 | ||||
-rw-r--r-- | main/parse.go | 145 | ||||
-rw-r--r-- | main/pathfilter.go | 31 | ||||
-rw-r--r-- | main/pathfilterast.go | 56 |
8 files changed, 593 insertions, 123 deletions
diff --git a/main/command.go b/main/command.go index 560d3c3..bad5b1e 100644 --- a/main/command.go +++ b/main/command.go @@ -2,24 +2,26 @@ package main type PrintValueCommand struct {} func (cmd PrintValueCommand) exec(state *ProgramState) { - state.out <- state.space + for _, item := range state.space { + state.out <- item + } } type ToggleTerminalCommand struct {} func (cmd ToggleTerminalCommand) exec(state *ProgramState) { - terminal, isTerminal := state.space.value.(TerminalValue) - if !isTerminal { - return + toggled := map[TerminalValue]TerminalValue { + ArrayBegin: MapBegin, + ArrayEnd: MapEnd, + MapBegin: ArrayBegin, + MapEnd: ArrayEnd, } - switch terminal { - case ArrayBegin: - state.space.value = MapBegin - case ArrayEnd: - state.space.value = MapEnd - case MapBegin: - state.space.value = ArrayBegin - case MapEnd: - state.space.value = ArrayEnd + + for i := range state.space { + terminal, isTerminal := state.space[i].value.(TerminalValue) + if !isTerminal { + continue + } + state.space[i].value = toggled[terminal] } } @@ -28,11 +30,63 @@ type FilteredCommand struct { command Command } func (cmd FilteredCommand) exec(state *ProgramState) { - if cmd.filter.exec(state) { - cmd.command.exec(state) + for _, item := range state.space { + if cmd.filter.exec(item) { + cmd.command.exec(state) + return + } + } +} + +type SequenceCommand struct { + commands []Command +} +func (cmd SequenceCommand) exec(state *ProgramState) { + for _, command := range cmd.commands { + command.exec(state) + } +} + +type AppendCommand struct { + values []WalkValue +} +func (cmd AppendCommand) exec(state *ProgramState) { + for _, value := range cmd.values { + state.space = append(state.space, WalkItem { + path: nil, + value: value, + }) } } +type PrependCommand struct { + values []WalkValue +} +func (cmd PrependCommand) exec(state *ProgramState) { + var newItems []WalkItem + for _, value := range cmd.values { + newItems = append(newItems, WalkItem { + path: nil, + value: value, + }) + } 
+ state.space = append(newItems, state.space...) +} + +type PrintLiteralsCommand struct { + items []WalkItem +} +func (cmd PrintLiteralsCommand) exec(state *ProgramState) { + for _, item := range cmd.items { + state.out <- item + } +} + +type DeleteAllCommand struct {} +func (cmd DeleteAllCommand) exec(state *ProgramState) { + state.space = nil +} + type Command interface { exec(*ProgramState) }
\ No newline at end of file diff --git a/main/filter.go b/main/filter.go index 95e6d82..662fa7b 100644 --- a/main/filter.go +++ b/main/filter.go @@ -3,10 +3,10 @@ package main type PathFilter struct { initial PathFilterState } -func (filter PathFilter) exec(state *ProgramState) bool { +func (filter PathFilter) exec(space WalkItem) bool { pathFilterState := make(map[PathFilterState]struct{}) pathFilterState[filter.initial] = struct{}{} - for _, segment := range state.space.path { + for _, segment := range space.path { nextPathFilterState := make(map[PathFilterState]struct{}) for curState := range pathFilterState { for nextState := range curState.eat(segment) { @@ -23,12 +23,58 @@ func (filter PathFilter) exec(state *ProgramState) bool { return false } +type MapTerminalFilter struct {} +func (filter MapTerminalFilter) exec(space WalkItem) bool { + terminal, isTerminal := space.value.(TerminalValue) + if !isTerminal { + return false + } + return terminal == MapBegin || terminal == MapEnd +} + +type NonTerminalFilter struct {} +func (filter NonTerminalFilter) exec(space WalkItem) bool { + _, isTerminal := space.value.(TerminalValue) + return !isTerminal +} + type RangeFilter struct { start Filter end Filter active bool } +func (filter *RangeFilter) exec(space WalkItem) bool { + if filter.active { + if filter.end.exec(space) { + filter.active = false + } + return true + } else { + if filter.start.exec(space) { + filter.active = true + } + return filter.active + } +} + +type BeginTerminalFilter struct {} +func (filter BeginTerminalFilter) exec(space WalkItem) bool { + terminal, isTerminal := space.value.(TerminalValue) + if !isTerminal { + return false + } + return terminal == ArrayBegin || terminal == MapBegin +} + +type EndTerminalFilter struct {} +func (filter EndTerminalFilter) exec(space WalkItem) bool { + terminal, isTerminal := space.value.(TerminalValue) + if !isTerminal { + return false + } + return terminal == ArrayEnd || terminal == MapEnd +} type Filter 
interface { - exec(*ProgramState) bool + exec(WalkItem) bool }
\ No newline at end of file diff --git a/main/json.go b/main/json.go index 66ca5d5..77c3733 100644 --- a/main/json.go +++ b/main/json.go @@ -6,11 +6,6 @@ import ( "fmt" ) -type WalkItem struct { - value WalkValue - path Path -} - type WalkItemStream struct { channel chan WalkItem rewinds []WalkItem diff --git a/main/lex.go b/main/lex.go new file mode 100644 index 0000000..6977f8a --- /dev/null +++ b/main/lex.go @@ -0,0 +1,224 @@ +package main + +import ( + "fmt" + "strings" + "unicode/utf8" +) + +type stateFunc func(*lexer) stateFunc + +type lexer struct { + input string + start int + pos int + width int + tokenStream chan Token +} + +func (l *lexer) run() { + for state := lexCommand; state != nil; { + state = state(l) + } + close(l.tokenStream) +} + +func (l *lexer) emit(t TokenType) { + l.tokenStream <- Token{ + typ: t, + val: l.input[l.start:l.pos], + } + l.start = l.pos +} + +func (l *lexer) errorf(format string, args ...interface{}) stateFunc { + l.tokenStream <- Token{ + typ: TokenErr, + val: fmt.Sprintf(format, args...), + } + return nil +} + +const eof rune = -1 + +func (l *lexer) next() rune { + if l.pos >= len(l.input) { + l.width = 0 + return eof + } + var r rune + r, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) + l.pos += l.width + return r +} + +func (l *lexer) backup() { + l.pos -= l.width +} + +func (l *lexer) ignore() { + l.start = l.pos +} + +func (l *lexer) reset() { + l.pos = l.start +} + +func (l *lexer) peek() rune { + w := l.width + r := l.next() + l.backup() + l.width = w + return r +} + +func (l *lexer) accept(valid string) bool { + if strings.IndexRune(valid, l.next()) >= 0 { + return true + } + l.backup() + return false +} + +func (l *lexer) acceptAll(valid string) { + for strings.IndexRune(valid, l.next()) >= 0 {} + l.backup() +} + +func (l *lexer) acceptPassing(valid func(rune) bool) bool { + if valid(l.next()) { + return true + } + l.backup() + return false +} + +func (l *lexer) acceptAllPassing(valid func(rune) bool) { + for 
valid(l.next()) {} + l.backup() +} + +type TokenType int + +const ( + TokenErr TokenType = iota // Lexing error + TokenEOF // end of file + TokenSemicolon // ; + TokenLParen // ( + TokenRParen // ) + TokenLBrace // { + TokenRBrace // } + TokenLBrack // [ + TokenRBrack // ] + TokenCommand // A command character + TokenHash // # + TokenAt // @ + TokenDot // . + TokenAst // * + TokenPatternStringIndex // A string index in a pattern + TokenPatternIntegerIndex // An integer index in a pattern +) + +type Token struct { + typ TokenType + val string +} + +func (t Token) String() string { + switch t.typ { + case TokenEOF: + return "EOF" + case TokenErr: + return t.val + } + if len(t.val) > 10 { + return fmt.Sprintf("%.10q...", t.val) + } + return fmt.Sprintf("%q", t.val) +} + +func Lex(input string) chan Token { + l := &lexer{ + input: input, + tokenStream: make(chan Token), + } + go l.run() + return l.tokenStream +} + +const ( + whitespace string = " \t" + whitespaceNewlines string = " \t\r\n" +) + +func isAlpha(r rune) bool { + return ('a' <= r && r < 'z') || ('A' <= r && r <= 'Z') +} +func isDigit(r rune) bool { + return '0' <= r && r <= '9' +} +func isAlphaNumeric(r rune) bool { + return isAlpha(r) || isDigit(r) +} +func isStringIndexChar(r rune) bool { + return isAlphaNumeric(r) || r == '_' || r == '-' +} + +func lexCommand(l *lexer) stateFunc { + l.acceptAll(whitespace) + l.ignore() + if l.peek() == eof { + l.emit(TokenEOF) + return nil + } + r := l.next() + switch r { + case '#': + l.emit(TokenHash) + return lexPatternStringIndex + case '@': + l.emit(TokenAt) + return lexPatternIntegerIndex + case '.': + l.emit(TokenDot) + return lexCommand + case '*': + l.emit(TokenAst) + return lexCommand + case '{': + l.emit(TokenLBrace) + return lexCommand + case '}': + l.emit(TokenRBrace) + return lexCommandEnd + } + if isAlpha(r) { + l.emit(TokenCommand) + return lexCommandEnd + } + return l.errorf("Expected command found something else") +} + +func lexPatternStringIndex(l 
*lexer) stateFunc { + l.acceptAllPassing(isStringIndexChar) + l.emit(TokenPatternStringIndex) + return lexCommand +} + +func lexPatternIntegerIndex(l *lexer) stateFunc { + l.acceptAllPassing(isDigit) + l.emit(TokenPatternIntegerIndex) + return lexCommand +} + +func lexCommandEnd(l *lexer) stateFunc { + if l.peek() == eof { + l.emit(TokenEOF) + return nil + } + if l.accept(";") { + l.emit(TokenSemicolon) + return lexCommand + } + return l.errorf("Expected ; found something else") +} diff --git a/main/main.go b/main/main.go index 31e46c6..5503fb1 100644 --- a/main/main.go +++ b/main/main.go @@ -1,7 +1,6 @@ package main import ( - "fmt" "os" "bufio" ) @@ -23,117 +22,67 @@ type ValueString string type WalkValue interface {} +type WalkItem struct { + value WalkValue + path Path +} + type Program []Command type ProgramState struct { - space WalkItem + space []WalkItem in chan WalkItem out chan WalkItem program []Command } -type StringSegmentPathFilterAST struct { - index string -} -func (ast StringSegmentPathFilterAST) compileWith(next PathFilterState) PathFilterState { - return StringSegmentPathFilter { - index: ast.index, - next: next, +func main() { + quiet := false + var input string + hasInput := false + + for i := 1; i < len(os.Args); i += 1 { + switch os.Args[i] { + case "-n": + quiet = true + continue + } + if i < len(os.Args) - 1 { + panic("Unexpected arguments after program") + } + input = os.Args[i] + hasInput = true } -} - -type RepeatPathFilterAST struct { - content PathFilterAST -} -func (ast RepeatPathFilterAST) compileWith(next PathFilterState) PathFilterState { - nextGroup := &GroupPathFilter{} - repeatStart := ast.content.compileWith(nextGroup) - nextGroup.filters = []PathFilterState{next, repeatStart} - return nextGroup -} - -type SequencePathFilterAST struct { - sequence []PathFilterAST -} -func (ast SequencePathFilterAST) compileWith(next PathFilterState) PathFilterState { - for i := len(ast.sequence) - 1; i >= 0; i -= 1 { - next = 
ast.sequence[i].compileWith(next) + if !hasInput { + panic("Missing program") } - return next -} -type AnySegmentPathFilterAST struct {} -func (ast AnySegmentPathFilterAST) compileWith(next PathFilterState) PathFilterState { - return AnySegmentPathFilter{next: next} -} + tokens := Lex(input) + program := Parse(tokens) -type PathFilterAST interface { - compileWith(PathFilterState) PathFilterState -} - -func compilePathFilterAST(ast PathFilterAST) PathFilter { - return PathFilter{ - initial: ast.compileWith(NonePathFilter{}), - } -} - -func main() { - if len(os.Args) < 2 { - fmt.Println("Missing program arg") - return - } - //input := os.Args[1] - //tokens := Lex(input) - //program := Parse(tokens) - stdin := bufio.NewReader(os.Stdin) dataStream := Json(stdin) - - var allRemainingPathFilter AnySegmentPathFilter - { - g := GroupPathFilter { - filters: []PathFilterState{NonePathFilter{}}, - } - allRemainingPathFilter = AnySegmentPathFilter { - next: PathFilterState(&g), - } - g.filters = append(g.filters, PathFilterState(&allRemainingPathFilter)) - } state := ProgramState { in: dataStream, out: make(chan WalkItem), - program: []Command { - FilteredCommand { - filter: compilePathFilterAST( - StringSegmentPathFilterAST {"people"}, - ), - command: PrintValueCommand{}, - }, - FilteredCommand { - filter: compilePathFilterAST( - SequencePathFilterAST { - []PathFilterAST{ - StringSegmentPathFilterAST {"people"}, - AnySegmentPathFilterAST{}, - StringSegmentPathFilterAST {"age"}, - }, - }, - ), - command: PrintValueCommand{}, - }, - }, + program: program, } go func () { for walkItem := range dataStream { - state.space = walkItem + state.space = []WalkItem{walkItem} for _, cmd := range state.program { cmd.exec(&state) } + if !quiet { + for _, item := range state.space { + state.out <- item + } + } } close(state.out) }() JsonOut(state.out) -} +}
\ No newline at end of file diff --git a/main/parse.go b/main/parse.go new file mode 100644 index 0000000..e876010 --- /dev/null +++ b/main/parse.go @@ -0,0 +1,145 @@ +package main + +import ( + "strings" + "strconv" + "fmt" +) + +type parser struct { + tokenStream chan Token + rewinds []Token +} +func (p *parser) next() Token { + if len(p.rewinds) == 0 { + return <- p.tokenStream + } + token := p.rewinds[len(p.rewinds)-1] + p.rewinds = p.rewinds[:len(p.rewinds)-1] + return token +} +func (p *parser) rewind(token Token) { + p.rewinds = append(p.rewinds, token) +} +func (p *parser) peek() Token { + token := p.next() + p.rewind(token) + return token +} + +// TODO: make a pratt parser +func (p *parser) parsePathPatternFilter() PathFilterAST { + var segments []PathFilterAST + loop: for { + token := p.next() + switch token.typ { + case TokenHash: + stringIndex := p.next() + if stringIndex.typ != TokenPatternStringIndex { + panic("Expected string index after # in pattern") + } + segments = append(segments, StringSegmentPathFilterAST{stringIndex.val}) + case TokenAt: + intIndex := p.next() + if intIndex.typ != TokenPatternIntegerIndex { + panic("Expected integer index after @ in pattern") + } + index, err := strconv.Atoi(intIndex.val) + if err != nil { + panic("Expected integer index after @ in pattern") + } + segments = append(segments, IntegerSegmentPathFilterAST{index}) + case TokenDot: + segments = append(segments, AnySegmentPathFilterAST{}) + case TokenAst: + if len(segments) == 0 { + panic("Invalid * in pattern, * must go after something") + } + segments[len(segments) - 1] = RepeatPathFilterAST {segments[len(segments)-1]} + default: + p.rewind(token) + break loop + } + } + return SequencePathFilterAST {segments} +} + +// TODO: should only return a single filter +func (p *parser) parseFilter() []Filter { + var filters []Filter + token := p.next() + switch token.typ { + case TokenHash, TokenAt, TokenDot: + p.rewind(token) + filterAst := p.parsePathPatternFilter() + 
filters = append(filters, compilePathFilterAST(filterAst)) + token = p.next() + } + if len(filters) == 0 { + panic("Missing filter") + } + p.rewind(token) + return filters +} + +func (p *parser) parseBasicCommand(commandChar rune) Command { + switch commandChar { + case 'p': + return PrintValueCommand{} + case 'd': + return DeleteAllCommand{} + default: + panic("Invalid command") + } +} + +func (p *parser) parseCommand() Command { + token := p.next() + switch token.typ { + case TokenHash, TokenAt, TokenDot: + p.rewind(token) + filters := p.parseFilter() + command := p.parseCommand() + for _, filter := range filters { + command = FilteredCommand { + filter: filter, + command: command, + } + } + return command + case TokenCommand: + commandChar, _, err := strings.NewReader(token.val).ReadRune() + if err != nil { + panic("Error reading a command character!?") + } + return p.parseBasicCommand(commandChar) + default: + fmt.Println(token) + panic("Invalid token, expected command") + } +} + +func (p *parser) parseCommands() []Command { + var commands []Command + for { + nextToken := p.peek() + if nextToken.typ == TokenEOF || nextToken.typ == TokenRBrace { + return commands + } + commands = append(commands, p.parseCommand()) + semicolon := p.next() + if semicolon.typ == TokenEOF || semicolon.typ == TokenRBrace { + return commands + } + if semicolon.typ != TokenSemicolon { + panic("Expected ; after command") + } + } +} + +func Parse(tokens chan Token) []Command { + p := parser { + tokenStream: tokens, + } + return p.parseCommands() +} diff --git a/main/pathfilter.go b/main/pathfilter.go index 7b6c64f..b64872e 100644 --- a/main/pathfilter.go +++ b/main/pathfilter.go @@ -1,20 +1,5 @@ package main -type MapTerminalFilter struct {} -func (filter MapTerminalFilter) exec(state *ProgramState) bool { - terminal, isTerminal := state.space.value.(TerminalValue) - if !isTerminal { - return false - } - return terminal == MapBegin || terminal == MapEnd -} - -type NonTerminalFilter 
struct {} -func (filter NonTerminalFilter) exec(state *ProgramState) bool { - _, isTerminal := state.space.value.(TerminalValue) - return !isTerminal -} - type AnySegmentPathFilter struct { next PathFilterState } @@ -72,6 +57,22 @@ func (filter StringSegmentPathFilter) accept() bool { return false } +type IntegerSegmentPathFilter struct { + index int + next PathFilterState +} +func (filter IntegerSegmentPathFilter) eat(segment PathSegment) map[PathFilterState]struct{} { + i, isInteger := segment.(int) + res := make(map[PathFilterState]struct{}) + if isInteger && i == filter.index { + res[filter.next] = struct{}{} + } + return res +} +func (filter IntegerSegmentPathFilter) accept() bool { + return false +} + type PathFilterState interface { eat(PathSegment) map[PathFilterState]struct{} accept() bool diff --git a/main/pathfilterast.go b/main/pathfilterast.go new file mode 100644 index 0000000..c2ddc7f --- /dev/null +++ b/main/pathfilterast.go @@ -0,0 +1,56 @@ +package main + +type StringSegmentPathFilterAST struct { + index string +} +func (ast StringSegmentPathFilterAST) compileWith(next PathFilterState) PathFilterState { + return StringSegmentPathFilter { + index: ast.index, + next: next, + } +} + +type IntegerSegmentPathFilterAST struct { + index int +} +func (ast IntegerSegmentPathFilterAST) compileWith(next PathFilterState) PathFilterState { + return IntegerSegmentPathFilter { + index: ast.index, + next: next, + } +} + +type RepeatPathFilterAST struct { + content PathFilterAST +} +func (ast RepeatPathFilterAST) compileWith(next PathFilterState) PathFilterState { + nextGroup := &GroupPathFilter{} + repeatStart := ast.content.compileWith(nextGroup) + nextGroup.filters = []PathFilterState{next, repeatStart} + return nextGroup +} + +type SequencePathFilterAST struct { + sequence []PathFilterAST +} +func (ast SequencePathFilterAST) compileWith(next PathFilterState) PathFilterState { + for i := len(ast.sequence) - 1; i >= 0; i -= 1 { + next = 
ast.sequence[i].compileWith(next) + } + return next +} + +type AnySegmentPathFilterAST struct {} +func (ast AnySegmentPathFilterAST) compileWith(next PathFilterState) PathFilterState { + return AnySegmentPathFilter{next: next} +} + +type PathFilterAST interface { + compileWith(PathFilterState) PathFilterState +} + +func compilePathFilterAST(ast PathFilterAST) PathFilter { + return PathFilter{ + initial: ast.compileWith(NonePathFilter{}), + } +} |