From 96812b9ea732cc7ae26efce4568c19aec0000abc Mon Sep 17 00:00:00 2001
From: Charlie Stanton
Date: Wed, 21 Sep 2022 19:37:02 +0100
Subject: Adds some new commands

---
 main/command.go |  20 ++++--
 main/filter.go  |   8 +++
 main/lex.go     | 194 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 main/parse.go   | 103 ++++++++++++++++++++++++++++--
 4 files changed, 309 insertions(+), 16 deletions(-)

(limited to 'main')

diff --git a/main/command.go b/main/command.go
index bad5b1e..91cb5e4 100644
--- a/main/command.go
+++ b/main/command.go
@@ -47,10 +47,10 @@ func (cmd SequenceCommand) exec(state *ProgramState) {
 	}
 }
 
-type AppendCommand struct {
+type AppendLiteralCommand struct {
 	values []WalkValue
 }
-func (cmd AppendCommand) exec(state *ProgramState) {
+func (cmd AppendLiteralCommand) exec(state *ProgramState) {
 	for _, value := range cmd.values {
 		state.space = append(state.space, WalkItem {
 			path: nil,
@@ -59,10 +59,10 @@ func (cmd AppendCommand) exec(state *ProgramState) {
 	}
 }
 
-type PrependCommand struct {
+type PrependLiteralCommand struct {
 	values []WalkValue
 }
-func (cmd PrependCommand) exec(state *ProgramState) {
+func (cmd PrependLiteralCommand) exec(state *ProgramState) {
 	var newItems []WalkItem
 	for _, value := range cmd.values {
 		newItems = append(newItems, WalkItem {
@@ -73,6 +73,18 @@ func (cmd PrependCommand) exec(state *ProgramState) {
 	state.space = append(newItems, state.space...)
 }
 
+type NextCommand struct {}
+func (cmd NextCommand) exec(state *ProgramState) {
+	nextItem := <- state.in
+	state.space = []WalkItem{nextItem}
+}
+
+type AppendNextCommand struct {}
+func (cmd AppendNextCommand) exec(state *ProgramState) {
+	nextItem := <- state.in
+	state.space = append(state.space, nextItem)
+}
+
 type PrintLiteralsCommand struct {
 	items []WalkItem
 }
diff --git a/main/filter.go b/main/filter.go
index 796f558..f69d01a 100644
--- a/main/filter.go
+++ b/main/filter.go
@@ -69,6 +69,14 @@ func (filter AndFilter) exec(space WalkItem) bool {
 	return filter.left.exec(space) && filter.right.exec(space)
 }
 
+type OrFilter struct {
+	left Filter
+	right Filter
+}
+func (filter OrFilter) exec(space WalkItem) bool {
+	return filter.left.exec(space) || filter.right.exec(space)
+}
+
 type NotFilter struct {
 	content Filter
 }
diff --git a/main/lex.go b/main/lex.go
index 91231ed..0daf2d1 100644
--- a/main/lex.go
+++ b/main/lex.go
@@ -64,6 +64,16 @@ func (l *lexer) reset() {
 	l.pos = l.start
 }
 
+func (l *lexer) expect(valid string) bool {
+	for _, r := range valid {
+		if l.next() != r {
+			l.backup()
+			return false
+		}
+	}
+	return true
+}
+
 func (l *lexer) peek() rune {
 	w := l.width
 	r := l.next()
@@ -116,6 +126,7 @@ const (
 	TokenDot // .
 	TokenAst // *
 	TokenBar // |
+	TokenOr // ||
 	TokenAnd // &&
 	TokenHat // ^
 	TokenDollar // $
@@ -123,6 +134,17 @@ const (
 	TokenHatDollar // ^$
 	TokenExclamation // !
 	TokenTilde // ~
+	TokenDoubleQuote // "
+	TokenStringLiteral // A string literal, not including the " either side
+	TokenNullLiteral // null
+	TokenTrueLiteral // true
+	TokenFalseLiteral // false
+	TokenColon // :
+	TokenComma // ,
+	TokenSubstituteDelimiter // usually / but could be something else
+	TokenSubstitutePlaceholder // \1, \2 etc.
+	TokenTerminalLiteral // One of {, }, [, ]
+	TokenNumberLiteral // A number literal
 	TokenPatternStringIndex // A string index in a pattern
 	TokenPatternIntegerIndex // An integer index in a pattern
 )
@@ -183,10 +205,12 @@ func lexCommand(l *lexer) stateFunc {
 	switch r {
 	case '#':
 		l.emit(TokenHash)
-		return lexPatternStringIndex
+		lexPatternStringIndex(l)
+		return lexCommand
 	case '@':
 		l.emit(TokenAt)
-		return lexPatternIntegerIndex
+		lexPatternIntegerIndex(l)
+		return lexCommand
 	case '.':
 		l.emit(TokenDot)
 		return lexCommand
@@ -194,7 +218,17 @@ func lexCommand(l *lexer) stateFunc {
 		l.emit(TokenAst)
 		return lexCommand
 	case '|':
-		l.emit(TokenBar)
+		if l.accept("|") {
+			l.emit(TokenOr)
+		} else {
+			l.emit(TokenBar)
+		}
+		return lexCommand
+	case '[':
+		l.emit(TokenLBrack)
+		return lexCommand
+	case ']':
+		l.emit(TokenRBrack)
 		return lexCommand
 	case '(':
 		l.emit(TokenLParen)
@@ -232,6 +266,12 @@ func lexCommand(l *lexer) stateFunc {
 	case '~':
 		l.emit(TokenTilde)
 		return lexCommand
+	case 'i':
+		l.emit(TokenCommand)
+		return lexMultipleLiterals
+	case 'S':
+		l.emit(TokenCommand)
+		return lexBigSubstitution
 	}
 	if isAlpha(r) {
 		l.emit(TokenCommand)
@@ -240,16 +280,152 @@ func lexCommand(l *lexer) stateFunc {
 	return l.errorf("Expected command found something else")
 }
 
-func lexPatternStringIndex(l *lexer) stateFunc {
+func lexBigSubstitution(l *lexer) stateFunc {
+	delimiter := l.next()
+	if delimiter == eof || isAlphaNumeric(delimiter) {
+		return l.errorf("Invalid delimiter for big substitution")
+	}
+	l.emit(TokenSubstituteDelimiter)
+	loop: for {
+		r := l.next()
+		switch r {
+		case delimiter:
+			l.emit(TokenSubstituteDelimiter)
+			break loop
+		case '#':
+			l.emit(TokenHash)
+			lexPatternStringIndex(l)
+		case '@':
+			l.emit(TokenAt)
+			lexPatternIntegerIndex(l)
+		case '.':
+			l.emit(TokenDot)
+		case '*':
+			l.emit(TokenAst)
+		case '|':
+			l.emit(TokenBar)
+		case '[':
+			l.emit(TokenLBrack)
+		case ']':
+			l.emit(TokenRBrack)
+		case '?':
+			l.emit(TokenQuestion)
+		case ':':
+			l.emit(TokenColon)
+		case ',':
+			l.emit(TokenComma)
+		}
+	}
+	loop2: for {
+		r := l.next()
+		switch r {
+		case delimiter:
+			l.emit(TokenSubstituteDelimiter)
+			break loop2
+		case '\\':
+			if !l.acceptPassing(isDigit) {
+				return l.errorf("Expected digit after \\")
+			}
+			l.emit(TokenSubstitutePlaceholder)
+		}
+	}
+	// TODO: No clue where I was going with this
+	return lexCommand
+}
+
+func lexMultipleLiterals(l *lexer) stateFunc {
+	l.acceptAll(whitespaceNewlines)
+	l.ignore()
+	r := l.next()
+	switch r {
+	case ';', eof:
+		l.backup()
+		return lexCommandEnd
+	case ':':
+		l.emit(TokenColon)
+		return lexMultipleLiterals
+	case ',':
+		l.emit(TokenComma)
+		return lexMultipleLiterals
+	}
+	err := lexSingleLiteral(l)
+	if err != "" {
+		return l.errorf(err)
+	}
+	return lexMultipleLiterals
+}
+
+func lexSingleLiteral(l *lexer) string {
+	l.acceptAll(whitespaceNewlines)
+	l.ignore()
+	r := l.next()
+	switch r {
+	case '"':
+		l.emit(TokenDoubleQuote)
+		if !lexStringLiteral(l) {
+			return "Expected closing \""
+		}
+	case 'n':
+		if !l.expect("ull") {
+			return "Invalid literal, expected null"
+		}
+		l.emit(TokenNullLiteral)
+	case 't':
+		if !l.expect("rue") {
+			return "Invalid literal, expected true"
+		}
+		l.emit(TokenTrueLiteral)
+	case 'f':
+		if !l.expect("alse") {
+			return "Invalid literal, expected false"
+		}
+		l.emit(TokenFalseLiteral)
+	case '{', '}', '[', ']':
+		l.emit(TokenTerminalLiteral)
+	default:
+		if isDigit(r) {
+			lexNumberLiteral(l)
+			return ""
+		}
+		return "Invalid literal"
+	}
+	return ""
+}
+
+// Just read the first digit
+func lexNumberLiteral(l *lexer) {
+	l.acceptAllPassing(isDigit)
+	if l.accept(".") {
+		l.acceptAllPassing(isDigit)
+	}
+	l.emit(TokenNumberLiteral)
+}
+
+// TODO: escape characters
+func lexStringLiteral(l *lexer) bool {
+	for {
+		r := l.next()
+		switch r {
+		case '"':
+			l.backup()
+			l.emit(TokenStringLiteral)
+			l.next()
+			l.emit(TokenDoubleQuote)
+			return true
+		case eof:
+			return false
+		}
+	}
+}
+
+func lexPatternStringIndex(l *lexer) {
 	l.acceptAllPassing(isStringIndexChar)
 	l.emit(TokenPatternStringIndex)
-	return lexCommand
 }
 
-func lexPatternIntegerIndex(l *lexer) stateFunc {
+func lexPatternIntegerIndex(l *lexer) {
 	l.acceptAllPassing(isDigit)
 	l.emit(TokenPatternIntegerIndex)
-	return lexCommand
 }
 
 func lexCommandEnd(l *lexer) stateFunc {
@@ -261,5 +437,9 @@ func lexCommandEnd(l *lexer) stateFunc {
 		l.emit(TokenSemicolon)
 		return lexCommand
 	}
+	if l.accept("}") {
+		l.emit(TokenRBrace)
+		return lexCommandEnd
+	}
 	return l.errorf("Expected ; found something else")
 }
diff --git a/main/parse.go b/main/parse.go
index 0767c0d..5466a02 100644
--- a/main/parse.go
+++ b/main/parse.go
@@ -37,7 +37,7 @@ var segmentTokens map[TokenType]bool = map[TokenType]bool {
 	TokenHash: true,
 	TokenAt: true,
 	TokenDot: true,
-	TokenLParen: true,
+	TokenLBrack: true,
 }
 
 func (p *parser) parsePathPatternFilter(minPower int) PathFilterAST {
@@ -62,10 +62,10 @@ func (p *parser) parsePathPatternFilter(minPower int) PathFilterAST {
 		lhs = IntegerSegmentPathFilterAST{index}
 	case TokenDot:
 		lhs = AnySegmentPathFilterAST{}
-	case TokenLParen:
+	case TokenLBrack:
 		lhs = p.parsePathPatternFilter(0)
-		if p.next().typ != TokenRParen {
-			panic("Expected )")
+		if p.next().typ != TokenRBrack {
+			panic("Expected ] in path filter")
 		}
 	default:
 		panic("Expected path pattern filter segment")
@@ -94,7 +94,7 @@ func (p *parser) parseFilter(minPower int) Filter {
 	var lhs Filter
 	token := p.next()
 	switch token.typ {
-	case TokenHash, TokenAt, TokenDot:
+	case TokenHash, TokenAt, TokenDot, TokenLBrack:
 		p.rewind(token)
 		filterAst := p.parsePathPatternFilter(0)
 		lhs = compilePathFilterAST(filterAst)
@@ -106,6 +106,12 @@ func (p *parser) parseFilter(minPower int) Filter {
 		lhs = TerminalFilter{}
 	case TokenTilde:
 		lhs = RootFilter{}
+	case TokenLParen:
+		lhs = p.parseFilter(0)
+		rParen := p.next()
+		if rParen.typ != TokenRParen {
+			panic("Missing ) in filter")
+		}
 	default:
 		panic("Expected filter")
 	}
@@ -114,6 +120,8 @@ func (p *parser) parseFilter(minPower int) Filter {
 		switch {
 		case token.typ == TokenAnd && 2 >= minPower:
 			lhs = AndFilter {lhs, p.parseFilter(3)}
+		case token.typ == TokenOr && 0 >= minPower:
+			lhs = OrFilter {lhs, p.parseFilter(1)}
 		default:
 			p.rewind(token)
 			break loop
@@ -122,12 +130,90 @@ func (p *parser) parseFilter(minPower int) Filter {
 	return lhs
 }
 
+func (p *parser) parseLiterals() (items []WalkItem) {
+	var path Path
+	var value WalkValue
+	loop: for {
+		token := p.next()
+		switch token.typ {
+		case TokenSemicolon, TokenEOF:
+			p.rewind(token)
+			break loop
+		case TokenComma:
+		case TokenNullLiteral:
+			value = ValueNull{}
+		case TokenTrueLiteral:
+			value = ValueBool(true)
+		case TokenFalseLiteral:
+			value = ValueBool(false)
+		case TokenNumberLiteral:
+			numberLiteral, err := strconv.ParseFloat(token.val, 64)
+			if err != nil {
+				panic("Error parsing number literal to float64")
+			}
+			value = ValueNumber(numberLiteral)
+		case TokenDoubleQuote:
+			stringToken := p.next()
+			if stringToken.typ != TokenStringLiteral {
+				panic("Expected string literal after \"")
+			}
+			// TODO: resolve escape characters
+			stringLiteral := stringToken.val
+			if p.next().typ != TokenDoubleQuote {
+				panic("Expected \" after string literal")
+			}
+			colon := p.next()
+			if colon.typ == TokenColon {
+				if path != nil {
+					panic("Expected value after path:")
+				}
+				path = Path{stringLiteral}
+			} else {
+				p.rewind(colon)
+				value = ValueString(stringLiteral)
+			}
+		case TokenTerminalLiteral:
+			switch token.val {
+			case "{":
+				value = MapBegin
+			case "}":
+				value = MapEnd
+			case "[":
+				value = ArrayBegin
+			case "]":
+				value = ArrayEnd
+			default:
+				panic("Invalid terminal token")
+			}
+		}
+		if value != nil {
+			items = append(items, WalkItem {
+				path: path,
+				value: value,
+			})
+			path = nil
+			value = nil
+		}
+	}
+	if path != nil {
+		panic("Expected value after path:")
+	}
+	return items
+}
+
 func (p *parser) parseBasicCommand(commandChar rune) Command {
 	switch commandChar {
 	case 'p':
 		return PrintValueCommand{}
 	case 'd':
 		return DeleteAllCommand{}
+	case 'n':
+		return NextCommand{}
+	case 'N':
+		return AppendNextCommand{}
+	case 'i':
+		items := p.parseLiterals()
+		return PrintLiteralsCommand {items: items}
 	default:
 		panic("Invalid command")
 	}
@@ -151,6 +237,12 @@ func (p *parser) parseCommand() Command {
 			command: command,
 		}
 		return command
+	case TokenLBrace:
+		commands := p.parseCommands()
+		if p.next().typ != TokenRBrace {
+			panic("Missing matching }")
+		}
+		return SequenceCommand {commands}
 	case TokenCommand:
 		commandChar, _, err := strings.NewReader(token.val).ReadRune()
 		if err != nil {
@@ -172,6 +264,7 @@ func (p *parser) parseCommands() []Command {
 		commands = append(commands, p.parseCommand())
 		semicolon := p.next()
 		if semicolon.typ == TokenEOF || semicolon.typ == TokenRBrace {
+			p.rewind(semicolon)
 			return commands
 		}
 		if semicolon.typ != TokenSemicolon {
--
cgit v1.2.3
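
Not part of the patch above — a minimal in-package sketch of how the new OrFilter composes with the filters already in main/filter.go, assuming only the types and the exec(WalkItem) bool method visible in the hunks; exampleOrFilter is a hypothetical helper name, not code from the repository.

// exampleOrFilter reports whether item is the root item or a non-terminal value,
// by OR-ing a RootFilter with a negated TerminalFilter.
func exampleOrFilter(item WalkItem) bool {
	f := OrFilter{
		left:  RootFilter{},
		right: NotFilter{content: TerminalFilter{}},
	}
	return f.exec(item)
}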