treek

Treek, like awk but for tree input. Take JSON or TOML or some other tree like input format and run code on bits of it.
git clone http://shtanton.xyz/git/repo/treek
Log | Files | Refs

commit 7434dc0a910c128ff75256f31142146c8f2f7784
Author: Charlie Stanton <charlie@shtanton.xyz>
Date:   Wed, 17 Aug 2022 14:40:40 +0100

Initial commit

Implements:
JSON reading
Following pattern segments:
- all
- index
- filter (partially)
A bunch of operations for the actions
println

Diffstat:
Ago.mod | 3+++
Amain/eval.go | 907+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amain/json.go | 97+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amain/lex.go | 381+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amain/main.go | 29+++++++++++++++++++++++++++++
Amain/parser.go | 398+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 1815 insertions(+), 0 deletions(-)

diff --git a/go.mod b/go.mod @@ -0,0 +1,3 @@ +module main + +go 1.18 diff --git a/main/eval.go b/main/eval.go @@ -0,0 +1,907 @@ +package main + +import ( + "strconv" + "fmt" + "math" + "strings" +) + +type ValueType int +const ( + TypeNull ValueType = iota + TypeBool + TypeNumber + TypeString + TypeArray + TypeMap +) + +type ValueNull struct {} +type ValueBool bool +type ValueNumber float64 +type ValueString string +type ValueArray []Value +type ValueMap map[string]Value + +type Value interface{ + StackValue + getPath([]TreePathSegment) Value + typ() ValueType + clone() Value + withAssignment([]Value, Value) Value + castToBool() ValueBool + castToNumber() ValueNumber + castToString() ValueString + castToArray() ValueArray + castToMap() ValueMap + add(Value) Value + sub(Value) Value + mul(Value) Value + div(Value) Value + index(Value) Value +} + +func castToType(v Value, t ValueType) Value { + switch t { + case TypeNull: + return ValueNull{} + case TypeBool: + return v.castToBool() + case TypeNumber: + return v.castToNumber() + case TypeString: + return v.castToString() + case TypeArray: + return v.castToArray() + case TypeMap: + return v.castToMap() + default: + panic("Unknown value type") + } +} + +func (v ValueNull) getPath(path []TreePathSegment) Value { + if len(path) != 0 { + panic("Tried to index null") + } + return v +} +func (v ValueNull) typ() ValueType { + return TypeNull +} +func (v ValueNull) clone() Value { + return v +} +func (v ValueNull) withAssignment(path []Value, value Value) Value { + if len(path) == 0 { + return value + } + res := make(ValueMap) + res[string(path[0].castToString())] = ValueNull{}.withAssignment(path[1:], value) + return res +} +func (v ValueNull) castToBool() ValueBool { + return false +} +func (v ValueNull) castToNumber() ValueNumber { + return 0 +} +func (v ValueNull) castToString() ValueString { + return "" +} +func (v ValueNull) castToArray() ValueArray { + var res []Value + return res +} +func (v ValueNull) castToMap() ValueMap { + return make(map[string]Value) +} +func (v ValueNull) add(w Value) Value { + return w +} +func (v ValueNull) sub(w Value) Value { + typ := w.typ() + if typ == TypeNull { + return ValueNull{} + } + return castToType(v, typ).sub(w) +} +func (v ValueNull) mul(w Value) Value { + typ := w.typ() + if typ == TypeNull { + return ValueNull{} + } + return castToType(v, typ).mul(w) +} +func (v ValueNull) div(w Value) Value { + typ := w.typ() + if typ == TypeNull { + return ValueNull{} + } + return castToType(v, typ).div(w) +} +func (v ValueNull) index(w Value) Value { + return ValueNull {} +} + +func (v ValueBool) withAssignment(path []Value, value Value) Value { + if len(path) == 0 { + return value + } + res := make(ValueMap) + res[string(path[0].castToString())] = ValueNull{}.withAssignment(path[1:], value) + return res +} +func (v ValueBool) getPath(path []TreePathSegment) Value { + if len(path) != 0 { + panic("Tried to index bool") + } + return v +} +func (v ValueBool) typ() ValueType { + return TypeBool +} +func (v ValueBool) clone() Value { + return v +} +func (v ValueBool) castToBool() ValueBool { + return v +} +func (v ValueBool) castToNumber() ValueNumber { + if v { + return 1 + } else { + return 0 + } +} +func (v ValueBool) castToString() ValueString { + if v { + return "true" + } else { + return "false" + } +} +func (v ValueBool) castToArray() ValueArray { + var res []Value + if v { + res = append(res, ValueNull{}) + } + return res +} +func (v ValueBool) castToMap() ValueMap { + res := make(map[string]Value) + if v { + res[""] = ValueNull{} + } + return res +} +func (v ValueBool) add(w Value) Value { + return v || w.castToBool() +} +func (v ValueBool) sub(w Value) Value { + rhs := w.castToBool() + return (v || rhs) && !(v && rhs) +} +func (v ValueBool) mul(w Value) Value { + return v && w.castToBool() +} +func (v ValueBool) div(w Value) Value { + rhs := w.castToBool() + return (v && rhs) || !(v || rhs) +} +func (v ValueBool) index(w Value) Value { + return v +} + +func (v ValueNumber) withAssignment(path []Value, value Value) Value { + if len(path) == 0 { + return value + } + res := make(ValueMap) + res[string(path[0].castToString())] = ValueNull{}.withAssignment(path[1:], value) + return res +} +func (v ValueNumber) getPath(path []TreePathSegment) Value { + if len(path) != 0 { + panic("Tried to index number") + } + return v +} +func (v ValueNumber) typ() ValueType { + return TypeNumber +} +func (v ValueNumber) clone() Value { + return v +} +func (v ValueNumber) castToBool() ValueBool { + return v != 0 +} +func (v ValueNumber) castToNumber() ValueNumber { + return v +} +func (v ValueNumber) castToString() ValueString { + return ValueString(strconv.FormatFloat(float64(v), 'g', 10, 64)) +} +func (v ValueNumber) castToArray() ValueArray { + res := make([]Value, int(math.Round(float64(v)))) + for i := range res { + res[i] = ValueNull {} + } + return res +} +func (v ValueNumber) castToMap() ValueMap { + res := make(map[string]Value) + res[string(v.castToString())] = ValueNull {} + return res +} +func (v ValueNumber) add(w Value) Value { + return v + w.castToNumber() +} +func (v ValueNumber) sub(w Value) Value { + return v - w.castToNumber() +} +func (v ValueNumber) mul(w Value) Value { + return v * w.castToNumber() +} +func (v ValueNumber) div(w Value) Value { + return v / w.castToNumber() +} +func (v ValueNumber) index(w Value) Value { + return v +} + +func (v ValueString) withAssignment(path []Value, value Value) Value { + if len(path) == 0 { + return value + } + if len(path) > 1 { + panic("Cannot index string twice") + } + index := int(math.Round(float64(path[0].castToNumber()))) + var builder strings.Builder + reader := strings.NewReader(string(v)) + for i := 0; i < index; i += 1 { + r, _, err := reader.ReadRune() + if err != nil { + panic("Error assigning to string index") + } + builder.WriteRune(r) + } + _, _, err := reader.ReadRune() + if err != nil { + panic("Error assigning to string index (2)") + } + builder.WriteString(string(value.castToString())) + for { + r, _, err := reader.ReadRune() + if err != nil { + break + } + builder.WriteRune(r) + } + return ValueString(builder.String()) +} +func (v ValueString) getPath(path []TreePathSegment) Value { + if len(path) != 0 { + panic("Tried to index string") + } + return v +} +func (v ValueString) typ() ValueType { + return TypeString +} +func (v ValueString) clone() Value { + return v +} +func (v ValueString) castToBool() ValueBool { + if v == "" || v == "false" { + return false + } else { + return true + } +} +func (v ValueString) castToNumber() ValueNumber { + num, err := strconv.ParseFloat(string(v), 64) + if err != nil { + return 0 + } + return ValueNumber(num) +} +func (v ValueString) castToString() ValueString { + return v +} +func (v ValueString) castToArray() ValueArray { + fields := strings.Fields(string(v)) + res := make([]Value, len(fields)) + for i, field := range fields { + res[i] = ValueString(field) + } + return res +} +func (v ValueString) castToMap() ValueMap { + res := make(map[string]Value) + res[string(v)] = ValueNull {} + return res +} +func (v ValueString) add(w Value) Value { + return v + w.castToString() +} +func (v ValueString) sub(w Value) Value { + // TODO + panic("Cannot subtract strings yet") +} +func (v ValueString) mul(w Value) Value { + num := int(math.Round(float64(w.castToNumber()))) + var builder strings.Builder + for i := 0; i < num; i += 1 { + builder.WriteString(string(v)) + } + return ValueString(builder.String()) +} +func (v ValueString) div(w Value) Value { + // TODO + panic("Cannot divide strings yet") +} +func (v ValueString) index(w Value) Value { + index := int(math.Round(float64(w.castToNumber()))) + // TODO: use proper strings functions here + return ValueString(v[index]) +} + +func (v ValueArray) withAssignment(path []Value, value Value) Value { + if len(path) == 0 { + return value + } + index := int(math.Round(float64(path[0].castToNumber()))) + res := v.clone().(ValueArray) + res[index] = res[index].withAssignment(path[1:], value) + return res +} +func (v ValueArray) getPath(path []TreePathSegment) Value { + if len(path) == 0 { + return v + } + switch path[0].(type) { + case int: + return v[path[0].(int)].getPath(path[1:]) + default: + panic("Tried to index array with string") + } +} +func (v ValueArray) typ() ValueType { + return TypeArray +} +func (v ValueArray) clone() Value { + var res []Value + for _, el := range v { + res = append(res, el.clone()) + } + return ValueArray(res) +} +func (v ValueArray) castToBool() ValueBool { + return len(v) > 0 +} +func (v ValueArray) castToNumber() ValueNumber { + return ValueNumber(len(v)) +} +func (v ValueArray) castToString() ValueString { + var builder strings.Builder + for i, el := range v { + if i != 0 { + builder.WriteString(" ") + } + builder.WriteString(string(el.castToString())) + } + return ValueString(builder.String()) +} +func (v ValueArray) castToArray() ValueArray { + return v +} +func (v ValueArray) castToMap() ValueMap { + res := make(map[string]Value) + for _, el := range v { + res[string(el.castToString())] = ValueNull {} + } + return res +} +func (v ValueArray) add(w Value) Value { + return append(v, w.castToArray()...) +} +func (v ValueArray) sub(w Value) Value { + width := int(math.Round(float64(w.castToNumber()))) + if len(v) < width { + return v + } else { + res := make([]Value, 2) + res[0] = v[0:width] + res[1] = v[width:] + return ValueArray(res) + } +} +func (v ValueArray) mul(w Value) Value { + var res []Value + target := int(math.Round(float64(w.castToNumber()))) + for i := 0; i < target; i += 1 { + res = append(res, v...) + } + return ValueArray(res) +} +func(v ValueArray) div(w Value) Value { + l := len(v) + parts := int(math.Round(float64(w.castToNumber()))) + var res []Value + part_width := l / parts + remaining_els := l % parts + progress := 0 + for i := 0; i < parts; i += 1 { + if i < remaining_els { + res = append(res, v[progress:progress+part_width+1]) + progress += part_width + 1 + } else { + res = append(res, v[progress:progress+part_width]) + progress += part_width + } + } + return ValueArray(res) +} +func (v ValueArray) index(w Value) Value { + index := int(math.Round(float64(w.castToNumber()))) + return v[index] +} + +func (v ValueMap) withAssignment(path []Value, value Value) Value { + if len(path) == 0 { + return value + } + index := string(path[0].castToString()) + res := v.clone().(ValueMap) + part, hasPart := res[index] + if !hasPart { + part = ValueNull {} + } + res[index] = part.withAssignment(path[1:], value) + return res +} +func (v ValueMap) getPath(path []TreePathSegment) Value { + if len(path) == 0 { + return v + } + switch path[0].(type) { + case string: + return v[path[0].(string)].getPath(path[1:]) + default: + panic("Tried to index map with int") + } +} +func (v ValueMap) typ() ValueType { + return TypeMap +} +func (v ValueMap) clone() Value { + res := make(map[string]Value) + for key, value := range v { + res[key] = value.clone() + } + return ValueMap(res) +} +func (v ValueMap) castToBool() ValueBool { + return len(v) > 0 +} +func (v ValueMap) castToNumber() ValueNumber { + return ValueNumber(len(v)) +} +func (v ValueMap) castToString() ValueString { + var builder strings.Builder + first := true + for key, val := range v { + if !first { + builder.WriteString(" ") + } + builder.WriteString(key) + builder.WriteString(": ") + builder.WriteString(string(val.castToString())) + } + return ValueString(builder.String()) +} +func (v ValueMap) castToArray() ValueArray { + var res []Value + for key := range v { + res = append(res, ValueString(key)) + } + return res +} +func (v ValueMap) castToMap() ValueMap { + return v +} +func (v ValueMap) add(w Value) Value { + other := w.castToMap() + res := v.clone().(ValueMap) + for key, val := range other { + res[key] = val + } + return res +} +func (v ValueMap) sub(w Value) Value { + res := v.clone().(ValueMap) + to_remove := w.castToArray() + for _, key := range to_remove { + delete(res, string(key.castToString())) + } + return res +} +func (v ValueMap) mul(w Value) Value { + rhs := w.castToMap() + res := make(map[string]Value) + for key, val := range v { + val2, hasRhsVal := rhs[key] + if !hasRhsVal { + val2 = ValueNull {} + } + res[key] = ValueArray {val, val2} + } + for key, val2 := range rhs { + _, hasLhsVal := v[key] + if !hasLhsVal { + res[key] = ValueArray {ValueNull {}, val2} + } + } + return ValueMap(res) +} +func (v ValueMap) div(w Value) Value { + // TODO + panic("Dividing a map not yet implemented") +} +func (v ValueMap) index(w Value) Value { + index := string(w.castToString()) + res, hasValue := v[index] + if !hasValue { + return ValueNull {} + } + return res +} + +type VariableReference string +type IndexReference struct { + parent StackValue + index Value +} + +type StackValue interface{ + toValue(*EvalState) Value + toAddress() Address +} + +func (v ValueNull) toValue(state *EvalState) Value { + return v +} +func (v ValueNull) toAddress() Address { + panic("Invalid assign to non variable") +} + +func (v ValueBool) toValue(state *EvalState) Value { + return v +} +func (v ValueBool) toAddress() Address { + panic("Invalid assign to non variable") +} + +func (v ValueNumber) toValue(state *EvalState) Value { + return v +} +func (v ValueNumber) toAddress() Address { + panic("Invalid assign to non variable") +} + +func (v ValueString) toValue(state *EvalState) Value { + return v +} +func (v ValueString) toAddress() Address { + panic("Invalid assign to non variable") +} + +func (v ValueArray) toValue(state *EvalState) Value { + return v +} +func (v ValueArray) toAddress() Address { + panic("Invalid assign to non variable") +} + +func (v ValueMap) toValue(state *EvalState) Value { + return v +} +func (v ValueMap) toAddress() Address { + panic("Invalid assign to non variable") +} + +func (v VariableReference) toValue(state *EvalState) Value { + value, hasValue := state.variables[string(v)] + if !hasValue { + state.variables[string(v)] = ValueNull {} + return ValueNull {} + } + return value.clone() +} +func (v VariableReference) toAddress() Address { + return v +} + +func (v IndexReference) toValue(state *EvalState) Value { + return v.parent.toValue(state).index(v.index) +} +func (v IndexReference) toAddress() Address { + return v +} + +type Address interface { + assign(*EvalState, Value) + assignPath(*EvalState, []Value, Value) +} + +func (v VariableReference) assign(state *EvalState, value Value) { + state.variables[string(v)] = value +} +func (v VariableReference) assignPath(state *EvalState, path []Value, value Value) { + state.variables[string(v)] = state.variables[string(v)].withAssignment(path, value) +} + +func (v IndexReference) assign(state *EvalState, value Value) { + v.assignPath(state, nil, value) +} +func (v IndexReference) assignPath(state *EvalState, path []Value, value Value) { + v.parent.toAddress().assignPath(state, append([]Value{v.index}, path...), value) +} + +type EvalState struct { + stack []StackValue + variables map[string]Value + data Value +} + +func (state *EvalState) push(value StackValue) { + state.stack = append(state.stack, value) +} + +func (state *EvalState) pop() StackValue { + if len(state.stack) < 1 { + panic("Error tried to pop empty stack") + } + index := len(state.stack) - 1 + value := state.stack[index] + state.stack = state.stack[:index] + return value +} + +func (state *EvalState) popValue() Value { + return state.pop().toValue(state) +} + +func (state *EvalState) popAddress() Address { + return state.pop().toAddress() +} + +func (index PatternSegmentIndex) matches(_ *EvalState, pathSegment TreePathSegment) bool { + switch pathSegment.(type) { + case string: + return string(index) == pathSegment.(string) + case int: + return string(index) == strconv.Itoa(pathSegment.(int)) + default: + panic("Bug in treek, invalid TreePathSegment") + } +} + +func (filter PatternSegmentFilter) matches(state *EvalState, pathSegment TreePathSegment) bool { + result := evalExpr(state, Expression(filter)) + return bool(result.castToBool()) +} + +func (segment PatternSegmentBasic) matches(state *EvalState, pathSegment TreePathSegment) bool { + switch segment { + case PatternSegmentAll: + return true + default: + panic("Invalid basic pattern segment") + } +} + +func matchPattern(state *EvalState, pattern Pattern, walkItem TreeWalkItem) bool { + if len(pattern.segments) != len(walkItem.path) || pattern.isFirst != walkItem.first{ + return false + } + for i, patternSegment := range pattern.segments { + pathSegment := walkItem.path[i] + if !patternSegment.matches(state, pathSegment) { + return false + } + } + return true +} + +func evalAction(state *EvalState, action Expression, node TreeWalkItem) { + if len(action) == 0 { + subroutinePrintln([]Value{state.data.getPath(node.path)}) + return + } + state.variables["path"] = pathToValueArray(node.path).clone() + state.variables["$0"] = state.data.getPath(node.path).clone() + evalExpr(state, action) +} + +func (instruction InstructionBasic) eval(state *EvalState) { + switch instruction { + case InstructionAdd: + rhs := state.popValue() + lhs := state.popValue() + state.push(lhs.add(rhs)) + case InstructionSub: + rhs := state.popValue() + lhs := state.popValue() + state.push(lhs.sub(rhs)) + case InstructionDiv: + rhs := state.popValue() + lhs := state.popValue() + state.push(lhs.div(rhs)) + case InstructionMul: + rhs := state.popValue() + lhs := state.popValue() + state.push(lhs.mul(rhs)) + case InstructionIgnore: + state.popValue() + case InstructionPushNull: + state.push(ValueNull {}) + case InstructionAssign: + rhs := state.popValue() + lhs := state.popAddress() + lhs.assign(state, rhs) + state.push(ValueNull {}) + case InstructionIndex: + index := state.popValue() + parent := state.pop() + state.push(IndexReference {parent, index}) + case InstructionDup: + val := state.pop() + state.push(val) + state.push(val.toValue(state).clone()) + default: + panic("Error: Tried to execute invalid basic instruction") + } +} + +func (n InstructionPushNumber) eval(state *EvalState) { + state.push(ValueNumber(n)) +} + +func (variable InstructionPushVariable) eval(state *EvalState) { + state.push(VariableReference(variable)) +} + +func (s InstructionPushString) eval(state *EvalState) { + state.push(ValueString(s)) +} + +type SubroutineFn func ([]Value) Value + +func printSingle(arg Value) { + switch arg.(type) { + case ValueNull: + fmt.Print("null") + case ValueBool: + fmt.Printf("%v", bool(arg.(ValueBool))) + case ValueNumber: + fmt.Printf("%v", float64(arg.(ValueNumber))) + case ValueString: + fmt.Printf("%q", string(arg.(ValueString))) + case ValueArray: + fmt.Print("[") + for i, el := range arg.(ValueArray) { + if i != 0 { + fmt.Print(", ") + } + printSingle(el) + } + fmt.Print("]") + case ValueMap: + fmt.Print("{") + isStart := true + for key, value := range arg.(ValueMap) { + if !isStart { + fmt.Print(", ") + } + fmt.Printf("%q: ", key) + printSingle(value) + isStart = false + } + fmt.Print("}") + } +} +func subroutinePrintln(args []Value) Value { + for i, arg := range args { + if i != 0 { + fmt.Print(" ") + } + printSingle(arg) + } + fmt.Print("\n") + return ValueNull{} +} + +func (call InstructionCall) eval(state *EvalState) { + args := make([]Value, call.nargs) + for i := call.nargs - 1; i >= 0; i -= 1 { + args[i] = state.popValue() + } + subroutines := map[Subroutine]SubroutineFn { + SubroutinePrintln: subroutinePrintln, + } + subroutine, isSubroutine := subroutines[call.subroutine] + if !isSubroutine { + panic("Error: Invalid subroutine") + } + state.push(subroutine(args)) +} + +func evalExpr(state *EvalState, expr Expression) Value { + for _, instruction := range expr { + instruction.eval(state) + } + return state.popValue() +} + +func pathToValueArray(path []TreePathSegment) ValueArray { + value := make(ValueArray, len(path)) + for i, segment := range path { + switch segment.(type) { + case string: + value[i] = ValueString(segment.(string)) + case int: + value[i] = ValueNumber(float64(segment.(int))) + } + } + return value +} + +type TreeWalkItem struct { + path []TreePathSegment + first bool +} + +func walkPaths(data Value, path []TreePathSegment, out chan TreeWalkItem) { + out <- TreeWalkItem {path, true} + switch data.(type) { + case ValueNull, ValueBool, ValueNumber, ValueString: + case ValueArray: + for i, el := range data.(ValueArray) { + walkPaths(el, append(path, i), out) + } + case ValueMap: + for key, el := range data.(ValueMap) { + walkPaths(el, append(path, key), out) + } + } + out <- TreeWalkItem {path, false} +} +func pathsRoutine(data Value, path []TreePathSegment, out chan TreeWalkItem) { + walkPaths(data, path, out) + close(out) +} +func getPaths(data Value) chan TreeWalkItem { + out := make(chan TreeWalkItem) + go pathsRoutine(data, nil, out) + return out +} + +func Eval(program Program, data Value) { + state := &EvalState { + stack: nil, + variables: make(map[string]Value), + data: data, + } + paths := getPaths(data) + for node := range paths { + for _, block := range program.blocks { + if matchPattern(state, block.pattern, node) { + evalAction(state, block.action, node) + } + } + } +} diff --git a/main/json.go b/main/json.go @@ -0,0 +1,97 @@ +package main + +import ( + "io" + "encoding/json" +) + +func tokenToValue(token json.Token) Value { + switch token.(type) { + case nil: + return ValueNull {} + case bool: + return ValueBool(token.(bool)) + case float64: + return ValueNumber(token.(float64)) + case string: + return ValueString(token.(string)) + default: + panic("Can't convert JSON token to value") + } +} + +func readValue(dec *json.Decoder) (value Value, empty bool) { + if !dec.More() { + return nil, true + } + t, err := dec.Token() + if err == io.EOF { + return nil, true + } else if err != nil { + panic("Invalid JSON") + } + switch t.(type) { + case nil, string, float64, bool: + v := tokenToValue(t) + return v, false + case json.Delim: + switch rune(t.(json.Delim)) { + case '[': + var value []Value + for dec.More() { + v, empty := readValue(dec) + if empty { + break + } + value = append(value, v) + } + t, err := dec.Token() + if err != nil { + panic("Invalid JSON") + } + delim, isDelim := t.(json.Delim) + if !isDelim || delim != ']' { + panic("Expected ] in JSON") + } + v := ValueArray(value) + return v, false + case '{': + value := make(map[string]Value) + for dec.More() { + t, _ := dec.Token() + key, keyIsString := t.(string) + if !keyIsString { + panic("Invalid JSON") + } + v, empty := readValue(dec) + if empty { + panic("Invalid JSON") + } + value[key] = v + } + t, err := dec.Token() + if err != nil { + panic("Invalid JSON") + } + delim, isDelim := t.(json.Delim) + if !isDelim || delim != '}' { + panic("Expected } in JSON") + } + v := ValueMap(value) + return v, false + default: + panic("Error parsing JSON") + } + default: + panic("Invalid JSON token") + } +} + +func Json(r io.Reader) Value { + dec := json.NewDecoder(r) + value, isEmpty := readValue(dec) + if isEmpty { + panic("Missing JSON input") + } + return value +} diff --git a/main/lex.go b/main/lex.go @@ -0,0 +1,381 @@ +package main + +import ( + "fmt" + "strings" + "unicode/utf8" +) + +type stateFunc func(*lexer) stateFunc + +type lexer struct { + input string + start int + pos int + width int + nestingLevel int + tokenStream chan Token +} + +func (l *lexer) run() { + for state := lexBlockStart; state != nil; { + state = state(l) + } + close(l.tokenStream) +} + +func (l *lexer) emit(t TokenType) { + l.tokenStream <- Token{ + typ: t, + val: l.input[l.start:l.pos], + } + l.start = l.pos +} + +func (l *lexer) errorf(format string, args ...interface{}) stateFunc { + l.tokenStream <- Token{ + typ: TokenErr, + val: fmt.Sprintf(format, args...), + } + return nil +} + +const eof rune = -1 + +func (l *lexer) next() rune { + if l.pos >= len(l.input) { + l.width = 0 + return eof + } + var r rune + r, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) + l.pos += l.width + return r +} + +func (l *lexer) backup() { + l.pos -= l.width +} + +func (l *lexer) ignore() { + l.start = l.pos +} + +func (l *lexer) reset() { + l.pos = l.start +} + +func (l *lexer) peek() rune { + w := l.width + r := l.next() + l.backup() + l.width = w + return r +} + +func (l *lexer) accept(valid string) bool { + if strings.IndexRune(valid, l.next()) >= 0 { + return true + } + l.backup() + return false +} + +func (l *lexer) acceptAll(valid string) { + for strings.IndexRune(valid, l.next()) >= 0 {} + l.backup() +} + +func (l *lexer) acceptPassing(valid func(rune) bool) bool { + if valid(l.next()) { + return true + } + l.backup() + return false +} + +func (l *lexer) acceptAllPassing(valid func(rune) bool) { + for valid(l.next()) {} + l.backup() +} + +type TokenType int + +const ( + TokenErr TokenType = iota // Lexing error + TokenEOF // end of file + TokenNumber // number literal + TokenIdentifier // identifier + TokenAdd // + + TokenSub // - + TokenAst // * + TokenDiv // / + TokenDot // . + TokenComma // , + TokenSemicolon // ; + TokenLParen // ( + TokenRParen // ) + TokenLBrace // { + TokenRBrace // } + TokenLBrack // [ + TokenRBrack // ] + TokenIndexPattern // An index pattern segment + TokenAssign // = + TokenCircum // ^ + TokenDoubleQuote // " + TokenStringLiteral // String literal + TokenAddAssign // += + TokenSubAssign // -= + TokenAstAssign // *= + TokenDivAssign // /= +) + +type Token struct { + typ TokenType + val string +} + +func (t Token) String() string { + switch t.typ { + case TokenEOF: + return "EOF" + case TokenErr: + return t.val + } + if len(t.val) > 10 { + return fmt.Sprintf("%.10q...", t.val) + } + return fmt.Sprintf("%q", t.val) +} + +func Lex(input string) chan Token { + l := &lexer{ + input: input, + tokenStream: make(chan Token), + } + go l.run() + return l.tokenStream +} + +const ( + whitespace string = " \t" + whitespaceNewlines string = " \t\r\n" +) + +func isAlpha(r rune) bool { + return ('a' <= r && r < 'z') || ('A' <= r && r <= 'Z') +} +func isDigit(r rune) bool { + return '0' <= r && r <= '9' +} +func isAlphaNumeric(r rune) bool { + return isAlpha(r) || isDigit(r) +} +func isIdentifierStartRune(r rune) bool { + return isAlpha(r) || r == '_' || r == '$' +} +func isIdentifierRune(r rune) bool { + return isIdentifierStartRune(r) || isDigit(r) +} + +func lexBlockStart(l *lexer) stateFunc { + l.acceptAll(whitespace) + l.ignore() + if l.peek() == eof { + l.emit(TokenEOF) + return nil + } + if l.accept("^") { + l.emit(TokenCircum) + } + if l.accept("{") { + l.emit(TokenLBrace) + l.nestingLevel += 1 + return lexStartAction + } + return lexPattern +} + +func lexPattern(l *lexer) stateFunc { + r := l.next() + switch { + case isIdentifierRune(r): + l.backup() + return lexIndexPattern + case r == '(': + l.nestingLevel += 1 + l.emit(TokenLParen) + return lexFilterPattern + case r == '*': + l.emit(TokenAst) + return lexPatternEnd + } + return l.errorf("Invalid Pattern") +} + +// Next rune is identifier rune +func lexIndexPattern(l *lexer) stateFunc { + l.acceptAllPassing(isIdentifierRune) + l.emit(TokenIndexPattern) + return lexPatternEnd +} + +// Previous rune is ( +func lexFilterPattern(l *lexer) stateFunc { + for state := lexAction; state != nil; { + state = state(l) + } + return lexPatternEnd +} + +func lexPatternEnd(l *lexer) stateFunc { + if l.accept(".") { + l.emit(TokenDot) + return lexPattern + } + l.acceptAll(whitespaceNewlines) + l.ignore() + if !l.accept("{") { + if l.peek() == eof { + l.emit(TokenEOF) + return nil + } + return l.errorf("Missing Action") + } + l.emit(TokenLBrace) + l.nestingLevel += 1 + return lexStartAction +} + +// Previous rune is { +func lexStartAction(l *lexer) stateFunc { + for state := lexAction; state != nil; { + state = state(l) + } + return lexBlockStart +} + +func lexAction(l *lexer) stateFunc { + l.acceptAll(whitespaceNewlines) + l.ignore() + doubleCharTokens := map[rune]map[rune]TokenType{ + '+': { + '=': TokenAddAssign, + }, + '-': { + '=': TokenSubAssign, + }, + '*': { + '=': TokenAstAssign, + }, + '/': { + '=': TokenDivAssign, + }, + } + charTokens := map[rune]TokenType{ + '+': TokenAdd, + '-': TokenSub, + '/': TokenDiv, + '*': TokenAst, + '.': TokenDot, + ',': TokenComma, + ';': TokenSemicolon, + '=': TokenAssign, + } + r := l.next() + charToken, isCharToken := charTokens[r] + doubleCharMap, hasDoubleCharMap := doubleCharTokens[r] + if hasDoubleCharMap { + doubleCharToken, hasDoubleCharToken := doubleCharMap[l.next()] + if hasDoubleCharToken { + l.emit(doubleCharToken) + return lexAction + } else { + l.backup() + } + } + switch { + case r == eof: + return l.errorf("Unclosed Action") + case r == '(': + l.nestingLevel += 1 + l.emit(TokenLParen) + return lexAction + case r == ')': + l.nestingLevel -= 1 + l.emit(TokenRParen) + if l.nestingLevel > 0 { + return lexAction + } + return nil + case r == '{': + l.nestingLevel += 1 + l.emit(TokenLBrace) + return lexAction + case r == '}': + l.nestingLevel -= 1 + l.emit(TokenRBrace) + if l.nestingLevel > 0 { + return lexAction + } + return nil + case r == '[': + l.nestingLevel += 1 + l.emit(TokenLBrack) + return lexAction + case r == ']': + l.nestingLevel -= 1 + l.emit(TokenRBrack) + if l.nestingLevel > 0 { + return lexAction + } + return nil + case isCharToken: + l.emit(charToken) + return lexAction + case isDigit(r): + return lexNumberLiteral + case isIdentifierStartRune(r): + return lexIdentifier + case r == '"': + l.emit(TokenDoubleQuote) + return lexStringLiteral + } + return l.errorf("Invalid Token: " + string(r)) +} + +// Just accepted the first digit +func lexNumberLiteral(l *lexer) stateFunc { + l.acceptAllPassing(isDigit) + if l.accept(".") { + l.acceptAllPassing(isDigit) + } + if isIdentifierRune(l.peek()) { + l.next() + return l.errorf("Bad number: %q", l.input[l.start:l.pos]) + } + l.emit(TokenNumber) + return lexAction +} + +func lexStringLiteral(l *lexer) stateFunc { + for { + switch l.next() { + case eof: + return l.errorf("Missing closing quote in string literal") + case '"': + l.backup() + l.emit(TokenStringLiteral) + l.next() + l.emit(TokenDoubleQuote) + return lexAction + } + } +} + +// Just accepted the first character +func lexIdentifier(l *lexer) stateFunc { + l.acceptAllPassing(isIdentifierRune) + l.emit(TokenIdentifier) + return lexAction +} diff --git a/main/main.go b/main/main.go @@ -0,0 +1,29 @@ +package main + +import ( + "fmt" + "os" + "bufio" +) + +type TreePathSegment interface{} +type TreeData struct { + path []TreePathSegment + value Value +} +type TreeStream chan TreeData + +func main() { + if len(os.Args) < 2 { + fmt.Println("Missing program arg") + return + } + input := os.Args[1] + tokens := Lex(input) + program := Parse(tokens) + + stdin := bufio.NewReader(os.Stdin) + data := Json(stdin) + + Eval(program, data) +} diff --git a/main/parser.go b/main/parser.go @@ -0,0 +1,398 @@ +package main + +import ( + "strconv" + "fmt" +) + +type InstructionBasic int +const ( + InstructionAdd InstructionBasic = iota + InstructionSub + InstructionDiv + InstructionMul + InstructionIgnore + InstructionPushNull + InstructionAssign + InstructionIndex + InstructionDup +) + +type InstructionPushNumber float64 +type InstructionPushVariable string +type InstructionPushString string + +type Subroutine int +const ( + SubroutinePrintln Subroutine = iota +) +type InstructionCall struct { + subroutine Subroutine + nargs int +} + +type Instruction interface { + debug() + eval(*EvalState) +} + +func (i InstructionBasic) debug() { + switch i { + case InstructionAdd: + fmt.Println("Add") + case InstructionSub: + fmt.Println("Sub") + case InstructionDiv: + fmt.Println("Div") + case InstructionMul: + fmt.Println("Mul") + case InstructionIgnore: + fmt.Println("Ignore") + case InstructionPushNull: + fmt.Println("Push Null") + case InstructionAssign: + fmt.Println("Assign") + case InstructionIndex: + fmt.Println("Index") + case InstructionDup: + fmt.Println("Dup") + default: + fmt.Println("Unknown Basic Instruction") + } +} + +func (n InstructionPushNumber) debug() { + fmt.Printf("Push Number: %v\n", n) +} + +func (i InstructionCall) debug() { + subroutines := map[Subroutine]string { + SubroutinePrintln: "println", + } + fmt.Printf("Calling %v with %v arguments\n", subroutines[i.subroutine], i.nargs) +} + +func (s InstructionPushVariable) debug() { + fmt.Printf("Push variable: %v\n", s) +} + +func (s InstructionPushString) debug() { + fmt.Printf("Push string: %q\n", s) +} + +type Expression []Instruction + +type PatternSegmentIndex string +type PatternSegmentFilter Expression +type PatternSegmentBasic int +const ( + PatternSegmentAll PatternSegmentBasic = iota +) + +type PatternSegment interface { + debug() + matches(*EvalState, TreePathSegment) bool +} + +type Pattern struct { + segments []PatternSegment + isFirst bool +} + +func (s PatternSegmentIndex) debug() { + fmt.Printf("Index: %q\n", s) +} + +func (s PatternSegmentFilter) debug() { + fmt.Println("Filter: (") + for _, instruction := range s { + instruction.debug() + } + fmt.Println(")") +} + +func (s PatternSegmentBasic) debug() { + switch s { + case PatternSegmentAll: + fmt.Println("All") + default: + panic("Invalid basic pattern segment") + } +} + +type Block struct { + pattern Pattern + action Expression +} + +type Program struct { + blocks []Block +} + +func (p Program) debug() { + for _, block := range p.blocks { + fmt.Println("\nPattern:") + if block.pattern.isFirst { + fmt.Println("First") + } else { + fmt.Println("Last") + } + for _, s := range block.pattern.segments { + s.debug() + } + fmt.Println("\nAction:") + for _, a := range block.action { + a.debug() + } + } +} + +type parser struct { + tokenStream chan Token + prevToken Token + wasRewound bool +} + +func (p *parser) next() Token { + if p.wasRewound { + p.wasRewound = false + return p.prevToken + } + p.prevToken = <-p.tokenStream + if p.prevToken.typ == TokenErr { + fmt.Printf("Error: %q\n", p.prevToken.val) + panic("Lexing error") + } + return p.prevToken +} + +func (p *parser) rewind() { + p.wasRewound = true +} + +func (p *parser) accept(typ TokenType) (string, bool) { + token := p.next() + if typ == token.typ { + return token.val, true + } + p.rewind() + return "", false +} + +func (p *parser) peek() Token { + token := p.next() + p.rewind() + return token +} + +func (p *parser) parsePatternSegment() (segment PatternSegment, action bool, eof bool) { + token := p.next() + switch token.typ { + case TokenEOF: + p.rewind() + return nil, false, true + case TokenLBrace: + p.rewind() + return nil, true, false + case TokenIndexPattern: + return PatternSegmentIndex(token.val), false, false + case TokenLParen: + filter, noExpression := p.parseExpression(0) + if noExpression { + panic("Missing expression in filter") + } + _, hasCloseParen := p.accept(TokenRParen) + if !hasCloseParen { + panic("Missing close paren") + } + return PatternSegmentFilter(filter), false, false + case TokenAst: + return PatternSegmentAll, false, false + default: + panic("Expected pattern segment") + } +} + +func (p *parser) parsePattern() (pattern Pattern, eof bool) { + _, pattern.isFirst = p.accept(TokenCircum) + segment, action, eof := p.parsePatternSegment() + if eof { + return pattern, true + } else if action { + return pattern, false + } + pattern.segments = append(pattern.segments, segment) + for { + _, hasAnotherSegment := p.accept(TokenDot) + if !hasAnotherSegment { + break + } + segment, action, eof := p.parsePatternSegment() + if eof || action { + panic("Expected pattern segment") + } + pattern.segments = append(pattern.segments, segment) + } + return pattern, false +} + +func (p *parser) parseExpression(minPower int) (expr Expression, noExpression bool) { + token := p.next() + switch token.typ { + case TokenEOF: + p.rewind() + return nil, true + case TokenNumber: + num, err := strconv.ParseFloat(token.val, 64) + if err != nil { + panic("Invalid number") + } + expr = append(expr, InstructionPushNumber(num)) + case TokenDoubleQuote: + s, isStringLiteral := p.accept(TokenStringLiteral) + if !isStringLiteral { + panic("Missing string literal") + } + _, stringLiteralClosed := p.accept(TokenDoubleQuote) + if !stringLiteralClosed { + panic("Missing closing quote for string literal") + } + expr = append(expr, InstructionPushString(s)) + case TokenIdentifier: + _, hasLParen := p.accept(TokenLParen) + if hasLParen { + subroutines := map[string]Subroutine { + "println": SubroutinePrintln, + } + subroutine, isSubroutine := subroutines[token.val] + if !isSubroutine { + panic("Invalid subroutine") + } + nargs := 0 + for { + e, noExpression := p.parseExpression(0) + if noExpression { + break + } + expr = append(expr, e...) + nargs += 1 + _, hasComma := p.accept(TokenComma) + if !hasComma { + break + } + } + _, hasRParen := p.accept(TokenRParen) + if !hasRParen { + panic("Missing ) for subroutine call") + } + expr = append(expr, InstructionCall {subroutine, nargs}) + } else { + expr = append(expr, InstructionPushVariable(token.val)) + } + case TokenLParen: + e, noExpression := p.parseExpression(0) + if noExpression { + panic("Missing expression in ()") + } + _, hasCloseParen := p.accept(TokenRParen) + if !hasCloseParen { + panic("Missing ) in expression") + } + expr = append(expr, e...) + default: + p.rewind() + return nil, true + } + + oploop: for { + token := p.next() + binops := map[TokenType] struct{ + op InstructionBasic + left, right int + } { + TokenAdd: {InstructionAdd, 10, 11}, + TokenSub: {InstructionSub, 10, 11}, + TokenAst: {InstructionMul, 12, 13}, + TokenDiv: {InstructionDiv, 12, 13}, + TokenAssign: {InstructionAssign, 3, 2}, + } + binop, isBinop := binops[token.typ] + assigns := map[TokenType]InstructionBasic { + TokenAddAssign: InstructionAdd, + TokenSubAssign: InstructionSub, + TokenAstAssign: InstructionMul, + TokenDivAssign: InstructionDiv, + } + assignInstruction, isAssign := assigns[token.typ] + switch { + case isBinop && binop.left >= minPower: + e, noExpression := p.parseExpression(binop.right) + if noExpression { + panic("Missing expression after operator") + } + expr = append(expr, e...) + expr = append(expr, binop.op) + case isAssign && 3 >= minPower: + expr = append(expr, InstructionDup) + e, noExpression := p.parseExpression(2) + if noExpression { + panic("Missing expression after operator") + } + expr = append(expr, e...) + expr = append(expr, assignInstruction, InstructionAssign) + case token.typ == TokenSemicolon && 0 >= minPower: + e, noExpression := p.parseExpression(1) + expr = append(expr, InstructionIgnore) + if noExpression { + expr = append(expr, InstructionPushNull) + } else { + expr = append(expr, e...) + } + case token.typ == TokenDot && 20 >= minPower: + index, hasIndex := p.accept(TokenIdentifier) + if !hasIndex { + panic("Expected identifier after .") + } + expr = append(expr, InstructionPushString(index), InstructionIndex) + default: + p.rewind() + break oploop + } + } + + return expr, false +} + +func Parse(tokenStream chan Token) Program { + p := parser { + tokenStream: tokenStream, + wasRewound: false, + } + var blocks []Block + for { + pattern, eof := p.parsePattern() + if eof { + break + } + _, hasAction := p.accept(TokenLBrace) + var action Expression + if hasAction { + var noAction bool + action, noAction = p.parseExpression(0) + if noAction { + action = nil + } + _, hasActionClose := p.accept(TokenRBrace) + if !hasActionClose { + panic("Error: Missing } at end of action") + } + } + blocks = append(blocks, Block { + pattern: pattern, + action: action, + }) + } + return Program { + blocks: blocks, + } +}