From 8cf10efe3b5a1bcc70bc6e5590ee63fd5eb00c5b Mon Sep 17 00:00:00 2001 From: Charlie Stanton Date: Wed, 19 Jul 2023 11:57:59 +0100 Subject: Huge refactor to a more value based system, doing away with terminals. Also introduces unit testing --- json/read.go | 288 ++++++++++++++++++++++++++++++ json/write.go | 202 ++++++++++++++++++++++ json_array/read.go | 18 +- json_array/write.go | 27 ++- json_tokens/read.go | 16 +- json_tokens/write.go | 4 +- main/command.go | 12 +- main/lex.go | 2 +- main/main.go | 8 +- main/parse.go | 54 +----- subex/arithmetic.go | 114 +++++------- subex/filter.go | 62 +++++++ subex/main.go | 167 ++++++++++++++---- subex/main_test.go | 442 +++++++++++++++++++++++++++++++++++++++++++++++ subex/parse.go | 295 ++++++++++++++++--------------- subex/subexast.go | 296 +++++++++++++++++++------------ subex/subexast_test.go | 19 ++ subex/subexstate.go | 362 +++++++++++++++++++++----------------- subex/subexstate_test.go | 4 + walk/atom.go | 38 ++-- walk/value.go | 14 +- walk/walk.go | 259 +++++++++++++++++++++++++-- walk/walk_test.go | 45 +++++ 23 files changed, 2105 insertions(+), 643 deletions(-) create mode 100644 json/read.go create mode 100644 json/write.go create mode 100644 subex/filter.go create mode 100644 subex/main_test.go create mode 100644 subex/subexast_test.go create mode 100644 subex/subexstate_test.go create mode 100644 walk/walk_test.go diff --git a/json/read.go b/json/read.go new file mode 100644 index 0000000..6a68467 --- /dev/null +++ b/json/read.go @@ -0,0 +1,288 @@ +package json + +import ( + "bufio" + "main/walk" + "strings" + "strconv" + "errors" +) + +func isWhitespace(r rune) bool { + for _, ws := range " \t\r\n" { + if r == ws { + return true + } + } + return false +} + +func isNumberRune(r rune) bool { + return '0' <= r && r <= '9' || r == '.' +} + +type JSONReaderStructure int +const ( + JSONReaderStructureArray JSONReaderStructure = iota + JSONReaderStructureMap +) + +type JSONReaderState int +const ( + JSONReaderStateValue JSONReaderState = iota + JSONReaderStateValueEnd +) + +func NewJSONReader(reader *bufio.Reader) *JSONReader { + return &JSONReader { + path: nil, + structure: nil, + state: JSONReaderStateValue, + reader: reader, + } +} + +type JSONReader struct { + path []walk.Value + structure []JSONReaderStructure + state JSONReaderState + reader *bufio.Reader +} + +func (reader *JSONReader) Read() (walk.WalkItem, error) { + switch reader.state { + case JSONReaderStateValue: + if len(reader.structure) == 0 { + path := reader.clonePath() + value, err := reader.readValue() + if err != nil { + panic("Missing JSON input") + } + return walk.WalkItem { + Path: path, + Value: []walk.Value{value}, + }, nil + } + switch reader.structure[len(reader.structure) - 1] { + case JSONReaderStructureArray: + r, err := reader.nextNonWsRune() + if err != nil { + panic("Missing rest of array") + } + if r == ']' { + reader.structure = reader.structure[:len(reader.structure) - 1] + reader.path = reader.path[:len(reader.path) - 1] + reader.state = JSONReaderStateValueEnd + return reader.Read() + } + reader.reader.UnreadRune() + prevIndex := reader.path[len(reader.path) - 1].(walk.NumberScalar) + reader.path[len(reader.path) - 1] = prevIndex + 1 + path := reader.clonePath() + value, err := reader.readValue() + if err != nil { + panic("Missing value in array") + } + return walk.WalkItem { + Path: path, + Value: []walk.Value{value}, + }, nil + case JSONReaderStructureMap: + r, err := reader.nextNonWsRune() + if err != nil { + panic("Reached EOF inside JSON map") + } + if r == '}' { + reader.structure = reader.structure[:len(reader.structure) - 1] + reader.path = reader.path[:len(reader.path) - 1] + reader.state = JSONReaderStateValueEnd + return reader.Read() + } + if r != '"' { + panic("Expected key in map, found something else") + } + key := reader.readString() + reader.path[len(reader.path) - 1] = walk.StringStructure(key) + r, err = reader.nextNonWsRune() + if err != nil { + panic("Reached EOF after map key") + } + if r != ':' { + panic("Expected : after map key, found something else") + } + path := reader.clonePath() + value, err := reader.readValue() + if err != nil { + panic("Missing value in map") + } + return walk.WalkItem { + Path: path, + Value: []walk.Value{value}, + }, nil + default: + panic("Invalid JSONReaderStructure") + } + case JSONReaderStateValueEnd: + if len(reader.structure) == 0 { + _, err := reader.nextNonWsRune() + if err == nil { + panic("input continues after JSON value") + } + return walk.WalkItem{}, errors.New("eof") + } + switch reader.structure[len(reader.structure) - 1] { + case JSONReaderStructureArray: + r, err := reader.nextNonWsRune() + if err != nil { + panic("Missing end of array") + } + if r == ']' { + reader.path = reader.path[:len(reader.path) - 1] + reader.structure = reader.structure[:len(reader.structure) - 1] + reader.state = JSONReaderStateValueEnd + return reader.Read() + } + if r != ',' { + panic("Missing , after array value") + } + reader.state = JSONReaderStateValue + return reader.Read() + case JSONReaderStructureMap: + r, err := reader.nextNonWsRune() + if err != nil { + panic("Missing end of map") + } + if r == '}' { + reader.path = reader.path[:len(reader.path) - 1] + reader.structure = reader.structure[:len(reader.structure) - 1] + reader.state = JSONReaderStateValueEnd + return reader.Read() + } + if r != ',' { + panic("Missing , after map value") + } + reader.state = JSONReaderStateValue + return reader.Read() + default: + panic("Invalid JSONReaderStructure") + } + default: + panic("Invalid JSONReaderState") + } +} + +func (reader *JSONReader) readValue() (walk.Value, error) { + r, err := reader.nextNonWsRune() + if err != nil { + panic("Missing value in JSON") + } + switch r { + case 'n': + reader.requireString("ull") + reader.state = JSONReaderStateValueEnd + return walk.NullScalar{}, nil + case 'f': + reader.requireString("alse") + reader.state = JSONReaderStateValueEnd + return walk.BoolScalar(false), nil + case 't': + reader.requireString("rue") + reader.state = JSONReaderStateValueEnd + return walk.BoolScalar(true), nil + case '"': + v := reader.readString() + reader.state = JSONReaderStateValueEnd + return walk.StringStructure(v), nil + case '{': + reader.state = JSONReaderStateValue + reader.structure = append(reader.structure, JSONReaderStructureMap) + reader.path = append(reader.path, walk.StringStructure("")) + return walk.MapStructure(make(map[string]walk.Value)), nil + case '[': + reader.state = JSONReaderStateValue + reader.structure = append(reader.structure, JSONReaderStructureArray) + reader.path = append(reader.path, walk.NumberScalar(-1)) + return walk.ArrayStructure{}, nil + } + if isNumberRune(r) { + var builder strings.Builder + builder.WriteRune(r) + for { + r, _, err = reader.reader.ReadRune() + if err != nil { + break + } + if !isNumberRune(r) { + reader.reader.UnreadRune() + break + } + builder.WriteRune(r) + } + number, parseError := strconv.ParseFloat(builder.String(), 64) + if parseError != nil { + panic("Invalid number") + } + reader.state = JSONReaderStateValueEnd + return walk.NumberScalar(number), nil + } + panic("Invalid JSON value starting with: " + string(r)) +} + +func (reader *JSONReader) readString() string { + var builder strings.Builder + for { + r, _, err := reader.reader.ReadRune() + if err != nil { + panic("Missing rest of string") + } + if r == '"' { + break + } + if r == '\\' { + r, _, err = reader.reader.ReadRune() + if err != nil { + panic("Missing rune after \\") + } + builder.WriteRune(r) + continue + } + builder.WriteRune(r) + } + return builder.String() +} + +func (reader *JSONReader) nextNonWsRune() (rune, error) { + for { + r, _, err := reader.reader.ReadRune() + if err != nil { + return 0, err + } + if !isWhitespace(r) { + return r, nil + } + } +} + +func (reader *JSONReader) requireString(criteria string) { + for _, r := range criteria { + reader.require(r) + } +} + +func (reader *JSONReader) require(criterion rune) { + r, _, err := reader.reader.ReadRune() + if err != nil { + panic("Error while reading required rune: " + err.Error()) + } + if r != criterion { + panic("Required rune not read") + } +} + +func (reader *JSONReader) clonePath() []walk.Value { + return append([]walk.Value{}, reader.path...) +} + +func (reader *JSONReader) AssertDone() { + // TODO +} diff --git a/json/write.go b/json/write.go new file mode 100644 index 0000000..d024a56 --- /dev/null +++ b/json/write.go @@ -0,0 +1,202 @@ +package json + +import ( + "bufio" + "fmt" + "main/walk" +) + +func isNumber(value walk.Value) bool { + _, isFloat := value.(walk.NumberScalar) + return isFloat +} + +func isString(value walk.Value) bool { + _, isString := value.(walk.StringStructure) + return isString +} + +func segmentEqual(left walk.Value, right walk.Value) bool { + switch left := left.(type) { + case walk.NumberScalar: + _, isNumber := right.(walk.NumberScalar) + return isNumber + case walk.StringStructure: + right, isString := right.(walk.StringStructure) + return isString && left == right + default: + panic("Invalid path segment type") + } +} + +type JSONWriterState int +const ( + JSONWriterStateBeforeValue JSONWriterState = iota + JSONWriterStateAfterValue JSONWriterState = iota + JSONWriterStateInArray + JSONWriterStateInMap +) + +func NewJSONWriter(writer *bufio.Writer) *JSONWriter { + return &JSONWriter { + path: nil, + writer: writer, + state: JSONWriterStateBeforeValue, + } +} + +type JSONWriter struct { + path []walk.Value + writer *bufio.Writer + state JSONWriterState +} + +func (writer *JSONWriter) Write(item walk.WalkItem) error { + path := item.Path + for _, value := range item.Value { + err := writer.write(path, value) + if err != nil { + return err + } + } + return nil +} + +func (writer *JSONWriter) indent(level int) { + for i := 0; i < level; i += 1 { + writer.writer.WriteRune('\t') + } +} + +func (writer *JSONWriter) write(targetPath []walk.Value, value walk.Value) error { + diversionPoint := len(writer.path) + for diversionPoint < len(writer.path) && diversionPoint < len(targetPath) && segmentEqual(writer.path[diversionPoint], targetPath[diversionPoint]) { + diversionPoint += 1 + } + + switch writer.state { + case JSONWriterStateBeforeValue: + goto beforeValue + case JSONWriterStateAfterValue: + goto afterValue + case JSONWriterStateInMap: + goto inMap + case JSONWriterStateInArray: + goto inArray + default: + panic("Invalid JSONWriterState") + } + + beforeValue: { + switch value := value.(type) { + case walk.NullScalar: + writer.writer.WriteString("null") + writer.state = JSONWriterStateAfterValue + return nil + case walk.BoolScalar: + if value { + writer.writer.WriteString("true") + } else { + writer.writer.WriteString("false") + } + writer.state = JSONWriterStateAfterValue + return nil + case walk.NumberScalar: + writer.writer.WriteString(fmt.Sprintf("%v", value)) + writer.state = JSONWriterStateAfterValue + return nil + case walk.StringStructure: + writer.writer.WriteString(fmt.Sprintf("%q", value)) + writer.state = JSONWriterStateAfterValue + return nil + case walk.ArrayStructure: + // TODO: write the contents of the structures + writer.writer.WriteString("[\n") + writer.state = JSONWriterStateInArray + return nil + case walk.MapStructure: + writer.writer.WriteString("{\n") + writer.state = JSONWriterStateInMap + return nil + default: + panic("Invalid value type") + } + } + + afterValue: { + if len(writer.path) == 0 { + writer.writer.WriteRune('\n') + goto beforeValue + } + switch writer.path[len(writer.path) - 1].(type) { + case walk.NumberScalar: + // TODO: second part of this condition might be redundant + if len(writer.path) - 1 <= diversionPoint && len(targetPath) >= len(writer.path) && isNumber(targetPath[len(writer.path) - 1]) { + writer.writer.WriteString(",\n") + writer.path = writer.path[:len(writer.path) - 1] + goto inArray + } else { + writer.writer.WriteString("\n") + writer.indent(len(writer.path) - 1) + writer.writer.WriteString("]") + writer.path = writer.path[:len(writer.path) - 1] + goto afterValue + } + case walk.StringStructure: + if len(writer.path) -1 <= diversionPoint && len(targetPath) >= len(writer.path) && isString(targetPath[len(writer.path) - 1]) { + writer.writer.WriteString(",\n") + writer.path = writer.path[:len(writer.path) - 1] + goto inMap + } else { + writer.writer.WriteString("\n") + writer.indent(len(writer.path) - 1) + writer.writer.WriteString("}") + writer.path = writer.path[:len(writer.path) - 1] + goto afterValue + } + default: + panic("Invalid path segment type") + } + } + + inMap: { + if len(writer.path) <= diversionPoint && len(targetPath) > len(writer.path) && isString(targetPath[len(writer.path)]) { + writer.indent(len(writer.path) + 1) + writer.writer.WriteString(fmt.Sprintf("%q: ", targetPath[len(writer.path)].(walk.StringStructure))) + writer.path = append(writer.path, targetPath[len(writer.path)].(walk.StringStructure)) + goto beforeValue + } else { + writer.writer.WriteString("\n}") + goto afterValue + } + } + + inArray: { + if len(writer.path) <= diversionPoint && len(targetPath) > len(writer.path) && isNumber(targetPath[len(writer.path)]) { + writer.indent(len(writer.path) + 1) + writer.path = append(writer.path, walk.NumberScalar(0)) + goto beforeValue + } else { + writer.writer.WriteString("\n]") + goto afterValue + } + } +} + +func (writer *JSONWriter) AssertDone() { + for i := len(writer.path) - 1; i >= 0; i -= 1 { + switch writer.path[i].(type) { + case walk.NumberScalar: + writer.writer.WriteString("\n") + writer.indent(i) + writer.writer.WriteString("]") + case walk.StringStructure: + writer.writer.WriteString("\n") + writer.indent(i) + writer.writer.WriteString("}") + default: + panic("Invalid path segment type") + } + } + writer.writer.Flush() +} diff --git a/json_array/read.go b/json_array/read.go index 6334197..786bc2c 100644 --- a/json_array/read.go +++ b/json_array/read.go @@ -15,30 +15,30 @@ const ( stateDead ) -func atomiseValue(value interface{}) []walk.Atom { +func atomiseValue(value interface{}) []walk.AtomOLD { switch v := value.(type) { case nil: - return []walk.Atom{walk.NewAtomNull()} + return []walk.AtomOLD{walk.NewAtomNull()} case bool: - return []walk.Atom{walk.NewAtomBool(v)} + return []walk.AtomOLD{walk.NewAtomBool(v)} case float64: - return []walk.Atom{walk.NewAtomNumber(v)} + return []walk.AtomOLD{walk.NewAtomNumber(v)} case string: - atoms := []walk.Atom{walk.NewAtomStringTerminal()} + atoms := []walk.AtomOLD{walk.NewAtomStringTerminal()} for _, r := range v { atoms = append(atoms, walk.NewAtomStringRune(r)) } atoms = append(atoms, walk.NewAtomStringTerminal()) return atoms case []interface{}: - atoms := []walk.Atom{walk.NewAtomTerminal(walk.ArrayBegin)} + atoms := []walk.AtomOLD{walk.NewAtomTerminal(walk.ArrayBegin)} for _, element := range v { atoms = append(atoms, atomiseValue(element)...) } atoms = append(atoms, walk.NewAtomTerminal(walk.ArrayEnd)) return atoms case map[string]interface{}: - atoms := []walk.Atom{walk.NewAtomTerminal(walk.MapBegin)} + atoms := []walk.AtomOLD{walk.NewAtomTerminal(walk.MapBegin)} for key, element := range v { atoms = append(atoms, atomiseValue(key)...) atoms = append(atoms, atomiseValue(element)...) @@ -90,8 +90,8 @@ func (in *JSONArrayReader) Read() (walk.WalkItem, error) { } in.index += 1 return walk.WalkItem { - Path: []walk.Atom{walk.NewAtomNumber(float64(in.index - 1))}, - Value: atomiseValue(m), + Path: []interface{}{float64(in.index - 1)}, + Value: []interface{}{m}, }, nil case stateEnd: arrayEnd, err := in.decoder.Token() diff --git a/json_array/write.go b/json_array/write.go index 4d202c4..aaa2851 100644 --- a/json_array/write.go +++ b/json_array/write.go @@ -7,7 +7,7 @@ import ( "encoding/json" ) -func assembleValue(atoms []walk.Atom) (interface{}, []walk.Atom) { +func assembleValue(atoms []walk.AtomOLD) (interface{}, []walk.AtomOLD) { if len(atoms) == 0 { panic("Missing JSON value in output") } @@ -89,21 +89,16 @@ func assembleValue(atoms []walk.Atom) (interface{}, []walk.Atom) { } } -func outputValue(atoms []walk.Atom, writer *bufio.Writer) { - if len(atoms) == 0 { - return - } - value, atoms := assembleValue(atoms) - if len(atoms) != 0 { - panic("Tried to output more than one JSON value") - } - bytes, err := json.MarshalIndent(value, "\t", "\t") - if err != nil { - panic("Error marshalling json into bytes") - } - _, err = writer.Write(bytes) - if err != nil { - panic("Error writing value") +func outputValue(values []interface{}, writer *bufio.Writer) { + for _, value := range values { + bytes, err := json.MarshalIndent(value, "\t", "\t") + if err != nil { + panic("Error marshalling json into bytes") + } + _, err = writer.Write(bytes) + if err != nil { + panic("Error writing value") + } } } diff --git a/json_tokens/read.go b/json_tokens/read.go index 95bbb9d..b0acf71 100644 --- a/json_tokens/read.go +++ b/json_tokens/read.go @@ -33,11 +33,11 @@ const ( ) type JSONIn struct { - path []walk.Atom + path []walk.AtomOLD reader *bufio.Reader structure []JSONInStructure state JSONInState - readBuffer []walk.Atom + readBuffer []walk.AtomOLD readIndex int readBufferCapacity int actionBuffer []ReadAction @@ -46,11 +46,11 @@ type JSONIn struct { func NewJSONIn(reader *bufio.Reader) *JSONIn { return &JSONIn { - path: make([]walk.Atom, 0, 256), + path: make([]walk.AtomOLD, 0, 256), reader: reader, structure: []JSONInStructure{}, state: JSONInValueStart, - readBuffer: make([]walk.Atom, 0, 256), + readBuffer: make([]walk.AtomOLD, 0, 256), readIndex: 0, readBufferCapacity: 256, actionBuffer: make([]ReadAction, 0, 256), @@ -122,7 +122,7 @@ func (in *JSONIn) require(criterion rune) { } // Returns the first full value of a list of atoms and also a boolean to indicate if there isn't a value at the beginning -func firstValue(atoms []walk.Atom) ([]walk.Atom, bool) { +func firstValue(atoms []walk.AtomOLD) ([]walk.AtomOLD, bool) { if len(atoms) == 0 { return nil, true } @@ -141,12 +141,12 @@ func firstValue(atoms []walk.Atom) ([]walk.Atom, bool) { } } -func (in *JSONIn) readValue() []walk.Atom { +func (in *JSONIn) readValue() []walk.AtomOLD { try: value, incomplete := firstValue(in.readBuffer[in.readIndex:]) if incomplete { if in.readIndex == 0 { - newReadBuffer := make([]walk.Atom, len(in.readBuffer), in.readBufferCapacity * 2) + newReadBuffer := make([]walk.AtomOLD, len(in.readBuffer), in.readBufferCapacity * 2) in.readBufferCapacity *= 2 copy(newReadBuffer, in.readBuffer) in.readBuffer = newReadBuffer @@ -216,7 +216,7 @@ func (in *JSONIn) AssertDone() { } } -func (in *JSONIn) pushReadBuffer(atom walk.Atom) bool { +func (in *JSONIn) pushReadBuffer(atom walk.AtomOLD) bool { in.readBuffer = append(in.readBuffer, atom) return len(in.readBuffer) == in.readBufferCapacity } diff --git a/json_tokens/write.go b/json_tokens/write.go index 813f2f3..78ed186 100644 --- a/json_tokens/write.go +++ b/json_tokens/write.go @@ -29,7 +29,7 @@ func (out *JSONOut) indent(adjust int) { fmt.Fprint(out.writer, strings.Repeat("\t", len(out.structure) - 1 + adjust)) } -func (out *JSONOut) atomOut(key string, atom walk.Atom) { +func (out *JSONOut) atomOut(key string, atom walk.AtomOLD) { state := out.structure[len(out.structure) - 1] switch state { case JSONOutRoot, JSONOutMap, JSONOutArray: @@ -115,7 +115,7 @@ func (out *JSONOut) atomOut(key string, atom walk.Atom) { } } -func (out *JSONOut) Print(path walk.Path, values []walk.Atom) { +func (out *JSONOut) Print(path walk.Path, values []walk.AtomOLD) { var segment walk.PathSegment if len(path) > 0 { segment = path[len(path) - 1] diff --git a/main/command.go b/main/command.go index ef48596..5a898e2 100644 --- a/main/command.go +++ b/main/command.go @@ -1,8 +1,8 @@ package main import ( - "main/walk" "main/subex" + "main/walk" "fmt" ) @@ -46,7 +46,7 @@ func (cmd AppendNextCommand) exec(state *ProgramState) { if err != nil { panic("Missing next value") } - state.value = walk.ConcatData(state.value, nextItem.Value) + state.value = append(state.value, nextItem.Value...) state.path = nextItem.Path state.pc++ } @@ -72,12 +72,12 @@ func (cmd DeletePathCommand) String() string { return "D" } -func runSubex(state subex.Transducer, in []walk.Atom) (out []walk.Atom, error bool) { - atomsOut, error := subex.RunTransducer(state, in) +func runSubex(state subex.Transducer, in walk.ValueList) (walk.ValueList, bool) { + out, error := subex.RunTransducer(state, in) if error { return nil, true } - return atomsOut, false + return out, false } type SubstituteValueCommand struct { @@ -193,7 +193,7 @@ func (cmd SwapPathCommand) String() string { type AppendPathCommand struct {} func (cmd AppendPathCommand) exec(state *ProgramState) { - state.path = walk.ConcatData(state.path, state.value) + state.path = append(state.path, state.value...) state.pc++ } func (cmd AppendPathCommand) String() string { diff --git a/main/lex.go b/main/lex.go index 198c346..496abd0 100644 --- a/main/lex.go +++ b/main/lex.go @@ -180,7 +180,7 @@ func lexCommand(l *lexer) stateFunc { case '}': l.emit(TokenRBrace) return lexCommand - case 's', 'S', 'f', 'F', 'l', 'L', 'a', 'A': + case 's', 'S': l.emit(TokenCommand) return lexSubstitution case ':', 'b': diff --git a/main/main.go b/main/main.go index a506954..8e8c369 100644 --- a/main/main.go +++ b/main/main.go @@ -4,13 +4,13 @@ import ( "os" "bufio" "main/walk" - "main/json_array" + "main/json" ) type Program []Command type ProgramState struct { - path, value, xreg, yreg, zreg []walk.Atom + path, value, xreg, yreg, zreg walk.ValueList in walk.StredReader out walk.StredWriter program []Command @@ -45,8 +45,8 @@ func main() { stdout := bufio.NewWriter(os.Stdout) state := ProgramState { - in: json_array.NewJSONArrayReader(stdin), - out: json_array.NewJSONArrayWriter(stdout), + in: json.NewJSONReader(stdin), + out: json.NewJSONWriter(stdout), program: program, } diff --git a/main/parse.go b/main/parse.go index cbbfb9a..141ae7e 100644 --- a/main/parse.go +++ b/main/parse.go @@ -71,61 +71,13 @@ func (p *parser) parseBasicCommand(commands []Command, commandChar rune) []Comma return append(commands, NextCommand{}) case 'N': return append(commands, AppendNextCommand{}) - case 's', 'S', 'f', 'F', 'l', 'L', 'a', 'A': + case 's', 'S': ast := p.parseSubex() - switch commandChar { - case 'f': - ast = subex.SubexASTConcat { - First: ast, - Second: subex.SubexASTRepeat { - Content: subex.SubexASTCopyAny{}, - Acceptable: []subex.ConvexRange{{Start: -1, End: 0}}, - }, - } - case 'F': - ast = subex.SubexASTConcat { - First: subex.SubexASTStore { - Slot: '_', - Match: ast, - }, - Second: subex.SubexASTRepeat { - Content: subex.SubexASTCopyAny{}, - Acceptable: []subex.ConvexRange{{Start: -1, End: 0}}, - }, - } - case 'l': - ast = subex.SubexASTConcat { - First: subex.SubexASTRepeat { - Content: subex.SubexASTCopyAny{}, - Acceptable: []subex.ConvexRange{{Start: 0, End: -1}}, - }, - Second: ast, - } - case 'L': - ast = subex.SubexASTConcat { - First: subex.SubexASTRepeat { - Content: subex.SubexASTCopyAny{}, - Acceptable: []subex.ConvexRange{{Start: 0, End: -1}}, - }, - Second: subex.SubexASTStore { - Slot: '_', - Match: ast, - }, - } - case 'a', 'A': - ast = subex.SubexASTRepeat { - Acceptable: []subex.ConvexRange{{Start: -1, End: 0}}, - Content: subex.SubexASTOr { - First: ast, - Second: subex.SubexASTCopyAny{}, - }, - } - } subex := subex.CompileTransducer(ast) switch commandChar { - case 's', 'a': + case 's': return append(commands, SubstituteValueCommand {subex}, JumpCommand {len(commands) + 3}) - case 'S', 'f', 'F', 'l', 'L', 'A': + case 'S': return append(commands, SubstitutePathCommand {subex}, JumpCommand {len(commands) + 3}) default: panic("Unreachable!?!?") diff --git a/subex/arithmetic.go b/subex/arithmetic.go index 1ebd1a6..9e5e530 100644 --- a/subex/arithmetic.go +++ b/subex/arithmetic.go @@ -6,27 +6,23 @@ import ( "strconv" ) -func sumValues(atoms []walk.Atom) ([]walk.Atom, error) { +func sumValues(values walk.ValueList) (walk.ValueList, error) { allBools := true var sum float64 = 0 var any bool = false - values, err := walk.Compound(atoms) - if err != nil { - return nil, err - } for _, value := range values { switch v := value.(type) { - case walk.ValueNull: + case walk.NullScalar: allBools = false - case walk.ValueBool: - if bool(v) { + case walk.BoolScalar: + if v { sum += 1 any = true } - case walk.ValueNumber: + case walk.NumberScalar: allBools = false sum += float64(v) - case walk.ValueString: + case walk.StringStructure: allBools = false num, err := strconv.ParseFloat(string(v), 64) if err == nil { @@ -39,35 +35,31 @@ func sumValues(atoms []walk.Atom) ([]walk.Atom, error) { } } if allBools { - return []walk.Atom{walk.NewAtomBool(any)}, nil + return walk.ValueList{walk.BoolScalar(any)}, nil } else { - return []walk.Atom{walk.NewAtomNumber(sum)}, nil + return walk.ValueList{walk.NumberScalar(sum)}, nil } } // Compounds atoms into values, if all values are booleans, does AND, if not, tries to cast to numbers and multiply -func multiplyValues(atoms []walk.Atom) ([]walk.Atom, error) { +func multiplyValues(values walk.ValueList) (walk.ValueList, error) { allBools := true var product float64 = 1 var all bool = false - values, err := walk.Compound(atoms) - if err != nil { - return nil, err - } for _, value := range values { switch v := value.(type) { - case walk.ValueNull: + case walk.NullScalar: allBools = false product *= 0 - case walk.ValueBool: - if !bool(v) { + case walk.BoolScalar: + if !v { product *= 0 all = false } - case walk.ValueNumber: + case walk.NumberScalar: allBools = false product *= float64(v) - case walk.ValueString: + case walk.StringStructure: allBools = false num, err := strconv.ParseFloat(string(v), 64) if err == nil { @@ -80,35 +72,31 @@ func multiplyValues(atoms []walk.Atom) ([]walk.Atom, error) { } } if allBools { - return []walk.Atom{walk.NewAtomBool(all)}, nil + return walk.ValueList{walk.BoolScalar(all)}, nil } else { - return []walk.Atom{walk.NewAtomNumber(product)}, nil + return walk.ValueList{walk.NumberScalar(product)}, nil } } // Does tries to cast all to numbers and negates them -func negateValues(atoms []walk.Atom) ([]walk.Atom, error) { - var negatedNumbers []walk.Atom - values, err := walk.Compound(atoms) - if err != nil { - return nil, err - } +func negateValues(values walk.ValueList) (walk.ValueList, error) { + var negatedNumbers walk.ValueList for _, value := range values { switch v := value.(type) { - case walk.ValueNull: - negatedNumbers = append(negatedNumbers, walk.NewAtomNumber(0)) - case walk.ValueBool: - if bool(v) { - negatedNumbers = append(negatedNumbers, walk.NewAtomNumber(-1)) + case walk.NullScalar: + negatedNumbers = append(negatedNumbers, walk.NumberScalar(0)) + case walk.BoolScalar: + if v { + negatedNumbers = append(negatedNumbers, walk.NumberScalar(-1)) } else { - negatedNumbers = append(negatedNumbers, walk.NewAtomNumber(0)) + negatedNumbers = append(negatedNumbers, walk.NumberScalar(0)) } - case walk.ValueNumber: - negatedNumbers = append(negatedNumbers, walk.NewAtomNumber(-float64(v))) - case walk.ValueString: + case walk.NumberScalar: + negatedNumbers = append(negatedNumbers, walk.NumberScalar(-float64(v))) + case walk.StringStructure: num, err := strconv.ParseFloat(string(v), 64) if err == nil { - negatedNumbers = append(negatedNumbers, walk.NewAtomNumber(-num)) + negatedNumbers = append(negatedNumbers, walk.NumberScalar(-num)) } else { return nil, errors.New("Tried to negate non-castable string") } @@ -121,28 +109,24 @@ func negateValues(atoms []walk.Atom) ([]walk.Atom, error) { // If all are castable to numbers, takes reciprocals of all and returns them // Else errors -func reciprocalValues(atoms []walk.Atom) ([]walk.Atom, error) { - var reciprocals []walk.Atom - values, err := walk.Compound(atoms) - if err != nil { - return nil, err - } +func reciprocalValues(values walk.ValueList) (walk.ValueList, error) { + var reciprocals walk.ValueList for _, value := range values { switch v := value.(type) { - case walk.ValueNull: + case walk.NullScalar: return nil, errors.New("Tried to take reciprocal of null") - case walk.ValueBool: - if bool(v) { - reciprocals = append(reciprocals, walk.NewAtomNumber(1)) + case walk.BoolScalar: + if v { + reciprocals = append(reciprocals, walk.NumberScalar(1)) } else { return nil, errors.New("Tried to take reciprocal of false") } - case walk.ValueNumber: - reciprocals = append(reciprocals, walk.NewAtomNumber(1 / float64(v))) - case walk.ValueString: + case walk.NumberScalar: + reciprocals = append(reciprocals, walk.NumberScalar(1 / float64(v))) + case walk.StringStructure: num, err := strconv.ParseFloat(string(v), 64) if err == nil { - reciprocals = append(reciprocals, walk.NewAtomNumber(1 / num)) + reciprocals = append(reciprocals, walk.NumberScalar(1 / num)) } else { return nil, errors.New("Tried to take reciprocal of non-castable string") } @@ -155,21 +139,17 @@ func reciprocalValues(atoms []walk.Atom) ([]walk.Atom, error) { // If all are castable to booleans, NOTs all and returns them // Else errors -func notValues(atoms []walk.Atom) (notted []walk.Atom, err error) { - values, err := walk.Compound(atoms) - if err != nil { - return nil, err - } +func notValues(values walk.ValueList) (notted walk.ValueList, err error) { for _, value := range values { switch v := value.(type) { - case walk.ValueNull: - notted = append(notted, walk.NewAtomBool(true)) - case walk.ValueBool: - notted = append(notted, walk.NewAtomBool(!bool(v))) - case walk.ValueNumber: - notted = append(notted, walk.NewAtomBool(v == 0)) - case walk.ValueString: - notted = append(notted, walk.NewAtomBool(len(v) == 0)) + case walk.NullScalar: + notted = append(notted, walk.BoolScalar(true)) + case walk.BoolScalar: + notted = append(notted, walk.BoolScalar(!bool(v))) + case walk.NumberScalar: + notted = append(notted, walk.BoolScalar(v == 0)) + case walk.StringStructure: + notted = append(notted, walk.BoolScalar(len(v) == 0)) default: return nil, errors.New("Tried to NOT non-boolean") } diff --git a/subex/filter.go b/subex/filter.go new file mode 100644 index 0000000..1a1b6db --- /dev/null +++ b/subex/filter.go @@ -0,0 +1,62 @@ +package subex + +import ( + "main/walk" +) + +type valueFilter interface { + valueFilter(value walk.Value) bool +} + +type selectScalarFilter struct { + scalar walk.Scalar +} +func (scalar selectScalarFilter) valueFilter(value walk.Value) bool { + return value == scalar.scalar +} + +type anyNumberFilter struct {} +func (_ anyNumberFilter) valueFilter(value walk.Value) bool { + _, isNumber := value.(walk.NumberScalar) + return isNumber +} + +type anyBoolFilter struct {} +func (_ anyBoolFilter) valueFilter(value walk.Value) bool { + _, isBool := value.(walk.BoolScalar) + return isBool +} + +type anyValueFilter struct {} +func (_ anyValueFilter) valueFilter(value walk.Value) bool { + return true +} + +type anyArrayFilter struct {} +func (_ anyArrayFilter) valueFilter(value walk.Value) bool { + _, isArray := value.(walk.ArrayStructure) + return isArray +} + +type anyStringFilter struct {} +func (_ anyStringFilter) valueFilter(value walk.Value) bool { + _, isString := value.(walk.StringStructure) + return isString +} + + +type runeFilter interface { + runeFilter(r walk.StringRuneAtom) bool +} + +type anyRuneFilter struct {} +func (_ anyRuneFilter) runeFilter(r walk.StringRuneAtom) bool { + return true +} + +type selectRuneFilter struct { + r rune +} +func (f selectRuneFilter) runeFilter(r walk.StringRuneAtom) bool { + return f.r == rune(r) +} diff --git a/subex/main.go b/subex/main.go index ebd87cb..e2cec0b 100644 --- a/subex/main.go +++ b/subex/main.go @@ -11,15 +11,15 @@ type Transducer struct { type StoreItem struct {} // Where slots are stored -type Store [][]walk.Atom +type Store []walk.OutputList // Return a new store with all the data from this one func (store Store) clone() Store { - newStore := make([][]walk.Atom, len(store)) + newStore := make([]walk.OutputList, len(store)) copy(newStore, store) return newStore } // Return a copy of this store but with an additional slot set -func (store Store) withValue(key int, value []walk.Atom) Store { +func (store Store) withValue(key int, value walk.OutputList) Store { newStore := store.clone() newStore[key] = value return newStore @@ -46,7 +46,7 @@ func CompileTransducer(transducerAst SubexAST) Transducer { nextId: 0, ids: make(map[rune]int), } - initial := transducerAst.compileWith(&SubexNoneState{}, &slotMap) + initial := transducerAst.compileWith(&SubexNoneState{}, &slotMap, false) return Transducer { storeSize: slotMap.nextId, initialState: initial, @@ -55,54 +55,119 @@ func CompileTransducer(transducerAst SubexAST) Transducer { // An immutable stack for outputting to type OutputStack struct { - head []walk.Atom + head walk.OutputList tail *OutputStack } -func (stack OutputStack) pop() ([]walk.Atom, OutputStack) { +func (stack OutputStack) pop() (walk.OutputList, OutputStack) { return stack.head, *stack.tail } -func (stack OutputStack) push(atoms []walk.Atom) OutputStack { +func (stack OutputStack) push(atoms walk.OutputList) OutputStack { return OutputStack { head: atoms, tail: &stack, } } -func (stack OutputStack) replace(atoms []walk.Atom) OutputStack { +func (stack OutputStack) replace(atoms walk.OutputList) OutputStack { return OutputStack { head: atoms, tail: stack.tail, } } -func (stack OutputStack) peek() []walk.Atom { +func (stack OutputStack) peek() walk.OutputList { return stack.head } -func topAppend(outputStack OutputStack, atoms []walk.Atom) OutputStack { - head := outputStack.peek() - return outputStack.replace(walk.ConcatData(head, atoms)) +func topAppend(outputStack OutputStack, values []walk.Value) OutputStack { + head, isValues := outputStack.peek().(walk.ValueList) + if !isValues { + panic("Tried to append a value to an output of non-values") + } + head = append(walk.ValueList{}, head...) + head = append(head, values...) + return outputStack.replace(head) } -// One branch of subex execution -type SubexBranch struct { +func topAppendRunes(outputStack OutputStack, runes walk.RuneList) OutputStack { + head, isRunes := outputStack.peek().(walk.RuneList) + if !isRunes { + panic("Tried to append runes to a non-rune list") + } + head = append(walk.RuneList{}, head...) + head = append(head, runes...) + return outputStack.replace(head) +} + +// Addition state that goes along with a subex state in an execution branch +type auxiliaryState struct { // Content of slots in this branch store Store - // State in this branch - state SubexState // The output stack. At the end of the program, whatever is on top of this will be output // States may push or pop to the stack as they wish, creating sort of a call stack that allows states to capture the output of other states outputStack OutputStack + // How deeply nested the current execution is inside of the overall value + // i.e. starts at zero, is incremented to one when entering an array + nesting int +} + +func (aux auxiliaryState) cloneStore() auxiliaryState { + aux.store = aux.store.clone() + return aux +} + +func (aux auxiliaryState) withValue(slot int, value walk.OutputList) auxiliaryState { + aux.store = aux.store.withValue(slot, value) + return aux +} + +func (aux auxiliaryState) pushOutput(data walk.OutputList) auxiliaryState { + aux.outputStack = aux.outputStack.push(data) + return aux +} + +func (aux auxiliaryState) popOutput() (walk.OutputList, auxiliaryState) { + data, output := aux.outputStack.pop() + aux.outputStack = output + return data, aux +} + +func (aux auxiliaryState) topAppend(values walk.OutputList) auxiliaryState { + switch output := values.(type) { + case walk.ValueList: + aux.outputStack = topAppend(aux.outputStack, output) + case walk.RuneList: + aux.outputStack = topAppendRunes(aux.outputStack, output) + } + return aux +} + +func (aux auxiliaryState) incNest() auxiliaryState { + aux.nesting++ + return aux +} + +func (aux auxiliaryState) decNest() auxiliaryState { + aux.nesting-- + return aux +} + +// One branch of subex execution +type SubexBranch struct { + // State in this branch + state SubexState + // Axiliary state + aux auxiliaryState } // Read a single character and return all the branches resulting from this branch consuming it -func (pair SubexBranch) eat(char walk.Atom) []SubexBranch { - return pair.state.eat(pair.store, pair.outputStack, char) +func (pair SubexBranch) eat(char walk.Edible) []SubexBranch { + return pair.state.eat(pair.aux, char) } func (pair SubexBranch) accepting() []OutputStack { - return pair.state.accepting(pair.store, pair.outputStack) + return pair.state.accepting(pair.aux) } func equalStates(left SubexBranch, right SubexBranch) bool { // Only care about if they are the same pointer - return left.state == right.state + return left.state == right.state && left.aux.nesting == right.aux.nesting } // If two branches have the same state, only the first has a chance of being successful @@ -121,33 +186,67 @@ func pruneStates(states []SubexBranch) []SubexBranch { return states[:uniqueStates] } -// Run the subex transducer -func RunTransducer(transducer Transducer, input []walk.Atom) (output []walk.Atom, err bool) { - states := []SubexBranch{{ - state: transducer.initialState, - outputStack: OutputStack { - head: nil, - tail: nil, - }, - store: make([][]walk.Atom, transducer.storeSize), - }} +func processInput(states []SubexBranch, input walk.StructureIter, nesting int) []SubexBranch { + if len(states) == 0 { + return states + } var tmp []SubexBranch newStates := make([]SubexBranch, 0, 2) - for _, piece := range input { + for { + piece := input.Next() + if piece == nil { + break + } + + // TODO: break if there are no states at this nesting level left + // TODO: What to do if there are remaining nested states after all pieces have been used? for _, state := range states { - newStates = append(newStates, state.eat(piece)...) + if state.aux.nesting == nesting { + newStates = append(newStates, state.eat(piece)...) + } else { + newStates = append(newStates, state) + } } + + structure, isStructure := piece.(walk.Structure) + if isStructure { + iter := structure.Iter() + newStates = processInput(newStates, iter, nesting + 1) + } + tmp = states states = pruneStates(newStates) newStates = tmp[:0] if len(states) == 0 { - return nil, true + return states } } + return states +} + +// Run the subex transducer +func RunTransducer(transducer Transducer, input walk.ValueList) (output []walk.Value, err bool) { + states := []SubexBranch{{ + state: transducer.initialState, + aux: auxiliaryState { + outputStack: OutputStack { + head: walk.ValueList{}, + tail: nil, + }, + store: make([]walk.OutputList, transducer.storeSize), + nesting: 0, + }, + }} + + states = processInput(states, walk.NewValueIter(input), 0) + for _, state := range states { acceptingStacks := state.accepting() for _, stack := range acceptingStacks { - output := stack.head + output, isValues := stack.head.(walk.ValueList) + if !isValues { + panic("Tried to output a non-values list") + } return output, false } } diff --git a/subex/main_test.go b/subex/main_test.go new file mode 100644 index 0000000..f0350d2 --- /dev/null +++ b/subex/main_test.go @@ -0,0 +1,442 @@ +package subex + +import ( + "testing" + "main/walk" + "fmt" + "strings" +) + +func buildTransducer(subex string) Transducer { + lexer := NewStringRuneReader(subex) + ast := Parse(lexer) + transducer := CompileTransducer(ast) + return transducer +} + +func fatalMismatch(t *testing.T, path walk.ValueList, message string) { + var sep string + var builder strings.Builder + for _, segment := range path { + builder.WriteString(sep) + builder.WriteString(segment.Debug()) + sep = "." + } + builder.WriteString(": ") + builder.WriteString(message) + t.Fatal(builder.String()) +} + +func expectEqual(t *testing.T, path walk.ValueList, output walk.Value, expected walk.Value) { + switch expected := expected.(type) { + case walk.NullScalar: + _, isNull := output.(walk.NullScalar) + if !isNull { + fatalMismatch(t, path, fmt.Sprintf("expected null, found %s", output.Debug())) + } + case walk.BoolScalar: + b, isBool := output.(walk.BoolScalar) + if !isBool { + fatalMismatch(t, path, fmt.Sprintf("expected boolean, found %s", output.Debug())) + } + if expected != b { + fatalMismatch(t, path, fmt.Sprintf("expected %s, found %s", expected.Debug(), b.Debug())) + } + case walk.NumberScalar: + n, isNumber := output.(walk.NumberScalar) + if !isNumber { + fatalMismatch(t, path, fmt.Sprintf("expected number, found %s", output.Debug())) + } + if expected != n { + fatalMismatch(t, path, fmt.Sprintf("expected %s, found %s", expected.Debug(), n.Debug())) + } + case walk.StringStructure: + s, isString := output.(walk.StringStructure) + if !isString { + fatalMismatch(t, path, fmt.Sprintf("expected string, found %s", output.Debug())) + } + if s != expected { + fatalMismatch(t, path, fmt.Sprintf("expected %s, found %s", expected.Debug(), s.Debug())) + } + case walk.ArrayStructure: + array, isArray := output.(walk.ArrayStructure) + if !isArray { + fatalMismatch(t, path, fmt.Sprintf("expected array, found %s", output.Debug())) + } + if len(array) != len(expected) { + fatalMismatch(t, path, fmt.Sprintf("Expected array length %d, found %d", len(expected), len(array))) + } + for i, value := range expected { + expectEqual(t, append(path, walk.NumberScalar(i)), array[i], value) + } + case walk.MapStructure: + m, isMap := output.(walk.MapStructure) + if !isMap { + fatalMismatch(t, path, fmt.Sprintf("expected map, found %s", output.Debug())) + } + for key, expected := range expected { + value, hasValue := m[key] + if !hasValue { + fatalMismatch(t, path, fmt.Sprintf("expected map to have key %s, but it doesn't", key)) + } + expectEqual(t, append(path, walk.StringStructure(key)), value, expected) + } + for key := range m { + _, hasValue := expected[key] + if !hasValue { + fatalMismatch(t, path, fmt.Sprintf("Didn't expect map to have key %s, but it does", key)) + } + } + default: + panic("Expected contains an invalid value") + } +} + +func expectOutput(t *testing.T, transducer Transducer, input walk.ValueList, expected walk.ValueList) { + output, err := RunTransducer(transducer, input) + + if err { + t.Fatalf("Error") + } + + if len(output) != len(expected) { + t.Fatalf("Output has incorrect length. Expected %d, got %d", len(expected), len(output)) + } + + for i, value := range output { + expectEqual(t, walk.ValueList{walk.NumberScalar(i)}, value, expected[i]) + } +} + +func expectReject(t *testing.T, transducer Transducer, input walk.ValueList) { + _, err := RunTransducer(transducer, input) + + if !err { + t.Fatalf("Expected transducer to error, but it accepted input: %v", input) + } +} + +func TestSimpleProcessInput(t *testing.T) { + states := []SubexBranch{{ + state: SubexCopyState { + next: SubexNoneState{}, + filter: anyValueFilter{}, + }, + aux: auxiliaryState { + outputStack: OutputStack { + head: walk.ValueList{}, + tail: nil, + }, + store: nil, + nesting: 0, + }, + }} + + input := walk.ValueList{ + walk.NumberScalar(2), + } + + states = processInput(states, walk.NewValueIter(input), 0) + + if len(states) != 1 { + t.Fatalf("States has wrong length") + } + + accepting := states[0].accepting() + + if len(accepting) != 1 { + t.Fatalf("Wrong number of accepting branches") + } + + values, isValues := accepting[0].head.(walk.ValueList) + + if !isValues { + t.Fatalf("Output is not a value list") + } + + if len(values) != 1 { + t.Fatalf("Output has wrong length") + } + + if values[0] != walk.NumberScalar(2) { + t.Fatalf("Outputted the wrong value") + } +} + +func TestTopAppendFromEmpty(t *testing.T) { + output := OutputStack { + head: walk.ValueList{}, + tail: nil, + } + + output = topAppend(output, []walk.Value{walk.NumberScalar(1), walk.NumberScalar(2)}) + + values, isValues := output.head.(walk.ValueList) + + if !isValues { + t.Fatalf("head is not values") + } + + if len(values) != 2 { + t.Fatalf("values has the wrong length") + } + + if values[0] != walk.NumberScalar(1) || values[1] != walk.NumberScalar(2) { + t.Fatalf("output has the wrong values") + } +} + +func TestArrayPriority1(t *testing.T) { + expectOutput( + t, + buildTransducer(":[.$_]|."), + walk.ValueList{ + walk.ArrayStructure{ + walk.NumberScalar(5), + }, + }, + walk.ValueList{ + walk.ArrayStructure{}, + }, + ) +} + +func TestArrayPriority2(t *testing.T) { + expectOutput( + t, + buildTransducer(".|:[.$_]"), + walk.ValueList{ + walk.ArrayStructure{ + walk.NumberScalar(5), + }, + }, + walk.ValueList{ + walk.ArrayStructure{ + walk.NumberScalar(5), + }, + }, + ) +} + +func TestDropSecondArrayElement(t *testing.T) { + expectOutput( + t, + buildTransducer(":[.(.$_)(.{-0})]"), + walk.ValueList{ + walk.ArrayStructure{ + walk.NumberScalar(1), + walk.NumberScalar(2), + walk.NumberScalar(3), + walk.NumberScalar(4), + }, + }, + walk.ValueList{ + walk.ArrayStructure{ + walk.NumberScalar(1), + walk.NumberScalar(3), + walk.NumberScalar(4), + }, + }, + ) +} + +func TestDropSecondElement(t *testing.T) { + expectOutput( + t, + buildTransducer(".(.$_)(.{-0})"), + walk.ValueList{ + walk.NumberScalar(1), + walk.NumberScalar(2), + walk.NumberScalar(3), + walk.NumberScalar(4), + }, + walk.ValueList{ + walk.NumberScalar(1), + walk.NumberScalar(3), + walk.NumberScalar(4), + }, + ) +} + +func TestCopyManyValues(t *testing.T) { + expectOutput( + t, + buildTransducer(".{-0}"), + walk.ValueList{ + walk.NumberScalar(1), + walk.NumberScalar(2), + walk.NumberScalar(3), + walk.NumberScalar(4), + }, + walk.ValueList{ + walk.NumberScalar(1), + walk.NumberScalar(2), + walk.NumberScalar(3), + walk.NumberScalar(4), + }, + ) +} + +func TestCopyTwoValues(t *testing.T) { + expectOutput( + t, + buildTransducer(".."), + walk.ValueList{ + walk.NumberScalar(1), + walk.NumberScalar(2), + }, + walk.ValueList{ + walk.NumberScalar(1), + walk.NumberScalar(2), + }, + ) +} + +func TestCopyValue(t *testing.T) { + expectOutput( + t, + buildTransducer("."), + walk.ValueList{ + walk.NumberScalar(1), + }, + walk.ValueList{ + walk.NumberScalar(1), + }, + ) +} + +func TestSimpleArrayEntry(t *testing.T) { + expectOutput( + t, + buildTransducer(":[..]"), + walk.ValueList{ + walk.ArrayStructure{ + walk.NumberScalar(1), + walk.NumberScalar(2), + }, + }, + walk.ValueList{ + walk.ArrayStructure{ + walk.NumberScalar(1), + walk.NumberScalar(2), + }, + }, + ) +} + +func TestArrayEntrySum(t *testing.T) { + expectOutput( + t, + buildTransducer(":[%{-0}+]"), + walk.ValueList{ + walk.ArrayStructure{ + walk.NumberScalar(1), + walk.NumberScalar(7), + walk.NumberScalar(8), + walk.NumberScalar(3), + }, + }, + walk.ValueList{ + walk.ArrayStructure{ + walk.NumberScalar(19), + }, + }, + ) +} + +func TestStringEmptyMatch(t *testing.T) { + expectOutput( + t, + buildTransducer("~\"\""), + walk.ValueList{ + walk.StringStructure(""), + }, + walk.ValueList{ + walk.StringStructure(""), + }, + ) +} + +func TestStringSimpleMatch(t *testing.T) { + expectOutput( + t, + buildTransducer("~\"hello\""), + walk.ValueList{ + walk.StringStructure("hello"), + }, + walk.ValueList{ + walk.StringStructure("hello"), + }, + ) +} + +func TestDiscardString(t *testing.T) { + expectOutput( + t, + buildTransducer("~\"test\"$_."), + walk.ValueList{ + walk.StringStructure("test"), + walk.NumberScalar(2), + }, + walk.ValueList{ + walk.NumberScalar(2), + }, + ) +} + +func TestStringThenValue(t *testing.T) { + expectOutput( + t, + buildTransducer("~\"test\"."), + walk.ValueList{ + walk.StringStructure("test"), + walk.NumberScalar(2), + }, + walk.ValueList{ + walk.StringStructure("test"), + walk.NumberScalar(2), + }, + ) +} + +func TestCutStringFromStart(t *testing.T) { + //transducer := buildTransducer("~\"test\"$_(.{-0})") + lexer := NewStringRuneReader("~\"test\"$_(.{-0})") + ast := Parse(lexer) + t.Log(ast) + transducer := CompileTransducer(ast) + + expectOutput( + t, + transducer, + walk.ValueList{ + walk.StringStructure("test"), + walk.NumberScalar(2), + walk.StringStructure("test"), + }, + walk.ValueList{ + walk.NumberScalar(2), + walk.StringStructure("test"), + }, + ) + expectOutput( + t, + transducer, + walk.ValueList{ + walk.StringStructure("test"), + }, + walk.ValueList{}, + ) + expectReject( + t, + transducer, + walk.ValueList{ + walk.StringStructure("yeet"), + }, + ) + expectReject( + t, + transducer, + walk.ValueList{}, + ) +} diff --git a/subex/parse.go b/subex/parse.go index 746217b..a671e6d 100644 --- a/subex/parse.go +++ b/subex/parse.go @@ -22,7 +22,7 @@ func accept(l RuneReader, chars string) bool { return false } -func expectBracket(l RuneReader, ifLeft walk.Atom, ifRight walk.Atom) walk.Atom { +func expectBracket(l RuneReader, ifLeft walk.AtomOLD, ifRight walk.AtomOLD) walk.AtomOLD { switch l.Next() { case '(': return ifLeft @@ -38,7 +38,7 @@ func isNumericRune(r rune) bool { } // Having just parsed a `, read until the next ` and parse the contents into a list of non-string atoms -func parseNonStringLiteral(l RuneReader) (literals []walk.Atom) { +func parseNonStringLiteral(l RuneReader) (literals []walk.Scalar) { for { r := l.Next() if isNumericRune(r) { @@ -57,7 +57,7 @@ func parseNonStringLiteral(l RuneReader) (literals []walk.Atom) { if err != nil { panic("Invalid number literal") } - literals = append(literals, walk.NewAtomNumber(number)) + literals = append(literals, walk.NumberScalar(number)) continue } switch r { @@ -67,30 +67,22 @@ func parseNonStringLiteral(l RuneReader) (literals []walk.Atom) { continue case 'n': if accept(l, "u") && accept(l, "l") && accept(l, "l") { - literals = append(literals, walk.NewAtomNull()) + literals = append(literals, walk.NullScalar{}) } else { panic("Invalid literal") } case 't': if accept(l, "r") && accept(l, "u") && accept(l, "e") { - literals = append(literals, walk.NewAtomBool(true)) + literals = append(literals, walk.BoolScalar(true)) } else { panic("Invalid literal") } case 'f': if accept(l, "a") && accept(l, "l") && accept(l, "s") && accept(l, "e") { - literals = append(literals, walk.NewAtomBool(false)) + literals = append(literals, walk.BoolScalar(false)) } else { panic("Invalid literal") } - case '{': - literals = append(literals, walk.NewAtomTerminal(walk.MapBegin)) - case '}': - literals = append(literals, walk.NewAtomTerminal(walk.MapEnd)) - case '[': - literals = append(literals, walk.NewAtomTerminal(walk.ArrayBegin)) - case ']': - literals = append(literals, walk.NewAtomTerminal(walk.ArrayEnd)) default: panic("Invalid literal") } @@ -177,113 +169,113 @@ func parseReplacement(l RuneReader) (output []OutputContentAST) { case '`': literals := parseNonStringLiteral(l) for _, literal := range literals { - output = append(output, OutputAtomLiteralAST {literal}) + output = append(output, OutputValueLiteralAST {literal}) } - case '"': - output = append(output, OutputAtomLiteralAST {walk.NewAtomStringTerminal()}) default: - output = append(output, OutputAtomLiteralAST{atom: walk.NewAtomStringRune(r)}) + panic("Invalid value to insert") + //output = append(output, OutputValueLiteralAST{atom: walk.NewAtomStringRune(r)}) } } return output } // Parse the contents of a range subex [] into a map -func parseRangeSubex(l RuneReader) map[walk.Atom]walk.Atom { - // TODO escaping - parts := make(map[walk.Atom]walk.Atom) - var froms []walk.Atom - var hasTo bool - for { - fromsStart := l.Next() - if fromsStart == ']' { - hasTo = false - break - } else if fromsStart == '=' { - hasTo = true - break - } else if fromsStart == '`' { - literals := parseNonStringLiteral(l) - froms = append(froms, literals...) - continue - } else if fromsStart == '"' { - froms = append(froms, walk.NewAtomStringTerminal()) - continue - } - if accept(l, "-") { - fromsEnd := l.Next() - if fromsEnd == ']' || fromsEnd == '=' { - l.Rewind() - fromsEnd = fromsStart - } - for i := fromsStart; i <= fromsEnd; i += 1 { - froms = append(froms, walk.NewAtomStringRune(i)) - } - } else { - froms = append(froms, walk.NewAtomStringRune(fromsStart)) - } - } - if len(froms) == 0 { - panic("Missing from part of range expression") - } +// func parseRangeSubex(l RuneReader) map[walk.AtomOLD]walk.AtomOLD { +// // TODO escaping +// parts := make(map[walk.AtomOLD]walk.AtomOLD) +// var froms []walk.AtomOLD +// var hasTo bool +// for { +// fromsStart := l.Next() +// if fromsStart == ']' { +// hasTo = false +// break +// } else if fromsStart == '=' { +// hasTo = true +// break +// } else if fromsStart == '`' { +// literals := parseNonStringLiteral(l) +// froms = append(froms, literals...) +// continue +// } else if fromsStart == '"' { +// froms = append(froms, walk.NewAtomStringTerminal()) +// continue +// } +// if accept(l, "-") { +// fromsEnd := l.Next() +// if fromsEnd == ']' || fromsEnd == '=' { +// l.Rewind() +// fromsEnd = fromsStart +// } +// for i := fromsStart; i <= fromsEnd; i += 1 { +// froms = append(froms, walk.NewAtomStringRune(i)) +// } +// } else { +// froms = append(froms, walk.NewAtomStringRune(fromsStart)) +// } +// } +// if len(froms) == 0 { +// panic("Missing from part of range expression") +// } - var tos []walk.Atom - if hasTo { - for { - tosStart := l.Next() - if tosStart == ']' { - break - } else if tosStart == '`' { - literals := parseNonStringLiteral(l) - tos = append(tos, literals...) - continue - } else if tosStart == '"' { - tos = append(tos, walk.NewAtomStringTerminal()) - continue - } - if accept(l, "-") { - tosEnd := l.Next() - if tosEnd == ']' { - l.Rewind() - tosEnd = tosStart - } - for i := tosStart; i <= tosEnd; i += 1 { - tos = append(tos, walk.NewAtomStringRune(i)) - } - } else { - tos = append(tos, walk.NewAtomStringRune(tosStart)) - } - } - } else { - tos = froms - } - if len(tos) == 0 { - panic("Missing to part of range expression") - } +// var tos []walk.AtomOLD +// if hasTo { +// for { +// tosStart := l.Next() +// if tosStart == ']' { +// break +// } else if tosStart == '`' { +// literals := parseNonStringLiteral(l) +// tos = append(tos, literals...) +// continue +// } else if tosStart == '"' { +// tos = append(tos, walk.NewAtomStringTerminal()) +// continue +// } +// if accept(l, "-") { +// tosEnd := l.Next() +// if tosEnd == ']' { +// l.Rewind() +// tosEnd = tosStart +// } +// for i := tosStart; i <= tosEnd; i += 1 { +// tos = append(tos, walk.NewAtomStringRune(i)) +// } +// } else { +// tos = append(tos, walk.NewAtomStringRune(tosStart)) +// } +// } +// } else { +// tos = froms +// } +// if len(tos) == 0 { +// panic("Missing to part of range expression") +// } - for i, from := range froms { - parts[from] = tos[i % len(tos)] - } - return parts -} +// for i, from := range froms { +// parts[from] = tos[i % len(tos)] +// } +// return parts +// } -func parseSubex(l RuneReader, minPower int) SubexAST { +func parseSubex(l RuneReader, minPower int, runic bool) SubexAST { var lhs SubexAST r := l.Next() switch r { case eof: return nil case '(': - lhs = parseSubex(l, 0) + lhs = parseSubex(l, 0, runic) if !accept(l, ")") { panic("Missing matching )") } - case '[': - rangeParts := parseRangeSubex(l) - lhs = SubexASTRange {rangeParts} - case ')', '|', ';', '{', '+', '-', '*', '/', '!', '$', ':': + // TODO + // case '[': + // rangeParts := parseRangeSubex(l) + // lhs = SubexASTRange {rangeParts} + case ')', ']', '"', '|', ';', '{', '+', '-', '*', '/', '!', '$': l.Rewind() - return nil + return SubexASTEmpty{} case '=': replacement := parseReplacement(l) lhs = SubexASTOutput{replacement} @@ -291,47 +283,80 @@ func parseSubex(l RuneReader, minPower int) SubexAST { literals := parseNonStringLiteral(l) lhs = SubexASTEmpty{} for _, literal := range literals { - lhs = SubexASTConcat {lhs, SubexASTCopyAtom {literal}} + lhs = SubexASTConcat {lhs, SubexASTCopyScalar {literal}} } - case '^': - replacement := parseReplacement(l) - replacement = append( - []OutputContentAST{OutputAtomLiteralAST {walk.NewAtomStringTerminal()}}, - replacement... - ) - replacement = append( - replacement, - OutputAtomLiteralAST {walk.NewAtomStringTerminal()}, - ) - lhs = SubexASTOutput {replacement} + // case '^': + // replacement := parseReplacement(l) + // replacement = append( + // []OutputContentAST{OutputValueLiteralAST {walk.NewAtomStringTerminal()}}, + // replacement... + // ) + // replacement = append( + // replacement, + // OutputValueLiteralAST {walk.NewAtomStringTerminal()}, + // ) + // lhs = SubexASTOutput {replacement} case '.': - lhs = SubexASTCopyAny{} + if runic { + lhs = SubexASTCopyAnyRune{} + } else { + lhs = SubexASTCopyAnyValue{} + } case '?': lhs = SubexASTCopyBool{} case '%': lhs = SubexASTCopyNumber{} - case '_': - lhs = SubexASTCopyStringAtom{} - case '#': - lhs = SubexASTCopyString{} - case ',': - lhs = SubexASTCopyValue{} - case '"': - lhs = SubexASTCopyAtom {walk.NewAtomStringTerminal()} + case ':': + if runic { + lhs = SubexASTCopyRune {':'} + } else { + if !accept(l, "[") { + panic("Missing [ after :") + } + lhs = SubexASTEnterArray {parseSubex(l, 0, runic)} + if !accept(l, "]") { + panic("Missing matching ]") + } + } case '~': - literals := parseNonStringLiteral(l) - var replacement []OutputContentAST - for _, literal := range literals { - replacement = append(replacement, OutputAtomLiteralAST {literal}) + if runic { + lhs = SubexASTCopyRune {'~'} + } else { + if !accept(l, "\"") { + panic("Missing \" after ~") + } + lhs = SubexASTEnterString {parseSubex(l, 0, true)} + if !accept(l, "\"") { + panic("Missing matching \"") + } } - lhs = SubexASTOutput {replacement} + // TODO + // case '_': + // lhs = SubexASTCopyStringAtom{} + // case '#': + // lhs = SubexASTCopyString{} + // case ',': + // lhs = SubexASTCopyValue{} + // case '"': + // lhs = SubexASTCopyScalar {walk.NewAtomStringTerminal()} + // case '~': + // literals := parseNonStringLiteral(l) + // var replacement []OutputContentAST + // for _, literal := range literals { + // replacement = append(replacement, OutputValueLiteralAST {literal}) + // } + // lhs = SubexASTOutput {replacement} default: - lhs = SubexASTCopyAtom{Atom: walk.NewAtomStringRune(r)} + if runic { + lhs = SubexASTCopyRune {r} + } else { + panic("Tried to match rune outside of string") + } } loop: for { if minPower <= 20 { - next := parseSubex(l, 21) - if next != nil { + next := parseSubex(l, 21, runic) + if next != nil && (next != SubexASTEmpty{}) { lhs = SubexASTConcat{lhs, next} continue loop } @@ -366,20 +391,14 @@ func parseSubex(l RuneReader, minPower int) SubexAST { Slot: slot, } } - case r == ':' && minPower <= 4: - replacement := parseReplacement(l) - lhs = SubexASTConcat { - SubexASTDiscard {lhs}, - SubexASTOutput {replacement}, - } case r == '|' && minPower <= 8: - rhs := parseSubex(l, 9) + rhs := parseSubex(l, 9, runic) if rhs == nil { panic("Missing subex after |") } lhs = SubexASTOr{lhs, rhs} case r == ';' && minPower <= 10: - rhs := parseSubex(l, 11) + rhs := parseSubex(l, 11, runic) if rhs == nil { panic("Missing subex after ;") } @@ -396,7 +415,7 @@ func parseSubex(l RuneReader, minPower int) SubexAST { } func Parse(l RuneReader) SubexAST { - ast := parseSubex(l, 0) + ast := parseSubex(l, 0, false) if ast == nil { return SubexASTEmpty{} } diff --git a/subex/subexast.go b/subex/subexast.go index f4088fe..31c77ba 100644 --- a/subex/subexast.go +++ b/subex/subexast.go @@ -7,15 +7,15 @@ import ( // A node in the AST of a subex type SubexAST interface { - compileWith(next SubexState, slotMap *SlotMap) SubexState + compileWith(next SubexState, slotMap *SlotMap, runic bool) SubexState } // Process the first subex, then the second, splitting the input text in two type SubexASTConcat struct { First, Second SubexAST } -func (ast SubexASTConcat) compileWith(next SubexState, slotMap *SlotMap) SubexState { - return ast.First.compileWith(ast.Second.compileWith(next, slotMap), slotMap) +func (ast SubexASTConcat) compileWith(next SubexState, slotMap *SlotMap, runic bool) SubexState { + return ast.First.compileWith(ast.Second.compileWith(next, slotMap, runic), slotMap, runic) } func (ast SubexASTConcat) String() string { return fmt.Sprintf("(%v)(%v)", ast.First, ast.Second) @@ -26,13 +26,21 @@ type SubexASTStore struct { Match SubexAST Slot rune } -func (ast SubexASTStore) compileWith(next SubexState, slotMap *SlotMap) SubexState { +func (ast SubexASTStore) compileWith(next SubexState, slotMap *SlotMap, runic bool) SubexState { id := slotMap.getId(ast.Slot) - return &SubexCaptureBeginState { - next: ast.Match.compileWith(&SubexStoreEndState { - slot: id, - next: next, - }, slotMap), + newNext := ast.Match.compileWith(&SubexStoreEndState { + slot: id, + next: next, + }, slotMap, runic) + + if !runic { + return &SubexCaptureBeginState { + next: newNext, + } + } else { + return &SubexCaptureRunesBeginState { + next: newNext, + } } } func (ast SubexASTStore) String() string { @@ -43,10 +51,10 @@ func (ast SubexASTStore) String() string { type SubexASTOr struct { First, Second SubexAST } -func (ast SubexASTOr) compileWith(next SubexState, slotMap *SlotMap) SubexState { +func (ast SubexASTOr) compileWith(next SubexState, slotMap *SlotMap, runic bool) SubexState { return &SubexGroupState { - ast.First.compileWith(next, slotMap), - ast.Second.compileWith(next, slotMap), + ast.First.compileWith(next, slotMap, runic), + ast.Second.compileWith(next, slotMap, runic), } } func (ast SubexASTOr) String() string { @@ -76,19 +84,19 @@ func (cr ConvexRange) decrement() ConvexRange { return ConvexRange{cr.Start - 1, cr.End - 1} } } -func (cr ConvexRange) compile(content SubexAST, next SubexState, slotMap *SlotMap) SubexState { +func (cr ConvexRange) compile(content SubexAST, next SubexState, slotMap *SlotMap, runic bool) SubexState { min, _ := cr.minmax() if min != 0 { - return content.compileWith(cr.decrement().compile(content, next, slotMap), slotMap) + return content.compileWith(cr.decrement().compile(content, next, slotMap, runic), slotMap, runic) } if cr.Start == -1 { state := &SubexGroupState {nil, next} - state.first = content.compileWith(state, slotMap) + state.first = content.compileWith(state, slotMap, runic) return state } if cr.End == -1 { state := &SubexGroupState {next, nil} - state.second = content.compileWith(state, slotMap) + state.second = content.compileWith(state, slotMap, runic) return state } @@ -96,7 +104,7 @@ func (cr ConvexRange) compile(content SubexAST, next SubexState, slotMap *SlotMa state := next; for i := 0; i < cr.Start; i += 1 { state = &SubexGroupState { - content.compileWith(state, slotMap), + content.compileWith(state, slotMap, runic), next, } } @@ -106,7 +114,7 @@ func (cr ConvexRange) compile(content SubexAST, next SubexState, slotMap *SlotMa for i := 0; i < cr.End; i += 1 { state = &SubexGroupState { next, - content.compileWith(state, slotMap), + content.compileWith(state, slotMap, runic), } } return state @@ -119,10 +127,10 @@ type SubexASTRepeat struct { Content SubexAST Acceptable []ConvexRange } -func (ast SubexASTRepeat) compileWith(next SubexState, slotMap *SlotMap) SubexState { +func (ast SubexASTRepeat) compileWith(next SubexState, slotMap *SlotMap, runic bool) SubexState { var state SubexState = &SubexDeadState{} for _, convex := range ast.Acceptable { - state = &SubexGroupState {state, convex.compile(ast.Content, next, slotMap)} + state = &SubexGroupState {state, convex.compile(ast.Content, next, slotMap, runic)} } return state } @@ -131,23 +139,44 @@ func (ast SubexASTRepeat) String() string { } // Read in a single specific Atom and output it unchanged -type SubexASTCopyAtom struct { - Atom walk.Atom +type SubexASTCopyScalar struct { + Scalar walk.Scalar } -func (ast SubexASTCopyAtom) compileWith(next SubexState, slotMap *SlotMap) SubexState { - return &SubexCopyAtomState{ - atom: ast.Atom, +func (ast SubexASTCopyScalar) compileWith(next SubexState, slotMap *SlotMap, runic bool) SubexState { + return &SubexCopyState{ + filter: selectScalarFilter {ast.Scalar}, next: next, } } -func (ast SubexASTCopyAtom) String() string { +func (ast SubexASTCopyScalar) String() string { return fmt.Sprintf("a") } +type SubexASTCopyAnyRune struct {} +func (ast SubexASTCopyAnyRune) compileWith(next SubexState, slotMap *SlotMap, runic bool) SubexState { + return &SubexCopyRuneState { + next: next, + filter: anyRuneFilter{}, + } +} + +type SubexASTCopyRune struct { + rune rune +} +func (ast SubexASTCopyRune) compileWith(next SubexState, slotMap *SlotMap, runic bool) SubexState { + return &SubexCopyRuneState { + next: next, + filter: selectRuneFilter {ast.rune}, + } +} + // Read in a single atom that must be a boolean and output it unchanged type SubexASTCopyBool struct {} -func (ast SubexASTCopyBool) compileWith(next SubexState, slotMap *SlotMap) SubexState { - return &SubexCopyBoolState {next} +func (ast SubexASTCopyBool) compileWith(next SubexState, slotMap *SlotMap, runic bool) SubexState { + return &SubexCopyState { + next: next, + filter: anyBoolFilter{}, + } } func (ast SubexASTCopyBool) String() string { return "?" @@ -155,65 +184,50 @@ func (ast SubexASTCopyBool) String() string { // Read in a single atom that must be a number and output it unchanged type SubexASTCopyNumber struct {} -func (ast SubexASTCopyNumber) compileWith(next SubexState, slotMap *SlotMap) SubexState { - return &SubexCopyNumberState {next} +func (ast SubexASTCopyNumber) compileWith(next SubexState, slotMap *SlotMap, runic bool) SubexState { + return &SubexCopyState { + next: next, + filter: anyNumberFilter{}, + } } func (ast SubexASTCopyNumber) String() string { return "%" } -// Read in a single atom that must be a string atom and output it unchanged -type SubexASTCopyStringAtom struct {} -func (ast SubexASTCopyStringAtom) compileWith(next SubexState, slotMap *SlotMap) SubexState { - return &SubexCopyStringAtomState {next} -} -func (ast SubexASTCopyStringAtom) String() string { - return "_" -} - // Read in a full string value and copy it out unchanged // # is equivalent to "_{-0}" -type SubexASTCopyString struct {} -func (ast SubexASTCopyString) compileWith(next SubexState, slotMap *SlotMap) SubexState { - stringAtomState := &SubexCopyStringAtomState { - next: nil, - } - stringContentState := &SubexGroupState { - &SubexCopyAtomState { - atom: walk.NewAtomStringTerminal(), - next: next, - }, - stringAtomState, - } - stringAtomState.next = stringContentState - return &SubexCopyAtomState { - atom: walk.NewAtomStringTerminal(), - next: stringContentState, - } -} -func (ast SubexASTCopyString) String() string { - return "#" -} - -// Read in a non-terminal value and copy it out unchanged -// , is equivalent to `null`|?|%|# -type SubexASTCopyValue struct {} -func (ast SubexASTCopyValue) compileWith(next SubexState, slotMap *SlotMap) SubexState { - return &SubexGroupState { - SubexASTCopyString{}.compileWith(next, slotMap), - &SubexCopyNonStringNonTerminalAtomState {next}, - } -} -func (ast SubexASTCopyValue) String() string { - return "," -} +// TODO +// type SubexASTCopyString struct {} +// func (ast SubexASTCopyString) compileWith(next SubexState, slotMap *SlotMap) SubexState { +// stringAtomState := &SubexCopyStringAtomState { +// next: nil, +// } +// stringContentState := &SubexGroupState { +// &SubexCopyScalarState { +// scalar: walk.NewAtomStringTerminal(), +// next: next, +// }, +// stringAtomState, +// } +// stringAtomState.next = stringContentState +// return &SubexCopyScalarState { +// scalar: walk.NewAtomStringTerminal(), +// next: stringContentState, +// } +// } +// func (ast SubexASTCopyString) String() string { +// return "#" +// } // Read in any single Atom and output it unchanged -type SubexASTCopyAny struct {} -func (ast SubexASTCopyAny) compileWith(next SubexState, slotMap *SlotMap) SubexState { - return &SubexCopyAnyState{next} +type SubexASTCopyAnyValue struct {} +func (ast SubexASTCopyAnyValue) compileWith(next SubexState, slotMap *SlotMap, runic bool) SubexState { + return &SubexCopyState { + next: next, + filter: anyValueFilter{}, + } } -func (ast SubexASTCopyAny) String() string { +func (ast SubexASTCopyAnyValue) String() string { return "." } @@ -228,10 +242,10 @@ func (ast OutputLoadAST) compile(slotMap *SlotMap) OutputContent { return OutputLoad {slotMap.getId(ast.slot)} } -type OutputAtomLiteralAST struct { - atom walk.Atom +type OutputValueLiteralAST struct { + atom walk.Value } -func (ast OutputAtomLiteralAST) compile(slotMap *SlotMap) OutputContent { +func (ast OutputValueLiteralAST) compile(slotMap *SlotMap) OutputContent { return OutputAtomLiteral {ast.atom} } @@ -239,7 +253,7 @@ func (ast OutputAtomLiteralAST) compile(slotMap *SlotMap) OutputContent { type SubexASTOutput struct { Replacement []OutputContentAST } -func (ast SubexASTOutput) compileWith(next SubexState, slotMap *SlotMap) SubexState { +func (ast SubexASTOutput) compileWith(next SubexState, slotMap *SlotMap, runic bool) SubexState { var content []OutputContent for _, el := range ast.Replacement { content = append(content, el.compile(slotMap)) @@ -257,13 +271,13 @@ func (ast SubexASTOutput) String() string { type SubexASTJoin struct { Content, Delimiter SubexAST } -func (ast SubexASTJoin) compileWith(next SubexState, slotMap *SlotMap) SubexState { +func (ast SubexASTJoin) compileWith(next SubexState, slotMap *SlotMap, runic bool) SubexState { afterContentState := &SubexGroupState { nil, next, } - manyContentsState := ast.Content.compileWith(afterContentState, slotMap) - afterContentState.first = ast.Delimiter.compileWith(manyContentsState, slotMap) + manyContentsState := ast.Content.compileWith(afterContentState, slotMap, runic) + afterContentState.first = ast.Delimiter.compileWith(manyContentsState, slotMap, runic) return &SubexGroupState { manyContentsState, next, @@ -275,30 +289,30 @@ func (ast SubexASTJoin) String() string { // Run each input Atom through a map to produce an output Atom // Atoms not in the map cause this to not match -type SubexASTRange struct { - Parts map[walk.Atom]walk.Atom -} -func (ast SubexASTRange) compileWith(next SubexState, slotMap *SlotMap) SubexState { - return &SubexRangeState { - parts: ast.Parts, - next: next, - } -} -func (ast SubexASTRange) String() string { - return fmt.Sprintf("[abc=xyz]") -} +// type SubexASTRange struct { +// Parts map[walk.Atom]walk.Atom +// } +// func (ast SubexASTRange) compileWith(next SubexState, slotMap *SlotMap) SubexState { +// return &SubexRangeState { +// parts: ast.Parts, +// next: next, +// } +// } +// func (ast SubexASTRange) String() string { +// return fmt.Sprintf("[abc=xyz]") +// } // Run content, if content is a list of booleans, OR them, if all values are castable to numbers, sum them and output the total // Reject if neither of these cases match type SubexASTSum struct { Content SubexAST } -func (ast SubexASTSum) compileWith(next SubexState, slotMap *SlotMap) SubexState { +func (ast SubexASTSum) compileWith(next SubexState, slotMap *SlotMap, runic bool) SubexState { return &SubexCaptureBeginState { next: ast.Content.compileWith(&SubexArithmeticEndState { next: next, calculate: sumValues, - }, slotMap), + }, slotMap, runic), } } func (ast SubexASTSum) String() string { @@ -309,12 +323,12 @@ func (ast SubexASTSum) String() string { type SubexASTProduct struct { Content SubexAST } -func (ast SubexASTProduct) compileWith(next SubexState, slotMap *SlotMap) SubexState { +func (ast SubexASTProduct) compileWith(next SubexState, slotMap *SlotMap, runic bool) SubexState { return &SubexCaptureBeginState { next: ast.Content.compileWith(&SubexArithmeticEndState { next: next, calculate: multiplyValues, - }, slotMap), + }, slotMap, runic), } } func (ast SubexASTProduct) String() string { @@ -326,12 +340,12 @@ func (ast SubexASTProduct) String() string { type SubexASTNegate struct { Content SubexAST } -func (ast SubexASTNegate) compileWith(next SubexState, slotMap *SlotMap) SubexState { +func (ast SubexASTNegate) compileWith(next SubexState, slotMap *SlotMap, runic bool) SubexState { return &SubexCaptureBeginState { next: ast.Content.compileWith(&SubexArithmeticEndState { next: next, calculate: negateValues, - }, slotMap), + }, slotMap, runic), } } func (ast SubexASTNegate) String() string { @@ -344,12 +358,12 @@ func (ast SubexASTNegate) String() string { type SubexASTReciprocal struct { Content SubexAST } -func (ast SubexASTReciprocal) compileWith(next SubexState, slotMap *SlotMap) SubexState { +func (ast SubexASTReciprocal) compileWith(next SubexState, slotMap *SlotMap, runic bool) SubexState { return &SubexCaptureBeginState { next: ast.Content.compileWith(&SubexArithmeticEndState { next: next, calculate: reciprocalValues, - }, slotMap), + }, slotMap, runic), } } func (ast SubexASTReciprocal) String() string { @@ -362,12 +376,12 @@ func (ast SubexASTReciprocal) String() string { type SubexASTNot struct { Content SubexAST } -func (ast SubexASTNot) compileWith(next SubexState, slotMap *SlotMap) SubexState { +func (ast SubexASTNot) compileWith(next SubexState, slotMap *SlotMap, runic bool) SubexState { return &SubexCaptureBeginState { next: ast.Content.compileWith(&SubexArithmeticEndState { next: next, calculate: notValues, - }, slotMap), + }, slotMap, runic), } } func (ast SubexASTNot) String() string { @@ -376,7 +390,7 @@ func (ast SubexASTNot) String() string { // Does nothing type SubexASTEmpty struct {} -func (ast SubexASTEmpty) compileWith(next SubexState, slotMap *SlotMap) SubexState { +func (ast SubexASTEmpty) compileWith(next SubexState, slotMap *SlotMap, runic bool) SubexState { return next } func (ast SubexASTEmpty) String() string { @@ -387,11 +401,73 @@ func (ast SubexASTEmpty) String() string { type SubexASTDiscard struct { Content SubexAST } -func (ast SubexASTDiscard) compileWith(next SubexState, slotMap *SlotMap) SubexState { - return &SubexCaptureBeginState { - next: ast.Content.compileWith(&SubexDiscardState {next}, slotMap), +func (ast SubexASTDiscard) compileWith(next SubexState, slotMap *SlotMap, runic bool) SubexState { + newNext := ast.Content.compileWith(&SubexDiscardState {next}, slotMap, runic) + if !runic { + return &SubexCaptureBeginState { + next: newNext, + } + } else { + return &SubexCaptureRunesBeginState { + next: newNext, + } } } func (ast SubexASTDiscard) String() string { return fmt.Sprintf("(%v)$_", ast.Content) } + +// Go into an array, pass the content each of the values in the array to eat and then leave the array +type SubexASTEnterArray struct { + Content SubexAST +} +func (ast SubexASTEnterArray) compileWith(next SubexState, slotMap *SlotMap, runic bool) SubexState { + return &SubexCaptureBeginState { + next: &SubexIncrementNestState { + next: &SubexCopyState { + filter: anyArrayFilter{}, + next: &SubexDiscardState { + next: &SubexCaptureBeginState { + next: ast.Content.compileWith( + &SubexDiscardTerminalState { + terminal: walk.ArrayEndTerminal{}, + next: &SubexDecrementNestState { + next: &SubexConstructArrayState {next: next}, + }, + }, + slotMap, + runic, + ), + }, + }, + }, + }, + } +} + +type SubexASTEnterString struct { + Content SubexAST +} +func (ast SubexASTEnterString) compileWith(next SubexState, slotMap *SlotMap, runic bool) SubexState { + return &SubexCaptureBeginState { + next: &SubexIncrementNestState { + next: &SubexCopyState { + filter: anyStringFilter{}, + next: &SubexDiscardState { + next: &SubexCaptureRunesBeginState { + next: ast.Content.compileWith( + &SubexDecrementNestState { + next: &SubexDiscardTerminalState { + terminal: walk.StringEndTerminal{}, + next: &SubexConstructStringState {next: next}, + }, + }, + slotMap, + true, + ), + }, + }, + }, + }, + } +} diff --git a/subex/subexast_test.go b/subex/subexast_test.go new file mode 100644 index 0000000..156162e --- /dev/null +++ b/subex/subexast_test.go @@ -0,0 +1,19 @@ +package subex + +import ( + "testing" +) + +func expectASTEqual(t *testing.T, ast SubexAST, expected SubexAST) { + if ast == expected { + return + } + + t.Fatalf("Expected %v, found %v", expected, ast) +} + +func expectAST(t *testing.T, subex string, expected SubexAST) { + lexer := NewStringRuneReader(subex) + ast := Parse(lexer) + expectASTEqual(t, ast, expected) +} diff --git a/subex/subexstate.go b/subex/subexstate.go index 7ecff0c..7ffd592 100644 --- a/subex/subexstate.go +++ b/subex/subexstate.go @@ -1,5 +1,8 @@ package subex +// TODO: Simplify this implementation by combining similar states into one type +// e.g. Combine all of the copy states into a single type that has a filter function + import ( "main/walk" ) @@ -7,21 +10,58 @@ import ( // A state of execution for the transducer type SubexState interface { // Eat a Atom and transition to any number of new states - eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch + eat(aux auxiliaryState, char walk.Edible) []SubexBranch // Find accepting states reachable through epsilon transitions and return their outputs - accepting(store Store, outputStack OutputStack) []OutputStack + accepting(aux auxiliaryState) []OutputStack } // Try first, if it fails then try second type SubexGroupState struct { first, second SubexState } -func (state SubexGroupState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { - otherStore := store.clone() - return append(state.first.eat(store, outputStack, char), state.second.eat(otherStore, outputStack, char)...) +func (state SubexGroupState) eat(aux auxiliaryState, char walk.Edible) []SubexBranch { + otherAux := aux.cloneStore() + return append(state.first.eat(aux, char), state.second.eat(otherAux, char)...) +} +func (state SubexGroupState) accepting(aux auxiliaryState) []OutputStack { + otherAux := aux.cloneStore() + return append(state.first.accepting(aux), state.second.accepting(otherAux)...) +} + +type SubexCopyState struct { + next SubexState + filter valueFilter +} +func (state SubexCopyState) eat(aux auxiliaryState, edible walk.Edible) []SubexBranch { + value, isValue := edible.(walk.Value) + if !isValue || !state.filter.valueFilter(value) { + return nil + } + return []SubexBranch{{ + state: state.next, + aux: aux.topAppend(walk.ValueList{value}), + }} +} +func (state SubexCopyState) accepting(aux auxiliaryState) []OutputStack { + return nil +} + +type SubexCopyRuneState struct { + next SubexState + filter runeFilter +} +func (state SubexCopyRuneState) eat(aux auxiliaryState, edible walk.Edible) []SubexBranch { + r, isRune := edible.(walk.StringRuneAtom) + if !isRune || !state.filter.runeFilter(r) { + return nil + } + return []SubexBranch{{ + state: state.next, + aux: aux.topAppend(walk.RuneList{r}), + }} } -func (state SubexGroupState) accepting(store Store, outputStack OutputStack) []OutputStack { - return append(state.first.accepting(store, outputStack), state.second.accepting(store, outputStack)...) +func (state SubexCopyRuneState) accepting(aux auxiliaryState) []OutputStack { + return nil } // Just pushes to the OutputStack and hands over to the next state @@ -29,24 +69,37 @@ func (state SubexGroupState) accepting(store Store, outputStack OutputStack) []O type SubexCaptureBeginState struct { next SubexState } -func (state SubexCaptureBeginState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { - return state.next.eat(store, outputStack.push(nil), char) +func (state SubexCaptureBeginState) eat(aux auxiliaryState, char walk.Edible) []SubexBranch { + return state.next.eat(aux.pushOutput(walk.ValueList{}), char) +} +func (state SubexCaptureBeginState) accepting(aux auxiliaryState) []OutputStack { + return state.next.accepting(aux.pushOutput(walk.ValueList{})) } -func (state SubexCaptureBeginState) accepting(store Store, outputStack OutputStack) []OutputStack { - return state.next.accepting(store, outputStack.push(nil)) +func (state SubexCaptureBeginState) String() string { + return "CaptureBeginState" +} + +type SubexCaptureRunesBeginState struct { + next SubexState +} +func (state SubexCaptureRunesBeginState) eat(aux auxiliaryState, char walk.Edible) []SubexBranch { + return state.next.eat(aux.pushOutput(walk.RuneList{}), char) +} +func (state SubexCaptureRunesBeginState) accepting(aux auxiliaryState) []OutputStack { + return state.next.accepting(aux.pushOutput(walk.RuneList{})) } // Discard the top of the OutputStack type SubexDiscardState struct { next SubexState } -func (state SubexDiscardState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { - _, newStack := outputStack.pop() - return state.next.eat(store, newStack, char) +func (state SubexDiscardState) eat(aux auxiliaryState, char walk.Edible) []SubexBranch { + _, newAux := aux.popOutput() + return state.next.eat(newAux, char) } -func (state SubexDiscardState) accepting(store Store, outputStack OutputStack) []OutputStack { - _, newStack := outputStack.pop() - return state.next.accepting(store, newStack) +func (state SubexDiscardState) accepting(aux auxiliaryState) []OutputStack { + _, newAux := aux.popOutput() + return state.next.accepting(newAux) } // Pop the top of the OutputStack which contains the stuff outputted since the start of the store @@ -55,35 +108,41 @@ type SubexStoreEndState struct { slot int next SubexState } -func (state SubexStoreEndState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { - toStore, newStack := outputStack.pop() - return state.next.eat(store.withValue(state.slot, toStore), newStack, char) +func (state SubexStoreEndState) eat(aux auxiliaryState, char walk.Edible) []SubexBranch { + toStore, aux := aux.popOutput() + aux = aux.withValue(state.slot, toStore) + return state.next.eat(aux, char) } -func (state SubexStoreEndState) accepting(store Store, outputStack OutputStack) []OutputStack { - toStore, newStack := outputStack.pop() - return state.next.accepting(store.withValue(state.slot, toStore), newStack) +func (state SubexStoreEndState) accepting(aux auxiliaryState) []OutputStack { + toStore, aux := aux.popOutput() + aux = aux.withValue(state.slot, toStore) + return state.next.accepting(aux) } // A part of an output literal, either an Atom or a slot from which to load type OutputContent interface { // Given the current store, return the []Atom produced by the TransducerOutput - build(Store) []walk.Atom + build(Store) walk.ValueList } // An OutputContent which is just an Atom literal type OutputAtomLiteral struct { - atom walk.Atom + atom walk.Value } -func (replacement OutputAtomLiteral) build(store Store) []walk.Atom { - return []walk.Atom{replacement.atom} +func (replacement OutputAtomLiteral) build(store Store) walk.ValueList { + return walk.ValueList{replacement.atom} } // An OutputContent which is a slot that is loaded from type OutputLoad struct { slot int } -func (replacement OutputLoad) build(store Store) []walk.Atom { - return store[replacement.slot] +func (replacement OutputLoad) build(store Store) walk.ValueList { + values, isValues := store[replacement.slot].(walk.ValueList) + if !isValues { + panic("Tried to output non-values list") + } + return values } // Don't read in anything, just output the series of data and slots specified @@ -92,189 +151,176 @@ type SubexOutputState struct { next SubexState } // Given a store, return what is outputted by an epsilon transition from this state -func (state SubexOutputState) build(store Store) []walk.Atom { - var result []walk.Atom +// TODO: separate into buildValues and buildRunes +func (state SubexOutputState) build(store Store) walk.ValueList { + var result walk.ValueList for _, part := range state.content { result = append(result, part.build(store)...) } return result } -func (state SubexOutputState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { - content := state.build(store) - nextStates := state.next.eat(store, topAppend(outputStack, content), char) +func (state SubexOutputState) eat(aux auxiliaryState, char walk.Edible) []SubexBranch { + content := state.build(aux.store) + nextStates := state.next.eat(aux.topAppend(content), char) return nextStates } -func (state SubexOutputState) accepting(store Store, outputStack OutputStack) []OutputStack { - content := state.build(store) - outputStacks := state.next.accepting(store, topAppend(outputStack, content)) +func (state SubexOutputState) accepting(aux auxiliaryState) []OutputStack { + content := state.build(aux.store) + outputStacks := state.next.accepting(aux.topAppend(content)) return outputStacks } // A final state, transitions to nothing but is accepting type SubexNoneState struct {} -func (state SubexNoneState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { +func (state SubexNoneState) eat(aux auxiliaryState, char walk.Edible) []SubexBranch { return nil } -func (state SubexNoneState) accepting(store Store, outputStack OutputStack) []OutputStack { - return []OutputStack{outputStack} +func (state SubexNoneState) accepting(aux auxiliaryState) []OutputStack { + return []OutputStack{aux.outputStack} } // A dead end state, handy for making internals work nicer but technically redundant type SubexDeadState struct {} -func (state SubexDeadState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { +func (state SubexDeadState) eat(aux auxiliaryState, char walk.Edible) []SubexBranch { return nil } -func (state SubexDeadState) accepting (store Store, outputStack OutputStack) []OutputStack { +func (state SubexDeadState) accepting (aux auxiliaryState) []OutputStack { return nil } -// Read in a specific Atom and output it -type SubexCopyAtomState struct { - atom walk.Atom - next SubexState -} -func (state SubexCopyAtomState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { - // TODO can I compare Atom values with == ? - if char == state.atom { - return []SubexBranch{{ - state: state.next, - outputStack: topAppend(outputStack, []walk.Atom{char}), - store: store, - }} - } - return nil -} -func (state SubexCopyAtomState) accepting(store Store, outputStack OutputStack) []OutputStack { - return nil -} +// Read in an Atom and apply a map to generate an Atom to output +// If the input isn't in the map transition to nothing +// TODO +// type SubexRangeState struct { +// parts map[walk.Atom]walk.Atom +// next SubexState +// } +// func (state SubexRangeState) eat(aux auxiliaryState, char walk.Atom) []SubexBranch { +// out, exists := state.parts[char] +// if !exists { +// return nil +// } else { +// return []SubexBranch{{ +// state: state.next, +// outputStack: topAppend(outputStack, []walk.Atom{out}), +// store: store, +// }} +// } +// } +// func (state SubexRangeState) accepting(aux auxiliaryState) []OutputStack { +// return nil +// } -// Read in a boolean atom and output it -type SubexCopyBoolState struct { - next SubexState -} -func (state SubexCopyBoolState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { - if char.Typ == walk.AtomBool { - return []SubexBranch{{ - state: state.next, - outputStack: topAppend(outputStack, []walk.Atom{char}), - store: store, - }} - } - return nil -} -func (state SubexCopyBoolState) accepting(store Store, outputStack OutputStack) []OutputStack { - return nil -} -// Read in a number atom and output it -type SubexCopyNumberState struct { +type SubexArithmeticEndState struct { next SubexState + calculate func(walk.ValueList) (walk.ValueList, error) } -func (state SubexCopyNumberState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { - if char.Typ == walk.AtomNumber { - return []SubexBranch{{ - state: state.next, - outputStack: topAppend(outputStack, []walk.Atom{char}), - store: store, - }} +func (state SubexArithmeticEndState) eat(aux auxiliaryState, char walk.Edible) []SubexBranch { + toCompute, aux := aux.popOutput() + values, isValues := toCompute.(walk.ValueList) + if !isValues { + panic("Tried to do arithmetic on non-values") } - return nil -} -func (state SubexCopyNumberState) accepting(store Store, outputStack OutputStack) []OutputStack { - return nil -} - -// Read in a string atom and output it -type SubexCopyStringAtomState struct { - next SubexState -} -func (state SubexCopyStringAtomState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { - if char.Typ == walk.AtomStringRune { - return []SubexBranch{{ - state: state.next, - outputStack: topAppend(outputStack, []walk.Atom{char}), - store: store, - }} + result, err := state.calculate(values) + if err != nil { + return nil } - return nil + return state.next.eat(aux.topAppend(result), char) } -func (state SubexCopyStringAtomState) accepting(store Store, outputStack OutputStack) []OutputStack { - return nil +func (state SubexArithmeticEndState) accepting(aux auxiliaryState) []OutputStack { + toCompute, aux := aux.popOutput() + values, isValues := toCompute.(walk.ValueList) + if !isValues { + panic("Tried to do arithmetic on non-values") + } + result, err := state.calculate(values) + if err != nil { + return nil + } + return state.next.accepting(aux.topAppend(result)) } -// Read in an atom and copy it out as long as it is not part of a string -type SubexCopyNonStringNonTerminalAtomState struct { +type SubexDiscardTerminalState struct { + terminal walk.Terminal next SubexState } -func (state SubexCopyNonStringNonTerminalAtomState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { - if char.Typ == walk.AtomStringRune || char.Typ == walk.AtomStringTerminal || char.Typ == walk.AtomTerminal { +func (state SubexDiscardTerminalState) eat(aux auxiliaryState, edible walk.Edible) []SubexBranch { + if edible != state.terminal { return nil } return []SubexBranch{{ state: state.next, - outputStack: topAppend(outputStack, []walk.Atom{char}), - store: store, + aux: aux, }} } -func (state SubexCopyNonStringNonTerminalAtomState) accepting(store Store, outputStack OutputStack) []OutputStack { +func (state SubexDiscardTerminalState) accepting(aux auxiliaryState) []OutputStack { return nil } -// Read in any Atom and output it -type SubexCopyAnyState struct { +type SubexConstructArrayState struct { next SubexState } -func (state SubexCopyAnyState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { - return []SubexBranch{{ - state: state.next, - outputStack: topAppend(outputStack, []walk.Atom{char}), - store: store, - }} -} -func (state SubexCopyAnyState) accepting(store Store, outputStack OutputStack) []OutputStack { - return nil +func (state SubexConstructArrayState) eat(aux auxiliaryState, edible walk.Edible) []SubexBranch { + outputs, aux := aux.popOutput() + values, isValues := outputs.(walk.ValueList) + if !isValues { + panic("Tried to create an array from non-values") + } + array := walk.ArrayStructure(values) + return state.next.eat(aux.topAppend(walk.ValueList{array}), edible) +} +func (state SubexConstructArrayState) accepting(aux auxiliaryState) []OutputStack { + outputs, aux := aux.popOutput() + values, isValues := outputs.(walk.ValueList) + if !isValues { + panic("Tried to create an array from non-values") + } + array := walk.ArrayStructure(values) + return state.next.accepting(aux.topAppend(walk.ValueList{array})) } -// Read in an Atom and apply a map to generate an Atom to output -// If the input isn't in the map transition to nothing -type SubexRangeState struct { - parts map[walk.Atom]walk.Atom +type SubexConstructStringState struct { next SubexState } -func (state SubexRangeState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { - out, exists := state.parts[char] - if !exists { - return nil - } else { - return []SubexBranch{{ - state: state.next, - outputStack: topAppend(outputStack, []walk.Atom{out}), - store: store, - }} +func (state SubexConstructStringState) eat(aux auxiliaryState, edible walk.Edible) []SubexBranch { + outputs, aux := aux.popOutput() + runes, isRunes := outputs.(walk.RuneList) + if !isRunes { + panic("Tried to create a string from non-runes") } -} -func (state SubexRangeState) accepting(store Store, outputStack OutputStack) []OutputStack { - return nil + s := walk.StringStructure(runes) + return state.next.eat(aux.topAppend(walk.ValueList{s}), edible) +} +func (state SubexConstructStringState) accepting(aux auxiliaryState) []OutputStack { + outputs, aux := aux.popOutput() + runes, isRunes := outputs.(walk.RuneList) + if !isRunes { + panic("Tried to create a string from non-runes") + } + s := walk.StringStructure(runes) + return state.next.accepting(aux.topAppend(walk.ValueList{s})) } +type SubexIncrementNestState struct { + next SubexState +} +func (state SubexIncrementNestState) eat(aux auxiliaryState, edible walk.Edible) []SubexBranch { + return state.next.eat(aux.incNest(), edible) +} +func (state SubexIncrementNestState) accepting(aux auxiliaryState) []OutputStack { + return state.next.accepting(aux.incNest()) +} +func (state SubexIncrementNestState) String() string { + return "IncrementNestState" +} -type SubexArithmeticEndState struct { +type SubexDecrementNestState struct { next SubexState - calculate func([]walk.Atom) ([]walk.Atom, error) } -func (state SubexArithmeticEndState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { - toCompute, newStack := outputStack.pop() - result, err := state.calculate(toCompute) - if err != nil { - return nil - } - return state.next.eat(store, topAppend(newStack, result), char) +func (state SubexDecrementNestState) eat(aux auxiliaryState, edible walk.Edible) []SubexBranch { + return state.next.eat(aux.decNest(), edible) } -func (state SubexArithmeticEndState) accepting(store Store, outputStack OutputStack) []OutputStack { - toCompute, newStack := outputStack.pop() - result, err := state.calculate(toCompute) - if err != nil { - return nil - } - return state.next.accepting(store, topAppend(newStack, result)) +func (state SubexDecrementNestState) accepting(aux auxiliaryState) []OutputStack { + return state.next.accepting(aux.decNest()) } diff --git a/subex/subexstate_test.go b/subex/subexstate_test.go new file mode 100644 index 0000000..4eb8969 --- /dev/null +++ b/subex/subexstate_test.go @@ -0,0 +1,4 @@ +package subex + +import ( +) diff --git a/walk/atom.go b/walk/atom.go index 299c39d..471f030 100644 --- a/walk/atom.go +++ b/walk/atom.go @@ -14,78 +14,78 @@ const ( AtomStringTerminal AtomStringRune ) -type Atom struct { +type AtomOLD struct { Typ AtomType data uint64 } -func NewAtomNull() Atom { - return Atom { +func NewAtomNull() AtomOLD { + return AtomOLD { Typ: AtomNull, data: 0, } } -func NewAtomBool(v bool) Atom { +func NewAtomBool(v bool) AtomOLD { if v { - return Atom { + return AtomOLD { Typ: AtomBool, data: 1, } } else { - return Atom { + return AtomOLD { Typ: AtomBool, data: 0, } } } -func (v Atom) Bool() bool { +func (v AtomOLD) Bool() bool { if v.Typ != AtomBool { panic("Tried to use non-bool as bool") } return v.data == 1 } -func NewAtomNumber(v float64) Atom { - return Atom { +func NewAtomNumber(v float64) AtomOLD { + return AtomOLD { Typ: AtomNumber, data: math.Float64bits(v), } } -func (v Atom) Number() float64 { +func (v AtomOLD) Number() float64 { if v.Typ != AtomNumber { panic("Tried to use non-number as number") } return math.Float64frombits(v.data) } -func NewAtomTerminal(v ValueTerminal) Atom { - return Atom { +func NewAtomTerminal(v ValueTerminal) AtomOLD { + return AtomOLD { Typ: AtomTerminal, data: uint64(v), } } -func (v Atom) Terminal() ValueTerminal { +func (v AtomOLD) Terminal() ValueTerminal { if v.Typ != AtomTerminal { panic("Tried to use non-terminal as terminal") } return ValueTerminal(v.data) } -func NewAtomStringTerminal() Atom { - return Atom { +func NewAtomStringTerminal() AtomOLD { + return AtomOLD { Typ: AtomStringTerminal, data: 0, } } -func NewAtomStringRune(v rune) Atom { - return Atom { +func NewAtomStringRune(v rune) AtomOLD { + return AtomOLD { Typ: AtomStringRune, data: uint64(v), } } -func (v Atom) StringRune() rune { +func (v AtomOLD) StringRune() rune { if v.Typ != AtomStringRune { panic("Tried to use non-stringrune as stringrune") } return rune(v.data) } -func (v Atom) String() string { +func (v AtomOLD) String() string { switch v.Typ { case AtomNull: return "null" diff --git a/walk/value.go b/walk/value.go index 2e2c3c9..4459d89 100644 --- a/walk/value.go +++ b/walk/value.go @@ -11,7 +11,7 @@ const ( MapBegin MapEnd ) -func (value ValueTerminal) Atomise(in []Atom) []Atom { +func (value ValueTerminal) Atomise(in []AtomOLD) []AtomOLD { return append(in, NewAtomTerminal(value)) } func (value ValueTerminal) String() string { @@ -30,7 +30,7 @@ func (value ValueTerminal) String() string { } type ValueNull struct {} -func (value ValueNull) Atomise(in []Atom) []Atom { +func (value ValueNull) Atomise(in []AtomOLD) []AtomOLD { return append(in, NewAtomNull()) } func (value ValueNull) String() string { @@ -38,7 +38,7 @@ func (value ValueNull) String() string { } type ValueBool bool -func (value ValueBool) Atomise(in []Atom) []Atom { +func (value ValueBool) Atomise(in []AtomOLD) []AtomOLD { return append(in, NewAtomBool(bool(value))) } func (value ValueBool) String() string { @@ -50,7 +50,7 @@ func (value ValueBool) String() string { } type ValueNumber float64 -func (value ValueNumber) Atomise(in []Atom) []Atom { +func (value ValueNumber) Atomise(in []AtomOLD) []AtomOLD { return append(in, NewAtomNumber(float64(value))) } func (value ValueNumber) String() string { @@ -59,7 +59,7 @@ func (value ValueNumber) String() string { } type ValueString string -func (value ValueString) Atomise(in []Atom) []Atom { +func (value ValueString) Atomise(in []AtomOLD) []AtomOLD { in = append(in, NewAtomStringTerminal()) for _, char := range value { in = append(in, NewAtomStringRune(char)) @@ -71,8 +71,8 @@ func (value ValueString) String() string { return fmt.Sprintf("\"%s\"", string(value)) } -type Value interface { +type ValueOLD interface { // Append this values atoms to the input - Atomise(in []Atom) []Atom + Atomise(in []AtomOLD) []AtomOLD String() string } diff --git a/walk/walk.go b/walk/walk.go index 1073c67..65fac6e 100644 --- a/walk/walk.go +++ b/walk/walk.go @@ -1,17 +1,232 @@ package walk import ( - "strings" + "fmt" "math" + "strings" "unicode/utf8" ) +type valueIter struct { + values []Value + index int +} +func (iter *valueIter) Next() Edible { + if iter.index >= len(iter.values) { + return nil + } + iter.index += 1 + return iter.values[iter.index - 1] +} + +func NewValueIter(values []Value) StructureIter { + return &valueIter { + values: values, + index: 0, + } +} + +type OutputList interface { + outputList() +} + +type StructureIter interface { + Next() Edible +} + +type Edible interface { + edible() +} + +type Atom interface { + Edible + atom() +} + +type Scalar interface { + Atom + Value +} + +type Structure interface { + Value + structure() + Iter() StructureIter +} + +type Value interface { + Edible + value() + Debug() string +} + +type Terminal interface { + Atom + terminal() +} + +type ValueList []Value +func (_ ValueList) outputList() {} + +type RuneList []StringRuneAtom +func (_ RuneList) outputList() {} + +type NullScalar struct{} +func (_ NullScalar) edible() {} +func (_ NullScalar) atom() {} +func (_ NullScalar) value() {} +func (_ NullScalar) Debug() string { + return "null" +} + +type BoolScalar bool +func (_ BoolScalar) edible() {} +func (_ BoolScalar) atom() {} +func (_ BoolScalar) value() {} +func (b BoolScalar) Debug() string { + if b { + return "true" + } + return "false" +} + +type NumberScalar float64 +func (_ NumberScalar) edible() {} +func (_ NumberScalar) atom() {} +func (_ NumberScalar) value() {} +func (n NumberScalar) Debug() string { + return fmt.Sprintf("%v", float64(n)) +} + +type StringStructure string +func (_ StringStructure) edible() {} +func (_ StringStructure) value() {} +func (_ StringStructure) structure() {} +func (s StringStructure) Iter() StructureIter { + return &stringStructureIter { + string: string(s), + position: 0, + } +} +func (s StringStructure) Debug() string { + return fmt.Sprintf("%q", string(s)) +} + +type stringStructureIter struct { + string string + position int +} +func (iter *stringStructureIter) Next() Edible { + if iter.position == -1 { + return nil + } + r, width := utf8.DecodeRuneInString(iter.string[iter.position:]) + if width == 0 { + iter.position = -1 + return StringEndTerminal{} + } + iter.position += width + return StringRuneAtom(r) +} + +type StringBeginTerminal struct{} +func (_ StringBeginTerminal) edible() {} +func (_ StringBeginTerminal) atom() {} +func (_ StringBeginTerminal) terminal() {} + +type StringEndTerminal struct{} +func (_ StringEndTerminal) edible() {} +func (_ StringEndTerminal) atom() {} +func (_ StringEndTerminal) terminal() {} + +type StringRuneAtom rune +func (_ StringRuneAtom) edible() {} +func (_ StringRuneAtom) atom() {} + +type ArrayStructure []Value +func (_ ArrayStructure) edible() {} +func (_ ArrayStructure) value() {} +func (_ ArrayStructure) structure() {} +func (array ArrayStructure) Iter() StructureIter { + return &arrayStructureIter { + array: []Value(array), + index: 0, + } +} +func (array ArrayStructure) Debug() string { + builder := strings.Builder{} + builder.WriteRune('[') + var sep string + for _, element := range array { + builder.WriteString(sep) + builder.WriteString(fmt.Sprintf("%v", element)) + sep = ", " + } + builder.WriteRune(']') + return builder.String() +} + +type arrayStructureIter struct { + array []Value + index int +} +func (iter *arrayStructureIter) Next() Edible { + if iter.index > len(iter.array) { + return nil + } + if iter.index == len(iter.array) { + iter.index += 1 + return ArrayEndTerminal{} + } + iter.index += 1 + return iter.array[iter.index - 1] +} + +type ArrayBeginTerminal struct{} +func (_ ArrayBeginTerminal) edible() {} +func (_ ArrayBeginTerminal) atom() {} +func (_ ArrayBeginTerminal) terminal() {} + +type ArrayEndTerminal struct{} +func (_ ArrayEndTerminal) edible() {} +func (_ ArrayEndTerminal) atom() {} +func (_ ArrayEndTerminal) terminal() {} + +type MapStructure map[string]Value +func (_ MapStructure) edible() {} +func (_ MapStructure) value() {} +func (_ MapStructure) structure() {} +func (m MapStructure) Debug() string { + builder := strings.Builder{} + builder.WriteRune('{') + var sep string + for key, value := range m { + builder.WriteString(sep) + builder.WriteString(fmt.Sprintf("%q", key)) + builder.WriteString(": ") + builder.WriteString(fmt.Sprintf("%q", value)) + sep = ", " + } + builder.WriteRune('}') + return builder.String() +} + +type MapBeginTerminal struct{} +func (_ MapBeginTerminal) edible() {} +func (_ MapBeginTerminal) atom() {} +func (_ MapBeginTerminal) terminal() {} + +type MapEndTerminal struct{} +func (_ MapEndTerminal) edible() {} +func (_ MapEndTerminal) atom() {} +func (_ MapEndTerminal) terminal() {} + // int or string type PathSegment interface {} type Path []PathSegment -func (path Path) ToWalkValues() []Value { - var values []Value +func (path Path) ToWalkValues() []ValueOLD { + var values []ValueOLD for _, segment := range path { switch s := segment.(type) { case int: @@ -25,7 +240,7 @@ func (path Path) ToWalkValues() []Value { return values } -func PathFromWalkValues(values []Value) Path { +func PathFromWalkValues(values []ValueOLD) Path { var segments []PathSegment for _, value := range values { switch v := value.(type) { @@ -41,18 +256,36 @@ func PathFromWalkValues(values []Value) Path { } type WalkItem struct { - Value []Atom - Path []Atom + Value ValueList + Path ValueList +} + +func (item WalkItem) Debug() string { + builder := strings.Builder{} + var sep string + for _, pathSegment := range item.Path { + builder.WriteString(sep) + builder.WriteString(fmt.Sprintf("%s", pathSegment.Debug())) + sep = "." + } + builder.WriteString(": ") + sep = "" + for _, value := range item.Value { + builder.WriteString(sep) + builder.WriteString(fmt.Sprintf("%s", value.Debug())) + sep = ", " + } + return builder.String() } -func ConcatData(first []Atom, second []Atom) []Atom { - res := make([]Atom, 0, len(first) + len(second)) +func ConcatData(first []AtomOLD, second []AtomOLD) []AtomOLD { + res := make([]AtomOLD, 0, len(first) + len(second)) res = append(res, first...) res = append(res, second...) return res } -func Atomise(in []Value) (out []Atom) { +func Atomise(in []ValueOLD) (out []AtomOLD) { numAtoms := 0 for _, value := range in { switch v := value.(type) { @@ -64,7 +297,7 @@ func Atomise(in []Value) (out []Atom) { panic("Invalid WalkValue") } } - out = make([]Atom, 0, numAtoms) + out = make([]AtomOLD, 0, numAtoms) for _, value := range in { out = value.Atomise(out) } @@ -96,11 +329,11 @@ func (err CompoundError) Error() string { } type CompoundResult struct { - value Value + value ValueOLD error error } -func Compound(in []Atom) (out []Value, error error) { +func Compound(in []AtomOLD) (out []ValueOLD, error error) { numValues := 0 i := 0 inString := false @@ -118,7 +351,7 @@ func Compound(in []Atom) (out []Value, error error) { } } i = 0 - out = make([]Value, 0, numValues) + out = make([]ValueOLD, 0, numValues) for { if i >= len(in) { break diff --git a/walk/walk_test.go b/walk/walk_test.go new file mode 100644 index 0000000..c05da02 --- /dev/null +++ b/walk/walk_test.go @@ -0,0 +1,45 @@ +package walk + +import ( + "testing" + "log" +) + +func TestValueIter(t *testing.T) { + values := ValueList{ + NumberScalar(1), + NumberScalar(2), + NumberScalar(3), + } + + valuesCopy := ValueList{} + + iter := NewValueIter(values) + + for { + edible := iter.Next() + if edible == nil { + break + } + + log.Println(edible) + + value, isValue := edible.(Value) + + if !isValue { + t.Fatalf("Iterator produced a non-value") + } + + valuesCopy = append(valuesCopy, value) + } + + if len(values) != len(valuesCopy) { + t.Fatalf("iter gave the wrong number of values") + } + + for i, value := range values { + if value != valuesCopy[i] { + t.Fatalf("iter produced an incorrect value") + } + } +} -- cgit v1.2.3