diff options
-rw-r--r-- | main/main.go | 14 | ||||
-rw-r--r-- | subex/main.go | 89 | ||||
-rw-r--r-- | subex/parse.go | 41 | ||||
-rw-r--r-- | subex/subexast.go | 9 | ||||
-rw-r--r-- | subex/subexstate.go | 83 | ||||
-rw-r--r-- | walk/walk.go | 31 |
6 files changed, 167 insertions, 100 deletions
diff --git a/main/main.go b/main/main.go index d657ea2..3da414d 100644 --- a/main/main.go +++ b/main/main.go @@ -3,7 +3,6 @@ package main import ( "os" "bufio" - "fmt" "main/subex" "main/walk" ) @@ -18,18 +17,7 @@ type ProgramState struct { } func main() { - if len(os.Args) != 3 { - panic("Expected: stred [input] [subex]") - } - input := os.Args[1] - program := os.Args[2] - ast := subex.Parse(program) - transducer := subex.CompileTransducer(ast) - output, err := subex.RunTransducer(transducer, input) - if err { - output = input - } - fmt.Println(output) + subex.Main() } func mainISH() { diff --git a/subex/main.go b/subex/main.go index 3ae0618..9dbe5df 100644 --- a/subex/main.go +++ b/subex/main.go @@ -3,24 +3,36 @@ package subex import ( "os" "fmt" - "io" + "bufio" + "main/walk" ) +// A part of an insertion, either a datum or a slot from which to load +// TODO rename this type TransducerOutput interface { - build(Store) string + // Given the current store, return the []Datum produced by the TransducerOutput + build(Store) []walk.Datum } -type TransducerReplacementRune rune -func (replacement TransducerReplacementRune) build(store Store) string { - return string(replacement) +// A TransducerOutput which is just a datum literal +type TransducerReplacementRune struct { + datum walk.Datum +} +func (replacement TransducerReplacementRune) build(store Store) []walk.Datum { + return []walk.Datum{replacement.datum} } -type TransducerReplacementLoad rune -func (replacement TransducerReplacementLoad) build(store Store) string { - return store[rune(replacement)] +// A TransducerOutput which is a slot that is loaded from +type TransducerReplacementLoad struct { + datum walk.Datum +} +func (replacement TransducerReplacementLoad) build(store Store) []walk.Datum { + return store[replacement.datum] } -type Store map[rune]string +// Where slots are stored +type Store map[walk.Datum][]walk.Datum +// Return a new store with all the data from this one func (store Store) clone() Store { newStore := make(Store) for key, val := range store { @@ -28,29 +40,36 @@ func (store Store) clone() Store { } return newStore } -func (store Store) withValue(key rune, value string) Store { +// Return a copy of this store but with an additional slot set +func (store Store) withValue(key walk.Datum, value []walk.Datum) Store { newStore := store.clone() newStore[key] = value return newStore } +// Compile the SubexAST into a transducer SubexState that can be run func CompileTransducer(transducerAst SubexAST) SubexState { return transducerAst.compileWith(SubexNoneState{}) } +// One branch of subex execution type SubexBranch struct { + // Content of slots in this branch store Store + // State in this branch state SubexState - output string + // Output so far in this branch + output []walk.Datum } -func (pair SubexBranch) eat(char rune) []SubexBranch { +// Read a single character and return all the branches resulting from this branch consuming it +func (pair SubexBranch) eat(char walk.Datum) []SubexBranch { states := pair.state.eat(pair.store, char) for i := range states { - states[i].output = pair.output + states[i].output + states[i].output = append(pair.output, states[i].output...) } return states } -func (pair SubexBranch) accepting() []string { +func (pair SubexBranch) accepting() [][]walk.Datum { return pair.state.accepting(pair.store) } @@ -59,6 +78,8 @@ func equalStates(left SubexBranch, right SubexBranch) bool { return left.state == right.state } +// If two branches have the same state, only the first has a chance of being successful +// This function removes all of the pointless execution branches to save execution time func pruneStates(states []SubexBranch) (newStates []SubexBranch) { outer: for _, state := range states { for _, newState := range newStates { @@ -71,44 +92,54 @@ func pruneStates(states []SubexBranch) (newStates []SubexBranch) { return newStates } -func RunTransducer(transducer SubexState, input string) (output string, err bool) { +// Run the subex transducer +func RunTransducer(transducer SubexState, input <-chan walk.Datum) (output []walk.Datum, err bool) { states := []SubexBranch{{ state: transducer, - output: "", + output: nil, store: make(Store), }} - for _, char := range input { - fmt.Printf("Running with %d states\n", len(states)) + for piece := range input { var newStates []SubexBranch for _, state := range states { - newStates = append(newStates, state.eat(char)...) + newStates = append(newStates, state.eat(piece)...) } states = pruneStates(newStates) } for _, state := range states { outputEnds := state.accepting() for _, outputEnd := range outputEnds { - return state.output + outputEnd, false + return append(state.output, outputEnd...), false } } - return "", true + return nil, true } func Main() { if len(os.Args) != 2 { panic("Expected: program [subex]") } - inputBytes, inputErr := io.ReadAll(os.Stdin) - input := string(inputBytes) - if inputErr != nil { - fmt.Println("Error reading") + stdin := bufio.NewReader(os.Stdin); + jsonStream := walk.Json(stdin); + var tokens []walk.WalkValue; + for token := range jsonStream { + tokens = append(tokens, token.Value); } program := os.Args[1] ast := Parse(program) transducer := CompileTransducer(ast) - output, err := RunTransducer(transducer, input) - if err { - output = input + pieces := make(chan walk.Datum) + go func(out chan<- walk.Datum, input []walk.WalkValue) { + for _, value := range input { + value.Pieces(out) + } + close(out) + }(pieces, tokens) + output, err := RunTransducer(transducer, pieces) + // TODO recombine data into values and then convert into items with empty paths + if !err { + fmt.Print(output) + } else { + fmt.Print("Error") } - fmt.Print(output) } diff --git a/subex/parse.go b/subex/parse.go index af575eb..0c79dc3 100644 --- a/subex/parse.go +++ b/subex/parse.go @@ -1,5 +1,9 @@ package subex +import ( + "main/walk" +) + func parseReplacement(l *RuneReader) (output []TransducerOutput) { loop: for { r := l.next() @@ -13,17 +17,18 @@ func parseReplacement(l *RuneReader) (output []TransducerOutput) { if slot == eof { panic("Missing slot character") } - output = append(output, TransducerReplacementLoad(slot)) + output = append(output, TransducerReplacementLoad{datum: slot}) default: - output = append(output, TransducerReplacementRune(r)) + output = append(output, TransducerReplacementRune{datum: r}) } } return output } -func parseRangeSubex(l *RuneReader) map[rune]rune { - parts := make(map[rune]rune) - var froms []rune +// Parse the contents of a range subex [] into a map +func parseRangeSubex(l *RuneReader) map[walk.Datum]walk.Datum { + parts := make(map[walk.Datum]walk.Datum) + var froms []walk.Datum var hasTo bool for { fromsStart := l.next() @@ -34,43 +39,41 @@ func parseRangeSubex(l *RuneReader) map[rune]rune { hasTo = true break } - var fromsEnd rune if l.accept("-") { - fromsEnd = l.next() + fromsEnd := l.next() if fromsEnd == ']' || fromsEnd == '=' { l.rewind() fromsEnd = fromsStart } + for i := fromsStart; i <= fromsEnd; i += 1 { + froms = append(froms, i) + } } else { - fromsEnd = fromsStart - } - for i := fromsStart; i <= fromsEnd; i += 1 { - froms = append(froms, i) + froms = append(froms, fromsStart) } } if len(froms) == 0 { panic("Missing from part of range expression") } - var tos []rune + var tos []walk.Datum if hasTo { for { tosStart := l.next() if tosStart == ']' { break } - var tosEnd rune if l.accept("-") { - tosEnd = l.next() + tosEnd := l.next() if tosEnd == ']' { l.rewind() tosEnd = tosStart } + for i := tosStart; i <= tosEnd; i += 1 { + tos = append(tos, i) + } } else { - tosEnd = tosStart - } - for i := tosStart; i <= tosEnd; i += 1 { - tos = append(tos, i) + tos = append(tos, tosStart) } } } else { @@ -122,7 +125,7 @@ func parseSubex(l *RuneReader, minPower int) SubexAST { case '.': lhs = SubexASTCopyAny{} default: - lhs = SubexASTCopyRune(r) + lhs = SubexASTCopyRune{datum: r} } loop: for { if minPower <= 0 { diff --git a/subex/subexast.go b/subex/subexast.go index c583245..0afd3e3 100644 --- a/subex/subexast.go +++ b/subex/subexast.go @@ -2,6 +2,7 @@ package subex import ( "fmt" + "main/walk" ) type SubexAST interface { @@ -90,10 +91,12 @@ func (ast SubexASTRepeat) compileWith(next SubexState) SubexState { return next } -type SubexASTCopyRune rune +type SubexASTCopyRune struct { + datum walk.Datum +} func (ast SubexASTCopyRune) compileWith(next SubexState) SubexState { return &SubexCopyRuneState{ - rune: rune(ast), + rune: ast.datum, next: next, } } @@ -153,7 +156,7 @@ func (ast SubexASTJoin) compileWith(next SubexState) SubexState { } type SubexASTRange struct { - parts map[rune]rune + parts map[walk.Datum]walk.Datum } func (ast SubexASTRange) compileWith(next SubexState) SubexState { return &SubexRangeState { diff --git a/subex/subexstate.go b/subex/subexstate.go index 2e613e8..9e0d61a 100644 --- a/subex/subexstate.go +++ b/subex/subexstate.go @@ -1,35 +1,41 @@ package subex import ( - "strings" + "main/walk" ) +// A state of execution for the transducer type SubexState interface { - eat(store Store, char rune) []SubexBranch - accepting(store Store) []string + // Eat a datum and transition to any number of new states + eat(store Store, char walk.Datum) []SubexBranch + // Find accepting states reachable through epsilon transitions and return their outputs + accepting(store Store) [][]walk.Datum } +// Try first, if it fails then try second type SubexGroupState struct { first, second SubexState } -func (state SubexGroupState) eat(store Store, char rune) []SubexBranch { +func (state SubexGroupState) eat(store Store, char walk.Datum) []SubexBranch { otherStore := store.clone() return append(state.first.eat(store, char), state.second.eat(otherStore, char)...) } -func (state SubexGroupState) accepting(store Store) []string { +func (state SubexGroupState) accepting(store Store) [][]walk.Datum { return append(state.first.accepting(store), state.second.accepting(store)...) } +// Run the match machine and store the output in a slot for later use +// Output nothing type SubexStoreState struct { match SubexState slot rune next SubexState - toStore string + toStore []walk.Datum } -func (state SubexStoreState) eat(store Store, char rune) (nextStates []SubexBranch) { +func (state SubexStoreState) eat(store Store, char walk.Datum) (nextStates []SubexBranch) { acceptedOutputs := state.match.accepting(store) for _, acceptedOutput := range acceptedOutputs { - nextStore := store.withValue(state.slot, state.toStore + acceptedOutput) + nextStore := store.withValue(state.slot, append(state.toStore, acceptedOutput...)) nextStates = append(nextStates, state.next.eat(nextStore.clone(), char)...) } nextMatchStates := state.match.eat(store.clone(), char) @@ -39,107 +45,116 @@ func (state SubexStoreState) eat(store Store, char rune) (nextStates []SubexBran match: matchState.state, slot: state.slot, next: state.next, - toStore: state.toStore + matchState.output, + toStore: append(state.toStore, matchState.output...), }, - output: "", + output: nil, store: store.clone(), }) } return nextStates } -func (state SubexStoreState) accepting(store Store) (outputs []string) { +func (state SubexStoreState) accepting(store Store) (outputs [][]walk.Datum) { acceptedOutputs := state.match.accepting(store) for _, acceptedOutput := range acceptedOutputs { - nextStore := store.withValue(state.slot, state.toStore + acceptedOutput) + nextStore := store.withValue(state.slot, append(state.toStore, acceptedOutput...)) outputs = append(outputs, state.next.accepting(nextStore)...) } return outputs } +// Don't read in anything, just output the series of data and slots specified type SubexOutputState struct { content []TransducerOutput next SubexState } -func (state SubexOutputState) build(store Store) string { - var builder strings.Builder +// Given a store, return what is outputted by an epsilon transition from this state +func (state SubexOutputState) build(store Store) []walk.Datum { + var result []walk.Datum for _, part := range state.content { - builder.WriteString(part.build(store)) + result = append(result, part.build(store)...) } - return builder.String() + return result } -func (state SubexOutputState) eat(store Store, char rune) []SubexBranch { +func (state SubexOutputState) eat(store Store, char walk.Datum) []SubexBranch { content := state.build(store) nextStates := state.next.eat(store, char) for i := range nextStates { - nextStates[i].output = content + nextStates[i].output + nextStates[i].output = append(content, nextStates[i].output...) } return nextStates } -func (state SubexOutputState) accepting(store Store) []string { +func (state SubexOutputState) accepting(store Store) [][]walk.Datum { content := state.build(store) outputs := state.next.accepting(store) for i := range outputs { - outputs[i] = content + outputs[i] + outputs[i] = append(content, outputs[i]...) } return outputs } +// A final state, transitions to nothing but is accepting type SubexNoneState struct {} -func (state SubexNoneState) eat(store Store, char rune) []SubexBranch { +func (state SubexNoneState) eat(store Store, char walk.Datum) []SubexBranch { return nil } -func (state SubexNoneState) accepting(store Store) []string { - return []string{""} +func (state SubexNoneState) accepting(store Store) [][]walk.Datum { + return [][]walk.Datum{nil} } +// Read in a specific datum and output it +// TODO rename to better reflect datum instead of rune type SubexCopyRuneState struct { - rune rune + rune walk.Datum next SubexState } -func (state SubexCopyRuneState) eat(store Store, char rune) []SubexBranch { +func (state SubexCopyRuneState) eat(store Store, char walk.Datum) []SubexBranch { + // TODO can I compare Datum values with == ? if char == state.rune { return []SubexBranch{{ state: state.next, - output: string(char), + output: []walk.Datum{char}, store: store, }} } return nil } -func (state SubexCopyRuneState) accepting(store Store) []string { +func (state SubexCopyRuneState) accepting(store Store) [][]walk.Datum { return nil } +// Read in any datum and output it type SubexCopyAnyState struct { next SubexState } -func (state SubexCopyAnyState) eat(store Store, char rune) []SubexBranch { +func (state SubexCopyAnyState) eat(store Store, char walk.Datum) []SubexBranch { return []SubexBranch{{ state: state.next, - output: string(char), + output: []walk.Datum{char}, store: store, }} } -func (state SubexCopyAnyState) accepting(store Store) []string { +func (state SubexCopyAnyState) accepting(store Store) [][]walk.Datum { return nil } +// Read in a datum and apply a map to generate a datum to output +// If the input isn't in the map transition to nothing type SubexRangeState struct { - parts map[rune]rune + parts map[walk.Datum]walk.Datum next SubexState } -func (state SubexRangeState) eat(store Store, char rune) []SubexBranch { +func (state SubexRangeState) eat(store Store, char walk.Datum) []SubexBranch { out, exists := state.parts[char] if !exists { return nil } else { return []SubexBranch{{ state: state.next, - output: string(out), + output: []walk.Datum{out}, store: store, }} } } -func (state SubexRangeState) accepting(store Store) []string { +func (state SubexRangeState) accepting(store Store) [][]walk.Datum { return nil } diff --git a/walk/walk.go b/walk/walk.go index 19180b4..1df7a6e 100644 --- a/walk/walk.go +++ b/walk/walk.go @@ -16,12 +16,39 @@ const ( MapBegin MapEnd ) +func (value TerminalValue) Pieces(out chan<- Datum) { + out<-value +} + type ValueNull struct {} +func (value ValueNull) Pieces(out chan<- Datum) { + out<-value +} type ValueBool bool +func (value ValueBool) Pieces(out chan<- Datum) { + out<-value +} type ValueNumber float64 +func (value ValueNumber) Pieces(out chan<- Datum) { + out<-value +} + +type StartString struct {} +type EndString struct {} type ValueString string +func (value ValueString) Pieces(out chan<- Datum) { + out<-StartString{} + for _, char := range value { + out<-char + } + out<-EndString{} +} -type WalkValue interface {} +type Datum interface {} + +type WalkValue interface { + Pieces(out chan<- Datum) +} type WalkItem struct { Value WalkValue @@ -297,7 +324,7 @@ func jsonOutValue(in *WalkItemStream, indent int, doIndent bool) WalkValue { jsonOutMap(in, indent) return nil default: - return token + return token.Value } default: panic("Invalid WalkValue") |