diff options
author | Charlie Stanton <charlie@shtanton.xyz> | 2023-04-19 11:18:22 +0100 |
---|---|---|
committer | Charlie Stanton <charlie@shtanton.xyz> | 2023-04-19 11:18:22 +0100 |
commit | 184368ae155fcdd50dde5c2e4b0c87e0d69acdd7 (patch) | |
tree | e3e26ad223ec50da7738b1734ec5f30c43b93649 | |
parent | c6905e06ba73f47495ba86cebe1cb42f141f308d (diff) | |
download | stred-go-184368ae155fcdd50dde5c2e4b0c87e0d69acdd7.tar |
Replaces the parent/child implementation for operators like store and sum with an output stack
Previously a store state was a parent of another state machine that it would run inside of itself in order to capture the output to be stored.
This was limited as the greedyness of the child would not be transferred to the parent.
The new implementation gives states more control over the output state and turns it into a stack.
By pushing to the stack before the child and popping afterwards, all of the child's output can be retrieved while the child is very much part of the complete machine.
-rw-r--r-- | subex/main.go | 60 | ||||
-rw-r--r-- | subex/subexast.go | 16 | ||||
-rw-r--r-- | subex/subexstate.go | 211 |
3 files changed, 119 insertions, 168 deletions
diff --git a/subex/main.go b/subex/main.go index 2a5f5ad..4451a00 100644 --- a/subex/main.go +++ b/subex/main.go @@ -29,25 +29,58 @@ func CompileTransducer(transducerAst SubexAST) SubexState { return transducerAst.compileWith(SubexNoneState{}) } +// An immutable stack for outputting to +type OutputStack interface { + pop() ([]walk.Atom, OutputStack) + push(atoms []walk.Atom) OutputStack +} + +type OutputStackNil struct {} +func (stack OutputStackNil) pop() ([]walk.Atom, OutputStack) { + panic("Tried to pop from an empty OutputStack") +} +func (stack OutputStackNil) push(atoms []walk.Atom) OutputStack { + return &OutputStackCons { + head: atoms, + tail: stack, + } +} + +type OutputStackCons struct { + head []walk.Atom + tail OutputStack +} +func (stack OutputStackCons) pop() ([]walk.Atom, OutputStack) { + return stack.head, stack.tail +} +func (stack OutputStackCons) push(atoms []walk.Atom) OutputStack { + return &OutputStackCons { + head: atoms, + tail: stack, + } +} + +func topAppend(outputStack OutputStack, atoms []walk.Atom) OutputStack { + head, tail := outputStack.pop() + return tail.push(walk.ConcatData(head, atoms)) +} + // One branch of subex execution type SubexBranch struct { // Content of slots in this branch store Store // State in this branch state SubexState - // Output so far in this branch - output []walk.Atom + // The output stack. At the end of the program, whatever is on top of this will be output + // States may push or pop to the stack as they wish, creating sort of a call stack that allows states to capture the output of other states + outputStack OutputStack } // Read a single character and return all the branches resulting from this branch consuming it func (pair SubexBranch) eat(char walk.Atom) []SubexBranch { - states := pair.state.eat(pair.store, char) - for i := range states { - states[i].output = walk.ConcatData(pair.output, states[i].output) - } - return states + return pair.state.eat(pair.store, pair.outputStack, char) } -func (pair SubexBranch) accepting() [][]walk.Atom { - return pair.state.accepting(pair.store) +func (pair SubexBranch) accepting() []OutputStack { + return pair.state.accepting(pair.store, pair.outputStack) } func equalStates(left SubexBranch, right SubexBranch) bool { @@ -73,7 +106,7 @@ func pruneStates(states []SubexBranch) (newStates []SubexBranch) { func RunTransducer(transducer SubexState, input <-chan walk.Atom) (output []walk.Atom, err bool) { states := []SubexBranch{{ state: transducer, - output: nil, + outputStack: OutputStackNil{}.push(nil), store: make(Store), }} for piece := range input { @@ -84,9 +117,10 @@ func RunTransducer(transducer SubexState, input <-chan walk.Atom) (output []walk states = pruneStates(newStates) } for _, state := range states { - outputEnds := state.accepting() - for _, outputEnd := range outputEnds { - return walk.ConcatData(state.output, outputEnd), false + acceptingStacks := state.accepting() + for _, stack := range acceptingStacks { + output, _ := stack.pop() + return output, false } } return nil, true diff --git a/subex/subexast.go b/subex/subexast.go index 9e7067b..cec75f7 100644 --- a/subex/subexast.go +++ b/subex/subexast.go @@ -27,10 +27,11 @@ type SubexASTStore struct { slot rune } func (ast SubexASTStore) compileWith(next SubexState) SubexState { - return &SubexStoreState { - match: ast.match.compileWith(&SubexNoneState{}), - slot: ast.slot, - next: next, + return &SubexStoreBeginState { + next: ast.match.compileWith(&SubexStoreEndState { + slot: ast.slot, + next: next, + }), } } func (ast SubexASTStore) String() string { @@ -188,8 +189,9 @@ type SubexASTSum struct { content SubexAST } func (ast SubexASTSum) compileWith(next SubexState) SubexState { - return &SubexSumState { - inputState: ast.content.compileWith(&SubexNoneState{}), - next: next, + return &SubexSumBeginState { + next: ast.content.compileWith(&SubexSumEndState { + next: next, + }), } } diff --git a/subex/subexstate.go b/subex/subexstate.go index 855d44a..dbc8340 100644 --- a/subex/subexstate.go +++ b/subex/subexstate.go @@ -9,106 +9,48 @@ import ( // A state of execution for the transducer type SubexState interface { // Eat a Atom and transition to any number of new states - eat(store Store, char walk.Atom) []SubexBranch + eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch // Find accepting states reachable through epsilon transitions and return their outputs - accepting(store Store) [][]walk.Atom -} - -type SubexParentState interface { - SubexState - // Get the child - child() SubexState - // The child outputted output, what should be passed as accumulator data into the next version of the parent state - nextAcc(output []walk.Atom) interface{} - // Given the final accumulated data, run the next state after the parent, immutably borrows store - feedNext(acc interface{}, store Store, char walk.Atom) []SubexBranch - // Given the final accumulated data, get the accepted outputs from the next state, immutably borrows store - acceptNext(acc interface{}, store Store) [][]walk.Atom - // Given the next child and next accumulator data, generate the next parent - nextParent(child SubexState, acc interface{}) SubexState + accepting(store Store, outputStack OutputStack) []OutputStack } // Try first, if it fails then try second type SubexGroupState struct { first, second SubexState } -func (state SubexGroupState) eat(store Store, char walk.Atom) []SubexBranch { +func (state SubexGroupState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { otherStore := store.clone() - return append(state.first.eat(store, char), state.second.eat(otherStore, char)...) + return append(state.first.eat(store, outputStack, char), state.second.eat(otherStore, outputStack, char)...) } -func (state SubexGroupState) accepting(store Store) [][]walk.Atom { - return append(state.first.accepting(store), state.second.accepting(store)...) +func (state SubexGroupState) accepting(store Store, outputStack OutputStack) []OutputStack { + return append(state.first.accepting(store, outputStack), state.second.accepting(store, outputStack)...) } -// Helper so states that are actually collections of states distinguished by a child state -// can pass eaten characters on to their children more easily -func feedChild(parent SubexParentState, store Store, char walk.Atom) (nextStates []SubexBranch) { - child := parent.child() - accepteds := child.accepting(store) - for _, accepted := range accepteds { - acc := parent.nextAcc(accepted) - nextStates = append(nextStates, parent.feedNext(acc, store, char)...) - } - nextChildren := child.eat(store, char) - for _, nextChild := range nextChildren { - acc := parent.nextAcc(nextChild.output) - nextStates = append(nextStates, SubexBranch{ - state: parent.nextParent(nextChild.state, acc), - output: nil, - store: nextChild.store, - }) - } - return nextStates +// Push an empty value onto the OutputStack and epsilon transition to next +// This value will be added to until SubexStoreEndState is reached when it will be stored +type SubexStoreBeginState struct { + next SubexState } - -func acceptChild(parent SubexParentState, store Store) (outputs [][]walk.Atom) { - child := parent.child() - accepteds := child.accepting(store) - for _, accepted := range accepteds { - acc := parent.nextAcc(accepted) - outputs = append(outputs, parent.acceptNext(acc, store)...) - } - return outputs +func (state SubexStoreBeginState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { + return state.next.eat(store, outputStack.push(nil), char) +} +func (state SubexStoreBeginState) accepting(store Store, outputStack OutputStack) []OutputStack { + return state.next.accepting(store, outputStack.push(nil)) } -// Run the match machine and store the output in a slot for later use -// Output nothing -type SubexStoreState struct { - match SubexState +// Pop the top of the OutputStack which contains the stuff outputted since the start of the store +// This outputted data gets stored in a slot +type SubexStoreEndState struct { slot rune next SubexState - toStore []walk.Atom -} -func (state SubexStoreState) child() SubexState { - return state.match -} -func (state SubexStoreState) nextAcc(output []walk.Atom) interface{} { - return walk.ConcatData(state.toStore, output) -} -func (state SubexStoreState) feedNext(acc interface{}, store Store, char walk.Atom) []SubexBranch { - toStore := acc.([]walk.Atom) - nextStore := store.withValue(state.slot, toStore) - return state.next.eat(nextStore, char) -} -func (state SubexStoreState) acceptNext(acc interface{}, store Store) [][]walk.Atom { - toStore := acc.([]walk.Atom) - nextStore := store.withValue(state.slot, toStore) - return state.next.accepting(nextStore) -} -func (state SubexStoreState) nextParent(match SubexState, acc interface{}) SubexState { - toStore := acc.([]walk.Atom) - return &SubexStoreState { - match: match, - slot: state.slot, - next: state.next, - toStore: toStore, - } } -func (state SubexStoreState) eat(store Store, char walk.Atom) (nextStates []SubexBranch) { - return feedChild(state, store, char) +func (state SubexStoreEndState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { + toStore, newStack := outputStack.pop() + return state.next.eat(store.withValue(state.slot, toStore), newStack, char) } -func (state SubexStoreState) accepting(store Store) (outputs [][]walk.Atom) { - return acceptChild(state, store) +func (state SubexStoreEndState) accepting(store Store, outputStack OutputStack) []OutputStack { + toStore, newStack := outputStack.pop() + return state.next.accepting(store.withValue(state.slot, toStore), newStack) } // A part of an output literal, either an Atom or a slot from which to load @@ -146,38 +88,32 @@ func (state SubexOutputState) build(store Store) []walk.Atom { } return result } -func (state SubexOutputState) eat(store Store, char walk.Atom) []SubexBranch { +func (state SubexOutputState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { content := state.build(store) - nextStates := state.next.eat(store, char) - for i := range nextStates { - nextStates[i].output = walk.ConcatData(content, nextStates[i].output) - } + nextStates := state.next.eat(store, topAppend(outputStack, content), char) return nextStates } -func (state SubexOutputState) accepting(store Store) [][]walk.Atom { +func (state SubexOutputState) accepting(store Store, outputStack OutputStack) []OutputStack { content := state.build(store) - outputs := state.next.accepting(store) - for i := range outputs { - outputs[i] = walk.ConcatData(content, outputs[i]) - } - return outputs + outputStacks := state.next.accepting(store, topAppend(outputStack, content)) + return outputStacks } // A final state, transitions to nothing but is accepting type SubexNoneState struct {} -func (state SubexNoneState) eat(store Store, char walk.Atom) []SubexBranch { +func (state SubexNoneState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { return nil } -func (state SubexNoneState) accepting(store Store) [][]walk.Atom { - return [][]walk.Atom{nil} +func (state SubexNoneState) accepting(store Store, outputStack OutputStack) []OutputStack { + return []OutputStack{outputStack} } // A dead end state, handy for making internals work nicer but technically redundant type SubexDeadState struct {} -func (state SubexDeadState) eat(store Store, char walk.Atom) []SubexBranch { +func (state SubexDeadState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { return nil } -func (state SubexDeadState) accepting (store Store) [][]walk.Atom { +func (state SubexDeadState) accepting (store Store, outputStack OutputStack) []OutputStack { return nil } @@ -186,18 +122,18 @@ type SubexCopyAtomState struct { atom walk.Atom next SubexState } -func (state SubexCopyAtomState) eat(store Store, char walk.Atom) []SubexBranch { +func (state SubexCopyAtomState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { // TODO can I compare Atom values with == ? if char == state.atom { return []SubexBranch{{ state: state.next, - output: []walk.Atom{char}, + outputStack: topAppend(outputStack, []walk.Atom{char}), store: store, }} } return nil } -func (state SubexCopyAtomState) accepting(store Store) [][]walk.Atom { +func (state SubexCopyAtomState) accepting(store Store, outputStack OutputStack) []OutputStack { return nil } @@ -205,14 +141,14 @@ func (state SubexCopyAtomState) accepting(store Store) [][]walk.Atom { type SubexCopyAnyState struct { next SubexState } -func (state SubexCopyAnyState) eat(store Store, char walk.Atom) []SubexBranch { +func (state SubexCopyAnyState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { return []SubexBranch{{ state: state.next, - output: []walk.Atom{char}, + outputStack: topAppend(outputStack, []walk.Atom{char}), store: store, }} } -func (state SubexCopyAnyState) accepting(store Store) [][]walk.Atom { +func (state SubexCopyAnyState) accepting(store Store, outputStack OutputStack) []OutputStack { return nil } @@ -222,19 +158,19 @@ type SubexRangeState struct { parts map[walk.Atom]walk.Atom next SubexState } -func (state SubexRangeState) eat(store Store, char walk.Atom) []SubexBranch { +func (state SubexRangeState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { out, exists := state.parts[char] if !exists { return nil } else { return []SubexBranch{{ state: state.next, - output: []walk.Atom{out}, + outputStack: topAppend(outputStack, []walk.Atom{out}), store: store, }} } } -func (state SubexRangeState) accepting(store Store) [][]walk.Atom { +func (state SubexRangeState) accepting(store Store, outputStack OutputStack) []OutputStack { return nil } @@ -267,56 +203,35 @@ func sumValues(atoms []walk.Atom) (walk.ValueNumber, error) { return walk.ValueNumber(sum), nil } -// Run the inputState machine and sum any values output, output the sum -// Cast non numbers into numbers, branch dies if it is not castable -type SubexSumState struct { - inputState SubexState +// At the start of a sum, just pushes to the OutputStack allowing the end to capture what was output in the middle +// Tries to cast values to numbers to sum them and rejects if values are not castable +type SubexSumBeginState struct { next SubexState - acc []walk.Atom } -func (state SubexSumState) child() SubexState { - return state.inputState +func (state SubexSumBeginState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { + return state.next.eat(store, outputStack.push(nil), char) } -func (state SubexSumState) nextAcc(output []walk.Atom) interface{} { - return walk.ConcatData(state.acc, output) +func (state SubexSumBeginState) accepting(store Store, outputStack OutputStack) []OutputStack { + return state.next.accepting(store, outputStack.push(nil)) } -func (state SubexSumState) feedNext(acc interface{}, store Store, char walk.Atom) []SubexBranch { - childOutput := acc.([]walk.Atom) - total, err := sumValues(childOutput) + +// At the end of a sum, pops what has been output since the start, sums and outputs it +type SubexSumEndState struct { + next SubexState +} +func (state SubexSumEndState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { + toSum, newStack := outputStack.pop() + sum, err := sumValues(toSum) if err != nil { return nil } - output := []walk.Atom{total} - nextStates := state.next.eat(store.clone(), char) - for i := range nextStates { - nextStates[i].output = walk.ConcatData(output, nextStates[i].output) - } - return nextStates + return state.next.eat(store, topAppend(newStack, []walk.Atom{sum}), char) } -func (state SubexSumState) acceptNext(acc interface{}, store Store) [][]walk.Atom { - childOutput := acc.([]walk.Atom) - total, err := sumValues(childOutput) +func (state SubexSumEndState) accepting(store Store, outputStack OutputStack) []OutputStack { + toSum, newStack := outputStack.pop() + sum, err := sumValues(toSum) if err != nil { return nil } - output := []walk.Atom{total} - outputs := state.next.accepting(store.clone()) - for i := range outputs { - outputs[i] = walk.ConcatData(output, outputs[i]) - } - return outputs -} -func (state SubexSumState) nextParent(child SubexState, acc interface{}) SubexState { - childOutput := acc.([]walk.Atom) - return &SubexSumState { - inputState: child, - next: state.next, - acc: childOutput, - } -} -func (state SubexSumState) eat(store Store, char walk.Atom) (nextStates []SubexBranch) { - return feedChild(state, store, char) -} -func (state SubexSumState) accepting(store Store) (outputs [][]walk.Atom) { - return acceptChild(state, store) + return state.next.accepting(store, topAppend(newStack, []walk.Atom{sum})) } |