diff options
Diffstat (limited to 'subex')
| -rw-r--r-- | subex/main.go | 66 | ||||
| -rw-r--r-- | subex/parse.go | 36 | ||||
| -rw-r--r-- | subex/subexast.go | 27 | ||||
| -rw-r--r-- | subex/subexstate.go | 65 | 
4 files changed, 105 insertions, 89 deletions
| diff --git a/subex/main.go b/subex/main.go index 138de9a..fd59047 100644 --- a/subex/main.go +++ b/subex/main.go @@ -8,31 +8,33 @@ import (  	"strings"  ) -// A part of an insertion, either a datum or a slot from which to load +// A part of an insertion, either an Atom or a slot from which to load  // TODO rename this  type TransducerOutput interface { -	// Given the current store, return the []Datum produced by the TransducerOutput -	build(Store) []walk.Datum +	// Given the current store, return the []Atom produced by the TransducerOutput +	build(Store) []walk.Atom  } -// A TransducerOutput which is just a datum literal -type TransducerReplacementRune struct { -	datum walk.Datum +// A TransducerOutput which is just an Atom literal +type TransducerReplacementAtom struct { +	atom walk.Atom  } -func (replacement TransducerReplacementRune) build(store Store) []walk.Datum { -	return []walk.Datum{replacement.datum} +func (replacement TransducerReplacementAtom) build(store Store) []walk.Atom { +	return []walk.Atom{replacement.atom}  } +// TODO should be a single field called slot with a type of rune  // A TransducerOutput which is a slot that is loaded from  type TransducerReplacementLoad struct { -	datum walk.Datum +	atom walk.Atom  } -func (replacement TransducerReplacementLoad) build(store Store) []walk.Datum { -	return store[replacement.datum] +func (replacement TransducerReplacementLoad) build(store Store) []walk.Atom { +	return store[replacement.atom]  }  // Where slots are stored -type Store map[walk.Datum][]walk.Datum +// TODO should map from runes as only runes can be slots +type Store map[walk.Atom][]walk.Atom  // Return a new store with all the data from this one  func (store Store) clone() Store {  	newStore := make(Store) @@ -42,7 +44,7 @@ func (store Store) clone() Store {  	return newStore  }  // Return a copy of this store but with an additional slot set -func (store Store) withValue(key walk.Datum, value []walk.Datum) Store { +func (store Store) withValue(key walk.Atom, value []walk.Atom) Store {  	newStore := store.clone()  	newStore[key] = value  	return newStore @@ -60,17 +62,17 @@ type SubexBranch struct {  	// State in this branch  	state SubexState  	// Output so far in this branch -	output []walk.Datum +	output []walk.Atom  }  // Read a single character and return all the branches resulting from this branch consuming it -func (pair SubexBranch) eat(char walk.Datum) []SubexBranch { +func (pair SubexBranch) eat(char walk.Atom) []SubexBranch {  	states := pair.state.eat(pair.store, char)  	for i := range states {  		states[i].output = walk.ConcatData(pair.output, states[i].output)  	}  	return states  } -func (pair SubexBranch) accepting() [][]walk.Datum { +func (pair SubexBranch) accepting() [][]walk.Atom {  	return pair.state.accepting(pair.store)  } @@ -94,7 +96,7 @@ func pruneStates(states []SubexBranch) (newStates []SubexBranch) {  }  // Run the subex transducer -func RunTransducer(transducer SubexState, input <-chan walk.Datum) (output []walk.Datum, err bool) { +func RunTransducer(transducer SubexState, input <-chan walk.Atom) (output []walk.Atom, err bool) {  	states := []SubexBranch{{  		state: transducer,  		output: nil, @@ -129,8 +131,8 @@ func Main() {  	program := os.Args[1]  	ast := Parse(program)  	transducer := CompileTransducer(ast) -	pieces := make(chan walk.Datum) -	go func(out chan<- walk.Datum, input []walk.WalkValue) { +	pieces := make(chan walk.Atom) +	go func(out chan<- walk.Atom, input []walk.WalkValue) {  		for _, value := range input {  			value.Pieces(out)  		} @@ -138,21 +140,21 @@ func Main() {  	}(pieces, tokens)  	output, err := RunTransducer(transducer, pieces)  	if !err { -		dataIn := make(chan walk.Datum) -		go func(out chan<- walk.Datum, in []walk.Datum) { -			for _, datum := range in { -				out<-datum +		dataIn := make(chan walk.Atom) +		go func(out chan<- walk.Atom, in []walk.Atom) { +			for _, atom := range in { +				out<-atom  			}  			close(out)  		}(dataIn, output)  		valueOut := make(chan walk.WalkValue) -		go func(out chan<- walk.WalkValue, in <-chan walk.Datum) { +		go func(out chan<- walk.WalkValue, in <-chan walk.Atom) {  			for { -				datum, hasDatum := <-in -				if !hasDatum { +				atom, hasAtom := <-in +				if !hasAtom {  					break  				} -				switch v := datum.(type) { +				switch v := atom.(type) {  					case walk.TerminalValue:  						out<-v  						continue @@ -171,22 +173,22 @@ func Main() {  						panic("Error! subex output an EndString before BeginString")  					case walk.StartString:  					default: -						panic("Unknown datum type") +						panic("Unknown atom type")  				}  				// Handle string start  				var builder strings.Builder  				loop: for { -					datum, hasDatum := <-in -					if !hasDatum { +					atom, hasAtom := <-in +					if !hasAtom {  						panic("Missing EndString")  					} -					switch v := datum.(type) { +					switch v := atom.(type) {  						case walk.EndString:  							break loop  						case rune:  							builder.WriteRune(v)  						default: -							panic("Invalid datum in string") +							panic("Invalid atom in string")  					}  				}  				out<-walk.ValueString(builder.String()) diff --git a/subex/parse.go b/subex/parse.go index 59b784d..1d64c20 100644 --- a/subex/parse.go +++ b/subex/parse.go @@ -4,7 +4,7 @@ import (  	"main/walk"  ) -func expectBracket(l *RuneReader, ifLeft walk.Datum, ifRight walk.Datum) walk.Datum { +func expectBracket(l *RuneReader, ifLeft walk.Atom, ifRight walk.Atom) walk.Atom {  	switch l.next() {  		case '(':  			return ifLeft @@ -15,8 +15,8 @@ func expectBracket(l *RuneReader, ifLeft walk.Datum, ifRight walk.Datum) walk.Da  	}  } -// Having just read termType, read in a bracket and return the corresponding walk.Datum -func parseTerminatorDatumLiteral(termType rune, l *RuneReader) walk.Datum { +// Having just read termType, read in a bracket and return the corresponding Atom +func parseTerminatorAtomLiteral(termType rune, l *RuneReader) walk.Atom {  	switch termType {  		case '@':  			return expectBracket(l, walk.ArrayBegin, walk.ArrayEnd) @@ -43,21 +43,21 @@ func parseReplacement(l *RuneReader) (output []TransducerOutput) {  				if slot == eof {  					panic("Missing slot character")  				} -				output = append(output, TransducerReplacementLoad{datum: slot}) +				output = append(output, TransducerReplacementLoad{atom: slot})  			case '@', '~', '#': -				output = append(output, TransducerReplacementRune{datum: parseTerminatorDatumLiteral(r, l)}) +				output = append(output, TransducerReplacementAtom{atom: parseTerminatorAtomLiteral(r, l)})  			default: -				output = append(output, TransducerReplacementRune{datum: r}) +				output = append(output, TransducerReplacementAtom{atom: r})  		}  	}  	return output  }  // Parse the contents of a range subex [] into a map -func parseRangeSubex(l *RuneReader) map[walk.Datum]walk.Datum { +func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom {  	// TODO escaping -	parts := make(map[walk.Datum]walk.Datum) -	var froms []walk.Datum +	parts := make(map[walk.Atom]walk.Atom) +	var froms []walk.Atom  	var hasTo bool  	for {  		fromsStart := l.next() @@ -68,9 +68,9 @@ func parseRangeSubex(l *RuneReader) map[walk.Datum]walk.Datum {  			hasTo = true  			break  		} else { -			datum := parseTerminatorDatumLiteral(fromsStart, l) -			if datum != nil { -				froms = append(froms, datum) +			atom := parseTerminatorAtomLiteral(fromsStart, l) +			if atom != nil { +				froms = append(froms, atom)  				continue  			}  		} @@ -91,16 +91,16 @@ func parseRangeSubex(l *RuneReader) map[walk.Datum]walk.Datum {  		panic("Missing from part of range expression")  	} -	var tos []walk.Datum +	var tos []walk.Atom  	if hasTo {  		for {  			tosStart := l.next()  			if tosStart == ']' {  				break  			} else { -				datum := parseTerminatorDatumLiteral(tosStart, l) -				if datum != nil { -					tos = append(tos, datum) +				atom := parseTerminatorAtomLiteral(tosStart, l) +				if atom != nil { +					tos = append(tos, atom)  					continue  				}  			} @@ -166,9 +166,9 @@ func parseSubex(l *RuneReader, minPower int) SubexAST {  		case '.':  			lhs = SubexASTCopyAny{}  		case '@', '#', '~': -			lhs = SubexASTCopyRune{datum: parseTerminatorDatumLiteral(r, l)} +			lhs = SubexASTCopyAtom{atom: parseTerminatorAtomLiteral(r, l)}  		default: -			lhs = SubexASTCopyRune{datum: r} +			lhs = SubexASTCopyAtom{atom: r}  	}  	loop: for {  		if minPower <= 0 { diff --git a/subex/subexast.go b/subex/subexast.go index 0afd3e3..c49f215 100644 --- a/subex/subexast.go +++ b/subex/subexast.go @@ -5,10 +5,12 @@ import (  	"main/walk"  ) +// A node in the AST of a subex  type SubexAST interface {  	compileWith(next SubexState) SubexState  } +// Process the first subex, then the second, splitting the input text in two  type SubexASTConcat struct {  	first, second SubexAST  } @@ -19,6 +21,7 @@ func (ast SubexASTConcat) String() string {  	return fmt.Sprintf("(%v)(%v)", ast.first, ast.second)  } +// Processing a subex and storing the output in a slot instead of outputting it  type SubexASTStore struct {  	match SubexAST  	slot rune @@ -34,6 +37,7 @@ func (ast SubexASTStore) String() string {  	return fmt.Sprintf("$%c(%v)", ast.slot, ast.match)  } +// Try to run the first subex, if it fails then backtrack and use the second  type SubexASTOr struct {  	first, second SubexAST  } @@ -44,6 +48,7 @@ func (ast SubexASTOr) compileWith(next SubexState) SubexState {  	}  } +// Run the content subex as many times as possible as the input is read in  type SubexASTMaximise struct {  	content SubexAST  } @@ -59,6 +64,7 @@ func (ast SubexASTMaximise) String() string {  	return fmt.Sprintf("(%v)*", ast.content)  } +// Run the content subex as few times as possible as the input is read in  type SubexASTMinimise struct {  	content SubexAST  } @@ -74,6 +80,7 @@ func (ast SubexASTMinimise) String() string {  	return fmt.Sprintf("(%v)-", ast.content)  } +// Run the subex as many times as possible but at least min times and at most max times  type SubexASTRepeat struct {  	content SubexAST  	min, max int @@ -91,16 +98,18 @@ func (ast SubexASTRepeat) compileWith(next SubexState) SubexState {  	return next  } -type SubexASTCopyRune struct { -	datum walk.Datum +// Read in a single specific Atom and output it unchanged +type SubexASTCopyAtom struct { +	atom walk.Atom  } -func (ast SubexASTCopyRune) compileWith(next SubexState) SubexState { -	return &SubexCopyRuneState{ -		rune: ast.datum, +func (ast SubexASTCopyAtom) compileWith(next SubexState) SubexState { +	return &SubexCopyAtomState{ +		atom: ast.atom,  		next: next,  	}  } +// Read in any single Atom and output it unchanged  type SubexASTCopyAny struct {}  func (ast SubexASTCopyAny) compileWith(next SubexState) SubexState {  	return &SubexCopyAnyState{next} @@ -109,6 +118,7 @@ func (ast SubexASTCopyAny) String() string {  	return "."  } +// Output a series of Atoms without reading anything from input  type SubexASTOutput struct {  	replacement []TransducerOutput  } @@ -119,6 +129,7 @@ func (ast SubexASTOutput) compileWith(next SubexState) SubexState {  	}  } +// Try to use a subex but just skip over this if it doesn't match  type SubexASTTry struct {  	content SubexAST  } @@ -129,6 +140,7 @@ func (ast SubexASTTry) compileWith(next SubexState) SubexState {  	}  } +// Try to skip over this subex but use it should that not match  type SubexASTMaybe struct {  	content SubexAST  } @@ -139,6 +151,7 @@ func (ast SubexASTMaybe) compileWith(next SubexState) SubexState {  	}  } +// Read in a repeated subex separated by a delimiter. Greedy  type SubexASTJoin struct {  	content, delimiter SubexAST  } @@ -155,8 +168,10 @@ func (ast SubexASTJoin) compileWith(next SubexState) SubexState {  	}  } +// Run each input Atom through a map to produce an output Atom +// Atoms not in the map cause this to not match  type SubexASTRange struct { -	parts map[walk.Datum]walk.Datum +	parts map[walk.Atom]walk.Atom  }  func (ast SubexASTRange) compileWith(next SubexState) SubexState {  	return &SubexRangeState { diff --git a/subex/subexstate.go b/subex/subexstate.go index 415714f..12ace42 100644 --- a/subex/subexstate.go +++ b/subex/subexstate.go @@ -6,21 +6,21 @@ import (  // A state of execution for the transducer  type SubexState interface { -	// Eat a datum and transition to any number of new states -	eat(store Store, char walk.Datum) []SubexBranch +	// Eat a Atom and transition to any number of new states +	eat(store Store, char walk.Atom) []SubexBranch  	// Find accepting states reachable through epsilon transitions and return their outputs -	accepting(store Store) [][]walk.Datum +	accepting(store Store) [][]walk.Atom  }  // Try first, if it fails then try second  type SubexGroupState struct {  	first, second SubexState  } -func (state SubexGroupState) eat(store Store, char walk.Datum) []SubexBranch { +func (state SubexGroupState) eat(store Store, char walk.Atom) []SubexBranch {  	otherStore := store.clone()  	return append(state.first.eat(store, char), state.second.eat(otherStore, char)...)  } -func (state SubexGroupState) accepting(store Store) [][]walk.Datum { +func (state SubexGroupState) accepting(store Store) [][]walk.Atom {  	return append(state.first.accepting(store), state.second.accepting(store)...)  } @@ -30,9 +30,9 @@ type SubexStoreState struct {  	match SubexState  	slot rune  	next SubexState -	toStore []walk.Datum +	toStore []walk.Atom  } -func (state SubexStoreState) eat(store Store, char walk.Datum) (nextStates []SubexBranch) { +func (state SubexStoreState) eat(store Store, char walk.Atom) (nextStates []SubexBranch) {  	acceptedOutputs := state.match.accepting(store)  	for _, acceptedOutput := range acceptedOutputs {  		nextStore := store.withValue(state.slot, walk.ConcatData(state.toStore, acceptedOutput)) @@ -53,7 +53,7 @@ func (state SubexStoreState) eat(store Store, char walk.Datum) (nextStates []Sub  	}  	return nextStates  } -func (state SubexStoreState) accepting(store Store) (outputs [][]walk.Datum) { +func (state SubexStoreState) accepting(store Store) (outputs [][]walk.Atom) {  	acceptedOutputs := state.match.accepting(store)  	for _, acceptedOutput := range acceptedOutputs {  		nextStore := store.withValue(state.slot, walk.ConcatData(state.toStore, acceptedOutput)) @@ -68,14 +68,14 @@ type SubexOutputState struct {  	next SubexState  }  // Given a store, return what is outputted by an epsilon transition from this state -func (state SubexOutputState) build(store Store) []walk.Datum { -	var result []walk.Datum +func (state SubexOutputState) build(store Store) []walk.Atom { +	var result []walk.Atom  	for _, part := range state.content {  		result = append(result, part.build(store)...)  	}  	return result  } -func (state SubexOutputState) eat(store Store, char walk.Datum) []SubexBranch { +func (state SubexOutputState) eat(store Store, char walk.Atom) []SubexBranch {  	content := state.build(store)  	nextStates := state.next.eat(store, char)  	for i := range nextStates { @@ -83,7 +83,7 @@ func (state SubexOutputState) eat(store Store, char walk.Datum) []SubexBranch {  	}  	return nextStates  } -func (state SubexOutputState) accepting(store Store) [][]walk.Datum { +func (state SubexOutputState) accepting(store Store) [][]walk.Atom {  	content := state.build(store)  	outputs := state.next.accepting(store)  	for i := range outputs { @@ -94,67 +94,66 @@ func (state SubexOutputState) accepting(store Store) [][]walk.Datum {  // A final state, transitions to nothing but is accepting  type SubexNoneState struct {} -func (state SubexNoneState) eat(store Store, char walk.Datum) []SubexBranch { +func (state SubexNoneState) eat(store Store, char walk.Atom) []SubexBranch {  	return nil  } -func (state SubexNoneState) accepting(store Store) [][]walk.Datum { -	return [][]walk.Datum{nil} +func (state SubexNoneState) accepting(store Store) [][]walk.Atom { +	return [][]walk.Atom{nil}  } -// Read in a specific datum and output it -// TODO rename to better reflect datum instead of rune -type SubexCopyRuneState struct { -	rune walk.Datum +// Read in a specific Atom and output it +type SubexCopyAtomState struct { +	atom walk.Atom  	next SubexState  } -func (state SubexCopyRuneState) eat(store Store, char walk.Datum) []SubexBranch { -	// TODO can I compare Datum values with == ? -	if char == state.rune { +func (state SubexCopyAtomState) eat(store Store, char walk.Atom) []SubexBranch { +	// TODO can I compare Atom values with == ? +	if char == state.atom {  		return []SubexBranch{{  			state: state.next, -			output: []walk.Datum{char}, +			output: []walk.Atom{char},  			store: store,  		}}  	}  	return nil  } -func (state SubexCopyRuneState) accepting(store Store) [][]walk.Datum { +func (state SubexCopyAtomState) accepting(store Store) [][]walk.Atom {  	return nil  } -// Read in any datum and output it +// Read in any Atom and output it  type SubexCopyAnyState struct {  	next SubexState  } -func (state SubexCopyAnyState) eat(store Store, char walk.Datum) []SubexBranch { +func (state SubexCopyAnyState) eat(store Store, char walk.Atom) []SubexBranch {  	return []SubexBranch{{  		state: state.next, -		output: []walk.Datum{char}, +		output: []walk.Atom{char},  		store: store,  	}}  } -func (state SubexCopyAnyState) accepting(store Store) [][]walk.Datum { +func (state SubexCopyAnyState) accepting(store Store) [][]walk.Atom {  	return nil  } -// Read in a datum and apply a map to generate a datum to output +// Read in an Atom and apply a map to generate an Atom to output  // If the input isn't in the map transition to nothing  type SubexRangeState struct { -	parts map[walk.Datum]walk.Datum +	parts map[walk.Atom]walk.Atom  	next SubexState  } -func (state SubexRangeState) eat(store Store, char walk.Datum) []SubexBranch { +func (state SubexRangeState) eat(store Store, char walk.Atom) []SubexBranch {  	out, exists := state.parts[char]  	if !exists {  		return nil  	} else {  		return []SubexBranch{{  			state: state.next, -			output: []walk.Datum{out}, +			output: []walk.Atom{out},  			store: store,  		}}  	}  } -func (state SubexRangeState) accepting(store Store) [][]walk.Datum { +func (state SubexRangeState) accepting(store Store) [][]walk.Atom {  	return nil  } | 
