From fba426b3910f16c8abc6f819da3138f03e5f0b1a Mon Sep 17 00:00:00 2001
From: Charlie Stanton <charlie@shtanton.xyz>
Date: Sun, 19 Feb 2023 08:59:16 +0000
Subject: Introduces subex processing

Doesn't integrate it at all yet
---
 main/main.go        |   9 +--
 subex/lex.go        |  34 ++++++++++
 subex/main.go       | 114 ++++++++++++++++++++++++++++++++++
 subex/parse.go      | 175 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 subex/subexast.go   | 163 ++++++++++++++++++++++++++++++++++++++++++++++++
 subex/subexstate.go | 145 +++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 636 insertions(+), 4 deletions(-)
 create mode 100644 subex/lex.go
 create mode 100644 subex/main.go
 create mode 100644 subex/parse.go
 create mode 100644 subex/subexast.go
 create mode 100644 subex/subexstate.go

diff --git a/main/main.go b/main/main.go
index 08514ab..3a701ed 100644
--- a/main/main.go
+++ b/main/main.go
@@ -6,6 +6,7 @@ import (
 	"fmt"
 	"strings"
 	"unicode/utf8"
+	"main/subex"
 )
 
 type PathSegment interface {}
@@ -619,13 +620,13 @@ func runTransducer(transducer TransducerState, input string) (output string, err
 
 func main() {
 	if len(os.Args) != 3 {
-		panic("Expected: program [input] [subex]")
+		panic("Expected: stred [input] [subex]")
 	}
 	input := os.Args[1]
 	program := os.Args[2]
-	ast := parse(program)
-	transducer := compileTransducer(ast)
-	output, err := runTransducer(transducer, input)
+	ast := subex.Parse(program)
+	transducer := subex.CompileTransducer(ast)
+	output, err := subex.RunTransducer(transducer, input)
 	if err {
 		output = input
 	}
diff --git a/subex/lex.go b/subex/lex.go
new file mode 100644
index 0000000..f020b23
--- /dev/null
+++ b/subex/lex.go
@@ -0,0 +1,34 @@
+package subex
+
+import (
+	"unicode/utf8"
+)
+
+const eof rune = -1
+type RuneReader struct {
+	input string
+	pos, width int
+}
+func (l *RuneReader) next() rune {
+	if l.pos >= len(l.input) {
+		l.width = 0
+		return eof
+	}
+	var r rune
+	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
+	l.pos += l.width
+	return r
+}
+func (l *RuneReader) accept(chars string) bool {
+	r := l.next()
+	for _, char := range chars {
+		if char == r {
+			return true
+		}
+	}
+	l.rewind()
+	return false
+}
+func (l *RuneReader) rewind() {
+	l.pos -= l.width
+}
diff --git a/subex/main.go b/subex/main.go
new file mode 100644
index 0000000..3ae0618
--- /dev/null
+++ b/subex/main.go
@@ -0,0 +1,114 @@
+package subex
+
+import (
+	"os"
+	"fmt"
+	"io"
+)
+
+type TransducerOutput interface {
+	build(Store) string
+}
+
+type TransducerReplacementRune rune
+func (replacement TransducerReplacementRune) build(store Store) string {
+	return string(replacement)
+}
+
+type TransducerReplacementLoad rune
+func (replacement TransducerReplacementLoad) build(store Store) string {
+	return store[rune(replacement)]
+}
+
+type Store map[rune]string
+func (store Store) clone() Store {
+	newStore := make(Store)
+	for key, val := range store {
+		newStore[key] = val
+	}
+	return newStore
+}
+func (store Store) withValue(key rune, value string) Store {
+	newStore := store.clone()
+	newStore[key] = value
+	return newStore
+}
+
+func CompileTransducer(transducerAst SubexAST) SubexState {
+	return transducerAst.compileWith(SubexNoneState{})
+}
+
+type SubexBranch struct {
+	store Store
+	state SubexState
+	output string
+}
+func (pair SubexBranch) eat(char rune) []SubexBranch {
+	states := pair.state.eat(pair.store, char)
+	for i := range states {
+		states[i].output = pair.output + states[i].output
+	}
+	return states
+}
+func (pair SubexBranch) accepting() []string {
+	return pair.state.accepting(pair.store)
+}
+
+func equalStates(left SubexBranch, right SubexBranch) bool {
+	// Only care about if they are the same pointer
+	return left.state == right.state
+}
+
+func pruneStates(states []SubexBranch) (newStates []SubexBranch) {
+	outer: for _, state := range states {
+		for _, newState := range newStates {
+			if equalStates(state, newState) {
+				continue outer
+			}
+		}
+		newStates = append(newStates, state)
+	}
+	return newStates
+}
+
+func RunTransducer(transducer SubexState, input string) (output string, err bool) {
+	states := []SubexBranch{{
+		state: transducer,
+		output: "",
+		store: make(Store),
+	}}
+	for _, char := range input {
+		fmt.Printf("Running with %d states\n", len(states))
+		var newStates []SubexBranch
+		for _, state := range states {
+			newStates = append(newStates, state.eat(char)...)
+		}
+		states = pruneStates(newStates)
+	}
+	for _, state := range states {
+		outputEnds := state.accepting()
+		for _, outputEnd := range outputEnds {
+			return state.output + outputEnd, false
+		}
+	}
+	return "", true
+}
+
+func Main() {
+	if len(os.Args) != 2 {
+		panic("Expected: program [subex]")
+	}
+	inputBytes, inputErr := io.ReadAll(os.Stdin)
+	input := string(inputBytes)
+	if inputErr != nil {
+		fmt.Println("Error reading")
+	}
+	program := os.Args[1]
+	ast := Parse(program)
+	transducer := CompileTransducer(ast)
+	output, err := RunTransducer(transducer, input)
+	if err {
+		output = input
+	}
+	fmt.Print(output)
+}
diff --git a/subex/parse.go b/subex/parse.go
new file mode 100644
index 0000000..af575eb
--- /dev/null
+++ b/subex/parse.go
@@ -0,0 +1,175 @@
+package subex
+
+func parseReplacement(l *RuneReader) (output []TransducerOutput) {
+	loop: for {
+		r := l.next()
+		switch r {
+			case eof:
+				panic("Missing closing \"")
+			case '"':
+				break loop
+			case '$':
+				slot := l.next()
+				if slot == eof {
+					panic("Missing slot character")
+				}
+				output = append(output, TransducerReplacementLoad(slot))
+			default:
+				output = append(output, TransducerReplacementRune(r))
+		}
+	}
+	return output
+}
+
+func parseRangeSubex(l *RuneReader) map[rune]rune {
+	parts := make(map[rune]rune)
+	var froms []rune
+	var hasTo bool
+	for {
+		fromsStart := l.next()
+		if fromsStart == ']' {
+			hasTo = false
+			break
+		} else if fromsStart == '=' {
+			hasTo = true
+			break
+		}
+		var fromsEnd rune
+		if l.accept("-") {
+			fromsEnd = l.next()
+			if fromsEnd == ']' || fromsEnd == '=' {
+				l.rewind()
+				fromsEnd = fromsStart
+			}
+		} else {
+			fromsEnd = fromsStart
+		}
+		for i := fromsStart; i <= fromsEnd; i += 1 {
+			froms = append(froms, i)
+		}
+	}
+	if len(froms) == 0 {
+		panic("Missing from part of range expression")
+	}
+
+	var tos []rune
+	if hasTo {
+		for {
+			tosStart := l.next()
+			if tosStart == ']' {
+				break
+			}
+			var tosEnd rune
+			if l.accept("-") {
+				tosEnd = l.next()
+				if tosEnd == ']' {
+					l.rewind()
+					tosEnd = tosStart
+				}
+			} else {
+				tosEnd = tosStart
+			}
+			for i := tosStart; i <= tosEnd; i += 1 {
+				tos = append(tos, i)
+			}
+		}
+	} else {
+		tos = froms
+	}
+	if len(tos) == 0 {
+		panic("Missing to part of range expression")
+	}
+	
+	for i, from := range froms {
+		parts[from] = tos[i % len(tos)]
+	}
+	return parts
+}
+
+func parseSubex(l *RuneReader, minPower int) SubexAST {
+	var lhs SubexAST
+	r := l.next()
+	switch r {
+		case eof:
+			return nil
+		case '(':
+			lhs = parseSubex(l, 0)
+			if !l.accept(")") {
+				panic("Missing matching )")
+			}
+		case '[':
+			rangeParts := parseRangeSubex(l)
+			lhs = SubexASTRange {rangeParts}
+		case ')', '*', '-', '|', '!', '?', ';':
+			l.rewind()
+			return nil
+		case '$':
+			slot := l.next()
+			if slot == eof {
+				panic("Missing slot character")
+			}
+			match := parseSubex(l, 100)
+			if match == nil {
+				panic("Missing regex for store")
+			}
+			lhs = SubexASTStore{
+				match: match,
+				slot: slot,
+			}
+		case '"':
+			replacement := parseReplacement(l)
+			lhs = SubexASTOutput{replacement}
+		case '.':
+			lhs = SubexASTCopyAny{}
+		default:
+			lhs = SubexASTCopyRune(r)
+	}
+	loop: for {
+		if minPower <= 0 {
+			next := parseSubex(l, 1)
+			if next != nil {
+				lhs = SubexASTConcat{lhs, next}
+				continue loop
+			}
+		}
+		r := l.next()
+		switch {
+			case r == '*' && minPower <= 8:
+				lhs = SubexASTMaximise{lhs}
+			case r == '-' && minPower <= 8:
+				lhs = SubexASTMinimise{lhs}
+			case r == '!' && minPower <= 8:
+				lhs = SubexASTTry{lhs}
+			case r == '?' && minPower <= 8:
+				lhs = SubexASTMaybe{lhs}
+			case r == '|' && minPower <= 4:
+				rhs := parseSubex(l, 5)
+				if rhs == nil {
+					panic("Missing subex after |")
+				}
+				lhs = SubexASTOr{lhs, rhs}
+			case r == ';' && minPower <= 2:
+				rhs := parseSubex(l, 3)
+				if rhs == nil {
+					panic("Missing subex after ;")
+				}
+				lhs = SubexASTJoin{
+					content: lhs,
+					delimiter: rhs,
+				}
+			default:
+				l.rewind()
+				break loop
+		}
+	}
+	return lhs
+}
+
+func Parse(input string) SubexAST {
+	l := RuneReader {
+		input: input,
+		pos: 0,
+		width: 0,
+	}
+	return parseSubex(&l, 0)
+}
diff --git a/subex/subexast.go b/subex/subexast.go
new file mode 100644
index 0000000..c583245
--- /dev/null
+++ b/subex/subexast.go
@@ -0,0 +1,163 @@
+package subex
+
+import (
+	"fmt"
+)
+
+type SubexAST interface {
+	compileWith(next SubexState) SubexState
+}
+
+type SubexASTConcat struct {
+	first, second SubexAST
+}
+func (ast SubexASTConcat) compileWith(next SubexState) SubexState {
+	return ast.first.compileWith(ast.second.compileWith(next))
+}
+func (ast SubexASTConcat) String() string {
+	return fmt.Sprintf("(%v)(%v)", ast.first, ast.second)
+}
+
+type SubexASTStore struct {
+	match SubexAST
+	slot rune
+}
+func (ast SubexASTStore) compileWith(next SubexState) SubexState {
+	return &SubexStoreState {
+		match: ast.match.compileWith(&SubexNoneState{}),
+		slot: ast.slot,
+		next: next,
+	}
+}
+func (ast SubexASTStore) String() string {
+	return fmt.Sprintf("$%c(%v)", ast.slot, ast.match)
+}
+
+type SubexASTOr struct {
+	first, second SubexAST
+}
+func (ast SubexASTOr) compileWith(next SubexState) SubexState {
+	return &SubexGroupState {
+		ast.first.compileWith(next),
+		ast.second.compileWith(next),
+	}
+}
+
+type SubexASTMaximise struct {
+	content SubexAST
+}
+func (ast SubexASTMaximise) compileWith(next SubexState) SubexState {
+	state := &SubexGroupState {
+		nil,
+		next,
+	}
+	state.first = ast.content.compileWith(state)
+	return state
+}
+func (ast SubexASTMaximise) String() string {
+	return fmt.Sprintf("(%v)*", ast.content)
+}
+
+type SubexASTMinimise struct {
+	content SubexAST
+}
+func (ast SubexASTMinimise) compileWith(next SubexState) SubexState {
+	state := &SubexGroupState {
+		next,
+		nil,
+	}
+	state.second = ast.content.compileWith(state)
+	return state
+}
+func (ast SubexASTMinimise) String() string {
+	return fmt.Sprintf("(%v)-", ast.content)
+}
+
+type SubexASTRepeat struct {
+	content SubexAST
+	min, max int
+}
+func (ast SubexASTRepeat) compileWith(next SubexState) SubexState {
+	for i := ast.min; i < ast.max; i += 1 {
+		next = &SubexGroupState {
+			ast.content.compileWith(next),
+			next,
+		}
+	}
+	for i := 0; i < ast.min; i += 1 {
+		next = ast.content.compileWith(next)
+	}
+	return next
+}
+
+type SubexASTCopyRune rune
+func (ast SubexASTCopyRune) compileWith(next SubexState) SubexState {
+	return &SubexCopyRuneState{
+		rune: rune(ast),
+		next: next,
+	}
+}
+
+type SubexASTCopyAny struct {}
+func (ast SubexASTCopyAny) compileWith(next SubexState) SubexState {
+	return &SubexCopyAnyState{next}
+}
+func (ast SubexASTCopyAny) String() string {
+	return "."
+}
+
+type SubexASTOutput struct {
+	replacement []TransducerOutput
+}
+func (ast SubexASTOutput) compileWith(next SubexState) SubexState {
+	return &SubexOutputState{
+		content: ast.replacement,
+		next: next,
+	}
+}
+
+type SubexASTTry struct {
+	content SubexAST
+}
+func (ast SubexASTTry) compileWith(next SubexState) SubexState {
+	return &SubexGroupState {
+		ast.content.compileWith(next),
+		next,
+	}
+}
+
+type SubexASTMaybe struct {
+	content SubexAST
+}
+func (ast SubexASTMaybe) compileWith(next SubexState) SubexState {
+	return &SubexGroupState {
+		next,
+		ast.content.compileWith(next),
+	}
+}
+
+type SubexASTJoin struct {
+	content, delimiter SubexAST
+}
+func (ast SubexASTJoin) compileWith(next SubexState) SubexState {
+	afterContentState := &SubexGroupState {
+		nil,
+		next,
+	}
+	manyContentsState := ast.content.compileWith(afterContentState)
+	afterContentState.first = ast.delimiter.compileWith(manyContentsState)
+	return &SubexGroupState {
+		manyContentsState,
+		next,
+	}
+}
+
+type SubexASTRange struct {
+	parts map[rune]rune
+}
+func (ast SubexASTRange) compileWith(next SubexState) SubexState {
+	return &SubexRangeState {
+		parts: ast.parts,
+		next: next,
+	}
+}
diff --git a/subex/subexstate.go b/subex/subexstate.go
new file mode 100644
index 0000000..2e613e8
--- /dev/null
+++ b/subex/subexstate.go
@@ -0,0 +1,145 @@
+package subex
+
+import (
+	"strings"
+)
+
+type SubexState interface {
+	eat(store Store, char rune) []SubexBranch
+	accepting(store Store) []string
+}
+
+type SubexGroupState struct {
+	first, second SubexState
+}
+func (state SubexGroupState) eat(store Store, char rune) []SubexBranch {
+	otherStore := store.clone()
+	return append(state.first.eat(store, char), state.second.eat(otherStore, char)...)
+}
+func (state SubexGroupState) accepting(store Store) []string {
+	return append(state.first.accepting(store), state.second.accepting(store)...)
+}
+
+type SubexStoreState struct {
+	match SubexState
+	slot rune
+	next SubexState
+	toStore string
+}
+func (state SubexStoreState) eat(store Store, char rune) (nextStates []SubexBranch) {
+	acceptedOutputs := state.match.accepting(store)
+	for _, acceptedOutput := range acceptedOutputs {
+		nextStore := store.withValue(state.slot, state.toStore + acceptedOutput)
+		nextStates = append(nextStates, state.next.eat(nextStore.clone(), char)...)
+	}
+	nextMatchStates := state.match.eat(store.clone(), char)
+	for _, matchState := range nextMatchStates {
+		nextStates = append(nextStates, SubexBranch {
+			state: SubexStoreState {
+				match: matchState.state,
+				slot: state.slot,
+				next: state.next,
+				toStore: state.toStore + matchState.output,
+			},
+			output: "",
+			store: store.clone(),
+		})
+	}
+	return nextStates
+}
+func (state SubexStoreState) accepting(store Store) (outputs []string) {
+	acceptedOutputs := state.match.accepting(store)
+	for _, acceptedOutput := range acceptedOutputs {
+		nextStore := store.withValue(state.slot, state.toStore + acceptedOutput)
+		outputs = append(outputs, state.next.accepting(nextStore)...)
+	}
+	return outputs
+}
+
+type SubexOutputState struct {
+	content []TransducerOutput
+	next SubexState
+}
+func (state SubexOutputState) build(store Store) string {
+	var builder strings.Builder
+	for _, part := range state.content {
+		builder.WriteString(part.build(store))
+	}
+	return builder.String()
+}
+func (state SubexOutputState) eat(store Store, char rune) []SubexBranch {
+	content := state.build(store)
+	nextStates := state.next.eat(store, char)
+	for i := range nextStates {
+		nextStates[i].output = content + nextStates[i].output
+	}
+	return nextStates
+}
+func (state SubexOutputState) accepting(store Store) []string {
+	content := state.build(store)
+	outputs := state.next.accepting(store)
+	for i := range outputs {
+		outputs[i] = content + outputs[i]
+	}
+	return outputs
+}
+
+type SubexNoneState struct {}
+func (state SubexNoneState) eat(store Store, char rune) []SubexBranch {
+	return nil
+}
+func (state SubexNoneState) accepting(store Store) []string {
+	return []string{""}
+}
+
+type SubexCopyRuneState struct {
+	rune rune
+	next SubexState
+}
+func (state SubexCopyRuneState) eat(store Store, char rune) []SubexBranch {
+	if char == state.rune {
+		return []SubexBranch{{
+			state: state.next,
+			output: string(char),
+			store: store,
+		}}
+	}
+	return nil
+}
+func (state SubexCopyRuneState) accepting(store Store) []string {
+	return nil
+}
+
+type SubexCopyAnyState struct {
+	next SubexState
+}
+func (state SubexCopyAnyState) eat(store Store, char rune) []SubexBranch {
+	return []SubexBranch{{
+		state: state.next,
+		output: string(char),
+		store: store,
+	}}
+}
+func (state SubexCopyAnyState) accepting(store Store) []string {
+	return nil
+}
+
+type SubexRangeState struct {
+	parts map[rune]rune
+	next SubexState
+}
+func (state SubexRangeState) eat(store Store, char rune) []SubexBranch {
+	out, exists := state.parts[char]
+	if !exists {
+		return nil
+	} else {
+		return []SubexBranch{{
+			state: state.next,
+			output: string(out),
+			store: store,
+		}}
+	}
+}
+func (state SubexRangeState) accepting(store Store) []string {
+	return nil
+}
-- 
cgit v1.2.3