<- Back to shtanton's homepage
aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCharlie Stanton <charlie@shtanton.xyz>2022-09-21 19:37:22 +0100
committerCharlie Stanton <charlie@shtanton.xyz>2022-09-21 19:37:22 +0100
commitb81f564164a512fc3dba155c7bd25613b201c070 (patch)
tree16e2b5b56432d26cca86c38fd0734bb963a49213
parent96812b9ea732cc7ae26efce4568c19aec0000abc (diff)
downloadstred-go-b81f564164a512fc3dba155c7bd25613b201c070.tar
Implements the first version of subex
-rw-r--r--main/main.go564
1 files changed, 564 insertions, 0 deletions
diff --git a/main/main.go b/main/main.go
index 5503fb1..7bb5152 100644
--- a/main/main.go
+++ b/main/main.go
@@ -3,6 +3,9 @@ package main
import (
"os"
"bufio"
+ "fmt"
+ "strings"
+ "unicode/utf8"
)
type PathSegment interface {}
@@ -36,7 +39,568 @@ type ProgramState struct {
program []Command
}
+//const eof rune = -1
+type RuneReader struct {
+ input string
+ pos, width int
+}
+func (l *RuneReader) next() rune {
+ if l.pos >= len(l.input) {
+ l.width = 0
+ return eof
+ }
+ var r rune
+ r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
+ l.pos += l.width
+ return r
+}
+func (l *RuneReader) accept(chars string) bool {
+ r := l.next()
+ for _, char := range chars {
+ if char == r {
+ return true
+ }
+ }
+ l.rewind()
+ return false
+}
+func (l *RuneReader) rewind() {
+ l.pos -= l.width
+}
+
+func parseReplacement(l *RuneReader) (output []TransducerOutput) {
+ loop: for {
+ r := l.next()
+ switch r {
+ case eof:
+ panic("Missing closing \"")
+ case '"':
+ break loop
+ case '$':
+ slot := l.next()
+ if slot == eof {
+ panic("Missing slot character")
+ }
+ output = append(output, TransducerReplacementLoad(slot))
+ default:
+ output = append(output, TransducerReplacementRune(r))
+ }
+ }
+ return output
+}
+
+func parseRegex(l *RuneReader, minPower int) RegexAST {
+ var lhs RegexAST
+ r := l.next()
+ switch r {
+ case eof:
+ return nil
+ case ')', '*', '-':
+ l.rewind()
+ return nil
+ case '(':
+ lhs = parseRegex(l, 0)
+ if !l.accept(")") {
+ panic("Missing matching )")
+ }
+ case '.':
+ lhs = RegexASTAny{}
+ default:
+ lhs = RegexASTRune(r)
+ }
+ loop: for {
+ if minPower <= 0 {
+ next := parseRegex(l, 1)
+ if next != nil {
+ lhs = RegexASTConcat{lhs, next}
+ continue loop
+ }
+ }
+ r := l.next()
+ switch {
+ case r == '*' && minPower <= 4:
+ lhs = RegexASTMaximise{lhs}
+ case r == '-' && minPower <= 4:
+ lhs = RegexASTMinimise{lhs}
+ default:
+ l.rewind()
+ break loop
+ }
+ }
+ return lhs
+}
+
+func parseSubex(l *RuneReader, minPower int) TransducerAST {
+ var lhs TransducerAST
+ r := l.next()
+ switch r {
+ case eof:
+ return nil
+ case '(':
+ lhs = parseSubex(l, 0)
+ if !l.accept(")") {
+ panic("Missing matching )")
+ }
+ case ')', '*', '-':
+ l.rewind()
+ return nil
+ case '$':
+ slot := l.next()
+ if slot == eof {
+ panic("Missing slot character")
+ }
+ match := parseRegex(l, 100)
+ if match == nil {
+ panic("Missing regex for store")
+ }
+ lhs = TransducerASTStore{
+ match: match,
+ slot: slot,
+ }
+ case '"':
+ replacement := parseReplacement(l)
+ lhs = TransducerASTOutput{replacement}
+ case '.':
+ lhs = TransducerASTCopyAny{}
+ default:
+ lhs = TransducerASTCopyRune(r)
+ }
+ loop: for {
+ if minPower <= 0 {
+ next := parseSubex(l, 1)
+ if next != nil {
+ lhs = TransducerASTConcat{lhs, next}
+ continue loop
+ }
+ }
+ r := l.next()
+ switch {
+ case r == '*' && minPower <= 4:
+ lhs = TransducerASTMaximise{lhs}
+ case r == '-' && minPower <= 4:
+ lhs = TransducerASTMinimise{lhs}
+ default:
+ l.rewind()
+ break loop
+ }
+ }
+ return lhs
+}
+
+func parse(input string) TransducerAST {
+ l := RuneReader {
+ input: input,
+ pos: 0,
+ width: 0,
+ }
+ return parseSubex(&l, 0)
+}
+
+type TransducerOutput interface {
+ build(Store) string
+}
+
+type TransducerReplacementRune rune
+func (replacement TransducerReplacementRune) build(store Store) string {
+ return string(replacement)
+}
+
+type TransducerReplacementLoad rune
+func (replacement TransducerReplacementLoad) build(store Store) string {
+ return store[rune(replacement)]
+}
+
+type RegexState interface {
+ eat(char rune) []RegexState
+ accepting() bool
+}
+
+type RegexNoneState struct {}
+func (state RegexNoneState) eat(char rune) []RegexState {
+ return nil
+}
+func (state RegexNoneState) accepting() bool {
+ return true
+}
+
+type RegexAnyState struct {
+ next RegexState
+}
+func (state RegexAnyState) eat(char rune) []RegexState {
+ return []RegexState{state.next}
+}
+func (state RegexAnyState) accepting() bool {
+ return false
+}
+
+type RegexRuneState struct {
+ rune rune
+ next RegexState
+}
+func (state RegexRuneState) eat(char rune) []RegexState {
+ if char == state.rune {
+ return []RegexState{state.next}
+ }
+ return nil
+}
+func (state RegexRuneState) accepting() bool {
+ return false
+}
+
+type RegexGroupState struct {
+ first, second RegexState
+}
+func (state RegexGroupState) eat(char rune) []RegexState {
+ return append(state.first.eat(char), state.second.eat(char)...)
+}
+func (state RegexGroupState) accepting() bool {
+ return state.first.accepting() || state.second.accepting()
+}
+
+type RegexAST interface {
+ compileWith(next RegexState) RegexState
+}
+
+type RegexASTRune rune
+func (ast RegexASTRune) compileWith(next RegexState) RegexState {
+ return RegexRuneState{
+ rune: rune(ast),
+ next: next,
+ }
+}
+func (ast RegexASTRune) String() string {
+ return string(rune(ast))
+}
+
+type RegexASTAny struct {}
+func (ast RegexASTAny) compileWith(next RegexState) RegexState {
+ return RegexAnyState{next}
+}
+func (ast RegexASTAny) String() string {
+ return "."
+}
+
+type RegexASTConcat struct {
+ first, second RegexAST
+}
+func (ast RegexASTConcat) compileWith(next RegexState) RegexState {
+ return ast.first.compileWith(ast.second.compileWith(next))
+}
+func (ast RegexASTConcat) String() string {
+ return fmt.Sprintf("Concat{%v, %v}", ast.first, ast.second)
+}
+
+type RegexASTMaximise struct {
+ content RegexAST
+}
+func (ast RegexASTMaximise) compileWith(next RegexState) RegexState {
+ state := &RegexGroupState{
+ nil,
+ next,
+ }
+ state.first = ast.content.compileWith(state)
+ return state
+}
+
+type RegexASTMinimise struct {
+ content RegexAST
+}
+func (ast RegexASTMinimise) compileWith(next RegexState) RegexState {
+ state := &RegexGroupState{
+ next,
+ nil,
+ }
+ state.second = ast.content.compileWith(state)
+ return state
+}
+
+type Store map[rune]string
+func (store Store) clone() Store {
+ newStore := make(Store)
+ for key, val := range store {
+ newStore[key] = val
+ }
+ return newStore
+}
+
+type TransducerState interface {
+ eat(store Store, char rune) []TransducerStatePair
+ accepting(store Store) []string
+}
+
+type TransducerGroupState struct {
+ first, second TransducerState
+}
+func (state TransducerGroupState) eat(store Store, char rune) []TransducerStatePair {
+ otherStore := store.clone()
+ return append(state.first.eat(store, char), state.second.eat(otherStore, char)...)
+}
+func (state TransducerGroupState) accepting(store Store) []string {
+ return append(state.first.accepting(store), state.second.accepting(store)...)
+}
+
+type TransducerStoreState struct {
+ match RegexState
+ slot rune
+ next TransducerState
+ input string
+}
+func (state TransducerStoreState) eat(store Store, char rune) []TransducerStatePair {
+ var nextStates []TransducerStatePair
+ if state.match.accepting() {
+ store[state.slot] = state.input
+ nextStates = state.next.eat(store, char)
+ }
+ nextRegexStates := state.match.eat(char)
+ for _, regexState := range nextRegexStates {
+ nextStates = append(nextStates, TransducerStatePair {
+ state: TransducerStoreState {
+ match: regexState,
+ slot: state.slot,
+ next: state.next,
+ input: state.input + string(char),
+ },
+ output: "",
+ store: store.clone(),
+ })
+ }
+ return nextStates
+}
+func (state TransducerStoreState) accepting(store Store) []string {
+ if state.match.accepting() {
+ return state.next.accepting(store)
+ }
+ return nil
+}
+
+type TransducerOutputState struct {
+ content []TransducerOutput
+ next TransducerState
+}
+func (state TransducerOutputState) build(store Store) string {
+ var builder strings.Builder
+ for _, part := range state.content {
+ builder.WriteString(part.build(store))
+ }
+ return builder.String()
+}
+func (state TransducerOutputState) eat(store Store, char rune) []TransducerStatePair {
+ content := state.build(store)
+ nextStates := state.next.eat(store, char)
+ for i := range nextStates {
+ nextStates[i].output = content + nextStates[i].output
+ }
+ return nextStates
+}
+func (state TransducerOutputState) accepting(store Store) []string {
+ content := state.build(store)
+ outputs := state.next.accepting(store)
+ for i := range outputs {
+ outputs[i] = content + outputs[i]
+ }
+ return outputs
+}
+
+type TransducerNoneState struct {}
+func (state TransducerNoneState) eat(store Store, char rune) []TransducerStatePair {
+ return nil
+}
+func (state TransducerNoneState) accepting(store Store) []string {
+ return []string{""}
+}
+
+type TransducerCopyRuneState struct {
+ rune rune
+ next TransducerState
+}
+func (state TransducerCopyRuneState) eat(store Store, char rune) []TransducerStatePair {
+ if char == state.rune {
+ return []TransducerStatePair{{
+ state: state.next,
+ output: string(char),
+ store: store,
+ }}
+ }
+ return nil
+}
+func (state TransducerCopyRuneState) accepting(store Store) []string {
+ return nil
+}
+
+type TransducerCopyAnyState struct {
+ next TransducerState
+}
+func (state TransducerCopyAnyState) eat(store Store, char rune) []TransducerStatePair {
+ return []TransducerStatePair{{
+ state: state.next,
+ output: string(char),
+ store: store,
+ }}
+}
+func (state TransducerCopyAnyState) accepting(store Store) []string {
+ return nil
+}
+
+type TransducerAST interface {
+ compileWithNext(next TransducerState) TransducerState
+}
+
+type TransducerASTConcat struct {
+ first, second TransducerAST
+}
+func (ast TransducerASTConcat) compileWithNext(next TransducerState) TransducerState {
+ return ast.first.compileWithNext(ast.second.compileWithNext(next))
+}
+func (ast TransducerASTConcat) String() string {
+ return fmt.Sprintf("(%v)(%v)", ast.first, ast.second)
+}
+
+type TransducerASTStore struct {
+ match RegexAST
+ slot rune
+}
+func (ast TransducerASTStore) compileWithNext(next TransducerState) TransducerState {
+ return TransducerStoreState {
+ match: ast.match.compileWith(RegexNoneState{}),
+ slot: ast.slot,
+ next: next,
+ }
+}
+func (ast TransducerASTStore) String() string {
+ return fmt.Sprintf("$%c(%v)", ast.slot, ast.match)
+}
+
+type TransducerASTMaximise struct {
+ content TransducerAST
+}
+func (ast TransducerASTMaximise) compileWithNext(next TransducerState) TransducerState {
+ state := &TransducerGroupState {
+ nil,
+ next,
+ }
+ state.first = ast.content.compileWithNext(state)
+ return state
+}
+func (ast TransducerASTMaximise) String() string {
+ return fmt.Sprintf("(%v)*", ast.content)
+}
+
+type TransducerASTMinimise struct {
+ content TransducerAST
+}
+func (ast TransducerASTMinimise) compileWithNext(next TransducerState) TransducerState {
+ state := &TransducerGroupState {
+ next,
+ nil,
+ }
+ state.second = ast.content.compileWithNext(state)
+ return state
+}
+func (ast TransducerASTMinimise) String() string {
+ return fmt.Sprintf("(%v)-", ast.content)
+}
+
+type TransducerASTRepeat struct {
+ content TransducerAST
+ min, max int
+}
+func (ast TransducerASTRepeat) compileWithNext(next TransducerState) TransducerState {
+ for i := ast.min; i < ast.max; i += 1 {
+ next = TransducerGroupState {
+ ast.content.compileWithNext(next),
+ next,
+ }
+ }
+ for i := 0; i < ast.min; i += 1 {
+ next = ast.content.compileWithNext(next)
+ }
+ return next
+}
+
+type TransducerASTCopyRune rune
+func (ast TransducerASTCopyRune) compileWithNext(next TransducerState) TransducerState {
+ return TransducerCopyRuneState{
+ rune: rune(ast),
+ next: next,
+ }
+}
+
+type TransducerASTCopyAny struct {}
+func (ast TransducerASTCopyAny) compileWithNext(next TransducerState) TransducerState {
+ return TransducerCopyAnyState{next}
+}
+func (ast TransducerASTCopyAny) String() string {
+ return "."
+}
+
+type TransducerASTOutput struct {
+ replacement []TransducerOutput
+}
+func (ast TransducerASTOutput) compileWithNext(next TransducerState) TransducerState {
+ return TransducerOutputState{
+ content: ast.replacement,
+ next: next,
+ }
+}
+
+func compileTransducer(transducerAst TransducerAST) TransducerState {
+ return transducerAst.compileWithNext(TransducerNoneState{})
+}
+
+type TransducerStatePair struct {
+ store Store
+ state TransducerState
+ output string
+}
+func (pair TransducerStatePair) eat(char rune) []TransducerStatePair {
+ states := pair.state.eat(pair.store, char)
+ for i := range states {
+ states[i].output = pair.output + states[i].output
+ }
+ return states
+}
+func (pair TransducerStatePair) accepting() []string {
+ return pair.state.accepting(pair.store)
+}
+
+func runTransducer(transducer TransducerState, input string) (output string, err bool) {
+ states := []TransducerStatePair{{
+ state: transducer,
+ output: "",
+ store: make(Store),
+ }}
+ for _, char := range input {
+ var newStates []TransducerStatePair
+ for _, state := range states {
+ newStates = append(newStates, state.eat(char)...)
+ }
+ states = newStates
+ }
+ for _, state := range states {
+ outputEnds := state.accepting()
+ for _, outputEnd := range outputEnds {
+ return state.output + outputEnd, false
+ }
+ }
+ return "", true
+}
+
func main() {
+ if len(os.Args) != 3 {
+ panic("Expected: program [input] [subex]")
+ }
+ input := os.Args[1]
+ program := os.Args[2]
+ ast := parse(program)
+ transducer := compileTransducer(ast)
+ output, err := runTransducer(transducer, input)
+ if err {
+ output = input
+ }
+ fmt.Println(output)
+}
+
+func mainISH() {
quiet := false
var input string
hasInput := false