From 9d82785f46949151b783d83648b39ce9ba40c615 Mon Sep 17 00:00:00 2001 From: Charlie Stanton Date: Sat, 30 Mar 2024 09:42:00 +0000 Subject: Add none structures and allow mismatched destructuring --- subex/main.go | 1 - subex/main_test.go | 56 ++++++++++++++++- subex/parse.go | 175 ++++++++++++++++++++++++++++++++++++++++------------- subex/subexast.go | 145 +++++++++++++++++++++++++++----------------- 4 files changed, 278 insertions(+), 99 deletions(-) (limited to 'subex') diff --git a/subex/main.go b/subex/main.go index 86a8d41..f8d9093 100644 --- a/subex/main.go +++ b/subex/main.go @@ -276,7 +276,6 @@ func processInput(states []SubexEatBranch, input walk.Edible, nesting int) []Sub newStates := make([]SubexEatBranch, 0, 2) for _, state := range states { - // TODO: What if nesting is changed by an epsilon state? if state.aux.nesting == nesting { newStates = addStates(newStates, state.eat(input)) } else if state.aux.nesting < nesting { diff --git a/subex/main_test.go b/subex/main_test.go index 78a62c4..d7424b3 100644 --- a/subex/main_test.go +++ b/subex/main_test.go @@ -60,6 +60,15 @@ func TestSubexMain(t *testing.T) { }}, }, }, + { + subex: `~(.)~`, + input: []walk.Value { + walk.StringValue("a"), + }, + expected: []walk.Value { + walk.StringValue("a"), + }, + }, { subex: `~(.$_(.{-0}))~`, input: []walk.Value { @@ -182,9 +191,54 @@ func TestSubexMain(t *testing.T) { }, }, }, + { + subex: "-(`0`.)@", + input: []walk.Value { + walk.NumberValue(4), + }, + expected: []walk.Value { + walk.ArrayValue { + { + Index: 0, + Value: walk.NumberValue(4), + }, + }, + }, + }, + { + subex: `@(.$_~(.{-0})-{-0})~`, + input: []walk.Value { + walk.ArrayValue { + { + Index: 0, + Value: walk.StringValue("ab"), + }, + { + Index: 1, + Value: walk.StringValue("cd"), + }, + { + Index: 2, + Value: walk.StringValue("efg"), + }, + { + Index: 3, + Value: walk.StringValue(""), + }, + { + Index: 4, + Value: walk.StringValue("hijklm"), + }, + }, + }, + expected: []walk.Value { + walk.StringValue("abcdefghijklm"), + }, + }, } - for _, test := range tests { + for i, test := range tests { + t.Logf("Running test: %d", i) lexer := NewStringRuneReader(test.subex) ast := Parse(lexer) transducer := CompileTransducer(ast) diff --git a/subex/parse.go b/subex/parse.go index 9602a4b..f1565f5 100644 --- a/subex/parse.go +++ b/subex/parse.go @@ -8,10 +8,44 @@ import ( type Type int const ( - ValueType Type = iota + AnyType Type = iota + ValueType RuneType ) +func resolveTypes(t1 Type, t2 Type) Type { + if t1 == AnyType { + return t2 + } + + if t2 == AnyType { + return t1 + } + + if t1 == t2 { + return t1 + } + + panic("Types don't match in parser") +} + +type Structure int +const ( + NoneStructure Structure = iota + StringStructure + ArrayStructure +) +func (s Structure) innerType() Type { + switch s { + case StringStructure: + return RuneType + case ArrayStructure: + return ValueType + default: + panic("Invalid structure") + } +} + type RuneReader interface { Next() rune Rewind() @@ -270,48 +304,94 @@ func parseRuneReplacement(l RuneReader) (output []OutputRuneAST) { // return parts // } -func parseSubex(l RuneReader, minPower int, inType Type, outType Type) SubexAST { - var lhs SubexAST +func parseDestructure(l RuneReader, destructure Structure, inType Type) (lhs SubexAST, outType Type) { + if !accept(l, "(") { + panic("Missing ( after destructure start") + } + + var innerInType Type + var expectedInType Type + switch destructure { + case NoneStructure: + innerInType = inType + expectedInType = inType + case StringStructure: + innerInType = RuneType + expectedInType = ValueType + case ArrayStructure: + innerInType = ValueType + expectedInType = ValueType + default: + panic("Invalid structure") + } + + resolveTypes(inType, expectedInType) + + lhs, innerOutType := parseSubex(l, 0, innerInType) + if !accept(l, ")") { + panic("Missing matching )") + } + + var structure Structure + var expectedInnerOutType Type + r := l.Next() + switch r { + case '-': + structure = NoneStructure + expectedInnerOutType = innerOutType + case '~': + structure = StringStructure + expectedInnerOutType = RuneType + case '@': + structure = ArrayStructure + expectedInnerOutType = ValueType + default: + panic("Missing matching destructure") + } + + innerOutType = resolveTypes(innerOutType, expectedInnerOutType) + + switch structure { + case NoneStructure: + outType = innerOutType + case StringStructure: + outType = ValueType + case ArrayStructure: + outType = ValueType + } + + lhs = SubexASTDestructure { + Destructure: destructure, + Structure: structure, + Content: lhs, + } + + return lhs, outType +} + +func parseSubex(l RuneReader, minPower int, inType Type) (lhs SubexAST, outType Type) { r := l.Next() switch r { case eof: - return nil + return nil, inType case '(': - lhs = parseSubex(l, 0, inType, outType) + lhs, outType = parseSubex(l, 0, inType) if !accept(l, ")") { panic("Missing matching )") } + case '-': + lhs, outType = parseDestructure(l, NoneStructure, inType) case '~': - if !accept(l, "(") { - panic("Missing ( after ~") - } - lhs = parseSubex(l, 0, RuneType, RuneType) - if !accept(l, ")") { - panic("Missing matching )") - } - if !accept(l, "~") { - panic("Missing matching ~") - } - lhs = SubexASTEnterString {lhs} + lhs, outType = parseDestructure(l, StringStructure, inType) case '@': - if !accept(l, "(") { - panic("Missing ( after @") - } - lhs = parseSubex(l, 0, ValueType, ValueType) - if !accept(l, ")") { - panic("Missing matching )") - } - if !accept(l, "@") { - panic("Missing matching ~") - } - lhs = SubexASTEnterArray {lhs} + lhs, outType = parseDestructure(l, ArrayStructure, inType) // TODO // case '[': // rangeParts := parseRangeSubex(l) // lhs = SubexASTRange {rangeParts} - case ')', ']', '"', '|', ';', '{', '+', '-', '*', '/', '!', '=', '$': + case ')', ']', '"', '|', ';', '{', '+', '*', '/', '!', '=', '$': l.Rewind() - return SubexASTEmpty{} + return SubexASTEmpty{}, inType // case '=': // replacement := parseReplacement(l) // lhs = SubexASTOutput{replacement} @@ -327,19 +407,20 @@ func parseSubex(l RuneReader, minPower int, inType Type, outType Type) SubexAST // ) // lhs = SubexASTOutput {replacement} case '.': - if inType != outType { - panic("Copying value changes type!") - } + outType = inType if inType == RuneType { lhs = SubexASTCopyAnyRune{} } else { lhs = SubexASTCopyAnyValue{} } case '?': + outType = inType lhs = SubexASTCopyBool{} case '%': + outType = inType lhs = SubexASTCopyNumber{} case '`': + outType = inType lhs = SubexASTOutputValues {parseValueReplacement(l)} // TODO // case '_': @@ -351,9 +432,7 @@ func parseSubex(l RuneReader, minPower int, inType Type, outType Type) SubexAST // case '"': // lhs = SubexASTCopyScalar {walk.NewAtomStringTerminal()} default: - if inType != outType { - panic("inType and outType don't match in copy") - } + outType = inType if inType == RuneType { lhs = SubexASTCopyRune {r} } else { @@ -367,8 +446,9 @@ func parseSubex(l RuneReader, minPower int, inType Type, outType Type) SubexAST } loop: for { if minPower <= 20 { - next := parseSubex(l, 21, inType, outType) + next, outType2 := parseSubex(l, 21, inType) if next != nil && (next != SubexASTEmpty{}) { + outType = resolveTypes(outType, outType2) lhs = SubexASTConcat{lhs, next} continue loop } @@ -382,14 +462,18 @@ func parseSubex(l RuneReader, minPower int, inType Type, outType Type) SubexAST } case r == '+' && minPower <= 4: lhs = SubexASTSum {lhs} + resolveTypes(inType, ValueType) + outType = resolveTypes(outType, ValueType) case r == '*' && minPower <= 4: lhs = SubexASTProduct {lhs} - case r == '-' && minPower <= 4: - lhs = SubexASTNegate {lhs} + resolveTypes(inType, ValueType) + outType = resolveTypes(outType, ValueType) // case r == '/' && minPower <= 4: // lhs = SubexASTReciprocal {lhs} case r == '!' && minPower <= 4: lhs = SubexASTNot {lhs} + resolveTypes(inType, ValueType) + outType = resolveTypes(outType, ValueType) // case r == '=' && minPower <= 4: // lhs = SubexASTEqual {lhs} case r == '$' && minPower <= 4: @@ -398,15 +482,21 @@ func parseSubex(l RuneReader, minPower int, inType Type, outType Type) SubexAST panic("Missing slot character") } if slot == '_' { - lhs = SubexASTDiscard {lhs} + lhs = SubexASTDiscard { + Content: lhs, + InnerOutType: outType, + } } else { + resolveTypes(inType, ValueType) lhs = SubexASTStoreValues { Match: lhs, Slot: slot, } } + outType = AnyType case r == '|' && minPower <= 8: - rhs := parseSubex(l, 9, inType, outType) + rhs, outType2 := parseSubex(l, 9, inType) + outType = resolveTypes(outType, outType2) if rhs == nil { panic("Missing subex after |") } @@ -425,11 +515,12 @@ func parseSubex(l RuneReader, minPower int, inType Type, outType Type) SubexAST break loop } } - return lhs + return lhs, outType } func Parse(l RuneReader) SubexAST { - ast := parseSubex(l, 0, ValueType, ValueType) + ast, outType := parseSubex(l, 0, ValueType) + outType = resolveTypes(outType, ValueType) if ast == nil { return SubexASTEmpty{} } diff --git a/subex/subexast.go b/subex/subexast.go index cef853b..7070baf 100644 --- a/subex/subexast.go +++ b/subex/subexast.go @@ -132,9 +132,6 @@ type SubexASTRepeat struct { Acceptable []ConvexRange } func (ast SubexASTRepeat) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { - if inType != outType { - panic("Invalid types") - } var state SubexState = &SubexDeadState{} for _, convex := range ast.Acceptable { state = &SubexGroupState {state, convex.compile(ast.Content, next, slotMap, inType, outType)} @@ -223,7 +220,8 @@ func (ast SubexASTCopyNumber) String() string { type SubexASTCopyAnyValue struct {} func (ast SubexASTCopyAnyValue) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { if inType != ValueType || outType != ValueType { - panic("Invalid types for SubexASTNot") + fmt.Printf("%v, %v", inType, outType) + panic("Invalid types for SubexASTCopyAnyValue") } return &SubexCopyState { next: next, @@ -446,9 +444,10 @@ func (ast SubexASTEmpty) String() string { // Discards the output from the content subex type SubexASTDiscard struct { Content SubexAST + InnerOutType Type } func (ast SubexASTDiscard) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { - newNext := ast.Content.compileWith(&SubexDiscardState {next}, slotMap, inType, outType) + newNext := ast.Content.compileWith(&SubexDiscardState {next}, slotMap, inType, ast.InnerOutType) if inType == ValueType { return &SubexCaptureBeginState { next: newNext, @@ -463,65 +462,101 @@ func (ast SubexASTDiscard) String() string { return fmt.Sprintf("(%v)$_", ast.Content) } -// Go into an array, pass the content each of the values in the array to eat and then leave the array -type SubexASTEnterArray struct { +type SubexASTDestructure struct { + Destructure Structure + Structure Structure Content SubexAST } -func (ast SubexASTEnterArray) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { - if inType != ValueType || outType != ValueType { - panic("Invalid types for SubexASTEnterArray") +func (ast SubexASTDestructure) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { + var innerOutType Type + var construct SubexState + switch ast.Structure { + case NoneStructure: + innerOutType = outType + construct = next + case StringStructure: + innerOutType = RuneType + construct = &SubexConstructStringState { + next: next, + } + case ArrayStructure: + innerOutType = ValueType + construct = &SubexConstructArrayState { + next: next, + } } - return &SubexCaptureBeginState { - next: &SubexCopyState { - filter: anyArrayFilter{}, - next: &SubexDiscardState { - next: &SubexIncrementNestState { - next: &SubexCaptureBeginState { - next: ast.Content.compileWith( - &SubexDiscardTerminalState { - terminal: walk.ArrayEnd, - next: &SubexDecrementNestState { - next: &SubexConstructArrayState {next: next}, - }, - }, - slotMap, - ValueType, - ValueType, - ), - }, - }, + + var innerInType Type + var destructFooter SubexState + switch ast.Destructure { + case NoneStructure: + innerInType = inType + destructFooter = construct + case StringStructure: + innerInType = RuneType + destructFooter = &SubexDiscardTerminalState { + terminal: walk.StringEnd, + next: &SubexDecrementNestState { + next: construct, + }, + } + case ArrayStructure: + innerInType = ValueType + destructFooter = &SubexDiscardTerminalState { + terminal: walk.ArrayEnd, + next: &SubexDecrementNestState { + next: construct, }, - }, + } } -} -type SubexASTEnterString struct { - Content SubexAST -} -func (ast SubexASTEnterString) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { - if inType != ValueType || outType != ValueType { - panic("Invalid types for SubexASTEnterString") + inner := ast.Content.compileWith( + destructFooter, + slotMap, + innerInType, + innerOutType, + ) + + var beginConstruct SubexState + switch ast.Structure { + case NoneStructure: + beginConstruct = inner + case StringStructure: + beginConstruct = &SubexCaptureRunesBeginState { + next: inner, + } + case ArrayStructure: + beginConstruct = &SubexCaptureBeginState { + next: inner, + } } - return &SubexCaptureBeginState { - next: &SubexCopyState { - filter: anyStringFilter{}, - next: &SubexDiscardState { - next: &SubexIncrementNestState { - next: &SubexCaptureRunesBeginState { - next: ast.Content.compileWith( - &SubexDiscardTerminalState { - terminal: walk.StringEnd, - next: &SubexDecrementNestState { - next: &SubexConstructStringState {next: next}, - }, - }, - slotMap, - RuneType, - RuneType, - ), + + switch ast.Destructure { + case NoneStructure: + return beginConstruct + case StringStructure: + return &SubexCaptureBeginState { + next: &SubexCopyState { + filter: anyStringFilter{}, + next: &SubexDiscardState { + next: &SubexIncrementNestState { + next: beginConstruct, + }, + }, + }, + } + case ArrayStructure: + return &SubexCaptureBeginState { + next: &SubexCopyState { + filter: anyArrayFilter{}, + next: &SubexDiscardState { + next: &SubexIncrementNestState { + next: beginConstruct, }, }, }, - }, + } + default: + panic("Invalid destructure in ast") } } -- cgit v1.2.3