<- Back to shtanton's homepage
about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- subex/main.go 1
-rw-r--r-- subex/main_test.go 56
-rw-r--r-- subex/parse.go 175
-rw-r--r-- subex/subexast.go 145
4 files changed, 278 insertions, 99 deletions
diff --git a/subex/main.go b/subex/main.go
index 86a8d41..f8d9093 100644
--- a/subex/main.go
+++ b/subex/main.go
@@ -276,7 +276,6 @@ func processInput(states []SubexEatBranch, input walk.Edible, nesting int) []Sub
newStates := make([]SubexEatBranch, 0, 2)
for _, state := range states {
- // TODO: What if nesting is changed by an epsilon state?
if state.aux.nesting == nesting {
newStates = addStates(newStates, state.eat(input))
} else if state.aux.nesting < nesting {
diff --git a/subex/main_test.go b/subex/main_test.go
index 78a62c4..d7424b3 100644
--- a/subex/main_test.go
+++ b/subex/main_test.go
@@ -61,6 +61,15 @@ func TestSubexMain(t *testing.T) {
},
},
{
+ subex: `~(.)~`,
+ input: []walk.Value {
+ walk.StringValue("a"),
+ },
+ expected: []walk.Value {
+ walk.StringValue("a"),
+ },
+ },
+ {
subex: `~(.$_(.{-0}))~`,
input: []walk.Value {
walk.StringValue("hello"),
@@ -182,9 +191,54 @@ func TestSubexMain(t *testing.T) {
},
},
},
+ {
+ subex: "-(`0`.)@",
+ input: []walk.Value {
+ walk.NumberValue(4),
+ },
+ expected: []walk.Value {
+ walk.ArrayValue {
+ {
+ Index: 0,
+ Value: walk.NumberValue(4),
+ },
+ },
+ },
+ },
+ {
+ subex: `@(.$_~(.{-0})-{-0})~`,
+ input: []walk.Value {
+ walk.ArrayValue {
+ {
+ Index: 0,
+ Value: walk.StringValue("ab"),
+ },
+ {
+ Index: 1,
+ Value: walk.StringValue("cd"),
+ },
+ {
+ Index: 2,
+ Value: walk.StringValue("efg"),
+ },
+ {
+ Index: 3,
+ Value: walk.StringValue(""),
+ },
+ {
+ Index: 4,
+ Value: walk.StringValue("hijklm"),
+ },
+ },
+ },
+ expected: []walk.Value {
+ walk.StringValue("abcdefghijklm"),
+ },
+ },
}
- for _, test := range tests {
+ for i, test := range tests {
+ t.Logf("Running test: %d", i)
lexer := NewStringRuneReader(test.subex)
ast := Parse(lexer)
transducer := CompileTransducer(ast)
diff --git a/subex/parse.go b/subex/parse.go
index 9602a4b..f1565f5 100644
--- a/subex/parse.go
+++ b/subex/parse.go
@@ -8,10 +8,44 @@ import (
type Type int
const (
- ValueType Type = iota
+ AnyType Type = iota
+ ValueType
RuneType
)
+func resolveTypes(t1 Type, t2 Type) Type {
+ if t1 == AnyType {
+ return t2
+ }
+
+ if t2 == AnyType {
+ return t1
+ }
+
+ if t1 == t2 {
+ return t1
+ }
+
+ panic("Types don't match in parser")
+}
+
+type Structure int
+const (
+ NoneStructure Structure = iota
+ StringStructure
+ ArrayStructure
+)
+func (s Structure) innerType() Type {
+ switch s {
+ case StringStructure:
+ return RuneType
+ case ArrayStructure:
+ return ValueType
+ default:
+ panic("Invalid structure")
+ }
+}
+
type RuneReader interface {
Next() rune
Rewind()
@@ -270,48 +304,94 @@ func parseRuneReplacement(l RuneReader) (output []OutputRuneAST) {
// return parts
// }
-func parseSubex(l RuneReader, minPower int, inType Type, outType Type) SubexAST {
- var lhs SubexAST
+func parseDestructure(l RuneReader, destructure Structure, inType Type) (lhs SubexAST, outType Type) {
+ if !accept(l, "(") {
+ panic("Missing ( after destructure start")
+ }
+
+ var innerInType Type
+ var expectedInType Type
+ switch destructure {
+ case NoneStructure:
+ innerInType = inType
+ expectedInType = inType
+ case StringStructure:
+ innerInType = RuneType
+ expectedInType = ValueType
+ case ArrayStructure:
+ innerInType = ValueType
+ expectedInType = ValueType
+ default:
+ panic("Invalid structure")
+ }
+
+ resolveTypes(inType, expectedInType)
+
+ lhs, innerOutType := parseSubex(l, 0, innerInType)
+ if !accept(l, ")") {
+ panic("Missing matching )")
+ }
+
+ var structure Structure
+ var expectedInnerOutType Type
+ r := l.Next()
+ switch r {
+ case '-':
+ structure = NoneStructure
+ expectedInnerOutType = innerOutType
+ case '~':
+ structure = StringStructure
+ expectedInnerOutType = RuneType
+ case '@':
+ structure = ArrayStructure
+ expectedInnerOutType = ValueType
+ default:
+ panic("Missing matching destructure")
+ }
+
+ innerOutType = resolveTypes(innerOutType, expectedInnerOutType)
+
+ switch structure {
+ case NoneStructure:
+ outType = innerOutType
+ case StringStructure:
+ outType = ValueType
+ case ArrayStructure:
+ outType = ValueType
+ }
+
+ lhs = SubexASTDestructure {
+ Destructure: destructure,
+ Structure: structure,
+ Content: lhs,
+ }
+
+ return lhs, outType
+}
+
+func parseSubex(l RuneReader, minPower int, inType Type) (lhs SubexAST, outType Type) {
r := l.Next()
switch r {
case eof:
- return nil
+ return nil, inType
case '(':
- lhs = parseSubex(l, 0, inType, outType)
+ lhs, outType = parseSubex(l, 0, inType)
if !accept(l, ")") {
panic("Missing matching )")
}
+ case '-':
+ lhs, outType = parseDestructure(l, NoneStructure, inType)
case '~':
- if !accept(l, "(") {
- panic("Missing ( after ~")
- }
- lhs = parseSubex(l, 0, RuneType, RuneType)
- if !accept(l, ")") {
- panic("Missing matching )")
- }
- if !accept(l, "~") {
- panic("Missing matching ~")
- }
- lhs = SubexASTEnterString {lhs}
+ lhs, outType = parseDestructure(l, StringStructure, inType)
case '@':
- if !accept(l, "(") {
- panic("Missing ( after @")
- }
- lhs = parseSubex(l, 0, ValueType, ValueType)
- if !accept(l, ")") {
- panic("Missing matching )")
- }
- if !accept(l, "@") {
- panic("Missing matching ~")
- }
- lhs = SubexASTEnterArray {lhs}
+ lhs, outType = parseDestructure(l, ArrayStructure, inType)
// TODO
// case '[':
// rangeParts := parseRangeSubex(l)
// lhs = SubexASTRange {rangeParts}
- case ')', ']', '"', '|', ';', '{', '+', '-', '*', '/', '!', '=', '$':
+ case ')', ']', '"', '|', ';', '{', '+', '*', '/', '!', '=', '$':
l.Rewind()
- return SubexASTEmpty{}
+ return SubexASTEmpty{}, inType
// case '=':
// replacement := parseReplacement(l)
// lhs = SubexASTOutput{replacement}
@@ -327,19 +407,20 @@ func parseSubex(l RuneReader, minPower int, inType Type, outType Type) SubexAST
// )
// lhs = SubexASTOutput {replacement}
case '.':
- if inType != outType {
- panic("Copying value changes type!")
- }
+ outType = inType
if inType == RuneType {
lhs = SubexASTCopyAnyRune{}
} else {
lhs = SubexASTCopyAnyValue{}
}
case '?':
+ outType = inType
lhs = SubexASTCopyBool{}
case '%':
+ outType = inType
lhs = SubexASTCopyNumber{}
case '`':
+ outType = inType
lhs = SubexASTOutputValues {parseValueReplacement(l)}
// TODO
// case '_':
@@ -351,9 +432,7 @@ func parseSubex(l RuneReader, minPower int, inType Type, outType Type) SubexAST
// case '"':
// lhs = SubexASTCopyScalar {walk.NewAtomStringTerminal()}
default:
- if inType != outType {
- panic("inType and outType don't match in copy")
- }
+ outType = inType
if inType == RuneType {
lhs = SubexASTCopyRune {r}
} else {
@@ -367,8 +446,9 @@ func parseSubex(l RuneReader, minPower int, inType Type, outType Type) SubexAST
}
loop: for {
if minPower <= 20 {
- next := parseSubex(l, 21, inType, outType)
+ next, outType2 := parseSubex(l, 21, inType)
if next != nil && (next != SubexASTEmpty{}) {
+ outType = resolveTypes(outType, outType2)
lhs = SubexASTConcat{lhs, next}
continue loop
}
@@ -382,14 +462,18 @@ func parseSubex(l RuneReader, minPower int, inType Type, outType Type) SubexAST
}
case r == '+' && minPower <= 4:
lhs = SubexASTSum {lhs}
+ resolveTypes(inType, ValueType)
+ outType = resolveTypes(outType, ValueType)
case r == '*' && minPower <= 4:
lhs = SubexASTProduct {lhs}
- case r == '-' && minPower <= 4:
- lhs = SubexASTNegate {lhs}
+ resolveTypes(inType, ValueType)
+ outType = resolveTypes(outType, ValueType)
// case r == '/' && minPower <= 4:
// lhs = SubexASTReciprocal {lhs}
case r == '!' && minPower <= 4:
lhs = SubexASTNot {lhs}
+ resolveTypes(inType, ValueType)
+ outType = resolveTypes(outType, ValueType)
// case r == '=' && minPower <= 4:
// lhs = SubexASTEqual {lhs}
case r == '$' && minPower <= 4:
@@ -398,15 +482,21 @@ func parseSubex(l RuneReader, minPower int, inType Type, outType Type) SubexAST
panic("Missing slot character")
}
if slot == '_' {
- lhs = SubexASTDiscard {lhs}
+ lhs = SubexASTDiscard {
+ Content: lhs,
+ InnerOutType: outType,
+ }
} else {
+ resolveTypes(inType, ValueType)
lhs = SubexASTStoreValues {
Match: lhs,
Slot: slot,
}
}
+ outType = AnyType
case r == '|' && minPower <= 8:
- rhs := parseSubex(l, 9, inType, outType)
+ rhs, outType2 := parseSubex(l, 9, inType)
+ outType = resolveTypes(outType, outType2)
if rhs == nil {
panic("Missing subex after |")
}
@@ -425,11 +515,12 @@ func parseSubex(l RuneReader, minPower int, inType Type, outType Type) SubexAST
break loop
}
}
- return lhs
+ return lhs, outType
}
func Parse(l RuneReader) SubexAST {
- ast := parseSubex(l, 0, ValueType, ValueType)
+ ast, outType := parseSubex(l, 0, ValueType)
+ outType = resolveTypes(outType, ValueType)
if ast == nil {
return SubexASTEmpty{}
}
diff --git a/subex/subexast.go b/subex/subexast.go
index cef853b..7070baf 100644
--- a/subex/subexast.go
+++ b/subex/subexast.go
@@ -132,9 +132,6 @@ type SubexASTRepeat struct {
Acceptable []ConvexRange
}
func (ast SubexASTRepeat) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState {
- if inType != outType {
- panic("Invalid types")
- }
var state SubexState = &SubexDeadState{}
for _, convex := range ast.Acceptable {
state = &SubexGroupState {state, convex.compile(ast.Content, next, slotMap, inType, outType)}
@@ -223,7 +220,8 @@ func (ast SubexASTCopyNumber) String() string {
type SubexASTCopyAnyValue struct {}
func (ast SubexASTCopyAnyValue) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState {
if inType != ValueType || outType != ValueType {
- panic("Invalid types for SubexASTNot")
+ fmt.Printf("%v, %v", inType, outType)
+ panic("Invalid types for SubexASTCopyAnyValue")
}
return &SubexCopyState {
next: next,
@@ -446,9 +444,10 @@ func (ast SubexASTEmpty) String() string {
// Discards the output from the content subex
type SubexASTDiscard struct {
Content SubexAST
+ InnerOutType Type
}
func (ast SubexASTDiscard) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState {
- newNext := ast.Content.compileWith(&SubexDiscardState {next}, slotMap, inType, outType)
+ newNext := ast.Content.compileWith(&SubexDiscardState {next}, slotMap, inType, ast.InnerOutType)
if inType == ValueType {
return &SubexCaptureBeginState {
next: newNext,
@@ -463,65 +462,101 @@ func (ast SubexASTDiscard) String() string {
return fmt.Sprintf("(%v)$_", ast.Content)
}
-// Go into an array, pass the content each of the values in the array to eat and then leave the array
-type SubexASTEnterArray struct {
+type SubexASTDestructure struct {
+ Destructure Structure
+ Structure Structure
Content SubexAST
}
-func (ast SubexASTEnterArray) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState {
- if inType != ValueType || outType != ValueType {
- panic("Invalid types for SubexASTEnterArray")
+func (ast SubexASTDestructure) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState {
+ var innerOutType Type
+ var construct SubexState
+ switch ast.Structure {
+ case NoneStructure:
+ innerOutType = outType
+ construct = next
+ case StringStructure:
+ innerOutType = RuneType
+ construct = &SubexConstructStringState {
+ next: next,
+ }
+ case ArrayStructure:
+ innerOutType = ValueType
+ construct = &SubexConstructArrayState {
+ next: next,
+ }
}
- return &SubexCaptureBeginState {
- next: &SubexCopyState {
- filter: anyArrayFilter{},
- next: &SubexDiscardState {
- next: &SubexIncrementNestState {
- next: &SubexCaptureBeginState {
- next: ast.Content.compileWith(
- &SubexDiscardTerminalState {
- terminal: walk.ArrayEnd,
- next: &SubexDecrementNestState {
- next: &SubexConstructArrayState {next: next},
- },
- },
- slotMap,
- ValueType,
- ValueType,
- ),
- },
- },
+
+ var innerInType Type
+ var destructFooter SubexState
+ switch ast.Destructure {
+ case NoneStructure:
+ innerInType = inType
+ destructFooter = construct
+ case StringStructure:
+ innerInType = RuneType
+ destructFooter = &SubexDiscardTerminalState {
+ terminal: walk.StringEnd,
+ next: &SubexDecrementNestState {
+ next: construct,
+ },
+ }
+ case ArrayStructure:
+ innerInType = ValueType
+ destructFooter = &SubexDiscardTerminalState {
+ terminal: walk.ArrayEnd,
+ next: &SubexDecrementNestState {
+ next: construct,
},
- },
+ }
}
-}
-type SubexASTEnterString struct {
- Content SubexAST
-}
-func (ast SubexASTEnterString) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState {
- if inType != ValueType || outType != ValueType {
- panic("Invalid types for SubexASTEnterString")
+ inner := ast.Content.compileWith(
+ destructFooter,
+ slotMap,
+ innerInType,
+ innerOutType,
+ )
+
+ var beginConstruct SubexState
+ switch ast.Structure {
+ case NoneStructure:
+ beginConstruct = inner
+ case StringStructure:
+ beginConstruct = &SubexCaptureRunesBeginState {
+ next: inner,
+ }
+ case ArrayStructure:
+ beginConstruct = &SubexCaptureBeginState {
+ next: inner,
+ }
}
- return &SubexCaptureBeginState {
- next: &SubexCopyState {
- filter: anyStringFilter{},
- next: &SubexDiscardState {
- next: &SubexIncrementNestState {
- next: &SubexCaptureRunesBeginState {
- next: ast.Content.compileWith(
- &SubexDiscardTerminalState {
- terminal: walk.StringEnd,
- next: &SubexDecrementNestState {
- next: &SubexConstructStringState {next: next},
- },
- },
- slotMap,
- RuneType,
- RuneType,
- ),
+
+ switch ast.Destructure {
+ case NoneStructure:
+ return beginConstruct
+ case StringStructure:
+ return &SubexCaptureBeginState {
+ next: &SubexCopyState {
+ filter: anyStringFilter{},
+ next: &SubexDiscardState {
+ next: &SubexIncrementNestState {
+ next: beginConstruct,
+ },
+ },
+ },
+ }
+ case ArrayStructure:
+ return &SubexCaptureBeginState {
+ next: &SubexCopyState {
+ filter: anyArrayFilter{},
+ next: &SubexDiscardState {
+ next: &SubexIncrementNestState {
+ next: beginConstruct,
},
},
},
- },
+ }
+ default:
+ panic("Invalid destructure in ast")
}
}