<- Back to shtanton's homepage
aboutsummaryrefslogtreecommitdiff
path: root/subex/parse.go
diff options
context:
space:
mode:
author Charlie Stanton <charlie@shtanton.xyz> 2023-07-19 11:57:59 +0100
committer Charlie Stanton <charlie@shtanton.xyz> 2023-07-19 11:57:59 +0100
commit 8cf10efe3b5a1bcc70bc6e5590ee63fd5eb00c5b (patch)
tree 7a16883c17c2bdcc49b2f9d4f333dfc76c66248f /subex/parse.go
parent 3c34366bdd5d817a184d6b1c901d03a16b6faa4b (diff)
download stred-go-8cf10efe3b5a1bcc70bc6e5590ee63fd5eb00c5b.tar
Huge refactor to a more value-based system, doing away with terminals. Also introduces unit testing.
Diffstat (limited to 'subex/parse.go')
-rw-r--r-- subex/parse.go 295
1 files changed, 157 insertions, 138 deletions
diff --git a/subex/parse.go b/subex/parse.go
index 746217b..a671e6d 100644
--- a/subex/parse.go
+++ b/subex/parse.go
@@ -22,7 +22,7 @@ func accept(l RuneReader, chars string) bool {
return false
}
-func expectBracket(l RuneReader, ifLeft walk.Atom, ifRight walk.Atom) walk.Atom {
+func expectBracket(l RuneReader, ifLeft walk.AtomOLD, ifRight walk.AtomOLD) walk.AtomOLD {
switch l.Next() {
case '(':
return ifLeft
@@ -38,7 +38,7 @@ func isNumericRune(r rune) bool {
}
// Having just parsed a `, read until the next ` and parse the contents into a list of non-string atoms
-func parseNonStringLiteral(l RuneReader) (literals []walk.Atom) {
+func parseNonStringLiteral(l RuneReader) (literals []walk.Scalar) {
for {
r := l.Next()
if isNumericRune(r) {
@@ -57,7 +57,7 @@ func parseNonStringLiteral(l RuneReader) (literals []walk.Atom) {
if err != nil {
panic("Invalid number literal")
}
- literals = append(literals, walk.NewAtomNumber(number))
+ literals = append(literals, walk.NumberScalar(number))
continue
}
switch r {
@@ -67,30 +67,22 @@ func parseNonStringLiteral(l RuneReader) (literals []walk.Atom) {
continue
case 'n':
if accept(l, "u") && accept(l, "l") && accept(l, "l") {
- literals = append(literals, walk.NewAtomNull())
+ literals = append(literals, walk.NullScalar{})
} else {
panic("Invalid literal")
}
case 't':
if accept(l, "r") && accept(l, "u") && accept(l, "e") {
- literals = append(literals, walk.NewAtomBool(true))
+ literals = append(literals, walk.BoolScalar(true))
} else {
panic("Invalid literal")
}
case 'f':
if accept(l, "a") && accept(l, "l") && accept(l, "s") && accept(l, "e") {
- literals = append(literals, walk.NewAtomBool(false))
+ literals = append(literals, walk.BoolScalar(false))
} else {
panic("Invalid literal")
}
- case '{':
- literals = append(literals, walk.NewAtomTerminal(walk.MapBegin))
- case '}':
- literals = append(literals, walk.NewAtomTerminal(walk.MapEnd))
- case '[':
- literals = append(literals, walk.NewAtomTerminal(walk.ArrayBegin))
- case ']':
- literals = append(literals, walk.NewAtomTerminal(walk.ArrayEnd))
default:
panic("Invalid literal")
}
@@ -177,113 +169,113 @@ func parseReplacement(l RuneReader) (output []OutputContentAST) {
case '`':
literals := parseNonStringLiteral(l)
for _, literal := range literals {
- output = append(output, OutputAtomLiteralAST {literal})
+ output = append(output, OutputValueLiteralAST {literal})
}
- case '"':
- output = append(output, OutputAtomLiteralAST {walk.NewAtomStringTerminal()})
default:
- output = append(output, OutputAtomLiteralAST{atom: walk.NewAtomStringRune(r)})
+ panic("Invalid value to insert")
+ //output = append(output, OutputValueLiteralAST{atom: walk.NewAtomStringRune(r)})
}
}
return output
}
// Parse the contents of a range subex [] into a map
-func parseRangeSubex(l RuneReader) map[walk.Atom]walk.Atom {
- // TODO escaping
- parts := make(map[walk.Atom]walk.Atom)
- var froms []walk.Atom
- var hasTo bool
- for {
- fromsStart := l.Next()
- if fromsStart == ']' {
- hasTo = false
- break
- } else if fromsStart == '=' {
- hasTo = true
- break
- } else if fromsStart == '`' {
- literals := parseNonStringLiteral(l)
- froms = append(froms, literals...)
- continue
- } else if fromsStart == '"' {
- froms = append(froms, walk.NewAtomStringTerminal())
- continue
- }
- if accept(l, "-") {
- fromsEnd := l.Next()
- if fromsEnd == ']' || fromsEnd == '=' {
- l.Rewind()
- fromsEnd = fromsStart
- }
- for i := fromsStart; i <= fromsEnd; i += 1 {
- froms = append(froms, walk.NewAtomStringRune(i))
- }
- } else {
- froms = append(froms, walk.NewAtomStringRune(fromsStart))
- }
- }
- if len(froms) == 0 {
- panic("Missing from part of range expression")
- }
+// func parseRangeSubex(l RuneReader) map[walk.AtomOLD]walk.AtomOLD {
+// // TODO escaping
+// parts := make(map[walk.AtomOLD]walk.AtomOLD)
+// var froms []walk.AtomOLD
+// var hasTo bool
+// for {
+// fromsStart := l.Next()
+// if fromsStart == ']' {
+// hasTo = false
+// break
+// } else if fromsStart == '=' {
+// hasTo = true
+// break
+// } else if fromsStart == '`' {
+// literals := parseNonStringLiteral(l)
+// froms = append(froms, literals...)
+// continue
+// } else if fromsStart == '"' {
+// froms = append(froms, walk.NewAtomStringTerminal())
+// continue
+// }
+// if accept(l, "-") {
+// fromsEnd := l.Next()
+// if fromsEnd == ']' || fromsEnd == '=' {
+// l.Rewind()
+// fromsEnd = fromsStart
+// }
+// for i := fromsStart; i <= fromsEnd; i += 1 {
+// froms = append(froms, walk.NewAtomStringRune(i))
+// }
+// } else {
+// froms = append(froms, walk.NewAtomStringRune(fromsStart))
+// }
+// }
+// if len(froms) == 0 {
+// panic("Missing from part of range expression")
+// }
- var tos []walk.Atom
- if hasTo {
- for {
- tosStart := l.Next()
- if tosStart == ']' {
- break
- } else if tosStart == '`' {
- literals := parseNonStringLiteral(l)
- tos = append(tos, literals...)
- continue
- } else if tosStart == '"' {
- tos = append(tos, walk.NewAtomStringTerminal())
- continue
- }
- if accept(l, "-") {
- tosEnd := l.Next()
- if tosEnd == ']' {
- l.Rewind()
- tosEnd = tosStart
- }
- for i := tosStart; i <= tosEnd; i += 1 {
- tos = append(tos, walk.NewAtomStringRune(i))
- }
- } else {
- tos = append(tos, walk.NewAtomStringRune(tosStart))
- }
- }
- } else {
- tos = froms
- }
- if len(tos) == 0 {
- panic("Missing to part of range expression")
- }
+// var tos []walk.AtomOLD
+// if hasTo {
+// for {
+// tosStart := l.Next()
+// if tosStart == ']' {
+// break
+// } else if tosStart == '`' {
+// literals := parseNonStringLiteral(l)
+// tos = append(tos, literals...)
+// continue
+// } else if tosStart == '"' {
+// tos = append(tos, walk.NewAtomStringTerminal())
+// continue
+// }
+// if accept(l, "-") {
+// tosEnd := l.Next()
+// if tosEnd == ']' {
+// l.Rewind()
+// tosEnd = tosStart
+// }
+// for i := tosStart; i <= tosEnd; i += 1 {
+// tos = append(tos, walk.NewAtomStringRune(i))
+// }
+// } else {
+// tos = append(tos, walk.NewAtomStringRune(tosStart))
+// }
+// }
+// } else {
+// tos = froms
+// }
+// if len(tos) == 0 {
+// panic("Missing to part of range expression")
+// }
- for i, from := range froms {
- parts[from] = tos[i % len(tos)]
- }
- return parts
-}
+// for i, from := range froms {
+// parts[from] = tos[i % len(tos)]
+// }
+// return parts
+// }
-func parseSubex(l RuneReader, minPower int) SubexAST {
+func parseSubex(l RuneReader, minPower int, runic bool) SubexAST {
var lhs SubexAST
r := l.Next()
switch r {
case eof:
return nil
case '(':
- lhs = parseSubex(l, 0)
+ lhs = parseSubex(l, 0, runic)
if !accept(l, ")") {
panic("Missing matching )")
}
- case '[':
- rangeParts := parseRangeSubex(l)
- lhs = SubexASTRange {rangeParts}
- case ')', '|', ';', '{', '+', '-', '*', '/', '!', '$', ':':
+ // TODO
+ // case '[':
+ // rangeParts := parseRangeSubex(l)
+ // lhs = SubexASTRange {rangeParts}
+ case ')', ']', '"', '|', ';', '{', '+', '-', '*', '/', '!', '$':
l.Rewind()
- return nil
+ return SubexASTEmpty{}
case '=':
replacement := parseReplacement(l)
lhs = SubexASTOutput{replacement}
@@ -291,47 +283,80 @@ func parseSubex(l RuneReader, minPower int) SubexAST {
literals := parseNonStringLiteral(l)
lhs = SubexASTEmpty{}
for _, literal := range literals {
- lhs = SubexASTConcat {lhs, SubexASTCopyAtom {literal}}
+ lhs = SubexASTConcat {lhs, SubexASTCopyScalar {literal}}
}
- case '^':
- replacement := parseReplacement(l)
- replacement = append(
- []OutputContentAST{OutputAtomLiteralAST {walk.NewAtomStringTerminal()}},
- replacement...
- )
- replacement = append(
- replacement,
- OutputAtomLiteralAST {walk.NewAtomStringTerminal()},
- )
- lhs = SubexASTOutput {replacement}
+ // case '^':
+ // replacement := parseReplacement(l)
+ // replacement = append(
+ // []OutputContentAST{OutputValueLiteralAST {walk.NewAtomStringTerminal()}},
+ // replacement...
+ // )
+ // replacement = append(
+ // replacement,
+ // OutputValueLiteralAST {walk.NewAtomStringTerminal()},
+ // )
+ // lhs = SubexASTOutput {replacement}
case '.':
- lhs = SubexASTCopyAny{}
+ if runic {
+ lhs = SubexASTCopyAnyRune{}
+ } else {
+ lhs = SubexASTCopyAnyValue{}
+ }
case '?':
lhs = SubexASTCopyBool{}
case '%':
lhs = SubexASTCopyNumber{}
- case '_':
- lhs = SubexASTCopyStringAtom{}
- case '#':
- lhs = SubexASTCopyString{}
- case ',':
- lhs = SubexASTCopyValue{}
- case '"':
- lhs = SubexASTCopyAtom {walk.NewAtomStringTerminal()}
+ case ':':
+ if runic {
+ lhs = SubexASTCopyRune {':'}
+ } else {
+ if !accept(l, "[") {
+ panic("Missing [ after :")
+ }
+ lhs = SubexASTEnterArray {parseSubex(l, 0, runic)}
+ if !accept(l, "]") {
+ panic("Missing matching ]")
+ }
+ }
case '~':
- literals := parseNonStringLiteral(l)
- var replacement []OutputContentAST
- for _, literal := range literals {
- replacement = append(replacement, OutputAtomLiteralAST {literal})
+ if runic {
+ lhs = SubexASTCopyRune {'~'}
+ } else {
+ if !accept(l, "\"") {
+ panic("Missing \" after ~")
+ }
+ lhs = SubexASTEnterString {parseSubex(l, 0, true)}
+ if !accept(l, "\"") {
+ panic("Missing matching \"")
+ }
}
- lhs = SubexASTOutput {replacement}
+ // TODO
+ // case '_':
+ // lhs = SubexASTCopyStringAtom{}
+ // case '#':
+ // lhs = SubexASTCopyString{}
+ // case ',':
+ // lhs = SubexASTCopyValue{}
+ // case '"':
+ // lhs = SubexASTCopyScalar {walk.NewAtomStringTerminal()}
+ // case '~':
+ // literals := parseNonStringLiteral(l)
+ // var replacement []OutputContentAST
+ // for _, literal := range literals {
+ // replacement = append(replacement, OutputValueLiteralAST {literal})
+ // }
+ // lhs = SubexASTOutput {replacement}
default:
- lhs = SubexASTCopyAtom{Atom: walk.NewAtomStringRune(r)}
+ if runic {
+ lhs = SubexASTCopyRune {r}
+ } else {
+ panic("Tried to match rune outside of string")
+ }
}
loop: for {
if minPower <= 20 {
- next := parseSubex(l, 21)
- if next != nil {
+ next := parseSubex(l, 21, runic)
+ if next != nil && (next != SubexASTEmpty{}) {
lhs = SubexASTConcat{lhs, next}
continue loop
}
@@ -366,20 +391,14 @@ func parseSubex(l RuneReader, minPower int) SubexAST {
Slot: slot,
}
}
- case r == ':' && minPower <= 4:
- replacement := parseReplacement(l)
- lhs = SubexASTConcat {
- SubexASTDiscard {lhs},
- SubexASTOutput {replacement},
- }
case r == '|' && minPower <= 8:
- rhs := parseSubex(l, 9)
+ rhs := parseSubex(l, 9, runic)
if rhs == nil {
panic("Missing subex after |")
}
lhs = SubexASTOr{lhs, rhs}
case r == ';' && minPower <= 10:
- rhs := parseSubex(l, 11)
+ rhs := parseSubex(l, 11, runic)
if rhs == nil {
panic("Missing subex after ;")
}
@@ -396,7 +415,7 @@ func parseSubex(l RuneReader, minPower int) SubexAST {
}
func Parse(l RuneReader) SubexAST {
- ast := parseSubex(l, 0)
+ ast := parseSubex(l, 0, false)
if ast == nil {
return SubexASTEmpty{}
}