diff options
author | Charlie Stanton <charlie@shtanton.xyz> | 2023-02-19 08:59:16 +0000 |
---|---|---|
committer | Charlie Stanton <charlie@shtanton.xyz> | 2023-02-19 08:59:16 +0000 |
commit | fba426b3910f16c8abc6f819da3138f03e5f0b1a (patch) | |
tree | 9ce7473194a7ac4d97278cff3e95e58fd3277c72 /subex/parse.go | |
parent | 3636825c64bb6c172b0858d7a08c30acfcd68bdd (diff) | |
download | stred-go-fba426b3910f16c8abc6f819da3138f03e5f0b1a.tar |
Introduces subex processing
Doesn't integrate it at all yet
Diffstat (limited to 'subex/parse.go')
-rw-r--r-- | subex/parse.go | 175 |
1 files changed, 175 insertions, 0 deletions
diff --git a/subex/parse.go b/subex/parse.go new file mode 100644 index 0000000..af575eb --- /dev/null +++ b/subex/parse.go @@ -0,0 +1,175 @@ +package subex + +func parseReplacement(l *RuneReader) (output []TransducerOutput) { + loop: for { + r := l.next() + switch r { + case eof: + panic("Missing closing \"") + case '"': + break loop + case '$': + slot := l.next() + if slot == eof { + panic("Missing slot character") + } + output = append(output, TransducerReplacementLoad(slot)) + default: + output = append(output, TransducerReplacementRune(r)) + } + } + return output +} + +func parseRangeSubex(l *RuneReader) map[rune]rune { + parts := make(map[rune]rune) + var froms []rune + var hasTo bool + for { + fromsStart := l.next() + if fromsStart == ']' { + hasTo = false + break + } else if fromsStart == '=' { + hasTo = true + break + } + var fromsEnd rune + if l.accept("-") { + fromsEnd = l.next() + if fromsEnd == ']' || fromsEnd == '=' { + l.rewind() + fromsEnd = fromsStart + } + } else { + fromsEnd = fromsStart + } + for i := fromsStart; i <= fromsEnd; i += 1 { + froms = append(froms, i) + } + } + if len(froms) == 0 { + panic("Missing from part of range expression") + } + + var tos []rune + if hasTo { + for { + tosStart := l.next() + if tosStart == ']' { + break + } + var tosEnd rune + if l.accept("-") { + tosEnd = l.next() + if tosEnd == ']' { + l.rewind() + tosEnd = tosStart + } + } else { + tosEnd = tosStart + } + for i := tosStart; i <= tosEnd; i += 1 { + tos = append(tos, i) + } + } + } else { + tos = froms + } + if len(tos) == 0 { + panic("Missing to part of range expression") + } + + for i, from := range froms { + parts[from] = tos[i % len(tos)] + } + return parts +} + +func parseSubex(l *RuneReader, minPower int) SubexAST { + var lhs SubexAST + r := l.next() + switch r { + case eof: + return nil + case '(': + lhs = parseSubex(l, 0) + if !l.accept(")") { + panic("Missing matching )") + } + case '[': + rangeParts := parseRangeSubex(l) + lhs = SubexASTRange {rangeParts} + case ')', '*', '-', '|', '!', '?', ';': + l.rewind() + return nil + case '$': + slot := l.next() + if slot == eof { + panic("Missing slot character") + } + match := parseSubex(l, 100) + if match == nil { + panic("Missing regex for store") + } + lhs = SubexASTStore{ + match: match, + slot: slot, + } + case '"': + replacement := parseReplacement(l) + lhs = SubexASTOutput{replacement} + case '.': + lhs = SubexASTCopyAny{} + default: + lhs = SubexASTCopyRune(r) + } + loop: for { + if minPower <= 0 { + next := parseSubex(l, 1) + if next != nil { + lhs = SubexASTConcat{lhs, next} + continue loop + } + } + r := l.next() + switch { + case r == '*' && minPower <= 8: + lhs = SubexASTMaximise{lhs} + case r == '-' && minPower <= 8: + lhs = SubexASTMinimise{lhs} + case r == '!' && minPower <= 8: + lhs = SubexASTTry{lhs} + case r == '?' && minPower <= 8: + lhs = SubexASTMaybe{lhs} + case r == '|' && minPower <= 4: + rhs := parseSubex(l, 5) + if rhs == nil { + panic("Missing subex after |") + } + lhs = SubexASTOr{lhs, rhs} + case r == ';' && minPower <= 2: + rhs := parseSubex(l, 3) + if rhs == nil { + panic("Missing subex after ;") + } + lhs = SubexASTJoin{ + content: lhs, + delimiter: rhs, + } + default: + l.rewind() + break loop + } + } + return lhs +} + +func Parse(input string) SubexAST { + l := RuneReader { + input: input, + pos: 0, + width: 0, + } + return parseSubex(&l, 0) +} |