From 8e9f0b186745afd51579d2a6136a57705efc7574 Mon Sep 17 00:00:00 2001 From: Charlie Stanton Date: Tue, 18 Apr 2023 12:47:55 +0100 Subject: Adds the repeat construct, obsoleting maximise, minimise, try, maybe and probably more The repeat construct repeats a subex a number of times, this number is based on a provided list which is ordered by priority and can be unbounded. --- subex/parse.go | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) (limited to 'subex/parse.go') diff --git a/subex/parse.go b/subex/parse.go index f2c77bc..24ae082 100644 --- a/subex/parse.go +++ b/subex/parse.go @@ -29,6 +29,68 @@ func parseTerminatorAtomLiteral(termType rune, l *RuneReader) walk.Atom { } } +func charIsDigit(c rune) bool { + return '0' <= c && c <= '9' +} + +// Parse a positive integer, reads digits 0-9 and stops at the first non-digit +func parseInt(l *RuneReader) (output int) { + for { + char := l.next() + if charIsDigit(char) { + output = output * 10 + int(char - '0') + } else { + break + } + } + l.rewind() + return output +} + +// Having just read {, read in and parse the range contents +func parseRepeatRange(l *RuneReader) (output []ConvexRange) { + loop: for { + var start, end int + char := l.next() + l.rewind() + if char == '-' { + start = -1 + } else { + start = parseInt(l) + } + switch l.next() { + case ',': + output = append(output, ConvexRange{start, start}) + continue loop + case '-': + char := l.next() + if charIsDigit(char) { + l.rewind() + end = parseInt(l) + } else { + l.rewind() + end = -1 + } + case '}': + output = append(output, ConvexRange{start, start}) + break loop + default: + panic("Invalid character in repeat specifier") + } + switch l.next() { + case ',': + output = append(output, ConvexRange{start, end}) + continue loop + case '}': + output = append(output, ConvexRange{start, end}) + break loop + default: + panic("Invalid character in repeat specifier") + } + } + return output +} + func parseReplacement(l *RuneReader) (output []OutputContent) { // TODO escaping loop: for { @@ -144,7 +206,7 @@ func parseSubex(l *RuneReader, minPower int) SubexAST { case '[': rangeParts := parseRangeSubex(l) lhs = SubexASTRange {rangeParts} - case ')', '*', '-', '|', '!', '?', ';': + case ')', '*', '-', '|', '!', '?', ';', '{': l.rewind() return nil case '$': @@ -180,6 +242,11 @@ func parseSubex(l *RuneReader, minPower int) SubexAST { } r := l.next() switch { + case r == '{' && minPower <= 8: + lhs = SubexASTRepeat{ + content: lhs, + acceptable: parseRepeatRange(l), + } case r == '*' && minPower <= 8: lhs = SubexASTMaximise{lhs} case r == '-' && minPower <= 8: @@ -203,6 +270,13 @@ func parseSubex(l *RuneReader, minPower int) SubexAST { content: lhs, delimiter: rhs, } + //case r == '+' && minPower <= 6: + // rhs := parseSubex(l, 7) + // if rhs == nil { + // panic("Missing subex after +") + // } + // // TODO: Implement this. Runs subex on the left, then subex on the right, then sums the outputs of each and outputs that + // lhs = SubexASTAdd{lhs, rhs} default: l.rewind() break loop -- cgit v1.2.3