diff options
author | Charlie Stanton <charlie@shtanton.xyz> | 2023-04-19 14:34:22 +0100 |
---|---|---|
committer | Charlie Stanton <charlie@shtanton.xyz> | 2023-04-19 14:34:22 +0100 |
commit | 10f847acc7087317b0fbe20b7cf3307a0fafab8a (patch) | |
tree | 4abf2f4009fcac55013672e841b2f9d3a2b2fb52 /subex | |
parent | 5089fe689f17a3489b6be76588b8fc7f93d70e55 (diff) | |
download | stred-go-10f847acc7087317b0fbe20b7cf3307a0fafab8a.tar |
Changes the parsing API for subex to make it better suited to use as part of a larger program
Diffstat (limited to 'subex')
-rw-r--r-- | subex/lex.go | 16 | ||||
-rw-r--r-- | subex/main.go | 7 | ||||
-rw-r--r-- | subex/parse.go | 91 |
3 files changed, 60 insertions, 54 deletions
diff --git a/subex/lex.go b/subex/lex.go index f020b23..74bf370 100644 --- a/subex/lex.go +++ b/subex/lex.go @@ -5,11 +5,11 @@ import ( ) const eof rune = -1 -type RuneReader struct { +type StringRuneReader struct { input string pos, width int } -func (l *RuneReader) next() rune { +func (l *StringRuneReader) Next() rune { if l.pos >= len(l.input) { l.width = 0 return eof @@ -19,16 +19,6 @@ func (l *RuneReader) next() rune { l.pos += l.width return r } -func (l *RuneReader) accept(chars string) bool { - r := l.next() - for _, char := range chars { - if char == r { - return true - } - } - l.rewind() - return false -} -func (l *RuneReader) rewind() { +func (l *StringRuneReader) Rewind() { l.pos -= l.width } diff --git a/subex/main.go b/subex/main.go index 091625b..9824f10 100644 --- a/subex/main.go +++ b/subex/main.go @@ -131,7 +131,12 @@ func Main() { panic("Expected: program [subex]") } program := os.Args[1] - ast := Parse(program) + reader := &StringRuneReader { + input: program, + pos: 0, + width: 0, + } + ast := Parse(reader) transducer := CompileTransducer(ast) stdin := bufio.NewReader(os.Stdin); diff --git a/subex/parse.go b/subex/parse.go index d6ef995..e6efc2e 100644 --- a/subex/parse.go +++ b/subex/parse.go @@ -4,8 +4,24 @@ import ( "main/walk" ) -func expectBracket(l *RuneReader, ifLeft walk.Atom, ifRight walk.Atom) walk.Atom { - switch l.next() { +type RuneReader interface { + Next() rune + Rewind() +} + +func accept(l RuneReader, chars string) bool { + r := l.Next() + for _, char := range chars { + if char == r { + return true + } + } + l.Rewind() + return false +} + +func expectBracket(l RuneReader, ifLeft walk.Atom, ifRight walk.Atom) walk.Atom { + switch l.Next() { case '(': return ifLeft case ')': @@ -16,7 +32,7 @@ func expectBracket(l *RuneReader, ifLeft walk.Atom, ifRight walk.Atom) walk.Atom } // Having just read termType, read in a bracket and return the corresponding Atom -func parseTerminatorAtomLiteral(termType rune, l *RuneReader) walk.Atom { 
+func parseTerminatorAtomLiteral(termType rune, l RuneReader) walk.Atom { switch termType { case '@': return expectBracket(l, walk.ArrayBegin, walk.ArrayEnd) @@ -34,41 +50,41 @@ func charIsDigit(c rune) bool { } // Parse a positive integer, reads digits 0-9 and stops at the first non-digit -func parseInt(l *RuneReader) (output int) { +func parseInt(l RuneReader) (output int) { for { - char := l.next() + char := l.Next() if charIsDigit(char) { output = output * 10 + int(char - '0') } else { break } } - l.rewind() + l.Rewind() return output } // Having just read {, read in and parse the range contents -func parseRepeatRange(l *RuneReader) (output []ConvexRange) { +func parseRepeatRange(l RuneReader) (output []ConvexRange) { loop: for { var start, end int - char := l.next() - l.rewind() + char := l.Next() + l.Rewind() if char == '-' { start = -1 } else { start = parseInt(l) } - switch l.next() { + switch l.Next() { case ',': output = append(output, ConvexRange{start, start}) continue loop case '-': - char := l.next() + char := l.Next() if charIsDigit(char) { - l.rewind() + l.Rewind() end = parseInt(l) } else { - l.rewind() + l.Rewind() end = -1 } case '}': @@ -77,7 +93,7 @@ func parseRepeatRange(l *RuneReader) (output []ConvexRange) { default: panic("Invalid character in repeat specifier") } - switch l.next() { + switch l.Next() { case ',': output = append(output, ConvexRange{start, end}) continue loop @@ -91,17 +107,17 @@ func parseRepeatRange(l *RuneReader) (output []ConvexRange) { return output } -func parseReplacement(l *RuneReader) (output []OutputContent) { +func parseReplacement(l RuneReader) (output []OutputContent) { // TODO escaping loop: for { - r := l.next() + r := l.Next() switch r { case eof: panic("Missing closing \"") case '"': break loop case '$': - slot := l.next() + slot := l.Next() if slot == eof { panic("Missing slot character") } @@ -116,13 +132,13 @@ func parseReplacement(l *RuneReader) (output []OutputContent) { } // Parse the contents of a 
range subex [] into a map -func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom { +func parseRangeSubex(l RuneReader) map[walk.Atom]walk.Atom { // TODO escaping parts := make(map[walk.Atom]walk.Atom) var froms []walk.Atom var hasTo bool for { - fromsStart := l.next() + fromsStart := l.Next() if fromsStart == ']' { hasTo = false break @@ -136,10 +152,10 @@ func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom { continue } } - if l.accept("-") { - fromsEnd := l.next() + if accept(l, "-") { + fromsEnd := l.Next() if fromsEnd == ']' || fromsEnd == '=' { - l.rewind() + l.Rewind() fromsEnd = fromsStart } for i := fromsStart; i <= fromsEnd; i += 1 { @@ -156,7 +172,7 @@ func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom { var tos []walk.Atom if hasTo { for { - tosStart := l.next() + tosStart := l.Next() if tosStart == ']' { break } else { @@ -166,10 +182,10 @@ func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom { continue } } - if l.accept("-") { - tosEnd := l.next() + if accept(l, "-") { + tosEnd := l.Next() if tosEnd == ']' { - l.rewind() + l.Rewind() tosEnd = tosStart } for i := tosStart; i <= tosEnd; i += 1 { @@ -192,22 +208,22 @@ func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom { return parts } -func parseSubex(l *RuneReader, minPower int) SubexAST { +func parseSubex(l RuneReader, minPower int) SubexAST { var lhs SubexAST - r := l.next() + r := l.Next() switch r { case eof: return nil case '(': lhs = parseSubex(l, 0) - if !l.accept(")") { + if !accept(l, ")") { panic("Missing matching )") } case '[': rangeParts := parseRangeSubex(l) lhs = SubexASTRange {rangeParts} case ')', '|', ';', '{', '+', '$': - l.rewind() + l.Rewind() return nil case '"': replacement := parseReplacement(l) @@ -227,7 +243,7 @@ func parseSubex(l *RuneReader, minPower int) SubexAST { continue loop } } - r := l.next() + r := l.Next() switch { case r == '{' && minPower <= 8: lhs = SubexASTRepeat { @@ -245,7 +261,7 @@ func parseSubex(l *RuneReader, minPower 
int) SubexAST { case r == '!' && minPower <= 8: lhs = SubexASTNot {lhs} case r == '$' && minPower <= 8: - slot := l.next() + slot := l.Next() if slot == eof { panic("Missing slot character") } @@ -269,18 +285,13 @@ func parseSubex(l *RuneReader, minPower int) SubexAST { delimiter: rhs, } default: - l.rewind() + l.Rewind() break loop } } return lhs } -func Parse(input string) SubexAST { - l := RuneReader { - input: input, - pos: 0, - width: 0, - } - return parseSubex(&l, 0) +func Parse(l RuneReader) SubexAST { + return parseSubex(l, 0) } |