From 10f847acc7087317b0fbe20b7cf3307a0fafab8a Mon Sep 17 00:00:00 2001
From: Charlie Stanton <charlie@shtanton.xyz>
Date: Wed, 19 Apr 2023 14:34:22 +0100
Subject: Changes the parsing API for subex to be more suitable to being part
 of a larger program

---
 subex/lex.go   | 16 ++---------
 subex/main.go  |  7 ++++-
 subex/parse.go | 91 ++++++++++++++++++++++++++++++++--------------------------
 3 files changed, 60 insertions(+), 54 deletions(-)

(limited to 'subex')

diff --git a/subex/lex.go b/subex/lex.go
index f020b23..74bf370 100644
--- a/subex/lex.go
+++ b/subex/lex.go
@@ -5,11 +5,11 @@ import (
 )
 
 const eof rune = -1
-type RuneReader struct {
+type StringRuneReader struct { // reads runes out of an in-memory string; *StringRuneReader is what Main passes to Parse as a RuneReader
 	input string
 	pos, width int
 }
-func (l *RuneReader) next() rune {
+func (l *StringRuneReader) Next() rune {
 	if l.pos >= len(l.input) {
 		l.width = 0
 		return eof
@@ -19,16 +19,6 @@ func (l *RuneReader) next() rune {
 	l.pos += l.width
 	return r
 }
-func (l *RuneReader) accept(chars string) bool {
-	r := l.next()
-	for _, char := range chars {
-		if char == r {
-			return true
-		}
-	}
-	l.rewind()
-	return false
-}
-func (l *RuneReader) rewind() {
+func (l *StringRuneReader) Rewind() { // undo the latest Next: step pos back by the width Next recorded (Next sets width to 0 at eof)
 	l.pos -= l.width
 }
diff --git a/subex/main.go b/subex/main.go
index 091625b..9824f10 100644
--- a/subex/main.go
+++ b/subex/main.go
@@ -131,7 +131,12 @@ func Main() {
 		panic("Expected: program [subex]")
 	}
 	program := os.Args[1]
-	ast := Parse(program)
+	reader := &StringRuneReader {
+		input: program,
+		pos: 0,
+		width: 0,
+	}
+	ast := Parse(reader)
 	transducer := CompileTransducer(ast)
 
 	stdin := bufio.NewReader(os.Stdin);
diff --git a/subex/parse.go b/subex/parse.go
index d6ef995..e6efc2e 100644
--- a/subex/parse.go
+++ b/subex/parse.go
@@ -4,8 +4,24 @@ import (
 	"main/walk"
 )
 
-func expectBracket(l *RuneReader, ifLeft walk.Atom, ifRight walk.Atom) walk.Atom {
-	switch l.next() {
+type RuneReader interface { // rune source consumed by Parse and the parse* helpers in this file
+	Next() rune // returns the next rune and advances; the parsers here compare the result against eof
+	Rewind()    // un-reads the rune from the latest Next, as StringRuneReader does; NOTE(review): calling it twice in a row is not shown to be supported
+}
+
+func accept(l RuneReader, chars string) bool { // reports whether the next rune from l is one of the runes in chars
+	r := l.Next()
+	for _, char := range chars {
+		if char == r {
+			return true // matched: the rune stays consumed
+		}
+	}
+	l.Rewind() // no match: un-read the rune so the caller's next read sees it again
+	return false
+}
+
+func expectBracket(l RuneReader, ifLeft walk.Atom, ifRight walk.Atom) walk.Atom {
+	switch l.Next() {
 		case '(':
 			return ifLeft
 		case ')':
@@ -16,7 +32,7 @@ func expectBracket(l *RuneReader, ifLeft walk.Atom, ifRight walk.Atom) walk.Atom
 }
 
 // Having just read termType, read in a bracket and return the corresponding Atom
-func parseTerminatorAtomLiteral(termType rune, l *RuneReader) walk.Atom {
+func parseTerminatorAtomLiteral(termType rune, l RuneReader) walk.Atom {
 	switch termType {
 		case '@':
 			return expectBracket(l, walk.ArrayBegin, walk.ArrayEnd)
@@ -34,41 +50,41 @@ func charIsDigit(c rune) bool {
 }
 
 // Parse a positive integer, reads digits 0-9 and stops at the first non-digit
-func parseInt(l *RuneReader) (output int) {
+func parseInt(l RuneReader) (output int) { // yields 0 when the first rune is not a digit; NOTE(review): no overflow guard on long digit runs
 	for {
-		char := l.next()
+		char := l.Next() // read one rune; the loop ends at the first non-digit
 		if charIsDigit(char) {
 			output = output * 10 + int(char - '0')
 		} else {
 			break
 		}
 	}
-	l.rewind()
+	l.Rewind() // un-read the non-digit that ended the loop so the caller can consume it
 	return output
 }
 
 // Having just read {, read in and parse the range contents
-func parseRepeatRange(l *RuneReader) (output []ConvexRange) {
+func parseRepeatRange(l RuneReader) (output []ConvexRange) {
 	loop: for {
 		var start, end int
-		char := l.next()
-		l.rewind()
+		char := l.Next()
+		l.Rewind()
 		if char == '-' {
 			start = -1
 		} else {
 			start = parseInt(l)
 		}
-		switch l.next() {
+		switch l.Next() {
 			case ',':
 				output = append(output, ConvexRange{start, start})
 				continue loop
 			case '-':
-				char := l.next()
+				char := l.Next()
 				if charIsDigit(char) {
-					l.rewind()
+					l.Rewind()
 					end = parseInt(l)
 				} else {
-					l.rewind()
+					l.Rewind()
 					end = -1
 				}
 			case '}':
@@ -77,7 +93,7 @@ func parseRepeatRange(l *RuneReader) (output []ConvexRange) {
 			default:
 				panic("Invalid character in repeat specifier")
 		}
-		switch l.next() {
+		switch l.Next() {
 			case ',':
 				output = append(output, ConvexRange{start, end})
 				continue loop
@@ -91,17 +107,17 @@ func parseRepeatRange(l *RuneReader) (output []ConvexRange) {
 	return output
 }
 
-func parseReplacement(l *RuneReader) (output []OutputContent) {
+func parseReplacement(l RuneReader) (output []OutputContent) {
 	// TODO escaping
 	loop: for {
-		r := l.next()
+		r := l.Next()
 		switch r {
 			case eof:
 				panic("Missing closing \"")
 			case '"':
 				break loop
 			case '$':
-				slot := l.next()
+				slot := l.Next()
 				if slot == eof {
 					panic("Missing slot character")
 				}
@@ -116,13 +132,13 @@ func parseReplacement(l *RuneReader) (output []OutputContent) {
 }
 
 // Parse the contents of a range subex [] into a map
-func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom {
+func parseRangeSubex(l RuneReader) map[walk.Atom]walk.Atom {
 	// TODO escaping
 	parts := make(map[walk.Atom]walk.Atom)
 	var froms []walk.Atom
 	var hasTo bool
 	for {
-		fromsStart := l.next()
+		fromsStart := l.Next()
 		if fromsStart == ']' {
 			hasTo = false
 			break
@@ -136,10 +152,10 @@ func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom {
 				continue
 			}
 		}
-		if l.accept("-") {
-			fromsEnd := l.next()
+		if accept(l, "-") {
+			fromsEnd := l.Next()
 			if fromsEnd == ']' || fromsEnd == '=' {
-				l.rewind()
+				l.Rewind()
 				fromsEnd = fromsStart
 			}
 			for i := fromsStart; i <= fromsEnd; i += 1 {
@@ -156,7 +172,7 @@ func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom {
 	var tos []walk.Atom
 	if hasTo {
 		for {
-			tosStart := l.next()
+			tosStart := l.Next()
 			if tosStart == ']' {
 				break
 			} else {
@@ -166,10 +182,10 @@ func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom {
 					continue
 				}
 			}
-			if l.accept("-") {
-				tosEnd := l.next()
+			if accept(l, "-") {
+				tosEnd := l.Next()
 				if tosEnd == ']' {
-					l.rewind()
+					l.Rewind()
 					tosEnd = tosStart
 				}
 				for i := tosStart; i <= tosEnd; i += 1 {
@@ -192,22 +208,22 @@ func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom {
 	return parts
 }
 
-func parseSubex(l *RuneReader, minPower int) SubexAST {
+func parseSubex(l RuneReader, minPower int) SubexAST {
 	var lhs SubexAST
-	r := l.next()
+	r := l.Next()
 	switch r {
 		case eof:
 			return nil
 		case '(':
 			lhs = parseSubex(l, 0)
-			if !l.accept(")") {
+			if !accept(l, ")") {
 				panic("Missing matching )")
 			}
 		case '[':
 			rangeParts := parseRangeSubex(l)
 			lhs = SubexASTRange {rangeParts}
 		case ')', '|', ';', '{', '+', '$':
-			l.rewind()
+			l.Rewind()
 			return nil
 		case '"':
 			replacement := parseReplacement(l)
@@ -227,7 +243,7 @@ func parseSubex(l *RuneReader, minPower int) SubexAST {
 				continue loop
 			}
 		}
-		r := l.next()
+		r := l.Next()
 		switch {
 			case r == '{' && minPower <= 8:
 				lhs = SubexASTRepeat {
@@ -245,7 +261,7 @@ func parseSubex(l *RuneReader, minPower int) SubexAST {
 			case r == '!' && minPower <= 8:
 				lhs = SubexASTNot {lhs}
 			case r == '$' && minPower <= 8:
-				slot := l.next()
+				slot := l.Next()
 				if slot == eof {
 					panic("Missing slot character")
 				}
@@ -269,18 +285,13 @@ func parseSubex(l *RuneReader, minPower int) SubexAST {
 					delimiter: rhs,
 				}
 			default:
-				l.rewind()
+				l.Rewind()
 				break loop
 		}
 	}
 	return lhs
 }
 
-func Parse(input string) SubexAST {
-	l := RuneReader {
-		input: input,
-		pos: 0,
-		width: 0,
-	}
-	return parseSubex(&l, 0)
+func Parse(l RuneReader) SubexAST { // exported entry point: parses a subex from l; the caller now constructs the reader (see Main)
+	return parseSubex(l, 0) // start with minPower 0; parseSubex returns nil when the first rune read is eof
+}
-- 
cgit v1.2.3