package subex import ( "fmt" "main/walk" "strconv" "strings" ) type Type int const ( AnyType Type = iota ValueType RuneType ) func resolveTypes(t1 Type, t2 Type) Type { if t1 == AnyType { return t2 } if t2 == AnyType { return t1 } if t1 == t2 { return t1 } panic("Types don't match in parser") } type Structure int const ( NoneStructure Structure = iota StringStructure ArrayStructure ArrayValuesStructure MapStructure ) func (s Structure) String() string { switch s { case NoneStructure: return "-" case StringStructure: return "~" case ArrayStructure: return "@" case ArrayValuesStructure: return ":" case MapStructure: return "#" default: panic("Invalid structure") } } type RuneReader interface { Next() rune Rewind() } func accept(l RuneReader, chars string) bool { r := l.Next() for _, char := range chars { if char == r { return true } } l.Rewind() return false } func isNumericRune(r rune) bool { return '0' <= r && r <= '9' || r == '.' } // Having just parsed a `, read until the next ` and parse the contents into a list of non-string atoms func parseScalarLiteral(l RuneReader) (walk.Scalar, bool) { r := l.Next() if isNumericRune(r) { var builder strings.Builder builder.WriteRune(r) for { r := l.Next() if !isNumericRune(r) { l.Rewind() break } builder.WriteRune(r) } numberString := builder.String() number, err := strconv.ParseFloat(numberString, 64) if err != nil { panic("Invalid number literal") } return walk.NumberValue(number), true } switch r { case 'n': if accept(l, "u") && accept(l, "l") && accept(l, "l") { return walk.NullValue{}, true } else { panic("Invalid literal") } case 't': if accept(l, "r") && accept(l, "u") && accept(l, "e") { return walk.BoolValue(true), true } else { panic("Invalid literal") } case 'f': if accept(l, "a") && accept(l, "l") && accept(l, "s") && accept(l, "e") { return walk.BoolValue(false), true } else { panic("Invalid literal") } default: fmt.Printf("%c\n", r) panic("Invalid literal") } } func charIsDigit(c rune) bool { return '0' <= c && c <= '9' } // Parse a positive integer, reads digits 0-9 and stops at the first non-digit func parseInt(l RuneReader) (output int) { for { char := l.Next() if charIsDigit(char) { output = output * 10 + int(char - '0') } else { break } } l.Rewind() return output } // Having just read {, read in and parse the range contents func parseRepeatRange(l RuneReader) (output []ConvexRange) { loop: for { var start, end int char := l.Next() l.Rewind() if char == '-' { start = -1 } else { start = parseInt(l) } switch l.Next() { case ',': output = append(output, ConvexRange{start, start}) continue loop case '-': char := l.Next() if charIsDigit(char) { l.Rewind() end = parseInt(l) } else { l.Rewind() end = -1 } case '}': output = append(output, ConvexRange{start, start}) break loop default: panic("Invalid character in repeat specifier") } switch l.Next() { case ',': output = append(output, ConvexRange{start, end}) continue loop case '}': output = append(output, ConvexRange{start, end}) break loop default: panic("Invalid character in repeat specifier") } } return output } func parseValueReplacement(l RuneReader, end rune) (output SubexAST) { output = SubexASTEmpty{} // TODO escaping // TODO add arrays, maps and strings loop: for { r := l.Next() switch r { case eof: panic("Missing closing `") case ' ': case end: break loop case '$': slot := l.Next() if slot == eof { panic("Missing slot character") } output = SubexASTConcat { First: output, Second: SubexASTOutputValueLoad { slot: slot, }, } // TODO: destructures case '#': if !accept(l, "(") { panic("Missing ( after #") } output = SubexASTConcat { First: output, Second: SubexASTDestructure { Destructure: NoneStructure, Structure: MapStructure, Content: parseValueReplacement(l, ')'), }, } if !accept(l, "#") { panic("Missing # after )") } case '"': output = SubexASTConcat { First: output, Second: SubexASTDestructure { Destructure: NoneStructure, Structure: StringStructure, Content: parseRuneReplacement(l, '"'), }, } default: l.Rewind() scalar, ok := parseScalarLiteral(l) if !ok { panic("Invalid scalar literal") } output = SubexASTConcat { First: output, Second: SubexASTOutputValueLiteral { literal: scalar, }, } } } return output } func parseRuneReplacement(l RuneReader, end rune) (output SubexAST) { output = SubexASTEmpty{} // TODO escaping loop: for { r := l.Next() switch r { case eof: panic("Missing closing `") case end: break loop case '$': slot := l.Next() if slot == eof { panic("Missing slot character") } output = SubexASTConcat { First: output, Second: SubexASTOutputRuneLoad { slot: slot, }, } default: output = SubexASTConcat { First: output, Second: SubexASTOutputRuneLiteral { literal: r, }, } } } return output } // Parse the contents of a range subex [] into a map // func parseRangeSubex(l RuneReader) map[walk.AtomOLD]walk.AtomOLD { // // TODO escaping // parts := make(map[walk.AtomOLD]walk.AtomOLD) // var froms []walk.AtomOLD // var hasTo bool // for { // fromsStart := l.Next() // if fromsStart == ']' { // hasTo = false // break // } else if fromsStart == '=' { // hasTo = true // break // } else if fromsStart == '`' { // literals := parseNonStringLiteral(l) // froms = append(froms, literals...) // continue // } else if fromsStart == '"' { // froms = append(froms, walk.NewAtomStringTerminal()) // continue // } // if accept(l, "-") { // fromsEnd := l.Next() // if fromsEnd == ']' || fromsEnd == '=' { // l.Rewind() // fromsEnd = fromsStart // } // for i := fromsStart; i <= fromsEnd; i += 1 { // froms = append(froms, walk.NewAtomStringRune(i)) // } // } else { // froms = append(froms, walk.NewAtomStringRune(fromsStart)) // } // } // if len(froms) == 0 { // panic("Missing from part of range expression") // } // var tos []walk.AtomOLD // if hasTo { // for { // tosStart := l.Next() // if tosStart == ']' { // break // } else if tosStart == '`' { // literals := parseNonStringLiteral(l) // tos = append(tos, literals...) // continue // } else if tosStart == '"' { // tos = append(tos, walk.NewAtomStringTerminal()) // continue // } // if accept(l, "-") { // tosEnd := l.Next() // if tosEnd == ']' { // l.Rewind() // tosEnd = tosStart // } // for i := tosStart; i <= tosEnd; i += 1 { // tos = append(tos, walk.NewAtomStringRune(i)) // } // } else { // tos = append(tos, walk.NewAtomStringRune(tosStart)) // } // } // } else { // tos = froms // } // if len(tos) == 0 { // panic("Missing to part of range expression") // } // for i, from := range froms { // parts[from] = tos[i % len(tos)] // } // return parts // } func parseDestructure(l RuneReader, destructure Structure, inType Type) (lhs SubexAST, outType Type) { if !accept(l, "(") { panic("Missing ( after destructure start") } var innerInType Type var expectedInType Type switch destructure { case NoneStructure: innerInType = inType expectedInType = inType case StringStructure: innerInType = RuneType expectedInType = ValueType case ArrayStructure: innerInType = ValueType expectedInType = ValueType case ArrayValuesStructure: innerInType = ValueType expectedInType = ValueType case MapStructure: innerInType = ValueType expectedInType = ValueType default: panic("Invalid structure") } resolveTypes(inType, expectedInType) lhs, innerOutType := parseSubex(l, 0, innerInType) if !accept(l, ")") { panic("Missing matching )") } var structure Structure var expectedInnerOutType Type r := l.Next() switch r { case '-': structure = NoneStructure expectedInnerOutType = innerOutType case '~': structure = StringStructure expectedInnerOutType = RuneType case '@': structure = ArrayStructure expectedInnerOutType = ValueType case ':': structure = ArrayValuesStructure expectedInnerOutType = ValueType case '#': structure = MapStructure expectedInnerOutType = ValueType default: panic("Missing matching destructure") } innerOutType = resolveTypes(innerOutType, expectedInnerOutType) switch structure { case NoneStructure: outType = innerOutType case StringStructure: outType = ValueType case ArrayStructure: outType = ValueType case ArrayValuesStructure: outType = ValueType case MapStructure: outType = ValueType } lhs = SubexASTDestructure { Destructure: destructure, Structure: structure, Content: lhs, } return lhs, outType } func parseSubex(l RuneReader, minPower int, inType Type) (lhs SubexAST, outType Type) { start: r := l.Next() switch r { case eof: return nil, inType case '(': lhs, outType = parseSubex(l, 0, inType) if !accept(l, ")") { panic("Missing matching )") } case '-': lhs, outType = parseDestructure(l, NoneStructure, inType) case '~': lhs, outType = parseDestructure(l, StringStructure, inType) case '@': lhs, outType = parseDestructure(l, ArrayStructure, inType) case ':': lhs, outType = parseDestructure(l, ArrayValuesStructure, inType) case '#': lhs, outType = parseDestructure(l, MapStructure, inType) case '"': if inType == ValueType { var innerOutType Type lhs, innerOutType = parseSubex(l, 0, RuneType) if !accept(l, "\"") { panic("Missing matching \"") } resolveTypes(innerOutType, RuneType) lhs = SubexASTDestructure { Destructure: StringStructure, Structure: StringStructure, Content: lhs, } outType = ValueType } else { l.Rewind() return SubexASTEmpty{}, inType } // TODO // case '[': // rangeParts := parseRangeSubex(l) // lhs = SubexASTRange {rangeParts} case ')', ']', '|', ';', '{', '+', '*', '/', '!', '=', '$': l.Rewind() return SubexASTEmpty{}, inType // case '=': // replacement := parseReplacement(l) // lhs = SubexASTOutput{replacement} // case '^': // replacement := parseReplacement(l) // replacement = append( // []OutputContentAST{OutputValueLiteralAST {walk.NewAtomStringTerminal()}}, // replacement... // ) // replacement = append( // replacement, // OutputValueLiteralAST {walk.NewAtomStringTerminal()}, // ) // lhs = SubexASTOutput {replacement} case '.': outType = inType if inType == RuneType { lhs = SubexASTCopyAnyRune{} } else { lhs = SubexASTCopyAnyValue{} } case '?': outType = inType lhs = SubexASTCopyBool{} case '%': outType = inType lhs = SubexASTCopyNumber{} case '`': outType = inType lhs = parseValueReplacement(l, '`') case ' ': if inType == RuneType { outType = RuneType lhs = SubexASTCopyRune {' '} } else { goto start } default: outType = inType if inType == RuneType { lhs = SubexASTCopyRune {r} } else { l.Rewind() scalar, ok := parseScalarLiteral(l) if !ok { panic("Invalid subex") } lhs = SubexASTCopyScalar {scalar} } } loop: for { if minPower <= 20 { next, outType2 := parseSubex(l, 21, inType) if next != nil && (next != SubexASTEmpty{}) { outType = resolveTypes(outType, outType2) lhs = SubexASTConcat{lhs, next} continue loop } } r := l.Next() switch { case r == '{' && minPower <= 4: lhs = SubexASTRepeat { Content: lhs, Acceptable: parseRepeatRange(l), } case r == '+' && minPower <= 4: lhs = SubexASTSum {lhs} resolveTypes(inType, ValueType) outType = resolveTypes(outType, ValueType) case r == '*' && minPower <= 4: lhs = SubexASTProduct {lhs} resolveTypes(inType, ValueType) outType = resolveTypes(outType, ValueType) // case r == '/' && minPower <= 4: // lhs = SubexASTReciprocal {lhs} case r == '!' && minPower <= 4: lhs = SubexASTNot {lhs} resolveTypes(inType, ValueType) outType = resolveTypes(outType, ValueType) // case r == '=' && minPower <= 4: // lhs = SubexASTEqual {lhs} case r == '$' && minPower <= 4: slot := l.Next() if slot == eof { panic("Missing slot character") } if slot == '_' { lhs = SubexASTDiscard { Content: lhs, InnerOutType: outType, } } else { if inType == ValueType { lhs = SubexASTStoreValues { Match: lhs, Slot: slot, } } else { lhs = SubexASTStoreRunes { Match: lhs, Slot: slot, } } } outType = AnyType case r == '|' && minPower <= 8: rhs, outType2 := parseSubex(l, 9, inType) outType = resolveTypes(outType, outType2) if rhs == nil { panic("Missing subex after |") } lhs = SubexASTOr{lhs, rhs} /*case r == ';' && minPower <= 10: rhs := parseSubex(l, 11, inType, outType) if rhs == nil { panic("Missing subex after ;") } lhs = SubexASTJoin { Content: lhs, Delimiter: rhs, }*/ default: l.Rewind() break loop } } return lhs, outType } func Parse(l RuneReader) SubexAST { ast, outType := parseSubex(l, 0, ValueType) outType = resolveTypes(outType, ValueType) if ast == nil { return SubexASTEmpty{} } return ast }