diff options
-rw-r--r-- | example.cudl | 65 | ||||
-rw-r--r-- | grammar.ebnf | 14 | ||||
-rw-r--r-- | grammar_no_schema.ebnf | 14 | ||||
-rw-r--r-- | spec.txt | 75 |
4 files changed, 168 insertions, 0 deletions
diff --git a/example.cudl b/example.cudl new file mode 100644 index 0000000..3ee0ab6 --- /dev/null +++ b/example.cudl @@ -0,0 +1,65 @@ +# Without a schema + +{ + people: [{ + name: "Jack Smith" + sequence: [2 1 3 4 7 11 18 29 47] + language: "zig" + "hello world":| + """ + const std = @import("std"); + + pub fn main() !void { + const stdout = try std.io.getStdOut(); + try stdout.write("hello world\n"); + } + """ + "is me": %true + } { + name: "John Doe" + sequence: [2 3 5 7 11 13 17 19 23] + language: "C" + "hello world":| + """ + int puts(const char *s); + + int main(int argc, char *argv[]) { + puts("hello world"); + return 0; + } + """ + "is me": %false + }] +} + +# With a schema + +people: [ + name: Jack Smith + sequence: [2 1 3 4 7 11 18 29 47] + language: zig + "hello world":| + """ + const std = @import("std"); + + pub fn main() !void { + const stdout = try std.io.getStdOut(); + try stdout.write("hello world\n"); + } + """ + "is me": %true + ; + name: John Doe + sequence: [2 3 5 7 11 13 17 19 23] + language: C + "hello world":| + """ + int puts(const char *s); + + int main(int argc, char *argv[]) { + puts("hello world"); + return 0; + } + """ + "is me": %false +] diff --git a/grammar.ebnf b/grammar.ebnf new file mode 100644 index 0000000..e97bfad --- /dev/null +++ b/grammar.ebnf @@ -0,0 +1,14 @@ +value = map | array | string | multiline_string | number | boolean | null ; +map = "{", {field}, "}" | {field}, ";" | {field} >> (EOF | "]") ; +field = key, ":", value ; +key = '"', {-'"' | '""'}, '"' | (alphanumeric | "_" | "-"), {alphanumeric | "_" | "-"} ; +array = "[", {value}, "]" ; +string = '"', {-'"' | '\"'}, -("\" | '"'), '"' | '"\""' | '""' | {alphanumeric | " " | "_"}, "," | {alphanumeric | " " | "_"} >> inline_end ; +inline_end = "\r" | "\n" | "]" | "}" | ";" | EOF ; +multiline_string = "|", end_sequence, "\n", ?any character?, "\n", end_sequence ; +end_sequence = {-("\r" | "\n")} ; +integer = ["-"], {digit}, ["e", {digit}] ; +float = ["-"], {digit}, 
[".", {digit}], ["e", {digit}] ; +number = (integer | float), "," | (integer | float) >> (inline_end | whitespace) ; +boolean = "%true" | "%false" ; +null = "%null" ; diff --git a/grammar_no_schema.ebnf b/grammar_no_schema.ebnf new file mode 100644 index 0000000..629c59a --- /dev/null +++ b/grammar_no_schema.ebnf @@ -0,0 +1,14 @@ +value = map | array | string | multiline_string | number | boolean | null ; +map = "{", {field}, "}" ; +field = key, ":", value ; +key = '"', {-'"' | '""'}, '"' | (alphanumeric | "_" | "-"), {alphanumeric | "_" | "-"} ; +array = "[", {value}, "]" ; +string = '"', {-'"' | '\"'}, -("\" | '"'), '"' | '"\""' | '""' ; +inline_end = "\r" | "\n" | "]" | "}" | ";" | EOF ; +multiline_string = "|", end_sequence, "\n", ?any character?, "\n", end_sequence ; +end_sequence = {-("\r" | "\n")} ; +integer = ["-"], {digit}, ["e", {digit}] ; +float = ["-"], {digit}, [".", {digit}], ["e", {digit}] ; +number = (integer | float), "," | (integer | float) >> (inline_end | whitespace) ; +boolean = "%true" | "%false" ; +null = "%null" ; diff --git a/spec.txt b/spec.txt new file mode 100644 index 0000000..1efee05 --- /dev/null +++ b/spec.txt @@ -0,0 +1,75 @@ +# Spec attempt 5 + +* Every file contains 1 value, which may have other values nested inside it. +* A schema can be provided when a file is parsed which gives its value a type. +* The schema can contain the types map, array, string, int, float, bool, nullable and "any" which will infer the type while parsing. +* Each value must have certain characters around it depending on its type. It must have a preceding character or starting character if its type is not given in the schema. +* Whitespace at the start of the file and after a value is ignored, however "succeeded by" always refers to the character immediately following the value, with no whitespace in between. 
+* Unless stated otherwise, the preceding and succeeding characters are part of the syntax of the value (is consumed by the parser when the value is consumed). + +An inline-end character is one of the following: +* linefeed +* carriage return +* ] +* } +* EOF + +## Map + +A sequence of key:value pairs. No delimiter is needed as every value will have an ending marker. +If a key starts with a quote then it continues until another quote ends it. Quotes can be escaped by using 2 of them. +Otherwise a key must match [A-Za-z0-9_-]+ + +A map can be preceded by a { and succeeded by a } +A map can be preceded by nothing and succeeded by a semicolon or EOF. +A map can be preceded by nothing and succeeded by a ], which is not consumed. + +## Array + +A sequence of values, no delimiter. + +An array is preceded by a [ and succeeded by a ] + +## String + +A string can be preceded by a " and succeeded by a " +A string can be preceded by nothing and succeeded by a comma +A string can be preceded by nothing and succeeded by an inline-end character, which is not consumed. +An unquoted string must match [A-Za-z0-9_ ]+ +The following escape sequences are available for quoted strings and keys: +``` +\b - backspace +\t - tab +\n - linefeed +\r - carriage return +\" - quote +\\ - backslash +\uXXXX - unicode XXXX +``` + +### Multiline string + +A string can also be a multiline string. If a value starts with a pipe, it should be followed by a newline. Then there should be a sequence of 0 or more whitespace characters, which defines the indent sequence, followed by a sequence of non-whitespace characters (the terminating sequence) then a newline. Then every line must start with the indent sequence (which is not part of the string value) and the multiline string runs until it encounters the indent sequence followed by the terminating sequence on its own line. The final newline is not included in the value of the string. 
+ +### Integer/Float + +A schema can declare a value to be an integer or float. + +An integer matches -?[0-9]+(e[0-9]+)? +A float matches -?[0-9]+(\.[0-9]+)?(e[0-9]+)? + +## Number + +A number starts with a digit or - and is succeeded by: +* A comma, which is consumed +* An inline-end character or whitespace, which are not consumed + +Numbers can be integers or floats depending on the implementation. + +## Boolean + +A boolean is either %true or %false. It is not succeeded by anything. + +## Null + +A null is written as %null |