cudl.c (13581B)
1 #include <stdio.h> 2 #include <ctype.h> 3 #include <stdlib.h> 4 #include <string.h> 5 #include "cudl.h" 6 7 #define STRIP_WHITESPACE(text) while (isspace(*(text))) (text)++ 8 9 #define IS_KEY_CHAR(c) (\ 10 'a' <= (c) && (c) <= 'z' ||\ 11 'A' <= (c) && (c) <= 'Z' ||\ 12 '0' <= (c) && (c) <= '9' ||\ 13 (c) == '_' || (c) == '-'\ 14 ) 15 16 #define IS_DIGIT(c) (\ 17 '0' <= (c) && (c) <= '9'\ 18 ) 19 20 int cudl_err = CUDL_OK; 21 22 static char *fread_all(FILE *file) { 23 size_t size; 24 char *buffer; 25 fseek(file, 0, SEEK_END); 26 size = ftell(file); 27 rewind(file); 28 clearerr(file); 29 if ((buffer = malloc(size + 1)) == NULL) 30 return NULL; 31 if (fread(buffer, 1, size, file) != size) { 32 free(buffer); 33 return NULL; 34 } 35 buffer[size] = '\0'; 36 return buffer; 37 } 38 39 void cudl_debug(struct cudl_value value) { 40 int i; 41 switch (value.tag) { 42 case CUDL_TAG_NULL: 43 printf("%%null"); 44 break; 45 case CUDL_TAG_BOOL: 46 if (value.data.boolean) 47 printf("%%true"); 48 else 49 printf("%%false"); 50 break; 51 case CUDL_TAG_NUMBER: 52 printf("%lf", value.data.number); 53 break; 54 case CUDL_TAG_STRING: 55 printf("\"%s\"", value.data.string); 56 break;; 57 case CUDL_TAG_ARRAY: 58 printf("["); 59 for (i = 0; i < value.data.array.length; i++) { 60 if (i != 0) 61 printf(" "); 62 cudl_debug(value.data.array.values[i]); 63 } 64 printf("]"); 65 break; 66 case CUDL_TAG_MAP: 67 printf("{"); 68 for (i = 0; i < value.data.map.length; i++) { 69 if (i != 0) 70 printf(" "); 71 printf("\"%s\": ", value.data.map.fields[i].key); 72 cudl_debug(value.data.map.fields[i].value); 73 } 74 printf("}"); 75 break; 76 default: 77 printf("UNKNOWN"); 78 break; 79 } 80 } 81 82 /* Free all children of the value, not the value itself */ 83 void cudl_deinit_value(struct cudl_value value) { 84 int i; 85 switch (value.tag) { 86 case CUDL_TAG_ARRAY: 87 for (i = 0; i < value.data.array.length; i++) { 88 cudl_deinit_value(value.data.array.values[i]); 89 } 90 free(value.data.array.values); 91 break; 92 case CUDL_TAG_NULL: 93 default: 94 break; 95 } 96 } 97 98 /* Parse a value from input and store it in value. 99 * Return the number of bytes consumed. 100 * Input must end with a null byte */ 101 static size_t parse_value(char *input, struct cudl_value *value); 102 103 static size_t parse_bool_or_null(char *input, struct cudl_value *value) { 104 if (strncmp(input, "null", 4) == 0) { 105 value->tag = CUDL_TAG_NULL; 106 return 4; 107 } 108 if (strncmp(input, "true", 4) == 0) { 109 value->tag = CUDL_TAG_BOOL; 110 value->data.boolean = 1; 111 return 4; 112 } 113 if (strncmp(input, "false", 5) == 0) { 114 value->tag = CUDL_TAG_BOOL; 115 value->data.boolean = 0; 116 return 5; 117 } 118 cudl_err = CUDL_ERR_EXPECTED_BOOL_OR_NULL; 119 return 0; 120 } 121 122 static size_t parse_number(char *input, struct cudl_value *value) { 123 double number; 124 size_t i, exponentStart; 125 int exponent, otherExponent; 126 int exponentUsed; 127 exponentUsed = 0; 128 number = 0; 129 i = input[0] == '-'; 130 for (;; i++) { 131 if (IS_DIGIT(input[i])) { 132 number = number * 10 + (input[i] - '0'); 133 exponent++; 134 continue; 135 } else if (input[i] == '.') { 136 exponent = 0; 137 exponentUsed = 1; 138 continue; 139 } 140 break; 141 } 142 if (input[0] == '-') 143 number = 0 - number; 144 if (!exponentUsed) 145 exponent = 0; 146 otherExponent = 0; 147 if (input[i] == 'e' && (IS_DIGIT(input[i+1]) || (input[i+1] == '-' && IS_DIGIT(input[i+2])))) { 148 i++; 149 exponentStart = i; 150 i += input[i] == '-'; 151 for (;; i++) { 152 if (IS_DIGIT(input[i])) 153 otherExponent = otherExponent * 10 + (input[i] - '0'); 154 else 155 break; 156 } 157 if (input[exponentStart] == '-') 158 otherExponent = 0 - otherExponent; 159 } 160 exponent = exponent - otherExponent; 161 for (; exponent > 0; exponent--) { 162 number /= 10; 163 } 164 for (; exponent < 0; exponent++) { 165 number *= 10; 166 } 167 value->tag = CUDL_TAG_NUMBER; 168 value->data.number = number; 169 return i; 170 } 171 172 /* Convert UCS character to utf-8 bytes. 173 * Return number of bytes generated. 174 * Sets cudl_error on error. 175 * Shamelessly lifted from https://github.com/cktan/tomc99 */ 176 static size_t cudl_ucs_to_utf8(int64_t ucs, char utf8[6]) { 177 if ( 178 0xd800 <= ucs && ucs <= 0xdfff || 179 0xfffe <= ucs && ucs <= 0xffff || 180 ucs < 0 181 ) { 182 cudl_err = CUDL_ERR_UNRECOGNISED_UNICODE; 183 return 0; 184 } 185 186 /* 0x00000000 - 0x0000007F: 187 0xxxxxxx 188 */ 189 if (ucs <= 0x7F) { 190 utf8[0] = (unsigned char) ucs; 191 return 1; 192 } 193 194 /* 0x00000080 - 0x000007FF: 195 110xxxxx 10xxxxxx 196 */ 197 if (ucs <= 0x000007FF) { 198 utf8[0] = 0xc0 | (ucs >> 6); 199 utf8[1] = 0x80 | (ucs & 0x3f); 200 return 2; 201 } 202 203 /* 0x00000800 - 0x0000FFFF: 204 1110xxxx 10xxxxxx 10xxxxxx 205 */ 206 if (ucs <= 0x0000FFFF) { 207 utf8[0] = 0xe0 | (ucs >> 12); 208 utf8[1] = 0x80 | ((ucs >> 6) & 0x3f); 209 utf8[2] = 0x80 | (ucs & 0x3f); 210 return 3; 211 } 212 213 /* 0x00010000 - 0x001FFFFF: 214 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 215 */ 216 if (ucs <= 0x001FFFFF) { 217 utf8[0] = 0xf0 | (ucs >> 18); 218 utf8[1] = 0x80 | ((ucs >> 12) & 0x3f); 219 utf8[2] = 0x80 | ((ucs >> 6) & 0x3f); 220 utf8[3] = 0x80 | (ucs & 0x3f); 221 return 4; 222 } 223 224 /* 0x00200000 - 0x03FFFFFF: 225 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 226 */ 227 if (ucs <= 0x03FFFFFF) { 228 utf8[0] = 0xf8 | (ucs >> 24); 229 utf8[1] = 0x80 | ((ucs >> 18) & 0x3f); 230 utf8[2] = 0x80 | ((ucs >> 12) & 0x3f); 231 utf8[3] = 0x80 | ((ucs >> 6) & 0x3f); 232 utf8[4] = 0x80 | (ucs & 0x3f); 233 return 5; 234 } 235 236 /* 0x04000000 - 0x7FFFFFFF: 237 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 238 */ 239 if (ucs <= 0x7FFFFFFF) { 240 utf8[0] = 0xfc | (ucs >> 30); 241 utf8[1] = 0x80 | ((ucs >> 24) & 0x3f); 242 utf8[2] = 0x80 | ((ucs >> 18) & 0x3f); 243 utf8[3] = 0x80 | ((ucs >> 12) & 0x3f); 244 utf8[4] = 0x80 | ((ucs >> 6) & 0x3f); 245 utf8[5] = 0x80 | (ucs & 0x3f); 246 return 6; 247 } 248 249 cudl_err = CUDL_ERR_UNRECOGNISED_UNICODE; 250 return 0; 251 } 252 253 /* Parse a string starting after the opening quote. 254 * Set string to be the contents of the string. 255 * No memory is allocated if an error occurs. */ 256 static size_t parse_quoted_string(char *input, char **string) { 257 size_t length, capacity; 258 char *original_input, *newstring; 259 int64_t ucs; 260 int ucs_length, i; 261 262 length = 0; 263 capacity = 32; 264 original_input = input; 265 if ((*string = malloc(capacity)) == NULL) { 266 cudl_err = CUDL_ERR_OUT_OF_MEMORY; 267 return 0; 268 } 269 for (;;) { 270 if (*input == '\0') { 271 cudl_err = CUDL_ERR_UNMATCHED_QUOTE; 272 free(*string); 273 return 0; 274 } 275 if (*input == '"') { 276 if ((newstring = realloc(*string, length + 1)) == NULL) { 277 cudl_err = CUDL_ERR_OUT_OF_MEMORY; 278 free(*string); 279 return 0; 280 } 281 *string = newstring; 282 (*string)[length] = '\0'; 283 input++; 284 return input - original_input; 285 } 286 if (length >= capacity) { 287 if ((newstring = realloc(*string, capacity * 2)) == NULL) { 288 cudl_err = CUDL_ERR_OUT_OF_MEMORY; 289 free(*string); 290 return 0; 291 } 292 *string = newstring; 293 capacity *= 2; 294 } 295 if (*input == '\\') { 296 input++; 297 switch (*input) { 298 case '\0': 299 cudl_err = CUDL_ERR_EXPECTED_ESCAPE_SEQUENCE; 300 free(*string); 301 return 0; 302 case 'b': 303 (*string)[length++] = '\b'; 304 input++; 305 break; 306 case 't': 307 (*string)[length++] = '\t'; 308 input++; 309 break; 310 case 'n': 311 (*string)[length++] = '\n'; 312 input++; 313 break; 314 case 'r': 315 (*string)[length++] = '\r'; 316 input++; 317 break; 318 case '"': 319 (*string)[length++] = '"'; 320 input++; 321 break; 322 case '\\': 323 (*string)[length++] = '\\'; 324 input++; 325 break; 326 case 'u': 327 case 'U': 328 ucs = 0; 329 ucs_length = (*input == 'u') ? 4 : 8; 330 input++; 331 for (i = 0; i < ucs_length; i++) { 332 if (input[i] == '\0') { 333 cudl_err = CUDL_ERR_EXPECTED_ESCAPE_SEQUENCE; 334 free(*string); 335 return 0; 336 } 337 if ('0' <= input[i] && input[i] <= '9') { 338 ucs = (ucs << 4) + (input[i] - '0'); 339 } else if ('a' <= input[i] && input[i] <= 'z') { 340 ucs = (ucs << 4) + (input[i] - 'a' + 10); 341 } else if ('A' <= input[i] && input[i] <= 'Z') { 342 ucs = (ucs << 4) + (input[i] - 'A' + 10); 343 } else { 344 cudl_err = CUDL_ERR_EXPECTED_ESCAPE_SEQUENCE; 345 free(*string); 346 return 0; 347 } 348 } 349 if (length + 6 > capacity) { 350 if ((newstring = realloc(*string, capacity * 2)) == NULL) { 351 cudl_err = CUDL_ERR_OUT_OF_MEMORY; 352 free(*string); 353 return 0; 354 } 355 *string = newstring; 356 capacity *= 2; 357 } 358 length += cudl_ucs_to_utf8(ucs, (*string) + length); 359 if (cudl_err) { 360 free(*string); 361 return 0; 362 } 363 input += ucs_length; 364 break; 365 default: 366 (*string)[length++] = *input; 367 input++; 368 break; 369 } 370 } else { 371 (*string)[length++] = *(input++); 372 } 373 } 374 } 375 376 static size_t parse_array(char *input, struct cudl_value *value) { 377 size_t length, capacity; 378 struct cudl_value *values, *newvalues; 379 int i; 380 char *original_input; 381 382 original_input = input; 383 value->tag = CUDL_TAG_ARRAY; 384 length = 0; 385 capacity = 8; 386 if ((values = malloc(capacity * sizeof(struct cudl_value))) == NULL) { 387 cudl_err = CUDL_ERR_OUT_OF_MEMORY; 388 return 0; 389 } 390 391 STRIP_WHITESPACE(input); 392 for (;;) { 393 if (*input == '\0') { 394 cudl_err = CUDL_ERR_UNMATCHED_BRACK; 395 for (i = 0; i < length; i++) 396 cudl_deinit_value(values[i]); 397 free(values); 398 return 0; 399 } else if (*input == ']') { 400 input++; 401 values = realloc(values, length * sizeof(struct cudl_value)); 402 value->data.array.length = length; 403 value->data.array.values = values; 404 return input - original_input; 405 } 406 if (length >= capacity) { 407 if ((newvalues = realloc(values, 2 * capacity * sizeof(struct cudl_value))) == NULL) { 408 cudl_err = CUDL_ERR_OUT_OF_MEMORY; 409 for (i = 0; i < length; i++) 410 cudl_deinit_value(values[i]); 411 free(values); 412 return 0; 413 } 414 values = newvalues; 415 capacity *= 2; 416 } 417 input += parse_value(input, values + length); 418 if (cudl_err) { 419 for (i = 0; i < length; i++) 420 cudl_deinit_value(values[i]); 421 free(values); 422 return 0; 423 } 424 length++; 425 } 426 } 427 428 static size_t parse_map_key(char *input, char **key) { 429 char *original_input; 430 switch (*input) { 431 case '\0': 432 cudl_err = CUDL_ERR_EXPECTED_MAP_KEY; 433 return 0; 434 case '"': 435 input++; 436 return parse_quoted_string(input, key) + 1; 437 default: 438 original_input = input; 439 while (IS_KEY_CHAR(*input)) 440 input++; 441 if (input == original_input) { 442 cudl_err = CUDL_ERR_EXPECTED_MAP_KEY; 443 return 0; 444 } 445 if ((*key = malloc(input - original_input + 1)) == NULL) { 446 cudl_err = CUDL_ERR_OUT_OF_MEMORY; 447 return 0; 448 } 449 memcpy(*key, original_input, input - original_input); 450 (*key)[input - original_input] = '\0'; 451 return input - original_input; 452 } 453 } 454 455 static size_t parse_map(char *input, struct cudl_value *value, char end_char) { 456 char *original_input; 457 int i; 458 struct cudl_map_field *fields, *newfields; 459 size_t length, capacity; 460 461 original_input = input; 462 value->tag = CUDL_TAG_MAP; 463 length = 0; 464 capacity = 8; 465 if ((fields = malloc(capacity * sizeof(struct cudl_map_field))) == NULL) { 466 cudl_err = CUDL_ERR_OUT_OF_MEMORY; 467 return 0; 468 } 469 470 STRIP_WHITESPACE(input); 471 for (;;) { 472 if (*input == end_char) { 473 input++; 474 fields = realloc(fields, length * sizeof(struct cudl_map_field)); 475 value->data.map.length = length; 476 value->data.map.fields = fields; 477 return input - original_input; 478 } 479 if (*input == '\0') { 480 cudl_err = CUDL_ERR_UNMATCHED_BRACE; 481 for (i = 0; i < length; i++) { 482 cudl_deinit_value(fields[i].value); 483 free(fields[i].key); 484 } 485 free(fields); 486 return 0; 487 } 488 if (length >= capacity) { 489 if ((newfields = realloc(fields, 2 * capacity * sizeof(struct cudl_map_field))) == NULL) { 490 cudl_err = CUDL_ERR_OUT_OF_MEMORY; 491 for (i = 0; i < length; i++) { 492 cudl_deinit_value(fields[i].value); 493 free(fields[i].key); 494 } 495 free(fields); 496 return 0; 497 } 498 fields = newfields; 499 capacity *= 2; 500 } 501 input += parse_map_key(input, &fields[length].key); 502 if (cudl_err) { 503 for (i = 0; i < length; i++) { 504 cudl_deinit_value(fields[i].value); 505 free(fields[i].key); 506 } 507 free(fields); 508 return 0; 509 } 510 STRIP_WHITESPACE(input); 511 if (*input != ':') { 512 cudl_err = CUDL_ERR_EXPECTED_COLON; 513 for (i = 0; i < length; i++) { 514 cudl_deinit_value(fields[i].value); 515 free(fields[i].key); 516 } 517 free(fields[length].key); 518 free(fields); 519 return 0; 520 } 521 input++; 522 STRIP_WHITESPACE(input); 523 input += parse_value(input, &fields[length].value); 524 if (cudl_err) { 525 for (i = 0; i < length; i++) { 526 cudl_deinit_value(fields[i].value); 527 free(fields[i].key); 528 } 529 free(fields[length].key); 530 free(fields); 531 return 0; 532 } 533 length++; 534 } 535 } 536 537 static size_t _parse_value(char *input, struct cudl_value *value) { 538 if (*input == '%') 539 return parse_bool_or_null(++input, value) + 1; 540 if (*input == '[') 541 return parse_array(++input, value) + 1; 542 if (*input == '{') 543 return parse_map(++input, value, '}') + 1; 544 if (*input == '"') { 545 value->tag = CUDL_TAG_STRING; 546 return parse_quoted_string(++input, &value->data.string) + 1; 547 } 548 if (IS_DIGIT(*input) || *input == '-') 549 return parse_number(input, value); 550 cudl_err = CUDL_ERR_UNRECOGNISED_VALUE; 551 return 0; 552 } 553 554 static size_t parse_value(char *input, struct cudl_value *value) { 555 char *original_input; 556 original_input = input; 557 input += _parse_value(input, value); 558 STRIP_WHITESPACE(input); 559 return input - original_input; 560 } 561 562 void cudl_parse_from_file(FILE *file, struct cudl_value *value) { 563 char *input, *original_input; 564 if ((original_input = input = fread_all(file)) == NULL) { 565 if (ferror(file)) 566 cudl_err = CUDL_ERR_READING; 567 else 568 cudl_err = CUDL_ERR_OUT_OF_MEMORY; 569 return; 570 } 571 input += cudl_parse(input, value); 572 if (*input != '\0') 573 cudl_deinit_value(*value); 574 free(original_input); 575 } 576 577 size_t cudl_parse(char *input, struct cudl_value *value) { 578 STRIP_WHITESPACE(input); 579 return parse_value(input, value); 580 }