/**
 * Implements INI reader.
 *
 * `INIReader` is a fairly low-level, configurable reader for INI data,
 * which you can use to build your own object model.
 *
 * A high-level interface is available in `dini.parser`.
 *
 *
 * Unless you need to change `INIReader` behaviour, you should use one of the
 * provided preconfigured readers:
 *
 *  - `StrictINIReader`
 *
 *     Lower compatibility, may be a bit faster.
 *
 *
 *  - `UniversalINIReader`
 *
 *     Higher compatibility, may be slightly slower.
 */
module dini.reader;

import std.algorithm  : countUntil, canFind, map;
import std.array      : array;
import std.functional : unaryFun;
import std.string     : representation, assumeUTF, strip,
                        stripLeft, stripRight, split, join, format;
import std.range      : ElementType, replace;
import std.uni        : isWhite, isSpace;
import std.variant    : Algebraic;
import dini.utils     : isBoxer, BoxerType, parseEscapeSequences;


/**
 * Represents type of current token used by INIReader.
 */
enum INIToken
{
    BLANK,   ///
    SECTION, ///
    KEY,     ///
    COMMENT  ///
}


/**
 * Represents a block definition.
 *
 * Block definitions are used to define new quote and comment sequences
 * to be accepted by INIReader.
 *
 * BlockDefs can be either single line or multiline. To define a new single
 * line block, `INIBlockDef.multiline` must be set to `false` AND `closing`
 * must be set to the newline string (`"\n"`).
 */
struct INIBlockDef
{
    /**
     * Opening character sequence
     */
    string opening;

    /**
     * Closing character sequence
     */
    string closing;

    /**
     * Should newline characters be allowed?
     */
    bool multiline;
}


/**
 * INIReader behaviour flags.
 *
 * These flags can be used to modify INIReader behaviour.
 */
enum INIFlags : uint
{
    /**
     * Should escape sequences be translated?
     */
    ProcessEscapes = 1 << 0,


    /**
     * Section names will be trimmed.
     */
    TrimSections = 1 << 4,

    /**
     * Key names will be trimmed.
     */
    TrimKeys = 1 << 5,

    /**
     * Values will be trimmed.
     */
    TrimValues = 1 << 6,

    /**
     * Section names, keys and values will be trimmed.
     */
    TrimAll = TrimSections | TrimKeys | TrimValues
}


/**
 * Defines INI format.
 *
 * This struct defines INI comment and quote sequences.
 *
 * `INIReader` adds no default quote or comment definitions,
 * so when defining a custom format make sure to include the default
 * definitions to increase compatibility.
 */
struct INIFormatDescriptor
{
    /**
     * List of comment definitions to support.
     */
    INIBlockDef[] comments;

    /**
     * List of quote definitions to support.
     */
    INIBlockDef[] quotes;
}


/**
 * Strict INI format.
 *
 * This format is used by `StrictINIReader`.
 *
 * This format defines only `;` as comment character and `"` as the only quote.
 * For a more universal format consider using `UniversalINIFormat`.
 */
const INIFormatDescriptor StrictINIFormat = INIFormatDescriptor(
    [INIBlockDef(";", "\n", false)],
    [INIBlockDef(`"`, `"`, false)]
);


/**
 * Universal INI format.
 *
 * This format extends `StrictINIFormat` with hash-comments (`#`) and multiline
 * triple-quotes (`"""`).
 */
const INIFormatDescriptor UniversalINIFormat = INIFormatDescriptor(
    [INIBlockDef(";", "\n", false), INIBlockDef("#", "\n", false)],
    [INIBlockDef(`"""`, `"""`, true), INIBlockDef(`"`, `"`, false)]
);


/**
 * Thrown when a parsing error occurred.
 */
class INIException : Exception
{
    this(string msg = null, Throwable next = null) { super(msg, next); }
    this(string msg, string file, size_t line, Throwable next = null) {
        super(msg, file, line, next);
    }
}


/**
 * Represents parsed INI key.
 *
 * Prefer using `YOUR_READER.KeyType` alias.
 */
struct INIReaderKey(ValueType)
{
    /**
     * Key name
     */
    string name;

    /**
     * Key value (may be boxed)
     */
    ValueType value;
}


/**
 * Splits source into tokens.
 *
 * This struct requires token delimiters to be ASCII characters;
 * Unicode is not supported **only** for token delimiters.
 *
 * Unless you want to modify `INIReader` behaviour prefer using one of the
 * available preconfigured variants:
 *
 *  - `StrictINIReader`
 *  - `UniversalINIReader`
 *
 *
 * `INIReader` expects three template arguments:
 *
 *  - `Format`
 *
 *     Instance of `INIFormatDescriptor`, defines quote and comment sequences.
 *
 *
 *  - `Flags`
 *
 *     `INIFlags` (can be OR-ed)
 *
 *
 *  - `Boxer`
 *
 *     Function that is called with the raw `string` value (`Boxer(value)`) and returns
 *     a value. The preconfigured readers just proxy values, doing nothing, but this can
 *     be used to e.g. store token values as JSONValue or other Algebraic-like type.
 *
 *     `INIReader.BoxType` is always the return type of the boxer function. So if you
 *     passed a boxer that returns `SomeAlgebraic` then `typeof(reader.key.value)` is
 *     `SomeAlgebraic`.
 *
 *
 * Params:
 *   Format - `INIFormatDescriptor` to use.
 *   Flags  - Reader behaviour flags.
 *   Boxer  - Function that can optionally box values.
 *
 *
 * Examples:
 * ---
 * auto reader = UniversalINIReader("key=value\n");
 *
 * while (reader.next) {
 *     writeln(reader.value);
 * }
 * ---
 */
struct INIReader(INIFormatDescriptor Format, ubyte Flags = 0x00, alias Boxer)
    if (isBoxer!Boxer)
{
    /**
     * Reader's format descriptor.
     */
    alias CurrentFormat = Format;

    /**
     * Reader's flags.
     */
    alias CurrentFlags = Flags;

    /**
     * Reader's boxer.
     */
    alias CurrentBoxer = Boxer;

    /**
     * Reader's Box type (boxer return type).
     */
    alias BoxType = BoxerType!Boxer;


    /**
     * Alias for INIReaderKey!(BoxType).
     */
    alias KeyType = INIReaderKey!BoxType;

    /**
     * Type of `value` property.
     */
    alias TokenValue = Algebraic!(string, KeyType);


    /**
     * INI source bytes.
     */
    immutable(ubyte)[] source;

    /**
     * INI source offset in bytes.
     */
    size_t sourceOffset;

    /**
     * Type of current token.
     */
    INIToken type;

    /**
     * Indicates whether source has been exhausted.
     */
    bool empty;

    /**
     * Used only with Key tokens.
     *
     * Indicates whether current value has been quoted.
     * This information can be used by Boxers to skip boxing of quoted values.
     */
    bool isQuoted;

    /**
     * Current token's value.
     */
    TokenValue value;


    /**
     * Creates new instance of `INIReader` from `source`.
     *
     * If passed source is empty or does not end with a newline, one is
     * appended (and thus allocates). To prevent allocation make sure
     * `source` ends with a newline.
     *
     * Params:
     *   source - INI source.
     */
    this(string source)
    {
        // Make source end with newline. The length check keeps `$-1`
        // from indexing into an empty string.
        if (source.length == 0 || source[$-1] != '\n')
            this.source = (source ~ "\n").representation;
        else
            this.source = source.representation;
    }

    /**
     * Returns key token.
     *
     * Use this only if you know current token is KEY.
     */
    KeyType key() @property {
        return value.get!KeyType;
    }

    /**
     * Returns section name.
     *
     * Use this only if you know current token is SECTION.
     */
    string sectionName() @property {
        return value.get!string;
    }

    /**
     * Reads next token.
     *
     * Resets `isQuoted`, skips leading whitespace and then reads a comment,
     * a section or a key entry depending on what is found at the cursor.
     *
     * Returns:
     *   True if more tokens are available, false otherwise.
     */
    bool next()
    {
        isQuoted = false;
        skipWhitespaces();

        if (current.length == 0) {
            empty = true;
            return false;
        }

        int pairIndex = -1;
        while(source.length - sourceOffset > 0)
        {
            if (findPair!`comments`(pairIndex)) {
                readComment(pairIndex);
                break;
            }
            else if (current[0] == '[') {
                readSection();
                break;
            }
            else if (isWhite(current[0])) {
                skipWhitespaces();
            }
            else {
                readEntry();
                break;
            }
        }

        return true;
    }

    /**
     * Checks whether one of the block definitions in `Format.<fieldName>`
     * opens at the current offset.
     *
     * Returns `false` right away when the byte before the cursor is a
     * backslash, so an escaped opener is not treated as a block start.
     *
     * Params:
     *   fieldName - Name of the `INIFormatDescriptor` member to scan
     *               (`comments` or `quotes`).
     *   pairIndex - Receives the index of the matching definition.
     *
     * Returns:
     *   True if a definition matched, false otherwise.
     */
    bool findPair(string fieldName)(out int pairIndex)
    {
        if (source.length - sourceOffset > 0 && sourceOffset > 0 && source[sourceOffset - 1] == '\\')
            return false;

        alias MemberType = typeof(__traits(getMember, Format, fieldName));
        foreach (size_t i, ElementType!MemberType pairs; __traits(getMember, Format, fieldName)) {
            // First field of INIBlockDef, i.e. `opening`.
            string opening = pairs.tupleof[0];

            // Not enough bytes left for this opener.
            if (source.length - sourceOffset < opening.length)
                continue;

            if (current[0..opening.length] == opening) {
                pairIndex = cast(int)i;
                return true;
            }
        }

        return false;
    }

    /**
     * Reads a `[section]` token starting at the current offset.
     *
     * Throws:
     *   INIException if no closing `]` is found.
     */
    void readSection()
    {
        type = INIToken.SECTION;
        // NOTE(review): the search covers the whole remaining source, not
        // just the current line, so a `]` on a later line also closes it.
        auto index = current.countUntil(']');
        if (index == -1)
            throw new INIException("Section not closed");

        value = current[1 .. index].assumeUTF;

        static if (Flags & INIFlags.TrimSections)
            value = value.get!string.strip;

        sourceOffset += index + 1;
    }

    /**
     * Reads a comment token using comment definition number `pairIndex`.
     *
     * Params:
     *   pairIndex - Index into `Format.comments`.
     *
     * Throws:
     *   INIException if the closing sequence is missing, or if a
     *   non-multiline comment contains a newline.
     */
    void readComment(int pairIndex)
    {
        type = INIToken.COMMENT;
        INIBlockDef commentDef = Format.comments[pairIndex];
        sourceOffset += commentDef.opening.length;

        auto index = current.countUntil(commentDef.closing);
        if (index == -1)
            throw new INIException("Comment not closed");

        value = current[0.. index].assumeUTF;

        if (commentDef.multiline == false && value.get!string.canFind('\n'))
            throw new INIException("Comment not closed (multiline)");

        sourceOffset += index + commentDef.closing.length;
    }

    /**
     * Reads a key token and, when the key is followed by `=`, its value.
     */
    void readEntry()
    {
        type = INIToken.KEY;
        KeyType key;

        readKey(key);
        if (current.length && current[0] == '=') {
            sourceOffset += 1;
            key.value = readValue();
        }

        value = key;
    }

    /**
     * Reads the key name, either quoted or up to the first `=` / newline.
     *
     * Params:
     *   key - Receives the key name.
     */
    void readKey(out KeyType key)
    {
        if (tryReadQuote(key.name)) {
            isQuoted = true;
            return;
        }

        auto newLineOffset = current.countUntil('\n');
        if (newLineOffset > 0) { // read until newline/some assign sequence
            auto offset = current[0..newLineOffset].countUntil('=');

            if (offset == -1)
                key.name = current[0 .. newLineOffset].assumeUTF;
            else
                key.name = current[0 .. offset].assumeUTF;

            // Advance by the untrimmed length so the cursor lands on
            // the '=' or on the newline.
            sourceOffset += key.name.length;
            key.name = key.name.stripRight;

            static if (Flags & INIFlags.TrimKeys)
                key.name = key.name.stripLeft;
        }
    }


    /**
     * Reads the value that follows `=`.
     *
     * Quoted values are returned verbatim. Unquoted values run until the
     * first newline that is not preceded by a backslash; a backslash
     * directly before a line break joins the next line to the value.
     *
     * Returns:
     *   The value passed through `Boxer`.
     */
    BoxType readValue()
    {
        // Skip spaces between '=' and the value.
        auto firstNonSpaceIndex = current.countUntil!(a => !isSpace(a));
        if (firstNonSpaceIndex > 0)
            sourceOffset += firstNonSpaceIndex;

        string result = "";
        auto indexBeforeQuotes = sourceOffset;

        isQuoted = tryReadQuote(result);

        // The value only counts as quoted when the closing quote is directly
        // followed by the newline; otherwise rewind and read it as plain text.
        if (isQuoted && current.countUntil('\n') > 0) {
            sourceOffset = indexBeforeQuotes;
            isQuoted = false;
        }

        if (!isQuoted) {
            bool escaped = false;
            size_t localOffset = 0;

            // Bound the scan by the absolute position so that a continuation
            // at the very end of the input cannot read past the buffer.
            for (; sourceOffset + localOffset < source.length; ++localOffset) {
                immutable ch = source[sourceOffset + localOffset];

                if (ch == '\\') {
                    escaped = !escaped;
                    continue;
                }

                // Keep the escape alive across '\r' so "\\\r\n" continues too.
                if (escaped && ch == '\r')
                    continue;

                if (!escaped && ch == '\n')
                    break;

                escaped = false;
            }

            result = current[0..localOffset].assumeUTF.split("\n").map!((line) {
                line = line.stripRight;
                // Drop the continuation backslash; empty lines have none.
                if (line.length && line[$-1] == '\\') return line[0..$-1].stripLeft;
                return line.stripLeft;
            }).array.join();
            sourceOffset += localOffset;
        }

        static if (Flags & INIFlags.TrimValues)
            if (!isQuoted)
                result = result.strip;

        static if (Flags & INIFlags.ProcessEscapes)
            result = parseEscapeSequences(result);

        return Boxer(result);
    }

    /**
     * Tries to read a quoted string at the current offset.
     *
     * Params:
     *   result - Receives the text between the quotes.
     *
     * Returns:
     *   True if a quote definition matched and was consumed, false otherwise.
     *
     * Throws:
     *   INIException if the closing quote is missing, or if a non-multiline
     *   quote contains a newline.
     */
    bool tryReadQuote(out string result)
    {
        int pairIndex;

        if (findPair!`quotes`(pairIndex)) {
            auto quote = Format.quotes[pairIndex];
            sourceOffset += quote.opening.length;

            auto closeIndex = current.countUntil(quote.closing);
            if (closeIndex == -1)
                throw new INIException("Unterminated string literal");

            result = current[0..closeIndex].assumeUTF;
            sourceOffset += result.length + quote.closing.length;

            if (result.canFind('\n') && quote.multiline == false)
                throw new INIException("Unterminated string literal which spans multiple lines (invalid quotes used?)");

            return true;
        }

        return false;
    }

    /**
     * Advances the offset past any whitespace at the cursor.
     */
    void skipWhitespaces()
    {
        while (current.length && isWhite(current[0]))
            sourceOffset += 1;
    }

    /// Unread part of the source.
    private immutable(ubyte)[] current() @property {
        return source[sourceOffset..$];
    }
}


/**
 * Universal `INIReader` variant.
 *
 * Use this variant if you want to have a more compatible parser.
 *
 * Specifics:
 *  - Uses `UniversalINIFormat`.
 *  - Trims section names, keys and values.
 *  - Processes escapes in values (e.g. `\n`).
 */
alias UniversalINIReader = INIReader!(UniversalINIFormat, INIFlags.TrimAll | INIFlags.ProcessEscapes, (string a) => a);


/**
 * Strict `INIReader` variant.
 *
 * Use this variant if you want to have a more strict (and a bit faster) parser.
 *
 * Specifics:
 *  - Uses `StrictINIFormat`
 *  - Only Keys are trimmed.
 *  - No escape sequences are resolved.
 */
alias StrictINIReader = INIReader!(StrictINIFormat, INIFlags.TrimKeys, (string a) => a);


unittest {
    auto source = `
; comment

multiline = """
 this is
"""

numeric=-100000
numeric2=09843
[section (name)]
@=bar
`;


    auto reader = UniversalINIReader(source);
    alias Key = reader.KeyType;

    assert(reader.next());
    assert(reader.type == INIToken.COMMENT);
    assert(reader.sectionName == " comment");

    assert(reader.next());
    assert(reader.type == INIToken.KEY);
    assert(reader.key.name == "multiline");
    assert(reader.key.value == "\n this is\n");

    assert(reader.next());
    assert(reader.type == INIToken.KEY);
    assert(reader.value.get!Key.name == "numeric");
    assert(reader.value.get!Key.value == "-100000");

    assert(reader.next());
    assert(reader.type == INIToken.KEY);
    assert(reader.value.get!Key.name == "numeric2");
    assert(reader.value.get!Key.value == "09843");

    assert(reader.next());
    assert(reader.type == INIToken.SECTION);
    assert(reader.value.get!string == "section (name)");

    assert(reader.next());
    assert(reader.type == INIToken.KEY);
    assert(reader.value.get!Key.name == "@");
    assert(reader.value.get!Key.value == `bar`);

    assert(!reader.next());
}


unittest {
    auto source = `
####### TEST ########

numeric value=15
ThisIsMultilineValue=thisis\
verylong # comment
"Floating=Value"=1.51

[] # comment works
JustAKey
`;

    auto reader = UniversalINIReader(source);
    alias Key = reader.KeyType;

    assert(reader.next());
    assert(reader.type == INIToken.COMMENT);
    assert(reader.value.get!string == "###### TEST ########");

    assert(reader.next());
    assert(reader.type == INIToken.KEY);
    assert(reader.value.get!Key.name == "numeric value");
    assert(reader.value.get!Key.value == `15`);

    assert(reader.next());
    assert(reader.type == INIToken.KEY);
    assert(reader.value.get!Key.name == "ThisIsMultilineValue");
    assert(reader.value.get!Key.value == `thisisverylong # comment`);

    assert(reader.next());
    assert(reader.type == INIToken.KEY);
    assert(reader.value.get!Key.name == "Floating=Value");
    assert(reader.value.get!Key.value == `1.51`);

    assert(reader.next());
    assert(reader.type == INIToken.SECTION);
    assert(reader.value.get!string == "");

    assert(reader.next());
    assert(reader.type == INIToken.COMMENT);
    assert(reader.value.get!string == " comment works");

    assert(reader.next());
    assert(reader.type == INIToken.KEY);
    assert(reader.value.get!Key.name == "JustAKey");
    assert(reader.value.get!Key.value == null);

    assert(!reader.next());
}

unittest {
    string source = `
[ Debug ]
sNumString=10Test
QuotedNum="10"
QuotedFloat="10.1"
Num=10
Float=10.1
`;

    auto reader = UniversalINIReader(source);
    alias Key = reader.KeyType;

    assert(reader.next());
    assert(reader.type == INIToken.SECTION);
    assert(reader.value.get!string == "Debug");

    assert(reader.next());
    assert(reader.type == INIToken.KEY);
    assert(reader.value.get!Key.name == "sNumString");
    assert(reader.value.get!Key.value == `10Test`);

    assert(reader.next());
    assert(reader.type == INIToken.KEY);
    assert(reader.value.get!Key.name == "QuotedNum");
    assert(reader.value.get!Key.value == `10`);

    assert(reader.next());
    assert(reader.type == INIToken.KEY);
    assert(reader.value.get!Key.name == "QuotedFloat");
    assert(reader.value.get!Key.value == `10.1`);

    assert(reader.next());
    assert(reader.type == INIToken.KEY);
    assert(reader.value.get!Key.name == "Num");
    assert(reader.value.get!Key.value == "10");

    assert(reader.next());
    assert(reader.type == INIToken.KEY);
    assert(reader.value.get!Key.name == "Float");
    assert(reader.value.get!Key.value == "10.1");

    assert(!reader.next());
}

unittest {
    string source = `
[ Debug ]
sNumString=10Test
QuotedNum="10"
QuotedFloat="10.1"
Num=10
Float=10.1
`;

    auto reader = StrictINIReader(source);
    alias Key = reader.KeyType;

    assert(reader.next());
    assert(reader.type == INIToken.SECTION);
    assert(reader.value.get!string == " Debug ");

    assert(reader.next());
    assert(reader.type == INIToken.KEY);
    assert(reader.value.get!Key.name == "sNumString");
    assert(reader.value.get!Key.value == `10Test`);

    assert(reader.next());
    assert(reader.type == INIToken.KEY);
    assert(reader.value.get!Key.name == "QuotedNum");
    assert(reader.value.get!Key.value == `10`);

    assert(reader.next());
    assert(reader.type == INIToken.KEY);
    assert(reader.value.get!Key.name == "QuotedFloat");
    assert(reader.value.get!Key.value == `10.1`);

    assert(reader.next());
    assert(reader.type == INIToken.KEY);
    assert(reader.value.get!Key.name == "Num");
    assert(reader.value.get!Key.value == `10`);

    assert(reader.next());
    assert(reader.type == INIToken.KEY);
    assert(reader.value.get!Key.name == "Float");
    assert(reader.value.get!Key.value == `10.1`);

    assert(!reader.next());
}

unittest {
    // Regression: empty input must not index into an empty source.
    auto emptyReader = UniversalINIReader("");
    assert(!emptyReader.next());
    assert(emptyReader.empty);

    // Regression: a line continuation directly before the end of input
    // must not scan past the end of the source.
    auto reader = UniversalINIReader("key=a\\\n");
    assert(reader.next());
    assert(reader.type == INIToken.KEY);
    assert(reader.key.name == "key");
    assert(reader.key.value == "a");
    assert(!reader.next());
}