1 /**
2  * Implements INI reader.
3  *
4  * `INIReader` is fairly low-level, configurable reader for reading INI data,
5  * which you can use to build your own object-model.
6  *
7  * High level interface is available in `dini.parser`.
8  *
9  *
10  * Unless you need to change `INIReader` behaviour, you should use one of provided
11  * preconfigured readers:
12  *
13  *  - `StrictINIReader`
14  *
 *     Lower compatibility, may be a bit faster.
16  *
17  *
18  *  - `UniversalINIReader`
19  *
 *     Higher compatibility, may be slightly slower.
21  */
22 module dini.reader;
23 
import std.algorithm  : countUntil, canFind, map;
import std.array 	  : array;
import std.functional : unaryFun;
import std.string 	  : representation, assumeUTF, strip,
	stripLeft, stripRight, split, join, format;
import std.range 	  : ElementType, replace;
import std.uni 		  : isWhite, isSpace;
import std.variant 	  : Algebraic;
import dini.utils     : isBoxer, BoxerType, parseEscapeSequences;
33 
34 
35 /**
36  * Represents type of current token used by INIReader.
37  */
enum INIToken
{
    BLANK, 	 /// No token has been read yet; as the first member this is the initial value of `INIReader.type`.
    SECTION, /// Section header (`[name]`); the token value is the section name as a string.
    KEY,	 /// Key entry (`name` or `name=value`); the token value is an `INIReaderKey`.
    COMMENT	 /// Comment; the token value is the text between the comment's opening and closing sequences.
}
45 
46 
47 /**
48  * Represents a block definition.
49  *
50  * Block definitions are used to define new quote and comment sequences
51  * to be accepted by INIReader.
52  *
 * BlockDefs can be either single-line or multiline. To define a new single-line
 * block, `INIBlockDef.multiline` must be set to `false` AND `closing`
 * must be set to the newline string (`"\n"`).
56  */
struct INIBlockDef
{
	/**
	 * Opening character sequence.
	 *
	 * `INIReader` recognises the block when the unread input starts with
	 * exactly this sequence (and the sequence is not preceded by a backslash).
	 */
	string opening;

	/**
	 * Closing character sequence.
	 *
	 * The block's content is everything between `opening` and the first
	 * occurrence of this sequence. `INIReader` throws `INIException` when
	 * the sequence is never found.
	 */
	string closing;

	/**
	 * Should newline characters be allowed?
	 *
	 * When `false`, `INIReader` throws `INIException` if the block's
	 * content contains a `'\n'`.
	 */
	bool multiline;
}
74 
75 
76 /**
77  * INIReader behaviour flags.
78  *
79  * These flags can be used to modify INIReader behaviour.
80  */
enum INIFlags : uint
{
    /**
     * Should escape sequences be translated?
     *
     * When set, `INIReader.readValue` passes every value (quoted or not)
     * through `parseEscapeSequences` before boxing it.
     */
	ProcessEscapes 	= 1 << 0,

	// Bits 1-3 are not assigned to any flag in this enum.

    /**
     * Section names will be trimmed.
     *
     * When set, whitespace on both sides of the text between `[` and `]`
     * is stripped.
     */
	TrimSections	= 1 << 4,

    /**
     * Key names will be trimmed.
     *
     * Unquoted key names always have trailing whitespace stripped; this flag
     * additionally strips leading whitespace. Quoted key names are left as-is.
     */
	TrimKeys 		= 1 << 5,

    /**
     * Values will be trimmed.
     *
     * Only applies to unquoted values; quoted values are never trimmed.
     */
	TrimValues		= 1 << 6,

    /**
     * Section names, keys and values will be trimmed.
     *
     * Combination of `TrimSections`, `TrimKeys` and `TrimValues`.
     */
	TrimAll			= TrimSections | TrimKeys | TrimValues
}
109 
110 
111 /**
112  * Defines INI format.
113  *
114  * This struct defines INI comments and quotes sequences.
115  *
116  * `INIReader` adds no default quotes or comment definitions,
117  * and thus when defining custom format make sure to include default
118  * definitions to increase compatibility.
119  */
struct INIFormatDescriptor
{
    /**
     * List of comment definitions to support.
     *
     * Definitions are tried in order and the first one whose `opening`
     * matches the input wins.
     */
    INIBlockDef[] comments;

    /**
     * List of quote definitions to support.
     *
     * Definitions are tried in order and the first one whose `opening`
     * matches the input wins, so a longer opening sequence must be listed
     * before a shorter one that is its prefix (e.g. `"""` before `"`).
     */
    INIBlockDef[] quotes;
}
132 
133 
134 /**
135  * Strict INI format.
136  *
 * This format is used by `StrictINIReader`.
138  *
139  * This format defines only `;` as comment character and `"` as only quote.
140  * For more universal format consider using `UniversalINIFormat`.
141  */
const INIFormatDescriptor StrictINIFormat = INIFormatDescriptor(
    // Comments: `;` up to the end of the line.
    [INIBlockDef(";", "\n", false)],
    // Quotes: `"..."`, not allowed to contain a newline.
    [INIBlockDef(`"`, `"`, false)]
);
146 
147 
148 /**
149  * Universal INI format.
150  *
151  * This format extends `StrictINIFormat` with hash-comments (`#`) and multiline
152  * triple-quotes (`"""`).
153  */
const INIFormatDescriptor UniversalINIFormat = INIFormatDescriptor(
    // Comments: `;` or `#` up to the end of the line.
    [INIBlockDef(";", "\n", false), INIBlockDef("#", "\n", false)],
    // Quotes: multiline `"""..."""` and single-line `"..."`. The triple quote is
    // listed first because the reader uses the first definition whose opening
    // matches, and `"` is a prefix of `"""`.
    [INIBlockDef(`"""`, `"""`, true), INIBlockDef(`"`, `"`, false)]
);
158 
159 
160 /**
 * Thrown when a parsing error occurs.
162  */
class INIException : Exception
{
	/**
	 * Creates an exception that records the location of the caller.
	 *
	 * Params:
	 *  msg  = Error message.
	 *  next = Optional chained exception.
	 *  file = Source file of the throw site (filled in automatically).
	 *  line = Source line of the throw site (filled in automatically).
	 */
	this(string msg = null, Throwable next = null,
		 string file = __FILE__, size_t line = __LINE__) {
		// `file`/`line` are default arguments so they are evaluated at the call
		// site; without them every exception would report this constructor's
		// own location instead of the place that threw.
		super(msg, file, line, next);
	}

	/**
	 * Creates an exception with an explicitly supplied location.
	 *
	 * Params:
	 *  msg  = Error message.
	 *  file = Source file to report.
	 *  line = Source line to report.
	 *  next = Optional chained exception.
	 */
	this(string msg, string file, size_t line, Throwable next = null) {
		super(msg, file, line, next);
	}
}
170 
171 
172 /**
173  * Represents parsed INI key.
174  *
175  * Prefer using `YOUR_READER.KeyType` alias.
176  */
struct INIReaderKey(ValueType)
{
    /**
     * Key name
     *
     * For unquoted keys trailing whitespace is always removed; leading
     * whitespace is removed only when `INIFlags.TrimKeys` is set.
     */
    string name;

    /**
     * Key value (may be boxed)
     *
     * Holds the result of the reader's boxer function. Left at
     * `ValueType.init` when the key is not followed by `=`.
     */
    ValueType value;
}
189 
190 
191 /**
192  * Splits source into tokens.
193  *
 * This struct requires token delimiters to be ASCII characters;
 * Unicode is unsupported **only** for token delimiters.
196  *
197  * Unless you want to modify `INIReader` behaviour prefer using one of available
198  * preconfigured variants:
199  *
200  *  - `StrictINIReader`
201  *  - `UniversalINIReader`
202  *
203  *
204  * `INIReader` expects three template arguments:
205  *
206  *   - `Format`
207  *
208  *      Instance of `INIFormatDescriptor`, defines quote and comment sequences.
209  *
210  *
211  *   - `Flags`
212  *
 *     `INIFlags` (can be OR-ed)
214  *
215  *
216  *   - `Boxer`
217  *
 *      Name of a function that takes `(string value)` and returns a value.
219  *      By default all readers just proxy values, doing nothing, but this can be used to e.g.
220  *      store token values as JSONValue or other Algebraic-like type.
221  *
222  *      `INIReader.BoxType` is always return type of boxer function. So if you passed a boxer that
223  *      returns `SomeAlgebraic` then `typeof(reader.key.value)` is `SomeAlgebraic`.
224  *
225  *
226  * Params:
227  *   Format - `INIFormatDescriptor` to use.
228  *   Flags  - Reader behaviour flags.
229  *   Boxer  - Function name that can optionally box values.
230  *
231  *
232  * Examples:
233  * ---
234  * auto reader = UniversalINIReader("key=value\n");
235  *
236  * while (reader.next) {
237  *    writeln(reader.value);
238  * }
239  * ---
240  */
struct INIReader(INIFormatDescriptor Format, ubyte Flags = 0x00, alias Boxer)
    if (isBoxer!Boxer)
{
    /**
     * Reader's format descriptor.
     */
    alias CurrentFormat = Format;

    /**
     * Reader's flags.
     */
    alias CurrentFlags = Flags;

    /**
     * Reader's boxer.
     */
    alias CurrentBoxer = Boxer;

    /**
     * Reader's Box type (boxer return type).
     */
    alias BoxType = BoxerType!Boxer;


    /**
     * Alias for INIReaderKey!(BoxType).
     */
    alias KeyType = INIReaderKey!BoxType;

    /**
     * Type of `value` property.
     *
     * Holds a `string` for SECTION and COMMENT tokens and a `KeyType`
     * for KEY tokens.
     */
    alias TokenValue = Algebraic!(string, KeyType);


    /**
     * INI source bytes (always terminated by a newline, see constructor).
     */
    immutable(ubyte)[] source;

    /**
     * INI source offset in bytes; everything before it has been consumed.
     */
    size_t sourceOffset;

    /**
     * Type of current token.
     */
    INIToken type;

    /**
     * Indicates whether source has been exhausted.
     *
     * Set by `next` when no input is left after skipping whitespace.
     */
    bool empty;

    /**
     * Used only with Key tokens.
     *
     * Indicates whether current key name or value has been quoted.
     * This information can be used by Boxers to skip boxing of quoted values.
     */
    bool isQuoted;

    /**
     * Current token's value.
     */
    TokenValue value;


    /**
     * Creates new instance of `INIReader` from `source`.
     *
     * If passed source does not end with newline it is added (and thus allocates).
     * To prevent allocation make sure `source` ends with new line.
     * An empty `source` is accepted and produces no tokens.
     *
     * Params:
     *  source - INI source.
     */
    this(string source)
    {
        // Make source end with newline. The length check keeps an empty
        // source from indexing `source[$-1]` out of bounds.
        if (source.length == 0 || source[$-1] != '\n')
            this.source = (source ~ "\n").representation;
        else
            this.source = source.representation;
    }

    /**
     * Returns key token.
     *
     * Use this only if you know current token is KEY.
     */
    KeyType key() @property {
        return value.get!KeyType;
    }

    /**
     * Returns section name.
     *
     * Use this only if you know current token is SECTION.
     * (COMMENT tokens store their text as a string as well.)
     */
    string sectionName() @property {
        return value.get!string;
    }

    /**
     * Reads next token.
     *
     * Skips leading whitespace, then reads one comment, section or key entry
     * and stores it in `type` / `value`.
     *
     * Returns:
     *  True if a token was read, false when the source is exhausted
     *  (in which case `empty` is set).
     *
     * Throws:
     *  `INIException` on unterminated sections, comments or quotes.
     */
    bool next()
    {
        isQuoted = false;
        skipWhitespaces();

        if (current.length == 0) {
            empty = true;
            return false;
        }

        int pairIndex = -1;
        while(source.length - sourceOffset > 0)
        {
            if (findPair!`comments`(pairIndex)) {
                readComment(pairIndex);
                break;
            }
            else if (current[0] == '[') {
                readSection();
                break;
            }
            else if (isWhite(current[0])) {
                skipWhitespaces();
            }
            else {
                readEntry();
                break;
            }
        }

        return true;
    }

    /**
     * Checks whether the unread input starts with the opening sequence of
     * one of the block definitions in `Format.<fieldName>`.
     *
     * Definitions are tried in order; the first match wins.
     *
     * Params:
     *  fieldName - Name of the `INIFormatDescriptor` member to search
     *              (`comments` or `quotes`).
     *  pairIndex - Receives the index of the matching definition.
     *
     * Returns:
     *  True if a definition matched, false otherwise.
     */
    bool findPair(string fieldName)(out int pairIndex)
    {
        // An opening sequence directly preceded by a backslash is treated as escaped.
        if (source.length - sourceOffset > 0 && sourceOffset > 0 && source[sourceOffset - 1] == '\\') return false;

        alias MemberType = typeof(__traits(getMember, Format, fieldName));
        foreach (size_t i, ElementType!MemberType pairs; __traits(getMember, Format, fieldName)) {
            // First field of INIBlockDef is the opening sequence.
            string opening = pairs.tupleof[0];

            if (source.length - sourceOffset < opening.length)
                continue;

            if (current[0..opening.length] == opening) {
                pairIndex = cast(int)i;
                return true;
            }
        }

        return false;
    }

    /**
     * Reads a section header starting at `[` and consumes it up to and
     * including the first `]`.
     *
     * Throws:
     *  `INIException` if no `]` is found in the remaining input.
     */
    void readSection()
    {
        type = INIToken.SECTION;
        auto index = current.countUntil(']');
        if (index == -1)
            throw new INIException("Section not closed");

        value = current[1 .. index].assumeUTF;

        static if (Flags & INIFlags.TrimSections)
            value = value.get!string.strip;

        sourceOffset += index + 1;
    }

    /**
     * Reads a comment whose opening sequence is at the current offset.
     *
     * The token value is the text between the opening and closing sequences.
     *
     * Params:
     *  pairIndex - Index into `Format.comments` of the matched definition.
     *
     * Throws:
     *  `INIException` if the closing sequence is missing, or if a
     *  non-multiline comment contains a newline.
     */
    void readComment(int pairIndex)
    {
        type = INIToken.COMMENT;
        INIBlockDef commentDef = Format.comments[pairIndex];
        sourceOffset += commentDef.opening.length;

        auto index = current.countUntil(commentDef.closing);
        if (index == -1)
            throw new INIException("Comment not closed");

        value = current[0.. index].assumeUTF;

        if (commentDef.multiline == false && value.get!string.canFind('\n'))
            throw new INIException("Comment not closed (multiline)");

        sourceOffset += index + commentDef.closing.length;
    }

    /**
     * Reads a key entry: the key name and, when it is directly followed
     * by `=`, its value.
     */
    void readEntry()
    {
        type = INIToken.KEY;
        KeyType key;

        readKey(key);
        if (current[0] == '=') {
            sourceOffset += 1;
            key.value = readValue();
        }

        value = key;
    }

    /**
     * Reads a key name into `key.name`.
     *
     * A quoted name is taken verbatim (and sets `isQuoted`). Otherwise the
     * name runs up to the first `=` on the line, or to the end of the line
     * when there is none; trailing whitespace is always stripped and leading
     * whitespace is stripped when `INIFlags.TrimKeys` is set.
     */
    void readKey(out KeyType key)
    {
        if (tryReadQuote(key.name)) {
            isQuoted = true;
            return;
        }

        auto newLineOffset = current.countUntil('\n');
        if (newLineOffset > 0) { // read until newline/some assign sequence
            auto offset = current[0..newLineOffset].countUntil('=');

            if (offset == -1)
                key.name = current[0 .. newLineOffset].assumeUTF;
            else
                key.name = current[0 .. offset].assumeUTF;

            // Advance by the untrimmed length so stripped whitespace is consumed too.
            sourceOffset += key.name.length;
            key.name = key.name.stripRight;

            static if (Flags & INIFlags.TrimKeys)
                key.name = key.name.stripLeft;
        }
    }


    /**
     * Reads the value following `=` and returns it boxed.
     *
     * A value is treated as quoted only when the closing quote is directly
     * followed by a newline. Unquoted values run to the first newline that is
     * not escaped with a backslash; escaped newlines join the lines, with each
     * line stripped of surrounding whitespace.
     *
     * Returns:
     *  `Boxer(text)` where `text` is the (optionally trimmed and
     *  escape-processed) value.
     *
     * Throws:
     *  `INIException` if the value starts with a quote that is not terminated.
     */
    BoxType readValue()
    {
        // Skip spaces between `=` and the value.
        auto firstNonSpaceIndex = current.countUntil!(a => !isSpace(a));
        if (firstNonSpaceIndex > 0)
            sourceOffset += firstNonSpaceIndex;

        string result = "";
        auto indexBeforeQuotes = sourceOffset;

        isQuoted = tryReadQuote(result);
        auto newlineOffset = current.countUntil('\n');

        // Something follows the closing quote on the same line: rewind and
        // read the whole line as an unquoted value instead.
        if (isQuoted && newlineOffset > 0) {
            sourceOffset = indexBeforeQuotes;
            isQuoted = false;
        }

        if (!isQuoted) {
            bool escaped = false;
            size_t localOffset = 0;
            // Bounded by the end of the source so that a value ending in a line
            // continuation at end of input cannot index past the buffer.
            for (; sourceOffset + localOffset < source.length; ++localOffset) {
                immutable ch = source[sourceOffset + localOffset];

                if (ch == '\\') {
                    escaped = !escaped;
                    continue;
                }

                // Keep the escape alive across the '\r' of an escaped "\r\n".
                else if (escaped && ch == '\r')
                    continue;

                else if (!escaped && ch == '\n')
                    break;

                escaped = false;
            }

            result = current[0..localOffset].assumeUTF.split("\n").map!((line) {
                line = line.stripRight;
                // A line may be empty after stripping (blank continuation line,
                // lone tab or '\r'); guard before looking at its last character.
                if (line.length && line[$-1] == '\\') return line[0..$-1].stripLeft;
                return line.stripLeft;
            }).array.join();
            sourceOffset += localOffset;
        }

        static if (Flags & INIFlags.TrimValues)
            if (!isQuoted)
                result = result.strip;

        static if (Flags & INIFlags.ProcessEscapes)
            result = parseEscapeSequences(result);

        return Boxer(result);
    }

    /**
     * Tries to read a quoted string at the current offset.
     *
     * Params:
     *  result - Receives the text between the quotes on success.
     *
     * Returns:
     *  True if the input started with a quote opening and the quoted text was
     *  consumed, false if no quote definition matched (nothing is consumed).
     *
     * Throws:
     *  `INIException` if the closing sequence is missing, or if a
     *  non-multiline quote contains a newline.
     */
    bool tryReadQuote(out string result)
    {
        int pairIndex;

        if (findPair!`quotes`(pairIndex)) {
            auto quote = Format.quotes[pairIndex];
            sourceOffset += quote.opening.length;

            auto closeIndex = current.countUntil(quote.closing);
            if (closeIndex == -1)
                throw new INIException("Unterminated string literal");

            result = current[0..closeIndex].assumeUTF;
            sourceOffset += result.length + quote.closing.length;

            if (result.canFind('\n') && quote.multiline == false)
                throw new INIException("Unterminated string literal which spans multiple lines (invalid quotes used?)");

            return true;
        }

        return false;
    }

    /**
     * Advances `sourceOffset` past any leading whitespace bytes.
     */
    void skipWhitespaces()
    {
        while (current.length && isWhite(current[0]))
            sourceOffset += 1;
    }

    /// Unread part of the source.
    private immutable(ubyte)[] current() @property {
        return source[sourceOffset..$];
    }
}
569 
570 
571 /**
572  * Universal `INIReader` variant.
573  *
574  * Use this variant if you want to have more compatible parser.
575  *
576  * Specifics:
577  *   - Uses `UniversalINIFormat`.
578  *   - Trims section names, keys and values.
579  *   - Processes escapes in values (e.g. `\n`).
580  */
alias UniversalINIReader = INIReader!(
    UniversalINIFormat,                          // `;`/`#` comments, `"` and `"""` quotes
    INIFlags.TrimAll | INIFlags.ProcessEscapes,  // trim everything, translate escapes
    (string text) => text                        // identity boxer: values stay strings
);
582 
583 
584 /**
585  * Strict `INIReader` variant.
586  *
 * Use this variant if you want to have a more strict (and a bit faster) parser.
588  *
589  * Specifics:
590  *   - Uses `StrictINIFormat`
591  *   - Only Keys are trimmed.
592  *   - No escape sequences are resolved.
593  */
alias StrictINIReader = INIReader!(
    StrictINIFormat,        // `;` comments and `"` quotes only
    INIFlags.TrimKeys,      // only key names are trimmed
    (string text) => text   // identity boxer: values stay strings
);
595 
596 
unittest {
    // Universal reader: `;` comment, triple-quoted multiline value,
    // plain keys and a section header.
    auto reader = UniversalINIReader(`
; comment

multiline = """
  this is
"""

numeric=-100000
numeric2=09843
[section (name)]
@=bar
`);

    // Expects the next token to be a string-valued token of the given kind.
    void expectText(INIToken kind, string text) {
        assert(reader.next());
        assert(reader.type == kind);
        assert(reader.value.get!string == text);
    }

    // Expects the next token to be a key with the given name and value.
    void expectKey(string name, string text) {
        assert(reader.next());
        assert(reader.type == INIToken.KEY);
        assert(reader.key.name == name);
        assert(reader.key.value == text);
    }

    expectText(INIToken.COMMENT, " comment");
    expectKey("multiline", "\n  this is\n");
    expectKey("numeric", "-100000");
    expectKey("numeric2", "09843");
    expectText(INIToken.SECTION, "section (name)");
    expectKey("@", `bar`);

    assert(!reader.next());
}
645 
646 
unittest {
    // Universal reader: `#` comments, key names with spaces, line
    // continuation, quoted key names, empty section and a key without value.
    auto reader = UniversalINIReader(`
####### TEST ########

numeric value=15
ThisIsMultilineValue=thisis\
  verylong # comment
"Floating=Value"=1.51

[] # comment works
JustAKey
`);

    // Expects the next token to be a string-valued token of the given kind.
    void expectText(INIToken kind, string text) {
        assert(reader.next());
        assert(reader.type == kind);
        assert(reader.value.get!string == text);
    }

    // Expects the next token to be a key with the given name and value.
    void expectKey(string name, string text) {
        assert(reader.next());
        assert(reader.type == INIToken.KEY);
        assert(reader.key.name == name);
        assert(reader.key.value == text);
    }

    expectText(INIToken.COMMENT, "###### TEST ########");
    expectKey("numeric value", `15`);
    expectKey("ThisIsMultilineValue", `thisisverylong # comment`);
    expectKey("Floating=Value", `1.51`);
    expectText(INIToken.SECTION, "");
    expectText(INIToken.COMMENT, " comment works");
    expectKey("JustAKey", null);

    assert(!reader.next());
}
697 
unittest {
    // Universal reader: section name is trimmed, quoted and unquoted
    // numeric-looking values all come back as plain text.
    auto reader = UniversalINIReader(`
	[ Debug ]
sNumString=10Test
QuotedNum="10"
QuotedFloat="10.1"
Num=10
Float=10.1
`);

    // Expects the next token to be a key with the given name and value.
    void expectKey(string name, string text) {
        assert(reader.next());
        assert(reader.type == INIToken.KEY);
        assert(reader.key.name == name);
        assert(reader.key.value == text);
    }

    assert(reader.next());
    assert(reader.type == INIToken.SECTION);
    assert(reader.sectionName == "Debug");

    expectKey("sNumString", `10Test`);
    expectKey("QuotedNum", `10`);
    expectKey("QuotedFloat", `10.1`);
    expectKey("Num", "10");
    expectKey("Float", "10.1");

    assert(!reader.next());
}
742 
unittest {
    // Strict reader on the same input: the section name keeps its
    // surrounding spaces, keys and values read the same.
    auto reader = StrictINIReader(`
	[ Debug ]
sNumString=10Test
QuotedNum="10"
QuotedFloat="10.1"
Num=10
Float=10.1
`);

    // Expects the next token to be a key with the given name and value.
    void expectKey(string name, string text) {
        assert(reader.next());
        assert(reader.type == INIToken.KEY);
        assert(reader.key.name == name);
        assert(reader.key.value == text);
    }

    assert(reader.next());
    assert(reader.type == INIToken.SECTION);
    assert(reader.sectionName == " Debug ");

    expectKey("sNumString", `10Test`);
    expectKey("QuotedNum", `10`);
    expectKey("QuotedFloat", `10.1`);
    expectKey("Num", `10`);
    expectKey("Float", `10.1`);

    assert(!reader.next());
}