001/*
002 * MIT License
003 * 
004 * Copyright (c) 2016 Michael Angstadt
005 * 
006 * Permission is hereby granted, free of charge, to any person obtaining a copy
007 * of this software and associated documentation files (the "Software"), to deal
008 * in the Software without restriction, including without limitation the rights
009 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
010 * copies of the Software, and to permit persons to whom the Software is
011 * furnished to do so, subject to the following conditions:
012 * 
013 * The above copyright notice and this permission notice shall be included in
014 * all copies or substantial portions of the Software.
015 * 
016 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
017 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
018 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
019 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
020 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
021 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
022 * SOFTWARE.
023 */
024
025package com.github.mangstadt.vinnie.io;
026
027import static com.github.mangstadt.vinnie.Utils.ltrim;
028import static com.github.mangstadt.vinnie.Utils.rtrim;
029
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
039
040import com.github.mangstadt.vinnie.SyntaxStyle;
041import com.github.mangstadt.vinnie.VObjectProperty;
042import com.github.mangstadt.vinnie.codec.DecoderException;
043import com.github.mangstadt.vinnie.codec.QuotedPrintableCodec;
044
045/**
046 * <p>
047 * Parses a vobject data stream.
048 * </p>
049 * <p>
050 * <b>Example:</b>
051 * </p>
052 * 
053 * <pre class="brush:java">
054 * Reader reader = ...
055 * SyntaxRules rules = SyntaxRules.vcard();
056 * VObjectReader vobjectReader = new VObjectReader(reader, rules);
057 * vobjectReader.parse(new VObjectDataListener(){ ... });
058 * vobjectReader.close();
059 * </pre>
060 * 
061 * <p>
062 * <b>Quoted-printable Encoding</b>
063 * </p>
064 * <p>
065 * Property values encoded in quoted-printable encoding are automatically
066 * decoded. A property value is considered to be encoded in quoted-printable
067 * encoding if it has a "ENCODING=QUOTED-PRINTABLE" parameter. Even though the
068 * property value is automatically decoded, the ENCODING and CHARSET parameters
069 * are not removed from the parsed property object so that the caller can
070 * determine its original encoding.
071 * </p>
072 * 
073 * <pre class="brush:java">
074 * Reader reader = new StringReader("NOTE;ENCODING=QUOTED-PRINTABLE;CHARSET=UTF-8:=C2=A1Hola, mundo!");
075 * VObjectReader vobjectReader = new VObjectReader(reader, ...);
076 * vobjectReader.parse(new VObjectDataAdapter() {
077 *   public void onProperty(VObjectProperty property, Context context) {
078 *     assertEquals("¡Hola, mundo!", property.getValue());
079 *     assertEquals("QUOTED-PRINTABLE", property.getParameters().first("ENCODING"));
080 *     assertEquals("UTF-8", property.getParameters().first("CHARSET"));
081 *   }
082 * });
083 * vobjectReader.close();
084 * </pre>
085 * 
086 * <p>
087 * If a CHARSET parameter is not present in the quoted-printable property, then
088 * the character set of the input stream will be used to decode the value. If
089 * this cannot be determined, then the local JVM's default character set will be
090 * used. However, this behavior can be overridden by supplying your own
091 * character set to use in the event that a CHARSET parameter is not present.
092 * </p>
093 * 
094 * <pre class="brush:java">
095 * Reader reader = new StringReader("NOTE;ENCODING=QUOTED-PRINTABLE:=A1Hola, mundo!");
096 * VObjectReader vobjectReader = new VObjectReader(reader, ...);
097 * vobjectReader.setDefaultQuotedPrintableCharset(Charset.forName("Windows-1252"));
098 * vobjectReader.parse(new VObjectDataAdapter() {
099 *   public void onProperty(VObjectProperty property, Context context) {
100 *     assertEquals("¡Hola, mundo!", property.getValue());
101 *     assertEquals("QUOTED-PRINTABLE", property.getParameters().first("ENCODING"));
102 *     assertNull(property.getParameters().first("CHARSET"));
103 *   }
104 * });
105 * vobjectReader.close();
106 * </pre>
107 * <p>
108 * Nameless ENCODING parameters are also recognized for backwards compatibility
109 * with old-style syntax.
110 * </p>
111 * 
112 * <pre>
113 * NOTE;QUOTED-PRINTABLE;CHARSET=UTF-8:=C2=A1Hola, mundo!
114 * </pre>
115 * 
116 * <p>
117 * If there is an error decoding a quoted-printable value, then a warning will
118 * be emitted and the value will be treated as plain-text.
119 * </p>
120 * 
121 * <pre class="brush:java">
122 * Reader reader = new StringReader("NOTE;ENCODING=QUOTED-PRINTABLE;CHARSET=UTF-8:=ZZ invalid");
123 * VObjectReader vobjectReader = new VObjectReader(reader, ...);
124 * vobjectReader.parse(new VObjectDataAdapter() {
125 *   public void onProperty(VObjectProperty property, Context context) {
126 *     assertEquals("=ZZ invalid", property.getValue());
127 *   }
128 *   public void onWarning(Warning warning, VObjectProperty property, Exception thrown, Context context) {
129 *     assertEquals(Warning.QUOTED_PRINTABLE_ERROR, warning);
130 *   }
131 * });
132 * vobjectReader.close();
133 * </pre>
134 * 
135 * <p>
136 * <b>Circumflex Accent Encoding</b>
137 * </p>
138 * <p>
139 * Circumflex accent encoding allows newlines and double quote characters to be
140 * included inside of parameter values. Parameter values that are encoded using
141 * this encoding scheme are automatically decoded. Note that this encoding
142 * mechanism is only supported by new-style syntax.
143 * </p>
144 * 
145 * <pre class="brush:java">
146 * Reader reader = new StringReader("NOTE;X-AUTHOR=Fox ^'Spooky^' Mulder:The truth is out there.");
147 * VObjectReader vobjectReader = new VObjectReader(reader, new SyntaxRules(SyntaxStyle.NEW));
148 * vobjectReader.parse(new VObjectDataAdapter() {
149 *   public void onProperty(VObjectProperty property, Context context) {
150 *     assertEquals("Fox \"Spooky\" Mulder", property.getParameters().first("X-AUTHOR"));
151 *   }
152 * });
153 * vobjectReader.close();
154 * </pre>
155 * 
156 * <p>
157 * In the rare event that your vobject data has raw "^" characters in its
158 * parameter values, and it does not use this encoding scheme, circumflex accent
159 * decoding can be turned off.
160 * </p>
161 * 
162 * <pre class="brush:java">
163 * Reader reader = new StringReader("NOTE;X-EMOTE=^_^:Good morning!");
164 * VObjectReader vobjectReader = new VObjectReader(reader, new SyntaxRules(SyntaxStyle.NEW));
165 * vobjectReader.setCaretDecodingEnabled(false);
166 * vobjectReader.parse(new VObjectDataAdapter() {
167 *   public void onProperty(VObjectProperty property, Context context) {
168 *     assertEquals("^_^", property.getParameters().first("X-EMOTE"));
169 *   }
170 * });
171 * vobjectReader.close();
172 * </pre>
173 * 
174 * <p>
175 * <b>Line Folding</b>
176 * </p>
177 * <p>
178 * Folded lines are automatically unfolded when read.
179 * </p>
180 * 
181 * <pre class="brush:java">
182 * String string = 
183 * "NOTE:Lorem ipsum dolor sit amet\\, consectetur adipiscing elit. Vestibulum u\r\n" +
184 * " ltricies tempor orci ac dignissim.";
185 * Reader reader = new StringReader(string);
186 * VObjectReader vobjectReader = new VObjectReader(reader, ...);
187 * vobjectReader.parse(new VObjectDataAdapter() {
188 *   public void onProperty(VObjectProperty property, Context context) {
189 *     assertEquals("Lorem ipsum dolor sit amet\\, consectetur adipiscing elit. Vestibulum ultricies tempor orci ac dignissim.", property.getValue());
190 *   }
191 * });
192 * vobjectReader.close();
193 * </pre>
194 * 
195 * @author Michael Angstadt
196 */
197public class VObjectReader implements Closeable {
        /**
         * The local computer's newline character sequence. It is what a
         * {@code ^n} circumflex accent escape sequence inside a parameter value
         * is decoded to.
         */
        private final String NEWLINE = System.getProperty("line.separator");

        /**
         * The input stream that was passed into the constructor.
         */
        private final Reader reader;

        /**
         * The rules that were passed into the constructor. They supply the
         * initial syntax style and are consulted whenever a VERSION property is
         * encountered.
         */
        private final SyntaxRules syntaxRules;

        /**
         * Whether circumflex accent sequences in new style parameter values are
         * decoded (enabled by default).
         */
        private boolean caretDecodingEnabled = true;

        /**
         * The default character set for quoted-printable values. It is
         * initialized by the constructor and can be replaced through its setter.
         */
        private Charset defaultQuotedPrintableCharset;

        /**
         * Keeps track of the components that are currently open (pushed on
         * BEGIN, popped on END) and of the syntax style to parse with.
         */
        private final ComponentStack stack;

        /**
         * String buffer used when tokenizing a property.
         */
        private final Buffer buffer = new Buffer();

        /**
         * Keeps track of the current status of the parser.
         */
        private final Context context;

        /**
         * The character that was read when it was determined that the current
         * property being parsed has ended.
         */
        private int leftOver = -1;

        /**
         * The current line number the parser is on.
         */
        private int lineNumber = 1;

        /**
         * Has the entire stream been consumed?
         */
        private boolean eos = false;
236
237        /**
238         * Creates a new vobject reader.
239         * @param reader the input stream
240         * @param syntaxRules defines the rules that are used to determine what kind
241         * of syntax the data is in
242         */
243        public VObjectReader(Reader reader, SyntaxRules syntaxRules) {
244                this.reader = reader;
245                this.syntaxRules = syntaxRules;
246                stack = new ComponentStack(syntaxRules.getDefaultSyntaxStyle());
247                context = new Context(stack.names);
248
249                if (reader instanceof InputStreamReader) {
250                        InputStreamReader isr = (InputStreamReader) reader;
251                        defaultQuotedPrintableCharset = Charset.forName(isr.getEncoding());
252                } else {
253                        defaultQuotedPrintableCharset = Charset.defaultCharset();
254                }
255        }
256
257        /**
258         * <p>
259         * Gets the default character set to use when decoding quoted-printable
260         * values of properties that lack CHARSET parameters, or of properties whose
261         * CHARSET parameters are not recognized by the local JVM.
262         * </p>
263         * <p>
264         * By default, this is set to the character set of the {@link Reader} object
265         * that this class was constructed with. If the character set of the
266         * {@link Reader} object could not be determined, then it will be set to the
267         * local JVM's default character set.
268         * </p>
269         * @return the default quoted-printable character set
270         */
271        public Charset getDefaultQuotedPrintableCharset() {
272                return defaultQuotedPrintableCharset;
273        }
274
275        /**
276         * <p>
277         * Sets the character set to use when decoding quoted-printable values of
278         * properties that lack CHARSET parameters, or of properties whose CHARSET
279         * parameters are not recognized by the local JVM.
280         * </p>
281         * <p>
282         * By default, this is set to the character set of the {@link Reader} object
283         * that this class was constructed with. If the character set of the
284         * {@link Reader} object could not be determined, then it will be set to the
285         * local JVM's default character set.
286         * </p>
287         * @param charset the default quoted-printable character set (cannot be
288         * null)
289         */
290        public void setDefaultQuotedPrintableCharset(Charset charset) {
291                defaultQuotedPrintableCharset = charset;
292        }
293
294        /**
295         * <p>
296         * Gets whether the reader will decode parameter values that use circumflex
297         * accent encoding (enabled by default). This escaping mechanism allows
298         * newlines and double quotes to be included in parameter values. It is only
299         * supported by new style syntax.
300         * </p>
301         * 
302         * <table class="simpleTable">
303         * <caption>Characters encoded by circumflex accent encoding</caption>
304         * <tr>
305         * <th>Raw Character</th>
306         * <th>Encoded Character</th>
307         * </tr>
308         * <tr>
309         * <td>{@code "}</td>
310         * <td>{@code ^'}</td>
311         * </tr>
312         * <tr>
313         * <td><i>newline</i></td>
314         * <td>{@code ^n}</td>
315         * </tr>
316         * <tr>
317         * <td>{@code ^}</td>
318         * <td>{@code ^^}</td>
319         * </tr>
320         * </table>
321         * 
322         * <p>
323         * Example:
324         * </p>
325         * 
326         * <pre>
327         * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPittsburgh, PA 15212":40.446816;80.00566
328         * </pre>
329         * 
330         * @return true if circumflex accent decoding is enabled, false if not
331         * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a>
332         */
333        public boolean isCaretDecodingEnabled() {
334                return caretDecodingEnabled;
335        }
336
337        /**
338         * <p>
339         * Sets whether the reader will decode parameter values that use circumflex
340         * accent encoding (enabled by default). This escaping mechanism allows
341         * newlines and double quotes to be included in parameter values. It is only
342         * supported by new style syntax.
343         * </p>
344         * 
345         * <table class="simpleTable">
346         * <caption>Characters encoded by circumflex accent encoding</caption>
347         * <tr>
348         * <th>Raw Character</th>
349         * <th>Encoded Character</th>
350         * </tr>
351         * <tr>
352         * <td>{@code "}</td>
353         * <td>{@code ^'}</td>
354         * </tr>
355         * <tr>
356         * <td><i>newline</i></td>
357         * <td>{@code ^n}</td>
358         * </tr>
359         * <tr>
360         * <td>{@code ^}</td>
361         * <td>{@code ^^}</td>
362         * </tr>
363         * </table>
364         * 
365         * <p>
366         * Example:
367         * </p>
368         * 
369         * <pre>
370         * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPittsburgh, PA 15212":geo:40.446816,-80.00566
371         * </pre>
372         * 
373         * @param enable true to use circumflex accent decoding, false not to
374         * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a>
375         */
376        public void setCaretDecodingEnabled(boolean enable) {
377                caretDecodingEnabled = enable;
378        }
379
380        /**
381         * <p>
382         * Starts or continues to parse the data off the input stream.
383         * </p>
384         * <p>
385         * This method blocks until one of the following events happen:
386         * </p>
387         * <ol>
388         * <li>The end of the input stream has been reached or</li>
389         * <li>One of the methods in the given {@link VObjectDataListener}
390         * implementation has invoked {@link Context#stop()}.</li>
391         * </ol>
392         * @param listener callback interface for handling data as it is read off
393         * the input stream
394         * @throws IOException if there's a problem reading from the input stream
395         */
396        public void parse(VObjectDataListener listener) throws IOException {
397                context.stop = false;
398
399                while (!eos && !context.stop) {
400                        context.lineNumber = lineNumber;
401                        buffer.clear();
402                        context.unfoldedLine.clear();
403
404                        VObjectProperty property = parseProperty(listener);
405
406                        if (context.unfoldedLine.size() == 0) {
407                                //input stream was empty
408                                return;
409                        }
410
411                        if (property == null) {
412                                listener.onWarning(Warning.MALFORMED_LINE, null, null, context);
413                                continue;
414                        }
415
416                        /*
417                         * Note: Property names are trimmed when checking for BEGIN and END
418                         * properties because old style syntax allows there to be whitespace
419                         * around the colon character for these two properties. Component
420                         * names are trimmed for the same reason.
421                         */
422
423                        if ("BEGIN".equalsIgnoreCase(property.getName().trim())) {
424                                String componentName = property.getValue().trim().toUpperCase();
425                                if (componentName.length() == 0) {
426                                        listener.onWarning(Warning.EMPTY_BEGIN, null, null, context);
427                                        continue;
428                                }
429
430                                listener.onComponentBegin(componentName, context);
431
432                                stack.push(componentName);
433                                continue;
434                        }
435
436                        if ("END".equalsIgnoreCase(property.getName().trim())) {
437                                String componentName = property.getValue().trim().toUpperCase();
438                                if (componentName.length() == 0) {
439                                        listener.onWarning(Warning.EMPTY_END, null, null, context);
440                                        continue;
441                                }
442
443                                //find the component that this END property matches up with
444                                int popCount = stack.popCount(componentName);
445                                if (popCount == 0) {
446                                        //END property does not match up with any BEGIN properties, so ignore
447                                        listener.onWarning(Warning.UNMATCHED_END, null, null, context);
448                                        continue;
449                                }
450
451                                while (popCount > 0) {
452                                        String poppedName = stack.pop();
453                                        listener.onComponentEnd(poppedName, context);
454                                        popCount--;
455                                }
456                                continue;
457                        }
458
459                        if ("VERSION".equalsIgnoreCase(property.getName())) {
460                                String parentComponent = stack.peekName();
461                                if (syntaxRules.hasSyntaxRules(parentComponent)) {
462                                        SyntaxStyle style = syntaxRules.getSyntaxStyle(parentComponent, property.getValue());
463                                        if (style == null) {
464                                                listener.onWarning(Warning.UNKNOWN_VERSION, property, null, context);
465                                        } else {
466                                                listener.onVersion(property.getValue(), context);
467                                                stack.updateSyntax(style);
468                                                continue;
469                                        }
470                                }
471                        }
472
473                        listener.onProperty(property, context);
474                }
475        }
476
477        /**
478         * Parses the next property off the input stream.
479         * @param listener the data listener (for reporting warnings)
480         * @return the parsed property or null if the property could not be parsed
481         * @throws IOException if there was a problem reading from the input stream
482         */
483        private VObjectProperty parseProperty(VObjectDataListener listener) throws IOException {
484                VObjectProperty property = new VObjectProperty();
485
486                /*
487                 * The syntax style to assume the data is in.
488                 */
489                SyntaxStyle syntax = stack.peekSyntax();
490
491                /*
492                 * The name of the parameter we're currently inside of.
493                 */
494                String curParamName = null;
495
496                /*
497                 * The character that was used to escape the current character (for
498                 * parameter values).
499                 */
500                char paramValueEscapeChar = 0;
501
502                /*
503                 * Are we currently inside a parameter value that is surrounded with
504                 * double-quotes?
505                 */
506                boolean inQuotes = false;
507
508                /*
509                 * Are we currently inside the property value?
510                 */
511                boolean inValue = false;
512
513                /*
514                 * Does the line use quoted-printable encoding, and does it end all of
515                 * its folded lines with a "=" character?
516                 */
517                boolean foldedQuotedPrintableLine = false;
518
519                /*
520                 * Are we currently inside the whitespace that prepends a folded line?
521                 */
522                boolean inFoldedLineWhitespace = false;
523
524                /*
525                 * The current character.
526                 */
527                char ch = 0;
528
529                /*
530                 * The previous character.
531                 */
532                char prevChar;
533
534                while (true) {
535                        prevChar = ch;
536
537                        int read = nextChar();
538                        if (read < 0) {
539                                //end of stream
540                                eos = true;
541                                break;
542                        }
543
544                        ch = (char) read;
545
546                        if (prevChar == '\r' && ch == '\n') {
547                                /*
548                                 * The newline was already processed when the "\r" character was
549                                 * encountered, so ignore the accompanying "\n" character.
550                                 */
551                                continue;
552                        }
553
554                        if (isNewline(ch)) {
555                                foldedQuotedPrintableLine = (inValue && prevChar == '=' && property.getParameters().isQuotedPrintable());
556                                if (foldedQuotedPrintableLine) {
557                                        /*
558                                         * Remove the "=" character that sometimes appears at the
559                                         * end of quoted-printable lines that are followed by a
560                                         * folded line.
561                                         */
562                                        buffer.chop();
563                                        context.unfoldedLine.chop();
564                                }
565
566                                //keep track of the current line number
567                                lineNumber++;
568
569                                continue;
570                        }
571
572                        if (isNewline(prevChar)) {
573                                if (isWhitespace(ch)) {
574                                        /*
575                                         * This line is a continuation of the previous line (the
576                                         * line is folded).
577                                         */
578                                        inFoldedLineWhitespace = true;
579                                        continue;
580                                }
581
582                                if (foldedQuotedPrintableLine) {
583                                        /*
584                                         * The property's parameters indicate that the property
585                                         * value is quoted-printable. And the previous line ended
586                                         * with an equals sign. This means that folding whitespace
587                                         * may not be prepended to folded lines like it should.
588                                         */
589                                } else {
590                                        /*
591                                         * We're reached the end of the property.
592                                         */
593                                        leftOver = ch;
594                                        break;
595                                }
596                        }
597
598                        if (inFoldedLineWhitespace) {
599                                if (isWhitespace(ch) && syntax == SyntaxStyle.OLD) {
600                                        /*
601                                         * 2.1 allows multiple whitespace characters to be used for
602                                         * folding (section 2.1.3).
603                                         */
604                                        continue;
605                                }
606                                inFoldedLineWhitespace = false;
607                        }
608
609                        context.unfoldedLine.append(ch);
610
611                        if (inValue) {
612                                buffer.append(ch);
613                                continue;
614                        }
615
616                        //decode escaped parameter value character
617                        if (paramValueEscapeChar != 0) {
618                                char escapeChar = paramValueEscapeChar;
619                                paramValueEscapeChar = 0;
620
621                                switch (escapeChar) {
622                                case '\\':
623                                        switch (ch) {
624                                        case '\\':
625                                                buffer.append(ch);
626                                                continue;
627                                        case ';':
628                                                /*
629                                                 * Semicolons can only be escaped in old style parameter
630                                                 * values. If a new style parameter value has
631                                                 * semicolons, the value should be surrounded in double
632                                                 * quotes.
633                                                 */
634                                                buffer.append(ch);
635                                                continue;
636                                        }
637                                        break;
638                                case '^':
639                                        switch (ch) {
640                                        case '^':
641                                                buffer.append(ch);
642                                                continue;
643                                        case 'n':
644                                                buffer.append(NEWLINE);
645                                                continue;
646                                        case '\'':
647                                                buffer.append('"');
648                                                continue;
649                                        }
650                                        break;
651                                }
652
653                                /*
654                                 * Treat the escape character as a normal character because it's
655                                 * not a valid escape sequence.
656                                 */
657                                buffer.append(escapeChar).append(ch);
658                                continue;
659                        }
660
661                        //check for a parameter value escape character
662                        if (curParamName != null) {
663                                switch (syntax) {
664                                case OLD:
665                                        if (ch == '\\') {
666                                                paramValueEscapeChar = ch;
667                                                continue;
668                                        }
669                                        break;
670                                case NEW:
671                                        if (ch == '^' && caretDecodingEnabled) {
672                                                paramValueEscapeChar = ch;
673                                                continue;
674                                        }
675                                        break;
676                                }
677                        }
678
679                        //set the group
680                        if (ch == '.' && property.getGroup() == null && property.getName() == null) {
681                                property.setGroup(buffer.getAndClear());
682                                continue;
683                        }
684
685                        if ((ch == ';' || ch == ':') && !inQuotes) {
686                                if (property.getName() == null) {
687                                        //set the property name
688                                        property.setName(buffer.getAndClear());
689                                } else {
690                                        //set a parameter value
691                                        String paramValue = buffer.getAndClear();
692                                        if (syntax == SyntaxStyle.OLD) {
693                                                //old style allows whitespace to surround the "=", so remove it
694                                                paramValue = ltrim(paramValue);
695                                        }
696                                        property.getParameters().put(curParamName, paramValue);
697                                        curParamName = null;
698                                }
699
700                                if (ch == ':') {
701                                        //the rest of the line is the property value
702                                        inValue = true;
703                                }
704                                continue;
705                        }
706
707                        if (property.getName() != null) {
708                                //it's a multi-valued parameter
709                                if (ch == ',' && curParamName != null && !inQuotes && syntax != SyntaxStyle.OLD) {
710                                        String paramValue = buffer.getAndClear();
711                                        property.getParameters().put(curParamName, paramValue);
712                                        continue;
713                                }
714
715                                //set the parameter name
716                                if (ch == '=' && curParamName == null) {
717                                        String paramName = buffer.getAndClear().toUpperCase();
718                                        if (syntax == SyntaxStyle.OLD) {
719                                                //old style allows whitespace to surround the "=", so remove it
720                                                paramName = rtrim(paramName);
721                                        }
722                                        curParamName = paramName;
723                                        continue;
724                                }
725
726                                //entering/leaving a double-quoted parameter value (new style only)
727                                if (ch == '"' && curParamName != null && syntax != SyntaxStyle.OLD) {
728                                        inQuotes = !inQuotes;
729                                        continue;
730                                }
731                        }
732
733                        buffer.append(ch);
734                }
735
736                /*
737                 * Line or stream ended before the property value was reached.
738                 */
739                if (!inValue) {
740                        return null;
741                }
742
743                property.setValue(buffer.getAndClear());
744                if (property.getParameters().isQuotedPrintable()) {
745                        decodeQuotedPrintable(property, listener);
746                }
747
748                return property;
749        }
750
751        /**
752         * Decodes the given property's value from quoted-printable encoding.
753         * @param property the property
754         * @param listener the data listener
755         */
756        private void decodeQuotedPrintable(VObjectProperty property, VObjectDataListener listener) {
757                Charset charset = getCharset(property, listener);
758                if (charset == null) {
759                        charset = defaultQuotedPrintableCharset;
760                }
761
762                String value = property.getValue();
763                QuotedPrintableCodec codec = new QuotedPrintableCodec(charset.name());
764                try {
765                        value = codec.decode(value);
766                } catch (DecoderException e) {
767                        listener.onWarning(Warning.QUOTED_PRINTABLE_ERROR, property, e, context);
768                        return;
769                }
770
771                property.setValue(value);
772        }
773
774        /**
775         * Gets the character set the given property is encoded in.
776         * @param property the property
777         * @param listener the data listener
778         * @return the character set or null if the character is not set or could
779         * not be determined
780         */
781        private Charset getCharset(VObjectProperty property, VObjectDataListener listener) {
782                Exception thrown;
783                try {
784                        return property.getParameters().getCharset();
785                } catch (IllegalCharsetNameException e) {
786                        //name contains illegal characters
787                        thrown = e;
788                } catch (UnsupportedCharsetException e) {
789                        //not recognized by the JVM
790                        thrown = e;
791                }
792
793                listener.onWarning(Warning.UNKNOWN_CHARSET, property, thrown, context);
794                return null;
795        }
796
797        /**
798         * Gets the next character in the input stream.
799         * @return the next character or -1 if the end of the stream has been
800         * reached
801         * @throws IOException if there's a problem reading from the input stream
802         */
803        private int nextChar() throws IOException {
804                if (leftOver >= 0) {
805                        /*
806                         * Use the character that was left over from the previous invocation
807                         * of "readLine()".
808                         */
809                        int ch = leftOver;
810                        leftOver = -1;
811                        return ch;
812                }
813
814                return reader.read();
815        }
816
817        /**
818         * Determines if the given character is a newline character.
819         * @param ch the character
820         * @return true if it's a newline character, false if not
821         */
822        private static boolean isNewline(char ch) {
823                return ch == '\n' || ch == '\r';
824        }
825
826        /**
827         * Determines if the given character is a space or a tab.
828         * @param ch the character
829         * @return true if it's a space or a tab, false if not
830         */
831        private static boolean isWhitespace(char ch) {
832                return ch == ' ' || ch == '\t';
833        }
834
835        /**
836         * Keeps track of the hierarchy of nested components and their syntax
837         * styles.
838         */
839        private static class ComponentStack {
840                /**
841                 * The hierarchy of components the parser is currently inside of.
842                 */
843                private final List<String> names = new ArrayList<String>();
844
845                /**
846                 * <p>
847                 * The syntax style of each component in the hierarchy.
848                 * </p>
849                 * 
850                 * <p>
851                 * Note: This will always be one element larger than the "names" list
852                 * because it must remember the style of the "root" (for properties that
853                 * are not inside of a component, should there happen to be any).
854                 * </p>
855                 */
856                private final List<SyntaxStyle> syntax = new ArrayList<SyntaxStyle>();
857
858                /**
859                 * Creates a new stack.
860                 * @param defaultSyntax the default syntax style
861                 */
862                public ComponentStack(SyntaxStyle defaultSyntax) {
863                        syntax.add(defaultSyntax);
864                }
865
866                /**
867                 * Pushes a component onto the stack.
868                 * @param component the component name
869                 */
870                public void push(String component) {
871                        names.add(component);
872                        syntax.add(peekSyntax());
873                }
874
875                /**
876                 * Removes the top component from the stack.
877                 * @return the name of the component that was removed
878                 */
879                public String pop() {
880                        syntax.remove(syntax.size() - 1);
881                        return names.remove(names.size() - 1);
882                }
883
884                /**
885                 * Gets the number of calls to {@link #pop()} it would take to pop the
886                 * given component name.
887                 * @param name the component name
888                 * @return the number of pops or 0 if the name was not found
889                 */
890                public int popCount(String name) {
891                        int index = names.lastIndexOf(name);
892                        return (index < 0) ? 0 : names.size() - index;
893                }
894
895                /**
896                 * Gets the top component name.
897                 * @return the top component name or null if the name stack is empty
898                 */
899                public String peekName() {
900                        return names.isEmpty() ? null : names.get(names.size() - 1);
901                }
902
903                /**
904                 * Gets the top syntax style.
905                 * @return the top syntax style or null if the syntax stack is empty
906                 */
907                public SyntaxStyle peekSyntax() {
908                        return syntax.isEmpty() ? null : syntax.get(syntax.size() - 1);
909                }
910
911                /**
912                 * Replaces the top syntax style.
913                 * @param style the syntax style
914                 */
915                public void updateSyntax(SyntaxStyle style) {
916                        syntax.set(syntax.size() - 1, style);
917                }
918        }
919
        /**
         * Closes the underlying reader that this object reads its input from.
         * @throws IOException if the underlying reader fails to close
         */
        public void close() throws IOException {
                reader.close();
        }
926}