001 /*
002 * Copyright 2007-2016 UnboundID Corp.
003 * All Rights Reserved.
004 */
005 /*
006 * Copyright (C) 2008-2016 UnboundID Corp.
007 *
008 * This program is free software; you can redistribute it and/or modify
009 * it under the terms of the GNU General Public License (GPLv2 only)
010 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
011 * as published by the Free Software Foundation.
012 *
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
016 * GNU General Public License for more details.
017 *
018 * You should have received a copy of the GNU General Public License
019 * along with this program; if not, see <http://www.gnu.org/licenses>.
020 */
021 package com.unboundid.ldif;
022
023
024
025 import java.io.BufferedReader;
026 import java.io.BufferedWriter;
027 import java.io.Closeable;
028 import java.io.File;
029 import java.io.FileInputStream;
030 import java.io.FileWriter;
031 import java.io.InputStream;
032 import java.io.InputStreamReader;
033 import java.io.IOException;
034 import java.text.ParseException;
035 import java.util.ArrayList;
036 import java.util.Collection;
037 import java.util.Iterator;
038 import java.util.HashSet;
039 import java.util.LinkedHashMap;
040 import java.util.List;
041 import java.util.Set;
042 import java.util.concurrent.BlockingQueue;
043 import java.util.concurrent.ArrayBlockingQueue;
044 import java.util.concurrent.TimeUnit;
045 import java.util.concurrent.atomic.AtomicBoolean;
046 import java.nio.charset.Charset;
047
048 import com.unboundid.asn1.ASN1OctetString;
049 import com.unboundid.ldap.matchingrules.CaseIgnoreStringMatchingRule;
050 import com.unboundid.ldap.matchingrules.MatchingRule;
051 import com.unboundid.ldap.sdk.Attribute;
052 import com.unboundid.ldap.sdk.Control;
053 import com.unboundid.ldap.sdk.Entry;
054 import com.unboundid.ldap.sdk.Modification;
055 import com.unboundid.ldap.sdk.ModificationType;
056 import com.unboundid.ldap.sdk.LDAPException;
057 import com.unboundid.ldap.sdk.schema.AttributeTypeDefinition;
058 import com.unboundid.ldap.sdk.schema.Schema;
059 import com.unboundid.util.AggregateInputStream;
060 import com.unboundid.util.Base64;
061 import com.unboundid.util.LDAPSDKThreadFactory;
062 import com.unboundid.util.ThreadSafety;
063 import com.unboundid.util.ThreadSafetyLevel;
064 import com.unboundid.util.parallel.AsynchronousParallelProcessor;
065 import com.unboundid.util.parallel.Result;
066 import com.unboundid.util.parallel.ParallelProcessor;
067 import com.unboundid.util.parallel.Processor;
068
069 import static com.unboundid.ldif.LDIFMessages.*;
070 import static com.unboundid.util.Debug.*;
071 import static com.unboundid.util.StaticUtils.*;
072 import static com.unboundid.util.Validator.*;
073
074 /**
075 * This class provides an LDIF reader, which can be used to read and decode
076 * entries and change records from a data source using the LDAP Data Interchange
077 * Format as per <A HREF="http://www.ietf.org/rfc/rfc2849.txt">RFC 2849</A>.
078 * <BR>
079 * This class is not synchronized. If multiple threads read from the
080 * LDIFReader, they must be synchronized externally.
081 * <BR><BR>
082 * <H2>Example</H2>
083 * The following example iterates through all entries contained in an LDIF file
084 * and attempts to add them to a directory server:
085 * <PRE>
086 * LDIFReader ldifReader = new LDIFReader(pathToLDIFFile);
087 *
088 * int entriesRead = 0;
089 * int entriesAdded = 0;
090 * int errorsEncountered = 0;
091 * while (true)
092 * {
093 * Entry entry;
094 * try
095 * {
096 * entry = ldifReader.readEntry();
097 * if (entry == null)
098 * {
099 * // All entries have been read.
100 * break;
101 * }
102 *
103 * entriesRead++;
104 * }
105 * catch (LDIFException le)
106 * {
107 * errorsEncountered++;
108 * if (le.mayContinueReading())
109 * {
110 * // A recoverable error occurred while attempting to read an entry,
111 * // at or near line number le.getLineNumber()
112 * // The entry will be skipped, but we'll try to keep reading from the
113 * // LDIF file.
114 * continue;
115 * }
116 * else
117 * {
118 * // An unrecoverable error occurred while attempting to read an entry
119 * // at or near line number le.getLineNumber()
120 * // No further LDIF processing will be performed.
121 * break;
122 * }
123 * }
124 * catch (IOException ioe)
125 * {
126 * // An I/O error occurred while attempting to read from the LDIF file.
127 * // No further LDIF processing will be performed.
128 * errorsEncountered++;
129 * break;
130 * }
131 *
132 * LDAPResult addResult;
133 * try
134 * {
135 * addResult = connection.add(entry);
136 * // If we got here, then the change should have been processed
137 * // successfully.
138 * entriesAdded++;
139 * }
140 * catch (LDAPException le)
141 * {
142 * // If we got here, then the change attempt failed.
143 * addResult = le.toLDAPResult();
144 * errorsEncountered++;
145 * }
146 * }
147 *
148 * ldifReader.close();
149 * </PRE>
150 */
151 @ThreadSafety(level=ThreadSafetyLevel.NOT_THREADSAFE)
152 public final class LDIFReader
153 implements Closeable
154 {
155 /**
156 * The default buffer size (128KB) that will be used when reading from the
157 * data source.
158 */
159 public static final int DEFAULT_BUFFER_SIZE = 128 * 1024;
160
161
162
163 /*
164 * When processing asynchronously, this determines how many of the allocated
165 * worker threads are used to parse each batch of read entries.
166 */
167 private static final int ASYNC_MIN_PER_PARSING_THREAD = 3;
168
169
170
171 /**
172 * When processing asynchronously, this specifies the size of the pending and
173 * completed queues.
174 */
175 private static final int ASYNC_QUEUE_SIZE = 500;
176
177
178
179 /**
180 * Special entry used internally to signal that the LDIFReaderEntryTranslator
181 * has signalled that a read Entry should be skipped by returning null,
182 * which normally implies EOF.
183 */
184 private static final Entry SKIP_ENTRY = new Entry("cn=skipped");
185
186
187
188 /**
189 * The default base path that will be prepended to relative paths. It will
190 * end with a trailing slash.
191 */
private static final String DEFAULT_RELATIVE_BASE_PATH;
static
{
  // Fall back to "." when the JVM does not expose a working directory.
  final String workingDirProperty = System.getProperty("user.dir");
  final File workingDir =
       new File((workingDirProperty == null) ? "." : workingDirProperty);

  // Guarantee exactly one trailing separator so that relative paths can be
  // appended to this value directly.
  final String absolutePath = workingDir.getAbsolutePath();
  DEFAULT_RELATIVE_BASE_PATH = absolutePath.endsWith(File.separator)
       ? absolutePath
       : (absolutePath + File.separator);
}
216
217
218
219 // The buffered reader that will be used to read LDIF data.
220 private final BufferedReader reader;
221
222 // The behavior that should be exhibited when encountering duplicate attribute
223 // values.
224 private volatile DuplicateValueBehavior duplicateValueBehavior;
225
226 // A line number counter.
227 private long lineNumberCounter = 0;
228
229 // The change record translator to use, if any.
230 private final LDIFReaderChangeRecordTranslator changeRecordTranslator;
231
232 // The entry translator to use, if any.
233 private final LDIFReaderEntryTranslator entryTranslator;
234
235 // The schema that will be used when processing, if applicable.
236 private Schema schema;
237
238 // Specifies the base path that will be prepended to relative paths for file
239 // URLs.
240 private volatile String relativeBasePath;
241
242 // The behavior that should be exhibited with regard to illegal trailing
243 // spaces in attribute values.
244 private volatile TrailingSpaceBehavior trailingSpaceBehavior;
245
246 // True iff we are processing asynchronously.
247 private final boolean isAsync;
248
249 //
250 // The following only apply to asynchronous processing.
251 //
252
253 // Parses entries asynchronously.
254 private final AsynchronousParallelProcessor<UnparsedLDIFRecord, LDIFRecord>
255 asyncParser;
256
257 // Set to true when the end of the input is reached.
258 private final AtomicBoolean asyncParsingComplete;
259
260 // The records that have been read and parsed.
261 private final BlockingQueue<Result<UnparsedLDIFRecord, LDIFRecord>>
262 asyncParsedRecords;
263
264
265
266 /**
267 * Creates a new LDIF reader that will read data from the specified file.
268 *
269 * @param path The path to the file from which the data is to be read. It
270 * must not be {@code null}.
271 *
272 * @throws IOException If a problem occurs while opening the file for
273 * reading.
274 */
public LDIFReader(final String path)
       throws IOException
{
  // Resolve the path to a File and let the File-based constructor open it;
  // FileInputStream(String) performs the same File wrapping internally.
  this(new File(path));
}
280
281
282
283 /**
284 * Creates a new LDIF reader that will read data from the specified file
285 * and parses the LDIF records asynchronously using the specified number of
286 * threads.
287 *
288 * @param path The path to the file from which the data is to be read. It
289 * must not be {@code null}.
290 * @param numParseThreads If this value is greater than zero, then the
291 * specified number of threads will be used to
292 * asynchronously read and parse the LDIF file.
293 *
294 * @throws IOException If a problem occurs while opening the file for
295 * reading.
296 *
297 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
298 * constructor for more details about asynchronous processing.
299 */
public LDIFReader(final String path, final int numParseThreads)
       throws IOException
{
  // Resolve the path to a File and let the File-based constructor open it
  // with the requested number of parsing threads.
  this(new File(path), numParseThreads);
}
305
306
307
308 /**
309 * Creates a new LDIF reader that will read data from the specified file.
310 *
311 * @param file The file from which the data is to be read. It must not be
312 * {@code null}.
313 *
314 * @throws IOException If a problem occurs while opening the file for
315 * reading.
316 */
public LDIFReader(final File file)
       throws IOException
{
  // Zero parse threads selects synchronous parsing.
  this(new FileInputStream(file), 0);
}
322
323
324
325 /**
326 * Creates a new LDIF reader that will read data from the specified file
327 * and optionally parses the LDIF records asynchronously using the specified
328 * number of threads.
329 *
330 * @param file The file from which the data is to be read. It
331 * must not be {@code null}.
332 * @param numParseThreads If this value is greater than zero, then the
333 * specified number of threads will be used to
334 * asynchronously read and parse the LDIF file.
335 *
336 * @throws IOException If a problem occurs while opening the file for
337 * reading.
338 */
public LDIFReader(final File file, final int numParseThreads)
       throws IOException
{
  // No translators are applied, and the data is decoded as UTF-8.
  this(new FileInputStream(file), numParseThreads, null, null, "UTF-8");
}
344
345
346
347 /**
348 * Creates a new LDIF reader that will read data from the specified files in
349 * the order in which they are provided and optionally parses the LDIF records
350 * asynchronously using the specified number of threads.
351 *
352 * @param files The files from which the data is to be read. It
353 * must not be {@code null} or empty.
354 * @param numParseThreads If this value is greater than zero, then the
355 * specified number of threads will be used to
356 * asynchronously read and parse the LDIF file.
357 * @param entryTranslator The LDIFReaderEntryTranslator to apply to entries
358 * before they are returned. This is normally
359 * {@code null}, which causes entries to be returned
360 * unaltered. This is particularly useful when
361 * parsing the input file in parallel because the
362 * entry translation is also done in parallel.
363 *
364 * @throws IOException If a problem occurs while opening the file for
365 * reading.
366 */
public LDIFReader(final File[] files, final int numParseThreads,
                  final LDIFReaderEntryTranslator entryTranslator)
       throws IOException
{
  // No change record translator is used, and the files are read as UTF-8.
  this(files, numParseThreads, entryTranslator, null, "UTF-8");
}
373
374
375
376 /**
377 * Creates a new LDIF reader that will read data from the specified files in
378 * the order in which they are provided and optionally parses the LDIF records
379 * asynchronously using the specified number of threads.
380 *
381 * @param files The files from which the data is to be
382 * read. It must not be {@code null} or
383 * empty.
384 * @param numParseThreads If this value is greater than zero, then
385 * the specified number of threads will be
386 * used to asynchronously read and parse the
387 * LDIF file.
388 * @param entryTranslator The LDIFReaderEntryTranslator to apply to
389 * entries before they are returned. This is
390 * normally {@code null}, which causes entries
391 * to be returned unaltered. This is
392 * particularly useful when parsing the input
393 * file in parallel because the entry
394 * translation is also done in parallel.
395 * @param changeRecordTranslator The LDIFReaderChangeRecordTranslator to
396 * apply to change records before they are
397 * returned. This is normally {@code null},
398 * which causes change records to be returned
399 * unaltered. This is particularly useful
400 * when parsing the input file in parallel
401 * because the change record translation is
402 * also done in parallel.
403 *
404 * @throws IOException If a problem occurs while opening the file for
405 * reading.
406 */
public LDIFReader(final File[] files, final int numParseThreads,
            final LDIFReaderEntryTranslator entryTranslator,
            final LDIFReaderChangeRecordTranslator changeRecordTranslator)
       throws IOException
{
  // Combine the files into a single stream and hand it to the stream-based
  // constructor, which reads the data as UTF-8.
  this(createAggregateInputStream(files), numParseThreads, entryTranslator,
       changeRecordTranslator);
}
415
416
417
418 /**
419 * Creates a new LDIF reader that will read data from the specified files in
420 * the order in which they are provided and optionally parses the LDIF records
421 * asynchronously using the specified number of threads.
422 *
423 * @param files The files from which the data is to be
424 * read. It must not be {@code null} or
425 * empty.
426 * @param numParseThreads If this value is greater than zero, then
427 * the specified number of threads will be
428 * used to asynchronously read and parse the
429 * LDIF file.
430 * @param entryTranslator The LDIFReaderEntryTranslator to apply to
431 * entries before they are returned. This is
432 * normally {@code null}, which causes entries
433 * to be returned unaltered. This is
434 * particularly useful when parsing the input
435 * file in parallel because the entry
436 * translation is also done in parallel.
437 * @param changeRecordTranslator The LDIFReaderChangeRecordTranslator to
438 * apply to change records before they are
439 * returned. This is normally {@code null},
440 * which causes change records to be returned
441 * unaltered. This is particularly useful
442 * when parsing the input file in parallel
443 * because the change record translation is
444 * also done in parallel.
445 * @param characterSet The character set to use when reading from
446 * the input stream. It must not be
447 * {@code null}.
448 *
449 * @throws IOException If a problem occurs while opening the file for
450 * reading.
451 */
public LDIFReader(final File[] files, final int numParseThreads,
            final LDIFReaderEntryTranslator entryTranslator,
            final LDIFReaderChangeRecordTranslator changeRecordTranslator,
            final String characterSet)
       throws IOException
{
  // The aggregate stream is created first and the character set is resolved
  // afterwards, which is the same order of evaluation as delegating through
  // the stream-based constructor.
  this(new BufferedReader(
            new InputStreamReader(createAggregateInputStream(files),
                 Charset.forName(characterSet)),
            DEFAULT_BUFFER_SIZE),
       numParseThreads, entryTranslator, changeRecordTranslator);
}
461
462
463
464 /**
465 * Creates a new aggregate input stream that will read data from the specified
466 * files. If there are multiple files, then a "padding" file will be inserted
467 * between them to ensure that there is at least one blank line between the
468 * end of one file and the beginning of another.
469 *
470 * @param files The files from which the data is to be read. It must not be
471 * {@code null} or empty.
472 *
473 * @return The input stream to use to read data from the provided files.
474 *
475 * @throws IOException If a problem is encountered while attempting to
476 * create the input stream.
477 */
478 private static InputStream createAggregateInputStream(final File... files)
479 throws IOException
480 {
481 if (files.length == 0)
482 {
483 throw new IOException(ERR_READ_NO_LDIF_FILES.get());
484 }
485 else if (files.length == 1)
486 {
487 return new FileInputStream(files[0]);
488 }
489 else
490 {
491 final File spacerFile =
492 File.createTempFile("ldif-reader-spacer", ".ldif");
493 spacerFile.deleteOnExit();
494
495 final BufferedWriter spacerWriter =
496 new BufferedWriter(new FileWriter(spacerFile));
497 try
498 {
499 spacerWriter.newLine();
500 spacerWriter.newLine();
501 }
502 finally
503 {
504 spacerWriter.close();
505 }
506
507 final File[] returnArray = new File[(files.length * 2) - 1];
508 returnArray[0] = files[0];
509
510 int pos = 1;
511 for (int i=1; i < files.length; i++)
512 {
513 returnArray[pos++] = spacerFile;
514 returnArray[pos++] = files[i];
515 }
516
517 return new AggregateInputStream(returnArray);
518 }
519 }
520
521
522
523 /**
524 * Creates a new LDIF reader that will read data from the provided input
525 * stream.
526 *
527 * @param inputStream The input stream from which the data is to be read.
528 * It must not be {@code null}.
529 */
public LDIFReader(final InputStream inputStream)
{
  // Synchronous parsing (zero parse threads), no translators, UTF-8 data.
  this(inputStream, 0, null, null, "UTF-8");
}
534
535
536
537 /**
538 * Creates a new LDIF reader that will read data from the specified stream
539 * and parses the LDIF records asynchronously using the specified number of
540 * threads.
541 *
542 * @param inputStream The input stream from which the data is to be read.
543 * It must not be {@code null}.
544 * @param numParseThreads If this value is greater than zero, then the
545 * specified number of threads will be used to
546 * asynchronously read and parse the LDIF file.
547 *
548 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
549 * constructor for more details about asynchronous processing.
550 */
public LDIFReader(final InputStream inputStream, final int numParseThreads)
{
  // UTF-8 is required by RFC 2849.  Java guarantees it's always available.
  // The charset-aware constructor wraps the stream in the same buffered
  // UTF-8 reader, and no translators are applied.
  this(inputStream, numParseThreads, null, null, "UTF-8");
}
559
560
561
562 /**
563 * Creates a new LDIF reader that will read data from the specified stream
564 * and parses the LDIF records asynchronously using the specified number of
565 * threads.
566 *
567 * @param inputStream The input stream from which the data is to be read.
568 * It must not be {@code null}.
569 * @param numParseThreads If this value is greater than zero, then the
570 * specified number of threads will be used to
571 * asynchronously read and parse the LDIF file.
572 * @param entryTranslator The LDIFReaderEntryTranslator to apply to read
573 * entries before they are returned. This is normally
574 * {@code null}, which causes entries to be returned
575 * unaltered. This is particularly useful when parsing
576 * the input file in parallel because the entry
577 * translation is also done in parallel.
578 *
579 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
580 * constructor for more details about asynchronous processing.
581 */
public LDIFReader(final InputStream inputStream, final int numParseThreads,
                  final LDIFReaderEntryTranslator entryTranslator)
{
  // No change record translator is used, and the stream is read as UTF-8.
  this(inputStream, numParseThreads, entryTranslator, null, "UTF-8");
}
587
588
589
590 /**
591 * Creates a new LDIF reader that will read data from the specified stream
592 * and parses the LDIF records asynchronously using the specified number of
593 * threads.
594 *
595 * @param inputStream The input stream from which the data is to
596 * be read. It must not be {@code null}.
597 * @param numParseThreads If this value is greater than zero, then
598 * the specified number of threads will be
599 * used to asynchronously read and parse the
600 * LDIF file.
601 * @param entryTranslator The LDIFReaderEntryTranslator to apply to
602 * entries before they are returned. This is
603 * normally {@code null}, which causes entries
604 * to be returned unaltered. This is
605 * particularly useful when parsing the input
606 * file in parallel because the entry
607 * translation is also done in parallel.
608 * @param changeRecordTranslator The LDIFReaderChangeRecordTranslator to
609 * apply to change records before they are
610 * returned. This is normally {@code null},
611 * which causes change records to be returned
612 * unaltered. This is particularly useful
613 * when parsing the input file in parallel
614 * because the change record translation is
615 * also done in parallel.
616 *
617 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
618 * constructor for more details about asynchronous processing.
619 */
public LDIFReader(final InputStream inputStream, final int numParseThreads,
            final LDIFReaderEntryTranslator entryTranslator,
            final LDIFReaderChangeRecordTranslator changeRecordTranslator)
{
  // UTF-8 is required by RFC 2849.  Java guarantees it's always available.
  this(new BufferedReader(
            new InputStreamReader(inputStream, Charset.forName("UTF-8")),
            DEFAULT_BUFFER_SIZE),
       numParseThreads, entryTranslator, changeRecordTranslator);
}
628
629
630
631 /**
632 * Creates a new LDIF reader that will read data from the specified stream
633 * and parses the LDIF records asynchronously using the specified number of
634 * threads.
635 *
636 * @param inputStream The input stream from which the data is to
637 * be read. It must not be {@code null}.
638 * @param numParseThreads If this value is greater than zero, then
639 * the specified number of threads will be
640 * used to asynchronously read and parse the
641 * LDIF file.
642 * @param entryTranslator The LDIFReaderEntryTranslator to apply to
643 * entries before they are returned. This is
644 * normally {@code null}, which causes entries
645 * to be returned unaltered. This is
646 * particularly useful when parsing the input
647 * file in parallel because the entry
648 * translation is also done in parallel.
649 * @param changeRecordTranslator The LDIFReaderChangeRecordTranslator to
650 * apply to change records before they are
651 * returned. This is normally {@code null},
652 * which causes change records to be returned
653 * unaltered. This is particularly useful
654 * when parsing the input file in parallel
655 * because the change record translation is
656 * also done in parallel.
657 * @param characterSet The character set to use when reading from
658 * the input stream. It must not be
659 * {@code null}.
660 *
661 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
662 * constructor for more details about asynchronous processing.
663 */
public LDIFReader(final InputStream inputStream, final int numParseThreads,
            final LDIFReaderEntryTranslator entryTranslator,
            final LDIFReaderChangeRecordTranslator changeRecordTranslator,
            final String characterSet)
{
  // Decode the stream with the requested character set and buffer it with
  // the default buffer size before handing it to the reader-based
  // constructor.
  this(
       new BufferedReader(
            new InputStreamReader(
                 inputStream,
                 Charset.forName(characterSet)),
            DEFAULT_BUFFER_SIZE),
       numParseThreads,
       entryTranslator,
       changeRecordTranslator);
}
674
675
676
677 /**
678 * Creates a new LDIF reader that will use the provided buffered reader to
679 * read the LDIF data. The encoding of the underlying Reader must be set to
680 * "UTF-8" as required by RFC 2849.
681 *
682 * @param reader The buffered reader that will be used to read the LDIF
683 * data. It must not be {@code null}.
684 */
public LDIFReader(final BufferedReader reader)
{
  // Synchronous parsing (zero parse threads) with no translators.
  this(reader, 0, null, null);
}
689
690
691
692 /**
693 * Creates a new LDIF reader that will read data from the specified buffered
694 * reader and parses the LDIF records asynchronously using the specified
695 * number of threads. The encoding of the underlying Reader must be set to
696 * "UTF-8" as required by RFC 2849.
697 *
698 * @param reader The buffered reader that will be used to read the LDIF data.
699 * It must not be {@code null}.
700 * @param numParseThreads If this value is greater than zero, then the
701 * specified number of threads will be used to
702 * asynchronously read and parse the LDIF file.
703 *
704 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
705 * constructor for more details about asynchronous processing.
706 */
public LDIFReader(final BufferedReader reader, final int numParseThreads)
{
  // Neither an entry translator nor a change record translator is used.
  this(reader, numParseThreads, null, null);
}
711
712
713
714 /**
715 * Creates a new LDIF reader that will read data from the specified buffered
716 * reader and parses the LDIF records asynchronously using the specified
717 * number of threads. The encoding of the underlying Reader must be set to
718 * "UTF-8" as required by RFC 2849.
719 *
720 * @param reader The buffered reader that will be used to read the LDIF data.
721 * It must not be {@code null}.
722 * @param numParseThreads If this value is greater than zero, then the
723 * specified number of threads will be used to
724 * asynchronously read and parse the LDIF file.
725 * This should only be set to greater than zero when
726 * performance analysis has demonstrated that reading
727 * and parsing the LDIF is a bottleneck. The default
728 * synchronous processing is normally fast enough.
729 * There is little benefit in passing in a value
730 * greater than four (unless there is an
731 * LDIFReaderEntryTranslator that does time-consuming
732 * processing). A value of zero implies the
733 * default behavior of reading and parsing LDIF
734 * records synchronously when one of the read
735 * methods is called.
736 * @param entryTranslator The LDIFReaderEntryTranslator to apply to read
737 * entries before they are returned. This is normally
738 * {@code null}, which causes entries to be returned
739 * unaltered. This is particularly useful when parsing
740 * the input file in parallel because the entry
741 * translation is also done in parallel.
742 */
public LDIFReader(final BufferedReader reader,
                  final int numParseThreads,
                  final LDIFReaderEntryTranslator entryTranslator)
{
  // The explicitly typed null makes clear that it is the change record
  // translator that is being omitted.
  this(reader, numParseThreads, entryTranslator,
       (LDIFReaderChangeRecordTranslator) null);
}
749
750
751
752 /**
753 * Creates a new LDIF reader that will read data from the specified buffered
754 * reader and parses the LDIF records asynchronously using the specified
755 * number of threads. The encoding of the underlying Reader must be set to
756 * "UTF-8" as required by RFC 2849.
757 *
758 * @param reader The buffered reader that will be used to
759 * read the LDIF data. It must not be
760 * {@code null}.
761 * @param numParseThreads If this value is greater than zero, then
762 * the specified number of threads will be
763 * used to asynchronously read and parse the
764 * LDIF file.
765 * @param entryTranslator The LDIFReaderEntryTranslator to apply to
766 * entries before they are returned. This is
767 * normally {@code null}, which causes entries
768 * to be returned unaltered. This is
769 * particularly useful when parsing the input
770 * file in parallel because the entry
771 * translation is also done in parallel.
772 * @param changeRecordTranslator The LDIFReaderChangeRecordTranslator to
773 * apply to change records before they are
774 * returned. This is normally {@code null},
775 * which causes change records to be returned
776 * unaltered. This is particularly useful
777 * when parsing the input file in parallel
778 * because the change record translation is
779 * also done in parallel.
780 */
public LDIFReader(final BufferedReader reader, final int numParseThreads,
            final LDIFReaderEntryTranslator entryTranslator,
            final LDIFReaderChangeRecordTranslator changeRecordTranslator)
{
  // Validate the arguments before any state is initialized.
  ensureNotNull(reader);
  ensureTrue(numParseThreads >= 0,
             "LDIFReader.numParseThreads must not be negative.");

  this.reader = reader;
  this.entryTranslator = entryTranslator;
  this.changeRecordTranslator = changeRecordTranslator;

  // Default parsing behaviors.
  duplicateValueBehavior = DuplicateValueBehavior.STRIP;
  trailingSpaceBehavior = TrailingSpaceBehavior.REJECT;
  relativeBasePath = DEFAULT_RELATIVE_BASE_PATH;

  // Any non-zero thread count selects asynchronous parsing.
  isAsync = (numParseThreads != 0);
  if (isAsync)
  {
    asyncParsingComplete = new AtomicBoolean(false);

    // Decodes entries in parallel.
    final LDAPSDKThreadFactory workerThreadFactory =
         new LDAPSDKThreadFactory("LDIFReader Worker", true, null);
    final ParallelProcessor<UnparsedLDIFRecord, LDIFRecord> recordProcessor =
         new ParallelProcessor<UnparsedLDIFRecord, LDIFRecord>(
              new RecordParser(), workerThreadFactory, numParseThreads,
              ASYNC_MIN_PER_PARSING_THREAD);

    final BlockingQueue<UnparsedLDIFRecord> unparsedRecords =
         new ArrayBlockingQueue<UnparsedLDIFRecord>(ASYNC_QUEUE_SIZE);

    // The output queue must be a little more than twice as big as the input
    // queue to more easily handle being shutdown in the middle of processing
    // when the queues are full and threads are blocked.
    asyncParsedRecords =
         new ArrayBlockingQueue<Result<UnparsedLDIFRecord, LDIFRecord>>(
              (2 * ASYNC_QUEUE_SIZE) + 100);

    asyncParser =
         new AsynchronousParallelProcessor<UnparsedLDIFRecord, LDIFRecord>(
              unparsedRecords, recordProcessor, asyncParsedRecords);

    // Start the line reader thread last, once all of the asynchronous state
    // above has been assigned.
    new LineReaderThread().start();
  }
  else
  {
    // Synchronous mode uses none of the asynchronous machinery.
    asyncParser = null;
    asyncParsingComplete = null;
    asyncParsedRecords = null;
  }
}
835
836
837
838 /**
839 * Reads entries from the LDIF file with the specified path and returns them
840 * as a {@code List}. This is a convenience method that should only be used
841 * for data sets that are small enough so that running out of memory isn't a
842 * concern.
843 *
844 * @param path The path to the LDIF file containing the entries to be read.
845 *
846 * @return A list of the entries read from the given LDIF file.
847 *
848 * @throws IOException If a problem occurs while attempting to read data
849 * from the specified file.
850 *
851 * @throws LDIFException If a problem is encountered while attempting to
852 * decode data read as LDIF.
853 */
854 public static List<Entry> readEntries(final String path)
855 throws IOException, LDIFException
856 {
857 return readEntries(new LDIFReader(path));
858 }
859
860
861
862 /**
863 * Reads entries from the specified LDIF file and returns them as a
864 * {@code List}. This is a convenience method that should only be used for
865 * data sets that are small enough so that running out of memory isn't a
866 * concern.
867 *
868 * @param file A reference to the LDIF file containing the entries to be
869 * read.
870 *
871 * @return A list of the entries read from the given LDIF file.
872 *
873 * @throws IOException If a problem occurs while attempting to read data
874 * from the specified file.
875 *
876 * @throws LDIFException If a problem is encountered while attempting to
877 * decode data read as LDIF.
878 */
879 public static List<Entry> readEntries(final File file)
880 throws IOException, LDIFException
881 {
882 return readEntries(new LDIFReader(file));
883 }
884
885
886
887 /**
888 * Reads and decodes LDIF entries from the provided input stream and
889 * returns them as a {@code List}. This is a convenience method that should
890 * only be used for data sets that are small enough so that running out of
891 * memory isn't a concern.
892 *
893 * @param inputStream The input stream from which the entries should be
894 * read. The input stream will be closed before
895 * returning.
896 *
897 * @return A list of the entries read from the given input stream.
898 *
899 * @throws IOException If a problem occurs while attempting to read data
900 * from the input stream.
901 *
902 * @throws LDIFException If a problem is encountered while attempting to
903 * decode data read as LDIF.
904 */
905 public static List<Entry> readEntries(final InputStream inputStream)
906 throws IOException, LDIFException
907 {
908 return readEntries(new LDIFReader(inputStream));
909 }
910
911
912
913 /**
914 * Reads entries from the provided LDIF reader and returns them as a list.
915 *
916 * @param reader The reader from which the entries should be read. It will
917 * be closed before returning.
918 *
919 * @return A list of the entries read from the provided reader.
920 *
921 * @throws IOException If a problem was encountered while attempting to read
922 * data from the LDIF data source.
923 *
924 * @throws LDIFException If a problem is encountered while attempting to
925 * decode data read as LDIF.
926 */
927 private static List<Entry> readEntries(final LDIFReader reader)
928 throws IOException, LDIFException
929 {
930 try
931 {
932 final ArrayList<Entry> entries = new ArrayList<Entry>(10);
933 while (true)
934 {
935 final Entry e = reader.readEntry();
936 if (e == null)
937 {
938 break;
939 }
940
941 entries.add(e);
942 }
943
944 return entries;
945 }
946 finally
947 {
948 reader.close();
949 }
950 }
951
952
953
954 /**
955 * Closes this LDIF reader and the underlying LDIF source.
956 *
957 * @throws IOException If a problem occurs while closing the underlying LDIF
958 * source.
959 */
960 public void close()
961 throws IOException
962 {
963 reader.close();
964
965 if (isAsync())
966 {
967 // Closing the reader will trigger the LineReaderThread to complete, but
968 // not if it's blocked submitting the next UnparsedLDIFRecord. To avoid
969 // this, we clear out the completed output queue, which is larger than
970 // the input queue, so the LineReaderThread will stop reading and
971 // shutdown the asyncParser.
972 asyncParsedRecords.clear();
973 }
974 }
975
976
977
978 /**
979 * Indicates whether to ignore any duplicate values encountered while reading
980 * LDIF records.
981 *
982 * @return {@code true} if duplicate values should be ignored, or
983 * {@code false} if any LDIF records containing duplicate values
984 * should be rejected.
985 *
986 * @deprecated Use the {@link #getDuplicateValueBehavior} method instead.
987 */
988 @Deprecated()
989 public boolean ignoreDuplicateValues()
990 {
991 return (duplicateValueBehavior == DuplicateValueBehavior.STRIP);
992 }
993
994
995
996 /**
997 * Specifies whether to ignore any duplicate values encountered while reading
998 * LDIF records.
999 *
1000 * @param ignoreDuplicateValues Indicates whether to ignore duplicate
1001 * attribute values encountered while reading
1002 * LDIF records.
1003 *
1004 * @deprecated Use the {@link #setDuplicateValueBehavior} method instead.
1005 */
1006 @Deprecated()
1007 public void setIgnoreDuplicateValues(final boolean ignoreDuplicateValues)
1008 {
1009 if (ignoreDuplicateValues)
1010 {
1011 duplicateValueBehavior = DuplicateValueBehavior.STRIP;
1012 }
1013 else
1014 {
1015 duplicateValueBehavior = DuplicateValueBehavior.REJECT;
1016 }
1017 }
1018
1019
1020
1021 /**
1022 * Retrieves the behavior that should be exhibited if the LDIF reader
1023 * encounters an entry with duplicate values.
1024 *
1025 * @return The behavior that should be exhibited if the LDIF reader
1026 * encounters an entry with duplicate values.
1027 */
1028 public DuplicateValueBehavior getDuplicateValueBehavior()
1029 {
1030 return duplicateValueBehavior;
1031 }
1032
1033
1034
1035 /**
1036 * Specifies the behavior that should be exhibited if the LDIF reader
1037 * encounters an entry with duplicate values.
1038 *
1039 * @param duplicateValueBehavior The behavior that should be exhibited if
1040 * the LDIF reader encounters an entry with
1041 * duplicate values.
1042 */
1043 public void setDuplicateValueBehavior(
1044 final DuplicateValueBehavior duplicateValueBehavior)
1045 {
1046 this.duplicateValueBehavior = duplicateValueBehavior;
1047 }
1048
1049
1050
1051 /**
1052 * Indicates whether to strip off any illegal trailing spaces that may appear
1053 * in LDIF records (e.g., after an entry DN or attribute value). The LDIF
1054 * specification strongly recommends that any value which legitimately
1055 * contains trailing spaces be base64-encoded, and any spaces which appear
1056 * after the end of non-base64-encoded values may therefore be considered
1057 * invalid. If any such trailing spaces are encountered in an LDIF record and
1058 * they are not to be stripped, then an {@link LDIFException} will be thrown
1059 * for that record.
1060 * <BR><BR>
1061 * Note that this applies only to spaces after the end of a value, and not to
1062 * spaces which may appear at the end of a line for a value that is wrapped
1063 * and continued on the next line.
1064 *
1065 * @return {@code true} if illegal trailing spaces should be stripped off, or
1066 * {@code false} if LDIF records containing illegal trailing spaces
1067 * should be rejected.
1068 *
1069 * @deprecated Use the {@link #getTrailingSpaceBehavior} method instead.
1070 */
1071 @Deprecated()
1072 public boolean stripTrailingSpaces()
1073 {
1074 return (trailingSpaceBehavior == TrailingSpaceBehavior.STRIP);
1075 }
1076
1077
1078
1079 /**
1080 * Specifies whether to strip off any illegal trailing spaces that may appear
1081 * in LDIF records (e.g., after an entry DN or attribute value). The LDIF
1082 * specification strongly recommends that any value which legitimately
1083 * contains trailing spaces be base64-encoded, and any spaces which appear
1084 * after the end of non-base64-encoded values may therefore be considered
1085 * invalid. If any such trailing spaces are encountered in an LDIF record and
1086 * they are not to be stripped, then an {@link LDIFException} will be thrown
1087 * for that record.
1088 * <BR><BR>
1089 * Note that this applies only to spaces after the end of a value, and not to
1090 * spaces which may appear at the end of a line for a value that is wrapped
1091 * and continued on the next line.
1092 *
1093 * @param stripTrailingSpaces Indicates whether to strip off any illegal
1094 * trailing spaces, or {@code false} if LDIF
1095 * records containing them should be rejected.
1096 *
1097 * @deprecated Use the {@link #setTrailingSpaceBehavior} method instead.
1098 */
1099 @Deprecated()
1100 public void setStripTrailingSpaces(final boolean stripTrailingSpaces)
1101 {
1102 trailingSpaceBehavior = stripTrailingSpaces
1103 ? TrailingSpaceBehavior.STRIP
1104 : TrailingSpaceBehavior.REJECT;
1105 }
1106
1107
1108
1109 /**
1110 * Retrieves the behavior that should be exhibited when encountering attribute
1111 * values which are not base64-encoded but contain trailing spaces. The LDIF
1112 * specification strongly recommends that any value which legitimately
1113 * contains trailing spaces be base64-encoded, but the LDAP SDK LDIF parser
1114 * may be configured to automatically strip these spaces, to preserve them, or
1115 * to reject any entry or change record containing them.
1116 *
1117 * @return The behavior that should be exhibited when encountering attribute
1118 * values which are not base64-encoded but contain trailing spaces.
1119 */
1120 public TrailingSpaceBehavior getTrailingSpaceBehavior()
1121 {
1122 return trailingSpaceBehavior;
1123 }
1124
1125
1126
1127 /**
1128 * Specifies the behavior that should be exhibited when encountering attribute
1129 * values which are not base64-encoded but contain trailing spaces. The LDIF
1130 * specification strongly recommends that any value which legitimately
1131 * contains trailing spaces be base64-encoded, but the LDAP SDK LDIF parser
1132 * may be configured to automatically strip these spaces, to preserve them, or
1133 * to reject any entry or change record containing them.
1134 *
1135 * @param trailingSpaceBehavior The behavior that should be exhibited when
1136 * encountering attribute values which are not
1137 * base64-encoded but contain trailing spaces.
1138 */
1139 public void setTrailingSpaceBehavior(
1140 final TrailingSpaceBehavior trailingSpaceBehavior)
1141 {
1142 this.trailingSpaceBehavior = trailingSpaceBehavior;
1143 }
1144
1145
1146
1147 /**
1148 * Retrieves the base path that will be prepended to relative paths in order
1149 * to obtain an absolute path. This will only be used for "file:" URLs that
1150 * have paths which do not begin with a slash.
1151 *
1152 * @return The base path that will be prepended to relative paths in order to
1153 * obtain an absolute path.
1154 */
1155 public String getRelativeBasePath()
1156 {
1157 return relativeBasePath;
1158 }
1159
1160
1161
1162 /**
1163 * Specifies the base path that will be prepended to relative paths in order
1164 * to obtain an absolute path. This will only be used for "file:" URLs that
1165 * have paths which do not begin with a space.
1166 *
1167 * @param relativeBasePath The base path that will be prepended to relative
1168 * paths in order to obtain an absolute path.
1169 */
1170 public void setRelativeBasePath(final String relativeBasePath)
1171 {
1172 setRelativeBasePath(new File(relativeBasePath));
1173 }
1174
1175
1176
1177 /**
1178 * Specifies the base path that will be prepended to relative paths in order
1179 * to obtain an absolute path. This will only be used for "file:" URLs that
1180 * have paths which do not begin with a space.
1181 *
1182 * @param relativeBasePath The base path that will be prepended to relative
1183 * paths in order to obtain an absolute path.
1184 */
1185 public void setRelativeBasePath(final File relativeBasePath)
1186 {
1187 final String path = relativeBasePath.getAbsolutePath();
1188 if (path.endsWith(File.separator))
1189 {
1190 this.relativeBasePath = path;
1191 }
1192 else
1193 {
1194 this.relativeBasePath = path + File.separator;
1195 }
1196 }
1197
1198
1199
1200 /**
1201 * Retrieves the schema that will be used when reading LDIF records, if
1202 * defined.
1203 *
1204 * @return The schema that will be used when reading LDIF records, or
1205 * {@code null} if no schema should be used and all attributes should
1206 * be treated as case-insensitive strings.
1207 */
1208 public Schema getSchema()
1209 {
1210 return schema;
1211 }
1212
1213
1214
1215 /**
1216 * Specifies the schema that should be used when reading LDIF records.
1217 *
1218 * @param schema The schema that should be used when reading LDIF records,
1219 * or {@code null} if no schema should be used and all
1220 * attributes should be treated as case-insensitive strings.
1221 */
1222 public void setSchema(final Schema schema)
1223 {
1224 this.schema = schema;
1225 }
1226
1227
1228
1229 /**
1230 * Reads a record from the LDIF source. It may be either an entry or an LDIF
1231 * change record.
1232 *
1233 * @return The record read from the LDIF source, or {@code null} if there are
1234 * no more entries to be read.
1235 *
1236 * @throws IOException If a problem occurs while trying to read from the
1237 * LDIF source.
1238 *
1239 * @throws LDIFException If the data read could not be parsed as an entry or
1240 * an LDIF change record.
1241 */
1242 public LDIFRecord readLDIFRecord()
1243 throws IOException, LDIFException
1244 {
1245 if (isAsync())
1246 {
1247 return readLDIFRecordAsync();
1248 }
1249 else
1250 {
1251 return readLDIFRecordInternal();
1252 }
1253 }
1254
1255
1256
1257 /**
1258 * Reads an entry from the LDIF source.
1259 *
1260 * @return The entry read from the LDIF source, or {@code null} if there are
1261 * no more entries to be read.
1262 *
1263 * @throws IOException If a problem occurs while attempting to read from the
1264 * LDIF source.
1265 *
1266 * @throws LDIFException If the data read could not be parsed as an entry.
1267 */
1268 public Entry readEntry()
1269 throws IOException, LDIFException
1270 {
1271 if (isAsync())
1272 {
1273 return readEntryAsync();
1274 }
1275 else
1276 {
1277 return readEntryInternal();
1278 }
1279 }
1280
1281
1282
1283 /**
1284 * Reads an LDIF change record from the LDIF source. The LDIF record must
1285 * have a changetype.
1286 *
1287 * @return The change record read from the LDIF source, or {@code null} if
1288 * there are no more records to be read.
1289 *
1290 * @throws IOException If a problem occurs while attempting to read from the
1291 * LDIF source.
1292 *
1293 * @throws LDIFException If the data read could not be parsed as an LDIF
1294 * change record.
1295 */
1296 public LDIFChangeRecord readChangeRecord()
1297 throws IOException, LDIFException
1298 {
1299 return readChangeRecord(false);
1300 }
1301
1302
1303
1304 /**
1305 * Reads an LDIF change record from the LDIF source. Optionally, if the LDIF
1306 * record does not have a changetype, then it may be assumed to be an add
1307 * change record.
1308 *
1309 * @param defaultAdd Indicates whether an LDIF record not containing a
1310 * changetype should be retrieved as an add change record.
1311 * If this is {@code false} and the record read does not
1312 * include a changetype, then an {@link LDIFException}
1313 * will be thrown.
1314 *
1315 * @return The change record read from the LDIF source, or {@code null} if
1316 * there are no more records to be read.
1317 *
1318 * @throws IOException If a problem occurs while attempting to read from the
1319 * LDIF source.
1320 *
1321 * @throws LDIFException If the data read could not be parsed as an LDIF
1322 * change record.
1323 */
1324 public LDIFChangeRecord readChangeRecord(final boolean defaultAdd)
1325 throws IOException, LDIFException
1326 {
1327 if (isAsync())
1328 {
1329 return readChangeRecordAsync(defaultAdd);
1330 }
1331 else
1332 {
1333 return readChangeRecordInternal(defaultAdd);
1334 }
1335 }
1336
1337
1338
1339 /**
1340 * Reads the next {@code LDIFRecord}, which was read and parsed by a different
1341 * thread.
1342 *
1343 * @return The next parsed record or {@code null} if there are no more
1344 * records to read.
1345 *
1346 * @throws IOException If IOException was thrown when reading or parsing
1347 * the record.
1348 *
1349 * @throws LDIFException If LDIFException was thrown parsing the record.
1350 */
1351 private LDIFRecord readLDIFRecordAsync()
1352 throws IOException, LDIFException
1353 {
1354 Result<UnparsedLDIFRecord, LDIFRecord> result = null;
1355 LDIFRecord record = null;
1356 while (record == null)
1357 {
1358 result = readLDIFRecordResultAsync();
1359 if (result == null)
1360 {
1361 return null;
1362 }
1363
1364 record = result.getOutput();
1365
1366 // This is a special value that means we should skip this Entry. We have
1367 // to use something different than null because null means EOF.
1368 if (record == SKIP_ENTRY)
1369 {
1370 record = null;
1371 }
1372 }
1373 return record;
1374 }
1375
1376
1377
1378 /**
1379 * Reads an entry asynchronously from the LDIF source.
1380 *
1381 * @return The entry read from the LDIF source, or {@code null} if there are
1382 * no more entries to be read.
1383 *
1384 * @throws IOException If a problem occurs while attempting to read from the
1385 * LDIF source.
1386 * @throws LDIFException If the data read could not be parsed as an entry.
1387 */
1388 private Entry readEntryAsync()
1389 throws IOException, LDIFException
1390 {
1391 Result<UnparsedLDIFRecord, LDIFRecord> result = null;
1392 LDIFRecord record = null;
1393 while (record == null)
1394 {
1395 result = readLDIFRecordResultAsync();
1396 if (result == null)
1397 {
1398 return null;
1399 }
1400
1401 record = result.getOutput();
1402
1403 // This is a special value that means we should skip this Entry. We have
1404 // to use something different than null because null means EOF.
1405 if (record == SKIP_ENTRY)
1406 {
1407 record = null;
1408 }
1409 }
1410
1411 if (record instanceof Entry)
1412 {
1413 return (Entry) record;
1414 }
1415 else if (record instanceof LDIFChangeRecord)
1416 {
1417 try
1418 {
1419 // Some LDIFChangeRecord can be converted to an Entry. This is really
1420 // an edge case though.
1421 return ((LDIFChangeRecord)record).toEntry();
1422 }
1423 catch (LDIFException e)
1424 {
1425 debugException(e);
1426 final long firstLineNumber = result.getInput().getFirstLineNumber();
1427 throw new LDIFException(e.getExceptionMessage(),
1428 firstLineNumber, true, e);
1429 }
1430 }
1431
1432 throw new AssertionError("LDIFRecords must either be an Entry or an " +
1433 "LDIFChangeRecord");
1434 }
1435
1436
1437
1438 /**
1439 * Reads an LDIF change record from the LDIF source asynchronously.
1440 * Optionally, if the LDIF record does not have a changetype, then it may be
1441 * assumed to be an add change record.
1442 *
1443 * @param defaultAdd Indicates whether an LDIF record not containing a
1444 * changetype should be retrieved as an add change record.
1445 * If this is {@code false} and the record read does not
1446 * include a changetype, then an {@link LDIFException} will
1447 * be thrown.
1448 *
1449 * @return The change record read from the LDIF source, or {@code null} if
1450 * there are no more records to be read.
1451 *
1452 * @throws IOException If a problem occurs while attempting to read from the
1453 * LDIF source.
1454 * @throws LDIFException If the data read could not be parsed as an LDIF
1455 * change record.
1456 */
1457 private LDIFChangeRecord readChangeRecordAsync(final boolean defaultAdd)
1458 throws IOException, LDIFException
1459 {
1460 Result<UnparsedLDIFRecord, LDIFRecord> result = null;
1461 LDIFRecord record = null;
1462 while (record == null)
1463 {
1464 result = readLDIFRecordResultAsync();
1465 if (result == null)
1466 {
1467 return null;
1468 }
1469
1470 record = result.getOutput();
1471
1472 // This is a special value that means we should skip this Entry. We have
1473 // to use something different than null because null means EOF.
1474 if (record == SKIP_ENTRY)
1475 {
1476 record = null;
1477 }
1478 }
1479
1480 if (record instanceof LDIFChangeRecord)
1481 {
1482 return (LDIFChangeRecord) record;
1483 }
1484 else if (record instanceof Entry)
1485 {
1486 if (defaultAdd)
1487 {
1488 return new LDIFAddChangeRecord((Entry) record);
1489 }
1490 else
1491 {
1492 final long firstLineNumber = result.getInput().getFirstLineNumber();
1493 throw new LDIFException(
1494 ERR_READ_NOT_CHANGE_RECORD.get(firstLineNumber), firstLineNumber,
1495 true);
1496 }
1497 }
1498
1499 throw new AssertionError("LDIFRecords must either be an Entry or an " +
1500 "LDIFChangeRecord");
1501 }
1502
1503
1504
1505 /**
1506 * Reads the next LDIF record, which was read and parsed asynchronously by
1507 * separate threads.
1508 *
1509 * @return The next LDIF record or {@code null} if there are no more records.
1510 *
1511 * @throws IOException If a problem occurs while attempting to read from the
1512 * LDIF source.
1513 *
1514 * @throws LDIFException If the data read could not be parsed as an entry.
1515 */
1516 private Result<UnparsedLDIFRecord, LDIFRecord> readLDIFRecordResultAsync()
1517 throws IOException, LDIFException
1518 {
1519 Result<UnparsedLDIFRecord, LDIFRecord> result = null;
1520
1521 // If the asynchronous reading and parsing is complete, then we don't have
1522 // to block waiting for the next record to show up on the queue. If there
1523 // isn't a record there, then return null (EOF) right away.
1524 if (asyncParsingComplete.get())
1525 {
1526 result = asyncParsedRecords.poll();
1527 }
1528 else
1529 {
1530 try
1531 {
1532 // We probably could just do a asyncParsedRecords.take() here, but
1533 // there are some edge case error scenarios where
1534 // asyncParsingComplete might be set without a special EOF sentinel
1535 // Result enqueued. So to guard against this, we have a very cautious
1536 // polling interval of 1 second. During normal processing, we never
1537 // have to wait for this to expire, when there is something to do
1538 // (like shutdown).
1539 while ((result == null) && (!asyncParsingComplete.get()))
1540 {
1541 result = asyncParsedRecords.poll(1, TimeUnit.SECONDS);
1542 }
1543
1544 // There's a very small chance that we missed the value, so double-check
1545 if (result == null)
1546 {
1547 result = asyncParsedRecords.poll();
1548 }
1549 }
1550 catch (InterruptedException e)
1551 {
1552 debugException(e);
1553 throw createIOExceptionWithCause(null, e);
1554 }
1555 }
1556 if (result == null)
1557 {
1558 return null;
1559 }
1560
1561 rethrow(result.getFailureCause());
1562
1563 // Check if we reached the end of the input
1564 final UnparsedLDIFRecord unparsedRecord = result.getInput();
1565 if (unparsedRecord.isEOF())
1566 {
1567 // This might have been set already by the LineReaderThread, but
1568 // just in case it hasn't gotten to it yet, do so here.
1569 asyncParsingComplete.set(true);
1570
1571 // Enqueue this EOF result again for any other thread that might be
1572 // blocked in asyncParsedRecords.take() even though having multiple
1573 // threads call this method concurrently breaks the contract of this
1574 // class.
1575 try
1576 {
1577 asyncParsedRecords.put(result);
1578 }
1579 catch (InterruptedException e)
1580 {
1581 // We shouldn't ever get interrupted because the put won't ever block.
1582 // Once we are done reading, this is the only item left in the queue,
1583 // so we should always be able to re-enqueue it.
1584 debugException(e);
1585 }
1586 return null;
1587 }
1588
1589 return result;
1590 }
1591
1592
1593
1594 /**
1595 * Indicates whether this LDIF reader was constructed to perform asynchronous
1596 * processing.
1597 *
1598 * @return {@code true} if this LDIFReader was constructed to perform
1599 * asynchronous processing, or {@code false} if not.
1600 */
1601 private boolean isAsync()
1602 {
1603 return isAsync;
1604 }
1605
1606
1607
1608 /**
1609 * If not {@code null}, rethrows the specified Throwable as either an
1610 * IOException or LDIFException.
1611 *
1612 * @param t The exception to rethrow. If it's {@code null}, then nothing
1613 * is thrown.
1614 *
1615 * @throws IOException If t is an IOException or a checked Exception that
1616 * is not an LDIFException.
1617 * @throws LDIFException If t is an LDIFException.
1618 */
1619 static void rethrow(final Throwable t)
1620 throws IOException, LDIFException
1621 {
1622 if (t == null)
1623 {
1624 return;
1625 }
1626
1627 if (t instanceof IOException)
1628 {
1629 throw (IOException) t;
1630 }
1631 else if (t instanceof LDIFException)
1632 {
1633 throw (LDIFException) t;
1634 }
1635 else if (t instanceof RuntimeException)
1636 {
1637 throw (RuntimeException) t;
1638 }
1639 else if (t instanceof Error)
1640 {
1641 throw (Error) t;
1642 }
1643 else
1644 {
1645 throw createIOExceptionWithCause(null, t);
1646 }
1647 }
1648
1649
1650
1651 /**
1652 * Reads a record from the LDIF source. It may be either an entry or an LDIF
1653 * change record.
1654 *
1655 * @return The record read from the LDIF source, or {@code null} if there are
1656 * no more entries to be read.
1657 *
1658 * @throws IOException If a problem occurs while trying to read from the
1659 * LDIF source.
1660 * @throws LDIFException If the data read could not be parsed as an entry or
1661 * an LDIF change record.
1662 */
1663 private LDIFRecord readLDIFRecordInternal()
1664 throws IOException, LDIFException
1665 {
1666 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord();
1667 return decodeRecord(unparsedRecord, relativeBasePath, schema);
1668 }
1669
1670
1671
1672 /**
1673 * Reads an entry from the LDIF source.
1674 *
1675 * @return The entry read from the LDIF source, or {@code null} if there are
1676 * no more entries to be read.
1677 *
1678 * @throws IOException If a problem occurs while attempting to read from the
1679 * LDIF source.
1680 * @throws LDIFException If the data read could not be parsed as an entry.
1681 */
1682 private Entry readEntryInternal()
1683 throws IOException, LDIFException
1684 {
1685 Entry e = null;
1686 while (e == null)
1687 {
1688 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord();
1689 if (unparsedRecord.isEOF())
1690 {
1691 return null;
1692 }
1693
1694 e = decodeEntry(unparsedRecord, relativeBasePath);
1695 debugLDIFRead(e);
1696
1697 if (entryTranslator != null)
1698 {
1699 e = entryTranslator.translate(e, unparsedRecord.getFirstLineNumber());
1700 }
1701 }
1702 return e;
1703 }
1704
1705
1706
1707 /**
1708 * Reads an LDIF change record from the LDIF source. Optionally, if the LDIF
1709 * record does not have a changetype, then it may be assumed to be an add
1710 * change record.
1711 *
1712 * @param defaultAdd Indicates whether an LDIF record not containing a
1713 * changetype should be retrieved as an add change record.
1714 * If this is {@code false} and the record read does not
1715 * include a changetype, then an {@link LDIFException} will
1716 * be thrown.
1717 *
1718 * @return The change record read from the LDIF source, or {@code null} if
1719 * there are no more records to be read.
1720 *
1721 * @throws IOException If a problem occurs while attempting to read from the
1722 * LDIF source.
1723 * @throws LDIFException If the data read could not be parsed as an LDIF
1724 * change record.
1725 */
1726 private LDIFChangeRecord readChangeRecordInternal(final boolean defaultAdd)
1727 throws IOException, LDIFException
1728 {
1729 LDIFChangeRecord r = null;
1730 while (r == null)
1731 {
1732 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord();
1733 if (unparsedRecord.isEOF())
1734 {
1735 return null;
1736 }
1737
1738 r = decodeChangeRecord(unparsedRecord, relativeBasePath, defaultAdd,
1739 schema);
1740 debugLDIFRead(r);
1741
1742 if (changeRecordTranslator != null)
1743 {
1744 r = changeRecordTranslator.translate(r,
1745 unparsedRecord.getFirstLineNumber());
1746 }
1747 }
1748 return r;
1749 }
1750
1751
1752
1753 /**
1754 * Reads a record (either an entry or a change record) from the LDIF source
1755 * and places it in the line list.
1756 *
1757 * @return The line number for the first line of the entry that was read.
1758 *
1759 * @throws IOException If a problem occurs while attempting to read from the
1760 * LDIF source.
1761 *
1762 * @throws LDIFException If the data read could not be parsed as a valid
1763 * LDIF record.
1764 */
1765 private UnparsedLDIFRecord readUnparsedRecord()
1766 throws IOException, LDIFException
1767 {
1768 final ArrayList<StringBuilder> lineList = new ArrayList<StringBuilder>(20);
1769 boolean lastWasComment = false;
1770 long firstLineNumber = lineNumberCounter + 1;
1771 while (true)
1772 {
1773 final String line = reader.readLine();
1774 lineNumberCounter++;
1775
1776 if (line == null)
1777 {
1778 // We've hit the end of the LDIF source. If we haven't read any entry
1779 // data, then return null. Otherwise, the last entry wasn't followed by
1780 // a blank line, which is OK, and we should decode that entry.
1781 if (lineList.isEmpty())
1782 {
1783 return new UnparsedLDIFRecord(new ArrayList<StringBuilder>(0),
1784 duplicateValueBehavior, trailingSpaceBehavior, schema, -1);
1785 }
1786 else
1787 {
1788 break;
1789 }
1790 }
1791
1792 if (line.length() == 0)
1793 {
1794 // It's a blank line. If we have read entry data, then this signals the
1795 // end of the entry. Otherwise, it's an extra space between entries,
1796 // which is OK.
1797 lastWasComment = false;
1798 if (lineList.isEmpty())
1799 {
1800 firstLineNumber++;
1801 continue;
1802 }
1803 else
1804 {
1805 break;
1806 }
1807 }
1808
1809 if (line.charAt(0) == ' ')
1810 {
1811 // The line starts with a space, which means that it must be a
1812 // continuation of the previous line. This is true even if the last
1813 // line was a comment.
1814 if (lastWasComment)
1815 {
1816 // What we've read is part of a comment, so we don't care about its
1817 // content.
1818 }
1819 else if (lineList.isEmpty())
1820 {
1821 throw new LDIFException(
1822 ERR_READ_UNEXPECTED_FIRST_SPACE.get(lineNumberCounter),
1823 lineNumberCounter, false);
1824 }
1825 else
1826 {
1827 lineList.get(lineList.size() - 1).append(line.substring(1));
1828 lastWasComment = false;
1829 }
1830 }
1831 else if (line.charAt(0) == '#')
1832 {
1833 lastWasComment = true;
1834 }
1835 else
1836 {
1837 // We want to make sure that we skip over the "version:" line if it
1838 // exists, but that should only occur at the beginning of an entry where
1839 // it can't be confused with a possible "version" attribute.
1840 if (lineList.isEmpty() && line.startsWith("version:"))
1841 {
1842 lastWasComment = true;
1843 }
1844 else
1845 {
1846 lineList.add(new StringBuilder(line));
1847 lastWasComment = false;
1848 }
1849 }
1850 }
1851
1852 return new UnparsedLDIFRecord(lineList, duplicateValueBehavior,
1853 trailingSpaceBehavior, schema, firstLineNumber);
1854 }
1855
1856
1857
1858 /**
1859 * Decodes the provided set of LDIF lines as an entry. The provided set of
1860 * lines must contain exactly one entry. Long lines may be wrapped as per the
1861 * LDIF specification, and it is acceptable to have one or more blank lines
1862 * following the entry. A default trailing space behavior of
1863 * {@link TrailingSpaceBehavior#REJECT} will be used.
1864 *
1865 * @param ldifLines The set of lines that comprise the LDIF representation
1866 * of the entry. It must not be {@code null} or empty.
1867 *
1868 * @return The entry read from LDIF.
1869 *
1870 * @throws LDIFException If the provided LDIF data cannot be decoded as an
1871 * entry.
1872 */
1873 public static Entry decodeEntry(final String... ldifLines)
1874 throws LDIFException
1875 {
1876 final Entry e = decodeEntry(prepareRecord(DuplicateValueBehavior.STRIP,
1877 TrailingSpaceBehavior.REJECT, null, ldifLines),
1878 DEFAULT_RELATIVE_BASE_PATH);
1879 debugLDIFRead(e);
1880 return e;
1881 }
1882
1883
1884
1885 /**
1886 * Decodes the provided set of LDIF lines as an entry. The provided set of
1887 * lines must contain exactly one entry. Long lines may be wrapped as per the
1888 * LDIF specification, and it is acceptable to have one or more blank lines
1889 * following the entry. A default trailing space behavior of
1890 * {@link TrailingSpaceBehavior#REJECT} will be used.
1891 *
1892 * @param ignoreDuplicateValues Indicates whether to ignore duplicate
1893 * attribute values encountered while parsing.
1894 * @param schema The schema to use when parsing the record,
1895 * if applicable.
1896 * @param ldifLines The set of lines that comprise the LDIF
1897 * representation of the entry. It must not be
1898 * {@code null} or empty.
1899 *
1900 * @return The entry read from LDIF.
1901 *
1902 * @throws LDIFException If the provided LDIF data cannot be decoded as an
1903 * entry.
1904 */
1905 public static Entry decodeEntry(final boolean ignoreDuplicateValues,
1906 final Schema schema,
1907 final String... ldifLines)
1908 throws LDIFException
1909 {
1910 return decodeEntry(ignoreDuplicateValues, TrailingSpaceBehavior.REJECT,
1911 schema, ldifLines);
1912 }
1913
1914
1915
1916 /**
1917 * Decodes the provided set of LDIF lines as an entry. The provided set of
1918 * lines must contain exactly one entry. Long lines may be wrapped as per the
1919 * LDIF specification, and it is acceptable to have one or more blank lines
1920 * following the entry.
1921 *
1922 * @param ignoreDuplicateValues Indicates whether to ignore duplicate
1923 * attribute values encountered while parsing.
1924 * @param trailingSpaceBehavior The behavior that should be exhibited when
1925 * encountering attribute values which are not
1926 * base64-encoded but contain trailing spaces.
1927 * It must not be {@code null}.
1928 * @param schema The schema to use when parsing the record,
1929 * if applicable.
1930 * @param ldifLines The set of lines that comprise the LDIF
1931 * representation of the entry. It must not be
1932 * {@code null} or empty.
1933 *
1934 * @return The entry read from LDIF.
1935 *
1936 * @throws LDIFException If the provided LDIF data cannot be decoded as an
1937 * entry.
1938 */
1939 public static Entry decodeEntry(
1940 final boolean ignoreDuplicateValues,
1941 final TrailingSpaceBehavior trailingSpaceBehavior,
1942 final Schema schema,
1943 final String... ldifLines) throws LDIFException
1944 {
1945 final Entry e = decodeEntry(prepareRecord(
1946 (ignoreDuplicateValues
1947 ? DuplicateValueBehavior.STRIP
1948 : DuplicateValueBehavior.REJECT),
1949 trailingSpaceBehavior, schema, ldifLines),
1950 DEFAULT_RELATIVE_BASE_PATH);
1951 debugLDIFRead(e);
1952 return e;
1953 }
1954
1955
1956
1957 /**
1958 * Decodes the provided set of LDIF lines as an LDIF change record. The
1959 * provided set of lines must contain exactly one change record and it must
1960 * include a changetype. Long lines may be wrapped as per the LDIF
1961 * specification, and it is acceptable to have one or more blank lines
1962 * following the entry.
1963 *
1964 * @param ldifLines The set of lines that comprise the LDIF representation
1965 * of the change record. It must not be {@code null} or
1966 * empty.
1967 *
1968 * @return The change record read from LDIF.
1969 *
1970 * @throws LDIFException If the provided LDIF data cannot be decoded as a
1971 * change record.
1972 */
1973 public static LDIFChangeRecord decodeChangeRecord(final String... ldifLines)
1974 throws LDIFException
1975 {
1976 return decodeChangeRecord(false, ldifLines);
1977 }
1978
1979
1980
1981 /**
1982 * Decodes the provided set of LDIF lines as an LDIF change record. The
1983 * provided set of lines must contain exactly one change record. Long lines
1984 * may be wrapped as per the LDIF specification, and it is acceptable to have
1985 * one or more blank lines following the entry.
1986 *
1987 * @param defaultAdd Indicates whether an LDIF record not containing a
1988 * changetype should be retrieved as an add change record.
1989 * If this is {@code false} and the record read does not
1990 * include a changetype, then an {@link LDIFException}
1991 * will be thrown.
1992 * @param ldifLines The set of lines that comprise the LDIF representation
1993 * of the change record. It must not be {@code null} or
1994 * empty.
1995 *
1996 * @return The change record read from LDIF.
1997 *
1998 * @throws LDIFException If the provided LDIF data cannot be decoded as a
1999 * change record.
2000 */
2001 public static LDIFChangeRecord decodeChangeRecord(final boolean defaultAdd,
2002 final String... ldifLines)
2003 throws LDIFException
2004 {
2005 final LDIFChangeRecord r =
2006 decodeChangeRecord(
2007 prepareRecord(DuplicateValueBehavior.STRIP,
2008 TrailingSpaceBehavior.REJECT, null, ldifLines),
2009 DEFAULT_RELATIVE_BASE_PATH, defaultAdd, null);
2010 debugLDIFRead(r);
2011 return r;
2012 }
2013
2014
2015
2016 /**
2017 * Decodes the provided set of LDIF lines as an LDIF change record. The
2018 * provided set of lines must contain exactly one change record. Long lines
2019 * may be wrapped as per the LDIF specification, and it is acceptable to have
2020 * one or more blank lines following the entry.
2021 *
2022 * @param ignoreDuplicateValues Indicates whether to ignore duplicate
2023 * attribute values encountered while parsing.
2024 * @param schema The schema to use when processing the change
2025 * record, or {@code null} if no schema should
2026 * be used and all values should be treated as
2027 * case-insensitive strings.
2028 * @param defaultAdd Indicates whether an LDIF record not
2029 * containing a changetype should be retrieved
2030 * as an add change record. If this is
2031 * {@code false} and the record read does not
2032 * include a changetype, then an
2033 * {@link LDIFException} will be thrown.
2034 * @param ldifLines The set of lines that comprise the LDIF
2035 * representation of the change record. It
2036 * must not be {@code null} or empty.
2037 *
2038 * @return The change record read from LDIF.
2039 *
2040 * @throws LDIFException If the provided LDIF data cannot be decoded as a
2041 * change record.
2042 */
2043 public static LDIFChangeRecord decodeChangeRecord(
2044 final boolean ignoreDuplicateValues,
2045 final Schema schema,
2046 final boolean defaultAdd,
2047 final String... ldifLines)
2048 throws LDIFException
2049 {
2050 return decodeChangeRecord(ignoreDuplicateValues,
2051 TrailingSpaceBehavior.REJECT, schema, defaultAdd, ldifLines);
2052 }
2053
2054
2055
2056 /**
2057 * Decodes the provided set of LDIF lines as an LDIF change record. The
2058 * provided set of lines must contain exactly one change record. Long lines
2059 * may be wrapped as per the LDIF specification, and it is acceptable to have
2060 * one or more blank lines following the entry.
2061 *
2062 * @param ignoreDuplicateValues Indicates whether to ignore duplicate
2063 * attribute values encountered while parsing.
2064 * @param trailingSpaceBehavior The behavior that should be exhibited when
2065 * encountering attribute values which are not
2066 * base64-encoded but contain trailing spaces.
2067 * It must not be {@code null}.
2068 * @param schema The schema to use when processing the change
2069 * record, or {@code null} if no schema should
2070 * be used and all values should be treated as
2071 * case-insensitive strings.
2072 * @param defaultAdd Indicates whether an LDIF record not
2073 * containing a changetype should be retrieved
2074 * as an add change record. If this is
2075 * {@code false} and the record read does not
2076 * include a changetype, then an
2077 * {@link LDIFException} will be thrown.
2078 * @param ldifLines The set of lines that comprise the LDIF
2079 * representation of the change record. It
2080 * must not be {@code null} or empty.
2081 *
2082 * @return The change record read from LDIF.
2083 *
2084 * @throws LDIFException If the provided LDIF data cannot be decoded as a
2085 * change record.
2086 */
2087 public static LDIFChangeRecord decodeChangeRecord(
2088 final boolean ignoreDuplicateValues,
2089 final TrailingSpaceBehavior trailingSpaceBehavior,
2090 final Schema schema,
2091 final boolean defaultAdd,
2092 final String... ldifLines)
2093 throws LDIFException
2094 {
2095 final LDIFChangeRecord r = decodeChangeRecord(
2096 prepareRecord(
2097 (ignoreDuplicateValues
2098 ? DuplicateValueBehavior.STRIP
2099 : DuplicateValueBehavior.REJECT),
2100 trailingSpaceBehavior, schema, ldifLines),
2101 DEFAULT_RELATIVE_BASE_PATH, defaultAdd, null);
2102 debugLDIFRead(r);
2103 return r;
2104 }
2105
2106
2107
2108 /**
2109 * Parses the provided set of lines into a list of {@code StringBuilder}
2110 * objects suitable for decoding into an entry or LDIF change record.
2111 * Comments will be ignored and wrapped lines will be unwrapped.
2112 *
2113 * @param duplicateValueBehavior The behavior that should be exhibited if
2114 * the LDIF reader encounters an entry with
2115 * duplicate values.
2116 * @param trailingSpaceBehavior The behavior that should be exhibited when
2117 * encountering attribute values which are not
2118 * base64-encoded but contain trailing spaces.
2119 * @param schema The schema to use when parsing the record,
2120 * if applicable.
2121 * @param ldifLines The set of lines that comprise the record
2122 * to decode. It must not be {@code null} or
2123 * empty.
2124 *
 * @return  The unparsed LDIF record that holds the prepared (unwrapped,
 *          comment-free) lines and is ready to be decoded.
2127 *
2128 * @throws LDIFException If the provided lines do not contain valid LDIF
2129 * content.
2130 */
2131 private static UnparsedLDIFRecord prepareRecord(
2132 final DuplicateValueBehavior duplicateValueBehavior,
2133 final TrailingSpaceBehavior trailingSpaceBehavior,
2134 final Schema schema, final String... ldifLines)
2135 throws LDIFException
2136 {
2137 ensureNotNull(ldifLines);
2138 ensureFalse(ldifLines.length == 0,
2139 "LDIFReader.prepareRecord.ldifLines must not be empty.");
2140
2141 boolean lastWasComment = false;
2142 final ArrayList<StringBuilder> lineList =
2143 new ArrayList<StringBuilder>(ldifLines.length);
2144 for (int i=0; i < ldifLines.length; i++)
2145 {
2146 final String line = ldifLines[i];
2147 if (line.length() == 0)
2148 {
2149 // This is only acceptable if there are no more non-empty lines in the
2150 // array.
2151 for (int j=i+1; j < ldifLines.length; j++)
2152 {
2153 if (ldifLines[j].length() > 0)
2154 {
2155 throw new LDIFException(ERR_READ_UNEXPECTED_BLANK.get(i), i, true,
2156 ldifLines, null);
2157 }
2158
2159 // If we've gotten here, then we know that we're at the end of the
2160 // entry. If we have read data, then we can decode it as an entry.
2161 // Otherwise, there was no real data in the provided LDIF lines.
2162 if (lineList.isEmpty())
2163 {
2164 throw new LDIFException(ERR_READ_ONLY_BLANKS.get(), 0, true,
2165 ldifLines, null);
2166 }
2167 else
2168 {
2169 return new UnparsedLDIFRecord(lineList, duplicateValueBehavior,
2170 trailingSpaceBehavior, schema, 0);
2171 }
2172 }
2173 }
2174
2175 if (line.charAt(0) == ' ')
2176 {
2177 if (i > 0)
2178 {
2179 if (! lastWasComment)
2180 {
2181 lineList.get(lineList.size() - 1).append(line.substring(1));
2182 }
2183 }
2184 else
2185 {
2186 throw new LDIFException(
2187 ERR_READ_UNEXPECTED_FIRST_SPACE_NO_NUMBER.get(), 0,
2188 true, ldifLines, null);
2189 }
2190 }
2191 else if (line.charAt(0) == '#')
2192 {
2193 lastWasComment = true;
2194 }
2195 else
2196 {
2197 lineList.add(new StringBuilder(line));
2198 lastWasComment = false;
2199 }
2200 }
2201
2202 if (lineList.isEmpty())
2203 {
2204 throw new LDIFException(ERR_READ_NO_DATA.get(), 0, true, ldifLines, null);
2205 }
2206 else
2207 {
2208 return new UnparsedLDIFRecord(lineList, duplicateValueBehavior,
2209 trailingSpaceBehavior, schema, 0);
2210 }
2211 }
2212
2213
2214
2215 /**
2216 * Decodes the unparsed record that was read from the LDIF source. It may be
2217 * either an entry or an LDIF change record.
2218 *
2219 * @param unparsedRecord The unparsed LDIF record that was read from the
2220 * input. It must not be {@code null} or empty.
2221 * @param relativeBasePath The base path that will be prepended to relative
2222 * paths in order to obtain an absolute path.
2223 * @param schema The schema to use when parsing.
2224 *
2225 * @return The parsed record, or {@code null} if there are no more entries to
2226 * be read.
2227 *
2228 * @throws LDIFException If the data read could not be parsed as an entry or
2229 * an LDIF change record.
2230 */
2231 private static LDIFRecord decodeRecord(
2232 final UnparsedLDIFRecord unparsedRecord,
2233 final String relativeBasePath,
2234 final Schema schema)
2235 throws LDIFException
2236 {
2237 // If there was an error reading from the input, then we rethrow it here.
2238 final Exception readError = unparsedRecord.getFailureCause();
2239 if (readError != null)
2240 {
2241 if (readError instanceof LDIFException)
2242 {
2243 // If the error was an LDIFException, which will normally be the case,
2244 // then rethrow it with all of the same state. We could just
2245 // throw (LDIFException) readError;
2246 // but that's considered bad form.
2247 final LDIFException ldifEx = (LDIFException) readError;
2248 throw new LDIFException(ldifEx.getMessage(),
2249 ldifEx.getLineNumber(),
2250 ldifEx.mayContinueReading(),
2251 ldifEx.getDataLines(),
2252 ldifEx.getCause());
2253 }
2254 else
2255 {
2256 throw new LDIFException(getExceptionMessage(readError),
2257 -1, true, readError);
2258 }
2259 }
2260
2261 if (unparsedRecord.isEOF())
2262 {
2263 return null;
2264 }
2265
2266 final ArrayList<StringBuilder> lineList = unparsedRecord.getLineList();
2267 if (unparsedRecord.getLineList() == null)
2268 {
2269 return null; // We can get here if there was an error reading the lines.
2270 }
2271
2272 final LDIFRecord r;
2273 if (lineList.size() == 1)
2274 {
2275 r = decodeEntry(unparsedRecord, relativeBasePath);
2276 }
2277 else
2278 {
2279 final String lowerSecondLine = toLowerCase(lineList.get(1).toString());
2280 if (lowerSecondLine.startsWith("control:") ||
2281 lowerSecondLine.startsWith("changetype:"))
2282 {
2283 r = decodeChangeRecord(unparsedRecord, relativeBasePath, true, schema);
2284 }
2285 else
2286 {
2287 r = decodeEntry(unparsedRecord, relativeBasePath);
2288 }
2289 }
2290
2291 debugLDIFRead(r);
2292 return r;
2293 }
2294
2295
2296
2297 /**
2298 * Decodes the provided set of LDIF lines as an entry. The provided list must
2299 * not contain any blank lines or comments, and lines are not allowed to be
2300 * wrapped.
2301 *
2302 * @param unparsedRecord The unparsed LDIF record that was read from the
2303 * input. It must not be {@code null} or empty.
2304 * @param relativeBasePath The base path that will be prepended to relative
2305 * paths in order to obtain an absolute path.
2306 *
2307 * @return The entry read from LDIF.
2308 *
2309 * @throws LDIFException If the provided LDIF data cannot be read as an
2310 * entry.
2311 */
2312 private static Entry decodeEntry(final UnparsedLDIFRecord unparsedRecord,
2313 final String relativeBasePath)
2314 throws LDIFException
2315 {
2316 final ArrayList<StringBuilder> ldifLines = unparsedRecord.getLineList();
2317 final long firstLineNumber = unparsedRecord.getFirstLineNumber();
2318
2319 final Iterator<StringBuilder> iterator = ldifLines.iterator();
2320
2321 // The first line must start with either "version:" or "dn:". If the first
2322 // line starts with "version:" then the second must start with "dn:".
2323 StringBuilder line = iterator.next();
2324 handleTrailingSpaces(line, null, firstLineNumber,
2325 unparsedRecord.getTrailingSpaceBehavior());
2326 int colonPos = line.indexOf(":");
2327 if ((colonPos > 0) &&
2328 line.substring(0, colonPos).equalsIgnoreCase("version"))
2329 {
2330 // The first line is "version:". Under most conditions, this will be
2331 // handled by the LDIF reader, but this can happen if you call
2332 // decodeEntry with a set of data that includes a version. At any rate,
2333 // read the next line, which must specify the DN.
2334 line = iterator.next();
2335 handleTrailingSpaces(line, null, firstLineNumber,
2336 unparsedRecord.getTrailingSpaceBehavior());
2337 }
2338
2339 colonPos = line.indexOf(":");
2340 if ((colonPos < 0) ||
2341 (! line.substring(0, colonPos).equalsIgnoreCase("dn")))
2342 {
2343 throw new LDIFException(
2344 ERR_READ_DN_LINE_DOESNT_START_WITH_DN.get(firstLineNumber),
2345 firstLineNumber, true, ldifLines, null);
2346 }
2347
2348 final String dn;
2349 final int length = line.length();
2350 if (length == (colonPos+1))
2351 {
2352 // The colon was the last character on the line. This is acceptable and
2353 // indicates that the entry has the null DN.
2354 dn = "";
2355 }
2356 else if (line.charAt(colonPos+1) == ':')
2357 {
2358 // Skip over any spaces leading up to the value, and then the rest of the
2359 // string is the base64-encoded DN.
2360 int pos = colonPos+2;
2361 while ((pos < length) && (line.charAt(pos) == ' '))
2362 {
2363 pos++;
2364 }
2365
2366 try
2367 {
2368 final byte[] dnBytes = Base64.decode(line.substring(pos));
2369 dn = new String(dnBytes, "UTF-8");
2370 }
2371 catch (final ParseException pe)
2372 {
2373 debugException(pe);
2374 throw new LDIFException(
2375 ERR_READ_CANNOT_BASE64_DECODE_DN.get(firstLineNumber,
2376 pe.getMessage()),
2377 firstLineNumber, true, ldifLines, pe);
2378 }
2379 catch (final Exception e)
2380 {
2381 debugException(e);
2382 throw new LDIFException(
2383 ERR_READ_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, e),
2384 firstLineNumber, true, ldifLines, e);
2385 }
2386 }
2387 else
2388 {
2389 // Skip over any spaces leading up to the value, and then the rest of the
2390 // string is the DN.
2391 int pos = colonPos+1;
2392 while ((pos < length) && (line.charAt(pos) == ' '))
2393 {
2394 pos++;
2395 }
2396
2397 dn = line.substring(pos);
2398 }
2399
2400
2401 // The remaining lines must be the attributes for the entry. However, we
2402 // will allow the case in which an entry does not have any attributes, to be
2403 // able to support reading search result entries in which no attributes were
2404 // returned.
2405 if (! iterator.hasNext())
2406 {
2407 return new Entry(dn, unparsedRecord.getSchema());
2408 }
2409
2410 return new Entry(dn, unparsedRecord.getSchema(),
2411 parseAttributes(dn, unparsedRecord.getDuplicateValueBehavior(),
2412 unparsedRecord.getTrailingSpaceBehavior(),
2413 unparsedRecord.getSchema(), ldifLines, iterator, relativeBasePath,
2414 firstLineNumber));
2415 }
2416
2417
2418
2419 /**
2420 * Decodes the provided set of LDIF lines as a change record. The provided
2421 * list must not contain any blank lines or comments, and lines are not
2422 * allowed to be wrapped.
2423 *
2424 * @param unparsedRecord The unparsed LDIF record that was read from the
2425 * input. It must not be {@code null} or empty.
2426 * @param relativeBasePath The base path that will be prepended to relative
2427 * paths in order to obtain an absolute path.
2428 * @param defaultAdd Indicates whether an LDIF record not containing a
2429 * changetype should be retrieved as an add change
2430 * record. If this is {@code false} and the record
2431 * read does not include a changetype, then an
2432 * {@link LDIFException} will be thrown.
2433 * @param schema The schema to use in parsing.
2434 *
2435 * @return The change record read from LDIF.
2436 *
2437 * @throws LDIFException If the provided LDIF data cannot be decoded as a
2438 * change record.
2439 */
2440 private static LDIFChangeRecord decodeChangeRecord(
2441 final UnparsedLDIFRecord unparsedRecord,
2442 final String relativeBasePath,
2443 final boolean defaultAdd,
2444 final Schema schema)
2445 throws LDIFException
2446 {
2447 final ArrayList<StringBuilder> ldifLines = unparsedRecord.getLineList();
2448 final long firstLineNumber = unparsedRecord.getFirstLineNumber();
2449
2450 Iterator<StringBuilder> iterator = ldifLines.iterator();
2451
2452 // The first line must start with either "version:" or "dn:". If the first
2453 // line starts with "version:" then the second must start with "dn:".
2454 StringBuilder line = iterator.next();
2455 handleTrailingSpaces(line, null, firstLineNumber,
2456 unparsedRecord.getTrailingSpaceBehavior());
2457 int colonPos = line.indexOf(":");
2458 int linesRead = 1;
2459 if ((colonPos > 0) &&
2460 line.substring(0, colonPos).equalsIgnoreCase("version"))
2461 {
2462 // The first line is "version:". Under most conditions, this will be
2463 // handled by the LDIF reader, but this can happen if you call
2464 // decodeEntry with a set of data that includes a version. At any rate,
2465 // read the next line, which must specify the DN.
2466 line = iterator.next();
2467 linesRead++;
2468 handleTrailingSpaces(line, null, firstLineNumber,
2469 unparsedRecord.getTrailingSpaceBehavior());
2470 }
2471
2472 colonPos = line.indexOf(":");
2473 if ((colonPos < 0) ||
2474 (! line.substring(0, colonPos).equalsIgnoreCase("dn")))
2475 {
2476 throw new LDIFException(
2477 ERR_READ_DN_LINE_DOESNT_START_WITH_DN.get(firstLineNumber),
2478 firstLineNumber, true, ldifLines, null);
2479 }
2480
2481 final String dn;
2482 int length = line.length();
2483 if (length == (colonPos+1))
2484 {
2485 // The colon was the last character on the line. This is acceptable and
2486 // indicates that the entry has the null DN.
2487 dn = "";
2488 }
2489 else if (line.charAt(colonPos+1) == ':')
2490 {
2491 // Skip over any spaces leading up to the value, and then the rest of the
2492 // string is the base64-encoded DN.
2493 int pos = colonPos+2;
2494 while ((pos < length) && (line.charAt(pos) == ' '))
2495 {
2496 pos++;
2497 }
2498
2499 try
2500 {
2501 final byte[] dnBytes = Base64.decode(line.substring(pos));
2502 dn = new String(dnBytes, "UTF-8");
2503 }
2504 catch (final ParseException pe)
2505 {
2506 debugException(pe);
2507 throw new LDIFException(
2508 ERR_READ_CR_CANNOT_BASE64_DECODE_DN.get(firstLineNumber,
2509 pe.getMessage()),
2510 firstLineNumber, true, ldifLines, pe);
2511 }
2512 catch (final Exception e)
2513 {
2514 debugException(e);
2515 throw new LDIFException(
2516 ERR_READ_CR_CANNOT_BASE64_DECODE_DN.get(firstLineNumber,
2517 e),
2518 firstLineNumber, true, ldifLines, e);
2519 }
2520 }
2521 else
2522 {
2523 // Skip over any spaces leading up to the value, and then the rest of the
2524 // string is the DN.
2525 int pos = colonPos+1;
2526 while ((pos < length) && (line.charAt(pos) == ' '))
2527 {
2528 pos++;
2529 }
2530
2531 dn = line.substring(pos);
2532 }
2533
2534
2535 // An LDIF change record may contain zero or more controls, with the end of
2536 // the controls signified by the changetype. The changetype element must be
2537 // present, unless defaultAdd is true in which case the first thing that is
2538 // neither control or changetype will trigger the start of add attribute
2539 // parsing.
2540 if (! iterator.hasNext())
2541 {
2542 throw new LDIFException(ERR_READ_CR_TOO_SHORT.get(firstLineNumber),
2543 firstLineNumber, true, ldifLines, null);
2544 }
2545
2546 String changeType = null;
2547 ArrayList<Control> controls = null;
2548 while (true)
2549 {
2550 line = iterator.next();
2551 handleTrailingSpaces(line, dn, firstLineNumber,
2552 unparsedRecord.getTrailingSpaceBehavior());
2553 colonPos = line.indexOf(":");
2554 if (colonPos < 0)
2555 {
2556 throw new LDIFException(
2557 ERR_READ_CR_SECOND_LINE_MISSING_COLON.get(firstLineNumber),
2558 firstLineNumber, true, ldifLines, null);
2559 }
2560
2561 final String token = toLowerCase(line.substring(0, colonPos));
2562 if (token.equals("control"))
2563 {
2564 if (controls == null)
2565 {
2566 controls = new ArrayList<Control>(5);
2567 }
2568
2569 controls.add(decodeControl(line, colonPos, firstLineNumber, ldifLines,
2570 relativeBasePath));
2571 }
2572 else if (token.equals("changetype"))
2573 {
2574 changeType =
2575 decodeChangeType(line, colonPos, firstLineNumber, ldifLines);
2576 break;
2577 }
2578 else if (defaultAdd)
2579 {
2580 // The line we read wasn't a control or changetype declaration, so we'll
2581 // assume it's an attribute in an add record. However, we're not ready
2582 // for that yet, and since we can't rewind an iterator we'll create a
2583 // new one that hasn't yet gotten to this line.
2584 changeType = "add";
2585 iterator = ldifLines.iterator();
2586 for (int i=0; i < linesRead; i++)
2587 {
2588 iterator.next();
2589 }
2590 break;
2591 }
2592 else
2593 {
2594 throw new LDIFException(
2595 ERR_READ_CR_CT_LINE_DOESNT_START_WITH_CONTROL_OR_CT.get(
2596 firstLineNumber),
2597 firstLineNumber, true, ldifLines, null);
2598 }
2599
2600 linesRead++;
2601 }
2602
2603
2604 // Make sure that the change type is acceptable and then decode the rest of
2605 // the change record accordingly.
2606 final String lowerChangeType = toLowerCase(changeType);
2607 if (lowerChangeType.equals("add"))
2608 {
2609 // There must be at least one more line. If not, then that's an error.
2610 // Otherwise, parse the rest of the data as attribute-value pairs.
2611 if (iterator.hasNext())
2612 {
2613 final Collection<Attribute> attrs =
2614 parseAttributes(dn, unparsedRecord.getDuplicateValueBehavior(),
2615 unparsedRecord.getTrailingSpaceBehavior(),
2616 unparsedRecord.getSchema(), ldifLines, iterator,
2617 relativeBasePath, firstLineNumber);
2618 final Attribute[] attributes = new Attribute[attrs.size()];
2619 final Iterator<Attribute> attrIterator = attrs.iterator();
2620 for (int i=0; i < attributes.length; i++)
2621 {
2622 attributes[i] = attrIterator.next();
2623 }
2624
2625 return new LDIFAddChangeRecord(dn, attributes, controls);
2626 }
2627 else
2628 {
2629 throw new LDIFException(ERR_READ_CR_NO_ATTRIBUTES.get(firstLineNumber),
2630 firstLineNumber, true, ldifLines, null);
2631 }
2632 }
2633 else if (lowerChangeType.equals("delete"))
2634 {
2635 // There shouldn't be any more data. If there is, then that's an error.
2636 // Otherwise, we can just return the delete change record with what we
2637 // already know.
2638 if (iterator.hasNext())
2639 {
2640 throw new LDIFException(
2641 ERR_READ_CR_EXTRA_DELETE_DATA.get(firstLineNumber),
2642 firstLineNumber, true, ldifLines, null);
2643 }
2644 else
2645 {
2646 return new LDIFDeleteChangeRecord(dn, controls);
2647 }
2648 }
2649 else if (lowerChangeType.equals("modify"))
2650 {
2651 // There must be at least one more line. If not, then that's an error.
2652 // Otherwise, parse the rest of the data as a set of modifications.
2653 if (iterator.hasNext())
2654 {
2655 final Modification[] mods = parseModifications(dn,
2656 unparsedRecord.getTrailingSpaceBehavior(), ldifLines, iterator,
2657 firstLineNumber, schema);
2658 return new LDIFModifyChangeRecord(dn, mods, controls);
2659 }
2660 else
2661 {
2662 throw new LDIFException(ERR_READ_CR_NO_MODS.get(firstLineNumber),
2663 firstLineNumber, true, ldifLines, null);
2664 }
2665 }
2666 else if (lowerChangeType.equals("moddn") ||
2667 lowerChangeType.equals("modrdn"))
2668 {
2669 // There must be at least one more line. If not, then that's an error.
2670 // Otherwise, parse the rest of the data as a set of modifications.
2671 if (iterator.hasNext())
2672 {
2673 return parseModifyDNChangeRecord(ldifLines, iterator, dn, controls,
2674 unparsedRecord.getTrailingSpaceBehavior(), firstLineNumber);
2675 }
2676 else
2677 {
2678 throw new LDIFException(ERR_READ_CR_NO_NEWRDN.get(firstLineNumber),
2679 firstLineNumber, true, ldifLines, null);
2680 }
2681 }
2682 else
2683 {
2684 throw new LDIFException(ERR_READ_CR_INVALID_CT.get(changeType,
2685 firstLineNumber),
2686 firstLineNumber, true, ldifLines, null);
2687 }
2688 }
2689
2690
2691
2692 /**
2693 * Decodes information about a control from the provided line.
2694 *
2695 * @param line The line to process.
 * @param  colonPos          The position of the colon that separates the
 *                           control token string from the encoded control.
2698 * @param firstLineNumber The line number for the start of the record.
2699 * @param ldifLines The lines that comprise the LDIF representation
2700 * of the full record being parsed.
2701 * @param relativeBasePath The base path that will be prepended to relative
2702 * paths in order to obtain an absolute path.
2703 *
2704 * @return The decoded control.
2705 *
 * @throws  LDIFException  If a problem is encountered while trying to decode
 *                         the control.
2708 */
2709 private static Control decodeControl(final StringBuilder line,
2710 final int colonPos,
2711 final long firstLineNumber,
2712 final ArrayList<StringBuilder> ldifLines,
2713 final String relativeBasePath)
2714 throws LDIFException
2715 {
2716 final String controlString;
2717 int length = line.length();
2718 if (length == (colonPos+1))
2719 {
2720 // The colon was the last character on the line. This is not
2721 // acceptable.
2722 throw new LDIFException(
2723 ERR_READ_CONTROL_LINE_NO_CONTROL_VALUE.get(firstLineNumber),
2724 firstLineNumber, true, ldifLines, null);
2725 }
2726 else if (line.charAt(colonPos+1) == ':')
2727 {
2728 // Skip over any spaces leading up to the value, and then the rest of
2729 // the string is the base64-encoded control representation. This is
2730 // unusual and unnecessary, but is nevertheless acceptable.
2731 int pos = colonPos+2;
2732 while ((pos < length) && (line.charAt(pos) == ' '))
2733 {
2734 pos++;
2735 }
2736
2737 try
2738 {
2739 final byte[] controlBytes = Base64.decode(line.substring(pos));
2740 controlString = new String(controlBytes, "UTF-8");
2741 }
2742 catch (final ParseException pe)
2743 {
2744 debugException(pe);
2745 throw new LDIFException(
2746 ERR_READ_CANNOT_BASE64_DECODE_CONTROL.get(
2747 firstLineNumber, pe.getMessage()),
2748 firstLineNumber, true, ldifLines, pe);
2749 }
2750 catch (final Exception e)
2751 {
2752 debugException(e);
2753 throw new LDIFException(
2754 ERR_READ_CANNOT_BASE64_DECODE_CONTROL.get(firstLineNumber, e),
2755 firstLineNumber, true, ldifLines, e);
2756 }
2757 }
2758 else
2759 {
2760 // Skip over any spaces leading up to the value, and then the rest of
2761 // the string is the encoded control.
2762 int pos = colonPos+1;
2763 while ((pos < length) && (line.charAt(pos) == ' '))
2764 {
2765 pos++;
2766 }
2767
2768 controlString = line.substring(pos);
2769 }
2770
2771 // If the resulting control definition is empty, then that's invalid.
2772 if (controlString.length() == 0)
2773 {
2774 throw new LDIFException(
2775 ERR_READ_CONTROL_LINE_NO_CONTROL_VALUE.get(firstLineNumber),
2776 firstLineNumber, true, ldifLines, null);
2777 }
2778
2779
2780 // The first element of the control must be the OID, and it must be followed
2781 // by a space (to separate it from the criticality), a colon (to separate it
2782 // from the value and indicate a default criticality of false), or the end
2783 // of the line (to indicate a default criticality of false and no value).
2784 String oid = null;
2785 boolean hasCriticality = false;
2786 boolean hasValue = false;
2787 int pos = 0;
2788 length = controlString.length();
2789 while (pos < length)
2790 {
2791 final char c = controlString.charAt(pos);
2792 if (c == ':')
2793 {
2794 // This indicates that there is no criticality and that the value
2795 // immediately follows the OID.
2796 oid = controlString.substring(0, pos++);
2797 hasValue = true;
2798 break;
2799 }
2800 else if (c == ' ')
2801 {
2802 // This indicates that there is a criticality. We don't know anything
2803 // about the presence of a value yet.
2804 oid = controlString.substring(0, pos++);
2805 hasCriticality = true;
2806 break;
2807 }
2808 else
2809 {
2810 pos++;
2811 }
2812 }
2813
2814 if (oid == null)
2815 {
2816 // This indicates that the string representation of the control is only
2817 // the OID.
2818 return new Control(controlString, false);
2819 }
2820
2821
2822 // See if we need to read the criticality. If so, then do so now.
2823 // Otherwise, assume a default criticality of false.
2824 final boolean isCritical;
2825 if (hasCriticality)
2826 {
2827 // Skip over any spaces before the criticality.
2828 while (controlString.charAt(pos) == ' ')
2829 {
2830 pos++;
2831 }
2832
2833 // Read until we find a colon or the end of the string.
2834 final int criticalityStartPos = pos;
2835 while (pos < length)
2836 {
2837 final char c = controlString.charAt(pos);
2838 if (c == ':')
2839 {
2840 hasValue = true;
2841 break;
2842 }
2843 else
2844 {
2845 pos++;
2846 }
2847 }
2848
2849 final String criticalityString =
2850 toLowerCase(controlString.substring(criticalityStartPos, pos));
2851 if (criticalityString.equals("true"))
2852 {
2853 isCritical = true;
2854 }
2855 else if (criticalityString.equals("false"))
2856 {
2857 isCritical = false;
2858 }
2859 else
2860 {
2861 throw new LDIFException(
2862 ERR_READ_CONTROL_LINE_INVALID_CRITICALITY.get(criticalityString,
2863 firstLineNumber),
2864 firstLineNumber, true, ldifLines, null);
2865 }
2866
2867 if (hasValue)
2868 {
2869 pos++;
2870 }
2871 }
2872 else
2873 {
2874 isCritical = false;
2875 }
2876
2877 // See if we need to read the value. If so, then do so now. It may be
2878 // a string, or it may be base64-encoded. It could conceivably even be read
2879 // from a URL.
2880 final ASN1OctetString value;
2881 if (hasValue)
2882 {
2883 // The character immediately after the colon that precedes the value may
2884 // be one of the following:
2885 // - A second colon (optionally followed by a single space) to indicate
2886 // that the value is base64-encoded.
2887 // - A less-than symbol to indicate that the value should be read from a
2888 // location specified by a URL.
2889 // - A single space that precedes the non-base64-encoded value.
2890 // - The first character of the non-base64-encoded value.
2891 switch (controlString.charAt(pos))
2892 {
2893 case ':':
2894 try
2895 {
2896 if (controlString.length() == (pos+1))
2897 {
2898 value = new ASN1OctetString();
2899 }
2900 else if (controlString.charAt(pos+1) == ' ')
2901 {
2902 value = new ASN1OctetString(
2903 Base64.decode(controlString.substring(pos+2)));
2904 }
2905 else
2906 {
2907 value = new ASN1OctetString(
2908 Base64.decode(controlString.substring(pos+1)));
2909 }
2910 }
2911 catch (final Exception e)
2912 {
2913 debugException(e);
2914 throw new LDIFException(
2915 ERR_READ_CONTROL_LINE_CANNOT_BASE64_DECODE_VALUE.get(
2916 firstLineNumber, getExceptionMessage(e)),
2917 firstLineNumber, true, ldifLines, e);
2918 }
2919 break;
2920 case '<':
2921 try
2922 {
2923 final String urlString;
2924 if (controlString.charAt(pos+1) == ' ')
2925 {
2926 urlString = controlString.substring(pos+2);
2927 }
2928 else
2929 {
2930 urlString = controlString.substring(pos+1);
2931 }
2932 value = new ASN1OctetString(retrieveURLBytes(urlString,
2933 relativeBasePath, firstLineNumber));
2934 }
2935 catch (final Exception e)
2936 {
2937 debugException(e);
2938 throw new LDIFException(
2939 ERR_READ_CONTROL_LINE_CANNOT_RETRIEVE_VALUE_FROM_URL.get(
2940 firstLineNumber, getExceptionMessage(e)),
2941 firstLineNumber, true, ldifLines, e);
2942 }
2943 break;
2944 case ' ':
2945 value = new ASN1OctetString(controlString.substring(pos+1));
2946 break;
2947 default:
2948 value = new ASN1OctetString(controlString.substring(pos));
2949 break;
2950 }
2951 }
2952 else
2953 {
2954 value = null;
2955 }
2956
2957 return new Control(oid, isCritical, value);
2958 }
2959
2960
2961
2962 /**
2963 * Decodes the changetype element from the provided line.
2964 *
2965 * @param line The line to process.
2966 * @param colonPos The position of the colon that separates the
2967 * changetype string from its value.
2968 * @param firstLineNumber The line number for the start of the record.
2969 * @param ldifLines The lines that comprise the LDIF representation of
2970 * the full record being parsed.
2971 *
2972 * @return The decoded changetype string.
2973 *
2974 * @throws LDIFException If a problem is encountered while trying to decode
2975 * the changetype.
2976 */
2977 private static String decodeChangeType(final StringBuilder line,
2978 final int colonPos, final long firstLineNumber,
2979 final ArrayList<StringBuilder> ldifLines)
2980 throws LDIFException
2981 {
2982 final int length = line.length();
2983 if (length == (colonPos+1))
2984 {
2985 // The colon was the last character on the line. This is not
2986 // acceptable.
2987 throw new LDIFException(
2988 ERR_READ_CT_LINE_NO_CT_VALUE.get(firstLineNumber), firstLineNumber,
2989 true, ldifLines, null);
2990 }
2991 else if (line.charAt(colonPos+1) == ':')
2992 {
2993 // Skip over any spaces leading up to the value, and then the rest of
2994 // the string is the base64-encoded changetype. This is unusual and
2995 // unnecessary, but is nevertheless acceptable.
2996 int pos = colonPos+2;
2997 while ((pos < length) && (line.charAt(pos) == ' '))
2998 {
2999 pos++;
3000 }
3001
3002 try
3003 {
3004 final byte[] changeTypeBytes = Base64.decode(line.substring(pos));
3005 return new String(changeTypeBytes, "UTF-8");
3006 }
3007 catch (final ParseException pe)
3008 {
3009 debugException(pe);
3010 throw new LDIFException(
3011 ERR_READ_CANNOT_BASE64_DECODE_CT.get(firstLineNumber,
3012 pe.getMessage()),
3013 firstLineNumber, true, ldifLines, pe);
3014 }
3015 catch (final Exception e)
3016 {
3017 debugException(e);
3018 throw new LDIFException(
3019 ERR_READ_CANNOT_BASE64_DECODE_CT.get(firstLineNumber, e),
3020 firstLineNumber, true, ldifLines, e);
3021 }
3022 }
3023 else
3024 {
3025 // Skip over any spaces leading up to the value, and then the rest of
3026 // the string is the changetype.
3027 int pos = colonPos+1;
3028 while ((pos < length) && (line.charAt(pos) == ' '))
3029 {
3030 pos++;
3031 }
3032
3033 return line.substring(pos);
3034 }
3035 }
3036
3037
3038
3039 /**
3040 * Parses the data available through the provided iterator as a collection of
3041 * attributes suitable for use in an entry or an add change record.
3042 *
3043 * @param dn The DN of the record being read.
3044 * @param duplicateValueBehavior The behavior that should be exhibited if
3045 * the LDIF reader encounters an entry with
3046 * duplicate values.
3047 * @param trailingSpaceBehavior The behavior that should be exhibited when
3048 * encountering attribute values which are not
3049 * base64-encoded but contain trailing spaces.
3050 * @param schema The schema to use when parsing the
3051 * attributes, or {@code null} if none is
3052 * needed.
3053 * @param ldifLines The lines that comprise the LDIF
3054 * representation of the full record being
3055 * parsed.
3056 * @param iterator The iterator to use to access the attribute
3057 * lines.
3058 * @param relativeBasePath The base path that will be prepended to
3059 * relative paths in order to obtain an
3060 * absolute path.
3061 * @param firstLineNumber The line number for the start of the
3062 * record.
3063 *
3064 * @return The collection of attributes that were read.
3065 *
3066 * @throws LDIFException If the provided LDIF data cannot be decoded as a
3067 * set of attributes.
3068 */
3069 private static ArrayList<Attribute> parseAttributes(final String dn,
3070 final DuplicateValueBehavior duplicateValueBehavior,
3071 final TrailingSpaceBehavior trailingSpaceBehavior, final Schema schema,
3072 final ArrayList<StringBuilder> ldifLines,
3073 final Iterator<StringBuilder> iterator, final String relativeBasePath,
3074 final long firstLineNumber)
3075 throws LDIFException
3076 {
3077 final LinkedHashMap<String,Object> attributes =
3078 new LinkedHashMap<String,Object>(ldifLines.size());
3079 while (iterator.hasNext())
3080 {
3081 final StringBuilder line = iterator.next();
3082 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
3083 final int colonPos = line.indexOf(":");
3084 if (colonPos <= 0)
3085 {
3086 throw new LDIFException(ERR_READ_NO_ATTR_COLON.get(firstLineNumber),
3087 firstLineNumber, true, ldifLines, null);
3088 }
3089
3090 final String attributeName = line.substring(0, colonPos);
3091 final String lowerName = toLowerCase(attributeName);
3092
3093 final MatchingRule matchingRule;
3094 if (schema == null)
3095 {
3096 matchingRule = CaseIgnoreStringMatchingRule.getInstance();
3097 }
3098 else
3099 {
3100 matchingRule =
3101 MatchingRule.selectEqualityMatchingRule(attributeName, schema);
3102 }
3103
3104 Attribute attr;
3105 final LDIFAttribute ldifAttr;
3106 final Object attrObject = attributes.get(lowerName);
3107 if (attrObject == null)
3108 {
3109 attr = null;
3110 ldifAttr = null;
3111 }
3112 else
3113 {
3114 if (attrObject instanceof Attribute)
3115 {
3116 attr = (Attribute) attrObject;
3117 ldifAttr = new LDIFAttribute(attr.getName(), matchingRule,
3118 attr.getRawValues()[0]);
3119 attributes.put(lowerName, ldifAttr);
3120 }
3121 else
3122 {
3123 attr = null;
3124 ldifAttr = (LDIFAttribute) attrObject;
3125 }
3126 }
3127
3128 final int length = line.length();
3129 if (length == (colonPos+1))
3130 {
3131 // This means that the attribute has a zero-length value, which is
3132 // acceptable.
3133 if (attrObject == null)
3134 {
3135 attr = new Attribute(attributeName, matchingRule, "");
3136 attributes.put(lowerName, attr);
3137 }
3138 else
3139 {
3140 try
3141 {
3142 if (! ldifAttr.addValue(new ASN1OctetString(),
3143 duplicateValueBehavior))
3144 {
3145 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP)
3146 {
3147 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn,
3148 firstLineNumber, attributeName), firstLineNumber, true,
3149 ldifLines, null);
3150 }
3151 }
3152 }
3153 catch (LDAPException le)
3154 {
3155 throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn,
3156 firstLineNumber, attributeName, getExceptionMessage(le)),
3157 firstLineNumber, true, ldifLines, le);
3158 }
3159 }
3160 }
3161 else if (line.charAt(colonPos+1) == ':')
3162 {
3163 // Skip over any spaces leading up to the value, and then the rest of
3164 // the string is the base64-encoded attribute value.
3165 int pos = colonPos+2;
3166 while ((pos < length) && (line.charAt(pos) == ' '))
3167 {
3168 pos++;
3169 }
3170
3171 try
3172 {
3173 final byte[] valueBytes = Base64.decode(line.substring(pos));
3174 if (attrObject == null)
3175 {
3176 attr = new Attribute(attributeName, matchingRule, valueBytes);
3177 attributes.put(lowerName, attr);
3178 }
3179 else
3180 {
3181 try
3182 {
3183 if (! ldifAttr.addValue(new ASN1OctetString(valueBytes),
3184 duplicateValueBehavior))
3185 {
3186 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP)
3187 {
3188 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn,
3189 firstLineNumber, attributeName), firstLineNumber, true,
3190 ldifLines, null);
3191 }
3192 }
3193 }
3194 catch (LDAPException le)
3195 {
3196 throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn,
3197 firstLineNumber, attributeName, getExceptionMessage(le)),
3198 firstLineNumber, true, ldifLines, le);
3199 }
3200 }
3201 }
3202 catch (final ParseException pe)
3203 {
3204 debugException(pe);
3205 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get(
3206 attributeName, firstLineNumber,
3207 pe.getMessage()),
3208 firstLineNumber, true, ldifLines, pe);
3209 }
3210 }
3211 else if (line.charAt(colonPos+1) == '<')
3212 {
3213 // Skip over any spaces leading up to the value, and then the rest of
3214 // the string is a URL that indicates where to get the real content.
3215 // At the present time, we'll only support the file URLs.
3216 int pos = colonPos+2;
3217 while ((pos < length) && (line.charAt(pos) == ' '))
3218 {
3219 pos++;
3220 }
3221
3222 final byte[] urlBytes;
3223 final String urlString = line.substring(pos);
3224 try
3225 {
3226 urlBytes =
3227 retrieveURLBytes(urlString, relativeBasePath, firstLineNumber);
3228 }
3229 catch (final Exception e)
3230 {
3231 debugException(e);
3232 throw new LDIFException(
3233 ERR_READ_URL_EXCEPTION.get(attributeName, urlString,
3234 firstLineNumber, e),
3235 firstLineNumber, true, ldifLines, e);
3236 }
3237
3238 if (attrObject == null)
3239 {
3240 attr = new Attribute(attributeName, matchingRule, urlBytes);
3241 attributes.put(lowerName, attr);
3242 }
3243 else
3244 {
3245 try
3246 {
3247 if (! ldifAttr.addValue(new ASN1OctetString(urlBytes),
3248 duplicateValueBehavior))
3249 {
3250 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP)
3251 {
3252 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn,
3253 firstLineNumber, attributeName), firstLineNumber, true,
3254 ldifLines, null);
3255 }
3256 }
3257 }
3258 catch (final LDIFException le)
3259 {
3260 debugException(le);
3261 throw le;
3262 }
3263 catch (final Exception e)
3264 {
3265 debugException(e);
3266 throw new LDIFException(
3267 ERR_READ_URL_EXCEPTION.get(attributeName, urlString,
3268 firstLineNumber, e),
3269 firstLineNumber, true, ldifLines, e);
3270 }
3271 }
3272 }
3273 else
3274 {
3275 // Skip over any spaces leading up to the value, and then the rest of
3276 // the string is the value.
3277 int pos = colonPos+1;
3278 while ((pos < length) && (line.charAt(pos) == ' '))
3279 {
3280 pos++;
3281 }
3282
3283 final String valueString = line.substring(pos);
3284 if (attrObject == null)
3285 {
3286 attr = new Attribute(attributeName, matchingRule, valueString);
3287 attributes.put(lowerName, attr);
3288 }
3289 else
3290 {
3291 try
3292 {
3293 if (! ldifAttr.addValue(new ASN1OctetString(valueString),
3294 duplicateValueBehavior))
3295 {
3296 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP)
3297 {
3298 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn,
3299 firstLineNumber, attributeName), firstLineNumber, true,
3300 ldifLines, null);
3301 }
3302 }
3303 }
3304 catch (LDAPException le)
3305 {
3306 throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn,
3307 firstLineNumber, attributeName, getExceptionMessage(le)),
3308 firstLineNumber, true, ldifLines, le);
3309 }
3310 }
3311 }
3312 }
3313
3314 final ArrayList<Attribute> attrList =
3315 new ArrayList<Attribute>(attributes.size());
3316 for (final Object o : attributes.values())
3317 {
3318 if (o instanceof Attribute)
3319 {
3320 attrList.add((Attribute) o);
3321 }
3322 else
3323 {
3324 attrList.add(((LDIFAttribute) o).toAttribute());
3325 }
3326 }
3327
3328 return attrList;
3329 }
3330
3331
3332
3333 /**
3334 * Retrieves the bytes that make up the file referenced by the given URL.
3335 *
3336 * @param urlString The string representation of the URL to retrieve.
3337 * @param relativeBasePath The base path that will be prepended to relative
3338 * paths in order to obtain an absolute path.
3339 * @param firstLineNumber The line number for the start of the record.
3340 *
3341 * @return The bytes contained in the specified file, or an empty array if
3342 * the specified file is empty.
3343 *
3344 * @throws LDIFException If the provided URL is malformed or references a
3345 * nonexistent file.
3346 *
3347 * @throws IOException If a problem is encountered while attempting to read
3348 * from the target file.
3349 */
3350 private static byte[] retrieveURLBytes(final String urlString,
3351 final String relativeBasePath,
3352 final long firstLineNumber)
3353 throws LDIFException, IOException
3354 {
3355 int pos;
3356 String path;
3357 final String lowerURLString = toLowerCase(urlString);
3358 if (lowerURLString.startsWith("file:/"))
3359 {
3360 pos = 6;
3361 while ((pos < urlString.length()) && (urlString.charAt(pos) == '/'))
3362 {
3363 pos++;
3364 }
3365
3366 path = urlString.substring(pos-1);
3367 }
3368 else if (lowerURLString.startsWith("file:"))
3369 {
3370 // A file: URL that doesn't include a slash will be interpreted as a
3371 // relative path.
3372 path = relativeBasePath + urlString.substring(5);
3373 }
3374 else
3375 {
3376 throw new LDIFException(ERR_READ_URL_INVALID_SCHEME.get(urlString),
3377 firstLineNumber, true);
3378 }
3379
3380 final File f = new File(path);
3381 if (! f.exists())
3382 {
3383 throw new LDIFException(
3384 ERR_READ_URL_NO_SUCH_FILE.get(urlString, f.getAbsolutePath()),
3385 firstLineNumber, true);
3386 }
3387
3388 // In order to conserve memory, we'll only allow values to be read from
3389 // files no larger than 10 megabytes.
3390 final long fileSize = f.length();
3391 if (fileSize > (10 * 1024 * 1024))
3392 {
3393 throw new LDIFException(
3394 ERR_READ_URL_FILE_TOO_LARGE.get(urlString, f.getAbsolutePath(),
3395 (10*1024*1024)),
3396 firstLineNumber, true);
3397 }
3398
3399 int fileBytesRemaining = (int) fileSize;
3400 final byte[] fileData = new byte[(int) fileSize];
3401 final FileInputStream fis = new FileInputStream(f);
3402 try
3403 {
3404 int fileBytesRead = 0;
3405 while (fileBytesRead < fileSize)
3406 {
3407 final int bytesRead =
3408 fis.read(fileData, fileBytesRead, fileBytesRemaining);
3409 if (bytesRead < 0)
3410 {
3411 // We hit the end of the file before we expected to. This shouldn't
3412 // happen unless the file size changed since we first looked at it,
3413 // which we won't allow.
3414 throw new LDIFException(
3415 ERR_READ_URL_FILE_SIZE_CHANGED.get(urlString,
3416 f.getAbsolutePath()),
3417 firstLineNumber, true);
3418 }
3419
3420 fileBytesRead += bytesRead;
3421 fileBytesRemaining -= bytesRead;
3422 }
3423
3424 if (fis.read() != -1)
3425 {
3426 // There is still more data to read. This shouldn't happen unless the
3427 // file size changed since we first looked at it, which we won't allow.
3428 throw new LDIFException(
3429 ERR_READ_URL_FILE_SIZE_CHANGED.get(urlString, f.getAbsolutePath()),
3430 firstLineNumber, true);
3431 }
3432 }
3433 finally
3434 {
3435 fis.close();
3436 }
3437
3438 return fileData;
3439 }
3440
3441
3442
3443 /**
3444 * Parses the data available through the provided iterator into an array of
3445 * modifications suitable for use in a modify change record.
3446 *
3447 * @param dn The DN of the entry being parsed.
3448 * @param trailingSpaceBehavior The behavior that should be exhibited when
3449 * encountering attribute values which are not
3450 * base64-encoded but contain trailing spaces.
3451 * @param ldifLines The lines that comprise the LDIF
3452 * representation of the full record being
3453 * parsed.
3454 * @param iterator The iterator to use to access the
3455 * modification data.
3456 * @param firstLineNumber The line number for the start of the record.
3457 * @param schema The schema to use in processing.
3458 *
3459 * @return An array containing the modifications that were read.
3460 *
3461 * @throws LDIFException If the provided LDIF data cannot be decoded as a
3462 * set of modifications.
3463 */
3464 private static Modification[] parseModifications(final String dn,
3465 final TrailingSpaceBehavior trailingSpaceBehavior,
3466 final ArrayList<StringBuilder> ldifLines,
3467 final Iterator<StringBuilder> iterator,
3468 final long firstLineNumber, final Schema schema)
3469 throws LDIFException
3470 {
3471 final ArrayList<Modification> modList =
3472 new ArrayList<Modification>(ldifLines.size());
3473
3474 while (iterator.hasNext())
3475 {
3476 // The first line must start with "add:", "delete:", "replace:", or
3477 // "increment:" followed by an attribute name.
3478 StringBuilder line = iterator.next();
3479 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
3480 int colonPos = line.indexOf(":");
3481 if (colonPos < 0)
3482 {
3483 throw new LDIFException(ERR_READ_MOD_CR_NO_MODTYPE.get(firstLineNumber),
3484 firstLineNumber, true, ldifLines, null);
3485 }
3486
3487 final ModificationType modType;
3488 final String modTypeStr = toLowerCase(line.substring(0, colonPos));
3489 if (modTypeStr.equals("add"))
3490 {
3491 modType = ModificationType.ADD;
3492 }
3493 else if (modTypeStr.equals("delete"))
3494 {
3495 modType = ModificationType.DELETE;
3496 }
3497 else if (modTypeStr.equals("replace"))
3498 {
3499 modType = ModificationType.REPLACE;
3500 }
3501 else if (modTypeStr.equals("increment"))
3502 {
3503 modType = ModificationType.INCREMENT;
3504 }
3505 else
3506 {
3507 throw new LDIFException(ERR_READ_MOD_CR_INVALID_MODTYPE.get(modTypeStr,
3508 firstLineNumber),
3509 firstLineNumber, true, ldifLines, null);
3510 }
3511
3512 String attributeName;
3513 int length = line.length();
3514 if (length == (colonPos+1))
3515 {
3516 // The colon was the last character on the line. This is not
3517 // acceptable.
3518 throw new LDIFException(ERR_READ_MOD_CR_MODTYPE_NO_ATTR.get(
3519 firstLineNumber),
3520 firstLineNumber, true, ldifLines, null);
3521 }
3522 else if (line.charAt(colonPos+1) == ':')
3523 {
3524 // Skip over any spaces leading up to the value, and then the rest of
3525 // the string is the base64-encoded attribute name.
3526 int pos = colonPos+2;
3527 while ((pos < length) && (line.charAt(pos) == ' '))
3528 {
3529 pos++;
3530 }
3531
3532 try
3533 {
3534 final byte[] dnBytes = Base64.decode(line.substring(pos));
3535 attributeName = new String(dnBytes, "UTF-8");
3536 }
3537 catch (final ParseException pe)
3538 {
3539 debugException(pe);
3540 throw new LDIFException(
3541 ERR_READ_MOD_CR_MODTYPE_CANNOT_BASE64_DECODE_ATTR.get(
3542 firstLineNumber, pe.getMessage()),
3543 firstLineNumber, true, ldifLines, pe);
3544 }
3545 catch (final Exception e)
3546 {
3547 debugException(e);
3548 throw new LDIFException(
3549 ERR_READ_MOD_CR_MODTYPE_CANNOT_BASE64_DECODE_ATTR.get(
3550 firstLineNumber, e),
3551 firstLineNumber, true, ldifLines, e);
3552 }
3553 }
3554 else
3555 {
3556 // Skip over any spaces leading up to the value, and then the rest of
3557 // the string is the attribute name.
3558 int pos = colonPos+1;
3559 while ((pos < length) && (line.charAt(pos) == ' '))
3560 {
3561 pos++;
3562 }
3563
3564 attributeName = line.substring(pos);
3565 }
3566
3567 if (attributeName.length() == 0)
3568 {
3569 throw new LDIFException(ERR_READ_MOD_CR_MODTYPE_NO_ATTR.get(
3570 firstLineNumber),
3571 firstLineNumber, true, ldifLines, null);
3572 }
3573
3574
3575 // The next zero or more lines may be the set of attribute values. Keep
3576 // reading until we reach the end of the iterator or until we find a line
3577 // with just a "-".
3578 final ArrayList<ASN1OctetString> valueList =
3579 new ArrayList<ASN1OctetString>(ldifLines.size());
3580 while (iterator.hasNext())
3581 {
3582 line = iterator.next();
3583 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
3584 if (line.toString().equals("-"))
3585 {
3586 break;
3587 }
3588
3589 colonPos = line.indexOf(":");
3590 if (colonPos < 0)
3591 {
3592 throw new LDIFException(ERR_READ_NO_ATTR_COLON.get(firstLineNumber),
3593 firstLineNumber, true, ldifLines, null);
3594 }
3595 else if (! line.substring(0, colonPos).equalsIgnoreCase(attributeName))
3596 {
3597 // There are a couple of cases in which this might be acceptable:
3598 // - If the two names are logically equivalent, but have an alternate
3599 // name (or OID) for the target attribute type, or if there are
3600 // attribute options and the options are just in a different order.
3601 // - If this is the first value for the target attribute and the
3602 // alternate name includes a "binary" option that the original
3603 // attribute name did not have. In this case, all subsequent values
3604 // will also be required to have the binary option.
3605 final String alternateName = line.substring(0, colonPos);
3606
3607
3608 // Check to see if the base names are equivalent.
3609 boolean baseNameEquivalent = false;
3610 final String expectedBaseName = Attribute.getBaseName(attributeName);
3611 final String alternateBaseName = Attribute.getBaseName(alternateName);
3612 if (alternateBaseName.equalsIgnoreCase(expectedBaseName))
3613 {
3614 baseNameEquivalent = true;
3615 }
3616 else
3617 {
3618 if (schema != null)
3619 {
3620 final AttributeTypeDefinition expectedAT =
3621 schema.getAttributeType(expectedBaseName);
3622 final AttributeTypeDefinition alternateAT =
3623 schema.getAttributeType(alternateBaseName);
3624 if ((expectedAT != null) && (alternateAT != null) &&
3625 expectedAT.equals(alternateAT))
3626 {
3627 baseNameEquivalent = true;
3628 }
3629 }
3630 }
3631
3632
3633 // Check to see if the attribute options are equivalent.
3634 final Set<String> expectedOptions =
3635 Attribute.getOptions(attributeName);
3636 final Set<String> lowerExpectedOptions =
3637 new HashSet<String>(expectedOptions.size());
3638 for (final String s : expectedOptions)
3639 {
3640 lowerExpectedOptions.add(toLowerCase(s));
3641 }
3642
3643 final Set<String> alternateOptions =
3644 Attribute.getOptions(alternateName);
3645 final Set<String> lowerAlternateOptions =
3646 new HashSet<String>(alternateOptions.size());
3647 for (final String s : alternateOptions)
3648 {
3649 lowerAlternateOptions.add(toLowerCase(s));
3650 }
3651
3652 final boolean optionsEquivalent =
3653 lowerAlternateOptions.equals(lowerExpectedOptions);
3654
3655
3656 if (baseNameEquivalent && optionsEquivalent)
3657 {
3658 // This is fine. The two attribute descriptions are logically
3659 // equivalent. We'll continue using the attribute description that
3660 // was provided first.
3661 }
3662 else if (valueList.isEmpty() && baseNameEquivalent &&
3663 lowerAlternateOptions.remove("binary") &&
3664 lowerAlternateOptions.equals(lowerExpectedOptions))
3665 {
3666 // This means that the provided value is the first value for the
3667 // attribute, and that the only significant difference is that the
3668 // provided attribute description included an unexpected "binary"
3669 // option. We'll accept this, but will require any additional
3670 // values for this modification to also include the binary option,
3671 // and we'll use the binary option in the attribute that is
3672 // eventually created.
3673 attributeName = alternateName;
3674 }
3675 else
3676 {
3677 // This means that either the base names are different or the sets
3678 // of options are incompatible. This is not acceptable.
3679 throw new LDIFException(ERR_READ_MOD_CR_ATTR_MISMATCH.get(
3680 firstLineNumber,
3681 line.substring(0, colonPos),
3682 attributeName),
3683 firstLineNumber, true, ldifLines, null);
3684 }
3685 }
3686
3687 length = line.length();
3688 final ASN1OctetString value;
3689 if (length == (colonPos+1))
3690 {
3691 // The colon was the last character on the line. This is fine.
3692 value = new ASN1OctetString();
3693 }
3694 else if (line.charAt(colonPos+1) == ':')
3695 {
3696 // Skip over any spaces leading up to the value, and then the rest of
3697 // the string is the base64-encoded value. This is unusual and
3698 // unnecessary, but is nevertheless acceptable.
3699 int pos = colonPos+2;
3700 while ((pos < length) && (line.charAt(pos) == ' '))
3701 {
3702 pos++;
3703 }
3704
3705 try
3706 {
3707 value = new ASN1OctetString(Base64.decode(line.substring(pos)));
3708 }
3709 catch (final ParseException pe)
3710 {
3711 debugException(pe);
3712 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get(
3713 attributeName, firstLineNumber, pe.getMessage()),
3714 firstLineNumber, true, ldifLines, pe);
3715 }
3716 catch (final Exception e)
3717 {
3718 debugException(e);
3719 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get(
3720 firstLineNumber, e),
3721 firstLineNumber, true, ldifLines, e);
3722 }
3723 }
3724 else
3725 {
3726 // Skip over any spaces leading up to the value, and then the rest of
3727 // the string is the value.
3728 int pos = colonPos+1;
3729 while ((pos < length) && (line.charAt(pos) == ' '))
3730 {
3731 pos++;
3732 }
3733
3734 value = new ASN1OctetString(line.substring(pos));
3735 }
3736
3737 valueList.add(value);
3738 }
3739
3740 final ASN1OctetString[] values = new ASN1OctetString[valueList.size()];
3741 valueList.toArray(values);
3742
3743 // If it's an add modification type, then there must be at least one
3744 // value.
3745 if ((modType.intValue() == ModificationType.ADD.intValue()) &&
3746 (values.length == 0))
3747 {
3748 throw new LDIFException(ERR_READ_MOD_CR_NO_ADD_VALUES.get(attributeName,
3749 firstLineNumber),
3750 firstLineNumber, true, ldifLines, null);
3751 }
3752
3753 // If it's an increment modification type, then there must be exactly one
3754 // value.
3755 if ((modType.intValue() == ModificationType.INCREMENT.intValue()) &&
3756 (values.length != 1))
3757 {
3758 throw new LDIFException(ERR_READ_MOD_CR_INVALID_INCR_VALUE_COUNT.get(
3759 firstLineNumber, attributeName),
3760 firstLineNumber, true, ldifLines, null);
3761 }
3762
3763 modList.add(new Modification(modType, attributeName, values));
3764 }
3765
3766 final Modification[] mods = new Modification[modList.size()];
3767 modList.toArray(mods);
3768 return mods;
3769 }
3770
3771
3772
3773 /**
3774 * Parses the data available through the provided iterator as the body of a
3775 * modify DN change record (i.e., the newrdn, deleteoldrdn, and optional
3776 * newsuperior lines).
3777 *
3778 * @param ldifLines The lines that comprise the LDIF
3779 * representation of the full record being
3780 * parsed.
3781 * @param iterator The iterator to use to access the modify DN
3782 * data.
3783 * @param dn The current DN of the entry.
3784 * @param controls The set of controls to include in the change
3785 * record.
3786 * @param trailingSpaceBehavior The behavior that should be exhibited when
3787 * encountering attribute values which are not
3788 * base64-encoded but contain trailing spaces.
3789 * @param firstLineNumber The line number for the start of the record.
3790 *
3791 * @return The decoded modify DN change record.
3792 *
3793 * @throws LDIFException If the provided LDIF data cannot be decoded as a
3794 * modify DN change record.
3795 */
3796 private static LDIFModifyDNChangeRecord parseModifyDNChangeRecord(
3797 final ArrayList<StringBuilder> ldifLines,
3798 final Iterator<StringBuilder> iterator, final String dn,
3799 final List<Control> controls,
3800 final TrailingSpaceBehavior trailingSpaceBehavior,
3801 final long firstLineNumber)
3802 throws LDIFException
3803 {
3804 // The next line must be the new RDN, and it must start with "newrdn:".
3805 StringBuilder line = iterator.next();
3806 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
3807 int colonPos = line.indexOf(":");
3808 if ((colonPos < 0) ||
3809 (! line.substring(0, colonPos).equalsIgnoreCase("newrdn")))
3810 {
3811 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_COLON.get(
3812 firstLineNumber),
3813 firstLineNumber, true, ldifLines, null);
3814 }
3815
3816 final String newRDN;
3817 int length = line.length();
3818 if (length == (colonPos+1))
3819 {
3820 // The colon was the last character on the line. This is not acceptable.
3821 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_VALUE.get(
3822 firstLineNumber),
3823 firstLineNumber, true, ldifLines, null);
3824 }
3825 else if (line.charAt(colonPos+1) == ':')
3826 {
3827 // Skip over any spaces leading up to the value, and then the rest of the
3828 // string is the base64-encoded new RDN.
3829 int pos = colonPos+2;
3830 while ((pos < length) && (line.charAt(pos) == ' '))
3831 {
3832 pos++;
3833 }
3834
3835 try
3836 {
3837 final byte[] dnBytes = Base64.decode(line.substring(pos));
3838 newRDN = new String(dnBytes, "UTF-8");
3839 }
3840 catch (final ParseException pe)
3841 {
3842 debugException(pe);
3843 throw new LDIFException(
3844 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWRDN.get(firstLineNumber,
3845 pe.getMessage()),
3846 firstLineNumber, true, ldifLines, pe);
3847 }
3848 catch (final Exception e)
3849 {
3850 debugException(e);
3851 throw new LDIFException(
3852 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWRDN.get(firstLineNumber,
3853 e),
3854 firstLineNumber, true, ldifLines, e);
3855 }
3856 }
3857 else
3858 {
3859 // Skip over any spaces leading up to the value, and then the rest of the
3860 // string is the new RDN.
3861 int pos = colonPos+1;
3862 while ((pos < length) && (line.charAt(pos) == ' '))
3863 {
3864 pos++;
3865 }
3866
3867 newRDN = line.substring(pos);
3868 }
3869
3870 if (newRDN.length() == 0)
3871 {
3872 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_VALUE.get(
3873 firstLineNumber),
3874 firstLineNumber, true, ldifLines, null);
3875 }
3876
3877
3878 // The next line must be the deleteOldRDN flag, and it must start with
3879 // 'deleteoldrdn:'.
3880 if (! iterator.hasNext())
3881 {
3882 throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_COLON.get(
3883 firstLineNumber),
3884 firstLineNumber, true, ldifLines, null);
3885 }
3886
3887 line = iterator.next();
3888 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
3889 colonPos = line.indexOf(":");
3890 if ((colonPos < 0) ||
3891 (! line.substring(0, colonPos).equalsIgnoreCase("deleteoldrdn")))
3892 {
3893 throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_COLON.get(
3894 firstLineNumber),
3895 firstLineNumber, true, ldifLines, null);
3896 }
3897
3898 final String deleteOldRDNStr;
3899 length = line.length();
3900 if (length == (colonPos+1))
3901 {
3902 // The colon was the last character on the line. This is not acceptable.
3903 throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_VALUE.get(
3904 firstLineNumber),
3905 firstLineNumber, true, ldifLines, null);
3906 }
3907 else if (line.charAt(colonPos+1) == ':')
3908 {
3909 // Skip over any spaces leading up to the value, and then the rest of the
3910 // string is the base64-encoded value. This is unusual and
3911 // unnecessary, but is nevertheless acceptable.
3912 int pos = colonPos+2;
3913 while ((pos < length) && (line.charAt(pos) == ' '))
3914 {
3915 pos++;
3916 }
3917
3918 try
3919 {
3920 final byte[] changeTypeBytes = Base64.decode(line.substring(pos));
3921 deleteOldRDNStr = new String(changeTypeBytes, "UTF-8");
3922 }
3923 catch (final ParseException pe)
3924 {
3925 debugException(pe);
3926 throw new LDIFException(
3927 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_DELOLDRDN.get(
3928 firstLineNumber, pe.getMessage()),
3929 firstLineNumber, true, ldifLines, pe);
3930 }
3931 catch (final Exception e)
3932 {
3933 debugException(e);
3934 throw new LDIFException(
3935 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_DELOLDRDN.get(
3936 firstLineNumber, e),
3937 firstLineNumber, true, ldifLines, e);
3938 }
3939 }
3940 else
3941 {
3942 // Skip over any spaces leading up to the value, and then the rest of the
3943 // string is the value.
3944 int pos = colonPos+1;
3945 while ((pos < length) && (line.charAt(pos) == ' '))
3946 {
3947 pos++;
3948 }
3949
3950 deleteOldRDNStr = line.substring(pos);
3951 }
3952
3953 final boolean deleteOldRDN;
3954 if (deleteOldRDNStr.equals("0"))
3955 {
3956 deleteOldRDN = false;
3957 }
3958 else if (deleteOldRDNStr.equals("1"))
3959 {
3960 deleteOldRDN = true;
3961 }
3962 else if (deleteOldRDNStr.equalsIgnoreCase("false") ||
3963 deleteOldRDNStr.equalsIgnoreCase("no"))
3964 {
3965 // This is technically illegal, but we'll allow it.
3966 deleteOldRDN = false;
3967 }
3968 else if (deleteOldRDNStr.equalsIgnoreCase("true") ||
3969 deleteOldRDNStr.equalsIgnoreCase("yes"))
3970 {
3971 // This is also technically illegal, but we'll allow it.
3972 deleteOldRDN = false;
3973 }
3974 else
3975 {
3976 throw new LDIFException(ERR_READ_MODDN_CR_INVALID_DELOLDRDN.get(
3977 deleteOldRDNStr, firstLineNumber),
3978 firstLineNumber, true, ldifLines, null);
3979 }
3980
3981
3982 // If there is another line, then it must be the new superior DN and it must
3983 // start with "newsuperior:". If this is absent, then it's fine.
3984 final String newSuperiorDN;
3985 if (iterator.hasNext())
3986 {
3987 line = iterator.next();
3988 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
3989 colonPos = line.indexOf(":");
3990 if ((colonPos < 0) ||
3991 (! line.substring(0, colonPos).equalsIgnoreCase("newsuperior")))
3992 {
3993 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWSUPERIOR_COLON.get(
3994 firstLineNumber),
3995 firstLineNumber, true, ldifLines, null);
3996 }
3997
3998 length = line.length();
3999 if (length == (colonPos+1))
4000 {
4001 // The colon was the last character on the line. This is fine.
4002 newSuperiorDN = "";
4003 }
4004 else if (line.charAt(colonPos+1) == ':')
4005 {
4006 // Skip over any spaces leading up to the value, and then the rest of
4007 // the string is the base64-encoded new superior DN.
4008 int pos = colonPos+2;
4009 while ((pos < length) && (line.charAt(pos) == ' '))
4010 {
4011 pos++;
4012 }
4013
4014 try
4015 {
4016 final byte[] dnBytes = Base64.decode(line.substring(pos));
4017 newSuperiorDN = new String(dnBytes, "UTF-8");
4018 }
4019 catch (final ParseException pe)
4020 {
4021 debugException(pe);
4022 throw new LDIFException(
4023 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWSUPERIOR.get(
4024 firstLineNumber, pe.getMessage()),
4025 firstLineNumber, true, ldifLines, pe);
4026 }
4027 catch (final Exception e)
4028 {
4029 debugException(e);
4030 throw new LDIFException(
4031 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWSUPERIOR.get(
4032 firstLineNumber, e),
4033 firstLineNumber, true, ldifLines, e);
4034 }
4035 }
4036 else
4037 {
4038 // Skip over any spaces leading up to the value, and then the rest of
4039 // the string is the new superior DN.
4040 int pos = colonPos+1;
4041 while ((pos < length) && (line.charAt(pos) == ' '))
4042 {
4043 pos++;
4044 }
4045
4046 newSuperiorDN = line.substring(pos);
4047 }
4048 }
4049 else
4050 {
4051 newSuperiorDN = null;
4052 }
4053
4054
4055 // There must not be any more lines.
4056 if (iterator.hasNext())
4057 {
4058 throw new LDIFException(ERR_READ_CR_EXTRA_MODDN_DATA.get(firstLineNumber),
4059 firstLineNumber, true, ldifLines, null);
4060 }
4061
4062 return new LDIFModifyDNChangeRecord(dn, newRDN, deleteOldRDN,
4063 newSuperiorDN, controls);
4064 }
4065
4066
4067
4068 /**
4069 * Examines the line contained in the provided buffer to determine whether it
4070 * may contain one or more illegal trailing spaces. If it does, then those
4071 * spaces will either be stripped out or an exception will be thrown to
4072 * indicate that they are illegal.
4073 *
4074 * @param buffer The buffer to be examined.
4075 * @param dn The DN of the LDIF record being parsed. It
4076 * may be {@code null} if the DN is not yet
4077 * known (e.g., because the provided line is
4078 * expected to contain that DN).
4079 * @param firstLineNumber The approximate line number in the LDIF
4080 * source on which the LDIF record begins.
4081 * @param trailingSpaceBehavior The behavior that should be exhibited when
4082 * encountering attribute values which are not
4083 * base64-encoded but contain trailing spaces.
4084 *
4085 * @throws LDIFException If the line contained in the provided buffer ends
4086 * with one or more illegal trailing spaces and
4087 * {@code stripTrailingSpaces} was provided with a
4088 * value of {@code false}.
4089 */
4090 private static void handleTrailingSpaces(final StringBuilder buffer,
4091 final String dn, final long firstLineNumber,
4092 final TrailingSpaceBehavior trailingSpaceBehavior)
4093 throws LDIFException
4094 {
4095 int pos = buffer.length() - 1;
4096 boolean trailingFound = false;
4097 while ((pos >= 0) && (buffer.charAt(pos) == ' '))
4098 {
4099 trailingFound = true;
4100 pos--;
4101 }
4102
4103 if (trailingFound && (buffer.charAt(pos) != ':'))
4104 {
4105 switch (trailingSpaceBehavior)
4106 {
4107 case STRIP:
4108 buffer.setLength(pos+1);
4109 break;
4110
4111 case REJECT:
4112 if (dn == null)
4113 {
4114 throw new LDIFException(
4115 ERR_READ_ILLEGAL_TRAILING_SPACE_WITHOUT_DN.get(firstLineNumber,
4116 buffer.toString()),
4117 firstLineNumber, true);
4118 }
4119 else
4120 {
4121 throw new LDIFException(
4122 ERR_READ_ILLEGAL_TRAILING_SPACE_WITH_DN.get(dn,
4123 firstLineNumber, buffer.toString()),
4124 firstLineNumber, true);
4125 }
4126
4127 case RETAIN:
4128 default:
4129 // No action will be taken.
4130 break;
4131 }
4132 }
4133 }
4134
4135
4136
4137 /**
4138 * This represents an unparsed LDIFRecord. It stores the line number of the
4139 * first line of the record and each line of the record.
4140 */
4141 private static final class UnparsedLDIFRecord
4142 {
4143 private final ArrayList<StringBuilder> lineList;
4144 private final long firstLineNumber;
4145 private final Exception failureCause;
4146 private final boolean isEOF;
4147 private final DuplicateValueBehavior duplicateValueBehavior;
4148 private final Schema schema;
4149 private final TrailingSpaceBehavior trailingSpaceBehavior;
4150
4151
4152
4153 /**
4154 * Constructor.
4155 *
4156 * @param lineList The lines that comprise the LDIF record.
4157 * @param duplicateValueBehavior The behavior to exhibit if the entry
4158 * contains duplicate attribute values.
4159 * @param trailingSpaceBehavior Specifies the behavior to exhibit when
4160 * encountering trailing spaces in
4161 * non-base64-encoded attribute values.
4162 * @param schema The schema to use when parsing, if
4163 * applicable.
4164 * @param firstLineNumber The first line number of the LDIF record.
4165 */
4166 private UnparsedLDIFRecord(final ArrayList<StringBuilder> lineList,
4167 final DuplicateValueBehavior duplicateValueBehavior,
4168 final TrailingSpaceBehavior trailingSpaceBehavior,
4169 final Schema schema, final long firstLineNumber)
4170 {
4171 this.lineList = lineList;
4172 this.firstLineNumber = firstLineNumber;
4173 this.duplicateValueBehavior = duplicateValueBehavior;
4174 this.trailingSpaceBehavior = trailingSpaceBehavior;
4175 this.schema = schema;
4176
4177 failureCause = null;
4178 isEOF =
4179 (firstLineNumber < 0) || ((lineList != null) && lineList.isEmpty());
4180 }
4181
4182
4183
4184 /**
4185 * Constructor.
4186 *
4187 * @param failureCause The Exception thrown when reading from the input.
4188 */
4189 private UnparsedLDIFRecord(final Exception failureCause)
4190 {
4191 this.failureCause = failureCause;
4192
4193 lineList = null;
4194 firstLineNumber = 0;
4195 duplicateValueBehavior = DuplicateValueBehavior.REJECT;
4196 trailingSpaceBehavior = TrailingSpaceBehavior.REJECT;
4197 schema = null;
4198 isEOF = false;
4199 }
4200
4201
4202
4203 /**
4204 * Return the lines that comprise the LDIF record.
4205 *
4206 * @return The lines that comprise the LDIF record.
4207 */
4208 private ArrayList<StringBuilder> getLineList()
4209 {
4210 return lineList;
4211 }
4212
4213
4214
4215 /**
4216 * Retrieves the behavior to exhibit when encountering duplicate attribute
4217 * values.
4218 *
4219 * @return The behavior to exhibit when encountering duplicate attribute
4220 * values.
4221 */
4222 private DuplicateValueBehavior getDuplicateValueBehavior()
4223 {
4224 return duplicateValueBehavior;
4225 }
4226
4227
4228
4229 /**
4230 * Retrieves the behavior that should be exhibited when encountering
4231 * attribute values which are not base64-encoded but contain trailing
4232 * spaces. The LDIF specification strongly recommends that any value which
4233 * legitimately contains trailing spaces be base64-encoded, but the LDAP SDK
4234 * LDIF parser may be configured to automatically strip these spaces, to
4235 * preserve them, or to reject any entry or change record containing them.
4236 *
4237 * @return The behavior that should be exhibited when encountering
4238 * attribute values which are not base64-encoded but contain
4239 * trailing spaces.
4240 */
4241 private TrailingSpaceBehavior getTrailingSpaceBehavior()
4242 {
4243 return trailingSpaceBehavior;
4244 }
4245
4246
4247
4248 /**
4249 * Retrieves the schema that should be used when parsing the record, if
4250 * applicable.
4251 *
4252 * @return The schema that should be used when parsing the record, or
4253 * {@code null} if none should be used.
4254 */
4255 private Schema getSchema()
4256 {
4257 return schema;
4258 }
4259
4260
4261
4262 /**
4263 * Return the first line number of the LDIF record.
4264 *
4265 * @return The first line number of the LDIF record.
4266 */
4267 private long getFirstLineNumber()
4268 {
4269 return firstLineNumber;
4270 }
4271
4272
4273
4274 /**
4275 * Return {@code true} iff the end of the input was reached.
4276 *
4277 * @return {@code true} iff the end of the input was reached.
4278 */
4279 private boolean isEOF()
4280 {
4281 return isEOF;
4282 }
4283
4284
4285
4286 /**
4287 * Returns the reason that reading the record lines failed. This normally
4288 * is only non-null if something bad happened to the input stream (like
4289 * a disk read error).
4290 *
4291 * @return The reason that reading the record lines failed.
4292 */
4293 private Exception getFailureCause()
4294 {
4295 return failureCause;
4296 }
4297 }
4298
4299
4300 /**
4301 * When processing in asynchronous mode, this thread is responsible for
4302 * reading the raw unparsed records from the input and submitting them for
4303 * processing.
4304 */
4305 private final class LineReaderThread
4306 extends Thread
4307 {
4308 /**
4309 * Constructor.
4310 */
4311 private LineReaderThread()
4312 {
4313 super("Asynchronous LDIF line reader");
4314 setDaemon(true);
4315 }
4316
4317
4318
4319 /**
4320 * Reads raw, unparsed records from the input and submits them for
4321 * processing until the input is finished or closed.
4322 */
4323 @Override()
4324 public void run()
4325 {
4326 try
4327 {
4328 boolean stopProcessing = false;
4329 while (!stopProcessing)
4330 {
4331 UnparsedLDIFRecord unparsedRecord = null;
4332 try
4333 {
4334 unparsedRecord = readUnparsedRecord();
4335 }
4336 catch (IOException e)
4337 {
4338 debugException(e);
4339 unparsedRecord = new UnparsedLDIFRecord(e);
4340 stopProcessing = true;
4341 }
4342 catch (Exception e)
4343 {
4344 debugException(e);
4345 unparsedRecord = new UnparsedLDIFRecord(e);
4346 }
4347
4348 try
4349 {
4350 asyncParser.submit(unparsedRecord);
4351 }
4352 catch (InterruptedException e)
4353 {
4354 debugException(e);
4355 // If this thread is interrupted, then someone wants us to stop
4356 // processing, so that's what we'll do.
4357 stopProcessing = true;
4358 }
4359
4360 if ((unparsedRecord == null) || (unparsedRecord.isEOF()))
4361 {
4362 stopProcessing = true;
4363 }
4364 }
4365 }
4366 finally
4367 {
4368 try
4369 {
4370 asyncParser.shutdown();
4371 }
4372 catch (InterruptedException e)
4373 {
4374 debugException(e);
4375 }
4376 finally
4377 {
4378 asyncParsingComplete.set(true);
4379 }
4380 }
4381 }
4382 }
4383
4384
4385
4386 /**
4387 * Used to parse Records asynchronously.
4388 */
4389 private final class RecordParser implements Processor<UnparsedLDIFRecord,
4390 LDIFRecord>
4391 {
4392 /**
4393 * {@inheritDoc}
4394 */
4395 public LDIFRecord process(final UnparsedLDIFRecord input)
4396 throws LDIFException
4397 {
4398 LDIFRecord record = decodeRecord(input, relativeBasePath, schema);
4399
4400 if ((record instanceof Entry) && (entryTranslator != null))
4401 {
4402 record = entryTranslator.translate((Entry) record,
4403 input.getFirstLineNumber());
4404
4405 if (record == null)
4406 {
4407 record = SKIP_ENTRY;
4408 }
4409 }
4410 if ((record instanceof LDIFChangeRecord) &&
4411 (changeRecordTranslator != null))
4412 {
4413 record = changeRecordTranslator.translate((LDIFChangeRecord) record,
4414 input.getFirstLineNumber());
4415
4416 if (record == null)
4417 {
4418 record = SKIP_ENTRY;
4419 }
4420 }
4421 return record;
4422 }
4423 }
4424 }