/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package org.apache.commons.compress.archivers.tar;

import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.channels.SeekableByteChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
import org.apache.commons.compress.utils.ArchiveUtils;
import org.apache.commons.compress.utils.BoundedArchiveInputStream;
import org.apache.commons.compress.utils.BoundedInputStream;
import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream;
import org.apache.commons.compress.utils.SeekableInMemoryByteChannel;

/**
 * The TarFile provides random access to UNIX archives.
 *
 * <p>All entry headers are parsed eagerly by the constructors (the main
 * constructor loops over {@code getNextTarEntry()} until EOF); afterwards
 * {@link #getEntries()} returns the parsed entries and
 * {@link #getInputStream(TarArchiveEntry)} provides random access to an
 * entry's data via the underlying {@link SeekableByteChannel}.</p>
 *
 * @since 1.21
 */
public class TarFile implements Closeable {

    private static final int SMALL_BUFFER_SIZE = 256;

    // Scratch buffer used to drain the data of GNU long-name/long-link entries.
    private final byte[] smallBuf = new byte[SMALL_BUFFER_SIZE];

    // The channel the archive is read from; also handed to the bounded entry streams.
    private final SeekableByteChannel archive;

    /**
     * The encoding of the tar file
     */
    private final ZipEncoding zipEncoding;

    // All entries discovered during construction, in archive order.
    private final LinkedList<TarArchiveEntry> entries = new LinkedList<>();

    private final int blockSize;

    private final boolean lenient;

    private final int recordSize;

    // Reusable buffer holding exactly one record; refilled by readRecord().
    private final ByteBuffer recordBuffer;

    // the global sparse headers, this is only used in PAX Format 0.X
    private final List<TarArchiveStructSparse> globalSparseHeaders = new ArrayList<>();

    private boolean hasHitEOF;

    /**
     * The meta-data about the current entry
     */
    private TarArchiveEntry currEntry;

    // the global PAX header
    private Map<String, String> globalPaxHeaders = new HashMap<>();

    // Maps an entry's name to the list of streams that make up its sparse data,
    // populated by buildSparseInputStreams(). NOTE(review): keyed by entry name,
    // so entries with duplicate names overwrite each other's slot, and the
    // streams are consumed as they are read — confirm whether reading the same
    // sparse entry more than once is intended to be supported.
    private final Map<String, List<InputStream>> sparseInputStreams = new HashMap<>();

    /**
     * Constructor for TarFile.
     *
     * @param content the content to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final byte[] content) throws IOException {
        this(new SeekableInMemoryByteChannel(content));
    }

    /**
     * Constructor for TarFile.
     *
     * @param content the content to use
     * @param encoding the encoding to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final byte[] content, final String encoding) throws IOException {
        this(new SeekableInMemoryByteChannel(content), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding, false);
    }

    /**
     * Constructor for TarFile.
     *
     * @param content the content to use
     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be
     * ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
     * exception instead.
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final byte[] content, final boolean lenient) throws IOException {
        this(new SeekableInMemoryByteChannel(content), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
    }

    /**
     * Constructor for TarFile.
     *
     * @param archive the file of the archive to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final File archive) throws IOException {
        this(archive.toPath());
    }

    /**
     * Constructor for TarFile.
     *
     * @param archive the file of the archive to use
     * @param encoding the encoding to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final File archive, final String encoding) throws IOException {
        this(archive.toPath(), encoding);
    }

    /**
     * Constructor for TarFile.
     *
     * @param archive the file of the archive to use
     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be
     * ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
     * exception instead.
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final File archive, final boolean lenient) throws IOException {
        this(archive.toPath(), lenient);
    }

    /**
     * Constructor for TarFile.
     *
     * @param archivePath the path of the archive to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final Path archivePath) throws IOException {
        this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, false);
    }

    /**
     * Constructor for TarFile.
     *
     * @param archivePath the path of the archive to use
     * @param encoding the encoding to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final Path archivePath, final String encoding) throws IOException {
        this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding, false);
    }

    /**
     * Constructor for TarFile.
     *
     * @param archivePath the path of the archive to use
     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be
     * ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
     * exception instead.
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final Path archivePath, final boolean lenient) throws IOException {
        this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
    }

    /**
     * Constructor for TarFile.
     *
     * @param content the content to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final SeekableByteChannel content) throws IOException {
        this(content, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, false);
    }

    /**
     * Constructor for TarFile.
     *
     * <p>This is the primary constructor all others delegate to; it eagerly
     * parses every entry header in the archive.</p>
     *
     * @param archive the seekable byte channel to use
     * @param blockSize the blocks size to use
     * @param recordSize the record size to use
     * @param encoding the encoding to use
     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be
     * ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
     * exception instead.
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final SeekableByteChannel archive, final int blockSize, final int recordSize, final String encoding, final boolean lenient) throws IOException {
        this.archive = archive;
        this.hasHitEOF = false;
        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
        this.recordSize = recordSize;
        this.recordBuffer = ByteBuffer.allocate(this.recordSize);
        this.blockSize = blockSize;
        this.lenient = lenient;

        // Eagerly walk the whole archive so getEntries() can answer from memory.
        TarArchiveEntry entry;
        while ((entry = getNextTarEntry()) != null) {
            entries.add(entry);
        }
    }

    /**
     * Get the next entry in this tar archive. This will skip
     * to the end of the current entry, if there is one, and
     * place the position of the channel at the header of the
     * next entry, and read the header and instantiate a new
     * TarEntry from the header bytes and return that entry.
     * If there are no more entries in the archive, null will
     * be returned to indicate that the end of the archive has
     * been reached.
     *
     * <p>Note: for meta entries (GNU long name/link, PAX headers) this
     * method recurses indirectly via {@code getLongNameData()},
     * {@code readGlobalPaxHeaders()} and {@code paxHeaders()} to reach the
     * real entry the meta data describes.</p>
     *
     * @return The next TarEntry in the archive, or null if there is no next entry.
     * @throws IOException when reading the next TarEntry fails
     */
    private TarArchiveEntry getNextTarEntry() throws IOException {
        if (isAtEOF()) {
            return null;
        }

        if (currEntry != null) {
            // Skip to the end of the entry
            repositionForwardTo(currEntry.getDataOffset() + currEntry.getSize());
            throwExceptionIfPositionIsNotInArchive();
            skipRecordPadding();
        }

        final ByteBuffer headerBuf = getRecord();
        if (null == headerBuf) {
            /* hit EOF */
            currEntry = null;
            return null;
        }

        try {
            final long position = archive.position();
            currEntry = new TarArchiveEntry(globalPaxHeaders, headerBuf.array(), zipEncoding, lenient, position);
        } catch (final IllegalArgumentException e) {
            throw new IOException("Error detected parsing the header", e);
        }

        if (currEntry.isGNULongLinkEntry()) {
            final byte[] longLinkData = getLongNameData();
            if (longLinkData == null) {
                // Bugzilla: 40334
                // Malformed tar file - long link entry name not followed by
                // entry
                return null;
            }
            currEntry.setLinkName(zipEncoding.decode(longLinkData));
        }

        if (currEntry.isGNULongNameEntry()) {
            final byte[] longNameData = getLongNameData();
            if (longNameData == null) {
                // Bugzilla: 40334
                // Malformed tar file - long entry name not followed by
                // entry
                return null;
            }

            // COMPRESS-509 : the name of directories should end with '/'
            final String name = zipEncoding.decode(longNameData);
            currEntry.setName(name);
            if (currEntry.isDirectory() && !name.endsWith("/")) {
                currEntry.setName(name + "/");
            }
        }

        if (currEntry.isGlobalPaxHeader()) { // Process Global Pax headers
            readGlobalPaxHeaders();
        }

        try {
            if (currEntry.isPaxHeader()) { // Process Pax headers
                paxHeaders();
            } else if (!globalPaxHeaders.isEmpty()) {
                applyPaxHeadersToCurrentEntry(globalPaxHeaders, globalSparseHeaders);
            }
        } catch (NumberFormatException e) {
            throw new IOException("Error detected parsing the pax header", e);
        }

        if (currEntry.isOldGNUSparse()) { // Process sparse files
            readOldGNUSparse();
        }

        return currEntry;
    }

    /**
     * Adds the sparse chunks from the current entry to the sparse chunks,
     * including any additional sparse entries following the current entry.
     *
     * @throws IOException when reading the sparse entry fails
     */
    private void readOldGNUSparse() throws IOException {
        if (currEntry.isExtended()) {
            TarArchiveSparseEntry entry;
            do {
                final ByteBuffer headerBuf = getRecord();
                if (headerBuf == null) {
                    throw new IOException("premature end of tar archive. Didn't find extended_header after header with extended flag.");
                }
                entry = new TarArchiveSparseEntry(headerBuf.array());
                currEntry.getSparseHeaders().addAll(entry.getSparseHeaders());
                // Each extension header consumed shifts the entry's data start by one record.
                currEntry.setDataOffset(currEntry.getDataOffset() + recordSize);
            } while (entry.isExtended());
        }

        // sparse headers are all done reading, we need to build
        // sparse input streams using these sparse headers
        buildSparseInputStreams();
    }

    /**
     * Build the input streams consisting of all-zero input streams and non-zero input streams.
     * When reading from the non-zero input streams, the data is actually read from the original input stream.
     * The size of each input stream is introduced by the sparse headers.
     *
     * @implNote Some all-zero input streams and non-zero input streams have the size of 0. We DO NOT store the
     * 0 size input streams because they are meaningless.
     */
    private void buildSparseInputStreams() throws IOException {
        final List<InputStream> streams = new ArrayList<>();

        final List<TarArchiveStructSparse> sparseHeaders = currEntry.getOrderedSparseHeaders();

        // Stream doesn't need to be closed at all as it doesn't use any resources
        final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream(); //NOSONAR
        // logical offset into the extracted entry
        long offset = 0;
        long numberOfZeroBytesInSparseEntry = 0;
        for (TarArchiveStructSparse sparseHeader : sparseHeaders) {
            final long zeroBlockSize = sparseHeader.getOffset() - offset;
            if (zeroBlockSize < 0) {
                // sparse header says to move backwards inside of the extracted entry
                throw new IOException("Corrupted struct sparse detected");
            }

            // only store the zero block if it is not empty
            if (zeroBlockSize > 0) {
                streams.add(new BoundedInputStream(zeroInputStream, zeroBlockSize));
                numberOfZeroBytesInSparseEntry += zeroBlockSize;
            }

            // only store the input streams with non-zero size
            if (sparseHeader.getNumbytes() > 0) {
                // Data chunks are stored back-to-back in the archive, so the physical
                // start is the logical offset minus all zero bytes seen so far.
                final long start =
                    currEntry.getDataOffset() + sparseHeader.getOffset() - numberOfZeroBytesInSparseEntry;
                if (start + sparseHeader.getNumbytes() < start) {
                    // possible integer overflow
                    throw new IOException("Unreadable TAR archive, sparse block offset or length too big");
                }
                streams.add(new BoundedSeekableByteChannelInputStream(start, sparseHeader.getNumbytes(), archive));
            }

            offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
        }

        sparseInputStreams.put(currEntry.getName(), streams);
    }

    /**
     * Update the current entry with the read pax headers
     * @param headers Headers read from the pax header
     * @param sparseHeaders Sparse headers read from pax header
     */
    private void applyPaxHeadersToCurrentEntry(final Map<String, String> headers, final List<TarArchiveStructSparse> sparseHeaders)
        throws IOException {
        currEntry.updateEntryFromPaxHeaders(headers);
        currEntry.setSparseHeaders(sparseHeaders);
    }

    /**
     * <p>
     * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes)
     * may appear multi times, and they look like:
     * <pre>
     * GNU.sparse.size=size
     * GNU.sparse.numblocks=numblocks
     * repeat numblocks times
     *   GNU.sparse.offset=offset
     *   GNU.sparse.numbytes=numbytes
     * end repeat
     * </pre>
     *
     * <p>
     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
     * <pre>
     * GNU.sparse.map
     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
     * </pre>
     *
     * <p>
     * For PAX Format 1.X:
     * <br>
     * The sparse map itself is stored in the file data block, preceding the actual file data.
     * It consists of a series of decimal numbers delimited by newlines. The map is padded with nulls to the nearest block boundary.
     * The first number gives the number of entries in the map. Following are map entries, each one consisting of two numbers
     * giving the offset and size of the data block it describes.
     * @throws IOException when reading the PAX header or the following entry fails
     */
    private void paxHeaders() throws IOException {
        List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
        final Map<String, String> headers;
        try (final InputStream input = getInputStream(currEntry)) {
            headers = TarUtils.parsePaxHeaders(input, sparseHeaders, globalPaxHeaders, currEntry.getSize());
        }

        // for 0.1 PAX Headers
        if (headers.containsKey("GNU.sparse.map")) {
            sparseHeaders = new ArrayList<>(TarUtils.parseFromPAX01SparseHeaders(headers.get("GNU.sparse.map")));
        }
        getNextTarEntry(); // Get the actual file entry
        if (currEntry == null) {
            throw new IOException("premature end of tar archive. Didn't find any entry after PAX header.");
        }
        applyPaxHeadersToCurrentEntry(headers, sparseHeaders);

        // for 1.0 PAX Format, the sparse map is stored in the file data block
        if (currEntry.isPaxGNU1XSparse()) {
            try (final InputStream input = getInputStream(currEntry)) {
                sparseHeaders = TarUtils.parsePAX1XSparseHeaders(input, recordSize);
            }
            currEntry.setSparseHeaders(sparseHeaders);
            // data of the entry is after the pax gnu entry. So we need to update the data position once again
            currEntry.setDataOffset(currEntry.getDataOffset() + recordSize);
        }

        // sparse headers are all done reading, we need to build
        // sparse input streams using these sparse headers
        buildSparseInputStreams();
    }

    /**
     * Reads a global PAX header entry and advances to the entry that follows it.
     *
     * @throws IOException when parsing fails or no entry follows the global header
     */
    private void readGlobalPaxHeaders() throws IOException {
        try (InputStream input = getInputStream(currEntry)) {
            globalPaxHeaders = TarUtils.parsePaxHeaders(input, globalSparseHeaders, globalPaxHeaders,
                currEntry.getSize());
        }
        getNextTarEntry(); // Get the actual file entry

        if (currEntry == null) {
            throw new IOException("Error detected parsing the pax header");
        }
    }

    /**
     * Get the next entry in this tar archive as longname data.
     *
     * @return The next entry in the archive as longname data, or null.
     * @throws IOException on error
     */
    private byte[] getLongNameData() throws IOException {
        final ByteArrayOutputStream longName = new ByteArrayOutputStream();
        int length;
        try (final InputStream in = getInputStream(currEntry)) {
            while ((length = in.read(smallBuf)) >= 0) {
                longName.write(smallBuf, 0, length);
            }
        }
        getNextTarEntry();
        if (currEntry == null) {
            // Bugzilla: 40334
            // Malformed tar file - long entry name not followed by entry
            return null;
        }
        byte[] longNameData = longName.toByteArray();
        // remove trailing null terminator(s)
        length = longNameData.length;
        while (length > 0 && longNameData[length - 1] == 0) {
            --length;
        }
        if (length != longNameData.length) {
            final byte[] l = new byte[length];
            System.arraycopy(longNameData, 0, l, 0, length);
            longNameData = l;
        }
        return longNameData;
    }

    /**
     * The last record block should be written at the full size, so skip any
     * additional space used to fill a record after an entry
     *
     * @throws IOException when skipping the padding of the record fails
     */
    private void skipRecordPadding() throws IOException {
        if (!isDirectory() && currEntry.getSize() > 0 && currEntry.getSize() % recordSize != 0) {
            final long numRecords = (currEntry.getSize() / recordSize) + 1;
            final long padding = (numRecords * recordSize) - currEntry.getSize();
            repositionForwardBy(padding);
            throwExceptionIfPositionIsNotInArchive();
        }
    }

    // Moves the channel to newPosition; the archive is only ever traversed forwards.
    private void repositionForwardTo(final long newPosition) throws IOException {
        final long currPosition = archive.position();
        if (newPosition < currPosition) {
            throw new IOException("trying to move backwards inside of the archive");
        }
        archive.position(newPosition);
    }

    // Advances the channel by offset bytes relative to its current position.
    private void repositionForwardBy(final long offset) throws IOException {
        repositionForwardTo(archive.position() + offset);
    }

    /**
     * Checks if the current position of the SeekableByteChannel is in the archive.
     * @throws IOException If the position is not in the archive
     */
    private void throwExceptionIfPositionIsNotInArchive() throws IOException {
        if (archive.size() < archive.position()) {
            throw new IOException("Truncated TAR archive");
        }
    }

    /**
     * Get the next record in this tar archive. This will skip
     * over any remaining data in the current entry, if there
     * is one, and place the input stream at the header of the
     * next entry.
     *
     * <p>If there are no more entries in the archive, null will be
     * returned to indicate that the end of the archive has been
     * reached.  At the same time the {@code hasHitEOF} marker will be
     * set to true.</p>
     *
     * @return The next TarEntry in the archive, or null if there is no next entry.
     * @throws IOException when reading the next TarEntry fails
     */
    private ByteBuffer getRecord() throws IOException {
        ByteBuffer headerBuf = readRecord();
        setAtEOF(isEOFRecord(headerBuf));
        if (isAtEOF() && headerBuf != null) {
            // Consume rest
            tryToConsumeSecondEOFRecord();
            consumeRemainderOfLastBlock();
            headerBuf = null;
        }
        return headerBuf;
    }

    /**
     * Tries to read the next record resetting the position in the
     * archive if it is not a EOF record.
     *
     * <p>This is meant to protect against cases where a tar
     * implementation has written only one EOF record when two are
     * expected. Actually this won't help since a non-conforming
     * implementation likely won't fill full blocks consisting of - by
     * default - ten records either so we probably have already read
     * beyond the archive anyway.</p>
     *
     * @throws IOException if reading the record of resetting the position in the archive fails
     */
    private void tryToConsumeSecondEOFRecord() throws IOException {
        boolean shouldReset = true;
        try {
            shouldReset = !isEOFRecord(readRecord());
        } finally {
            if (shouldReset) {
                archive.position(archive.position() - recordSize);
            }
        }
    }

    /**
     * This method is invoked once the end of the archive is hit, it
     * tries to consume the remaining bytes under the assumption that
     * the tool creating this archive has padded the last block.
     */
    private void consumeRemainderOfLastBlock() throws IOException {
        final long bytesReadOfLastBlock = archive.position() % blockSize;
        if (bytesReadOfLastBlock > 0) {
            repositionForwardBy(blockSize - bytesReadOfLastBlock);
        }
    }

    /**
     * Read a record from the input stream and return the data.
     *
     * @return The record data or null if EOF has been hit.
     * @throws IOException if reading from the archive fails
     */
    private ByteBuffer readRecord() throws IOException {
        // recordBuffer is reused for every record; a short read means EOF or truncation.
        recordBuffer.rewind();
        final int readNow = archive.read(recordBuffer);
        if (readNow != recordSize) {
            return null;
        }
        return recordBuffer;
    }

    /**
     * Get all TAR Archive Entries from the TarFile
     *
     * @return All entries from the tar file
     */
    public List<TarArchiveEntry> getEntries() {
        // Defensive copy so callers cannot mutate the internal list.
        return new ArrayList<>(entries);
    }

    // An EOF record is a record that is entirely zero bytes (or a short/absent read).
    private boolean isEOFRecord(final ByteBuffer headerBuf) {
        return headerBuf == null || ArchiveUtils.isArrayZero(headerBuf.array(), recordSize);
    }

    /**
     * Whether the end of the archive has been reached.
     *
     * @return true once the EOF marker records have been seen
     */
    protected final boolean isAtEOF() {
        return hasHitEOF;
    }

    /**
     * Sets the end-of-archive marker.
     *
     * @param b true to mark the archive as fully read
     */
    protected final void setAtEOF(final boolean b) {
        hasHitEOF = b;
    }

    private boolean isDirectory() {
        return currEntry != null && currEntry.isDirectory();
    }

    /**
     * Gets the input stream for the provided Tar Archive Entry.
     * @param entry Entry to get the input stream from
     * @return Input stream of the provided entry
     * @throws IOException Corrupted TAR archive. Can't read entry.
     */
    public InputStream getInputStream(final TarArchiveEntry entry) throws IOException {
        try {
            return new BoundedTarEntryInputStream(entry, archive);
        } catch (RuntimeException ex) {
            throw new IOException("Corrupted TAR archive. Can't read entry", ex);
        }
    }

    /**
     * Closes the underlying archive channel.
     *
     * @throws IOException if closing the channel fails
     */
    @Override
    public void close() throws IOException {
        archive.close();
    }

    /**
     * Input stream over a single entry's data, bounded by the entry's data
     * offset and real size. Plain entries read straight from the channel;
     * sparse entries are stitched together from the stream list prepared by
     * {@code buildSparseInputStreams()}.
     */
    private final class BoundedTarEntryInputStream extends BoundedArchiveInputStream {

        private final SeekableByteChannel channel;

        private final TarArchiveEntry entry;

        // Logical offset into the (extracted) entry data read so far.
        private long entryOffset;

        // Index into the sparse stream list for this entry, advanced as streams are exhausted.
        private int currentSparseInputStreamIndex;

        BoundedTarEntryInputStream(final TarArchiveEntry entry, final SeekableByteChannel channel) throws IOException {
            super(entry.getDataOffset(), entry.getRealSize());
            // Reject entries whose declared data would extend past the archive.
            if (channel.size() - entry.getSize() < entry.getDataOffset()) {
                throw new IOException("entry size exceeds archive size");
            }
            this.entry = entry;
            this.channel = channel;
        }

        @Override
        protected int read(final long pos, final ByteBuffer buf) throws IOException {
            if (entryOffset >= entry.getRealSize()) {
                return -1;
            }

            final int totalRead;
            if (entry.isSparse()) {
                totalRead = readSparse(entryOffset, buf, buf.limit());
            } else {
                totalRead = readArchive(pos, buf);
            }

            if (totalRead == -1) {
                // EOF before the declared size was reached counts as truncation
                // when the caller actually asked for data.
                if (buf.array().length > 0) {
                    throw new IOException("Truncated TAR archive");
                }
                setAtEOF(true);
            } else {
                entryOffset += totalRead;
                buf.flip();
            }
            return totalRead;
        }

        /**
         * Reads from the sparse stream chain, moving to the next stream (zero
         * filler or data chunk) whenever the current one is exhausted.
         *
         * @param pos logical position inside the extracted entry
         * @param buf destination buffer
         * @param numToRead number of bytes requested
         * @return bytes read, or -1 when all streams are exhausted
         */
        private int readSparse(final long pos, final ByteBuffer buf, final int numToRead) throws IOException {
            // if there are no actual input streams, just read from the original archive
            final List<InputStream> entrySparseInputStreams = sparseInputStreams.get(entry.getName());
            if (entrySparseInputStreams == null || entrySparseInputStreams.isEmpty()) {
                return readArchive(entry.getDataOffset() + pos, buf);
            }

            if (currentSparseInputStreamIndex >= entrySparseInputStreams.size()) {
                return -1;
            }

            final InputStream currentInputStream = entrySparseInputStreams.get(currentSparseInputStreamIndex);
            final byte[] bufArray = new byte[numToRead];
            final int readLen = currentInputStream.read(bufArray);
            if (readLen != -1) {
                buf.put(bufArray, 0, readLen);
            }

            // if the current input stream is the last input stream,
            // just return the number of bytes read from current input stream
            if (currentSparseInputStreamIndex == entrySparseInputStreams.size() - 1) {
                return readLen;
            }

            // if EOF of current input stream is meet, open a new input stream and recursively call read
            if (readLen == -1) {
                currentSparseInputStreamIndex++;
                return readSparse(pos, buf, numToRead);
            }

            // if the rest data of current input stream is not long enough, open a new input stream
            // and recursively call read
            if (readLen < numToRead) {
                currentSparseInputStreamIndex++;
                final int readLenOfNext = readSparse(pos + readLen, buf, numToRead - readLen);
                if (readLenOfNext == -1) {
                    return readLen;
                }

                return readLen + readLenOfNext;
            }

            // if the rest data of current input stream is enough(which means readLen == len), just return readLen
            return readLen;
        }

        // Positions the channel at pos and reads directly from the archive.
        private int readArchive(final long pos, final ByteBuffer buf) throws IOException {
            channel.position(pos);
            return channel.read(buf);
        }
    }
}