001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018/* 019 * This package is based on the work done by Timothy Gerard Endres 020 * (time@ice.com) to whom the Ant project is very grateful for his great code. 021 */ 022 023package org.apache.commons.compress.archivers.tar; 024 025import java.io.ByteArrayOutputStream; 026import java.io.FileInputStream; 027import java.io.IOException; 028import java.io.InputStream; 029import java.util.ArrayList; 030import java.util.Arrays; 031import java.util.HashMap; 032import java.util.List; 033import java.util.Map; 034 035import org.apache.commons.compress.archivers.ArchiveEntry; 036import org.apache.commons.compress.archivers.ArchiveInputStream; 037import org.apache.commons.compress.archivers.zip.ZipEncoding; 038import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; 039import org.apache.commons.compress.utils.ArchiveUtils; 040import org.apache.commons.compress.utils.BoundedInputStream; 041import org.apache.commons.compress.utils.IOUtils; 042 043/** 044 * The TarInputStream reads a UNIX tar archive as an InputStream. methods are provided to position at each successive entry in the archive, and the read each 045 * entry as a normal input stream using read(). 046 * 047 * @NotThreadSafe 048 */ 049public class TarArchiveInputStream extends ArchiveInputStream<TarArchiveEntry> { 050 051 private static final int SMALL_BUFFER_SIZE = 256; 052 053 /** 054 * Checks if the signature matches what is expected for a tar file. 055 * 056 * @param signature the bytes to check 057 * @param length the number of bytes to check 058 * @return true, if this stream is a tar archive stream, false otherwise 059 */ 060 public static boolean matches(final byte[] signature, final int length) { 061 final int versionOffset = TarConstants.VERSION_OFFSET; 062 final int versionLen = TarConstants.VERSIONLEN; 063 if (length < versionOffset + versionLen) { 064 return false; 065 } 066 067 final int magicOffset = TarConstants.MAGIC_OFFSET; 068 final int magicLen = TarConstants.MAGICLEN; 069 if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX, signature, magicOffset, magicLen) 070 && ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_POSIX, signature, versionOffset, versionLen)) { 071 return true; 072 } 073 if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_GNU, signature, magicOffset, magicLen) 074 && (ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_SPACE, signature, versionOffset, versionLen) 075 || ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_ZERO, signature, versionOffset, versionLen))) { 076 return true; 077 } 078 // COMPRESS-107 - recognize Ant tar files 079 return ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_ANT, signature, magicOffset, magicLen) 080 && ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_ANT, signature, versionOffset, versionLen); 081 } 082 083 private final byte[] smallBuf = new byte[SMALL_BUFFER_SIZE]; 084 085 /** The size the TAR header. */ 086 private final int recordSize; 087 088 /** The buffer to store the TAR header. **/ 089 private final byte[] recordBuffer; 090 091 /** The size of a block. */ 092 private final int blockSize; 093 094 /** True if stream is at EOF. */ 095 private boolean atEof; 096 097 /** Size of the current . */ 098 private long entrySize; 099 100 /** How far into the entry the stream is at. */ 101 private long entryOffset; 102 103 /** Input streams for reading sparse entries. **/ 104 private List<InputStream> sparseInputStreams; 105 106 /** The index of current input stream being read when reading sparse entries. */ 107 private int currentSparseInputStreamIndex; 108 109 /** The meta-data about the current entry. */ 110 private TarArchiveEntry currEntry; 111 112 /** The encoding of the file. */ 113 private final ZipEncoding zipEncoding; 114 115 /** The global PAX header. */ 116 private Map<String, String> globalPaxHeaders = new HashMap<>(); 117 118 /** The global sparse headers, this is only used in PAX Format 0.X. */ 119 private final List<TarArchiveStructSparse> globalSparseHeaders = new ArrayList<>(); 120 121 private final boolean lenient; 122 123 /** 124 * Constructs a new instance. 125 * 126 * @param inputStream the input stream to use 127 */ 128 public TarArchiveInputStream(final InputStream inputStream) { 129 this(inputStream, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE); 130 } 131 132 /** 133 * Constructs a new instance. 134 * 135 * @param inputStream the input stream to use 136 * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be ignored and the fields set to 137 * {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an exception instead. 138 * @since 1.19 139 */ 140 public TarArchiveInputStream(final InputStream inputStream, final boolean lenient) { 141 this(inputStream, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient); 142 } 143 144 /** 145 * Constructs a new instance. 146 * 147 * @param inputStream the input stream to use 148 * @param blockSize the block size to use 149 */ 150 public TarArchiveInputStream(final InputStream inputStream, final int blockSize) { 151 this(inputStream, blockSize, TarConstants.DEFAULT_RCDSIZE); 152 } 153 154 /** 155 * Constructs a new instance. 156 * 157 * @param inputStream the input stream to use 158 * @param blockSize the block size to use 159 * @param recordSize the record size to use 160 */ 161 public TarArchiveInputStream(final InputStream inputStream, final int blockSize, final int recordSize) { 162 this(inputStream, blockSize, recordSize, null); 163 } 164 165 /** 166 * Constructs a new instance. 167 * 168 * @param inputStream the input stream to use 169 * @param blockSize the block size to use 170 * @param recordSize the record size to use 171 * @param encoding name of the encoding to use for file names 172 * @since 1.4 173 */ 174 public TarArchiveInputStream(final InputStream inputStream, final int blockSize, final int recordSize, final String encoding) { 175 this(inputStream, blockSize, recordSize, encoding, false); 176 } 177 178 /** 179 * Constructs a new instance. 180 * 181 * @param inputStream the input stream to use 182 * @param blockSize the block size to use 183 * @param recordSize the record size to use 184 * @param encoding name of the encoding to use for file names 185 * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be ignored and the fields set to 186 * {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an exception instead. 187 * @since 1.19 188 */ 189 public TarArchiveInputStream(final InputStream inputStream, final int blockSize, final int recordSize, final String encoding, final boolean lenient) { 190 super(inputStream, encoding); 191 this.atEof = false; 192 this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 193 this.recordSize = recordSize; 194 this.recordBuffer = new byte[recordSize]; 195 this.blockSize = blockSize; 196 this.lenient = lenient; 197 } 198 199 /** 200 * Constructs a new instance. 201 * 202 * @param inputStream the input stream to use 203 * @param blockSize the block size to use 204 * @param encoding name of the encoding to use for file names 205 * @since 1.4 206 */ 207 public TarArchiveInputStream(final InputStream inputStream, final int blockSize, final String encoding) { 208 this(inputStream, blockSize, TarConstants.DEFAULT_RCDSIZE, encoding); 209 } 210 211 /** 212 * Constructs a new instance. 213 * 214 * @param inputStream the input stream to use 215 * @param encoding name of the encoding to use for file names 216 * @since 1.4 217 */ 218 public TarArchiveInputStream(final InputStream inputStream, final String encoding) { 219 this(inputStream, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding); 220 } 221 222 private void applyPaxHeadersToCurrentEntry(final Map<String, String> headers, final List<TarArchiveStructSparse> sparseHeaders) throws IOException { 223 currEntry.updateEntryFromPaxHeaders(headers); 224 currEntry.setSparseHeaders(sparseHeaders); 225 } 226 227 /** 228 * Gets the available data that can be read from the current entry in the archive. This does not indicate how much data is left in the entire archive, only 229 * in the current entry. This value is determined from the entry's size header field and the amount of data already read from the current entry. 230 * Integer.MAX_VALUE is returned in case more than Integer.MAX_VALUE bytes are left in the current entry in the archive. 231 * 232 * @return The number of available bytes for the current entry. 233 * @throws IOException for signature 234 */ 235 @Override 236 public int available() throws IOException { 237 if (isDirectory()) { 238 return 0; 239 } 240 if (currEntry.getRealSize() - entryOffset > Integer.MAX_VALUE) { 241 return Integer.MAX_VALUE; 242 } 243 return (int) (currEntry.getRealSize() - entryOffset); 244 } 245 246 /** 247 * Build the input streams consisting of all-zero input streams and non-zero input streams. When reading from the non-zero input streams, the data is 248 * actually read from the original input stream. The size of each input stream is introduced by the sparse headers. 249 * 250 * NOTE : Some all-zero input streams and non-zero input streams have the size of 0. We DO NOT store the 0 size input streams because they are meaningless. 251 */ 252 private void buildSparseInputStreams() throws IOException { 253 currentSparseInputStreamIndex = -1; 254 sparseInputStreams = new ArrayList<>(); 255 256 final List<TarArchiveStructSparse> sparseHeaders = currEntry.getOrderedSparseHeaders(); 257 258 // Stream doesn't need to be closed at all as it doesn't use any resources 259 final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream(); // NOSONAR 260 // logical offset into the extracted entry 261 long offset = 0; 262 for (final TarArchiveStructSparse sparseHeader : sparseHeaders) { 263 final long zeroBlockSize = sparseHeader.getOffset() - offset; 264 if (zeroBlockSize < 0) { 265 // sparse header says to move backwards inside the extracted entry 266 throw new IOException("Corrupted struct sparse detected"); 267 } 268 269 // only store the zero block if it is not empty 270 if (zeroBlockSize > 0) { 271 sparseInputStreams.add(new BoundedInputStream(zeroInputStream, sparseHeader.getOffset() - offset)); 272 } 273 274 // only store the input streams with non-zero size 275 if (sparseHeader.getNumbytes() > 0) { 276 sparseInputStreams.add(new BoundedInputStream(in, sparseHeader.getNumbytes())); 277 } 278 279 offset = sparseHeader.getOffset() + sparseHeader.getNumbytes(); 280 } 281 282 if (!sparseInputStreams.isEmpty()) { 283 currentSparseInputStreamIndex = 0; 284 } 285 } 286 287 /** 288 * Whether this class is able to read the given entry. 289 * 290 * @return The implementation will return true if the {@link ArchiveEntry} is an instance of {@link TarArchiveEntry} 291 */ 292 @Override 293 public boolean canReadEntryData(final ArchiveEntry ae) { 294 return ae instanceof TarArchiveEntry; 295 } 296 297 /** 298 * Closes this stream. Calls the TarBuffer's close() method. 299 * 300 * @throws IOException on error 301 */ 302 @Override 303 public void close() throws IOException { 304 // Close all the input streams in sparseInputStreams 305 if (sparseInputStreams != null) { 306 for (final InputStream inputStream : sparseInputStreams) { 307 inputStream.close(); 308 } 309 } 310 311 in.close(); 312 } 313 314 /** 315 * This method is invoked once the end of the archive is hit, it tries to consume the remaining bytes under the assumption that the tool creating this 316 * archive has padded the last block. 317 */ 318 private void consumeRemainderOfLastBlock() throws IOException { 319 final long bytesReadOfLastBlock = getBytesRead() % blockSize; 320 if (bytesReadOfLastBlock > 0) { 321 final long skipped = org.apache.commons.io.IOUtils.skip(in, blockSize - bytesReadOfLastBlock); 322 count(skipped); 323 } 324 } 325 326 /** 327 * For FileInputStream, the skip always return the number you input, so we need the available bytes to determine how many bytes are actually skipped 328 * 329 * @param available available bytes returned by inputStream.available() 330 * @param skipped skipped bytes returned by inputStream.skip() 331 * @param expected bytes expected to skip 332 * @return number of bytes actually skipped 333 * @throws IOException if a truncated tar archive is detected 334 */ 335 private long getActuallySkipped(final long available, final long skipped, final long expected) throws IOException { 336 long actuallySkipped = skipped; 337 if (in instanceof FileInputStream) { 338 actuallySkipped = Math.min(skipped, available); 339 } 340 341 if (actuallySkipped != expected) { 342 throw new IOException("Truncated TAR archive"); 343 } 344 345 return actuallySkipped; 346 } 347 348 /** 349 * Gets the current TAR Archive Entry that this input stream is processing 350 * 351 * @return The current Archive Entry 352 */ 353 public TarArchiveEntry getCurrentEntry() { 354 return currEntry; 355 } 356 357 /** 358 * Gets the next entry in this tar archive as long name data. 359 * 360 * @return The next entry in the archive as long name data, or null. 361 * @throws IOException on error 362 */ 363 protected byte[] getLongNameData() throws IOException { 364 // read in the name 365 final ByteArrayOutputStream longName = new ByteArrayOutputStream(); 366 int length = 0; 367 while ((length = read(smallBuf)) >= 0) { 368 longName.write(smallBuf, 0, length); 369 } 370 getNextEntry(); 371 if (currEntry == null) { 372 // Bugzilla: 40334 373 // Malformed tar file - long entry name not followed by entry 374 return null; 375 } 376 byte[] longNameData = longName.toByteArray(); 377 // remove trailing null terminator(s) 378 length = longNameData.length; 379 while (length > 0 && longNameData[length - 1] == 0) { 380 --length; 381 } 382 if (length != longNameData.length) { 383 longNameData = Arrays.copyOf(longNameData, length); 384 } 385 return longNameData; 386 } 387 388 /** 389 * Returns the next Archive Entry in this Stream. 390 * 391 * @return the next entry, or {@code null} if there are no more entries 392 * @throws IOException if the next entry could not be read 393 */ 394 @Override 395 public TarArchiveEntry getNextEntry() throws IOException { 396 return getNextTarEntry(); 397 } 398 399 /** 400 * Gets the next entry in this tar archive. This will skip over any remaining data in the current entry, if there is one, and place the input stream at the 401 * header of the next entry, and read the header and instantiate a new TarEntry from the header bytes and return that entry. If there are no more entries in 402 * the archive, null will be returned to indicate that the end of the archive has been reached. 403 * 404 * @return The next TarEntry in the archive, or null. 405 * @throws IOException on error 406 * @deprecated Use {@link #getNextEntry()}. 407 */ 408 @Deprecated 409 public TarArchiveEntry getNextTarEntry() throws IOException { 410 if (isAtEOF()) { 411 return null; 412 } 413 414 if (currEntry != null) { 415 /* Skip will only go to the end of the current entry */ 416 org.apache.commons.io.IOUtils.skip(this, Long.MAX_VALUE); 417 418 /* skip to the end of the last record */ 419 skipRecordPadding(); 420 } 421 422 final byte[] headerBuf = getRecord(); 423 424 if (headerBuf == null) { 425 /* hit EOF */ 426 currEntry = null; 427 return null; 428 } 429 430 try { 431 currEntry = new TarArchiveEntry(globalPaxHeaders, headerBuf, zipEncoding, lenient); 432 } catch (final IllegalArgumentException e) { 433 throw new IOException("Error detected parsing the header", e); 434 } 435 436 entryOffset = 0; 437 entrySize = currEntry.getSize(); 438 439 if (currEntry.isGNULongLinkEntry()) { 440 final byte[] longLinkData = getLongNameData(); 441 if (longLinkData == null) { 442 // Bugzilla: 40334 443 // Malformed tar file - long link entry name not followed by 444 // entry 445 return null; 446 } 447 currEntry.setLinkName(zipEncoding.decode(longLinkData)); 448 } 449 450 if (currEntry.isGNULongNameEntry()) { 451 final byte[] longNameData = getLongNameData(); 452 if (longNameData == null) { 453 // Bugzilla: 40334 454 // Malformed tar file - long entry name not followed by 455 // entry 456 return null; 457 } 458 459 // COMPRESS-509 : the name of directories should end with '/' 460 final String name = zipEncoding.decode(longNameData); 461 currEntry.setName(name); 462 if (currEntry.isDirectory() && !name.endsWith("/")) { 463 currEntry.setName(name + "/"); 464 } 465 } 466 467 if (currEntry.isGlobalPaxHeader()) { // Process Global Pax headers 468 readGlobalPaxHeaders(); 469 } 470 471 try { 472 if (currEntry.isPaxHeader()) { // Process Pax headers 473 paxHeaders(); 474 } else if (!globalPaxHeaders.isEmpty()) { 475 applyPaxHeadersToCurrentEntry(globalPaxHeaders, globalSparseHeaders); 476 } 477 } catch (final NumberFormatException e) { 478 throw new IOException("Error detected parsing the pax header", e); 479 } 480 481 if (currEntry.isOldGNUSparse()) { // Process sparse files 482 readOldGNUSparse(); 483 } 484 485 // If the size of the next element in the archive has changed 486 // due to a new size being reported in the posix header 487 // information, we update entrySize here so that it contains 488 // the correct value. 489 entrySize = currEntry.getSize(); 490 491 return currEntry; 492 } 493 494 /** 495 * Gets the next record in this tar archive. This will skip over any remaining data in the current entry, if there is one, and place the input stream at the 496 * header of the next entry. 497 * 498 * <p> 499 * If there are no more entries in the archive, null will be returned to indicate that the end of the archive has been reached. At the same time the 500 * {@code hasHitEOF} marker will be set to true. 501 * </p> 502 * 503 * @return The next header in the archive, or null. 504 * @throws IOException on error 505 */ 506 private byte[] getRecord() throws IOException { 507 byte[] headerBuf = readRecord(); 508 setAtEOF(isEOFRecord(headerBuf)); 509 if (isAtEOF() && headerBuf != null) { 510 tryToConsumeSecondEOFRecord(); 511 consumeRemainderOfLastBlock(); 512 headerBuf = null; 513 } 514 return headerBuf; 515 } 516 517 /** 518 * Gets the record size being used by this stream's buffer. 519 * 520 * @return The TarBuffer record size. 521 */ 522 public int getRecordSize() { 523 return recordSize; 524 } 525 526 protected final boolean isAtEOF() { 527 return atEof; 528 } 529 530 private boolean isDirectory() { 531 return currEntry != null && currEntry.isDirectory(); 532 } 533 534 /** 535 * Determine if an archive record indicate End of Archive. End of archive is indicated by a record that consists entirely of null bytes. 536 * 537 * @param record The record data to check. 538 * @return true if the record data is an End of Archive 539 */ 540 protected boolean isEOFRecord(final byte[] record) { 541 return record == null || ArchiveUtils.isArrayZero(record, recordSize); 542 } 543 544 /** 545 * Since we do not support marking just yet, we do nothing. 546 * 547 * @param markLimit The limit to mark. 548 */ 549 @Override 550 public synchronized void mark(final int markLimit) { 551 } 552 553 /** 554 * Since we do not support marking just yet, we return false. 555 * 556 * @return False. 557 */ 558 @Override 559 public boolean markSupported() { 560 return false; 561 } 562 563 /** 564 * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) may appear multi times, and they look like: 565 * 566 * GNU.sparse.size=size GNU.sparse.numblocks=numblocks repeat numblocks times GNU.sparse.offset=offset GNU.sparse.numbytes=numbytes end repeat 567 * 568 * 569 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map 570 * 571 * GNU.sparse.map Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 572 * 573 * 574 * For PAX Format 1.X: The sparse map itself is stored in the file data block, preceding the actual file data. It consists of a series of decimal numbers 575 * delimited by newlines. The map is padded with nulls to the nearest block boundary. The first number gives the number of entries in the map. Following are 576 * map entries, each one consisting of two numbers giving the offset and size of the data block it describes. 577 * 578 * @throws IOException 579 */ 580 private void paxHeaders() throws IOException { 581 List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>(); 582 final Map<String, String> headers = TarUtils.parsePaxHeaders(this, sparseHeaders, globalPaxHeaders, entrySize); 583 584 // for 0.1 PAX Headers 585 if (headers.containsKey(TarGnuSparseKeys.MAP)) { 586 sparseHeaders = new ArrayList<>(TarUtils.parseFromPAX01SparseHeaders(headers.get(TarGnuSparseKeys.MAP))); 587 } 588 getNextEntry(); // Get the actual file entry 589 if (currEntry == null) { 590 throw new IOException("premature end of tar archive. Didn't find any entry after PAX header."); 591 } 592 applyPaxHeadersToCurrentEntry(headers, sparseHeaders); 593 594 // for 1.0 PAX Format, the sparse map is stored in the file data block 595 if (currEntry.isPaxGNU1XSparse()) { 596 sparseHeaders = TarUtils.parsePAX1XSparseHeaders(in, recordSize); 597 currEntry.setSparseHeaders(sparseHeaders); 598 } 599 600 // sparse headers are all done reading, we need to build 601 // sparse input streams using these sparse headers 602 buildSparseInputStreams(); 603 } 604 605 /** 606 * Reads bytes from the current tar archive entry. 607 * 608 * This method is aware of the boundaries of the current entry in the archive and will deal with them as if they were this stream's start and EOF. 609 * 610 * @param buf The buffer into which to place bytes read. 611 * @param offset The offset at which to place bytes read. 612 * @param numToRead The number of bytes to read. 613 * @return The number of bytes read, or -1 at EOF. 614 * @throws IOException on error 615 */ 616 @Override 617 public int read(final byte[] buf, final int offset, int numToRead) throws IOException { 618 if (numToRead == 0) { 619 return 0; 620 } 621 int totalRead = 0; 622 623 if (isAtEOF() || isDirectory()) { 624 return -1; 625 } 626 627 if (currEntry == null) { 628 throw new IllegalStateException("No current tar entry"); 629 } 630 631 if (entryOffset >= currEntry.getRealSize()) { 632 return -1; 633 } 634 635 numToRead = Math.min(numToRead, available()); 636 637 if (currEntry.isSparse()) { 638 // for sparse entries, we need to read them in another way 639 totalRead = readSparse(buf, offset, numToRead); 640 } else { 641 totalRead = in.read(buf, offset, numToRead); 642 } 643 644 if (totalRead == -1) { 645 if (numToRead > 0) { 646 throw new IOException("Truncated TAR archive"); 647 } 648 setAtEOF(true); 649 } else { 650 count(totalRead); 651 entryOffset += totalRead; 652 } 653 654 return totalRead; 655 } 656 657 private void readGlobalPaxHeaders() throws IOException { 658 globalPaxHeaders = TarUtils.parsePaxHeaders(this, globalSparseHeaders, globalPaxHeaders, entrySize); 659 getNextEntry(); // Get the actual file entry 660 661 if (currEntry == null) { 662 throw new IOException("Error detected parsing the pax header"); 663 } 664 } 665 666 /** 667 * Adds the sparse chunks from the current entry to the sparse chunks, including any additional sparse entries following the current entry. 668 * 669 * @throws IOException on error 670 */ 671 private void readOldGNUSparse() throws IOException { 672 if (currEntry.isExtended()) { 673 TarArchiveSparseEntry entry; 674 do { 675 final byte[] headerBuf = getRecord(); 676 if (headerBuf == null) { 677 throw new IOException("premature end of tar archive. Didn't find extended_header after header with extended flag."); 678 } 679 entry = new TarArchiveSparseEntry(headerBuf); 680 currEntry.getSparseHeaders().addAll(entry.getSparseHeaders()); 681 } while (entry.isExtended()); 682 } 683 684 // sparse headers are all done reading, we need to build 685 // sparse input streams using these sparse headers 686 buildSparseInputStreams(); 687 } 688 689 /** 690 * Read a record from the input stream and return the data. 691 * 692 * @return The record data or null if EOF has been hit. 693 * @throws IOException on error 694 */ 695 protected byte[] readRecord() throws IOException { 696 final int readNow = IOUtils.readFully(in, recordBuffer); 697 count(readNow); 698 if (readNow != recordSize) { 699 return null; 700 } 701 702 return recordBuffer; 703 } 704 705 /** 706 * For sparse tar entries, there are many "holes"(consisting of all 0) in the file. Only the non-zero data is stored in tar files, and they are stored 707 * separately. The structure of non-zero data is introduced by the sparse headers using the offset, where a block of non-zero data starts, and numbytes, the 708 * length of the non-zero data block. When reading sparse entries, the actual data is read out with "holes" and non-zero data combined together according to 709 * the sparse headers. 710 * 711 * @param buf The buffer into which to place bytes read. 712 * @param offset The offset at which to place bytes read. 713 * @param numToRead The number of bytes to read. 714 * @return The number of bytes read, or -1 at EOF. 715 * @throws IOException on error 716 */ 717 private int readSparse(final byte[] buf, final int offset, final int numToRead) throws IOException { 718 // if there are no actual input streams, just read from the original input stream 719 if (sparseInputStreams == null || sparseInputStreams.isEmpty()) { 720 return in.read(buf, offset, numToRead); 721 } 722 723 if (currentSparseInputStreamIndex >= sparseInputStreams.size()) { 724 return -1; 725 } 726 727 final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex); 728 final int readLen = currentInputStream.read(buf, offset, numToRead); 729 730 // if the current input stream is the last input stream, 731 // just return the number of bytes read from current input stream 732 if (currentSparseInputStreamIndex == sparseInputStreams.size() - 1) { 733 return readLen; 734 } 735 736 // if EOF of current input stream is meet, open a new input stream and recursively call read 737 if (readLen == -1) { 738 currentSparseInputStreamIndex++; 739 return readSparse(buf, offset, numToRead); 740 } 741 742 // if the rest data of current input stream is not long enough, open a new input stream 743 // and recursively call read 744 if (readLen < numToRead) { 745 currentSparseInputStreamIndex++; 746 final int readLenOfNext = readSparse(buf, offset + readLen, numToRead - readLen); 747 if (readLenOfNext == -1) { 748 return readLen; 749 } 750 751 return readLen + readLenOfNext; 752 } 753 754 // if the rest data of current input stream is enough(which means readLen == len), just return readLen 755 return readLen; 756 } 757 758 /** 759 * Since we do not support marking just yet, we do nothing. 760 */ 761 @Override 762 public synchronized void reset() { 763 } 764 765 protected final void setAtEOF(final boolean b) { 766 atEof = b; 767 } 768 769 protected final void setCurrentEntry(final TarArchiveEntry e) { 770 currEntry = e; 771 } 772 773 /** 774 * Skips over and discards {@code n} bytes of data from this input stream. The {@code skip} method may, for a variety of reasons, end up skipping over some 775 * smaller number of bytes, possibly {@code 0}. This may result from any of a number of conditions; reaching end of file or end of entry before {@code n} 776 * bytes have been skipped; are only two possibilities. The actual number of bytes skipped is returned. If {@code n} is negative, no bytes are skipped. 777 * 778 * 779 * @param n the number of bytes to be skipped. 780 * @return the actual number of bytes skipped. 781 * @throws IOException if a truncated tar archive is detected or some other I/O error occurs 782 */ 783 @Override 784 public long skip(final long n) throws IOException { 785 if (n <= 0 || isDirectory()) { 786 return 0; 787 } 788 789 final long availableOfInputStream = in.available(); 790 final long available = currEntry.getRealSize() - entryOffset; 791 final long numToSkip = Math.min(n, available); 792 long skipped; 793 794 if (!currEntry.isSparse()) { 795 skipped = org.apache.commons.io.IOUtils.skip(in, numToSkip); 796 // for non-sparse entry, we should get the bytes actually skipped bytes along with 797 // inputStream.available() if inputStream is instance of FileInputStream 798 skipped = getActuallySkipped(availableOfInputStream, skipped, numToSkip); 799 } else { 800 skipped = skipSparse(numToSkip); 801 } 802 803 count(skipped); 804 entryOffset += skipped; 805 return skipped; 806 } 807 808 /** 809 * The last record block should be written at the full size, so skip any additional space used to fill a record after an entry. 810 * 811 * @throws IOException if a truncated tar archive is detected 812 */ 813 private void skipRecordPadding() throws IOException { 814 if (!isDirectory() && this.entrySize > 0 && this.entrySize % this.recordSize != 0) { 815 final long available = in.available(); 816 final long numRecords = this.entrySize / this.recordSize + 1; 817 final long padding = numRecords * this.recordSize - this.entrySize; 818 long skipped = org.apache.commons.io.IOUtils.skip(in, padding); 819 820 skipped = getActuallySkipped(available, skipped, padding); 821 822 count(skipped); 823 } 824 } 825 826 /** 827 * Skip n bytes from current input stream, if the current input stream doesn't have enough data to skip, jump to the next input stream and skip the rest 828 * bytes, keep doing this until total n bytes are skipped or the input streams are all skipped 829 * 830 * @param n bytes of data to skip 831 * @return actual bytes of data skipped 832 * @throws IOException 833 */ 834 private long skipSparse(final long n) throws IOException { 835 if (sparseInputStreams == null || sparseInputStreams.isEmpty()) { 836 return in.skip(n); 837 } 838 839 long bytesSkipped = 0; 840 841 while (bytesSkipped < n && currentSparseInputStreamIndex < sparseInputStreams.size()) { 842 final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex); 843 bytesSkipped += currentInputStream.skip(n - bytesSkipped); 844 845 if (bytesSkipped < n) { 846 currentSparseInputStreamIndex++; 847 } 848 } 849 850 return bytesSkipped; 851 } 852 853 /** 854 * Tries to read the next record rewinding the stream if it is not an EOF record. 855 * 856 * <p> 857 * This is meant to protect against cases where a tar implementation has written only one EOF record when two are expected. Actually this won't help since a 858 * non-conforming implementation likely won't fill full blocks consisting of - by default - ten records either so we probably have already read beyond the 859 * archive anyway. 860 * </p> 861 */ 862 private void tryToConsumeSecondEOFRecord() throws IOException { 863 boolean shouldReset = true; 864 final boolean marked = in.markSupported(); 865 if (marked) { 866 in.mark(recordSize); 867 } 868 try { 869 shouldReset = !isEOFRecord(readRecord()); 870 } finally { 871 if (shouldReset && marked) { 872 pushedBackBytes(recordSize); 873 in.reset(); 874 } 875 } 876 } 877}