001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers.tar; 020 021import java.io.ByteArrayOutputStream; 022import java.io.IOException; 023import java.io.InputStream; 024import java.io.UncheckedIOException; 025import java.math.BigInteger; 026import java.nio.ByteBuffer; 027import java.nio.charset.Charset; 028import java.nio.charset.StandardCharsets; 029import java.util.ArrayList; 030import java.util.Collections; 031import java.util.HashMap; 032import java.util.List; 033import java.util.Map; 034 035import org.apache.commons.compress.archivers.zip.ZipEncoding; 036import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; 037import org.apache.commons.compress.utils.CharsetNames; 038import org.apache.commons.compress.utils.IOUtils; 039import org.apache.commons.compress.utils.ParsingUtils; 040 041/** 042 * This class provides static utility methods to work with byte streams. 043 * 044 * @Immutable 045 */ 046// CheckStyle:HideUtilityClassConstructorCheck OFF (bc) 047public class TarUtils { 048 049 private static final int BYTE_MASK = 255; 050 051 static final ZipEncoding DEFAULT_ENCODING = ZipEncodingHelper.getZipEncoding(Charset.defaultCharset()); 052 053 /** 054 * Encapsulates the algorithms used up to Commons Compress 1.3 as ZipEncoding. 055 */ 056 static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() { 057 @Override 058 public boolean canEncode(final String name) { 059 return true; 060 } 061 062 @Override 063 public String decode(final byte[] buffer) { 064 final int length = buffer.length; 065 final StringBuilder result = new StringBuilder(length); 066 067 for (final byte b : buffer) { 068 if (b == 0) { // Trailing null 069 break; 070 } 071 result.append((char) (b & 0xFF)); // Allow for sign-extension 072 } 073 074 return result.toString(); 075 } 076 077 @Override 078 public ByteBuffer encode(final String name) { 079 final int length = name.length(); 080 final byte[] buf = new byte[length]; 081 082 // copy until end of input or output is reached. 083 for (int i = 0; i < length; ++i) { 084 buf[i] = (byte) name.charAt(i); 085 } 086 return ByteBuffer.wrap(buf); 087 } 088 }; 089 090 /** 091 * Computes the checksum of a tar entry header. 092 * 093 * @param buf The tar entry's header buffer. 094 * @return The computed checksum. 095 */ 096 public static long computeCheckSum(final byte[] buf) { 097 long sum = 0; 098 099 for (final byte element : buf) { 100 sum += BYTE_MASK & element; 101 } 102 103 return sum; 104 } 105 106 // Helper method to generate the exception message 107 private static String exceptionMessage(final byte[] buffer, final int offset, final int length, final int current, final byte currentByte) { 108 // default charset is good enough for an exception message, 109 // 110 // the alternative was to modify parseOctal and 111 // parseOctalOrBinary to receive the ZipEncoding of the 112 // archive (deprecating the existing public methods, of 113 // course) and dealing with the fact that ZipEncoding#decode 114 // can throw an IOException which parseOctal* doesn't declare 115 String string = new String(buffer, offset, length, Charset.defaultCharset()); 116 117 string = string.replace("\0", "{NUL}"); // Replace NULs to allow string to be printed 118 return "Invalid byte " + currentByte + " at offset " + (current - offset) + " in '" + string + "' len=" + length; 119 } 120 121 private static void formatBigIntegerBinary(final long value, final byte[] buf, final int offset, final int length, final boolean negative) { 122 final BigInteger val = BigInteger.valueOf(value); 123 final byte[] b = val.toByteArray(); 124 final int len = b.length; 125 if (len > length - 1) { 126 throw new IllegalArgumentException("Value " + value + " is too large for " + length + " byte field."); 127 } 128 final int off = offset + length - len; 129 System.arraycopy(b, 0, buf, off, len); 130 final byte fill = (byte) (negative ? 0xff : 0); 131 for (int i = offset + 1; i < off; i++) { 132 buf[i] = fill; 133 } 134 } 135 136 /** 137 * Writes an octal value into a buffer. 138 * 139 * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by NUL and then 140 * space. 141 * 142 * @param value The value to convert 143 * @param buf The destination buffer 144 * @param offset The starting offset into the buffer. 145 * @param length The size of the buffer. 146 * @return The updated value of offset, i.e. offset+length 147 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 148 */ 149 public static int formatCheckSumOctalBytes(final long value, final byte[] buf, final int offset, final int length) { 150 151 int idx = length - 2; // for NUL and space 152 formatUnsignedOctalString(value, buf, offset, idx); 153 154 buf[offset + idx++] = 0; // Trailing null 155 buf[offset + idx] = (byte) ' '; // Trailing space 156 157 return offset + length; 158 } 159 160 private static void formatLongBinary(final long value, final byte[] buf, final int offset, final int length, final boolean negative) { 161 final int bits = (length - 1) * 8; 162 final long max = 1L << bits; 163 long val = Math.abs(value); // Long.MIN_VALUE stays Long.MIN_VALUE 164 if (val < 0 || val >= max) { 165 throw new IllegalArgumentException("Value " + value + " is too large for " + length + " byte field."); 166 } 167 if (negative) { 168 val ^= max - 1; 169 val++; 170 val |= 0xffL << bits; 171 } 172 for (int i = offset + length - 1; i >= offset; i--) { 173 buf[i] = (byte) val; 174 val >>= 8; 175 } 176 } 177 178 /** 179 * Writes an octal long integer into a buffer. 180 * 181 * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by a space. 182 * 183 * @param value The value to write as octal 184 * @param buf The destinationbuffer. 185 * @param offset The starting offset into the buffer. 186 * @param length The length of the buffer 187 * @return The updated offset 188 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 189 */ 190 public static int formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length) { 191 192 final int idx = length - 1; // For space 193 194 formatUnsignedOctalString(value, buf, offset, idx); 195 buf[offset + idx] = (byte) ' '; // Trailing space 196 197 return offset + length; 198 } 199 200 /** 201 * Writes a long integer into a buffer as an octal string if this will fit, or as a binary number otherwise. 202 * 203 * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by a space. 204 * 205 * @param value The value to write into the buffer. 206 * @param buf The destination buffer. 207 * @param offset The starting offset into the buffer. 208 * @param length The length of the buffer. 209 * @return The updated offset. 210 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer. 211 * @since 1.4 212 */ 213 public static int formatLongOctalOrBinaryBytes(final long value, final byte[] buf, final int offset, final int length) { 214 215 // Check whether we are dealing with UID/GID or SIZE field 216 final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE; 217 218 final boolean negative = value < 0; 219 if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars 220 return formatLongOctalBytes(value, buf, offset, length); 221 } 222 223 if (length < 9) { 224 formatLongBinary(value, buf, offset, length, negative); 225 } else { 226 formatBigIntegerBinary(value, buf, offset, length, negative); 227 } 228 229 buf[offset] = (byte) (negative ? 0xff : 0x80); 230 return offset + length; 231 } 232 233 /** 234 * Copies a name into a buffer. Copies characters from the name into the buffer starting at the specified offset. If the buffer is longer than the name, the 235 * buffer is filled with trailing NULs. If the name is longer than the buffer, the output is truncated. 236 * 237 * @param name The header name from which to copy the characters. 238 * @param buf The buffer where the name is to be stored. 239 * @param offset The starting offset into the buffer 240 * @param length The maximum number of header bytes to copy. 241 * @return The updated offset, i.e. offset + length 242 */ 243 public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length) { 244 try { 245 return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING); 246 } catch (final IOException ex) { // NOSONAR 247 try { 248 return formatNameBytes(name, buf, offset, length, FALLBACK_ENCODING); 249 } catch (final IOException ex2) { 250 // impossible 251 throw new UncheckedIOException(ex2); // NOSONAR 252 } 253 } 254 } 255 256 /** 257 * Copies a name into a buffer. Copies characters from the name into the buffer starting at the specified offset. If the buffer is longer than the name, the 258 * buffer is filled with trailing NULs. If the name is longer than the buffer, the output is truncated. 259 * 260 * @param name The header name from which to copy the characters. 261 * @param buf The buffer where the name is to be stored. 262 * @param offset The starting offset into the buffer 263 * @param length The maximum number of header bytes to copy. 264 * @param encoding name of the encoding to use for file names 265 * @since 1.4 266 * @return The updated offset, i.e. offset + length 267 * @throws IOException on error 268 */ 269 public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length, final ZipEncoding encoding) throws IOException { 270 int len = name.length(); 271 ByteBuffer b = encoding.encode(name); 272 while (b.limit() > length && len > 0) { 273 b = encoding.encode(name.substring(0, --len)); 274 } 275 final int limit = b.limit() - b.position(); 276 System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit); 277 278 // Pad any remaining output bytes with NUL 279 for (int i = limit; i < length; ++i) { 280 buf[offset + i] = 0; 281 } 282 283 return offset + length; 284 } 285 286 /** 287 * Writes an octal integer into a buffer. 288 * 289 * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by space and NUL 290 * 291 * @param value The value to write 292 * @param buf The buffer to receive the output 293 * @param offset The starting offset into the buffer 294 * @param length The size of the output buffer 295 * @return The updated offset, i.e. offset+length 296 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 297 */ 298 public static int formatOctalBytes(final long value, final byte[] buf, final int offset, final int length) { 299 300 int idx = length - 2; // For space and trailing null 301 formatUnsignedOctalString(value, buf, offset, idx); 302 303 buf[offset + idx++] = (byte) ' '; // Trailing space 304 buf[offset + idx] = 0; // Trailing null 305 306 return offset + length; 307 } 308 309 /** 310 * Fills a buffer with unsigned octal number, padded with leading zeroes. 311 * 312 * @param value number to convert to octal - treated as unsigned 313 * @param buffer destination buffer 314 * @param offset starting offset in buffer 315 * @param length length of buffer to fill 316 * @throws IllegalArgumentException if the value will not fit in the buffer 317 */ 318 public static void formatUnsignedOctalString(final long value, final byte[] buffer, final int offset, final int length) { 319 int remaining = length; 320 remaining--; 321 if (value == 0) { 322 buffer[offset + remaining--] = (byte) '0'; 323 } else { 324 long val = value; 325 for (; remaining >= 0 && val != 0; --remaining) { 326 // CheckStyle:MagicNumber OFF 327 buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7)); 328 val = val >>> 3; 329 // CheckStyle:MagicNumber ON 330 } 331 if (val != 0) { 332 throw new IllegalArgumentException(value + "=" + Long.toOctalString(value) + " will not fit in octal number buffer of length " + length); 333 } 334 } 335 336 for (; remaining >= 0; --remaining) { // leading zeros 337 buffer[offset + remaining] = (byte) '0'; 338 } 339 } 340 341 private static long parseBinaryBigInteger(final byte[] buffer, final int offset, final int length, final boolean negative) { 342 final byte[] remainder = new byte[length - 1]; 343 System.arraycopy(buffer, offset + 1, remainder, 0, length - 1); 344 BigInteger val = new BigInteger(remainder); 345 if (negative) { 346 // 2's complement 347 val = val.add(BigInteger.valueOf(-1)).not(); 348 } 349 if (val.bitLength() > 63) { 350 throw new IllegalArgumentException("At offset " + offset + ", " + length + " byte binary number" + " exceeds maximum signed long" + " value"); 351 } 352 return negative ? -val.longValue() : val.longValue(); 353 } 354 355 private static long parseBinaryLong(final byte[] buffer, final int offset, final int length, final boolean negative) { 356 if (length >= 9) { 357 throw new IllegalArgumentException("At offset " + offset + ", " + length + " byte binary number" + " exceeds maximum signed long" + " value"); 358 } 359 long val = 0; 360 for (int i = 1; i < length; i++) { 361 val = (val << 8) + (buffer[offset + i] & 0xff); 362 } 363 if (negative) { 364 // 2's complement 365 val--; 366 val ^= (long) Math.pow(2.0, (length - 1) * 8.0) - 1; 367 } 368 return negative ? -val : val; 369 } 370 371 /** 372 * Parses a boolean byte from a buffer. Leading spaces and NUL are ignored. The buffer may contain trailing spaces or NULs. 373 * 374 * @param buffer The buffer from which to parse. 375 * @param offset The offset into the buffer from which to parse. 376 * @return The boolean value of the bytes. 377 * @throws IllegalArgumentException if an invalid byte is detected. 378 */ 379 public static boolean parseBoolean(final byte[] buffer, final int offset) { 380 return buffer[offset] == 1; 381 } 382 383 /** 384 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map GNU.sparse.map Map of non-null data chunks. It is a string 385 * consisting of comma-separated values "offset,size[,offset-1,size-1...]" 386 * 387 * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 388 * @return unmodifiable list of sparse headers parsed from sparse map 389 * @throws IOException Corrupted TAR archive. 390 * @since 1.21 391 */ 392 protected static List<TarArchiveStructSparse> parseFromPAX01SparseHeaders(final String sparseMap) throws IOException { 393 final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>(); 394 final String[] sparseHeaderStrings = sparseMap.split(","); 395 if (sparseHeaderStrings.length % 2 == 1) { 396 throw new IOException("Corrupted TAR archive. Bad format in GNU.sparse.map PAX Header"); 397 } 398 399 for (int i = 0; i < sparseHeaderStrings.length; i += 2) { 400 final long sparseOffset = ParsingUtils.parseLongValue(sparseHeaderStrings[i]); 401 if (sparseOffset < 0) { 402 throw new IOException("Corrupted TAR archive." + " Sparse struct offset contains negative value"); 403 } 404 final long sparseNumbytes = ParsingUtils.parseLongValue(sparseHeaderStrings[i + 1]); 405 if (sparseNumbytes < 0) { 406 throw new IOException("Corrupted TAR archive." + " Sparse struct numbytes contains negative value"); 407 } 408 sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes)); 409 } 410 411 return Collections.unmodifiableList(sparseHeaders); 412 } 413 414 /** 415 * Parses an entry name from a buffer. Parsing stops when a NUL is found or the buffer length is reached. 416 * 417 * @param buffer The buffer from which to parse. 418 * @param offset The offset into the buffer from which to parse. 419 * @param length The maximum number of bytes to parse. 420 * @return The entry name. 421 */ 422 public static String parseName(final byte[] buffer, final int offset, final int length) { 423 try { 424 return parseName(buffer, offset, length, DEFAULT_ENCODING); 425 } catch (final IOException ex) { // NOSONAR 426 try { 427 return parseName(buffer, offset, length, FALLBACK_ENCODING); 428 } catch (final IOException ex2) { 429 // impossible 430 throw new UncheckedIOException(ex2); // NOSONAR 431 } 432 } 433 } 434 435 /** 436 * Parses an entry name from a buffer. Parsing stops when a NUL is found or the buffer length is reached. 437 * 438 * @param buffer The buffer from which to parse. 439 * @param offset The offset into the buffer from which to parse. 440 * @param length The maximum number of bytes to parse. 441 * @param encoding name of the encoding to use for file names 442 * @since 1.4 443 * @return The entry name. 444 * @throws IOException on error 445 */ 446 public static String parseName(final byte[] buffer, final int offset, final int length, final ZipEncoding encoding) throws IOException { 447 448 int len = 0; 449 for (int i = offset; len < length && buffer[i] != 0; i++) { 450 len++; 451 } 452 if (len > 0) { 453 final byte[] b = new byte[len]; 454 System.arraycopy(buffer, offset, b, 0, len); 455 return encoding.decode(b); 456 } 457 return ""; 458 } 459 460 /** 461 * Parses an octal string from a buffer. 462 * 463 * <p> 464 * Leading spaces are ignored. The buffer must contain a trailing space or NUL, and may contain an additional trailing space or NUL. 465 * </p> 466 * 467 * <p> 468 * The input buffer is allowed to contain all NULs, in which case the method returns 0L (this allows for missing fields). 469 * </p> 470 * 471 * <p> 472 * To work-around some tar implementations that insert a leading NUL this method returns 0 if it detects a leading NUL since Commons Compress 1.4. 473 * </p> 474 * 475 * @param buffer The buffer from which to parse. 476 * @param offset The offset into the buffer from which to parse. 477 * @param length The maximum number of bytes to parse - must be at least 2 bytes. 478 * @return The long value of the octal string. 479 * @throws IllegalArgumentException if the trailing space/NUL is missing or if an invalid byte is detected. 480 */ 481 public static long parseOctal(final byte[] buffer, final int offset, final int length) { 482 long result = 0; 483 int end = offset + length; 484 int start = offset; 485 486 if (length < 2) { 487 throw new IllegalArgumentException("Length " + length + " must be at least 2"); 488 } 489 490 if (buffer[start] == 0) { 491 return 0L; 492 } 493 494 // Skip leading spaces 495 while (start < end) { 496 if (buffer[start] != ' ') { 497 break; 498 } 499 start++; 500 } 501 502 // Trim all trailing NULs and spaces. 503 // The ustar and POSIX tar specs require a trailing NUL or 504 // space but some implementations use the extra digit for big 505 // sizes/uids/gids ... 506 byte trailer = buffer[end - 1]; 507 while (start < end && (trailer == 0 || trailer == ' ')) { 508 end--; 509 trailer = buffer[end - 1]; 510 } 511 512 for (; start < end; start++) { 513 final byte currentByte = buffer[start]; 514 // CheckStyle:MagicNumber OFF 515 if (currentByte < '0' || currentByte > '7') { 516 throw new IllegalArgumentException(exceptionMessage(buffer, offset, length, start, currentByte)); 517 } 518 result = (result << 3) + (currentByte - '0'); // convert from ASCII 519 // CheckStyle:MagicNumber ON 520 } 521 522 return result; 523 } 524 525 /** 526 * Computes the value contained in a byte buffer. If the most significant bit of the first byte in the buffer is set, this bit is ignored and the rest of 527 * the buffer is interpreted as a binary number. Otherwise, the buffer is interpreted as an octal number as per the parseOctal function above. 528 * 529 * @param buffer The buffer from which to parse. 530 * @param offset The offset into the buffer from which to parse. 531 * @param length The maximum number of bytes to parse. 532 * @return The long value of the octal or binary string. 533 * @throws IllegalArgumentException if the trailing space/NUL is missing or an invalid byte is detected in an octal number, or if a binary number would 534 * exceed the size of a signed long 64-bit integer. 535 * @since 1.4 536 */ 537 public static long parseOctalOrBinary(final byte[] buffer, final int offset, final int length) { 538 539 if ((buffer[offset] & 0x80) == 0) { 540 return parseOctal(buffer, offset, length); 541 } 542 final boolean negative = buffer[offset] == (byte) 0xff; 543 if (length < 9) { 544 return parseBinaryLong(buffer, offset, length, negative); 545 } 546 return parseBinaryBigInteger(buffer, offset, length, negative); 547 } 548 549 /** 550 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map 551 * 552 * <p> 553 * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 554 * </p> 555 * <p> 556 * Will internally invoke {@link #parseFromPAX01SparseHeaders} and map IOExceptions to a RzuntimeException, You should use 557 * {@link #parseFromPAX01SparseHeaders} directly instead. 558 * </p> 559 * 560 * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 561 * @return sparse headers parsed from sparse map 562 * @deprecated use #parseFromPAX01SparseHeaders instead 563 */ 564 @Deprecated 565 protected static List<TarArchiveStructSparse> parsePAX01SparseHeaders(final String sparseMap) { 566 try { 567 return parseFromPAX01SparseHeaders(sparseMap); 568 } catch (final IOException ex) { 569 throw new UncheckedIOException(ex.getMessage(), ex); 570 } 571 } 572 573 /** 574 * For PAX Format 1.X: The sparse map itself is stored in the file data block, preceding the actual file data. It consists of a series of decimal numbers 575 * delimited by newlines. The map is padded with nulls to the nearest block boundary. The first number gives the number of entries in the map. Following are 576 * map entries, each one consisting of two numbers giving the offset and size of the data block it describes. 577 * 578 * @param inputStream parsing source. 579 * @param recordSize The size the TAR header 580 * @return sparse headers 581 * @throws IOException if an I/O error occurs. 582 */ 583 protected static List<TarArchiveStructSparse> parsePAX1XSparseHeaders(final InputStream inputStream, final int recordSize) throws IOException { 584 // for 1.X PAX Headers 585 final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>(); 586 long bytesRead = 0; 587 588 long[] readResult = readLineOfNumberForPax1X(inputStream); 589 long sparseHeadersCount = readResult[0]; 590 if (sparseHeadersCount < 0) { 591 // overflow while reading number? 592 throw new IOException("Corrupted TAR archive. Negative value in sparse headers block"); 593 } 594 bytesRead += readResult[1]; 595 while (sparseHeadersCount-- > 0) { 596 readResult = readLineOfNumberForPax1X(inputStream); 597 final long sparseOffset = readResult[0]; 598 if (sparseOffset < 0) { 599 throw new IOException("Corrupted TAR archive." + " Sparse header block offset contains negative value"); 600 } 601 bytesRead += readResult[1]; 602 603 readResult = readLineOfNumberForPax1X(inputStream); 604 final long sparseNumbytes = readResult[0]; 605 if (sparseNumbytes < 0) { 606 throw new IOException("Corrupted TAR archive." + " Sparse header block numbytes contains negative value"); 607 } 608 bytesRead += readResult[1]; 609 sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes)); 610 } 611 612 // skip the rest of this record data 613 final long bytesToSkip = recordSize - bytesRead % recordSize; 614 org.apache.commons.io.IOUtils.skip(inputStream, bytesToSkip); 615 return sparseHeaders; 616 } 617 618 /** 619 * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) may appear multi times, and they look like: 620 * 621 * <pre> 622 * GNU.sparse.size=size 623 * GNU.sparse.numblocks=numblocks 624 * repeat numblocks times 625 * GNU.sparse.offset=offset 626 * GNU.sparse.numbytes=numbytes 627 * end repeat 628 * </pre> 629 * <p> 630 * For PAX Format 0.1, the sparse headers are stored in a single variable: GNU.sparse.map 631 * </p> 632 * <p> 633 * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 634 * </p> 635 * 636 * @param inputStream input stream to read keys and values 637 * @param sparseHeaders used in PAX Format 0.0 & 0.1, as it may appear multiple times, the sparse headers need to be stored in an array, not a map 638 * @param globalPaxHeaders global PAX headers of the tar archive 639 * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry. 640 * @throws IOException if an I/O error occurs. 641 * @deprecated use the four-arg version instead 642 */ 643 @Deprecated 644 protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders, 645 final Map<String, String> globalPaxHeaders) throws IOException { 646 return parsePaxHeaders(inputStream, sparseHeaders, globalPaxHeaders, -1); 647 } 648 649 /** 650 * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) may appear multi times, and they look like: 651 * 652 * <pre> 653 * GNU.sparse.size=size 654 * GNU.sparse.numblocks=numblocks 655 * repeat numblocks times 656 * GNU.sparse.offset=offset 657 * GNU.sparse.numbytes=numbytes 658 * end repeat 659 * </pre> 660 * <p> 661 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map 662 * </p> 663 * <p> 664 * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 665 * </p> 666 * 667 * @param inputStream input stream to read keys and values 668 * @param sparseHeaders used in PAX Format 0.0 & 0.1, as it may appear multiple times, the sparse headers need to be stored in an array, not a map 669 * @param globalPaxHeaders global PAX headers of the tar archive 670 * @param headerSize total size of the PAX header, will be ignored if negative 671 * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry. 672 * @throws IOException if an I/O error occurs. 673 * @since 1.21 674 */ 675 protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders, 676 final Map<String, String> globalPaxHeaders, final long headerSize) throws IOException { 677 final Map<String, String> headers = new HashMap<>(globalPaxHeaders); 678 Long offset = null; 679 // Format is "length keyword=value\n"; 680 int totalRead = 0; 681 while (true) { // get length 682 int ch; 683 int len = 0; 684 int read = 0; 685 while ((ch = inputStream.read()) != -1) { 686 read++; 687 totalRead++; 688 if (ch == '\n') { // blank line in header 689 break; 690 } 691 if (ch == ' ') { // End of length string 692 // Get keyword 693 final ByteArrayOutputStream coll = new ByteArrayOutputStream(); 694 while ((ch = inputStream.read()) != -1) { 695 read++; 696 totalRead++; 697 if (totalRead < 0 || headerSize >= 0 && totalRead >= headerSize) { 698 break; 699 } 700 if (ch == '=') { // end of keyword 701 final String keyword = coll.toString(CharsetNames.UTF_8); 702 // Get rest of entry 703 final int restLen = len - read; 704 if (restLen <= 1) { // only NL 705 headers.remove(keyword); 706 } else if (headerSize >= 0 && restLen > headerSize - totalRead) { 707 throw new IOException("Paxheader value size " + restLen + " exceeds size of header record"); 708 } else { 709 final byte[] rest = IOUtils.readRange(inputStream, restLen); 710 final int got = rest.length; 711 if (got != restLen) { 712 throw new IOException("Failed to read " + "Paxheader. Expected " + restLen + " bytes, read " + got); 713 } 714 totalRead += restLen; 715 // Drop trailing NL 716 if (rest[restLen - 1] != '\n') { 717 throw new IOException("Failed to read Paxheader." + "Value should end with a newline"); 718 } 719 final String value = new String(rest, 0, restLen - 1, StandardCharsets.UTF_8); 720 headers.put(keyword, value); 721 722 // for 0.0 PAX Headers 723 if (keyword.equals(TarGnuSparseKeys.OFFSET)) { 724 if (offset != null) { 725 // previous GNU.sparse.offset header but no numBytes 726 sparseHeaders.add(new TarArchiveStructSparse(offset, 0)); 727 } 728 try { 729 offset = Long.valueOf(value); 730 } catch (final NumberFormatException ex) { 731 throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " contains a non-numeric value"); 732 } 733 if (offset < 0) { 734 throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " contains negative value"); 735 } 736 } 737 738 // for 0.0 PAX Headers 739 if (keyword.equals(TarGnuSparseKeys.NUMBYTES)) { 740 if (offset == null) { 741 throw new IOException( 742 "Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " is expected before GNU.sparse.numbytes shows up."); 743 } 744 final long numbytes = ParsingUtils.parseLongValue(value); 745 if (numbytes < 0) { 746 throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.NUMBYTES + " contains negative value"); 747 } 748 sparseHeaders.add(new TarArchiveStructSparse(offset, numbytes)); 749 offset = null; 750 } 751 } 752 break; 753 } 754 coll.write((byte) ch); 755 } 756 break; // Processed single header 757 } 758 759 // COMPRESS-530 : throw if we encounter a non-number while reading length 760 if (ch < '0' || ch > '9') { 761 throw new IOException("Failed to read Paxheader. Encountered a non-number while reading length"); 762 } 763 764 len *= 10; 765 len += ch - '0'; 766 } 767 if (ch == -1) { // EOF 768 break; 769 } 770 } 771 if (offset != null) { 772 // offset but no numBytes 773 sparseHeaders.add(new TarArchiveStructSparse(offset, 0)); 774 } 775 return headers; 776 } 777 778 /** 779 * Parses the content of a PAX 1.0 sparse block. 780 * 781 * @since 1.20 782 * @param buffer The buffer from which to parse. 783 * @param offset The offset into the buffer from which to parse. 784 * @return a parsed sparse struct 785 */ 786 public static TarArchiveStructSparse parseSparse(final byte[] buffer, final int offset) { 787 final long sparseOffset = parseOctalOrBinary(buffer, offset, TarConstants.SPARSE_OFFSET_LEN); 788 final long sparseNumbytes = parseOctalOrBinary(buffer, offset + TarConstants.SPARSE_OFFSET_LEN, TarConstants.SPARSE_NUMBYTES_LEN); 789 790 return new TarArchiveStructSparse(sparseOffset, sparseNumbytes); 791 } 792 793 /** 794 * For 1.X PAX Format, the sparse headers are stored in the file data block, preceding the actual file data. It consists of a series of decimal numbers 795 * delimited by newlines. 796 * 797 * @param inputStream the input stream of the tar file 798 * @return the decimal number delimited by '\n', and the bytes read from input stream 799 * @throws IOException 800 */ 801 private static long[] readLineOfNumberForPax1X(final InputStream inputStream) throws IOException { 802 int number; 803 long result = 0; 804 long bytesRead = 0; 805 806 while ((number = inputStream.read()) != '\n') { 807 bytesRead += 1; 808 if (number == -1) { 809 throw new IOException("Unexpected EOF when reading parse information of 1.X PAX format"); 810 } 811 if (number < '0' || number > '9') { 812 throw new IOException("Corrupted TAR archive. Non-numeric value in sparse headers block"); 813 } 814 result = result * 10 + (number - '0'); 815 } 816 bytesRead += 1; 817 818 return new long[] { result, bytesRead }; 819 } 820 821 /** 822 * @since 1.21 823 */ 824 static List<TarArchiveStructSparse> readSparseStructs(final byte[] buffer, final int offset, final int entries) throws IOException { 825 final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>(); 826 for (int i = 0; i < entries; i++) { 827 try { 828 final TarArchiveStructSparse sparseHeader = parseSparse(buffer, 829 offset + i * (TarConstants.SPARSE_OFFSET_LEN + TarConstants.SPARSE_NUMBYTES_LEN)); 830 831 if (sparseHeader.getOffset() < 0) { 832 throw new IOException("Corrupted TAR archive, sparse entry with negative offset"); 833 } 834 if (sparseHeader.getNumbytes() < 0) { 835 throw new IOException("Corrupted TAR archive, sparse entry with negative numbytes"); 836 } 837 sparseHeaders.add(sparseHeader); 838 } catch (final IllegalArgumentException ex) { 839 // thrown internally by parseOctalOrBinary 840 throw new IOException("Corrupted TAR archive, sparse entry is invalid", ex); 841 } 842 } 843 return Collections.unmodifiableList(sparseHeaders); 844 } 845 846 /** 847 * Wikipedia <a href="https://en.wikipedia.org/wiki/Tar_(computing)#File_header">says</a>: <blockquote> The checksum is calculated by taking the sum of the 848 * unsigned byte values of the header block with the eight checksum bytes taken to be ASCII spaces (decimal value 32). It is stored as a six digit octal 849 * number with leading zeroes followed by a NUL and then a space. Various implementations do not adhere to this format. For better compatibility, ignore 850 * leading and trailing whitespace, and get the first six digits. In addition, some historic tar implementations treated bytes as signed. Implementations 851 * typically calculate the checksum both ways, and treat it as good if either the signed or unsigned sum matches the included checksum. </blockquote> 852 * <p> 853 * The return value of this method should be treated as a best-effort heuristic rather than an absolute and final truth. The checksum verification logic may 854 * well evolve over time as more special cases are encountered. 855 * </p> 856 * 857 * @param header tar header 858 * @return whether the checksum is reasonably good 859 * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a> 860 * @since 1.5 861 */ 862 public static boolean verifyCheckSum(final byte[] header) { 863 final long storedSum = parseOctal(header, TarConstants.CHKSUM_OFFSET, TarConstants.CHKSUMLEN); 864 long unsignedSum = 0; 865 long signedSum = 0; 866 867 for (int i = 0; i < header.length; i++) { 868 byte b = header[i]; 869 if (TarConstants.CHKSUM_OFFSET <= i && i < TarConstants.CHKSUM_OFFSET + TarConstants.CHKSUMLEN) { 870 b = ' '; 871 } 872 unsignedSum += 0xff & b; 873 signedSum += b; 874 } 875 return storedSum == unsignedSum || storedSum == signedSum; 876 } 877 878 /** Prevents instantiation. */ 879 private TarUtils() { 880 } 881 882}