001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 */
017package org.apache.commons.compress.archivers.zip;
018
019import java.io.BufferedInputStream;
020import java.io.ByteArrayInputStream;
021import java.io.Closeable;
022import java.io.EOFException;
023import java.io.File;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.SequenceInputStream;
027import java.nio.ByteBuffer;
028import java.nio.ByteOrder;
029import java.nio.channels.FileChannel;
030import java.nio.channels.SeekableByteChannel;
031import java.nio.charset.Charset;
032import java.nio.charset.StandardCharsets;
033import java.nio.file.Files;
034import java.nio.file.OpenOption;
035import java.nio.file.Path;
036import java.nio.file.StandardOpenOption;
037import java.util.ArrayList;
038import java.util.Arrays;
039import java.util.Collections;
040import java.util.Comparator;
041import java.util.EnumSet;
042import java.util.Enumeration;
043import java.util.HashMap;
044import java.util.LinkedList;
045import java.util.List;
046import java.util.Map;
047import java.util.stream.Collectors;
048import java.util.stream.IntStream;
049import java.util.zip.Inflater;
050import java.util.zip.ZipException;
051
052import org.apache.commons.compress.archivers.EntryStreamOffsets;
053import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
054import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
055import org.apache.commons.compress.utils.BoundedArchiveInputStream;
056import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream;
057import org.apache.commons.compress.utils.CharsetNames;
058import org.apache.commons.compress.utils.IOUtils;
059import org.apache.commons.compress.utils.InputStreamStatistics;
060import org.apache.commons.compress.utils.SeekableInMemoryByteChannel;
061import org.apache.commons.io.Charsets;
062import org.apache.commons.io.FilenameUtils;
063import org.apache.commons.io.build.AbstractOrigin.ByteArrayOrigin;
064import org.apache.commons.io.build.AbstractStreamBuilder;
065import org.apache.commons.io.input.CountingInputStream;
066
067/**
068 * Replacement for {@link java.util.zip.ZipFile}.
069 * <p>
 * This class adds support for file name encodings other than UTF-8 (which is required to work on ZIP files created by native ZIP tools) and is able to skip a
 * preamble like the one found in self-extracting archives. Furthermore, it returns instances of
072 * {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry} instead of {@link java.util.zip.ZipEntry}.
073 * </p>
074 * <p>
075 * It doesn't extend {@link java.util.zip.ZipFile} as it would have to reimplement all methods anyway. Like {@link java.util.zip.ZipFile}, it uses
076 * SeekableByteChannel under the covers and supports compressed and uncompressed entries. As of Apache Commons Compress 1.3 it also transparently supports Zip64
077 * extensions and thus individual entries and archives larger than 4 GB or with more than 65,536 entries.
078 * </p>
079 * <p>
080 * The method signatures mimic the ones of {@link java.util.zip.ZipFile}, with a couple of exceptions:
081 * </p>
082 * <ul>
083 * <li>There is no getName method.</li>
084 * <li>entries has been renamed to getEntries.</li>
085 * <li>getEntries and getEntry return {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry} instances.</li>
086 * <li>close is allowed to throw IOException.</li>
087 * </ul>
088 */
089public class ZipFile implements Closeable {
090
091    /**
092     * Lock-free implementation of BoundedInputStream. The implementation uses positioned reads on the underlying archive file channel and therefore performs
093     * significantly faster in concurrent environment.
094     */
095    private static class BoundedFileChannelInputStream extends BoundedArchiveInputStream {
096        private final FileChannel archive;
097
098        BoundedFileChannelInputStream(final long start, final long remaining, final FileChannel archive) {
099            super(start, remaining);
100            this.archive = archive;
101        }
102
103        @Override
104        protected int read(final long pos, final ByteBuffer buf) throws IOException {
105            final int read = archive.read(buf, pos);
106            buf.flip();
107            return read;
108        }
109    }
110
111    /**
112     * Builds new {@link ZipFile} instances.
113     * <p>
114     * The channel will be opened for reading, assuming the specified encoding for file names.
115     * </p>
116     * <p>
117     * See {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} to read from an in-memory archive.
118     * </p>
119     * <p>
120     * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time
121     * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory.
122     * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is
123     * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
124     * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.
125     * </p>
126     *
127     * @since 1.26.0
128     */
129    public static class Builder extends AbstractStreamBuilder<ZipFile, Builder> {
130
131        static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8;
132
133        private SeekableByteChannel seekableByteChannel;
134        private boolean useUnicodeExtraFields = true;
135        private boolean ignoreLocalFileHeader;
136        private long maxNumberOfDisks = 1;
137
138        public Builder() {
139            setCharset(DEFAULT_CHARSET);
140            setCharsetDefault(DEFAULT_CHARSET);
141        }
142
143        @SuppressWarnings("resource") // caller closes
144        @Override
145        public ZipFile get() throws IOException {
146            final SeekableByteChannel actualChannel;
147            final String actualDescription;
148            if (seekableByteChannel != null) {
149                actualChannel = seekableByteChannel;
150                actualDescription = actualChannel.getClass().getSimpleName();
151            } else if (checkOrigin() instanceof ByteArrayOrigin) {
152                actualChannel = new SeekableInMemoryByteChannel(checkOrigin().getByteArray());
153                actualDescription = actualChannel.getClass().getSimpleName();
154            } else {
155                OpenOption[] openOptions = getOpenOptions();
156                if (openOptions.length == 0) {
157                    openOptions = new OpenOption[] { StandardOpenOption.READ };
158                }
159                final Path path = getPath();
160                actualChannel = openZipChannel(path, maxNumberOfDisks, openOptions);
161                actualDescription = path.toString();
162            }
163            final boolean closeOnError = seekableByteChannel != null;
164            return new ZipFile(actualChannel, actualDescription, getCharset(), useUnicodeExtraFields, closeOnError, ignoreLocalFileHeader);
165        }
166
167        /**
168         * Sets whether to ignore information stored inside the local file header.
169         *
170         * @param ignoreLocalFileHeader whether to ignore information stored inside.
171         * @return this.
172         */
173        public Builder setIgnoreLocalFileHeader(final boolean ignoreLocalFileHeader) {
174            this.ignoreLocalFileHeader = ignoreLocalFileHeader;
175            return this;
176        }
177
178        /**
179         * Sets max number of multi archive disks, default is 1 (no multi archive).
180         *
181         * @param maxNumberOfDisks max number of multi archive disks.
182         *
183         * @return this.
184         */
185        public Builder setMaxNumberOfDisks(final long maxNumberOfDisks) {
186            this.maxNumberOfDisks = maxNumberOfDisks;
187            return this;
188        }
189
190        /**
191         * The actual channel, overrides any other input aspects like a File, Path, and so on.
192         *
193         * @param seekableByteChannel The actual channel.
194         * @return this.
195         */
196        public Builder setSeekableByteChannel(final SeekableByteChannel seekableByteChannel) {
197            this.seekableByteChannel = seekableByteChannel;
198            return this;
199        }
200
201        /**
202         * Sets whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
203         *
204         * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
205         * @return this.
206         */
207        public Builder setUseUnicodeExtraFields(final boolean useUnicodeExtraFields) {
208            this.useUnicodeExtraFields = useUnicodeExtraFields;
209            return this;
210        }
211
212    }
213
214    /**
215     * Extends ZipArchiveEntry to store the offset within the archive.
216     */
217    private static final class Entry extends ZipArchiveEntry {
218
219        @Override
220        public boolean equals(final Object other) {
221            if (super.equals(other)) {
222                // super.equals would return false if other were not an Entry
223                final Entry otherEntry = (Entry) other;
224                return getLocalHeaderOffset() == otherEntry.getLocalHeaderOffset() && super.getDataOffset() == otherEntry.getDataOffset()
225                        && super.getDiskNumberStart() == otherEntry.getDiskNumberStart();
226            }
227            return false;
228        }
229
230        @Override
231        public int hashCode() {
232            return 3 * super.hashCode() + (int) getLocalHeaderOffset() + (int) (getLocalHeaderOffset() >> 32);
233        }
234    }
235
236    private static final class NameAndComment {
237        private final byte[] name;
238        private final byte[] comment;
239
240        private NameAndComment(final byte[] name, final byte[] comment) {
241            this.name = name;
242            this.comment = comment;
243        }
244    }
245
246    private static final class StoredStatisticsStream extends CountingInputStream implements InputStreamStatistics {
247        StoredStatisticsStream(final InputStream in) {
248            super(in);
249        }
250
251        @Override
252        public long getCompressedCount() {
253            return super.getByteCount();
254        }
255
256        @Override
257        public long getUncompressedCount() {
258            return getCompressedCount();
259        }
260    }
261
    /** Open options for a read-only channel; handed to {@code Files.newByteChannel} by {@code newReadByteChannel}. */
    private static final EnumSet<StandardOpenOption> READ = EnumSet.of(StandardOpenOption.READ);

    /** Initial capacity passed to the {@code HashMap} that backs {@code nameMap}. */
    private static final int HASH_SIZE = 509;
    /** Bit mask that keeps the low four bits of a value. */
    static final int NIBLET_MASK = 0x0f;
    /** Presumably the shift distance for moving a value by one byte (8 bits); no use is visible in this part of the file. */
    static final int BYTE_SHIFT = 8;
    // Indexes of the four signature bytes that tryToLocateSignature compares one after the other.
    private static final int POS_0 = 0;
    private static final int POS_1 = 1;
    private static final int POS_2 = 2;
    private static final int POS_3 = 3;
    /** Array holding a single zero byte. NOTE(review): its use is not visible in this part of the file. */
    private static final byte[] ONE_ZERO_BYTE = new byte[1];
272
    /**
     * Length of a "central directory" entry structure without file name, extra fields or comment.
     * <p>
     * The sum starts at "version made by", so the leading signature of the structure is not counted either.
     * </p>
     */
    private static final int CFH_LEN =
    // @formatter:off
        /* version made by                 */ ZipConstants.SHORT
        /* version needed to extract       */ + ZipConstants.SHORT
        /* general purpose bit flag        */ + ZipConstants.SHORT
        /* compression method              */ + ZipConstants.SHORT
        /* last mod file time              */ + ZipConstants.SHORT
        /* last mod file date              */ + ZipConstants.SHORT
        /* crc-32                          */ + ZipConstants.WORD
        /* compressed size                 */ + ZipConstants.WORD
        /* uncompressed size               */ + ZipConstants.WORD
        /* file name length                */ + ZipConstants. SHORT
        /* extra field length              */ + ZipConstants.SHORT
        /* file comment length             */ + ZipConstants.SHORT
        /* disk number start               */ + ZipConstants.SHORT
        /* internal file attributes        */ + ZipConstants.SHORT
        /* external file attributes        */ + ZipConstants.WORD
        /* relative offset of local header */ + ZipConstants.WORD;
    // @formatter:on

    /**
     * The signature bytes {@code ZipArchiveOutputStream.CFH_SIG} converted to a {@code long} by {@code ZipLong.getValue}.
     */
    private static final long CFH_SIG = ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
297
    /**
     * Length of the "End of central directory record" - which is supposed to be the last structure of the archive - without file comment.
     * <p>
     * Used as the smallest distance from the end of the archive at which the search for the record's signature starts.
     * </p>
     */
    static final int MIN_EOCD_SIZE =
    // @formatter:off
        /* end of central dir signature    */ ZipConstants.WORD
        /* number of this disk             */ + ZipConstants.SHORT
        /* number of the disk with the     */
        /* start of the central directory  */ + ZipConstants.SHORT
        /* total number of entries in      */
        /* the central dir on this disk    */ + ZipConstants.SHORT
        /* total number of entries in      */
        /* the central dir                 */ + ZipConstants.SHORT
        /* size of the central directory   */ + ZipConstants.WORD
        /* offset of start of central      */
        /* directory with respect to       */
        /* the starting disk number        */ + ZipConstants.WORD
        /* ZIP file comment length         */ + ZipConstants.SHORT;
    // @formatter:on

    /**
     * Maximum length of the "End of central directory record" with a file comment.
     * <p>
     * Used as the largest distance from the end of the archive at which the search for the record's signature gives up.
     * </p>
     */
    private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
    // @formatter:off
        /* maximum length of ZIP file comment */ + ZipConstants.ZIP64_MAGIC_SHORT;
    // @formatter:on
325
    /**
     * Offset of the field that holds the size of the central directory inside the "End of central directory record" relative to the start of the "End of
     * central directory record", i.e. the combined length of all fields in front of it.
     */
    private static final int CFD_LENGTH_OFFSET =
    // @formatter:off
        /* end of central dir signature    */ ZipConstants.WORD
        /* number of this disk             */ + ZipConstants.SHORT
        /* number of the disk with the     */
        /* start of the central directory  */ + ZipConstants.SHORT
        /* total number of entries in      */
        /* the central dir on this disk    */ + ZipConstants.SHORT
        /* total number of entries in      */
        /* the central dir                 */ + ZipConstants.SHORT;
    // @formatter:on

    /**
     * Offset of the field that holds the disk number of the first central directory entry inside the "End of central directory record" relative to the start of
     * the "End of central directory record".
     */
    private static final int CFD_DISK_OFFSET =
    // @formatter:off
            /* end of central dir signature    */ ZipConstants.WORD
            /* number of this disk             */ + ZipConstants.SHORT;
    // @formatter:on

    /**
     * Offset of the field that holds the location of the first central directory entry inside the "End of central directory record" relative to the "number of
     * the disk with the start of the central directory", i.e. the combined length of the fields between the two.
     */
    private static final int CFD_LOCATOR_RELATIVE_OFFSET =
    // @formatter:off
            /* total number of entries in      */
            /* the central dir on this disk    */ + ZipConstants.SHORT
            /* total number of entries in      */
            /* the central dir                 */ + ZipConstants.SHORT
            /* size of the central directory   */ + ZipConstants.WORD;
    // @formatter:on
364
    /**
     * Length of the "Zip64 end of central directory locator" - which should be right in front of the "end of central directory record" if one is present at
     * all.
     */
    private static final int ZIP64_EOCDL_LENGTH =
    // @formatter:off
        /* zip64 end of central dir locator sig */ ZipConstants.WORD
        /* number of the disk with the          */
        /* start of the zip64 end of            */
        /* central directory                    */ + ZipConstants.WORD
        /* relative offset of the zip64         */
        /* end of central directory record      */ + ZipConstants.DWORD
        /* total number of disks                */ + ZipConstants.WORD;
    // @formatter:on

    /**
     * Offset of the field that holds the location of the "Zip64 end of central directory record" inside the "Zip64 end of central directory locator" relative
     * to the start of the "Zip64 end of central directory locator".
     */
    private static final int ZIP64_EOCDL_LOCATOR_OFFSET =
    // @formatter:off
        /* zip64 end of central dir locator sig */ ZipConstants.WORD
        /* number of the disk with the          */
        /* start of the zip64 end of            */
        /* central directory                    */ + ZipConstants.WORD;
    // @formatter:on
391
    /**
     * Offset of the field that holds the location of the first central directory entry inside the "Zip64 end of central directory record" relative to the start
     * of the "Zip64 end of central directory record", i.e. the combined length of all fields in front of it.
     */
    private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET =
    // @formatter:off
        /* zip64 end of central dir        */
        /* signature                       */ ZipConstants.WORD
        /* size of zip64 end of central    */
        /* directory record                */ + ZipConstants.DWORD
        /* version made by                 */ + ZipConstants.SHORT
        /* version needed to extract       */ + ZipConstants.SHORT
        /* number of this disk             */ + ZipConstants.WORD
        /* number of the disk with the     */
        /* start of the central directory  */ + ZipConstants.WORD
        /* total number of entries in the  */
        /* central directory on this disk  */ + ZipConstants.DWORD
        /* total number of entries in the  */
        /* central directory               */ + ZipConstants.DWORD
        /* size of the central directory   */ + ZipConstants.DWORD;
    // @formatter:on

    /**
     * Offset of the field that holds the disk number of the first central directory entry inside the "Zip64 end of central directory record" relative to the
     * start of the "Zip64 end of central directory record".
     */
    private static final int ZIP64_EOCD_CFD_DISK_OFFSET =
    // @formatter:off
            /* zip64 end of central dir        */
            /* signature                       */ ZipConstants.WORD
            /* size of zip64 end of central    */
            /* directory record                */ + ZipConstants.DWORD
            /* version made by                 */ + ZipConstants.SHORT
            /* version needed to extract       */ + ZipConstants.SHORT
            /* number of this disk             */ + ZipConstants.WORD;
    // @formatter:on

    /**
     * Offset of the field that holds the location of the first central directory entry inside the "Zip64 end of central directory record" relative to the
     * "number of the disk with the start of the central directory", i.e. the combined length of the fields between the two.
     */
    private static final int ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET =
    // @formatter:off
            /* total number of entries in the  */
            /* central directory on this disk  */ ZipConstants.DWORD
            /* total number of entries in the  */
            /* central directory               */ + ZipConstants.DWORD
            /* size of the central directory   */ + ZipConstants.DWORD;
    // @formatter:on
441
    /**
     * Number of bytes in local file header up to the &quot;length of file name&quot; entry.
     * <p>
     * The cast on the last operand turns the final addition into {@code long} arithmetic so the sum matches the declared type.
     * </p>
     */
    private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
    // @formatter:off
        /* local file header signature     */ ZipConstants.WORD
        /* version needed to extract       */ + ZipConstants.SHORT
        /* general purpose bit flag        */ + ZipConstants.SHORT
        /* compression method              */ + ZipConstants.SHORT
        /* last mod file time              */ + ZipConstants.SHORT
        /* last mod file date              */ + ZipConstants.SHORT
        /* crc-32                          */ + ZipConstants.WORD
        /* compressed size                 */ + ZipConstants.WORD
        /* uncompressed size               */ + (long) ZipConstants.WORD;
    // @formatter:on
457
    /**
     * Compares two ZipArchiveEntries based on their position within the archive: first by the number of the disk the entry starts on, then by the offset of
     * its local file header.
     * <p>
     * Won't return any meaningful results if one of the entries isn't part of the archive at all.
     * </p>
     *
     * @since 1.1
     */
    private static final Comparator<ZipArchiveEntry> offsetComparator = Comparator.comparingLong(ZipArchiveEntry::getDiskNumberStart)
            .thenComparingLong(ZipArchiveEntry::getLocalHeaderOffset);
468
    /**
     * Creates a new {@link Builder}, the entry point for configuring and opening a {@link ZipFile}.
     *
     * @return a new Builder.
     * @since 1.26.0
     */
    public static Builder builder() {
        return new Builder();
    }
478
    /**
     * Closes a ZIP file quietly; throwing no IOException, does nothing on null input.
     * <p>
     * Delegates to Apache Commons IO's {@code IOUtils.closeQuietly}.
     * </p>
     *
     * @param zipFile file to close, can be null
     */
    public static void closeQuietly(final ZipFile zipFile) {
        org.apache.commons.io.IOUtils.closeQuietly(zipFile);
    }
487
    /**
     * Creates a new SeekableByteChannel for reading.
     * <p>
     * The channel is opened with {@link StandardOpenOption#READ} only, so the file is never created or modified.
     * </p>
     *
     * @param path the path to the file to open
     * @return a new seekable byte channel
     * @throws IOException if an I/O error occurs
     */
    private static SeekableByteChannel newReadByteChannel(final Path path) throws IOException {
        return Files.newByteChannel(path, READ);
    }
498
499    private static SeekableByteChannel openZipChannel(final Path path, final long maxNumberOfDisks, final OpenOption[] openOptions) throws IOException {
500        final FileChannel channel = FileChannel.open(path, StandardOpenOption.READ);
501        final List<FileChannel> channels = new ArrayList<>();
502        try {
503            final boolean is64 = positionAtEndOfCentralDirectoryRecord(channel);
504            long numberOfDisks;
505            if (is64) {
506                channel.position(channel.position() + ZipConstants.WORD + ZipConstants.WORD + ZipConstants.DWORD);
507                final ByteBuffer buf = ByteBuffer.allocate(ZipConstants.WORD);
508                buf.order(ByteOrder.LITTLE_ENDIAN);
509                IOUtils.readFully(channel, buf);
510                buf.flip();
511                numberOfDisks = buf.getInt() & 0xffffffffL;
512            } else {
513                channel.position(channel.position() + ZipConstants.WORD);
514                final ByteBuffer buf = ByteBuffer.allocate(ZipConstants.SHORT);
515                buf.order(ByteOrder.LITTLE_ENDIAN);
516                IOUtils.readFully(channel, buf);
517                buf.flip();
518                numberOfDisks = (buf.getShort() & 0xffff) + 1;
519            }
520            if (numberOfDisks > Math.min(maxNumberOfDisks, Integer.MAX_VALUE)) {
521                throw new IOException("Too many disks for zip archive, max=" + Math.min(maxNumberOfDisks, Integer.MAX_VALUE) + " actual=" + numberOfDisks);
522            }
523
524            if (numberOfDisks <= 1) {
525                return channel;
526            }
527            channel.close();
528
529            final Path parent = path.getParent();
530            final String basename = FilenameUtils.removeExtension(path.getFileName().toString());
531
532            return ZipSplitReadOnlySeekableByteChannel.forPaths(IntStream.range(0, (int) numberOfDisks).mapToObj(i -> {
533                if (i == numberOfDisks - 1) {
534                    return path;
535                }
536                final Path lowercase = parent.resolve(String.format("%s.z%02d", basename, i + 1));
537                if (Files.exists(lowercase)) {
538                    return lowercase;
539                }
540                final Path uppercase = parent.resolve(String.format("%s.Z%02d", basename, i + 1));
541                if (Files.exists(uppercase)) {
542                    return uppercase;
543                }
544                return lowercase;
545            }).collect(Collectors.toList()), openOptions);
546        } catch (final Throwable ex) {
547            org.apache.commons.io.IOUtils.closeQuietly(channel);
548            channels.forEach(org.apache.commons.io.IOUtils::closeQuietly);
549            throw ex;
550        }
551    }
552
553    /**
554     * Searches for the and positions the stream at the start of the &quot;End of central dir record&quot;.
555     *
556     * @return true if it's Zip64 end of central directory or false if it's Zip32
557     */
558    private static boolean positionAtEndOfCentralDirectoryRecord(final SeekableByteChannel channel) throws IOException {
559        final boolean found = tryToLocateSignature(channel, MIN_EOCD_SIZE, MAX_EOCD_SIZE, ZipArchiveOutputStream.EOCD_SIG);
560        if (!found) {
561            throw new ZipException("Archive is not a ZIP archive");
562        }
563        boolean found64 = false;
564        final long position = channel.position();
565        if (position > ZIP64_EOCDL_LENGTH) {
566            final ByteBuffer wordBuf = ByteBuffer.allocate(4);
567            channel.position(channel.position() - ZIP64_EOCDL_LENGTH);
568            wordBuf.rewind();
569            IOUtils.readFully(channel, wordBuf);
570            wordBuf.flip();
571            found64 = wordBuf.equals(ByteBuffer.wrap(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG));
572            if (!found64) {
573                channel.position(position);
574            } else {
575                channel.position(channel.position() - ZipConstants.WORD);
576            }
577        }
578
579        return found64;
580    }
581
582    /**
583     * Searches the archive backwards from minDistance to maxDistance for the given signature, positions the RandomaccessFile right at the signature if it has
584     * been found.
585     */
586    private static boolean tryToLocateSignature(final SeekableByteChannel channel, final long minDistanceFromEnd, final long maxDistanceFromEnd,
587            final byte[] sig) throws IOException {
588        final ByteBuffer wordBuf = ByteBuffer.allocate(ZipConstants.WORD);
589        boolean found = false;
590        long off = channel.size() - minDistanceFromEnd;
591        final long stopSearching = Math.max(0L, channel.size() - maxDistanceFromEnd);
592        if (off >= 0) {
593            for (; off >= stopSearching; off--) {
594                channel.position(off);
595                try {
596                    wordBuf.rewind();
597                    IOUtils.readFully(channel, wordBuf);
598                    wordBuf.flip();
599                } catch (final EOFException ex) { // NOSONAR
600                    break;
601                }
602                int curr = wordBuf.get();
603                if (curr == sig[POS_0]) {
604                    curr = wordBuf.get();
605                    if (curr == sig[POS_1]) {
606                        curr = wordBuf.get();
607                        if (curr == sig[POS_2]) {
608                            curr = wordBuf.get();
609                            if (curr == sig[POS_3]) {
610                                found = true;
611                                break;
612                            }
613                        }
614                    }
615                }
616            }
617        }
618        if (found) {
619            channel.position(off);
620        }
621        return found;
622    }
623
    /**
     * List of entries in the order they appear inside the central directory.
     */
    private final List<ZipArchiveEntry> entries = new LinkedList<>();

    /**
     * Maps String to list of ZipArchiveEntrys, name -> actual entries. A list is needed because several entries may share one name.
     */
    private final Map<String, LinkedList<ZipArchiveEntry>> nameMap = new HashMap<>(HASH_SIZE);

    /**
     * The encoding to use for file names and the file comment.
     * <p>
     * For a list of possible values see <a href="https://docs.oracle.com/javase/8/docs/technotes/guides/intl/encoding.doc.html">Supported Encodings</a>.
     * Defaults to UTF-8.
     * </p>
     */
    private final Charset encoding;

    /**
     * The ZIP encoding to use for file names and the file comment.
     */
    private final ZipEncoding zipEncoding;

    /**
     * The actual data source.
     */
    private final SeekableByteChannel archive;

    /**
     * Whether to look for and use Unicode extra fields.
     */
    private final boolean useUnicodeExtraFields;

    /**
     * Whether the file is closed. Starts out as {@code true}; NOTE(review): presumably cleared once the archive has been opened successfully - confirm
     * against the constructor.
     */
    private volatile boolean closed = true;

    /**
     * Whether the ZIP archive is a split ZIP archive
     */
    private final boolean isSplitZipArchive;
667
    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    // Each byte array below has a ByteBuffer view wrapping the very same array, so data read into the buffer is visible in the array.
    private final byte[] dwordBuf = new byte[ZipConstants.DWORD];

    private final byte[] wordBuf = new byte[ZipConstants.WORD];

    private final byte[] cfhBuf = new byte[CFH_LEN];

    private final byte[] shortBuf = new byte[ZipConstants.SHORT];

    private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf);

    private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf);

    private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf);

    private final ByteBuffer shortBbuf = ByteBuffer.wrap(shortBuf);

    // NOTE(review): judging by the names, the disk number the central directory starts on and its offset relative to that disk - confirm where assigned.
    private long centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset;

    // NOTE(review): presumably the offset of the central directory within the whole archive - confirm where assigned.
    private long centralDirectoryStartOffset;

    // NOTE(review): presumably the offset of the first local file header - confirm where assigned.
    private long firstLocalFileHeaderOffset;
690
    /**
     * Opens the given file for reading, assuming "UTF8" for file names.
     * <p>
     * Delegates to {@link #ZipFile(File, String)} with {@code CharsetNames.UTF_8}.
     * </p>
     *
     * @param file the archive.
     *
     * @throws IOException if an error occurs while reading the file.
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public ZipFile(final File file) throws IOException {
        this(file, CharsetNames.UTF_8);
    }
703
704    /**
705     * Opens the given file for reading, assuming the specified encoding for file names and scanning for Unicode extra fields.
706     *
707     * @param file     the archive.
708     * @param encoding the encoding to use for file names, use null for the platform's default encoding
709     * @throws IOException if an error occurs while reading the file.
710     * @deprecated Use {@link Builder#get()}.
711     */
712    @Deprecated
713    public ZipFile(final File file, final String encoding) throws IOException {
714        this(file.toPath(), encoding, true);
715    }
716
717    /**
718     * Opens the given file for reading, assuming the specified encoding for file names.
719     *
720     * @param file                  the archive.
721     * @param encoding              the encoding to use for file names, use null for the platform's default encoding
722     * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
723     * @throws IOException if an error occurs while reading the file.
724     * @deprecated Use {@link Builder#get()}.
725     */
726    @Deprecated
727    public ZipFile(final File file, final String encoding, final boolean useUnicodeExtraFields) throws IOException {
728        this(file.toPath(), encoding, useUnicodeExtraFields, false);
729    }
730
731    /**
732     * Opens the given file for reading, assuming the specified encoding for file names.
733     * <p>
734     * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time
735     * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory.
736     * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is
737     * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
738     * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.
739     * </p>
740     *
741     * @param file                  the archive.
742     * @param encoding              the encoding to use for file names, use null for the platform's default encoding
743     * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
744     * @param ignoreLocalFileHeader whether to ignore information stored inside the local file header (see the notes in this method's Javadoc)
745     * @throws IOException if an error occurs while reading the file.
746     * @since 1.19
747     * @deprecated Use {@link Builder#get()}.
748     */
749    @Deprecated
750    @SuppressWarnings("resource") // Caller closes
751    public ZipFile(final File file, final String encoding, final boolean useUnicodeExtraFields, final boolean ignoreLocalFileHeader) throws IOException {
752        this(newReadByteChannel(file.toPath()), file.getAbsolutePath(), encoding, useUnicodeExtraFields, true, ignoreLocalFileHeader);
753    }
754
755    /**
756     * Opens the given path for reading, assuming "UTF-8" for file names.
757     *
758     * @param path path to the archive.
759     * @throws IOException if an error occurs while reading the file.
760     * @since 1.22
761     * @deprecated Use {@link Builder#get()}.
762     */
763    @Deprecated
764    public ZipFile(final Path path) throws IOException {
765        this(path, CharsetNames.UTF_8);
766    }
767
768    /**
769     * Opens the given path for reading, assuming the specified encoding for file names and scanning for Unicode extra fields.
770     *
771     * @param path     path to the archive.
772     * @param encoding the encoding to use for file names, use null for the platform's default encoding
773     * @throws IOException if an error occurs while reading the file.
774     * @since 1.22
775     * @deprecated Use {@link Builder#get()}.
776     */
777    @Deprecated
778    public ZipFile(final Path path, final String encoding) throws IOException {
779        this(path, encoding, true);
780    }
781
782    /**
783     * Opens the given path for reading, assuming the specified encoding for file names.
784     *
785     * @param path                  path to the archive.
786     * @param encoding              the encoding to use for file names, use null for the platform's default encoding
787     * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
788     * @throws IOException if an error occurs while reading the file.
789     * @since 1.22
790     * @deprecated Use {@link Builder#get()}.
791     */
792    @Deprecated
793    public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields) throws IOException {
794        this(path, encoding, useUnicodeExtraFields, false);
795    }
796
797    /**
798     * Opens the given path for reading, assuming the specified encoding for file names.
799     * <p>
800     * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time
801     * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory.
802     * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is
803     * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
804     * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.
805     * </p>
806     *
807     * @param path                  path to the archive.
808     * @param encoding              the encoding to use for file names, use null for the platform's default encoding
809     * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
810     * @param ignoreLocalFileHeader whether to ignore information stored inside the local file header (see the notes in this method's Javadoc)
811     * @throws IOException if an error occurs while reading the file.
812     * @since 1.22
813     * @deprecated Use {@link Builder#get()}.
814     */
815    @SuppressWarnings("resource") // Caller closes
816    @Deprecated
817    public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields, final boolean ignoreLocalFileHeader) throws IOException {
818        this(newReadByteChannel(path), path.toAbsolutePath().toString(), encoding, useUnicodeExtraFields, true, ignoreLocalFileHeader);
819    }
820
821    /**
822     * Opens the given channel for reading, assuming "UTF-8" for file names.
823     * <p>
824     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
825     * </p>
826     *
827     * @param channel the archive.
828     *
829     * @throws IOException if an error occurs while reading the file.
830     * @since 1.13
831     * @deprecated Use {@link Builder#get()}.
832     */
833    @Deprecated
834    public ZipFile(final SeekableByteChannel channel) throws IOException {
835        this(channel, "a SeekableByteChannel", CharsetNames.UTF_8, true);
836    }
837
838    /**
839     * Opens the given channel for reading, assuming the specified encoding for file names.
840     * <p>
841     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
842     * </p>
843     *
844     * @param channel  the archive.
845     * @param encoding the encoding to use for file names, use null for the platform's default encoding
846     * @throws IOException if an error occurs while reading the file.
847     * @since 1.13
848     * @deprecated Use {@link Builder#get()}.
849     */
850    @Deprecated
851    public ZipFile(final SeekableByteChannel channel, final String encoding) throws IOException {
852        this(channel, "a SeekableByteChannel", encoding, true);
853    }
854
855    private ZipFile(final SeekableByteChannel channel, final String channelDescription, final Charset encoding, final boolean useUnicodeExtraFields,
856            final boolean closeOnError, final boolean ignoreLocalFileHeader) throws IOException {
857        this.isSplitZipArchive = channel instanceof ZipSplitReadOnlySeekableByteChannel;
858        this.encoding = Charsets.toCharset(encoding, Builder.DEFAULT_CHARSET);
859        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
860        this.useUnicodeExtraFields = useUnicodeExtraFields;
861        this.archive = channel;
862        boolean success = false;
863        try {
864            final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = populateFromCentralDirectory();
865            if (!ignoreLocalFileHeader) {
866                resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
867            }
868            fillNameMap();
869            success = true;
870        } catch (final IOException e) {
871            throw new IOException("Error reading Zip content from " + channelDescription, e);
872        } finally {
873            this.closed = !success;
874            if (!success && closeOnError) {
875                org.apache.commons.io.IOUtils.closeQuietly(archive);
876            }
877        }
878    }
879
880    /**
881     * Opens the given channel for reading, assuming the specified encoding for file names.
882     * <p>
883     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
884     * </p>
885     *
886     * @param channel               the archive.
887     * @param channelDescription    description of the archive, used for error messages only.
888     * @param encoding              the encoding to use for file names, use null for the platform's default encoding
889     * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
890     * @throws IOException if an error occurs while reading the file.
891     * @since 1.13
892     * @deprecated Use {@link Builder#get()}.
893     */
894    @Deprecated
895    public ZipFile(final SeekableByteChannel channel, final String channelDescription, final String encoding, final boolean useUnicodeExtraFields)
896            throws IOException {
897        this(channel, channelDescription, encoding, useUnicodeExtraFields, false, false);
898    }
899
900    /**
901     * Opens the given channel for reading, assuming the specified encoding for file names.
902     * <p>
903     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
904     * </p>
905     * <p>
906     * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time
907     * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory.
908     * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is
909     * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
910     * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.
911     * </p>
912     *
913     * @param channel               the archive.
914     * @param channelDescription    description of the archive, used for error messages only.
915     * @param encoding              the encoding to use for file names, use null for the platform's default encoding
916     * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
917     * @param ignoreLocalFileHeader whether to ignore information stored inside the local file header (see the notes in this method's Javadoc)
918     * @throws IOException if an error occurs while reading the file.
919     * @since 1.19
920     * @deprecated Use {@link Builder#get()}.
921     */
922    @Deprecated
923    public ZipFile(final SeekableByteChannel channel, final String channelDescription, final String encoding, final boolean useUnicodeExtraFields,
924            final boolean ignoreLocalFileHeader) throws IOException {
925        this(channel, channelDescription, encoding, useUnicodeExtraFields, false, ignoreLocalFileHeader);
926    }
927
928    private ZipFile(final SeekableByteChannel channel, final String channelDescription, final String encoding, final boolean useUnicodeExtraFields,
929            final boolean closeOnError, final boolean ignoreLocalFileHeader) throws IOException {
930        this(channel, channelDescription, Charsets.toCharset(encoding), useUnicodeExtraFields, closeOnError, ignoreLocalFileHeader);
931    }
932
933    /**
934     * Opens the given file for reading, assuming "UTF-8".
935     *
936     * @param name name of the archive.
937     * @throws IOException if an error occurs while reading the file.
938     * @deprecated Use {@link Builder#get()}.
939     */
940    @Deprecated
941    public ZipFile(final String name) throws IOException {
942        this(new File(name).toPath(), CharsetNames.UTF_8);
943    }
944
945    /**
946     * Opens the given file for reading, assuming the specified encoding for file names, scanning unicode extra fields.
947     *
948     * @param name     name of the archive.
949     * @param encoding the encoding to use for file names, use null for the platform's default encoding
950     * @throws IOException if an error occurs while reading the file.
951     * @deprecated Use {@link Builder#get()}.
952     */
953    @Deprecated
954    public ZipFile(final String name, final String encoding) throws IOException {
955        this(new File(name).toPath(), encoding, true);
956    }
957
958    /**
959     * Whether this class is able to read the given entry.
960     * <p>
961     * May return false if it is set up to use encryption or a compression method that hasn't been implemented yet.
962     * </p>
963     *
964     * @since 1.1
965     * @param entry the entry
966     * @return whether this class is able to read the given entry.
967     */
968    public boolean canReadEntryData(final ZipArchiveEntry entry) {
969        return ZipUtil.canHandleEntryData(entry);
970    }
971
972    /**
973     * Closes the archive.
974     *
975     * @throws IOException if an error occurs closing the archive.
976     */
977    @Override
978    public void close() throws IOException {
979        // this flag is only written here and read in finalize() which
980        // can never be run in parallel.
981        // no synchronization needed.
982        closed = true;
983        archive.close();
984    }
985
986    /**
987     * Transfer selected entries from this ZIP file to a given #ZipArchiveOutputStream. Compression and all other attributes will be as in this file.
988     * <p>
989     * This method transfers entries based on the central directory of the ZIP file.
990     * </p>
991     *
992     * @param target    The zipArchiveOutputStream to write the entries to
993     * @param predicate A predicate that selects which entries to write
994     * @throws IOException on error
995     */
996    public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate) throws IOException {
997        final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder();
998        while (src.hasMoreElements()) {
999            final ZipArchiveEntry entry = src.nextElement();
1000            if (predicate.test(entry)) {
1001                target.addRawArchiveEntry(entry, getRawInputStream(entry));
1002            }
1003        }
1004    }
1005
1006    /**
1007     * Creates new BoundedInputStream, according to implementation of underlying archive channel.
1008     */
1009    private BoundedArchiveInputStream createBoundedInputStream(final long start, final long remaining) {
1010        if (start < 0 || remaining < 0 || start + remaining < start) {
1011            throw new IllegalArgumentException("Corrupted archive, stream boundaries" + " are out of range");
1012        }
1013        return archive instanceof FileChannel ? new BoundedFileChannelInputStream(start, remaining, (FileChannel) archive)
1014                : new BoundedSeekableByteChannelInputStream(start, remaining, archive);
1015    }
1016
1017    private void fillNameMap() {
1018        entries.forEach(ze -> {
1019            // entries are filled in populateFromCentralDirectory and
1020            // never modified
1021            final String name = ze.getName();
1022            final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.computeIfAbsent(name, k -> new LinkedList<>());
1023            entriesOfThatName.addLast(ze);
1024        });
1025    }
1026
1027    /**
1028     * Ensures that the close method of this ZIP file is called when there are no more references to it.
1029     *
1030     * @see #close()
1031     */
1032    @Override
1033    protected void finalize() throws Throwable {
1034        try {
1035            if (!closed) {
1036                close();
1037            }
1038        } finally {
1039            super.finalize();
1040        }
1041    }
1042
1043    /**
1044     * Gets an InputStream for reading the content before the first local file header.
1045     *
1046     * @return null if there is no content before the first local file header. Otherwise, returns a stream to read the content before the first local file
1047     *         header.
1048     * @since 1.23
1049     */
1050    public InputStream getContentBeforeFirstLocalFileHeader() {
1051        return firstLocalFileHeaderOffset == 0 ? null : createBoundedInputStream(0, firstLocalFileHeaderOffset);
1052    }
1053
1054    private long getDataOffset(final ZipArchiveEntry ze) throws IOException {
1055        final long s = ze.getDataOffset();
1056        if (s == EntryStreamOffsets.OFFSET_UNKNOWN) {
1057            setDataOffset(ze);
1058            return ze.getDataOffset();
1059        }
1060        return s;
1061    }
1062
1063    /**
1064     * Gets the encoding to use for file names and the file comment.
1065     *
1066     * @return null if using the platform's default character encoding.
1067     */
1068    public String getEncoding() {
1069        return encoding.name();
1070    }
1071
1072    /**
1073     * Gets all entries.
1074     * <p>
1075     * Entries will be returned in the same order they appear within the archive's central directory.
1076     * </p>
1077     *
1078     * @return all entries as {@link ZipArchiveEntry} instances
1079     */
1080    public Enumeration<ZipArchiveEntry> getEntries() {
1081        return Collections.enumeration(entries);
1082    }
1083
1084    /**
1085     * Gets all named entries in the same order they appear within the archive's central directory.
1086     *
1087     * @param name name of the entry.
1088     * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the given name
1089     * @since 1.6
1090     */
1091    public Iterable<ZipArchiveEntry> getEntries(final String name) {
1092        return nameMap.getOrDefault(name, ZipArchiveEntry.EMPTY_LINKED_LIST);
1093    }
1094
1095    /**
1096     * Gets all entries in physical order.
1097     * <p>
1098     * Entries will be returned in the same order their contents appear within the archive.
1099     * </p>
1100     *
1101     * @return all entries as {@link ZipArchiveEntry} instances
1102     *
1103     * @since 1.1
1104     */
1105    public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() {
1106        final ZipArchiveEntry[] allEntries = entries.toArray(ZipArchiveEntry.EMPTY_ARRAY);
1107        return Collections.enumeration(Arrays.asList(sortByOffset(allEntries)));
1108    }
1109
1110    /**
1111     * Gets all named entries in the same order their contents appear within the archive.
1112     *
1113     * @param name name of the entry.
1114     * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the given name
1115     * @since 1.6
1116     */
1117    public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) {
1118        final LinkedList<ZipArchiveEntry> linkedList = nameMap.getOrDefault(name, ZipArchiveEntry.EMPTY_LINKED_LIST);
1119        return Arrays.asList(sortByOffset(linkedList.toArray(ZipArchiveEntry.EMPTY_ARRAY)));
1120    }
1121
1122    /**
1123     * Gets a named entry or {@code null} if no entry by that name exists.
1124     * <p>
1125     * If multiple entries with the same name exist the first entry in the archive's central directory by that name is returned.
1126     * </p>
1127     *
1128     * @param name name of the entry.
1129     * @return the ZipArchiveEntry corresponding to the given name - or {@code null} if not present.
1130     */
1131    public ZipArchiveEntry getEntry(final String name) {
1132        final LinkedList<ZipArchiveEntry> entries = nameMap.get(name);
1133        return entries != null ? entries.getFirst() : null;
1134    }
1135
1136    /**
1137     * Gets the offset of the first local file header in the file.
1138     *
1139     * @return the length of the content before the first local file header
1140     * @since 1.23
1141     */
1142    public long getFirstLocalFileHeaderOffset() {
1143        return firstLocalFileHeaderOffset;
1144    }
1145
1146    /**
1147     * Gets an InputStream for reading the contents of the given entry.
1148     *
1149     * @param entry the entry to get the stream for.
1150     * @return a stream to read the entry from. The returned stream implements {@link InputStreamStatistics}.
1151     * @throws IOException if unable to create an input stream from the zipEntry.
1152     */
1153    public InputStream getInputStream(final ZipArchiveEntry entry) throws IOException {
1154        if (!(entry instanceof Entry)) {
1155            return null;
1156        }
1157        // cast validity is checked just above
1158        ZipUtil.checkRequestedFeatures(entry);
1159
1160        // doesn't get closed if the method is not supported - which
1161        // should never happen because of the checkRequestedFeatures
1162        // call above
1163        final InputStream is = new BufferedInputStream(getRawInputStream(entry)); // NOSONAR
1164        switch (ZipMethod.getMethodByCode(entry.getMethod())) {
1165        case STORED:
1166            return new StoredStatisticsStream(is);
1167        case UNSHRINKING:
1168            return new UnshrinkingInputStream(is);
1169        case IMPLODING:
1170            try {
1171                return new ExplodingInputStream(entry.getGeneralPurposeBit().getSlidingDictionarySize(),
1172                        entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), is);
1173            } catch (final IllegalArgumentException ex) {
1174                throw new IOException("bad IMPLODE data", ex);
1175            }
1176        case DEFLATED:
1177            final Inflater inflater = new Inflater(true);
1178            // Inflater with nowrap=true has this odd contract for a zero padding
1179            // byte following the data stream; this used to be zlib's requirement
1180            // and has been fixed a long time ago, but the contract persists so
1181            // we comply.
1182            // https://docs.oracle.com/javase/8/docs/api/java/util/zip/Inflater.html#Inflater(boolean)
1183            return new InflaterInputStreamWithStatistics(new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)), inflater) {
1184                @Override
1185                public void close() throws IOException {
1186                    try {
1187                        super.close();
1188                    } finally {
1189                        inflater.end();
1190                    }
1191                }
1192            };
1193        case BZIP2:
1194            return new BZip2CompressorInputStream(is);
1195        case ENHANCED_DEFLATED:
1196            return new Deflate64CompressorInputStream(is);
1197        case AES_ENCRYPTED:
1198        case EXPANDING_LEVEL_1:
1199        case EXPANDING_LEVEL_2:
1200        case EXPANDING_LEVEL_3:
1201        case EXPANDING_LEVEL_4:
1202        case JPEG:
1203        case LZMA:
1204        case PKWARE_IMPLODING:
1205        case PPMD:
1206        case TOKENIZATION:
1207        case UNKNOWN:
1208        case WAVPACK:
1209        case XZ:
1210        default:
1211            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(entry.getMethod()), entry);
1212        }
1213    }
1214
1215    /**
1216     * Gets the raw stream of the archive entry (compressed form).
1217     * <p>
1218     * This method does not relate to how/if we understand the payload in the stream, since we really only intend to move it on to somewhere else.
1219     * </p>
1220     * <p>
1221     * Since version 1.22, this method will make an attempt to read the entry's data stream offset, even if the {@code ignoreLocalFileHeader} parameter was
1222     * {@code true} in the constructor. An IOException can also be thrown from the body of the method if this lookup fails for some reason.
1223     * </p>
1224     *
1225     * @param entry The entry to get the stream for
1226     * @return The raw input stream containing (possibly) compressed data.
1227     * @since 1.11
1228     * @throws IOException if there is a problem reading data offset (added in version 1.22).
1229     */
1230    public InputStream getRawInputStream(final ZipArchiveEntry entry) throws IOException {
1231        if (!(entry instanceof Entry)) {
1232            return null;
1233        }
1234        final long start = getDataOffset(entry);
1235        if (start == EntryStreamOffsets.OFFSET_UNKNOWN) {
1236            return null;
1237        }
1238        return createBoundedInputStream(start, entry.getCompressedSize());
1239    }
1240
1241    /**
1242     * Gets the entry's content as a String if isUnixSymlink() returns true for it, otherwise returns null.
1243     * <p>
1244     * This method assumes the symbolic link's file name uses the same encoding that as been specified for this ZipFile.
1245     * </p>
1246     *
1247     * @param entry ZipArchiveEntry object that represents the symbolic link
1248     * @return entry's content as a String
1249     * @throws IOException problem with content's input stream
1250     * @since 1.5
1251     */
1252    public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException {
1253        if (entry != null && entry.isUnixSymlink()) {
1254            try (InputStream in = getInputStream(entry)) {
1255                return zipEncoding.decode(org.apache.commons.io.IOUtils.toByteArray(in));
1256            }
1257        }
1258        return null;
1259    }
1260
1261    /**
1262     * Reads the central directory of the given archive and populates the internal tables with ZipArchiveEntry instances.
1263     * <p>
1264     * The ZipArchiveEntrys will know all data that can be obtained from the central directory alone, but not the data that requires the local file header or
1265     * additional data to be read.
1266     * </p>
1267     *
1268     * @return a map of zip entries that didn't have the language encoding flag set when read.
1269     */
1270    private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory() throws IOException {
1271        final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag = new HashMap<>();
1272
1273        positionAtCentralDirectory();
1274        centralDirectoryStartOffset = archive.position();
1275
1276        wordBbuf.rewind();
1277        IOUtils.readFully(archive, wordBbuf);
1278        long sig = ZipLong.getValue(wordBuf);
1279
1280        if (sig != CFH_SIG && startsWithLocalFileHeader()) {
1281            throw new IOException("Central directory is empty, can't expand" + " corrupt archive.");
1282        }
1283
1284        while (sig == CFH_SIG) {
1285            readCentralDirectoryEntry(noUTF8Flag);
1286            wordBbuf.rewind();
1287            IOUtils.readFully(archive, wordBbuf);
1288            sig = ZipLong.getValue(wordBuf);
1289        }
1290        return noUTF8Flag;
1291    }
1292
1293    /**
1294     * Searches for either the &quot;Zip64 end of central directory locator&quot; or the &quot;End of central dir record&quot;, parses it and positions the
1295     * stream at the first central directory record.
1296     */
1297    private void positionAtCentralDirectory() throws IOException {
1298        final boolean is64 = positionAtEndOfCentralDirectoryRecord(archive);
1299        if (!is64) {
1300            positionAtCentralDirectory32();
1301        } else {
1302            positionAtCentralDirectory64();
1303        }
1304    }
1305
1306    /**
1307     * Parses the &quot;End of central dir record&quot; and positions the stream at the first central directory record.
1308     *
1309     * Expects stream to be positioned at the beginning of the &quot;End of central dir record&quot;.
1310     */
1311    private void positionAtCentralDirectory32() throws IOException {
1312        final long endOfCentralDirectoryRecordOffset = archive.position();
1313        if (isSplitZipArchive) {
1314            skipBytes(CFD_DISK_OFFSET);
1315            shortBbuf.rewind();
1316            IOUtils.readFully(archive, shortBbuf);
1317            centralDirectoryStartDiskNumber = ZipShort.getValue(shortBuf);
1318
1319            skipBytes(CFD_LOCATOR_RELATIVE_OFFSET);
1320
1321            wordBbuf.rewind();
1322            IOUtils.readFully(archive, wordBbuf);
1323            centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf);
1324            ((ZipSplitReadOnlySeekableByteChannel) archive).position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset);
1325        } else {
1326            skipBytes(CFD_LENGTH_OFFSET);
1327            wordBbuf.rewind();
1328            IOUtils.readFully(archive, wordBbuf);
1329            final long centralDirectoryLength = ZipLong.getValue(wordBuf);
1330
1331            wordBbuf.rewind();
1332            IOUtils.readFully(archive, wordBbuf);
1333            centralDirectoryStartDiskNumber = 0;
1334            centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf);
1335
1336            firstLocalFileHeaderOffset = Long.max(endOfCentralDirectoryRecordOffset - centralDirectoryLength - centralDirectoryStartRelativeOffset, 0L);
1337            archive.position(centralDirectoryStartRelativeOffset + firstLocalFileHeaderOffset);
1338        }
1339    }
1340
1341    /**
1342     * Parses the &quot;Zip64 end of central directory locator&quot;, finds the &quot;Zip64 end of central directory record&quot; using the parsed information,
1343     * parses that and positions the stream at the first central directory record.
1344     *
1345     * Expects stream to be positioned right behind the &quot;Zip64 end of central directory locator&quot;'s signature.
1346     */
1347    private void positionAtCentralDirectory64() throws IOException {
1348        skipBytes(ZipConstants.WORD);
1349        if (isSplitZipArchive) {
1350            wordBbuf.rewind();
1351            IOUtils.readFully(archive, wordBbuf);
1352            final long diskNumberOfEOCD = ZipLong.getValue(wordBuf);
1353
1354            dwordBbuf.rewind();
1355            IOUtils.readFully(archive, dwordBbuf);
1356            final long relativeOffsetOfEOCD = ZipEightByteInteger.getLongValue(dwordBuf);
1357            ((ZipSplitReadOnlySeekableByteChannel) archive).position(diskNumberOfEOCD, relativeOffsetOfEOCD);
1358        } else {
1359            skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET - ZipConstants.WORD /* signature has already been read */);
1360            dwordBbuf.rewind();
1361            IOUtils.readFully(archive, dwordBbuf);
1362            archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
1363        }
1364
1365        wordBbuf.rewind();
1366        IOUtils.readFully(archive, wordBbuf);
1367        if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) {
1368            throw new ZipException("Archive's ZIP64 end of central directory locator is corrupt.");
1369        }
1370
1371        if (isSplitZipArchive) {
1372            skipBytes(ZIP64_EOCD_CFD_DISK_OFFSET - ZipConstants.WORD /* signature has already been read */);
1373            wordBbuf.rewind();
1374            IOUtils.readFully(archive, wordBbuf);
1375            centralDirectoryStartDiskNumber = ZipLong.getValue(wordBuf);
1376
1377            skipBytes(ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET);
1378
1379            dwordBbuf.rewind();
1380            IOUtils.readFully(archive, dwordBbuf);
1381            centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf);
1382            ((ZipSplitReadOnlySeekableByteChannel) archive).position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset);
1383        } else {
1384            skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET - ZipConstants.WORD /* signature has already been read */);
1385            dwordBbuf.rewind();
1386            IOUtils.readFully(archive, dwordBbuf);
1387            centralDirectoryStartDiskNumber = 0;
1388            centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf);
1389            archive.position(centralDirectoryStartRelativeOffset);
1390        }
1391    }
1392
1393    /**
1394     * Reads an individual entry of the central directory, creates an ZipArchiveEntry from it and adds it to the global maps.
1395     *
1396     * @param noUTF8Flag map used to collect entries that don't have their UTF-8 flag set and whose name will be set by data read from the local file header
1397     *                   later. The current entry may be added to this map.
1398     */
1399    private void readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag) throws IOException {
1400        cfhBbuf.rewind();
1401        IOUtils.readFully(archive, cfhBbuf);
1402        int off = 0;
1403        final Entry ze = new Entry();
1404
1405        final int versionMadeBy = ZipShort.getValue(cfhBuf, off);
1406        off += ZipConstants.SHORT;
1407        ze.setVersionMadeBy(versionMadeBy);
1408        ze.setPlatform(versionMadeBy >> BYTE_SHIFT & NIBLET_MASK);
1409
1410        ze.setVersionRequired(ZipShort.getValue(cfhBuf, off));
1411        off += ZipConstants.SHORT; // version required
1412
1413        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off);
1414        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
1415        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.ZIP_ENCODING_UTF_8 : zipEncoding;
1416        if (hasUTF8Flag) {
1417            ze.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
1418        }
1419        ze.setGeneralPurposeBit(gpFlag);
1420        ze.setRawFlag(ZipShort.getValue(cfhBuf, off));
1421
1422        off += ZipConstants.SHORT;
1423
1424        // noinspection MagicConstant
1425        ze.setMethod(ZipShort.getValue(cfhBuf, off));
1426        off += ZipConstants.SHORT;
1427
1428        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off));
1429        ze.setTime(time);
1430        off += ZipConstants.WORD;
1431
1432        ze.setCrc(ZipLong.getValue(cfhBuf, off));
1433        off += ZipConstants.WORD;
1434
1435        long size = ZipLong.getValue(cfhBuf, off);
1436        if (size < 0) {
1437            throw new IOException("broken archive, entry with negative compressed size");
1438        }
1439        ze.setCompressedSize(size);
1440        off += ZipConstants.WORD;
1441
1442        size = ZipLong.getValue(cfhBuf, off);
1443        if (size < 0) {
1444            throw new IOException("broken archive, entry with negative size");
1445        }
1446        ze.setSize(size);
1447        off += ZipConstants.WORD;
1448
1449        final int fileNameLen = ZipShort.getValue(cfhBuf, off);
1450        off += ZipConstants.SHORT;
1451        if (fileNameLen < 0) {
1452            throw new IOException("broken archive, entry with negative fileNameLen");
1453        }
1454
1455        final int extraLen = ZipShort.getValue(cfhBuf, off);
1456        off += ZipConstants.SHORT;
1457        if (extraLen < 0) {
1458            throw new IOException("broken archive, entry with negative extraLen");
1459        }
1460
1461        final int commentLen = ZipShort.getValue(cfhBuf, off);
1462        off += ZipConstants.SHORT;
1463        if (commentLen < 0) {
1464            throw new IOException("broken archive, entry with negative commentLen");
1465        }
1466
1467        ze.setDiskNumberStart(ZipShort.getValue(cfhBuf, off));
1468        off += ZipConstants.SHORT;
1469
1470        ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off));
1471        off += ZipConstants.SHORT;
1472
1473        ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off));
1474        off += ZipConstants.WORD;
1475
1476        final byte[] fileName = IOUtils.readRange(archive, fileNameLen);
1477        if (fileName.length < fileNameLen) {
1478            throw new EOFException();
1479        }
1480        ze.setName(entryEncoding.decode(fileName), fileName);
1481
1482        // LFH offset,
1483        ze.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, off) + firstLocalFileHeaderOffset);
1484        // data offset will be filled later
1485        entries.add(ze);
1486
1487        final byte[] cdExtraData = IOUtils.readRange(archive, extraLen);
1488        if (cdExtraData.length < extraLen) {
1489            throw new EOFException();
1490        }
1491        try {
1492            ze.setCentralDirectoryExtra(cdExtraData);
1493        } catch (final RuntimeException e) {
1494            final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName());
1495            z.initCause(e);
1496            throw z;
1497        }
1498
1499        setSizesAndOffsetFromZip64Extra(ze);
1500        sanityCheckLFHOffset(ze);
1501
1502        final byte[] comment = IOUtils.readRange(archive, commentLen);
1503        if (comment.length < commentLen) {
1504            throw new EOFException();
1505        }
1506        ze.setComment(entryEncoding.decode(comment));
1507
1508        if (!hasUTF8Flag && useUnicodeExtraFields) {
1509            noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
1510        }
1511
1512        ze.setStreamContiguous(true);
1513    }
1514
1515    /**
1516     * Walks through all recorded entries and adds the data available from the local file header.
1517     * <p>
1518     * Also records the offsets for the data to read from the entries.
1519     * </p>
1520     */
1521    private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag) throws IOException {
1522        for (final ZipArchiveEntry zipArchiveEntry : entries) {
1523            // entries are filled in populateFromCentralDirectory and never modified
1524            final Entry ze = (Entry) zipArchiveEntry;
1525            final int[] lens = setDataOffset(ze);
1526            final int fileNameLen = lens[0];
1527            final int extraFieldLen = lens[1];
1528            skipBytes(fileNameLen);
1529            final byte[] localExtraData = IOUtils.readRange(archive, extraFieldLen);
1530            if (localExtraData.length < extraFieldLen) {
1531                throw new EOFException();
1532            }
1533            try {
1534                ze.setExtra(localExtraData);
1535            } catch (final RuntimeException e) {
1536                final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName());
1537                z.initCause(e);
1538                throw z;
1539            }
1540
1541            if (entriesWithoutUTF8Flag.containsKey(ze)) {
1542                final NameAndComment nc = entriesWithoutUTF8Flag.get(ze);
1543                ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, nc.comment);
1544            }
1545        }
1546    }
1547
1548    private void sanityCheckLFHOffset(final ZipArchiveEntry entry) throws IOException {
1549        if (entry.getDiskNumberStart() < 0) {
1550            throw new IOException("broken archive, entry with negative disk number");
1551        }
1552        if (entry.getLocalHeaderOffset() < 0) {
1553            throw new IOException("broken archive, entry with negative local file header offset");
1554        }
1555        if (isSplitZipArchive) {
1556            if (entry.getDiskNumberStart() > centralDirectoryStartDiskNumber) {
1557                throw new IOException("local file header for " + entry.getName() + " starts on a later disk than central directory");
1558            }
1559            if (entry.getDiskNumberStart() == centralDirectoryStartDiskNumber && entry.getLocalHeaderOffset() > centralDirectoryStartRelativeOffset) {
1560                throw new IOException("local file header for " + entry.getName() + " starts after central directory");
1561            }
1562        } else if (entry.getLocalHeaderOffset() > centralDirectoryStartOffset) {
1563            throw new IOException("local file header for " + entry.getName() + " starts after central directory");
1564        }
1565    }
1566
1567    private int[] setDataOffset(final ZipArchiveEntry entry) throws IOException {
1568        long offset = entry.getLocalHeaderOffset();
1569        if (isSplitZipArchive) {
1570            ((ZipSplitReadOnlySeekableByteChannel) archive).position(entry.getDiskNumberStart(), offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
1571            // the offset should be updated to the global offset
1572            offset = archive.position() - LFH_OFFSET_FOR_FILENAME_LENGTH;
1573        } else {
1574            archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
1575        }
1576        wordBbuf.rewind();
1577        IOUtils.readFully(archive, wordBbuf);
1578        wordBbuf.flip();
1579        wordBbuf.get(shortBuf);
1580        final int fileNameLen = ZipShort.getValue(shortBuf);
1581        wordBbuf.get(shortBuf);
1582        final int extraFieldLen = ZipShort.getValue(shortBuf);
1583        entry.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH + ZipConstants.SHORT + ZipConstants.SHORT + fileNameLen + extraFieldLen);
1584        if (entry.getDataOffset() + entry.getCompressedSize() > centralDirectoryStartOffset) {
1585            throw new IOException("data for " + entry.getName() + " overlaps with central directory.");
1586        }
1587        return new int[] { fileNameLen, extraFieldLen };
1588    }
1589
1590    /**
1591     * If the entry holds a Zip64 extended information extra field, read sizes from there if the entry's sizes are set to 0xFFFFFFFFF, do the same for the
1592     * offset of the local file header.
1593     * <p>
1594     * Ensures the Zip64 extra either knows both compressed and uncompressed size or neither of both as the internal logic in ExtraFieldUtils forces the field
1595     * to create local header data even if they are never used - and here a field with only one size would be invalid.
1596     * </p>
1597     */
1598    private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry entry) throws IOException {
1599        final ZipExtraField extra = entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
1600        if (extra != null && !(extra instanceof Zip64ExtendedInformationExtraField)) {
1601            throw new ZipException("archive contains unparseable zip64 extra field");
1602        }
1603        final Zip64ExtendedInformationExtraField z64 = (Zip64ExtendedInformationExtraField) extra;
1604        if (z64 != null) {
1605            final boolean hasUncompressedSize = entry.getSize() == ZipConstants.ZIP64_MAGIC;
1606            final boolean hasCompressedSize = entry.getCompressedSize() == ZipConstants.ZIP64_MAGIC;
1607            final boolean hasRelativeHeaderOffset = entry.getLocalHeaderOffset() == ZipConstants.ZIP64_MAGIC;
1608            final boolean hasDiskStart = entry.getDiskNumberStart() == ZipConstants.ZIP64_MAGIC_SHORT;
1609            z64.reparseCentralDirectoryData(hasUncompressedSize, hasCompressedSize, hasRelativeHeaderOffset, hasDiskStart);
1610
1611            if (hasUncompressedSize) {
1612                final long size = z64.getSize().getLongValue();
1613                if (size < 0) {
1614                    throw new IOException("broken archive, entry with negative size");
1615                }
1616                entry.setSize(size);
1617            } else if (hasCompressedSize) {
1618                z64.setSize(new ZipEightByteInteger(entry.getSize()));
1619            }
1620
1621            if (hasCompressedSize) {
1622                final long size = z64.getCompressedSize().getLongValue();
1623                if (size < 0) {
1624                    throw new IOException("broken archive, entry with negative compressed size");
1625                }
1626                entry.setCompressedSize(size);
1627            } else if (hasUncompressedSize) {
1628                z64.setCompressedSize(new ZipEightByteInteger(entry.getCompressedSize()));
1629            }
1630
1631            if (hasRelativeHeaderOffset) {
1632                entry.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue());
1633            }
1634
1635            if (hasDiskStart) {
1636                entry.setDiskNumberStart(z64.getDiskStartNumber().getValue());
1637            }
1638        }
1639    }
1640
1641    /**
1642     * Skips the given number of bytes or throws an EOFException if skipping failed.
1643     */
1644    private void skipBytes(final int count) throws IOException {
1645        final long currentPosition = archive.position();
1646        final long newPosition = currentPosition + count;
1647        if (newPosition > archive.size()) {
1648            throw new EOFException();
1649        }
1650        archive.position(newPosition);
1651    }
1652
1653    /**
1654     * Sorts entries in place by offset.
1655     *
1656     * @param allEntries entries to sort
1657     * @return the given entries, sorted.
1658     */
1659    private ZipArchiveEntry[] sortByOffset(final ZipArchiveEntry[] allEntries) {
1660        Arrays.sort(allEntries, offsetComparator);
1661        return allEntries;
1662    }
1663
1664    /**
1665     * Checks whether the archive starts with an LFH. If it doesn't, it may be an empty archive.
1666     */
1667    private boolean startsWithLocalFileHeader() throws IOException {
1668        archive.position(firstLocalFileHeaderOffset);
1669        wordBbuf.rewind();
1670        IOUtils.readFully(archive, wordBbuf);
1671        return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG);
1672    }
1673}