001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers;
020
021import java.io.ByteArrayInputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.io.OutputStream;
025import java.security.AccessController;
026import java.security.PrivilegedAction;
027import java.util.Collections;
028import java.util.Locale;
029import java.util.ServiceLoader;
030import java.util.Set;
031import java.util.SortedMap;
032import java.util.TreeMap;
033
034import org.apache.commons.compress.archivers.ar.ArArchiveInputStream;
035import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream;
036import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream;
037import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream;
038import org.apache.commons.compress.archivers.cpio.CpioArchiveOutputStream;
039import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream;
040import org.apache.commons.compress.archivers.jar.JarArchiveInputStream;
041import org.apache.commons.compress.archivers.jar.JarArchiveOutputStream;
042import org.apache.commons.compress.archivers.sevenz.SevenZFile;
043import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
044import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
045import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
046import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
047import org.apache.commons.compress.utils.IOUtils;
048import org.apache.commons.compress.utils.Sets;
049
050/**
051 * Factory to create Archive[In|Out]putStreams from names or the first bytes of
052 * the InputStream. In order to add other implementations, you should extend
053 * ArchiveStreamFactory and override the appropriate methods (and call their
054 * implementation from super of course).
055 *
056 * Compressing a ZIP-File:
057 *
058 * <pre>
059 * final OutputStream out = Files.newOutputStream(output.toPath());
060 * ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream(ArchiveStreamFactory.ZIP, out);
061 *
062 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml"));
063 * IOUtils.copy(Files.newInputStream(file1.toPath()), os);
064 * os.closeArchiveEntry();
065 *
066 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml"));
067 * IOUtils.copy(Files.newInputStream(file2.toPath()), os);
068 * os.closeArchiveEntry();
069 * os.close();
070 * </pre>
071 *
072 * Decompressing a ZIP-File:
073 *
074 * <pre>
075 * final InputStream is = Files.newInputStream(input.toPath());
076 * ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is);
077 * ZipArchiveEntry entry = (ZipArchiveEntry)in.getNextEntry();
078 * OutputStream out = Files.newOutputStream(dir.toPath().resolve(entry.getName()));
079 * IOUtils.copy(in, out);
080 * out.close();
081 * in.close();
082 * </pre>
083 * @Immutable provided that the deprecated method setEntryEncoding is not used.
084 * @ThreadSafe even if the deprecated method setEntryEncoding is used
085 */
086public class ArchiveStreamFactory implements ArchiveStreamProvider {
087
088    private static final int TAR_HEADER_SIZE = 512;
089
090    private static final int DUMP_SIGNATURE_SIZE = 32;
091
092    private static final int SIGNATURE_SIZE = 12;
093
094    /**
095     * The singleton instance using the platform default encoding.
096     * @since 1.21
097     */
098    public static final ArchiveStreamFactory DEFAULT = new ArchiveStreamFactory();
099
100    /**
101     * Constant (value {@value}) used to identify the APK archive format.
102     * <p>
103     * APK file extensions are .apk, .xapk, .apks, .apkm
104     * </p>
105     *
106     * @since 1.22
107     */
108    public static final String APK = "apk";
109
110    /**
111     * Constant (value {@value}) used to identify the XAPK archive format.
112     * <p>
113     * APK file extensions are .apk, .xapk, .apks, .apkm
114     * </p>
115     *
116     * @since 1.22
117     */
118    public static final String XAPK = "xapk";
119
120    /**
121     * Constant (value {@value}) used to identify the APKS archive format.
122     * <p>
123     * APK file extensions are .apk, .xapk, .apks, .apkm
124     * </p>
125     *
126     * @since 1.22
127     */
128    public static final String APKS = "apks";
129
130    /**
131     * Constant (value {@value}) used to identify the APKM archive format.
132     * <p>
133     * APK file extensions are .apk, .xapk, .apks, .apkm
134     * </p>
135     *
136     * @since 1.22
137     */
138    public static final String APKM = "apkm";
139
140    /**
141     * Constant (value {@value}) used to identify the AR archive format.
142     * @since 1.1
143     */
144    public static final String AR = "ar";
145
146    /**
147     * Constant (value {@value}) used to identify the ARJ archive format.
148     * Not supported as an output stream type.
149     * @since 1.6
150     */
151    public static final String ARJ = "arj";
152
153    /**
154     * Constant (value {@value}) used to identify the CPIO archive format.
155     * @since 1.1
156     */
157    public static final String CPIO = "cpio";
158
159    /**
160     * Constant (value {@value}) used to identify the Unix DUMP archive format.
161     * Not supported as an output stream type.
162     * @since 1.3
163     */
164    public static final String DUMP = "dump";
165
166    /**
167     * Constant (value {@value}) used to identify the JAR archive format.
168     * @since 1.1
169     */
170    public static final String JAR = "jar";
171
172    /**
173     * Constant used to identify the TAR archive format.
174     * @since 1.1
175     */
176    public static final String TAR = "tar";
177
178    /**
179     * Constant (value {@value}) used to identify the ZIP archive format.
180     * @since 1.1
181     */
182    public static final String ZIP = "zip";
183
184    /**
185     * Constant (value {@value}) used to identify the 7z archive format.
186     * @since 1.8
187     */
188    public static final String SEVEN_Z = "7z";
189
190    /**
191     * Entry encoding, null for the platform default.
192     */
193    private final String encoding;
194
195    /**
196     * Entry encoding, null for the default.
197     */
198    private volatile String entryEncoding;
199
200    private SortedMap<String, ArchiveStreamProvider> archiveInputStreamProviders;
201
202    private SortedMap<String, ArchiveStreamProvider> archiveOutputStreamProviders;
203
204    static void putAll(final Set<String> names, final ArchiveStreamProvider provider, final TreeMap<String, ArchiveStreamProvider> map) {
205        names.forEach(name -> map.put(toKey(name), provider));
206    }
207
208    private static Iterable<ArchiveStreamProvider> archiveStreamProviderIterable() {
209        return ServiceLoader.load(ArchiveStreamProvider.class, ClassLoader.getSystemClassLoader());
210    }
211    
212    private static String toKey(final String name) {
213        return name.toUpperCase(Locale.ROOT);
214    }
215
216    /**
217     * Constructs a new sorted map from input stream provider names to provider
218     * objects.
219     *
220     * <p>
221     * The map returned by this method will have one entry for each provider for
222     * which support is available in the current Java virtual machine. If two or
223     * more supported provider have the same name then the resulting map will
224     * contain just one of them; which one it will contain is not specified.
225     * </p>
226     *
227     * <p>
228     * The invocation of this method, and the subsequent use of the resulting
229     * map, may cause time-consuming disk or network I/O operations to occur.
230     * This method is provided for applications that need to enumerate all of
231     * the available providers, for example to allow user provider selection.
232     * </p>
233     *
234     * <p>
235     * This method may return different results at different times if new
236     * providers are dynamically made available to the current Java virtual
237     * machine.
238     * </p>
239     *
240     * @return An immutable, map from names to provider objects
241     * @since 1.13
242     */
243    public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveInputStreamProviders() {
244        return AccessController.doPrivileged((PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>) () -> {
245            final TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>();
246            putAll(DEFAULT.getInputStreamArchiveNames(), DEFAULT, map);
247            archiveStreamProviderIterable().forEach(provider -> putAll(provider.getInputStreamArchiveNames(), provider, map));
248            return map;
249        });
250    }
251
252    /**
253     * Constructs a new sorted map from output stream provider names to provider
254     * objects.
255     *
256     * <p>
257     * The map returned by this method will have one entry for each provider for
258     * which support is available in the current Java virtual machine. If two or
259     * more supported provider have the same name then the resulting map will
260     * contain just one of them; which one it will contain is not specified.
261     * </p>
262     *
263     * <p>
264     * The invocation of this method, and the subsequent use of the resulting
265     * map, may cause time-consuming disk or network I/O operations to occur.
266     * This method is provided for applications that need to enumerate all of
267     * the available providers, for example to allow user provider selection.
268     * </p>
269     *
270     * <p>
271     * This method may return different results at different times if new
272     * providers are dynamically made available to the current Java virtual
273     * machine.
274     * </p>
275     *
276     * @return An immutable, map from names to provider objects
277     * @since 1.13
278     */
279    public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveOutputStreamProviders() {
280        return AccessController.doPrivileged((PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>) () -> {
281            final TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>();
282            putAll(DEFAULT.getOutputStreamArchiveNames(), DEFAULT, map);
283            archiveStreamProviderIterable().forEach(provider -> putAll(provider.getOutputStreamArchiveNames(), provider, map));
284            return map;
285        });
286    }
287
288    /**
289     * Create an instance using the platform default encoding.
290     */
291    public ArchiveStreamFactory() {
292        this(null);
293    }
294
295    /**
296     * Create an instance using the specified encoding.
297     *
298     * @param encoding the encoding to be used.
299     *
300     * @since 1.10
301     */
302    public ArchiveStreamFactory(final String encoding) {
303        this.encoding = encoding;
304        // Also set the original field so can continue to use it.
305        this.entryEncoding = encoding;
306    }
307
308    /**
309     * Returns the encoding to use for arj, jar, zip, dump, cpio and tar
310     * files, or null for the archiver default.
311     *
312     * @return entry encoding, or null for the archiver default
313     * @since 1.5
314     */
315    public String getEntryEncoding() {
316        return entryEncoding;
317    }
318
319    /**
320     * Sets the encoding to use for arj, jar, zip, dump, cpio and tar files. Use null for the archiver default.
321     *
322     * @param entryEncoding the entry encoding, null uses the archiver default.
323     * @since 1.5
324     * @deprecated 1.10 use {@link #ArchiveStreamFactory(String)} to specify the encoding
325     * @throws IllegalStateException if the constructor {@link #ArchiveStreamFactory(String)}
326     * was used to specify the factory encoding.
327     */
328    @Deprecated
329    public void setEntryEncoding(final String entryEncoding) {
330        // Note: this does not detect new ArchiveStreamFactory(null) but that does not set the encoding anyway
331        if (encoding != null) {
332            throw new IllegalStateException("Cannot overide encoding set by the constructor");
333        }
334        this.entryEncoding = entryEncoding;
335    }
336
337    /**
338     * Creates an archive input stream from an archiver name and an input stream.
339     *
340     * @param archiverName the archive name,
341     * i.e. {@value #AR}, {@value #ARJ}, {@value #ZIP}, {@value #TAR}, {@value #JAR}, {@value #CPIO}, {@value #DUMP} or {@value #SEVEN_Z}
342     * @param in the input stream
343     * @return the archive input stream
344     * @throws ArchiveException if the archiver name is not known
345     * @throws StreamingNotSupportedException if the format cannot be
346     * read from a stream
347     * @throws IllegalArgumentException if the archiver name or stream is null
348     */
349    public ArchiveInputStream createArchiveInputStream(final String archiverName, final InputStream in) throws ArchiveException {
350        return createArchiveInputStream(archiverName, in, entryEncoding);
351    }
352
353    @Override
354    public ArchiveInputStream createArchiveInputStream(final String archiverName, final InputStream in,
355            final String actualEncoding) throws ArchiveException {
356
357        if (archiverName == null) {
358            throw new IllegalArgumentException("Archivername must not be null.");
359        }
360
361        if (in == null) {
362            throw new IllegalArgumentException("InputStream must not be null.");
363        }
364
365        if (AR.equalsIgnoreCase(archiverName)) {
366            return new ArArchiveInputStream(in);
367        }
368        if (ARJ.equalsIgnoreCase(archiverName)) {
369            if (actualEncoding != null) {
370                return new ArjArchiveInputStream(in, actualEncoding);
371            }
372            return new ArjArchiveInputStream(in);
373        }
374        if (ZIP.equalsIgnoreCase(archiverName)) {
375            if (actualEncoding != null) {
376                return new ZipArchiveInputStream(in, actualEncoding);
377            }
378            return new ZipArchiveInputStream(in);
379        }
380        if (TAR.equalsIgnoreCase(archiverName)) {
381            if (actualEncoding != null) {
382                return new TarArchiveInputStream(in, actualEncoding);
383            }
384            return new TarArchiveInputStream(in);
385        }
386        if (JAR.equalsIgnoreCase(archiverName) || APK.equalsIgnoreCase(archiverName)) {
387            if (actualEncoding != null) {
388                return new JarArchiveInputStream(in, actualEncoding);
389            }
390            return new JarArchiveInputStream(in);
391        }
392        if (CPIO.equalsIgnoreCase(archiverName)) {
393            if (actualEncoding != null) {
394                return new CpioArchiveInputStream(in, actualEncoding);
395            }
396            return new CpioArchiveInputStream(in);
397        }
398        if (DUMP.equalsIgnoreCase(archiverName)) {
399            if (actualEncoding != null) {
400                return new DumpArchiveInputStream(in, actualEncoding);
401            }
402            return new DumpArchiveInputStream(in);
403        }
404        if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
405            throw new StreamingNotSupportedException(SEVEN_Z);
406        }
407
408        final ArchiveStreamProvider archiveStreamProvider = getArchiveInputStreamProviders().get(toKey(archiverName));
409        if (archiveStreamProvider != null) {
410            return archiveStreamProvider.createArchiveInputStream(archiverName, in, actualEncoding);
411        }
412
413        throw new ArchiveException("Archiver: " + archiverName + " not found.");
414    }
415
416    /**
417     * Creates an archive output stream from an archiver name and an output stream.
418     *
419     * @param archiverName the archive name,
420     * i.e. {@value #AR}, {@value #ZIP}, {@value #TAR}, {@value #JAR} or {@value #CPIO}
421     * @param out the output stream
422     * @return the archive output stream
423     * @throws ArchiveException if the archiver name is not known
424     * @throws StreamingNotSupportedException if the format cannot be
425     * written to a stream
426     * @throws IllegalArgumentException if the archiver name or stream is null
427     */
428    public ArchiveOutputStream createArchiveOutputStream(final String archiverName, final OutputStream out)
429            throws ArchiveException {
430        return createArchiveOutputStream(archiverName, out, entryEncoding);
431    }
432
433    @Override
434    public ArchiveOutputStream createArchiveOutputStream(
435            final String archiverName, final OutputStream out, final String actualEncoding)
436            throws ArchiveException {
437        if (archiverName == null) {
438            throw new IllegalArgumentException("Archivername must not be null.");
439        }
440        if (out == null) {
441            throw new IllegalArgumentException("OutputStream must not be null.");
442        }
443
444        if (AR.equalsIgnoreCase(archiverName)) {
445            return new ArArchiveOutputStream(out);
446        }
447        if (ZIP.equalsIgnoreCase(archiverName)) {
448            final ZipArchiveOutputStream zip = new ZipArchiveOutputStream(out);
449            if (actualEncoding != null) {
450                zip.setEncoding(actualEncoding);
451            }
452            return zip;
453        }
454        if (TAR.equalsIgnoreCase(archiverName)) {
455            if (actualEncoding != null) {
456                return new TarArchiveOutputStream(out, actualEncoding);
457            }
458            return new TarArchiveOutputStream(out);
459        }
460        if (JAR.equalsIgnoreCase(archiverName)) {
461            if (actualEncoding != null) {
462                return new JarArchiveOutputStream(out, actualEncoding);
463            }
464            return new JarArchiveOutputStream(out);
465        }
466        if (CPIO.equalsIgnoreCase(archiverName)) {
467            if (actualEncoding != null) {
468                return new CpioArchiveOutputStream(out, actualEncoding);
469            }
470            return new CpioArchiveOutputStream(out);
471        }
472        if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
473            throw new StreamingNotSupportedException(SEVEN_Z);
474        }
475
476        final ArchiveStreamProvider archiveStreamProvider = getArchiveOutputStreamProviders().get(toKey(archiverName));
477        if (archiveStreamProvider != null) {
478            return archiveStreamProvider.createArchiveOutputStream(archiverName, out, actualEncoding);
479        }
480
481        throw new ArchiveException("Archiver: " + archiverName + " not found.");
482    }
483
484    /**
485     * Create an archive input stream from an input stream, autodetecting
486     * the archive type from the first few bytes of the stream. The InputStream
487     * must support marks, like BufferedInputStream.
488     *
489     * @param in the input stream
490     * @return the archive input stream
491     * @throws ArchiveException if the archiver name is not known
492     * @throws StreamingNotSupportedException if the format cannot be
493     * read from a stream
494     * @throws IllegalArgumentException if the stream is null or does not support mark
495     */
496    public ArchiveInputStream createArchiveInputStream(final InputStream in)
497            throws ArchiveException {
498        return createArchiveInputStream(detect(in), in);
499    }
500
501    /**
502     * Try to determine the type of Archiver
503     * @param in input stream
504     * @return type of archiver if found
505     * @throws ArchiveException if an archiver cannot be detected in the stream
506     * @since 1.14
507     */
508    public static String detect(final InputStream in) throws ArchiveException {
509        if (in == null) {
510            throw new IllegalArgumentException("Stream must not be null.");
511        }
512
513        if (!in.markSupported()) {
514            throw new IllegalArgumentException("Mark is not supported.");
515        }
516
517        final byte[] signature = new byte[SIGNATURE_SIZE];
518        in.mark(signature.length);
519        int signatureLength = -1;
520        try {
521            signatureLength = IOUtils.readFully(in, signature);
522            in.reset();
523        } catch (final IOException e) {
524            throw new ArchiveException("IOException while reading signature.", e);
525        }
526
527        if (ZipArchiveInputStream.matches(signature, signatureLength)) {
528            return ZIP;
529        }
530        if (JarArchiveInputStream.matches(signature, signatureLength)) {
531            return JAR;
532        }
533        if (ArArchiveInputStream.matches(signature, signatureLength)) {
534            return AR;
535        }
536        if (CpioArchiveInputStream.matches(signature, signatureLength)) {
537            return CPIO;
538        }
539        if (ArjArchiveInputStream.matches(signature, signatureLength)) {
540            return ARJ;
541        }
542        if (SevenZFile.matches(signature, signatureLength)) {
543            return SEVEN_Z;
544        }
545
546        // Dump needs a bigger buffer to check the signature;
547        final byte[] dumpsig = new byte[DUMP_SIGNATURE_SIZE];
548        in.mark(dumpsig.length);
549        try {
550            signatureLength = IOUtils.readFully(in, dumpsig);
551            in.reset();
552        } catch (final IOException e) {
553            throw new ArchiveException("IOException while reading dump signature", e);
554        }
555        if (DumpArchiveInputStream.matches(dumpsig, signatureLength)) {
556            return DUMP;
557        }
558
559        // Tar needs an even bigger buffer to check the signature; read the first block
560        final byte[] tarHeader = new byte[TAR_HEADER_SIZE];
561        in.mark(tarHeader.length);
562        try {
563            signatureLength = IOUtils.readFully(in, tarHeader);
564            in.reset();
565        } catch (final IOException e) {
566            throw new ArchiveException("IOException while reading tar signature", e);
567        }
568        if (TarArchiveInputStream.matches(tarHeader, signatureLength)) {
569            return TAR;
570        }
571
572        // COMPRESS-117 - improve auto-recognition
573        if (signatureLength >= TAR_HEADER_SIZE) {
574            TarArchiveInputStream tais = null;
575            try {
576                tais = new TarArchiveInputStream(new ByteArrayInputStream(tarHeader));
577                // COMPRESS-191 - verify the header checksum
578                if (tais.getNextTarEntry().isCheckSumOK()) {
579                    return TAR;
580                }
581            } catch (final Exception e) { // NOPMD NOSONAR
582                // can generate IllegalArgumentException as well
583                // as IOException
584                // autodetection, simply not a TAR
585                // ignored
586            } finally {
587                IOUtils.closeQuietly(tais);
588            }
589        }
590        throw new ArchiveException("No Archiver found for the stream signature");
591    }
592
593    public SortedMap<String, ArchiveStreamProvider> getArchiveInputStreamProviders() {
594        if (archiveInputStreamProviders == null) {
595            archiveInputStreamProviders = Collections
596                    .unmodifiableSortedMap(findAvailableArchiveInputStreamProviders());
597        }
598        return archiveInputStreamProviders;
599    }
600
601    public SortedMap<String, ArchiveStreamProvider> getArchiveOutputStreamProviders() {
602        if (archiveOutputStreamProviders == null) {
603            archiveOutputStreamProviders = Collections
604                    .unmodifiableSortedMap(findAvailableArchiveOutputStreamProviders());
605        }
606        return archiveOutputStreamProviders;
607    }
608
609    @Override
610    public Set<String> getInputStreamArchiveNames() {
611        return Sets.newHashSet(AR, ARJ, ZIP, TAR, JAR, CPIO, DUMP, SEVEN_Z);
612    }
613
614    @Override
615    public Set<String> getOutputStreamArchiveNames() {
616        return Sets.newHashSet(AR, ZIP, TAR, JAR, CPIO, SEVEN_Z);
617    }
618
619}