001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 *
017 */
018
019package org.apache.commons.compress.archivers.zip;
020
021import java.io.File;
022import java.io.IOException;
023import java.io.Serializable;
024import java.nio.ByteBuffer;
025import java.nio.channels.SeekableByteChannel;
026import java.nio.file.Files;
027import java.nio.file.Path;
028import java.nio.file.StandardOpenOption;
029import java.util.ArrayList;
030import java.util.Arrays;
031import java.util.Comparator;
032import java.util.List;
033import java.util.Objects;
034import java.util.regex.Pattern;
035import java.util.stream.Collectors;
036import java.util.stream.Stream;
037
038import org.apache.commons.compress.archivers.ArchiveStreamFactory;
039import org.apache.commons.compress.utils.FileNameUtils;
040import org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel;
041
042/**
043 * {@link MultiReadOnlySeekableByteChannel} that knows what a split ZIP archive should look like.
044 *
045 * <p>If you want to read a split archive using {@link ZipFile} then create an instance of this class from the parts of
046 * the archive.</p>
047 *
048 * @since 1.20
049 */
050public class ZipSplitReadOnlySeekableByteChannel extends MultiReadOnlySeekableByteChannel {
051
052    private static final Path[] EMPTY_PATH_ARRAY = {};
053    private static final int ZIP_SPLIT_SIGNATURE_LENGTH = 4;
054    private final ByteBuffer zipSplitSignatureByteBuffer =
055        ByteBuffer.allocate(ZIP_SPLIT_SIGNATURE_LENGTH);
056
057    /**
058     * Concatenates the given channels.
059     *
060     * <p>The channels should be add in ascending order, e.g. z01,
061     * z02, ... z99, zip please note that the .zip file is the last
062     * segment and should be added as the last one in the channels</p>
063     *
064     * @param channels the channels to concatenate
065     * @throws NullPointerException if channels is null
066     * @throws IOException if the first channel doesn't seem to hold
067     * the beginning of a split archive
068     */
069    public ZipSplitReadOnlySeekableByteChannel(final List<SeekableByteChannel> channels)
070        throws IOException {
071        super(channels);
072
073        // the first split zip segment should begin with zip split signature
074        assertSplitSignature(channels);
075    }
076
077    /**
078     * Based on the zip specification:
079     *
080     * <p>
081     * 8.5.3 Spanned/Split archives created using PKZIP for Windows
082     * (V2.50 or greater), PKZIP Command Line (V2.50 or greater),
083     * or PKZIP Explorer will include a special spanning
084     * signature as the first 4 bytes of the first segment of
085     * the archive.  This signature (0x08074b50) will be
086     * followed immediately by the local header signature for
087     * the first file in the archive.
088     *
089     * <p>
090     * the first 4 bytes of the first zip split segment should be the zip split signature(0x08074B50)
091     *
092     * @param channels channels to be validated
093     * @throws IOException
094     */
095    private void assertSplitSignature(final List<SeekableByteChannel> channels)
096        throws IOException {
097        final SeekableByteChannel channel = channels.get(0);
098        // the zip split file signature is at the beginning of the first split segment
099        channel.position(0L);
100
101        zipSplitSignatureByteBuffer.rewind();
102        channel.read(zipSplitSignatureByteBuffer);
103        final ZipLong signature = new ZipLong(zipSplitSignatureByteBuffer.array());
104        if (!signature.equals(ZipLong.DD_SIG)) {
105            channel.position(0L);
106            throw new IOException("The first zip split segment does not begin with split zip file signature");
107        }
108
109        channel.position(0L);
110    }
111
112    /**
113     * Concatenates the given channels.
114     *
115     * @param channels the channels to concatenate, note that the LAST CHANNEL of channels should be the LAST SEGMENT(.zip)
116     *                 and theses channels should be added in correct order (e.g. .z01, .z02... .z99, .zip)
117     * @return SeekableByteChannel that concatenates all provided channels
118     * @throws NullPointerException if channels is null
119     * @throws IOException if reading channels fails
120     */
121    public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel... channels) throws IOException {
122        if (Objects.requireNonNull(channels, "channels must not be null").length == 1) {
123            return channels[0];
124        }
125        return new ZipSplitReadOnlySeekableByteChannel(Arrays.asList(channels));
126    }
127
128    /**
129     * Concatenates the given channels.
130     *
131     * @param lastSegmentChannel channel of the last segment of split zip segments, its extension should be .zip
132     * @param channels           the channels to concatenate except for the last segment,
133     *                           note theses channels should be added in correct order (e.g. .z01, .z02... .z99)
134     * @return SeekableByteChannel that concatenates all provided channels
135     * @throws NullPointerException if lastSegmentChannel or channels is null
136     * @throws IOException if the first channel doesn't seem to hold
137     * the beginning of a split archive
138     */
139    public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel lastSegmentChannel,
140        final Iterable<SeekableByteChannel> channels) throws IOException {
141        Objects.requireNonNull(channels, "channels");
142        Objects.requireNonNull(lastSegmentChannel, "lastSegmentChannel");
143
144        final List<SeekableByteChannel> channelsList = new ArrayList<>();
145        channels.forEach(channelsList::add);
146        channelsList.add(lastSegmentChannel);
147
148        return forOrderedSeekableByteChannels(channelsList.toArray(new SeekableByteChannel[0]));
149    }
150
151    /**
152     * Concatenates zip split files from the last segment(the extension SHOULD be .zip)
153     *
154     * @param lastSegmentFile the last segment of zip split files, note that the extension SHOULD be .zip
155     * @return SeekableByteChannel that concatenates all zip split files
156     * @throws IllegalArgumentException if the lastSegmentFile's extension is NOT .zip
157     * @throws IOException if the first channel doesn't seem to hold
158     * the beginning of a split archive
159     */
160    public static SeekableByteChannel buildFromLastSplitSegment(final File lastSegmentFile) throws IOException {
161        return buildFromLastSplitSegment(lastSegmentFile.toPath());
162    }
163
164    /**
165     * Concatenates zip split files from the last segment (the extension MUST be .zip)
166     * @param lastSegmentPath the last segment of zip split files, note that the extension MUST be .zip
167     * @return SeekableByteChannel that concatenates all zip split files
168     * @throws IllegalArgumentException if the lastSegmentPath's extension is NOT .zip
169     * @throws IOException if the first channel doesn't seem to hold
170     * the beginning of a split archive
171     * @since 1.22
172     */
173    public static SeekableByteChannel buildFromLastSplitSegment(final Path lastSegmentPath) throws IOException {
174        final String extension = FileNameUtils.getExtension(lastSegmentPath);
175        if (!extension.equalsIgnoreCase(ArchiveStreamFactory.ZIP)) {
176            throw new IllegalArgumentException("The extension of last zip split segment should be .zip");
177        }
178
179        final Path parent = Objects.nonNull(lastSegmentPath.getParent()) ? lastSegmentPath.getParent()
180                : lastSegmentPath.getFileSystem().getPath(".");
181        final String fileBaseName = FileNameUtils.getBaseName(lastSegmentPath);
182        final ArrayList<Path> splitZipSegments;
183
184        // zip split segments should be like z01,z02....z(n-1) based on the zip specification
185        final Pattern pattern = Pattern.compile(Pattern.quote(fileBaseName) + ".[zZ][0-9]+");
186        try (Stream<Path> walk = Files.walk(parent, 1)) {
187            splitZipSegments = walk
188                    .filter(Files::isRegularFile)
189                    .filter(path -> pattern.matcher(path.getFileName().toString()).matches())
190                    .sorted(new ZipSplitSegmentComparator())
191                    .collect(Collectors.toCollection(ArrayList::new));
192        }
193
194        return forPaths(lastSegmentPath, splitZipSegments);
195    }
196
197    /**
198     * Concatenates the given files.
199     *
200     * @param files the files to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip)
201     *              and theses files should be added in correct order (e.g. .z01, .z02... .z99, .zip)
202     * @return SeekableByteChannel that concatenates all provided files
203     * @throws NullPointerException if files is null
204     * @throws IOException          if opening a channel for one of the files fails
205     * @throws IOException if the first channel doesn't seem to hold
206     * the beginning of a split archive
207     */
208    public static SeekableByteChannel forFiles(final File... files) throws IOException {
209        final List<Path> paths = new ArrayList<>();
210        for (final File f : Objects.requireNonNull(files, "files must not be null")) {
211            paths.add(f.toPath());
212        }
213
214        return forPaths(paths.toArray(EMPTY_PATH_ARRAY));
215    }
216
217    /**
218     * Concatenates the given file paths.
219     * @param paths the file paths to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip)
220     * and these files should be added in correct order (e.g.: .z01, .z02... .z99, .zip)
221     * @return SeekableByteChannel that concatenates all provided files
222     * @throws NullPointerException if files is null
223     * @throws IOException if opening a channel for one of the files fails
224     * @throws IOException if the first channel doesn't seem to hold
225     * the beginning of a split archive
226     * @since 1.22
227     */
228    public static SeekableByteChannel forPaths(final Path... paths) throws IOException {
229        final List<SeekableByteChannel> channels = new ArrayList<>();
230        for (final Path path : Objects.requireNonNull(paths, "paths must not be null")) {
231            channels.add(Files.newByteChannel(path, StandardOpenOption.READ));
232        }
233        if (channels.size() == 1) {
234            return channels.get(0);
235        }
236        return new ZipSplitReadOnlySeekableByteChannel(channels);
237    }
238
239    /**
240     * Concatenates the given files.
241     *
242     * @param lastSegmentFile the last segment of split zip segments, its extension should be .zip
243     * @param files           the files to concatenate except for the last segment,
244     *                        note theses files should be added in correct order (e.g. .z01, .z02... .z99)
245     * @return SeekableByteChannel that concatenates all provided files
246     * @throws IOException if the first channel doesn't seem to hold
247     * the beginning of a split archive
248     * @throws NullPointerException if files or lastSegmentFile is null
249     */
250    public static SeekableByteChannel forFiles(final File lastSegmentFile, final Iterable<File> files) throws IOException {
251        Objects.requireNonNull(files, "files");
252        Objects.requireNonNull(lastSegmentFile, "lastSegmentFile");
253
254        final List<Path> filesList = new ArrayList<>();
255        files.forEach(f -> filesList.add(f.toPath()));
256
257        return forPaths(lastSegmentFile.toPath(), filesList);
258    }
259
260    /**
261     * Concatenates the given file paths.
262     * @param lastSegmentPath the last segment path of split zip segments, its extension must be .zip
263     * @param paths the file paths to concatenate except for the last segment,
264     * note these files should be added in correct order (e.g.: .z01, .z02... .z99)
265     * @return SeekableByteChannel that concatenates all provided files
266     * @throws IOException if the first channel doesn't seem to hold
267     * the beginning of a split archive
268     * @throws NullPointerException if files or lastSegmentPath is null
269     * @since 1.22
270     */
271    public static SeekableByteChannel forPaths(final Path lastSegmentPath, final Iterable<Path> paths) throws IOException {
272        Objects.requireNonNull(paths, "paths");
273        Objects.requireNonNull(lastSegmentPath, "lastSegmentPath");
274
275        final List<Path> filesList = new ArrayList<>();
276        paths.forEach(filesList::add);
277        filesList.add(lastSegmentPath);
278
279        return forPaths(filesList.toArray(EMPTY_PATH_ARRAY));
280    }
281
282    private static class ZipSplitSegmentComparator implements Comparator<Path>, Serializable {
283        private static final long serialVersionUID = 20200123L;
284
285        @Override
286        public int compare(final Path file1, final Path file2) {
287            final String extension1 = FileNameUtils.getExtension(file1);
288            final String extension2 = FileNameUtils.getExtension(file2);
289
290            if (!extension1.startsWith("z")) {
291                return -1;
292            }
293
294            if (!extension2.startsWith("z")) {
295                return 1;
296            }
297
298            final Integer splitSegmentNumber1 = Integer.parseInt(extension1.substring(1));
299            final Integer splitSegmentNumber2 = Integer.parseInt(extension2.substring(1));
300
301            return splitSegmentNumber1.compareTo(splitSegmentNumber2);
302        }
303    }
304}