001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018 019package org.apache.commons.compress.archivers.zip; 020 021import java.io.File; 022import java.io.IOException; 023import java.io.Serializable; 024import java.nio.ByteBuffer; 025import java.nio.channels.SeekableByteChannel; 026import java.nio.file.Files; 027import java.nio.file.Path; 028import java.nio.file.StandardOpenOption; 029import java.util.ArrayList; 030import java.util.Arrays; 031import java.util.Comparator; 032import java.util.List; 033import java.util.Objects; 034import java.util.regex.Pattern; 035import java.util.stream.Collectors; 036import java.util.stream.Stream; 037 038import org.apache.commons.compress.archivers.ArchiveStreamFactory; 039import org.apache.commons.compress.utils.FileNameUtils; 040import org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel; 041 042/** 043 * {@link MultiReadOnlySeekableByteChannel} that knows what a split ZIP archive should look like. 044 * 045 * <p>If you want to read a split archive using {@link ZipFile} then create an instance of this class from the parts of 046 * the archive.</p> 047 * 048 * @since 1.20 049 */ 050public class ZipSplitReadOnlySeekableByteChannel extends MultiReadOnlySeekableByteChannel { 051 052 private static final Path[] EMPTY_PATH_ARRAY = {}; 053 private static final int ZIP_SPLIT_SIGNATURE_LENGTH = 4; 054 private final ByteBuffer zipSplitSignatureByteBuffer = 055 ByteBuffer.allocate(ZIP_SPLIT_SIGNATURE_LENGTH); 056 057 /** 058 * Concatenates the given channels. 059 * 060 * <p>The channels should be add in ascending order, e.g. z01, 061 * z02, ... z99, zip please note that the .zip file is the last 062 * segment and should be added as the last one in the channels</p> 063 * 064 * @param channels the channels to concatenate 065 * @throws NullPointerException if channels is null 066 * @throws IOException if the first channel doesn't seem to hold 067 * the beginning of a split archive 068 */ 069 public ZipSplitReadOnlySeekableByteChannel(final List<SeekableByteChannel> channels) 070 throws IOException { 071 super(channels); 072 073 // the first split zip segment should begin with zip split signature 074 assertSplitSignature(channels); 075 } 076 077 /** 078 * Based on the zip specification: 079 * 080 * <p> 081 * 8.5.3 Spanned/Split archives created using PKZIP for Windows 082 * (V2.50 or greater), PKZIP Command Line (V2.50 or greater), 083 * or PKZIP Explorer will include a special spanning 084 * signature as the first 4 bytes of the first segment of 085 * the archive. This signature (0x08074b50) will be 086 * followed immediately by the local header signature for 087 * the first file in the archive. 088 * 089 * <p> 090 * the first 4 bytes of the first zip split segment should be the zip split signature(0x08074B50) 091 * 092 * @param channels channels to be validated 093 * @throws IOException 094 */ 095 private void assertSplitSignature(final List<SeekableByteChannel> channels) 096 throws IOException { 097 final SeekableByteChannel channel = channels.get(0); 098 // the zip split file signature is at the beginning of the first split segment 099 channel.position(0L); 100 101 zipSplitSignatureByteBuffer.rewind(); 102 channel.read(zipSplitSignatureByteBuffer); 103 final ZipLong signature = new ZipLong(zipSplitSignatureByteBuffer.array()); 104 if (!signature.equals(ZipLong.DD_SIG)) { 105 channel.position(0L); 106 throw new IOException("The first zip split segment does not begin with split zip file signature"); 107 } 108 109 channel.position(0L); 110 } 111 112 /** 113 * Concatenates the given channels. 114 * 115 * @param channels the channels to concatenate, note that the LAST CHANNEL of channels should be the LAST SEGMENT(.zip) 116 * and theses channels should be added in correct order (e.g. .z01, .z02... .z99, .zip) 117 * @return SeekableByteChannel that concatenates all provided channels 118 * @throws NullPointerException if channels is null 119 * @throws IOException if reading channels fails 120 */ 121 public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel... channels) throws IOException { 122 if (Objects.requireNonNull(channels, "channels must not be null").length == 1) { 123 return channels[0]; 124 } 125 return new ZipSplitReadOnlySeekableByteChannel(Arrays.asList(channels)); 126 } 127 128 /** 129 * Concatenates the given channels. 130 * 131 * @param lastSegmentChannel channel of the last segment of split zip segments, its extension should be .zip 132 * @param channels the channels to concatenate except for the last segment, 133 * note theses channels should be added in correct order (e.g. .z01, .z02... .z99) 134 * @return SeekableByteChannel that concatenates all provided channels 135 * @throws NullPointerException if lastSegmentChannel or channels is null 136 * @throws IOException if the first channel doesn't seem to hold 137 * the beginning of a split archive 138 */ 139 public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel lastSegmentChannel, 140 final Iterable<SeekableByteChannel> channels) throws IOException { 141 Objects.requireNonNull(channels, "channels"); 142 Objects.requireNonNull(lastSegmentChannel, "lastSegmentChannel"); 143 144 final List<SeekableByteChannel> channelsList = new ArrayList<>(); 145 channels.forEach(channelsList::add); 146 channelsList.add(lastSegmentChannel); 147 148 return forOrderedSeekableByteChannels(channelsList.toArray(new SeekableByteChannel[0])); 149 } 150 151 /** 152 * Concatenates zip split files from the last segment(the extension SHOULD be .zip) 153 * 154 * @param lastSegmentFile the last segment of zip split files, note that the extension SHOULD be .zip 155 * @return SeekableByteChannel that concatenates all zip split files 156 * @throws IllegalArgumentException if the lastSegmentFile's extension is NOT .zip 157 * @throws IOException if the first channel doesn't seem to hold 158 * the beginning of a split archive 159 */ 160 public static SeekableByteChannel buildFromLastSplitSegment(final File lastSegmentFile) throws IOException { 161 return buildFromLastSplitSegment(lastSegmentFile.toPath()); 162 } 163 164 /** 165 * Concatenates zip split files from the last segment (the extension MUST be .zip) 166 * @param lastSegmentPath the last segment of zip split files, note that the extension MUST be .zip 167 * @return SeekableByteChannel that concatenates all zip split files 168 * @throws IllegalArgumentException if the lastSegmentPath's extension is NOT .zip 169 * @throws IOException if the first channel doesn't seem to hold 170 * the beginning of a split archive 171 * @since 1.22 172 */ 173 public static SeekableByteChannel buildFromLastSplitSegment(final Path lastSegmentPath) throws IOException { 174 final String extension = FileNameUtils.getExtension(lastSegmentPath); 175 if (!extension.equalsIgnoreCase(ArchiveStreamFactory.ZIP)) { 176 throw new IllegalArgumentException("The extension of last zip split segment should be .zip"); 177 } 178 179 final Path parent = Objects.nonNull(lastSegmentPath.getParent()) ? lastSegmentPath.getParent() 180 : lastSegmentPath.getFileSystem().getPath("."); 181 final String fileBaseName = FileNameUtils.getBaseName(lastSegmentPath); 182 final ArrayList<Path> splitZipSegments; 183 184 // zip split segments should be like z01,z02....z(n-1) based on the zip specification 185 final Pattern pattern = Pattern.compile(Pattern.quote(fileBaseName) + ".[zZ][0-9]+"); 186 try (Stream<Path> walk = Files.walk(parent, 1)) { 187 splitZipSegments = walk 188 .filter(Files::isRegularFile) 189 .filter(path -> pattern.matcher(path.getFileName().toString()).matches()) 190 .sorted(new ZipSplitSegmentComparator()) 191 .collect(Collectors.toCollection(ArrayList::new)); 192 } 193 194 return forPaths(lastSegmentPath, splitZipSegments); 195 } 196 197 /** 198 * Concatenates the given files. 199 * 200 * @param files the files to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) 201 * and theses files should be added in correct order (e.g. .z01, .z02... .z99, .zip) 202 * @return SeekableByteChannel that concatenates all provided files 203 * @throws NullPointerException if files is null 204 * @throws IOException if opening a channel for one of the files fails 205 * @throws IOException if the first channel doesn't seem to hold 206 * the beginning of a split archive 207 */ 208 public static SeekableByteChannel forFiles(final File... files) throws IOException { 209 final List<Path> paths = new ArrayList<>(); 210 for (final File f : Objects.requireNonNull(files, "files must not be null")) { 211 paths.add(f.toPath()); 212 } 213 214 return forPaths(paths.toArray(EMPTY_PATH_ARRAY)); 215 } 216 217 /** 218 * Concatenates the given file paths. 219 * @param paths the file paths to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) 220 * and these files should be added in correct order (e.g.: .z01, .z02... .z99, .zip) 221 * @return SeekableByteChannel that concatenates all provided files 222 * @throws NullPointerException if files is null 223 * @throws IOException if opening a channel for one of the files fails 224 * @throws IOException if the first channel doesn't seem to hold 225 * the beginning of a split archive 226 * @since 1.22 227 */ 228 public static SeekableByteChannel forPaths(final Path... paths) throws IOException { 229 final List<SeekableByteChannel> channels = new ArrayList<>(); 230 for (final Path path : Objects.requireNonNull(paths, "paths must not be null")) { 231 channels.add(Files.newByteChannel(path, StandardOpenOption.READ)); 232 } 233 if (channels.size() == 1) { 234 return channels.get(0); 235 } 236 return new ZipSplitReadOnlySeekableByteChannel(channels); 237 } 238 239 /** 240 * Concatenates the given files. 241 * 242 * @param lastSegmentFile the last segment of split zip segments, its extension should be .zip 243 * @param files the files to concatenate except for the last segment, 244 * note theses files should be added in correct order (e.g. .z01, .z02... .z99) 245 * @return SeekableByteChannel that concatenates all provided files 246 * @throws IOException if the first channel doesn't seem to hold 247 * the beginning of a split archive 248 * @throws NullPointerException if files or lastSegmentFile is null 249 */ 250 public static SeekableByteChannel forFiles(final File lastSegmentFile, final Iterable<File> files) throws IOException { 251 Objects.requireNonNull(files, "files"); 252 Objects.requireNonNull(lastSegmentFile, "lastSegmentFile"); 253 254 final List<Path> filesList = new ArrayList<>(); 255 files.forEach(f -> filesList.add(f.toPath())); 256 257 return forPaths(lastSegmentFile.toPath(), filesList); 258 } 259 260 /** 261 * Concatenates the given file paths. 262 * @param lastSegmentPath the last segment path of split zip segments, its extension must be .zip 263 * @param paths the file paths to concatenate except for the last segment, 264 * note these files should be added in correct order (e.g.: .z01, .z02... .z99) 265 * @return SeekableByteChannel that concatenates all provided files 266 * @throws IOException if the first channel doesn't seem to hold 267 * the beginning of a split archive 268 * @throws NullPointerException if files or lastSegmentPath is null 269 * @since 1.22 270 */ 271 public static SeekableByteChannel forPaths(final Path lastSegmentPath, final Iterable<Path> paths) throws IOException { 272 Objects.requireNonNull(paths, "paths"); 273 Objects.requireNonNull(lastSegmentPath, "lastSegmentPath"); 274 275 final List<Path> filesList = new ArrayList<>(); 276 paths.forEach(filesList::add); 277 filesList.add(lastSegmentPath); 278 279 return forPaths(filesList.toArray(EMPTY_PATH_ARRAY)); 280 } 281 282 private static class ZipSplitSegmentComparator implements Comparator<Path>, Serializable { 283 private static final long serialVersionUID = 20200123L; 284 285 @Override 286 public int compare(final Path file1, final Path file2) { 287 final String extension1 = FileNameUtils.getExtension(file1); 288 final String extension2 = FileNameUtils.getExtension(file2); 289 290 if (!extension1.startsWith("z")) { 291 return -1; 292 } 293 294 if (!extension2.startsWith("z")) { 295 return 1; 296 } 297 298 final Integer splitSegmentNumber1 = Integer.parseInt(extension1.substring(1)); 299 final Integer splitSegmentNumber2 = Integer.parseInt(extension2.substring(1)); 300 301 return splitSegmentNumber1.compareTo(splitSegmentNumber2); 302 } 303 } 304}