001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 */
018 package org.apache.commons.compress.archivers.zip;
019
020 import java.io.File;
021 import java.io.IOException;
022 import java.io.InputStream;
023 import java.io.RandomAccessFile;
024 import java.util.Arrays;
025 import java.util.Collections;
026 import java.util.Comparator;
027 import java.util.Enumeration;
028 import java.util.HashMap;
029 import java.util.LinkedHashMap;
030 import java.util.Map;
031 import java.util.zip.Inflater;
032 import java.util.zip.InflaterInputStream;
033 import java.util.zip.ZipException;
034
035 /**
 * Replacement for <code>java.util.zip.ZipFile</code>.
 *
 * <p>This class adds support for file name encodings other than UTF-8
 * (which is required to work on ZIP files created by native zip tools)
 * and is able to skip a preamble like the one found in self
 * extracting archives. Furthermore it returns instances of
 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
 * instead of <code>java.util.zip.ZipEntry</code>.</p>
044 *
 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would
 * have to reimplement all methods anyway. Like
 * <code>java.util.zip.ZipFile</code>, it uses RandomAccessFile under the
 * covers and supports compressed and uncompressed entries.</p>
049 *
050 * <p>The method signatures mimic the ones of
051 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions:
052 *
053 * <ul>
054 * <li>There is no getName method.</li>
055 * <li>entries has been renamed to getEntries.</li>
056 * <li>getEntries and getEntry return
057 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
058 * instances.</li>
059 * <li>close is allowed to throw IOException.</li>
060 * </ul>
061 *
062 */
063 public class ZipFile {
/** Initial capacity passed to the entry map and the name map. */
private static final int HASH_SIZE = 509;
/** Number of bytes occupied by a two-byte header field. */
private static final int SHORT = 2;
/** Number of bytes occupied by a four-byte header field. */
private static final int WORD = 4;
/** Mask applied to the shifted "version made by" value to obtain the platform. */
static final int NIBLET_MASK = 0x0f;
/** Number of bits the "version made by" value is shifted right before masking. */
static final int BYTE_SHIFT = 8;
/** Index of the first byte of the end of central directory signature. */
private static final int POS_0 = 0;
/** Index of the second byte of the end of central directory signature. */
private static final int POS_1 = 1;
/** Index of the third byte of the end of central directory signature. */
private static final int POS_2 = 2;
/** Index of the fourth byte of the end of central directory signature. */
private static final int POS_3 = 3;
073
/**
 * Maps ZipArchiveEntrys to OffsetEntrys, recording the offsets of
 * the local file header and of the entry's data.
 *
 * <p>Being a LinkedHashMap it iterates in insertion order, which is
 * the order in which the central directory has been read.</p>
 */
private final Map entries = new LinkedHashMap(HASH_SIZE);

/**
 * Maps String to ZipArchiveEntrys, name -> actual entry.
 */
private final Map nameMap = new HashMap(HASH_SIZE);
084
/**
 * Holder for the two archive positions recorded per entry; both
 * are -1 until they have been filled in.
 */
private static final class OffsetEntry {
    /** Offset of the local file header as stated by the central directory. */
    private long headerOffset = -1;
    /** Offset of the entry's data, computed once the local file header has been read. */
    private long dataOffset = -1;
}
089
/**
 * The encoding to use for filenames and the file comment.
 *
 * <p>For a list of possible values see <a
 * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
 * The constructors that don't take an encoding use UTF-8;
 * <code>null</code> stands for the platform's default encoding.</p>
 */
private final String encoding;

/**
 * The zip encoding to use for filenames and the file comment,
 * obtained from ZipEncodingHelper for {@link #encoding}.
 */
private final ZipEncoding zipEncoding;

/**
 * The actual data source, opened read-only.
 */
private final RandomAccessFile archive;

/**
 * Whether to look for and use Unicode extra fields.
 */
private final boolean useUnicodeExtraFields;
113
/**
 * Opens the given file for reading, decoding file names as "UTF8"
 * and scanning for unicode extra fields.
 *
 * @param f the archive.
 *
 * @throws IOException if an error occurs while reading the file.
 */
public ZipFile(File f) throws IOException {
    // same target the (File, String) constructor ends up delegating to
    this(f, ZipEncodingHelper.UTF8, true);
}
124
/**
 * Opens the file of the given name for reading, decoding file
 * names as "UTF8" and scanning for unicode extra fields.
 *
 * @param name name of the archive.
 *
 * @throws IOException if an error occurs while reading the file.
 */
public ZipFile(String name) throws IOException {
    // the File based constructor supplies the UTF8 default
    this(new File(name));
}
135
/**
 * Opens the file of the given name for reading, using the
 * specified encoding for file names and scanning for unicode
 * extra fields.
 *
 * @param name name of the archive.
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 *
 * @throws IOException if an error occurs while reading the file.
 */
public ZipFile(String name, String encoding) throws IOException {
    // the (File, String) constructor turns on unicode extra fields
    this(new File(name), encoding);
}
149
/**
 * Opens the given file for reading, assuming the specified
 * encoding for file names and scanning for unicode extra fields.
 *
 * <p>Equivalent to calling the three-argument constructor with
 * <code>useUnicodeExtraFields</code> set to <code>true</code>.</p>
 *
 * @param f the archive.
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 *
 * @throws IOException if an error occurs while reading the file.
 */
public ZipFile(File f, String encoding) throws IOException {
    this(f, encoding, true);
}
163
164 /**
165 * Opens the given file for reading, assuming the specified
166 * encoding for file names.
167 *
168 * @param f the archive.
169 * @param encoding the encoding to use for file names, use null
170 * for the platform's default encoding
171 * @param useUnicodeExtraFields whether to use InfoZIP Unicode
172 * Extra Fields (if present) to set the file names.
173 *
174 * @throws IOException if an error occurs while reading the file.
175 */
176 public ZipFile(File f, String encoding, boolean useUnicodeExtraFields)
177 throws IOException {
178 this.encoding = encoding;
179 this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
180 this.useUnicodeExtraFields = useUnicodeExtraFields;
181 archive = new RandomAccessFile(f, "r");
182 boolean success = false;
183 try {
184 Map entriesWithoutUTF8Flag = populateFromCentralDirectory();
185 resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
186 success = true;
187 } finally {
188 if (!success) {
189 try {
190 archive.close();
191 } catch (IOException e2) {
192 // swallow, throw the original exception instead
193 }
194 }
195 }
196 }
197
198 /**
199 * The encoding to use for filenames and the file comment.
200 *
201 * @return null if using the platform's default character encoding.
202 */
203 public String getEncoding() {
204 return encoding;
205 }
206
207 /**
208 * Closes the archive.
209 * @throws IOException if an error occurs closing the archive.
210 */
211 public void close() throws IOException {
212 archive.close();
213 }
214
215 /**
216 * close a zipfile quietly; throw no io fault, do nothing
217 * on a null parameter
218 * @param zipfile file to close, can be null
219 */
220 public static void closeQuietly(ZipFile zipfile) {
221 if (zipfile != null) {
222 try {
223 zipfile.close();
224 } catch (IOException e) {
225 //ignore
226 }
227 }
228 }
229
230 /**
231 * Returns all entries.
232 *
233 * <p>Entries will be returned in the same order they appear
234 * within the archive's central directory.</p>
235 *
236 * @return all entries as {@link ZipArchiveEntry} instances
237 */
238 public Enumeration getEntries() {
239 return Collections.enumeration(entries.keySet());
240 }
241
242 /**
243 * Returns all entries in physical order.
244 *
245 * <p>Entries will be returned in the same order their contents
246 * appear within the archive.</p>
247 *
248 * @return all entries as {@link ZipArchiveEntry} instances
249 *
250 * @since Commons Compress 1.1
251 */
252 public Enumeration getEntriesInPhysicalOrder() {
253 Object[] allEntries = entries.keySet().toArray();
254 Arrays.sort(allEntries, OFFSET_COMPARATOR);
255 return Collections.enumeration(Arrays.asList(allEntries));
256 }
257
258 /**
259 * Returns a named entry - or <code>null</code> if no entry by
260 * that name exists.
261 * @param name name of the entry.
262 * @return the ZipArchiveEntry corresponding to the given name - or
263 * <code>null</code> if not present.
264 */
265 public ZipArchiveEntry getEntry(String name) {
266 return (ZipArchiveEntry) nameMap.get(name);
267 }
268
269 /**
270 * Whether this class is able to read the given entry.
271 *
272 * <p>May return false if it is set up to use encryption or a
273 * compression method that hasn't been implemented yet.</p>
274 * @since Apache Commons Compress 1.1
275 */
276 public boolean canReadEntryData(ZipArchiveEntry ze) {
277 return ZipUtil.canHandleEntryData(ze);
278 }
279
280 /**
281 * Returns an InputStream for reading the contents of the given entry.
282 *
283 * @param ze the entry to get the stream for.
284 * @return a stream to read the entry from.
285 * @throws IOException if unable to create an input stream from the zipenty
286 * @throws ZipException if the zipentry uses an unsupported feature
287 */
288 public InputStream getInputStream(ZipArchiveEntry ze)
289 throws IOException, ZipException {
290 OffsetEntry offsetEntry = (OffsetEntry) entries.get(ze);
291 if (offsetEntry == null) {
292 return null;
293 }
294 ZipUtil.checkRequestedFeatures(ze);
295 long start = offsetEntry.dataOffset;
296 BoundedInputStream bis =
297 new BoundedInputStream(start, ze.getCompressedSize());
298 switch (ze.getMethod()) {
299 case ZipArchiveEntry.STORED:
300 return bis;
301 case ZipArchiveEntry.DEFLATED:
302 bis.addDummy();
303 return new InflaterInputStream(bis, new Inflater(true));
304 default:
305 throw new ZipException("Found unsupported compression method "
306 + ze.getMethod());
307 }
308 }
309
/**
 * Number of bytes of a central file header record that follow the
 * signature and precede the variable length parts (file name,
 * extra field and file comment), which are read separately.
 */
private static final int CFH_LEN =
    /* version made by                 */ SHORT
    /* version needed to extract       */ + SHORT
    /* general purpose bit flag        */ + SHORT
    /* compression method              */ + SHORT
    /* last mod file time              */ + SHORT
    /* last mod file date              */ + SHORT
    /* crc-32                          */ + WORD
    /* compressed size                 */ + WORD
    /* uncompressed size               */ + WORD
    /* filename length                 */ + SHORT
    /* extra field length              */ + SHORT
    /* file comment length             */ + SHORT
    /* disk number start               */ + SHORT
    /* internal file attributes        */ + SHORT
    /* external file attributes        */ + WORD
    /* relative offset of local header */ + WORD;
327
328 /**
329 * Reads the central directory of the given archive and populates
330 * the internal tables with ZipArchiveEntry instances.
331 *
332 * <p>The ZipArchiveEntrys will know all data that can be obtained from
333 * the central directory alone, but not the data that requires the
334 * local file header or additional data to be read.</p>
335 *
336 * @return a Map<ZipArchiveEntry, NameAndComment>> of
337 * zipentries that didn't have the language encoding flag set when
338 * read.
339 */
340 private Map populateFromCentralDirectory()
341 throws IOException {
342 HashMap noUTF8Flag = new HashMap();
343
344 positionAtCentralDirectory();
345
346 byte[] cfh = new byte[CFH_LEN];
347
348 byte[] signatureBytes = new byte[WORD];
349 archive.readFully(signatureBytes);
350 long sig = ZipLong.getValue(signatureBytes);
351 final long cfhSig = ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
352 if (sig != cfhSig && startsWithLocalFileHeader()) {
353 throw new IOException("central directory is empty, can't expand"
354 + " corrupt archive.");
355 }
356 while (sig == cfhSig) {
357 archive.readFully(cfh);
358 int off = 0;
359 ZipArchiveEntry ze = new ZipArchiveEntry();
360
361 int versionMadeBy = ZipShort.getValue(cfh, off);
362 off += SHORT;
363 ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);
364
365 off += SHORT; // skip version info
366
367 final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfh, off);
368 final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
369 final ZipEncoding entryEncoding =
370 hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
371 ze.setGeneralPurposeBit(gpFlag);
372
373 off += SHORT;
374
375 ze.setMethod(ZipShort.getValue(cfh, off));
376 off += SHORT;
377
378 // FIXME this is actually not very cpu cycles friendly as we are converting from
379 // dos to java while the underlying Sun implementation will convert
380 // from java to dos time for internal storage...
381 long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfh, off));
382 ze.setTime(time);
383 off += WORD;
384
385 ze.setCrc(ZipLong.getValue(cfh, off));
386 off += WORD;
387
388 ze.setCompressedSize(ZipLong.getValue(cfh, off));
389 off += WORD;
390
391 ze.setSize(ZipLong.getValue(cfh, off));
392 off += WORD;
393
394 int fileNameLen = ZipShort.getValue(cfh, off);
395 off += SHORT;
396
397 int extraLen = ZipShort.getValue(cfh, off);
398 off += SHORT;
399
400 int commentLen = ZipShort.getValue(cfh, off);
401 off += SHORT;
402
403 off += SHORT; // disk number
404
405 ze.setInternalAttributes(ZipShort.getValue(cfh, off));
406 off += SHORT;
407
408 ze.setExternalAttributes(ZipLong.getValue(cfh, off));
409 off += WORD;
410
411 byte[] fileName = new byte[fileNameLen];
412 archive.readFully(fileName);
413 ze.setName(entryEncoding.decode(fileName));
414
415 // LFH offset,
416 OffsetEntry offset = new OffsetEntry();
417 offset.headerOffset = ZipLong.getValue(cfh, off);
418 // data offset will be filled later
419 entries.put(ze, offset);
420
421 nameMap.put(ze.getName(), ze);
422
423 byte[] cdExtraData = new byte[extraLen];
424 archive.readFully(cdExtraData);
425 ze.setCentralDirectoryExtra(cdExtraData);
426
427 byte[] comment = new byte[commentLen];
428 archive.readFully(comment);
429 ze.setComment(entryEncoding.decode(comment));
430
431 archive.readFully(signatureBytes);
432 sig = ZipLong.getValue(signatureBytes);
433
434 if (!hasUTF8Flag && useUnicodeExtraFields) {
435 noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
436 }
437 }
438 return noUTF8Flag;
439 }
440
/**
 * Size of the "End of central dir record" when the zipfile comment
 * is empty; the record cannot start closer to the end of the file
 * than this.
 */
private static final int MIN_EOCD_SIZE =
    /* end of central dir signature    */ WORD
    /* number of this disk             */ + SHORT
    /* number of the disk with the     */
    /* start of the central directory  */ + SHORT
    /* total number of entries in      */
    /* the central dir on this disk    */ + SHORT
    /* total number of entries in      */
    /* the central dir                 */ + SHORT
    /* size of the central directory   */ + WORD
    /* offset of start of central      */
    /* directory with respect to       */
    /* the starting disk number        */ + WORD
    /* zipfile comment length          */ + SHORT;

/**
 * Size of the "End of central dir record" when the zipfile comment
 * has its maximum length; used to limit how far back from the end
 * of the file the record is searched for.
 */
private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
    /* maximum length of zipfile comment */ + 0xFFFF;

/**
 * Distance from the start of the "End of central dir record" to
 * its "offset of start of central directory" field.
 */
private static final int CFD_LOCATOR_OFFSET =
    /* end of central dir signature    */ WORD
    /* number of this disk             */ + SHORT
    /* number of the disk with the     */
    /* start of the central directory  */ + SHORT
    /* total number of entries in      */
    /* the central dir on this disk    */ + SHORT
    /* total number of entries in      */
    /* the central dir                 */ + SHORT
    /* size of the central directory   */ + WORD;
469
470 /**
471 * Searches for the "End of central dir record", parses
472 * it and positions the stream at the first central directory
473 * record.
474 */
475 private void positionAtCentralDirectory()
476 throws IOException {
477 boolean found = false;
478 long off = archive.length() - MIN_EOCD_SIZE;
479 long stopSearching = Math.max(0L, archive.length() - MAX_EOCD_SIZE);
480 if (off >= 0) {
481 archive.seek(off);
482 byte[] sig = ZipArchiveOutputStream.EOCD_SIG;
483 int curr = archive.read();
484 while (off >= stopSearching && curr != -1) {
485 if (curr == sig[POS_0]) {
486 curr = archive.read();
487 if (curr == sig[POS_1]) {
488 curr = archive.read();
489 if (curr == sig[POS_2]) {
490 curr = archive.read();
491 if (curr == sig[POS_3]) {
492 found = true;
493 break;
494 }
495 }
496 }
497 }
498 archive.seek(--off);
499 curr = archive.read();
500 }
501 }
502 if (!found) {
503 throw new ZipException("archive is not a ZIP archive");
504 }
505 archive.seek(off + CFD_LOCATOR_OFFSET);
506 byte[] cfdOffset = new byte[WORD];
507 archive.readFully(cfdOffset);
508 archive.seek(ZipLong.getValue(cfdOffset));
509 }
510
/**
 * Number of bytes in local file header up to the "length of
 * filename" entry, counted from the start of the header's
 * signature.
 */
private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
    /* local file header signature     */ WORD
    /* version needed to extract       */ + SHORT
    /* general purpose bit flag        */ + SHORT
    /* compression method              */ + SHORT
    /* last mod file time              */ + SHORT
    /* last mod file date              */ + SHORT
    /* crc-32                          */ + WORD
    /* compressed size                 */ + WORD
    /* uncompressed size               */ + WORD;
525
526 /**
527 * Walks through all recorded entries and adds the data available
528 * from the local file header.
529 *
530 * <p>Also records the offsets for the data to read from the
531 * entries.</p>
532 */
533 private void resolveLocalFileHeaderData(Map entriesWithoutUTF8Flag)
534 throws IOException {
535 Enumeration e = getEntries();
536 while (e.hasMoreElements()) {
537 ZipArchiveEntry ze = (ZipArchiveEntry) e.nextElement();
538 OffsetEntry offsetEntry = (OffsetEntry) entries.get(ze);
539 long offset = offsetEntry.headerOffset;
540 archive.seek(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
541 byte[] b = new byte[SHORT];
542 archive.readFully(b);
543 int fileNameLen = ZipShort.getValue(b);
544 archive.readFully(b);
545 int extraFieldLen = ZipShort.getValue(b);
546 int lenToSkip = fileNameLen;
547 while (lenToSkip > 0) {
548 int skipped = archive.skipBytes(lenToSkip);
549 if (skipped <= 0) {
550 throw new RuntimeException("failed to skip file name in"
551 + " local file header");
552 }
553 lenToSkip -= skipped;
554 }
555 byte[] localExtraData = new byte[extraFieldLen];
556 archive.readFully(localExtraData);
557 ze.setExtra(localExtraData);
558 /*dataOffsets.put(ze,
559 new Long(offset + LFH_OFFSET_FOR_FILENAME_LENGTH
560 + SHORT + SHORT + fileNameLen + extraFieldLen));
561 */
562 offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH
563 + SHORT + SHORT + fileNameLen + extraFieldLen;
564
565 if (entriesWithoutUTF8Flag.containsKey(ze)) {
566 String orig = ze.getName();
567 NameAndComment nc = (NameAndComment) entriesWithoutUTF8Flag.get(ze);
568 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name,
569 nc.comment);
570 if (!orig.equals(ze.getName())) {
571 nameMap.remove(orig);
572 nameMap.put(ze.getName(), ze);
573 }
574 }
575 }
576 }
577
578 /**
579 * Checks whether the archive starts with a LFH. If it doesn't,
580 * it may be an empty archive.
581 */
582 private boolean startsWithLocalFileHeader() throws IOException {
583 archive.seek(0);
584 final byte[] start = new byte[WORD];
585 archive.readFully(start);
586 for (int i = 0; i < start.length; i++) {
587 if (start[i] != ZipArchiveOutputStream.LFH_SIG[i]) {
588 return false;
589 }
590 }
591 return true;
592 }
593
594 /**
595 * InputStream that delegates requests to the underlying
596 * RandomAccessFile, making sure that only bytes from a certain
597 * range can be read.
598 */
599 private class BoundedInputStream extends InputStream {
600 private long remaining;
601 private long loc;
602 private boolean addDummyByte = false;
603
604 BoundedInputStream(long start, long remaining) {
605 this.remaining = remaining;
606 loc = start;
607 }
608
609 public int read() throws IOException {
610 if (remaining-- <= 0) {
611 if (addDummyByte) {
612 addDummyByte = false;
613 return 0;
614 }
615 return -1;
616 }
617 synchronized (archive) {
618 archive.seek(loc++);
619 return archive.read();
620 }
621 }
622
623 public int read(byte[] b, int off, int len) throws IOException {
624 if (remaining <= 0) {
625 if (addDummyByte) {
626 addDummyByte = false;
627 b[off] = 0;
628 return 1;
629 }
630 return -1;
631 }
632
633 if (len <= 0) {
634 return 0;
635 }
636
637 if (len > remaining) {
638 len = (int) remaining;
639 }
640 int ret = -1;
641 synchronized (archive) {
642 archive.seek(loc);
643 ret = archive.read(b, off, len);
644 }
645 if (ret > 0) {
646 loc += ret;
647 remaining -= ret;
648 }
649 return ret;
650 }
651
652 /**
653 * Inflater needs an extra dummy byte for nowrap - see
654 * Inflater's javadocs.
655 */
656 void addDummy() {
657 addDummyByte = true;
658 }
659 }
660
661 private static final class NameAndComment {
662 private final byte[] name;
663 private final byte[] comment;
664 private NameAndComment(byte[] name, byte[] comment) {
665 this.name = name;
666 this.comment = comment;
667 }
668 }
669
670 /**
671 * Compares two ZipArchiveEntries based on their offset within the archive.
672 *
673 * <p>Won't return any meaningful results if one of the entries
674 * isn't part of the archive at all.</p>
675 *
676 * @since Commons Compress 1.1
677 */
678 private final Comparator OFFSET_COMPARATOR =
679 new Comparator() {
680 public int compare(Object o1, Object o2) {
681 if (o1 == o2)
682 return 0;
683
684 ZipArchiveEntry e1 = (ZipArchiveEntry) o1;
685 ZipArchiveEntry e2 = (ZipArchiveEntry) o2;
686
687 OffsetEntry off1 = (OffsetEntry) entries.get(e1);
688 OffsetEntry off2 = (OffsetEntry) entries.get(e2);
689 if (off1 == null) {
690 return 1;
691 }
692 if (off2 == null) {
693 return -1;
694 }
695 long val = (off1.headerOffset - off2.headerOffset);
696 return val == 0 ? 0 : val < 0 ? -1 : +1;
697 }
698 };
699 }