001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 */
018
019 /*
020 * This package is based on the work done by Timothy Gerard Endres
021 * (time@ice.com) to whom the Ant project is very grateful for his great code.
022 */
023
024 package org.apache.commons.compress.archivers.tar;
025
026 import java.io.BufferedReader;
027 import java.io.IOException;
028 import java.io.InputStream;
029 import java.io.InputStreamReader;
030 import java.util.HashMap;
031 import java.util.Iterator;
032 import java.util.Map;
033 import java.util.Map.Entry;
034
035 import org.apache.commons.compress.archivers.ArchiveEntry;
036 import org.apache.commons.compress.archivers.ArchiveInputStream;
037 import org.apache.commons.compress.utils.ArchiveUtils;
038
039 /**
040 * The TarInputStream reads a UNIX tar archive as an InputStream.
041 * methods are provided to position at each successive entry in
042 * the archive, and the read each entry as a normal input stream
043 * using read().
044 * @NotThreadSafe
045 */
046 public class TarArchiveInputStream extends ArchiveInputStream {
047 private static final int SMALL_BUFFER_SIZE = 256;
048 private static final int BUFFER_SIZE = 8 * 1024;
049
050 private boolean hasHitEOF;
051 private long entrySize;
052 private long entryOffset;
053 private byte[] readBuf;
054 protected final TarBuffer buffer;
055 private TarArchiveEntry currEntry;
056
057 /**
058 * Constructor for TarInputStream.
059 * @param is the input stream to use
060 */
061 public TarArchiveInputStream(InputStream is) {
062 this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE);
063 }
064
065 /**
066 * Constructor for TarInputStream.
067 * @param is the input stream to use
068 * @param blockSize the block size to use
069 */
070 public TarArchiveInputStream(InputStream is, int blockSize) {
071 this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE);
072 }
073
074 /**
075 * Constructor for TarInputStream.
076 * @param is the input stream to use
077 * @param blockSize the block size to use
078 * @param recordSize the record size to use
079 */
080 public TarArchiveInputStream(InputStream is, int blockSize, int recordSize) {
081 this.buffer = new TarBuffer(is, blockSize, recordSize);
082 this.readBuf = null;
083 this.hasHitEOF = false;
084 }
085
086 /**
087 * Closes this stream. Calls the TarBuffer's close() method.
088 * @throws IOException on error
089 */
090 public void close() throws IOException {
091 buffer.close();
092 }
093
094 /**
095 * Get the record size being used by this stream's TarBuffer.
096 *
097 * @return The TarBuffer record size.
098 */
099 public int getRecordSize() {
100 return buffer.getRecordSize();
101 }
102
103 /**
104 * Get the available data that can be read from the current
105 * entry in the archive. This does not indicate how much data
106 * is left in the entire archive, only in the current entry.
107 * This value is determined from the entry's size header field
108 * and the amount of data already read from the current entry.
109 * Integer.MAX_VALUE is returen in case more than Integer.MAX_VALUE
110 * bytes are left in the current entry in the archive.
111 *
112 * @return The number of available bytes for the current entry.
113 * @throws IOException for signature
114 */
115 public int available() throws IOException {
116 if (entrySize - entryOffset > Integer.MAX_VALUE) {
117 return Integer.MAX_VALUE;
118 }
119 return (int) (entrySize - entryOffset);
120 }
121
122 /**
123 * Skip bytes in the input buffer. This skips bytes in the
124 * current entry's data, not the entire archive, and will
125 * stop at the end of the current entry's data if the number
126 * to skip extends beyond that point.
127 *
128 * @param numToSkip The number of bytes to skip.
129 * @return the number actually skipped
130 * @throws IOException on error
131 */
132 public long skip(long numToSkip) throws IOException {
133 // REVIEW
134 // This is horribly inefficient, but it ensures that we
135 // properly skip over bytes via the TarBuffer...
136 //
137 byte[] skipBuf = new byte[BUFFER_SIZE];
138 long skip = numToSkip;
139 while (skip > 0) {
140 int realSkip = (int) (skip > skipBuf.length ? skipBuf.length : skip);
141 int numRead = read(skipBuf, 0, realSkip);
142 if (numRead == -1) {
143 break;
144 }
145 skip -= numRead;
146 }
147 return (numToSkip - skip);
148 }
149
150 /**
151 * Since we do not support marking just yet, we do nothing.
152 */
153 public synchronized void reset() {
154 }
155
156 /**
157 * Get the next entry in this tar archive. This will skip
158 * over any remaining data in the current entry, if there
159 * is one, and place the input stream at the header of the
160 * next entry, and read the header and instantiate a new
161 * TarEntry from the header bytes and return that entry.
162 * If there are no more entries in the archive, null will
163 * be returned to indicate that the end of the archive has
164 * been reached.
165 *
166 * @return The next TarEntry in the archive, or null.
167 * @throws IOException on error
168 */
169 public TarArchiveEntry getNextTarEntry() throws IOException {
170 if (hasHitEOF) {
171 return null;
172 }
173
174 if (currEntry != null) {
175 long numToSkip = entrySize - entryOffset;
176
177 while (numToSkip > 0) {
178 long skipped = skip(numToSkip);
179 if (skipped <= 0) {
180 throw new RuntimeException("failed to skip current tar entry");
181 }
182 numToSkip -= skipped;
183 }
184
185 readBuf = null;
186 }
187
188 byte[] headerBuf = buffer.readRecord();
189
190 if (headerBuf == null) {
191 hasHitEOF = true;
192 } else if (buffer.isEOFRecord(headerBuf)) {
193 hasHitEOF = true;
194 }
195
196 if (hasHitEOF) {
197 currEntry = null;
198 } else {
199 currEntry = new TarArchiveEntry(headerBuf);
200 entryOffset = 0;
201 entrySize = currEntry.getSize();
202 }
203
204 if (currEntry != null && currEntry.isGNULongNameEntry()) {
205 // read in the name
206 StringBuffer longName = new StringBuffer();
207 byte[] buf = new byte[SMALL_BUFFER_SIZE];
208 int length = 0;
209 while ((length = read(buf)) >= 0) {
210 longName.append(new String(buf, 0, length));
211 }
212 getNextEntry();
213 if (currEntry == null) {
214 // Bugzilla: 40334
215 // Malformed tar file - long entry name not followed by entry
216 return null;
217 }
218 // remove trailing null terminator
219 if (longName.length() > 0
220 && longName.charAt(longName.length() - 1) == 0) {
221 longName.deleteCharAt(longName.length() - 1);
222 }
223 currEntry.setName(longName.toString());
224 }
225
226 if (currEntry != null && currEntry.isPaxHeader()){ // Process Pax headers
227 paxHeaders();
228 }
229
230 return currEntry;
231 }
232
233 private void paxHeaders() throws IOException{
234 BufferedReader br = new BufferedReader(new InputStreamReader(this, "UTF-8"));
235 Map headers = new HashMap();
236 // Format is "length keyword=value\n";
237 while(true){ // get length
238 int ch;
239 int len=0;
240 int read=0;
241 while((ch = br.read()) != -1){
242 read++;
243 if (ch == ' '){ // End of length string
244 // Get keyword
245 StringBuffer sb = new StringBuffer();
246 while((ch = br.read()) != -1){
247 read++;
248 if (ch == '='){ // end of keyword
249 String keyword = sb.toString();
250 // Get rest of entry
251 char[] cbuf = new char[len-read];
252 int got = br.read(cbuf);
253 if (got != len-read){
254 throw new IOException("Failed to read Paxheader. Expected "+(len-read)+" chars, read "+got);
255 }
256 String value = new String(cbuf, 0 , len-read-1); // Drop trailing NL
257 headers.put(keyword, value);
258 break;
259 }
260 sb.append((char)ch);
261 }
262 break; // Processed single header
263 }
264 len *= 10;
265 len += ch - '0';
266 }
267 if (ch == -1){ // EOF
268 break;
269 }
270 }
271 getNextEntry(); // Get the actual file entry
272 /*
273 * The following headers are defined for Pax.
274 * atime, ctime, mtime, charset: cannot use these without changing TarArchiveEntry fields
275 * comment
276 * gid, gname
277 * linkpath
278 * size
279 * uid,uname
280 */
281 Iterator hdrs = headers.entrySet().iterator();
282 while(hdrs.hasNext()){
283 Entry ent = (Entry) hdrs.next();
284 String key = (String) ent.getKey();
285 String val = (String) ent.getValue();
286 if ("path".equals(key)){
287 currEntry.setName(val);
288 } else if ("linkpath".equals(key)){
289 currEntry.setLinkName(val);
290 } else if ("gid".equals(key)){
291 currEntry.setGroupId(Integer.parseInt(val));
292 } else if ("gname".equals(key)){
293 currEntry.setGroupName(val);
294 } else if ("uid".equals(key)){
295 currEntry.setUserId(Integer.parseInt(val));
296 } else if ("uname".equals(key)){
297 currEntry.setUserName(val);
298 } else if ("size".equals(key)){
299 currEntry.setSize(Long.parseLong(val));
300 }
301 }
302 }
303
304 public ArchiveEntry getNextEntry() throws IOException {
305 return getNextTarEntry();
306 }
307
308 /**
309 * Reads bytes from the current tar archive entry.
310 *
311 * This method is aware of the boundaries of the current
312 * entry in the archive and will deal with them as if they
313 * were this stream's start and EOF.
314 *
315 * @param buf The buffer into which to place bytes read.
316 * @param offset The offset at which to place bytes read.
317 * @param numToRead The number of bytes to read.
318 * @return The number of bytes read, or -1 at EOF.
319 * @throws IOException on error
320 */
321 public int read(byte[] buf, int offset, int numToRead) throws IOException {
322 int totalRead = 0;
323
324 if (entryOffset >= entrySize) {
325 return -1;
326 }
327
328 if ((numToRead + entryOffset) > entrySize) {
329 numToRead = (int) (entrySize - entryOffset);
330 }
331
332 if (readBuf != null) {
333 int sz = (numToRead > readBuf.length) ? readBuf.length
334 : numToRead;
335
336 System.arraycopy(readBuf, 0, buf, offset, sz);
337
338 if (sz >= readBuf.length) {
339 readBuf = null;
340 } else {
341 int newLen = readBuf.length - sz;
342 byte[] newBuf = new byte[newLen];
343
344 System.arraycopy(readBuf, sz, newBuf, 0, newLen);
345
346 readBuf = newBuf;
347 }
348
349 totalRead += sz;
350 numToRead -= sz;
351 offset += sz;
352 }
353
354 while (numToRead > 0) {
355 byte[] rec = buffer.readRecord();
356
357 if (rec == null) {
358 // Unexpected EOF!
359 throw new IOException("unexpected EOF with " + numToRead
360 + " bytes unread. Occured at byte: " + getBytesRead());
361 }
362 count(rec.length);
363 int sz = numToRead;
364 int recLen = rec.length;
365
366 if (recLen > sz) {
367 System.arraycopy(rec, 0, buf, offset, sz);
368
369 readBuf = new byte[recLen - sz];
370
371 System.arraycopy(rec, sz, readBuf, 0, recLen - sz);
372 } else {
373 sz = recLen;
374
375 System.arraycopy(rec, 0, buf, offset, recLen);
376 }
377
378 totalRead += sz;
379 numToRead -= sz;
380 offset += sz;
381 }
382
383 entryOffset += totalRead;
384
385 return totalRead;
386 }
387
388 protected final TarArchiveEntry getCurrentEntry() {
389 return currEntry;
390 }
391
392 protected final void setCurrentEntry(TarArchiveEntry e) {
393 currEntry = e;
394 }
395
396 protected final boolean isAtEOF() {
397 return hasHitEOF;
398 }
399
400 protected final void setAtEOF(boolean b) {
401 hasHitEOF = b;
402 }
403
404 /**
405 * Checks if the signature matches what is expected for a tar file.
406 *
407 * @param signature
408 * the bytes to check
409 * @param length
410 * the number of bytes to check
411 * @return true, if this stream is a tar archive stream, false otherwise
412 */
413 public static boolean matches(byte[] signature, int length) {
414 if (length < TarConstants.VERSION_OFFSET+TarConstants.VERSIONLEN) {
415 return false;
416 }
417
418 if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX,
419 signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
420 &&
421 ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_POSIX,
422 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
423 ){
424 return true;
425 }
426 if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_GNU,
427 signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
428 &&
429 (
430 ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_SPACE,
431 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
432 ||
433 ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_ZERO,
434 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
435 )
436 ){
437 return true;
438 }
439 // COMPRESS-107 - recognise Ant tar files
440 if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_ANT,
441 signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
442 &&
443 ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_ANT,
444 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
445 ){
446 return true;
447 }
448 return false;
449 }
450
451 }