/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
018
019 package org.apache.hadoop.hdfs;
020
021 import java.io.IOException;
022 import java.io.InputStream;
023 import java.net.HttpURLConnection;
024 import java.net.URL;
025 import java.util.List;
026 import java.util.Map;
027 import java.util.StringTokenizer;
028
029 import org.apache.commons.io.input.BoundedInputStream;
030 import org.apache.hadoop.fs.FSInputStream;
031
032 import com.google.common.annotations.VisibleForTesting;
033 import com.google.common.net.HttpHeaders;
034
035 /**
036 * To support HTTP byte streams, a new connection to an HTTP server needs to be
037 * created each time. This class hides the complexity of those multiple
038 * connections from the client. Whenever seek() is called, a new connection
039 * is made on the successive read(). The normal input stream functions are
040 * connected to the currently active input stream.
041 */
042 public abstract class ByteRangeInputStream extends FSInputStream {
043
044 /**
045 * This class wraps a URL and provides method to open connection.
046 * It can be overridden to change how a connection is opened.
047 */
048 public static abstract class URLOpener {
049 protected URL url;
050
051 public URLOpener(URL u) {
052 url = u;
053 }
054
055 public void setURL(URL u) {
056 url = u;
057 }
058
059 public URL getURL() {
060 return url;
061 }
062
063 /** Connect to server with a data offset. */
064 protected abstract HttpURLConnection connect(final long offset,
065 final boolean resolved) throws IOException;
066 }
067
068 enum StreamStatus {
069 NORMAL, SEEK, CLOSED
070 }
071 protected InputStream in;
072 protected URLOpener originalURL;
073 protected URLOpener resolvedURL;
074 protected long startPos = 0;
075 protected long currentPos = 0;
076 protected Long fileLength = null;
077
078 StreamStatus status = StreamStatus.SEEK;
079
080 /**
081 * Create with the specified URLOpeners. Original url is used to open the
082 * stream for the first time. Resolved url is used in subsequent requests.
083 * @param o Original url
084 * @param r Resolved url
085 */
086 public ByteRangeInputStream(URLOpener o, URLOpener r) {
087 this.originalURL = o;
088 this.resolvedURL = r;
089 }
090
091 protected abstract URL getResolvedUrl(final HttpURLConnection connection
092 ) throws IOException;
093
094 @VisibleForTesting
095 protected InputStream getInputStream() throws IOException {
096 switch (status) {
097 case NORMAL:
098 break;
099 case SEEK:
100 if (in != null) {
101 in.close();
102 }
103 in = openInputStream();
104 status = StreamStatus.NORMAL;
105 break;
106 case CLOSED:
107 throw new IOException("Stream closed");
108 }
109 return in;
110 }
111
112 @VisibleForTesting
113 protected InputStream openInputStream() throws IOException {
114 // Use the original url if no resolved url exists, eg. if
115 // it's the first time a request is made.
116 final boolean resolved = resolvedURL.getURL() != null;
117 final URLOpener opener = resolved? resolvedURL: originalURL;
118
119 final HttpURLConnection connection = opener.connect(startPos, resolved);
120 resolvedURL.setURL(getResolvedUrl(connection));
121
122 InputStream in = connection.getInputStream();
123 final Map<String, List<String>> headers = connection.getHeaderFields();
124 if (isChunkedTransferEncoding(headers)) {
125 // file length is not known
126 fileLength = null;
127 } else {
128 // for non-chunked transfer-encoding, get content-length
129 final String cl = connection.getHeaderField(HttpHeaders.CONTENT_LENGTH);
130 if (cl == null) {
131 throw new IOException(HttpHeaders.CONTENT_LENGTH + " is missing: "
132 + headers);
133 }
134 final long streamlength = Long.parseLong(cl);
135 fileLength = startPos + streamlength;
136
137 // Java has a bug with >2GB request streams. It won't bounds check
138 // the reads so the transfer blocks until the server times out
139 in = new BoundedInputStream(in, streamlength);
140 }
141
142 return in;
143 }
144
145 private static boolean isChunkedTransferEncoding(
146 final Map<String, List<String>> headers) {
147 return contains(headers, HttpHeaders.TRANSFER_ENCODING, "chunked")
148 || contains(headers, HttpHeaders.TE, "chunked");
149 }
150
151 /** Does the HTTP header map contain the given key, value pair? */
152 private static boolean contains(final Map<String, List<String>> headers,
153 final String key, final String value) {
154 final List<String> values = headers.get(key);
155 if (values != null) {
156 for(String v : values) {
157 for(final StringTokenizer t = new StringTokenizer(v, ",");
158 t.hasMoreTokens(); ) {
159 if (value.equalsIgnoreCase(t.nextToken())) {
160 return true;
161 }
162 }
163 }
164 }
165 return false;
166 }
167
168 private int update(final int n) throws IOException {
169 if (n != -1) {
170 currentPos += n;
171 } else if (fileLength != null && currentPos < fileLength) {
172 throw new IOException("Got EOF but currentPos = " + currentPos
173 + " < filelength = " + fileLength);
174 }
175 return n;
176 }
177
178 @Override
179 public int read() throws IOException {
180 final int b = getInputStream().read();
181 update((b == -1) ? -1 : 1);
182 return b;
183 }
184
185 @Override
186 public int read(byte b[], int off, int len) throws IOException {
187 return update(getInputStream().read(b, off, len));
188 }
189
190 /**
191 * Seek to the given offset from the start of the file.
192 * The next read() will be from that location. Can't
193 * seek past the end of the file.
194 */
195 @Override
196 public void seek(long pos) throws IOException {
197 if (pos != currentPos) {
198 startPos = pos;
199 currentPos = pos;
200 if (status != StreamStatus.CLOSED) {
201 status = StreamStatus.SEEK;
202 }
203 }
204 }
205
206 /**
207 * Return the current offset from the start of the file
208 */
209 @Override
210 public long getPos() throws IOException {
211 return currentPos;
212 }
213
214 /**
215 * Seeks a different copy of the data. Returns true if
216 * found a new source, false otherwise.
217 */
218 @Override
219 public boolean seekToNewSource(long targetPos) throws IOException {
220 return false;
221 }
222
223 @Override
224 public void close() throws IOException {
225 if (in != null) {
226 in.close();
227 in = null;
228 }
229 status = StreamStatus.CLOSED;
230 }
231 }