001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 package org.apache.hadoop.hdfs.web;
020
021 import java.io.IOException;
022 import java.io.InputStream;
023 import java.net.HttpURLConnection;
024 import java.net.URL;
025 import java.util.List;
026 import java.util.Map;
027 import java.util.StringTokenizer;
028
029 import org.apache.commons.io.input.BoundedInputStream;
030 import org.apache.hadoop.fs.FSInputStream;
031
032 import com.google.common.annotations.VisibleForTesting;
033 import com.google.common.net.HttpHeaders;
034
035 /**
036 * To support HTTP byte streams, a new connection to an HTTP server needs to be
037 * created each time. This class hides the complexity of those multiple
038 * connections from the client. Whenever seek() is called, a new connection
039 * is made on the successive read(). The normal input stream functions are
040 * connected to the currently active input stream.
041 */
042 public abstract class ByteRangeInputStream extends FSInputStream {
043
044 /**
045 * This class wraps a URL and provides method to open connection.
046 * It can be overridden to change how a connection is opened.
047 */
048 public static abstract class URLOpener {
049 protected URL url;
050
051 public URLOpener(URL u) {
052 url = u;
053 }
054
055 public void setURL(URL u) {
056 url = u;
057 }
058
059 public URL getURL() {
060 return url;
061 }
062
063 /** Connect to server with a data offset. */
064 protected abstract HttpURLConnection connect(final long offset,
065 final boolean resolved) throws IOException;
066 }
067
068 enum StreamStatus {
069 NORMAL, SEEK, CLOSED
070 }
071 protected InputStream in;
072 protected final URLOpener originalURL;
073 protected final URLOpener resolvedURL;
074 protected long startPos = 0;
075 protected long currentPos = 0;
076 protected Long fileLength = null;
077
078 StreamStatus status = StreamStatus.SEEK;
079
080 /**
081 * Create with the specified URLOpeners. Original url is used to open the
082 * stream for the first time. Resolved url is used in subsequent requests.
083 * @param o Original url
084 * @param r Resolved url
085 */
086 public ByteRangeInputStream(URLOpener o, URLOpener r) throws IOException {
087 this.originalURL = o;
088 this.resolvedURL = r;
089 getInputStream();
090 }
091
092 protected abstract URL getResolvedUrl(final HttpURLConnection connection
093 ) throws IOException;
094
095 @VisibleForTesting
096 protected InputStream getInputStream() throws IOException {
097 switch (status) {
098 case NORMAL:
099 break;
100 case SEEK:
101 if (in != null) {
102 in.close();
103 }
104 in = openInputStream();
105 status = StreamStatus.NORMAL;
106 break;
107 case CLOSED:
108 throw new IOException("Stream closed");
109 }
110 return in;
111 }
112
113 @VisibleForTesting
114 protected InputStream openInputStream() throws IOException {
115 // Use the original url if no resolved url exists, eg. if
116 // it's the first time a request is made.
117 final boolean resolved = resolvedURL.getURL() != null;
118 final URLOpener opener = resolved? resolvedURL: originalURL;
119
120 final HttpURLConnection connection = opener.connect(startPos, resolved);
121 resolvedURL.setURL(getResolvedUrl(connection));
122
123 InputStream in = connection.getInputStream();
124 final Map<String, List<String>> headers = connection.getHeaderFields();
125 if (isChunkedTransferEncoding(headers)) {
126 // file length is not known
127 fileLength = null;
128 } else {
129 // for non-chunked transfer-encoding, get content-length
130 final String cl = connection.getHeaderField(HttpHeaders.CONTENT_LENGTH);
131 if (cl == null) {
132 throw new IOException(HttpHeaders.CONTENT_LENGTH + " is missing: "
133 + headers);
134 }
135 final long streamlength = Long.parseLong(cl);
136 fileLength = startPos + streamlength;
137
138 // Java has a bug with >2GB request streams. It won't bounds check
139 // the reads so the transfer blocks until the server times out
140 in = new BoundedInputStream(in, streamlength);
141 }
142
143 return in;
144 }
145
146 private static boolean isChunkedTransferEncoding(
147 final Map<String, List<String>> headers) {
148 return contains(headers, HttpHeaders.TRANSFER_ENCODING, "chunked")
149 || contains(headers, HttpHeaders.TE, "chunked");
150 }
151
152 /** Does the HTTP header map contain the given key, value pair? */
153 private static boolean contains(final Map<String, List<String>> headers,
154 final String key, final String value) {
155 final List<String> values = headers.get(key);
156 if (values != null) {
157 for(String v : values) {
158 for(final StringTokenizer t = new StringTokenizer(v, ",");
159 t.hasMoreTokens(); ) {
160 if (value.equalsIgnoreCase(t.nextToken())) {
161 return true;
162 }
163 }
164 }
165 }
166 return false;
167 }
168
169 private int update(final int n) throws IOException {
170 if (n != -1) {
171 currentPos += n;
172 } else if (fileLength != null && currentPos < fileLength) {
173 throw new IOException("Got EOF but currentPos = " + currentPos
174 + " < filelength = " + fileLength);
175 }
176 return n;
177 }
178
179 @Override
180 public int read() throws IOException {
181 final int b = getInputStream().read();
182 update((b == -1) ? -1 : 1);
183 return b;
184 }
185
186 @Override
187 public int read(byte b[], int off, int len) throws IOException {
188 return update(getInputStream().read(b, off, len));
189 }
190
191 /**
192 * Seek to the given offset from the start of the file.
193 * The next read() will be from that location. Can't
194 * seek past the end of the file.
195 */
196 @Override
197 public void seek(long pos) throws IOException {
198 if (pos != currentPos) {
199 startPos = pos;
200 currentPos = pos;
201 if (status != StreamStatus.CLOSED) {
202 status = StreamStatus.SEEK;
203 }
204 }
205 }
206
207 /**
208 * Return the current offset from the start of the file
209 */
210 @Override
211 public long getPos() throws IOException {
212 return currentPos;
213 }
214
215 /**
216 * Seeks a different copy of the data. Returns true if
217 * found a new source, false otherwise.
218 */
219 @Override
220 public boolean seekToNewSource(long targetPos) throws IOException {
221 return false;
222 }
223
224 @Override
225 public void close() throws IOException {
226 if (in != null) {
227 in.close();
228 in = null;
229 }
230 status = StreamStatus.CLOSED;
231 }
232 }