/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URISyntaxException;
import java.net.URL;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
import org.apache.hadoop.hdfs.util.DataTransferThrottler;
import org.apache.hadoop.hdfs.web.URLConnectionFactory;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.MD5Hash;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authentication.client.AuthenticationException;
import org.apache.hadoop.util.Time;
import org.apache.http.client.utils.URIBuilder;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;

/**
 * This class provides methods for fetching a specified file (an fsimage or
 * a finalized edit log segment) from the NameNode, and for uploading
 * checkpoint images back to it.
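 *
 * <p>A minimal usage sketch (the NameNode HTTP address and the local
 * directory here are hypothetical):
 * <pre>{@code
 * URL infoServer = new URL("http://nn.example.com:50070");
 * File checkpointDir = new File("/tmp/checkpoint");
 * TransferFsImage.downloadMostRecentImageToDirectory(infoServer, checkpointDir);
 * }</pre>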
 */
@InterfaceAudience.Private
public class TransferFsImage {

  public static final String CONTENT_LENGTH = "Content-Length";
  public static final String FILE_LENGTH = "File-Length";
  public static final String MD5_HEADER = "X-MD5-Digest";

  private static final String CONTENT_TYPE = "Content-Type";
  private static final String CONTENT_TRANSFER_ENCODING = "Content-Transfer-Encoding";

  @VisibleForTesting
  static int timeout = 0;
  private static final URLConnectionFactory connectionFactory;
  private static final boolean isSpnegoEnabled;

  static {
    Configuration conf = new Configuration();
    connectionFactory = URLConnectionFactory
        .newDefaultURLConnectionFactory(conf);
    isSpnegoEnabled = UserGroupInformation.isSecurityEnabled();
  }

  private static final Log LOG = LogFactory.getLog(TransferFsImage.class);

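  /**
   * Downloads the most recent fsimage from the NameNode's info server and
   * writes it into the given local directory, using the file name supplied
   * by the server.
   *
   * @param infoServer the http address of the remote NameNode
   * @param dir the local directory to store the image in
   * @throws IOException if the transfer fails
   */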
  public static void downloadMostRecentImageToDirectory(URL infoServer,
      File dir) throws IOException {
    String fileId = ImageServlet.getParamStringForMostRecentImage();
    getFileClient(infoServer, fileId, Lists.newArrayList(dir),
        null, false);
  }

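  /**
   * Downloads the fsimage with the given transaction ID from the NameNode
   * into every IMAGE-type directory of the destination storage.
   *
   * @param fsName the http address of the remote NameNode
   * @param imageTxId the transaction ID of the fsimage to fetch
   * @param dstStorage the storage to write the image into
   * @param needDigest whether an MD5 digest of the received file is needed
   * @return the MD5 digest of the received file, or null if needDigest is
   *         false
   * @throws IOException if no storage directory is available or the
   *         transfer fails
   */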
  public static MD5Hash downloadImageToStorage(URL fsName, long imageTxId,
      Storage dstStorage, boolean needDigest) throws IOException {
    String fileid = ImageServlet.getParamStringForImage(null,
        imageTxId, dstStorage);
    String fileName = NNStorage.getCheckpointImageFileName(imageTxId);

    List<File> dstFiles = dstStorage.getFiles(
        NameNodeDirType.IMAGE, fileName);
    if (dstFiles.isEmpty()) {
      throw new IOException("No targets in destination storage!");
    }

    MD5Hash hash = getFileClient(fsName, fileid, dstFiles, dstStorage, needDigest);
    LOG.info("Downloaded file " + dstFiles.get(0).getName() + " size " +
        dstFiles.get(0).length() + " bytes.");
    return hash;
  }

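  /**
   * Receives a checkpoint image that is being uploaded to this NameNode via
   * HTTP PUT, writing it into every IMAGE-type directory of the destination
   * storage and verifying it against the digest advertised in the request
   * headers.
   *
   * @param request the servlet request carrying the image
   * @param imageTxId the transaction ID of the image being uploaded
   * @param dstStorage the storage to write the image into
   * @param stream the request body containing the image data
   * @param advertisedSize the expected length of the image in bytes
   * @param throttler an optional throttler for the transfer, or null
   * @return the MD5 digest of the received file
   * @throws IOException if no storage directory is available, the transfer
   *         fails, or verification fails
   */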
  static MD5Hash handleUploadImageRequest(HttpServletRequest request,
      long imageTxId, Storage dstStorage, InputStream stream,
      long advertisedSize, DataTransferThrottler throttler) throws IOException {

    String fileName = NNStorage.getCheckpointImageFileName(imageTxId);

    List<File> dstFiles = dstStorage.getFiles(NameNodeDirType.IMAGE, fileName);
    if (dstFiles.isEmpty()) {
      throw new IOException("No targets in destination storage!");
    }

    MD5Hash advertisedDigest = parseMD5Header(request);
    MD5Hash hash = receiveFile(fileName, dstFiles, dstStorage, true,
        advertisedSize, advertisedDigest, fileName, stream, throttler);
    LOG.info("Downloaded file " + dstFiles.get(0).getName() + " size "
        + dstFiles.get(0).length() + " bytes.");
    return hash;
  }

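  /**
   * Downloads a finalized edit log segment from the NameNode into every
   * EDITS-type directory of the destination storage, skipping the download
   * if the segment is already present locally. The segment is first written
   * to a temporary file and then renamed into place.
   *
   * @param fsName the http address of the remote NameNode
   * @param log the edit log segment to fetch
   * @param dstStorage the storage to write the segment into
   * @throws IOException if the transfer fails
   */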
  static void downloadEditsToStorage(URL fsName, RemoteEditLog log,
      NNStorage dstStorage) throws IOException {
    assert log.getStartTxId() > 0 && log.getEndTxId() > 0 :
        "bad log: " + log;
    String fileid = ImageServlet.getParamStringForLog(
        log, dstStorage);
    String finalFileName = NNStorage.getFinalizedEditsFileName(
        log.getStartTxId(), log.getEndTxId());

    List<File> finalFiles = dstStorage.getFiles(NameNodeDirType.EDITS,
        finalFileName);
    assert !finalFiles.isEmpty() : "No checkpoint targets.";

    for (File f : finalFiles) {
      if (f.exists() && FileUtil.canRead(f)) {
        LOG.info("Skipping download of remote edit log " +
            log + " since it already is stored locally at " + f);
        return;
      } else if (LOG.isDebugEnabled()) {
        LOG.debug("Dest file: " + f);
      }
    }

    final long milliTime = System.currentTimeMillis();
    String tmpFileName = NNStorage.getTemporaryEditsFileName(
        log.getStartTxId(), log.getEndTxId(), milliTime);
    List<File> tmpFiles = dstStorage.getFiles(NameNodeDirType.EDITS,
        tmpFileName);
    getFileClient(fsName, fileid, tmpFiles, dstStorage, false);
    // Log the size of the file we actually downloaded (the temporary file);
    // the finalized file does not exist yet at this point.
    LOG.info("Downloaded file " + tmpFiles.get(0).getName() + " size " +
        tmpFiles.get(0).length() + " bytes.");

    CheckpointFaultInjector.getInstance().beforeEditsRename();

    for (StorageDirectory sd : dstStorage.dirIterable(NameNodeDirType.EDITS)) {
      File tmpFile = NNStorage.getTemporaryEditsFile(sd,
          log.getStartTxId(), log.getEndTxId(), milliTime);
      File finalizedFile = NNStorage.getFinalizedEditsFile(sd,
          log.getStartTxId(), log.getEndTxId());
      if (LOG.isDebugEnabled()) {
        LOG.debug("Renaming " + tmpFile + " to " + finalizedFile);
      }
      boolean success = tmpFile.renameTo(finalizedFile);
      if (!success) {
        LOG.warn("Unable to rename edits file from " + tmpFile
            + " to " + finalizedFile);
      }
    }
  }

  /**
   * Uploads an image from this node's storage to the remote NameNode via
   * an HTTP PUT request.
   *
   * @param fsName the http address for the remote NN
   * @param conf Configuration
   * @param storage the storage directory to transfer the image from
   * @param nnf the NameNodeFile type of the image
   * @param txid the transaction ID of the image to be uploaded
   * @throws IOException if the upload fails for a reason other than a
   *         conflict with a previously completed upload of the same image
   */
  public static void uploadImageFromStorage(URL fsName, Configuration conf,
      NNStorage storage, NameNodeFile nnf, long txid) throws IOException {

    URL url = new URL(fsName, ImageServlet.PATH_SPEC);
    long startTime = Time.monotonicNow();
    try {
      uploadImage(url, conf, storage, nnf, txid);
    } catch (HttpPutFailedException e) {
      if (e.getResponseCode() == HttpServletResponse.SC_CONFLICT) {
        // this is OK - this means that a previous attempt to upload
        // this checkpoint succeeded even though we thought it failed.
        LOG.info("Image upload with txid " + txid +
            " conflicted with a previous image upload to the " +
            "same NameNode. Continuing...", e);
        return;
      } else {
        throw e;
      }
    }
    double xferSec = Math.max(
        ((float) (Time.monotonicNow() - startTime)) / 1000.0, 0.001);
    LOG.info("Uploaded image with txid " + txid + " to namenode at " + fsName
        + " in " + xferSec + " seconds");
  }

  /*
   * Uploads the image file using the HTTP PUT method.
   */
  private static void uploadImage(URL url, Configuration conf,
      NNStorage storage, NameNodeFile nnf, long txId) throws IOException {

    File imageFile = storage.findImageFile(nnf, txId);
    if (imageFile == null) {
      throw new IOException("Could not find image with txid " + txId);
    }

    HttpURLConnection connection = null;
    try {
      URIBuilder uriBuilder = new URIBuilder(url.toURI());

      // write all params for image upload request as query itself.
      // Request body contains the image to be uploaded.
      Map<String, String> params = ImageServlet.getParamsForPutImage(storage,
          txId, imageFile.length(), nnf);
      for (Entry<String, String> entry : params.entrySet()) {
        uriBuilder.addParameter(entry.getKey(), entry.getValue());
      }

      URL urlWithParams = uriBuilder.build().toURL();
      connection = (HttpURLConnection) connectionFactory.openConnection(
          urlWithParams, UserGroupInformation.isSecurityEnabled());
      // Set the request to PUT
      connection.setRequestMethod("PUT");
      connection.setDoOutput(true);

      int chunkSize = conf.getInt(
          DFSConfigKeys.DFS_IMAGE_TRANSFER_CHUNKSIZE_KEY,
          DFSConfigKeys.DFS_IMAGE_TRANSFER_CHUNKSIZE_DEFAULT);
      if (imageFile.length() > chunkSize) {
        // Use chunked streaming mode to support uploads larger than 2 GB and
        // to avoid buffering the whole image in memory. Only enable it when
        // the file is larger than the chunk size; enabling it for smaller
        // uploads can cause the transfer to fail.
        connection.setChunkedStreamingMode(chunkSize);
      }

      setTimeout(connection);

      // set headers for verification
      ImageServlet.setVerificationHeadersForPut(connection, imageFile);

      // Write the file to output stream.
      writeFileToPutRequest(conf, connection, imageFile);

      int responseCode = connection.getResponseCode();
      if (responseCode != HttpURLConnection.HTTP_OK) {
        throw new HttpPutFailedException(connection.getResponseMessage(),
            responseCode);
      }
    } catch (AuthenticationException e) {
      throw new IOException(e);
    } catch (URISyntaxException e) {
      throw new IOException(e);
    } finally {
      if (connection != null) {
        connection.disconnect();
      }
    }
  }

  private static void writeFileToPutRequest(Configuration conf,
      HttpURLConnection connection, File imageFile)
      throws FileNotFoundException, IOException {
    connection.setRequestProperty(CONTENT_TYPE, "application/octet-stream");
    connection.setRequestProperty(CONTENT_TRANSFER_ENCODING, "binary");
    OutputStream output = connection.getOutputStream();
    FileInputStream input = new FileInputStream(imageFile);
    try {
      copyFileToStream(output, imageFile, input,
          ImageServlet.getThrottler(conf));
    } finally {
      IOUtils.closeStream(input);
      IOUtils.closeStream(output);
    }
  }

  /**
   * A server-side method to respond to a getfile HTTP request.
   * Copies the contents of the local file into the output stream.
   */
  public static void copyFileToStream(OutputStream out, File localfile,
      FileInputStream infile, DataTransferThrottler throttler)
      throws IOException {
    byte[] buf = new byte[HdfsConstants.IO_FILE_BUFFER_SIZE];
    try {
      CheckpointFaultInjector.getInstance()
          .aboutToSendFile(localfile);

      if (CheckpointFaultInjector.getInstance().
          shouldSendShortFile(localfile)) {
        // Test sending image shorter than localfile
        long len = localfile.length();
        buf = new byte[(int) Math.min(len / 2, HdfsConstants.IO_FILE_BUFFER_SIZE)];
        // This will read at most half of the image
        // and the rest of the image will be sent over the wire
        infile.read(buf);
      }
      int num = 1;
      while (num > 0) {
        num = infile.read(buf);
        if (num <= 0) {
          break;
        }
        if (CheckpointFaultInjector.getInstance()
            .shouldCorruptAByte(localfile)) {
          // Simulate a corrupted byte on the wire
          LOG.warn("SIMULATING A CORRUPT BYTE IN IMAGE TRANSFER!");
          buf[0]++;
        }

        out.write(buf, 0, num);
        if (throttler != null) {
          throttler.throttle(num);
        }
      }
    } finally {
      if (out != null) {
        out.close();
      }
    }
  }

  /**
   * Client-side method to fetch a file from the server.
   * Copies the response from the URL to a list of local files.
   *
   * @param dstStorage if an error occurs writing to one of the files,
   *                   this storage object will be notified.
   * @return a digest of the received file if getChecksum is true
   */
  static MD5Hash getFileClient(URL infoServer,
      String queryString, List<File> localPaths,
      Storage dstStorage, boolean getChecksum) throws IOException {
    URL url = new URL(infoServer, ImageServlet.PATH_SPEC + "?" + queryString);
    LOG.info("Opening connection to " + url);
    return doGetUrl(url, localPaths, dstStorage, getChecksum);
  }

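  /**
   * Fetches the file at the given URL over HTTP GET and writes it to the
   * given local paths, verifying the number of bytes received against the
   * Content-Length header advertised by the server.
   *
   * @param url the URL to fetch
   * @param localPaths the local files (or directories) to write to
   * @param dstStorage notified if an error occurs writing to one of the
   *                   files; may be null
   * @param getChecksum whether to compute an MD5 digest of the received file
   * @return a digest of the received file if getChecksum is true, else null
   * @throws IOException if the request or the transfer fails
   */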
  public static MD5Hash doGetUrl(URL url, List<File> localPaths,
      Storage dstStorage, boolean getChecksum) throws IOException {
    HttpURLConnection connection;
    try {
      connection = (HttpURLConnection)
          connectionFactory.openConnection(url, isSpnegoEnabled);
    } catch (AuthenticationException e) {
      throw new IOException(e);
    }

    setTimeout(connection);

    if (connection.getResponseCode() != HttpURLConnection.HTTP_OK) {
      throw new HttpGetFailedException(
          "Image transfer servlet at " + url +
          " failed with status code " + connection.getResponseCode() +
          "\nResponse message:\n" + connection.getResponseMessage(),
          connection);
    }

    long advertisedSize;
    String contentLength = connection.getHeaderField(CONTENT_LENGTH);
    if (contentLength != null) {
      advertisedSize = Long.parseLong(contentLength);
    } else {
      throw new IOException(CONTENT_LENGTH + " header is not provided " +
          "by the namenode when trying to fetch " + url);
    }
    MD5Hash advertisedDigest = parseMD5Header(connection);
    String fsImageName = connection
        .getHeaderField(ImageServlet.HADOOP_IMAGE_EDITS_HEADER);
    InputStream stream = connection.getInputStream();

    return receiveFile(url.toExternalForm(), localPaths, dstStorage,
        getChecksum, advertisedSize, advertisedDigest, fsImageName, stream,
        null);
  }

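  /**
   * Sets the connect and read timeouts on the connection. The timeout is
   * read from {@link DFSConfigKeys#DFS_IMAGE_TRANSFER_TIMEOUT_KEY} on first
   * use and cached for subsequent calls.
   */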
  private static void setTimeout(HttpURLConnection connection) {
    if (timeout <= 0) {
      Configuration conf = new HdfsConfiguration();
      timeout = conf.getInt(DFSConfigKeys.DFS_IMAGE_TRANSFER_TIMEOUT_KEY,
          DFSConfigKeys.DFS_IMAGE_TRANSFER_TIMEOUT_DEFAULT);
      LOG.info("Image Transfer timeout configured to " + timeout
          + " milliseconds");
    }

    if (timeout > 0) {
      connection.setConnectTimeout(timeout);
      connection.setReadTimeout(timeout);
    }
  }

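  /**
   * Reads the file from the given stream and writes it to every local path,
   * optionally computing an MD5 digest and throttling the transfer. If a
   * local path is a directory, the server-provided file name is used within
   * that directory. Verifies that the number of bytes received matches the
   * advertised size, and that the computed digest matches the advertised
   * digest when both are available.
   *
   * @return the computed MD5 digest if getChecksum is true, else null
   */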
  private static MD5Hash receiveFile(String url, List<File> localPaths,
      Storage dstStorage, boolean getChecksum, long advertisedSize,
      MD5Hash advertisedDigest, String fsImageName, InputStream stream,
      DataTransferThrottler throttler) throws IOException {
    long startTime = Time.monotonicNow();
    if (localPaths != null) {
      // If the local paths refer to directories, use the server-provided header
      // as the filename within that directory
      List<File> newLocalPaths = new ArrayList<File>();
      for (File localPath : localPaths) {
        if (localPath.isDirectory()) {
          if (fsImageName == null) {
            throw new IOException("No filename header provided by server");
          }
          newLocalPaths.add(new File(localPath, fsImageName));
        } else {
          newLocalPaths.add(localPath);
        }
      }
      localPaths = newLocalPaths;
    }

    long received = 0;
    MessageDigest digester = null;
    if (getChecksum) {
      digester = MD5Hash.getDigester();
      stream = new DigestInputStream(stream, digester);
    }
    boolean finishedReceiving = false;

    List<FileOutputStream> outputStreams = Lists.newArrayList();

    try {
      if (localPaths != null) {
        for (File f : localPaths) {
          try {
            if (f.exists()) {
              LOG.warn("Overwriting existing file " + f
                  + " with file downloaded from " + url);
            }
            outputStreams.add(new FileOutputStream(f));
          } catch (IOException ioe) {
            LOG.warn("Unable to download file " + f, ioe);
            // This will be null if we're downloading the fsimage to a file
            // outside of an NNStorage directory.
            if (dstStorage != null &&
                (dstStorage instanceof StorageErrorReporter)) {
              ((StorageErrorReporter) dstStorage).reportErrorOnFile(f);
            }
          }
        }

        if (outputStreams.isEmpty()) {
          throw new IOException(
              "Unable to download to any storage directory");
        }
      }

      int num = 1;
      byte[] buf = new byte[HdfsConstants.IO_FILE_BUFFER_SIZE];
      while (num > 0) {
        num = stream.read(buf);
        if (num > 0) {
          received += num;
          for (FileOutputStream fos : outputStreams) {
            fos.write(buf, 0, num);
          }
          if (throttler != null) {
            throttler.throttle(num);
          }
        }
      }
      finishedReceiving = true;
    } finally {
      stream.close();
      for (FileOutputStream fos : outputStreams) {
        fos.getChannel().force(true);
        fos.close();
      }
      if (finishedReceiving && received != advertisedSize) {
        // only throw this exception if we think we read all of it on our end
        // -- otherwise a client-side IOException would be masked by this
        // exception that makes it look like a server-side problem!
        throw new IOException("File " + url + " received length " + received +
            " is not of the advertised size " +
            advertisedSize);
      }
    }
    double xferSec = Math.max(
        ((float) (Time.monotonicNow() - startTime)) / 1000.0, 0.001);
    long xferKb = received / 1024;
    LOG.info(String.format("Transfer took %.2fs at %.2f KB/s",
        xferSec, xferKb / xferSec));

    if (digester != null) {
      MD5Hash computedDigest = new MD5Hash(digester.digest());

      if (advertisedDigest != null &&
          !computedDigest.equals(advertisedDigest)) {
        throw new IOException("File " + url + " computed digest " +
            computedDigest + " does not match advertised digest " +
            advertisedDigest);
      }
      return computedDigest;
    } else {
      return null;
    }
  }

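  /**
   * Parses the MD5 digest advertised in the {@value #MD5_HEADER} header,
   * or returns null if the header is absent.
   */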
  private static MD5Hash parseMD5Header(HttpURLConnection connection) {
    String header = connection.getHeaderField(MD5_HEADER);
    return (header != null) ? new MD5Hash(header) : null;
  }

  private static MD5Hash parseMD5Header(HttpServletRequest request) {
    String header = request.getHeader(MD5_HEADER);
    return (header != null) ? new MD5Hash(header) : null;
  }

  public static class HttpGetFailedException extends IOException {
    private static final long serialVersionUID = 1L;
    private final int responseCode;

    HttpGetFailedException(String msg, HttpURLConnection connection) throws IOException {
      super(msg);
      this.responseCode = connection.getResponseCode();
    }

    public int getResponseCode() {
      return responseCode;
    }
  }

  public static class HttpPutFailedException extends IOException {
    private static final long serialVersionUID = 1L;
    private final int responseCode;

    HttpPutFailedException(String msg, int responseCode) throws IOException {
      super(msg);
      this.responseCode = responseCode;
    }

    public int getResponseCode() {
      return responseCode;
    }
  }

}