001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.hdfs.server.namenode;
019
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.net.URI;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Properties;
import java.util.UUID;
import java.util.concurrent.CopyOnWriteArrayList;
035
036 import org.apache.hadoop.classification.InterfaceAudience;
037 import org.apache.hadoop.conf.Configuration;
038 import org.apache.hadoop.fs.FileUtil;
039 import org.apache.hadoop.hdfs.DFSUtil;
040 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
041 import org.apache.hadoop.hdfs.protocol.LayoutVersion;
042 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
043 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
044 import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
045 import org.apache.hadoop.hdfs.server.common.Storage;
046 import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
047 import org.apache.hadoop.hdfs.server.common.Util;
048 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
049 import org.apache.hadoop.hdfs.util.PersistentLongFile;
050 import org.apache.hadoop.io.IOUtils;
051 import org.apache.hadoop.net.DNS;
052 import org.apache.hadoop.util.Time;
053
054 import com.google.common.annotations.VisibleForTesting;
055 import com.google.common.base.Preconditions;
056 import com.google.common.collect.Lists;
057
058 /**
059 * NNStorage is responsible for management of the StorageDirectories used by
060 * the NameNode.
061 */
062 @InterfaceAudience.Private
063 public class NNStorage extends Storage implements Closeable,
064 StorageErrorReporter {
/**
 * Key of the image MD5 digest property that is pulled out of the VERSION
 * properties as a deprecated, upgrade-only value.
 */
static final String DEPRECATED_MESSAGE_DIGEST_PROPERTY = "imageMD5Digest";
/** URI scheme string for local files (not referenced in the visible part of this class). */
static final String LOCAL_URI_SCHEME = "file";
067
068 //
069 // The filenames used for storing the images
070 //
071 public enum NameNodeFile {
072 IMAGE ("fsimage"),
073 TIME ("fstime"), // from "old" pre-HDFS-1073 format
074 SEEN_TXID ("seen_txid"),
075 EDITS ("edits"),
076 IMAGE_NEW ("fsimage.ckpt"),
077 IMAGE_ROLLBACK("fsimage_rollback"),
078 EDITS_NEW ("edits.new"), // from "old" pre-HDFS-1073 format
079 EDITS_INPROGRESS ("edits_inprogress"),
080 EDITS_TMP ("edits_tmp");
081
082 private String fileName = null;
083 private NameNodeFile(String name) { this.fileName = name; }
084 @VisibleForTesting
085 public String getName() { return fileName; }
086 }
087
088 /**
089 * Implementation of StorageDirType specific to namenode storage
090 * A Storage directory could be of type IMAGE which stores only fsimage,
091 * or of type EDITS which stores edits or of type IMAGE_AND_EDITS which
092 * stores both fsimage and edits.
093 */
094 @VisibleForTesting
095 public static enum NameNodeDirType implements StorageDirType {
096 UNDEFINED,
097 IMAGE,
098 EDITS,
099 IMAGE_AND_EDITS;
100
101 @Override
102 public StorageDirType getStorageDirType() {
103 return this;
104 }
105
106 @Override
107 public boolean isOfType(StorageDirType type) {
108 if ((this == IMAGE_AND_EDITS) && (type == IMAGE || type == EDITS))
109 return true;
110 return this == type;
111 }
112 }
113
/** Id of the block pool; the empty string means "not set yet". */
protected String blockpoolID = ""; // id of the block pool

/**
 * Flag that controls whether we try to restore failed storages.
 */
private boolean restoreFailedStorage = false;
/** Held while restoring removed storages so only one thread restores at a time. */
private final Object restorationLock = new Object();
/** When true, isPreUpgradableLayout() always answers false. */
private boolean disablePreUpgradableLayoutCheck = false;


/**
 * TxId of the last transaction that was included in the most
 * recent fsimage file. This does not include any transactions
 * that have since been written to the edit log.
 */
protected volatile long mostRecentCheckpointTxId = HdfsConstants.INVALID_TXID;

/**
 * Time of the last checkpoint, in milliseconds since the epoch.
 */
private long mostRecentCheckpointTime = 0;

/**
 * List of failed (and thus removed) storages.
 */
final protected List<StorageDirectory> removedStorageDirs
  = new CopyOnWriteArrayList<StorageDirectory>();

/**
 * Properties from old layout versions that may be needed
 * during upgrade only. Stays null until properties have been read
 * from a storage directory.
 */
private HashMap<String, String> deprecatedProperties;
147
148 /**
149 * Construct the NNStorage.
150 * @param conf Namenode configuration.
151 * @param imageDirs Directories the image can be stored in.
152 * @param editsDirs Directories the editlog can be stored in.
153 * @throws IOException if any directories are inaccessible.
154 */
155 public NNStorage(Configuration conf,
156 Collection<URI> imageDirs, Collection<URI> editsDirs)
157 throws IOException {
158 super(NodeType.NAME_NODE);
159
160 storageDirs = new CopyOnWriteArrayList<StorageDirectory>();
161
162 // this may modify the editsDirs, so copy before passing in
163 setStorageDirectories(imageDirs,
164 Lists.newArrayList(editsDirs),
165 FSNamesystem.getSharedEditsDirs(conf));
166 }
167
168 @Override // Storage
169 public boolean isPreUpgradableLayout(StorageDirectory sd) throws IOException {
170 if (disablePreUpgradableLayoutCheck) {
171 return false;
172 }
173
174 File oldImageDir = new File(sd.getRoot(), "image");
175 if (!oldImageDir.exists()) {
176 return false;
177 }
178 // check the layout version inside the image file
179 File oldF = new File(oldImageDir, "fsimage");
180 RandomAccessFile oldFile = new RandomAccessFile(oldF, "rws");
181 try {
182 oldFile.seek(0);
183 int oldVersion = oldFile.readInt();
184 oldFile.close();
185 oldFile = null;
186 if (oldVersion < LAST_PRE_UPGRADE_LAYOUT_VERSION)
187 return false;
188 } finally {
189 IOUtils.cleanup(LOG, oldFile);
190 }
191 return true;
192 }
193
194 @Override // Closeable
195 public void close() throws IOException {
196 unlockAll();
197 storageDirs.clear();
198 }
199
200 /**
201 * Set flag whether an attempt should be made to restore failed storage
202 * directories at the next available oppurtuinity.
203 *
204 * @param val Whether restoration attempt should be made.
205 */
206 void setRestoreFailedStorage(boolean val) {
207 LOG.warn("set restore failed storage to " + val);
208 restoreFailedStorage=val;
209 }
210
211 /**
212 * @return Whether failed storage directories are to be restored.
213 */
214 boolean getRestoreFailedStorage() {
215 return restoreFailedStorage;
216 }
217
218 /**
219 * See if any of removed storages is "writable" again, and can be returned
220 * into service.
221 */
222 void attemptRestoreRemovedStorage() {
223 // if directory is "alive" - copy the images there...
224 if(!restoreFailedStorage || removedStorageDirs.size() == 0)
225 return; //nothing to restore
226
227 /* We don't want more than one thread trying to restore at a time */
228 synchronized (this.restorationLock) {
229 LOG.info("NNStorage.attemptRestoreRemovedStorage: check removed(failed) "+
230 "storarge. removedStorages size = " + removedStorageDirs.size());
231 for(Iterator<StorageDirectory> it
232 = this.removedStorageDirs.iterator(); it.hasNext();) {
233 StorageDirectory sd = it.next();
234 File root = sd.getRoot();
235 LOG.info("currently disabled dir " + root.getAbsolutePath() +
236 "; type="+sd.getStorageDirType()
237 + ";canwrite="+FileUtil.canWrite(root));
238 if(root.exists() && FileUtil.canWrite(root)) {
239 LOG.info("restoring dir " + sd.getRoot().getAbsolutePath());
240 this.addStorageDir(sd); // restore
241 this.removedStorageDirs.remove(sd);
242 }
243 }
244 }
245 }
246
247 /**
248 * @return A list of storage directories which are in the errored state.
249 */
250 List<StorageDirectory> getRemovedStorageDirs() {
251 return this.removedStorageDirs;
252 }
253
254 /**
255 * See {@link NNStorage#setStorageDirectories(Collection, Collection, Collection)}
256 */
257 @VisibleForTesting
258 synchronized void setStorageDirectories(Collection<URI> fsNameDirs,
259 Collection<URI> fsEditsDirs)
260 throws IOException {
261 setStorageDirectories(fsNameDirs, fsEditsDirs, new ArrayList<URI>());
262 }
263
264 /**
265 * Set the storage directories which will be used. This should only ever be
266 * called from inside NNStorage. However, it needs to remain package private
267 * for testing, as StorageDirectories need to be reinitialised after using
268 * Mockito.spy() on this class, as Mockito doesn't work well with inner
269 * classes, such as StorageDirectory in this case.
270 *
271 * Synchronized due to initialization of storageDirs and removedStorageDirs.
272 *
273 * @param fsNameDirs Locations to store images.
274 * @param fsEditsDirs Locations to store edit logs.
275 * @throws IOException
276 */
277 @VisibleForTesting
278 synchronized void setStorageDirectories(Collection<URI> fsNameDirs,
279 Collection<URI> fsEditsDirs,
280 Collection<URI> sharedEditsDirs)
281 throws IOException {
282 this.storageDirs.clear();
283 this.removedStorageDirs.clear();
284
285 // Add all name dirs with appropriate NameNodeDirType
286 for (URI dirName : fsNameDirs) {
287 checkSchemeConsistency(dirName);
288 boolean isAlsoEdits = false;
289 for (URI editsDirName : fsEditsDirs) {
290 if (editsDirName.compareTo(dirName) == 0) {
291 isAlsoEdits = true;
292 fsEditsDirs.remove(editsDirName);
293 break;
294 }
295 }
296 NameNodeDirType dirType = (isAlsoEdits) ?
297 NameNodeDirType.IMAGE_AND_EDITS :
298 NameNodeDirType.IMAGE;
299 // Add to the list of storage directories, only if the
300 // URI is of type file://
301 if(dirName.getScheme().compareTo("file") == 0) {
302 this.addStorageDir(new StorageDirectory(new File(dirName.getPath()),
303 dirType,
304 sharedEditsDirs.contains(dirName))); // Don't lock the dir if it's shared.
305 }
306 }
307
308 // Add edits dirs if they are different from name dirs
309 for (URI dirName : fsEditsDirs) {
310 checkSchemeConsistency(dirName);
311 // Add to the list of storage directories, only if the
312 // URI is of type file://
313 if(dirName.getScheme().compareTo("file") == 0)
314 this.addStorageDir(new StorageDirectory(new File(dirName.getPath()),
315 NameNodeDirType.EDITS, sharedEditsDirs.contains(dirName)));
316 }
317 }
318
319 /**
320 * Return the storage directory corresponding to the passed URI
321 * @param uri URI of a storage directory
322 * @return The matching storage directory or null if none found
323 */
324 StorageDirectory getStorageDirectory(URI uri) {
325 try {
326 uri = Util.fileAsURI(new File(uri));
327 Iterator<StorageDirectory> it = dirIterator();
328 for (; it.hasNext(); ) {
329 StorageDirectory sd = it.next();
330 if (Util.fileAsURI(sd.getRoot()).equals(uri)) {
331 return sd;
332 }
333 }
334 } catch (IOException ioe) {
335 LOG.warn("Error converting file to URI", ioe);
336 }
337 return null;
338 }
339
340 /**
341 * Checks the consistency of a URI, in particular if the scheme
342 * is specified
343 * @param u URI whose consistency is being checked.
344 */
345 private static void checkSchemeConsistency(URI u) throws IOException {
346 String scheme = u.getScheme();
347 // the URI should have a proper scheme
348 if(scheme == null) {
349 throw new IOException("Undefined scheme for " + u);
350 }
351 }
352
353 /**
354 * Retrieve current directories of type IMAGE
355 * @return Collection of URI representing image directories
356 * @throws IOException in case of URI processing error
357 */
358 Collection<URI> getImageDirectories() throws IOException {
359 return getDirectories(NameNodeDirType.IMAGE);
360 }
361
362 /**
363 * Retrieve current directories of type EDITS
364 * @return Collection of URI representing edits directories
365 * @throws IOException in case of URI processing error
366 */
367 Collection<URI> getEditsDirectories() throws IOException {
368 return getDirectories(NameNodeDirType.EDITS);
369 }
370
371 /**
372 * Return number of storage directories of the given type.
373 * @param dirType directory type
374 * @return number of storage directories of type dirType
375 */
376 int getNumStorageDirs(NameNodeDirType dirType) {
377 if(dirType == null)
378 return getNumStorageDirs();
379 Iterator<StorageDirectory> it = dirIterator(dirType);
380 int numDirs = 0;
381 for(; it.hasNext(); it.next())
382 numDirs++;
383 return numDirs;
384 }
385
386 /**
387 * Return the list of locations being used for a specific purpose.
388 * i.e. Image or edit log storage.
389 *
390 * @param dirType Purpose of locations requested.
391 * @throws IOException
392 */
393 Collection<URI> getDirectories(NameNodeDirType dirType)
394 throws IOException {
395 ArrayList<URI> list = new ArrayList<URI>();
396 Iterator<StorageDirectory> it = (dirType == null) ? dirIterator() :
397 dirIterator(dirType);
398 for ( ;it.hasNext(); ) {
399 StorageDirectory sd = it.next();
400 try {
401 list.add(Util.fileAsURI(sd.getRoot()));
402 } catch (IOException e) {
403 throw new IOException("Exception while processing " +
404 "StorageDirectory " + sd.getRoot(), e);
405 }
406 }
407 return list;
408 }
409
410 /**
411 * Determine the last transaction ID noted in this storage directory.
412 * This txid is stored in a special seen_txid file since it might not
413 * correspond to the latest image or edit log. For example, an image-only
414 * directory will have this txid incremented when edits logs roll, even
415 * though the edits logs are in a different directory.
416 *
417 * @param sd StorageDirectory to check
418 * @return If file exists and can be read, last recorded txid. If not, 0L.
419 * @throws IOException On errors processing file pointed to by sd
420 */
421 static long readTransactionIdFile(StorageDirectory sd) throws IOException {
422 File txidFile = getStorageFile(sd, NameNodeFile.SEEN_TXID);
423 return PersistentLongFile.readFile(txidFile, 0);
424 }
425
426 /**
427 * Write last checkpoint time into a separate file.
428 *
429 * @param sd
430 * @throws IOException
431 */
432 void writeTransactionIdFile(StorageDirectory sd, long txid) throws IOException {
433 Preconditions.checkArgument(txid >= 0, "bad txid: " + txid);
434
435 File txIdFile = getStorageFile(sd, NameNodeFile.SEEN_TXID);
436 PersistentLongFile.writeFile(txIdFile, txid);
437 }
438
439 /**
440 * Set the transaction ID and time of the last checkpoint
441 *
442 * @param txid transaction id of the last checkpoint
443 * @param time time of the last checkpoint, in millis since the epoch
444 */
445 void setMostRecentCheckpointInfo(long txid, long time) {
446 this.mostRecentCheckpointTxId = txid;
447 this.mostRecentCheckpointTime = time;
448 }
449
450 /**
451 * @return the transaction ID of the last checkpoint.
452 */
453 public long getMostRecentCheckpointTxId() {
454 return mostRecentCheckpointTxId;
455 }
456
457 /**
458 * @return the time of the most recent checkpoint in millis since the epoch.
459 */
460 long getMostRecentCheckpointTime() {
461 return mostRecentCheckpointTime;
462 }
463
464 /**
465 * Write a small file in all available storage directories that
466 * indicates that the namespace has reached some given transaction ID.
467 *
468 * This is used when the image is loaded to avoid accidental rollbacks
469 * in the case where an edit log is fully deleted but there is no
470 * checkpoint. See TestNameEditsConfigs.testNameEditsConfigsFailure()
471 * @param txid the txid that has been reached
472 */
473 public void writeTransactionIdFileToStorage(long txid) {
474 // Write txid marker in all storage directories
475 for (StorageDirectory sd : storageDirs) {
476 try {
477 writeTransactionIdFile(sd, txid);
478 } catch(IOException e) {
479 // Close any edits stream associated with this dir and remove directory
480 LOG.warn("writeTransactionIdToStorage failed on " + sd,
481 e);
482 reportErrorsOnDirectory(sd);
483 }
484 }
485 }
486
487 /**
488 * Return the name of the image file that is uploaded by periodic
489 * checkpointing
490 *
491 * @return List of filenames to save checkpoints to.
492 */
493 public File[] getFsImageNameCheckpoint(long txid) {
494 ArrayList<File> list = new ArrayList<File>();
495 for (Iterator<StorageDirectory> it =
496 dirIterator(NameNodeDirType.IMAGE); it.hasNext();) {
497 list.add(getStorageFile(it.next(), NameNodeFile.IMAGE_NEW, txid));
498 }
499 return list.toArray(new File[list.size()]);
500 }
501
502 /**
503 * @return The first image file with the given txid and image type.
504 */
505 public File getFsImageName(long txid, NameNodeFile nnf) {
506 for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE);
507 it.hasNext();) {
508 StorageDirectory sd = it.next();
509 File fsImage = getStorageFile(sd, nnf, txid);
510 if (FileUtil.canRead(sd.getRoot()) && fsImage.exists()) {
511 return fsImage;
512 }
513 }
514 return null;
515 }
516
517 /**
518 * @return The first image file whose txid is the same with the given txid and
519 * image type is one of the given types.
520 */
521 public File getFsImage(long txid, EnumSet<NameNodeFile> nnfs) {
522 for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE);
523 it.hasNext();) {
524 StorageDirectory sd = it.next();
525 for (NameNodeFile nnf : nnfs) {
526 File fsImage = getStorageFile(sd, nnf, txid);
527 if (FileUtil.canRead(sd.getRoot()) && fsImage.exists()) {
528 return fsImage;
529 }
530 }
531 }
532 return null;
533 }
534
535 public File getFsImageName(long txid) {
536 return getFsImageName(txid, NameNodeFile.IMAGE);
537 }
538
539 public File getHighestFsImageName() {
540 return getFsImageName(getMostRecentCheckpointTxId());
541 }
542
543 /** Create new dfs name directory. Caution: this destroys all files
544 * in this filesystem. */
545 private void format(StorageDirectory sd) throws IOException {
546 sd.clearDirectory(); // create currrent dir
547 writeProperties(sd);
548 writeTransactionIdFile(sd, 0);
549
550 LOG.info("Storage directory " + sd.getRoot()
551 + " has been successfully formatted.");
552 }
553
554 /**
555 * Format all available storage directories.
556 */
557 public void format(NamespaceInfo nsInfo) throws IOException {
558 Preconditions.checkArgument(nsInfo.getLayoutVersion() == 0 ||
559 nsInfo.getLayoutVersion() == HdfsConstants.NAMENODE_LAYOUT_VERSION,
560 "Bad layout version: %s", nsInfo.getLayoutVersion());
561
562 this.setStorageInfo(nsInfo);
563 this.blockpoolID = nsInfo.getBlockPoolID();
564 for (Iterator<StorageDirectory> it =
565 dirIterator(); it.hasNext();) {
566 StorageDirectory sd = it.next();
567 format(sd);
568 }
569 }
570
571 public static NamespaceInfo newNamespaceInfo()
572 throws UnknownHostException {
573 return new NamespaceInfo(newNamespaceID(), newClusterID(),
574 newBlockPoolID(), 0L);
575 }
576
577 public void format() throws IOException {
578 this.layoutVersion = HdfsConstants.NAMENODE_LAYOUT_VERSION;
579 for (Iterator<StorageDirectory> it =
580 dirIterator(); it.hasNext();) {
581 StorageDirectory sd = it.next();
582 format(sd);
583 }
584 }
585
586 /**
587 * Generate new namespaceID.
588 *
589 * namespaceID is a persistent attribute of the namespace.
590 * It is generated when the namenode is formatted and remains the same
591 * during the life cycle of the namenode.
592 * When a datanodes register they receive it as the registrationID,
593 * which is checked every time the datanode is communicating with the
594 * namenode. Datanodes that do not 'know' the namespaceID are rejected.
595 *
596 * @return new namespaceID
597 */
598 private static int newNamespaceID() {
599 int newID = 0;
600 while(newID == 0)
601 newID = DFSUtil.getRandom().nextInt(0x7FFFFFFF); // use 31 bits only
602 return newID;
603 }
604
605 @Override // Storage
606 protected void setFieldsFromProperties(
607 Properties props, StorageDirectory sd) throws IOException {
608 super.setFieldsFromProperties(props, sd);
609 if (layoutVersion == 0) {
610 throw new IOException("NameNode directory "
611 + sd.getRoot() + " is not formatted.");
612 }
613
614 // Set Block pool ID in version with federation support
615 if (NameNodeLayoutVersion.supports(
616 LayoutVersion.Feature.FEDERATION, getLayoutVersion())) {
617 String sbpid = props.getProperty("blockpoolID");
618 setBlockPoolID(sd.getRoot(), sbpid);
619 }
620 setDeprecatedPropertiesForUpgrade(props);
621 }
622
623 /**
624 * Pull any properties out of the VERSION file that are from older
625 * versions of HDFS and only necessary during upgrade.
626 */
627 private void setDeprecatedPropertiesForUpgrade(Properties props) {
628 deprecatedProperties = new HashMap<String, String>();
629 String md5 = props.getProperty(DEPRECATED_MESSAGE_DIGEST_PROPERTY);
630 if (md5 != null) {
631 deprecatedProperties.put(DEPRECATED_MESSAGE_DIGEST_PROPERTY, md5);
632 }
633 }
634
635 /**
636 * Return a property that was stored in an earlier version of HDFS.
637 *
638 * This should only be used during upgrades.
639 */
640 String getDeprecatedProperty(String prop) {
641 assert getLayoutVersion() > HdfsConstants.NAMENODE_LAYOUT_VERSION :
642 "getDeprecatedProperty should only be done when loading " +
643 "storage from past versions during upgrade.";
644 return deprecatedProperties.get(prop);
645 }
646
647 /**
648 * Write version file into the storage directory.
649 *
650 * The version file should always be written last.
651 * Missing or corrupted version file indicates that
652 * the checkpoint is not valid.
653 *
654 * @param sd storage directory
655 * @throws IOException
656 */
657 @Override // Storage
658 protected void setPropertiesFromFields(Properties props,
659 StorageDirectory sd
660 ) throws IOException {
661 super.setPropertiesFromFields(props, sd);
662 // Set blockpoolID in version with federation support
663 if (NameNodeLayoutVersion.supports(
664 LayoutVersion.Feature.FEDERATION, getLayoutVersion())) {
665 props.setProperty("blockpoolID", blockpoolID);
666 }
667 }
668
669 static File getStorageFile(StorageDirectory sd, NameNodeFile type, long imageTxId) {
670 return new File(sd.getCurrentDir(),
671 String.format("%s_%019d", type.getName(), imageTxId));
672 }
673
674 /**
675 * Get a storage file for one of the files that doesn't need a txid associated
676 * (e.g version, seen_txid)
677 */
678 static File getStorageFile(StorageDirectory sd, NameNodeFile type) {
679 return new File(sd.getCurrentDir(), type.getName());
680 }
681
682 @VisibleForTesting
683 public static String getCheckpointImageFileName(long txid) {
684 return getNameNodeFileName(NameNodeFile.IMAGE_NEW, txid);
685 }
686
687 @VisibleForTesting
688 public static String getImageFileName(long txid) {
689 return getNameNodeFileName(NameNodeFile.IMAGE, txid);
690 }
691
692 @VisibleForTesting
693 public static String getRollbackImageFileName(long txid) {
694 return getNameNodeFileName(NameNodeFile.IMAGE_ROLLBACK, txid);
695 }
696
697 private static String getNameNodeFileName(NameNodeFile nnf, long txid) {
698 return String.format("%s_%019d", nnf.getName(), txid);
699 }
700
701 @VisibleForTesting
702 public static String getInProgressEditsFileName(long startTxId) {
703 return getNameNodeFileName(NameNodeFile.EDITS_INPROGRESS, startTxId);
704 }
705
706 static File getInProgressEditsFile(StorageDirectory sd, long startTxId) {
707 return new File(sd.getCurrentDir(), getInProgressEditsFileName(startTxId));
708 }
709
710 static File getFinalizedEditsFile(StorageDirectory sd,
711 long startTxId, long endTxId) {
712 return new File(sd.getCurrentDir(),
713 getFinalizedEditsFileName(startTxId, endTxId));
714 }
715
716 static File getTemporaryEditsFile(StorageDirectory sd,
717 long startTxId, long endTxId, long timestamp) {
718 return new File(sd.getCurrentDir(),
719 getTemporaryEditsFileName(startTxId, endTxId, timestamp));
720 }
721
722 static File getImageFile(StorageDirectory sd, NameNodeFile nnf, long txid) {
723 return new File(sd.getCurrentDir(), getNameNodeFileName(nnf, txid));
724 }
725
726 @VisibleForTesting
727 public static String getFinalizedEditsFileName(long startTxId, long endTxId) {
728 return String.format("%s_%019d-%019d", NameNodeFile.EDITS.getName(),
729 startTxId, endTxId);
730 }
731
732 public static String getTemporaryEditsFileName(long startTxId, long endTxId,
733 long timestamp) {
734 return String.format("%s_%019d-%019d_%019d", NameNodeFile.EDITS_TMP.getName(),
735 startTxId, endTxId, timestamp);
736 }
737
738 /**
739 * Return the first readable finalized edits file for the given txid.
740 */
741 File findFinalizedEditsFile(long startTxId, long endTxId)
742 throws IOException {
743 File ret = findFile(NameNodeDirType.EDITS,
744 getFinalizedEditsFileName(startTxId, endTxId));
745 if (ret == null) {
746 throw new IOException(
747 "No edits file for txid " + startTxId + "-" + endTxId + " exists!");
748 }
749 return ret;
750 }
751
752 /**
753 * Return the first readable image file for the given txid and image type, or
754 * null if no such image can be found
755 */
756 File findImageFile(NameNodeFile nnf, long txid) {
757 return findFile(NameNodeDirType.IMAGE,
758 getNameNodeFileName(nnf, txid));
759 }
760
761 /**
762 * Return the first readable storage file of the given name
763 * across any of the 'current' directories in SDs of the
764 * given type, or null if no such file exists.
765 */
766 private File findFile(NameNodeDirType dirType, String name) {
767 for (StorageDirectory sd : dirIterable(dirType)) {
768 File candidate = new File(sd.getCurrentDir(), name);
769 if (FileUtil.canRead(sd.getCurrentDir()) &&
770 candidate.exists()) {
771 return candidate;
772 }
773 }
774 return null;
775 }
776
777 /**
778 * Disable the check for pre-upgradable layouts. Needed for BackupImage.
779 * @param val Whether to disable the preupgradeable layout check.
780 */
781 void setDisablePreUpgradableLayoutCheck(boolean val) {
782 disablePreUpgradableLayoutCheck = val;
783 }
784
785 /**
786 * Marks a list of directories as having experienced an error.
787 *
788 * @param sds A list of storage directories to mark as errored.
789 * @throws IOException
790 */
791 void reportErrorsOnDirectories(List<StorageDirectory> sds) {
792 for (StorageDirectory sd : sds) {
793 reportErrorsOnDirectory(sd);
794 }
795 }
796
797 /**
798 * Reports that a directory has experienced an error.
799 * Notifies listeners that the directory is no longer
800 * available.
801 *
802 * @param sd A storage directory to mark as errored.
803 * @throws IOException
804 */
805 private void reportErrorsOnDirectory(StorageDirectory sd) {
806 LOG.error("Error reported on storage directory " + sd);
807
808 String lsd = listStorageDirectories();
809 LOG.debug("current list of storage dirs:" + lsd);
810
811 LOG.warn("About to remove corresponding storage: "
812 + sd.getRoot().getAbsolutePath());
813 try {
814 sd.unlock();
815 } catch (Exception e) {
816 LOG.warn("Unable to unlock bad storage directory: "
817 + sd.getRoot().getPath(), e);
818 }
819
820 if (this.storageDirs.remove(sd)) {
821 this.removedStorageDirs.add(sd);
822 }
823
824 lsd = listStorageDirectories();
825 LOG.debug("at the end current list of storage dirs:" + lsd);
826 }
827
828 /**
829 * Processes the startup options for the clusterid and blockpoolid
830 * for the upgrade.
831 * @param startOpt Startup options
832 * @param layoutVersion Layout version for the upgrade
833 * @throws IOException
834 */
835 void processStartupOptionsForUpgrade(StartupOption startOpt, int layoutVersion)
836 throws IOException {
837 if (startOpt == StartupOption.UPGRADE) {
838 // If upgrade from a release that does not support federation,
839 // if clusterId is provided in the startupOptions use it.
840 // Else generate a new cluster ID
841 if (!NameNodeLayoutVersion.supports(
842 LayoutVersion.Feature.FEDERATION, layoutVersion)) {
843 if (startOpt.getClusterId() == null) {
844 startOpt.setClusterId(newClusterID());
845 }
846 setClusterID(startOpt.getClusterId());
847 setBlockPoolID(newBlockPoolID());
848 } else {
849 // Upgrade from one version of federation to another supported
850 // version of federation doesn't require clusterID.
851 // Warn the user if the current clusterid didn't match with the input
852 // clusterid.
853 if (startOpt.getClusterId() != null
854 && !startOpt.getClusterId().equals(getClusterID())) {
855 LOG.warn("Clusterid mismatch - current clusterid: " + getClusterID()
856 + ", Ignoring given clusterid: " + startOpt.getClusterId());
857 }
858 }
859 LOG.info("Using clusterid: " + getClusterID());
860 }
861 }
862
863 /**
864 * Report that an IOE has occurred on some file which may
865 * or may not be within one of the NN image storage directories.
866 */
867 @Override
868 public void reportErrorOnFile(File f) {
869 // We use getAbsolutePath here instead of getCanonicalPath since we know
870 // that there is some IO problem on that drive.
871 // getCanonicalPath may need to call stat() or readlink() and it's likely
872 // those calls would fail due to the same underlying IO problem.
873 String absPath = f.getAbsolutePath();
874 for (StorageDirectory sd : storageDirs) {
875 String dirPath = sd.getRoot().getAbsolutePath();
876 if (!dirPath.endsWith(File.separator)) {
877 dirPath += File.separator;
878 }
879 if (absPath.startsWith(dirPath)) {
880 reportErrorsOnDirectory(sd);
881 return;
882 }
883 }
884
885 }
886
887 /**
888 * Generate new clusterID.
889 *
890 * clusterID is a persistent attribute of the cluster.
891 * It is generated when the cluster is created and remains the same
892 * during the life cycle of the cluster. When a new name node is formated, if
893 * this is a new cluster, a new clusterID is geneated and stored. Subsequent
894 * name node must be given the same ClusterID during its format to be in the
895 * same cluster.
896 * When a datanode register it receive the clusterID and stick with it.
897 * If at any point, name node or data node tries to join another cluster, it
898 * will be rejected.
899 *
900 * @return new clusterID
901 */
902 public static String newClusterID() {
903 return "CID-" + UUID.randomUUID().toString();
904 }
905
906 void setClusterID(String cid) {
907 clusterID = cid;
908 }
909
910 /**
911 * try to find current cluster id in the VERSION files
912 * returns first cluster id found in any VERSION file
913 * null in case none found
914 * @return clusterId or null in case no cluster id found
915 */
916 public String determineClusterId() {
917 String cid = null;
918 Iterator<StorageDirectory> sdit = dirIterator(NameNodeDirType.IMAGE);
919 while(sdit.hasNext()) {
920 StorageDirectory sd = sdit.next();
921 try {
922 Properties props = readPropertiesFile(sd.getVersionFile());
923 cid = props.getProperty("clusterID");
924 LOG.info("current cluster id for sd="+sd.getCurrentDir() +
925 ";lv=" + layoutVersion + ";cid=" + cid);
926
927 if(cid != null && !cid.equals(""))
928 return cid;
929 } catch (Exception e) {
930 LOG.warn("this sd not available: " + e.getLocalizedMessage());
931 } //ignore
932 }
933 LOG.warn("couldn't find any VERSION file containing valid ClusterId");
934 return null;
935 }
936
937 /**
938 * Generate new blockpoolID.
939 *
940 * @return new blockpoolID
941 */
942 static String newBlockPoolID() throws UnknownHostException{
943 String ip = "unknownIP";
944 try {
945 ip = DNS.getDefaultIP("default");
946 } catch (UnknownHostException e) {
947 LOG.warn("Could not find ip address of \"default\" inteface.");
948 throw e;
949 }
950
951 int rand = DFSUtil.getSecureRandom().nextInt(Integer.MAX_VALUE);
952 String bpid = "BP-" + rand + "-"+ ip + "-" + Time.now();
953 return bpid;
954 }
955
956 /** Validate and set block pool ID */
957 void setBlockPoolID(String bpid) {
958 blockpoolID = bpid;
959 }
960
961 /** Validate and set block pool ID */
962 private void setBlockPoolID(File storage, String bpid)
963 throws InconsistentFSStateException {
964 if (bpid == null || bpid.equals("")) {
965 throw new InconsistentFSStateException(storage, "file "
966 + Storage.STORAGE_FILE_VERSION + " has no block pool Id.");
967 }
968
969 if (!blockpoolID.equals("") && !blockpoolID.equals(bpid)) {
970 throw new InconsistentFSStateException(storage,
971 "Unexepcted blockpoolID " + bpid + " . Expected " + blockpoolID);
972 }
973 setBlockPoolID(bpid);
974 }
975
976 public String getBlockPoolID() {
977 return blockpoolID;
978 }
979
980 /**
981 * Iterate over all current storage directories, inspecting them
982 * with the given inspector.
983 */
984 void inspectStorageDirs(FSImageStorageInspector inspector)
985 throws IOException {
986
987 // Process each of the storage directories to find the pair of
988 // newest image file and edit file
989 for (Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) {
990 StorageDirectory sd = it.next();
991 inspector.inspectDirectory(sd);
992 }
993 }
994
995 /**
996 * Iterate over all of the storage dirs, reading their contents to determine
997 * their layout versions. Returns an FSImageStorageInspector which has
998 * inspected each directory.
999 *
1000 * <b>Note:</b> this can mutate the storage info fields (ctime, version, etc).
1001 * @throws IOException if no valid storage dirs are found or no valid layout version
1002 */
1003 FSImageStorageInspector readAndInspectDirs(EnumSet<NameNodeFile> fileTypes)
1004 throws IOException {
1005 Integer layoutVersion = null;
1006 boolean multipleLV = false;
1007 StringBuilder layoutVersions = new StringBuilder();
1008
1009 // First determine what range of layout versions we're going to inspect
1010 for (Iterator<StorageDirectory> it = dirIterator(false);
1011 it.hasNext();) {
1012 StorageDirectory sd = it.next();
1013 if (!sd.getVersionFile().exists()) {
1014 FSImage.LOG.warn("Storage directory " + sd + " contains no VERSION file. Skipping...");
1015 continue;
1016 }
1017 readProperties(sd); // sets layoutVersion
1018 int lv = getLayoutVersion();
1019 if (layoutVersion == null) {
1020 layoutVersion = Integer.valueOf(lv);
1021 } else if (!layoutVersion.equals(lv)) {
1022 multipleLV = true;
1023 }
1024 layoutVersions.append("(").append(sd.getRoot()).append(", ").append(lv).append(") ");
1025 }
1026
1027 if (layoutVersion == null) {
1028 throw new IOException("No storage directories contained VERSION information");
1029 }
1030 if (multipleLV) {
1031 throw new IOException(
1032 "Storage directories contain multiple layout versions: "
1033 + layoutVersions);
1034 }
1035 // If the storage directories are with the new layout version
1036 // (ie edits_<txnid>) then use the new inspector, which will ignore
1037 // the old format dirs.
1038 FSImageStorageInspector inspector;
1039 if (NameNodeLayoutVersion.supports(
1040 LayoutVersion.Feature.TXID_BASED_LAYOUT, getLayoutVersion())) {
1041 inspector = new FSImageTransactionalStorageInspector(fileTypes);
1042 } else {
1043 inspector = new FSImagePreTransactionalStorageInspector();
1044 }
1045
1046 inspectStorageDirs(inspector);
1047 return inspector;
1048 }
1049
1050 public NamespaceInfo getNamespaceInfo() {
1051 return new NamespaceInfo(
1052 getNamespaceID(),
1053 getClusterID(),
1054 getBlockPoolID(),
1055 getCTime());
1056 }
1057 }