001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hdfs.server.namenode; 019 020import java.io.Closeable; 021import java.io.File; 022import java.io.IOException; 023import java.io.RandomAccessFile; 024import java.net.URI; 025import java.net.UnknownHostException; 026import java.util.ArrayList; 027import java.util.Collection; 028import java.util.EnumSet; 029import java.util.HashMap; 030import java.util.Iterator; 031import java.util.List; 032import java.util.Map; 033import java.util.Properties; 034import java.util.UUID; 035import java.util.concurrent.CopyOnWriteArrayList; 036import java.util.concurrent.ThreadLocalRandom; 037 038import org.apache.hadoop.classification.InterfaceAudience; 039import org.apache.hadoop.conf.Configuration; 040import org.apache.hadoop.fs.FileUtil; 041import org.apache.hadoop.hdfs.DFSUtil; 042import org.apache.hadoop.hdfs.protocol.LayoutVersion; 043import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; 044import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType; 045import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; 046import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException; 047import org.apache.hadoop.hdfs.server.common.IncorrectVersionException; 048import org.apache.hadoop.hdfs.server.common.Storage; 049import org.apache.hadoop.hdfs.server.common.StorageErrorReporter; 050import org.apache.hadoop.hdfs.server.common.Util; 051import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; 052import org.apache.hadoop.hdfs.util.PersistentLongFile; 053import org.apache.hadoop.io.IOUtils; 054import org.apache.hadoop.net.DNS; 055import org.apache.hadoop.util.Time; 056import org.mortbay.util.ajax.JSON; 057 058import com.google.common.annotations.VisibleForTesting; 059import com.google.common.base.Preconditions; 060import com.google.common.collect.Lists; 061 062/** 063 * NNStorage is responsible for management of the StorageDirectories used by 064 * the NameNode. 065 */ 066@InterfaceAudience.Private 067public class NNStorage extends Storage implements Closeable, 068 StorageErrorReporter { 069 static final String DEPRECATED_MESSAGE_DIGEST_PROPERTY = "imageMD5Digest"; 070 static final String LOCAL_URI_SCHEME = "file"; 071 072 // 073 // The filenames used for storing the images 074 // 075 public enum NameNodeFile { 076 IMAGE ("fsimage"), 077 TIME ("fstime"), // from "old" pre-HDFS-1073 format 078 SEEN_TXID ("seen_txid"), 079 EDITS ("edits"), 080 IMAGE_NEW ("fsimage.ckpt"), 081 IMAGE_ROLLBACK("fsimage_rollback"), 082 EDITS_NEW ("edits.new"), // from "old" pre-HDFS-1073 format 083 EDITS_INPROGRESS ("edits_inprogress"), 084 EDITS_TMP ("edits_tmp"), 085 IMAGE_LEGACY_OIV ("fsimage_legacy_oiv"); // For pre-PB format 086 087 private String fileName = null; 088 private NameNodeFile(String name) { this.fileName = name; } 089 @VisibleForTesting 090 public String getName() { return fileName; } 091 } 092 093 /** 094 * Implementation of StorageDirType specific to namenode storage 095 * A Storage directory could be of type IMAGE which stores only fsimage, 096 * or of type EDITS which stores edits or of type IMAGE_AND_EDITS which 097 * stores both fsimage and edits. 098 */ 099 @VisibleForTesting 100 public static enum NameNodeDirType implements StorageDirType { 101 UNDEFINED, 102 IMAGE, 103 EDITS, 104 IMAGE_AND_EDITS; 105 106 @Override 107 public StorageDirType getStorageDirType() { 108 return this; 109 } 110 111 @Override 112 public boolean isOfType(StorageDirType type) { 113 if ((this == IMAGE_AND_EDITS) && (type == IMAGE || type == EDITS)) 114 return true; 115 return this == type; 116 } 117 } 118 119 protected String blockpoolID = ""; // id of the block pool 120 121 /** 122 * flag that controls if we try to restore failed storages 123 */ 124 private boolean restoreFailedStorage = false; 125 private final Object restorationLock = new Object(); 126 private boolean disablePreUpgradableLayoutCheck = false; 127 128 129 /** 130 * TxId of the last transaction that was included in the most 131 * recent fsimage file. This does not include any transactions 132 * that have since been written to the edit log. 133 */ 134 protected volatile long mostRecentCheckpointTxId = HdfsServerConstants.INVALID_TXID; 135 136 /** 137 * Time of the last checkpoint, in milliseconds since the epoch. 138 */ 139 private long mostRecentCheckpointTime = 0; 140 141 /** 142 * list of failed (and thus removed) storages 143 */ 144 final protected List<StorageDirectory> removedStorageDirs 145 = new CopyOnWriteArrayList<StorageDirectory>(); 146 147 /** 148 * Properties from old layout versions that may be needed 149 * during upgrade only. 150 */ 151 private HashMap<String, String> deprecatedProperties; 152 153 /** 154 * Name directories size for metric. 155 */ 156 private Map<String, Long> nameDirSizeMap = new HashMap<>(); 157 158 /** 159 * Construct the NNStorage. 160 * @param conf Namenode configuration. 161 * @param imageDirs Directories the image can be stored in. 162 * @param editsDirs Directories the editlog can be stored in. 163 * @throws IOException if any directories are inaccessible. 164 */ 165 public NNStorage(Configuration conf, 166 Collection<URI> imageDirs, Collection<URI> editsDirs) 167 throws IOException { 168 super(NodeType.NAME_NODE); 169 170 storageDirs = new CopyOnWriteArrayList<StorageDirectory>(); 171 172 // this may modify the editsDirs, so copy before passing in 173 setStorageDirectories(imageDirs, 174 Lists.newArrayList(editsDirs), 175 FSNamesystem.getSharedEditsDirs(conf)); 176 //Update NameDirSize metric value after NN start 177 updateNameDirSize(); 178 } 179 180 @Override // Storage 181 public boolean isPreUpgradableLayout(StorageDirectory sd) throws IOException { 182 if (disablePreUpgradableLayoutCheck) { 183 return false; 184 } 185 186 File oldImageDir = new File(sd.getRoot(), "image"); 187 if (!oldImageDir.exists()) { 188 return false; 189 } 190 // check the layout version inside the image file 191 File oldF = new File(oldImageDir, "fsimage"); 192 RandomAccessFile oldFile = new RandomAccessFile(oldF, "rws"); 193 try { 194 oldFile.seek(0); 195 int oldVersion = oldFile.readInt(); 196 oldFile.close(); 197 oldFile = null; 198 if (oldVersion < LAST_PRE_UPGRADE_LAYOUT_VERSION) 199 return false; 200 } finally { 201 IOUtils.cleanup(LOG, oldFile); 202 } 203 return true; 204 } 205 206 @Override // Closeable 207 public void close() throws IOException { 208 unlockAll(); 209 storageDirs.clear(); 210 } 211 212 /** 213 * Set flag whether an attempt should be made to restore failed storage 214 * directories at the next available oppurtuinity. 215 * 216 * @param val Whether restoration attempt should be made. 217 */ 218 void setRestoreFailedStorage(boolean val) { 219 LOG.warn("set restore failed storage to " + val); 220 restoreFailedStorage=val; 221 } 222 223 /** 224 * @return Whether failed storage directories are to be restored. 225 */ 226 boolean getRestoreFailedStorage() { 227 return restoreFailedStorage; 228 } 229 230 /** 231 * See if any of removed storages is "writable" again, and can be returned 232 * into service. 233 */ 234 void attemptRestoreRemovedStorage() { 235 // if directory is "alive" - copy the images there... 236 if(!restoreFailedStorage || removedStorageDirs.size() == 0) 237 return; //nothing to restore 238 239 /* We don't want more than one thread trying to restore at a time */ 240 synchronized (this.restorationLock) { 241 LOG.info("NNStorage.attemptRestoreRemovedStorage: check removed(failed) "+ 242 "storage. removedStorages size = " + removedStorageDirs.size()); 243 for(Iterator<StorageDirectory> it 244 = this.removedStorageDirs.iterator(); it.hasNext();) { 245 StorageDirectory sd = it.next(); 246 File root = sd.getRoot(); 247 LOG.info("currently disabled dir " + root.getAbsolutePath() + 248 "; type="+sd.getStorageDirType() 249 + ";canwrite="+FileUtil.canWrite(root)); 250 if(root.exists() && FileUtil.canWrite(root)) { 251 LOG.info("restoring dir " + sd.getRoot().getAbsolutePath()); 252 this.addStorageDir(sd); // restore 253 this.removedStorageDirs.remove(sd); 254 } 255 } 256 } 257 } 258 259 /** 260 * @return A list of storage directories which are in the errored state. 261 */ 262 List<StorageDirectory> getRemovedStorageDirs() { 263 return this.removedStorageDirs; 264 } 265 266 /** 267 * See {@link NNStorage#setStorageDirectories(Collection, Collection, Collection)} 268 */ 269 @VisibleForTesting 270 synchronized void setStorageDirectories(Collection<URI> fsNameDirs, 271 Collection<URI> fsEditsDirs) 272 throws IOException { 273 setStorageDirectories(fsNameDirs, fsEditsDirs, new ArrayList<URI>()); 274 } 275 276 /** 277 * Set the storage directories which will be used. This should only ever be 278 * called from inside NNStorage. However, it needs to remain package private 279 * for testing, as StorageDirectories need to be reinitialised after using 280 * Mockito.spy() on this class, as Mockito doesn't work well with inner 281 * classes, such as StorageDirectory in this case. 282 * 283 * Synchronized due to initialization of storageDirs and removedStorageDirs. 284 * 285 * @param fsNameDirs Locations to store images. 286 * @param fsEditsDirs Locations to store edit logs. 287 * @throws IOException 288 */ 289 @VisibleForTesting 290 synchronized void setStorageDirectories(Collection<URI> fsNameDirs, 291 Collection<URI> fsEditsDirs, 292 Collection<URI> sharedEditsDirs) 293 throws IOException { 294 this.storageDirs.clear(); 295 this.removedStorageDirs.clear(); 296 297 // Add all name dirs with appropriate NameNodeDirType 298 for (URI dirName : fsNameDirs) { 299 checkSchemeConsistency(dirName); 300 boolean isAlsoEdits = false; 301 for (URI editsDirName : fsEditsDirs) { 302 if (editsDirName.compareTo(dirName) == 0) { 303 isAlsoEdits = true; 304 fsEditsDirs.remove(editsDirName); 305 break; 306 } 307 } 308 NameNodeDirType dirType = (isAlsoEdits) ? 309 NameNodeDirType.IMAGE_AND_EDITS : 310 NameNodeDirType.IMAGE; 311 // Add to the list of storage directories, only if the 312 // URI is of type file:// 313 if(dirName.getScheme().compareTo("file") == 0) { 314 this.addStorageDir(new StorageDirectory(new File(dirName.getPath()), 315 dirType, 316 sharedEditsDirs.contains(dirName))); // Don't lock the dir if it's shared. 317 } 318 } 319 320 // Add edits dirs if they are different from name dirs 321 for (URI dirName : fsEditsDirs) { 322 checkSchemeConsistency(dirName); 323 // Add to the list of storage directories, only if the 324 // URI is of type file:// 325 if(dirName.getScheme().compareTo("file") == 0) 326 this.addStorageDir(new StorageDirectory(new File(dirName.getPath()), 327 NameNodeDirType.EDITS, sharedEditsDirs.contains(dirName))); 328 } 329 } 330 331 /** 332 * Return the storage directory corresponding to the passed URI 333 * @param uri URI of a storage directory 334 * @return The matching storage directory or null if none found 335 */ 336 public StorageDirectory getStorageDirectory(URI uri) { 337 try { 338 uri = Util.fileAsURI(new File(uri)); 339 Iterator<StorageDirectory> it = dirIterator(); 340 for (; it.hasNext(); ) { 341 StorageDirectory sd = it.next(); 342 if (Util.fileAsURI(sd.getRoot()).equals(uri)) { 343 return sd; 344 } 345 } 346 } catch (IOException ioe) { 347 LOG.warn("Error converting file to URI", ioe); 348 } 349 return null; 350 } 351 352 /** 353 * Checks the consistency of a URI, in particular if the scheme 354 * is specified 355 * @param u URI whose consistency is being checked. 356 */ 357 private static void checkSchemeConsistency(URI u) throws IOException { 358 String scheme = u.getScheme(); 359 // the URI should have a proper scheme 360 if(scheme == null) { 361 throw new IOException("Undefined scheme for " + u); 362 } 363 } 364 365 /** 366 * Retrieve current directories of type IMAGE 367 * @return Collection of URI representing image directories 368 * @throws IOException in case of URI processing error 369 */ 370 Collection<URI> getImageDirectories() throws IOException { 371 return getDirectories(NameNodeDirType.IMAGE); 372 } 373 374 /** 375 * Retrieve current directories of type EDITS 376 * @return Collection of URI representing edits directories 377 * @throws IOException in case of URI processing error 378 */ 379 Collection<URI> getEditsDirectories() throws IOException { 380 return getDirectories(NameNodeDirType.EDITS); 381 } 382 383 /** 384 * Return number of storage directories of the given type. 385 * @param dirType directory type 386 * @return number of storage directories of type dirType 387 */ 388 int getNumStorageDirs(NameNodeDirType dirType) { 389 if(dirType == null) 390 return getNumStorageDirs(); 391 Iterator<StorageDirectory> it = dirIterator(dirType); 392 int numDirs = 0; 393 for(; it.hasNext(); it.next()) 394 numDirs++; 395 return numDirs; 396 } 397 398 /** 399 * Return the list of locations being used for a specific purpose. 400 * i.e. Image or edit log storage. 401 * 402 * @param dirType Purpose of locations requested. 403 * @throws IOException 404 */ 405 Collection<URI> getDirectories(NameNodeDirType dirType) 406 throws IOException { 407 ArrayList<URI> list = new ArrayList<URI>(); 408 Iterator<StorageDirectory> it = (dirType == null) ? dirIterator() : 409 dirIterator(dirType); 410 for ( ;it.hasNext(); ) { 411 StorageDirectory sd = it.next(); 412 try { 413 list.add(Util.fileAsURI(sd.getRoot())); 414 } catch (IOException e) { 415 throw new IOException("Exception while processing " + 416 "StorageDirectory " + sd.getRoot(), e); 417 } 418 } 419 return list; 420 } 421 422 /** 423 * Determine the last transaction ID noted in this storage directory. 424 * This txid is stored in a special seen_txid file since it might not 425 * correspond to the latest image or edit log. For example, an image-only 426 * directory will have this txid incremented when edits logs roll, even 427 * though the edits logs are in a different directory. 428 * 429 * @param sd StorageDirectory to check 430 * @return If file exists and can be read, last recorded txid. If not, 0L. 431 * @throws IOException On errors processing file pointed to by sd 432 */ 433 static long readTransactionIdFile(StorageDirectory sd) throws IOException { 434 File txidFile = getStorageFile(sd, NameNodeFile.SEEN_TXID); 435 return PersistentLongFile.readFile(txidFile, 0); 436 } 437 438 /** 439 * Write last checkpoint time into a separate file. 440 * @param sd storage directory 441 * @throws IOException 442 */ 443 void writeTransactionIdFile(StorageDirectory sd, long txid) throws IOException { 444 Preconditions.checkArgument(txid >= 0, "bad txid: " + txid); 445 446 File txIdFile = getStorageFile(sd, NameNodeFile.SEEN_TXID); 447 PersistentLongFile.writeFile(txIdFile, txid); 448 } 449 450 /** 451 * Set the transaction ID and time of the last checkpoint 452 * 453 * @param txid transaction id of the last checkpoint 454 * @param time time of the last checkpoint, in millis since the epoch 455 */ 456 void setMostRecentCheckpointInfo(long txid, long time) { 457 this.mostRecentCheckpointTxId = txid; 458 this.mostRecentCheckpointTime = time; 459 } 460 461 /** 462 * @return the transaction ID of the last checkpoint. 463 */ 464 public long getMostRecentCheckpointTxId() { 465 return mostRecentCheckpointTxId; 466 } 467 468 /** 469 * @return the time of the most recent checkpoint in millis since the epoch. 470 */ 471 long getMostRecentCheckpointTime() { 472 return mostRecentCheckpointTime; 473 } 474 475 /** 476 * Write a small file in all available storage directories that 477 * indicates that the namespace has reached some given transaction ID. 478 * 479 * This is used when the image is loaded to avoid accidental rollbacks 480 * in the case where an edit log is fully deleted but there is no 481 * checkpoint. See TestNameEditsConfigs.testNameEditsConfigsFailure() 482 * @param txid the txid that has been reached 483 */ 484 public void writeTransactionIdFileToStorage(long txid) { 485 writeTransactionIdFileToStorage(txid, null); 486 } 487 488 /** 489 * Write a small file in all available storage directories that 490 * indicates that the namespace has reached some given transaction ID. 491 * 492 * This is used when the image is loaded to avoid accidental rollbacks 493 * in the case where an edit log is fully deleted but there is no 494 * checkpoint. See TestNameEditsConfigs.testNameEditsConfigsFailure() 495 * @param txid the txid that has been reached 496 * @param type the type of directory 497 */ 498 public void writeTransactionIdFileToStorage(long txid, 499 NameNodeDirType type) { 500 // Write txid marker in all storage directories 501 for (Iterator<StorageDirectory> it = dirIterator(type); it.hasNext();) { 502 StorageDirectory sd = it.next(); 503 try { 504 writeTransactionIdFile(sd, txid); 505 } catch(IOException e) { 506 // Close any edits stream associated with this dir and remove directory 507 LOG.warn("writeTransactionIdToStorage failed on " + sd, 508 e); 509 reportErrorsOnDirectory(sd); 510 } 511 } 512 } 513 514 /** 515 * Return the name of the image file that is uploaded by periodic 516 * checkpointing 517 * 518 * @return List of filenames to save checkpoints to. 519 */ 520 public File[] getFsImageNameCheckpoint(long txid) { 521 ArrayList<File> list = new ArrayList<File>(); 522 for (Iterator<StorageDirectory> it = 523 dirIterator(NameNodeDirType.IMAGE); it.hasNext();) { 524 list.add(getStorageFile(it.next(), NameNodeFile.IMAGE_NEW, txid)); 525 } 526 return list.toArray(new File[list.size()]); 527 } 528 529 /** 530 * @return The first image file with the given txid and image type. 531 */ 532 public File getFsImageName(long txid, NameNodeFile nnf) { 533 for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE); 534 it.hasNext();) { 535 StorageDirectory sd = it.next(); 536 File fsImage = getStorageFile(sd, nnf, txid); 537 if (FileUtil.canRead(sd.getRoot()) && fsImage.exists()) { 538 return fsImage; 539 } 540 } 541 return null; 542 } 543 544 /** 545 * @return The first image file whose txid is the same with the given txid and 546 * image type is one of the given types. 547 */ 548 public File getFsImage(long txid, EnumSet<NameNodeFile> nnfs) { 549 for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE); 550 it.hasNext();) { 551 StorageDirectory sd = it.next(); 552 for (NameNodeFile nnf : nnfs) { 553 File fsImage = getStorageFile(sd, nnf, txid); 554 if (FileUtil.canRead(sd.getRoot()) && fsImage.exists()) { 555 return fsImage; 556 } 557 } 558 } 559 return null; 560 } 561 562 public File getFsImageName(long txid) { 563 return getFsImageName(txid, NameNodeFile.IMAGE); 564 } 565 566 public File getHighestFsImageName() { 567 return getFsImageName(getMostRecentCheckpointTxId()); 568 } 569 570 /** Create new dfs name directory. Caution: this destroys all files 571 * in this filesystem. */ 572 private void format(StorageDirectory sd) throws IOException { 573 sd.clearDirectory(); // create currrent dir 574 writeProperties(sd); 575 writeTransactionIdFile(sd, 0); 576 577 LOG.info("Storage directory " + sd.getRoot() 578 + " has been successfully formatted."); 579 } 580 581 /** 582 * Format all available storage directories. 583 */ 584 public void format(NamespaceInfo nsInfo) throws IOException { 585 Preconditions.checkArgument(nsInfo.getLayoutVersion() == 0 || 586 nsInfo.getLayoutVersion() == HdfsServerConstants.NAMENODE_LAYOUT_VERSION, 587 "Bad layout version: %s", nsInfo.getLayoutVersion()); 588 589 this.setStorageInfo(nsInfo); 590 this.blockpoolID = nsInfo.getBlockPoolID(); 591 for (Iterator<StorageDirectory> it = 592 dirIterator(); it.hasNext();) { 593 StorageDirectory sd = it.next(); 594 format(sd); 595 } 596 } 597 598 public static NamespaceInfo newNamespaceInfo() 599 throws UnknownHostException { 600 return new NamespaceInfo(newNamespaceID(), newClusterID(), 601 newBlockPoolID(), Time.now()); 602 } 603 604 public void format() throws IOException { 605 this.layoutVersion = HdfsServerConstants.NAMENODE_LAYOUT_VERSION; 606 for (Iterator<StorageDirectory> it = 607 dirIterator(); it.hasNext();) { 608 StorageDirectory sd = it.next(); 609 format(sd); 610 } 611 } 612 613 /** 614 * Generate new namespaceID. 615 * 616 * namespaceID is a persistent attribute of the namespace. 617 * It is generated when the namenode is formatted and remains the same 618 * during the life cycle of the namenode. 619 * When a datanodes register they receive it as the registrationID, 620 * which is checked every time the datanode is communicating with the 621 * namenode. Datanodes that do not 'know' the namespaceID are rejected. 622 * 623 * @return new namespaceID 624 */ 625 private static int newNamespaceID() { 626 int newID = 0; 627 while(newID == 0) 628 newID = ThreadLocalRandom.current().nextInt(0x7FFFFFFF); // use 31 bits 629 return newID; 630 } 631 632 @Override // Storage 633 protected void setFieldsFromProperties( 634 Properties props, StorageDirectory sd) throws IOException { 635 super.setFieldsFromProperties(props, sd); 636 if (layoutVersion == 0) { 637 throw new IOException("NameNode directory " 638 + sd.getRoot() + " is not formatted."); 639 } 640 641 // Set Block pool ID in version with federation support 642 if (NameNodeLayoutVersion.supports( 643 LayoutVersion.Feature.FEDERATION, getLayoutVersion())) { 644 String sbpid = props.getProperty("blockpoolID"); 645 setBlockPoolID(sd.getRoot(), sbpid); 646 } 647 setDeprecatedPropertiesForUpgrade(props); 648 } 649 650 void readProperties(StorageDirectory sd, StartupOption startupOption) 651 throws IOException { 652 Properties props = readPropertiesFile(sd.getVersionFile()); 653 if (HdfsServerConstants.RollingUpgradeStartupOption.ROLLBACK.matches 654 (startupOption)) { 655 int lv = Integer.parseInt(getProperty(props, sd, "layoutVersion")); 656 if (lv > getServiceLayoutVersion()) { 657 // we should not use a newer version for rollingUpgrade rollback 658 throw new IncorrectVersionException(getServiceLayoutVersion(), lv, 659 "storage directory " + sd.getRoot().getAbsolutePath()); 660 } 661 props.setProperty("layoutVersion", 662 Integer.toString(HdfsServerConstants.NAMENODE_LAYOUT_VERSION)); 663 } 664 setFieldsFromProperties(props, sd); 665 } 666 667 /** 668 * Pull any properties out of the VERSION file that are from older 669 * versions of HDFS and only necessary during upgrade. 670 */ 671 private void setDeprecatedPropertiesForUpgrade(Properties props) { 672 deprecatedProperties = new HashMap<String, String>(); 673 String md5 = props.getProperty(DEPRECATED_MESSAGE_DIGEST_PROPERTY); 674 if (md5 != null) { 675 deprecatedProperties.put(DEPRECATED_MESSAGE_DIGEST_PROPERTY, md5); 676 } 677 } 678 679 /** 680 * Return a property that was stored in an earlier version of HDFS. 681 * 682 * This should only be used during upgrades. 683 */ 684 String getDeprecatedProperty(String prop) { 685 assert getLayoutVersion() > HdfsServerConstants.NAMENODE_LAYOUT_VERSION : 686 "getDeprecatedProperty should only be done when loading " + 687 "storage from past versions during upgrade."; 688 return deprecatedProperties.get(prop); 689 } 690 691 /** 692 * Write version file into the storage directory. 693 * 694 * The version file should always be written last. 695 * Missing or corrupted version file indicates that 696 * the checkpoint is not valid. 697 * 698 * @param sd storage directory 699 * @throws IOException 700 */ 701 @Override // Storage 702 protected void setPropertiesFromFields(Properties props, 703 StorageDirectory sd 704 ) throws IOException { 705 super.setPropertiesFromFields(props, sd); 706 // Set blockpoolID in version with federation support 707 if (NameNodeLayoutVersion.supports( 708 LayoutVersion.Feature.FEDERATION, getLayoutVersion())) { 709 props.setProperty("blockpoolID", blockpoolID); 710 } 711 } 712 713 static File getStorageFile(StorageDirectory sd, NameNodeFile type, long imageTxId) { 714 return new File(sd.getCurrentDir(), 715 String.format("%s_%019d", type.getName(), imageTxId)); 716 } 717 718 /** 719 * Get a storage file for one of the files that doesn't need a txid associated 720 * (e.g version, seen_txid) 721 */ 722 static File getStorageFile(StorageDirectory sd, NameNodeFile type) { 723 return new File(sd.getCurrentDir(), type.getName()); 724 } 725 726 @VisibleForTesting 727 public static String getCheckpointImageFileName(long txid) { 728 return getNameNodeFileName(NameNodeFile.IMAGE_NEW, txid); 729 } 730 731 @VisibleForTesting 732 public static String getImageFileName(long txid) { 733 return getNameNodeFileName(NameNodeFile.IMAGE, txid); 734 } 735 736 @VisibleForTesting 737 public static String getRollbackImageFileName(long txid) { 738 return getNameNodeFileName(NameNodeFile.IMAGE_ROLLBACK, txid); 739 } 740 741 public static String getLegacyOIVImageFileName(long txid) { 742 return getNameNodeFileName(NameNodeFile.IMAGE_LEGACY_OIV, txid); 743 } 744 745 private static String getNameNodeFileName(NameNodeFile nnf, long txid) { 746 return String.format("%s_%019d", nnf.getName(), txid); 747 } 748 749 @VisibleForTesting 750 public static String getInProgressEditsFileName(long startTxId) { 751 return getNameNodeFileName(NameNodeFile.EDITS_INPROGRESS, startTxId); 752 } 753 754 static File getInProgressEditsFile(StorageDirectory sd, long startTxId) { 755 return new File(sd.getCurrentDir(), getInProgressEditsFileName(startTxId)); 756 } 757 758 static File getFinalizedEditsFile(StorageDirectory sd, 759 long startTxId, long endTxId) { 760 return new File(sd.getCurrentDir(), 761 getFinalizedEditsFileName(startTxId, endTxId)); 762 } 763 764 static File getTemporaryEditsFile(StorageDirectory sd, 765 long startTxId, long endTxId, long timestamp) { 766 return new File(sd.getCurrentDir(), 767 getTemporaryEditsFileName(startTxId, endTxId, timestamp)); 768 } 769 770 static File getImageFile(StorageDirectory sd, NameNodeFile nnf, long txid) { 771 return new File(sd.getCurrentDir(), getNameNodeFileName(nnf, txid)); 772 } 773 774 @VisibleForTesting 775 public static String getFinalizedEditsFileName(long startTxId, long endTxId) { 776 return String.format("%s_%019d-%019d", NameNodeFile.EDITS.getName(), 777 startTxId, endTxId); 778 } 779 780 public static String getTemporaryEditsFileName(long startTxId, long endTxId, 781 long timestamp) { 782 return String.format("%s_%019d-%019d_%019d", NameNodeFile.EDITS_TMP.getName(), 783 startTxId, endTxId, timestamp); 784 } 785 786 /** 787 * Return the first readable finalized edits file for the given txid. 788 */ 789 File findFinalizedEditsFile(long startTxId, long endTxId) 790 throws IOException { 791 File ret = findFile(NameNodeDirType.EDITS, 792 getFinalizedEditsFileName(startTxId, endTxId)); 793 if (ret == null) { 794 throw new IOException( 795 "No edits file for txid " + startTxId + "-" + endTxId + " exists!"); 796 } 797 return ret; 798 } 799 800 /** 801 * Return the first readable image file for the given txid and image type, or 802 * null if no such image can be found 803 */ 804 File findImageFile(NameNodeFile nnf, long txid) { 805 return findFile(NameNodeDirType.IMAGE, 806 getNameNodeFileName(nnf, txid)); 807 } 808 809 /** 810 * Return the first readable storage file of the given name 811 * across any of the 'current' directories in SDs of the 812 * given type, or null if no such file exists. 813 */ 814 private File findFile(NameNodeDirType dirType, String name) { 815 for (StorageDirectory sd : dirIterable(dirType)) { 816 File candidate = new File(sd.getCurrentDir(), name); 817 if (FileUtil.canRead(sd.getCurrentDir()) && 818 candidate.exists()) { 819 return candidate; 820 } 821 } 822 return null; 823 } 824 825 /** 826 * Disable the check for pre-upgradable layouts. Needed for BackupImage. 827 * @param val Whether to disable the preupgradeable layout check. 828 */ 829 void setDisablePreUpgradableLayoutCheck(boolean val) { 830 disablePreUpgradableLayoutCheck = val; 831 } 832 833 /** 834 * Marks a list of directories as having experienced an error. 835 * 836 * @param sds A list of storage directories to mark as errored. 837 */ 838 void reportErrorsOnDirectories(List<StorageDirectory> sds) { 839 for (StorageDirectory sd : sds) { 840 reportErrorsOnDirectory(sd); 841 } 842 } 843 844 /** 845 * Reports that a directory has experienced an error. 846 * Notifies listeners that the directory is no longer 847 * available. 848 * 849 * @param sd A storage directory to mark as errored. 850 */ 851 private void reportErrorsOnDirectory(StorageDirectory sd) { 852 LOG.error("Error reported on storage directory " + sd); 853 854 String lsd = listStorageDirectories(); 855 LOG.debug("current list of storage dirs:" + lsd); 856 857 LOG.warn("About to remove corresponding storage: " 858 + sd.getRoot().getAbsolutePath()); 859 try { 860 sd.unlock(); 861 } catch (Exception e) { 862 LOG.warn("Unable to unlock bad storage directory: " 863 + sd.getRoot().getPath(), e); 864 } 865 866 if (this.storageDirs.remove(sd)) { 867 this.removedStorageDirs.add(sd); 868 } 869 870 lsd = listStorageDirectories(); 871 LOG.debug("at the end current list of storage dirs:" + lsd); 872 } 873 874 /** 875 * Processes the startup options for the clusterid and blockpoolid 876 * for the upgrade. 877 * @param startOpt Startup options 878 * @param layoutVersion Layout version for the upgrade 879 * @throws IOException 880 */ 881 void processStartupOptionsForUpgrade(StartupOption startOpt, int layoutVersion) 882 throws IOException { 883 if (startOpt == StartupOption.UPGRADE || startOpt == StartupOption.UPGRADEONLY) { 884 // If upgrade from a release that does not support federation, 885 // if clusterId is provided in the startupOptions use it. 886 // Else generate a new cluster ID 887 if (!NameNodeLayoutVersion.supports( 888 LayoutVersion.Feature.FEDERATION, layoutVersion)) { 889 if (startOpt.getClusterId() == null) { 890 startOpt.setClusterId(newClusterID()); 891 } 892 setClusterID(startOpt.getClusterId()); 893 setBlockPoolID(newBlockPoolID()); 894 } else { 895 // Upgrade from one version of federation to another supported 896 // version of federation doesn't require clusterID. 897 // Warn the user if the current clusterid didn't match with the input 898 // clusterid. 899 if (startOpt.getClusterId() != null 900 && !startOpt.getClusterId().equals(getClusterID())) { 901 LOG.warn("Clusterid mismatch - current clusterid: " + getClusterID() 902 + ", Ignoring given clusterid: " + startOpt.getClusterId()); 903 } 904 } 905 LOG.info("Using clusterid: " + getClusterID()); 906 } 907 } 908 909 /** 910 * Report that an IOE has occurred on some file which may 911 * or may not be within one of the NN image storage directories. 912 */ 913 @Override 914 public void reportErrorOnFile(File f) { 915 // We use getAbsolutePath here instead of getCanonicalPath since we know 916 // that there is some IO problem on that drive. 917 // getCanonicalPath may need to call stat() or readlink() and it's likely 918 // those calls would fail due to the same underlying IO problem. 919 String absPath = f.getAbsolutePath(); 920 for (StorageDirectory sd : storageDirs) { 921 String dirPath = sd.getRoot().getAbsolutePath(); 922 if (!dirPath.endsWith(File.separator)) { 923 dirPath += File.separator; 924 } 925 if (absPath.startsWith(dirPath)) { 926 reportErrorsOnDirectory(sd); 927 return; 928 } 929 } 930 931 } 932 933 /** 934 * Generate new clusterID. 935 * 936 * clusterID is a persistent attribute of the cluster. 937 * It is generated when the cluster is created and remains the same 938 * during the life cycle of the cluster. When a new name node is formated, if 939 * this is a new cluster, a new clusterID is geneated and stored. Subsequent 940 * name node must be given the same ClusterID during its format to be in the 941 * same cluster. 942 * When a datanode register it receive the clusterID and stick with it. 943 * If at any point, name node or data node tries to join another cluster, it 944 * will be rejected. 945 * 946 * @return new clusterID 947 */ 948 public static String newClusterID() { 949 return "CID-" + UUID.randomUUID().toString(); 950 } 951 952 void setClusterID(String cid) { 953 clusterID = cid; 954 } 955 956 /** 957 * try to find current cluster id in the VERSION files 958 * returns first cluster id found in any VERSION file 959 * null in case none found 960 * @return clusterId or null in case no cluster id found 961 */ 962 public String determineClusterId() { 963 String cid = null; 964 Iterator<StorageDirectory> sdit = dirIterator(NameNodeDirType.IMAGE); 965 while(sdit.hasNext()) { 966 StorageDirectory sd = sdit.next(); 967 try { 968 Properties props = readPropertiesFile(sd.getVersionFile()); 969 cid = props.getProperty("clusterID"); 970 LOG.info("current cluster id for sd="+sd.getCurrentDir() + 971 ";lv=" + layoutVersion + ";cid=" + cid); 972 973 if(cid != null && !cid.equals("")) 974 return cid; 975 } catch (Exception e) { 976 LOG.warn("this sd not available: " + e.getLocalizedMessage()); 977 } //ignore 978 } 979 LOG.warn("couldn't find any VERSION file containing valid ClusterId"); 980 return null; 981 } 982 983 /** 984 * Generate new blockpoolID. 985 * 986 * @return new blockpoolID 987 */ 988 static String newBlockPoolID() throws UnknownHostException{ 989 String ip = "unknownIP"; 990 try { 991 ip = DNS.getDefaultIP("default"); 992 } catch (UnknownHostException e) { 993 LOG.warn("Could not find ip address of \"default\" inteface."); 994 throw e; 995 } 996 997 int rand = DFSUtil.getSecureRandom().nextInt(Integer.MAX_VALUE); 998 String bpid = "BP-" + rand + "-"+ ip + "-" + Time.now(); 999 return bpid; 1000 } 1001 1002 /** Validate and set block pool ID */ 1003 public void setBlockPoolID(String bpid) { 1004 blockpoolID = bpid; 1005 } 1006 1007 /** Validate and set block pool ID */ 1008 private void setBlockPoolID(File storage, String bpid) 1009 throws InconsistentFSStateException { 1010 if (bpid == null || bpid.equals("")) { 1011 throw new InconsistentFSStateException(storage, "file " 1012 + Storage.STORAGE_FILE_VERSION + " has no block pool Id."); 1013 } 1014 1015 if (!blockpoolID.equals("") && !blockpoolID.equals(bpid)) { 1016 throw new InconsistentFSStateException(storage, 1017 "Unexepcted blockpoolID " + bpid + " . Expected " + blockpoolID); 1018 } 1019 setBlockPoolID(bpid); 1020 } 1021 1022 public String getBlockPoolID() { 1023 return blockpoolID; 1024 } 1025 1026 /** 1027 * Iterate over all current storage directories, inspecting them 1028 * with the given inspector. 1029 */ 1030 void inspectStorageDirs(FSImageStorageInspector inspector) 1031 throws IOException { 1032 1033 // Process each of the storage directories to find the pair of 1034 // newest image file and edit file 1035 for (Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) { 1036 StorageDirectory sd = it.next(); 1037 inspector.inspectDirectory(sd); 1038 } 1039 } 1040 1041 /** 1042 * Iterate over all of the storage dirs, reading their contents to determine 1043 * their layout versions. Returns an FSImageStorageInspector which has 1044 * inspected each directory. 1045 * 1046 * <b>Note:</b> this can mutate the storage info fields (ctime, version, etc). 1047 * @throws IOException if no valid storage dirs are found or no valid layout version 1048 */ 1049 FSImageStorageInspector readAndInspectDirs(EnumSet<NameNodeFile> fileTypes, 1050 StartupOption startupOption) throws IOException { 1051 Integer layoutVersion = null; 1052 boolean multipleLV = false; 1053 StringBuilder layoutVersions = new StringBuilder(); 1054 1055 // First determine what range of layout versions we're going to inspect 1056 for (Iterator<StorageDirectory> it = dirIterator(false); 1057 it.hasNext();) { 1058 StorageDirectory sd = it.next(); 1059 if (!sd.getVersionFile().exists()) { 1060 FSImage.LOG.warn("Storage directory " + sd + " contains no VERSION file. Skipping..."); 1061 continue; 1062 } 1063 readProperties(sd, startupOption); // sets layoutVersion 1064 int lv = getLayoutVersion(); 1065 if (layoutVersion == null) { 1066 layoutVersion = Integer.valueOf(lv); 1067 } else if (!layoutVersion.equals(lv)) { 1068 multipleLV = true; 1069 } 1070 layoutVersions.append("(").append(sd.getRoot()).append(", ").append(lv).append(") "); 1071 } 1072 1073 if (layoutVersion == null) { 1074 throw new IOException("No storage directories contained VERSION information"); 1075 } 1076 if (multipleLV) { 1077 throw new IOException( 1078 "Storage directories contain multiple layout versions: " 1079 + layoutVersions); 1080 } 1081 // If the storage directories are with the new layout version 1082 // (ie edits_<txnid>) then use the new inspector, which will ignore 1083 // the old format dirs. 1084 FSImageStorageInspector inspector; 1085 if (NameNodeLayoutVersion.supports( 1086 LayoutVersion.Feature.TXID_BASED_LAYOUT, getLayoutVersion())) { 1087 inspector = new FSImageTransactionalStorageInspector(fileTypes); 1088 } else { 1089 inspector = new FSImagePreTransactionalStorageInspector(); 1090 } 1091 1092 inspectStorageDirs(inspector); 1093 return inspector; 1094 } 1095 1096 public NamespaceInfo getNamespaceInfo() { 1097 return new NamespaceInfo( 1098 getNamespaceID(), 1099 getClusterID(), 1100 getBlockPoolID(), 1101 getCTime()); 1102 } 1103 1104 public String getNNDirectorySize() { 1105 return JSON.toString(nameDirSizeMap); 1106 } 1107 1108 public void updateNameDirSize() { 1109 Map<String, Long> nnDirSizeMap = new HashMap<>(); 1110 for (Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) { 1111 StorageDirectory sd = it.next(); 1112 if (!sd.isShared()) { 1113 nnDirSizeMap.put(sd.getRoot().getAbsolutePath(), sd.getDirecorySize()); 1114 } 1115 } 1116 nameDirSizeMap.clear(); 1117 nameDirSizeMap.putAll(nnDirSizeMap); 1118 } 1119}