001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hdfs.server.common; 019 020import java.io.File; 021import java.io.FileOutputStream; 022import java.io.FileNotFoundException; 023import java.io.IOException; 024import java.io.RandomAccessFile; 025import java.lang.management.ManagementFactory; 026import java.nio.channels.FileLock; 027import java.nio.channels.OverlappingFileLockException; 028import java.util.ArrayList; 029import java.util.Iterator; 030import java.util.List; 031import java.util.Properties; 032 033import org.apache.commons.io.FileUtils; 034import org.apache.commons.logging.Log; 035import org.apache.commons.logging.LogFactory; 036import org.apache.hadoop.classification.InterfaceAudience; 037import org.apache.hadoop.fs.FileUtil; 038import org.apache.hadoop.fs.Path; 039import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType; 040import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; 041import org.apache.hadoop.io.nativeio.NativeIO; 042import org.apache.hadoop.io.nativeio.NativeIOException; 043import org.apache.hadoop.util.ToolRunner; 044import org.apache.hadoop.util.VersionInfo; 045 046import 
com.google.common.base.Charsets; 047import com.google.common.base.Preconditions; 048 049 050 051/** 052 * Storage information file. 053 * <p> 054 * Local storage information is stored in a separate file VERSION. 055 * It contains type of the node, 056 * the storage layout version, the namespace id, and 057 * the fs state creation time. 058 * <p> 059 * Local storage can reside in multiple directories. 060 * Each directory should contain the same VERSION file as the others. 061 * During startup Hadoop servers (name-node and data-nodes) read their local 062 * storage information from them. 063 * <p> 064 * The servers hold a lock for each storage directory while they run so that 065 * other nodes were not able to startup sharing the same storage. 066 * The locks are released when the servers stop (normally or abnormally). 067 * 068 */ 069@InterfaceAudience.Private 070public abstract class Storage extends StorageInfo { 071 public static final Log LOG = LogFactory.getLog(Storage.class.getName()); 072 073 // last layout version that did not support upgrades 074 public static final int LAST_PRE_UPGRADE_LAYOUT_VERSION = -3; 075 076 // this corresponds to Hadoop-0.18 077 public static final int LAST_UPGRADABLE_LAYOUT_VERSION = -16; 078 protected static final String LAST_UPGRADABLE_HADOOP_VERSION = "Hadoop-0.18"; 079 080 /** Layout versions of 0.20.203 release */ 081 public static final int[] LAYOUT_VERSIONS_203 = {-19, -31}; 082 083 public static final String STORAGE_FILE_LOCK = "in_use.lock"; 084 public static final String STORAGE_DIR_CURRENT = "current"; 085 public static final String STORAGE_DIR_PREVIOUS = "previous"; 086 public static final String STORAGE_TMP_REMOVED = "removed.tmp"; 087 public static final String STORAGE_TMP_PREVIOUS = "previous.tmp"; 088 public static final String STORAGE_TMP_FINALIZED = "finalized.tmp"; 089 public static final String STORAGE_TMP_LAST_CKPT = "lastcheckpoint.tmp"; 090 public static final String STORAGE_PREVIOUS_CKPT = 
"previous.checkpoint"; 091 092 /** 093 * The blocksBeingWritten directory which was used in some 1.x and earlier 094 * releases. 095 */ 096 public static final String STORAGE_1_BBW = "blocksBeingWritten"; 097 098 public enum StorageState { 099 NON_EXISTENT, 100 NOT_FORMATTED, 101 COMPLETE_UPGRADE, 102 RECOVER_UPGRADE, 103 COMPLETE_FINALIZE, 104 COMPLETE_ROLLBACK, 105 RECOVER_ROLLBACK, 106 COMPLETE_CHECKPOINT, 107 RECOVER_CHECKPOINT, 108 NORMAL; 109 } 110 111 /** 112 * An interface to denote storage directory type 113 * Implementations can define a type for storage directory by implementing 114 * this interface. 115 */ 116 @InterfaceAudience.Private 117 public interface StorageDirType { 118 public StorageDirType getStorageDirType(); 119 public boolean isOfType(StorageDirType type); 120 } 121 122 protected List<StorageDirectory> storageDirs = new ArrayList<StorageDirectory>(); 123 124 private class DirIterator implements Iterator<StorageDirectory> { 125 final StorageDirType dirType; 126 final boolean includeShared; 127 int prevIndex; // for remove() 128 int nextIndex; // for next() 129 130 DirIterator(StorageDirType dirType, boolean includeShared) { 131 this.dirType = dirType; 132 this.nextIndex = 0; 133 this.prevIndex = 0; 134 this.includeShared = includeShared; 135 } 136 137 @Override 138 public boolean hasNext() { 139 if (storageDirs.isEmpty() || nextIndex >= storageDirs.size()) 140 return false; 141 if (dirType != null || !includeShared) { 142 while (nextIndex < storageDirs.size()) { 143 if (shouldReturnNextDir()) 144 break; 145 nextIndex++; 146 } 147 if (nextIndex >= storageDirs.size()) 148 return false; 149 } 150 return true; 151 } 152 153 @Override 154 public StorageDirectory next() { 155 StorageDirectory sd = getStorageDir(nextIndex); 156 prevIndex = nextIndex; 157 nextIndex++; 158 if (dirType != null || !includeShared) { 159 while (nextIndex < storageDirs.size()) { 160 if (shouldReturnNextDir()) 161 break; 162 nextIndex++; 163 } 164 } 165 return sd; 166 } 
167 168 @Override 169 public void remove() { 170 nextIndex = prevIndex; // restore previous state 171 storageDirs.remove(prevIndex); // remove last returned element 172 hasNext(); // reset nextIndex to correct place 173 } 174 175 private boolean shouldReturnNextDir() { 176 StorageDirectory sd = getStorageDir(nextIndex); 177 return (dirType == null || sd.getStorageDirType().isOfType(dirType)) && 178 (includeShared || !sd.isShared()); 179 } 180 } 181 182 /** 183 * @return A list of the given File in every available storage directory, 184 * regardless of whether it might exist. 185 */ 186 public List<File> getFiles(StorageDirType dirType, String fileName) { 187 ArrayList<File> list = new ArrayList<File>(); 188 Iterator<StorageDirectory> it = 189 (dirType == null) ? dirIterator() : dirIterator(dirType); 190 for ( ;it.hasNext(); ) { 191 list.add(new File(it.next().getCurrentDir(), fileName)); 192 } 193 return list; 194 } 195 196 197 /** 198 * Return default iterator 199 * This iterator returns all entries in storageDirs 200 */ 201 public Iterator<StorageDirectory> dirIterator() { 202 return dirIterator(null); 203 } 204 205 /** 206 * Return iterator based on Storage Directory Type 207 * This iterator selects entries in storageDirs of type dirType and returns 208 * them via the Iterator 209 */ 210 public Iterator<StorageDirectory> dirIterator(StorageDirType dirType) { 211 return dirIterator(dirType, true); 212 } 213 214 /** 215 * Return all entries in storageDirs, potentially excluding shared dirs. 216 * @param includeShared whether or not to include shared dirs. 217 * @return an iterator over the configured storage dirs. 218 */ 219 public Iterator<StorageDirectory> dirIterator(boolean includeShared) { 220 return dirIterator(null, includeShared); 221 } 222 223 /** 224 * @param dirType all entries will be of this type of dir 225 * @param includeShared true to include any shared directories, 226 * false otherwise 227 * @return an iterator over the configured storage dirs. 
228 */ 229 public Iterator<StorageDirectory> dirIterator(StorageDirType dirType, 230 boolean includeShared) { 231 return new DirIterator(dirType, includeShared); 232 } 233 234 public Iterable<StorageDirectory> dirIterable(final StorageDirType dirType) { 235 return new Iterable<StorageDirectory>() { 236 @Override 237 public Iterator<StorageDirectory> iterator() { 238 return dirIterator(dirType); 239 } 240 }; 241 } 242 243 244 /** 245 * generate storage list (debug line) 246 */ 247 public String listStorageDirectories() { 248 StringBuilder buf = new StringBuilder(); 249 for (StorageDirectory sd : storageDirs) { 250 buf.append(sd.getRoot() + "(" + sd.getStorageDirType() + ");"); 251 } 252 return buf.toString(); 253 } 254 255 /** 256 * One of the storage directories. 257 */ 258 @InterfaceAudience.Private 259 public static class StorageDirectory implements FormatConfirmable { 260 final File root; // root directory 261 // whether or not this dir is shared between two separate NNs for HA, or 262 // between multiple block pools in the case of federation. 263 final boolean isShared; 264 final StorageDirType dirType; // storage dir type 265 FileLock lock; // storage lock 266 267 private String storageUuid = null; // Storage directory identifier. 268 269 public StorageDirectory(File dir) { 270 // default dirType is null 271 this(dir, null, false); 272 } 273 274 public StorageDirectory(File dir, StorageDirType dirType) { 275 this(dir, dirType, false); 276 } 277 278 public void setStorageUuid(String storageUuid) { 279 this.storageUuid = storageUuid; 280 } 281 282 public String getStorageUuid() { 283 return storageUuid; 284 } 285 286 /** 287 * Constructor 288 * @param dir directory corresponding to the storage 289 * @param dirType storage directory type 290 * @param isShared whether or not this dir is shared between two NNs. 
true 291 * disables locking on the storage directory, false enables locking 292 */ 293 public StorageDirectory(File dir, StorageDirType dirType, boolean isShared) { 294 this.root = dir; 295 this.lock = null; 296 this.dirType = dirType; 297 this.isShared = isShared; 298 } 299 300 /** 301 * Get root directory of this storage 302 */ 303 public File getRoot() { 304 return root; 305 } 306 307 /** 308 * Get storage directory type 309 */ 310 public StorageDirType getStorageDirType() { 311 return dirType; 312 } 313 314 /** 315 * Get storage directory size. 316 */ 317 public long getDirecorySize() { 318 try { 319 if (!isShared() && root != null && root.exists()) { 320 return FileUtils.sizeOfDirectory(root); 321 } 322 } catch (Exception e) { 323 LOG.warn("Failed to get directory size :" + root, e); 324 } 325 return 0; 326 } 327 328 public void read(File from, Storage storage) throws IOException { 329 Properties props = readPropertiesFile(from); 330 storage.setFieldsFromProperties(props, this); 331 } 332 333 /** 334 * Clear and re-create storage directory. 335 * <p> 336 * Removes contents of the current directory and creates an empty directory. 337 * 338 * This does not fully format storage directory. 339 * It cannot write the version file since it should be written last after 340 * all other storage type dependent files are written. 341 * Derived storage is responsible for setting specific storage values and 342 * writing the version file to disk. 343 * 344 * @throws IOException 345 */ 346 public void clearDirectory() throws IOException { 347 File curDir = this.getCurrentDir(); 348 if (curDir.exists()) 349 if (!(FileUtil.fullyDelete(curDir))) 350 throw new IOException("Cannot remove current directory: " + curDir); 351 if (!curDir.mkdirs()) 352 throw new IOException("Cannot create directory " + curDir); 353 } 354 355 /** 356 * Directory {@code current} contains latest files defining 357 * the file system meta-data. 
358 * 359 * @return the directory path 360 */ 361 public File getCurrentDir() { 362 return new File(root, STORAGE_DIR_CURRENT); 363 } 364 365 /** 366 * File {@code VERSION} contains the following fields: 367 * <ol> 368 * <li>node type</li> 369 * <li>layout version</li> 370 * <li>namespaceID</li> 371 * <li>fs state creation time</li> 372 * <li>other fields specific for this node type</li> 373 * </ol> 374 * The version file is always written last during storage directory updates. 375 * The existence of the version file indicates that all other files have 376 * been successfully written in the storage directory, the storage is valid 377 * and does not need to be recovered. 378 * 379 * @return the version file path 380 */ 381 public File getVersionFile() { 382 return new File(new File(root, STORAGE_DIR_CURRENT), STORAGE_FILE_VERSION); 383 } 384 385 /** 386 * File {@code VERSION} from the {@code previous} directory. 387 * 388 * @return the previous version file path 389 */ 390 public File getPreviousVersionFile() { 391 return new File(new File(root, STORAGE_DIR_PREVIOUS), STORAGE_FILE_VERSION); 392 } 393 394 /** 395 * Directory {@code previous} contains the previous file system state, 396 * which the system can be rolled back to. 397 * 398 * @return the directory path 399 */ 400 public File getPreviousDir() { 401 return new File(root, STORAGE_DIR_PREVIOUS); 402 } 403 404 /** 405 * {@code previous.tmp} is a transient directory, which holds 406 * current file system state while the new state is saved into the new 407 * {@code current} during upgrade. 408 * If the saving succeeds {@code previous.tmp} will be moved to 409 * {@code previous}, otherwise it will be renamed back to 410 * {@code current} by the recovery procedure during startup. 
411 * 412 * @return the directory path 413 */ 414 public File getPreviousTmp() { 415 return new File(root, STORAGE_TMP_PREVIOUS); 416 } 417 418 /** 419 * {@code removed.tmp} is a transient directory, which holds 420 * current file system state while the previous state is moved into 421 * {@code current} during rollback. 422 * If the moving succeeds {@code removed.tmp} will be removed, 423 * otherwise it will be renamed back to 424 * {@code current} by the recovery procedure during startup. 425 * 426 * @return the directory path 427 */ 428 public File getRemovedTmp() { 429 return new File(root, STORAGE_TMP_REMOVED); 430 } 431 432 /** 433 * {@code finalized.tmp} is a transient directory, which holds 434 * the {@code previous} file system state while it is being removed 435 * in response to the finalize request. 436 * Finalize operation will remove {@code finalized.tmp} when completed, 437 * otherwise the removal will resume upon the system startup. 438 * 439 * @return the directory path 440 */ 441 public File getFinalizedTmp() { 442 return new File(root, STORAGE_TMP_FINALIZED); 443 } 444 445 /** 446 * {@code lastcheckpoint.tmp} is a transient directory, which holds 447 * current file system state while the new state is saved into the new 448 * {@code current} during regular namespace updates. 449 * If the saving succeeds {@code lastcheckpoint.tmp} will be moved to 450 * {@code previous.checkpoint}, otherwise it will be renamed back to 451 * {@code current} by the recovery procedure during startup. 452 * 453 * @return the directory path 454 */ 455 public File getLastCheckpointTmp() { 456 return new File(root, STORAGE_TMP_LAST_CKPT); 457 } 458 459 /** 460 * {@code previous.checkpoint} is a directory, which holds the previous 461 * (before the last save) state of the storage directory. 
462 * The directory is created as a reference only, it does not play role 463 * in state recovery procedures, and is recycled automatically, 464 * but it may be useful for manual recovery of a stale state of the system. 465 * 466 * @return the directory path 467 */ 468 public File getPreviousCheckpoint() { 469 return new File(root, STORAGE_PREVIOUS_CKPT); 470 } 471 472 /** 473 * Check consistency of the storage directory 474 * 475 * @param startOpt a startup option. 476 * 477 * @return state {@link StorageState} of the storage directory 478 * @throws InconsistentFSStateException if directory state is not 479 * consistent and cannot be recovered. 480 * @throws IOException 481 */ 482 public StorageState analyzeStorage(StartupOption startOpt, Storage storage) 483 throws IOException { 484 assert root != null : "root is null"; 485 boolean hadMkdirs = false; 486 String rootPath = root.getCanonicalPath(); 487 try { // check that storage exists 488 if (!root.exists()) { 489 // storage directory does not exist 490 if (startOpt != StartupOption.FORMAT && 491 startOpt != StartupOption.HOTSWAP) { 492 LOG.warn("Storage directory " + rootPath + " does not exist"); 493 return StorageState.NON_EXISTENT; 494 } 495 LOG.info(rootPath + " does not exist. 
Creating ..."); 496 if (!root.mkdirs()) 497 throw new IOException("Cannot create directory " + rootPath); 498 hadMkdirs = true; 499 } 500 // or is inaccessible 501 if (!root.isDirectory()) { 502 LOG.warn(rootPath + "is not a directory"); 503 return StorageState.NON_EXISTENT; 504 } 505 if (!FileUtil.canWrite(root)) { 506 LOG.warn("Cannot access storage directory " + rootPath); 507 return StorageState.NON_EXISTENT; 508 } 509 } catch(SecurityException ex) { 510 LOG.warn("Cannot access storage directory " + rootPath, ex); 511 return StorageState.NON_EXISTENT; 512 } 513 514 this.lock(); // lock storage if it exists 515 516 // If startOpt is HOTSWAP, it returns NOT_FORMATTED for empty directory, 517 // while it also checks the layout version. 518 if (startOpt == HdfsServerConstants.StartupOption.FORMAT || 519 (startOpt == StartupOption.HOTSWAP && hadMkdirs)) 520 return StorageState.NOT_FORMATTED; 521 522 if (startOpt != HdfsServerConstants.StartupOption.IMPORT) { 523 storage.checkOldLayoutStorage(this); 524 } 525 526 // check whether current directory is valid 527 File versionFile = getVersionFile(); 528 boolean hasCurrent = versionFile.exists(); 529 530 // check which directories exist 531 boolean hasPrevious = getPreviousDir().exists(); 532 boolean hasPreviousTmp = getPreviousTmp().exists(); 533 boolean hasRemovedTmp = getRemovedTmp().exists(); 534 boolean hasFinalizedTmp = getFinalizedTmp().exists(); 535 boolean hasCheckpointTmp = getLastCheckpointTmp().exists(); 536 537 if (!(hasPreviousTmp || hasRemovedTmp 538 || hasFinalizedTmp || hasCheckpointTmp)) { 539 // no temp dirs - no recovery 540 if (hasCurrent) 541 return StorageState.NORMAL; 542 if (hasPrevious) 543 throw new InconsistentFSStateException(root, 544 "version file in current directory is missing."); 545 return StorageState.NOT_FORMATTED; 546 } 547 548 if ((hasPreviousTmp?1:0) + (hasRemovedTmp?1:0) 549 + (hasFinalizedTmp?1:0) + (hasCheckpointTmp?1:0) > 1) 550 // more than one temp dirs 551 throw new 
InconsistentFSStateException(root, 552 "too many temporary directories."); 553 554 // # of temp dirs == 1 should either recover or complete a transition 555 if (hasCheckpointTmp) { 556 return hasCurrent ? StorageState.COMPLETE_CHECKPOINT 557 : StorageState.RECOVER_CHECKPOINT; 558 } 559 560 if (hasFinalizedTmp) { 561 if (hasPrevious) 562 throw new InconsistentFSStateException(root, 563 STORAGE_DIR_PREVIOUS + " and " + STORAGE_TMP_FINALIZED 564 + "cannot exist together."); 565 return StorageState.COMPLETE_FINALIZE; 566 } 567 568 if (hasPreviousTmp) { 569 if (hasPrevious) 570 throw new InconsistentFSStateException(root, 571 STORAGE_DIR_PREVIOUS + " and " + STORAGE_TMP_PREVIOUS 572 + " cannot exist together."); 573 if (hasCurrent) 574 return StorageState.COMPLETE_UPGRADE; 575 return StorageState.RECOVER_UPGRADE; 576 } 577 578 assert hasRemovedTmp : "hasRemovedTmp must be true"; 579 if (!(hasCurrent ^ hasPrevious)) 580 throw new InconsistentFSStateException(root, 581 "one and only one directory " + STORAGE_DIR_CURRENT 582 + " or " + STORAGE_DIR_PREVIOUS 583 + " must be present when " + STORAGE_TMP_REMOVED 584 + " exists."); 585 if (hasCurrent) 586 return StorageState.COMPLETE_ROLLBACK; 587 return StorageState.RECOVER_ROLLBACK; 588 } 589 590 /** 591 * Complete or recover storage state from previously failed transition. 
592 * 593 * @param curState specifies what/how the state should be recovered 594 * @throws IOException 595 */ 596 public void doRecover(StorageState curState) throws IOException { 597 File curDir = getCurrentDir(); 598 String rootPath = root.getCanonicalPath(); 599 switch(curState) { 600 case COMPLETE_UPGRADE: // mv previous.tmp -> previous 601 LOG.info("Completing previous upgrade for storage directory " 602 + rootPath); 603 rename(getPreviousTmp(), getPreviousDir()); 604 return; 605 case RECOVER_UPGRADE: // mv previous.tmp -> current 606 LOG.info("Recovering storage directory " + rootPath 607 + " from previous upgrade"); 608 if (curDir.exists()) 609 deleteDir(curDir); 610 rename(getPreviousTmp(), curDir); 611 return; 612 case COMPLETE_ROLLBACK: // rm removed.tmp 613 LOG.info("Completing previous rollback for storage directory " 614 + rootPath); 615 deleteDir(getRemovedTmp()); 616 return; 617 case RECOVER_ROLLBACK: // mv removed.tmp -> current 618 LOG.info("Recovering storage directory " + rootPath 619 + " from previous rollback"); 620 rename(getRemovedTmp(), curDir); 621 return; 622 case COMPLETE_FINALIZE: // rm finalized.tmp 623 LOG.info("Completing previous finalize for storage directory " 624 + rootPath); 625 deleteDir(getFinalizedTmp()); 626 return; 627 case COMPLETE_CHECKPOINT: // mv lastcheckpoint.tmp -> previous.checkpoint 628 LOG.info("Completing previous checkpoint for storage directory " 629 + rootPath); 630 File prevCkptDir = getPreviousCheckpoint(); 631 if (prevCkptDir.exists()) 632 deleteDir(prevCkptDir); 633 rename(getLastCheckpointTmp(), prevCkptDir); 634 return; 635 case RECOVER_CHECKPOINT: // mv lastcheckpoint.tmp -> current 636 LOG.info("Recovering storage directory " + rootPath 637 + " from failed checkpoint"); 638 if (curDir.exists()) 639 deleteDir(curDir); 640 rename(getLastCheckpointTmp(), curDir); 641 return; 642 default: 643 throw new IOException("Unexpected FS state: " + curState 644 + " for storage directory: " + rootPath); 645 } 646 } 
647 648 /** 649 * @return true if the storage directory should prompt the user prior 650 * to formatting (i.e if the directory appears to contain some data) 651 * @throws IOException if the SD cannot be accessed due to an IO error 652 */ 653 @Override 654 public boolean hasSomeData() throws IOException { 655 // Its alright for a dir not to exist, or to exist (properly accessible) 656 // and be completely empty. 657 if (!root.exists()) return false; 658 659 if (!root.isDirectory()) { 660 // a file where you expect a directory should not cause silent 661 // formatting 662 return true; 663 } 664 665 if (FileUtil.listFiles(root).length == 0) { 666 // Empty dir can format without prompt. 667 return false; 668 } 669 670 return true; 671 } 672 673 public boolean isShared() { 674 return isShared; 675 } 676 677 678 /** 679 * Lock storage to provide exclusive access. 680 * 681 * <p> Locking is not supported by all file systems. 682 * E.g., NFS does not consistently support exclusive locks. 683 * 684 * <p> If locking is supported we guarantee exclusive access to the 685 * storage directory. Otherwise, no guarantee is given. 686 * 687 * @throws IOException if locking fails 688 */ 689 public void lock() throws IOException { 690 if (isShared()) { 691 LOG.info("Locking is disabled for " + this.root); 692 return; 693 } 694 FileLock newLock = tryLock(); 695 if (newLock == null) { 696 String msg = "Cannot lock storage " + this.root 697 + ". The directory is already locked"; 698 LOG.info(msg); 699 throw new IOException(msg); 700 } 701 // Don't overwrite lock until success - this way if we accidentally 702 // call lock twice, the internal state won't be cleared by the second 703 // (failed) lock attempt 704 lock = newLock; 705 } 706 707 /** 708 * Attempts to acquire an exclusive lock on the storage. 709 * 710 * @return A lock object representing the newly-acquired lock or 711 * <code>null</code> if storage is already locked. 712 * @throws IOException if locking fails. 
713 */ 714 @SuppressWarnings("resource") 715 FileLock tryLock() throws IOException { 716 boolean deletionHookAdded = false; 717 File lockF = new File(root, STORAGE_FILE_LOCK); 718 if (!lockF.exists()) { 719 lockF.deleteOnExit(); 720 deletionHookAdded = true; 721 } 722 RandomAccessFile file = new RandomAccessFile(lockF, "rws"); 723 String jvmName = ManagementFactory.getRuntimeMXBean().getName(); 724 FileLock res = null; 725 try { 726 res = file.getChannel().tryLock(); 727 if (null == res) { 728 LOG.error("Unable to acquire file lock on path " + lockF.toString()); 729 throw new OverlappingFileLockException(); 730 } 731 file.write(jvmName.getBytes(Charsets.UTF_8)); 732 LOG.info("Lock on " + lockF + " acquired by nodename " + jvmName); 733 } catch(OverlappingFileLockException oe) { 734 // Cannot read from the locked file on Windows. 735 String lockingJvmName = Path.WINDOWS ? "" : (" " + file.readLine()); 736 LOG.error("It appears that another node " + lockingJvmName 737 + " has already locked the storage directory: " + root, oe); 738 file.close(); 739 return null; 740 } catch(IOException e) { 741 LOG.error("Failed to acquire lock on " + lockF 742 + ". If this storage directory is mounted via NFS, " 743 + "ensure that the appropriate nfs lock services are running.", e); 744 file.close(); 745 throw e; 746 } 747 if (!deletionHookAdded) { 748 // If the file existed prior to our startup, we didn't 749 // call deleteOnExit above. But since we successfully locked 750 // the dir, we can take care of cleaning it up. 751 lockF.deleteOnExit(); 752 } 753 return res; 754 } 755 756 /** 757 * Unlock storage. 758 * 759 * @throws IOException 760 */ 761 public void unlock() throws IOException { 762 if (this.lock == null) 763 return; 764 this.lock.release(); 765 lock.channel().close(); 766 lock = null; 767 } 768 769 @Override 770 public String toString() { 771 return "Storage Directory " + this.root; 772 } 773 774 /** 775 * Check whether underlying file system supports file locking. 
776 * 777 * @return <code>true</code> if exclusive locks are supported or 778 * <code>false</code> otherwise. 779 * @throws IOException 780 * @see StorageDirectory#lock() 781 */ 782 public boolean isLockSupported() throws IOException { 783 FileLock firstLock = null; 784 FileLock secondLock = null; 785 try { 786 firstLock = lock; 787 if(firstLock == null) { 788 firstLock = tryLock(); 789 if(firstLock == null) 790 return true; 791 } 792 secondLock = tryLock(); 793 if(secondLock == null) 794 return true; 795 } finally { 796 if(firstLock != null && firstLock != lock) { 797 firstLock.release(); 798 firstLock.channel().close(); 799 } 800 if(secondLock != null) { 801 secondLock.release(); 802 secondLock.channel().close(); 803 } 804 } 805 return false; 806 } 807 } 808 809 /** 810 * Create empty storage info of the specified type 811 */ 812 protected Storage(NodeType type) { 813 super(type); 814 } 815 816 protected Storage(StorageInfo storageInfo) { 817 super(storageInfo); 818 } 819 820 public int getNumStorageDirs() { 821 return storageDirs.size(); 822 } 823 824 public StorageDirectory getStorageDir(int idx) { 825 return storageDirs.get(idx); 826 } 827 828 /** 829 * @return the storage directory, with the precondition that this storage 830 * has exactly one storage directory 831 */ 832 public StorageDirectory getSingularStorageDir() { 833 Preconditions.checkState(storageDirs.size() == 1); 834 return storageDirs.get(0); 835 } 836 837 protected void addStorageDir(StorageDirectory sd) { 838 storageDirs.add(sd); 839 } 840 841 /** 842 * Returns true if the storage directory on the given directory is already 843 * loaded. 844 * @param root the root directory of a {@link StorageDirectory} 845 * @throws IOException if failed to get canonical path. 
846 */ 847 protected boolean containsStorageDir(File root) throws IOException { 848 for (StorageDirectory sd : storageDirs) { 849 if (sd.getRoot().getCanonicalPath().equals(root.getCanonicalPath())) { 850 return true; 851 } 852 } 853 return false; 854 } 855 856 /** 857 * Return true if the layout of the given storage directory is from a version 858 * of Hadoop prior to the introduction of the "current" and "previous" 859 * directories which allow upgrade and rollback. 860 */ 861 public abstract boolean isPreUpgradableLayout(StorageDirectory sd) 862 throws IOException; 863 864 /** 865 * Check if the given storage directory comes from a version of Hadoop 866 * prior to when the directory layout changed (ie 0.13). If this is 867 * the case, this method throws an IOException. 868 */ 869 private void checkOldLayoutStorage(StorageDirectory sd) throws IOException { 870 if (isPreUpgradableLayout(sd)) { 871 checkVersionUpgradable(0); 872 } 873 } 874 875 /** 876 * Checks if the upgrade from {@code oldVersion} is supported. 877 * @param oldVersion the version of the metadata to check with the current 878 * version 879 * @throws IOException if upgrade is not supported 880 */ 881 public static void checkVersionUpgradable(int oldVersion) 882 throws IOException { 883 if (oldVersion > LAST_UPGRADABLE_LAYOUT_VERSION) { 884 String msg = "*********** Upgrade is not supported from this " + 885 " older version " + oldVersion + 886 " of storage to the current version." + 887 " Please upgrade to " + LAST_UPGRADABLE_HADOOP_VERSION + 888 " or a later version and then upgrade to current" + 889 " version. Old layout version is " + 890 (oldVersion == 0 ? "'too old'" : (""+oldVersion)) + 891 " and latest layout version this software version can" + 892 " upgrade from is " + LAST_UPGRADABLE_LAYOUT_VERSION + 893 ". 
************";
      LOG.error(msg);
      throw new IOException(msg);
    }

  }

  /**
   * Iterate over each of the {@link FormatConfirmable} objects,
   * potentially checking with the user whether it should be formatted.
   *
   * If running in interactive mode, will prompt the user for each
   * directory to allow them to format anyway. Otherwise, returns
   * false, unless 'force' is specified.
   *
   * @param items the items to inspect for existing data
   * @param force format regardless of whether dirs exist
   * @param interactive prompt the user when a dir exists
   * @return true if formatting should proceed
   * @throws IOException if some storage cannot be accessed
   */
  public static boolean confirmFormat(
      Iterable<? extends FormatConfirmable> items,
      boolean force, boolean interactive) throws IOException {
    for (FormatConfirmable item : items) {
      if (!item.hasSomeData()) {
        // Nothing to lose in this item, so no confirmation is needed.
        continue;
      }
      if (force) { // Don't confirm, always format.
        System.err.println(
            "Data exists in " + item + ". Formatting anyway.");
        continue;
      }
      if (!interactive) { // Don't ask - always don't format
        System.err.println(
            "Running in non-interactive mode, and data appears to exist in " +
            item + ". Not formatting.");
        return false;
      }
      if (!ToolRunner.confirmPrompt("Re-format filesystem in " + item + " ?")) {
        System.err.println("Format aborted in " + item);
        return false;
      }
    }

    return true;
  }

  /**
   * Interface for classes which need to have the user confirm their
   * formatting during NameNode -format and other similar operations.
   *
   * This is currently a storage directory or journal manager.
   */
  @InterfaceAudience.Private
  public interface FormatConfirmable {
    /**
     * @return true if the storage seems to have some valid data in it,
     * and the user should be required to confirm the format. Otherwise,
     * false.
     * @throws IOException if the storage cannot be accessed at all.
     */
    public boolean hasSomeData() throws IOException;

    /**
     * @return a string representation of the formattable item, suitable
     * for display to the user inside a prompt
     */
    public String toString();
  }

  /**
   * Set common storage fields into the given properties object.
   * Should be overridden if additional fields need to be set.
   *
   * @param props the Properties object to write into
   * @param sd the storage directory the properties are being prepared for;
   *           this implementation does not read it
   * @throws IOException if the properties cannot be populated
   */
  protected void setPropertiesFromFields(Properties props,
                                         StorageDirectory sd)
      throws IOException {
    props.setProperty("layoutVersion", String.valueOf(layoutVersion));
    props.setProperty("storageType", storageType.toString());
    props.setProperty("namespaceID", String.valueOf(namespaceID));
    // Set clusterID in version with federation support
    if (versionSupportsFederation(getServiceLayoutFeatureMap())) {
      props.setProperty("clusterID", clusterID);
    }
    props.setProperty("cTime", String.valueOf(cTime));
  }

  /**
   * Write properties to the VERSION file in the given storage directory.
   *
   * @param sd the storage directory whose version file is written
   * @throws IOException if the properties cannot be collected or written
   */
  public void writeProperties(StorageDirectory sd) throws IOException {
    writeProperties(sd.getVersionFile(), sd);
  }

  /**
   * Collect this storage's fields via
   * {@link #setPropertiesFromFields(Properties, StorageDirectory)} and write
   * them to the given file.
   *
   * @param to the file to write the properties into
   * @param sd the storage directory passed on to
   *           {@link #setPropertiesFromFields(Properties, StorageDirectory)}
   * @throws IOException if the properties cannot be collected or written
   */
  public void writeProperties(File to, StorageDirectory sd) throws IOException {
    Properties props = new Properties();
    setPropertiesFromFields(props, sd);
    writeProperties(to, props);
  }

  /**
   * Store the given properties at the start of the given file and truncate
   * the file to the number of bytes written.
   *
   * @param to the file to write into
   * @param props the properties to store
   * @throws IOException if the file cannot be opened or written
   */
  public static void writeProperties(File to, Properties props)
      throws IOException {
    try (RandomAccessFile file = new RandomAccessFile(to, "rws");
        FileOutputStream out = new FileOutputStream(file.getFD())) {
      file.seek(0);
      /*
       * If server is interrupted before this line,
       * the version file will remain unchanged.
       */
      props.store(out, null);
      /*
       * Now the new fields are flushed to the head of the file, but file
       * length can still be larger than required and therefore the file can
       * contain whole or corrupted fields from its old contents in the end.
       * If server is interrupted here and restarted later these extra fields
       * either should not affect server behavior or should be handled
       * by the server correctly.
       */
      file.setLength(out.getChannel().position());
    }
  }

  /**
   * Rename a file through {@link NativeIO#renameTo(File, File)}.
   *
   * @param from the file to rename
   * @param to the new name
   * @throws IOException if the native rename fails; the original
   *         {@link NativeIOException} is attached as the cause
   */
  public static void rename(File from, File to) throws IOException {
    try {
      NativeIO.renameTo(from, to);
    } catch (NativeIOException e) {
      // Keep the native exception as the cause so its errno/stack trace is
      // not lost; the message text itself is unchanged.
      throw new IOException("Failed to rename " + from.getCanonicalPath()
          + " to " + to.getCanonicalPath() + " due to failure in native rename. "
          + e.toString(), e);
    }
  }

  /**
   * Copies a file (usually large) to a new location using native unbuffered IO.
   * <p>
   * This method copies the contents of the specified source file
   * to the specified destination file using OS specific unbuffered IO.
   * The goal is to avoid churning the file system buffer cache when copying
   * large files.
   *
   * We can't use FileUtils#copyFile from apache-commons-io because it
   * is a buffered IO based on FileChannel#transferFrom, which uses MmapByteBuffer
   * internally.
   *
   * The directory holding the destination file is created if it does not exist.
   * If the destination file exists, then this method will delete it first.
   * <p>
   * <strong>Note:</strong> Setting <code>preserveFileDate</code> to
   * {@code true} tries to preserve the file's last modified
   * date/times using {@link File#setLastModified(long)}, however it is
   * not guaranteed that the operation will succeed.
   * If the modification operation fails, no indication is provided
   * other than a debug-level log message.
   *
   * @param srcFile  an existing file to copy, must not be {@code null}
   * @param destFile  the new file, must not be {@code null}
   * @param preserveFileDate  true if the file date of the copy
   *  should be the same as the original
   *
   * @throws NullPointerException if source or destination is {@code null}
   * @throws IOException if source or destination is invalid
   * @throws IOException if an IO error occurs during copying; a failure of
   *         the native copy carries the {@link NativeIOException} as its cause
   */
  public static void nativeCopyFileUnbuffered(File srcFile, File destFile,
      boolean preserveFileDate) throws IOException {
    if (srcFile == null) {
      throw new NullPointerException("Source must not be null");
    }
    if (destFile == null) {
      throw new NullPointerException("Destination must not be null");
    }
    if (!srcFile.exists()) {
      throw new FileNotFoundException("Source '" + srcFile + "' does not exist");
    }
    if (srcFile.isDirectory()) {
      throw new IOException("Source '" + srcFile + "' exists but is a directory");
    }
    if (srcFile.getCanonicalPath().equals(destFile.getCanonicalPath())) {
      throw new IOException("Source '" + srcFile + "' and destination '" +
          destFile + "' are the same");
    }
    File parentFile = destFile.getParentFile();
    if (parentFile != null) {
      // mkdirs() returns false when the directory already exists, hence the
      // second check before treating it as a failure.
      if (!parentFile.mkdirs() && !parentFile.isDirectory()) {
        throw new IOException("Destination '" + parentFile
            + "' directory cannot be created");
      }
    }
    if (destFile.exists()) {
      if (!FileUtil.canWrite(destFile)) {
        throw new IOException("Destination '" + destFile
            + "' exists but is read-only");
      } else {
        if (!destFile.delete()) {
          throw new IOException("Destination '" + destFile
              + "' exists but cannot be deleted");
        }
      }
    }
    try {
      NativeIO.copyFileUnbuffered(srcFile, destFile);
    } catch (NativeIOException e) {
      // Attach the native exception as the cause instead of flattening it
      // into the message only.
      throw new IOException("Failed to copy " + srcFile.getCanonicalPath()
          + " to " + destFile.getCanonicalPath()
          + " due to failure in NativeIO#copyFileUnbuffered(). "
          + e.toString(), e);
    }
    // Sanity check: a length mismatch means the copy is incomplete.
    if (srcFile.length() != destFile.length()) {
      throw new IOException("Failed to copy full contents from '" + srcFile
          + "' to '" + destFile + "'");
    }
    if (preserveFileDate) {
      if (!destFile.setLastModified(srcFile.lastModified())) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Failed to preserve last modified date from'" + srcFile
              + "' to '" + destFile + "'");
        }
      }
    }
  }

  /**
   * Recursively delete all the content of the directory first and then
   * the directory itself from the local filesystem.
   * @param dir The directory to delete
   * @throws IOException if {@link FileUtil#fullyDelete(File)} reports failure
   */
  public static void deleteDir(File dir) throws IOException {
    if (!FileUtil.fullyDelete(dir)) {
      throw new IOException("Failed to delete " + dir.getCanonicalPath());
    }
  }

  /**
   * Write all data storage files.
   * <p>
   * Sets {@code layoutVersion} to the service layout version and then writes
   * the properties of every storage directory.
   * @throws IOException if writing any storage directory fails
   */
  public void writeAll() throws IOException {
    this.layoutVersion = getServiceLayoutVersion();
    for (StorageDirectory sd : storageDirs) {
      writeProperties(sd);
    }
  }

  /**
   * Unlock all storage directories.
   * <p>
   * NOTE(review): an exception from one directory's {@code unlock()} ends the
   * loop, so directories after it are not unlocked by this call.
   * @throws IOException if unlocking a storage directory fails
   */
  public void unlockAll() throws IOException {
    for (StorageDirectory sd : storageDirs) {
      sd.unlock();
    }
  }

  /**
   * @return the revision reported by {@link VersionInfo#getRevision()}
   */
  public static String getBuildVersion() {
    return VersionInfo.getRevision();
  }

  /**
   * Build a registration ID string of the form
   * {@code NS-<namespaceID>-<clusterID>-<cTime>}.
   *
   * @param storage the storage info supplying the three components
   * @return the registration ID string
   */
  public static String getRegistrationID(StorageInfo storage) {
    return "NS-" + Integer.toString(storage.getNamespaceID())
        + "-" + storage.getClusterID()
        + "-" + Long.toString(storage.getCTime());
  }

  /**
   * @param layoutVersion the layout version to test
   * @return true if the given layout version is one of
   *         {@link #LAYOUT_VERSIONS_203}
   */
  public static boolean is203LayoutVersion(int layoutVersion) {
    for (int lv203 : LAYOUT_VERSIONS_203) {
      if (lv203 == layoutVersion) {
        return true;
      }
    }
    return false;
  }
}