001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.server.common;
019
020import java.io.File;
021import java.io.FileOutputStream;
022import java.io.FileNotFoundException;
023import java.io.IOException;
024import java.io.RandomAccessFile;
025import java.lang.management.ManagementFactory;
026import java.nio.channels.FileLock;
027import java.nio.channels.OverlappingFileLockException;
028import java.util.ArrayList;
029import java.util.Iterator;
030import java.util.List;
031import java.util.Properties;
032
033import org.apache.commons.io.FileUtils;
034import org.apache.commons.logging.Log;
035import org.apache.commons.logging.LogFactory;
036import org.apache.hadoop.classification.InterfaceAudience;
037import org.apache.hadoop.fs.FileUtil;
038import org.apache.hadoop.fs.Path;
039import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
040import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
041import org.apache.hadoop.io.nativeio.NativeIO;
042import org.apache.hadoop.io.nativeio.NativeIOException;
043import org.apache.hadoop.util.ToolRunner;
044import org.apache.hadoop.util.VersionInfo;
045
046import com.google.common.base.Charsets;
047import com.google.common.base.Preconditions;
048
049
050
051/**
052 * Storage information file.
053 * <p>
054 * Local storage information is stored in a separate file VERSION.
 * It contains the type of the node, 
 * the storage layout version, the namespace id, and 
 * the fs state creation time.
058 * <p>
059 * Local storage can reside in multiple directories. 
060 * Each directory should contain the same VERSION file as the others.
061 * During startup Hadoop servers (name-node and data-nodes) read their local 
062 * storage information from them.
063 * <p>
 * The servers hold a lock for each storage directory while they run so that 
 * other nodes cannot start up sharing the same storage.
066 * The locks are released when the servers stop (normally or abnormally).
067 * 
068 */
069@InterfaceAudience.Private
070public abstract class Storage extends StorageInfo {
  public static final Log LOG = LogFactory.getLog(Storage.class.getName());

  // last layout version that did not support upgrades
  public static final int LAST_PRE_UPGRADE_LAYOUT_VERSION = -3;
  
  // this corresponds to Hadoop-0.18
  public static final int LAST_UPGRADABLE_LAYOUT_VERSION = -16;
  protected static final String LAST_UPGRADABLE_HADOOP_VERSION = "Hadoop-0.18";
  
  /** Layout versions of 0.20.203 release */
  public static final int[] LAYOUT_VERSIONS_203 = {-19, -31};

  // Well-known file and directory names making up the on-disk layout of a
  // storage directory; the *.tmp names mark in-progress transitions that
  // analyzeStorage()/doRecover() use to detect and repair interrupted
  // upgrades, rollbacks, finalizations and checkpoints.
  public    static final String STORAGE_FILE_LOCK     = "in_use.lock";
  public    static final String STORAGE_DIR_CURRENT   = "current";
  public    static final String STORAGE_DIR_PREVIOUS  = "previous";
  public    static final String STORAGE_TMP_REMOVED   = "removed.tmp";
  public    static final String STORAGE_TMP_PREVIOUS  = "previous.tmp";
  public    static final String STORAGE_TMP_FINALIZED = "finalized.tmp";
  public    static final String STORAGE_TMP_LAST_CKPT = "lastcheckpoint.tmp";
  public    static final String STORAGE_PREVIOUS_CKPT = "previous.checkpoint";
  
  /**
   * The blocksBeingWritten directory which was used in some 1.x and earlier
   * releases.
   */
  public static final String STORAGE_1_BBW = "blocksBeingWritten";
097  
  /**
   * Possible states of a storage directory, as determined by
   * {@link StorageDirectory#analyzeStorage}. The COMPLETE_* and RECOVER_*
   * states indicate an interrupted transition that
   * {@link StorageDirectory#doRecover} can finish or undo.
   */
  public enum StorageState {
    NON_EXISTENT,         // directory missing or inaccessible
    NOT_FORMATTED,        // directory exists but holds no formatted storage
    COMPLETE_UPGRADE,     // previous.tmp present with current: finish upgrade
    RECOVER_UPGRADE,      // previous.tmp present without current: undo upgrade
    COMPLETE_FINALIZE,    // finalized.tmp left over: finish removing it
    COMPLETE_ROLLBACK,    // removed.tmp present with current: finish rollback
    RECOVER_ROLLBACK,     // removed.tmp present without current: restore it
    COMPLETE_CHECKPOINT,  // lastcheckpoint.tmp with current: finish checkpoint
    RECOVER_CHECKPOINT,   // lastcheckpoint.tmp without current: restore it
    NORMAL;               // consistent state, no recovery needed
  }
110  
111  /**
112   * An interface to denote storage directory type
113   * Implementations can define a type for storage directory by implementing
114   * this interface.
115   */
116  @InterfaceAudience.Private
117  public interface StorageDirType {
118    public StorageDirType getStorageDirType();
119    public boolean isOfType(StorageDirType type);
120  }
121  
  /** The configured storage directories managed by this Storage instance. */
  protected List<StorageDirectory> storageDirs = new ArrayList<StorageDirectory>();
123  
  /**
   * Iterator over {@link #storageDirs} that can filter by directory type and
   * optionally skip shared directories. Supports {@link #remove()}.
   */
  private class DirIterator implements Iterator<StorageDirectory> {
    final StorageDirType dirType;   // null means "match any type"
    final boolean includeShared;    // false skips dirs whose isShared() is true
    int prevIndex; // for remove()
    int nextIndex; // for next()
    
    DirIterator(StorageDirType dirType, boolean includeShared) {
      this.dirType = dirType;
      this.nextIndex = 0;
      this.prevIndex = 0;
      this.includeShared = includeShared;
    }
    
    @Override
    public boolean hasNext() {
      if (storageDirs.isEmpty() || nextIndex >= storageDirs.size())
        return false;
      if (dirType != null || !includeShared) {
        // Advance nextIndex past entries rejected by the type/shared filters.
        while (nextIndex < storageDirs.size()) {
          if (shouldReturnNextDir())
            break;
          nextIndex++;
        }
        if (nextIndex >= storageDirs.size())
         return false;
      }
      return true;
    }
    
    @Override
    public StorageDirectory next() {
      StorageDirectory sd = getStorageDir(nextIndex);
      prevIndex = nextIndex;
      nextIndex++;
      if (dirType != null || !includeShared) {
        // Pre-position nextIndex on the next entry passing the filters.
        while (nextIndex < storageDirs.size()) {
          if (shouldReturnNextDir())
            break;
          nextIndex++;
        }
      }
      return sd;
    }
    
    @Override
    public void remove() {
      nextIndex = prevIndex; // restore previous state
      storageDirs.remove(prevIndex); // remove last returned element
      hasNext(); // reset nextIndex to correct place
    }
    
    /** @return true if the entry at nextIndex passes the type/shared filters. */
    private boolean shouldReturnNextDir() {
      StorageDirectory sd = getStorageDir(nextIndex);
      return (dirType == null || sd.getStorageDirType().isOfType(dirType)) &&
          (includeShared || !sd.isShared());
    }
  }
181  
182  /**
183   * @return A list of the given File in every available storage directory,
184   * regardless of whether it might exist.
185   */
186  public List<File> getFiles(StorageDirType dirType, String fileName) {
187    ArrayList<File> list = new ArrayList<File>();
188    Iterator<StorageDirectory> it =
189      (dirType == null) ? dirIterator() : dirIterator(dirType);
190    for ( ;it.hasNext(); ) {
191      list.add(new File(it.next().getCurrentDir(), fileName));
192    }
193    return list;
194  }
195
196
197  /**
198   * Return default iterator
199   * This iterator returns all entries in storageDirs
200   */
201  public Iterator<StorageDirectory> dirIterator() {
202    return dirIterator(null);
203  }
204  
205  /**
206   * Return iterator based on Storage Directory Type
207   * This iterator selects entries in storageDirs of type dirType and returns
208   * them via the Iterator
209   */
210  public Iterator<StorageDirectory> dirIterator(StorageDirType dirType) {
211    return dirIterator(dirType, true);
212  }
213  
214  /**
215   * Return all entries in storageDirs, potentially excluding shared dirs.
216   * @param includeShared whether or not to include shared dirs.
217   * @return an iterator over the configured storage dirs.
218   */
219  public Iterator<StorageDirectory> dirIterator(boolean includeShared) {
220    return dirIterator(null, includeShared);
221  }
222  
223  /**
224   * @param dirType all entries will be of this type of dir
225   * @param includeShared true to include any shared directories,
226   *        false otherwise
227   * @return an iterator over the configured storage dirs.
228   */
229  public Iterator<StorageDirectory> dirIterator(StorageDirType dirType,
230      boolean includeShared) {
231    return new DirIterator(dirType, includeShared);
232  }
233  
  /**
   * @param dirType storage directory type to select, or null for all types
   * @return an Iterable view over the matching storage directories, suitable
   *         for use in for-each loops
   */
  public Iterable<StorageDirectory> dirIterable(final StorageDirType dirType) {
    return new Iterable<StorageDirectory>() {
      @Override
      public Iterator<StorageDirectory> iterator() {
        return dirIterator(dirType);
      }
    };
  }
242  
243  
244  /**
245   * generate storage list (debug line)
246   */
247  public String listStorageDirectories() {
248    StringBuilder buf = new StringBuilder();
249    for (StorageDirectory sd : storageDirs) {
250      buf.append(sd.getRoot() + "(" + sd.getStorageDirType() + ");");
251    }
252    return buf.toString();
253  }
254  
255  /**
256   * One of the storage directories.
257   */
258  @InterfaceAudience.Private
259  public static class StorageDirectory implements FormatConfirmable {
260    final File root;              // root directory
261    // whether or not this dir is shared between two separate NNs for HA, or
262    // between multiple block pools in the case of federation.
263    final boolean isShared;
264    final StorageDirType dirType; // storage dir type
265    FileLock lock;                // storage lock
266
267    private String storageUuid = null;      // Storage directory identifier.
268    
269    public StorageDirectory(File dir) {
270      // default dirType is null
271      this(dir, null, false);
272    }
273    
274    public StorageDirectory(File dir, StorageDirType dirType) {
275      this(dir, dirType, false);
276    }
277    
278    public void setStorageUuid(String storageUuid) {
279      this.storageUuid = storageUuid;
280    }
281
282    public String getStorageUuid() {
283      return storageUuid;
284    }
285
286    /**
287     * Constructor
288     * @param dir directory corresponding to the storage
289     * @param dirType storage directory type
290     * @param isShared whether or not this dir is shared between two NNs. true
291     *          disables locking on the storage directory, false enables locking
292     */
293    public StorageDirectory(File dir, StorageDirType dirType, boolean isShared) {
294      this.root = dir;
295      this.lock = null;
296      this.dirType = dirType;
297      this.isShared = isShared;
298    }
299    
300    /**
301     * Get root directory of this storage
302     */
303    public File getRoot() {
304      return root;
305    }
306
307    /**
308     * Get storage directory type
309     */
310    public StorageDirType getStorageDirType() {
311      return dirType;
312    }    
313
314    /**
315     * Get storage directory size.
316     */
317    public long getDirecorySize() {
318      try {
319        if (!isShared() && root != null && root.exists()) {
320          return FileUtils.sizeOfDirectory(root);
321        }
322      } catch (Exception e) {
323        LOG.warn("Failed to get directory size :" + root, e);
324      }
325      return 0;
326    }
327
    /**
     * Read the storage properties from the given file and apply them to
     * {@code storage} via setFieldsFromProperties.
     *
     * @param from properties file to read (typically a VERSION file)
     * @param storage the Storage instance whose fields are populated
     * @throws IOException if the file cannot be read
     */
    public void read(File from, Storage storage) throws IOException {
      Properties props = readPropertiesFile(from);
      storage.setFieldsFromProperties(props, this);
    }
332
333    /**
334     * Clear and re-create storage directory.
335     * <p>
336     * Removes contents of the current directory and creates an empty directory.
337     * 
338     * This does not fully format storage directory. 
339     * It cannot write the version file since it should be written last after  
340     * all other storage type dependent files are written.
341     * Derived storage is responsible for setting specific storage values and
342     * writing the version file to disk.
343     * 
344     * @throws IOException
345     */
346    public void clearDirectory() throws IOException {
347      File curDir = this.getCurrentDir();
348      if (curDir.exists())
349        if (!(FileUtil.fullyDelete(curDir)))
350          throw new IOException("Cannot remove current directory: " + curDir);
351      if (!curDir.mkdirs())
352        throw new IOException("Cannot create directory " + curDir);
353    }
354
355    /**
356     * Directory {@code current} contains latest files defining
357     * the file system meta-data.
358     * 
359     * @return the directory path
360     */
361    public File getCurrentDir() {
362      return new File(root, STORAGE_DIR_CURRENT);
363    }
364
365    /**
366     * File {@code VERSION} contains the following fields:
367     * <ol>
368     * <li>node type</li>
369     * <li>layout version</li>
370     * <li>namespaceID</li>
371     * <li>fs state creation time</li>
372     * <li>other fields specific for this node type</li>
373     * </ol>
374     * The version file is always written last during storage directory updates.
375     * The existence of the version file indicates that all other files have
376     * been successfully written in the storage directory, the storage is valid
377     * and does not need to be recovered.
378     * 
379     * @return the version file path
380     */
381    public File getVersionFile() {
382      return new File(new File(root, STORAGE_DIR_CURRENT), STORAGE_FILE_VERSION);
383    }
384
385    /**
386     * File {@code VERSION} from the {@code previous} directory.
387     * 
388     * @return the previous version file path
389     */
390    public File getPreviousVersionFile() {
391      return new File(new File(root, STORAGE_DIR_PREVIOUS), STORAGE_FILE_VERSION);
392    }
393
394    /**
395     * Directory {@code previous} contains the previous file system state,
396     * which the system can be rolled back to.
397     * 
398     * @return the directory path
399     */
400    public File getPreviousDir() {
401      return new File(root, STORAGE_DIR_PREVIOUS);
402    }
403
404    /**
405     * {@code previous.tmp} is a transient directory, which holds
406     * current file system state while the new state is saved into the new
407     * {@code current} during upgrade.
408     * If the saving succeeds {@code previous.tmp} will be moved to
409     * {@code previous}, otherwise it will be renamed back to 
410     * {@code current} by the recovery procedure during startup.
411     * 
412     * @return the directory path
413     */
414    public File getPreviousTmp() {
415      return new File(root, STORAGE_TMP_PREVIOUS);
416    }
417
418    /**
419     * {@code removed.tmp} is a transient directory, which holds
420     * current file system state while the previous state is moved into
421     * {@code current} during rollback.
422     * If the moving succeeds {@code removed.tmp} will be removed,
423     * otherwise it will be renamed back to 
424     * {@code current} by the recovery procedure during startup.
425     * 
426     * @return the directory path
427     */
428    public File getRemovedTmp() {
429      return new File(root, STORAGE_TMP_REMOVED);
430    }
431
432    /**
433     * {@code finalized.tmp} is a transient directory, which holds
434     * the {@code previous} file system state while it is being removed
435     * in response to the finalize request.
436     * Finalize operation will remove {@code finalized.tmp} when completed,
437     * otherwise the removal will resume upon the system startup.
438     * 
439     * @return the directory path
440     */
441    public File getFinalizedTmp() {
442      return new File(root, STORAGE_TMP_FINALIZED);
443    }
444
445    /**
446     * {@code lastcheckpoint.tmp} is a transient directory, which holds
447     * current file system state while the new state is saved into the new
448     * {@code current} during regular namespace updates.
449     * If the saving succeeds {@code lastcheckpoint.tmp} will be moved to
450     * {@code previous.checkpoint}, otherwise it will be renamed back to 
451     * {@code current} by the recovery procedure during startup.
452     * 
453     * @return the directory path
454     */
455    public File getLastCheckpointTmp() {
456      return new File(root, STORAGE_TMP_LAST_CKPT);
457    }
458
459    /**
460     * {@code previous.checkpoint} is a directory, which holds the previous
461     * (before the last save) state of the storage directory.
462     * The directory is created as a reference only, it does not play role
463     * in state recovery procedures, and is recycled automatically, 
464     * but it may be useful for manual recovery of a stale state of the system.
465     * 
466     * @return the directory path
467     */
468    public File getPreviousCheckpoint() {
469      return new File(root, STORAGE_PREVIOUS_CKPT);
470    }
471
472    /**
473     * Check consistency of the storage directory
474     * 
475     * @param startOpt a startup option.
476     *  
477     * @return state {@link StorageState} of the storage directory 
478     * @throws InconsistentFSStateException if directory state is not 
479     * consistent and cannot be recovered.
480     * @throws IOException
481     */
482    public StorageState analyzeStorage(StartupOption startOpt, Storage storage)
483        throws IOException {
484      assert root != null : "root is null";
485      boolean hadMkdirs = false;
486      String rootPath = root.getCanonicalPath();
487      try { // check that storage exists
488        if (!root.exists()) {
489          // storage directory does not exist
490          if (startOpt != StartupOption.FORMAT &&
491              startOpt != StartupOption.HOTSWAP) {
492            LOG.warn("Storage directory " + rootPath + " does not exist");
493            return StorageState.NON_EXISTENT;
494          }
495          LOG.info(rootPath + " does not exist. Creating ...");
496          if (!root.mkdirs())
497            throw new IOException("Cannot create directory " + rootPath);
498          hadMkdirs = true;
499        }
500        // or is inaccessible
501        if (!root.isDirectory()) {
502          LOG.warn(rootPath + "is not a directory");
503          return StorageState.NON_EXISTENT;
504        }
505        if (!FileUtil.canWrite(root)) {
506          LOG.warn("Cannot access storage directory " + rootPath);
507          return StorageState.NON_EXISTENT;
508        }
509      } catch(SecurityException ex) {
510        LOG.warn("Cannot access storage directory " + rootPath, ex);
511        return StorageState.NON_EXISTENT;
512      }
513
514      this.lock(); // lock storage if it exists
515
516      // If startOpt is HOTSWAP, it returns NOT_FORMATTED for empty directory,
517      // while it also checks the layout version.
518      if (startOpt == HdfsServerConstants.StartupOption.FORMAT ||
519          (startOpt == StartupOption.HOTSWAP && hadMkdirs))
520        return StorageState.NOT_FORMATTED;
521
522      if (startOpt != HdfsServerConstants.StartupOption.IMPORT) {
523        storage.checkOldLayoutStorage(this);
524      }
525
526      // check whether current directory is valid
527      File versionFile = getVersionFile();
528      boolean hasCurrent = versionFile.exists();
529
530      // check which directories exist
531      boolean hasPrevious = getPreviousDir().exists();
532      boolean hasPreviousTmp = getPreviousTmp().exists();
533      boolean hasRemovedTmp = getRemovedTmp().exists();
534      boolean hasFinalizedTmp = getFinalizedTmp().exists();
535      boolean hasCheckpointTmp = getLastCheckpointTmp().exists();
536
537      if (!(hasPreviousTmp || hasRemovedTmp
538          || hasFinalizedTmp || hasCheckpointTmp)) {
539        // no temp dirs - no recovery
540        if (hasCurrent)
541          return StorageState.NORMAL;
542        if (hasPrevious)
543          throw new InconsistentFSStateException(root,
544                              "version file in current directory is missing.");
545        return StorageState.NOT_FORMATTED;
546      }
547
548      if ((hasPreviousTmp?1:0) + (hasRemovedTmp?1:0)
549          + (hasFinalizedTmp?1:0) + (hasCheckpointTmp?1:0) > 1)
550        // more than one temp dirs
551        throw new InconsistentFSStateException(root,
552                                               "too many temporary directories.");
553
554      // # of temp dirs == 1 should either recover or complete a transition
555      if (hasCheckpointTmp) {
556        return hasCurrent ? StorageState.COMPLETE_CHECKPOINT
557                          : StorageState.RECOVER_CHECKPOINT;
558      }
559
560      if (hasFinalizedTmp) {
561        if (hasPrevious)
562          throw new InconsistentFSStateException(root,
563                                                 STORAGE_DIR_PREVIOUS + " and " + STORAGE_TMP_FINALIZED
564                                                 + "cannot exist together.");
565        return StorageState.COMPLETE_FINALIZE;
566      }
567
568      if (hasPreviousTmp) {
569        if (hasPrevious)
570          throw new InconsistentFSStateException(root,
571                                                 STORAGE_DIR_PREVIOUS + " and " + STORAGE_TMP_PREVIOUS
572                                                 + " cannot exist together.");
573        if (hasCurrent)
574          return StorageState.COMPLETE_UPGRADE;
575        return StorageState.RECOVER_UPGRADE;
576      }
577      
578      assert hasRemovedTmp : "hasRemovedTmp must be true";
579      if (!(hasCurrent ^ hasPrevious))
580        throw new InconsistentFSStateException(root,
581                                               "one and only one directory " + STORAGE_DIR_CURRENT 
582                                               + " or " + STORAGE_DIR_PREVIOUS 
583                                               + " must be present when " + STORAGE_TMP_REMOVED
584                                               + " exists.");
585      if (hasCurrent)
586        return StorageState.COMPLETE_ROLLBACK;
587      return StorageState.RECOVER_ROLLBACK;
588    }
589
590    /**
591     * Complete or recover storage state from previously failed transition.
592     * 
593     * @param curState specifies what/how the state should be recovered
594     * @throws IOException
595     */
596    public void doRecover(StorageState curState) throws IOException {
597      File curDir = getCurrentDir();
598      String rootPath = root.getCanonicalPath();
599      switch(curState) {
600      case COMPLETE_UPGRADE:  // mv previous.tmp -> previous
601        LOG.info("Completing previous upgrade for storage directory " 
602                 + rootPath);
603        rename(getPreviousTmp(), getPreviousDir());
604        return;
605      case RECOVER_UPGRADE:   // mv previous.tmp -> current
606        LOG.info("Recovering storage directory " + rootPath
607                 + " from previous upgrade");
608        if (curDir.exists())
609          deleteDir(curDir);
610        rename(getPreviousTmp(), curDir);
611        return;
612      case COMPLETE_ROLLBACK: // rm removed.tmp
613        LOG.info("Completing previous rollback for storage directory "
614                 + rootPath);
615        deleteDir(getRemovedTmp());
616        return;
617      case RECOVER_ROLLBACK:  // mv removed.tmp -> current
618        LOG.info("Recovering storage directory " + rootPath
619                 + " from previous rollback");
620        rename(getRemovedTmp(), curDir);
621        return;
622      case COMPLETE_FINALIZE: // rm finalized.tmp
623        LOG.info("Completing previous finalize for storage directory "
624                 + rootPath);
625        deleteDir(getFinalizedTmp());
626        return;
627      case COMPLETE_CHECKPOINT: // mv lastcheckpoint.tmp -> previous.checkpoint
628        LOG.info("Completing previous checkpoint for storage directory " 
629                 + rootPath);
630        File prevCkptDir = getPreviousCheckpoint();
631        if (prevCkptDir.exists())
632          deleteDir(prevCkptDir);
633        rename(getLastCheckpointTmp(), prevCkptDir);
634        return;
635      case RECOVER_CHECKPOINT:  // mv lastcheckpoint.tmp -> current
636        LOG.info("Recovering storage directory " + rootPath
637                 + " from failed checkpoint");
638        if (curDir.exists())
639          deleteDir(curDir);
640        rename(getLastCheckpointTmp(), curDir);
641        return;
642      default:
643        throw new IOException("Unexpected FS state: " + curState
644            + " for storage directory: " + rootPath);
645      }
646    }
647    
648    /**
649     * @return true if the storage directory should prompt the user prior
650     * to formatting (i.e if the directory appears to contain some data)
651     * @throws IOException if the SD cannot be accessed due to an IO error
652     */
653    @Override
654    public boolean hasSomeData() throws IOException {
655      // Its alright for a dir not to exist, or to exist (properly accessible)
656      // and be completely empty.
657      if (!root.exists()) return false;
658      
659      if (!root.isDirectory()) {
660        // a file where you expect a directory should not cause silent
661        // formatting
662        return true;
663      }
664      
665      if (FileUtil.listFiles(root).length == 0) {
666        // Empty dir can format without prompt.
667        return false;
668      }
669      
670      return true;
671    }
672    
    /**
     * @return true if this directory is shared (between NNs for HA, or block
     *         pools for federation); shared directories are never locked.
     */
    public boolean isShared() {
      return isShared;
    }
676
677
678    /**
679     * Lock storage to provide exclusive access.
680     * 
681     * <p> Locking is not supported by all file systems.
682     * E.g., NFS does not consistently support exclusive locks.
683     * 
684     * <p> If locking is supported we guarantee exclusive access to the
685     * storage directory. Otherwise, no guarantee is given.
686     * 
687     * @throws IOException if locking fails
688     */
689    public void lock() throws IOException {
690      if (isShared()) {
691        LOG.info("Locking is disabled for " + this.root);
692        return;
693      }
694      FileLock newLock = tryLock();
695      if (newLock == null) {
696        String msg = "Cannot lock storage " + this.root 
697          + ". The directory is already locked";
698        LOG.info(msg);
699        throw new IOException(msg);
700      }
701      // Don't overwrite lock until success - this way if we accidentally
702      // call lock twice, the internal state won't be cleared by the second
703      // (failed) lock attempt
704      lock = newLock;
705    }
706
707    /**
708     * Attempts to acquire an exclusive lock on the storage.
709     * 
710     * @return A lock object representing the newly-acquired lock or
711     * <code>null</code> if storage is already locked.
712     * @throws IOException if locking fails.
713     */
714    @SuppressWarnings("resource")
715    FileLock tryLock() throws IOException {
716      boolean deletionHookAdded = false;
717      File lockF = new File(root, STORAGE_FILE_LOCK);
718      if (!lockF.exists()) {
719        lockF.deleteOnExit();
720        deletionHookAdded = true;
721      }
722      RandomAccessFile file = new RandomAccessFile(lockF, "rws");
723      String jvmName = ManagementFactory.getRuntimeMXBean().getName();
724      FileLock res = null;
725      try {
726        res = file.getChannel().tryLock();
727        if (null == res) {
728          LOG.error("Unable to acquire file lock on path " + lockF.toString());
729          throw new OverlappingFileLockException();
730        }
731        file.write(jvmName.getBytes(Charsets.UTF_8));
732        LOG.info("Lock on " + lockF + " acquired by nodename " + jvmName);
733      } catch(OverlappingFileLockException oe) {
734        // Cannot read from the locked file on Windows.
735        String lockingJvmName = Path.WINDOWS ? "" : (" " + file.readLine());
736        LOG.error("It appears that another node " + lockingJvmName
737            + " has already locked the storage directory: " + root, oe);
738        file.close();
739        return null;
740      } catch(IOException e) {
741        LOG.error("Failed to acquire lock on " + lockF
742            + ". If this storage directory is mounted via NFS, " 
743            + "ensure that the appropriate nfs lock services are running.", e);
744        file.close();
745        throw e;
746      }
747      if (!deletionHookAdded) {
748        // If the file existed prior to our startup, we didn't
749        // call deleteOnExit above. But since we successfully locked
750        // the dir, we can take care of cleaning it up.
751        lockF.deleteOnExit();
752      }
753      return res;
754    }
755
756    /**
757     * Unlock storage.
758     * 
759     * @throws IOException
760     */
761    public void unlock() throws IOException {
762      if (this.lock == null)
763        return;
764      this.lock.release();
765      lock.channel().close();
766      lock = null;
767    }
768    
769    @Override
770    public String toString() {
771      return "Storage Directory " + this.root;
772    }
773
774    /**
775     * Check whether underlying file system supports file locking.
776     * 
777     * @return <code>true</code> if exclusive locks are supported or
778     *         <code>false</code> otherwise.
779     * @throws IOException
780     * @see StorageDirectory#lock()
781     */
782    public boolean isLockSupported() throws IOException {
783      FileLock firstLock = null;
784      FileLock secondLock = null;
785      try {
786        firstLock = lock;
787        if(firstLock == null) {
788          firstLock = tryLock();
789          if(firstLock == null)
790            return true;
791        }
792        secondLock = tryLock();
793        if(secondLock == null)
794          return true;
795      } finally {
796        if(firstLock != null && firstLock != lock) {
797          firstLock.release();
798          firstLock.channel().close();
799        }
800        if(secondLock != null) {
801          secondLock.release();
802          secondLock.channel().close();
803        }
804      }
805      return false;
806    }
807  }
808
809  /**
810   * Create empty storage info of the specified type
811   */
812  protected Storage(NodeType type) {
813    super(type);
814  }
815  
816  protected Storage(StorageInfo storageInfo) {
817    super(storageInfo);
818  }
819  
  /** @return the number of configured storage directories. */
  public int getNumStorageDirs() {
    return storageDirs.size();
  }
  
  /** @return the storage directory at the given index. */
  public StorageDirectory getStorageDir(int idx) {
    return storageDirs.get(idx);
  }
  
  /**
   * @return the storage directory, with the precondition that this storage
   * has exactly one storage directory
   */
  public StorageDirectory getSingularStorageDir() {
    Preconditions.checkState(storageDirs.size() == 1);
    return storageDirs.get(0);
  }
  
  /** Register an additional storage directory with this Storage instance. */
  protected void addStorageDir(StorageDirectory sd) {
    storageDirs.add(sd);
  }
840
841  /**
842   * Returns true if the storage directory on the given directory is already
843   * loaded.
844   * @param root the root directory of a {@link StorageDirectory}
845   * @throws IOException if failed to get canonical path.
846   */
847  protected boolean containsStorageDir(File root) throws IOException {
848    for (StorageDirectory sd : storageDirs) {
849      if (sd.getRoot().getCanonicalPath().equals(root.getCanonicalPath())) {
850        return true;
851      }
852    }
853    return false;
854  }
855
856  /**
857   * Return true if the layout of the given storage directory is from a version
858   * of Hadoop prior to the introduction of the "current" and "previous"
859   * directories which allow upgrade and rollback.
860   */
861  public abstract boolean isPreUpgradableLayout(StorageDirectory sd)
862  throws IOException;
863
864  /**
865   * Check if the given storage directory comes from a version of Hadoop
866   * prior to when the directory layout changed (ie 0.13). If this is
867   * the case, this method throws an IOException.
868   */
869  private void checkOldLayoutStorage(StorageDirectory sd) throws IOException {
870    if (isPreUpgradableLayout(sd)) {
871      checkVersionUpgradable(0);
872    }
873  }
874
875  /**
876   * Checks if the upgrade from {@code oldVersion} is supported.
877   * @param oldVersion the version of the metadata to check with the current
878   *                   version
879   * @throws IOException if upgrade is not supported
880   */
881  public static void checkVersionUpgradable(int oldVersion) 
882                                     throws IOException {
883    if (oldVersion > LAST_UPGRADABLE_LAYOUT_VERSION) {
884      String msg = "*********** Upgrade is not supported from this " +
885                   " older version " + oldVersion + 
886                   " of storage to the current version." + 
887                   " Please upgrade to " + LAST_UPGRADABLE_HADOOP_VERSION +
888                   " or a later version and then upgrade to current" +
889                   " version. Old layout version is " + 
890                   (oldVersion == 0 ? "'too old'" : (""+oldVersion)) +
891                   " and latest layout version this software version can" +
892                   " upgrade from is " + LAST_UPGRADABLE_LAYOUT_VERSION +
893                   ". ************";
894      LOG.error(msg);
895      throw new IOException(msg); 
896    }
897    
898  }
899  
900  /**
901   * Iterate over each of the {@link FormatConfirmable} objects,
902   * potentially checking with the user whether it should be formatted.
903   * 
904   * If running in interactive mode, will prompt the user for each
905   * directory to allow them to format anyway. Otherwise, returns
906   * false, unless 'force' is specified.
907   * 
908   * @param force format regardless of whether dirs exist
909   * @param interactive prompt the user when a dir exists
910   * @return true if formatting should proceed
911   * @throws IOException if some storage cannot be accessed
912   */
913  public static boolean confirmFormat(
914      Iterable<? extends FormatConfirmable> items,
915      boolean force, boolean interactive) throws IOException {
916    for (FormatConfirmable item : items) {
917      if (!item.hasSomeData())
918        continue;
919      if (force) { // Don't confirm, always format.
920        System.err.println(
921            "Data exists in " + item + ". Formatting anyway.");
922        continue;
923      }
924      if (!interactive) { // Don't ask - always don't format
925        System.err.println(
926            "Running in non-interactive mode, and data appears to exist in " +
927            item + ". Not formatting.");
928        return false;
929      }
930      if (!ToolRunner.confirmPrompt("Re-format filesystem in " + item + " ?")) {
931        System.err.println("Format aborted in " + item);
932        return false;
933      }
934    }
935    
936    return true;
937  }
938  
939  /**
940   * Interface for classes which need to have the user confirm their
941   * formatting during NameNode -format and other similar operations.
942   * 
943   * This is currently a storage directory or journal manager.
944   */
945  @InterfaceAudience.Private
946  public interface FormatConfirmable {
947    /**
948     * @return true if the storage seems to have some valid data in it,
949     * and the user should be required to confirm the format. Otherwise,
950     * false.
951     * @throws IOException if the storage cannot be accessed at all.
952     */
953    public boolean hasSomeData() throws IOException;
954    
955    /**
956     * @return a string representation of the formattable item, suitable
957     * for display to the user inside a prompt
958     */
959    public String toString();
960  }
961  
962  /**
963   * Set common storage fields into the given properties object.
964   * Should be overloaded if additional fields need to be set.
965   * 
966   * @param props the Properties object to write into
967   */
968  protected void setPropertiesFromFields(Properties props, 
969                                         StorageDirectory sd)
970      throws IOException {
971    props.setProperty("layoutVersion", String.valueOf(layoutVersion));
972    props.setProperty("storageType", storageType.toString());
973    props.setProperty("namespaceID", String.valueOf(namespaceID));
974    // Set clusterID in version with federation support
975    if (versionSupportsFederation(getServiceLayoutFeatureMap())) {
976      props.setProperty("clusterID", clusterID);
977    }
978    props.setProperty("cTime", String.valueOf(cTime));
979  }
980
981  /**
982   * Write properties to the VERSION file in the given storage directory.
983   */
984  public void writeProperties(StorageDirectory sd) throws IOException {
985    writeProperties(sd.getVersionFile(), sd);
986  }
987  
  /**
   * Collect this storage's fields into a fresh {@link Properties} object and
   * write them to the given file.
   *
   * @param to the file to write the properties to
   * @param sd the storage directory the properties describe
   * @throws IOException if the fields cannot be collected or written
   */
  public void writeProperties(File to, StorageDirectory sd) throws IOException {
    Properties props = new Properties();
    setPropertiesFromFields(props, sd);
    writeProperties(to, props);
  }
993
994  public static void writeProperties(File to, Properties props)
995      throws IOException {
996    try (RandomAccessFile file = new RandomAccessFile(to, "rws");
997        FileOutputStream out = new FileOutputStream(file.getFD())) {
998      file.seek(0);
999      /*
1000       * If server is interrupted before this line,
1001       * the version file will remain unchanged.
1002       */
1003      props.store(out, null);
1004      /*
1005       * Now the new fields are flushed to the head of the file, but file
1006       * length can still be larger then required and therefore the file can
1007       * contain whole or corrupted fields from its old contents in the end.
1008       * If server is interrupted here and restarted later these extra fields
1009       * either should not effect server behavior or should be handled
1010       * by the server correctly.
1011       */
1012      file.setLength(out.getChannel().position());
1013    }
1014  }
1015
1016  public static void rename(File from, File to) throws IOException {
1017    try {
1018      NativeIO.renameTo(from, to);
1019    } catch (NativeIOException e) {
1020      throw new IOException("Failed to rename " + from.getCanonicalPath()
1021        + " to " + to.getCanonicalPath() + " due to failure in native rename. "
1022        + e.toString());
1023    }
1024  }
1025
1026  /**
1027   * Copies a file (usually large) to a new location using native unbuffered IO.
1028   * <p>
1029   * This method copies the contents of the specified source file
1030   * to the specified destination file using OS specific unbuffered IO.
1031   * The goal is to avoid churning the file system buffer cache when copying
1032   * large files.
1033   *
1034   * We can't use FileUtils#copyFile from apache-commons-io because it
1035   * is a buffered IO based on FileChannel#transferFrom, which uses MmapByteBuffer
1036   * internally.
1037   *
1038   * The directory holding the destination file is created if it does not exist.
1039   * If the destination file exists, then this method will delete it first.
1040   * <p>
1041   * <strong>Note:</strong> Setting <code>preserveFileDate</code> to
1042   * {@code true} tries to preserve the file's last modified
1043   * date/times using {@link File#setLastModified(long)}, however it is
1044   * not guaranteed that the operation will succeed.
1045   * If the modification operation fails, no indication is provided.
1046   *
1047   * @param srcFile  an existing file to copy, must not be {@code null}
1048   * @param destFile  the new file, must not be {@code null}
1049   * @param preserveFileDate  true if the file date of the copy
1050   *  should be the same as the original
1051   *
1052   * @throws NullPointerException if source or destination is {@code null}
1053   * @throws IOException if source or destination is invalid
1054   * @throws IOException if an IO error occurs during copying
1055   */
1056  public static void nativeCopyFileUnbuffered(File srcFile, File destFile,
1057      boolean preserveFileDate) throws IOException {
1058    if (srcFile == null) {
1059      throw new NullPointerException("Source must not be null");
1060    }
1061    if (destFile == null) {
1062      throw new NullPointerException("Destination must not be null");
1063    }
1064    if (srcFile.exists() == false) {
1065      throw new FileNotFoundException("Source '" + srcFile + "' does not exist");
1066    }
1067    if (srcFile.isDirectory()) {
1068      throw new IOException("Source '" + srcFile + "' exists but is a directory");
1069    }
1070    if (srcFile.getCanonicalPath().equals(destFile.getCanonicalPath())) {
1071      throw new IOException("Source '" + srcFile + "' and destination '" +
1072          destFile + "' are the same");
1073    }
1074    File parentFile = destFile.getParentFile();
1075    if (parentFile != null) {
1076      if (!parentFile.mkdirs() && !parentFile.isDirectory()) {
1077        throw new IOException("Destination '" + parentFile
1078            + "' directory cannot be created");
1079      }
1080    }
1081    if (destFile.exists()) {
1082      if (FileUtil.canWrite(destFile) == false) {
1083        throw new IOException("Destination '" + destFile
1084            + "' exists but is read-only");
1085      } else {
1086        if (destFile.delete() == false) {
1087          throw new IOException("Destination '" + destFile
1088              + "' exists but cannot be deleted");
1089        }
1090      }
1091    }
1092    try {
1093      NativeIO.copyFileUnbuffered(srcFile, destFile);
1094    } catch (NativeIOException e) {
1095      throw new IOException("Failed to copy " + srcFile.getCanonicalPath()
1096          + " to " + destFile.getCanonicalPath()
1097          + " due to failure in NativeIO#copyFileUnbuffered(). "
1098          + e.toString());
1099    }
1100    if (srcFile.length() != destFile.length()) {
1101      throw new IOException("Failed to copy full contents from '" + srcFile
1102          + "' to '" + destFile + "'");
1103    }
1104    if (preserveFileDate) {
1105      if (destFile.setLastModified(srcFile.lastModified()) == false) {
1106        if (LOG.isDebugEnabled()) {
1107          LOG.debug("Failed to preserve last modified date from'" + srcFile
1108            + "' to '" + destFile + "'");
1109        }
1110      }
1111    }
1112  }
1113
1114  /**
1115   * Recursively delete all the content of the directory first and then 
1116   * the directory itself from the local filesystem.
1117   * @param dir The directory to delete
1118   * @throws IOException
1119   */
1120  public static void deleteDir(File dir) throws IOException {
1121    if (!FileUtil.fullyDelete(dir))
1122      throw new IOException("Failed to delete " + dir.getCanonicalPath());
1123  }
1124  
1125  /**
1126   * Write all data storage files.
1127   * @throws IOException
1128   */
1129  public void writeAll() throws IOException {
1130    this.layoutVersion = getServiceLayoutVersion();
1131    for (Iterator<StorageDirectory> it = storageDirs.iterator(); it.hasNext();) {
1132      writeProperties(it.next());
1133    }
1134  }
1135
1136  /**
1137   * Unlock all storage directories.
1138   * @throws IOException
1139   */
1140  public void unlockAll() throws IOException {
1141    for (Iterator<StorageDirectory> it = storageDirs.iterator(); it.hasNext();) {
1142      it.next().unlock();
1143    }
1144  }
1145
  /**
   * @return the source revision this software was built from, as reported
   *         by {@link VersionInfo#getRevision()}
   */
  public static String getBuildVersion() {
    return VersionInfo.getRevision();
  }
1149
1150  public static String getRegistrationID(StorageInfo storage) {
1151    return "NS-" + Integer.toString(storage.getNamespaceID())
1152      + "-" + storage.getClusterID()
1153      + "-" + Long.toString(storage.getCTime());
1154  }
1155  
1156  public static boolean is203LayoutVersion(int layoutVersion) {
1157    for (int lv203 : LAYOUT_VERSIONS_203) {
1158      if (lv203 == layoutVersion) {
1159        return true;
1160      }
1161    }
1162    return false;
1163  }
1164}