001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.hdfs.server.common;
019    
020    import java.io.File;
021    import java.io.FileOutputStream;
022    import java.io.FileNotFoundException;
023    import java.io.IOException;
024    import java.io.RandomAccessFile;
025    import java.lang.management.ManagementFactory;
026    import java.nio.channels.FileLock;
027    import java.nio.channels.OverlappingFileLockException;
028    import java.util.ArrayList;
029    import java.util.Iterator;
030    import java.util.List;
031    import java.util.Properties;
032    
033    import org.apache.commons.logging.Log;
034    import org.apache.commons.logging.LogFactory;
035    import org.apache.hadoop.classification.InterfaceAudience;
036    import org.apache.hadoop.fs.FileUtil;
037    import org.apache.hadoop.fs.Path;
038    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
039    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
040    import org.apache.hadoop.io.nativeio.NativeIO;
041    import org.apache.hadoop.io.nativeio.NativeIOException;
042    import org.apache.hadoop.util.ToolRunner;
043    import org.apache.hadoop.util.VersionInfo;
044    
045    import com.google.common.base.Charsets;
046    import com.google.common.base.Preconditions;
047    
048    
049    
050    /**
051     * Storage information file.
052     * <p>
053     * Local storage information is stored in a separate file VERSION.
054     * It contains type of the node, 
055     * the storage layout version, the namespace id, and 
056     * the fs state creation time.
057     * <p>
058     * Local storage can reside in multiple directories. 
059     * Each directory should contain the same VERSION file as the others.
060     * During startup Hadoop servers (name-node and data-nodes) read their local 
061     * storage information from them.
062     * <p>
063     * The servers hold a lock for each storage directory while they run so that 
064     * other nodes were not able to startup sharing the same storage.
065     * The locks are released when the servers stop (normally or abnormally).
066     * 
067     */
068    @InterfaceAudience.Private
069    public abstract class Storage extends StorageInfo {
  public static final Log LOG = LogFactory.getLog(Storage.class.getName());

  // last layout version that did not support upgrades
  public static final int LAST_PRE_UPGRADE_LAYOUT_VERSION = -3;
  
  // this corresponds to Hadoop-0.18
  public static final int LAST_UPGRADABLE_LAYOUT_VERSION = -16;
  protected static final String LAST_UPGRADABLE_HADOOP_VERSION = "Hadoop-0.18";
  
  /** Layout versions of 0.20.203 release */
  public static final int[] LAYOUT_VERSIONS_203 = {-19, -31};

  // Well-known file and directory names inside a storage directory.
  // The *.tmp names mark in-progress state transitions (upgrade, rollback,
  // finalize, checkpoint); their presence at startup triggers recovery
  // (see StorageDirectory#analyzeStorage / #doRecover).
  public    static final String STORAGE_FILE_LOCK     = "in_use.lock";
  public    static final String STORAGE_DIR_CURRENT   = "current";
  public    static final String STORAGE_DIR_PREVIOUS  = "previous";
  public    static final String STORAGE_TMP_REMOVED   = "removed.tmp";
  public    static final String STORAGE_TMP_PREVIOUS  = "previous.tmp";
  public    static final String STORAGE_TMP_FINALIZED = "finalized.tmp";
  public    static final String STORAGE_TMP_LAST_CKPT = "lastcheckpoint.tmp";
  public    static final String STORAGE_PREVIOUS_CKPT = "previous.checkpoint";
  
  /**
   * The blocksBeingWritten directory which was used in some 1.x and earlier
   * releases.
   */
  public static final String STORAGE_1_BBW = "blocksBeingWritten";
096      
  /**
   * Possible states of a storage directory, as determined by
   * {@link StorageDirectory#analyzeStorage}. The COMPLETE_* and RECOVER_*
   * states indicate an interrupted transition that is finished or undone by
   * {@link StorageDirectory#doRecover}; the recovery action for each state
   * is noted below.
   */
  public enum StorageState {
    NON_EXISTENT,         // directory missing, inaccessible, or not a dir
    NOT_FORMATTED,        // directory exists but holds no filesystem state
    COMPLETE_UPGRADE,     // recovery: mv previous.tmp -> previous
    RECOVER_UPGRADE,      // recovery: mv previous.tmp -> current
    COMPLETE_FINALIZE,    // recovery: rm finalized.tmp
    COMPLETE_ROLLBACK,    // recovery: rm removed.tmp
    RECOVER_ROLLBACK,     // recovery: mv removed.tmp -> current
    COMPLETE_CHECKPOINT,  // recovery: mv lastcheckpoint.tmp -> previous.checkpoint
    RECOVER_CHECKPOINT,   // recovery: mv lastcheckpoint.tmp -> current
    NORMAL;               // consistent; no recovery needed
  }
109      
110      /**
111       * An interface to denote storage directory type
112       * Implementations can define a type for storage directory by implementing
113       * this interface.
114       */
115      @InterfaceAudience.Private
116      public interface StorageDirType {
117        public StorageDirType getStorageDirType();
118        public boolean isOfType(StorageDirType type);
119      }
120      
121      protected List<StorageDirectory> storageDirs = new ArrayList<StorageDirectory>();
122      
  /**
   * Iterator over the enclosing Storage's storageDirs list that can filter
   * by directory type and by whether a directory is shared. A null dirType
   * matches every type.
   *
   * NOTE: this is a non-static inner class; it reads storageDirs directly,
   * so structural changes to that list outside this iterator affect
   * iteration.
   */
  private class DirIterator implements Iterator<StorageDirectory> {
    final StorageDirType dirType;   // required type, or null for any type
    final boolean includeShared;    // whether shared dirs are returned
    int prevIndex; // for remove(): index of the last element returned
    int nextIndex; // for next(): index of the next candidate element
    
    DirIterator(StorageDirType dirType, boolean includeShared) {
      this.dirType = dirType;
      this.nextIndex = 0;
      this.prevIndex = 0;
      this.includeShared = includeShared;
    }
    
    @Override
    public boolean hasNext() {
      if (storageDirs.isEmpty() || nextIndex >= storageDirs.size())
        return false;
      if (dirType != null || !includeShared) {
        // Skip forward over entries rejected by the type/shared filters.
        while (nextIndex < storageDirs.size()) {
          if (shouldReturnNextDir())
            break;
          nextIndex++;
        }
        if (nextIndex >= storageDirs.size())
         return false;
      }
      return true;
    }
    
    @Override
    public StorageDirectory next() {
      StorageDirectory sd = getStorageDir(nextIndex);
      prevIndex = nextIndex;
      nextIndex++;
      if (dirType != null || !includeShared) {
        // Advance nextIndex past filtered-out entries so the next
        // hasNext()/next() call lands on a matching element.
        while (nextIndex < storageDirs.size()) {
          if (shouldReturnNextDir())
            break;
          nextIndex++;
        }
      }
      return sd;
    }
    
    @Override
    public void remove() {
      nextIndex = prevIndex; // restore previous state
      storageDirs.remove(prevIndex); // remove last returned element
      hasNext(); // reset nextIndex to correct place
    }
    
    /**
     * @return true if the entry at nextIndex passes both the type filter
     *         (dirType null, or matching) and the shared-ness filter.
     */
    private boolean shouldReturnNextDir() {
      StorageDirectory sd = getStorageDir(nextIndex);
      return (dirType == null || sd.getStorageDirType().isOfType(dirType)) &&
          (includeShared || !sd.isShared());
    }
  }
180      
181      /**
182       * @return A list of the given File in every available storage directory,
183       * regardless of whether it might exist.
184       */
185      public List<File> getFiles(StorageDirType dirType, String fileName) {
186        ArrayList<File> list = new ArrayList<File>();
187        Iterator<StorageDirectory> it =
188          (dirType == null) ? dirIterator() : dirIterator(dirType);
189        for ( ;it.hasNext(); ) {
190          list.add(new File(it.next().getCurrentDir(), fileName));
191        }
192        return list;
193      }
194    
195    
196      /**
197       * Return default iterator
198       * This iterator returns all entries in storageDirs
199       */
200      public Iterator<StorageDirectory> dirIterator() {
201        return dirIterator(null);
202      }
203      
204      /**
205       * Return iterator based on Storage Directory Type
206       * This iterator selects entries in storageDirs of type dirType and returns
207       * them via the Iterator
208       */
209      public Iterator<StorageDirectory> dirIterator(StorageDirType dirType) {
210        return dirIterator(dirType, true);
211      }
212      
213      /**
214       * Return all entries in storageDirs, potentially excluding shared dirs.
215       * @param includeShared whether or not to include shared dirs.
216       * @return an iterator over the configured storage dirs.
217       */
218      public Iterator<StorageDirectory> dirIterator(boolean includeShared) {
219        return dirIterator(null, includeShared);
220      }
221      
222      /**
223       * @param dirType all entries will be of this type of dir
224       * @param includeShared true to include any shared directories,
225       *        false otherwise
226       * @return an iterator over the configured storage dirs.
227       */
228      public Iterator<StorageDirectory> dirIterator(StorageDirType dirType,
229          boolean includeShared) {
230        return new DirIterator(dirType, includeShared);
231      }
232      
  /**
   * An Iterable over the storage directories of the given type, suitable
   * for use in for-each loops. Each call to iterator() yields a fresh
   * iterator over the current contents of storageDirs.
   *
   * @param dirType restrict to directories of this type; null means all
   */
  public Iterable<StorageDirectory> dirIterable(final StorageDirType dirType) {
    return new Iterable<StorageDirectory>() {
      @Override
      public Iterator<StorageDirectory> iterator() {
        return dirIterator(dirType);
      }
    };
  }
241      
242      
243      /**
244       * generate storage list (debug line)
245       */
246      public String listStorageDirectories() {
247        StringBuilder buf = new StringBuilder();
248        for (StorageDirectory sd : storageDirs) {
249          buf.append(sd.getRoot() + "(" + sd.getStorageDirType() + ");");
250        }
251        return buf.toString();
252      }
253      
254      /**
255       * One of the storage directories.
256       */
257      @InterfaceAudience.Private
258      public static class StorageDirectory implements FormatConfirmable {
259        final File root;              // root directory
260        // whether or not this dir is shared between two separate NNs for HA, or
261        // between multiple block pools in the case of federation.
262        final boolean isShared;
263        final StorageDirType dirType; // storage dir type
264        FileLock lock;                // storage lock
265    
266        private String storageUuid = null;      // Storage directory identifier.
267        
268        public StorageDirectory(File dir) {
269          // default dirType is null
270          this(dir, null, false);
271        }
272        
273        public StorageDirectory(File dir, StorageDirType dirType) {
274          this(dir, dirType, false);
275        }
276        
277        public void setStorageUuid(String storageUuid) {
278          this.storageUuid = storageUuid;
279        }
280    
281        public String getStorageUuid() {
282          return storageUuid;
283        }
284    
285        /**
286         * Constructor
287         * @param dir directory corresponding to the storage
288         * @param dirType storage directory type
289         * @param isShared whether or not this dir is shared between two NNs. true
290         *          disables locking on the storage directory, false enables locking
291         */
292        public StorageDirectory(File dir, StorageDirType dirType, boolean isShared) {
293          this.root = dir;
294          this.lock = null;
295          this.dirType = dirType;
296          this.isShared = isShared;
297        }
298        
299        /**
300         * Get root directory of this storage
301         */
302        public File getRoot() {
303          return root;
304        }
305    
306        /**
307         * Get storage directory type
308         */
309        public StorageDirType getStorageDirType() {
310          return dirType;
311        }    
312    
313        public void read(File from, Storage storage) throws IOException {
314          Properties props = readPropertiesFile(from);
315          storage.setFieldsFromProperties(props, this);
316        }
317    
318        /**
319         * Clear and re-create storage directory.
320         * <p>
321         * Removes contents of the current directory and creates an empty directory.
322         * 
323         * This does not fully format storage directory. 
324         * It cannot write the version file since it should be written last after  
325         * all other storage type dependent files are written.
326         * Derived storage is responsible for setting specific storage values and
327         * writing the version file to disk.
328         * 
329         * @throws IOException
330         */
331        public void clearDirectory() throws IOException {
332          File curDir = this.getCurrentDir();
333          if (curDir.exists())
334            if (!(FileUtil.fullyDelete(curDir)))
335              throw new IOException("Cannot remove current directory: " + curDir);
336          if (!curDir.mkdirs())
337            throw new IOException("Cannot create directory " + curDir);
338        }
339    
340        /**
341         * Directory {@code current} contains latest files defining
342         * the file system meta-data.
343         * 
344         * @return the directory path
345         */
346        public File getCurrentDir() {
347          return new File(root, STORAGE_DIR_CURRENT);
348        }
349    
350        /**
351         * File {@code VERSION} contains the following fields:
352         * <ol>
353         * <li>node type</li>
354         * <li>layout version</li>
355         * <li>namespaceID</li>
356         * <li>fs state creation time</li>
357         * <li>other fields specific for this node type</li>
358         * </ol>
359         * The version file is always written last during storage directory updates.
360         * The existence of the version file indicates that all other files have
361         * been successfully written in the storage directory, the storage is valid
362         * and does not need to be recovered.
363         * 
364         * @return the version file path
365         */
366        public File getVersionFile() {
367          return new File(new File(root, STORAGE_DIR_CURRENT), STORAGE_FILE_VERSION);
368        }
369    
370        /**
371         * File {@code VERSION} from the {@code previous} directory.
372         * 
373         * @return the previous version file path
374         */
375        public File getPreviousVersionFile() {
376          return new File(new File(root, STORAGE_DIR_PREVIOUS), STORAGE_FILE_VERSION);
377        }
378    
379        /**
380         * Directory {@code previous} contains the previous file system state,
381         * which the system can be rolled back to.
382         * 
383         * @return the directory path
384         */
385        public File getPreviousDir() {
386          return new File(root, STORAGE_DIR_PREVIOUS);
387        }
388    
389        /**
390         * {@code previous.tmp} is a transient directory, which holds
391         * current file system state while the new state is saved into the new
392         * {@code current} during upgrade.
393         * If the saving succeeds {@code previous.tmp} will be moved to
394         * {@code previous}, otherwise it will be renamed back to 
395         * {@code current} by the recovery procedure during startup.
396         * 
397         * @return the directory path
398         */
399        public File getPreviousTmp() {
400          return new File(root, STORAGE_TMP_PREVIOUS);
401        }
402    
403        /**
404         * {@code removed.tmp} is a transient directory, which holds
405         * current file system state while the previous state is moved into
406         * {@code current} during rollback.
407         * If the moving succeeds {@code removed.tmp} will be removed,
408         * otherwise it will be renamed back to 
409         * {@code current} by the recovery procedure during startup.
410         * 
411         * @return the directory path
412         */
413        public File getRemovedTmp() {
414          return new File(root, STORAGE_TMP_REMOVED);
415        }
416    
417        /**
418         * {@code finalized.tmp} is a transient directory, which holds
419         * the {@code previous} file system state while it is being removed
420         * in response to the finalize request.
421         * Finalize operation will remove {@code finalized.tmp} when completed,
422         * otherwise the removal will resume upon the system startup.
423         * 
424         * @return the directory path
425         */
426        public File getFinalizedTmp() {
427          return new File(root, STORAGE_TMP_FINALIZED);
428        }
429    
430        /**
431         * {@code lastcheckpoint.tmp} is a transient directory, which holds
432         * current file system state while the new state is saved into the new
433         * {@code current} during regular namespace updates.
434         * If the saving succeeds {@code lastcheckpoint.tmp} will be moved to
435         * {@code previous.checkpoint}, otherwise it will be renamed back to 
436         * {@code current} by the recovery procedure during startup.
437         * 
438         * @return the directory path
439         */
440        public File getLastCheckpointTmp() {
441          return new File(root, STORAGE_TMP_LAST_CKPT);
442        }
443    
444        /**
445         * {@code previous.checkpoint} is a directory, which holds the previous
446         * (before the last save) state of the storage directory.
447         * The directory is created as a reference only, it does not play role
448         * in state recovery procedures, and is recycled automatically, 
449         * but it may be useful for manual recovery of a stale state of the system.
450         * 
451         * @return the directory path
452         */
453        public File getPreviousCheckpoint() {
454          return new File(root, STORAGE_PREVIOUS_CKPT);
455        }
456    
457        /**
458         * Check consistency of the storage directory
459         * 
460         * @param startOpt a startup option.
461         *  
462         * @return state {@link StorageState} of the storage directory 
463         * @throws InconsistentFSStateException if directory state is not 
464         * consistent and cannot be recovered.
465         * @throws IOException
466         */
467        public StorageState analyzeStorage(StartupOption startOpt, Storage storage)
468            throws IOException {
469          assert root != null : "root is null";
470          boolean hadMkdirs = false;
471          String rootPath = root.getCanonicalPath();
472          try { // check that storage exists
473            if (!root.exists()) {
474              // storage directory does not exist
475              if (startOpt != StartupOption.FORMAT &&
476                  startOpt != StartupOption.HOTSWAP) {
477                LOG.warn("Storage directory " + rootPath + " does not exist");
478                return StorageState.NON_EXISTENT;
479              }
480              LOG.info(rootPath + " does not exist. Creating ...");
481              if (!root.mkdirs())
482                throw new IOException("Cannot create directory " + rootPath);
483              hadMkdirs = true;
484            }
485            // or is inaccessible
486            if (!root.isDirectory()) {
487              LOG.warn(rootPath + "is not a directory");
488              return StorageState.NON_EXISTENT;
489            }
490            if (!FileUtil.canWrite(root)) {
491              LOG.warn("Cannot access storage directory " + rootPath);
492              return StorageState.NON_EXISTENT;
493            }
494          } catch(SecurityException ex) {
495            LOG.warn("Cannot access storage directory " + rootPath, ex);
496            return StorageState.NON_EXISTENT;
497          }
498    
499          this.lock(); // lock storage if it exists
500    
501          // If startOpt is HOTSWAP, it returns NOT_FORMATTED for empty directory,
502          // while it also checks the layout version.
503          if (startOpt == HdfsServerConstants.StartupOption.FORMAT ||
504              (startOpt == StartupOption.HOTSWAP && hadMkdirs))
505            return StorageState.NOT_FORMATTED;
506    
507          if (startOpt != HdfsServerConstants.StartupOption.IMPORT) {
508            storage.checkOldLayoutStorage(this);
509          }
510    
511          // check whether current directory is valid
512          File versionFile = getVersionFile();
513          boolean hasCurrent = versionFile.exists();
514    
515          // check which directories exist
516          boolean hasPrevious = getPreviousDir().exists();
517          boolean hasPreviousTmp = getPreviousTmp().exists();
518          boolean hasRemovedTmp = getRemovedTmp().exists();
519          boolean hasFinalizedTmp = getFinalizedTmp().exists();
520          boolean hasCheckpointTmp = getLastCheckpointTmp().exists();
521    
522          if (!(hasPreviousTmp || hasRemovedTmp
523              || hasFinalizedTmp || hasCheckpointTmp)) {
524            // no temp dirs - no recovery
525            if (hasCurrent)
526              return StorageState.NORMAL;
527            if (hasPrevious)
528              throw new InconsistentFSStateException(root,
529                                  "version file in current directory is missing.");
530            return StorageState.NOT_FORMATTED;
531          }
532    
533          if ((hasPreviousTmp?1:0) + (hasRemovedTmp?1:0)
534              + (hasFinalizedTmp?1:0) + (hasCheckpointTmp?1:0) > 1)
535            // more than one temp dirs
536            throw new InconsistentFSStateException(root,
537                                                   "too many temporary directories.");
538    
539          // # of temp dirs == 1 should either recover or complete a transition
540          if (hasCheckpointTmp) {
541            return hasCurrent ? StorageState.COMPLETE_CHECKPOINT
542                              : StorageState.RECOVER_CHECKPOINT;
543          }
544    
545          if (hasFinalizedTmp) {
546            if (hasPrevious)
547              throw new InconsistentFSStateException(root,
548                                                     STORAGE_DIR_PREVIOUS + " and " + STORAGE_TMP_FINALIZED
549                                                     + "cannot exist together.");
550            return StorageState.COMPLETE_FINALIZE;
551          }
552    
553          if (hasPreviousTmp) {
554            if (hasPrevious)
555              throw new InconsistentFSStateException(root,
556                                                     STORAGE_DIR_PREVIOUS + " and " + STORAGE_TMP_PREVIOUS
557                                                     + " cannot exist together.");
558            if (hasCurrent)
559              return StorageState.COMPLETE_UPGRADE;
560            return StorageState.RECOVER_UPGRADE;
561          }
562          
563          assert hasRemovedTmp : "hasRemovedTmp must be true";
564          if (!(hasCurrent ^ hasPrevious))
565            throw new InconsistentFSStateException(root,
566                                                   "one and only one directory " + STORAGE_DIR_CURRENT 
567                                                   + " or " + STORAGE_DIR_PREVIOUS 
568                                                   + " must be present when " + STORAGE_TMP_REMOVED
569                                                   + " exists.");
570          if (hasCurrent)
571            return StorageState.COMPLETE_ROLLBACK;
572          return StorageState.RECOVER_ROLLBACK;
573        }
574    
575        /**
576         * Complete or recover storage state from previously failed transition.
577         * 
578         * @param curState specifies what/how the state should be recovered
579         * @throws IOException
580         */
581        public void doRecover(StorageState curState) throws IOException {
582          File curDir = getCurrentDir();
583          String rootPath = root.getCanonicalPath();
584          switch(curState) {
585          case COMPLETE_UPGRADE:  // mv previous.tmp -> previous
586            LOG.info("Completing previous upgrade for storage directory " 
587                     + rootPath);
588            rename(getPreviousTmp(), getPreviousDir());
589            return;
590          case RECOVER_UPGRADE:   // mv previous.tmp -> current
591            LOG.info("Recovering storage directory " + rootPath
592                     + " from previous upgrade");
593            if (curDir.exists())
594              deleteDir(curDir);
595            rename(getPreviousTmp(), curDir);
596            return;
597          case COMPLETE_ROLLBACK: // rm removed.tmp
598            LOG.info("Completing previous rollback for storage directory "
599                     + rootPath);
600            deleteDir(getRemovedTmp());
601            return;
602          case RECOVER_ROLLBACK:  // mv removed.tmp -> current
603            LOG.info("Recovering storage directory " + rootPath
604                     + " from previous rollback");
605            rename(getRemovedTmp(), curDir);
606            return;
607          case COMPLETE_FINALIZE: // rm finalized.tmp
608            LOG.info("Completing previous finalize for storage directory "
609                     + rootPath);
610            deleteDir(getFinalizedTmp());
611            return;
612          case COMPLETE_CHECKPOINT: // mv lastcheckpoint.tmp -> previous.checkpoint
613            LOG.info("Completing previous checkpoint for storage directory " 
614                     + rootPath);
615            File prevCkptDir = getPreviousCheckpoint();
616            if (prevCkptDir.exists())
617              deleteDir(prevCkptDir);
618            rename(getLastCheckpointTmp(), prevCkptDir);
619            return;
620          case RECOVER_CHECKPOINT:  // mv lastcheckpoint.tmp -> current
621            LOG.info("Recovering storage directory " + rootPath
622                     + " from failed checkpoint");
623            if (curDir.exists())
624              deleteDir(curDir);
625            rename(getLastCheckpointTmp(), curDir);
626            return;
627          default:
628            throw new IOException("Unexpected FS state: " + curState);
629          }
630        }
631        
632        /**
633         * @return true if the storage directory should prompt the user prior
634         * to formatting (i.e if the directory appears to contain some data)
635         * @throws IOException if the SD cannot be accessed due to an IO error
636         */
637        @Override
638        public boolean hasSomeData() throws IOException {
639          // Its alright for a dir not to exist, or to exist (properly accessible)
640          // and be completely empty.
641          if (!root.exists()) return false;
642          
643          if (!root.isDirectory()) {
644            // a file where you expect a directory should not cause silent
645            // formatting
646            return true;
647          }
648          
649          if (FileUtil.listFiles(root).length == 0) {
650            // Empty dir can format without prompt.
651            return false;
652          }
653          
654          return true;
655        }
656        
657        public boolean isShared() {
658          return isShared;
659        }
660    
661    
662        /**
663         * Lock storage to provide exclusive access.
664         * 
665         * <p> Locking is not supported by all file systems.
666         * E.g., NFS does not consistently support exclusive locks.
667         * 
668         * <p> If locking is supported we guarantee exclusive access to the
669         * storage directory. Otherwise, no guarantee is given.
670         * 
671         * @throws IOException if locking fails
672         */
673        public void lock() throws IOException {
674          if (isShared()) {
675            LOG.info("Locking is disabled");
676            return;
677          }
678          FileLock newLock = tryLock();
679          if (newLock == null) {
680            String msg = "Cannot lock storage " + this.root 
681              + ". The directory is already locked";
682            LOG.info(msg);
683            throw new IOException(msg);
684          }
685          // Don't overwrite lock until success - this way if we accidentally
686          // call lock twice, the internal state won't be cleared by the second
687          // (failed) lock attempt
688          lock = newLock;
689        }
690    
691        /**
692         * Attempts to acquire an exclusive lock on the storage.
693         * 
694         * @return A lock object representing the newly-acquired lock or
695         * <code>null</code> if storage is already locked.
696         * @throws IOException if locking fails.
697         */
698        @SuppressWarnings("resource")
699        FileLock tryLock() throws IOException {
700          boolean deletionHookAdded = false;
701          File lockF = new File(root, STORAGE_FILE_LOCK);
702          if (!lockF.exists()) {
703            lockF.deleteOnExit();
704            deletionHookAdded = true;
705          }
706          RandomAccessFile file = new RandomAccessFile(lockF, "rws");
707          String jvmName = ManagementFactory.getRuntimeMXBean().getName();
708          FileLock res = null;
709          try {
710            res = file.getChannel().tryLock();
711            if (null == res) {
712              throw new OverlappingFileLockException();
713            }
714            file.write(jvmName.getBytes(Charsets.UTF_8));
715            LOG.info("Lock on " + lockF + " acquired by nodename " + jvmName);
716          } catch(OverlappingFileLockException oe) {
717            // Cannot read from the locked file on Windows.
718            String lockingJvmName = Path.WINDOWS ? "" : (" " + file.readLine());
719            LOG.error("It appears that another namenode" + lockingJvmName
720                + " has already locked the storage directory");
721            file.close();
722            return null;
723          } catch(IOException e) {
724            LOG.error("Failed to acquire lock on " + lockF + ". If this storage directory is mounted via NFS, " 
725                + "ensure that the appropriate nfs lock services are running.", e);
726            file.close();
727            throw e;
728          }
729          if (res != null && !deletionHookAdded) {
730            // If the file existed prior to our startup, we didn't
731            // call deleteOnExit above. But since we successfully locked
732            // the dir, we can take care of cleaning it up.
733            lockF.deleteOnExit();
734          }
735          return res;
736        }
737    
738        /**
739         * Unlock storage.
740         * 
741         * @throws IOException
742         */
743        public void unlock() throws IOException {
744          if (this.lock == null)
745            return;
746          this.lock.release();
747          lock.channel().close();
748          lock = null;
749        }
750        
751        @Override
752        public String toString() {
753          return "Storage Directory " + this.root;
754        }
755    
756        /**
757         * Check whether underlying file system supports file locking.
758         * 
759         * @return <code>true</code> if exclusive locks are supported or
760         *         <code>false</code> otherwise.
761         * @throws IOException
762         * @see StorageDirectory#lock()
763         */
764        public boolean isLockSupported() throws IOException {
765          FileLock firstLock = null;
766          FileLock secondLock = null;
767          try {
768            firstLock = lock;
769            if(firstLock == null) {
770              firstLock = tryLock();
771              if(firstLock == null)
772                return true;
773            }
774            secondLock = tryLock();
775            if(secondLock == null)
776              return true;
777          } finally {
778            if(firstLock != null && firstLock != lock) {
779              firstLock.release();
780              firstLock.channel().close();
781            }
782            if(secondLock != null) {
783              secondLock.release();
784              secondLock.channel().close();
785            }
786          }
787          return false;
788        }
789      }
790    
791      /**
792       * Create empty storage info of the specified type
793       */
794      protected Storage(NodeType type) {
795        super(type);
796      }
797      
  /** Create storage initialized from the given existing storage info. */
  protected Storage(StorageInfo storageInfo) {
    super(storageInfo);
  }
801      
  /** @return the number of storage directories managed by this storage. */
  public int getNumStorageDirs() {
    return storageDirs.size();
  }
805      
  /**
   * @param idx index into the storage directory list
   * @return the storage directory at the given index
   */
  public StorageDirectory getStorageDir(int idx) {
    return storageDirs.get(idx);
  }
809      
810      /**
811       * @return the storage directory, with the precondition that this storage
812       * has exactly one storage directory
813       */
814      public StorageDirectory getSingularStorageDir() {
815        Preconditions.checkState(storageDirs.size() == 1);
816        return storageDirs.get(0);
817      }
818      
  /** Add a storage directory to the list managed by this storage. */
  protected void addStorageDir(StorageDirectory sd) {
    storageDirs.add(sd);
  }
822    
823      /**
824       * Return true if the layout of the given storage directory is from a version
825       * of Hadoop prior to the introduction of the "current" and "previous"
826       * directories which allow upgrade and rollback.
827       */
828      public abstract boolean isPreUpgradableLayout(StorageDirectory sd)
829      throws IOException;
830    
831      /**
832       * Check if the given storage directory comes from a version of Hadoop
833       * prior to when the directory layout changed (ie 0.13). If this is
834       * the case, this method throws an IOException.
835       */
836      private void checkOldLayoutStorage(StorageDirectory sd) throws IOException {
837        if (isPreUpgradableLayout(sd)) {
838          checkVersionUpgradable(0);
839        }
840      }
841    
842      /**
843       * Checks if the upgrade from {@code oldVersion} is supported.
844       * @param oldVersion the version of the metadata to check with the current
845       *                   version
846       * @throws IOException if upgrade is not supported
847       */
848      public static void checkVersionUpgradable(int oldVersion) 
849                                         throws IOException {
850        if (oldVersion > LAST_UPGRADABLE_LAYOUT_VERSION) {
851          String msg = "*********** Upgrade is not supported from this " +
852                       " older version " + oldVersion + 
853                       " of storage to the current version." + 
854                       " Please upgrade to " + LAST_UPGRADABLE_HADOOP_VERSION +
855                       " or a later version and then upgrade to current" +
856                       " version. Old layout version is " + 
857                       (oldVersion == 0 ? "'too old'" : (""+oldVersion)) +
858                       " and latest layout version this software version can" +
859                       " upgrade from is " + LAST_UPGRADABLE_LAYOUT_VERSION +
860                       ". ************";
861          LOG.error(msg);
862          throw new IOException(msg); 
863        }
864        
865      }
866      
867      /**
868       * Iterate over each of the {@link FormatConfirmable} objects,
869       * potentially checking with the user whether it should be formatted.
870       * 
871       * If running in interactive mode, will prompt the user for each
872       * directory to allow them to format anyway. Otherwise, returns
873       * false, unless 'force' is specified.
874       * 
875       * @param force format regardless of whether dirs exist
876       * @param interactive prompt the user when a dir exists
877       * @return true if formatting should proceed
878       * @throws IOException if some storage cannot be accessed
879       */
880      public static boolean confirmFormat(
881          Iterable<? extends FormatConfirmable> items,
882          boolean force, boolean interactive) throws IOException {
883        for (FormatConfirmable item : items) {
884          if (!item.hasSomeData())
885            continue;
886          if (force) { // Don't confirm, always format.
887            System.err.println(
888                "Data exists in " + item + ". Formatting anyway.");
889            continue;
890          }
891          if (!interactive) { // Don't ask - always don't format
892            System.err.println(
893                "Running in non-interactive mode, and data appears to exist in " +
894                item + ". Not formatting.");
895            return false;
896          }
897          if (!ToolRunner.confirmPrompt("Re-format filesystem in " + item + " ?")) {
898            System.err.println("Format aborted in " + item);
899            return false;
900          }
901        }
902        
903        return true;
904      }
905      
906      /**
907       * Interface for classes which need to have the user confirm their
908       * formatting during NameNode -format and other similar operations.
909       * 
910       * This is currently a storage directory or journal manager.
911       */
912      @InterfaceAudience.Private
913      public interface FormatConfirmable {
914        /**
915         * @return true if the storage seems to have some valid data in it,
916         * and the user should be required to confirm the format. Otherwise,
917         * false.
918         * @throws IOException if the storage cannot be accessed at all.
919         */
920        public boolean hasSomeData() throws IOException;
921        
922        /**
923         * @return a string representation of the formattable item, suitable
924         * for display to the user inside a prompt
925         */
926        public String toString();
927      }
928      
929      /**
930       * Set common storage fields into the given properties object.
931       * Should be overloaded if additional fields need to be set.
932       * 
933       * @param props the Properties object to write into
934       */
935      protected void setPropertiesFromFields(Properties props, 
936                                             StorageDirectory sd)
937          throws IOException {
938        props.setProperty("layoutVersion", String.valueOf(layoutVersion));
939        props.setProperty("storageType", storageType.toString());
940        props.setProperty("namespaceID", String.valueOf(namespaceID));
941        // Set clusterID in version with federation support
942        if (versionSupportsFederation(getServiceLayoutFeatureMap())) {
943          props.setProperty("clusterID", clusterID);
944        }
945        props.setProperty("cTime", String.valueOf(cTime));
946      }
947    
948      /**
949       * Write properties to the VERSION file in the given storage directory.
950       */
951      public void writeProperties(StorageDirectory sd) throws IOException {
952        writeProperties(sd.getVersionFile(), sd);
953      }
954      
  /**
   * Collect this storage's fields into a fresh Properties object and write
   * them to the given file.
   *
   * @param to the destination VERSION file
   * @param sd the storage directory being written
   * @throws IOException if the file cannot be written
   */
  public void writeProperties(File to, StorageDirectory sd) throws IOException {
    Properties props = new Properties();
    setPropertiesFromFields(props, sd);
    writeProperties(to, sd, props);
  }
960    
961      public static void writeProperties(File to, StorageDirectory sd,
962          Properties props) throws IOException {
963        RandomAccessFile file = new RandomAccessFile(to, "rws");
964        FileOutputStream out = null;
965        try {
966          file.seek(0);
967          out = new FileOutputStream(file.getFD());
968          /*
969           * If server is interrupted before this line, 
970           * the version file will remain unchanged.
971           */
972          props.store(out, null);
973          /*
974           * Now the new fields are flushed to the head of the file, but file 
975           * length can still be larger then required and therefore the file can 
976           * contain whole or corrupted fields from its old contents in the end.
977           * If server is interrupted here and restarted later these extra fields
978           * either should not effect server behavior or should be handled
979           * by the server correctly.
980           */
981          file.setLength(out.getChannel().position());
982        } finally {
983          if (out != null) {
984            out.close();
985          }
986          file.close();
987        }
988      }
989    
990      public static void rename(File from, File to) throws IOException {
991        try {
992          NativeIO.renameTo(from, to);
993        } catch (NativeIOException e) {
994          throw new IOException("Failed to rename " + from.getCanonicalPath()
995            + " to " + to.getCanonicalPath() + " due to failure in native rename. "
996            + e.toString());
997        }
998      }
999    
1000      /**
1001       * Copies a file (usually large) to a new location using native unbuffered IO.
1002       * <p>
1003       * This method copies the contents of the specified source file
1004       * to the specified destination file using OS specific unbuffered IO.
1005       * The goal is to avoid churning the file system buffer cache when copying
1006       * large files.
1007       *
1008       * We can't use FileUtils#copyFile from apache-commons-io because it
1009       * is a buffered IO based on FileChannel#transferFrom, which uses MmapByteBuffer
1010       * internally.
1011       *
1012       * The directory holding the destination file is created if it does not exist.
1013       * If the destination file exists, then this method will delete it first.
1014       * <p>
1015       * <strong>Note:</strong> Setting <code>preserveFileDate</code> to
1016       * {@code true} tries to preserve the file's last modified
1017       * date/times using {@link File#setLastModified(long)}, however it is
1018       * not guaranteed that the operation will succeed.
1019       * If the modification operation fails, no indication is provided.
1020       *
1021       * @param srcFile  an existing file to copy, must not be {@code null}
1022       * @param destFile  the new file, must not be {@code null}
1023       * @param preserveFileDate  true if the file date of the copy
1024       *  should be the same as the original
1025       *
1026       * @throws NullPointerException if source or destination is {@code null}
1027       * @throws IOException if source or destination is invalid
1028       * @throws IOException if an IO error occurs during copying
1029       */
1030      public static void nativeCopyFileUnbuffered(File srcFile, File destFile,
1031          boolean preserveFileDate) throws IOException {
1032        if (srcFile == null) {
1033          throw new NullPointerException("Source must not be null");
1034        }
1035        if (destFile == null) {
1036          throw new NullPointerException("Destination must not be null");
1037        }
1038        if (srcFile.exists() == false) {
1039          throw new FileNotFoundException("Source '" + srcFile + "' does not exist");
1040        }
1041        if (srcFile.isDirectory()) {
1042          throw new IOException("Source '" + srcFile + "' exists but is a directory");
1043        }
1044        if (srcFile.getCanonicalPath().equals(destFile.getCanonicalPath())) {
1045          throw new IOException("Source '" + srcFile + "' and destination '" +
1046              destFile + "' are the same");
1047        }
1048        File parentFile = destFile.getParentFile();
1049        if (parentFile != null) {
1050          if (!parentFile.mkdirs() && !parentFile.isDirectory()) {
1051            throw new IOException("Destination '" + parentFile
1052                + "' directory cannot be created");
1053          }
1054        }
1055        if (destFile.exists()) {
1056          if (FileUtil.canWrite(destFile) == false) {
1057            throw new IOException("Destination '" + destFile
1058                + "' exists but is read-only");
1059          } else {
1060            if (destFile.delete() == false) {
1061              throw new IOException("Destination '" + destFile
1062                  + "' exists but cannot be deleted");
1063            }
1064          }
1065        }
1066        try {
1067          NativeIO.copyFileUnbuffered(srcFile, destFile);
1068        } catch (NativeIOException e) {
1069          throw new IOException("Failed to copy " + srcFile.getCanonicalPath()
1070              + " to " + destFile.getCanonicalPath()
1071              + " due to failure in NativeIO#copyFileUnbuffered(). "
1072              + e.toString());
1073        }
1074        if (srcFile.length() != destFile.length()) {
1075          throw new IOException("Failed to copy full contents from '" + srcFile
1076              + "' to '" + destFile + "'");
1077        }
1078        if (preserveFileDate) {
1079          if (destFile.setLastModified(srcFile.lastModified()) == false) {
1080            if (LOG.isDebugEnabled()) {
1081              LOG.debug("Failed to preserve last modified date from'" + srcFile
1082                + "' to '" + destFile + "'");
1083            }
1084          }
1085        }
1086      }
1087    
1088      /**
1089       * Recursively delete all the content of the directory first and then 
1090       * the directory itself from the local filesystem.
1091       * @param dir The directory to delete
1092       * @throws IOException
1093       */
1094      public static void deleteDir(File dir) throws IOException {
1095        if (!FileUtil.fullyDelete(dir))
1096          throw new IOException("Failed to delete " + dir.getCanonicalPath());
1097      }
1098      
1099      /**
1100       * Write all data storage files.
1101       * @throws IOException
1102       */
1103      public void writeAll() throws IOException {
1104        this.layoutVersion = getServiceLayoutVersion();
1105        for (Iterator<StorageDirectory> it = storageDirs.iterator(); it.hasNext();) {
1106          writeProperties(it.next());
1107        }
1108      }
1109    
1110      /**
1111       * Unlock all storage directories.
1112       * @throws IOException
1113       */
1114      public void unlockAll() throws IOException {
1115        for (Iterator<StorageDirectory> it = storageDirs.iterator(); it.hasNext();) {
1116          it.next().unlock();
1117        }
1118      }
1119    
  /** @return the source revision this software was built from. */
  public static String getBuildVersion() {
    return VersionInfo.getRevision();
  }
1123    
1124      public static String getRegistrationID(StorageInfo storage) {
1125        return "NS-" + Integer.toString(storage.getNamespaceID())
1126          + "-" + storage.getClusterID()
1127          + "-" + Long.toString(storage.getCTime());
1128      }
1129      
1130      public static boolean is203LayoutVersion(int layoutVersion) {
1131        for (int lv203 : LAYOUT_VERSIONS_203) {
1132          if (lv203 == layoutVersion) {
1133            return true;
1134          }
1135        }
1136        return false;
1137      }
1138    }