001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.hdfs.server.common;
019    
020    import java.io.File;
021    import java.io.FileInputStream;
022    import java.io.FileOutputStream;
023    import java.io.IOException;
024    import java.io.RandomAccessFile;
025    import java.lang.management.ManagementFactory;
026    import java.nio.channels.FileLock;
027    import java.nio.channels.OverlappingFileLockException;
028    import java.util.ArrayList;
029    import java.util.List;
030    import java.util.Iterator;
031    import java.util.Properties;
032    
033    import org.apache.commons.logging.Log;
034    import org.apache.commons.logging.LogFactory;
035    import org.apache.hadoop.classification.InterfaceAudience;
036    import org.apache.hadoop.fs.Path;
037    import org.apache.hadoop.hdfs.protocol.HdfsConstants;
038    import org.apache.hadoop.hdfs.protocol.LayoutVersion;
039    import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
040    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
041    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
042    import org.apache.hadoop.fs.FileUtil;
043    import org.apache.hadoop.util.ToolRunner;
044    import org.apache.hadoop.util.VersionInfo;
045    
046    import com.google.common.base.Preconditions;
047    
048    import com.google.common.base.Charsets;
049    
050    
051    
052    /**
053     * Storage information file.
054     * <p>
055     * Local storage information is stored in a separate file VERSION.
056     * It contains type of the node, 
057     * the storage layout version, the namespace id, and 
058     * the fs state creation time.
059     * <p>
060     * Local storage can reside in multiple directories. 
061     * Each directory should contain the same VERSION file as the others.
062     * During startup Hadoop servers (name-node and data-nodes) read their local 
063     * storage information from them.
064     * <p>
 * The servers hold a lock for each storage directory while they run so that 
 * other nodes are not able to start up sharing the same storage.
 * The locks are released when the servers stop (normally or abnormally).
068     * 
069     */
070    @InterfaceAudience.Private
071    public abstract class Storage extends StorageInfo {
072      public static final Log LOG = LogFactory.getLog(Storage.class.getName());
073    
074      // last layout version that did not support upgrades
075      public static final int LAST_PRE_UPGRADE_LAYOUT_VERSION = -3;
076      
077      // this corresponds to Hadoop-0.18
078      public static final int LAST_UPGRADABLE_LAYOUT_VERSION = -16;
079      protected static final String LAST_UPGRADABLE_HADOOP_VERSION = "Hadoop-0.18";
080      
081      /** Layout versions of 0.20.203 release */
082      public static final int[] LAYOUT_VERSIONS_203 = {-19, -31};
083    
084      public    static final String STORAGE_FILE_LOCK     = "in_use.lock";
085      protected static final String STORAGE_FILE_VERSION  = "VERSION";
086      public    static final String STORAGE_DIR_CURRENT   = "current";
087      public    static final String STORAGE_DIR_PREVIOUS  = "previous";
088      public    static final String STORAGE_TMP_REMOVED   = "removed.tmp";
089      public    static final String STORAGE_TMP_PREVIOUS  = "previous.tmp";
090      public    static final String STORAGE_TMP_FINALIZED = "finalized.tmp";
091      public    static final String STORAGE_TMP_LAST_CKPT = "lastcheckpoint.tmp";
092      public    static final String STORAGE_PREVIOUS_CKPT = "previous.checkpoint";
093      
094      /**
095       * The blocksBeingWritten directory which was used in some 1.x and earlier
096       * releases.
097       */
098      public static final String STORAGE_1_BBW = "blocksBeingWritten";
099      
  /**
   * Possible states of a storage directory, as determined by
   * {@link StorageDirectory#analyzeStorage}. Any state other than
   * {@link #NORMAL} means a previous transition (upgrade, rollback,
   * finalize or checkpoint) must be completed or recovered, or the
   * directory must be formatted, before it can be used.
   */
  public enum StorageState {
    NON_EXISTENT,        // root missing, not a directory, or inaccessible
    NOT_FORMATTED,       // exists but no VERSION file (or FORMAT startup)
    COMPLETE_UPGRADE,    // previous.tmp present with current: finish upgrade
    RECOVER_UPGRADE,     // previous.tmp present, no current: undo upgrade
    COMPLETE_FINALIZE,   // finalized.tmp present: finish removing 'previous'
    COMPLETE_ROLLBACK,   // removed.tmp present with current: finish rollback
    RECOVER_ROLLBACK,    // removed.tmp present, no current: restore current
    COMPLETE_CHECKPOINT, // lastcheckpoint.tmp present with current
    RECOVER_CHECKPOINT,  // lastcheckpoint.tmp present, no current
    NORMAL;              // consistent; no recovery needed
  }
112      
113      /**
114       * An interface to denote storage directory type
115       * Implementations can define a type for storage directory by implementing
116       * this interface.
117       */
118      @InterfaceAudience.Private
119      public interface StorageDirType {
120        public StorageDirType getStorageDirType();
121        public boolean isOfType(StorageDirType type);
122      }
123      
  protected NodeType storageType;    // Type of the node using this storage 
  // All storage directories managed by this Storage instance; iterated via DirIterator.
  protected List<StorageDirectory> storageDirs = new ArrayList<StorageDirectory>();
126      
  /**
   * Iterator over {@link #storageDirs}, optionally filtered by
   * {@link StorageDirType}. A null dirType matches every directory.
   * Supports {@link #remove()} of the last element returned by next().
   * NOTE(review): not safe against external modification of storageDirs
   * while iterating — confirm callers iterate single-threaded.
   */
  private class DirIterator implements Iterator<StorageDirectory> {
    StorageDirType dirType; // filter; null means "all types"
    int prevIndex; // for remove()
    int nextIndex; // for next()
    
    DirIterator(StorageDirType dirType) {
      this.dirType = dirType;
      this.nextIndex = 0;
      this.prevIndex = 0;
    }
    
    @Override
    public boolean hasNext() {
      if (storageDirs.isEmpty() || nextIndex >= storageDirs.size())
        return false;
      if (dirType != null) {
        // Advance nextIndex to the next directory matching the filter.
        while (nextIndex < storageDirs.size()) {
          if (getStorageDir(nextIndex).getStorageDirType().isOfType(dirType))
            break;
          nextIndex++;
        }
        if (nextIndex >= storageDirs.size())
         return false;
      }
      return true;
    }
    
    @Override
    public StorageDirectory next() {
      StorageDirectory sd = getStorageDir(nextIndex);
      prevIndex = nextIndex;
      nextIndex++;
      if (dirType != null) {
        // Position nextIndex on the following directory of the right type.
        while (nextIndex < storageDirs.size()) {
          if (getStorageDir(nextIndex).getStorageDirType().isOfType(dirType))
            break;
          nextIndex++;
        }
      }
      return sd;
    }
    
    @Override
    public void remove() {
      nextIndex = prevIndex; // restore previous state
      storageDirs.remove(prevIndex); // remove last returned element
      hasNext(); // reset nextIndex to correct place
    }
  }
176      
177      /**
178       * @return A list of the given File in every available storage directory,
179       * regardless of whether it might exist.
180       */
181      public List<File> getFiles(StorageDirType dirType, String fileName) {
182        ArrayList<File> list = new ArrayList<File>();
183        Iterator<StorageDirectory> it =
184          (dirType == null) ? dirIterator() : dirIterator(dirType);
185        for ( ;it.hasNext(); ) {
186          list.add(new File(it.next().getCurrentDir(), fileName));
187        }
188        return list;
189      }
190    
191    
192      /**
193       * Return default iterator
194       * This iterator returns all entries in storageDirs
195       */
196      public Iterator<StorageDirectory> dirIterator() {
197        return dirIterator(null);
198      }
199      
200      /**
201       * Return iterator based on Storage Directory Type
202       * This iterator selects entries in storageDirs of type dirType and returns
203       * them via the Iterator
204       */
205      public Iterator<StorageDirectory> dirIterator(StorageDirType dirType) {
206        return new DirIterator(dirType);
207      }
208      
  /**
   * Adapter so callers can use the enhanced for-loop over storage
   * directories of the given type (null selects all).
   *
   * @param dirType directory type filter, or null for all directories
   * @return Iterable whose iterator() delegates to {@link #dirIterator}
   */
  public Iterable<StorageDirectory> dirIterable(final StorageDirType dirType) {
    return new Iterable<StorageDirectory>() {
      @Override
      public Iterator<StorageDirectory> iterator() {
        return dirIterator(dirType);
      }
    };
  }
217      
218      
219      /**
220       * generate storage list (debug line)
221       */
222      public String listStorageDirectories() {
223        StringBuilder buf = new StringBuilder();
224        for (StorageDirectory sd : storageDirs) {
225          buf.append(sd.getRoot() + "(" + sd.getStorageDirType() + ");");
226        }
227        return buf.toString();
228      }
229      
230      /**
231       * One of the storage directories.
232       */
233      @InterfaceAudience.Private
234      public static class StorageDirectory implements FormatConfirmable {
235        final File root;              // root directory
236        final boolean useLock;        // flag to enable storage lock
237        final StorageDirType dirType; // storage dir type
238        FileLock lock;                // storage lock
239        
240        public StorageDirectory(File dir) {
241          // default dirType is null
242          this(dir, null, true);
243        }
244        
245        public StorageDirectory(File dir, StorageDirType dirType) {
246          this(dir, dirType, true);
247        }
248        
249        /**
250         * Constructor
251         * @param dir directory corresponding to the storage
252         * @param dirType storage directory type
253         * @param useLock true - enables locking on the storage directory and false
254         *          disables locking
255         */
256        public StorageDirectory(File dir, StorageDirType dirType, boolean useLock) {
257          this.root = dir;
258          this.lock = null;
259          this.dirType = dirType;
260          this.useLock = useLock;
261        }
262        
263        /**
264         * Get root directory of this storage
265         */
266        public File getRoot() {
267          return root;
268        }
269    
270        /**
271         * Get storage directory type
272         */
273        public StorageDirType getStorageDirType() {
274          return dirType;
275        }    
276    
277        public void read(File from, Storage storage) throws IOException {
278          Properties props = readPropertiesFile(from);
279          storage.setFieldsFromProperties(props, this);
280        }
281    
282        /**
283         * Clear and re-create storage directory.
284         * <p>
285         * Removes contents of the current directory and creates an empty directory.
286         * 
287         * This does not fully format storage directory. 
288         * It cannot write the version file since it should be written last after  
289         * all other storage type dependent files are written.
290         * Derived storage is responsible for setting specific storage values and
291         * writing the version file to disk.
292         * 
293         * @throws IOException
294         */
295        public void clearDirectory() throws IOException {
296          File curDir = this.getCurrentDir();
297          if (curDir.exists())
298            if (!(FileUtil.fullyDelete(curDir)))
299              throw new IOException("Cannot remove current directory: " + curDir);
300          if (!curDir.mkdirs())
301            throw new IOException("Cannot create directory " + curDir);
302        }
303    
304        /**
305         * Directory {@code current} contains latest files defining
306         * the file system meta-data.
307         * 
308         * @return the directory path
309         */
310        public File getCurrentDir() {
311          return new File(root, STORAGE_DIR_CURRENT);
312        }
313    
314        /**
315         * File {@code VERSION} contains the following fields:
316         * <ol>
317         * <li>node type</li>
318         * <li>layout version</li>
319         * <li>namespaceID</li>
320         * <li>fs state creation time</li>
321         * <li>other fields specific for this node type</li>
322         * </ol>
323         * The version file is always written last during storage directory updates.
324         * The existence of the version file indicates that all other files have
325         * been successfully written in the storage directory, the storage is valid
326         * and does not need to be recovered.
327         * 
328         * @return the version file path
329         */
330        public File getVersionFile() {
331          return new File(new File(root, STORAGE_DIR_CURRENT), STORAGE_FILE_VERSION);
332        }
333    
334        /**
335         * File {@code VERSION} from the {@code previous} directory.
336         * 
337         * @return the previous version file path
338         */
339        public File getPreviousVersionFile() {
340          return new File(new File(root, STORAGE_DIR_PREVIOUS), STORAGE_FILE_VERSION);
341        }
342    
343        /**
344         * Directory {@code previous} contains the previous file system state,
345         * which the system can be rolled back to.
346         * 
347         * @return the directory path
348         */
349        public File getPreviousDir() {
350          return new File(root, STORAGE_DIR_PREVIOUS);
351        }
352    
353        /**
354         * {@code previous.tmp} is a transient directory, which holds
355         * current file system state while the new state is saved into the new
356         * {@code current} during upgrade.
357         * If the saving succeeds {@code previous.tmp} will be moved to
358         * {@code previous}, otherwise it will be renamed back to 
359         * {@code current} by the recovery procedure during startup.
360         * 
361         * @return the directory path
362         */
363        public File getPreviousTmp() {
364          return new File(root, STORAGE_TMP_PREVIOUS);
365        }
366    
367        /**
368         * {@code removed.tmp} is a transient directory, which holds
369         * current file system state while the previous state is moved into
370         * {@code current} during rollback.
371         * If the moving succeeds {@code removed.tmp} will be removed,
372         * otherwise it will be renamed back to 
373         * {@code current} by the recovery procedure during startup.
374         * 
375         * @return the directory path
376         */
377        public File getRemovedTmp() {
378          return new File(root, STORAGE_TMP_REMOVED);
379        }
380    
381        /**
382         * {@code finalized.tmp} is a transient directory, which holds
383         * the {@code previous} file system state while it is being removed
384         * in response to the finalize request.
385         * Finalize operation will remove {@code finalized.tmp} when completed,
386         * otherwise the removal will resume upon the system startup.
387         * 
388         * @return the directory path
389         */
390        public File getFinalizedTmp() {
391          return new File(root, STORAGE_TMP_FINALIZED);
392        }
393    
394        /**
395         * {@code lastcheckpoint.tmp} is a transient directory, which holds
396         * current file system state while the new state is saved into the new
397         * {@code current} during regular namespace updates.
398         * If the saving succeeds {@code lastcheckpoint.tmp} will be moved to
399         * {@code previous.checkpoint}, otherwise it will be renamed back to 
400         * {@code current} by the recovery procedure during startup.
401         * 
402         * @return the directory path
403         */
404        public File getLastCheckpointTmp() {
405          return new File(root, STORAGE_TMP_LAST_CKPT);
406        }
407    
408        /**
409         * {@code previous.checkpoint} is a directory, which holds the previous
410         * (before the last save) state of the storage directory.
411         * The directory is created as a reference only, it does not play role
412         * in state recovery procedures, and is recycled automatically, 
413         * but it may be useful for manual recovery of a stale state of the system.
414         * 
415         * @return the directory path
416         */
417        public File getPreviousCheckpoint() {
418          return new File(root, STORAGE_PREVIOUS_CKPT);
419        }
420    
421        /**
422         * Check consistency of the storage directory
423         * 
424         * @param startOpt a startup option.
425         *  
426         * @return state {@link StorageState} of the storage directory 
427         * @throws InconsistentFSStateException if directory state is not 
428         * consistent and cannot be recovered.
429         * @throws IOException
430         */
431        public StorageState analyzeStorage(StartupOption startOpt, Storage storage)
432            throws IOException {
433          assert root != null : "root is null";
434          String rootPath = root.getCanonicalPath();
435          try { // check that storage exists
436            if (!root.exists()) {
437              // storage directory does not exist
438              if (startOpt != StartupOption.FORMAT) {
439                LOG.warn("Storage directory " + rootPath + " does not exist");
440                return StorageState.NON_EXISTENT;
441              }
442              LOG.info(rootPath + " does not exist. Creating ...");
443              if (!root.mkdirs())
444                throw new IOException("Cannot create directory " + rootPath);
445            }
446            // or is inaccessible
447            if (!root.isDirectory()) {
448              LOG.warn(rootPath + "is not a directory");
449              return StorageState.NON_EXISTENT;
450            }
451            if (!FileUtil.canWrite(root)) {
452              LOG.warn("Cannot access storage directory " + rootPath);
453              return StorageState.NON_EXISTENT;
454            }
455          } catch(SecurityException ex) {
456            LOG.warn("Cannot access storage directory " + rootPath, ex);
457            return StorageState.NON_EXISTENT;
458          }
459    
460          this.lock(); // lock storage if it exists
461    
462          if (startOpt == HdfsServerConstants.StartupOption.FORMAT)
463            return StorageState.NOT_FORMATTED;
464    
465          if (startOpt != HdfsServerConstants.StartupOption.IMPORT) {
466            storage.checkOldLayoutStorage(this);
467          }
468    
469          // check whether current directory is valid
470          File versionFile = getVersionFile();
471          boolean hasCurrent = versionFile.exists();
472    
473          // check which directories exist
474          boolean hasPrevious = getPreviousDir().exists();
475          boolean hasPreviousTmp = getPreviousTmp().exists();
476          boolean hasRemovedTmp = getRemovedTmp().exists();
477          boolean hasFinalizedTmp = getFinalizedTmp().exists();
478          boolean hasCheckpointTmp = getLastCheckpointTmp().exists();
479    
480          if (!(hasPreviousTmp || hasRemovedTmp
481              || hasFinalizedTmp || hasCheckpointTmp)) {
482            // no temp dirs - no recovery
483            if (hasCurrent)
484              return StorageState.NORMAL;
485            if (hasPrevious)
486              throw new InconsistentFSStateException(root,
487                                  "version file in current directory is missing.");
488            return StorageState.NOT_FORMATTED;
489          }
490    
491          if ((hasPreviousTmp?1:0) + (hasRemovedTmp?1:0)
492              + (hasFinalizedTmp?1:0) + (hasCheckpointTmp?1:0) > 1)
493            // more than one temp dirs
494            throw new InconsistentFSStateException(root,
495                                                   "too many temporary directories.");
496    
497          // # of temp dirs == 1 should either recover or complete a transition
498          if (hasCheckpointTmp) {
499            return hasCurrent ? StorageState.COMPLETE_CHECKPOINT
500                              : StorageState.RECOVER_CHECKPOINT;
501          }
502    
503          if (hasFinalizedTmp) {
504            if (hasPrevious)
505              throw new InconsistentFSStateException(root,
506                                                     STORAGE_DIR_PREVIOUS + " and " + STORAGE_TMP_FINALIZED
507                                                     + "cannot exist together.");
508            return StorageState.COMPLETE_FINALIZE;
509          }
510    
511          if (hasPreviousTmp) {
512            if (hasPrevious)
513              throw new InconsistentFSStateException(root,
514                                                     STORAGE_DIR_PREVIOUS + " and " + STORAGE_TMP_PREVIOUS
515                                                     + " cannot exist together.");
516            if (hasCurrent)
517              return StorageState.COMPLETE_UPGRADE;
518            return StorageState.RECOVER_UPGRADE;
519          }
520          
521          assert hasRemovedTmp : "hasRemovedTmp must be true";
522          if (!(hasCurrent ^ hasPrevious))
523            throw new InconsistentFSStateException(root,
524                                                   "one and only one directory " + STORAGE_DIR_CURRENT 
525                                                   + " or " + STORAGE_DIR_PREVIOUS 
526                                                   + " must be present when " + STORAGE_TMP_REMOVED
527                                                   + " exists.");
528          if (hasCurrent)
529            return StorageState.COMPLETE_ROLLBACK;
530          return StorageState.RECOVER_ROLLBACK;
531        }
532    
533        /**
534         * Complete or recover storage state from previously failed transition.
535         * 
536         * @param curState specifies what/how the state should be recovered
537         * @throws IOException
538         */
539        public void doRecover(StorageState curState) throws IOException {
540          File curDir = getCurrentDir();
541          String rootPath = root.getCanonicalPath();
542          switch(curState) {
543          case COMPLETE_UPGRADE:  // mv previous.tmp -> previous
544            LOG.info("Completing previous upgrade for storage directory " 
545                     + rootPath);
546            rename(getPreviousTmp(), getPreviousDir());
547            return;
548          case RECOVER_UPGRADE:   // mv previous.tmp -> current
549            LOG.info("Recovering storage directory " + rootPath
550                     + " from previous upgrade");
551            if (curDir.exists())
552              deleteDir(curDir);
553            rename(getPreviousTmp(), curDir);
554            return;
555          case COMPLETE_ROLLBACK: // rm removed.tmp
556            LOG.info("Completing previous rollback for storage directory "
557                     + rootPath);
558            deleteDir(getRemovedTmp());
559            return;
560          case RECOVER_ROLLBACK:  // mv removed.tmp -> current
561            LOG.info("Recovering storage directory " + rootPath
562                     + " from previous rollback");
563            rename(getRemovedTmp(), curDir);
564            return;
565          case COMPLETE_FINALIZE: // rm finalized.tmp
566            LOG.info("Completing previous finalize for storage directory "
567                     + rootPath);
568            deleteDir(getFinalizedTmp());
569            return;
570          case COMPLETE_CHECKPOINT: // mv lastcheckpoint.tmp -> previous.checkpoint
571            LOG.info("Completing previous checkpoint for storage directory " 
572                     + rootPath);
573            File prevCkptDir = getPreviousCheckpoint();
574            if (prevCkptDir.exists())
575              deleteDir(prevCkptDir);
576            rename(getLastCheckpointTmp(), prevCkptDir);
577            return;
578          case RECOVER_CHECKPOINT:  // mv lastcheckpoint.tmp -> current
579            LOG.info("Recovering storage directory " + rootPath
580                     + " from failed checkpoint");
581            if (curDir.exists())
582              deleteDir(curDir);
583            rename(getLastCheckpointTmp(), curDir);
584            return;
585          default:
586            throw new IOException("Unexpected FS state: " + curState);
587          }
588        }
589        
590        /**
591         * @return true if the storage directory should prompt the user prior
592         * to formatting (i.e if the directory appears to contain some data)
593         * @throws IOException if the SD cannot be accessed due to an IO error
594         */
595        @Override
596        public boolean hasSomeData() throws IOException {
597          // Its alright for a dir not to exist, or to exist (properly accessible)
598          // and be completely empty.
599          if (!root.exists()) return false;
600          
601          if (!root.isDirectory()) {
602            // a file where you expect a directory should not cause silent
603            // formatting
604            return true;
605          }
606          
607          if (FileUtil.listFiles(root).length == 0) {
608            // Empty dir can format without prompt.
609            return false;
610          }
611          
612          return true;
613        }
614    
615    
616        /**
617         * Lock storage to provide exclusive access.
618         * 
619         * <p> Locking is not supported by all file systems.
620         * E.g., NFS does not consistently support exclusive locks.
621         * 
622         * <p> If locking is supported we guarantee exclusive access to the
623         * storage directory. Otherwise, no guarantee is given.
624         * 
625         * @throws IOException if locking fails
626         */
627        public void lock() throws IOException {
628          if (!useLock) {
629            LOG.info("Locking is disabled");
630            return;
631          }
632          FileLock newLock = tryLock();
633          if (newLock == null) {
634            String msg = "Cannot lock storage " + this.root 
635              + ". The directory is already locked";
636            LOG.info(msg);
637            throw new IOException(msg);
638          }
639          // Don't overwrite lock until success - this way if we accidentally
640          // call lock twice, the internal state won't be cleared by the second
641          // (failed) lock attempt
642          lock = newLock;
643        }
644    
645        /**
646         * Attempts to acquire an exclusive lock on the storage.
647         * 
648         * @return A lock object representing the newly-acquired lock or
649         * <code>null</code> if storage is already locked.
650         * @throws IOException if locking fails.
651         */
652        FileLock tryLock() throws IOException {
653          boolean deletionHookAdded = false;
654          File lockF = new File(root, STORAGE_FILE_LOCK);
655          if (!lockF.exists()) {
656            lockF.deleteOnExit();
657            deletionHookAdded = true;
658          }
659          RandomAccessFile file = new RandomAccessFile(lockF, "rws");
660          String jvmName = ManagementFactory.getRuntimeMXBean().getName();
661          FileLock res = null;
662          try {
663            res = file.getChannel().tryLock();
664            file.write(jvmName.getBytes(Charsets.UTF_8));
665            LOG.info("Lock on " + lockF + " acquired by nodename " + jvmName);
666          } catch(OverlappingFileLockException oe) {
667            // Cannot read from the locked file on Windows.
668            String lockingJvmName = Path.WINDOWS ? "" : (" " + file.readLine());
669            LOG.error("It appears that another namenode" + lockingJvmName
670                + " has already locked the storage directory");
671            file.close();
672            return null;
673          } catch(IOException e) {
674            LOG.error("Failed to acquire lock on " + lockF + ". If this storage directory is mounted via NFS, " 
675                + "ensure that the appropriate nfs lock services are running.", e);
676            file.close();
677            throw e;
678          }
679          if (res != null && !deletionHookAdded) {
680            // If the file existed prior to our startup, we didn't
681            // call deleteOnExit above. But since we successfully locked
682            // the dir, we can take care of cleaning it up.
683            lockF.deleteOnExit();
684          }
685          return res;
686        }
687    
688        /**
689         * Unlock storage.
690         * 
691         * @throws IOException
692         */
693        public void unlock() throws IOException {
694          if (this.lock == null)
695            return;
696          this.lock.release();
697          lock.channel().close();
698          lock = null;
699        }
700        
701        @Override
702        public String toString() {
703          return "Storage Directory " + this.root;
704        }
705    
706        /**
707         * Check whether underlying file system supports file locking.
708         * 
709         * @return <code>true</code> if exclusive locks are supported or
710         *         <code>false</code> otherwise.
711         * @throws IOException
712         * @see StorageDirectory#lock()
713         */
714        public boolean isLockSupported() throws IOException {
715          FileLock firstLock = null;
716          FileLock secondLock = null;
717          try {
718            firstLock = lock;
719            if(firstLock == null) {
720              firstLock = tryLock();
721              if(firstLock == null)
722                return true;
723            }
724            secondLock = tryLock();
725            if(secondLock == null)
726              return true;
727          } finally {
728            if(firstLock != null && firstLock != lock) {
729              firstLock.release();
730              firstLock.channel().close();
731            }
732            if(secondLock != null) {
733              secondLock.release();
734              secondLock.channel().close();
735            }
736          }
737          return false;
738        }
739      }
740    
741      /**
742       * Create empty storage info of the specified type
743       */
744      protected Storage(NodeType type) {
745        super();
746        this.storageType = type;
747      }
748      
  /**
   * Create storage info of the specified node type, initialized from the
   * given {@link StorageInfo}.
   *
   * @param type type of the node using this storage
   * @param storageInfo existing storage info to copy fields from
   */
  protected Storage(NodeType type, StorageInfo storageInfo) {
    super(storageInfo);
    this.storageType = type;
  }
753      
  /** @return the number of storage directories managed by this storage. */
  public int getNumStorageDirs() {
    return storageDirs.size();
  }
757      
  /**
   * @param idx zero-based index into the storage directory list
   * @return the storage directory at the given index
   */
  public StorageDirectory getStorageDir(int idx) {
    return storageDirs.get(idx);
  }
761      
762      /**
763       * @return the storage directory, with the precondition that this storage
764       * has exactly one storage directory
765       */
766      public StorageDirectory getSingularStorageDir() {
767        Preconditions.checkState(storageDirs.size() == 1);
768        return storageDirs.get(0);
769      }
770      
  /** Append the given storage directory to the list managed by this storage. */
  protected void addStorageDir(StorageDirectory sd) {
    storageDirs.add(sd);
  }
774    
  /**
   * Return true if the layout of the given storage directory is from a version
   * of Hadoop prior to the introduction of the "current" and "previous"
   * directories which allow upgrade and rollback.
   *
   * @param sd the storage directory to inspect
   * @return true if the directory uses the pre-upgrade layout
   * @throws IOException if the directory cannot be examined
   */
  public abstract boolean isPreUpgradableLayout(StorageDirectory sd)
  throws IOException;
782    
783      /**
784       * Check if the given storage directory comes from a version of Hadoop
785       * prior to when the directory layout changed (ie 0.13). If this is
786       * the case, this method throws an IOException.
787       */
788      private void checkOldLayoutStorage(StorageDirectory sd) throws IOException {
789        if (isPreUpgradableLayout(sd)) {
790          checkVersionUpgradable(0);
791        }
792      }
793    
794      /**
795       * Checks if the upgrade from the given old version is supported. If
796       * no upgrade is supported, it throws IncorrectVersionException.
797       * 
798       * @param oldVersion
799       */
800      public static void checkVersionUpgradable(int oldVersion) 
801                                         throws IOException {
802        if (oldVersion > LAST_UPGRADABLE_LAYOUT_VERSION) {
803          String msg = "*********** Upgrade is not supported from this " +
804                       " older version " + oldVersion + 
805                       " of storage to the current version." + 
806                       " Please upgrade to " + LAST_UPGRADABLE_HADOOP_VERSION +
807                       " or a later version and then upgrade to current" +
808                       " version. Old layout version is " + 
809                       (oldVersion == 0 ? "'too old'" : (""+oldVersion)) +
810                       " and latest layout version this software version can" +
811                       " upgrade from is " + LAST_UPGRADABLE_LAYOUT_VERSION +
812                       ". ************";
813          LOG.error(msg);
814          throw new IOException(msg); 
815        }
816        
817      }
818      
819      /**
820       * Iterate over each of the {@link FormatConfirmable} objects,
821       * potentially checking with the user whether it should be formatted.
822       * 
823       * If running in interactive mode, will prompt the user for each
824       * directory to allow them to format anyway. Otherwise, returns
825       * false, unless 'force' is specified.
826       * 
827       * @param force format regardless of whether dirs exist
828       * @param interactive prompt the user when a dir exists
829       * @return true if formatting should proceed
830       * @throws IOException if some storage cannot be accessed
831       */
832      public static boolean confirmFormat(
833          Iterable<? extends FormatConfirmable> items,
834          boolean force, boolean interactive) throws IOException {
835        for (FormatConfirmable item : items) {
836          if (!item.hasSomeData())
837            continue;
838          if (force) { // Don't confirm, always format.
839            System.err.println(
840                "Data exists in " + item + ". Formatting anyway.");
841            continue;
842          }
843          if (!interactive) { // Don't ask - always don't format
844            System.err.println(
845                "Running in non-interactive mode, and data appears to exist in " +
846                item + ". Not formatting.");
847            return false;
848          }
849          if (!ToolRunner.confirmPrompt("Re-format filesystem in " + item + " ?")) {
850            System.err.println("Format aborted in " + item);
851            return false;
852          }
853        }
854        
855        return true;
856      }
857      
  /**
   * Interface for classes which need to have the user confirm their
   * formatting during NameNode -format and other similar operations.
   * 
   * This is currently a storage directory or journal manager.
   */
  @InterfaceAudience.Private
  public interface FormatConfirmable {
    /**
     * @return true if the storage seems to have some valid data in it,
     * and the user should be required to confirm the format. Otherwise,
     * false.
     * @throws IOException if the storage cannot be accessed at all.
     */
    public boolean hasSomeData() throws IOException;
    
    /**
     * @return a string representation of the formattable item, suitable
     * for display to the user inside a prompt (e.g. in
     * {@link Storage#confirmFormat})
     */
    public String toString();
  }
880      
  /**
   * Set the common storage fields of this object from the given properties
   * (as read from a VERSION file).
   * Should be overridden if additional fields need to be read.
   * 
   * @param props properties loaded from a storage VERSION file
   * @param sd the storage directory the properties were read from
   * @throws IOException if a required property is missing or inconsistent
   */
  protected void setFieldsFromProperties(
      Properties props, StorageDirectory sd) throws IOException {
    setLayoutVersion(props, sd);
    setNamespaceID(props, sd);
    setStorageType(props, sd);
    setcTime(props, sd);
    // Must follow setLayoutVersion: clusterID is only read for layouts that
    // support federation, which is decided by the freshly set layoutVersion.
    setClusterId(props, layoutVersion, sd);
  }
896      
897      /**
898       * Set common storage fields into the given properties object.
899       * Should be overloaded if additional fields need to be set.
900       * 
901       * @param props the Properties object to write into
902       */
903      protected void setPropertiesFromFields(Properties props, 
904                                             StorageDirectory sd)
905          throws IOException {
906        props.setProperty("layoutVersion", String.valueOf(layoutVersion));
907        props.setProperty("storageType", storageType.toString());
908        props.setProperty("namespaceID", String.valueOf(namespaceID));
909        // Set clusterID in version with federation support
910        if (versionSupportsFederation()) {
911          props.setProperty("clusterID", clusterID);
912        }
913        props.setProperty("cTime", String.valueOf(cTime));
914      }
915    
916      /**
917       * Read properties from the VERSION file in the given storage directory.
918       */
919      public void readProperties(StorageDirectory sd) throws IOException {
920        Properties props = readPropertiesFile(sd.getVersionFile());
921        setFieldsFromProperties(props, sd);
922      }
923    
924      /**
925       * Read properties from the the previous/VERSION file in the given storage directory.
926       */
927      public void readPreviousVersionProperties(StorageDirectory sd)
928          throws IOException {
929        Properties props = readPropertiesFile(sd.getPreviousVersionFile());
930        setFieldsFromProperties(props, sd);
931      }
932    
933      /**
934       * Write properties to the VERSION file in the given storage directory.
935       */
936      public void writeProperties(StorageDirectory sd) throws IOException {
937        writeProperties(sd.getVersionFile(), sd);
938      }
939    
  /**
   * Write this storage's fields to the given file in Java properties format.
   * The file is opened in "rws" mode so writes go synchronously to the
   * underlying device (content and metadata).
   *
   * @param to the VERSION file to (over)write
   * @param sd the storage directory whose fields are being written
   * @throws IOException if the file cannot be written
   */
  public void writeProperties(File to, StorageDirectory sd) throws IOException {
    Properties props = new Properties();
    setPropertiesFromFields(props, sd);
    RandomAccessFile file = new RandomAccessFile(to, "rws");
    FileOutputStream out = null;
    try {
      file.seek(0);
      // Write through the RandomAccessFile's descriptor so the "rws"
      // synchronous-write semantics apply to the stream output.
      out = new FileOutputStream(file.getFD());
      /*
       * If server is interrupted before this line, 
       * the version file will remain unchanged.
       */
      props.store(out, null);
      /*
       * Now the new fields are flushed to the head of the file, but file 
       * length can still be larger then required and therefore the file can 
       * contain whole or corrupted fields from its old contents in the end.
       * If server is interrupted here and restarted later these extra fields
       * either should not effect server behavior or should be handled
       * by the server correctly.
       */
      file.setLength(out.getChannel().position());
    } finally {
      if (out != null) {
        out.close();
      }
      file.close();
    }
  }
969      
970      public static Properties readPropertiesFile(File from) throws IOException {
971        RandomAccessFile file = new RandomAccessFile(from, "rws");
972        FileInputStream in = null;
973        Properties props = new Properties();
974        try {
975          in = new FileInputStream(file.getFD());
976          file.seek(0);
977          props.load(in);
978        } finally {
979          if (in != null) {
980            in.close();
981          }
982          file.close();
983        }
984        return props;
985      }
986    
987      public static void rename(File from, File to) throws IOException {
988        if (!from.renameTo(to))
989          throw new IOException("Failed to rename " 
990                                + from.getCanonicalPath() + " to " + to.getCanonicalPath());
991      }
992    
993      /**
994       * Recursively delete all the content of the directory first and then 
995       * the directory itself from the local filesystem.
996       * @param dir The directory to delete
997       * @throws IOException
998       */
999      public static void deleteDir(File dir) throws IOException {
1000        if (!FileUtil.fullyDelete(dir))
1001          throw new IOException("Failed to delete " + dir.getCanonicalPath());
1002      }
1003      
1004      /**
1005       * Write all data storage files.
1006       * @throws IOException
1007       */
1008      public void writeAll() throws IOException {
1009        this.layoutVersion = HdfsConstants.LAYOUT_VERSION;
1010        for (Iterator<StorageDirectory> it = storageDirs.iterator(); it.hasNext();) {
1011          writeProperties(it.next());
1012        }
1013      }
1014    
1015      /**
1016       * Unlock all storage directories.
1017       * @throws IOException
1018       */
1019      public void unlockAll() throws IOException {
1020        for (Iterator<StorageDirectory> it = storageDirs.iterator(); it.hasNext();) {
1021          it.next().unlock();
1022        }
1023      }
1024    
  /** @return the source revision identifier of this build. */
  public static String getBuildVersion() {
    return VersionInfo.getRevision();
  }
1028    
1029      public static String getRegistrationID(StorageInfo storage) {
1030        return "NS-" + Integer.toString(storage.getNamespaceID())
1031          + "-" + storage.getClusterID()
1032          + "-" + Integer.toString(storage.getLayoutVersion())
1033          + "-" + Long.toString(storage.getCTime());
1034      }
1035      
1036      String getProperty(Properties props, StorageDirectory sd,
1037          String name) throws InconsistentFSStateException {
1038        String property = props.getProperty(name);
1039        if (property == null) {
1040          throw new InconsistentFSStateException(sd.root, "file "
1041              + STORAGE_FILE_VERSION + " has " + name + " missing.");
1042        }
1043        return property;
1044      }
1045      
1046      /** Validate and set storage type from {@link Properties}*/
1047      protected void setStorageType(Properties props, StorageDirectory sd)
1048          throws InconsistentFSStateException {
1049        NodeType type = NodeType.valueOf(getProperty(props, sd, "storageType"));
1050        if (!storageType.equals(type)) {
1051          throw new InconsistentFSStateException(sd.root,
1052              "node type is incompatible with others.");
1053        }
1054        storageType = type;
1055      }
1056      
1057      /** Validate and set ctime from {@link Properties}*/
1058      protected void setcTime(Properties props, StorageDirectory sd)
1059          throws InconsistentFSStateException {
1060        cTime = Long.parseLong(getProperty(props, sd, "cTime"));
1061      }
1062    
1063      /** Validate and set clusterId from {@link Properties}*/
1064      protected void setClusterId(Properties props, int layoutVersion,
1065          StorageDirectory sd) throws InconsistentFSStateException {
1066        // Set cluster ID in version that supports federation
1067        if (LayoutVersion.supports(Feature.FEDERATION, layoutVersion)) {
1068          String cid = getProperty(props, sd, "clusterID");
1069          if (!(clusterID.equals("") || cid.equals("") || clusterID.equals(cid))) {
1070            throw new InconsistentFSStateException(sd.getRoot(),
1071                "cluster Id is incompatible with others.");
1072          }
1073          clusterID = cid;
1074        }
1075      }
1076      
1077      /** Validate and set layout version from {@link Properties}*/
1078      protected void setLayoutVersion(Properties props, StorageDirectory sd)
1079          throws IncorrectVersionException, InconsistentFSStateException {
1080        int lv = Integer.parseInt(getProperty(props, sd, "layoutVersion"));
1081        if (lv < HdfsConstants.LAYOUT_VERSION) { // future version
1082          throw new IncorrectVersionException(lv, "storage directory "
1083              + sd.root.getAbsolutePath());
1084        }
1085        layoutVersion = lv;
1086      }
1087      
1088      /** Validate and set namespaceID version from {@link Properties}*/
1089      protected void setNamespaceID(Properties props, StorageDirectory sd)
1090          throws InconsistentFSStateException {
1091        int nsId = Integer.parseInt(getProperty(props, sd, "namespaceID"));
1092        if (namespaceID != 0 && nsId != 0 && namespaceID != nsId) {
1093          throw new InconsistentFSStateException(sd.root,
1094              "namespaceID is incompatible with others.");
1095        }
1096        namespaceID = nsId;
1097      }
1098      
1099      public static boolean is203LayoutVersion(int layoutVersion) {
1100        for (int lv203 : LAYOUT_VERSIONS_203) {
1101          if (lv203 == layoutVersion) {
1102            return true;
1103          }
1104        }
1105        return false;
1106      }
1107    }