001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.hdfs.server.namenode;
019    
020    import java.io.Closeable;
021    import java.io.File;
022    import java.io.IOException;
023    import java.io.RandomAccessFile;
024    import java.net.URI;
025    import java.net.UnknownHostException;
026    import java.util.ArrayList;
027    import java.util.Collection;
028    import java.util.EnumSet;
029    import java.util.HashMap;
030    import java.util.Iterator;
031    import java.util.List;
032    import java.util.Properties;
033    import java.util.UUID;
034    import java.util.concurrent.CopyOnWriteArrayList;
035    
036    import org.apache.hadoop.classification.InterfaceAudience;
037    import org.apache.hadoop.conf.Configuration;
038    import org.apache.hadoop.fs.FileUtil;
039    import org.apache.hadoop.hdfs.DFSUtil;
040    import org.apache.hadoop.hdfs.protocol.HdfsConstants;
041    import org.apache.hadoop.hdfs.protocol.LayoutVersion;
042    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
043    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
044    import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
045    import org.apache.hadoop.hdfs.server.common.Storage;
046    import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
047    import org.apache.hadoop.hdfs.server.common.Util;
048    import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
049    import org.apache.hadoop.hdfs.util.PersistentLongFile;
050    import org.apache.hadoop.io.IOUtils;
051    import org.apache.hadoop.net.DNS;
052    import org.apache.hadoop.util.Time;
053    
054    import com.google.common.annotations.VisibleForTesting;
055    import com.google.common.base.Preconditions;
056    import com.google.common.collect.Lists;
057    
058    /**
059     * NNStorage is responsible for management of the StorageDirectories used by
060     * the NameNode.
061     */
062    @InterfaceAudience.Private
063    public class NNStorage extends Storage implements Closeable,
064        StorageErrorReporter {
  /**
   * Property key looked up by setDeprecatedPropertiesForUpgrade when pulling
   * old-layout properties out of a storage directory's properties.
   */
  static final String DEPRECATED_MESSAGE_DIGEST_PROPERTY = "imageMD5Digest";
  /**
   * URI scheme name for local files.
   * NOTE(review): the visible part of this file compares against the
   * literal "file" rather than this constant - confirm where it is used.
   */
  static final String LOCAL_URI_SCHEME = "file";
067    
068      //
069      // The filenames used for storing the images
070      //
071      public enum NameNodeFile {
072        IMAGE     ("fsimage"),
073        TIME      ("fstime"), // from "old" pre-HDFS-1073 format
074        SEEN_TXID ("seen_txid"),
075        EDITS     ("edits"),
076        IMAGE_NEW ("fsimage.ckpt"),
077        IMAGE_ROLLBACK("fsimage_rollback"),
078        EDITS_NEW ("edits.new"), // from "old" pre-HDFS-1073 format
079        EDITS_INPROGRESS ("edits_inprogress"),
080        EDITS_TMP ("edits_tmp");
081    
082        private String fileName = null;
083        private NameNodeFile(String name) { this.fileName = name; }
084        @VisibleForTesting
085        public String getName() { return fileName; }
086      }
087    
088      /**
089       * Implementation of StorageDirType specific to namenode storage
090       * A Storage directory could be of type IMAGE which stores only fsimage,
091       * or of type EDITS which stores edits or of type IMAGE_AND_EDITS which
092       * stores both fsimage and edits.
093       */
094      @VisibleForTesting
095      public static enum NameNodeDirType implements StorageDirType {
096        UNDEFINED,
097        IMAGE,
098        EDITS,
099        IMAGE_AND_EDITS;
100    
101        @Override
102        public StorageDirType getStorageDirType() {
103          return this;
104        }
105    
106        @Override
107        public boolean isOfType(StorageDirType type) {
108          if ((this == IMAGE_AND_EDITS) && (type == IMAGE || type == EDITS))
109            return true;
110          return this == type;
111        }
112      }
113    
  /** Id of the block pool; starts out as the empty string. */
  protected String blockpoolID = ""; // id of the block pool
  
  /**
   * Flag that controls whether attemptRestoreRemovedStorage tries to
   * restore failed storages.
   */
  private boolean restoreFailedStorage = false;
  /** Monitor held while attemptRestoreRemovedStorage scans removed dirs. */
  private final Object restorationLock = new Object();
  /** When true, isPreUpgradableLayout returns false without checking. */
  private boolean disablePreUpgradableLayoutCheck = false;


  /**
   * TxId of the last transaction that was included in the most
   * recent fsimage file. This does not include any transactions
   * that have since been written to the edit log.
   */
  protected volatile long mostRecentCheckpointTxId = HdfsConstants.INVALID_TXID;
  
  /**
   * Time of the last checkpoint, in milliseconds since the epoch.
   */
  private long mostRecentCheckpointTime = 0;

  /**
   * List of failed (and thus removed) storage directories. Entries are
   * added by reportErrorsOnDirectory and taken out again by
   * attemptRestoreRemovedStorage.
   */
  final protected List<StorageDirectory> removedStorageDirs
    = new CopyOnWriteArrayList<StorageDirectory>();

  /**
   * Properties from old layout versions that may be needed
   * during upgrade only. Not initialised here; assigned by
   * setDeprecatedPropertiesForUpgrade.
   */
  private HashMap<String, String> deprecatedProperties;
147    
148      /**
149       * Construct the NNStorage.
150       * @param conf Namenode configuration.
151       * @param imageDirs Directories the image can be stored in.
152       * @param editsDirs Directories the editlog can be stored in.
153       * @throws IOException if any directories are inaccessible.
154       */
155      public NNStorage(Configuration conf, 
156                       Collection<URI> imageDirs, Collection<URI> editsDirs) 
157          throws IOException {
158        super(NodeType.NAME_NODE);
159    
160        storageDirs = new CopyOnWriteArrayList<StorageDirectory>();
161        
162        // this may modify the editsDirs, so copy before passing in
163        setStorageDirectories(imageDirs, 
164                              Lists.newArrayList(editsDirs),
165                              FSNamesystem.getSharedEditsDirs(conf));
166      }
167    
168      @Override // Storage
169      public boolean isPreUpgradableLayout(StorageDirectory sd) throws IOException {
170        if (disablePreUpgradableLayoutCheck) {
171          return false;
172        }
173    
174        File oldImageDir = new File(sd.getRoot(), "image");
175        if (!oldImageDir.exists()) {
176          return false;
177        }
178        // check the layout version inside the image file
179        File oldF = new File(oldImageDir, "fsimage");
180        RandomAccessFile oldFile = new RandomAccessFile(oldF, "rws");
181        try {
182          oldFile.seek(0);
183          int oldVersion = oldFile.readInt();
184          oldFile.close();
185          oldFile = null;
186          if (oldVersion < LAST_PRE_UPGRADE_LAYOUT_VERSION)
187            return false;
188        } finally {
189          IOUtils.cleanup(LOG, oldFile);
190        }
191        return true;
192      }
193    
  /**
   * Releases this storage: unlocks via unlockAll() and then empties the
   * list of storage directories.
   *
   * @throws IOException if unlockAll() fails
   */
  @Override // Closeable
  public void close() throws IOException {
    unlockAll();
    storageDirs.clear();
  }
199    
  /**
   * Set flag whether an attempt should be made to restore failed storage
   * directories at the next available opportunity. The new value is also
   * logged at WARN level.
   *
   * @param val Whether restoration attempt should be made.
   */
  void setRestoreFailedStorage(boolean val) {
    LOG.warn("set restore failed storage to " + val);
    restoreFailedStorage=val;
  }
210    
  /**
   * @return Whether failed storage directories are to be restored, i.e. the
   *         value last given to setRestoreFailedStorage (false initially).
   */
  boolean getRestoreFailedStorage() {
    return restoreFailedStorage;
  }
217    
218      /**
219       * See if any of removed storages is "writable" again, and can be returned
220       * into service.
221       */
222      void attemptRestoreRemovedStorage() {
223        // if directory is "alive" - copy the images there...
224        if(!restoreFailedStorage || removedStorageDirs.size() == 0)
225          return; //nothing to restore
226    
227        /* We don't want more than one thread trying to restore at a time */
228        synchronized (this.restorationLock) {
229          LOG.info("NNStorage.attemptRestoreRemovedStorage: check removed(failed) "+
230                   "storarge. removedStorages size = " + removedStorageDirs.size());
231          for(Iterator<StorageDirectory> it
232                = this.removedStorageDirs.iterator(); it.hasNext();) {
233            StorageDirectory sd = it.next();
234            File root = sd.getRoot();
235            LOG.info("currently disabled dir " + root.getAbsolutePath() +
236                     "; type="+sd.getStorageDirType() 
237                     + ";canwrite="+FileUtil.canWrite(root));
238            if(root.exists() && FileUtil.canWrite(root)) {
239              LOG.info("restoring dir " + sd.getRoot().getAbsolutePath());
240              this.addStorageDir(sd); // restore
241              this.removedStorageDirs.remove(sd);
242            }
243          }
244        }
245      }
246    
  /**
   * @return The list of storage directories which are in the errored state.
   *         This is the internal list itself, not a copy.
   */
  List<StorageDirectory> getRemovedStorageDirs() {
    return this.removedStorageDirs;
  }
253      
  /**
   * Convenience overload that passes an empty list of shared edits
   * directories.
   * See {@link NNStorage#setStorageDirectories(Collection, Collection, Collection)}
   *
   * @param fsNameDirs Locations to store images.
   * @param fsEditsDirs Locations to store edit logs; handed to the
   *                    three-argument overload as is, which may remove
   *                    entries from it.
   * @throws IOException if the three-argument overload throws
   */
  @VisibleForTesting
  synchronized void setStorageDirectories(Collection<URI> fsNameDirs,
                                          Collection<URI> fsEditsDirs)
      throws IOException {
    setStorageDirectories(fsNameDirs, fsEditsDirs, new ArrayList<URI>());
  }
263    
264      /**
265       * Set the storage directories which will be used. This should only ever be
266       * called from inside NNStorage. However, it needs to remain package private
267       * for testing, as StorageDirectories need to be reinitialised after using
268       * Mockito.spy() on this class, as Mockito doesn't work well with inner
269       * classes, such as StorageDirectory in this case.
270       *
271       * Synchronized due to initialization of storageDirs and removedStorageDirs.
272       *
273       * @param fsNameDirs Locations to store images.
274       * @param fsEditsDirs Locations to store edit logs.
275       * @throws IOException
276       */
277      @VisibleForTesting
278      synchronized void setStorageDirectories(Collection<URI> fsNameDirs,
279                                              Collection<URI> fsEditsDirs,
280                                              Collection<URI> sharedEditsDirs)
281          throws IOException {
282        this.storageDirs.clear();
283        this.removedStorageDirs.clear();
284    
285       // Add all name dirs with appropriate NameNodeDirType
286        for (URI dirName : fsNameDirs) {
287          checkSchemeConsistency(dirName);
288          boolean isAlsoEdits = false;
289          for (URI editsDirName : fsEditsDirs) {
290            if (editsDirName.compareTo(dirName) == 0) {
291              isAlsoEdits = true;
292              fsEditsDirs.remove(editsDirName);
293              break;
294            }
295          }
296          NameNodeDirType dirType = (isAlsoEdits) ?
297                              NameNodeDirType.IMAGE_AND_EDITS :
298                              NameNodeDirType.IMAGE;
299          // Add to the list of storage directories, only if the
300          // URI is of type file://
301          if(dirName.getScheme().compareTo("file") == 0) {
302            this.addStorageDir(new StorageDirectory(new File(dirName.getPath()),
303                dirType,
304                sharedEditsDirs.contains(dirName))); // Don't lock the dir if it's shared.
305          }
306        }
307    
308        // Add edits dirs if they are different from name dirs
309        for (URI dirName : fsEditsDirs) {
310          checkSchemeConsistency(dirName);
311          // Add to the list of storage directories, only if the
312          // URI is of type file://
313          if(dirName.getScheme().compareTo("file") == 0)
314            this.addStorageDir(new StorageDirectory(new File(dirName.getPath()),
315                        NameNodeDirType.EDITS, sharedEditsDirs.contains(dirName)));
316        }
317      }
318    
319      /**
320       * Return the storage directory corresponding to the passed URI
321       * @param uri URI of a storage directory
322       * @return The matching storage directory or null if none found
323       */
324      StorageDirectory getStorageDirectory(URI uri) {
325        try {
326          uri = Util.fileAsURI(new File(uri));
327          Iterator<StorageDirectory> it = dirIterator();
328          for (; it.hasNext(); ) {
329            StorageDirectory sd = it.next();
330            if (Util.fileAsURI(sd.getRoot()).equals(uri)) {
331              return sd;
332            }
333          }
334        } catch (IOException ioe) {
335          LOG.warn("Error converting file to URI", ioe);
336        }
337        return null;
338      }
339    
340      /**
341       * Checks the consistency of a URI, in particular if the scheme
342       * is specified 
343       * @param u URI whose consistency is being checked.
344       */
345      private static void checkSchemeConsistency(URI u) throws IOException {
346        String scheme = u.getScheme();
347        // the URI should have a proper scheme
348        if(scheme == null) {
349          throw new IOException("Undefined scheme for " + u);
350        }
351      }
352    
  /**
   * Retrieve current directories of type IMAGE.
   * Delegates to getDirectories(NameNodeDirType.IMAGE).
   *
   * @return Collection of URI representing image directories
   * @throws IOException in case of URI processing error
   */
  Collection<URI> getImageDirectories() throws IOException {
    return getDirectories(NameNodeDirType.IMAGE);
  }
361    
  /**
   * Retrieve current directories of type EDITS.
   * Delegates to getDirectories(NameNodeDirType.EDITS).
   *
   * @return Collection of URI representing edits directories
   * @throws IOException in case of URI processing error
   */
  Collection<URI> getEditsDirectories() throws IOException {
    return getDirectories(NameNodeDirType.EDITS);
  }
370    
371      /**
372       * Return number of storage directories of the given type.
373       * @param dirType directory type
374       * @return number of storage directories of type dirType
375       */
376      int getNumStorageDirs(NameNodeDirType dirType) {
377        if(dirType == null)
378          return getNumStorageDirs();
379        Iterator<StorageDirectory> it = dirIterator(dirType);
380        int numDirs = 0;
381        for(; it.hasNext(); it.next())
382          numDirs++;
383        return numDirs;
384      }
385    
386      /**
387       * Return the list of locations being used for a specific purpose.
388       * i.e. Image or edit log storage.
389       *
390       * @param dirType Purpose of locations requested.
391       * @throws IOException
392       */
393      Collection<URI> getDirectories(NameNodeDirType dirType)
394          throws IOException {
395        ArrayList<URI> list = new ArrayList<URI>();
396        Iterator<StorageDirectory> it = (dirType == null) ? dirIterator() :
397                                        dirIterator(dirType);
398        for ( ;it.hasNext(); ) {
399          StorageDirectory sd = it.next();
400          try {
401            list.add(Util.fileAsURI(sd.getRoot()));
402          } catch (IOException e) {
403            throw new IOException("Exception while processing " +
404                "StorageDirectory " + sd.getRoot(), e);
405          }
406        }
407        return list;
408      }
409      
410      /**
411       * Determine the last transaction ID noted in this storage directory.
412       * This txid is stored in a special seen_txid file since it might not
413       * correspond to the latest image or edit log. For example, an image-only
414       * directory will have this txid incremented when edits logs roll, even
415       * though the edits logs are in a different directory.
416       *
417       * @param sd StorageDirectory to check
418       * @return If file exists and can be read, last recorded txid. If not, 0L.
419       * @throws IOException On errors processing file pointed to by sd
420       */
421      static long readTransactionIdFile(StorageDirectory sd) throws IOException {
422        File txidFile = getStorageFile(sd, NameNodeFile.SEEN_TXID);
423        return PersistentLongFile.readFile(txidFile, 0);
424      }
425      
426      /**
427       * Write last checkpoint time into a separate file.
428       *
429       * @param sd
430       * @throws IOException
431       */
432      void writeTransactionIdFile(StorageDirectory sd, long txid) throws IOException {
433        Preconditions.checkArgument(txid >= 0, "bad txid: " + txid);
434        
435        File txIdFile = getStorageFile(sd, NameNodeFile.SEEN_TXID);
436        PersistentLongFile.writeFile(txIdFile, txid);
437      }
438    
  /**
   * Set the transaction ID and time of the last checkpoint.
   * The two fields are assigned one after the other, without any locking.
   * 
   * @param txid transaction id of the last checkpoint
   * @param time time of the last checkpoint, in millis since the epoch
   */
  void setMostRecentCheckpointInfo(long txid, long time) {
    this.mostRecentCheckpointTxId = txid;
    this.mostRecentCheckpointTime = time;
  }
449    
  /**
   * @return the transaction ID of the last checkpoint; this is
   *         HdfsConstants.INVALID_TXID until a value has been set.
   */
  public long getMostRecentCheckpointTxId() {
    return mostRecentCheckpointTxId;
  }
456      
  /**
   * @return the time of the most recent checkpoint in millis since the
   *         epoch; 0 until a value has been set.
   */
  long getMostRecentCheckpointTime() {
    return mostRecentCheckpointTime;
  }
463    
464      /**
465       * Write a small file in all available storage directories that
466       * indicates that the namespace has reached some given transaction ID.
467       * 
468       * This is used when the image is loaded to avoid accidental rollbacks
469       * in the case where an edit log is fully deleted but there is no
470       * checkpoint. See TestNameEditsConfigs.testNameEditsConfigsFailure()
471       * @param txid the txid that has been reached
472       */
473      public void writeTransactionIdFileToStorage(long txid) {
474        // Write txid marker in all storage directories
475        for (StorageDirectory sd : storageDirs) {
476          try {
477            writeTransactionIdFile(sd, txid);
478          } catch(IOException e) {
479            // Close any edits stream associated with this dir and remove directory
480            LOG.warn("writeTransactionIdToStorage failed on " + sd,
481                e);
482            reportErrorsOnDirectory(sd);
483          }
484        }
485      }
486    
487      /**
488       * Return the name of the image file that is uploaded by periodic
489       * checkpointing
490       *
491       * @return List of filenames to save checkpoints to.
492       */
493      public File[] getFsImageNameCheckpoint(long txid) {
494        ArrayList<File> list = new ArrayList<File>();
495        for (Iterator<StorageDirectory> it =
496                     dirIterator(NameNodeDirType.IMAGE); it.hasNext();) {
497          list.add(getStorageFile(it.next(), NameNodeFile.IMAGE_NEW, txid));
498        }
499        return list.toArray(new File[list.size()]);
500      }
501    
502      /**
503       * @return The first image file with the given txid and image type.
504       */
505      public File getFsImageName(long txid, NameNodeFile nnf) {
506        for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE);
507            it.hasNext();) {
508          StorageDirectory sd = it.next();
509          File fsImage = getStorageFile(sd, nnf, txid);
510          if (FileUtil.canRead(sd.getRoot()) && fsImage.exists()) {
511            return fsImage;
512          }
513        }
514        return null;
515      }
516    
517      /**
518       * @return The first image file whose txid is the same with the given txid and
519       * image type is one of the given types.
520       */
521      public File getFsImage(long txid, EnumSet<NameNodeFile> nnfs) {
522        for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE);
523            it.hasNext();) {
524          StorageDirectory sd = it.next();
525          for (NameNodeFile nnf : nnfs) {
526            File fsImage = getStorageFile(sd, nnf, txid);
527            if (FileUtil.canRead(sd.getRoot()) && fsImage.exists()) {
528              return fsImage;
529            }
530          }
531        }
532        return null;
533      }
534    
  /**
   * Shorthand for getFsImageName(txid, NameNodeFile.IMAGE).
   *
   * @param txid transaction ID of the wanted image
   * @return the file returned by the two-argument overload; may be null
   */
  public File getFsImageName(long txid) {
    return getFsImageName(txid, NameNodeFile.IMAGE);
  }
538    
  /**
   * @return the image file for the most recent checkpoint transaction ID,
   *         as found by getFsImageName(long); may be null
   */
  public File getHighestFsImageName() {
    return getFsImageName(getMostRecentCheckpointTxId());
  }
542    
  /**
   * Create new dfs name directory.  Caution: this destroys all files
   * in this filesystem.
   *
   * Clears the directory, writes its properties, writes a seen_txid of 0
   * and logs the outcome, in that order.
   *
   * @param sd storage directory to format
   * @throws IOException if any of the steps fails
   */
  private void format(StorageDirectory sd) throws IOException {
    sd.clearDirectory(); // create current dir
    writeProperties(sd);
    writeTransactionIdFile(sd, 0);

    LOG.info("Storage directory " + sd.getRoot()
             + " has been successfully formatted.");
  }
553    
554      /**
555       * Format all available storage directories.
556       */
557      public void format(NamespaceInfo nsInfo) throws IOException {
558        Preconditions.checkArgument(nsInfo.getLayoutVersion() == 0 ||
559            nsInfo.getLayoutVersion() == HdfsConstants.NAMENODE_LAYOUT_VERSION,
560            "Bad layout version: %s", nsInfo.getLayoutVersion());
561        
562        this.setStorageInfo(nsInfo);
563        this.blockpoolID = nsInfo.getBlockPoolID();
564        for (Iterator<StorageDirectory> it =
565                               dirIterator(); it.hasNext();) {
566          StorageDirectory sd = it.next();
567          format(sd);
568        }
569      }
570      
  /**
   * Builds a NamespaceInfo from a freshly generated namespace ID, cluster
   * ID and block pool ID, with 0 as the last constructor argument.
   *
   * @return the new NamespaceInfo
   * @throws UnknownHostException declared by this method; presumably raised
   *         while generating one of the IDs - TODO confirm which
   */
  public static NamespaceInfo newNamespaceInfo()
      throws UnknownHostException {
    return new NamespaceInfo(newNamespaceID(), newClusterID(),
        newBlockPoolID(), 0L);
  }
576      
577      public void format() throws IOException {
578        this.layoutVersion = HdfsConstants.NAMENODE_LAYOUT_VERSION;
579        for (Iterator<StorageDirectory> it =
580                               dirIterator(); it.hasNext();) {
581          StorageDirectory sd = it.next();
582          format(sd);
583        }
584      }
585    
586      /**
587       * Generate new namespaceID.
588       *
589       * namespaceID is a persistent attribute of the namespace.
590       * It is generated when the namenode is formatted and remains the same
591       * during the life cycle of the namenode.
592       * When a datanodes register they receive it as the registrationID,
593       * which is checked every time the datanode is communicating with the
594       * namenode. Datanodes that do not 'know' the namespaceID are rejected.
595       *
596       * @return new namespaceID
597       */
598      private static int newNamespaceID() {
599        int newID = 0;
600        while(newID == 0)
601          newID = DFSUtil.getRandom().nextInt(0x7FFFFFFF);  // use 31 bits only
602        return newID;
603      }
604    
605      @Override // Storage
606      protected void setFieldsFromProperties(
607          Properties props, StorageDirectory sd) throws IOException {
608        super.setFieldsFromProperties(props, sd);
609        if (layoutVersion == 0) {
610          throw new IOException("NameNode directory "
611                                + sd.getRoot() + " is not formatted.");
612        }
613    
614        // Set Block pool ID in version with federation support
615        if (NameNodeLayoutVersion.supports(
616            LayoutVersion.Feature.FEDERATION, getLayoutVersion())) {
617          String sbpid = props.getProperty("blockpoolID");
618          setBlockPoolID(sd.getRoot(), sbpid);
619        }
620        setDeprecatedPropertiesForUpgrade(props);
621      }
622    
623      /**
624       * Pull any properties out of the VERSION file that are from older
625       * versions of HDFS and only necessary during upgrade.
626       */
627      private void setDeprecatedPropertiesForUpgrade(Properties props) {
628        deprecatedProperties = new HashMap<String, String>();
629        String md5 = props.getProperty(DEPRECATED_MESSAGE_DIGEST_PROPERTY);
630        if (md5 != null) {
631          deprecatedProperties.put(DEPRECATED_MESSAGE_DIGEST_PROPERTY, md5);
632        }
633      }
634      
  /**
   * Return a property that was stored in an earlier version of HDFS.
   * 
   * This should only be used during upgrades: with assertions enabled, the
   * call fails unless the current layout version value is greater than
   * HdfsConstants.NAMENODE_LAYOUT_VERSION.
   *
   * NOTE(review): in the visible code deprecatedProperties is assigned only
   * by setDeprecatedPropertiesForUpgrade, so calling this before properties
   * have been loaded would dereference null - confirm callers.
   *
   * @param prop name of the deprecated property
   * @return the stored value, or null if the property was not captured
   */
  String getDeprecatedProperty(String prop) {
    assert getLayoutVersion() > HdfsConstants.NAMENODE_LAYOUT_VERSION :
      "getDeprecatedProperty should only be done when loading " +
      "storage from past versions during upgrade.";
    return deprecatedProperties.get(prop);
  }
646    
  /**
   * Fills in the properties to be stored for a storage directory: whatever
   * the superclass sets, plus the block pool ID when the layout version
   * supports federation.
   *
   * Note kept from the original documentation: the version file should
   * always be written last; a missing or corrupted version file indicates
   * that the checkpoint is not valid.
   *
   * @param props properties object to populate
   * @param sd storage directory
   * @throws IOException if the superclass implementation throws
   */
  @Override // Storage
  protected void setPropertiesFromFields(Properties props,
                           StorageDirectory sd
                           ) throws IOException {
    super.setPropertiesFromFields(props, sd);
    // Set blockpoolID in version with federation support
    if (NameNodeLayoutVersion.supports(
        LayoutVersion.Feature.FEDERATION, getLayoutVersion())) {
      props.setProperty("blockpoolID", blockpoolID);
    }
  }
668      
669      static File getStorageFile(StorageDirectory sd, NameNodeFile type, long imageTxId) {
670        return new File(sd.getCurrentDir(),
671                        String.format("%s_%019d", type.getName(), imageTxId));
672      }
673      
  /**
   * Get a storage file for one of the files that doesn't need a txid associated
   * (e.g version, seen_txid)
   *
   * @param sd storage directory whose 'current' directory holds the file
   * @param type kind of file; its name is used verbatim
   * @return the file (which need not exist)
   */
  static File getStorageFile(StorageDirectory sd, NameNodeFile type) {
    return new File(sd.getCurrentDir(), type.getName());
  }
681    
  /**
   * @param txid transaction ID to embed in the name
   * @return the checkpoint image file name (NameNodeFile.IMAGE_NEW) for
   *         the given transaction ID
   */
  @VisibleForTesting
  public static String getCheckpointImageFileName(long txid) {
    return getNameNodeFileName(NameNodeFile.IMAGE_NEW, txid);
  }
686    
  /**
   * @param txid transaction ID to embed in the name
   * @return the image file name (NameNodeFile.IMAGE) for the given
   *         transaction ID
   */
  @VisibleForTesting
  public static String getImageFileName(long txid) {
    return getNameNodeFileName(NameNodeFile.IMAGE, txid);
  }
691    
  /**
   * @param txid transaction ID to embed in the name
   * @return the rollback image file name (NameNodeFile.IMAGE_ROLLBACK) for
   *         the given transaction ID
   */
  @VisibleForTesting
  public static String getRollbackImageFileName(long txid) {
    return getNameNodeFileName(NameNodeFile.IMAGE_ROLLBACK, txid);
  }
696    
  /**
   * Builds a file name of the form {@code <name>_<txid>}, with the txid
   * zero-padded to 19 digits.
   *
   * @param nnf kind of file, providing the name prefix
   * @param txid transaction ID to append
   * @return the resulting file name
   */
  private static String getNameNodeFileName(NameNodeFile nnf, long txid) {
    return String.format("%s_%019d", nnf.getName(), txid);
  }
700    
  /**
   * @param startTxId first transaction ID of the in-progress segment
   * @return the in-progress edits file name
   *         (NameNodeFile.EDITS_INPROGRESS) for the given transaction ID
   */
  @VisibleForTesting
  public static String getInProgressEditsFileName(long startTxId) {
    return getNameNodeFileName(NameNodeFile.EDITS_INPROGRESS, startTxId);
  }
705      
  /**
   * @param sd storage directory whose 'current' directory holds the file
   * @param startTxId first transaction ID of the in-progress segment
   * @return the in-progress edits file (which need not exist)
   */
  static File getInProgressEditsFile(StorageDirectory sd, long startTxId) {
    return new File(sd.getCurrentDir(), getInProgressEditsFileName(startTxId));
  }
709      
  /**
   * @param sd storage directory whose 'current' directory holds the file
   * @param startTxId first transaction ID embedded in the name
   * @param endTxId last transaction ID embedded in the name
   * @return the finalized edits file (which need not exist)
   */
  static File getFinalizedEditsFile(StorageDirectory sd,
      long startTxId, long endTxId) {
    return new File(sd.getCurrentDir(),
        getFinalizedEditsFileName(startTxId, endTxId));
  }
715    
  /**
   * @param sd storage directory whose 'current' directory holds the file
   * @param startTxId first transaction ID embedded in the name
   * @param endTxId last transaction ID embedded in the name
   * @param timestamp timestamp embedded in the name
   * @return the temporary edits file (which need not exist)
   */
  static File getTemporaryEditsFile(StorageDirectory sd,
      long startTxId, long endTxId, long timestamp) {
    return new File(sd.getCurrentDir(),
        getTemporaryEditsFileName(startTxId, endTxId, timestamp));
  }
721    
  /**
   * @param sd storage directory whose 'current' directory holds the file
   * @param nnf kind of image file, providing the name prefix
   * @param txid transaction ID embedded in the name
   * @return the image file (which need not exist)
   */
  static File getImageFile(StorageDirectory sd, NameNodeFile nnf, long txid) {
    return new File(sd.getCurrentDir(), getNameNodeFileName(nnf, txid));
  }
725    
  /**
   * Builds a name of the form {@code edits_<startTxId>-<endTxId>}, with
   * both transaction IDs zero-padded to 19 digits.
   *
   * @param startTxId first transaction ID embedded in the name
   * @param endTxId last transaction ID embedded in the name
   * @return the finalized edits file name
   */
  @VisibleForTesting
  public static String getFinalizedEditsFileName(long startTxId, long endTxId) {
    return String.format("%s_%019d-%019d", NameNodeFile.EDITS.getName(),
                         startTxId, endTxId);
  }
731    
  /**
   * Builds a name of the form
   * {@code edits_tmp_<startTxId>-<endTxId>_<timestamp>}, with all three
   * numbers zero-padded to 19 digits.
   *
   * @param startTxId first transaction ID embedded in the name
   * @param endTxId last transaction ID embedded in the name
   * @param timestamp timestamp embedded in the name
   * @return the temporary edits file name
   */
  public static String getTemporaryEditsFileName(long startTxId, long endTxId,
      long timestamp) {
    return String.format("%s_%019d-%019d_%019d", NameNodeFile.EDITS_TMP.getName(),
                         startTxId, endTxId, timestamp);
  }
737      
738      /**
739       * Return the first readable finalized edits file for the given txid.
740       */
741      File findFinalizedEditsFile(long startTxId, long endTxId)
742      throws IOException {
743        File ret = findFile(NameNodeDirType.EDITS,
744            getFinalizedEditsFileName(startTxId, endTxId));
745        if (ret == null) {
746          throw new IOException(
747              "No edits file for txid " + startTxId + "-" + endTxId + " exists!");
748        }
749        return ret;
750      }
751        
  /**
   * Return the first readable image file for the given txid and image type, or
   * null if no such image can be found
   *
   * @param nnf kind of image file wanted
   * @param txid transaction ID of the wanted image
   * @return the file found in an IMAGE storage directory, or null
   */
  File findImageFile(NameNodeFile nnf, long txid) {
    return findFile(NameNodeDirType.IMAGE,
        getNameNodeFileName(nnf, txid));
  }
760    
761      /**
762       * Return the first readable storage file of the given name
763       * across any of the 'current' directories in SDs of the
764       * given type, or null if no such file exists.
765       */
766      private File findFile(NameNodeDirType dirType, String name) {
767        for (StorageDirectory sd : dirIterable(dirType)) {
768          File candidate = new File(sd.getCurrentDir(), name);
769          if (FileUtil.canRead(sd.getCurrentDir()) &&
770              candidate.exists()) {
771            return candidate;
772          }
773        }
774        return null;
775      }
776    
  /**
   * Disable the check for pre-upgradable layouts. Needed for BackupImage.
   * While disabled, isPreUpgradableLayout returns false without looking at
   * the storage directory.
   *
   * @param val Whether to disable the preupgradeable layout check.
   */
  void setDisablePreUpgradableLayoutCheck(boolean val) {
    disablePreUpgradableLayoutCheck = val;
  }
784    
785      /**
786       * Marks a list of directories as having experienced an error.
787       *
788       * @param sds A list of storage directories to mark as errored.
789       * @throws IOException
790       */
791      void reportErrorsOnDirectories(List<StorageDirectory> sds) {
792        for (StorageDirectory sd : sds) {
793          reportErrorsOnDirectory(sd);
794        }
795      }
796    
797      /**
798       * Reports that a directory has experienced an error.
799       * Notifies listeners that the directory is no longer
800       * available.
801       *
802       * @param sd A storage directory to mark as errored.
803       * @throws IOException
804       */
805      private void reportErrorsOnDirectory(StorageDirectory sd) {
806        LOG.error("Error reported on storage directory " + sd);
807    
808        String lsd = listStorageDirectories();
809        LOG.debug("current list of storage dirs:" + lsd);
810    
811        LOG.warn("About to remove corresponding storage: "
812                 + sd.getRoot().getAbsolutePath());
813        try {
814          sd.unlock();
815        } catch (Exception e) {
816          LOG.warn("Unable to unlock bad storage directory: "
817                   +  sd.getRoot().getPath(), e);
818        }
819    
820        if (this.storageDirs.remove(sd)) {
821          this.removedStorageDirs.add(sd);
822        }
823        
824        lsd = listStorageDirectories();
825        LOG.debug("at the end current list of storage dirs:" + lsd);
826      }
827      
828      /** 
829       * Processes the startup options for the clusterid and blockpoolid 
830       * for the upgrade. 
831       * @param startOpt Startup options 
832       * @param layoutVersion Layout version for the upgrade 
833       * @throws IOException
834       */
835      void processStartupOptionsForUpgrade(StartupOption startOpt, int layoutVersion)
836          throws IOException {
837        if (startOpt == StartupOption.UPGRADE) {
838          // If upgrade from a release that does not support federation,
839          // if clusterId is provided in the startupOptions use it.
840          // Else generate a new cluster ID      
841          if (!NameNodeLayoutVersion.supports(
842              LayoutVersion.Feature.FEDERATION, layoutVersion)) {
843            if (startOpt.getClusterId() == null) {
844              startOpt.setClusterId(newClusterID());
845            }
846            setClusterID(startOpt.getClusterId());
847            setBlockPoolID(newBlockPoolID());
848          } else {
849            // Upgrade from one version of federation to another supported
850            // version of federation doesn't require clusterID.
851            // Warn the user if the current clusterid didn't match with the input
852            // clusterid.
853            if (startOpt.getClusterId() != null
854                && !startOpt.getClusterId().equals(getClusterID())) {
855              LOG.warn("Clusterid mismatch - current clusterid: " + getClusterID()
856                  + ", Ignoring given clusterid: " + startOpt.getClusterId());
857            }
858          }
859          LOG.info("Using clusterid: " + getClusterID());
860        }
861      }
862      
863      /**
864       * Report that an IOE has occurred on some file which may
865       * or may not be within one of the NN image storage directories.
866       */
867      @Override
868      public void reportErrorOnFile(File f) {
869        // We use getAbsolutePath here instead of getCanonicalPath since we know
870        // that there is some IO problem on that drive.
871        // getCanonicalPath may need to call stat() or readlink() and it's likely
872        // those calls would fail due to the same underlying IO problem.
873        String absPath = f.getAbsolutePath();
874        for (StorageDirectory sd : storageDirs) {
875          String dirPath = sd.getRoot().getAbsolutePath();
876          if (!dirPath.endsWith(File.separator)) {
877            dirPath += File.separator;
878          }
879          if (absPath.startsWith(dirPath)) {
880            reportErrorsOnDirectory(sd);
881            return;
882          }
883        }
884        
885      }
886      
887      /**
888       * Generate new clusterID.
889       * 
890       * clusterID is a persistent attribute of the cluster.
891       * It is generated when the cluster is created and remains the same
892       * during the life cycle of the cluster.  When a new name node is formated, if 
893       * this is a new cluster, a new clusterID is geneated and stored.  Subsequent 
894       * name node must be given the same ClusterID during its format to be in the 
895       * same cluster.
896       * When a datanode register it receive the clusterID and stick with it.
897       * If at any point, name node or data node tries to join another cluster, it 
898       * will be rejected.
899       * 
900       * @return new clusterID
901       */ 
902      public static String newClusterID() {
903        return "CID-" + UUID.randomUUID().toString();
904      }
905    
906      void setClusterID(String cid) {
907        clusterID = cid;
908      }
909    
910      /**
911       * try to find current cluster id in the VERSION files
912       * returns first cluster id found in any VERSION file
913       * null in case none found
914       * @return clusterId or null in case no cluster id found
915       */
916      public String determineClusterId() {
917        String cid = null;
918        Iterator<StorageDirectory> sdit = dirIterator(NameNodeDirType.IMAGE);
919        while(sdit.hasNext()) {
920          StorageDirectory sd = sdit.next();
921          try {
922            Properties props = readPropertiesFile(sd.getVersionFile());
923            cid = props.getProperty("clusterID");
924            LOG.info("current cluster id for sd="+sd.getCurrentDir() + 
925                ";lv=" + layoutVersion + ";cid=" + cid);
926            
927            if(cid != null && !cid.equals(""))
928              return cid;
929          } catch (Exception e) {
930            LOG.warn("this sd not available: " + e.getLocalizedMessage());
931          } //ignore
932        }
933        LOG.warn("couldn't find any VERSION file containing valid ClusterId");
934        return null;
935      }
936    
937      /**
938       * Generate new blockpoolID.
939       * 
940       * @return new blockpoolID
941       */ 
942      static String newBlockPoolID() throws UnknownHostException{
943        String ip = "unknownIP";
944        try {
945          ip = DNS.getDefaultIP("default");
946        } catch (UnknownHostException e) {
947          LOG.warn("Could not find ip address of \"default\" inteface.");
948          throw e;
949        }
950        
951        int rand = DFSUtil.getSecureRandom().nextInt(Integer.MAX_VALUE);
952        String bpid = "BP-" + rand + "-"+ ip + "-" + Time.now();
953        return bpid;
954      }
955    
956      /** Validate and set block pool ID */
957      void setBlockPoolID(String bpid) {
958        blockpoolID = bpid;
959      }
960    
961      /** Validate and set block pool ID */
962      private void setBlockPoolID(File storage, String bpid)
963          throws InconsistentFSStateException {
964        if (bpid == null || bpid.equals("")) {
965          throw new InconsistentFSStateException(storage, "file "
966              + Storage.STORAGE_FILE_VERSION + " has no block pool Id.");
967        }
968        
969        if (!blockpoolID.equals("") && !blockpoolID.equals(bpid)) {
970          throw new InconsistentFSStateException(storage,
971              "Unexepcted blockpoolID " + bpid + " . Expected " + blockpoolID);
972        }
973        setBlockPoolID(bpid);
974      }
975      
976      public String getBlockPoolID() {
977        return blockpoolID;
978      }
979    
980      /**
981       * Iterate over all current storage directories, inspecting them
982       * with the given inspector.
983       */
984      void inspectStorageDirs(FSImageStorageInspector inspector)
985          throws IOException {
986    
987        // Process each of the storage directories to find the pair of
988        // newest image file and edit file
989        for (Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) {
990          StorageDirectory sd = it.next();
991          inspector.inspectDirectory(sd);
992        }
993      }
994    
995      /**
996       * Iterate over all of the storage dirs, reading their contents to determine
997       * their layout versions. Returns an FSImageStorageInspector which has
998       * inspected each directory.
999       * 
1000       * <b>Note:</b> this can mutate the storage info fields (ctime, version, etc).
1001       * @throws IOException if no valid storage dirs are found or no valid layout version
1002       */
1003      FSImageStorageInspector readAndInspectDirs(EnumSet<NameNodeFile> fileTypes)
1004          throws IOException {
1005        Integer layoutVersion = null;
1006        boolean multipleLV = false;
1007        StringBuilder layoutVersions = new StringBuilder();
1008    
1009        // First determine what range of layout versions we're going to inspect
1010        for (Iterator<StorageDirectory> it = dirIterator(false);
1011             it.hasNext();) {
1012          StorageDirectory sd = it.next();
1013          if (!sd.getVersionFile().exists()) {
1014            FSImage.LOG.warn("Storage directory " + sd + " contains no VERSION file. Skipping...");
1015            continue;
1016          }
1017          readProperties(sd); // sets layoutVersion
1018          int lv = getLayoutVersion();
1019          if (layoutVersion == null) {
1020            layoutVersion = Integer.valueOf(lv);
1021          } else if (!layoutVersion.equals(lv)) {
1022            multipleLV = true;
1023          }
1024          layoutVersions.append("(").append(sd.getRoot()).append(", ").append(lv).append(") ");
1025        }
1026        
1027        if (layoutVersion == null) {
1028          throw new IOException("No storage directories contained VERSION information");
1029        }
1030        if (multipleLV) {            
1031          throw new IOException(
1032              "Storage directories contain multiple layout versions: "
1033                  + layoutVersions);
1034        }
1035        // If the storage directories are with the new layout version
1036        // (ie edits_<txnid>) then use the new inspector, which will ignore
1037        // the old format dirs.
1038        FSImageStorageInspector inspector;
1039        if (NameNodeLayoutVersion.supports(
1040            LayoutVersion.Feature.TXID_BASED_LAYOUT, getLayoutVersion())) {
1041          inspector = new FSImageTransactionalStorageInspector(fileTypes);
1042        } else {
1043          inspector = new FSImagePreTransactionalStorageInspector();
1044        }
1045        
1046        inspectStorageDirs(inspector);
1047        return inspector;
1048      }
1049    
1050      public NamespaceInfo getNamespaceInfo() {
1051        return new NamespaceInfo(
1052            getNamespaceID(),
1053            getClusterID(),
1054            getBlockPoolID(),
1055            getCTime());
1056      }
1057    }