001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.server.namenode;
019
020import java.io.Closeable;
021import java.io.File;
022import java.io.IOException;
023import java.io.RandomAccessFile;
024import java.net.URI;
025import java.net.UnknownHostException;
026import java.util.ArrayList;
027import java.util.Collection;
028import java.util.EnumSet;
029import java.util.HashMap;
030import java.util.Iterator;
031import java.util.List;
032import java.util.Map;
033import java.util.Properties;
034import java.util.UUID;
035import java.util.concurrent.CopyOnWriteArrayList;
036import java.util.concurrent.ThreadLocalRandom;
037
038import org.apache.hadoop.classification.InterfaceAudience;
039import org.apache.hadoop.conf.Configuration;
040import org.apache.hadoop.fs.FileUtil;
041import org.apache.hadoop.hdfs.DFSUtil;
042import org.apache.hadoop.hdfs.protocol.LayoutVersion;
043import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
044import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
045import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
046import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
047import org.apache.hadoop.hdfs.server.common.IncorrectVersionException;
048import org.apache.hadoop.hdfs.server.common.Storage;
049import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
050import org.apache.hadoop.hdfs.server.common.Util;
051import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
052import org.apache.hadoop.hdfs.util.PersistentLongFile;
053import org.apache.hadoop.io.IOUtils;
054import org.apache.hadoop.net.DNS;
055import org.apache.hadoop.util.Time;
056import org.mortbay.util.ajax.JSON;
057
058import com.google.common.annotations.VisibleForTesting;
059import com.google.common.base.Preconditions;
060import com.google.common.collect.Lists;
061
062/**
063 * NNStorage is responsible for management of the StorageDirectories used by
064 * the NameNode.
065 */
066@InterfaceAudience.Private
067public class NNStorage extends Storage implements Closeable,
068    StorageErrorReporter {
  // Key of the legacy VERSION-file property that
  // setDeprecatedPropertiesForUpgrade() copies into deprecatedProperties.
  static final String DEPRECATED_MESSAGE_DIGEST_PROPERTY = "imageMD5Digest";
  // URI scheme string for local file URIs ("file").
  static final String LOCAL_URI_SCHEME = "file";
071
072  //
073  // The filenames used for storing the images
074  //
075  public enum NameNodeFile {
076    IMAGE     ("fsimage"),
077    TIME      ("fstime"), // from "old" pre-HDFS-1073 format
078    SEEN_TXID ("seen_txid"),
079    EDITS     ("edits"),
080    IMAGE_NEW ("fsimage.ckpt"),
081    IMAGE_ROLLBACK("fsimage_rollback"),
082    EDITS_NEW ("edits.new"), // from "old" pre-HDFS-1073 format
083    EDITS_INPROGRESS ("edits_inprogress"),
084    EDITS_TMP ("edits_tmp"),
085    IMAGE_LEGACY_OIV ("fsimage_legacy_oiv");  // For pre-PB format
086
087    private String fileName = null;
088    private NameNodeFile(String name) { this.fileName = name; }
089    @VisibleForTesting
090    public String getName() { return fileName; }
091  }
092
093  /**
094   * Implementation of StorageDirType specific to namenode storage
095   * A Storage directory could be of type IMAGE which stores only fsimage,
096   * or of type EDITS which stores edits or of type IMAGE_AND_EDITS which
097   * stores both fsimage and edits.
098   */
099  @VisibleForTesting
100  public static enum NameNodeDirType implements StorageDirType {
101    UNDEFINED,
102    IMAGE,
103    EDITS,
104    IMAGE_AND_EDITS;
105
106    @Override
107    public StorageDirType getStorageDirType() {
108      return this;
109    }
110
111    @Override
112    public boolean isOfType(StorageDirType type) {
113      if ((this == IMAGE_AND_EDITS) && (type == IMAGE || type == EDITS))
114        return true;
115      return this == type;
116    }
117  }
118
  protected String blockpoolID = ""; // id of the block pool
  
  /**
   * flag that controls if we try to restore failed storages
   */
  private boolean restoreFailedStorage = false;
  // Monitor held by attemptRestoreRemovedStorage() so that only one thread
  // runs the restoration pass at a time.
  private final Object restorationLock = new Object();
  // When true, isPreUpgradableLayout() returns false without inspecting the
  // directory; set through setDisablePreUpgradableLayoutCheck().
  private boolean disablePreUpgradableLayoutCheck = false;


  /**
   * TxId of the last transaction that was included in the most
   * recent fsimage file. This does not include any transactions
   * that have since been written to the edit log.
   */
  protected volatile long mostRecentCheckpointTxId = HdfsServerConstants.INVALID_TXID;
  
  /**
   * Time of the last checkpoint, in milliseconds since the epoch.
   */
  private long mostRecentCheckpointTime = 0;

  /**
   * list of failed (and thus removed) storages
   */
  final protected List<StorageDirectory> removedStorageDirs
    = new CopyOnWriteArrayList<StorageDirectory>();

  /**
   * Properties from old layout versions that may be needed
   * during upgrade only.
   * Stays null until setDeprecatedPropertiesForUpgrade() has run.
   */
  private HashMap<String, String> deprecatedProperties;

  /**
   * Name directories size for metric.
   */
  private Map<String, Long> nameDirSizeMap = new HashMap<>();
157
158  /**
159   * Construct the NNStorage.
160   * @param conf Namenode configuration.
161   * @param imageDirs Directories the image can be stored in.
162   * @param editsDirs Directories the editlog can be stored in.
163   * @throws IOException if any directories are inaccessible.
164   */
165  public NNStorage(Configuration conf, 
166                   Collection<URI> imageDirs, Collection<URI> editsDirs) 
167      throws IOException {
168    super(NodeType.NAME_NODE);
169
170    storageDirs = new CopyOnWriteArrayList<StorageDirectory>();
171    
172    // this may modify the editsDirs, so copy before passing in
173    setStorageDirectories(imageDirs, 
174                          Lists.newArrayList(editsDirs),
175                          FSNamesystem.getSharedEditsDirs(conf));
176    //Update NameDirSize metric value after NN start
177    updateNameDirSize();
178  }
179
180  @Override // Storage
181  public boolean isPreUpgradableLayout(StorageDirectory sd) throws IOException {
182    if (disablePreUpgradableLayoutCheck) {
183      return false;
184    }
185
186    File oldImageDir = new File(sd.getRoot(), "image");
187    if (!oldImageDir.exists()) {
188      return false;
189    }
190    // check the layout version inside the image file
191    File oldF = new File(oldImageDir, "fsimage");
192    RandomAccessFile oldFile = new RandomAccessFile(oldF, "rws");
193    try {
194      oldFile.seek(0);
195      int oldVersion = oldFile.readInt();
196      oldFile.close();
197      oldFile = null;
198      if (oldVersion < LAST_PRE_UPGRADE_LAYOUT_VERSION)
199        return false;
200    } finally {
201      IOUtils.cleanup(LOG, oldFile);
202    }
203    return true;
204  }
205
206  @Override // Closeable
207  public void close() throws IOException {
208    unlockAll();
209    storageDirs.clear();
210  }
211
212  /**
213   * Set flag whether an attempt should be made to restore failed storage
214   * directories at the next available oppurtuinity.
215   *
216   * @param val Whether restoration attempt should be made.
217   */
218  void setRestoreFailedStorage(boolean val) {
219    LOG.warn("set restore failed storage to " + val);
220    restoreFailedStorage=val;
221  }
222
223  /**
224   * @return Whether failed storage directories are to be restored.
225   */
226  boolean getRestoreFailedStorage() {
227    return restoreFailedStorage;
228  }
229
230  /**
231   * See if any of removed storages is "writable" again, and can be returned
232   * into service.
233   */
234  void attemptRestoreRemovedStorage() {
235    // if directory is "alive" - copy the images there...
236    if(!restoreFailedStorage || removedStorageDirs.size() == 0)
237      return; //nothing to restore
238
239    /* We don't want more than one thread trying to restore at a time */
240    synchronized (this.restorationLock) {
241      LOG.info("NNStorage.attemptRestoreRemovedStorage: check removed(failed) "+
242               "storage. removedStorages size = " + removedStorageDirs.size());
243      for(Iterator<StorageDirectory> it
244            = this.removedStorageDirs.iterator(); it.hasNext();) {
245        StorageDirectory sd = it.next();
246        File root = sd.getRoot();
247        LOG.info("currently disabled dir " + root.getAbsolutePath() +
248                 "; type="+sd.getStorageDirType() 
249                 + ";canwrite="+FileUtil.canWrite(root));
250        if(root.exists() && FileUtil.canWrite(root)) {
251          LOG.info("restoring dir " + sd.getRoot().getAbsolutePath());
252          this.addStorageDir(sd); // restore
253          this.removedStorageDirs.remove(sd);
254        }
255      }
256    }
257  }
258
259  /**
260   * @return A list of storage directories which are in the errored state.
261   */
262  List<StorageDirectory> getRemovedStorageDirs() {
263    return this.removedStorageDirs;
264  }
265  
266  /**
267   * See {@link NNStorage#setStorageDirectories(Collection, Collection, Collection)}
268   */
269  @VisibleForTesting
270  synchronized void setStorageDirectories(Collection<URI> fsNameDirs,
271                                          Collection<URI> fsEditsDirs)
272      throws IOException {
273    setStorageDirectories(fsNameDirs, fsEditsDirs, new ArrayList<URI>());
274  }
275
276  /**
277   * Set the storage directories which will be used. This should only ever be
278   * called from inside NNStorage. However, it needs to remain package private
279   * for testing, as StorageDirectories need to be reinitialised after using
280   * Mockito.spy() on this class, as Mockito doesn't work well with inner
281   * classes, such as StorageDirectory in this case.
282   *
283   * Synchronized due to initialization of storageDirs and removedStorageDirs.
284   *
285   * @param fsNameDirs Locations to store images.
286   * @param fsEditsDirs Locations to store edit logs.
287   * @throws IOException
288   */
289  @VisibleForTesting
290  synchronized void setStorageDirectories(Collection<URI> fsNameDirs,
291                                          Collection<URI> fsEditsDirs,
292                                          Collection<URI> sharedEditsDirs)
293      throws IOException {
294    this.storageDirs.clear();
295    this.removedStorageDirs.clear();
296
297   // Add all name dirs with appropriate NameNodeDirType
298    for (URI dirName : fsNameDirs) {
299      checkSchemeConsistency(dirName);
300      boolean isAlsoEdits = false;
301      for (URI editsDirName : fsEditsDirs) {
302        if (editsDirName.compareTo(dirName) == 0) {
303          isAlsoEdits = true;
304          fsEditsDirs.remove(editsDirName);
305          break;
306        }
307      }
308      NameNodeDirType dirType = (isAlsoEdits) ?
309                          NameNodeDirType.IMAGE_AND_EDITS :
310                          NameNodeDirType.IMAGE;
311      // Add to the list of storage directories, only if the
312      // URI is of type file://
313      if(dirName.getScheme().compareTo("file") == 0) {
314        this.addStorageDir(new StorageDirectory(new File(dirName.getPath()),
315            dirType,
316            sharedEditsDirs.contains(dirName))); // Don't lock the dir if it's shared.
317      }
318    }
319
320    // Add edits dirs if they are different from name dirs
321    for (URI dirName : fsEditsDirs) {
322      checkSchemeConsistency(dirName);
323      // Add to the list of storage directories, only if the
324      // URI is of type file://
325      if(dirName.getScheme().compareTo("file") == 0)
326        this.addStorageDir(new StorageDirectory(new File(dirName.getPath()),
327                    NameNodeDirType.EDITS, sharedEditsDirs.contains(dirName)));
328    }
329  }
330
331  /**
332   * Return the storage directory corresponding to the passed URI
333   * @param uri URI of a storage directory
334   * @return The matching storage directory or null if none found
335   */
336  public StorageDirectory getStorageDirectory(URI uri) {
337    try {
338      uri = Util.fileAsURI(new File(uri));
339      Iterator<StorageDirectory> it = dirIterator();
340      for (; it.hasNext(); ) {
341        StorageDirectory sd = it.next();
342        if (Util.fileAsURI(sd.getRoot()).equals(uri)) {
343          return sd;
344        }
345      }
346    } catch (IOException ioe) {
347      LOG.warn("Error converting file to URI", ioe);
348    }
349    return null;
350  }
351
352  /**
353   * Checks the consistency of a URI, in particular if the scheme
354   * is specified 
355   * @param u URI whose consistency is being checked.
356   */
357  private static void checkSchemeConsistency(URI u) throws IOException {
358    String scheme = u.getScheme();
359    // the URI should have a proper scheme
360    if(scheme == null) {
361      throw new IOException("Undefined scheme for " + u);
362    }
363  }
364
365  /**
366   * Retrieve current directories of type IMAGE
367   * @return Collection of URI representing image directories
368   * @throws IOException in case of URI processing error
369   */
370  Collection<URI> getImageDirectories() throws IOException {
371    return getDirectories(NameNodeDirType.IMAGE);
372  }
373
374  /**
375   * Retrieve current directories of type EDITS
376   * @return Collection of URI representing edits directories
377   * @throws IOException in case of URI processing error
378   */
379  Collection<URI> getEditsDirectories() throws IOException {
380    return getDirectories(NameNodeDirType.EDITS);
381  }
382
383  /**
384   * Return number of storage directories of the given type.
385   * @param dirType directory type
386   * @return number of storage directories of type dirType
387   */
388  int getNumStorageDirs(NameNodeDirType dirType) {
389    if(dirType == null)
390      return getNumStorageDirs();
391    Iterator<StorageDirectory> it = dirIterator(dirType);
392    int numDirs = 0;
393    for(; it.hasNext(); it.next())
394      numDirs++;
395    return numDirs;
396  }
397
398  /**
399   * Return the list of locations being used for a specific purpose.
400   * i.e. Image or edit log storage.
401   *
402   * @param dirType Purpose of locations requested.
403   * @throws IOException
404   */
405  Collection<URI> getDirectories(NameNodeDirType dirType)
406      throws IOException {
407    ArrayList<URI> list = new ArrayList<URI>();
408    Iterator<StorageDirectory> it = (dirType == null) ? dirIterator() :
409                                    dirIterator(dirType);
410    for ( ;it.hasNext(); ) {
411      StorageDirectory sd = it.next();
412      try {
413        list.add(Util.fileAsURI(sd.getRoot()));
414      } catch (IOException e) {
415        throw new IOException("Exception while processing " +
416            "StorageDirectory " + sd.getRoot(), e);
417      }
418    }
419    return list;
420  }
421  
422  /**
423   * Determine the last transaction ID noted in this storage directory.
424   * This txid is stored in a special seen_txid file since it might not
425   * correspond to the latest image or edit log. For example, an image-only
426   * directory will have this txid incremented when edits logs roll, even
427   * though the edits logs are in a different directory.
428   *
429   * @param sd StorageDirectory to check
430   * @return If file exists and can be read, last recorded txid. If not, 0L.
431   * @throws IOException On errors processing file pointed to by sd
432   */
433  static long readTransactionIdFile(StorageDirectory sd) throws IOException {
434    File txidFile = getStorageFile(sd, NameNodeFile.SEEN_TXID);
435    return PersistentLongFile.readFile(txidFile, 0);
436  }
437  
438  /**
439   * Write last checkpoint time into a separate file.
440   * @param sd storage directory
441   * @throws IOException
442   */
443  void writeTransactionIdFile(StorageDirectory sd, long txid) throws IOException {
444    Preconditions.checkArgument(txid >= 0, "bad txid: " + txid);
445    
446    File txIdFile = getStorageFile(sd, NameNodeFile.SEEN_TXID);
447    PersistentLongFile.writeFile(txIdFile, txid);
448  }
449
450  /**
451   * Set the transaction ID and time of the last checkpoint
452   * 
453   * @param txid transaction id of the last checkpoint
454   * @param time time of the last checkpoint, in millis since the epoch
455   */
456  void setMostRecentCheckpointInfo(long txid, long time) {
457    this.mostRecentCheckpointTxId = txid;
458    this.mostRecentCheckpointTime = time;
459  }
460
461  /**
462   * @return the transaction ID of the last checkpoint.
463   */
464  public long getMostRecentCheckpointTxId() {
465    return mostRecentCheckpointTxId;
466  }
467  
468  /**
469   * @return the time of the most recent checkpoint in millis since the epoch.
470   */
471  long getMostRecentCheckpointTime() {
472    return mostRecentCheckpointTime;
473  }
474
475  /**
476   * Write a small file in all available storage directories that
477   * indicates that the namespace has reached some given transaction ID.
478   * 
479   * This is used when the image is loaded to avoid accidental rollbacks
480   * in the case where an edit log is fully deleted but there is no
481   * checkpoint. See TestNameEditsConfigs.testNameEditsConfigsFailure()
482   * @param txid the txid that has been reached
483   */
484  public void writeTransactionIdFileToStorage(long txid) {
485    writeTransactionIdFileToStorage(txid, null);
486  }
487
488  /**
489   * Write a small file in all available storage directories that
490   * indicates that the namespace has reached some given transaction ID.
491   *
492   * This is used when the image is loaded to avoid accidental rollbacks
493   * in the case where an edit log is fully deleted but there is no
494   * checkpoint. See TestNameEditsConfigs.testNameEditsConfigsFailure()
495   * @param txid the txid that has been reached
496   * @param type the type of directory
497   */
498  public void writeTransactionIdFileToStorage(long txid,
499      NameNodeDirType type) {
500    // Write txid marker in all storage directories
501    for (Iterator<StorageDirectory> it = dirIterator(type); it.hasNext();) {
502      StorageDirectory sd = it.next();
503      try {
504        writeTransactionIdFile(sd, txid);
505      } catch(IOException e) {
506        // Close any edits stream associated with this dir and remove directory
507        LOG.warn("writeTransactionIdToStorage failed on " + sd,
508            e);
509        reportErrorsOnDirectory(sd);
510      }
511    }
512  }
513
514  /**
515   * Return the name of the image file that is uploaded by periodic
516   * checkpointing
517   *
518   * @return List of filenames to save checkpoints to.
519   */
520  public File[] getFsImageNameCheckpoint(long txid) {
521    ArrayList<File> list = new ArrayList<File>();
522    for (Iterator<StorageDirectory> it =
523                 dirIterator(NameNodeDirType.IMAGE); it.hasNext();) {
524      list.add(getStorageFile(it.next(), NameNodeFile.IMAGE_NEW, txid));
525    }
526    return list.toArray(new File[list.size()]);
527  }
528
529  /**
530   * @return The first image file with the given txid and image type.
531   */
532  public File getFsImageName(long txid, NameNodeFile nnf) {
533    for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE);
534        it.hasNext();) {
535      StorageDirectory sd = it.next();
536      File fsImage = getStorageFile(sd, nnf, txid);
537      if (FileUtil.canRead(sd.getRoot()) && fsImage.exists()) {
538        return fsImage;
539      }
540    }
541    return null;
542  }
543
544  /**
545   * @return The first image file whose txid is the same with the given txid and
546   * image type is one of the given types.
547   */
548  public File getFsImage(long txid, EnumSet<NameNodeFile> nnfs) {
549    for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE);
550        it.hasNext();) {
551      StorageDirectory sd = it.next();
552      for (NameNodeFile nnf : nnfs) {
553        File fsImage = getStorageFile(sd, nnf, txid);
554        if (FileUtil.canRead(sd.getRoot()) && fsImage.exists()) {
555          return fsImage;
556        }
557      }
558    }
559    return null;
560  }
561
562  public File getFsImageName(long txid) {
563    return getFsImageName(txid, NameNodeFile.IMAGE);
564  }
565
566  public File getHighestFsImageName() {
567    return getFsImageName(getMostRecentCheckpointTxId());
568  }
569
570  /** Create new dfs name directory.  Caution: this destroys all files
571   * in this filesystem. */
572  private void format(StorageDirectory sd) throws IOException {
573    sd.clearDirectory(); // create currrent dir
574    writeProperties(sd);
575    writeTransactionIdFile(sd, 0);
576
577    LOG.info("Storage directory " + sd.getRoot()
578             + " has been successfully formatted.");
579  }
580
581  /**
582   * Format all available storage directories.
583   */
584  public void format(NamespaceInfo nsInfo) throws IOException {
585    Preconditions.checkArgument(nsInfo.getLayoutVersion() == 0 ||
586        nsInfo.getLayoutVersion() == HdfsServerConstants.NAMENODE_LAYOUT_VERSION,
587        "Bad layout version: %s", nsInfo.getLayoutVersion());
588    
589    this.setStorageInfo(nsInfo);
590    this.blockpoolID = nsInfo.getBlockPoolID();
591    for (Iterator<StorageDirectory> it =
592                           dirIterator(); it.hasNext();) {
593      StorageDirectory sd = it.next();
594      format(sd);
595    }
596  }
597  
598  public static NamespaceInfo newNamespaceInfo()
599      throws UnknownHostException {
600    return new NamespaceInfo(newNamespaceID(), newClusterID(),
601        newBlockPoolID(), Time.now());
602  }
603  
604  public void format() throws IOException {
605    this.layoutVersion = HdfsServerConstants.NAMENODE_LAYOUT_VERSION;
606    for (Iterator<StorageDirectory> it =
607                           dirIterator(); it.hasNext();) {
608      StorageDirectory sd = it.next();
609      format(sd);
610    }
611  }
612
613  /**
614   * Generate new namespaceID.
615   *
616   * namespaceID is a persistent attribute of the namespace.
617   * It is generated when the namenode is formatted and remains the same
618   * during the life cycle of the namenode.
619   * When a datanodes register they receive it as the registrationID,
620   * which is checked every time the datanode is communicating with the
621   * namenode. Datanodes that do not 'know' the namespaceID are rejected.
622   *
623   * @return new namespaceID
624   */
625  private static int newNamespaceID() {
626    int newID = 0;
627    while(newID == 0)
628      newID = ThreadLocalRandom.current().nextInt(0x7FFFFFFF);  // use 31 bits
629    return newID;
630  }
631
632  @Override // Storage
633  protected void setFieldsFromProperties(
634      Properties props, StorageDirectory sd) throws IOException {
635    super.setFieldsFromProperties(props, sd);
636    if (layoutVersion == 0) {
637      throw new IOException("NameNode directory "
638                            + sd.getRoot() + " is not formatted.");
639    }
640
641    // Set Block pool ID in version with federation support
642    if (NameNodeLayoutVersion.supports(
643        LayoutVersion.Feature.FEDERATION, getLayoutVersion())) {
644      String sbpid = props.getProperty("blockpoolID");
645      setBlockPoolID(sd.getRoot(), sbpid);
646    }
647    setDeprecatedPropertiesForUpgrade(props);
648  }
649
650  void readProperties(StorageDirectory sd, StartupOption startupOption)
651      throws IOException {
652    Properties props = readPropertiesFile(sd.getVersionFile());
653    if (HdfsServerConstants.RollingUpgradeStartupOption.ROLLBACK.matches
654        (startupOption)) {
655      int lv = Integer.parseInt(getProperty(props, sd, "layoutVersion"));
656      if (lv > getServiceLayoutVersion()) {
657        // we should not use a newer version for rollingUpgrade rollback
658        throw new IncorrectVersionException(getServiceLayoutVersion(), lv,
659            "storage directory " + sd.getRoot().getAbsolutePath());
660      }
661      props.setProperty("layoutVersion",
662          Integer.toString(HdfsServerConstants.NAMENODE_LAYOUT_VERSION));
663    }
664    setFieldsFromProperties(props, sd);
665  }
666
667  /**
668   * Pull any properties out of the VERSION file that are from older
669   * versions of HDFS and only necessary during upgrade.
670   */
671  private void setDeprecatedPropertiesForUpgrade(Properties props) {
672    deprecatedProperties = new HashMap<String, String>();
673    String md5 = props.getProperty(DEPRECATED_MESSAGE_DIGEST_PROPERTY);
674    if (md5 != null) {
675      deprecatedProperties.put(DEPRECATED_MESSAGE_DIGEST_PROPERTY, md5);
676    }
677  }
678  
679  /**
680   * Return a property that was stored in an earlier version of HDFS.
681   * 
682   * This should only be used during upgrades.
683   */
684  String getDeprecatedProperty(String prop) {
685    assert getLayoutVersion() > HdfsServerConstants.NAMENODE_LAYOUT_VERSION :
686      "getDeprecatedProperty should only be done when loading " +
687      "storage from past versions during upgrade.";
688    return deprecatedProperties.get(prop);
689  }
690
691  /**
692   * Write version file into the storage directory.
693   *
694   * The version file should always be written last.
695   * Missing or corrupted version file indicates that
696   * the checkpoint is not valid.
697   *
698   * @param sd storage directory
699   * @throws IOException
700   */
701  @Override // Storage
702  protected void setPropertiesFromFields(Properties props,
703                           StorageDirectory sd
704                           ) throws IOException {
705    super.setPropertiesFromFields(props, sd);
706    // Set blockpoolID in version with federation support
707    if (NameNodeLayoutVersion.supports(
708        LayoutVersion.Feature.FEDERATION, getLayoutVersion())) {
709      props.setProperty("blockpoolID", blockpoolID);
710    }
711  }
712  
713  static File getStorageFile(StorageDirectory sd, NameNodeFile type, long imageTxId) {
714    return new File(sd.getCurrentDir(),
715                    String.format("%s_%019d", type.getName(), imageTxId));
716  }
717  
718  /**
719   * Get a storage file for one of the files that doesn't need a txid associated
720   * (e.g version, seen_txid)
721   */
722  static File getStorageFile(StorageDirectory sd, NameNodeFile type) {
723    return new File(sd.getCurrentDir(), type.getName());
724  }
725
726  @VisibleForTesting
727  public static String getCheckpointImageFileName(long txid) {
728    return getNameNodeFileName(NameNodeFile.IMAGE_NEW, txid);
729  }
730
731  @VisibleForTesting
732  public static String getImageFileName(long txid) {
733    return getNameNodeFileName(NameNodeFile.IMAGE, txid);
734  }
735
736  @VisibleForTesting
737  public static String getRollbackImageFileName(long txid) {
738    return getNameNodeFileName(NameNodeFile.IMAGE_ROLLBACK, txid);
739  }
740
741  public static String getLegacyOIVImageFileName(long txid) {
742    return getNameNodeFileName(NameNodeFile.IMAGE_LEGACY_OIV, txid);
743  }
744
745  private static String getNameNodeFileName(NameNodeFile nnf, long txid) {
746    return String.format("%s_%019d", nnf.getName(), txid);
747  }
748
749  @VisibleForTesting
750  public static String getInProgressEditsFileName(long startTxId) {
751    return getNameNodeFileName(NameNodeFile.EDITS_INPROGRESS, startTxId);
752  }
753  
754  static File getInProgressEditsFile(StorageDirectory sd, long startTxId) {
755    return new File(sd.getCurrentDir(), getInProgressEditsFileName(startTxId));
756  }
757  
758  static File getFinalizedEditsFile(StorageDirectory sd,
759      long startTxId, long endTxId) {
760    return new File(sd.getCurrentDir(),
761        getFinalizedEditsFileName(startTxId, endTxId));
762  }
763
764  static File getTemporaryEditsFile(StorageDirectory sd,
765      long startTxId, long endTxId, long timestamp) {
766    return new File(sd.getCurrentDir(),
767        getTemporaryEditsFileName(startTxId, endTxId, timestamp));
768  }
769
770  static File getImageFile(StorageDirectory sd, NameNodeFile nnf, long txid) {
771    return new File(sd.getCurrentDir(), getNameNodeFileName(nnf, txid));
772  }
773
774  @VisibleForTesting
775  public static String getFinalizedEditsFileName(long startTxId, long endTxId) {
776    return String.format("%s_%019d-%019d", NameNodeFile.EDITS.getName(),
777                         startTxId, endTxId);
778  }
779
780  public static String getTemporaryEditsFileName(long startTxId, long endTxId,
781      long timestamp) {
782    return String.format("%s_%019d-%019d_%019d", NameNodeFile.EDITS_TMP.getName(),
783                         startTxId, endTxId, timestamp);
784  }
785  
786  /**
787   * Return the first readable finalized edits file for the given txid.
788   */
789  File findFinalizedEditsFile(long startTxId, long endTxId)
790  throws IOException {
791    File ret = findFile(NameNodeDirType.EDITS,
792        getFinalizedEditsFileName(startTxId, endTxId));
793    if (ret == null) {
794      throw new IOException(
795          "No edits file for txid " + startTxId + "-" + endTxId + " exists!");
796    }
797    return ret;
798  }
799    
800  /**
801   * Return the first readable image file for the given txid and image type, or
802   * null if no such image can be found
803   */
804  File findImageFile(NameNodeFile nnf, long txid) {
805    return findFile(NameNodeDirType.IMAGE,
806        getNameNodeFileName(nnf, txid));
807  }
808
809  /**
810   * Return the first readable storage file of the given name
811   * across any of the 'current' directories in SDs of the
812   * given type, or null if no such file exists.
813   */
814  private File findFile(NameNodeDirType dirType, String name) {
815    for (StorageDirectory sd : dirIterable(dirType)) {
816      File candidate = new File(sd.getCurrentDir(), name);
817      if (FileUtil.canRead(sd.getCurrentDir()) &&
818          candidate.exists()) {
819        return candidate;
820      }
821    }
822    return null;
823  }
824
825  /**
826   * Disable the check for pre-upgradable layouts. Needed for BackupImage.
827   * @param val Whether to disable the preupgradeable layout check.
828   */
829  void setDisablePreUpgradableLayoutCheck(boolean val) {
830    disablePreUpgradableLayoutCheck = val;
831  }
832
833  /**
834   * Marks a list of directories as having experienced an error.
835   *
836   * @param sds A list of storage directories to mark as errored.
837   */
838  void reportErrorsOnDirectories(List<StorageDirectory> sds) {
839    for (StorageDirectory sd : sds) {
840      reportErrorsOnDirectory(sd);
841    }
842  }
843
844  /**
845   * Reports that a directory has experienced an error.
846   * Notifies listeners that the directory is no longer
847   * available.
848   *
849   * @param sd A storage directory to mark as errored.
850   */
851  private void reportErrorsOnDirectory(StorageDirectory sd) {
852    LOG.error("Error reported on storage directory " + sd);
853
854    String lsd = listStorageDirectories();
855    LOG.debug("current list of storage dirs:" + lsd);
856
857    LOG.warn("About to remove corresponding storage: "
858             + sd.getRoot().getAbsolutePath());
859    try {
860      sd.unlock();
861    } catch (Exception e) {
862      LOG.warn("Unable to unlock bad storage directory: "
863               +  sd.getRoot().getPath(), e);
864    }
865
866    if (this.storageDirs.remove(sd)) {
867      this.removedStorageDirs.add(sd);
868    }
869    
870    lsd = listStorageDirectories();
871    LOG.debug("at the end current list of storage dirs:" + lsd);
872  }
873  
  /**
   * Processes the startup options for the cluster ID and block pool ID
   * during an upgrade.
   * @param startOpt Startup options
   * @param layoutVersion Layout version for the upgrade
   * @throws IOException if a new block pool ID cannot be generated
   */
881  void processStartupOptionsForUpgrade(StartupOption startOpt, int layoutVersion)
882      throws IOException {
883    if (startOpt == StartupOption.UPGRADE || startOpt == StartupOption.UPGRADEONLY) {
884      // If upgrade from a release that does not support federation,
885      // if clusterId is provided in the startupOptions use it.
886      // Else generate a new cluster ID      
887      if (!NameNodeLayoutVersion.supports(
888          LayoutVersion.Feature.FEDERATION, layoutVersion)) {
889        if (startOpt.getClusterId() == null) {
890          startOpt.setClusterId(newClusterID());
891        }
892        setClusterID(startOpt.getClusterId());
893        setBlockPoolID(newBlockPoolID());
894      } else {
895        // Upgrade from one version of federation to another supported
896        // version of federation doesn't require clusterID.
897        // Warn the user if the current clusterid didn't match with the input
898        // clusterid.
899        if (startOpt.getClusterId() != null
900            && !startOpt.getClusterId().equals(getClusterID())) {
901          LOG.warn("Clusterid mismatch - current clusterid: " + getClusterID()
902              + ", Ignoring given clusterid: " + startOpt.getClusterId());
903        }
904      }
905      LOG.info("Using clusterid: " + getClusterID());
906    }
907  }
908  
909  /**
910   * Report that an IOE has occurred on some file which may
911   * or may not be within one of the NN image storage directories.
912   */
913  @Override
914  public void reportErrorOnFile(File f) {
915    // We use getAbsolutePath here instead of getCanonicalPath since we know
916    // that there is some IO problem on that drive.
917    // getCanonicalPath may need to call stat() or readlink() and it's likely
918    // those calls would fail due to the same underlying IO problem.
919    String absPath = f.getAbsolutePath();
920    for (StorageDirectory sd : storageDirs) {
921      String dirPath = sd.getRoot().getAbsolutePath();
922      if (!dirPath.endsWith(File.separator)) {
923        dirPath += File.separator;
924      }
925      if (absPath.startsWith(dirPath)) {
926        reportErrorsOnDirectory(sd);
927        return;
928      }
929    }
930    
931  }
932  
  /**
   * Generate new clusterID.
   *
   * clusterID is a persistent attribute of the cluster.
   * It is generated when the cluster is created and remains the same
   * during the life cycle of the cluster.  When a new name node is formatted,
   * if this is a new cluster, a new clusterID is generated and stored.
   * Subsequent name nodes must be given the same clusterID during their
   * format to be in the same cluster.
   * When a datanode registers, it receives the clusterID and sticks with it.
   * If at any point a name node or data node tries to join another cluster,
   * it will be rejected.
   *
   * @return new clusterID
   */
948  public static String newClusterID() {
949    return "CID-" + UUID.randomUUID().toString();
950  }
951
952  void setClusterID(String cid) {
953    clusterID = cid;
954  }
955
  /**
   * Try to find the current cluster ID in the VERSION files.
   * Returns the first non-empty cluster ID found in the VERSION file of any
   * IMAGE storage directory.
   * @return the cluster ID, or null if no cluster ID was found
   */
962  public String determineClusterId() {
963    String cid = null;
964    Iterator<StorageDirectory> sdit = dirIterator(NameNodeDirType.IMAGE);
965    while(sdit.hasNext()) {
966      StorageDirectory sd = sdit.next();
967      try {
968        Properties props = readPropertiesFile(sd.getVersionFile());
969        cid = props.getProperty("clusterID");
970        LOG.info("current cluster id for sd="+sd.getCurrentDir() + 
971            ";lv=" + layoutVersion + ";cid=" + cid);
972        
973        if(cid != null && !cid.equals(""))
974          return cid;
975      } catch (Exception e) {
976        LOG.warn("this sd not available: " + e.getLocalizedMessage());
977      } //ignore
978    }
979    LOG.warn("couldn't find any VERSION file containing valid ClusterId");
980    return null;
981  }
982
  /**
   * Generate new blockpoolID.
   *
   * @return new blockpoolID
   * @throws UnknownHostException if the IP address of the "default"
   *         interface cannot be determined
   */
988  static String newBlockPoolID() throws UnknownHostException{
989    String ip = "unknownIP";
990    try {
991      ip = DNS.getDefaultIP("default");
992    } catch (UnknownHostException e) {
993      LOG.warn("Could not find ip address of \"default\" inteface.");
994      throw e;
995    }
996    
997    int rand = DFSUtil.getSecureRandom().nextInt(Integer.MAX_VALUE);
998    String bpid = "BP-" + rand + "-"+ ip + "-" + Time.now();
999    return bpid;
1000  }
1001
  /** Set block pool ID. No validation is performed. */
1003  public void setBlockPoolID(String bpid) {
1004    blockpoolID = bpid;
1005  }
1006
1007  /** Validate and set block pool ID */
1008  private void setBlockPoolID(File storage, String bpid)
1009      throws InconsistentFSStateException {
1010    if (bpid == null || bpid.equals("")) {
1011      throw new InconsistentFSStateException(storage, "file "
1012          + Storage.STORAGE_FILE_VERSION + " has no block pool Id.");
1013    }
1014    
1015    if (!blockpoolID.equals("") && !blockpoolID.equals(bpid)) {
1016      throw new InconsistentFSStateException(storage,
1017          "Unexepcted blockpoolID " + bpid + " . Expected " + blockpoolID);
1018    }
1019    setBlockPoolID(bpid);
1020  }
1021  
1022  public String getBlockPoolID() {
1023    return blockpoolID;
1024  }
1025
1026  /**
1027   * Iterate over all current storage directories, inspecting them
1028   * with the given inspector.
1029   */
1030  void inspectStorageDirs(FSImageStorageInspector inspector)
1031      throws IOException {
1032
1033    // Process each of the storage directories to find the pair of
1034    // newest image file and edit file
1035    for (Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) {
1036      StorageDirectory sd = it.next();
1037      inspector.inspectDirectory(sd);
1038    }
1039  }
1040
1041  /**
1042   * Iterate over all of the storage dirs, reading their contents to determine
1043   * their layout versions. Returns an FSImageStorageInspector which has
1044   * inspected each directory.
1045   * 
1046   * <b>Note:</b> this can mutate the storage info fields (ctime, version, etc).
1047   * @throws IOException if no valid storage dirs are found or no valid layout version
1048   */
1049  FSImageStorageInspector readAndInspectDirs(EnumSet<NameNodeFile> fileTypes,
1050      StartupOption startupOption) throws IOException {
1051    Integer layoutVersion = null;
1052    boolean multipleLV = false;
1053    StringBuilder layoutVersions = new StringBuilder();
1054
1055    // First determine what range of layout versions we're going to inspect
1056    for (Iterator<StorageDirectory> it = dirIterator(false);
1057         it.hasNext();) {
1058      StorageDirectory sd = it.next();
1059      if (!sd.getVersionFile().exists()) {
1060        FSImage.LOG.warn("Storage directory " + sd + " contains no VERSION file. Skipping...");
1061        continue;
1062      }
1063      readProperties(sd, startupOption); // sets layoutVersion
1064      int lv = getLayoutVersion();
1065      if (layoutVersion == null) {
1066        layoutVersion = Integer.valueOf(lv);
1067      } else if (!layoutVersion.equals(lv)) {
1068        multipleLV = true;
1069      }
1070      layoutVersions.append("(").append(sd.getRoot()).append(", ").append(lv).append(") ");
1071    }
1072    
1073    if (layoutVersion == null) {
1074      throw new IOException("No storage directories contained VERSION information");
1075    }
1076    if (multipleLV) {            
1077      throw new IOException(
1078          "Storage directories contain multiple layout versions: "
1079              + layoutVersions);
1080    }
1081    // If the storage directories are with the new layout version
1082    // (ie edits_<txnid>) then use the new inspector, which will ignore
1083    // the old format dirs.
1084    FSImageStorageInspector inspector;
1085    if (NameNodeLayoutVersion.supports(
1086        LayoutVersion.Feature.TXID_BASED_LAYOUT, getLayoutVersion())) {
1087      inspector = new FSImageTransactionalStorageInspector(fileTypes);
1088    } else {
1089      inspector = new FSImagePreTransactionalStorageInspector();
1090    }
1091    
1092    inspectStorageDirs(inspector);
1093    return inspector;
1094  }
1095
1096  public NamespaceInfo getNamespaceInfo() {
1097    return new NamespaceInfo(
1098        getNamespaceID(),
1099        getClusterID(),
1100        getBlockPoolID(),
1101        getCTime());
1102  }
1103
1104  public String getNNDirectorySize() {
1105    return JSON.toString(nameDirSizeMap);
1106  }
1107
1108  public void updateNameDirSize() {
1109    Map<String, Long> nnDirSizeMap = new HashMap<>();
1110    for (Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) {
1111      StorageDirectory sd = it.next();
1112      if (!sd.isShared()) {
1113        nnDirSizeMap.put(sd.getRoot().getAbsolutePath(), sd.getDirecorySize());
1114      }
1115    }
1116    nameDirSizeMap.clear();
1117    nameDirSizeMap.putAll(nnDirSizeMap);
1118  }
1119}