001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.hdfs.server.namenode;
019    
020    import com.google.common.annotations.VisibleForTesting;
021    import com.google.common.base.Joiner;
022    import com.google.common.base.Preconditions;
023    import com.google.common.collect.Lists;
024    
025    import org.apache.commons.logging.Log;
026    import org.apache.commons.logging.LogFactory;
027    import org.apache.hadoop.HadoopIllegalArgumentException;
028    import org.apache.hadoop.classification.InterfaceAudience;
029    import org.apache.hadoop.conf.Configuration;
030    import org.apache.hadoop.fs.FileSystem;
031    import org.apache.hadoop.fs.Trash;
032    import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
033    import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
034    import org.apache.hadoop.ha.HAServiceStatus;
035    import org.apache.hadoop.ha.HealthCheckFailedException;
036    import org.apache.hadoop.ha.ServiceFailedException;
037    import org.apache.hadoop.hdfs.DFSConfigKeys;
038    import org.apache.hadoop.hdfs.DFSUtil;
039    import org.apache.hadoop.hdfs.HAUtil;
040    import org.apache.hadoop.hdfs.HdfsConfiguration;
041    import org.apache.hadoop.hdfs.protocol.ClientProtocol;
042    import org.apache.hadoop.hdfs.protocol.HdfsConstants;
043    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
044    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption;
045    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
046    import org.apache.hadoop.hdfs.server.namenode.ha.*;
047    import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
048    import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
049    import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgressMetrics;
050    import org.apache.hadoop.hdfs.server.protocol.*;
051    import org.apache.hadoop.ipc.Server;
052    import org.apache.hadoop.ipc.StandbyException;
053    import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
054    import org.apache.hadoop.metrics2.util.MBeans;
055    import org.apache.hadoop.net.NetUtils;
056    import org.apache.hadoop.security.AccessControlException;
057    import org.apache.hadoop.security.RefreshUserMappingsProtocol;
058    import org.apache.hadoop.security.SecurityUtil;
059    import org.apache.hadoop.security.UserGroupInformation;
060    import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol;
061    import org.apache.hadoop.ipc.RefreshCallQueueProtocol;
062    import org.apache.hadoop.tools.GetUserMappingsProtocol;
063    import org.apache.hadoop.util.ExitUtil.ExitException;
064    import org.apache.hadoop.util.JvmPauseMonitor;
065    import org.apache.hadoop.util.ServicePlugin;
066    import org.apache.hadoop.util.StringUtils;
067    
068    import javax.management.ObjectName;
069    
070    import java.io.IOException;
071    import java.io.PrintStream;
072    import java.net.InetSocketAddress;
073    import java.net.URI;
074    import java.security.PrivilegedExceptionAction;
075    import java.util.ArrayList;
076    import java.util.Arrays;
077    import java.util.Collection;
078    import java.util.List;
079    
080    import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY;
081    import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT;
082    import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY;
083    import static org.apache.hadoop.hdfs.DFSConfigKeys.*;
084    import static org.apache.hadoop.util.ExitUtil.terminate;
085    import static org.apache.hadoop.util.ToolRunner.confirmPrompt;
086    
087    /**********************************************************
088     * NameNode serves as both directory namespace manager and
089     * "inode table" for the Hadoop DFS.  There is a single NameNode
090     * running in any DFS deployment.  (Well, except when there
091     * is a second backup/failover NameNode, or when using federated NameNodes.)
092     *
093     * The NameNode controls two critical tables:
094     *   1)  filename->blocksequence (namespace)
095     *   2)  block->machinelist ("inodes")
096     *
097     * The first table is stored on disk and is very precious.
098     * The second table is rebuilt every time the NameNode comes up.
099     *
100     * 'NameNode' refers to both this class as well as the 'NameNode server'.
101     * The 'FSNamesystem' class actually performs most of the filesystem
102     * management.  The majority of the 'NameNode' class itself is concerned
103     * with exposing the IPC interface and the HTTP server to the outside world,
104     * plus some configuration management.
105     *
106     * NameNode implements the
107     * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} interface, which
108     * allows clients to ask for DFS services.
109     * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} is not designed for
110     * direct use by authors of DFS client code.  End-users should instead use the
111     * {@link org.apache.hadoop.fs.FileSystem} class.
112     *
113     * NameNode also implements the
114     * {@link org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol} interface,
115     * used by DataNodes that actually store DFS data blocks.  These
116     * methods are invoked repeatedly and automatically by all the
117     * DataNodes in a DFS deployment.
118     *
119     * NameNode also implements the
120     * {@link org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol} interface,
121     * used by secondary namenodes or rebalancing processes to get partial
122     * NameNode state, for example partial blocksMap etc.
123     **********************************************************/
124    @InterfaceAudience.Private
125    public class NameNode implements NameNodeStatusMXBean {
  // Register hdfs-default.xml / hdfs-site.xml with Configuration before any
  // NameNode code reads configuration keys.
  static{
    HdfsConfiguration.init();
  }
129    
130      /**
131       * Categories of operations supported by the namenode.
132       */
133      public static enum OperationCategory {
134        /** Operations that are state agnostic */
135        UNCHECKED,
136        /** Read operation that does not change the namespace state */
137        READ,
138        /** Write operation that changes the namespace state */
139        WRITE,
140        /** Operations related to checkpointing */
141        CHECKPOINT,
142        /** Operations related to {@link JournalProtocol} */
143        JOURNAL
144      }
145      
146      /**
147       * HDFS configuration can have three types of parameters:
148       * <ol>
149       * <li>Parameters that are common for all the name services in the cluster.</li>
150       * <li>Parameters that are specific to a name service. These keys are suffixed
151       * with nameserviceId in the configuration. For example,
152       * "dfs.namenode.rpc-address.nameservice1".</li>
153       * <li>Parameters that are specific to a single name node. These keys are suffixed
154       * with nameserviceId and namenodeId in the configuration. for example,
155       * "dfs.namenode.rpc-address.nameservice1.namenode1"</li>
156       * </ol>
157       * 
158       * In the latter cases, operators may specify the configuration without
159       * any suffix, with a nameservice suffix, or with a nameservice and namenode
160       * suffix. The more specific suffix will take precedence.
161       * 
162       * These keys are specific to a given namenode, and thus may be configured
163       * globally, for a nameservice, or for a specific namenode within a nameservice.
164       */
165      public static final String[] NAMENODE_SPECIFIC_KEYS = {
166        DFS_NAMENODE_RPC_ADDRESS_KEY,
167        DFS_NAMENODE_RPC_BIND_HOST_KEY,
168        DFS_NAMENODE_NAME_DIR_KEY,
169        DFS_NAMENODE_EDITS_DIR_KEY,
170        DFS_NAMENODE_SHARED_EDITS_DIR_KEY,
171        DFS_NAMENODE_CHECKPOINT_DIR_KEY,
172        DFS_NAMENODE_CHECKPOINT_EDITS_DIR_KEY,
173        DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY,
174        DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY,
175        DFS_NAMENODE_HTTP_ADDRESS_KEY,
176        DFS_NAMENODE_HTTPS_ADDRESS_KEY,
177        DFS_NAMENODE_KEYTAB_FILE_KEY,
178        DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY,
179        DFS_NAMENODE_SECONDARY_HTTPS_ADDRESS_KEY,
180        DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY,
181        DFS_NAMENODE_BACKUP_ADDRESS_KEY,
182        DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY,
183        DFS_NAMENODE_BACKUP_SERVICE_RPC_ADDRESS_KEY,
184        DFS_NAMENODE_USER_NAME_KEY,
185        DFS_NAMENODE_INTERNAL_SPNEGO_USER_NAME_KEY,
186        DFS_HA_FENCE_METHODS_KEY,
187        DFS_HA_ZKFC_PORT_KEY,
188        DFS_HA_FENCE_METHODS_KEY
189      };
190      
191      /**
192       * @see #NAMENODE_SPECIFIC_KEYS
193       * These keys are specific to a nameservice, but may not be overridden
194       * for a specific namenode.
195       */
  public static final String[] NAMESERVICE_SPECIFIC_KEYS = {
    // Automatic failover applies to the nameservice as a whole, so it cannot
    // be overridden per individual namenode.
    DFS_HA_AUTO_FAILOVER_ENABLED_KEY
  };
199      
  // Usage string printed when the NameNode is started with invalid arguments.
  // NOTE(review): there is no space between the RENAMERESERVED option name and
  // "<k-v pairs>" below — confirm the intended console formatting.
  private static final String USAGE = "Usage: java NameNode ["
      + StartupOption.BACKUP.getName() + "] | ["
      + StartupOption.CHECKPOINT.getName() + "] | ["
      + StartupOption.FORMAT.getName() + " ["
      + StartupOption.CLUSTERID.getName() + " cid ] ["
      + StartupOption.FORCE.getName() + "] ["
      + StartupOption.NONINTERACTIVE.getName() + "] ] | ["
      + StartupOption.UPGRADE.getName() + 
        " [" + StartupOption.CLUSTERID.getName() + " cid]" +
        " [" + StartupOption.RENAMERESERVED.getName() + "<k-v pairs>] ] | ["
      + StartupOption.ROLLBACK.getName() + "] | ["
      + StartupOption.ROLLINGUPGRADE.getName() + " <"
      + RollingUpgradeStartupOption.DOWNGRADE.name().toLowerCase() + "|"
      + RollingUpgradeStartupOption.ROLLBACK.name().toLowerCase() + "> ] | ["
      + StartupOption.FINALIZE.getName() + "] | ["
      + StartupOption.IMPORT.getName() + "] | ["
      + StartupOption.INITIALIZESHAREDEDITS.getName() + "] | ["
      + StartupOption.BOOTSTRAPSTANDBY.getName() + "] | ["
      + StartupOption.RECOVER.getName() + " [ " + StartupOption.FORCE.getName()
      + " ] ]";
220      
221      public long getProtocolVersion(String protocol, 
222                                     long clientVersion) throws IOException {
223        if (protocol.equals(ClientProtocol.class.getName())) {
224          return ClientProtocol.versionID; 
225        } else if (protocol.equals(DatanodeProtocol.class.getName())){
226          return DatanodeProtocol.versionID;
227        } else if (protocol.equals(NamenodeProtocol.class.getName())){
228          return NamenodeProtocol.versionID;
229        } else if (protocol.equals(RefreshAuthorizationPolicyProtocol.class.getName())){
230          return RefreshAuthorizationPolicyProtocol.versionID;
231        } else if (protocol.equals(RefreshUserMappingsProtocol.class.getName())){
232          return RefreshUserMappingsProtocol.versionID;
233        } else if (protocol.equals(RefreshCallQueueProtocol.class.getName())) {
234          return RefreshCallQueueProtocol.versionID;
235        } else if (protocol.equals(GetUserMappingsProtocol.class.getName())){
236          return GetUserMappingsProtocol.versionID;
237        } else {
238          throw new IOException("Unknown protocol to name node: " + protocol);
239        }
240      }
241        
  /** Default NameNode client RPC port. */
  public static final int DEFAULT_PORT = 8020;
  public static final Log LOG = LogFactory.getLog(NameNode.class.getName());
  /** Dedicated log for namespace state-change messages. */
  public static final Log stateChangeLog = LogFactory.getLog("org.apache.hadoop.hdfs.StateChange");
  /** Dedicated log for block state-change messages. */
  public static final Log blockStateChangeLog = LogFactory.getLog("BlockStateChange");
  /** Shared, stateless HA state singletons used by createHAState(). */
  public static final HAState ACTIVE_STATE = new ActiveState();
  public static final HAState STANDBY_STATE = new StandbyState();
  
  /** The filesystem state; set by loadNamesystem(). */
  protected FSNamesystem namesystem; 
  protected final Configuration conf;
  /** Role (e.g. NAMENODE) this instance was started as; fixed at construction. */
  protected final NamenodeRole role;
  /** Current HA state; volatile because it is read without the HA lock. */
  private volatile HAState state;
  private final boolean haEnabled;
  private final HAContext haContext;
  protected final boolean allowStaleStandbyReads;

  
  /** httpServer */
  protected NameNodeHttpServer httpServer;
  /** Trash emptier daemon thread; see startTrashEmptier(). */
  private Thread emptier;
  /** only used for testing purposes  */
  protected boolean stopRequested = false;
  /** Registration information of this name-node  */
  protected NamenodeRegistration nodeRegistration;
  /** Activated plug-ins. */
  private List<ServicePlugin> plugins;
  
  private NameNodeRpcServer rpcServer;

  /** Monitors JVM pauses (e.g. GC); started in initialize(). */
  private JvmPauseMonitor pauseMonitor;
  /** JMX bean name; non-null once the status MXBean is registered. */
  private ObjectName nameNodeStatusBeanName;
  /**
   * The namenode address that clients will use to access this namenode
   * or the name service. For HA configurations using logical URI, it
   * will be the logical address.
   */
  private String clientNamenodeAddress;
278      
  /** Format a new filesystem.  Destroys any filesystem that may already
   * exist at this location.  **/
  public static void format(Configuration conf) throws IOException {
    // Delegates to the 3-arg overload (not visible in this chunk);
    // presumably (force=true, isInteractive=true) — confirm at the overload.
    format(conf, true, true);
  }
284    
  /** Metrics instance; created once by initMetrics() during startup. */
  static NameNodeMetrics metrics;
  /** Tracks startup phases for reporting (see getStartupProgress()). */
  private static final StartupProgress startupProgress = new StartupProgress();
  /** Return the {@link FSNamesystem} object.
   * @return {@link FSNamesystem} object.
   */
  public FSNamesystem getNamesystem() {
    return namesystem;
  }

  /** @return the RPC server facade exposing the NameNode protocols. */
  public NamenodeProtocols getRpcServer() {
    return rpcServer;
  }
  
  /** Create the shared metrics instance for the given role. */
  static void initMetrics(Configuration conf, NamenodeRole role) {
    metrics = NameNodeMetrics.create(conf, role);
  }

  /** @return the shared metrics instance (null before initMetrics()). */
  public static NameNodeMetrics getNameNodeMetrics() {
    return metrics;
  }

  /**
   * Returns object used for reporting namenode startup progress.
   * 
   * @return StartupProgress for reporting namenode startup progress
   */
  public static StartupProgress getStartupProgress() {
    return startupProgress;
  }

  /**
   * Return the service name of the issued delegation token.
   *
   * @return The name service id in HA-mode, or the rpc address in non-HA mode
   */
  public String getTokenServiceName() {
    return getClientNamenodeAddress();
  }
323    
324      /**
325       * Set the namenode address that will be used by clients to access this
326       * namenode or name service. This needs to be called before the config
327       * is overriden.
328       */
329      public void setClientNamenodeAddress(Configuration conf) {
330        String nnAddr = conf.get(FS_DEFAULT_NAME_KEY);
331        if (nnAddr == null) {
332          // default fs is not set.
333          clientNamenodeAddress = null;
334          return;
335        }
336    
337        LOG.info(FS_DEFAULT_NAME_KEY + " is " + nnAddr);
338        URI nnUri = URI.create(nnAddr);
339    
340        String nnHost = nnUri.getHost();
341        if (nnHost == null) {
342          clientNamenodeAddress = null;
343          return;
344        }
345    
346        if (DFSUtil.getNameServiceIds(conf).contains(nnHost)) {
347          // host name is logical
348          clientNamenodeAddress = nnHost;
349        } else if (nnUri.getPort() > 0) {
350          // physical address with a valid port
351          clientNamenodeAddress = nnUri.getAuthority();
352        } else {
353          // the port is missing or 0. Figure out real bind address later.
354          clientNamenodeAddress = null;
355          return;
356        }
357        LOG.info("Clients are to use " + clientNamenodeAddress + " to access"
358            + " this namenode/service.");
359      }
360    
361      /**
362       * Get the namenode address to be used by clients.
363       * @return nn address
364       */
365      public String getClientNamenodeAddress() {
366        return clientNamenodeAddress;
367      }
368    
369      public static InetSocketAddress getAddress(String address) {
370        return NetUtils.createSocketAddr(address, DEFAULT_PORT);
371      }
372      
373      /**
374       * Set the configuration property for the service rpc address
375       * to address
376       */
377      public static void setServiceAddress(Configuration conf,
378                                               String address) {
379        LOG.info("Setting ADDRESS " + address);
380        conf.set(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, address);
381      }
382      
383      /**
384       * Fetches the address for services to use when connecting to namenode
385       * based on the value of fallback returns null if the special
386       * address is not specified or returns the default namenode address
387       * to be used by both clients and services.
388       * Services here are datanodes, backup node, any non client connection
389       */
390      public static InetSocketAddress getServiceAddress(Configuration conf,
391                                                            boolean fallback) {
392        String addr = conf.get(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY);
393        if (addr == null || addr.isEmpty()) {
394          return fallback ? getAddress(conf) : null;
395        }
396        return getAddress(addr);
397      }
398    
  /** @return the namenode RPC address derived from the default FS URI. */
  public static InetSocketAddress getAddress(Configuration conf) {
    URI filesystemURI = FileSystem.getDefaultUri(conf);
    return getAddress(filesystemURI);
  }
403    
404    
405      /**
406       * TODO:FEDERATION
407       * @param filesystemURI
408       * @return address of file system
409       */
410      public static InetSocketAddress getAddress(URI filesystemURI) {
411        String authority = filesystemURI.getAuthority();
412        if (authority == null) {
413          throw new IllegalArgumentException(String.format(
414              "Invalid URI for NameNode address (check %s): %s has no authority.",
415              FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString()));
416        }
417        if (!HdfsConstants.HDFS_URI_SCHEME.equalsIgnoreCase(
418            filesystemURI.getScheme())) {
419          throw new IllegalArgumentException(String.format(
420              "Invalid URI for NameNode address (check %s): %s is not of scheme '%s'.",
421              FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString(),
422              HdfsConstants.HDFS_URI_SCHEME));
423        }
424        return getAddress(authority);
425      }
426    
427      public static URI getUri(InetSocketAddress namenode) {
428        int port = namenode.getPort();
429        String portString = port == DEFAULT_PORT ? "" : (":"+port);
430        return URI.create(HdfsConstants.HDFS_URI_SCHEME + "://" 
431            + namenode.getHostName()+portString);
432      }
433    
434      //
435      // Common NameNode methods implementation for the active name-node role.
436      //
437      public NamenodeRole getRole() {
438        return role;
439      }
440    
441      boolean isRole(NamenodeRole that) {
442        return role.equals(that);
443      }
444    
445      /**
446       * Given a configuration get the address of the service rpc server
447       * If the service rpc is not configured returns null
448       */
449      protected InetSocketAddress getServiceRpcServerAddress(Configuration conf) {
450        return NameNode.getServiceAddress(conf, false);
451      }
452    
453      protected InetSocketAddress getRpcServerAddress(Configuration conf) {
454        return getAddress(conf);
455      }
456      
457      /** Given a configuration get the bind host of the service rpc server
458       *  If the bind host is not configured returns null.
459       */
460      protected String getServiceRpcServerBindHost(Configuration conf) {
461        String addr = conf.getTrimmed(DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY);
462        if (addr == null || addr.isEmpty()) {
463          return null;
464        }
465        return addr;
466      }
467    
468      /** Given a configuration get the bind host of the client rpc server
469       *  If the bind host is not configured returns null.
470       */
471      protected String getRpcServerBindHost(Configuration conf) {
472        String addr = conf.getTrimmed(DFS_NAMENODE_RPC_BIND_HOST_KEY);
473        if (addr == null || addr.isEmpty()) {
474          return null;
475        }
476        return addr;
477      }
478       
479      /**
480       * Modifies the configuration passed to contain the service rpc address setting
481       */
482      protected void setRpcServiceServerAddress(Configuration conf,
483          InetSocketAddress serviceRPCAddress) {
484        setServiceAddress(conf, NetUtils.getHostPortString(serviceRPCAddress));
485      }
486    
487      protected void setRpcServerAddress(Configuration conf,
488          InetSocketAddress rpcAddress) {
489        FileSystem.setDefaultUri(conf, getUri(rpcAddress));
490      }
491    
492      protected InetSocketAddress getHttpServerAddress(Configuration conf) {
493        return getHttpAddress(conf);
494      }
495    
496      /** @return the NameNode HTTP address. */
497      public static InetSocketAddress getHttpAddress(Configuration conf) {
498        return  NetUtils.createSocketAddr(
499            conf.get(DFS_NAMENODE_HTTP_ADDRESS_KEY, DFS_NAMENODE_HTTP_ADDRESS_DEFAULT));
500      }
501    
  /** Load the namesystem from disk; overridable (e.g. for other roles). */
  protected void loadNamesystem(Configuration conf) throws IOException {
    this.namesystem = FSNamesystem.loadFromDisk(conf);
  }

  /** @return this node's registration info (null until setRegistration()). */
  NamenodeRegistration getRegistration() {
    return nodeRegistration;
  }

  /** Build and cache registration info from the live RPC/HTTP addresses. */
  NamenodeRegistration setRegistration() {
    nodeRegistration = new NamenodeRegistration(
        NetUtils.getHostPortString(rpcServer.getRpcAddress()),
        NetUtils.getHostPortString(getHttpAddress()),
        getFSImage().getStorage(), getRole());
    return nodeRegistration;
  }
517    
  /* optimize ugi lookup for RPC operations to avoid a trip through
   * UGI.getCurrentUser which is synch'ed
   */
  public static UserGroupInformation getRemoteUser() throws IOException {
    // Prefer the RPC caller's identity; fall back to the process user when
    // not inside an RPC handler.
    UserGroupInformation ugi = Server.getRemoteUser();
    return (ugi != null) ? ugi : UserGroupInformation.getCurrentUser();
  }
525    
526    
527      /**
528       * Login as the configured user for the NameNode.
529       */
530      void loginAsNameNodeUser(Configuration conf) throws IOException {
531        InetSocketAddress socAddr = getRpcServerAddress(conf);
532        SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
533            DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName());
534      }
535      
536      /**
537       * Initialize name-node.
538       * 
539       * @param conf the configuration
540       */
541      protected void initialize(Configuration conf) throws IOException {
542        if (conf.get(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS) == null) {
543          String intervals = conf.get(DFS_METRICS_PERCENTILES_INTERVALS_KEY);
544          if (intervals != null) {
545            conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
546              intervals);
547          }
548        }
549    
550        UserGroupInformation.setConfiguration(conf);
551        loginAsNameNodeUser(conf);
552    
553        NameNode.initMetrics(conf, this.getRole());
554        StartupProgressMetrics.register(startupProgress);
555    
556        if (NamenodeRole.NAMENODE == role) {
557          startHttpServer(conf);
558        }
559        loadNamesystem(conf);
560    
561        rpcServer = createRpcServer(conf);
562        if (clientNamenodeAddress == null) {
563          // This is expected for MiniDFSCluster. Set it now using 
564          // the RPC server's bind address.
565          clientNamenodeAddress = 
566              NetUtils.getHostPortString(rpcServer.getRpcAddress());
567          LOG.info("Clients are to use " + clientNamenodeAddress + " to access"
568              + " this namenode/service.");
569        }
570        if (NamenodeRole.NAMENODE == role) {
571          httpServer.setNameNodeAddress(getNameNodeAddress());
572          httpServer.setFSImage(getFSImage());
573        }
574        
575        pauseMonitor = new JvmPauseMonitor(conf);
576        pauseMonitor.start();
577    
578        startCommonServices(conf);
579      }
580      
581      /**
582       * Create the RPC server implementation. Used as an extension point for the
583       * BackupNode.
584       */
585      protected NameNodeRpcServer createRpcServer(Configuration conf)
586          throws IOException {
587        return new NameNodeRpcServer(conf, this);
588      }
589    
  /** Start the services common to active and standby states.
   *  Note the ordering: the namesystem's services come up first, then the
   *  HTTP server (for non-NAMENODE roles, which skip it in initialize()),
   *  then RPC, then plugins. Plugin failures are logged, not fatal. */
  private void startCommonServices(Configuration conf) throws IOException {
    namesystem.startCommonServices(conf, haContext);
    registerNNSMXBean();
    if (NamenodeRole.NAMENODE != role) {
      startHttpServer(conf);
      httpServer.setNameNodeAddress(getNameNodeAddress());
      httpServer.setFSImage(getFSImage());
    }
    rpcServer.start();
    plugins = conf.getInstances(DFS_NAMENODE_PLUGINS_KEY,
        ServicePlugin.class);
    for (ServicePlugin p: plugins) {
      try {
        p.start(this);
      } catch (Throwable t) {
        // Best-effort: a broken plugin must not prevent NameNode startup.
        LOG.warn("ServicePlugin " + p + " could not be started", t);
      }
    }
    LOG.info(getRole() + " RPC up at: " + rpcServer.getRpcAddress());
    if (rpcServer.getServiceRpcAddress() != null) {
      LOG.info(getRole() + " service RPC up at: "
          + rpcServer.getServiceRpcAddress());
    }
  }
615      
  /** Stop the services started by startCommonServices()/initialize().
   *  Every reference is null-checked so this is safe to call even after a
   *  partially failed startup. */
  private void stopCommonServices() {
    if(rpcServer != null) rpcServer.stop();
    if(namesystem != null) namesystem.close();
    if (pauseMonitor != null) pauseMonitor.stop();
    if (plugins != null) {
      for (ServicePlugin p : plugins) {
        try {
          p.stop();
        } catch (Throwable t) {
          // Best-effort shutdown: keep stopping the remaining plugins.
          LOG.warn("ServicePlugin " + p + " could not be stopped", t);
        }
      }
    }   
    stopHttpServer();
  }
631      
632      private void startTrashEmptier(final Configuration conf) throws IOException {
633        long trashInterval =
634            conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT);
635        if (trashInterval == 0) {
636          return;
637        } else if (trashInterval < 0) {
638          throw new IOException("Cannot start trash emptier with negative interval."
639              + " Set " + FS_TRASH_INTERVAL_KEY + " to a positive value.");
640        }
641        
642        // This may be called from the transitionToActive code path, in which
643        // case the current user is the administrator, not the NN. The trash
644        // emptier needs to run as the NN. See HDFS-3972.
645        FileSystem fs = SecurityUtil.doAsLoginUser(
646            new PrivilegedExceptionAction<FileSystem>() {
647              @Override
648              public FileSystem run() throws IOException {
649                return FileSystem.get(conf);
650              }
651            });
652        this.emptier = new Thread(new Trash(fs, conf).getEmptier(), "Trash Emptier");
653        this.emptier.setDaemon(true);
654        this.emptier.start();
655      }
656      
657      private void stopTrashEmptier() {
658        if (this.emptier != null) {
659          emptier.interrupt();
660          emptier = null;
661        }
662      }
663      
  /** Start the HTTP server and attach startup-progress reporting to it. */
  private void startHttpServer(final Configuration conf) throws IOException {
    httpServer = new NameNodeHttpServer(conf, this, getHttpServerAddress(conf));
    httpServer.start();
    httpServer.setStartupProgress(startupProgress);
  }
  
  /** Stop the HTTP server; failures are logged, not propagated. */
  private void stopHttpServer() {
    try {
      if (httpServer != null) httpServer.stop();
    } catch (Exception e) {
      LOG.error("Exception while stopping httpserver", e);
    }
  }
677    
678      /**
679       * Start NameNode.
680       * <p>
681       * The name-node can be started with one of the following startup options:
682       * <ul> 
683       * <li>{@link StartupOption#REGULAR REGULAR} - normal name node startup</li>
684       * <li>{@link StartupOption#FORMAT FORMAT} - format name node</li>
685       * <li>{@link StartupOption#BACKUP BACKUP} - start backup node</li>
686       * <li>{@link StartupOption#CHECKPOINT CHECKPOINT} - start checkpoint node</li>
687       * <li>{@link StartupOption#UPGRADE UPGRADE} - start the cluster  
688       * upgrade and create a snapshot of the current file system state</li> 
689       * <li>{@link StartupOption#RECOVER RECOVERY} - recover name node
690       * metadata</li>
691       * <li>{@link StartupOption#ROLLBACK ROLLBACK} - roll the  
692       *            cluster back to the previous state</li>
693       * <li>{@link StartupOption#FINALIZE FINALIZE} - finalize 
694       *            previous upgrade</li>
695       * <li>{@link StartupOption#IMPORT IMPORT} - import checkpoint</li>
696       * </ul>
697       * The option is passed via configuration field: 
698       * <tt>dfs.namenode.startup</tt>
699       * 
700       * The conf will be modified to reflect the actual ports on which 
701       * the NameNode is up and running if the user passes the port as
702       * <code>zero</code> in the conf.
703       * 
   * @param conf  configuration
705       * @throws IOException
706       */
  public NameNode(Configuration conf) throws IOException {
    // Default to the regular NAMENODE role.
    this(conf, NamenodeRole.NAMENODE);
  }
710    
  /**
   * Construct a NameNode with the given role. Resolves HA configuration,
   * initializes services, and enters the initial HA state under the HA
   * write lock. On failure the node is stopped before rethrowing.
   */
  protected NameNode(Configuration conf, NamenodeRole role) 
      throws IOException { 
    this.conf = conf;
    this.role = role;
    // Must run before initializeGenericKeys rewrites conf with
    // namenode-specific values (see setClientNamenodeAddress javadoc).
    setClientNamenodeAddress(conf);
    String nsId = getNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    this.haEnabled = HAUtil.isHAEnabled(conf, nsId);
    state = createHAState(getStartupOption(conf));
    this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf);
    this.haContext = createHAContext();
    try {
      initializeGenericKeys(conf, nsId, namenodeId);
      initialize(conf);
      try {
        haContext.writeLock();
        state.prepareToEnterState(haContext);
        state.enterState(haContext);
      } finally {
        haContext.writeUnlock();
      }
    } catch (IOException e) {
      // Clean up any partially started services before propagating.
      this.stop();
      throw e;
    } catch (HadoopIllegalArgumentException e) {
      this.stop();
      throw e;
    }
  }
740    
741      protected HAState createHAState(StartupOption startOpt) {
742        if (!haEnabled || startOpt == StartupOption.UPGRADE) {
743          return ACTIVE_STATE;
744        } else {
745          return STANDBY_STATE;
746        }
747      }
748    
  /** Factory for the HA context; protected so subclasses/tests can override. */
  protected HAContext createHAContext() {
    return new NameNodeHAContext();
  }
752    
753      /**
754       * Wait for service to finish.
755       * (Normally, it runs forever.)
756       */
757      public void join() {
758        try {
759          rpcServer.join();
760        } catch (InterruptedException ie) {
761          LOG.info("Caught interrupted exception ", ie);
762        }
763      }
764    
765      /**
766       * Stop all NameNode threads and wait for all to finish.
767       */
768      public void stop() {
769        synchronized(this) {
770          if (stopRequested)
771            return;
772          stopRequested = true;
773        }
774        try {
775          if (state != null) {
776            state.exitState(haContext);
777          }
778        } catch (ServiceFailedException e) {
779          LOG.warn("Encountered exception while exiting state ", e);
780        } finally {
781          stopCommonServices();
782          if (metrics != null) {
783            metrics.shutdown();
784          }
785          if (namesystem != null) {
786            namesystem.shutdown();
787          }
788          if (nameNodeStatusBeanName != null) {
789            MBeans.unregister(nameNodeStatusBeanName);
790            nameNodeStatusBeanName = null;
791          }
792        }
793      }
794    
  /** @return true once {@link #stop()} has been requested. */
  synchronized boolean isStopRequested() {
    return stopRequested;
  }
798    
799      /**
800       * Is the cluster currently in safe mode?
801       */
802      public boolean isInSafeMode() {
803        return namesystem.isInSafeMode();
804      }
805        
  /** Get the FSImage backing this NameNode's namesystem (exposed for tests). */
  @VisibleForTesting
  public FSImage getFSImage() {
    return namesystem.dir.fsImage;
  }
811    
812      /**
813       * @return NameNode RPC address
814       */
815      public InetSocketAddress getNameNodeAddress() {
816        return rpcServer.getRpcAddress();
817      }
818    
819      /**
820       * @return NameNode RPC address in "host:port" string form
821       */
822      public String getNameNodeAddressHostPortString() {
823        return NetUtils.getHostPortString(rpcServer.getRpcAddress());
824      }
825    
826      /**
827       * @return NameNode service RPC address if configured, the
828       *    NameNode RPC address otherwise
829       */
830      public InetSocketAddress getServiceRpcAddress() {
831        final InetSocketAddress serviceAddr = rpcServer.getServiceRpcAddress();
832        return serviceAddr == null ? rpcServer.getRpcAddress() : serviceAddr;
833      }
834    
835      /**
836       * @return NameNode HTTP address, used by the Web UI, image transfer,
837       *    and HTTP-based file system clients like Hftp and WebHDFS
838       */
839      public InetSocketAddress getHttpAddress() {
840        return httpServer.getHttpAddress();
841      }
842    
843      /**
844       * @return NameNode HTTPS address, used by the Web UI, image transfer,
845       *    and HTTP-based file system clients like Hftp and WebHDFS
846       */
847      public InetSocketAddress getHttpsAddress() {
848        return httpServer.getHttpsAddress();
849      }
850    
851      /**
852       * Verify that configured directories exist, then
853       * Interactively confirm that formatting is desired 
854       * for each existing directory and format them.
855       * 
856       * @param conf
857       * @param force
858       * @return true if formatting was aborted, false otherwise
859       * @throws IOException
860       */
861      private static boolean format(Configuration conf, boolean force,
862          boolean isInteractive) throws IOException {
863        String nsId = DFSUtil.getNamenodeNameServiceId(conf);
864        String namenodeId = HAUtil.getNameNodeId(conf, nsId);
865        initializeGenericKeys(conf, nsId, namenodeId);
866        checkAllowFormat(conf);
867    
868        if (UserGroupInformation.isSecurityEnabled()) {
869          InetSocketAddress socAddr = getAddress(conf);
870          SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
871              DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName());
872        }
873        
874        Collection<URI> nameDirsToFormat = FSNamesystem.getNamespaceDirs(conf);
875        List<URI> sharedDirs = FSNamesystem.getSharedEditsDirs(conf);
876        List<URI> dirsToPrompt = new ArrayList<URI>();
877        dirsToPrompt.addAll(nameDirsToFormat);
878        dirsToPrompt.addAll(sharedDirs);
879        List<URI> editDirsToFormat = 
880                     FSNamesystem.getNamespaceEditsDirs(conf);
881    
882        // if clusterID is not provided - see if you can find the current one
883        String clusterId = StartupOption.FORMAT.getClusterId();
884        if(clusterId == null || clusterId.equals("")) {
885          //Generate a new cluster id
886          clusterId = NNStorage.newClusterID();
887        }
888        System.out.println("Formatting using clusterid: " + clusterId);
889        
890        FSImage fsImage = new FSImage(conf, nameDirsToFormat, editDirsToFormat);
891        try {
892          FSNamesystem fsn = new FSNamesystem(conf, fsImage);
893          fsImage.getEditLog().initJournalsForWrite();
894    
895          if (!fsImage.confirmFormat(force, isInteractive)) {
896            return true; // aborted
897          }
898    
899          fsImage.format(fsn, clusterId);
900        } catch (IOException ioe) {
901          LOG.warn("Encountered exception during format: ", ioe);
902          fsImage.close();
903          throw ioe;
904        }
905        return false;
906      }
907    
908      public static void checkAllowFormat(Configuration conf) throws IOException {
909        if (!conf.getBoolean(DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY, 
910            DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT)) {
911          throw new IOException("The option " + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY
912                    + " is set to false for this filesystem, so it "
913                    + "cannot be formatted. You will need to set "
914                    + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY +" parameter "
915                    + "to true in order to format this filesystem");
916        }
917      }
918      
  /** Initialize shared edits, forcing the format (test convenience). */
  @VisibleForTesting
  public static boolean initializeSharedEdits(Configuration conf) throws IOException {
    return initializeSharedEdits(conf, true);
  }
923      
  /** Initialize shared edits non-interactively (test convenience). */
  @VisibleForTesting
  public static boolean initializeSharedEdits(Configuration conf,
      boolean force) throws IOException {
    return initializeSharedEdits(conf, force, false);
  }
929    
930      /**
931       * Clone the supplied configuration but remove the shared edits dirs.
932       *
933       * @param conf Supplies the original configuration.
934       * @return Cloned configuration without the shared edit dirs.
935       * @throws IOException on failure to generate the configuration.
936       */
937      private static Configuration getConfigurationWithoutSharedEdits(
938          Configuration conf)
939          throws IOException {
940        List<URI> editsDirs = FSNamesystem.getNamespaceEditsDirs(conf, false);
941        String editsDirsString = Joiner.on(",").join(editsDirs);
942    
943        Configuration confWithoutShared = new Configuration(conf);
944        confWithoutShared.unset(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY);
945        confWithoutShared.setStrings(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY,
946            editsDirsString);
947        return confWithoutShared;
948      }
949    
950      /**
951       * Format a new shared edits dir and copy in enough edit log segments so that
952       * the standby NN can start up.
953       * 
954       * @param conf configuration
955       * @param force format regardless of whether or not the shared edits dir exists
956       * @param interactive prompt the user when a dir exists
957       * @return true if the command aborts, false otherwise
958       */
959      private static boolean initializeSharedEdits(Configuration conf,
960          boolean force, boolean interactive) throws IOException {
961        String nsId = DFSUtil.getNamenodeNameServiceId(conf);
962        String namenodeId = HAUtil.getNameNodeId(conf, nsId);
963        initializeGenericKeys(conf, nsId, namenodeId);
964        
965        if (conf.get(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY) == null) {
966          LOG.fatal("No shared edits directory configured for namespace " +
967              nsId + " namenode " + namenodeId);
968          return false;
969        }
970    
971        if (UserGroupInformation.isSecurityEnabled()) {
972          InetSocketAddress socAddr = getAddress(conf);
973          SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
974              DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName());
975        }
976    
977        NNStorage existingStorage = null;
978        FSImage sharedEditsImage = null;
979        try {
980          FSNamesystem fsns =
981              FSNamesystem.loadFromDisk(getConfigurationWithoutSharedEdits(conf));
982          
983          existingStorage = fsns.getFSImage().getStorage();
984          NamespaceInfo nsInfo = existingStorage.getNamespaceInfo();
985          
986          List<URI> sharedEditsDirs = FSNamesystem.getSharedEditsDirs(conf);
987          
988          sharedEditsImage = new FSImage(conf,
989              Lists.<URI>newArrayList(),
990              sharedEditsDirs);
991          sharedEditsImage.getEditLog().initJournalsForWrite();
992          
993          if (!sharedEditsImage.confirmFormat(force, interactive)) {
994            return true; // abort
995          }
996          
997          NNStorage newSharedStorage = sharedEditsImage.getStorage();
998          // Call Storage.format instead of FSImage.format here, since we don't
999          // actually want to save a checkpoint - just prime the dirs with
1000          // the existing namespace info
1001          newSharedStorage.format(nsInfo);
1002          sharedEditsImage.getEditLog().formatNonFileJournals(nsInfo);
1003    
1004          // Need to make sure the edit log segments are in good shape to initialize
1005          // the shared edits dir.
1006          fsns.getFSImage().getEditLog().close();
1007          fsns.getFSImage().getEditLog().initJournalsForWrite();
1008          fsns.getFSImage().getEditLog().recoverUnclosedStreams();
1009    
1010          copyEditLogSegmentsToSharedDir(fsns, sharedEditsDirs, newSharedStorage,
1011              conf);
1012        } catch (IOException ioe) {
1013          LOG.error("Could not initialize shared edits dir", ioe);
1014          return true; // aborted
1015        } finally {
1016          if (sharedEditsImage != null) {
1017            try {
1018              sharedEditsImage.close();
1019            }  catch (IOException ioe) {
1020              LOG.warn("Could not close sharedEditsImage", ioe);
1021            }
1022          }
1023          // Have to unlock storage explicitly for the case when we're running in a
1024          // unit test, which runs in the same JVM as NNs.
1025          if (existingStorage != null) {
1026            try {
1027              existingStorage.unlockAll();
1028            } catch (IOException ioe) {
1029              LOG.warn("Could not unlock storage directories", ioe);
1030              return true; // aborted
1031            }
1032          }
1033        }
1034        return false; // did not abort
1035      }
1036    
  /**
   * Copy all edit log segments after the most recent checkpoint from the
   * local edit log into the freshly formatted shared edits dir, replaying
   * each op and reproducing segment boundaries as END_LOG_SEGMENT ops are
   * encountered.
   *
   * @param fsns the local namesystem providing the source edit log
   * @param sharedEditsDirs the shared edits dirs to copy into (must be non-empty)
   * @param newSharedStorage storage backing the new shared edit log
   * @param conf configuration
   * @throws IOException if reading the source or writing the shared log fails
   */
  private static void copyEditLogSegmentsToSharedDir(FSNamesystem fsns,
      Collection<URI> sharedEditsDirs, NNStorage newSharedStorage,
      Configuration conf) throws IOException {
    Preconditions.checkArgument(!sharedEditsDirs.isEmpty(),
        "No shared edits specified");
    // Copy edit log segments into the new shared edits dir.
    List<URI> sharedEditsUris = new ArrayList<URI>(sharedEditsDirs);
    FSEditLog newSharedEditLog = new FSEditLog(conf, newSharedStorage,
        sharedEditsUris);
    newSharedEditLog.initJournalsForWrite();
    newSharedEditLog.recoverUnclosedStreams();
    
    FSEditLog sourceEditLog = fsns.getFSImage().editLog;
    
    long fromTxId = fsns.getFSImage().getMostRecentCheckpointTxId();
    
    Collection<EditLogInputStream> streams = null;
    try {
      streams = sourceEditLog.selectInputStreams(fromTxId + 1, 0);

      // Set the nextTxid to the CheckpointTxId+1
      newSharedEditLog.setNextTxId(fromTxId + 1);

      // Copy all edits after last CheckpointTxId to shared edits dir
      for (EditLogInputStream stream : streams) {
        LOG.debug("Beginning to copy stream " + stream + " to shared edits");
        FSEditLogOp op;
        // Tracks whether a log segment is currently open in the target log;
        // segments are opened lazily on the first op and closed when an
        // END_LOG_SEGMENT op (or end of stream) is seen.
        boolean segmentOpen = false;
        while ((op = stream.readOp()) != null) {
          if (LOG.isTraceEnabled()) {
            LOG.trace("copying op: " + op);
          }
          if (!segmentOpen) {
            newSharedEditLog.startLogSegment(op.txid, false);
            segmentOpen = true;
          }

          newSharedEditLog.logEdit(op);

          if (op.opCode == FSEditLogOpCodes.OP_END_LOG_SEGMENT) {
            newSharedEditLog.logSync();
            newSharedEditLog.endCurrentLogSegment(false);
            LOG.debug("ending log segment because of END_LOG_SEGMENT op in "
                + stream);
            segmentOpen = false;
          }
        }

        // Close out a segment left open by a stream that ended without an
        // explicit END_LOG_SEGMENT op.
        if (segmentOpen) {
          LOG.debug("ending log segment because of end of stream in " + stream);
          newSharedEditLog.logSync();
          newSharedEditLog.endCurrentLogSegment(false);
          segmentOpen = false;
        }
      }
    } finally {
      if (streams != null) {
        FSEditLog.closeAllStreams(streams);
      }
    }
  }
1098      
1099      @VisibleForTesting
1100      public static boolean doRollback(Configuration conf,
1101          boolean isConfirmationNeeded) throws IOException {
1102        String nsId = DFSUtil.getNamenodeNameServiceId(conf);
1103        String namenodeId = HAUtil.getNameNodeId(conf, nsId);
1104        initializeGenericKeys(conf, nsId, namenodeId);
1105    
1106        FSNamesystem nsys = new FSNamesystem(conf, new FSImage(conf));
1107        System.err.print(
1108            "\"rollBack\" will remove the current state of the file system,\n"
1109            + "returning you to the state prior to initiating your recent.\n"
1110            + "upgrade. This action is permanent and cannot be undone. If you\n"
1111            + "are performing a rollback in an HA environment, you should be\n"
1112            + "certain that no NameNode process is running on any host.");
1113        if (isConfirmationNeeded) {
1114          if (!confirmPrompt("Roll back file system state?")) {
1115            System.err.println("Rollback aborted.");
1116            return true;
1117          }
1118        }
1119        nsys.dir.fsImage.doRollback(nsys);
1120        return false;
1121      }
1122    
  /** Print the NameNode usage banner to the given stream. */
  private static void printUsage(PrintStream out) {
    out.println(USAGE + "\n");
  }
1126    
1127      @VisibleForTesting
1128      static StartupOption parseArguments(String args[]) {
1129        int argsLen = (args == null) ? 0 : args.length;
1130        StartupOption startOpt = StartupOption.REGULAR;
1131        for(int i=0; i < argsLen; i++) {
1132          String cmd = args[i];
1133          if (StartupOption.FORMAT.getName().equalsIgnoreCase(cmd)) {
1134            startOpt = StartupOption.FORMAT;
1135            for (i = i + 1; i < argsLen; i++) {
1136              if (args[i].equalsIgnoreCase(StartupOption.CLUSTERID.getName())) {
1137                i++;
1138                if (i >= argsLen) {
1139                  // if no cluster id specified, return null
1140                  LOG.fatal("Must specify a valid cluster ID after the "
1141                      + StartupOption.CLUSTERID.getName() + " flag");
1142                  return null;
1143                }
1144                String clusterId = args[i];
1145                // Make sure an id is specified and not another flag
1146                if (clusterId.isEmpty() ||
1147                    clusterId.equalsIgnoreCase(StartupOption.FORCE.getName()) ||
1148                    clusterId.equalsIgnoreCase(
1149                        StartupOption.NONINTERACTIVE.getName())) {
1150                  LOG.fatal("Must specify a valid cluster ID after the "
1151                      + StartupOption.CLUSTERID.getName() + " flag");
1152                  return null;
1153                }
1154                startOpt.setClusterId(clusterId);
1155              }
1156    
1157              if (args[i].equalsIgnoreCase(StartupOption.FORCE.getName())) {
1158                startOpt.setForceFormat(true);
1159              }
1160    
1161              if (args[i].equalsIgnoreCase(StartupOption.NONINTERACTIVE.getName())) {
1162                startOpt.setInteractiveFormat(false);
1163              }
1164            }
1165          } else if (StartupOption.GENCLUSTERID.getName().equalsIgnoreCase(cmd)) {
1166            startOpt = StartupOption.GENCLUSTERID;
1167          } else if (StartupOption.REGULAR.getName().equalsIgnoreCase(cmd)) {
1168            startOpt = StartupOption.REGULAR;
1169          } else if (StartupOption.BACKUP.getName().equalsIgnoreCase(cmd)) {
1170            startOpt = StartupOption.BACKUP;
1171          } else if (StartupOption.CHECKPOINT.getName().equalsIgnoreCase(cmd)) {
1172            startOpt = StartupOption.CHECKPOINT;
1173          } else if (StartupOption.UPGRADE.getName().equalsIgnoreCase(cmd)) {
1174            startOpt = StartupOption.UPGRADE;
1175            /* Can be followed by CLUSTERID with a required parameter or
1176             * RENAMERESERVED with an optional parameter
1177             */
1178            while (i + 1 < argsLen) {
1179              String flag = args[i + 1];
1180              if (flag.equalsIgnoreCase(StartupOption.CLUSTERID.getName())) {
1181                if (i + 2 < argsLen) {
1182                  i += 2;
1183                  startOpt.setClusterId(args[i]);
1184                } else {
1185                  LOG.fatal("Must specify a valid cluster ID after the "
1186                      + StartupOption.CLUSTERID.getName() + " flag");
1187                  return null;
1188                }
1189              } else if (flag.equalsIgnoreCase(StartupOption.RENAMERESERVED
1190                  .getName())) {
1191                if (i + 2 < argsLen) {
1192                  FSImageFormat.setRenameReservedPairs(args[i + 2]);
1193                  i += 2;
1194                } else {
1195                  FSImageFormat.useDefaultRenameReservedPairs();
1196                  i += 1;
1197                }
1198              } else {
1199                LOG.fatal("Unknown upgrade flag " + flag);
1200                return null;
1201              }
1202            }
1203          } else if (StartupOption.ROLLINGUPGRADE.getName().equalsIgnoreCase(cmd)) {
1204            startOpt = StartupOption.ROLLINGUPGRADE;
1205            ++i;
1206            startOpt.setRollingUpgradeStartupOption(args[i]);
1207          } else if (StartupOption.ROLLBACK.getName().equalsIgnoreCase(cmd)) {
1208            startOpt = StartupOption.ROLLBACK;
1209          } else if (StartupOption.FINALIZE.getName().equalsIgnoreCase(cmd)) {
1210            startOpt = StartupOption.FINALIZE;
1211          } else if (StartupOption.IMPORT.getName().equalsIgnoreCase(cmd)) {
1212            startOpt = StartupOption.IMPORT;
1213          } else if (StartupOption.BOOTSTRAPSTANDBY.getName().equalsIgnoreCase(cmd)) {
1214            startOpt = StartupOption.BOOTSTRAPSTANDBY;
1215            return startOpt;
1216          } else if (StartupOption.INITIALIZESHAREDEDITS.getName().equalsIgnoreCase(cmd)) {
1217            startOpt = StartupOption.INITIALIZESHAREDEDITS;
1218            for (i = i + 1 ; i < argsLen; i++) {
1219              if (StartupOption.NONINTERACTIVE.getName().equals(args[i])) {
1220                startOpt.setInteractiveFormat(false);
1221              } else if (StartupOption.FORCE.getName().equals(args[i])) {
1222                startOpt.setForceFormat(true);
1223              } else {
1224                LOG.fatal("Invalid argument: " + args[i]);
1225                return null;
1226              }
1227            }
1228            return startOpt;
1229          } else if (StartupOption.RECOVER.getName().equalsIgnoreCase(cmd)) {
1230            if (startOpt != StartupOption.REGULAR) {
1231              throw new RuntimeException("Can't combine -recover with " +
1232                  "other startup options.");
1233            }
1234            startOpt = StartupOption.RECOVER;
1235            while (++i < argsLen) {
1236              if (args[i].equalsIgnoreCase(
1237                    StartupOption.FORCE.getName())) {
1238                startOpt.setForce(MetaRecoveryContext.FORCE_FIRST_CHOICE);
1239              } else {
1240                throw new RuntimeException("Error parsing recovery options: " + 
1241                  "can't understand option \"" + args[i] + "\"");
1242              }
1243            }
1244          } else {
1245            return null;
1246          }
1247        }
1248        return startOpt;
1249      }
1250    
  /** Record the chosen startup option in the configuration for later lookup. */
  private static void setStartupOption(Configuration conf, StartupOption opt) {
    conf.set(DFS_NAMENODE_STARTUP_KEY, opt.name());
  }
1254    
1255      static StartupOption getStartupOption(Configuration conf) {
1256        return StartupOption.valueOf(conf.get(DFS_NAMENODE_STARTUP_KEY,
1257                                              StartupOption.REGULAR.toString()));
1258      }
1259    
  /**
   * Run metadata recovery: load the namesystem from disk and save a fresh
   * namespace image. Prompts for confirmation unless FORCE_ALL was given,
   * since recovery can permanently delete data.
   *
   * @param startOpt recovery startup option carrying the force level
   * @param conf configuration for the node being recovered
   * @throws IOException if recovery fails
   */
  private static void doRecovery(StartupOption startOpt, Configuration conf)
      throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);
    if (startOpt.getForce() < MetaRecoveryContext.FORCE_ALL) {
      if (!confirmPrompt("You have selected Metadata Recovery mode.  " +
          "This mode is intended to recover lost metadata on a corrupt " +
          "filesystem.  Metadata recovery mode often permanently deletes " +
          "data from your HDFS filesystem.  Please back up your edit log " +
          "and fsimage before trying this!\n\n" +
          "Are you ready to proceed? (Y/N)\n")) {
        System.err.println("Recovery aborted at user request.\n");
        return;
      }
    }
    MetaRecoveryContext.LOG.info("starting recovery...");
    UserGroupInformation.setConfiguration(conf);
    NameNode.initMetrics(conf, startOpt.toNodeRole());
    FSNamesystem fsn = null;
    try {
      fsn = FSNamesystem.loadFromDisk(conf);
      // Persist the recovered namespace back to disk.
      fsn.getFSImage().saveNamespace(fsn);
      MetaRecoveryContext.LOG.info("RECOVERY COMPLETE");
    } catch (IOException e) {
      MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
      throw e;
    } catch (RuntimeException e) {
      MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
      throw e;
    } finally {
      if (fsn != null)
        fsn.close();
    }
  }
1295    
  /**
   * Parse the command-line arguments and either run a one-shot admin action
   * (format, rollback, etc. — these call terminate() and exit the JVM) or
   * construct and return a running NameNode/BackupNode.
   *
   * @param argv command-line arguments
   * @param conf configuration to use; a new HdfsConfiguration if null
   * @return the created node, or null when the invocation was an admin action
   * @throws IOException on startup failure
   */
  public static NameNode createNameNode(String argv[], Configuration conf)
      throws IOException {
    LOG.info("createNameNode " + Arrays.asList(argv));
    if (conf == null)
      conf = new HdfsConfiguration();
    StartupOption startOpt = parseArguments(argv);
    if (startOpt == null) {
      printUsage(System.err);
      return null;
    }
    // Stash the parsed option so later code can retrieve it from the conf.
    setStartupOption(conf, startOpt);

    switch (startOpt) {
      case FORMAT: {
        boolean aborted = format(conf, startOpt.getForceFormat(),
            startOpt.getInteractiveFormat());
        terminate(aborted ? 1 : 0);
        return null; // avoid javac warning
      }
      case GENCLUSTERID: {
        System.err.println("Generating new cluster id:");
        System.out.println(NNStorage.newClusterID());
        terminate(0);
        return null;
      }
      case FINALIZE: {
        System.err.println("Use of the argument '" + StartupOption.FINALIZE +
            "' is no longer supported. To finalize an upgrade, start the NN " +
            " and then run `hdfs dfsadmin -finalizeUpgrade'");
        terminate(1);
        return null; // avoid javac warning
      }
      case ROLLBACK: {
        boolean aborted = doRollback(conf, true);
        terminate(aborted ? 1 : 0);
        return null; // avoid warning
      }
      case BOOTSTRAPSTANDBY: {
        // Remaining args are passed through to the bootstrap tool.
        String toolArgs[] = Arrays.copyOfRange(argv, 1, argv.length);
        int rc = BootstrapStandby.run(toolArgs, conf);
        terminate(rc);
        return null; // avoid warning
      }
      case INITIALIZESHAREDEDITS: {
        boolean aborted = initializeSharedEdits(conf,
            startOpt.getForceFormat(),
            startOpt.getInteractiveFormat());
        terminate(aborted ? 1 : 0);
        return null; // avoid warning
      }
      case BACKUP:
      case CHECKPOINT: {
        NamenodeRole role = startOpt.toNodeRole();
        DefaultMetricsSystem.initialize(role.toString().replace(" ", ""));
        return new BackupNode(conf, role);
      }
      case RECOVER: {
        NameNode.doRecovery(startOpt, conf);
        return null;
      }
      default: {
        DefaultMetricsSystem.initialize("NameNode");
        return new NameNode(conf);
      }
    }
  }
1362    
1363      /**
1364       * In federation configuration is set for a set of
1365       * namenode and secondary namenode/backup/checkpointer, which are
1366       * grouped under a logical nameservice ID. The configuration keys specific 
1367       * to them have suffix set to configured nameserviceId.
1368       * 
1369       * This method copies the value from specific key of format key.nameserviceId
1370       * to key, to set up the generic configuration. Once this is done, only
1371       * generic version of the configuration is read in rest of the code, for
1372       * backward compatibility and simpler code changes.
1373       * 
1374       * @param conf
1375       *          Configuration object to lookup specific key and to set the value
1376       *          to the key passed. Note the conf object is modified
1377       * @param nameserviceId name service Id (to distinguish federated NNs)
1378       * @param namenodeId the namenode ID (to distinguish HA NNs)
1379       * @see DFSUtil#setGenericConf(Configuration, String, String, String...)
1380       */
1381      public static void initializeGenericKeys(Configuration conf,
1382          String nameserviceId, String namenodeId) {
1383        if ((nameserviceId != null && !nameserviceId.isEmpty()) || 
1384            (namenodeId != null && !namenodeId.isEmpty())) {
1385          if (nameserviceId != null) {
1386            conf.set(DFS_NAMESERVICE_ID, nameserviceId);
1387          }
1388          if (namenodeId != null) {
1389            conf.set(DFS_HA_NAMENODE_ID_KEY, namenodeId);
1390          }
1391          
1392          DFSUtil.setGenericConf(conf, nameserviceId, namenodeId,
1393              NAMENODE_SPECIFIC_KEYS);
1394          DFSUtil.setGenericConf(conf, nameserviceId, null,
1395              NAMESERVICE_SPECIFIC_KEYS);
1396        }
1397        
1398        // If the RPC address is set use it to (re-)configure the default FS
1399        if (conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY) != null) {
1400          URI defaultUri = URI.create(HdfsConstants.HDFS_URI_SCHEME + "://"
1401              + conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY));
1402          conf.set(FS_DEFAULT_NAME_KEY, defaultUri.toString());
1403          LOG.debug("Setting " + FS_DEFAULT_NAME_KEY + " to " + defaultUri.toString());
1404        }
1405      }
1406        
1407      /** 
1408       * Get the name service Id for the node
1409       * @return name service Id or null if federation is not configured
1410       */
1411      protected String getNameServiceId(Configuration conf) {
1412        return DFSUtil.getNamenodeNameServiceId(conf);
1413      }
1414      
1415      /**
1416       */
1417      public static void main(String argv[]) throws Exception {
1418        if (DFSUtil.parseHelpArgument(argv, NameNode.USAGE, System.out, true)) {
1419          System.exit(0);
1420        }
1421    
1422        try {
1423          StringUtils.startupShutdownMessage(NameNode.class, argv, LOG);
1424          NameNode namenode = createNameNode(argv, null);
1425          if (namenode != null) {
1426            namenode.join();
1427          }
1428        } catch (Throwable e) {
1429          LOG.fatal("Exception in namenode join", e);
1430          terminate(1, e);
1431        }
1432      }
1433    
  /**
   * Health probe used by the HA framework. A no-op when HA is disabled;
   * otherwise fails when the NameNode has no local resources available.
   *
   * @throws HealthCheckFailedException if no resources are available
   * @throws AccessControlException if the caller lacks superuser privilege
   */
  synchronized void monitorHealth() 
      throws HealthCheckFailedException, AccessControlException {
    namesystem.checkSuperuserPrivilege();
    if (!haEnabled) {
      return; // no-op, if HA is not enabled
    }
    getNamesystem().checkAvailableResources();
    if (!getNamesystem().nameNodeHasResourcesAvailable()) {
      throw new HealthCheckFailedException(
          "The NameNode has no resources available");
    }
  }
1446      
  /**
   * Transition this NameNode to the active HA state.
   *
   * @throws ServiceFailedException if HA is not enabled or the transition fails
   * @throws AccessControlException if the caller lacks superuser privilege
   */
  synchronized void transitionToActive() 
      throws ServiceFailedException, AccessControlException {
    namesystem.checkSuperuserPrivilege();
    if (!haEnabled) {
      throw new ServiceFailedException("HA for namenode is not enabled");
    }
    state.setState(haContext, ACTIVE_STATE);
  }
1455      
  /**
   * Transition this NameNode to the standby HA state.
   *
   * @throws ServiceFailedException if HA is not enabled or the transition fails
   * @throws AccessControlException if the caller lacks superuser privilege
   */
  synchronized void transitionToStandby() 
      throws ServiceFailedException, AccessControlException {
    namesystem.checkSuperuserPrivilege();
    if (!haEnabled) {
      throw new ServiceFailedException("HA for namenode is not enabled");
    }
    state.setState(haContext, STANDBY_STATE);
  }
1464    
1465      synchronized HAServiceStatus getServiceStatus()
1466          throws ServiceFailedException, AccessControlException {
1467        namesystem.checkSuperuserPrivilege();
1468        if (!haEnabled) {
1469          throw new ServiceFailedException("HA for namenode is not enabled");
1470        }
1471        if (state == null) {
1472          return new HAServiceStatus(HAServiceState.INITIALIZING);
1473        }
1474        HAServiceState retState = state.getServiceState();
1475        HAServiceStatus ret = new HAServiceStatus(retState);
1476        if (retState == HAServiceState.STANDBY) {
1477          String safemodeTip = namesystem.getSafeModeTip();
1478          if (!safemodeTip.isEmpty()) {
1479            ret.setNotReadyToBecomeActive(
1480                "The NameNode is in safemode. " +
1481                safemodeTip);
1482          } else {
1483            ret.setReadyToBecomeActive();
1484          }
1485        } else if (retState == HAServiceState.ACTIVE) {
1486          ret.setReadyToBecomeActive();
1487        } else {
1488          ret.setNotReadyToBecomeActive("State is " + state);
1489        }
1490        return ret;
1491      }
1492    
1493      synchronized HAServiceState getServiceState() {
1494        if (state == null) {
1495          return HAServiceState.INITIALIZING;
1496        }
1497        return state.getServiceState();
1498      }
1499    
1500      /**
1501       * Register NameNodeStatusMXBean
1502       */
1503      private void registerNNSMXBean() {
1504        nameNodeStatusBeanName = MBeans.register("NameNode", "NameNodeStatus", this);
1505      }
1506    
1507      @Override // NameNodeStatusMXBean
1508      public String getNNRole() {
1509        String roleStr = "";
1510        NamenodeRole role = getRole();
1511        if (null != role) {
1512          roleStr = role.toString();
1513        }
1514        return roleStr;
1515      }
1516    
1517      @Override // NameNodeStatusMXBean
1518      public String getState() {
1519        String servStateStr = "";
1520        HAServiceState servState = getServiceState();
1521        if (null != servState) {
1522          servStateStr = servState.toString();
1523        }
1524        return servStateStr;
1525      }
1526    
1527      @Override // NameNodeStatusMXBean
1528      public String getHostAndPort() {
1529        return getNameNodeAddressHostPortString();
1530      }
1531    
1532      @Override // NameNodeStatusMXBean
1533      public boolean isSecurityEnabled() {
1534        return UserGroupInformation.isSecurityEnabled();
1535      }
1536    
1537      /**
1538       * Shutdown the NN immediately in an ungraceful way. Used when it would be
1539       * unsafe for the NN to continue operating, e.g. during a failed HA state
1540       * transition.
1541       * 
1542       * @param t exception which warrants the shutdown. Printed to the NN log
1543       *          before exit.
1544       * @throws ExitException thrown only for testing.
1545       */
1546      protected synchronized void doImmediateShutdown(Throwable t)
1547          throws ExitException {
1548        String message = "Error encountered requiring NN shutdown. " +
1549            "Shutting down immediately.";
1550        try {
1551          LOG.fatal(message, t);
1552        } catch (Throwable ignored) {
1553          // This is unlikely to happen, but there's nothing we can do if it does.
1554        }
1555        terminate(1, t);
1556      }
1557      
1558      /**
1559       * Class used to expose {@link NameNode} as context to {@link HAState}
1560       */
1561      protected class NameNodeHAContext implements HAContext {
1562        @Override
1563        public void setState(HAState s) {
1564          state = s;
1565        }
1566    
1567        @Override
1568        public HAState getState() {
1569          return state;
1570        }
1571    
1572        @Override
1573        public void startActiveServices() throws IOException {
1574          try {
1575            namesystem.startActiveServices();
1576            startTrashEmptier(conf);
1577          } catch (Throwable t) {
1578            doImmediateShutdown(t);
1579          }
1580        }
1581    
1582        @Override
1583        public void stopActiveServices() throws IOException {
1584          try {
1585            if (namesystem != null) {
1586              namesystem.stopActiveServices();
1587            }
1588            stopTrashEmptier();
1589          } catch (Throwable t) {
1590            doImmediateShutdown(t);
1591          }
1592        }
1593    
1594        @Override
1595        public void startStandbyServices() throws IOException {
1596          try {
1597            namesystem.startStandbyServices(conf);
1598          } catch (Throwable t) {
1599            doImmediateShutdown(t);
1600          }
1601        }
1602    
1603        @Override
1604        public void prepareToStopStandbyServices() throws ServiceFailedException {
1605          try {
1606            namesystem.prepareToStopStandbyServices();
1607          } catch (Throwable t) {
1608            doImmediateShutdown(t);
1609          }
1610        }
1611        
1612        @Override
1613        public void stopStandbyServices() throws IOException {
1614          try {
1615            if (namesystem != null) {
1616              namesystem.stopStandbyServices();
1617            }
1618          } catch (Throwable t) {
1619            doImmediateShutdown(t);
1620          }
1621        }
1622        
1623        @Override
1624        public void writeLock() {
1625          namesystem.writeLock();
1626          namesystem.lockRetryCache();
1627        }
1628        
1629        @Override
1630        public void writeUnlock() {
1631          namesystem.unlockRetryCache();
1632          namesystem.writeUnlock();
1633        }
1634        
1635        /** Check if an operation of given category is allowed */
1636        @Override
1637        public void checkOperation(final OperationCategory op)
1638            throws StandbyException {
1639          state.checkOperation(haContext, op);
1640        }
1641        
1642        @Override
1643        public boolean allowStaleReads() {
1644          return allowStaleStandbyReads;
1645        }
1646    
1647      }
1648      
1649      public boolean isStandbyState() {
1650        return (state.equals(STANDBY_STATE));
1651      }
1652    
1653      /**
1654       * Check that a request to change this node's HA state is valid.
1655       * In particular, verifies that, if auto failover is enabled, non-forced
1656       * requests from the HAAdmin CLI are rejected, and vice versa.
1657       *
1658       * @param req the request to check
1659       * @throws AccessControlException if the request is disallowed
1660       */
1661      void checkHaStateChange(StateChangeRequestInfo req)
1662          throws AccessControlException {
1663        boolean autoHaEnabled = conf.getBoolean(DFS_HA_AUTO_FAILOVER_ENABLED_KEY,
1664            DFS_HA_AUTO_FAILOVER_ENABLED_DEFAULT);
1665        switch (req.getSource()) {
1666        case REQUEST_BY_USER:
1667          if (autoHaEnabled) {
1668            throw new AccessControlException(
1669                "Manual HA control for this NameNode is disallowed, because " +
1670                "automatic HA is enabled.");
1671          }
1672          break;
1673        case REQUEST_BY_USER_FORCED:
1674          if (autoHaEnabled) {
1675            LOG.warn("Allowing manual HA control from " +
1676                Server.getRemoteAddress() +
1677                " even though automatic HA is enabled, because the user " +
1678                "specified the force flag");
1679          }
1680          break;
1681        case REQUEST_BY_ZKFC:
1682          if (!autoHaEnabled) {
1683            throw new AccessControlException(
1684                "Request from ZK failover controller at " +
1685                Server.getRemoteAddress() + " denied since automatic HA " +
1686                "is not enabled"); 
1687          }
1688          break;
1689        }
1690      }
1691    }