001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.hdfs.server.namenode;
019
020 import com.google.common.annotations.VisibleForTesting;
021 import com.google.common.base.Joiner;
022 import com.google.common.base.Preconditions;
023 import com.google.common.collect.Lists;
024
025 import org.apache.commons.logging.Log;
026 import org.apache.commons.logging.LogFactory;
027 import org.apache.hadoop.HadoopIllegalArgumentException;
028 import org.apache.hadoop.classification.InterfaceAudience;
029 import org.apache.hadoop.conf.Configuration;
030 import org.apache.hadoop.fs.FileSystem;
031 import org.apache.hadoop.fs.Trash;
032 import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
033 import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
034 import org.apache.hadoop.ha.HAServiceStatus;
035 import org.apache.hadoop.ha.HealthCheckFailedException;
036 import org.apache.hadoop.ha.ServiceFailedException;
037 import org.apache.hadoop.hdfs.DFSConfigKeys;
038 import org.apache.hadoop.hdfs.DFSUtil;
039 import org.apache.hadoop.hdfs.HAUtil;
040 import org.apache.hadoop.hdfs.HdfsConfiguration;
041 import org.apache.hadoop.hdfs.protocol.ClientProtocol;
042 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
043 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
044 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption;
045 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
046 import org.apache.hadoop.hdfs.server.namenode.ha.*;
047 import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
048 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
049 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgressMetrics;
050 import org.apache.hadoop.hdfs.server.protocol.*;
051 import org.apache.hadoop.ipc.Server;
052 import org.apache.hadoop.ipc.StandbyException;
053 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
054 import org.apache.hadoop.metrics2.util.MBeans;
055 import org.apache.hadoop.net.NetUtils;
056 import org.apache.hadoop.security.AccessControlException;
057 import org.apache.hadoop.security.RefreshUserMappingsProtocol;
058 import org.apache.hadoop.security.SecurityUtil;
059 import org.apache.hadoop.security.UserGroupInformation;
060 import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol;
061 import org.apache.hadoop.ipc.RefreshCallQueueProtocol;
062 import org.apache.hadoop.tools.GetUserMappingsProtocol;
063 import org.apache.hadoop.tracing.SpanReceiverHost;
064 import org.apache.hadoop.tracing.TraceAdminProtocol;
065 import org.apache.hadoop.util.ExitUtil.ExitException;
066 import org.apache.hadoop.util.JvmPauseMonitor;
067 import org.apache.hadoop.util.ServicePlugin;
068 import org.apache.hadoop.util.StringUtils;
069
070 import javax.management.ObjectName;
071
072 import java.io.IOException;
073 import java.io.PrintStream;
074 import java.net.InetSocketAddress;
075 import java.net.URI;
076 import java.security.PrivilegedExceptionAction;
077 import java.util.ArrayList;
078 import java.util.Arrays;
079 import java.util.Collection;
080 import java.util.List;
081
082 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY;
083 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT;
084 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY;
085 import static org.apache.hadoop.hdfs.DFSConfigKeys.*;
086 import static org.apache.hadoop.util.ExitUtil.terminate;
087 import static org.apache.hadoop.util.ToolRunner.confirmPrompt;
088
089 /**********************************************************
090 * NameNode serves as both directory namespace manager and
091 * "inode table" for the Hadoop DFS. There is a single NameNode
092 * running in any DFS deployment. (Well, except when there
093 * is a second backup/failover NameNode, or when using federated NameNodes.)
094 *
095 * The NameNode controls two critical tables:
096 * 1) filename->blocksequence (namespace)
097 * 2) block->machinelist ("inodes")
098 *
099 * The first table is stored on disk and is very precious.
100 * The second table is rebuilt every time the NameNode comes up.
101 *
102 * 'NameNode' refers to both this class as well as the 'NameNode server'.
103 * The 'FSNamesystem' class actually performs most of the filesystem
104 * management. The majority of the 'NameNode' class itself is concerned
105 * with exposing the IPC interface and the HTTP server to the outside world,
106 * plus some configuration management.
107 *
108 * NameNode implements the
109 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} interface, which
110 * allows clients to ask for DFS services.
111 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} is not designed for
112 * direct use by authors of DFS client code. End-users should instead use the
113 * {@link org.apache.hadoop.fs.FileSystem} class.
114 *
115 * NameNode also implements the
116 * {@link org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol} interface,
117 * used by DataNodes that actually store DFS data blocks. These
118 * methods are invoked repeatedly and automatically by all the
119 * DataNodes in a DFS deployment.
120 *
121 * NameNode also implements the
122 * {@link org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol} interface,
123 * used by secondary namenodes or rebalancing processes to get partial
124 * NameNode state, for example partial blocksMap etc.
125 **********************************************************/
@InterfaceAudience.Private
public class NameNode implements NameNodeStatusMXBean {
  // Ensure hdfs-default.xml / hdfs-site.xml resources are registered with
  // Configuration before any NameNode code reads config values.
  static{
    HdfsConfiguration.init();
  }
131
  /**
   * Categories of operations supported by the namenode. HA states use these
   * categories to decide whether an operation is permitted in the current
   * state (e.g. writes are rejected on a standby).
   */
  public static enum OperationCategory {
    /** Operations that are state agnostic */
    UNCHECKED,
    /** Read operation that does not change the namespace state */
    READ,
    /** Write operation that changes the namespace state */
    WRITE,
    /** Operations related to checkpointing */
    CHECKPOINT,
    /** Operations related to {@link JournalProtocol} */
    JOURNAL
  }
147
148 /**
149 * HDFS configuration can have three types of parameters:
150 * <ol>
151 * <li>Parameters that are common for all the name services in the cluster.</li>
152 * <li>Parameters that are specific to a name service. These keys are suffixed
153 * with nameserviceId in the configuration. For example,
154 * "dfs.namenode.rpc-address.nameservice1".</li>
155 * <li>Parameters that are specific to a single name node. These keys are suffixed
156 * with nameserviceId and namenodeId in the configuration. for example,
157 * "dfs.namenode.rpc-address.nameservice1.namenode1"</li>
158 * </ol>
159 *
160 * In the latter cases, operators may specify the configuration without
161 * any suffix, with a nameservice suffix, or with a nameservice and namenode
162 * suffix. The more specific suffix will take precedence.
163 *
164 * These keys are specific to a given namenode, and thus may be configured
165 * globally, for a nameservice, or for a specific namenode within a nameservice.
166 */
167 public static final String[] NAMENODE_SPECIFIC_KEYS = {
168 DFS_NAMENODE_RPC_ADDRESS_KEY,
169 DFS_NAMENODE_RPC_BIND_HOST_KEY,
170 DFS_NAMENODE_NAME_DIR_KEY,
171 DFS_NAMENODE_EDITS_DIR_KEY,
172 DFS_NAMENODE_SHARED_EDITS_DIR_KEY,
173 DFS_NAMENODE_CHECKPOINT_DIR_KEY,
174 DFS_NAMENODE_CHECKPOINT_EDITS_DIR_KEY,
175 DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY,
176 DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY,
177 DFS_NAMENODE_HTTP_ADDRESS_KEY,
178 DFS_NAMENODE_HTTPS_ADDRESS_KEY,
179 DFS_NAMENODE_HTTP_BIND_HOST_KEY,
180 DFS_NAMENODE_HTTPS_BIND_HOST_KEY,
181 DFS_NAMENODE_KEYTAB_FILE_KEY,
182 DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY,
183 DFS_NAMENODE_SECONDARY_HTTPS_ADDRESS_KEY,
184 DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY,
185 DFS_NAMENODE_BACKUP_ADDRESS_KEY,
186 DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY,
187 DFS_NAMENODE_BACKUP_SERVICE_RPC_ADDRESS_KEY,
188 DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY,
189 DFS_NAMENODE_KERBEROS_INTERNAL_SPNEGO_PRINCIPAL_KEY,
190 DFS_HA_FENCE_METHODS_KEY,
191 DFS_HA_ZKFC_PORT_KEY,
192 DFS_HA_FENCE_METHODS_KEY
193 };
194
  /**
   * @see #NAMENODE_SPECIFIC_KEYS
   * These keys are specific to a nameservice, but may not be overridden
   * for a specific namenode.
   */
  public static final String[] NAMESERVICE_SPECIFIC_KEYS = {
    DFS_HA_AUTO_FAILOVER_ENABLED_KEY
  };
203
  // Command-line usage text printed when the NameNode is launched with
  // unrecognized arguments. Each "\n\t[" starts a new option on its own line.
  private static final String USAGE = "Usage: java NameNode ["
      + StartupOption.BACKUP.getName() + "] | \n\t["
      + StartupOption.CHECKPOINT.getName() + "] | \n\t["
      + StartupOption.FORMAT.getName() + " ["
      + StartupOption.CLUSTERID.getName() + " cid ] ["
      + StartupOption.FORCE.getName() + "] ["
      + StartupOption.NONINTERACTIVE.getName() + "] ] | \n\t["
      + StartupOption.UPGRADE.getName() +
        " [" + StartupOption.CLUSTERID.getName() + " cid]" +
        " [" + StartupOption.RENAMERESERVED.getName() + "<k-v pairs>] ] | \n\t["
      + StartupOption.UPGRADEONLY.getName() +
        " [" + StartupOption.CLUSTERID.getName() + " cid]" +
        " [" + StartupOption.RENAMERESERVED.getName() + "<k-v pairs>] ] | \n\t["
      + StartupOption.ROLLBACK.getName() + "] | \n\t["
      + StartupOption.ROLLINGUPGRADE.getName() + " "
      + RollingUpgradeStartupOption.getAllOptionString() + " ] | \n\t["
      + StartupOption.FINALIZE.getName() + "] | \n\t["
      + StartupOption.IMPORT.getName() + "] | \n\t["
      + StartupOption.INITIALIZESHAREDEDITS.getName() + "] | \n\t["
      + StartupOption.BOOTSTRAPSTANDBY.getName() + "] | \n\t["
      + StartupOption.RECOVER.getName() + " [ "
      + StartupOption.FORCE.getName() + "] ] | \n\t["
      + StartupOption.METADATAVERSION.getName() + " ] "
      + " ]";
228
229
230 public long getProtocolVersion(String protocol,
231 long clientVersion) throws IOException {
232 if (protocol.equals(ClientProtocol.class.getName())) {
233 return ClientProtocol.versionID;
234 } else if (protocol.equals(DatanodeProtocol.class.getName())){
235 return DatanodeProtocol.versionID;
236 } else if (protocol.equals(NamenodeProtocol.class.getName())){
237 return NamenodeProtocol.versionID;
238 } else if (protocol.equals(RefreshAuthorizationPolicyProtocol.class.getName())){
239 return RefreshAuthorizationPolicyProtocol.versionID;
240 } else if (protocol.equals(RefreshUserMappingsProtocol.class.getName())){
241 return RefreshUserMappingsProtocol.versionID;
242 } else if (protocol.equals(RefreshCallQueueProtocol.class.getName())) {
243 return RefreshCallQueueProtocol.versionID;
244 } else if (protocol.equals(GetUserMappingsProtocol.class.getName())){
245 return GetUserMappingsProtocol.versionID;
246 } else if (protocol.equals(TraceAdminProtocol.class.getName())){
247 return TraceAdminProtocol.versionID;
248 } else {
249 throw new IOException("Unknown protocol to name node: " + protocol);
250 }
251 }
252
  /** Default NameNode RPC port, used when the filesystem URI omits one. */
  public static final int DEFAULT_PORT = 8020;

  public static final Log LOG = LogFactory.getLog(NameNode.class.getName());
  /** Log dedicated to namespace state-change messages. */
  public static final Log stateChangeLog = LogFactory.getLog("org.apache.hadoop.hdfs.StateChange");
  /** Log dedicated to block state-change messages. */
  public static final Log blockStateChangeLog = LogFactory.getLog("BlockStateChange");
  /** Shared HA state instances used for state transitions. */
  public static final HAState ACTIVE_STATE = new ActiveState();
  public static final HAState STANDBY_STATE = new StandbyState();

  protected FSNamesystem namesystem;
  protected final Configuration conf;
  protected final NamenodeRole role;
  // volatile: read without locks by RPC handlers while HA transitions occur
  private volatile HAState state;
  private final boolean haEnabled;
  private final HAContext haContext;
  protected final boolean allowStaleStandbyReads;


  /** httpServer */
  protected NameNodeHttpServer httpServer;
  /** Background thread running the trash emptier; null when not started. */
  private Thread emptier;
  /** only used for testing purposes */
  protected boolean stopRequested = false;
  /** Registration information of this name-node */
  protected NamenodeRegistration nodeRegistration;
  /** Activated plug-ins. */
  private List<ServicePlugin> plugins;

  private NameNodeRpcServer rpcServer;

  /** Monitors and reports long JVM pauses (e.g. garbage collection). */
  private JvmPauseMonitor pauseMonitor;
  /** JMX bean name for the NameNodeStatus MXBean; null until registered. */
  private ObjectName nameNodeStatusBeanName;
  SpanReceiverHost spanReceiverHost;
  /**
   * The namenode address that clients will use to access this namenode
   * or the name service. For HA configurations using logical URI, it
   * will be the logical address.
   */
  private String clientNamenodeAddress;
290
  /** Format a new filesystem. Destroys any filesystem that may already
   * exist at this location.
   * Delegates to {@code format(conf, true, true)}: force formatting,
   * interactively prompting for existing directories.
   **/
  public static void format(Configuration conf) throws IOException {
    format(conf, true, true);
  }
296
  /** Process-wide NameNode metrics; set once via {@link #initMetrics}. */
  static NameNodeMetrics metrics;
  /** Process-wide startup-progress tracker, shared with the HTTP server. */
  private static final StartupProgress startupProgress = new StartupProgress();
  /** Return the {@link FSNamesystem} object.
   * @return {@link FSNamesystem} object.
   */
  public FSNamesystem getNamesystem() {
    return namesystem;
  }

  /** @return the RPC server facade exposing all NameNode protocols. */
  public NamenodeProtocols getRpcServer() {
    return rpcServer;
  }

  /** Create and install the process-wide metrics instance. */
  static void initMetrics(Configuration conf, NamenodeRole role) {
    metrics = NameNodeMetrics.create(conf, role);
  }

  /** @return the process-wide metrics instance (null before initMetrics). */
  public static NameNodeMetrics getNameNodeMetrics() {
    return metrics;
  }

  /**
   * Returns object used for reporting namenode startup progress.
   *
   * @return StartupProgress for reporting namenode startup progress
   */
  public static StartupProgress getStartupProgress() {
    return startupProgress;
  }
326
  /**
   * Return the service name of the issued delegation token.
   *
   * @return The name service id in HA-mode, or the rpc address in non-HA mode
   */
  public String getTokenServiceName() {
    return getClientNamenodeAddress();
  }
335
336 /**
337 * Set the namenode address that will be used by clients to access this
338 * namenode or name service. This needs to be called before the config
339 * is overriden.
340 */
341 public void setClientNamenodeAddress(Configuration conf) {
342 String nnAddr = conf.get(FS_DEFAULT_NAME_KEY);
343 if (nnAddr == null) {
344 // default fs is not set.
345 clientNamenodeAddress = null;
346 return;
347 }
348
349 LOG.info(FS_DEFAULT_NAME_KEY + " is " + nnAddr);
350 URI nnUri = URI.create(nnAddr);
351
352 String nnHost = nnUri.getHost();
353 if (nnHost == null) {
354 clientNamenodeAddress = null;
355 return;
356 }
357
358 if (DFSUtil.getNameServiceIds(conf).contains(nnHost)) {
359 // host name is logical
360 clientNamenodeAddress = nnHost;
361 } else if (nnUri.getPort() > 0) {
362 // physical address with a valid port
363 clientNamenodeAddress = nnUri.getAuthority();
364 } else {
365 // the port is missing or 0. Figure out real bind address later.
366 clientNamenodeAddress = null;
367 return;
368 }
369 LOG.info("Clients are to use " + clientNamenodeAddress + " to access"
370 + " this namenode/service.");
371 }
372
  /**
   * Get the namenode address to be used by clients.
   * @return nn address
   */
  public String getClientNamenodeAddress() {
    return clientNamenodeAddress;
  }

  /** Parse "host[:port]" into a socket address, defaulting to port 8020. */
  public static InetSocketAddress getAddress(String address) {
    return NetUtils.createSocketAddr(address, DEFAULT_PORT);
  }

  /**
   * Set the configuration property for the service rpc address
   * to address
   */
  public static void setServiceAddress(Configuration conf,
                                       String address) {
    LOG.info("Setting ADDRESS " + address);
    conf.set(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, address);
  }
394
395 /**
396 * Fetches the address for services to use when connecting to namenode
397 * based on the value of fallback returns null if the special
398 * address is not specified or returns the default namenode address
399 * to be used by both clients and services.
400 * Services here are datanodes, backup node, any non client connection
401 */
402 public static InetSocketAddress getServiceAddress(Configuration conf,
403 boolean fallback) {
404 String addr = conf.get(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY);
405 if (addr == null || addr.isEmpty()) {
406 return fallback ? getAddress(conf) : null;
407 }
408 return getAddress(addr);
409 }
410
  /** @return the NameNode RPC address derived from the default FS URI. */
  public static InetSocketAddress getAddress(Configuration conf) {
    URI filesystemURI = FileSystem.getDefaultUri(conf);
    return getAddress(filesystemURI);
  }
415
416
  /**
   * @return address of file system
   * @throws IllegalArgumentException if the URI has no authority or its
   *         scheme is not "hdfs" (case-insensitive)
   */
  public static InetSocketAddress getAddress(URI filesystemURI) {
    String authority = filesystemURI.getAuthority();
    if (authority == null) {
      throw new IllegalArgumentException(String.format(
          "Invalid URI for NameNode address (check %s): %s has no authority.",
          FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString()));
    }
    if (!HdfsConstants.HDFS_URI_SCHEME.equalsIgnoreCase(
        filesystemURI.getScheme())) {
      throw new IllegalArgumentException(String.format(
          "Invalid URI for NameNode address (check %s): %s is not of scheme '%s'.",
          FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString(),
          HdfsConstants.HDFS_URI_SCHEME));
    }
    return getAddress(authority);
  }
436
437 public static URI getUri(InetSocketAddress namenode) {
438 int port = namenode.getPort();
439 String portString = port == DEFAULT_PORT ? "" : (":"+port);
440 return URI.create(HdfsConstants.HDFS_URI_SCHEME + "://"
441 + namenode.getHostName()+portString);
442 }
443
  //
  // Common NameNode methods implementation for the active name-node role.
  //
  /** @return the role (NAMENODE, BACKUP, ...) this node was started with. */
  public NamenodeRole getRole() {
    return role;
  }

  /** @return true iff this node's role equals {@code that}. */
  boolean isRole(NamenodeRole that) {
    return role.equals(that);
  }

  /**
   * Given a configuration get the address of the service rpc server
   * If the service rpc is not configured returns null
   */
  protected InetSocketAddress getServiceRpcServerAddress(Configuration conf) {
    return NameNode.getServiceAddress(conf, false);
  }

  /** @return the client RPC address derived from the default FS URI. */
  protected InetSocketAddress getRpcServerAddress(Configuration conf) {
    return getAddress(conf);
  }
466
467 /** Given a configuration get the bind host of the service rpc server
468 * If the bind host is not configured returns null.
469 */
470 protected String getServiceRpcServerBindHost(Configuration conf) {
471 String addr = conf.getTrimmed(DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY);
472 if (addr == null || addr.isEmpty()) {
473 return null;
474 }
475 return addr;
476 }
477
478 /** Given a configuration get the bind host of the client rpc server
479 * If the bind host is not configured returns null.
480 */
481 protected String getRpcServerBindHost(Configuration conf) {
482 String addr = conf.getTrimmed(DFS_NAMENODE_RPC_BIND_HOST_KEY);
483 if (addr == null || addr.isEmpty()) {
484 return null;
485 }
486 return addr;
487 }
488
  /**
   * Modifies the configuration passed to contain the service rpc address setting
   */
  protected void setRpcServiceServerAddress(Configuration conf,
      InetSocketAddress serviceRPCAddress) {
    setServiceAddress(conf, NetUtils.getHostPortString(serviceRPCAddress));
  }

  /** Record the actual client RPC address back into the default FS URI. */
  protected void setRpcServerAddress(Configuration conf,
      InetSocketAddress rpcAddress) {
    FileSystem.setDefaultUri(conf, getUri(rpcAddress));
  }

  /** @return the configured HTTP address; overridable by subclasses. */
  protected InetSocketAddress getHttpServerAddress(Configuration conf) {
    return getHttpAddress(conf);
  }
505
506 /**
507 * HTTP server address for binding the endpoint. This method is
508 * for use by the NameNode and its derivatives. It may return
509 * a different address than the one that should be used by clients to
510 * connect to the NameNode. See
511 * {@link DFSConfigKeys#DFS_NAMENODE_HTTP_BIND_HOST_KEY}
512 *
513 * @param conf
514 * @return
515 */
516 protected InetSocketAddress getHttpServerBindAddress(Configuration conf) {
517 InetSocketAddress bindAddress = getHttpServerAddress(conf);
518
519 // If DFS_NAMENODE_HTTP_BIND_HOST_KEY exists then it overrides the
520 // host name portion of DFS_NAMENODE_HTTP_ADDRESS_KEY.
521 final String bindHost = conf.getTrimmed(DFS_NAMENODE_HTTP_BIND_HOST_KEY);
522 if (bindHost != null && !bindHost.isEmpty()) {
523 bindAddress = new InetSocketAddress(bindHost, bindAddress.getPort());
524 }
525
526 return bindAddress;
527 }
528
  /** @return the NameNode HTTP address. */
  public static InetSocketAddress getHttpAddress(Configuration conf) {
    return NetUtils.createSocketAddr(
        conf.get(DFS_NAMENODE_HTTP_ADDRESS_KEY, DFS_NAMENODE_HTTP_ADDRESS_DEFAULT));
  }

  /** Load the namesystem from disk; overridable by subclasses. */
  protected void loadNamesystem(Configuration conf) throws IOException {
    this.namesystem = FSNamesystem.loadFromDisk(conf);
  }

  /** @return this node's registration info (null until setRegistration). */
  NamenodeRegistration getRegistration() {
    return nodeRegistration;
  }

  /** Build and cache this node's registration from the live RPC/HTTP
   *  addresses and storage info; requires servers to be started. */
  NamenodeRegistration setRegistration() {
    nodeRegistration = new NamenodeRegistration(
        NetUtils.getHostPortString(rpcServer.getRpcAddress()),
        NetUtils.getHostPortString(getHttpAddress()),
        getFSImage().getStorage(), getRole());
    return nodeRegistration;
  }
550
551 /* optimize ugi lookup for RPC operations to avoid a trip through
552 * UGI.getCurrentUser which is synch'ed
553 */
554 public static UserGroupInformation getRemoteUser() throws IOException {
555 UserGroupInformation ugi = Server.getRemoteUser();
556 return (ugi != null) ? ugi : UserGroupInformation.getCurrentUser();
557 }
558
559
  /**
   * Login as the configured user for the NameNode.
   * Uses the keytab and Kerberos principal from the configuration; the
   * principal's _HOST token is resolved from the RPC server's hostname.
   */
  void loginAsNameNodeUser(Configuration conf) throws IOException {
    InetSocketAddress socAddr = getRpcServerAddress(conf);
    SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
        DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, socAddr.getHostName());
  }
568
  /**
   * Initialize name-node.
   *
   * Sets up security, metrics, the HTTP server (for the NAMENODE role),
   * the namesystem, the RPC server, and finally the common services.
   * The order of these steps matters; do not reorder casually.
   *
   * @param conf the configuration
   */
  protected void initialize(Configuration conf) throws IOException {
    // Propagate DFS percentile intervals to the UGI metrics if the
    // generic key was not set explicitly.
    if (conf.get(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS) == null) {
      String intervals = conf.get(DFS_METRICS_PERCENTILES_INTERVALS_KEY);
      if (intervals != null) {
        conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
          intervals);
      }
    }

    UserGroupInformation.setConfiguration(conf);
    loginAsNameNodeUser(conf);

    NameNode.initMetrics(conf, this.getRole());
    StartupProgressMetrics.register(startupProgress);

    // The regular NameNode starts its HTTP server before loading the
    // namesystem so startup progress is visible; other roles start it
    // later in startCommonServices().
    if (NamenodeRole.NAMENODE == role) {
      startHttpServer(conf);
    }

    this.spanReceiverHost = SpanReceiverHost.getInstance(conf);

    loadNamesystem(conf);

    rpcServer = createRpcServer(conf);
    if (clientNamenodeAddress == null) {
      // This is expected for MiniDFSCluster. Set it now using
      // the RPC server's bind address.
      clientNamenodeAddress =
          NetUtils.getHostPortString(rpcServer.getRpcAddress());
      LOG.info("Clients are to use " + clientNamenodeAddress + " to access"
          + " this namenode/service.");
    }
    if (NamenodeRole.NAMENODE == role) {
      httpServer.setNameNodeAddress(getNameNodeAddress());
      httpServer.setFSImage(getFSImage());
    }

    pauseMonitor = new JvmPauseMonitor(conf);
    pauseMonitor.start();
    metrics.getJvmMetrics().setPauseMonitor(pauseMonitor);

    startCommonServices(conf);
  }
617
  /**
   * Create the RPC server implementation. Used as an extension point for the
   * BackupNode.
   */
  protected NameNodeRpcServer createRpcServer(Configuration conf)
      throws IOException {
    return new NameNodeRpcServer(conf, this);
  }
626
  /** Start the services common to active and standby states.
   * Order matters: namesystem services first, then the HTTP server for
   * non-NAMENODE roles, then the RPC server, then plugins. */
  private void startCommonServices(Configuration conf) throws IOException {
    namesystem.startCommonServices(conf, haContext);
    registerNNSMXBean();
    // For the NAMENODE role the HTTP server was already started in
    // initialize(); other roles start it here.
    if (NamenodeRole.NAMENODE != role) {
      startHttpServer(conf);
      httpServer.setNameNodeAddress(getNameNodeAddress());
      httpServer.setFSImage(getFSImage());
    }
    rpcServer.start();
    plugins = conf.getInstances(DFS_NAMENODE_PLUGINS_KEY,
        ServicePlugin.class);
    for (ServicePlugin p: plugins) {
      try {
        p.start(this);
      } catch (Throwable t) {
        // A broken plugin must not abort NameNode startup.
        LOG.warn("ServicePlugin " + p + " could not be started", t);
      }
    }
    LOG.info(getRole() + " RPC up at: " + rpcServer.getRpcAddress());
    if (rpcServer.getServiceRpcAddress() != null) {
      LOG.info(getRole() + " service RPC up at: "
          + rpcServer.getServiceRpcAddress());
    }
  }
652
  /** Stop the services started by startCommonServices(): RPC server,
   * namesystem, pause monitor, plugins, and finally the HTTP server.
   * Null checks allow this to run on a partially-initialized node. */
  private void stopCommonServices() {
    if(rpcServer != null) rpcServer.stop();
    if(namesystem != null) namesystem.close();
    if (pauseMonitor != null) pauseMonitor.stop();
    if (plugins != null) {
      for (ServicePlugin p : plugins) {
        try {
          p.stop();
        } catch (Throwable t) {
          // Best-effort: one failing plugin must not block shutdown.
          LOG.warn("ServicePlugin " + p + " could not be stopped", t);
        }
      }
    }
    stopHttpServer();
  }
668
  /** Start the background trash emptier thread if trash is enabled.
   * A zero interval disables trash; a negative interval is a config error.
   * @throws IOException if the interval is negative or the FS lookup fails */
  private void startTrashEmptier(final Configuration conf) throws IOException {
    long trashInterval =
        conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT);
    if (trashInterval == 0) {
      return;
    } else if (trashInterval < 0) {
      throw new IOException("Cannot start trash emptier with negative interval."
          + " Set " + FS_TRASH_INTERVAL_KEY + " to a positive value.");
    }

    // This may be called from the transitionToActive code path, in which
    // case the current user is the administrator, not the NN. The trash
    // emptier needs to run as the NN. See HDFS-3972.
    FileSystem fs = SecurityUtil.doAsLoginUser(
        new PrivilegedExceptionAction<FileSystem>() {
          @Override
          public FileSystem run() throws IOException {
            return FileSystem.get(conf);
          }
        });
    this.emptier = new Thread(new Trash(fs, conf).getEmptier(), "Trash Emptier");
    this.emptier.setDaemon(true);
    this.emptier.start();
  }
693
694 private void stopTrashEmptier() {
695 if (this.emptier != null) {
696 emptier.interrupt();
697 emptier = null;
698 }
699 }
700
  /** Create and start the NameNode HTTP server, wiring in the shared
   * startup-progress tracker so the web UI can display it. */
  private void startHttpServer(final Configuration conf) throws IOException {
    httpServer = new NameNodeHttpServer(conf, this, getHttpServerBindAddress(conf));
    httpServer.start();
    httpServer.setStartupProgress(startupProgress);
  }
706
707 private void stopHttpServer() {
708 try {
709 if (httpServer != null) httpServer.stop();
710 } catch (Exception e) {
711 LOG.error("Exception while stopping httpserver", e);
712 }
713 }
714
  /**
   * Start NameNode.
   * <p>
   * The name-node can be started with one of the following startup options:
   * <ul>
   * <li>{@link StartupOption#REGULAR REGULAR} - normal name node startup</li>
   * <li>{@link StartupOption#FORMAT FORMAT} - format name node</li>
   * <li>{@link StartupOption#BACKUP BACKUP} - start backup node</li>
   * <li>{@link StartupOption#CHECKPOINT CHECKPOINT} - start checkpoint node</li>
   * <li>{@link StartupOption#UPGRADE UPGRADE} - start the cluster
   * <li>{@link StartupOption#UPGRADEONLY UPGRADEONLY} - upgrade the cluster
   * upgrade and create a snapshot of the current file system state</li>
   * <li>{@link StartupOption#RECOVER RECOVERY} - recover name node
   * metadata</li>
   * <li>{@link StartupOption#ROLLBACK ROLLBACK} - roll the
   * cluster back to the previous state</li>
   * <li>{@link StartupOption#FINALIZE FINALIZE} - finalize
   * previous upgrade</li>
   * <li>{@link StartupOption#IMPORT IMPORT} - import checkpoint</li>
   * </ul>
   * The option is passed via configuration field:
   * <tt>dfs.namenode.startup</tt>
   *
   * The conf will be modified to reflect the actual ports on which
   * the NameNode is up and running if the user passes the port as
   * <code>zero</code> in the conf.
   *
   * @param conf configuration
   * @throws IOException
   */
  public NameNode(Configuration conf) throws IOException {
    this(conf, NamenodeRole.NAMENODE);
  }
748
749 protected NameNode(Configuration conf, NamenodeRole role)
750 throws IOException {
751 this.conf = conf;
752 this.role = role;
753 setClientNamenodeAddress(conf);
754 String nsId = getNameServiceId(conf);
755 String namenodeId = HAUtil.getNameNodeId(conf, nsId);
756 this.haEnabled = HAUtil.isHAEnabled(conf, nsId);
757 state = createHAState(getStartupOption(conf));
758 this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf);
759 this.haContext = createHAContext();
760 try {
761 initializeGenericKeys(conf, nsId, namenodeId);
762 initialize(conf);
763 try {
764 haContext.writeLock();
765 state.prepareToEnterState(haContext);
766 state.enterState(haContext);
767 } finally {
768 haContext.writeUnlock();
769 }
770 } catch (IOException e) {
771 this.stop();
772 throw e;
773 } catch (HadoopIllegalArgumentException e) {
774 this.stop();
775 throw e;
776 }
777 }
778
779 protected HAState createHAState(StartupOption startOpt) {
780 if (!haEnabled || startOpt == StartupOption.UPGRADE
781 || startOpt == StartupOption.UPGRADEONLY) {
782 return ACTIVE_STATE;
783 } else {
784 return STANDBY_STATE;
785 }
786 }
787
  /** Create the HA context; extension point for subclasses. */
  protected HAContext createHAContext() {
    return new NameNodeHAContext();
  }
791
  /**
   * Wait for service to finish.
   * (Normally, it runs forever.)
   */
  public void join() {
    try {
      rpcServer.join();
    } catch (InterruptedException ie) {
      LOG.info("Caught interrupted exception ", ie);
    }
  }
803
  /**
   * Stop all NameNode threads and wait for all to finish.
   * Idempotent: subsequent calls return immediately.
   */
  public void stop() {
    // Only the first caller proceeds with shutdown.
    synchronized(this) {
      if (stopRequested)
        return;
      stopRequested = true;
    }
    try {
      if (state != null) {
        state.exitState(haContext);
      }
    } catch (ServiceFailedException e) {
      LOG.warn("Encountered exception while exiting state ", e);
    } finally {
      // Shut down in reverse order of startup; each step is guarded so a
      // partially-initialized node can still be stopped cleanly.
      stopCommonServices();
      if (metrics != null) {
        metrics.shutdown();
      }
      if (namesystem != null) {
        namesystem.shutdown();
      }
      if (nameNodeStatusBeanName != null) {
        MBeans.unregister(nameNodeStatusBeanName);
        nameNodeStatusBeanName = null;
      }
      if (this.spanReceiverHost != null) {
        this.spanReceiverHost.closeReceivers();
      }
    }
  }

  /** @return true once stop() has been requested; used by tests. */
  synchronized boolean isStopRequested() {
    return stopRequested;
  }
840
  /**
   * Is the cluster currently in safe mode?
   */
  public boolean isInSafeMode() {
    return namesystem.isInSafeMode();
  }

  /** get FSImage */
  @VisibleForTesting
  public FSImage getFSImage() {
    return namesystem.getFSImage();
  }
853
854 /**
855 * @return NameNode RPC address
856 */
857 public InetSocketAddress getNameNodeAddress() {
858 return rpcServer.getRpcAddress();
859 }
860
861 /**
862 * @return NameNode RPC address in "host:port" string form
863 */
864 public String getNameNodeAddressHostPortString() {
865 return NetUtils.getHostPortString(rpcServer.getRpcAddress());
866 }
867
868 /**
869 * @return NameNode service RPC address if configured, the
870 * NameNode RPC address otherwise
871 */
872 public InetSocketAddress getServiceRpcAddress() {
873 final InetSocketAddress serviceAddr = rpcServer.getServiceRpcAddress();
874 return serviceAddr == null ? rpcServer.getRpcAddress() : serviceAddr;
875 }
876
877 /**
878 * @return NameNode HTTP address, used by the Web UI, image transfer,
879 * and HTTP-based file system clients like Hftp and WebHDFS
880 */
881 public InetSocketAddress getHttpAddress() {
882 return httpServer.getHttpAddress();
883 }
884
885 /**
886 * @return NameNode HTTPS address, used by the Web UI, image transfer,
887 * and HTTP-based file system clients like Hftp and WebHDFS
888 */
889 public InetSocketAddress getHttpsAddress() {
890 return httpServer.getHttpsAddress();
891 }
892
893 /**
894 * Verify that configured directories exist, then
895 * Interactively confirm that formatting is desired
896 * for each existing directory and format them.
897 *
898 * @param conf configuration to use
899 * @param force if true, format regardless of whether dirs exist
900 * @return true if formatting was aborted, false otherwise
901 * @throws IOException
902 */
903 private static boolean format(Configuration conf, boolean force,
904 boolean isInteractive) throws IOException {
905 String nsId = DFSUtil.getNamenodeNameServiceId(conf);
906 String namenodeId = HAUtil.getNameNodeId(conf, nsId);
907 initializeGenericKeys(conf, nsId, namenodeId);
908 checkAllowFormat(conf);
909
910 if (UserGroupInformation.isSecurityEnabled()) {
911 InetSocketAddress socAddr = getAddress(conf);
912 SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
913 DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, socAddr.getHostName());
914 }
915
916 Collection<URI> nameDirsToFormat = FSNamesystem.getNamespaceDirs(conf);
917 List<URI> sharedDirs = FSNamesystem.getSharedEditsDirs(conf);
918 List<URI> dirsToPrompt = new ArrayList<URI>();
919 dirsToPrompt.addAll(nameDirsToFormat);
920 dirsToPrompt.addAll(sharedDirs);
921 List<URI> editDirsToFormat =
922 FSNamesystem.getNamespaceEditsDirs(conf);
923
924 // if clusterID is not provided - see if you can find the current one
925 String clusterId = StartupOption.FORMAT.getClusterId();
926 if(clusterId == null || clusterId.equals("")) {
927 //Generate a new cluster id
928 clusterId = NNStorage.newClusterID();
929 }
930 System.out.println("Formatting using clusterid: " + clusterId);
931
932 FSImage fsImage = new FSImage(conf, nameDirsToFormat, editDirsToFormat);
933 try {
934 FSNamesystem fsn = new FSNamesystem(conf, fsImage);
935 fsImage.getEditLog().initJournalsForWrite();
936
937 if (!fsImage.confirmFormat(force, isInteractive)) {
938 return true; // aborted
939 }
940
941 fsImage.format(fsn, clusterId);
942 } catch (IOException ioe) {
943 LOG.warn("Encountered exception during format: ", ioe);
944 fsImage.close();
945 throw ioe;
946 }
947 return false;
948 }
949
950 public static void checkAllowFormat(Configuration conf) throws IOException {
951 if (!conf.getBoolean(DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY,
952 DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT)) {
953 throw new IOException("The option " + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY
954 + " is set to false for this filesystem, so it "
955 + "cannot be formatted. You will need to set "
956 + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY +" parameter "
957 + "to true in order to format this filesystem");
958 }
959 }
960
  /**
   * Initialize the shared edits dir, forcing (no prompt) by default.
   * @see #initializeSharedEdits(Configuration, boolean, boolean)
   */
  @VisibleForTesting
  public static boolean initializeSharedEdits(Configuration conf) throws IOException {
    return initializeSharedEdits(conf, true);
  }
965
  /**
   * Initialize the shared edits dir non-interactively.
   * @see #initializeSharedEdits(Configuration, boolean, boolean)
   */
  @VisibleForTesting
  public static boolean initializeSharedEdits(Configuration conf,
      boolean force) throws IOException {
    return initializeSharedEdits(conf, force, false);
  }
971
972 /**
973 * Clone the supplied configuration but remove the shared edits dirs.
974 *
975 * @param conf Supplies the original configuration.
976 * @return Cloned configuration without the shared edit dirs.
977 * @throws IOException on failure to generate the configuration.
978 */
979 private static Configuration getConfigurationWithoutSharedEdits(
980 Configuration conf)
981 throws IOException {
982 List<URI> editsDirs = FSNamesystem.getNamespaceEditsDirs(conf, false);
983 String editsDirsString = Joiner.on(",").join(editsDirs);
984
985 Configuration confWithoutShared = new Configuration(conf);
986 confWithoutShared.unset(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY);
987 confWithoutShared.setStrings(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY,
988 editsDirsString);
989 return confWithoutShared;
990 }
991
992 /**
993 * Format a new shared edits dir and copy in enough edit log segments so that
994 * the standby NN can start up.
995 *
996 * @param conf configuration
997 * @param force format regardless of whether or not the shared edits dir exists
998 * @param interactive prompt the user when a dir exists
999 * @return true if the command aborts, false otherwise
1000 */
1001 private static boolean initializeSharedEdits(Configuration conf,
1002 boolean force, boolean interactive) throws IOException {
1003 String nsId = DFSUtil.getNamenodeNameServiceId(conf);
1004 String namenodeId = HAUtil.getNameNodeId(conf, nsId);
1005 initializeGenericKeys(conf, nsId, namenodeId);
1006
1007 if (conf.get(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY) == null) {
1008 LOG.fatal("No shared edits directory configured for namespace " +
1009 nsId + " namenode " + namenodeId);
1010 return false;
1011 }
1012
1013 if (UserGroupInformation.isSecurityEnabled()) {
1014 InetSocketAddress socAddr = getAddress(conf);
1015 SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
1016 DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, socAddr.getHostName());
1017 }
1018
1019 NNStorage existingStorage = null;
1020 FSImage sharedEditsImage = null;
1021 try {
1022 FSNamesystem fsns =
1023 FSNamesystem.loadFromDisk(getConfigurationWithoutSharedEdits(conf));
1024
1025 existingStorage = fsns.getFSImage().getStorage();
1026 NamespaceInfo nsInfo = existingStorage.getNamespaceInfo();
1027
1028 List<URI> sharedEditsDirs = FSNamesystem.getSharedEditsDirs(conf);
1029
1030 sharedEditsImage = new FSImage(conf,
1031 Lists.<URI>newArrayList(),
1032 sharedEditsDirs);
1033 sharedEditsImage.getEditLog().initJournalsForWrite();
1034
1035 if (!sharedEditsImage.confirmFormat(force, interactive)) {
1036 return true; // abort
1037 }
1038
1039 NNStorage newSharedStorage = sharedEditsImage.getStorage();
1040 // Call Storage.format instead of FSImage.format here, since we don't
1041 // actually want to save a checkpoint - just prime the dirs with
1042 // the existing namespace info
1043 newSharedStorage.format(nsInfo);
1044 sharedEditsImage.getEditLog().formatNonFileJournals(nsInfo);
1045
1046 // Need to make sure the edit log segments are in good shape to initialize
1047 // the shared edits dir.
1048 fsns.getFSImage().getEditLog().close();
1049 fsns.getFSImage().getEditLog().initJournalsForWrite();
1050 fsns.getFSImage().getEditLog().recoverUnclosedStreams();
1051
1052 copyEditLogSegmentsToSharedDir(fsns, sharedEditsDirs, newSharedStorage,
1053 conf);
1054 } catch (IOException ioe) {
1055 LOG.error("Could not initialize shared edits dir", ioe);
1056 return true; // aborted
1057 } finally {
1058 if (sharedEditsImage != null) {
1059 try {
1060 sharedEditsImage.close();
1061 } catch (IOException ioe) {
1062 LOG.warn("Could not close sharedEditsImage", ioe);
1063 }
1064 }
1065 // Have to unlock storage explicitly for the case when we're running in a
1066 // unit test, which runs in the same JVM as NNs.
1067 if (existingStorage != null) {
1068 try {
1069 existingStorage.unlockAll();
1070 } catch (IOException ioe) {
1071 LOG.warn("Could not unlock storage directories", ioe);
1072 return true; // aborted
1073 }
1074 }
1075 }
1076 return false; // did not abort
1077 }
1078
  /**
   * Copy all edit log segments after the most recent checkpoint from the
   * local edits dirs of {@code fsns} into the freshly formatted shared
   * edits dir, replaying segment-open/close boundaries as it goes.
   *
   * @param fsns the namesystem loaded from the local (non-shared) dirs
   * @param sharedEditsDirs target shared edits dirs; must be non-empty
   * @param newSharedStorage storage backing the shared dirs
   * @param conf configuration to use
   * @throws IOException if reading or writing edits fails
   */
  private static void copyEditLogSegmentsToSharedDir(FSNamesystem fsns,
      Collection<URI> sharedEditsDirs, NNStorage newSharedStorage,
      Configuration conf) throws IOException {
    Preconditions.checkArgument(!sharedEditsDirs.isEmpty(),
        "No shared edits specified");
    // Copy edit log segments into the new shared edits dir.
    List<URI> sharedEditsUris = new ArrayList<URI>(sharedEditsDirs);
    FSEditLog newSharedEditLog = new FSEditLog(conf, newSharedStorage,
        sharedEditsUris);
    newSharedEditLog.initJournalsForWrite();
    newSharedEditLog.recoverUnclosedStreams();

    FSEditLog sourceEditLog = fsns.getFSImage().editLog;

    long fromTxId = fsns.getFSImage().getMostRecentCheckpointTxId();

    Collection<EditLogInputStream> streams = null;
    try {
      streams = sourceEditLog.selectInputStreams(fromTxId + 1, 0);

      // Set the nextTxid to the CheckpointTxId+1
      newSharedEditLog.setNextTxId(fromTxId + 1);

      // Copy all edits after last CheckpointTxId to shared edits dir
      for (EditLogInputStream stream : streams) {
        LOG.debug("Beginning to copy stream " + stream + " to shared edits");
        FSEditLogOp op;
        // Tracks whether a destination segment is currently open; segments
        // are opened lazily at the first op and closed on END_LOG_SEGMENT.
        boolean segmentOpen = false;
        while ((op = stream.readOp()) != null) {
          if (LOG.isTraceEnabled()) {
            LOG.trace("copying op: " + op);
          }
          if (!segmentOpen) {
            newSharedEditLog.startLogSegment(op.txid, false);
            segmentOpen = true;
          }

          newSharedEditLog.logEdit(op);

          if (op.opCode == FSEditLogOpCodes.OP_END_LOG_SEGMENT) {
            newSharedEditLog.logSync();
            newSharedEditLog.endCurrentLogSegment(false);
            LOG.debug("ending log segment because of END_LOG_SEGMENT op in "
                + stream);
            segmentOpen = false;
          }
        }

        // The source stream may end without an END_LOG_SEGMENT op
        // (in-progress segment); close the destination segment anyway.
        if (segmentOpen) {
          LOG.debug("ending log segment because of end of stream in " + stream);
          newSharedEditLog.logSync();
          newSharedEditLog.endCurrentLogSegment(false);
          segmentOpen = false;
        }
      }
    } finally {
      if (streams != null) {
        FSEditLog.closeAllStreams(streams);
      }
    }
  }
1140
1141 @VisibleForTesting
1142 public static boolean doRollback(Configuration conf,
1143 boolean isConfirmationNeeded) throws IOException {
1144 String nsId = DFSUtil.getNamenodeNameServiceId(conf);
1145 String namenodeId = HAUtil.getNameNodeId(conf, nsId);
1146 initializeGenericKeys(conf, nsId, namenodeId);
1147
1148 FSNamesystem nsys = new FSNamesystem(conf, new FSImage(conf));
1149 System.err.print(
1150 "\"rollBack\" will remove the current state of the file system,\n"
1151 + "returning you to the state prior to initiating your recent.\n"
1152 + "upgrade. This action is permanent and cannot be undone. If you\n"
1153 + "are performing a rollback in an HA environment, you should be\n"
1154 + "certain that no NameNode process is running on any host.");
1155 if (isConfirmationNeeded) {
1156 if (!confirmPrompt("Roll back file system state?")) {
1157 System.err.println("Rollback aborted.");
1158 return true;
1159 }
1160 }
1161 nsys.getFSImage().doRollback(nsys);
1162 return false;
1163 }
1164
  /** Print the usage string, followed by a blank line, to {@code out}. */
  private static void printUsage(PrintStream out) {
    out.println(USAGE + "\n");
  }
1168
  /**
   * Parse the NameNode command-line arguments into a {@link StartupOption}.
   *
   * @param args command-line arguments; may be null (treated as empty)
   * @return the parsed startup option (defaulting to REGULAR), or null if
   *         the arguments are invalid
   */
  @VisibleForTesting
  static StartupOption parseArguments(String args[]) {
    int argsLen = (args == null) ? 0 : args.length;
    StartupOption startOpt = StartupOption.REGULAR;
    for(int i=0; i < argsLen; i++) {
      String cmd = args[i];
      if (StartupOption.FORMAT.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.FORMAT;
        // FORMAT consumes all remaining flags: -clusterid <id>,
        // -force, -nonInteractive.
        for (i = i + 1; i < argsLen; i++) {
          if (args[i].equalsIgnoreCase(StartupOption.CLUSTERID.getName())) {
            i++;
            if (i >= argsLen) {
              // if no cluster id specified, return null
              LOG.fatal("Must specify a valid cluster ID after the "
                  + StartupOption.CLUSTERID.getName() + " flag");
              return null;
            }
            String clusterId = args[i];
            // Make sure an id is specified and not another flag
            if (clusterId.isEmpty() ||
                clusterId.equalsIgnoreCase(StartupOption.FORCE.getName()) ||
                clusterId.equalsIgnoreCase(
                    StartupOption.NONINTERACTIVE.getName())) {
              LOG.fatal("Must specify a valid cluster ID after the "
                  + StartupOption.CLUSTERID.getName() + " flag");
              return null;
            }
            startOpt.setClusterId(clusterId);
          }

          if (args[i].equalsIgnoreCase(StartupOption.FORCE.getName())) {
            startOpt.setForceFormat(true);
          }

          if (args[i].equalsIgnoreCase(StartupOption.NONINTERACTIVE.getName())) {
            startOpt.setInteractiveFormat(false);
          }
        }
      } else if (StartupOption.GENCLUSTERID.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.GENCLUSTERID;
      } else if (StartupOption.REGULAR.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.REGULAR;
      } else if (StartupOption.BACKUP.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.BACKUP;
      } else if (StartupOption.CHECKPOINT.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.CHECKPOINT;
      } else if (StartupOption.UPGRADE.getName().equalsIgnoreCase(cmd)
          || StartupOption.UPGRADEONLY.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.UPGRADE.getName().equalsIgnoreCase(cmd) ?
            StartupOption.UPGRADE : StartupOption.UPGRADEONLY;
        /* Can be followed by CLUSTERID with a required parameter or
         * RENAMERESERVED with an optional parameter
         */
        while (i + 1 < argsLen) {
          String flag = args[i + 1];
          if (flag.equalsIgnoreCase(StartupOption.CLUSTERID.getName())) {
            if (i + 2 < argsLen) {
              i += 2;
              startOpt.setClusterId(args[i]);
            } else {
              LOG.fatal("Must specify a valid cluster ID after the "
                  + StartupOption.CLUSTERID.getName() + " flag");
              return null;
            }
          } else if (flag.equalsIgnoreCase(StartupOption.RENAMERESERVED
              .getName())) {
            if (i + 2 < argsLen) {
              FSImageFormat.setRenameReservedPairs(args[i + 2]);
              i += 2;
            } else {
              // RENAMERESERVED's parameter is optional: fall back to
              // the default rename pairs when none is given.
              FSImageFormat.useDefaultRenameReservedPairs();
              i += 1;
            }
          } else {
            LOG.fatal("Unknown upgrade flag " + flag);
            return null;
          }
        }
      } else if (StartupOption.ROLLINGUPGRADE.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.ROLLINGUPGRADE;
        ++i;
        if (i >= argsLen) {
          LOG.fatal("Must specify a rolling upgrade startup option "
              + RollingUpgradeStartupOption.getAllOptionString());
          return null;
        }
        startOpt.setRollingUpgradeStartupOption(args[i]);
      } else if (StartupOption.ROLLBACK.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.ROLLBACK;
      } else if (StartupOption.FINALIZE.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.FINALIZE;
      } else if (StartupOption.IMPORT.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.IMPORT;
      } else if (StartupOption.BOOTSTRAPSTANDBY.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.BOOTSTRAPSTANDBY;
        // Remaining args belong to the BootstrapStandby tool; stop parsing.
        return startOpt;
      } else if (StartupOption.INITIALIZESHAREDEDITS.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.INITIALIZESHAREDEDITS;
        for (i = i + 1 ; i < argsLen; i++) {
          if (StartupOption.NONINTERACTIVE.getName().equals(args[i])) {
            startOpt.setInteractiveFormat(false);
          } else if (StartupOption.FORCE.getName().equals(args[i])) {
            startOpt.setForceFormat(true);
          } else {
            LOG.fatal("Invalid argument: " + args[i]);
            return null;
          }
        }
        return startOpt;
      } else if (StartupOption.RECOVER.getName().equalsIgnoreCase(cmd)) {
        // RECOVER must be the only startup option on the command line.
        if (startOpt != StartupOption.REGULAR) {
          throw new RuntimeException("Can't combine -recover with " +
              "other startup options.");
        }
        startOpt = StartupOption.RECOVER;
        while (++i < argsLen) {
          if (args[i].equalsIgnoreCase(
              StartupOption.FORCE.getName())) {
            startOpt.setForce(MetaRecoveryContext.FORCE_FIRST_CHOICE);
          } else {
            throw new RuntimeException("Error parsing recovery options: " +
                "can't understand option \"" + args[i] + "\"");
          }
        }
      } else if (StartupOption.METADATAVERSION.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.METADATAVERSION;
      } else {
        return null;
      }
    }
    return startOpt;
  }
1301
  /** Record the startup option in the configuration for later retrieval. */
  private static void setStartupOption(Configuration conf, StartupOption opt) {
    conf.set(DFS_NAMENODE_STARTUP_KEY, opt.name());
  }
1305
  /**
   * Read the startup option back out of the configuration, defaulting
   * to REGULAR if none was recorded.
   */
  static StartupOption getStartupOption(Configuration conf) {
    return StartupOption.valueOf(conf.get(DFS_NAMENODE_STARTUP_KEY,
        StartupOption.REGULAR.toString()));
  }
1310
  /**
   * Run metadata recovery: confirm with the user (unless forced), load
   * the namesystem from disk, and save a fresh namespace image.
   *
   * @param startOpt the RECOVER startup option, carrying the force level
   * @param conf configuration to use
   * @throws IOException if recovery fails
   */
  private static void doRecovery(StartupOption startOpt, Configuration conf)
      throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);
    // Prompt only when the user did not pass a strong-enough force flag.
    if (startOpt.getForce() < MetaRecoveryContext.FORCE_ALL) {
      if (!confirmPrompt("You have selected Metadata Recovery mode. " +
          "This mode is intended to recover lost metadata on a corrupt " +
          "filesystem. Metadata recovery mode often permanently deletes " +
          "data from your HDFS filesystem. Please back up your edit log " +
          "and fsimage before trying this!\n\n" +
          "Are you ready to proceed? (Y/N)\n")) {
        System.err.println("Recovery aborted at user request.\n");
        return;
      }
    }
    MetaRecoveryContext.LOG.info("starting recovery...");
    UserGroupInformation.setConfiguration(conf);
    NameNode.initMetrics(conf, startOpt.toNodeRole());
    FSNamesystem fsn = null;
    try {
      fsn = FSNamesystem.loadFromDisk(conf);
      fsn.getFSImage().saveNamespace(fsn);
      MetaRecoveryContext.LOG.info("RECOVERY COMPLETE");
    } catch (IOException e) {
      MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
      throw e;
    } catch (RuntimeException e) {
      MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
      throw e;
    } finally {
      if (fsn != null)
        fsn.close();
    }
  }
1346
1347 /**
1348 * Verify that configured directories exist, then print the metadata versions
1349 * of the software and the image.
1350 *
1351 * @param conf configuration to use
1352 * @throws IOException
1353 */
1354 private static boolean printMetadataVersion(Configuration conf)
1355 throws IOException {
1356 final String nsId = DFSUtil.getNamenodeNameServiceId(conf);
1357 final String namenodeId = HAUtil.getNameNodeId(conf, nsId);
1358 NameNode.initializeGenericKeys(conf, nsId, namenodeId);
1359 final FSImage fsImage = new FSImage(conf);
1360 final FSNamesystem fs = new FSNamesystem(conf, fsImage, false);
1361 return fsImage.recoverTransitionRead(
1362 StartupOption.METADATAVERSION, fs, null);
1363 }
1364
  /**
   * Parse the arguments and either construct a NameNode (or BackupNode)
   * or run the requested one-shot administrative command.
   *
   * Note: several branches call {@code terminate(...)}, which normally
   * exits the JVM; the returns after them only silence javac.
   *
   * @param argv command-line arguments
   * @param conf configuration to use; a fresh HdfsConfiguration if null
   * @return a running NameNode, or null when a one-shot command was run
   *         (or the arguments were invalid)
   * @throws IOException if startup or the requested command fails
   */
  public static NameNode createNameNode(String argv[], Configuration conf)
      throws IOException {
    LOG.info("createNameNode " + Arrays.asList(argv));
    if (conf == null)
      conf = new HdfsConfiguration();
    StartupOption startOpt = parseArguments(argv);
    if (startOpt == null) {
      printUsage(System.err);
      return null;
    }
    setStartupOption(conf, startOpt);

    switch (startOpt) {
    case FORMAT: {
      boolean aborted = format(conf, startOpt.getForceFormat(),
          startOpt.getInteractiveFormat());
      terminate(aborted ? 1 : 0);
      return null; // avoid javac warning
    }
    case GENCLUSTERID: {
      System.err.println("Generating new cluster id:");
      System.out.println(NNStorage.newClusterID());
      terminate(0);
      return null;
    }
    case FINALIZE: {
      System.err.println("Use of the argument '" + StartupOption.FINALIZE +
          "' is no longer supported. To finalize an upgrade, start the NN " +
          " and then run `hdfs dfsadmin -finalizeUpgrade'");
      terminate(1);
      return null; // avoid javac warning
    }
    case ROLLBACK: {
      boolean aborted = doRollback(conf, true);
      terminate(aborted ? 1 : 0);
      return null; // avoid warning
    }
    case BOOTSTRAPSTANDBY: {
      // Hand all arguments after the option itself to the tool.
      String toolArgs[] = Arrays.copyOfRange(argv, 1, argv.length);
      int rc = BootstrapStandby.run(toolArgs, conf);
      terminate(rc);
      return null; // avoid warning
    }
    case INITIALIZESHAREDEDITS: {
      boolean aborted = initializeSharedEdits(conf,
          startOpt.getForceFormat(),
          startOpt.getInteractiveFormat());
      terminate(aborted ? 1 : 0);
      return null; // avoid warning
    }
    case BACKUP:
    case CHECKPOINT: {
      NamenodeRole role = startOpt.toNodeRole();
      DefaultMetricsSystem.initialize(role.toString().replace(" ", ""));
      return new BackupNode(conf, role);
    }
    case RECOVER: {
      NameNode.doRecovery(startOpt, conf);
      return null;
    }
    case METADATAVERSION: {
      printMetadataVersion(conf);
      terminate(0);
      return null; // avoid javac warning
    }
    case UPGRADEONLY: {
      // Perform the upgrade during NameNode construction, then exit.
      DefaultMetricsSystem.initialize("NameNode");
      new NameNode(conf);
      terminate(0);
      return null;
    }
    default: {
      DefaultMetricsSystem.initialize("NameNode");
      return new NameNode(conf);
    }
    }
  }
1442
1443 /**
1444 * In federation configuration is set for a set of
1445 * namenode and secondary namenode/backup/checkpointer, which are
1446 * grouped under a logical nameservice ID. The configuration keys specific
1447 * to them have suffix set to configured nameserviceId.
1448 *
1449 * This method copies the value from specific key of format key.nameserviceId
1450 * to key, to set up the generic configuration. Once this is done, only
1451 * generic version of the configuration is read in rest of the code, for
1452 * backward compatibility and simpler code changes.
1453 *
1454 * @param conf
1455 * Configuration object to lookup specific key and to set the value
1456 * to the key passed. Note the conf object is modified
1457 * @param nameserviceId name service Id (to distinguish federated NNs)
1458 * @param namenodeId the namenode ID (to distinguish HA NNs)
1459 * @see DFSUtil#setGenericConf(Configuration, String, String, String...)
1460 */
1461 public static void initializeGenericKeys(Configuration conf,
1462 String nameserviceId, String namenodeId) {
1463 if ((nameserviceId != null && !nameserviceId.isEmpty()) ||
1464 (namenodeId != null && !namenodeId.isEmpty())) {
1465 if (nameserviceId != null) {
1466 conf.set(DFS_NAMESERVICE_ID, nameserviceId);
1467 }
1468 if (namenodeId != null) {
1469 conf.set(DFS_HA_NAMENODE_ID_KEY, namenodeId);
1470 }
1471
1472 DFSUtil.setGenericConf(conf, nameserviceId, namenodeId,
1473 NAMENODE_SPECIFIC_KEYS);
1474 DFSUtil.setGenericConf(conf, nameserviceId, null,
1475 NAMESERVICE_SPECIFIC_KEYS);
1476 }
1477
1478 // If the RPC address is set use it to (re-)configure the default FS
1479 if (conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY) != null) {
1480 URI defaultUri = URI.create(HdfsConstants.HDFS_URI_SCHEME + "://"
1481 + conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY));
1482 conf.set(FS_DEFAULT_NAME_KEY, defaultUri.toString());
1483 LOG.debug("Setting " + FS_DEFAULT_NAME_KEY + " to " + defaultUri.toString());
1484 }
1485 }
1486
1487 /**
1488 * Get the name service Id for the node
1489 * @return name service Id or null if federation is not configured
1490 */
1491 protected String getNameServiceId(Configuration conf) {
1492 return DFSUtil.getNamenodeNameServiceId(conf);
1493 }
1494
1495 /**
1496 */
1497 public static void main(String argv[]) throws Exception {
1498 if (DFSUtil.parseHelpArgument(argv, NameNode.USAGE, System.out, true)) {
1499 System.exit(0);
1500 }
1501
1502 try {
1503 StringUtils.startupShutdownMessage(NameNode.class, argv, LOG);
1504 NameNode namenode = createNameNode(argv, null);
1505 if (namenode != null) {
1506 namenode.join();
1507 }
1508 } catch (Throwable e) {
1509 LOG.fatal("Failed to start namenode.", e);
1510 terminate(1, e);
1511 }
1512 }
1513
1514 synchronized void monitorHealth()
1515 throws HealthCheckFailedException, AccessControlException {
1516 namesystem.checkSuperuserPrivilege();
1517 if (!haEnabled) {
1518 return; // no-op, if HA is not enabled
1519 }
1520 getNamesystem().checkAvailableResources();
1521 if (!getNamesystem().nameNodeHasResourcesAvailable()) {
1522 throw new HealthCheckFailedException(
1523 "The NameNode has no resources available");
1524 }
1525 }
1526
  /**
   * Transition this NameNode to the active HA state.
   * @throws ServiceFailedException if HA is not enabled
   * @throws AccessControlException if the caller lacks superuser privilege
   */
  synchronized void transitionToActive()
      throws ServiceFailedException, AccessControlException {
    namesystem.checkSuperuserPrivilege();
    if (!haEnabled) {
      throw new ServiceFailedException("HA for namenode is not enabled");
    }
    state.setState(haContext, ACTIVE_STATE);
  }
1535
  /**
   * Transition this NameNode to the standby HA state.
   * @throws ServiceFailedException if HA is not enabled
   * @throws AccessControlException if the caller lacks superuser privilege
   */
  synchronized void transitionToStandby()
      throws ServiceFailedException, AccessControlException {
    namesystem.checkSuperuserPrivilege();
    if (!haEnabled) {
      throw new ServiceFailedException("HA for namenode is not enabled");
    }
    state.setState(haContext, STANDBY_STATE);
  }
1544
1545 synchronized HAServiceStatus getServiceStatus()
1546 throws ServiceFailedException, AccessControlException {
1547 namesystem.checkSuperuserPrivilege();
1548 if (!haEnabled) {
1549 throw new ServiceFailedException("HA for namenode is not enabled");
1550 }
1551 if (state == null) {
1552 return new HAServiceStatus(HAServiceState.INITIALIZING);
1553 }
1554 HAServiceState retState = state.getServiceState();
1555 HAServiceStatus ret = new HAServiceStatus(retState);
1556 if (retState == HAServiceState.STANDBY) {
1557 String safemodeTip = namesystem.getSafeModeTip();
1558 if (!safemodeTip.isEmpty()) {
1559 ret.setNotReadyToBecomeActive(
1560 "The NameNode is in safemode. " +
1561 safemodeTip);
1562 } else {
1563 ret.setReadyToBecomeActive();
1564 }
1565 } else if (retState == HAServiceState.ACTIVE) {
1566 ret.setReadyToBecomeActive();
1567 } else {
1568 ret.setNotReadyToBecomeActive("State is " + state);
1569 }
1570 return ret;
1571 }
1572
1573 synchronized HAServiceState getServiceState() {
1574 if (state == null) {
1575 return HAServiceState.INITIALIZING;
1576 }
1577 return state.getServiceState();
1578 }
1579
1580 /**
1581 * Register NameNodeStatusMXBean
1582 */
1583 private void registerNNSMXBean() {
1584 nameNodeStatusBeanName = MBeans.register("NameNode", "NameNodeStatus", this);
1585 }
1586
1587 @Override // NameNodeStatusMXBean
1588 public String getNNRole() {
1589 String roleStr = "";
1590 NamenodeRole role = getRole();
1591 if (null != role) {
1592 roleStr = role.toString();
1593 }
1594 return roleStr;
1595 }
1596
1597 @Override // NameNodeStatusMXBean
1598 public String getState() {
1599 String servStateStr = "";
1600 HAServiceState servState = getServiceState();
1601 if (null != servState) {
1602 servStateStr = servState.toString();
1603 }
1604 return servStateStr;
1605 }
1606
  /** @return the RPC address in "host:port" form, for the status MXBean. */
  @Override // NameNodeStatusMXBean
  public String getHostAndPort() {
    return getNameNodeAddressHostPortString();
  }
1611
  /** @return whether Hadoop security (Kerberos) is enabled. */
  @Override // NameNodeStatusMXBean
  public boolean isSecurityEnabled() {
    return UserGroupInformation.isSecurityEnabled();
  }
1616
1617 /**
1618 * Shutdown the NN immediately in an ungraceful way. Used when it would be
1619 * unsafe for the NN to continue operating, e.g. during a failed HA state
1620 * transition.
1621 *
1622 * @param t exception which warrants the shutdown. Printed to the NN log
1623 * before exit.
1624 * @throws ExitException thrown only for testing.
1625 */
1626 protected synchronized void doImmediateShutdown(Throwable t)
1627 throws ExitException {
1628 String message = "Error encountered requiring NN shutdown. " +
1629 "Shutting down immediately.";
1630 try {
1631 LOG.fatal(message, t);
1632 } catch (Throwable ignored) {
1633 // This is unlikely to happen, but there's nothing we can do if it does.
1634 }
1635 terminate(1, t);
1636 }
1637
1638 /**
1639 * Class used to expose {@link NameNode} as context to {@link HAState}
1640 */
1641 protected class NameNodeHAContext implements HAContext {
1642 @Override
1643 public void setState(HAState s) {
1644 state = s;
1645 }
1646
1647 @Override
1648 public HAState getState() {
1649 return state;
1650 }
1651
1652 @Override
1653 public void startActiveServices() throws IOException {
1654 try {
1655 namesystem.startActiveServices();
1656 startTrashEmptier(conf);
1657 } catch (Throwable t) {
1658 doImmediateShutdown(t);
1659 }
1660 }
1661
1662 @Override
1663 public void stopActiveServices() throws IOException {
1664 try {
1665 if (namesystem != null) {
1666 namesystem.stopActiveServices();
1667 }
1668 stopTrashEmptier();
1669 } catch (Throwable t) {
1670 doImmediateShutdown(t);
1671 }
1672 }
1673
1674 @Override
1675 public void startStandbyServices() throws IOException {
1676 try {
1677 namesystem.startStandbyServices(conf);
1678 } catch (Throwable t) {
1679 doImmediateShutdown(t);
1680 }
1681 }
1682
1683 @Override
1684 public void prepareToStopStandbyServices() throws ServiceFailedException {
1685 try {
1686 namesystem.prepareToStopStandbyServices();
1687 } catch (Throwable t) {
1688 doImmediateShutdown(t);
1689 }
1690 }
1691
1692 @Override
1693 public void stopStandbyServices() throws IOException {
1694 try {
1695 if (namesystem != null) {
1696 namesystem.stopStandbyServices();
1697 }
1698 } catch (Throwable t) {
1699 doImmediateShutdown(t);
1700 }
1701 }
1702
1703 @Override
1704 public void writeLock() {
1705 namesystem.writeLock();
1706 namesystem.lockRetryCache();
1707 }
1708
1709 @Override
1710 public void writeUnlock() {
1711 namesystem.unlockRetryCache();
1712 namesystem.writeUnlock();
1713 }
1714
1715 /** Check if an operation of given category is allowed */
1716 @Override
1717 public void checkOperation(final OperationCategory op)
1718 throws StandbyException {
1719 state.checkOperation(haContext, op);
1720 }
1721
1722 @Override
1723 public boolean allowStaleReads() {
1724 return allowStaleStandbyReads;
1725 }
1726
1727 }
1728
  /** @return true if this NN is currently in the standby HA state. */
  public boolean isStandbyState() {
    return (state.equals(STANDBY_STATE));
  }
1732
  /** @return true if this NN is currently in the active HA state. */
  public boolean isActiveState() {
    return (state.equals(ACTIVE_STATE));
  }
1736
1737 /**
1738 * Check that a request to change this node's HA state is valid.
1739 * In particular, verifies that, if auto failover is enabled, non-forced
1740 * requests from the HAAdmin CLI are rejected, and vice versa.
1741 *
1742 * @param req the request to check
1743 * @throws AccessControlException if the request is disallowed
1744 */
1745 void checkHaStateChange(StateChangeRequestInfo req)
1746 throws AccessControlException {
1747 boolean autoHaEnabled = conf.getBoolean(DFS_HA_AUTO_FAILOVER_ENABLED_KEY,
1748 DFS_HA_AUTO_FAILOVER_ENABLED_DEFAULT);
1749 switch (req.getSource()) {
1750 case REQUEST_BY_USER:
1751 if (autoHaEnabled) {
1752 throw new AccessControlException(
1753 "Manual HA control for this NameNode is disallowed, because " +
1754 "automatic HA is enabled.");
1755 }
1756 break;
1757 case REQUEST_BY_USER_FORCED:
1758 if (autoHaEnabled) {
1759 LOG.warn("Allowing manual HA control from " +
1760 Server.getRemoteAddress() +
1761 " even though automatic HA is enabled, because the user " +
1762 "specified the force flag");
1763 }
1764 break;
1765 case REQUEST_BY_ZKFC:
1766 if (!autoHaEnabled) {
1767 throw new AccessControlException(
1768 "Request from ZK failover controller at " +
1769 Server.getRemoteAddress() + " denied since automatic HA " +
1770 "is not enabled");
1771 }
1772 break;
1773 }
1774 }
1775 }