001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.hdfs.server.namenode;
019
020 import com.google.common.annotations.VisibleForTesting;
021 import com.google.common.base.Joiner;
022 import com.google.common.base.Preconditions;
023 import com.google.common.collect.Lists;
024
025 import org.apache.commons.logging.Log;
026 import org.apache.commons.logging.LogFactory;
027 import org.apache.hadoop.HadoopIllegalArgumentException;
028 import org.apache.hadoop.classification.InterfaceAudience;
029 import org.apache.hadoop.conf.Configuration;
030 import org.apache.hadoop.fs.FileSystem;
031 import org.apache.hadoop.fs.Trash;
032 import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
033 import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
034 import org.apache.hadoop.ha.HAServiceStatus;
035 import org.apache.hadoop.ha.HealthCheckFailedException;
036 import org.apache.hadoop.ha.ServiceFailedException;
037 import org.apache.hadoop.hdfs.DFSConfigKeys;
038 import org.apache.hadoop.hdfs.DFSUtil;
039 import org.apache.hadoop.hdfs.HAUtil;
040 import org.apache.hadoop.hdfs.HdfsConfiguration;
041 import org.apache.hadoop.hdfs.protocol.ClientProtocol;
042 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
043 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
044 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption;
045 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
046 import org.apache.hadoop.hdfs.server.namenode.ha.*;
047 import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
048 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
049 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgressMetrics;
050 import org.apache.hadoop.hdfs.server.protocol.*;
051 import org.apache.hadoop.ipc.Server;
052 import org.apache.hadoop.ipc.StandbyException;
053 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
054 import org.apache.hadoop.metrics2.util.MBeans;
055 import org.apache.hadoop.net.NetUtils;
056 import org.apache.hadoop.security.AccessControlException;
057 import org.apache.hadoop.security.RefreshUserMappingsProtocol;
058 import org.apache.hadoop.security.SecurityUtil;
059 import org.apache.hadoop.security.UserGroupInformation;
060 import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol;
061 import org.apache.hadoop.ipc.RefreshCallQueueProtocol;
062 import org.apache.hadoop.tools.GetUserMappingsProtocol;
063 import org.apache.hadoop.util.ExitUtil.ExitException;
064 import org.apache.hadoop.util.JvmPauseMonitor;
065 import org.apache.hadoop.util.ServicePlugin;
066 import org.apache.hadoop.util.StringUtils;
067
068 import javax.management.ObjectName;
069
070 import java.io.IOException;
071 import java.io.PrintStream;
072 import java.net.InetSocketAddress;
073 import java.net.URI;
074 import java.security.PrivilegedExceptionAction;
075 import java.util.ArrayList;
076 import java.util.Arrays;
077 import java.util.Collection;
078 import java.util.List;
079
080 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY;
081 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT;
082 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY;
083 import static org.apache.hadoop.hdfs.DFSConfigKeys.*;
084 import static org.apache.hadoop.util.ExitUtil.terminate;
085 import static org.apache.hadoop.util.ToolRunner.confirmPrompt;
086
087 /**********************************************************
088 * NameNode serves as both directory namespace manager and
089 * "inode table" for the Hadoop DFS. There is a single NameNode
090 * running in any DFS deployment. (Well, except when there
091 * is a second backup/failover NameNode, or when using federated NameNodes.)
092 *
093 * The NameNode controls two critical tables:
094 * 1) filename->blocksequence (namespace)
095 * 2) block->machinelist ("inodes")
096 *
097 * The first table is stored on disk and is very precious.
098 * The second table is rebuilt every time the NameNode comes up.
099 *
100 * 'NameNode' refers to both this class as well as the 'NameNode server'.
101 * The 'FSNamesystem' class actually performs most of the filesystem
102 * management. The majority of the 'NameNode' class itself is concerned
103 * with exposing the IPC interface and the HTTP server to the outside world,
104 * plus some configuration management.
105 *
106 * NameNode implements the
107 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} interface, which
108 * allows clients to ask for DFS services.
109 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} is not designed for
110 * direct use by authors of DFS client code. End-users should instead use the
111 * {@link org.apache.hadoop.fs.FileSystem} class.
112 *
113 * NameNode also implements the
114 * {@link org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol} interface,
115 * used by DataNodes that actually store DFS data blocks. These
116 * methods are invoked repeatedly and automatically by all the
117 * DataNodes in a DFS deployment.
118 *
119 * NameNode also implements the
120 * {@link org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol} interface,
121 * used by secondary namenodes or rebalancing processes to get partial
122 * NameNode state, for example partial blocksMap etc.
123 **********************************************************/
124 @InterfaceAudience.Private
125 public class NameNode implements NameNodeStatusMXBean {
  // Register hdfs-default.xml / hdfs-site.xml with Configuration before any
  // NameNode code reads DFS configuration keys.
  static{
    HdfsConfiguration.init();
  }
129
130 /**
131 * Categories of operations supported by the namenode.
132 */
133 public static enum OperationCategory {
134 /** Operations that are state agnostic */
135 UNCHECKED,
136 /** Read operation that does not change the namespace state */
137 READ,
138 /** Write operation that changes the namespace state */
139 WRITE,
140 /** Operations related to checkpointing */
141 CHECKPOINT,
142 /** Operations related to {@link JournalProtocol} */
143 JOURNAL
144 }
145
146 /**
147 * HDFS configuration can have three types of parameters:
148 * <ol>
149 * <li>Parameters that are common for all the name services in the cluster.</li>
150 * <li>Parameters that are specific to a name service. These keys are suffixed
151 * with nameserviceId in the configuration. For example,
152 * "dfs.namenode.rpc-address.nameservice1".</li>
153 * <li>Parameters that are specific to a single name node. These keys are suffixed
154 * with nameserviceId and namenodeId in the configuration. for example,
155 * "dfs.namenode.rpc-address.nameservice1.namenode1"</li>
156 * </ol>
157 *
158 * In the latter cases, operators may specify the configuration without
159 * any suffix, with a nameservice suffix, or with a nameservice and namenode
160 * suffix. The more specific suffix will take precedence.
161 *
162 * These keys are specific to a given namenode, and thus may be configured
163 * globally, for a nameservice, or for a specific namenode within a nameservice.
164 */
165 public static final String[] NAMENODE_SPECIFIC_KEYS = {
166 DFS_NAMENODE_RPC_ADDRESS_KEY,
167 DFS_NAMENODE_RPC_BIND_HOST_KEY,
168 DFS_NAMENODE_NAME_DIR_KEY,
169 DFS_NAMENODE_EDITS_DIR_KEY,
170 DFS_NAMENODE_SHARED_EDITS_DIR_KEY,
171 DFS_NAMENODE_CHECKPOINT_DIR_KEY,
172 DFS_NAMENODE_CHECKPOINT_EDITS_DIR_KEY,
173 DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY,
174 DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY,
175 DFS_NAMENODE_HTTP_ADDRESS_KEY,
176 DFS_NAMENODE_HTTPS_ADDRESS_KEY,
177 DFS_NAMENODE_KEYTAB_FILE_KEY,
178 DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY,
179 DFS_NAMENODE_SECONDARY_HTTPS_ADDRESS_KEY,
180 DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY,
181 DFS_NAMENODE_BACKUP_ADDRESS_KEY,
182 DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY,
183 DFS_NAMENODE_BACKUP_SERVICE_RPC_ADDRESS_KEY,
184 DFS_NAMENODE_USER_NAME_KEY,
185 DFS_NAMENODE_INTERNAL_SPNEGO_USER_NAME_KEY,
186 DFS_HA_FENCE_METHODS_KEY,
187 DFS_HA_ZKFC_PORT_KEY,
188 DFS_HA_FENCE_METHODS_KEY
189 };
190
191 /**
192 * @see #NAMENODE_SPECIFIC_KEYS
193 * These keys are specific to a nameservice, but may not be overridden
194 * for a specific namenode.
195 */
196 public static final String[] NAMESERVICE_SPECIFIC_KEYS = {
197 DFS_HA_AUTO_FAILOVER_ENABLED_KEY
198 };
199
  /**
   * Command-line usage message for the NameNode entry point.
   * NOTE(review): RENAMERESERVED is concatenated with no space before
   * "&lt;k-v pairs&gt;" — confirm this rendering is intended before changing,
   * since the string is printed to users verbatim.
   */
  private static final String USAGE = "Usage: java NameNode ["
      + StartupOption.BACKUP.getName() + "] | ["
      + StartupOption.CHECKPOINT.getName() + "] | ["
      + StartupOption.FORMAT.getName() + " ["
      + StartupOption.CLUSTERID.getName() + " cid ] ["
      + StartupOption.FORCE.getName() + "] ["
      + StartupOption.NONINTERACTIVE.getName() + "] ] | ["
      + StartupOption.UPGRADE.getName() +
        " [" + StartupOption.CLUSTERID.getName() + " cid]" +
        " [" + StartupOption.RENAMERESERVED.getName() + "<k-v pairs>] ] | ["
      + StartupOption.ROLLBACK.getName() + "] | ["
      + StartupOption.ROLLINGUPGRADE.getName() + " <"
      + RollingUpgradeStartupOption.DOWNGRADE.name().toLowerCase() + "|"
      + RollingUpgradeStartupOption.ROLLBACK.name().toLowerCase() + "> ] | ["
      + StartupOption.FINALIZE.getName() + "] | ["
      + StartupOption.IMPORT.getName() + "] | ["
      + StartupOption.INITIALIZESHAREDEDITS.getName() + "] | ["
      + StartupOption.BOOTSTRAPSTANDBY.getName() + "] | ["
      + StartupOption.RECOVER.getName() + " [ " + StartupOption.FORCE.getName()
      + " ] ]";
220
221 public long getProtocolVersion(String protocol,
222 long clientVersion) throws IOException {
223 if (protocol.equals(ClientProtocol.class.getName())) {
224 return ClientProtocol.versionID;
225 } else if (protocol.equals(DatanodeProtocol.class.getName())){
226 return DatanodeProtocol.versionID;
227 } else if (protocol.equals(NamenodeProtocol.class.getName())){
228 return NamenodeProtocol.versionID;
229 } else if (protocol.equals(RefreshAuthorizationPolicyProtocol.class.getName())){
230 return RefreshAuthorizationPolicyProtocol.versionID;
231 } else if (protocol.equals(RefreshUserMappingsProtocol.class.getName())){
232 return RefreshUserMappingsProtocol.versionID;
233 } else if (protocol.equals(RefreshCallQueueProtocol.class.getName())) {
234 return RefreshCallQueueProtocol.versionID;
235 } else if (protocol.equals(GetUserMappingsProtocol.class.getName())){
236 return GetUserMappingsProtocol.versionID;
237 } else {
238 throw new IOException("Unknown protocol to name node: " + protocol);
239 }
240 }
241
  /** Default NameNode RPC port, used when a URI omits the port. */
  public static final int DEFAULT_PORT = 8020;
  /** General NameNode logger. */
  public static final Log LOG = LogFactory.getLog(NameNode.class.getName());
  /** Logger dedicated to namespace state-change events. */
  public static final Log stateChangeLog = LogFactory.getLog("org.apache.hadoop.hdfs.StateChange");
  /** Logger dedicated to block state-change events. */
  public static final Log blockStateChangeLog = LogFactory.getLog("BlockStateChange");
  /** Shared singleton for the HA active state. */
  public static final HAState ACTIVE_STATE = new ActiveState();
  /** Shared singleton for the HA standby state. */
  public static final HAState STANDBY_STATE = new StandbyState();
248
  /** The namesystem that performs the actual filesystem management. */
  protected FSNamesystem namesystem;
  protected final Configuration conf;
  /** Role this node plays (namenode, backup, checkpoint). */
  protected final NamenodeRole role;
  /** Current HA state; volatile because it is swapped on failover. */
  private volatile HAState state;
  private final boolean haEnabled;
  private final HAContext haContext;
  /** Whether a standby may serve (possibly stale) reads. */
  protected final boolean allowStaleStandbyReads;


  /** httpServer */
  protected NameNodeHttpServer httpServer;
  /** Background thread that periodically empties the trash. */
  private Thread emptier;
  /** only used for testing purposes */
  protected boolean stopRequested = false;
  /** Registration information of this name-node */
  protected NamenodeRegistration nodeRegistration;
  /** Activated plug-ins. */
  private List<ServicePlugin> plugins;

  private NameNodeRpcServer rpcServer;

  /** Monitors JVM pauses (e.g. long GC) and logs them. */
  private JvmPauseMonitor pauseMonitor;
  /** JMX bean name for NameNodeStatus; unregistered on stop(). */
  private ObjectName nameNodeStatusBeanName;
  /**
   * The namenode address that clients will use to access this namenode
   * or the name service. For HA configurations using logical URI, it
   * will be the logical address.
   */
  private String clientNamenodeAddress;
278
  /** Format a new filesystem. Destroys any filesystem that may already
   * exist at this location. **/
  public static void format(Configuration conf) throws IOException {
    format(conf, true, true); // force = true, interactive = true
  }
284
  /** Metrics sink shared by all NameNode components; set in initMetrics(). */
  static NameNodeMetrics metrics;
  /** Tracks and reports startup progress; registered as a metrics source. */
  private static final StartupProgress startupProgress = new StartupProgress();
  /** Return the {@link FSNamesystem} object.
   * @return {@link FSNamesystem} object.
   */
  public FSNamesystem getNamesystem() {
    return namesystem;
  }

  /** @return the RPC server facade exposing all namenode protocols. */
  public NamenodeProtocols getRpcServer() {
    return rpcServer;
  }

  /** Create the shared metrics instance for the given role. */
  static void initMetrics(Configuration conf, NamenodeRole role) {
    metrics = NameNodeMetrics.create(conf, role);
  }

  /** @return the shared metrics instance (null before initMetrics). */
  public static NameNodeMetrics getNameNodeMetrics() {
    return metrics;
  }

  /**
   * Returns object used for reporting namenode startup progress.
   *
   * @return StartupProgress for reporting namenode startup progress
   */
  public static StartupProgress getStartupProgress() {
    return startupProgress;
  }

  /**
   * Return the service name of the issued delegation token.
   *
   * @return The name service id in HA-mode, or the rpc address in non-HA mode
   */
  public String getTokenServiceName() {
    return getClientNamenodeAddress();
  }
323
324 /**
325 * Set the namenode address that will be used by clients to access this
326 * namenode or name service. This needs to be called before the config
327 * is overriden.
328 */
329 public void setClientNamenodeAddress(Configuration conf) {
330 String nnAddr = conf.get(FS_DEFAULT_NAME_KEY);
331 if (nnAddr == null) {
332 // default fs is not set.
333 clientNamenodeAddress = null;
334 return;
335 }
336
337 LOG.info(FS_DEFAULT_NAME_KEY + " is " + nnAddr);
338 URI nnUri = URI.create(nnAddr);
339
340 String nnHost = nnUri.getHost();
341 if (nnHost == null) {
342 clientNamenodeAddress = null;
343 return;
344 }
345
346 if (DFSUtil.getNameServiceIds(conf).contains(nnHost)) {
347 // host name is logical
348 clientNamenodeAddress = nnHost;
349 } else if (nnUri.getPort() > 0) {
350 // physical address with a valid port
351 clientNamenodeAddress = nnUri.getAuthority();
352 } else {
353 // the port is missing or 0. Figure out real bind address later.
354 clientNamenodeAddress = null;
355 return;
356 }
357 LOG.info("Clients are to use " + clientNamenodeAddress + " to access"
358 + " this namenode/service.");
359 }
360
361 /**
362 * Get the namenode address to be used by clients.
363 * @return nn address
364 */
365 public String getClientNamenodeAddress() {
366 return clientNamenodeAddress;
367 }
368
369 public static InetSocketAddress getAddress(String address) {
370 return NetUtils.createSocketAddr(address, DEFAULT_PORT);
371 }
372
373 /**
374 * Set the configuration property for the service rpc address
375 * to address
376 */
377 public static void setServiceAddress(Configuration conf,
378 String address) {
379 LOG.info("Setting ADDRESS " + address);
380 conf.set(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, address);
381 }
382
383 /**
384 * Fetches the address for services to use when connecting to namenode
385 * based on the value of fallback returns null if the special
386 * address is not specified or returns the default namenode address
387 * to be used by both clients and services.
388 * Services here are datanodes, backup node, any non client connection
389 */
390 public static InetSocketAddress getServiceAddress(Configuration conf,
391 boolean fallback) {
392 String addr = conf.get(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY);
393 if (addr == null || addr.isEmpty()) {
394 return fallback ? getAddress(conf) : null;
395 }
396 return getAddress(addr);
397 }
398
  /** @return the namenode RPC address derived from the default filesystem URI. */
  public static InetSocketAddress getAddress(Configuration conf) {
    URI filesystemURI = FileSystem.getDefaultUri(conf);
    return getAddress(filesystemURI);
  }
403
404
405 /**
406 * TODO:FEDERATION
407 * @param filesystemURI
408 * @return address of file system
409 */
410 public static InetSocketAddress getAddress(URI filesystemURI) {
411 String authority = filesystemURI.getAuthority();
412 if (authority == null) {
413 throw new IllegalArgumentException(String.format(
414 "Invalid URI for NameNode address (check %s): %s has no authority.",
415 FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString()));
416 }
417 if (!HdfsConstants.HDFS_URI_SCHEME.equalsIgnoreCase(
418 filesystemURI.getScheme())) {
419 throw new IllegalArgumentException(String.format(
420 "Invalid URI for NameNode address (check %s): %s is not of scheme '%s'.",
421 FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString(),
422 HdfsConstants.HDFS_URI_SCHEME));
423 }
424 return getAddress(authority);
425 }
426
427 public static URI getUri(InetSocketAddress namenode) {
428 int port = namenode.getPort();
429 String portString = port == DEFAULT_PORT ? "" : (":"+port);
430 return URI.create(HdfsConstants.HDFS_URI_SCHEME + "://"
431 + namenode.getHostName()+portString);
432 }
433
434 //
435 // Common NameNode methods implementation for the active name-node role.
436 //
437 public NamenodeRole getRole() {
438 return role;
439 }
440
441 boolean isRole(NamenodeRole that) {
442 return role.equals(that);
443 }
444
445 /**
446 * Given a configuration get the address of the service rpc server
447 * If the service rpc is not configured returns null
448 */
449 protected InetSocketAddress getServiceRpcServerAddress(Configuration conf) {
450 return NameNode.getServiceAddress(conf, false);
451 }
452
453 protected InetSocketAddress getRpcServerAddress(Configuration conf) {
454 return getAddress(conf);
455 }
456
457 /** Given a configuration get the bind host of the service rpc server
458 * If the bind host is not configured returns null.
459 */
460 protected String getServiceRpcServerBindHost(Configuration conf) {
461 String addr = conf.getTrimmed(DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY);
462 if (addr == null || addr.isEmpty()) {
463 return null;
464 }
465 return addr;
466 }
467
468 /** Given a configuration get the bind host of the client rpc server
469 * If the bind host is not configured returns null.
470 */
471 protected String getRpcServerBindHost(Configuration conf) {
472 String addr = conf.getTrimmed(DFS_NAMENODE_RPC_BIND_HOST_KEY);
473 if (addr == null || addr.isEmpty()) {
474 return null;
475 }
476 return addr;
477 }
478
479 /**
480 * Modifies the configuration passed to contain the service rpc address setting
481 */
482 protected void setRpcServiceServerAddress(Configuration conf,
483 InetSocketAddress serviceRPCAddress) {
484 setServiceAddress(conf, NetUtils.getHostPortString(serviceRPCAddress));
485 }
486
487 protected void setRpcServerAddress(Configuration conf,
488 InetSocketAddress rpcAddress) {
489 FileSystem.setDefaultUri(conf, getUri(rpcAddress));
490 }
491
492 protected InetSocketAddress getHttpServerAddress(Configuration conf) {
493 return getHttpAddress(conf);
494 }
495
496 /** @return the NameNode HTTP address. */
497 public static InetSocketAddress getHttpAddress(Configuration conf) {
498 return NetUtils.createSocketAddr(
499 conf.get(DFS_NAMENODE_HTTP_ADDRESS_KEY, DFS_NAMENODE_HTTP_ADDRESS_DEFAULT));
500 }
501
  /** Load the namesystem from disk; extension point for subclasses. */
  protected void loadNamesystem(Configuration conf) throws IOException {
    this.namesystem = FSNamesystem.loadFromDisk(conf);
  }

  /** @return this node's registration info (set by setRegistration). */
  NamenodeRegistration getRegistration() {
    return nodeRegistration;
  }

  /**
   * Build the registration record from the live RPC/HTTP addresses and
   * current storage info. Requires rpcServer and httpServer to be started.
   */
  NamenodeRegistration setRegistration() {
    nodeRegistration = new NamenodeRegistration(
        NetUtils.getHostPortString(rpcServer.getRpcAddress()),
        NetUtils.getHostPortString(getHttpAddress()),
        getFSImage().getStorage(), getRole());
    return nodeRegistration;
  }
517
518 /* optimize ugi lookup for RPC operations to avoid a trip through
519 * UGI.getCurrentUser which is synch'ed
520 */
521 public static UserGroupInformation getRemoteUser() throws IOException {
522 UserGroupInformation ugi = Server.getRemoteUser();
523 return (ugi != null) ? ugi : UserGroupInformation.getCurrentUser();
524 }
525
526
527 /**
528 * Login as the configured user for the NameNode.
529 */
530 void loginAsNameNodeUser(Configuration conf) throws IOException {
531 InetSocketAddress socAddr = getRpcServerAddress(conf);
532 SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
533 DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName());
534 }
535
536 /**
537 * Initialize name-node.
538 *
539 * @param conf the configuration
540 */
541 protected void initialize(Configuration conf) throws IOException {
542 if (conf.get(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS) == null) {
543 String intervals = conf.get(DFS_METRICS_PERCENTILES_INTERVALS_KEY);
544 if (intervals != null) {
545 conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
546 intervals);
547 }
548 }
549
550 UserGroupInformation.setConfiguration(conf);
551 loginAsNameNodeUser(conf);
552
553 NameNode.initMetrics(conf, this.getRole());
554 StartupProgressMetrics.register(startupProgress);
555
556 if (NamenodeRole.NAMENODE == role) {
557 startHttpServer(conf);
558 }
559 loadNamesystem(conf);
560
561 rpcServer = createRpcServer(conf);
562 if (clientNamenodeAddress == null) {
563 // This is expected for MiniDFSCluster. Set it now using
564 // the RPC server's bind address.
565 clientNamenodeAddress =
566 NetUtils.getHostPortString(rpcServer.getRpcAddress());
567 LOG.info("Clients are to use " + clientNamenodeAddress + " to access"
568 + " this namenode/service.");
569 }
570 if (NamenodeRole.NAMENODE == role) {
571 httpServer.setNameNodeAddress(getNameNodeAddress());
572 httpServer.setFSImage(getFSImage());
573 }
574
575 pauseMonitor = new JvmPauseMonitor(conf);
576 pauseMonitor.start();
577
578 startCommonServices(conf);
579 }
580
581 /**
582 * Create the RPC server implementation. Used as an extension point for the
583 * BackupNode.
584 */
585 protected NameNodeRpcServer createRpcServer(Configuration conf)
586 throws IOException {
587 return new NameNodeRpcServer(conf, this);
588 }
589
  /** Start the services common to active and standby states */
  private void startCommonServices(Configuration conf) throws IOException {
    namesystem.startCommonServices(conf, haContext);
    registerNNSMXBean();
    // Non-NAMENODE roles (e.g. backup/checkpoint) start their HTTP server
    // here instead of in initialize().
    if (NamenodeRole.NAMENODE != role) {
      startHttpServer(conf);
      httpServer.setNameNodeAddress(getNameNodeAddress());
      httpServer.setFSImage(getFSImage());
    }
    rpcServer.start();
    // A plugin failure is logged but never aborts namenode startup.
    plugins = conf.getInstances(DFS_NAMENODE_PLUGINS_KEY,
        ServicePlugin.class);
    for (ServicePlugin p: plugins) {
      try {
        p.start(this);
      } catch (Throwable t) {
        LOG.warn("ServicePlugin " + p + " could not be started", t);
      }
    }
    LOG.info(getRole() + " RPC up at: " + rpcServer.getRpcAddress());
    if (rpcServer.getServiceRpcAddress() != null) {
      LOG.info(getRole() + " service RPC up at: "
          + rpcServer.getServiceRpcAddress());
    }
  }
615
  /**
   * Stop the services started in startCommonServices(): RPC first so no new
   * requests arrive, then the namesystem, pause monitor, plugins, and
   * finally the HTTP server. Plugin failures are logged, not rethrown.
   */
  private void stopCommonServices() {
    if(rpcServer != null) rpcServer.stop();
    if(namesystem != null) namesystem.close();
    if (pauseMonitor != null) pauseMonitor.stop();
    if (plugins != null) {
      for (ServicePlugin p : plugins) {
        try {
          p.stop();
        } catch (Throwable t) {
          LOG.warn("ServicePlugin " + p + " could not be stopped", t);
        }
      }
    }
    stopHttpServer();
  }
631
  /**
   * Start the background trash-emptier daemon thread. A zero interval
   * disables the emptier; a negative interval is a configuration error.
   *
   * @throws IOException if the configured trash interval is negative
   */
  private void startTrashEmptier(final Configuration conf) throws IOException {
    long trashInterval =
        conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT);
    if (trashInterval == 0) {
      return;
    } else if (trashInterval < 0) {
      throw new IOException("Cannot start trash emptier with negative interval."
          + " Set " + FS_TRASH_INTERVAL_KEY + " to a positive value.");
    }

    // This may be called from the transitionToActive code path, in which
    // case the current user is the administrator, not the NN. The trash
    // emptier needs to run as the NN. See HDFS-3972.
    FileSystem fs = SecurityUtil.doAsLoginUser(
        new PrivilegedExceptionAction<FileSystem>() {
          @Override
          public FileSystem run() throws IOException {
            return FileSystem.get(conf);
          }
        });
    this.emptier = new Thread(new Trash(fs, conf).getEmptier(), "Trash Emptier");
    this.emptier.setDaemon(true);
    this.emptier.start();
  }
656
  /** Interrupt and discard the trash emptier thread, if running. */
  private void stopTrashEmptier() {
    if (this.emptier != null) {
      emptier.interrupt();
      emptier = null;
    }
  }

  /** Create and start the HTTP server, wiring in startup progress. */
  private void startHttpServer(final Configuration conf) throws IOException {
    httpServer = new NameNodeHttpServer(conf, this, getHttpServerAddress(conf));
    httpServer.start();
    httpServer.setStartupProgress(startupProgress);
  }

  /** Stop the HTTP server; failures are logged so shutdown can continue. */
  private void stopHttpServer() {
    try {
      if (httpServer != null) httpServer.stop();
    } catch (Exception e) {
      LOG.error("Exception while stopping httpserver", e);
    }
  }
677
678 /**
679 * Start NameNode.
680 * <p>
681 * The name-node can be started with one of the following startup options:
682 * <ul>
683 * <li>{@link StartupOption#REGULAR REGULAR} - normal name node startup</li>
684 * <li>{@link StartupOption#FORMAT FORMAT} - format name node</li>
685 * <li>{@link StartupOption#BACKUP BACKUP} - start backup node</li>
686 * <li>{@link StartupOption#CHECKPOINT CHECKPOINT} - start checkpoint node</li>
687 * <li>{@link StartupOption#UPGRADE UPGRADE} - start the cluster
688 * upgrade and create a snapshot of the current file system state</li>
689 * <li>{@link StartupOption#RECOVER RECOVERY} - recover name node
690 * metadata</li>
691 * <li>{@link StartupOption#ROLLBACK ROLLBACK} - roll the
692 * cluster back to the previous state</li>
693 * <li>{@link StartupOption#FINALIZE FINALIZE} - finalize
694 * previous upgrade</li>
695 * <li>{@link StartupOption#IMPORT IMPORT} - import checkpoint</li>
696 * </ul>
697 * The option is passed via configuration field:
698 * <tt>dfs.namenode.startup</tt>
699 *
700 * The conf will be modified to reflect the actual ports on which
701 * the NameNode is up and running if the user passes the port as
702 * <code>zero</code> in the conf.
703 *
704 * @param conf confirguration
705 * @throws IOException
706 */
707 public NameNode(Configuration conf) throws IOException {
708 this(conf, NamenodeRole.NAMENODE);
709 }
710
  /**
   * Construct a NameNode with the given role. The HA state is chosen before
   * initialize() so services come up in the right mode, and generic
   * (nameservice/namenode-suffixed) keys are resolved before anything reads
   * the configuration. On any failure the partially-started node is stopped
   * before the exception is rethrown.
   */
  protected NameNode(Configuration conf, NamenodeRole role)
      throws IOException {
    this.conf = conf;
    this.role = role;
    setClientNamenodeAddress(conf);
    String nsId = getNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    this.haEnabled = HAUtil.isHAEnabled(conf, nsId);
    state = createHAState(getStartupOption(conf));
    this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf);
    this.haContext = createHAContext();
    try {
      initializeGenericKeys(conf, nsId, namenodeId);
      initialize(conf);
      // Enter the initial HA state under the write lock so no RPC observes
      // a half-transitioned state.
      try {
        haContext.writeLock();
        state.prepareToEnterState(haContext);
        state.enterState(haContext);
      } finally {
        haContext.writeUnlock();
      }
    } catch (IOException e) {
      this.stop();
      throw e;
    } catch (HadoopIllegalArgumentException e) {
      this.stop();
      throw e;
    }
  }
740
741 protected HAState createHAState(StartupOption startOpt) {
742 if (!haEnabled || startOpt == StartupOption.UPGRADE) {
743 return ACTIVE_STATE;
744 } else {
745 return STANDBY_STATE;
746 }
747 }
748
749 protected HAContext createHAContext() {
750 return new NameNodeHAContext();
751 }
752
753 /**
754 * Wait for service to finish.
755 * (Normally, it runs forever.)
756 */
757 public void join() {
758 try {
759 rpcServer.join();
760 } catch (InterruptedException ie) {
761 LOG.info("Caught interrupted exception ", ie);
762 }
763 }
764
765 /**
766 * Stop all NameNode threads and wait for all to finish.
767 */
768 public void stop() {
769 synchronized(this) {
770 if (stopRequested)
771 return;
772 stopRequested = true;
773 }
774 try {
775 if (state != null) {
776 state.exitState(haContext);
777 }
778 } catch (ServiceFailedException e) {
779 LOG.warn("Encountered exception while exiting state ", e);
780 } finally {
781 stopCommonServices();
782 if (metrics != null) {
783 metrics.shutdown();
784 }
785 if (namesystem != null) {
786 namesystem.shutdown();
787 }
788 if (nameNodeStatusBeanName != null) {
789 MBeans.unregister(nameNodeStatusBeanName);
790 nameNodeStatusBeanName = null;
791 }
792 }
793 }
794
795 synchronized boolean isStopRequested() {
796 return stopRequested;
797 }
798
799 /**
800 * Is the cluster currently in safe mode?
801 */
802 public boolean isInSafeMode() {
803 return namesystem.isInSafeMode();
804 }
805
806 /** get FSImage */
807 @VisibleForTesting
808 public FSImage getFSImage() {
809 return namesystem.dir.fsImage;
810 }
811
812 /**
813 * @return NameNode RPC address
814 */
815 public InetSocketAddress getNameNodeAddress() {
816 return rpcServer.getRpcAddress();
817 }
818
819 /**
820 * @return NameNode RPC address in "host:port" string form
821 */
822 public String getNameNodeAddressHostPortString() {
823 return NetUtils.getHostPortString(rpcServer.getRpcAddress());
824 }
825
826 /**
827 * @return NameNode service RPC address if configured, the
828 * NameNode RPC address otherwise
829 */
830 public InetSocketAddress getServiceRpcAddress() {
831 final InetSocketAddress serviceAddr = rpcServer.getServiceRpcAddress();
832 return serviceAddr == null ? rpcServer.getRpcAddress() : serviceAddr;
833 }
834
835 /**
836 * @return NameNode HTTP address, used by the Web UI, image transfer,
837 * and HTTP-based file system clients like Hftp and WebHDFS
838 */
839 public InetSocketAddress getHttpAddress() {
840 return httpServer.getHttpAddress();
841 }
842
843 /**
844 * @return NameNode HTTPS address, used by the Web UI, image transfer,
845 * and HTTP-based file system clients like Hftp and WebHDFS
846 */
847 public InetSocketAddress getHttpsAddress() {
848 return httpServer.getHttpsAddress();
849 }
850
851 /**
852 * Verify that configured directories exist, then
853 * Interactively confirm that formatting is desired
854 * for each existing directory and format them.
855 *
856 * @param conf
857 * @param force
858 * @return true if formatting was aborted, false otherwise
859 * @throws IOException
860 */
861 private static boolean format(Configuration conf, boolean force,
862 boolean isInteractive) throws IOException {
863 String nsId = DFSUtil.getNamenodeNameServiceId(conf);
864 String namenodeId = HAUtil.getNameNodeId(conf, nsId);
865 initializeGenericKeys(conf, nsId, namenodeId);
866 checkAllowFormat(conf);
867
868 if (UserGroupInformation.isSecurityEnabled()) {
869 InetSocketAddress socAddr = getAddress(conf);
870 SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
871 DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName());
872 }
873
874 Collection<URI> nameDirsToFormat = FSNamesystem.getNamespaceDirs(conf);
875 List<URI> sharedDirs = FSNamesystem.getSharedEditsDirs(conf);
876 List<URI> dirsToPrompt = new ArrayList<URI>();
877 dirsToPrompt.addAll(nameDirsToFormat);
878 dirsToPrompt.addAll(sharedDirs);
879 List<URI> editDirsToFormat =
880 FSNamesystem.getNamespaceEditsDirs(conf);
881
882 // if clusterID is not provided - see if you can find the current one
883 String clusterId = StartupOption.FORMAT.getClusterId();
884 if(clusterId == null || clusterId.equals("")) {
885 //Generate a new cluster id
886 clusterId = NNStorage.newClusterID();
887 }
888 System.out.println("Formatting using clusterid: " + clusterId);
889
890 FSImage fsImage = new FSImage(conf, nameDirsToFormat, editDirsToFormat);
891 try {
892 FSNamesystem fsn = new FSNamesystem(conf, fsImage);
893 fsImage.getEditLog().initJournalsForWrite();
894
895 if (!fsImage.confirmFormat(force, isInteractive)) {
896 return true; // aborted
897 }
898
899 fsImage.format(fsn, clusterId);
900 } catch (IOException ioe) {
901 LOG.warn("Encountered exception during format: ", ioe);
902 fsImage.close();
903 throw ioe;
904 }
905 return false;
906 }
907
908 public static void checkAllowFormat(Configuration conf) throws IOException {
909 if (!conf.getBoolean(DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY,
910 DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT)) {
911 throw new IOException("The option " + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY
912 + " is set to false for this filesystem, so it "
913 + "cannot be formatted. You will need to set "
914 + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY +" parameter "
915 + "to true in order to format this filesystem");
916 }
917 }
918
919 @VisibleForTesting
920 public static boolean initializeSharedEdits(Configuration conf) throws IOException {
921 return initializeSharedEdits(conf, true);
922 }
923
924 @VisibleForTesting
925 public static boolean initializeSharedEdits(Configuration conf,
926 boolean force) throws IOException {
927 return initializeSharedEdits(conf, force, false);
928 }
929
930 /**
931 * Clone the supplied configuration but remove the shared edits dirs.
932 *
933 * @param conf Supplies the original configuration.
934 * @return Cloned configuration without the shared edit dirs.
935 * @throws IOException on failure to generate the configuration.
936 */
937 private static Configuration getConfigurationWithoutSharedEdits(
938 Configuration conf)
939 throws IOException {
940 List<URI> editsDirs = FSNamesystem.getNamespaceEditsDirs(conf, false);
941 String editsDirsString = Joiner.on(",").join(editsDirs);
942
943 Configuration confWithoutShared = new Configuration(conf);
944 confWithoutShared.unset(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY);
945 confWithoutShared.setStrings(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY,
946 editsDirsString);
947 return confWithoutShared;
948 }
949
950 /**
951 * Format a new shared edits dir and copy in enough edit log segments so that
952 * the standby NN can start up.
953 *
954 * @param conf configuration
955 * @param force format regardless of whether or not the shared edits dir exists
956 * @param interactive prompt the user when a dir exists
957 * @return true if the command aborts, false otherwise
958 */
959 private static boolean initializeSharedEdits(Configuration conf,
960 boolean force, boolean interactive) throws IOException {
961 String nsId = DFSUtil.getNamenodeNameServiceId(conf);
962 String namenodeId = HAUtil.getNameNodeId(conf, nsId);
963 initializeGenericKeys(conf, nsId, namenodeId);
964
965 if (conf.get(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY) == null) {
966 LOG.fatal("No shared edits directory configured for namespace " +
967 nsId + " namenode " + namenodeId);
968 return false;
969 }
970
971 if (UserGroupInformation.isSecurityEnabled()) {
972 InetSocketAddress socAddr = getAddress(conf);
973 SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
974 DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName());
975 }
976
977 NNStorage existingStorage = null;
978 FSImage sharedEditsImage = null;
979 try {
980 FSNamesystem fsns =
981 FSNamesystem.loadFromDisk(getConfigurationWithoutSharedEdits(conf));
982
983 existingStorage = fsns.getFSImage().getStorage();
984 NamespaceInfo nsInfo = existingStorage.getNamespaceInfo();
985
986 List<URI> sharedEditsDirs = FSNamesystem.getSharedEditsDirs(conf);
987
988 sharedEditsImage = new FSImage(conf,
989 Lists.<URI>newArrayList(),
990 sharedEditsDirs);
991 sharedEditsImage.getEditLog().initJournalsForWrite();
992
993 if (!sharedEditsImage.confirmFormat(force, interactive)) {
994 return true; // abort
995 }
996
997 NNStorage newSharedStorage = sharedEditsImage.getStorage();
998 // Call Storage.format instead of FSImage.format here, since we don't
999 // actually want to save a checkpoint - just prime the dirs with
1000 // the existing namespace info
1001 newSharedStorage.format(nsInfo);
1002 sharedEditsImage.getEditLog().formatNonFileJournals(nsInfo);
1003
1004 // Need to make sure the edit log segments are in good shape to initialize
1005 // the shared edits dir.
1006 fsns.getFSImage().getEditLog().close();
1007 fsns.getFSImage().getEditLog().initJournalsForWrite();
1008 fsns.getFSImage().getEditLog().recoverUnclosedStreams();
1009
1010 copyEditLogSegmentsToSharedDir(fsns, sharedEditsDirs, newSharedStorage,
1011 conf);
1012 } catch (IOException ioe) {
1013 LOG.error("Could not initialize shared edits dir", ioe);
1014 return true; // aborted
1015 } finally {
1016 if (sharedEditsImage != null) {
1017 try {
1018 sharedEditsImage.close();
1019 } catch (IOException ioe) {
1020 LOG.warn("Could not close sharedEditsImage", ioe);
1021 }
1022 }
1023 // Have to unlock storage explicitly for the case when we're running in a
1024 // unit test, which runs in the same JVM as NNs.
1025 if (existingStorage != null) {
1026 try {
1027 existingStorage.unlockAll();
1028 } catch (IOException ioe) {
1029 LOG.warn("Could not unlock storage directories", ioe);
1030 return true; // aborted
1031 }
1032 }
1033 }
1034 return false; // did not abort
1035 }
1036
  /**
   * Copy every edit log segment after the most recent checkpoint from the
   * local NN's journals into the freshly-formatted shared edits dir, so a
   * standby NN reading the shared dir can catch up to the active namespace.
   *
   * @param fsns namesystem whose local edit log is the copy source
   * @param sharedEditsDirs target shared edits directories (must be non-empty)
   * @param newSharedStorage storage backing the shared dirs, already formatted
   * @param conf configuration used to build the destination edit log
   * @throws IOException if reading the source or writing the target fails
   */
  private static void copyEditLogSegmentsToSharedDir(FSNamesystem fsns,
      Collection<URI> sharedEditsDirs, NNStorage newSharedStorage,
      Configuration conf) throws IOException {
    Preconditions.checkArgument(!sharedEditsDirs.isEmpty(),
        "No shared edits specified");
    // Copy edit log segments into the new shared edits dir.
    List<URI> sharedEditsUris = new ArrayList<URI>(sharedEditsDirs);
    FSEditLog newSharedEditLog = new FSEditLog(conf, newSharedStorage,
        sharedEditsUris);
    newSharedEditLog.initJournalsForWrite();
    newSharedEditLog.recoverUnclosedStreams();

    FSEditLog sourceEditLog = fsns.getFSImage().editLog;

    // Everything up to the last checkpoint is already captured in the image;
    // only edits after it need to be replayed into the shared dir.
    long fromTxId = fsns.getFSImage().getMostRecentCheckpointTxId();

    Collection<EditLogInputStream> streams = null;
    try {
      streams = sourceEditLog.selectInputStreams(fromTxId + 1, 0);

      // Set the nextTxid to the CheckpointTxId+1
      newSharedEditLog.setNextTxId(fromTxId + 1);

      // Copy all edits after last CheckpointTxId to shared edits dir
      for (EditLogInputStream stream : streams) {
        LOG.debug("Beginning to copy stream " + stream + " to shared edits");
        FSEditLogOp op;
        boolean segmentOpen = false;
        while ((op = stream.readOp()) != null) {
          if (LOG.isTraceEnabled()) {
            LOG.trace("copying op: " + op);
          }
          // Lazily open a destination segment at the first op's txid so the
          // shared dir's segment boundaries mirror the source stream's.
          if (!segmentOpen) {
            newSharedEditLog.startLogSegment(op.txid, false);
            segmentOpen = true;
          }

          newSharedEditLog.logEdit(op);

          // An END_LOG_SEGMENT op closes the current destination segment;
          // sync first so the segment is durable before it is finalized.
          if (op.opCode == FSEditLogOpCodes.OP_END_LOG_SEGMENT) {
            newSharedEditLog.logSync();
            newSharedEditLog.endCurrentLogSegment(false);
            LOG.debug("ending log segment because of END_LOG_SEGMENT op in "
                + stream);
            segmentOpen = false;
          }
        }

        // Source stream ended mid-segment (e.g. an in-progress segment):
        // close out the destination segment anyway.
        if (segmentOpen) {
          LOG.debug("ending log segment because of end of stream in " + stream);
          newSharedEditLog.logSync();
          newSharedEditLog.endCurrentLogSegment(false);
          segmentOpen = false;
        }
      }
    } finally {
      if (streams != null) {
        FSEditLog.closeAllStreams(streams);
      }
    }
  }
1098
1099 @VisibleForTesting
1100 public static boolean doRollback(Configuration conf,
1101 boolean isConfirmationNeeded) throws IOException {
1102 String nsId = DFSUtil.getNamenodeNameServiceId(conf);
1103 String namenodeId = HAUtil.getNameNodeId(conf, nsId);
1104 initializeGenericKeys(conf, nsId, namenodeId);
1105
1106 FSNamesystem nsys = new FSNamesystem(conf, new FSImage(conf));
1107 System.err.print(
1108 "\"rollBack\" will remove the current state of the file system,\n"
1109 + "returning you to the state prior to initiating your recent.\n"
1110 + "upgrade. This action is permanent and cannot be undone. If you\n"
1111 + "are performing a rollback in an HA environment, you should be\n"
1112 + "certain that no NameNode process is running on any host.");
1113 if (isConfirmationNeeded) {
1114 if (!confirmPrompt("Roll back file system state?")) {
1115 System.err.println("Rollback aborted.");
1116 return true;
1117 }
1118 }
1119 nsys.dir.fsImage.doRollback(nsys);
1120 return false;
1121 }
1122
  /** Print the command-line usage banner, followed by a blank line. */
  private static void printUsage(PrintStream out) {
    out.println(USAGE + "\n");
  }
1126
1127 @VisibleForTesting
1128 static StartupOption parseArguments(String args[]) {
1129 int argsLen = (args == null) ? 0 : args.length;
1130 StartupOption startOpt = StartupOption.REGULAR;
1131 for(int i=0; i < argsLen; i++) {
1132 String cmd = args[i];
1133 if (StartupOption.FORMAT.getName().equalsIgnoreCase(cmd)) {
1134 startOpt = StartupOption.FORMAT;
1135 for (i = i + 1; i < argsLen; i++) {
1136 if (args[i].equalsIgnoreCase(StartupOption.CLUSTERID.getName())) {
1137 i++;
1138 if (i >= argsLen) {
1139 // if no cluster id specified, return null
1140 LOG.fatal("Must specify a valid cluster ID after the "
1141 + StartupOption.CLUSTERID.getName() + " flag");
1142 return null;
1143 }
1144 String clusterId = args[i];
1145 // Make sure an id is specified and not another flag
1146 if (clusterId.isEmpty() ||
1147 clusterId.equalsIgnoreCase(StartupOption.FORCE.getName()) ||
1148 clusterId.equalsIgnoreCase(
1149 StartupOption.NONINTERACTIVE.getName())) {
1150 LOG.fatal("Must specify a valid cluster ID after the "
1151 + StartupOption.CLUSTERID.getName() + " flag");
1152 return null;
1153 }
1154 startOpt.setClusterId(clusterId);
1155 }
1156
1157 if (args[i].equalsIgnoreCase(StartupOption.FORCE.getName())) {
1158 startOpt.setForceFormat(true);
1159 }
1160
1161 if (args[i].equalsIgnoreCase(StartupOption.NONINTERACTIVE.getName())) {
1162 startOpt.setInteractiveFormat(false);
1163 }
1164 }
1165 } else if (StartupOption.GENCLUSTERID.getName().equalsIgnoreCase(cmd)) {
1166 startOpt = StartupOption.GENCLUSTERID;
1167 } else if (StartupOption.REGULAR.getName().equalsIgnoreCase(cmd)) {
1168 startOpt = StartupOption.REGULAR;
1169 } else if (StartupOption.BACKUP.getName().equalsIgnoreCase(cmd)) {
1170 startOpt = StartupOption.BACKUP;
1171 } else if (StartupOption.CHECKPOINT.getName().equalsIgnoreCase(cmd)) {
1172 startOpt = StartupOption.CHECKPOINT;
1173 } else if (StartupOption.UPGRADE.getName().equalsIgnoreCase(cmd)) {
1174 startOpt = StartupOption.UPGRADE;
1175 /* Can be followed by CLUSTERID with a required parameter or
1176 * RENAMERESERVED with an optional parameter
1177 */
1178 while (i + 1 < argsLen) {
1179 String flag = args[i + 1];
1180 if (flag.equalsIgnoreCase(StartupOption.CLUSTERID.getName())) {
1181 if (i + 2 < argsLen) {
1182 i += 2;
1183 startOpt.setClusterId(args[i]);
1184 } else {
1185 LOG.fatal("Must specify a valid cluster ID after the "
1186 + StartupOption.CLUSTERID.getName() + " flag");
1187 return null;
1188 }
1189 } else if (flag.equalsIgnoreCase(StartupOption.RENAMERESERVED
1190 .getName())) {
1191 if (i + 2 < argsLen) {
1192 FSImageFormat.setRenameReservedPairs(args[i + 2]);
1193 i += 2;
1194 } else {
1195 FSImageFormat.useDefaultRenameReservedPairs();
1196 i += 1;
1197 }
1198 } else {
1199 LOG.fatal("Unknown upgrade flag " + flag);
1200 return null;
1201 }
1202 }
1203 } else if (StartupOption.ROLLINGUPGRADE.getName().equalsIgnoreCase(cmd)) {
1204 startOpt = StartupOption.ROLLINGUPGRADE;
1205 ++i;
1206 startOpt.setRollingUpgradeStartupOption(args[i]);
1207 } else if (StartupOption.ROLLBACK.getName().equalsIgnoreCase(cmd)) {
1208 startOpt = StartupOption.ROLLBACK;
1209 } else if (StartupOption.FINALIZE.getName().equalsIgnoreCase(cmd)) {
1210 startOpt = StartupOption.FINALIZE;
1211 } else if (StartupOption.IMPORT.getName().equalsIgnoreCase(cmd)) {
1212 startOpt = StartupOption.IMPORT;
1213 } else if (StartupOption.BOOTSTRAPSTANDBY.getName().equalsIgnoreCase(cmd)) {
1214 startOpt = StartupOption.BOOTSTRAPSTANDBY;
1215 return startOpt;
1216 } else if (StartupOption.INITIALIZESHAREDEDITS.getName().equalsIgnoreCase(cmd)) {
1217 startOpt = StartupOption.INITIALIZESHAREDEDITS;
1218 for (i = i + 1 ; i < argsLen; i++) {
1219 if (StartupOption.NONINTERACTIVE.getName().equals(args[i])) {
1220 startOpt.setInteractiveFormat(false);
1221 } else if (StartupOption.FORCE.getName().equals(args[i])) {
1222 startOpt.setForceFormat(true);
1223 } else {
1224 LOG.fatal("Invalid argument: " + args[i]);
1225 return null;
1226 }
1227 }
1228 return startOpt;
1229 } else if (StartupOption.RECOVER.getName().equalsIgnoreCase(cmd)) {
1230 if (startOpt != StartupOption.REGULAR) {
1231 throw new RuntimeException("Can't combine -recover with " +
1232 "other startup options.");
1233 }
1234 startOpt = StartupOption.RECOVER;
1235 while (++i < argsLen) {
1236 if (args[i].equalsIgnoreCase(
1237 StartupOption.FORCE.getName())) {
1238 startOpt.setForce(MetaRecoveryContext.FORCE_FIRST_CHOICE);
1239 } else {
1240 throw new RuntimeException("Error parsing recovery options: " +
1241 "can't understand option \"" + args[i] + "\"");
1242 }
1243 }
1244 } else {
1245 return null;
1246 }
1247 }
1248 return startOpt;
1249 }
1250
  /** Record the chosen startup option in the configuration. */
  private static void setStartupOption(Configuration conf, StartupOption opt) {
    conf.set(DFS_NAMENODE_STARTUP_KEY, opt.name());
  }
1254
1255 static StartupOption getStartupOption(Configuration conf) {
1256 return StartupOption.valueOf(conf.get(DFS_NAMENODE_STARTUP_KEY,
1257 StartupOption.REGULAR.toString()));
1258 }
1259
1260 private static void doRecovery(StartupOption startOpt, Configuration conf)
1261 throws IOException {
1262 String nsId = DFSUtil.getNamenodeNameServiceId(conf);
1263 String namenodeId = HAUtil.getNameNodeId(conf, nsId);
1264 initializeGenericKeys(conf, nsId, namenodeId);
1265 if (startOpt.getForce() < MetaRecoveryContext.FORCE_ALL) {
1266 if (!confirmPrompt("You have selected Metadata Recovery mode. " +
1267 "This mode is intended to recover lost metadata on a corrupt " +
1268 "filesystem. Metadata recovery mode often permanently deletes " +
1269 "data from your HDFS filesystem. Please back up your edit log " +
1270 "and fsimage before trying this!\n\n" +
1271 "Are you ready to proceed? (Y/N)\n")) {
1272 System.err.println("Recovery aborted at user request.\n");
1273 return;
1274 }
1275 }
1276 MetaRecoveryContext.LOG.info("starting recovery...");
1277 UserGroupInformation.setConfiguration(conf);
1278 NameNode.initMetrics(conf, startOpt.toNodeRole());
1279 FSNamesystem fsn = null;
1280 try {
1281 fsn = FSNamesystem.loadFromDisk(conf);
1282 fsn.getFSImage().saveNamespace(fsn);
1283 MetaRecoveryContext.LOG.info("RECOVERY COMPLETE");
1284 } catch (IOException e) {
1285 MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
1286 throw e;
1287 } catch (RuntimeException e) {
1288 MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
1289 throw e;
1290 } finally {
1291 if (fsn != null)
1292 fsn.close();
1293 }
1294 }
1295
  /**
   * Parse the command line and either construct the requested NameNode role
   * or execute a one-shot administrative action (format, rollback, etc.),
   * in which case the JVM is terminated when the action completes.
   *
   * @param argv command-line arguments
   * @param conf configuration to use; a fresh HdfsConfiguration is created
   *     when null
   * @return a running NameNode (or BackupNode), or null when the arguments
   *     selected a terminating action or were invalid
   * @throws IOException if the NameNode cannot be started
   */
  public static NameNode createNameNode(String argv[], Configuration conf)
      throws IOException {
    LOG.info("createNameNode " + Arrays.asList(argv));
    if (conf == null)
      conf = new HdfsConfiguration();
    StartupOption startOpt = parseArguments(argv);
    if (startOpt == null) {
      printUsage(System.err);
      return null;
    }
    setStartupOption(conf, startOpt);

    switch (startOpt) {
      // One-shot action: format the namespace, then exit.
      case FORMAT: {
        boolean aborted = format(conf, startOpt.getForceFormat(),
            startOpt.getInteractiveFormat());
        terminate(aborted ? 1 : 0);
        return null; // avoid javac warning
      }
      // One-shot action: print a freshly generated cluster ID, then exit.
      case GENCLUSTERID: {
        System.err.println("Generating new cluster id:");
        System.out.println(NNStorage.newClusterID());
        terminate(0);
        return null;
      }
      // -finalize is no longer a startup option; direct the user to dfsadmin.
      case FINALIZE: {
        System.err.println("Use of the argument '" + StartupOption.FINALIZE +
            "' is no longer supported. To finalize an upgrade, start the NN " +
            " and then run `hdfs dfsadmin -finalizeUpgrade'");
        terminate(1);
        return null; // avoid javac warning
      }
      // One-shot action: roll back to pre-upgrade state, then exit.
      case ROLLBACK: {
        boolean aborted = doRollback(conf, true);
        terminate(aborted ? 1 : 0);
        return null; // avoid warning
      }
      // Delegate remaining args to the BootstrapStandby tool, then exit.
      case BOOTSTRAPSTANDBY: {
        String toolArgs[] = Arrays.copyOfRange(argv, 1, argv.length);
        int rc = BootstrapStandby.run(toolArgs, conf);
        terminate(rc);
        return null; // avoid warning
      }
      // One-shot action: prime the shared edits dir for HA, then exit.
      case INITIALIZESHAREDEDITS: {
        boolean aborted = initializeSharedEdits(conf,
            startOpt.getForceFormat(),
            startOpt.getInteractiveFormat());
        terminate(aborted ? 1 : 0);
        return null; // avoid warning
      }
      // Long-running roles served by BackupNode rather than NameNode proper.
      case BACKUP:
      case CHECKPOINT: {
        NamenodeRole role = startOpt.toNodeRole();
        DefaultMetricsSystem.initialize(role.toString().replace(" ", ""));
        return new BackupNode(conf, role);
      }
      case RECOVER: {
        NameNode.doRecovery(startOpt, conf);
        return null;
      }
      // REGULAR, UPGRADE, ROLLINGUPGRADE, IMPORT, etc.: start a NameNode.
      default: {
        DefaultMetricsSystem.initialize("NameNode");
        return new NameNode(conf);
      }
    }
  }
1362
1363 /**
1364 * In federation configuration is set for a set of
1365 * namenode and secondary namenode/backup/checkpointer, which are
1366 * grouped under a logical nameservice ID. The configuration keys specific
1367 * to them have suffix set to configured nameserviceId.
1368 *
1369 * This method copies the value from specific key of format key.nameserviceId
1370 * to key, to set up the generic configuration. Once this is done, only
1371 * generic version of the configuration is read in rest of the code, for
1372 * backward compatibility and simpler code changes.
1373 *
1374 * @param conf
1375 * Configuration object to lookup specific key and to set the value
1376 * to the key passed. Note the conf object is modified
1377 * @param nameserviceId name service Id (to distinguish federated NNs)
1378 * @param namenodeId the namenode ID (to distinguish HA NNs)
1379 * @see DFSUtil#setGenericConf(Configuration, String, String, String...)
1380 */
1381 public static void initializeGenericKeys(Configuration conf,
1382 String nameserviceId, String namenodeId) {
1383 if ((nameserviceId != null && !nameserviceId.isEmpty()) ||
1384 (namenodeId != null && !namenodeId.isEmpty())) {
1385 if (nameserviceId != null) {
1386 conf.set(DFS_NAMESERVICE_ID, nameserviceId);
1387 }
1388 if (namenodeId != null) {
1389 conf.set(DFS_HA_NAMENODE_ID_KEY, namenodeId);
1390 }
1391
1392 DFSUtil.setGenericConf(conf, nameserviceId, namenodeId,
1393 NAMENODE_SPECIFIC_KEYS);
1394 DFSUtil.setGenericConf(conf, nameserviceId, null,
1395 NAMESERVICE_SPECIFIC_KEYS);
1396 }
1397
1398 // If the RPC address is set use it to (re-)configure the default FS
1399 if (conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY) != null) {
1400 URI defaultUri = URI.create(HdfsConstants.HDFS_URI_SCHEME + "://"
1401 + conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY));
1402 conf.set(FS_DEFAULT_NAME_KEY, defaultUri.toString());
1403 LOG.debug("Setting " + FS_DEFAULT_NAME_KEY + " to " + defaultUri.toString());
1404 }
1405 }
1406
1407 /**
1408 * Get the name service Id for the node
1409 * @return name service Id or null if federation is not configured
1410 */
1411 protected String getNameServiceId(Configuration conf) {
1412 return DFSUtil.getNamenodeNameServiceId(conf);
1413 }
1414
1415 /**
1416 */
1417 public static void main(String argv[]) throws Exception {
1418 if (DFSUtil.parseHelpArgument(argv, NameNode.USAGE, System.out, true)) {
1419 System.exit(0);
1420 }
1421
1422 try {
1423 StringUtils.startupShutdownMessage(NameNode.class, argv, LOG);
1424 NameNode namenode = createNameNode(argv, null);
1425 if (namenode != null) {
1426 namenode.join();
1427 }
1428 } catch (Throwable e) {
1429 LOG.fatal("Exception in namenode join", e);
1430 terminate(1, e);
1431 }
1432 }
1433
1434 synchronized void monitorHealth()
1435 throws HealthCheckFailedException, AccessControlException {
1436 namesystem.checkSuperuserPrivilege();
1437 if (!haEnabled) {
1438 return; // no-op, if HA is not enabled
1439 }
1440 getNamesystem().checkAvailableResources();
1441 if (!getNamesystem().nameNodeHasResourcesAvailable()) {
1442 throw new HealthCheckFailedException(
1443 "The NameNode has no resources available");
1444 }
1445 }
1446
1447 synchronized void transitionToActive()
1448 throws ServiceFailedException, AccessControlException {
1449 namesystem.checkSuperuserPrivilege();
1450 if (!haEnabled) {
1451 throw new ServiceFailedException("HA for namenode is not enabled");
1452 }
1453 state.setState(haContext, ACTIVE_STATE);
1454 }
1455
1456 synchronized void transitionToStandby()
1457 throws ServiceFailedException, AccessControlException {
1458 namesystem.checkSuperuserPrivilege();
1459 if (!haEnabled) {
1460 throw new ServiceFailedException("HA for namenode is not enabled");
1461 }
1462 state.setState(haContext, STANDBY_STATE);
1463 }
1464
1465 synchronized HAServiceStatus getServiceStatus()
1466 throws ServiceFailedException, AccessControlException {
1467 namesystem.checkSuperuserPrivilege();
1468 if (!haEnabled) {
1469 throw new ServiceFailedException("HA for namenode is not enabled");
1470 }
1471 if (state == null) {
1472 return new HAServiceStatus(HAServiceState.INITIALIZING);
1473 }
1474 HAServiceState retState = state.getServiceState();
1475 HAServiceStatus ret = new HAServiceStatus(retState);
1476 if (retState == HAServiceState.STANDBY) {
1477 String safemodeTip = namesystem.getSafeModeTip();
1478 if (!safemodeTip.isEmpty()) {
1479 ret.setNotReadyToBecomeActive(
1480 "The NameNode is in safemode. " +
1481 safemodeTip);
1482 } else {
1483 ret.setReadyToBecomeActive();
1484 }
1485 } else if (retState == HAServiceState.ACTIVE) {
1486 ret.setReadyToBecomeActive();
1487 } else {
1488 ret.setNotReadyToBecomeActive("State is " + state);
1489 }
1490 return ret;
1491 }
1492
1493 synchronized HAServiceState getServiceState() {
1494 if (state == null) {
1495 return HAServiceState.INITIALIZING;
1496 }
1497 return state.getServiceState();
1498 }
1499
1500 /**
1501 * Register NameNodeStatusMXBean
1502 */
1503 private void registerNNSMXBean() {
1504 nameNodeStatusBeanName = MBeans.register("NameNode", "NameNodeStatus", this);
1505 }
1506
1507 @Override // NameNodeStatusMXBean
1508 public String getNNRole() {
1509 String roleStr = "";
1510 NamenodeRole role = getRole();
1511 if (null != role) {
1512 roleStr = role.toString();
1513 }
1514 return roleStr;
1515 }
1516
1517 @Override // NameNodeStatusMXBean
1518 public String getState() {
1519 String servStateStr = "";
1520 HAServiceState servState = getServiceState();
1521 if (null != servState) {
1522 servStateStr = servState.toString();
1523 }
1524 return servStateStr;
1525 }
1526
  /** @return the NN's RPC address as a host:port string, for JMX. */
  @Override // NameNodeStatusMXBean
  public String getHostAndPort() {
    return getNameNodeAddressHostPortString();
  }
1531
  /** @return whether Hadoop security (Kerberos) is enabled, for JMX. */
  @Override // NameNodeStatusMXBean
  public boolean isSecurityEnabled() {
    return UserGroupInformation.isSecurityEnabled();
  }
1536
1537 /**
1538 * Shutdown the NN immediately in an ungraceful way. Used when it would be
1539 * unsafe for the NN to continue operating, e.g. during a failed HA state
1540 * transition.
1541 *
1542 * @param t exception which warrants the shutdown. Printed to the NN log
1543 * before exit.
1544 * @throws ExitException thrown only for testing.
1545 */
1546 protected synchronized void doImmediateShutdown(Throwable t)
1547 throws ExitException {
1548 String message = "Error encountered requiring NN shutdown. " +
1549 "Shutting down immediately.";
1550 try {
1551 LOG.fatal(message, t);
1552 } catch (Throwable ignored) {
1553 // This is unlikely to happen, but there's nothing we can do if it does.
1554 }
1555 terminate(1, t);
1556 }
1557
1558 /**
1559 * Class used to expose {@link NameNode} as context to {@link HAState}
1560 */
1561 protected class NameNodeHAContext implements HAContext {
1562 @Override
1563 public void setState(HAState s) {
1564 state = s;
1565 }
1566
1567 @Override
1568 public HAState getState() {
1569 return state;
1570 }
1571
1572 @Override
1573 public void startActiveServices() throws IOException {
1574 try {
1575 namesystem.startActiveServices();
1576 startTrashEmptier(conf);
1577 } catch (Throwable t) {
1578 doImmediateShutdown(t);
1579 }
1580 }
1581
1582 @Override
1583 public void stopActiveServices() throws IOException {
1584 try {
1585 if (namesystem != null) {
1586 namesystem.stopActiveServices();
1587 }
1588 stopTrashEmptier();
1589 } catch (Throwable t) {
1590 doImmediateShutdown(t);
1591 }
1592 }
1593
1594 @Override
1595 public void startStandbyServices() throws IOException {
1596 try {
1597 namesystem.startStandbyServices(conf);
1598 } catch (Throwable t) {
1599 doImmediateShutdown(t);
1600 }
1601 }
1602
1603 @Override
1604 public void prepareToStopStandbyServices() throws ServiceFailedException {
1605 try {
1606 namesystem.prepareToStopStandbyServices();
1607 } catch (Throwable t) {
1608 doImmediateShutdown(t);
1609 }
1610 }
1611
1612 @Override
1613 public void stopStandbyServices() throws IOException {
1614 try {
1615 if (namesystem != null) {
1616 namesystem.stopStandbyServices();
1617 }
1618 } catch (Throwable t) {
1619 doImmediateShutdown(t);
1620 }
1621 }
1622
1623 @Override
1624 public void writeLock() {
1625 namesystem.writeLock();
1626 namesystem.lockRetryCache();
1627 }
1628
1629 @Override
1630 public void writeUnlock() {
1631 namesystem.unlockRetryCache();
1632 namesystem.writeUnlock();
1633 }
1634
1635 /** Check if an operation of given category is allowed */
1636 @Override
1637 public void checkOperation(final OperationCategory op)
1638 throws StandbyException {
1639 state.checkOperation(haContext, op);
1640 }
1641
1642 @Override
1643 public boolean allowStaleReads() {
1644 return allowStaleStandbyReads;
1645 }
1646
1647 }
1648
  /** @return true if this NN is currently in the HA standby state. */
  public boolean isStandbyState() {
    return (state.equals(STANDBY_STATE));
  }
1652
1653 /**
1654 * Check that a request to change this node's HA state is valid.
1655 * In particular, verifies that, if auto failover is enabled, non-forced
1656 * requests from the HAAdmin CLI are rejected, and vice versa.
1657 *
1658 * @param req the request to check
1659 * @throws AccessControlException if the request is disallowed
1660 */
1661 void checkHaStateChange(StateChangeRequestInfo req)
1662 throws AccessControlException {
1663 boolean autoHaEnabled = conf.getBoolean(DFS_HA_AUTO_FAILOVER_ENABLED_KEY,
1664 DFS_HA_AUTO_FAILOVER_ENABLED_DEFAULT);
1665 switch (req.getSource()) {
1666 case REQUEST_BY_USER:
1667 if (autoHaEnabled) {
1668 throw new AccessControlException(
1669 "Manual HA control for this NameNode is disallowed, because " +
1670 "automatic HA is enabled.");
1671 }
1672 break;
1673 case REQUEST_BY_USER_FORCED:
1674 if (autoHaEnabled) {
1675 LOG.warn("Allowing manual HA control from " +
1676 Server.getRemoteAddress() +
1677 " even though automatic HA is enabled, because the user " +
1678 "specified the force flag");
1679 }
1680 break;
1681 case REQUEST_BY_ZKFC:
1682 if (!autoHaEnabled) {
1683 throw new AccessControlException(
1684 "Request from ZK failover controller at " +
1685 Server.getRemoteAddress() + " denied since automatic HA " +
1686 "is not enabled");
1687 }
1688 break;
1689 }
1690 }
1691 }