001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.hdfs.server.namenode;
019
020 import java.io.File;
021 import java.io.IOException;
022 import java.io.PrintStream;
023 import java.net.InetSocketAddress;
024 import java.net.URI;
025 import java.security.PrivilegedExceptionAction;
026 import java.util.ArrayList;
027 import java.util.Arrays;
028 import java.util.Collection;
029 import java.util.Iterator;
030 import java.util.List;
031 import org.apache.commons.logging.Log;
032 import org.apache.commons.logging.LogFactory;
033 import org.apache.hadoop.HadoopIllegalArgumentException;
034 import org.apache.hadoop.classification.InterfaceAudience;
035 import org.apache.hadoop.conf.Configuration;
036 import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
037 import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
038 import org.apache.hadoop.ha.HAServiceStatus;
039 import org.apache.hadoop.ha.HealthCheckFailedException;
040 import org.apache.hadoop.ha.ServiceFailedException;
041 import org.apache.hadoop.fs.FileSystem;
042 import org.apache.hadoop.fs.FileUtil;
043 import org.apache.hadoop.fs.Trash;
044 import static org.apache.hadoop.hdfs.DFSConfigKeys.*;
045 import static org.apache.hadoop.util.ExitUtil.terminate;
046 import static org.apache.hadoop.util.ToolRunner.confirmPrompt;
047
048 import org.apache.hadoop.hdfs.DFSConfigKeys;
049 import org.apache.hadoop.hdfs.DFSUtil;
050 import org.apache.hadoop.hdfs.HAUtil;
051 import org.apache.hadoop.hdfs.HdfsConfiguration;
052 import org.apache.hadoop.hdfs.protocol.ClientProtocol;
053 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
054 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
055 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
056 import org.apache.hadoop.hdfs.server.namenode.ha.ActiveState;
057 import org.apache.hadoop.hdfs.server.namenode.ha.BootstrapStandby;
058 import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
059 import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
060 import org.apache.hadoop.hdfs.server.namenode.ha.StandbyState;
061 import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
062 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
063 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgressMetrics;
064 import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol;
065 import org.apache.hadoop.hdfs.server.protocol.JournalProtocol;
066 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
067 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
068 import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
069 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
070 import org.apache.hadoop.ipc.Server;
071 import org.apache.hadoop.ipc.StandbyException;
072 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
073 import org.apache.hadoop.net.NetUtils;
074 import org.apache.hadoop.security.AccessControlException;
075 import org.apache.hadoop.security.RefreshUserMappingsProtocol;
076 import org.apache.hadoop.security.SecurityUtil;
077 import org.apache.hadoop.security.UserGroupInformation;
078 import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol;
079 import org.apache.hadoop.tools.GetUserMappingsProtocol;
080 import org.apache.hadoop.util.ExitUtil.ExitException;
081 import org.apache.hadoop.util.ServicePlugin;
082 import org.apache.hadoop.util.StringUtils;
083
084 import com.google.common.annotations.VisibleForTesting;
085 import com.google.common.base.Joiner;
086 import com.google.common.base.Preconditions;
087 import com.google.common.collect.Lists;
088
089 /**********************************************************
090 * NameNode serves as both directory namespace manager and
091 * "inode table" for the Hadoop DFS. There is a single NameNode
092 * running in any DFS deployment. (Well, except when there
093 * is a second backup/failover NameNode, or when using federated NameNodes.)
094 *
095 * The NameNode controls two critical tables:
096 * 1) filename->blocksequence (namespace)
097 * 2) block->machinelist ("inodes")
098 *
099 * The first table is stored on disk and is very precious.
100 * The second table is rebuilt every time the NameNode comes up.
101 *
102 * 'NameNode' refers to both this class as well as the 'NameNode server'.
103 * The 'FSNamesystem' class actually performs most of the filesystem
104 * management. The majority of the 'NameNode' class itself is concerned
105 * with exposing the IPC interface and the HTTP server to the outside world,
106 * plus some configuration management.
107 *
108 * NameNode implements the
109 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} interface, which
110 * allows clients to ask for DFS services.
111 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} is not designed for
112 * direct use by authors of DFS client code. End-users should instead use the
113 * {@link org.apache.hadoop.fs.FileSystem} class.
114 *
115 * NameNode also implements the
116 * {@link org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol} interface,
117 * used by DataNodes that actually store DFS data blocks. These
118 * methods are invoked repeatedly and automatically by all the
119 * DataNodes in a DFS deployment.
120 *
121 * NameNode also implements the
122 * {@link org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol} interface,
123 * used by secondary namenodes or rebalancing processes to get partial
124 * NameNode state, for example partial blocksMap etc.
125 **********************************************************/
126 @InterfaceAudience.Private
127 public class NameNode {
// Invoke HdfsConfiguration.init() once, when this class is initialized.
static {
  HdfsConfiguration.init();
}
131
/**
 * The kinds of operation the namenode distinguishes between.
 */
public enum OperationCategory {
  /** Operation that does not depend on the namenode state. */
  UNCHECKED,
  /** Read operation; leaves the namespace state untouched. */
  READ,
  /** Write operation; modifies the namespace state. */
  WRITE,
  /** Operation that is part of checkpointing. */
  CHECKPOINT,
  /** Operation belonging to {@link JournalProtocol}. */
  JOURNAL
}
147
148 /**
149 * HDFS configuration can have three types of parameters:
150 * <ol>
151 * <li>Parameters that are common for all the name services in the cluster.</li>
152 * <li>Parameters that are specific to a name service. These keys are suffixed
153 * with nameserviceId in the configuration. For example,
154 * "dfs.namenode.rpc-address.nameservice1".</li>
155 * <li>Parameters that are specific to a single name node. These keys are suffixed
156 * with nameserviceId and namenodeId in the configuration. for example,
157 * "dfs.namenode.rpc-address.nameservice1.namenode1"</li>
158 * </ol>
159 *
160 * In the latter cases, operators may specify the configuration without
161 * any suffix, with a nameservice suffix, or with a nameservice and namenode
162 * suffix. The more specific suffix will take precedence.
163 *
164 * These keys are specific to a given namenode, and thus may be configured
165 * globally, for a nameservice, or for a specific namenode within a nameservice.
166 */
167 public static final String[] NAMENODE_SPECIFIC_KEYS = {
168 DFS_NAMENODE_RPC_ADDRESS_KEY,
169 DFS_NAMENODE_RPC_BIND_HOST_KEY,
170 DFS_NAMENODE_NAME_DIR_KEY,
171 DFS_NAMENODE_EDITS_DIR_KEY,
172 DFS_NAMENODE_SHARED_EDITS_DIR_KEY,
173 DFS_NAMENODE_CHECKPOINT_DIR_KEY,
174 DFS_NAMENODE_CHECKPOINT_EDITS_DIR_KEY,
175 DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY,
176 DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY,
177 DFS_NAMENODE_HTTP_ADDRESS_KEY,
178 DFS_NAMENODE_KEYTAB_FILE_KEY,
179 DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY,
180 DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY,
181 DFS_NAMENODE_BACKUP_ADDRESS_KEY,
182 DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY,
183 DFS_NAMENODE_BACKUP_SERVICE_RPC_ADDRESS_KEY,
184 DFS_NAMENODE_USER_NAME_KEY,
185 DFS_NAMENODE_INTERNAL_SPNEGO_USER_NAME_KEY,
186 DFS_HA_FENCE_METHODS_KEY,
187 DFS_HA_ZKFC_PORT_KEY,
188 DFS_HA_FENCE_METHODS_KEY
189 };
190
191 /**
192 * @see #NAMENODE_SPECIFIC_KEYS
193 * These keys are specific to a nameservice, but may not be overridden
194 * for a specific namenode.
195 */
196 public static final String[] NAMESERVICE_SPECIFIC_KEYS = {
197 DFS_HA_AUTO_FAILOVER_ENABLED_KEY
198 };
199
200 private static final String USAGE = "Usage: java NameNode ["
201 + StartupOption.BACKUP.getName() + "] | ["
202 + StartupOption.CHECKPOINT.getName() + "] | ["
203 + StartupOption.FORMAT.getName() + " ["
204 + StartupOption.CLUSTERID.getName() + " cid ] ["
205 + StartupOption.FORCE.getName() + "] ["
206 + StartupOption.NONINTERACTIVE.getName() + "] ] | ["
207 + StartupOption.UPGRADE.getName() + "] | ["
208 + StartupOption.ROLLBACK.getName() + "] | ["
209 + StartupOption.FINALIZE.getName() + "] | ["
210 + StartupOption.IMPORT.getName() + "] | ["
211 + StartupOption.INITIALIZESHAREDEDITS.getName() + "] | ["
212 + StartupOption.BOOTSTRAPSTANDBY.getName() + "] | ["
213 + StartupOption.RECOVER.getName() + " [ " + StartupOption.FORCE.getName()
214 + " ] ]";
215
216 public long getProtocolVersion(String protocol,
217 long clientVersion) throws IOException {
218 if (protocol.equals(ClientProtocol.class.getName())) {
219 return ClientProtocol.versionID;
220 } else if (protocol.equals(DatanodeProtocol.class.getName())){
221 return DatanodeProtocol.versionID;
222 } else if (protocol.equals(NamenodeProtocol.class.getName())){
223 return NamenodeProtocol.versionID;
224 } else if (protocol.equals(RefreshAuthorizationPolicyProtocol.class.getName())){
225 return RefreshAuthorizationPolicyProtocol.versionID;
226 } else if (protocol.equals(RefreshUserMappingsProtocol.class.getName())){
227 return RefreshUserMappingsProtocol.versionID;
228 } else if (protocol.equals(GetUserMappingsProtocol.class.getName())){
229 return GetUserMappingsProtocol.versionID;
230 } else {
231 throw new IOException("Unknown protocol to name node: " + protocol);
232 }
233 }
234
235 public static final int DEFAULT_PORT = 8020;
236 public static final Log LOG = LogFactory.getLog(NameNode.class.getName());
237 public static final Log stateChangeLog = LogFactory.getLog("org.apache.hadoop.hdfs.StateChange");
238 public static final Log blockStateChangeLog = LogFactory.getLog("BlockStateChange");
239 public static final HAState ACTIVE_STATE = new ActiveState();
240 public static final HAState STANDBY_STATE = new StandbyState();
241
242 protected FSNamesystem namesystem;
243 protected final Configuration conf;
244 protected NamenodeRole role;
245 private volatile HAState state;
246 private final boolean haEnabled;
247 private final HAContext haContext;
248 protected boolean allowStaleStandbyReads;
249
250
251 /** httpServer */
252 protected NameNodeHttpServer httpServer;
253 private Thread emptier;
254 /** only used for testing purposes */
255 protected boolean stopRequested = false;
256 /** Registration information of this name-node */
257 protected NamenodeRegistration nodeRegistration;
258 /** Activated plug-ins. */
259 private List<ServicePlugin> plugins;
260
261 private NameNodeRpcServer rpcServer;
262
263 /** Format a new filesystem. Destroys any filesystem that may already
264 * exist at this location. **/
265 public static void format(Configuration conf) throws IOException {
266 format(conf, true, true);
267 }
268
/** Metrics object shared by the class; assigned by {@code initMetrics}. */
static NameNodeMetrics metrics;

/** Single startup-progress tracker shared by the class. */
private static final StartupProgress startupProgress = new StartupProgress();

// Hand the tracker to StartupProgressMetrics as soon as it exists.
static {
  StartupProgressMetrics.register(startupProgress);
}
274
275 /** Return the {@link FSNamesystem} object.
276 * @return {@link FSNamesystem} object.
277 */
278 public FSNamesystem getNamesystem() {
279 return namesystem;
280 }
281
282 public NamenodeProtocols getRpcServer() {
283 return rpcServer;
284 }
285
286 static void initMetrics(Configuration conf, NamenodeRole role) {
287 metrics = NameNodeMetrics.create(conf, role);
288 }
289
290 public static NameNodeMetrics getNameNodeMetrics() {
291 return metrics;
292 }
293
294 /**
295 * Returns object used for reporting namenode startup progress.
296 *
297 * @return StartupProgress for reporting namenode startup progress
298 */
299 public static StartupProgress getStartupProgress() {
300 return startupProgress;
301 }
302
303 public static InetSocketAddress getAddress(String address) {
304 return NetUtils.createSocketAddr(address, DEFAULT_PORT);
305 }
306
307 /**
308 * Set the configuration property for the service rpc address
309 * to address
310 */
311 public static void setServiceAddress(Configuration conf,
312 String address) {
313 LOG.info("Setting ADDRESS " + address);
314 conf.set(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, address);
315 }
316
317 /**
318 * Fetches the address for services to use when connecting to namenode
319 * based on the value of fallback returns null if the special
320 * address is not specified or returns the default namenode address
321 * to be used by both clients and services.
322 * Services here are datanodes, backup node, any non client connection
323 */
324 public static InetSocketAddress getServiceAddress(Configuration conf,
325 boolean fallback) {
326 String addr = conf.get(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY);
327 if (addr == null || addr.isEmpty()) {
328 return fallback ? getAddress(conf) : null;
329 }
330 return getAddress(addr);
331 }
332
333 public static InetSocketAddress getAddress(Configuration conf) {
334 URI filesystemURI = FileSystem.getDefaultUri(conf);
335 return getAddress(filesystemURI);
336 }
337
338
339 /**
340 * TODO:FEDERATION
341 * @param filesystemURI
342 * @return address of file system
343 */
344 public static InetSocketAddress getAddress(URI filesystemURI) {
345 String authority = filesystemURI.getAuthority();
346 if (authority == null) {
347 throw new IllegalArgumentException(String.format(
348 "Invalid URI for NameNode address (check %s): %s has no authority.",
349 FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString()));
350 }
351 if (!HdfsConstants.HDFS_URI_SCHEME.equalsIgnoreCase(
352 filesystemURI.getScheme())) {
353 throw new IllegalArgumentException(String.format(
354 "Invalid URI for NameNode address (check %s): %s is not of scheme '%s'.",
355 FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString(),
356 HdfsConstants.HDFS_URI_SCHEME));
357 }
358 return getAddress(authority);
359 }
360
361 public static URI getUri(InetSocketAddress namenode) {
362 int port = namenode.getPort();
363 String portString = port == DEFAULT_PORT ? "" : (":"+port);
364 return URI.create(HdfsConstants.HDFS_URI_SCHEME + "://"
365 + namenode.getHostName()+portString);
366 }
367
368 //
369 // Common NameNode methods implementation for the active name-node role.
370 //
371 public NamenodeRole getRole() {
372 return role;
373 }
374
375 boolean isRole(NamenodeRole that) {
376 return role.equals(that);
377 }
378
379 /**
380 * Given a configuration get the address of the service rpc server
381 * If the service rpc is not configured returns null
382 */
383 protected InetSocketAddress getServiceRpcServerAddress(Configuration conf) {
384 return NameNode.getServiceAddress(conf, false);
385 }
386
387 protected InetSocketAddress getRpcServerAddress(Configuration conf) {
388 return getAddress(conf);
389 }
390
391 /** Given a configuration get the bind host of the service rpc server
392 * If the bind host is not configured returns null.
393 */
394 protected String getServiceRpcServerBindHost(Configuration conf) {
395 String addr = conf.getTrimmed(DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY);
396 if (addr == null || addr.isEmpty()) {
397 return null;
398 }
399 return addr;
400 }
401
402 /** Given a configuration get the bind host of the client rpc server
403 * If the bind host is not configured returns null.
404 */
405 protected String getRpcServerBindHost(Configuration conf) {
406 String addr = conf.getTrimmed(DFS_NAMENODE_RPC_BIND_HOST_KEY);
407 if (addr == null || addr.isEmpty()) {
408 return null;
409 }
410 return addr;
411 }
412
413 /**
414 * Modifies the configuration passed to contain the service rpc address setting
415 */
416 protected void setRpcServiceServerAddress(Configuration conf,
417 InetSocketAddress serviceRPCAddress) {
418 setServiceAddress(conf, NetUtils.getHostPortString(serviceRPCAddress));
419 }
420
421 protected void setRpcServerAddress(Configuration conf,
422 InetSocketAddress rpcAddress) {
423 FileSystem.setDefaultUri(conf, getUri(rpcAddress));
424 }
425
426 protected InetSocketAddress getHttpServerAddress(Configuration conf) {
427 return getHttpAddress(conf);
428 }
429
430 /** @return the NameNode HTTP address set in the conf. */
431 public static InetSocketAddress getHttpAddress(Configuration conf) {
432 return NetUtils.createSocketAddr(
433 conf.get(DFS_NAMENODE_HTTP_ADDRESS_KEY, DFS_NAMENODE_HTTP_ADDRESS_DEFAULT));
434 }
435
436 protected void setHttpServerAddress(Configuration conf) {
437 String hostPort = NetUtils.getHostPortString(getHttpAddress());
438 conf.set(DFS_NAMENODE_HTTP_ADDRESS_KEY, hostPort);
439 LOG.info("Web-server up at: " + hostPort);
440 }
441
442 protected void loadNamesystem(Configuration conf) throws IOException {
443 this.namesystem = FSNamesystem.loadFromDisk(conf);
444 }
445
446 NamenodeRegistration getRegistration() {
447 return nodeRegistration;
448 }
449
450 NamenodeRegistration setRegistration() {
451 nodeRegistration = new NamenodeRegistration(
452 NetUtils.getHostPortString(rpcServer.getRpcAddress()),
453 NetUtils.getHostPortString(getHttpAddress()),
454 getFSImage().getStorage(), getRole());
455 return nodeRegistration;
456 }
457
458 /* optimize ugi lookup for RPC operations to avoid a trip through
459 * UGI.getCurrentUser which is synch'ed
460 */
461 public static UserGroupInformation getRemoteUser() throws IOException {
462 UserGroupInformation ugi = Server.getRemoteUser();
463 return (ugi != null) ? ugi : UserGroupInformation.getCurrentUser();
464 }
465
466
467 /**
468 * Login as the configured user for the NameNode.
469 */
470 void loginAsNameNodeUser(Configuration conf) throws IOException {
471 InetSocketAddress socAddr = getRpcServerAddress(conf);
472 SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
473 DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName());
474 }
475
476 /**
477 * Initialize name-node.
478 *
479 * @param conf the configuration
480 */
481 protected void initialize(Configuration conf) throws IOException {
482 UserGroupInformation.setConfiguration(conf);
483 loginAsNameNodeUser(conf);
484
485 NameNode.initMetrics(conf, this.getRole());
486
487 if (NamenodeRole.NAMENODE == role) {
488 startHttpServer(conf);
489 validateConfigurationSettingsOrAbort(conf);
490 }
491 loadNamesystem(conf);
492
493 rpcServer = createRpcServer(conf);
494 if (NamenodeRole.NAMENODE == role) {
495 httpServer.setNameNodeAddress(getNameNodeAddress());
496 httpServer.setFSImage(getFSImage());
497 } else {
498 validateConfigurationSettingsOrAbort(conf);
499 }
500
501 startCommonServices(conf);
502 }
503
504 /**
505 * Create the RPC server implementation. Used as an extension point for the
506 * BackupNode.
507 */
508 protected NameNodeRpcServer createRpcServer(Configuration conf)
509 throws IOException {
510 return new NameNodeRpcServer(conf, this);
511 }
512
513 /**
514 * Verifies that the final Configuration Settings look ok for the NameNode to
515 * properly start up
516 * Things to check for include:
517 * - HTTP Server Port does not equal the RPC Server Port
518 * @param conf
519 * @throws IOException
520 */
521 protected void validateConfigurationSettings(final Configuration conf)
522 throws IOException {
523 // check to make sure the web port and rpc port do not match
524 if(getHttpServerAddress(conf).getPort()
525 == getRpcServerAddress(conf).getPort()) {
526 String errMsg = "dfs.namenode.rpc-address " +
527 "("+ getRpcServerAddress(conf) + ") and " +
528 "dfs.namenode.http-address ("+ getHttpServerAddress(conf) + ") " +
529 "configuration keys are bound to the same port, unable to start " +
530 "NameNode. Port: " + getRpcServerAddress(conf).getPort();
531 throw new IOException(errMsg);
532 }
533 }
534
535 /**
536 * Validate NameNode configuration. Log a fatal error and abort if
537 * configuration is invalid.
538 *
539 * @param conf Configuration to validate
540 * @throws IOException thrown if conf is invalid
541 */
542 private void validateConfigurationSettingsOrAbort(Configuration conf)
543 throws IOException {
544 try {
545 validateConfigurationSettings(conf);
546 } catch (IOException e) {
547 LOG.fatal(e.toString());
548 throw e;
549 }
550 }
551
552 /** Start the services common to active and standby states */
553 private void startCommonServices(Configuration conf) throws IOException {
554 namesystem.startCommonServices(conf, haContext);
555 if (NamenodeRole.NAMENODE != role) {
556 startHttpServer(conf);
557 httpServer.setNameNodeAddress(getNameNodeAddress());
558 httpServer.setFSImage(getFSImage());
559 }
560 rpcServer.start();
561 plugins = conf.getInstances(DFS_NAMENODE_PLUGINS_KEY,
562 ServicePlugin.class);
563 for (ServicePlugin p: plugins) {
564 try {
565 p.start(this);
566 } catch (Throwable t) {
567 LOG.warn("ServicePlugin " + p + " could not be started", t);
568 }
569 }
570 LOG.info(getRole() + " RPC up at: " + rpcServer.getRpcAddress());
571 if (rpcServer.getServiceRpcAddress() != null) {
572 LOG.info(getRole() + " service RPC up at: "
573 + rpcServer.getServiceRpcAddress());
574 }
575 }
576
577 private void stopCommonServices() {
578 if(namesystem != null) namesystem.close();
579 if(rpcServer != null) rpcServer.stop();
580 if (plugins != null) {
581 for (ServicePlugin p : plugins) {
582 try {
583 p.stop();
584 } catch (Throwable t) {
585 LOG.warn("ServicePlugin " + p + " could not be stopped", t);
586 }
587 }
588 }
589 stopHttpServer();
590 }
591
592 private void startTrashEmptier(final Configuration conf) throws IOException {
593 long trashInterval =
594 conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT);
595 if (trashInterval == 0) {
596 return;
597 } else if (trashInterval < 0) {
598 throw new IOException("Cannot start trash emptier with negative interval."
599 + " Set " + FS_TRASH_INTERVAL_KEY + " to a positive value.");
600 }
601
602 // This may be called from the transitionToActive code path, in which
603 // case the current user is the administrator, not the NN. The trash
604 // emptier needs to run as the NN. See HDFS-3972.
605 FileSystem fs = SecurityUtil.doAsLoginUser(
606 new PrivilegedExceptionAction<FileSystem>() {
607 @Override
608 public FileSystem run() throws IOException {
609 return FileSystem.get(conf);
610 }
611 });
612 this.emptier = new Thread(new Trash(fs, conf).getEmptier(), "Trash Emptier");
613 this.emptier.setDaemon(true);
614 this.emptier.start();
615 }
616
617 private void stopTrashEmptier() {
618 if (this.emptier != null) {
619 emptier.interrupt();
620 emptier = null;
621 }
622 }
623
624 private void startHttpServer(final Configuration conf) throws IOException {
625 httpServer = new NameNodeHttpServer(conf, this, getHttpServerAddress(conf));
626 httpServer.start();
627 httpServer.setStartupProgress(startupProgress);
628 setHttpServerAddress(conf);
629 }
630
631 private void stopHttpServer() {
632 try {
633 if (httpServer != null) httpServer.stop();
634 } catch (Exception e) {
635 LOG.error("Exception while stopping httpserver", e);
636 }
637 }
638
/**
 * Start NameNode in the {@code NAMENODE} role.
 * <p>
 * The name-node can be started with one of the following startup options:
 * <ul>
 * <li>{@link StartupOption#REGULAR REGULAR} - normal name node startup</li>
 * <li>{@link StartupOption#FORMAT FORMAT} - format name node</li>
 * <li>{@link StartupOption#BACKUP BACKUP} - start backup node</li>
 * <li>{@link StartupOption#CHECKPOINT CHECKPOINT} - start checkpoint node</li>
 * <li>{@link StartupOption#UPGRADE UPGRADE} - start the cluster
 * upgrade and create a snapshot of the current file system state</li>
 * <li>{@link StartupOption#RECOVER RECOVER} - recover name node
 * metadata</li>
 * <li>{@link StartupOption#ROLLBACK ROLLBACK} - roll the
 * cluster back to the previous state</li>
 * <li>{@link StartupOption#FINALIZE FINALIZE} - finalize
 * previous upgrade</li>
 * <li>{@link StartupOption#IMPORT IMPORT} - import checkpoint</li>
 * </ul>
 * The option is passed via configuration field:
 * <tt>dfs.namenode.startup</tt>
 *
 * The conf will be modified to reflect the actual ports on which
 * the NameNode is up and running if the user passes the port as
 * <code>zero</code> in the conf.
 *
 * @param conf configuration
 * @throws IOException if the NameNode fails to start
 */
public NameNode(Configuration conf) throws IOException {
  this(conf, NamenodeRole.NAMENODE);
}
671
672 protected NameNode(Configuration conf, NamenodeRole role)
673 throws IOException {
674 this.conf = conf;
675 this.role = role;
676 String nsId = getNameServiceId(conf);
677 String namenodeId = HAUtil.getNameNodeId(conf, nsId);
678 this.haEnabled = HAUtil.isHAEnabled(conf, nsId);
679 state = createHAState();
680 this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf);
681 this.haContext = createHAContext();
682 try {
683 initializeGenericKeys(conf, nsId, namenodeId);
684 initialize(conf);
685 state.prepareToEnterState(haContext);
686 state.enterState(haContext);
687 } catch (IOException e) {
688 this.stop();
689 throw e;
690 } catch (HadoopIllegalArgumentException e) {
691 this.stop();
692 throw e;
693 }
694 }
695
696 protected HAState createHAState() {
697 return !haEnabled ? ACTIVE_STATE : STANDBY_STATE;
698 }
699
700 protected HAContext createHAContext() {
701 return new NameNodeHAContext();
702 }
703
704 /**
705 * Wait for service to finish.
706 * (Normally, it runs forever.)
707 */
708 public void join() {
709 try {
710 rpcServer.join();
711 } catch (InterruptedException ie) {
712 LOG.info("Caught interrupted exception ", ie);
713 }
714 }
715
716 /**
717 * Stop all NameNode threads and wait for all to finish.
718 */
719 public void stop() {
720 synchronized(this) {
721 if (stopRequested)
722 return;
723 stopRequested = true;
724 }
725 try {
726 if (state != null) {
727 state.exitState(haContext);
728 }
729 } catch (ServiceFailedException e) {
730 LOG.warn("Encountered exception while exiting state ", e);
731 } finally {
732 stopCommonServices();
733 if (metrics != null) {
734 metrics.shutdown();
735 }
736 if (namesystem != null) {
737 namesystem.shutdown();
738 }
739 }
740 }
741
742 synchronized boolean isStopRequested() {
743 return stopRequested;
744 }
745
746 /**
747 * Is the cluster currently in safe mode?
748 */
749 public boolean isInSafeMode() {
750 return namesystem.isInSafeMode();
751 }
752
753 /** get FSImage */
754 @VisibleForTesting
755 public FSImage getFSImage() {
756 return namesystem.dir.fsImage;
757 }
758
759 /**
760 * @return NameNode RPC address
761 */
762 public InetSocketAddress getNameNodeAddress() {
763 return rpcServer.getRpcAddress();
764 }
765
766 /**
767 * @return NameNode RPC address in "host:port" string form
768 */
769 public String getNameNodeAddressHostPortString() {
770 return NetUtils.getHostPortString(rpcServer.getRpcAddress());
771 }
772
773 /**
774 * @return NameNode service RPC address if configured, the
775 * NameNode RPC address otherwise
776 */
777 public InetSocketAddress getServiceRpcAddress() {
778 final InetSocketAddress serviceAddr = rpcServer.getServiceRpcAddress();
779 return serviceAddr == null ? rpcServer.getRpcAddress() : serviceAddr;
780 }
781
782 /**
783 * @return NameNode HTTP address, used by the Web UI, image transfer,
784 * and HTTP-based file system clients like Hftp and WebHDFS
785 */
786 public InetSocketAddress getHttpAddress() {
787 return httpServer.getHttpAddress();
788 }
789
790 /**
791 * Verify that configured directories exist, then
792 * Interactively confirm that formatting is desired
793 * for each existing directory and format them.
794 *
795 * @param conf
796 * @param force
797 * @return true if formatting was aborted, false otherwise
798 * @throws IOException
799 */
800 private static boolean format(Configuration conf, boolean force,
801 boolean isInteractive) throws IOException {
802 String nsId = DFSUtil.getNamenodeNameServiceId(conf);
803 String namenodeId = HAUtil.getNameNodeId(conf, nsId);
804 initializeGenericKeys(conf, nsId, namenodeId);
805 checkAllowFormat(conf);
806
807 if (UserGroupInformation.isSecurityEnabled()) {
808 InetSocketAddress socAddr = getAddress(conf);
809 SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
810 DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName());
811 }
812
813 Collection<URI> nameDirsToFormat = FSNamesystem.getNamespaceDirs(conf);
814 List<URI> sharedDirs = FSNamesystem.getSharedEditsDirs(conf);
815 List<URI> dirsToPrompt = new ArrayList<URI>();
816 dirsToPrompt.addAll(nameDirsToFormat);
817 dirsToPrompt.addAll(sharedDirs);
818 List<URI> editDirsToFormat =
819 FSNamesystem.getNamespaceEditsDirs(conf);
820
821 // if clusterID is not provided - see if you can find the current one
822 String clusterId = StartupOption.FORMAT.getClusterId();
823 if(clusterId == null || clusterId.equals("")) {
824 //Generate a new cluster id
825 clusterId = NNStorage.newClusterID();
826 }
827 System.out.println("Formatting using clusterid: " + clusterId);
828
829 FSImage fsImage = new FSImage(conf, nameDirsToFormat, editDirsToFormat);
830 FSNamesystem fsn = new FSNamesystem(conf, fsImage);
831 fsImage.getEditLog().initJournalsForWrite();
832
833 if (!fsImage.confirmFormat(force, isInteractive)) {
834 return true; // aborted
835 }
836
837 fsImage.format(fsn, clusterId);
838 return false;
839 }
840
841 public static void checkAllowFormat(Configuration conf) throws IOException {
842 if (!conf.getBoolean(DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY,
843 DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT)) {
844 throw new IOException("The option " + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY
845 + " is set to false for this filesystem, so it "
846 + "cannot be formatted. You will need to set "
847 + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY +" parameter "
848 + "to true in order to format this filesystem");
849 }
850 }
851
852 @VisibleForTesting
853 public static boolean initializeSharedEdits(Configuration conf) throws IOException {
854 return initializeSharedEdits(conf, true);
855 }
856
857 @VisibleForTesting
858 public static boolean initializeSharedEdits(Configuration conf,
859 boolean force) throws IOException {
860 return initializeSharedEdits(conf, force, false);
861 }
862
863 /**
864 * Clone the supplied configuration but remove the shared edits dirs.
865 *
866 * @param conf Supplies the original configuration.
867 * @return Cloned configuration without the shared edit dirs.
868 * @throws IOException on failure to generate the configuration.
869 */
870 private static Configuration getConfigurationWithoutSharedEdits(
871 Configuration conf)
872 throws IOException {
873 List<URI> editsDirs = FSNamesystem.getNamespaceEditsDirs(conf, false);
874 String editsDirsString = Joiner.on(",").join(editsDirs);
875
876 Configuration confWithoutShared = new Configuration(conf);
877 confWithoutShared.unset(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY);
878 confWithoutShared.setStrings(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY,
879 editsDirsString);
880 return confWithoutShared;
881 }
882
883 /**
884 * Format a new shared edits dir and copy in enough edit log segments so that
885 * the standby NN can start up.
886 *
887 * @param conf configuration
888 * @param force format regardless of whether or not the shared edits dir exists
889 * @param interactive prompt the user when a dir exists
890 * @return true if the command aborts, false otherwise
891 */
892 private static boolean initializeSharedEdits(Configuration conf,
893 boolean force, boolean interactive) throws IOException {
894 String nsId = DFSUtil.getNamenodeNameServiceId(conf);
895 String namenodeId = HAUtil.getNameNodeId(conf, nsId);
896 initializeGenericKeys(conf, nsId, namenodeId);
897
898 if (conf.get(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY) == null) {
899 LOG.fatal("No shared edits directory configured for namespace " +
900 nsId + " namenode " + namenodeId);
901 return false;
902 }
903
904 if (UserGroupInformation.isSecurityEnabled()) {
905 InetSocketAddress socAddr = getAddress(conf);
906 SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
907 DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName());
908 }
909
910 NNStorage existingStorage = null;
911 try {
912 FSNamesystem fsns =
913 FSNamesystem.loadFromDisk(getConfigurationWithoutSharedEdits(conf));
914
915 existingStorage = fsns.getFSImage().getStorage();
916 NamespaceInfo nsInfo = existingStorage.getNamespaceInfo();
917
918 List<URI> sharedEditsDirs = FSNamesystem.getSharedEditsDirs(conf);
919
920 FSImage sharedEditsImage = new FSImage(conf,
921 Lists.<URI>newArrayList(),
922 sharedEditsDirs);
923 sharedEditsImage.getEditLog().initJournalsForWrite();
924
925 if (!sharedEditsImage.confirmFormat(force, interactive)) {
926 return true; // abort
927 }
928
929 NNStorage newSharedStorage = sharedEditsImage.getStorage();
930 // Call Storage.format instead of FSImage.format here, since we don't
931 // actually want to save a checkpoint - just prime the dirs with
932 // the existing namespace info
933 newSharedStorage.format(nsInfo);
934 sharedEditsImage.getEditLog().formatNonFileJournals(nsInfo);
935
936 // Need to make sure the edit log segments are in good shape to initialize
937 // the shared edits dir.
938 fsns.getFSImage().getEditLog().close();
939 fsns.getFSImage().getEditLog().initJournalsForWrite();
940 fsns.getFSImage().getEditLog().recoverUnclosedStreams();
941
942 copyEditLogSegmentsToSharedDir(fsns, sharedEditsDirs, newSharedStorage,
943 conf);
944 } catch (IOException ioe) {
945 LOG.error("Could not initialize shared edits dir", ioe);
946 return true; // aborted
947 } finally {
948 // Have to unlock storage explicitly for the case when we're running in a
949 // unit test, which runs in the same JVM as NNs.
950 if (existingStorage != null) {
951 try {
952 existingStorage.unlockAll();
953 } catch (IOException ioe) {
954 LOG.warn("Could not unlock storage directories", ioe);
955 return true; // aborted
956 }
957 }
958 }
959 return false; // did not abort
960 }
961
962 private static void copyEditLogSegmentsToSharedDir(FSNamesystem fsns,
963 Collection<URI> sharedEditsDirs, NNStorage newSharedStorage,
964 Configuration conf) throws IOException {
965 Preconditions.checkArgument(!sharedEditsDirs.isEmpty(),
966 "No shared edits specified");
967 // Copy edit log segments into the new shared edits dir.
968 List<URI> sharedEditsUris = new ArrayList<URI>(sharedEditsDirs);
969 FSEditLog newSharedEditLog = new FSEditLog(conf, newSharedStorage,
970 sharedEditsUris);
971 newSharedEditLog.initJournalsForWrite();
972 newSharedEditLog.recoverUnclosedStreams();
973
974 FSEditLog sourceEditLog = fsns.getFSImage().editLog;
975
976 long fromTxId = fsns.getFSImage().getMostRecentCheckpointTxId();
977
978 Collection<EditLogInputStream> streams = null;
979 try {
980 streams = sourceEditLog.selectInputStreams(fromTxId + 1, 0);
981
982 // Set the nextTxid to the CheckpointTxId+1
983 newSharedEditLog.setNextTxId(fromTxId + 1);
984
985 // Copy all edits after last CheckpointTxId to shared edits dir
986 for (EditLogInputStream stream : streams) {
987 LOG.debug("Beginning to copy stream " + stream + " to shared edits");
988 FSEditLogOp op;
989 boolean segmentOpen = false;
990 while ((op = stream.readOp()) != null) {
991 if (LOG.isTraceEnabled()) {
992 LOG.trace("copying op: " + op);
993 }
994 if (!segmentOpen) {
995 newSharedEditLog.startLogSegment(op.txid, false);
996 segmentOpen = true;
997 }
998
999 newSharedEditLog.logEdit(op);
1000
1001 if (op.opCode == FSEditLogOpCodes.OP_END_LOG_SEGMENT) {
1002 newSharedEditLog.logSync();
1003 newSharedEditLog.endCurrentLogSegment(false);
1004 LOG.debug("ending log segment because of END_LOG_SEGMENT op in "
1005 + stream);
1006 segmentOpen = false;
1007 }
1008 }
1009
1010 if (segmentOpen) {
1011 LOG.debug("ending log segment because of end of stream in " + stream);
1012 newSharedEditLog.logSync();
1013 newSharedEditLog.endCurrentLogSegment(false);
1014 segmentOpen = false;
1015 }
1016 }
1017 } finally {
1018 if (streams != null) {
1019 FSEditLog.closeAllStreams(streams);
1020 }
1021 }
1022 }
1023
1024 private static boolean finalize(Configuration conf,
1025 boolean isConfirmationNeeded
1026 ) throws IOException {
1027 String nsId = DFSUtil.getNamenodeNameServiceId(conf);
1028 String namenodeId = HAUtil.getNameNodeId(conf, nsId);
1029 initializeGenericKeys(conf, nsId, namenodeId);
1030
1031 FSNamesystem nsys = new FSNamesystem(conf, new FSImage(conf));
1032 System.err.print(
1033 "\"finalize\" will remove the previous state of the files system.\n"
1034 + "Recent upgrade will become permanent.\n"
1035 + "Rollback option will not be available anymore.\n");
1036 if (isConfirmationNeeded) {
1037 if (!confirmPrompt("Finalize filesystem state?")) {
1038 System.err.println("Finalize aborted.");
1039 return true;
1040 }
1041 }
1042 nsys.dir.fsImage.finalizeUpgrade();
1043 return false;
1044 }
1045
1046 private static void printUsage(PrintStream out) {
1047 out.println(USAGE + "\n");
1048 }
1049
1050 private static StartupOption parseArguments(String args[]) {
1051 int argsLen = (args == null) ? 0 : args.length;
1052 StartupOption startOpt = StartupOption.REGULAR;
1053 for(int i=0; i < argsLen; i++) {
1054 String cmd = args[i];
1055 if (StartupOption.FORMAT.getName().equalsIgnoreCase(cmd)) {
1056 startOpt = StartupOption.FORMAT;
1057 for (i = i + 1; i < argsLen; i++) {
1058 if (args[i].equalsIgnoreCase(StartupOption.CLUSTERID.getName())) {
1059 i++;
1060 if (i >= argsLen) {
1061 // if no cluster id specified, return null
1062 LOG.fatal("Must specify a valid cluster ID after the "
1063 + StartupOption.CLUSTERID.getName() + " flag");
1064 return null;
1065 }
1066 String clusterId = args[i];
1067 // Make sure an id is specified and not another flag
1068 if (clusterId.isEmpty() ||
1069 clusterId.equalsIgnoreCase(StartupOption.FORCE.getName()) ||
1070 clusterId.equalsIgnoreCase(
1071 StartupOption.NONINTERACTIVE.getName())) {
1072 LOG.fatal("Must specify a valid cluster ID after the "
1073 + StartupOption.CLUSTERID.getName() + " flag");
1074 return null;
1075 }
1076 startOpt.setClusterId(clusterId);
1077 }
1078
1079 if (args[i].equalsIgnoreCase(StartupOption.FORCE.getName())) {
1080 startOpt.setForceFormat(true);
1081 }
1082
1083 if (args[i].equalsIgnoreCase(StartupOption.NONINTERACTIVE.getName())) {
1084 startOpt.setInteractiveFormat(false);
1085 }
1086 }
1087 } else if (StartupOption.GENCLUSTERID.getName().equalsIgnoreCase(cmd)) {
1088 startOpt = StartupOption.GENCLUSTERID;
1089 } else if (StartupOption.REGULAR.getName().equalsIgnoreCase(cmd)) {
1090 startOpt = StartupOption.REGULAR;
1091 } else if (StartupOption.BACKUP.getName().equalsIgnoreCase(cmd)) {
1092 startOpt = StartupOption.BACKUP;
1093 } else if (StartupOption.CHECKPOINT.getName().equalsIgnoreCase(cmd)) {
1094 startOpt = StartupOption.CHECKPOINT;
1095 } else if (StartupOption.UPGRADE.getName().equalsIgnoreCase(cmd)) {
1096 startOpt = StartupOption.UPGRADE;
1097 // might be followed by two args
1098 if (i + 2 < argsLen
1099 && args[i + 1].equalsIgnoreCase(StartupOption.CLUSTERID.getName())) {
1100 i += 2;
1101 startOpt.setClusterId(args[i]);
1102 }
1103 } else if (StartupOption.ROLLBACK.getName().equalsIgnoreCase(cmd)) {
1104 startOpt = StartupOption.ROLLBACK;
1105 } else if (StartupOption.FINALIZE.getName().equalsIgnoreCase(cmd)) {
1106 startOpt = StartupOption.FINALIZE;
1107 } else if (StartupOption.IMPORT.getName().equalsIgnoreCase(cmd)) {
1108 startOpt = StartupOption.IMPORT;
1109 } else if (StartupOption.BOOTSTRAPSTANDBY.getName().equalsIgnoreCase(cmd)) {
1110 startOpt = StartupOption.BOOTSTRAPSTANDBY;
1111 return startOpt;
1112 } else if (StartupOption.INITIALIZESHAREDEDITS.getName().equalsIgnoreCase(cmd)) {
1113 startOpt = StartupOption.INITIALIZESHAREDEDITS;
1114 for (i = i + 1 ; i < argsLen; i++) {
1115 if (StartupOption.NONINTERACTIVE.getName().equals(args[i])) {
1116 startOpt.setInteractiveFormat(false);
1117 } else if (StartupOption.FORCE.getName().equals(args[i])) {
1118 startOpt.setForceFormat(true);
1119 } else {
1120 LOG.fatal("Invalid argument: " + args[i]);
1121 return null;
1122 }
1123 }
1124 return startOpt;
1125 } else if (StartupOption.RECOVER.getName().equalsIgnoreCase(cmd)) {
1126 if (startOpt != StartupOption.REGULAR) {
1127 throw new RuntimeException("Can't combine -recover with " +
1128 "other startup options.");
1129 }
1130 startOpt = StartupOption.RECOVER;
1131 while (++i < argsLen) {
1132 if (args[i].equalsIgnoreCase(
1133 StartupOption.FORCE.getName())) {
1134 startOpt.setForce(MetaRecoveryContext.FORCE_FIRST_CHOICE);
1135 } else {
1136 throw new RuntimeException("Error parsing recovery options: " +
1137 "can't understand option \"" + args[i] + "\"");
1138 }
1139 }
1140 } else {
1141 return null;
1142 }
1143 }
1144 return startOpt;
1145 }
1146
1147 private static void setStartupOption(Configuration conf, StartupOption opt) {
1148 conf.set(DFS_NAMENODE_STARTUP_KEY, opt.toString());
1149 }
1150
1151 static StartupOption getStartupOption(Configuration conf) {
1152 return StartupOption.valueOf(conf.get(DFS_NAMENODE_STARTUP_KEY,
1153 StartupOption.REGULAR.toString()));
1154 }
1155
1156 private static void doRecovery(StartupOption startOpt, Configuration conf)
1157 throws IOException {
1158 String nsId = DFSUtil.getNamenodeNameServiceId(conf);
1159 String namenodeId = HAUtil.getNameNodeId(conf, nsId);
1160 initializeGenericKeys(conf, nsId, namenodeId);
1161 if (startOpt.getForce() < MetaRecoveryContext.FORCE_ALL) {
1162 if (!confirmPrompt("You have selected Metadata Recovery mode. " +
1163 "This mode is intended to recover lost metadata on a corrupt " +
1164 "filesystem. Metadata recovery mode often permanently deletes " +
1165 "data from your HDFS filesystem. Please back up your edit log " +
1166 "and fsimage before trying this!\n\n" +
1167 "Are you ready to proceed? (Y/N)\n")) {
1168 System.err.println("Recovery aborted at user request.\n");
1169 return;
1170 }
1171 }
1172 MetaRecoveryContext.LOG.info("starting recovery...");
1173 UserGroupInformation.setConfiguration(conf);
1174 NameNode.initMetrics(conf, startOpt.toNodeRole());
1175 FSNamesystem fsn = null;
1176 try {
1177 fsn = FSNamesystem.loadFromDisk(conf);
1178 fsn.saveNamespace();
1179 MetaRecoveryContext.LOG.info("RECOVERY COMPLETE");
1180 } catch (IOException e) {
1181 MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
1182 throw e;
1183 } catch (RuntimeException e) {
1184 MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
1185 throw e;
1186 } finally {
1187 if (fsn != null)
1188 fsn.close();
1189 }
1190 }
1191
1192 public static NameNode createNameNode(String argv[], Configuration conf)
1193 throws IOException {
1194 if (conf == null)
1195 conf = new HdfsConfiguration();
1196 StartupOption startOpt = parseArguments(argv);
1197 if (startOpt == null) {
1198 printUsage(System.err);
1199 return null;
1200 }
1201 setStartupOption(conf, startOpt);
1202
1203 if (HAUtil.isHAEnabled(conf, DFSUtil.getNamenodeNameServiceId(conf)) &&
1204 (startOpt == StartupOption.UPGRADE ||
1205 startOpt == StartupOption.ROLLBACK ||
1206 startOpt == StartupOption.FINALIZE)) {
1207 throw new HadoopIllegalArgumentException("Invalid startup option. " +
1208 "Cannot perform DFS upgrade with HA enabled.");
1209 }
1210
1211 switch (startOpt) {
1212 case FORMAT: {
1213 boolean aborted = format(conf, startOpt.getForceFormat(),
1214 startOpt.getInteractiveFormat());
1215 terminate(aborted ? 1 : 0);
1216 return null; // avoid javac warning
1217 }
1218 case GENCLUSTERID: {
1219 System.err.println("Generating new cluster id:");
1220 System.out.println(NNStorage.newClusterID());
1221 terminate(0);
1222 return null;
1223 }
1224 case FINALIZE: {
1225 boolean aborted = finalize(conf, true);
1226 terminate(aborted ? 1 : 0);
1227 return null; // avoid javac warning
1228 }
1229 case BOOTSTRAPSTANDBY: {
1230 String toolArgs[] = Arrays.copyOfRange(argv, 1, argv.length);
1231 int rc = BootstrapStandby.run(toolArgs, conf);
1232 terminate(rc);
1233 return null; // avoid warning
1234 }
1235 case INITIALIZESHAREDEDITS: {
1236 boolean aborted = initializeSharedEdits(conf,
1237 startOpt.getForceFormat(),
1238 startOpt.getInteractiveFormat());
1239 terminate(aborted ? 1 : 0);
1240 return null; // avoid warning
1241 }
1242 case BACKUP:
1243 case CHECKPOINT: {
1244 NamenodeRole role = startOpt.toNodeRole();
1245 DefaultMetricsSystem.initialize(role.toString().replace(" ", ""));
1246 return new BackupNode(conf, role);
1247 }
1248 case RECOVER: {
1249 NameNode.doRecovery(startOpt, conf);
1250 return null;
1251 }
1252 default: {
1253 DefaultMetricsSystem.initialize("NameNode");
1254 return new NameNode(conf);
1255 }
1256 }
1257 }
1258
1259 /**
1260 * In federation configuration is set for a set of
1261 * namenode and secondary namenode/backup/checkpointer, which are
1262 * grouped under a logical nameservice ID. The configuration keys specific
1263 * to them have suffix set to configured nameserviceId.
1264 *
1265 * This method copies the value from specific key of format key.nameserviceId
1266 * to key, to set up the generic configuration. Once this is done, only
1267 * generic version of the configuration is read in rest of the code, for
1268 * backward compatibility and simpler code changes.
1269 *
1270 * @param conf
1271 * Configuration object to lookup specific key and to set the value
1272 * to the key passed. Note the conf object is modified
1273 * @param nameserviceId name service Id (to distinguish federated NNs)
1274 * @param namenodeId the namenode ID (to distinguish HA NNs)
1275 * @see DFSUtil#setGenericConf(Configuration, String, String, String...)
1276 */
1277 public static void initializeGenericKeys(Configuration conf,
1278 String nameserviceId, String namenodeId) {
1279 if ((nameserviceId != null && !nameserviceId.isEmpty()) ||
1280 (namenodeId != null && !namenodeId.isEmpty())) {
1281 if (nameserviceId != null) {
1282 conf.set(DFS_NAMESERVICE_ID, nameserviceId);
1283 }
1284 if (namenodeId != null) {
1285 conf.set(DFS_HA_NAMENODE_ID_KEY, namenodeId);
1286 }
1287
1288 DFSUtil.setGenericConf(conf, nameserviceId, namenodeId,
1289 NAMENODE_SPECIFIC_KEYS);
1290 DFSUtil.setGenericConf(conf, nameserviceId, null,
1291 NAMESERVICE_SPECIFIC_KEYS);
1292 }
1293
1294 // If the RPC address is set use it to (re-)configure the default FS
1295 if (conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY) != null) {
1296 URI defaultUri = URI.create(HdfsConstants.HDFS_URI_SCHEME + "://"
1297 + conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY));
1298 conf.set(FS_DEFAULT_NAME_KEY, defaultUri.toString());
1299 LOG.debug("Setting " + FS_DEFAULT_NAME_KEY + " to " + defaultUri.toString());
1300 }
1301 }
1302
1303 /**
1304 * Get the name service Id for the node
1305 * @return name service Id or null if federation is not configured
1306 */
1307 protected String getNameServiceId(Configuration conf) {
1308 return DFSUtil.getNamenodeNameServiceId(conf);
1309 }
1310
1311 /**
1312 */
1313 public static void main(String argv[]) throws Exception {
1314 if (DFSUtil.parseHelpArgument(argv, NameNode.USAGE, System.out, true)) {
1315 System.exit(0);
1316 }
1317
1318 try {
1319 StringUtils.startupShutdownMessage(NameNode.class, argv, LOG);
1320 NameNode namenode = createNameNode(argv, null);
1321 if (namenode != null) {
1322 namenode.join();
1323 }
1324 } catch (Throwable e) {
1325 LOG.fatal("Exception in namenode join", e);
1326 terminate(1, e);
1327 }
1328 }
1329
1330 synchronized void monitorHealth()
1331 throws HealthCheckFailedException, AccessControlException {
1332 namesystem.checkSuperuserPrivilege();
1333 if (!haEnabled) {
1334 return; // no-op, if HA is not enabled
1335 }
1336 getNamesystem().checkAvailableResources();
1337 if (!getNamesystem().nameNodeHasResourcesAvailable()) {
1338 throw new HealthCheckFailedException(
1339 "The NameNode has no resources available");
1340 }
1341 }
1342
1343 synchronized void transitionToActive()
1344 throws ServiceFailedException, AccessControlException {
1345 namesystem.checkSuperuserPrivilege();
1346 if (!haEnabled) {
1347 throw new ServiceFailedException("HA for namenode is not enabled");
1348 }
1349 state.setState(haContext, ACTIVE_STATE);
1350 }
1351
1352 synchronized void transitionToStandby()
1353 throws ServiceFailedException, AccessControlException {
1354 namesystem.checkSuperuserPrivilege();
1355 if (!haEnabled) {
1356 throw new ServiceFailedException("HA for namenode is not enabled");
1357 }
1358 state.setState(haContext, STANDBY_STATE);
1359 }
1360
1361 synchronized HAServiceStatus getServiceStatus()
1362 throws ServiceFailedException, AccessControlException {
1363 namesystem.checkSuperuserPrivilege();
1364 if (!haEnabled) {
1365 throw new ServiceFailedException("HA for namenode is not enabled");
1366 }
1367 if (state == null) {
1368 return new HAServiceStatus(HAServiceState.INITIALIZING);
1369 }
1370 HAServiceState retState = state.getServiceState();
1371 HAServiceStatus ret = new HAServiceStatus(retState);
1372 if (retState == HAServiceState.STANDBY) {
1373 String safemodeTip = namesystem.getSafeModeTip();
1374 if (!safemodeTip.isEmpty()) {
1375 ret.setNotReadyToBecomeActive(
1376 "The NameNode is in safemode. " +
1377 safemodeTip);
1378 } else {
1379 ret.setReadyToBecomeActive();
1380 }
1381 } else if (retState == HAServiceState.ACTIVE) {
1382 ret.setReadyToBecomeActive();
1383 } else {
1384 ret.setNotReadyToBecomeActive("State is " + state);
1385 }
1386 return ret;
1387 }
1388
1389 synchronized HAServiceState getServiceState() {
1390 if (state == null) {
1391 return HAServiceState.INITIALIZING;
1392 }
1393 return state.getServiceState();
1394 }
1395
1396 /**
1397 * Shutdown the NN immediately in an ungraceful way. Used when it would be
1398 * unsafe for the NN to continue operating, e.g. during a failed HA state
1399 * transition.
1400 *
1401 * @param t exception which warrants the shutdown. Printed to the NN log
1402 * before exit.
1403 * @throws ExitException thrown only for testing.
1404 */
1405 protected synchronized void doImmediateShutdown(Throwable t)
1406 throws ExitException {
1407 String message = "Error encountered requiring NN shutdown. " +
1408 "Shutting down immediately.";
1409 try {
1410 LOG.fatal(message, t);
1411 } catch (Throwable ignored) {
1412 // This is unlikely to happen, but there's nothing we can do if it does.
1413 }
1414 terminate(1, t);
1415 }
1416
1417 /**
1418 * Class used to expose {@link NameNode} as context to {@link HAState}
1419 */
1420 protected class NameNodeHAContext implements HAContext {
1421 @Override
1422 public void setState(HAState s) {
1423 state = s;
1424 }
1425
1426 @Override
1427 public HAState getState() {
1428 return state;
1429 }
1430
1431 @Override
1432 public void startActiveServices() throws IOException {
1433 try {
1434 namesystem.startActiveServices();
1435 startTrashEmptier(conf);
1436 } catch (Throwable t) {
1437 doImmediateShutdown(t);
1438 }
1439 }
1440
1441 @Override
1442 public void stopActiveServices() throws IOException {
1443 try {
1444 if (namesystem != null) {
1445 namesystem.stopActiveServices();
1446 }
1447 stopTrashEmptier();
1448 } catch (Throwable t) {
1449 doImmediateShutdown(t);
1450 }
1451 }
1452
1453 @Override
1454 public void startStandbyServices() throws IOException {
1455 try {
1456 namesystem.startStandbyServices(conf);
1457 } catch (Throwable t) {
1458 doImmediateShutdown(t);
1459 }
1460 }
1461
1462 @Override
1463 public void prepareToStopStandbyServices() throws ServiceFailedException {
1464 try {
1465 namesystem.prepareToStopStandbyServices();
1466 } catch (Throwable t) {
1467 doImmediateShutdown(t);
1468 }
1469 }
1470
1471 @Override
1472 public void stopStandbyServices() throws IOException {
1473 try {
1474 if (namesystem != null) {
1475 namesystem.stopStandbyServices();
1476 }
1477 } catch (Throwable t) {
1478 doImmediateShutdown(t);
1479 }
1480 }
1481
1482 @Override
1483 public void writeLock() {
1484 namesystem.writeLock();
1485 }
1486
1487 @Override
1488 public void writeUnlock() {
1489 namesystem.writeUnlock();
1490 }
1491
1492 /** Check if an operation of given category is allowed */
1493 @Override
1494 public void checkOperation(final OperationCategory op)
1495 throws StandbyException {
1496 state.checkOperation(haContext, op);
1497 }
1498
1499 @Override
1500 public boolean allowStaleReads() {
1501 return allowStaleStandbyReads;
1502 }
1503
1504 }
1505
1506 public boolean isStandbyState() {
1507 return (state.equals(STANDBY_STATE));
1508 }
1509
1510 /**
1511 * Check that a request to change this node's HA state is valid.
1512 * In particular, verifies that, if auto failover is enabled, non-forced
1513 * requests from the HAAdmin CLI are rejected, and vice versa.
1514 *
1515 * @param req the request to check
1516 * @throws AccessControlException if the request is disallowed
1517 */
1518 void checkHaStateChange(StateChangeRequestInfo req)
1519 throws AccessControlException {
1520 boolean autoHaEnabled = conf.getBoolean(DFS_HA_AUTO_FAILOVER_ENABLED_KEY,
1521 DFS_HA_AUTO_FAILOVER_ENABLED_DEFAULT);
1522 switch (req.getSource()) {
1523 case REQUEST_BY_USER:
1524 if (autoHaEnabled) {
1525 throw new AccessControlException(
1526 "Manual HA control for this NameNode is disallowed, because " +
1527 "automatic HA is enabled.");
1528 }
1529 break;
1530 case REQUEST_BY_USER_FORCED:
1531 if (autoHaEnabled) {
1532 LOG.warn("Allowing manual HA control from " +
1533 Server.getRemoteAddress() +
1534 " even though automatic HA is enabled, because the user " +
1535 "specified the force flag");
1536 }
1537 break;
1538 case REQUEST_BY_ZKFC:
1539 if (!autoHaEnabled) {
1540 throw new AccessControlException(
1541 "Request from ZK failover controller at " +
1542 Server.getRemoteAddress() + " denied since automatic HA " +
1543 "is not enabled");
1544 }
1545 break;
1546 }
1547 }
1548 }