001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hdfs.server.namenode; 019 020import static org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.EncryptedKeyVersion; 021import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT; 022import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY; 023import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT; 024import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY; 025import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_DEFAULT; 026import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_KEY; 027import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_DEFAULT; 028import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY; 029import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CHECKSUM_TYPE_DEFAULT; 030import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY; 031import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT; 032import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_KEY; 033import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_DEFAULT; 034import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_KEY; 035import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_DEFAULT; 036import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_KEY; 037import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT; 038import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY; 039import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOGGERS_KEY; 040import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT; 041import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY; 042import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT; 043import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY; 044import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT; 045import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY; 046import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME; 047import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT; 048import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY; 049import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT; 050import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY; 051import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT; 052import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY; 053import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT; 054import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY; 055import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY; 056import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY; 057import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS; 058import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT; 059import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD; 060import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT; 061import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT; 062import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY; 063import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC; 064import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC_DEFAULT; 065import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_INODE_ATTRIBUTES_PROVIDER_KEY; 066import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_DEFAULT; 067import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_KEY; 068import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY; 069import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_DEFAULT; 070import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY; 071import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY; 072import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT; 073import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY; 074import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT; 075import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY; 076import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT; 077import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY; 078import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY; 079import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_DEFAULT; 080import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY; 081import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT; 082import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY; 083import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY; 084import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_DEFAULT; 085import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY; 086import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT; 087import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY; 088import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_DEFAULT; 089import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY; 090import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_DEFAULT; 091import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_KEY; 092import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.SECURITY_XATTR_UNREADABLE_BY_SUPERUSER; 093import static org.apache.hadoop.util.Time.now; 094import static org.apache.hadoop.util.Time.monotonicNow; 095 096import java.io.BufferedWriter; 097import 
java.io.ByteArrayInputStream; 098import java.io.DataInput; 099import java.io.DataInputStream; 100import java.io.DataOutputStream; 101import java.io.File; 102import java.io.FileNotFoundException; 103import java.io.FileOutputStream; 104import java.io.IOException; 105import java.io.OutputStreamWriter; 106import java.io.PrintWriter; 107import java.io.StringWriter; 108import java.lang.management.ManagementFactory; 109import java.net.InetAddress; 110import java.net.URI; 111import java.security.GeneralSecurityException; 112import java.util.ArrayList; 113import java.util.Arrays; 114import java.util.Collection; 115import java.util.Collections; 116import java.util.Date; 117import java.util.EnumSet; 118import java.util.HashMap; 119import java.util.HashSet; 120import java.util.Iterator; 121import java.util.LinkedHashSet; 122import java.util.List; 123import java.util.Map; 124import java.util.Set; 125import java.util.TreeMap; 126import java.util.concurrent.TimeUnit; 127import java.util.concurrent.locks.Condition; 128import java.util.concurrent.locks.ReentrantLock; 129import java.util.concurrent.locks.ReentrantReadWriteLock; 130 131import javax.management.NotCompliantMBeanException; 132import javax.management.ObjectName; 133import javax.management.StandardMBean; 134 135import org.apache.commons.logging.Log; 136import org.apache.commons.logging.LogFactory; 137import org.apache.commons.logging.impl.Log4JLogger; 138import org.apache.hadoop.HadoopIllegalArgumentException; 139import org.apache.hadoop.classification.InterfaceAudience; 140import org.apache.hadoop.conf.Configuration; 141import org.apache.hadoop.crypto.CipherSuite; 142import org.apache.hadoop.crypto.CryptoProtocolVersion; 143import org.apache.hadoop.crypto.key.KeyProvider; 144import org.apache.hadoop.crypto.CryptoCodec; 145import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension; 146import org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries; 147import org.apache.hadoop.fs.CacheFlag; 148import 
org.apache.hadoop.fs.ContentSummary; 149import org.apache.hadoop.fs.CreateFlag; 150import org.apache.hadoop.fs.FileAlreadyExistsException; 151import org.apache.hadoop.fs.FileEncryptionInfo; 152import org.apache.hadoop.fs.FileStatus; 153import org.apache.hadoop.fs.FileSystem; 154import org.apache.hadoop.fs.FsServerDefaults; 155import org.apache.hadoop.fs.InvalidPathException; 156import org.apache.hadoop.fs.Options; 157import org.apache.hadoop.fs.ParentNotDirectoryException; 158import org.apache.hadoop.fs.Path; 159import org.apache.hadoop.fs.UnresolvedLinkException; 160import org.apache.hadoop.fs.XAttr; 161import org.apache.hadoop.fs.XAttrSetFlag; 162import org.apache.hadoop.fs.permission.AclEntry; 163import org.apache.hadoop.fs.permission.AclStatus; 164import org.apache.hadoop.fs.permission.FsAction; 165import org.apache.hadoop.fs.permission.FsPermission; 166import org.apache.hadoop.fs.permission.PermissionStatus; 167import org.apache.hadoop.fs.StorageType; 168import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; 169import org.apache.hadoop.ha.ServiceFailedException; 170import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy; 171import org.apache.hadoop.hdfs.DFSConfigKeys; 172import org.apache.hadoop.hdfs.DFSUtil; 173import org.apache.hadoop.hdfs.HAUtil; 174import org.apache.hadoop.hdfs.HdfsConfiguration; 175import org.apache.hadoop.hdfs.UnknownCryptoProtocolVersionException; 176import org.apache.hadoop.hdfs.XAttrHelper; 177import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException; 178import org.apache.hadoop.hdfs.protocol.Block; 179import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry; 180import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo; 181import org.apache.hadoop.hdfs.protocol.CachePoolEntry; 182import org.apache.hadoop.hdfs.protocol.CachePoolInfo; 183import org.apache.hadoop.hdfs.protocol.ClientProtocol; 184import org.apache.hadoop.hdfs.protocol.DatanodeID; 185import org.apache.hadoop.hdfs.protocol.DatanodeInfo; 186import 
org.apache.hadoop.hdfs.protocol.DirectoryListing; 187import org.apache.hadoop.hdfs.protocol.EncryptionZone; 188import org.apache.hadoop.hdfs.protocol.ExtendedBlock; 189import org.apache.hadoop.hdfs.protocol.HdfsConstants; 190import org.apache.hadoop.hdfs.protocol.LastBlockWithStatus; 191import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; 192import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; 193import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; 194import org.apache.hadoop.hdfs.protocol.LocatedBlock; 195import org.apache.hadoop.hdfs.protocol.LocatedBlocks; 196import org.apache.hadoop.hdfs.protocol.QuotaExceededException; 197import org.apache.hadoop.hdfs.protocol.RecoveryInProgressException; 198import org.apache.hadoop.hdfs.protocol.RollingUpgradeException; 199import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo; 200import org.apache.hadoop.hdfs.protocol.SnapshotAccessControlException; 201import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport; 202import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus; 203import org.apache.hadoop.hdfs.protocol.datatransfer.ReplaceDatanodeOnFailure; 204import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager; 205import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager.AccessMode; 206import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; 207import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager; 208import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager.SecretManagerState; 209import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection; 210import org.apache.hadoop.hdfs.server.blockmanagement.BlockIdManager; 211import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoContiguous; 212import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoContiguousUnderConstruction; 213import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; 
214import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; 215import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager; 216import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStatistics; 217import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo; 218import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState; 219import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; 220import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption; 221import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; 222import org.apache.hadoop.hdfs.server.common.Storage; 223import org.apache.hadoop.hdfs.server.common.Storage.StorageDirType; 224import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; 225import org.apache.hadoop.hdfs.server.common.Util; 226import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection; 227import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo; 228import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream; 229import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease; 230import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile; 231import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; 232import org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer; 233import org.apache.hadoop.hdfs.server.namenode.ha.HAContext; 234import org.apache.hadoop.hdfs.server.namenode.ha.StandbyCheckpointer; 235import org.apache.hadoop.hdfs.server.namenode.metrics.FSNamesystemMBean; 236import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; 237import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot; 238import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager; 239import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase; 240import 
org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress; 241import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter; 242import org.apache.hadoop.hdfs.server.namenode.startupprogress.Status; 243import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step; 244import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType; 245import org.apache.hadoop.hdfs.server.namenode.top.TopAuditLogger; 246import org.apache.hadoop.hdfs.server.namenode.top.TopConf; 247import org.apache.hadoop.hdfs.server.namenode.top.metrics.TopMetrics; 248import org.apache.hadoop.hdfs.server.namenode.top.window.RollingWindowManager; 249import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods; 250import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; 251import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; 252import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport; 253import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; 254import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat; 255import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand; 256import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; 257import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; 258import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks; 259import org.apache.hadoop.hdfs.server.protocol.StorageReport; 260import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary; 261import org.apache.hadoop.io.EnumSetWritable; 262import org.apache.hadoop.io.IOUtils; 263import org.apache.hadoop.io.Text; 264import org.apache.hadoop.ipc.RetriableException; 265import org.apache.hadoop.ipc.RetryCache; 266import org.apache.hadoop.ipc.Server; 267import org.apache.hadoop.ipc.StandbyException; 268import org.apache.hadoop.metrics2.annotation.Metric; 269import org.apache.hadoop.metrics2.annotation.Metrics; 270import 
org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; 271import org.apache.hadoop.metrics2.util.MBeans; 272import org.apache.hadoop.net.NetworkTopology; 273import org.apache.hadoop.net.Node; 274import org.apache.hadoop.net.NodeBase; 275import org.apache.hadoop.security.AccessControlException; 276import org.apache.hadoop.security.UserGroupInformation; 277import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; 278import org.apache.hadoop.security.token.SecretManager.InvalidToken; 279import org.apache.hadoop.security.token.Token; 280import org.apache.hadoop.security.token.TokenIdentifier; 281import org.apache.hadoop.security.token.delegation.DelegationKey; 282import org.apache.hadoop.util.ChunkedArrayList; 283import org.apache.hadoop.util.Daemon; 284import org.apache.hadoop.util.DataChecksum; 285import org.apache.hadoop.util.ReflectionUtils; 286import org.apache.hadoop.util.StringUtils; 287import org.apache.hadoop.util.VersionInfo; 288import org.apache.log4j.Appender; 289import org.apache.log4j.AsyncAppender; 290import org.apache.log4j.Logger; 291import org.codehaus.jackson.map.ObjectMapper; 292import org.mortbay.util.ajax.JSON; 293 294import com.google.common.annotations.VisibleForTesting; 295import com.google.common.base.Charsets; 296import com.google.common.base.Preconditions; 297import com.google.common.collect.ImmutableMap; 298import com.google.common.collect.Lists; 299 300/*************************************************** 301 * FSNamesystem does the actual bookkeeping work for the 302 * DataNode. 303 * 304 * It tracks several important tables. 
 *
 * 1) valid fsname --> blocklist (kept on disk, logged)
 * 2) Set of all valid blocks (inverted #1)
 * 3) block --> machinelist (kept in memory, rebuilt dynamically from reports)
 * 4) machine --> blocklist (inverted #2)
 * 5) LRU cache of updated-heartbeat machines
 ***************************************************/
@InterfaceAudience.Private
@Metrics(context="dfs")
public class FSNamesystem implements Namesystem, FSNamesystemMBean,
  NameNodeMXBean {
  public static final Log LOG = LogFactory.getLog(FSNamesystem.class);

  // Per-thread scratch buffer, presumably used to assemble a single audit
  // log line without per-event allocation (usage not visible in this chunk).
  private static final ThreadLocal<StringBuilder> auditBuffer =
    new ThreadLocal<StringBuilder>() {
      @Override
      protected StringBuilder initialValue() {
        return new StringBuilder();
      }
    };

  // Allocator of block IDs / generation stamps; constructed from the
  // BlockManager in the constructor below.
  private final BlockIdManager blockIdManager;

  /**
   * Whether audit events should be logged at all.  When the default audit
   * logger is the only one configured, this tracks the underlying log's
   * INFO level so callers can skip building audit entries that would be
   * discarded anyway (see {@link #isDefaultAuditLogger}).
   */
  @VisibleForTesting
  public boolean isAuditEnabled() {
    return !isDefaultAuditLogger || auditLog.isInfoEnabled();
  }

  /** Convenience overload for operations with no destination path or stat. */
  private void logAuditEvent(boolean succeeded, String cmd, String src)
      throws IOException {
    logAuditEvent(succeeded, cmd, src, null, null);
  }

  /**
   * Audit an operation on behalf of the current remote caller.  Only emitted
   * for external (RPC/web) invocations and when audit logging is enabled.
   *
   * @param succeeded whether the audited operation succeeded
   * @param cmd       operation name
   * @param src       source path
   * @param dst       destination path, or null
   * @param stat      resulting file status, or null
   */
  private void logAuditEvent(boolean succeeded, String cmd, String src,
      String dst, HdfsFileStatus stat) throws IOException {
    if (isAuditEnabled() && isExternalInvocation()) {
      logAuditEvent(succeeded, getRemoteUser(), getRemoteIp(),
                    cmd, src, dst, stat);
    }
  }

  /**
   * Deliver one audit event to every configured {@link AuditLogger}.
   * When a stat is supplied it is converted to a {@link FileStatus} keyed on
   * dst when present, else src; HdfsAuditLogger implementations additionally
   * receive the caller's UGI and the delegation token secret manager.
   */
  private void logAuditEvent(boolean succeeded,
      UserGroupInformation ugi, InetAddress addr, String cmd, String src,
      String dst, HdfsFileStatus stat) {
    FileStatus status = null;
    if (stat != null) {
      Path symlink = stat.isSymlink() ? new Path(stat.getSymlink()) : null;
      Path path = dst != null ? new Path(dst) : new Path(src);
      status = new FileStatus(stat.getLen(), stat.isDir(),
          stat.getReplication(), stat.getBlockSize(), stat.getModificationTime(),
          stat.getAccessTime(), stat.getPermission(), stat.getOwner(),
          stat.getGroup(), symlink, path);
    }
    for (AuditLogger logger : auditLoggers) {
      if (logger instanceof HdfsAuditLogger) {
        // Richer interface: also gets the UGI and token secret manager.
        HdfsAuditLogger hdfsLogger = (HdfsAuditLogger) logger;
        hdfsLogger.logAuditEvent(succeeded, ugi.toString(), addr, cmd, src, dst,
            status, ugi, dtSecretManager);
      } else {
        logger.logAuditEvent(succeeded, ugi.toString(), addr,
            cmd, src, dst, status);
      }
    }
  }

  /**
   * Logger for audit events, noting successful FSNamesystem operations. Emits
   * to FSNamesystem.audit at INFO. Each event causes a set of tab-separated
   * <code>key=value</code> pairs to be written for the following properties:
   * <code>
   * ugi=&lt;ugi in RPC&gt;
   * ip=&lt;remote IP&gt;
   * cmd=&lt;command&gt;
   * src=&lt;src path&gt;
   * dst=&lt;dst path (optional)&gt;
   * perm=&lt;permissions (optional)&gt;
   * </code>
   */
  public static final Log auditLog = LogFactory.getLog(
      FSNamesystem.class.getName() + ".audit");

  // Cap on corrupt file blocks returned per listing call.
  static final int DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED = 100;
  // NOTE(review): mutable static — presumably the batch size for incremental
  // block deletion, overridable by tests; usage not visible in this chunk.
  static int BLOCK_DELETION_INCREMENT = 1000;
  // From DFS_PERMISSIONS_ENABLED_KEY (see constructor).
  private final boolean isPermissionEnabled;
  // The user the NameNode runs as (UserGroupInformation.getCurrentUser()).
  private final UserGroupInformation fsOwner;
  // From DFS_PERMISSIONS_SUPERUSERGROUP_KEY (see constructor).
  private final String supergroup;
  private final boolean standbyShouldCheckpoint;

  // Scan interval is not configurable.
  // Fixed one-hour scan interval for the delegation token remover.
  private static final long DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL =
    TimeUnit.MILLISECONDS.convert(1, TimeUnit.HOURS);
  final DelegationTokenSecretManager dtSecretManager;
  private final boolean alwaysUseDelegationTokensForTests;

  // Startup-progress step reported while waiting for block reports.
  private static final Step STEP_AWAITING_REPORTED_BLOCKS =
    new Step(StepType.AWAITING_REPORTED_BLOCKS);

  // Tracks whether the default audit logger is the only configured audit
  // logger; this allows isAuditEnabled() to return false in case the
  // underlying logger is disabled, and avoid some unnecessary work.
  private final boolean isDefaultAuditLogger;
  private final List<AuditLogger> auditLoggers;

  /** The namespace tree. */
  FSDirectory dir;
  private final BlockManager blockManager;
  private final SnapshotManager snapshotManager;
  private final CacheManager cacheManager;
  private final DatanodeStatistics datanodeStatistics;

  // Nameservice ID of this NN, from DFSUtil.getNamenodeNameServiceId(conf);
  // may be null when no nameservice is configured (see constructor).
  private String nameserviceId;

  private volatile RollingUpgradeInfo rollingUpgradeInfo = null;
  /**
   * A flag that indicates whether the checkpointer should checkpoint a rollback
   * fsimage. The edit log tailer sets this flag. The checkpoint will create a
   * rollback fsimage if the flag is true, and then change the flag to false.
   */
  private volatile boolean needRollbackFsImage;

  // Block pool ID used by this namenode
  private String blockPoolId;

  final LeaseManager leaseManager = new LeaseManager(this);

  volatile Daemon smmthread = null;  // SafeModeMonitor thread

  Daemon nnrmthread = null; // NamenodeResourceMonitor thread

  Daemon nnEditLogRoller = null; // NameNodeEditLogRoller thread

  // A daemon to periodically clean up corrupt lazyPersist files
  // from the name space.
  Daemon lazyPersistFileScrubber = null;
  /**
   * When an active namenode will roll its own edit log, in # edits
   */
  private final long editLogRollerThreshold;
  /**
   * Check interval of an active namenode's edit log roller thread
   */
  private final int editLogRollerInterval;

  /**
   * How frequently we scan and unlink corrupt lazyPersist files.
   * (In seconds)
   */
  private final int lazyPersistFileScrubIntervalSec;

  private volatile boolean hasResourcesAvailable = false;
  private volatile boolean fsRunning = true;

  /** The start time of the namesystem. */
  private final long startTime = now();

  /** The interval of namenode checking for the disk space availability */
  private final long resourceRecheckInterval;

  // The actual resource checker instance.
  NameNodeResourceChecker nnResourceChecker;

  private final FsServerDefaults serverDefaults;
  private final boolean supportAppends;
  private final ReplaceDatanodeOnFailure dtpReplaceDatanodeOnFailure;

  private volatile SafeModeInfo safeMode;  // safe mode information

  private final long maxFsObjects;          // maximum number of fs objects

  private final long minBlockSize;         // minimum block size
  private final long maxBlocksPerFile;     // maximum # of blocks per file

  // precision of access times.
  private final long accessTimePrecision;

  /** Lock to protect FSNamesystem. */
  private final FSNamesystemLock fsLock;

  /**
   * Checkpoint lock to protect FSNamesystem modification on standby NNs.
   * Unlike fsLock, it does not affect block updates. On active NNs, this lock
   * does not provide proper protection, because there are operations that
   * modify both block and name system state. Even on standby, fsLock is
   * used when block state changes need to be blocked.
   */
  private final ReentrantLock cpLock;

  /**
   * Used when this NN is in standby state to read from the shared edit log.
   */
  private EditLogTailer editLogTailer = null;

  /**
   * Used when this NN is in standby state to perform checkpoints.
   */
  private StandbyCheckpointer standbyCheckpointer;

  /**
   * Reference to the NN's HAContext object. This is only set once
   * {@link #startCommonServices(Configuration, HAContext)} is called.
   */
  private HAContext haContext;

  private final boolean haEnabled;

  /** flag indicating whether replication queues have been initialized */
  boolean initializedReplQueues = false;

  /**
   * Whether the namenode is in the middle of starting the active service
   */
  private volatile boolean startingActiveService = false;

  private final RetryCache retryCache;

  // Key provider for encryption; null when no provider is configured
  // (see constructor).
  private KeyProviderCryptoExtension provider = null;

  // Set to true once the fsimage has been loaded; guarded by fsLock together
  // with 'cond' so waiters can block until loading completes.
  private volatile boolean imageLoaded = false;
  // Condition on fsLock's write lock, signalled by setImageLoaded().
  private final Condition cond;

  private final FSImage fsImage;

  private final TopConf topConf;
  private TopMetrics topMetrics;

  private INodeAttributeProvider inodeAttributeProvider;

  /**
   * Notify that loading of this FSDirectory is complete, and
   * it is imageLoaded for use
   */
  void imageLoadComplete() {
    // Must only be called once per load; double-loading is a programming error.
    Preconditions.checkState(!imageLoaded, "FSDirectory already loaded");
    setImageLoaded();
  }

  /**
   * Mark the image as loaded and wake every thread parked in
   * {@link #waitForLoadingFSImage()}.  No-op if already loaded.
   */
  void setImageLoaded() {
    if(imageLoaded) return;
    writeLock();
    try {
      setImageLoaded(true);
      dir.markNameCacheInitialized();
      cond.signalAll();
    } finally {
      writeUnlock();
    }
  }

  //This is for testing purposes only
  @VisibleForTesting
  boolean isImageLoaded() {
    return imageLoaded;
  }

  // exposed for unit tests
  protected void setImageLoaded(boolean flag) {
    imageLoaded = flag;
  }

  /**
   * Block until the object is imageLoaded to be used.
   */
  void waitForLoadingFSImage() {
    if (!imageLoaded) {
      writeLock();
      try {
        // Re-check under the lock; await() releases it while parked.  The
        // 5s timeout bounds each wait so a missed signal cannot hang forever.
        while (!imageLoaded) {
          try {
            cond.await(5000, TimeUnit.MILLISECONDS);
          } catch (InterruptedException ignored) {
            // Deliberately swallowed: keep waiting until the image is loaded.
          }
        }
      } finally {
        writeUnlock();
      }
    }
  }

  /**
   * Clear all loaded data
   */
  void clear() {
    dir.reset();
    dtSecretManager.reset();
    blockIdManager.clear();
    leaseManager.removeAllLeases();
    snapshotManager.clearSnapshottableDirs();
    cacheManager.clear();
    // Mark the image as not loaded so a subsequent load can proceed.
    setImageLoaded(false);
    blockManager.clear();
  }

  @VisibleForTesting
  LeaseManager getLeaseManager() {
    return leaseManager;
  }

  boolean isHaEnabled() {
    return haEnabled;
  }

  /**
   * Check the supplied configuration for correctness.
   * @param conf Supplies the configuration to validate.
   * @throws IOException if the configuration could not be queried.
   * @throws IllegalArgumentException if the configuration is invalid.
   */
  private static void checkConfiguration(Configuration conf)
      throws IOException {

    final Collection<URI> namespaceDirs =
        FSNamesystem.getNamespaceDirs(conf);
    final Collection<URI> editsDirs =
        FSNamesystem.getNamespaceEditsDirs(conf);
    final Collection<URI> requiredEditsDirs =
        FSNamesystem.getRequiredNamespaceEditsDirs(conf);
    final Collection<URI> sharedEditsDirs =
        FSNamesystem.getSharedEditsDirs(conf);

    for (URI u : requiredEditsDirs) {
      // The default edits dir is implicitly satisfied; skip it.
      if (u.toString().compareTo(
              DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_DEFAULT) == 0) {
        continue;
      }

      // Each required directory must also be in editsDirs or in
      // sharedEditsDirs.
      if (!editsDirs.contains(u) &&
          !sharedEditsDirs.contains(u)) {
        throw new IllegalArgumentException(
            "Required edits directory " + u.toString() + " not present in " +
            DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY + ". " +
            DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY + "=" +
            editsDirs.toString() + "; " +
            DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY + "=" +
            requiredEditsDirs.toString() + ". " +
            DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY + "=" +
            sharedEditsDirs.toString() + ".");
      }
    }

    // Single storage directories work but leave no redundancy; warn loudly.
    if (namespaceDirs.size() == 1) {
      LOG.warn("Only one image storage directory ("
          + DFS_NAMENODE_NAME_DIR_KEY + ") configured. Beware of data loss"
          + " due to lack of redundant storage directories!");
    }
    if (editsDirs.size() == 1) {
      LOG.warn("Only one namespace edits storage directory ("
          + DFS_NAMENODE_EDITS_DIR_KEY + ") configured. Beware of data loss"
          + " due to lack of redundant storage directories!");
    }
  }

  /**
   * Instantiates an FSNamesystem loaded from the image and edits
   * directories specified in the passed Configuration.
   *
   * @param conf the Configuration which specifies the storage directories
   *             from which to load
   * @return an FSNamesystem which contains the loaded namespace
   * @throws IOException if loading fails
   */
  static FSNamesystem loadFromDisk(Configuration conf) throws IOException {

    checkConfiguration(conf);
    FSImage fsImage = new FSImage(conf,
        FSNamesystem.getNamespaceDirs(conf),
        FSNamesystem.getNamespaceEditsDirs(conf));
    FSNamesystem namesystem = new FSNamesystem(conf, fsImage, false);
    StartupOption startOpt = NameNode.getStartupOption(conf);
    if (startOpt == StartupOption.RECOVER) {
      // Recovery mode runs with the namespace held in safe mode.
      namesystem.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
    }

    long loadStart = monotonicNow();
    try {
      namesystem.loadFSImage(startOpt);
    } catch (IOException ioe) {
      // Close the image to release held resources before propagating.
      LOG.warn("Encountered exception loading fsimage", ioe);
      fsImage.close();
      throw ioe;
    }
    long timeTakenToLoadFSImage = monotonicNow() - loadStart;
    LOG.info("Finished loading FSImage in " + timeTakenToLoadFSImage + " msecs");
    NameNodeMetrics
nnMetrics = NameNode.getNameNodeMetrics(); 690 if (nnMetrics != null) { 691 nnMetrics.setFsImageLoadTime((int) timeTakenToLoadFSImage); 692 } 693 return namesystem; 694 } 695 696 FSNamesystem(Configuration conf, FSImage fsImage) throws IOException { 697 this(conf, fsImage, false); 698 } 699 700 /** 701 * Create an FSNamesystem associated with the specified image. 702 * 703 * Note that this does not load any data off of disk -- if you would 704 * like that behavior, use {@link #loadFromDisk(Configuration)} 705 * 706 * @param conf configuration 707 * @param fsImage The FSImage to associate with 708 * @param ignoreRetryCache Whether or not should ignore the retry cache setup 709 * step. For Secondary NN this should be set to true. 710 * @throws IOException on bad configuration 711 */ 712 FSNamesystem(Configuration conf, FSImage fsImage, boolean ignoreRetryCache) 713 throws IOException { 714 provider = DFSUtil.createKeyProviderCryptoExtension(conf); 715 if (provider == null) { 716 LOG.info("No KeyProvider found."); 717 } else { 718 LOG.info("Found KeyProvider: " + provider.toString()); 719 } 720 if (conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY, 721 DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT)) { 722 LOG.info("Enabling async auditlog"); 723 enableAsyncAuditLog(); 724 } 725 boolean fair = conf.getBoolean("dfs.namenode.fslock.fair", true); 726 LOG.info("fsLock is fair:" + fair); 727 fsLock = new FSNamesystemLock(fair); 728 cond = fsLock.writeLock().newCondition(); 729 cpLock = new ReentrantLock(); 730 731 this.fsImage = fsImage; 732 try { 733 resourceRecheckInterval = conf.getLong( 734 DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY, 735 DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT); 736 737 this.blockManager = new BlockManager(this, conf); 738 this.datanodeStatistics = blockManager.getDatanodeManager().getDatanodeStatistics(); 739 this.blockIdManager = new BlockIdManager(blockManager); 740 741 this.fsOwner = UserGroupInformation.getCurrentUser(); 742 this.supergroup = 
conf.get(DFS_PERMISSIONS_SUPERUSERGROUP_KEY, 743 DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT); 744 this.isPermissionEnabled = conf.getBoolean(DFS_PERMISSIONS_ENABLED_KEY, 745 DFS_PERMISSIONS_ENABLED_DEFAULT); 746 LOG.info("fsOwner = " + fsOwner); 747 LOG.info("supergroup = " + supergroup); 748 LOG.info("isPermissionEnabled = " + isPermissionEnabled); 749 750 // block allocation has to be persisted in HA using a shared edits directory 751 // so that the standby has up-to-date namespace information 752 nameserviceId = DFSUtil.getNamenodeNameServiceId(conf); 753 this.haEnabled = HAUtil.isHAEnabled(conf, nameserviceId); 754 755 // Sanity check the HA-related config. 756 if (nameserviceId != null) { 757 LOG.info("Determined nameservice ID: " + nameserviceId); 758 } 759 LOG.info("HA Enabled: " + haEnabled); 760 if (!haEnabled && HAUtil.usesSharedEditsDir(conf)) { 761 LOG.warn("Configured NNs:\n" + DFSUtil.nnAddressesAsString(conf)); 762 throw new IOException("Invalid configuration: a shared edits dir " + 763 "must not be specified if HA is not enabled."); 764 } 765 766 // Get the checksum type from config 767 String checksumTypeStr = conf.get(DFS_CHECKSUM_TYPE_KEY, DFS_CHECKSUM_TYPE_DEFAULT); 768 DataChecksum.Type checksumType; 769 try { 770 checksumType = DataChecksum.Type.valueOf(checksumTypeStr); 771 } catch (IllegalArgumentException iae) { 772 throw new IOException("Invalid checksum type in " 773 + DFS_CHECKSUM_TYPE_KEY + ": " + checksumTypeStr); 774 } 775 776 this.serverDefaults = new FsServerDefaults( 777 conf.getLongBytes(DFS_BLOCK_SIZE_KEY, DFS_BLOCK_SIZE_DEFAULT), 778 conf.getInt(DFS_BYTES_PER_CHECKSUM_KEY, DFS_BYTES_PER_CHECKSUM_DEFAULT), 779 conf.getInt(DFS_CLIENT_WRITE_PACKET_SIZE_KEY, DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT), 780 (short) conf.getInt(DFS_REPLICATION_KEY, DFS_REPLICATION_DEFAULT), 781 conf.getInt(IO_FILE_BUFFER_SIZE_KEY, IO_FILE_BUFFER_SIZE_DEFAULT), 782 conf.getBoolean(DFS_ENCRYPT_DATA_TRANSFER_KEY, DFS_ENCRYPT_DATA_TRANSFER_DEFAULT), 783 
conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT), 784 checksumType); 785 786 this.maxFsObjects = conf.getLong(DFS_NAMENODE_MAX_OBJECTS_KEY, 787 DFS_NAMENODE_MAX_OBJECTS_DEFAULT); 788 789 this.minBlockSize = conf.getLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 790 DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_DEFAULT); 791 this.maxBlocksPerFile = conf.getLong(DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_KEY, 792 DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_DEFAULT); 793 this.accessTimePrecision = conf.getLong(DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, 794 DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT); 795 this.supportAppends = conf.getBoolean(DFS_SUPPORT_APPEND_KEY, DFS_SUPPORT_APPEND_DEFAULT); 796 LOG.info("Append Enabled: " + supportAppends); 797 798 this.dtpReplaceDatanodeOnFailure = ReplaceDatanodeOnFailure.get(conf); 799 800 this.standbyShouldCheckpoint = conf.getBoolean( 801 DFS_HA_STANDBY_CHECKPOINTS_KEY, DFS_HA_STANDBY_CHECKPOINTS_DEFAULT); 802 // # edit autoroll threshold is a multiple of the checkpoint threshold 803 this.editLogRollerThreshold = (long) 804 (conf.getFloat( 805 DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD, 806 DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT) * 807 conf.getLong( 808 DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 809 DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT)); 810 this.editLogRollerInterval = conf.getInt( 811 DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS, 812 DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT); 813 814 this.lazyPersistFileScrubIntervalSec = conf.getInt( 815 DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC, 816 DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC_DEFAULT); 817 818 if (this.lazyPersistFileScrubIntervalSec == 0) { 819 throw new IllegalArgumentException( 820 DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC + " must be non-zero."); 821 } 822 823 // For testing purposes, allow the DT secret manager to be started regardless 824 // of whether security is enabled. 
825 alwaysUseDelegationTokensForTests = conf.getBoolean( 826 DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, 827 DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT); 828 829 this.dtSecretManager = createDelegationTokenSecretManager(conf); 830 this.dir = new FSDirectory(this, conf); 831 this.snapshotManager = new SnapshotManager(dir); 832 this.cacheManager = new CacheManager(this, conf, blockManager); 833 this.safeMode = new SafeModeInfo(conf); 834 this.topConf = new TopConf(conf); 835 this.auditLoggers = initAuditLoggers(conf); 836 this.isDefaultAuditLogger = auditLoggers.size() == 1 && 837 auditLoggers.get(0) instanceof DefaultAuditLogger; 838 this.retryCache = ignoreRetryCache ? null : initRetryCache(conf); 839 Class<? extends INodeAttributeProvider> klass = conf.getClass( 840 DFS_NAMENODE_INODE_ATTRIBUTES_PROVIDER_KEY, 841 null, INodeAttributeProvider.class); 842 if (klass != null) { 843 inodeAttributeProvider = ReflectionUtils.newInstance(klass, conf); 844 LOG.info("Using INode attribute provider: " + klass.getName()); 845 } 846 } catch(IOException e) { 847 LOG.error(getClass().getSimpleName() + " initialization failed.", e); 848 close(); 849 throw e; 850 } catch (RuntimeException re) { 851 LOG.error(getClass().getSimpleName() + " initialization failed.", re); 852 close(); 853 throw re; 854 } 855 } 856 857 @VisibleForTesting 858 public List<AuditLogger> getAuditLoggers() { 859 return auditLoggers; 860 } 861 862 @VisibleForTesting 863 public RetryCache getRetryCache() { 864 return retryCache; 865 } 866 867 void lockRetryCache() { 868 if (retryCache != null) { 869 retryCache.lock(); 870 } 871 } 872 873 void unlockRetryCache() { 874 if (retryCache != null) { 875 retryCache.unlock(); 876 } 877 } 878 879 /** Whether or not retry cache is enabled */ 880 boolean hasRetryCache() { 881 return retryCache != null; 882 } 883 884 void addCacheEntryWithPayload(byte[] clientId, int callId, Object payload) { 885 if (retryCache != null) { 886 
retryCache.addCacheEntryWithPayload(clientId, callId, payload); 887 } 888 } 889 890 void addCacheEntry(byte[] clientId, int callId) { 891 if (retryCache != null) { 892 retryCache.addCacheEntry(clientId, callId); 893 } 894 } 895 896 @VisibleForTesting 897 public KeyProviderCryptoExtension getProvider() { 898 return provider; 899 } 900 901 @VisibleForTesting 902 static RetryCache initRetryCache(Configuration conf) { 903 boolean enable = conf.getBoolean(DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY, 904 DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT); 905 LOG.info("Retry cache on namenode is " + (enable ? "enabled" : "disabled")); 906 if (enable) { 907 float heapPercent = conf.getFloat( 908 DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY, 909 DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT); 910 long entryExpiryMillis = conf.getLong( 911 DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY, 912 DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT); 913 LOG.info("Retry cache will use " + heapPercent 914 + " of total heap and retry cache entry expiry time is " 915 + entryExpiryMillis + " millis"); 916 long entryExpiryNanos = entryExpiryMillis * 1000 * 1000; 917 return new RetryCache("NameNodeRetryCache", heapPercent, 918 entryExpiryNanos); 919 } 920 return null; 921 } 922 923 private List<AuditLogger> initAuditLoggers(Configuration conf) { 924 // Initialize the custom access loggers if configured. 
925 Collection<String> alClasses = conf.getStringCollection(DFS_NAMENODE_AUDIT_LOGGERS_KEY); 926 List<AuditLogger> auditLoggers = Lists.newArrayList(); 927 if (alClasses != null && !alClasses.isEmpty()) { 928 for (String className : alClasses) { 929 try { 930 AuditLogger logger; 931 if (DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME.equals(className)) { 932 logger = new DefaultAuditLogger(); 933 } else { 934 logger = (AuditLogger) Class.forName(className).newInstance(); 935 } 936 logger.initialize(conf); 937 auditLoggers.add(logger); 938 } catch (RuntimeException re) { 939 throw re; 940 } catch (Exception e) { 941 throw new RuntimeException(e); 942 } 943 } 944 } 945 946 // Make sure there is at least one logger installed. 947 if (auditLoggers.isEmpty()) { 948 auditLoggers.add(new DefaultAuditLogger()); 949 } 950 951 // Add audit logger to calculate top users 952 if (topConf.isEnabled) { 953 topMetrics = new TopMetrics(conf, topConf.nntopReportingPeriodsMs); 954 auditLoggers.add(new TopAuditLogger(topMetrics)); 955 } 956 957 return Collections.unmodifiableList(auditLoggers); 958 } 959 960 private void loadFSImage(StartupOption startOpt) throws IOException { 961 final FSImage fsImage = getFSImage(); 962 963 // format before starting up if requested 964 if (startOpt == StartupOption.FORMAT) { 965 966 fsImage.format(this, fsImage.getStorage().determineClusterId());// reuse current id 967 968 startOpt = StartupOption.REGULAR; 969 } 970 boolean success = false; 971 writeLock(); 972 try { 973 // We shouldn't be calling saveNamespace if we've come up in standby state. 
974 MetaRecoveryContext recovery = startOpt.createRecoveryContext(); 975 final boolean staleImage 976 = fsImage.recoverTransitionRead(startOpt, this, recovery); 977 if (RollingUpgradeStartupOption.ROLLBACK.matches(startOpt) || 978 RollingUpgradeStartupOption.DOWNGRADE.matches(startOpt)) { 979 rollingUpgradeInfo = null; 980 } 981 final boolean needToSave = staleImage && !haEnabled && !isRollingUpgrade(); 982 LOG.info("Need to save fs image? " + needToSave 983 + " (staleImage=" + staleImage + ", haEnabled=" + haEnabled 984 + ", isRollingUpgrade=" + isRollingUpgrade() + ")"); 985 if (needToSave) { 986 fsImage.saveNamespace(this); 987 } else { 988 updateStorageVersionForRollingUpgrade(fsImage.getLayoutVersion(), 989 startOpt); 990 // No need to save, so mark the phase done. 991 StartupProgress prog = NameNode.getStartupProgress(); 992 prog.beginPhase(Phase.SAVING_CHECKPOINT); 993 prog.endPhase(Phase.SAVING_CHECKPOINT); 994 } 995 // This will start a new log segment and write to the seen_txid file, so 996 // we shouldn't do it when coming up in standby state 997 if (!haEnabled || (haEnabled && startOpt == StartupOption.UPGRADE) 998 || (haEnabled && startOpt == StartupOption.UPGRADEONLY)) { 999 fsImage.openEditLogForWrite(); 1000 } 1001 success = true; 1002 } finally { 1003 if (!success) { 1004 fsImage.close(); 1005 } 1006 writeUnlock(); 1007 } 1008 imageLoadComplete(); 1009 } 1010 1011 private void updateStorageVersionForRollingUpgrade(final long layoutVersion, 1012 StartupOption startOpt) throws IOException { 1013 boolean rollingStarted = RollingUpgradeStartupOption.STARTED 1014 .matches(startOpt) && layoutVersion > HdfsConstants 1015 .NAMENODE_LAYOUT_VERSION; 1016 boolean rollingRollback = RollingUpgradeStartupOption.ROLLBACK 1017 .matches(startOpt); 1018 if (rollingRollback || rollingStarted) { 1019 fsImage.updateStorageVersion(); 1020 } 1021 } 1022 1023 private void startSecretManager() { 1024 if (dtSecretManager != null) { 1025 try { 1026 
dtSecretManager.startThreads(); 1027 } catch (IOException e) { 1028 // Inability to start secret manager 1029 // can't be recovered from. 1030 throw new RuntimeException(e); 1031 } 1032 } 1033 } 1034 1035 private void startSecretManagerIfNecessary() { 1036 boolean shouldRun = shouldUseDelegationTokens() && 1037 !isInSafeMode() && getEditLog().isOpenForWrite(); 1038 boolean running = dtSecretManager.isRunning(); 1039 if (shouldRun && !running) { 1040 startSecretManager(); 1041 } 1042 } 1043 1044 private void stopSecretManager() { 1045 if (dtSecretManager != null) { 1046 dtSecretManager.stopThreads(); 1047 } 1048 } 1049 1050 /** 1051 * Start services common to both active and standby states 1052 */ 1053 void startCommonServices(Configuration conf, HAContext haContext) throws IOException { 1054 this.registerMBean(); // register the MBean for the FSNamesystemState 1055 writeLock(); 1056 this.haContext = haContext; 1057 try { 1058 nnResourceChecker = new NameNodeResourceChecker(conf); 1059 checkAvailableResources(); 1060 assert safeMode != null && !isPopulatingReplQueues(); 1061 StartupProgress prog = NameNode.getStartupProgress(); 1062 prog.beginPhase(Phase.SAFEMODE); 1063 prog.setTotal(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS, 1064 getCompleteBlocksTotal()); 1065 setBlockTotal(); 1066 blockManager.activate(conf); 1067 } finally { 1068 writeUnlock(); 1069 } 1070 1071 registerMXBean(); 1072 DefaultMetricsSystem.instance().register(this); 1073 if (inodeAttributeProvider != null) { 1074 inodeAttributeProvider.start(); 1075 dir.setINodeAttributeProvider(inodeAttributeProvider); 1076 } 1077 snapshotManager.registerMXBean(); 1078 } 1079 1080 /** 1081 * Stop services common to both active and standby states 1082 */ 1083 void stopCommonServices() { 1084 writeLock(); 1085 if (inodeAttributeProvider != null) { 1086 dir.setINodeAttributeProvider(null); 1087 inodeAttributeProvider.stop(); 1088 } 1089 try { 1090 if (blockManager != null) blockManager.close(); 1091 } finally { 
1092 writeUnlock(); 1093 } 1094 RetryCache.clear(retryCache); 1095 } 1096 1097 /** 1098 * Start services required in active state 1099 * @throws IOException 1100 */ 1101 void startActiveServices() throws IOException { 1102 startingActiveService = true; 1103 LOG.info("Starting services required for active state"); 1104 writeLock(); 1105 try { 1106 FSEditLog editLog = getFSImage().getEditLog(); 1107 1108 if (!editLog.isOpenForWrite()) { 1109 // During startup, we're already open for write during initialization. 1110 editLog.initJournalsForWrite(); 1111 // May need to recover 1112 editLog.recoverUnclosedStreams(); 1113 1114 LOG.info("Catching up to latest edits from old active before " + 1115 "taking over writer role in edits logs"); 1116 editLogTailer.catchupDuringFailover(); 1117 1118 blockManager.setPostponeBlocksFromFuture(false); 1119 blockManager.getDatanodeManager().markAllDatanodesStale(); 1120 blockManager.clearQueues(); 1121 blockManager.processAllPendingDNMessages(); 1122 1123 // Only need to re-process the queue, If not in SafeMode. 1124 if (!isInSafeMode()) { 1125 LOG.info("Reprocessing replication and invalidation queues"); 1126 initializeReplQueues(); 1127 } 1128 1129 if (LOG.isDebugEnabled()) { 1130 LOG.debug("NameNode metadata after re-processing " + 1131 "replication and invalidation queues during failover:\n" + 1132 metaSaveAsString()); 1133 } 1134 1135 long nextTxId = getFSImage().getLastAppliedTxId() + 1; 1136 LOG.info("Will take over writing edit logs at txnid " + 1137 nextTxId); 1138 editLog.setNextTxId(nextTxId); 1139 1140 getFSImage().editLog.openForWrite(); 1141 } 1142 1143 // Enable quota checks. 1144 dir.enableQuotaChecks(); 1145 if (haEnabled) { 1146 // Renew all of the leases before becoming active. 1147 // This is because, while we were in standby mode, 1148 // the leases weren't getting renewed on this NN. 1149 // Give them all a fresh start here. 
1150 leaseManager.renewAllLeases(); 1151 } 1152 leaseManager.startMonitor(); 1153 startSecretManagerIfNecessary(); 1154 1155 //ResourceMonitor required only at ActiveNN. See HDFS-2914 1156 this.nnrmthread = new Daemon(new NameNodeResourceMonitor()); 1157 nnrmthread.start(); 1158 1159 nnEditLogRoller = new Daemon(new NameNodeEditLogRoller( 1160 editLogRollerThreshold, editLogRollerInterval)); 1161 nnEditLogRoller.start(); 1162 1163 if (lazyPersistFileScrubIntervalSec > 0) { 1164 lazyPersistFileScrubber = new Daemon(new LazyPersistFileScrubber( 1165 lazyPersistFileScrubIntervalSec)); 1166 lazyPersistFileScrubber.start(); 1167 } 1168 1169 cacheManager.startMonitorThread(); 1170 blockManager.getDatanodeManager().setShouldSendCachingCommands(true); 1171 } finally { 1172 startingActiveService = false; 1173 checkSafeMode(); 1174 writeUnlock(); 1175 } 1176 } 1177 1178 /** 1179 * Initialize replication queues. 1180 */ 1181 private void initializeReplQueues() { 1182 LOG.info("initializing replication queues"); 1183 blockManager.processMisReplicatedBlocks(); 1184 initializedReplQueues = true; 1185 } 1186 1187 private boolean inActiveState() { 1188 return haContext != null && 1189 haContext.getState().getServiceState() == HAServiceState.ACTIVE; 1190 } 1191 1192 /** 1193 * @return Whether the namenode is transitioning to active state and is in the 1194 * middle of the {@link #startActiveServices()} 1195 */ 1196 public boolean inTransitionToActive() { 1197 return haEnabled && inActiveState() && startingActiveService; 1198 } 1199 1200 private boolean shouldUseDelegationTokens() { 1201 return UserGroupInformation.isSecurityEnabled() || 1202 alwaysUseDelegationTokensForTests; 1203 } 1204 1205 /** 1206 * Stop services required in active state 1207 */ 1208 void stopActiveServices() { 1209 LOG.info("Stopping services started for active state"); 1210 writeLock(); 1211 try { 1212 stopSecretManager(); 1213 leaseManager.stopMonitor(); 1214 if (nnrmthread != null) { 1215 
((NameNodeResourceMonitor) nnrmthread.getRunnable()).stopMonitor(); 1216 nnrmthread.interrupt(); 1217 } 1218 if (nnEditLogRoller != null) { 1219 ((NameNodeEditLogRoller)nnEditLogRoller.getRunnable()).stop(); 1220 nnEditLogRoller.interrupt(); 1221 } 1222 if (lazyPersistFileScrubber != null) { 1223 ((LazyPersistFileScrubber) lazyPersistFileScrubber.getRunnable()).stop(); 1224 lazyPersistFileScrubber.interrupt(); 1225 } 1226 if (dir != null && getFSImage() != null) { 1227 if (getFSImage().editLog != null) { 1228 getFSImage().editLog.close(); 1229 } 1230 // Update the fsimage with the last txid that we wrote 1231 // so that the tailer starts from the right spot. 1232 getFSImage().updateLastAppliedTxIdFromWritten(); 1233 } 1234 if (cacheManager != null) { 1235 cacheManager.stopMonitorThread(); 1236 cacheManager.clearDirectiveStats(); 1237 } 1238 blockManager.getDatanodeManager().clearPendingCachingCommands(); 1239 blockManager.getDatanodeManager().setShouldSendCachingCommands(false); 1240 // Don't want to keep replication queues when not in Active. 1241 blockManager.clearQueues(); 1242 initializedReplQueues = false; 1243 } finally { 1244 writeUnlock(); 1245 } 1246 } 1247 1248 /** 1249 * Start services required in standby state 1250 * 1251 * @throws IOException 1252 */ 1253 void startStandbyServices(final Configuration conf) throws IOException { 1254 LOG.info("Starting services required for standby state"); 1255 if (!getFSImage().editLog.isOpenForRead()) { 1256 // During startup, we're already open for read. 1257 getFSImage().editLog.initSharedJournalsForRead(); 1258 } 1259 1260 blockManager.setPostponeBlocksFromFuture(true); 1261 1262 // Disable quota checks while in standby. 
1263 dir.disableQuotaChecks(); 1264 editLogTailer = new EditLogTailer(this, conf); 1265 editLogTailer.start(); 1266 if (standbyShouldCheckpoint) { 1267 standbyCheckpointer = new StandbyCheckpointer(conf, this); 1268 standbyCheckpointer.start(); 1269 } 1270 } 1271 1272 /** 1273 * Called when the NN is in Standby state and the editlog tailer tails the 1274 * OP_ROLLING_UPGRADE_START. 1275 */ 1276 void triggerRollbackCheckpoint() { 1277 setNeedRollbackFsImage(true); 1278 if (standbyCheckpointer != null) { 1279 standbyCheckpointer.triggerRollbackCheckpoint(); 1280 } 1281 } 1282 1283 /** 1284 * Called while the NN is in Standby state, but just about to be 1285 * asked to enter Active state. This cancels any checkpoints 1286 * currently being taken. 1287 */ 1288 void prepareToStopStandbyServices() throws ServiceFailedException { 1289 if (standbyCheckpointer != null) { 1290 standbyCheckpointer.cancelAndPreventCheckpoints( 1291 "About to leave standby state"); 1292 } 1293 } 1294 1295 /** Stop services required in standby state */ 1296 void stopStandbyServices() throws IOException { 1297 LOG.info("Stopping services started for standby state"); 1298 if (standbyCheckpointer != null) { 1299 standbyCheckpointer.stop(); 1300 } 1301 if (editLogTailer != null) { 1302 editLogTailer.stop(); 1303 } 1304 if (dir != null && getFSImage() != null && getFSImage().editLog != null) { 1305 getFSImage().editLog.close(); 1306 } 1307 } 1308 1309 @Override 1310 public void checkOperation(OperationCategory op) throws StandbyException { 1311 if (haContext != null) { 1312 // null in some unit tests 1313 haContext.checkOperation(op); 1314 } 1315 } 1316 1317 /** 1318 * @throws RetriableException 1319 * If 1) The NameNode is in SafeMode, 2) HA is enabled, and 3) 1320 * NameNode is in active state 1321 * @throws SafeModeException 1322 * Otherwise if NameNode is in SafeMode. 
1323 */ 1324 void checkNameNodeSafeMode(String errorMsg) 1325 throws RetriableException, SafeModeException { 1326 if (isInSafeMode()) { 1327 SafeModeException se = new SafeModeException(errorMsg, safeMode); 1328 if (haEnabled && haContext != null 1329 && haContext.getState().getServiceState() == HAServiceState.ACTIVE 1330 && shouldRetrySafeMode(this.safeMode)) { 1331 throw new RetriableException(se); 1332 } else { 1333 throw se; 1334 } 1335 } 1336 } 1337 1338 boolean isPermissionEnabled() { 1339 return isPermissionEnabled; 1340 } 1341 1342 /** 1343 * We already know that the safemode is on. We will throw a RetriableException 1344 * if the safemode is not manual or caused by low resource. 1345 */ 1346 private boolean shouldRetrySafeMode(SafeModeInfo safeMode) { 1347 if (safeMode == null) { 1348 return false; 1349 } else { 1350 return !safeMode.isManual() && !safeMode.areResourcesLow(); 1351 } 1352 } 1353 1354 public static Collection<URI> getNamespaceDirs(Configuration conf) { 1355 return getStorageDirs(conf, DFS_NAMENODE_NAME_DIR_KEY); 1356 } 1357 1358 /** 1359 * Get all edits dirs which are required. If any shared edits dirs are 1360 * configured, these are also included in the set of required dirs. 1361 * 1362 * @param conf the HDFS configuration. 1363 * @return all required dirs. 
1364 */ 1365 public static Collection<URI> getRequiredNamespaceEditsDirs(Configuration conf) { 1366 Set<URI> ret = new HashSet<URI>(); 1367 ret.addAll(getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY)); 1368 ret.addAll(getSharedEditsDirs(conf)); 1369 return ret; 1370 } 1371 1372 private static Collection<URI> getStorageDirs(Configuration conf, 1373 String propertyName) { 1374 Collection<String> dirNames = conf.getTrimmedStringCollection(propertyName); 1375 StartupOption startOpt = NameNode.getStartupOption(conf); 1376 if(startOpt == StartupOption.IMPORT) { 1377 // In case of IMPORT this will get rid of default directories 1378 // but will retain directories specified in hdfs-site.xml 1379 // When importing image from a checkpoint, the name-node can 1380 // start with empty set of storage directories. 1381 Configuration cE = new HdfsConfiguration(false); 1382 cE.addResource("core-default.xml"); 1383 cE.addResource("core-site.xml"); 1384 cE.addResource("hdfs-default.xml"); 1385 Collection<String> dirNames2 = cE.getTrimmedStringCollection(propertyName); 1386 dirNames.removeAll(dirNames2); 1387 if(dirNames.isEmpty()) 1388 LOG.warn("!!! WARNING !!!" + 1389 "\n\tThe NameNode currently runs without persistent storage." + 1390 "\n\tAny changes to the file system meta-data may be lost." + 1391 "\n\tRecommended actions:" + 1392 "\n\t\t- shutdown and restart NameNode with configured \"" 1393 + propertyName + "\" in hdfs-site.xml;" + 1394 "\n\t\t- use Backup Node as a persistent and up-to-date storage " + 1395 "of the file system meta-data."); 1396 } else if (dirNames.isEmpty()) { 1397 dirNames = Collections.singletonList( 1398 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_DEFAULT); 1399 } 1400 return Util.stringCollectionAsURIs(dirNames); 1401 } 1402 1403 /** 1404 * Return an ordered list of edits directories to write to. 1405 * The list is ordered such that all shared edits directories 1406 * are ordered before non-shared directories, and any duplicates 1407 * are removed. 
The order they are specified in the configuration 1408 * is retained. 1409 * @return Collection of shared edits directories. 1410 * @throws IOException if multiple shared edits directories are configured 1411 */ 1412 public static List<URI> getNamespaceEditsDirs(Configuration conf) 1413 throws IOException { 1414 return getNamespaceEditsDirs(conf, true); 1415 } 1416 1417 public static List<URI> getNamespaceEditsDirs(Configuration conf, 1418 boolean includeShared) 1419 throws IOException { 1420 // Use a LinkedHashSet so that order is maintained while we de-dup 1421 // the entries. 1422 LinkedHashSet<URI> editsDirs = new LinkedHashSet<URI>(); 1423 1424 if (includeShared) { 1425 List<URI> sharedDirs = getSharedEditsDirs(conf); 1426 1427 // Fail until multiple shared edits directories are supported (HDFS-2782) 1428 if (sharedDirs.size() > 1) { 1429 throw new IOException( 1430 "Multiple shared edits directories are not yet supported"); 1431 } 1432 1433 // First add the shared edits dirs. It's critical that the shared dirs 1434 // are added first, since JournalSet syncs them in the order they are listed, 1435 // and we need to make sure all edits are in place in the shared storage 1436 // before they are replicated locally. See HDFS-2874. 1437 for (URI dir : sharedDirs) { 1438 if (!editsDirs.add(dir)) { 1439 LOG.warn("Edits URI " + dir + " listed multiple times in " + 1440 DFS_NAMENODE_SHARED_EDITS_DIR_KEY + ". Ignoring duplicates."); 1441 } 1442 } 1443 } 1444 // Now add the non-shared dirs. 1445 for (URI dir : getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_KEY)) { 1446 if (!editsDirs.add(dir)) { 1447 LOG.warn("Edits URI " + dir + " listed multiple times in " + 1448 DFS_NAMENODE_SHARED_EDITS_DIR_KEY + " and " + 1449 DFS_NAMENODE_EDITS_DIR_KEY + ". Ignoring duplicates."); 1450 } 1451 } 1452 1453 if (editsDirs.isEmpty()) { 1454 // If this is the case, no edit dirs have been explicitly configured. 1455 // Image dirs are to be used for edits too. 
1456 return Lists.newArrayList(getNamespaceDirs(conf)); 1457 } else { 1458 return Lists.newArrayList(editsDirs); 1459 } 1460 } 1461 1462 /** 1463 * Returns edit directories that are shared between primary and secondary. 1464 * @param conf configuration 1465 * @return collection of edit directories from {@code conf} 1466 */ 1467 public static List<URI> getSharedEditsDirs(Configuration conf) { 1468 // don't use getStorageDirs here, because we want an empty default 1469 // rather than the dir in /tmp 1470 Collection<String> dirNames = conf.getTrimmedStringCollection( 1471 DFS_NAMENODE_SHARED_EDITS_DIR_KEY); 1472 return Util.stringCollectionAsURIs(dirNames); 1473 } 1474 1475 @Override 1476 public void readLock() { 1477 this.fsLock.readLock().lock(); 1478 } 1479 @Override 1480 public void readUnlock() { 1481 this.fsLock.readLock().unlock(); 1482 } 1483 @Override 1484 public void writeLock() { 1485 this.fsLock.writeLock().lock(); 1486 } 1487 @Override 1488 public void writeLockInterruptibly() throws InterruptedException { 1489 this.fsLock.writeLock().lockInterruptibly(); 1490 } 1491 @Override 1492 public void writeUnlock() { 1493 this.fsLock.writeLock().unlock(); 1494 } 1495 @Override 1496 public boolean hasWriteLock() { 1497 return this.fsLock.isWriteLockedByCurrentThread(); 1498 } 1499 @Override 1500 public boolean hasReadLock() { 1501 return this.fsLock.getReadHoldCount() > 0 || hasWriteLock(); 1502 } 1503 1504 public int getReadHoldCount() { 1505 return this.fsLock.getReadHoldCount(); 1506 } 1507 1508 public int getWriteHoldCount() { 1509 return this.fsLock.getWriteHoldCount(); 1510 } 1511 1512 /** Lock the checkpoint lock */ 1513 public void cpLock() { 1514 this.cpLock.lock(); 1515 } 1516 1517 /** Lock the checkpoint lock interrupibly */ 1518 public void cpLockInterruptibly() throws InterruptedException { 1519 this.cpLock.lockInterruptibly(); 1520 } 1521 1522 /** Unlock the checkpoint lock */ 1523 public void cpUnlock() { 1524 this.cpLock.unlock(); 1525 } 1526 1527 
1528 NamespaceInfo getNamespaceInfo() { 1529 readLock(); 1530 try { 1531 return unprotectedGetNamespaceInfo(); 1532 } finally { 1533 readUnlock(); 1534 } 1535 } 1536 1537 /** 1538 * Version of @see #getNamespaceInfo() that is not protected by a lock. 1539 */ 1540 NamespaceInfo unprotectedGetNamespaceInfo() { 1541 return new NamespaceInfo(getFSImage().getStorage().getNamespaceID(), 1542 getClusterId(), getBlockPoolId(), 1543 getFSImage().getStorage().getCTime()); 1544 } 1545 1546 /** 1547 * Close down this file system manager. 1548 * Causes heartbeat and lease daemons to stop; waits briefly for 1549 * them to finish, but a short timeout returns control back to caller. 1550 */ 1551 void close() { 1552 fsRunning = false; 1553 try { 1554 stopCommonServices(); 1555 if (smmthread != null) smmthread.interrupt(); 1556 } finally { 1557 // using finally to ensure we also wait for lease daemon 1558 try { 1559 stopActiveServices(); 1560 stopStandbyServices(); 1561 } catch (IOException ie) { 1562 } finally { 1563 IOUtils.cleanup(LOG, dir); 1564 IOUtils.cleanup(LOG, fsImage); 1565 } 1566 } 1567 } 1568 1569 @Override 1570 public boolean isRunning() { 1571 return fsRunning; 1572 } 1573 1574 @Override 1575 public boolean isInStandbyState() { 1576 if (haContext == null || haContext.getState() == null) { 1577 // We're still starting up. In this case, if HA is 1578 // on for the cluster, we always start in standby. Otherwise 1579 // start in active. 
1580 return haEnabled; 1581 } 1582 1583 return HAServiceState.STANDBY == haContext.getState().getServiceState(); 1584 } 1585 1586 /** 1587 * Dump all metadata into specified file 1588 */ 1589 void metaSave(String filename) throws IOException { 1590 checkSuperuserPrivilege(); 1591 checkOperation(OperationCategory.UNCHECKED); 1592 writeLock(); 1593 try { 1594 checkOperation(OperationCategory.UNCHECKED); 1595 File file = new File(System.getProperty("hadoop.log.dir"), filename); 1596 PrintWriter out = new PrintWriter(new BufferedWriter( 1597 new OutputStreamWriter(new FileOutputStream(file), Charsets.UTF_8))); 1598 metaSave(out); 1599 out.flush(); 1600 out.close(); 1601 } finally { 1602 writeUnlock(); 1603 } 1604 } 1605 1606 private void metaSave(PrintWriter out) { 1607 assert hasWriteLock(); 1608 long totalInodes = this.dir.totalInodes(); 1609 long totalBlocks = this.getBlocksTotal(); 1610 out.println(totalInodes + " files and directories, " + totalBlocks 1611 + " blocks = " + (totalInodes + totalBlocks) + " total"); 1612 1613 blockManager.metaSave(out); 1614 } 1615 1616 private String metaSaveAsString() { 1617 StringWriter sw = new StringWriter(); 1618 PrintWriter pw = new PrintWriter(sw); 1619 metaSave(pw); 1620 pw.flush(); 1621 return sw.toString(); 1622 } 1623 1624 FsServerDefaults getServerDefaults() throws StandbyException { 1625 checkOperation(OperationCategory.READ); 1626 return serverDefaults; 1627 } 1628 1629 long getAccessTimePrecision() { 1630 return accessTimePrecision; 1631 } 1632 1633 private boolean isAccessTimeSupported() { 1634 return accessTimePrecision > 0; 1635 } 1636 1637 ///////////////////////////////////////////////////////// 1638 // 1639 // These methods are called by HadoopFS clients 1640 // 1641 ///////////////////////////////////////////////////////// 1642 /** 1643 * Set permissions for an existing file. 
   * @throws IOException
   */
  void setPermission(String src, FsPermission permission) throws IOException {
    HdfsFileStatus auditStat;
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      // Re-check under the lock: the HA state may have changed while this
      // thread was waiting for the write lock.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set permission for " + src);
      auditStat = FSDirAttrOp.setPermission(dir, src, permission);
    } catch (AccessControlException e) {
      logAuditEvent(false, "setPermission", src);
      throw e;
    } finally {
      writeUnlock();
    }
    // Sync the edit log and emit the success audit event outside the lock.
    getEditLog().logSync();
    logAuditEvent(true, "setPermission", src, null, auditStat);
  }

  /**
   * Set owner for an existing file.
   * @throws IOException
   */
  void setOwner(String src, String username, String group)
      throws IOException {
    HdfsFileStatus auditStat;
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set owner for " + src);
      auditStat = FSDirAttrOp.setOwner(dir, src, username, group);
    } catch (AccessControlException e) {
      logAuditEvent(false, "setOwner", src);
      throw e;
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    logAuditEvent(true, "setOwner", src, null, auditStat);
  }

  /**
   * Result of a getBlockLocations call: the located blocks plus, when
   * non-null, the resolved path whose access time should be updated.
   */
  static class GetBlockLocationsResult {
    final INodesInPath iip;
    final LocatedBlocks blocks;
    // A non-null iip signals that the caller should bump the access time.
    boolean updateAccessTime() {
      return iip != null;
    }
    private GetBlockLocationsResult(INodesInPath iip, LocatedBlocks blocks) {
      this.iip = iip;
      this.blocks = blocks;
    }
  }

  /**
   * Get block locations within the specified range.
   * @see ClientProtocol#getBlockLocations(String, long, long)
   */
  LocatedBlocks getBlockLocations(String clientMachine, String src,
      long offset, long length) throws IOException {
    checkOperation(OperationCategory.READ);
    GetBlockLocationsResult res = null;
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      res = getBlockLocations(src, offset, length, true, true);
    } catch (AccessControlException e) {
      logAuditEvent(false, "open", src);
      throw e;
    } finally {
      readUnlock();
    }

    logAuditEvent(true, "open", src);

    // The lookup only needed the read lock; updating the access time
    // mutates the namespace, so re-acquire as a write lock.
    if (res.updateAccessTime()) {
      writeLock();
      final long now = now();
      try {
        checkOperation(OperationCategory.WRITE);
        INode inode = res.iip.getLastINode();
        // Re-evaluate under the write lock; another reader may have already
        // refreshed the access time since the read-locked check.
        boolean updateAccessTime = now > inode.getAccessTime() +
            getAccessTimePrecision();
        if (!isInSafeMode() && updateAccessTime) {
          boolean changed = FSDirAttrOp.setTimes(dir,
              inode, -1, now, false, res.iip.getLatestSnapshotId());
          if (changed) {
            getEditLog().logTimes(src, -1, now);
          }
        }
      } catch (Throwable e) {
        // Access-time maintenance is best effort; never fail the read.
        LOG.warn("Failed to update the access time of " + src, e);
      } finally {
        writeUnlock();
      }
    }

    LocatedBlocks blocks = res.blocks;
    if (blocks != null) {
      // Order replica locations relative to the requesting client.
      blockManager.getDatanodeManager().sortLocatedBlocks(
          clientMachine, blocks.getLocatedBlocks());

      // lastBlock is not part of getLocatedBlocks(), might need to sort it too
      LocatedBlock lastBlock = blocks.getLastLocatedBlock();
      if (lastBlock != null) {
        ArrayList<LocatedBlock> lastBlockList = Lists.newArrayList(lastBlock);
        blockManager.getDatanodeManager().sortLocatedBlocks(
            clientMachine, lastBlockList);
      }
    }
    return blocks;
  }

  /**
   * Get block locations within the specified range.
   * @see ClientProtocol#getBlockLocations(String, long, long)
   * @throws IOException
   */
  GetBlockLocationsResult getBlockLocations(
      String src, long offset, long length, boolean needBlockToken,
      boolean checkSafeMode) throws IOException {
    // Validate the requested range before touching the namespace.
    if (offset < 0) {
      throw new HadoopIllegalArgumentException(
          "Negative offset is not supported. File: " + src);
    }
    if (length < 0) {
      throw new HadoopIllegalArgumentException(
          "Negative length is not supported. File: " + src);
    }
    final GetBlockLocationsResult ret = getBlockLocationsInt(
        src, offset, length, needBlockToken);

    if (checkSafeMode && isInSafeMode()) {
      for (LocatedBlock b : ret.blocks.getLocatedBlocks()) {
        // if safemode & no block locations yet then throw safemodeException
        if ((b.getLocations() == null) || (b.getLocations().length == 0)) {
          SafeModeException se = new SafeModeException(
              "Zero blocklocations for " + src, safeMode);
          if (haEnabled && haContext != null &&
              haContext.getState().getServiceState() == HAServiceState.ACTIVE) {
            // On an active HA NameNode the condition may clear soon
            // (e.g. once DataNodes report in), so let the client retry.
            throw new RetriableException(se);
          } else {
            throw se;
          }
        }
      }
    }
    return ret;
  }

  /**
   * Resolve {@code srcArg}, enforce read permission, and build the
   * located-blocks response, deciding along the way whether the file's
   * access time needs updating. Called with the namesystem lock held by
   * the caller (see the public getBlockLocations overloads).
   */
  private GetBlockLocationsResult getBlockLocationsInt(
      final String srcArg, long offset, long length, boolean needBlockToken)
      throws IOException {
    String src = srcArg;
    FSPermissionChecker pc = getPermissionChecker();
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    src = dir.resolvePath(pc, src, pathComponents);
    final INodesInPath iip = dir.getINodesInPath(src, true);
    final INodeFile inode = INodeFile.valueOf(iip.getLastINode(), src);
    if (isPermissionEnabled) {
      dir.checkPathAccess(pc, iip, FsAction.READ);
      checkUnreadableBySuperuser(pc, inode, iip.getPathSnapshotId());
    }

    final long fileSize = iip.isSnapshot()
        ? inode.computeFileSize(iip.getPathSnapshotId())
        : inode.computeFileSizeNotIncludingLastUcBlock();
    boolean isUc = inode.isUnderConstruction();
    if (iip.isSnapshot()) {
      // if src indicates a snapshot file, we need to make sure the returned
      // blocks do not exceed the size of the snapshot file.
      length = Math.min(length, fileSize - offset);
      isUc = false;
    }

    // Reserved-raw paths bypass encryption metadata entirely.
    final FileEncryptionInfo feInfo =
        FSDirectory.isReservedRawName(srcArg) ? null
            : dir.getFileEncryptionInfo(inode, iip.getPathSnapshotId(), iip);

    final LocatedBlocks blocks = blockManager.createLocatedBlocks(
        inode.getBlocks(iip.getPathSnapshotId()), fileSize,
        isUc, offset, length, needBlockToken, iip.isSnapshot(), feInfo);

    // Set caching information for the located blocks.
    for (LocatedBlock lb : blocks.getLocatedBlocks()) {
      cacheManager.setCachedLocations(lb);
    }

    final long now = now();
    // Access time is only maintained when the feature is enabled, outside
    // safe mode, never for snapshot paths, and at most once per precision
    // interval.
    boolean updateAccessTime = isAccessTimeSupported() && !isInSafeMode()
        && !iip.isSnapshot()
        && now > inode.getAccessTime() + getAccessTimePrecision();
    return new GetBlockLocationsResult(updateAccessTime ? iip : null, blocks);
  }

  /**
   * Moves all the blocks from {@code srcs} and appends them to {@code target}
   * To avoid rollbacks we will verify validity of ALL of the args
   * before we start actual move.
   *
   * This does not support ".inodes" relative path
   * @param target target to concat into
   * @param srcs file that will be concatenated
   * @throws IOException on error
   */
  void concat(String target, String [] srcs, boolean logRetryCache)
      throws IOException {
    checkOperation(OperationCategory.WRITE);
    waitForLoadingFSImage();
    HdfsFileStatus stat = null;
    boolean success = false;
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot concat " + target);
      stat = FSDirConcatOp.concat(dir, target, srcs, logRetryCache);
      success = true;
    } finally {
      writeUnlock();
      // Sync the edit log only on success; always record the outcome in
      // the audit log.
      if (success) {
        getEditLog().logSync();
      }
      logAuditEvent(success, "concat", Arrays.toString(srcs), target, stat);
    }
  }

  /**
   * stores the modification and access time for this inode.
   * The access time is precise up to an hour. The transaction, if needed, is
   * written to the edits log but is not flushed.
   */
  void setTimes(String src, long mtime, long atime) throws IOException {
    HdfsFileStatus auditStat;
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set times " + src);
      auditStat = FSDirAttrOp.setTimes(dir, src, mtime, atime);
    } catch (AccessControlException e) {
      logAuditEvent(false, "setTimes", src);
      throw e;
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    logAuditEvent(true, "setTimes", src, null, auditStat);
  }

  /**
   * Create a symbolic link.
   */
  @SuppressWarnings("deprecation")
  void createSymlink(String target, String link,
      PermissionStatus dirPerms, boolean createParent, boolean logRetryCache)
      throws IOException {
    // Fail fast when symlinks are administratively disabled.
    if (!FileSystem.areSymlinksEnabled()) {
      throw new UnsupportedOperationException("Symlinks not supported");
    }
    HdfsFileStatus auditStat = null;
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot create symlink " + link);
      auditStat = FSDirSymlinkOp.createSymlinkInt(this, target, link, dirPerms,
          createParent, logRetryCache);
    } catch (AccessControlException e) {
      logAuditEvent(false, "createSymlink", link, target, null);
      throw e;
    } finally {
      writeUnlock();
    }
    // Persist the edit and audit the success outside the lock.
    getEditLog().logSync();
    logAuditEvent(true, "createSymlink", link, target, auditStat);
  }

  /**
   * Set replication for an existing file.
   *
   * The NameNode sets new replication and schedules either replication of
   * under-replicated data blocks or removal of the excessive block copies
   * if the blocks are over-replicated.
   *
   * @see ClientProtocol#setReplication(String, short)
   * @param src file name
   * @param replication new replication
   * @return true if successful;
   *         false if file does not exist or is a directory
   */
  boolean setReplication(final String src, final short replication)
      throws IOException {
    boolean success = false;
    waitForLoadingFSImage();
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set replication for " + src);
      success = FSDirAttrOp.setReplication(dir, blockManager, src, replication);
    } catch (AccessControlException e) {
      logAuditEvent(false, "setReplication", src);
      throw e;
    } finally {
      writeUnlock();
    }
    // Only sync and audit when the replication factor actually changed.
    if (success) {
      getEditLog().logSync();
      logAuditEvent(true, "setReplication", src);
    }
    return success;
  }

  /**
   * Truncate file to a lower length.
   * Truncate cannot be reverted / recovered from as it causes data loss.
   * Truncation at block boundary is atomic, otherwise it requires
   * block recovery to truncate the last block of the file.
   *
   * @return true if client does not need to wait for block recovery,
   *         false if client needs to wait for block recovery.
   */
  boolean truncate(String src, long newLength,
      String clientName, String clientMachine,
      long mtime)
      throws IOException, UnresolvedLinkException {
    boolean ret;
    try {
      ret = truncateInt(src, newLength, clientName, clientMachine, mtime);
    } catch (AccessControlException e) {
      // Audit the denied attempt here; success is audited in truncateInt.
      logAuditEvent(false, "truncate", src);
      throw e;
    }
    return ret;
  }

  // Locking/validation wrapper around truncateInternal: validates the new
  // length, performs the truncation under the write lock, then syncs the
  // edit log, releases trimmed blocks and audits outside the lock.
  boolean truncateInt(String srcArg, long newLength,
      String clientName, String clientMachine,
      long mtime)
      throws IOException, UnresolvedLinkException {
    String src = srcArg;
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* NameSystem.truncate: src="
          + src + " newLength=" + newLength);
    }
    if (newLength < 0) {
      throw new HadoopIllegalArgumentException(
          "Cannot truncate to a negative file size: " + newLength + ".");
    }
    HdfsFileStatus stat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    boolean res;
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    BlocksMapUpdateInfo toRemoveBlocks = new BlocksMapUpdateInfo();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot truncate for " + src);
      src = dir.resolvePath(pc, src, pathComponents);
      res = truncateInternal(src, newLength, clientName,
          clientMachine, mtime, pc, toRemoveBlocks);
      stat = dir.getAuditFileInfo(dir.getINodesInPath4Write(src, false));
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    // Blocks trimmed off by the truncate are released outside the lock.
    if (!toRemoveBlocks.getToDeleteList().isEmpty()) {
      removeBlocks(toRemoveBlocks);
      toRemoveBlocks.clear();
    }
    logAuditEvent(true, "truncate", src, null, stat);
    return res;
  }

  /**
   * Truncate a file to a given size
   * Update the count at each ancestor directory with quota
   */
  boolean truncateInternal(String src, long newLength,
      String clientName, String clientMachine,
      long mtime, FSPermissionChecker pc,
      BlocksMapUpdateInfo toRemoveBlocks)
      throws IOException, UnresolvedLinkException {
    assert hasWriteLock();
    INodesInPath iip = dir.getINodesInPath4Write(src, true);
    if (isPermissionEnabled) {
      dir.checkPathAccess(pc, iip, FsAction.WRITE);
    }
    INodeFile file = INodeFile.valueOf(iip.getLastINode(), src);
    final BlockStoragePolicy lpPolicy =
        blockManager.getStoragePolicy("LAZY_PERSIST");

    // Lazy-persist (memory-backed) files cannot be truncated.
    if (lpPolicy != null &&
        lpPolicy.getId() == file.getStoragePolicyID()) {
      throw new UnsupportedOperationException(
          "Cannot truncate lazy persist file " + src);
    }

    // Check if the file is already being truncated with the same length
    final BlockInfoContiguous last = file.getLastBlock();
    if (last != null && last.getBlockUCState() == BlockUCState.UNDER_RECOVERY) {
      final Block truncateBlock
          = ((BlockInfoContiguousUnderConstruction)last).getTruncateBlock();
      if (truncateBlock != null) {
        final long truncateLength = file.computeFileSize(false, false)
            + truncateBlock.getNumBytes();
        if (newLength == truncateLength) {
          // A recovery to this exact length is already in flight; the
          // client must wait for it ('false' = wait for block recovery).
          return false;
        }
      }
    }

    // Opening an existing file for truncate. May need lease recovery.
    recoverLeaseInternal(RecoverLeaseOp.TRUNCATE_FILE,
        iip, src, clientName, clientMachine, false);
    // Truncate length check.
    long oldLength = file.computeFileSize();
    if(oldLength == newLength) {
      return true;
    }
    if(oldLength < newLength) {
      throw new HadoopIllegalArgumentException(
          "Cannot truncate to a larger file size. Current size: " + oldLength +
          ", truncate size: " + newLength + ".");
    }
    // Perform INodeFile truncation.
    final QuotaCounts delta = new QuotaCounts.Builder().build();
    boolean onBlockBoundary = dir.truncate(iip, newLength, toRemoveBlocks,
        mtime, delta);
    Block truncateBlock = null;
    if(!onBlockBoundary) {
      // Open file for write, but don't log into edits
      long lastBlockDelta = file.computeFileSize() - newLength;
      assert lastBlockDelta > 0 : "delta is 0 only if on block bounday";
      truncateBlock = prepareFileForTruncate(iip, clientName, clientMachine,
          lastBlockDelta, null);
    }

    // update the quota: use the preferred block size for UC block
    dir.writeLock();
    try {
      dir.updateCountNoQuotaCheck(iip, iip.length() - 1, delta);
    } finally {
      dir.writeUnlock();
    }

    getEditLog().logTruncate(src, clientName, clientMachine, newLength, mtime,
        truncateBlock);
    return onBlockBoundary;
  }

  /**
   * Convert current INode to UnderConstruction.
   * Recreate lease.
   * Create new block for the truncated copy.
   * Schedule truncation of the replicas.
   *
   * @return the returned block will be written to editLog and passed back into
   * this method upon loading.
   */
  Block prepareFileForTruncate(INodesInPath iip,
      String leaseHolder,
      String clientMachine,
      long lastBlockDelta,
      Block newBlock)
      throws IOException {
    INodeFile file = iip.getLastINode().asFile();
    String src = iip.getPath();
    file.recordModification(iip.getLatestSnapshotId());
    file.toUnderConstruction(leaseHolder, clientMachine);
    assert file.isUnderConstruction() : "inode should be under construction.";
    leaseManager.addLease(
        file.getFileUnderConstructionFeature().getClientName(), src);
    // A null newBlock means this is a fresh truncate request (not an
    // edit-log replay), so block recovery is kicked off below.
    boolean shouldRecoverNow = (newBlock == null);
    BlockInfoContiguous oldBlock = file.getLastBlock();
    boolean shouldCopyOnTruncate = shouldCopyOnTruncate(file, oldBlock);
    if(newBlock == null) {
      newBlock = (shouldCopyOnTruncate) ? createNewBlock() :
          new Block(oldBlock.getBlockId(), oldBlock.getNumBytes(),
              nextGenerationStamp(blockIdManager.isLegacyBlock(oldBlock)));
    }

    BlockInfoContiguousUnderConstruction truncatedBlockUC;
    if(shouldCopyOnTruncate) {
      // Add new truncateBlock into blocksMap and
      // use oldBlock as a source for copy-on-truncate recovery
      truncatedBlockUC = new BlockInfoContiguousUnderConstruction(newBlock,
          file.getBlockReplication());
      truncatedBlockUC.setNumBytes(oldBlock.getNumBytes() - lastBlockDelta);
      truncatedBlockUC.setTruncateBlock(oldBlock);
      file.setLastBlock(truncatedBlockUC, blockManager.getStorages(oldBlock));
      getBlockManager().addBlockCollection(truncatedBlockUC, file);

      NameNode.stateChangeLog.info("BLOCK* prepareFileForTruncate: "
          + "Scheduling copy-on-truncate to new size "
          + truncatedBlockUC.getNumBytes() + " new block " + newBlock
          + " old block " + truncatedBlockUC.getTruncateBlock());
    } else {
      // Use new generation stamp for in-place truncate recovery
      blockManager.convertLastBlockToUnderConstruction(file, lastBlockDelta);
      oldBlock = file.getLastBlock();
      assert !oldBlock.isComplete() : "oldBlock should be under construction";
      truncatedBlockUC = (BlockInfoContiguousUnderConstruction) oldBlock;
      truncatedBlockUC.setTruncateBlock(new Block(oldBlock));
      truncatedBlockUC.getTruncateBlock().setNumBytes(
          oldBlock.getNumBytes() - lastBlockDelta);
      truncatedBlockUC.getTruncateBlock().setGenerationStamp(
          newBlock.getGenerationStamp());

      NameNode.stateChangeLog.debug("BLOCK* prepareFileForTruncate: "
          + "Scheduling in-place block truncate to new size "
          + truncatedBlockUC.getTruncateBlock().getNumBytes()
          + " block=" + truncatedBlockUC);
    }
    if (shouldRecoverNow) {
      truncatedBlockUC.initializeBlockRecovery(newBlock.getGenerationStamp());
    }

    return newBlock;
  }

  /**
   * Defines if a replica needs to be copied on truncate or
   * can be truncated in place.
   */
  boolean shouldCopyOnTruncate(INodeFile file, BlockInfoContiguous blk) {
    // During an unfinalized or rolling upgrade the old data must be kept,
    // so always copy rather than truncate in place.
    if(!isUpgradeFinalized()) {
      return true;
    }
    if (isRollingUpgrade()) {
      return true;
    }
    // In-place truncation would modify data referenced by a snapshot.
    return file.isBlockInLatestSnapshot(blk);
  }

  /**
   * Set the storage policy for a file or a directory.
   *
   * @param src file/directory path
   * @param policyName storage policy name
   */
  void setStoragePolicy(String src, String policyName) throws IOException {
    HdfsFileStatus auditStat;
    waitForLoadingFSImage();
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set storage policy for " + src);
      auditStat = FSDirAttrOp.setStoragePolicy(
          dir, blockManager, src, policyName);
    } catch (AccessControlException e) {
      logAuditEvent(false, "setStoragePolicy", src);
      throw e;
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    logAuditEvent(true, "setStoragePolicy", src, null, auditStat);
  }

  /**
   * @return All the existing block storage policies
   */
  BlockStoragePolicy[] getStoragePolicies() throws IOException {
    checkOperation(OperationCategory.READ);
    waitForLoadingFSImage();
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      return FSDirAttrOp.getStoragePolicies(blockManager);
    } finally {
      readUnlock();
    }
  }

  // Returns the preferred block size for the file at src, under the read lock.
  long getPreferredBlockSize(String src) throws IOException {
    checkOperation(OperationCategory.READ);
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      return FSDirAttrOp.getPreferredBlockSize(dir, src);
    } finally {
      readUnlock();
    }
  }

  /**
   * If the file is within an encryption zone, select the appropriate
   * CryptoProtocolVersion
from the list provided by the client. Since the 2236 * client may be newer, we need to handle unknown versions. 2237 * 2238 * @param zone EncryptionZone of the file 2239 * @param supportedVersions List of supported protocol versions 2240 * @return chosen protocol version 2241 * @throws IOException 2242 */ 2243 private CryptoProtocolVersion chooseProtocolVersion(EncryptionZone zone, 2244 CryptoProtocolVersion[] supportedVersions) 2245 throws UnknownCryptoProtocolVersionException, UnresolvedLinkException, 2246 SnapshotAccessControlException { 2247 Preconditions.checkNotNull(zone); 2248 Preconditions.checkNotNull(supportedVersions); 2249 // Right now, we only support a single protocol version, 2250 // so simply look for it in the list of provided options 2251 final CryptoProtocolVersion required = zone.getVersion(); 2252 2253 for (CryptoProtocolVersion c : supportedVersions) { 2254 if (c.equals(CryptoProtocolVersion.UNKNOWN)) { 2255 if (LOG.isDebugEnabled()) { 2256 LOG.debug("Ignoring unknown CryptoProtocolVersion provided by " + 2257 "client: " + c.getUnknownValue()); 2258 } 2259 continue; 2260 } 2261 if (c.equals(required)) { 2262 return c; 2263 } 2264 } 2265 throw new UnknownCryptoProtocolVersionException( 2266 "No crypto protocol versions provided by the client are supported." 2267 + " Client provided: " + Arrays.toString(supportedVersions) 2268 + " NameNode supports: " + Arrays.toString(CryptoProtocolVersion 2269 .values())); 2270 } 2271 2272 /** 2273 * Invoke KeyProvider APIs to generate an encrypted data encryption key for an 2274 * encryption zone. Should not be called with any locks held. 
2275 * 2276 * @param ezKeyName key name of an encryption zone 2277 * @return New EDEK, or null if ezKeyName is null 2278 * @throws IOException 2279 */ 2280 private EncryptedKeyVersion generateEncryptedDataEncryptionKey(String 2281 ezKeyName) throws IOException { 2282 if (ezKeyName == null) { 2283 return null; 2284 } 2285 EncryptedKeyVersion edek = null; 2286 try { 2287 edek = provider.generateEncryptedKey(ezKeyName); 2288 } catch (GeneralSecurityException e) { 2289 throw new IOException(e); 2290 } 2291 Preconditions.checkNotNull(edek); 2292 return edek; 2293 } 2294 2295 /** 2296 * Create a new file entry in the namespace. 2297 * 2298 * For description of parameters and exceptions thrown see 2299 * {@link ClientProtocol#create}, except it returns valid file status upon 2300 * success 2301 */ 2302 HdfsFileStatus startFile(String src, PermissionStatus permissions, 2303 String holder, String clientMachine, EnumSet<CreateFlag> flag, 2304 boolean createParent, short replication, long blockSize, 2305 CryptoProtocolVersion[] supportedVersions, boolean logRetryCache) 2306 throws AccessControlException, SafeModeException, 2307 FileAlreadyExistsException, UnresolvedLinkException, 2308 FileNotFoundException, ParentNotDirectoryException, IOException { 2309 2310 HdfsFileStatus status = null; 2311 try { 2312 status = startFileInt(src, permissions, holder, clientMachine, flag, 2313 createParent, replication, blockSize, supportedVersions, 2314 logRetryCache); 2315 } catch (AccessControlException e) { 2316 logAuditEvent(false, "create", src); 2317 throw e; 2318 } 2319 return status; 2320 } 2321 2322 private HdfsFileStatus startFileInt(final String srcArg, 2323 PermissionStatus permissions, String holder, String clientMachine, 2324 EnumSet<CreateFlag> flag, boolean createParent, short replication, 2325 long blockSize, CryptoProtocolVersion[] supportedVersions, 2326 boolean logRetryCache) 2327 throws AccessControlException, SafeModeException, 2328 FileAlreadyExistsException, 
UnresolvedLinkException, 2329 FileNotFoundException, ParentNotDirectoryException, IOException { 2330 String src = srcArg; 2331 if (NameNode.stateChangeLog.isDebugEnabled()) { 2332 StringBuilder builder = new StringBuilder(); 2333 builder.append("DIR* NameSystem.startFile: src=" + src 2334 + ", holder=" + holder 2335 + ", clientMachine=" + clientMachine 2336 + ", createParent=" + createParent 2337 + ", replication=" + replication 2338 + ", createFlag=" + flag.toString() 2339 + ", blockSize=" + blockSize); 2340 builder.append(", supportedVersions="); 2341 if (supportedVersions != null) { 2342 builder.append(Arrays.toString(supportedVersions)); 2343 } else { 2344 builder.append("null"); 2345 } 2346 NameNode.stateChangeLog.debug(builder.toString()); 2347 } 2348 if (!DFSUtil.isValidName(src)) { 2349 throw new InvalidPathException(src); 2350 } 2351 blockManager.verifyReplication(src, replication, clientMachine); 2352 2353 boolean skipSync = false; 2354 HdfsFileStatus stat = null; 2355 FSPermissionChecker pc = getPermissionChecker(); 2356 checkOperation(OperationCategory.WRITE); 2357 if (blockSize < minBlockSize) { 2358 throw new IOException("Specified block size is less than configured" + 2359 " minimum value (" + DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY 2360 + "): " + blockSize + " < " + minBlockSize); 2361 } 2362 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 2363 boolean create = flag.contains(CreateFlag.CREATE); 2364 boolean overwrite = flag.contains(CreateFlag.OVERWRITE); 2365 boolean isLazyPersist = flag.contains(CreateFlag.LAZY_PERSIST); 2366 2367 waitForLoadingFSImage(); 2368 2369 /** 2370 * If the file is in an encryption zone, we optimistically create an 2371 * EDEK for the file by calling out to the configured KeyProvider. 2372 * Since this typically involves doing an RPC, we take the readLock 2373 * initially, then drop it to do the RPC. 
2374 * 2375 * Since the path can flip-flop between being in an encryption zone and not 2376 * in the meantime, we need to recheck the preconditions when we retake the 2377 * lock to do the create. If the preconditions are not met, we throw a 2378 * special RetryStartFileException to ask the DFSClient to try the create 2379 * again later. 2380 */ 2381 CryptoProtocolVersion protocolVersion = null; 2382 CipherSuite suite = null; 2383 String ezKeyName = null; 2384 EncryptedKeyVersion edek = null; 2385 2386 if (provider != null) { 2387 readLock(); 2388 try { 2389 src = dir.resolvePath(pc, src, pathComponents); 2390 INodesInPath iip = dir.getINodesInPath4Write(src); 2391 // Nothing to do if the path is not within an EZ 2392 final EncryptionZone zone = dir.getEZForPath(iip); 2393 if (zone != null) { 2394 protocolVersion = chooseProtocolVersion(zone, supportedVersions); 2395 suite = zone.getSuite(); 2396 ezKeyName = zone.getKeyName(); 2397 2398 Preconditions.checkNotNull(protocolVersion); 2399 Preconditions.checkNotNull(suite); 2400 Preconditions.checkArgument(!suite.equals(CipherSuite.UNKNOWN), 2401 "Chose an UNKNOWN CipherSuite!"); 2402 Preconditions.checkNotNull(ezKeyName); 2403 } 2404 } finally { 2405 readUnlock(); 2406 } 2407 2408 Preconditions.checkState( 2409 (suite == null && ezKeyName == null) || 2410 (suite != null && ezKeyName != null), 2411 "Both suite and ezKeyName should both be null or not null"); 2412 2413 // Generate EDEK if necessary while not holding the lock 2414 edek = generateEncryptedDataEncryptionKey(ezKeyName); 2415 EncryptionFaultInjector.getInstance().startFileAfterGenerateKey(); 2416 } 2417 2418 // Proceed with the create, using the computed cipher suite and 2419 // generated EDEK 2420 BlocksMapUpdateInfo toRemoveBlocks = null; 2421 writeLock(); 2422 try { 2423 checkOperation(OperationCategory.WRITE); 2424 checkNameNodeSafeMode("Cannot create file" + src); 2425 dir.writeLock(); 2426 try { 2427 src = dir.resolvePath(pc, src, pathComponents); 2428 
final INodesInPath iip = dir.getINodesInPath4Write(src); 2429 toRemoveBlocks = startFileInternal( 2430 pc, iip, permissions, holder, 2431 clientMachine, create, overwrite, 2432 createParent, replication, blockSize, 2433 isLazyPersist, suite, protocolVersion, edek, 2434 logRetryCache); 2435 stat = FSDirStatAndListingOp.getFileInfo( 2436 dir, src, false, FSDirectory.isReservedRawName(srcArg), true); 2437 } finally { 2438 dir.writeUnlock(); 2439 } 2440 } catch (StandbyException se) { 2441 skipSync = true; 2442 throw se; 2443 } finally { 2444 writeUnlock(); 2445 // There might be transactions logged while trying to recover the lease. 2446 // They need to be sync'ed even when an exception was thrown. 2447 if (!skipSync) { 2448 getEditLog().logSync(); 2449 if (toRemoveBlocks != null) { 2450 removeBlocks(toRemoveBlocks); 2451 toRemoveBlocks.clear(); 2452 } 2453 } 2454 } 2455 2456 logAuditEvent(true, "create", srcArg, null, stat); 2457 return stat; 2458 } 2459 2460 /** 2461 * Create a new file or overwrite an existing file<br> 2462 * 2463 * Once the file is create the client then allocates a new block with the next 2464 * call using {@link ClientProtocol#addBlock}. 2465 * <p> 2466 * For description of parameters and exceptions thrown see 2467 * {@link ClientProtocol#create} 2468 */ 2469 private BlocksMapUpdateInfo startFileInternal(FSPermissionChecker pc, 2470 INodesInPath iip, PermissionStatus permissions, String holder, 2471 String clientMachine, boolean create, boolean overwrite, 2472 boolean createParent, short replication, long blockSize, 2473 boolean isLazyPersist, CipherSuite suite, CryptoProtocolVersion version, 2474 EncryptedKeyVersion edek, boolean logRetryEntry) 2475 throws IOException { 2476 assert hasWriteLock(); 2477 // Verify that the destination does not exist as a directory already. 
2478 final INode inode = iip.getLastINode(); 2479 final String src = iip.getPath(); 2480 if (inode != null && inode.isDirectory()) { 2481 throw new FileAlreadyExistsException(src + 2482 " already exists as a directory"); 2483 } 2484 2485 final INodeFile myFile = INodeFile.valueOf(inode, src, true); 2486 if (isPermissionEnabled) { 2487 if (overwrite && myFile != null) { 2488 dir.checkPathAccess(pc, iip, FsAction.WRITE); 2489 } 2490 /* 2491 * To overwrite existing file, need to check 'w' permission 2492 * of parent (equals to ancestor in this case) 2493 */ 2494 dir.checkAncestorAccess(pc, iip, FsAction.WRITE); 2495 } 2496 if (!createParent) { 2497 dir.verifyParentDir(iip, src); 2498 } 2499 2500 FileEncryptionInfo feInfo = null; 2501 2502 final EncryptionZone zone = dir.getEZForPath(iip); 2503 if (zone != null) { 2504 // The path is now within an EZ, but we're missing encryption parameters 2505 if (suite == null || edek == null) { 2506 throw new RetryStartFileException(); 2507 } 2508 // Path is within an EZ and we have provided encryption parameters. 2509 // Make sure that the generated EDEK matches the settings of the EZ. 
2510 final String ezKeyName = zone.getKeyName(); 2511 if (!ezKeyName.equals(edek.getEncryptionKeyName())) { 2512 throw new RetryStartFileException(); 2513 } 2514 feInfo = new FileEncryptionInfo(suite, version, 2515 edek.getEncryptedKeyVersion().getMaterial(), 2516 edek.getEncryptedKeyIv(), 2517 ezKeyName, edek.getEncryptionKeyVersionName()); 2518 } 2519 2520 try { 2521 BlocksMapUpdateInfo toRemoveBlocks = null; 2522 if (myFile == null) { 2523 if (!create) { 2524 throw new FileNotFoundException("Can't overwrite non-existent " + 2525 src + " for client " + clientMachine); 2526 } 2527 } else { 2528 if (overwrite) { 2529 toRemoveBlocks = new BlocksMapUpdateInfo(); 2530 List<INode> toRemoveINodes = new ChunkedArrayList<INode>(); 2531 long ret = FSDirDeleteOp.delete(dir, iip, toRemoveBlocks, 2532 toRemoveINodes, now()); 2533 if (ret >= 0) { 2534 iip = INodesInPath.replace(iip, iip.length() - 1, null); 2535 FSDirDeleteOp.incrDeletedFileCount(ret); 2536 removeLeasesAndINodes(src, toRemoveINodes, true); 2537 } 2538 } else { 2539 // If lease soft limit time is expired, recover the lease 2540 recoverLeaseInternal(RecoverLeaseOp.CREATE_FILE, 2541 iip, src, holder, clientMachine, false); 2542 throw new FileAlreadyExistsException(src + " for client " + 2543 clientMachine + " already exists"); 2544 } 2545 } 2546 2547 checkFsObjectLimit(); 2548 INodeFile newNode = null; 2549 2550 // Always do an implicit mkdirs for parent directory tree. 2551 Map.Entry<INodesInPath, String> parent = FSDirMkdirOp 2552 .createAncestorDirectories(dir, iip, permissions); 2553 if (parent != null) { 2554 iip = dir.addFile(parent.getKey(), parent.getValue(), permissions, 2555 replication, blockSize, holder, clientMachine); 2556 newNode = iip != null ? 
iip.getLastINode().asFile() : null;
      }

      if (newNode == null) {
        throw new IOException("Unable to add " + src + " to namespace");
      }
      leaseManager.addLease(newNode.getFileUnderConstructionFeature()
          .getClientName(), src);

      // Set encryption attributes if necessary
      if (feInfo != null) {
        dir.setFileEncryptionInfo(src, feInfo);
        // Re-fetch by id: setting the encryption xattr may have replaced the
        // INodeFile instance in the directory tree.
        newNode = dir.getInode(newNode.getId()).asFile();
      }

      setNewINodeStoragePolicy(newNode, iip, isLazyPersist);

      // record file record in log, record new generation stamp
      getEditLog().logOpenFile(src, newNode, overwrite, logRetryEntry);
      if (NameNode.stateChangeLog.isDebugEnabled()) {
        NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: added " +
            src + " inode " + newNode.getId() + " " + holder);
      }
      return toRemoveBlocks;
    } catch (IOException ie) {
      NameNode.stateChangeLog.warn("DIR* NameSystem.startFile: " + src + " " +
          ie.getMessage());
      throw ie;
    }
  }

  /**
   * Apply the storage policy for a newly created file: either the explicit
   * LAZY_PERSIST policy (when the client passed the lazy-persist create flag)
   * or, otherwise, a policy copied from an ancestor directory when the
   * effective policy is marked copy-on-create.
   *
   * @param inode the newly created file
   * @param iip resolved path to the file; supplies the latest snapshot id
   * @param isLazyPersist whether the client requested LAZY_PERSIST storage
   * @throws IOException if LAZY_PERSIST was requested but the policy has been
   *         disabled by the administrator
   */
  private void setNewINodeStoragePolicy(INodeFile inode,
                                        INodesInPath iip,
                                        boolean isLazyPersist)
      throws IOException {

    if (isLazyPersist) {
      BlockStoragePolicy lpPolicy =
          blockManager.getStoragePolicy("LAZY_PERSIST");

      // Set LAZY_PERSIST storage policy if the flag was passed to
      // CreateFile.
      if (lpPolicy == null) {
        throw new HadoopIllegalArgumentException(
            "The LAZY_PERSIST storage policy has been disabled " +
            "by the administrator.");
      }
      inode.setStoragePolicyID(lpPolicy.getId(),
                               iip.getLatestSnapshotId());
    } else {
      BlockStoragePolicy effectivePolicy =
          blockManager.getStoragePolicy(inode.getStoragePolicyID());

      if (effectivePolicy != null &&
          effectivePolicy.isCopyOnCreateFile()) {
        // Copy effective policy from ancestor directory to current file.
        inode.setStoragePolicyID(effectivePolicy.getId(),
                                 iip.getLatestSnapshotId());
      }
    }
  }

  /**
   * Append to an existing file for append.
   * <p>
   *
   * The method returns the last block of the file if this is a partial block,
   * which can still be used for writing more data. The client uses the returned
   * block locations to form the data pipeline for this block.<br>
   * The method returns null if the last block is full. The client then
   * allocates a new block with the next call using
   * {@link ClientProtocol#addBlock}.
   * <p>
   *
   * For description of parameters and exceptions thrown see
   * {@link ClientProtocol#append(String, String, EnumSetWritable)}
   *
   * @return the last block locations if the block is partial or null otherwise
   */
  private LocatedBlock appendFileInternal(FSPermissionChecker pc,
      INodesInPath iip, String holder, String clientMachine, boolean newBlock,
      boolean logRetryCache) throws IOException {
    assert hasWriteLock();
    // Verify that the destination does not exist as a directory already.
    final INode inode = iip.getLastINode();
    final String src = iip.getPath();
    if (inode != null && inode.isDirectory()) {
      throw new FileAlreadyExistsException("Cannot append to directory " + src
          + "; already exists as a directory.");
    }
    if (isPermissionEnabled) {
      dir.checkPathAccess(pc, iip, FsAction.WRITE);
    }

    try {
      if (inode == null) {
        throw new FileNotFoundException("failed to append to non-existent file "
            + src + " for client " + clientMachine);
      }
      INodeFile myFile = INodeFile.valueOf(inode, src, true);
      // Appending to a lazy-persist (in-memory) file is not supported.
      final BlockStoragePolicy lpPolicy =
          blockManager.getStoragePolicy("LAZY_PERSIST");
      if (lpPolicy != null &&
          lpPolicy.getId() == myFile.getStoragePolicyID()) {
        throw new UnsupportedOperationException(
            "Cannot append to lazy persist file " + src);
      }
      // Opening an existing file for append - may need to recover lease.
      recoverLeaseInternal(RecoverLeaseOp.APPEND_FILE,
          iip, src, holder, clientMachine, false);

      final BlockInfoContiguous lastBlock = myFile.getLastBlock();
      // Check that the block has at least minimum replication.
      if(lastBlock != null && lastBlock.isComplete() &&
          !getBlockManager().isSufficientlyReplicated(lastBlock)) {
        throw new IOException("append: lastBlock=" + lastBlock +
            " of src=" + src + " is not sufficiently replicated yet.");
      }
      return prepareFileForAppend(src, iip, holder, clientMachine, newBlock,
          true, logRetryCache);
    } catch (IOException ie) {
      NameNode.stateChangeLog.warn("DIR* NameSystem.append: " +ie.getMessage());
      throw ie;
    }
  }

  /**
   * Convert current node to under construction.
   * Recreate in-memory lease record.
*
   * @param src path to the file
   * @param leaseHolder identifier of the lease holder on this file
   * @param clientMachine identifier of the client machine
   * @param newBlock if the data is appended to a new block
   * @param writeToEditLog whether to persist this change to the edit log
   * @param logRetryCache whether to record RPC ids in editlog for retry cache
   *                      rebuilding
   * @return the last block locations if the block is partial or null otherwise
   * @throws UnresolvedLinkException
   * @throws IOException
   */
  LocatedBlock prepareFileForAppend(String src, INodesInPath iip,
      String leaseHolder, String clientMachine, boolean newBlock,
      boolean writeToEditLog, boolean logRetryCache) throws IOException {
    final INodeFile file = iip.getLastINode().asFile();
    // Quota delta for growing the last block to the preferred block size;
    // null means no quota update is needed (see verifyQuotaForUCBlock).
    final QuotaCounts delta = verifyQuotaForUCBlock(file, iip);

    file.recordModification(iip.getLatestSnapshotId());
    file.toUnderConstruction(leaseHolder, clientMachine);

    leaseManager.addLease(
        file.getFileUnderConstructionFeature().getClientName(), src);

    LocatedBlock ret = null;
    if (!newBlock) {
      // Append to the existing last block: convert it back to
      // under-construction; a non-null result means it is partial.
      ret = blockManager.convertLastBlockToUnderConstruction(file, 0);
      if (ret != null && delta != null) {
        Preconditions.checkState(delta.getStorageSpace() >= 0,
            "appending to a block with size larger than the preferred block size");
        dir.writeLock();
        try {
          dir.updateCountNoQuotaCheck(iip, iip.length() - 1, delta);
        } finally {
          dir.writeUnlock();
        }
      }
    } else {
      // Append will go to a fresh block: return the current last block, if
      // any, with no datanode locations attached.
      BlockInfoContiguous lastBlock = file.getLastBlock();
      if (lastBlock != null) {
        ExtendedBlock blk = new ExtendedBlock(this.getBlockPoolId(), lastBlock);
        ret = new LocatedBlock(blk, new DatanodeInfo[0]);
      }
    }

    if (writeToEditLog) {
      getEditLog().logAppendFile(src, file, newBlock, logRetryCache);
    }
    return ret;
  }

  /**
   * Verify quota when using
the preferred block size for UC block. This is
   * usually used by append and truncate
   * @throws QuotaExceededException when violating the storage quota
   * @return expected quota usage update. null means no change or no need to
   * update quota usage later
   */
  private QuotaCounts verifyQuotaForUCBlock(INodeFile file, INodesInPath iip)
      throws QuotaExceededException {
    if (!isImageLoaded() || dir.shouldSkipQuotaChecks()) {
      // Do not check quota if editlog is still being processed
      return null;
    }
    if (file.getLastBlock() != null) {
      final QuotaCounts delta = computeQuotaDeltaForUCBlock(file);
      dir.readLock();
      try {
        FSDirectory.verifyQuota(iip, iip.length() - 1, delta, null);
        return delta;
      } finally {
        dir.readUnlock();
      }
    }
    return null;
  }

  /** Compute quota change for converting a complete block to a UC block */
  private QuotaCounts computeQuotaDeltaForUCBlock(INodeFile file) {
    final QuotaCounts delta = new QuotaCounts.Builder().build();
    final BlockInfoContiguous lastBlock = file.getLastBlock();
    if (lastBlock != null) {
      // Space still needed to grow the last block to its preferred size,
      // charged once per replica.
      final long diff = file.getPreferredBlockSize() - lastBlock.getNumBytes();
      final short repl = file.getBlockReplication();
      delta.addStorageSpace(diff * repl);
      final BlockStoragePolicy policy = dir.getBlockStoragePolicySuite()
          .getPolicy(file.getStoragePolicyID());
      List<StorageType> types = policy.chooseStorageTypes(repl);
      for (StorageType t : types) {
        if (t.supportTypeQuota()) {
          delta.addTypeSpace(t, diff);
        }
      }
    }
    return delta;
  }

  /**
   * Recover lease;
   * Immediately revoke the lease of the current lease holder and start lease
   * recovery so that the file can be forced to be closed.
*
   * @param src the path of the file to start lease recovery
   * @param holder the lease holder's name
   * @param clientMachine the client machine's name
   * @return true if the file is already closed
   * @throws IOException
   */
  boolean recoverLease(String src, String holder, String clientMachine)
      throws IOException {
    if (!DFSUtil.isValidName(src)) {
      throw new IOException("Invalid file name: " + src);
    }

    boolean skipSync = false;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot recover the lease of " + src);
      src = dir.resolvePath(pc, src, pathComponents);
      final INodesInPath iip = dir.getINodesInPath4Write(src);
      final INodeFile inode = INodeFile.valueOf(iip.getLastINode(), src);
      if (!inode.isUnderConstruction()) {
        // Nothing to recover: the file is already closed.
        return true;
      }
      if (isPermissionEnabled) {
        dir.checkPathAccess(pc, iip, FsAction.WRITE);
      }

      recoverLeaseInternal(RecoverLeaseOp.RECOVER_LEASE,
          iip, src, holder, clientMachine, true);
    } catch (StandbyException se) {
      skipSync = true;
      throw se;
    } finally {
      writeUnlock();
      // There might be transactions logged while trying to recover the lease.
      // They need to be sync'ed even when an exception was thrown.
      if (!skipSync) {
        getEditLog().logSync();
      }
    }
    return false;
  }

  /** The operation on whose behalf a lease is being checked or recovered. */
  private enum RecoverLeaseOp {
    CREATE_FILE,
    APPEND_FILE,
    TRUNCATE_FILE,
    RECOVER_LEASE;

    // Builds a uniform failure message naming the operation, path, holder,
    // client machine and reason.
    private String getExceptionMessage(String src, String holder,
        String clientMachine, String reason) {
      return "Failed to " + this + " " + src + " for " + holder +
          " on " + clientMachine + " because " + reason;
    }
  }

  /**
   * Check the lease on an under-construction file and, depending on its
   * state, either recover it or reject the caller's operation with an
   * appropriate exception. No-op if the file is not under construction.
   *
   * @param op the operation being attempted (used in error messages)
   * @param iip resolved path to the file
   * @param src the file path
   * @param holder the caller's lease holder name
   * @param clientMachine the caller's machine (used in error messages)
   * @param force if true, immediately release the current holder's lease and
   *              start recovery; if false, recover only when the holder's
   *              soft limit has expired
   * @throws IOException if the lease cannot be taken over now
   */
  void recoverLeaseInternal(RecoverLeaseOp op, INodesInPath iip,
      String src, String holder, String clientMachine, boolean force)
      throws IOException {
    assert hasWriteLock();
    INodeFile file = iip.getLastINode().asFile();
    if (file != null && file.isUnderConstruction()) {
      //
      // If the file is under construction , then it must be in our
      // leases. Find the appropriate lease record.
      //
      Lease lease = leaseManager.getLease(holder);

      if (!force && lease != null) {
        Lease leaseFile = leaseManager.getLeaseByPath(src);
        if (leaseFile != null && leaseFile.equals(lease)) {
          // We found the lease for this file but the original
          // holder is trying to obtain it again.
          throw new AlreadyBeingCreatedException(
              op.getExceptionMessage(src, holder, clientMachine,
                  holder + " is already the current lease holder."));
        }
      }
      //
      // Find the original holder.
      //
      FileUnderConstructionFeature uc = file.getFileUnderConstructionFeature();
      String clientName = uc.getClientName();
      lease = leaseManager.getLease(clientName);
      if (lease == null) {
        throw new AlreadyBeingCreatedException(
            op.getExceptionMessage(src, holder, clientMachine,
                "the file is under construction but no leases found."));
      }
      if (force) {
        // close now: no need to wait for soft lease expiration and
        // close only the file src
        LOG.info("recoverLease: " + lease + ", src=" + src +
            " from client " + clientName);
        internalReleaseLease(lease, src, iip, holder);
      } else {
        assert lease.getHolder().equals(clientName) :
            "Current lease holder " + lease.getHolder() +
            " does not match file creator " + clientName;
        //
        // If the original holder has not renewed in the last SOFTLIMIT
        // period, then start lease recovery.
        //
        if (lease.expiredSoftLimit()) {
          LOG.info("startFile: recover " + lease + ", src=" + src + " client "
              + clientName);
          boolean isClosed = internalReleaseLease(lease, src, iip, null);
          if(!isClosed)
            throw new RecoveryInProgressException(
                op.getExceptionMessage(src, holder, clientMachine,
                    "lease recovery is in progress. Try again later."));
        } else {
          final BlockInfoContiguous lastBlock = file.getLastBlock();
          if (lastBlock != null
              && lastBlock.getBlockUCState() == BlockUCState.UNDER_RECOVERY) {
            throw new RecoveryInProgressException(
                op.getExceptionMessage(src, holder, clientMachine,
                    "another recovery is in progress by "
                        + clientName + " on " + uc.getClientMachine()));
          } else {
            throw new AlreadyBeingCreatedException(
                op.getExceptionMessage(src, holder, clientMachine,
                    "this file lease is currently owned by "
                        + clientName + " on " + uc.getClientMachine()));
          }
        }
      }
    }
  }

  /**
   * Append to an existing file in the namespace.
*/
  LastBlockWithStatus appendFile(String src, String holder,
      String clientMachine, EnumSet<CreateFlag> flag, boolean logRetryCache)
      throws IOException {
    try {
      return appendFileInt(src, holder, clientMachine,
          flag.contains(CreateFlag.NEW_BLOCK), logRetryCache);
    } catch (AccessControlException e) {
      // Audit the denied append before propagating the failure.
      logAuditEvent(false, "append", src);
      throw e;
    }
  }

  /**
   * Worker for {@link #appendFile}: resolves the path under the write lock,
   * reopens the file for append, and returns the (possibly null) last block
   * together with the file status.
   *
   * @param srcArg path as supplied by the client (also used for auditing)
   * @param holder lease holder for the append
   * @param clientMachine client machine name
   * @param newBlock whether data will be appended to a new block
   * @param logRetryCache whether to record RPC ids in the edit log for
   *                      retry-cache rebuilding
   */
  private LastBlockWithStatus appendFileInt(final String srcArg, String holder,
      String clientMachine, boolean newBlock, boolean logRetryCache)
      throws IOException {
    String src = srcArg;
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: src=" + src
          + ", holder=" + holder
          + ", clientMachine=" + clientMachine);
    }
    boolean skipSync = false;
    if (!supportAppends) {
      throw new UnsupportedOperationException(
          "Append is not enabled on this NameNode. Use the " +
          DFS_SUPPORT_APPEND_KEY + " configuration option to enable it.");
    }

    LocatedBlock lb = null;
    HdfsFileStatus stat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot append to file" + src);
      src = dir.resolvePath(pc, src, pathComponents);
      final INodesInPath iip = dir.getINodesInPath4Write(src);
      lb = appendFileInternal(pc, iip, holder, clientMachine, newBlock,
          logRetryCache);
      stat = FSDirStatAndListingOp.getFileInfo(dir, src, false,
          FSDirectory.isReservedRawName(srcArg), true);
    } catch (StandbyException se) {
      skipSync = true;
      throw se;
    } finally {
      writeUnlock();
      // There might be transactions logged while trying to recover the lease.
      // They need to be sync'ed even when an exception was thrown.
      if (!skipSync) {
        getEditLog().logSync();
      }
    }
    if (lb != null) {
      if (NameNode.stateChangeLog.isDebugEnabled()) {
        NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: file "
            +src+" for "+holder+" at "+clientMachine
            +" block " + lb.getBlock()
            +" block size " + lb.getBlock().getNumBytes());
      }
    }
    logAuditEvent(true, "append", srcArg);
    return new LastBlockWithStatus(lb, stat);
  }

  /** Wrap a local block with this namesystem's block pool id. */
  ExtendedBlock getExtendedBlock(Block blk) {
    return new ExtendedBlock(blockPoolId, blk);
  }

  /** Record the block pool id and propagate it to the block manager. */
  void setBlockPoolId(String bpid) {
    blockPoolId = bpid;
    blockManager.setBlockPoolId(blockPoolId);
  }

  /**
   * The client would like to obtain an additional block for the indicated
   * filename (which is being written-to). Return an array that consists
   * of the block, plus a set of machines. The first on this list should
   * be where the client writes data. Subsequent items in the list must
   * be provided in the connection to the first datanode.
   *
   * Make sure the previous blocks have been reported by datanodes and
   * are replicated. Will return an empty 2-elt array if we want the
   * client to "try again later".
   */
  LocatedBlock getAdditionalBlock(String src, long fileId, String clientName,
      ExtendedBlock previous, Set<Node> excludedNodes,
      List<String> favoredNodes) throws IOException {
    final long blockSize;
    final int replication;
    final byte storagePolicyID;
    Node clientNode = null;
    String clientMachine = null;

    if(NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("BLOCK* getAdditionalBlock: "
          + src + " inodeId " + fileId + " for " + clientName);
    }

    // Part I. Analyze the state of the file with respect to the input data.
checkOperation(OperationCategory.READ);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    FSPermissionChecker pc = getPermissionChecker();
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      src = dir.resolvePath(pc, src, pathComponents);
      LocatedBlock[] onRetryBlock = new LocatedBlock[1];
      FileState fileState = analyzeFileState(
          src, fileId, clientName, previous, onRetryBlock);
      final INodeFile pendingFile = fileState.inode;
      // Check if the penultimate block is minimally replicated
      if (!checkFileProgress(src, pendingFile, false)) {
        throw new NotReplicatedYetException("Not replicated yet: " + src);
      }
      src = fileState.path;

      if (onRetryBlock[0] != null && onRetryBlock[0].getLocations().length > 0) {
        // This is a retry. Just return the last block if having locations.
        return onRetryBlock[0];
      }
      if (pendingFile.getBlocks().length >= maxBlocksPerFile) {
        throw new IOException("File has reached the limit on maximum number of"
            + " blocks (" + DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_KEY
            + "): " + pendingFile.getBlocks().length + " >= "
            + maxBlocksPerFile);
      }
      blockSize = pendingFile.getPreferredBlockSize();
      clientMachine = pendingFile.getFileUnderConstructionFeature()
          .getClientMachine();
      clientNode = blockManager.getDatanodeManager().getDatanodeByHost(
          clientMachine);
      replication = pendingFile.getFileReplication();
      storagePolicyID = pendingFile.getStoragePolicyID();
    } finally {
      readUnlock();
    }

    if (clientNode == null) {
      // The client is not a datanode; resolve its network location instead.
      clientNode = getClientNode(clientMachine);
    }

    // choose targets for the new block to be allocated.
    // NOTE: this runs with no namesystem lock held; the file state is
    // re-analyzed under the write lock below before the block is allocated.
    final DatanodeStorageInfo targets[] = getBlockManager().chooseTarget4NewBlock(
        src, replication, clientNode, excludedNodes, blockSize, favoredNodes,
        storagePolicyID);

    // Part II.
    // Allocate a new block, add it to the INode and the BlocksMap.
    Block newBlock = null;
    long offset;
    checkOperation(OperationCategory.WRITE);
    waitForLoadingFSImage();
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      // Run the full analysis again, since things could have changed
      // while chooseTarget() was executing.
      LocatedBlock[] onRetryBlock = new LocatedBlock[1];
      FileState fileState =
          analyzeFileState(src, fileId, clientName, previous, onRetryBlock);
      final INodeFile pendingFile = fileState.inode;
      src = fileState.path;

      if (onRetryBlock[0] != null) {
        if (onRetryBlock[0].getLocations().length > 0) {
          // This is a retry. Just return the last block if having locations.
          return onRetryBlock[0];
        } else {
          // add new chosen targets to already allocated block and return
          BlockInfoContiguous lastBlockInFile = pendingFile.getLastBlock();
          ((BlockInfoContiguousUnderConstruction) lastBlockInFile)
              .setExpectedLocations(targets);
          offset = pendingFile.computeFileSize();
          return makeLocatedBlock(lastBlockInFile, targets, offset);
        }
      }

      // commit the last block and complete it if it has minimum replicas
      commitOrCompleteLastBlock(pendingFile, fileState.iip,
          ExtendedBlock.getLocalBlock(previous));

      // allocate new block, record block locations in INode.
      newBlock = createNewBlock();
      INodesInPath inodesInPath = INodesInPath.fromINode(pendingFile);
      saveAllocatedBlock(src, inodesInPath, newBlock, targets);

      persistNewBlock(src, pendingFile);
      offset = pendingFile.computeFileSize();
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the write lock.
    getEditLog().logSync();

    // Return located block
    return makeLocatedBlock(newBlock, targets, offset);
  }

  /*
   * Resolve clientmachine address to get a network location path
   */
  private Node getClientNode(String clientMachine) {
    List<String> hosts = new ArrayList<String>(1);
    hosts.add(clientMachine);
    List<String> rName = getBlockManager().getDatanodeManager()
        .resolveNetworkLocation(hosts);
    Node clientNode = null;
    if (rName != null) {
      // Able to resolve clientMachine mapping.
      // Create a temp node to findout the rack local nodes
      clientNode = new NodeBase(rName.get(0) + NodeBase.PATH_SEPARATOR_STR
          + clientMachine);
    }
    return clientNode;
  }

  /** Snapshot of a pending file's state as resolved by analyzeFileState. */
  static class FileState {
    public final INodeFile inode;  // the file under construction
    public final String path;      // resolved path (may differ from RPC arg)
    public final INodesInPath iip; // resolved path components

    public FileState(INodeFile inode, String fullPath, INodesInPath iip) {
      this.inode = inode;
      this.path = fullPath;
      this.iip = iip;
    }
  }

  /**
   * Validate the state of an under-construction file before allocating a new
   * block for it, distinguishing genuine allocations from RPC retries.
   * Caller must hold the read lock.
   *
   * @param onRetryBlock output parameter: set to the last block when the
   *        call is recognized as a retry of a previous allocation
   * @return the resolved file state
   */
  FileState analyzeFileState(String src,
                             long fileId,
                             String clientName,
                             ExtendedBlock previous,
                             LocatedBlock[] onRetryBlock)
          throws IOException {
    assert hasReadLock();

    checkBlock(previous);
    onRetryBlock[0] = null;
    checkNameNodeSafeMode("Cannot add block to " + src);

    // have we exceeded the configured limit of fs objects.
checkFsObjectLimit();

    Block previousBlock = ExtendedBlock.getLocalBlock(previous);
    final INode inode;
    final INodesInPath iip;
    if (fileId == INodeId.GRANDFATHER_INODE_ID) {
      // Older clients may not have given us an inode ID to work with.
      // In this case, we have to try to resolve the path and hope it
      // hasn't changed or been deleted since the file was opened for write.
      iip = dir.getINodesInPath4Write(src);
      inode = iip.getLastINode();
    } else {
      // Newer clients pass the inode ID, so we can just get the inode
      // directly.
      inode = dir.getInode(fileId);
      iip = INodesInPath.fromINode(inode);
      if (inode != null) {
        src = iip.getPath();
      }
    }
    final INodeFile pendingFile = checkLease(src, clientName, inode, fileId);
    BlockInfoContiguous lastBlockInFile = pendingFile.getLastBlock();
    if (!Block.matchingIdAndGenStamp(previousBlock, lastBlockInFile)) {
      // The block that the client claims is the current last block
      // doesn't match up with what we think is the last block. There are
      // four possibilities:
      // 1) This is the first block allocation of an append() pipeline
      //    which started appending exactly at or exceeding the block boundary.
      //    In this case, the client isn't passed the previous block,
      //    so it makes the allocateBlock() call with previous=null.
      //    We can distinguish this since the last block of the file
      //    will be exactly a full block.
      // 2) This is a retry from a client that missed the response of a
      //    prior getAdditionalBlock() call, perhaps because of a network
      //    timeout, or because of an HA failover. In that case, we know
      //    by the fact that the client is re-issuing the RPC that it
      //    never began to write to the old block. Hence it is safe to
      //    to return the existing block.
      // 3) This is an entirely bogus request/bug -- we should error out
      //    rather than potentially appending a new block with an empty
      //    one in the middle, etc
      // 4) This is a retry from a client that timed out while
      //    the prior getAdditionalBlock() is still being processed,
      //    currently working on chooseTarget().
      //    There are no means to distinguish between the first and
      //    the second attempts in Part I, because the first one hasn't
      //    changed the namesystem state yet.
      //    We run this analysis again in Part II where case 4 is impossible.

      BlockInfoContiguous penultimateBlock = pendingFile.getPenultimateBlock();
      if (previous == null &&
          lastBlockInFile != null &&
          lastBlockInFile.getNumBytes() >= pendingFile.getPreferredBlockSize() &&
          lastBlockInFile.isComplete()) {
        // Case 1
        if (NameNode.stateChangeLog.isDebugEnabled()) {
          NameNode.stateChangeLog.debug(
              "BLOCK* NameSystem.allocateBlock: handling block allocation" +
              " writing to a file with a complete previous block: src=" +
              src + " lastBlock=" + lastBlockInFile);
        }
      } else if (Block.matchingIdAndGenStamp(penultimateBlock, previousBlock)) {
        if (lastBlockInFile.getNumBytes() != 0) {
          throw new IOException(
              "Request looked like a retry to allocate block " +
              lastBlockInFile + " but it already contains " +
              lastBlockInFile.getNumBytes() + " bytes");
        }

        // Case 2
        // Return the last block.
        NameNode.stateChangeLog.info("BLOCK* allocateBlock: " +
            "caught retry for allocation of a new block in " +
            src + ". Returning previously allocated block " + lastBlockInFile);
        long offset = pendingFile.computeFileSize();
        onRetryBlock[0] = makeLocatedBlock(lastBlockInFile,
            ((BlockInfoContiguousUnderConstruction)lastBlockInFile).getExpectedStorageLocations(),
            offset);
        return new FileState(pendingFile, src, iip);
      } else {
        // Case 3
        throw new IOException("Cannot allocate block in " + src + ": " +
            "passed 'previous' block " + previous + " does not match actual " +
            "last block in file " + lastBlockInFile);
      }
    }
    return new FileState(pendingFile, src, iip);
  }

  /** Build a LocatedBlock carrying a WRITE block token for the given block. */
  LocatedBlock makeLocatedBlock(Block blk, DatanodeStorageInfo[] locs,
                                long offset) throws IOException {
    LocatedBlock lBlk = new LocatedBlock(
        getExtendedBlock(blk), locs, offset, false);
    getBlockManager().setBlockToken(
        lBlk, BlockTokenSecretManager.AccessMode.WRITE);
    return lBlk;
  }

  /** @see ClientProtocol#getAdditionalDatanode */
  LocatedBlock getAdditionalDatanode(String src, long fileId,
      final ExtendedBlock blk, final DatanodeInfo[] existings,
      final String[] storageIDs,
      final Set<Node> excludes,
      final int numAdditionalNodes, final String clientName
      ) throws IOException {
    //check if the feature is enabled
    dtpReplaceDatanodeOnFailure.checkEnabled();

    Node clientnode = null;
    String clientMachine;
    final long preferredblocksize;
    final byte storagePolicyID;
    final List<DatanodeStorageInfo> chosen;
    checkOperation(OperationCategory.READ);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    FSPermissionChecker pc = getPermissionChecker();
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      //check safe mode
      checkNameNodeSafeMode("Cannot add datanode; src=" + src + ", blk=" + blk);
      src = dir.resolvePath(pc, src, pathComponents);

      //check lease
      final INode inode;
if (fileId == INodeId.GRANDFATHER_INODE_ID) {
        // Older clients may not have given us an inode ID to work with.
        // In this case, we have to try to resolve the path and hope it
        // hasn't changed or been deleted since the file was opened for write.
        inode = dir.getINode(src);
      } else {
        inode = dir.getInode(fileId);
        if (inode != null) src = inode.getFullPathName();
      }
      final INodeFile file = checkLease(src, clientName, inode, fileId);
      clientMachine = file.getFileUnderConstructionFeature().getClientMachine();
      clientnode = blockManager.getDatanodeManager().getDatanodeByHost(clientMachine);
      preferredblocksize = file.getPreferredBlockSize();
      storagePolicyID = file.getStoragePolicyID();

      //find datanode storages
      final DatanodeManager dm = blockManager.getDatanodeManager();
      chosen = Arrays.asList(dm.getDatanodeStorageInfos(existings, storageIDs));
    } finally {
      readUnlock();
    }

    if (clientnode == null) {
      // The client is not a datanode; resolve its network location instead.
      clientnode = getClientNode(clientMachine);
    }

    // choose new datanodes.
    final DatanodeStorageInfo[] targets = blockManager.chooseTarget4AdditionalDatanode(
        src, numAdditionalNodes, clientnode, chosen,
        excludes, preferredblocksize, storagePolicyID);
    final LocatedBlock lb = new LocatedBlock(blk, targets);
    blockManager.setBlockToken(lb, AccessMode.COPY);
    return lb;
  }

  /**
   * The client would like to let go of the given block
   */
  boolean abandonBlock(ExtendedBlock b, long fileId, String src, String holder)
      throws IOException {
    if(NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: " + b
          + "of file " + src);
    }
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    FSPermissionChecker pc = getPermissionChecker();
    waitForLoadingFSImage();
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot abandon block " + b + " for file" + src);
      src = dir.resolvePath(pc, src, pathComponents);

      final INode inode;
      final INodesInPath iip;
      if (fileId == INodeId.GRANDFATHER_INODE_ID) {
        // Older clients may not have given us an inode ID to work with.
        // In this case, we have to try to resolve the path and hope it
        // hasn't changed or been deleted since the file was opened for write.
        iip = dir.getINodesInPath(src, true);
        inode = iip.getLastINode();
      } else {
        inode = dir.getInode(fileId);
        iip = INodesInPath.fromINode(inode);
        if (inode != null) {
          src = iip.getPath();
        }
      }
      final INodeFile file = checkLease(src, holder, inode, fileId);

      // Remove the block from the pending creates list
      boolean removed = dir.removeBlock(src, iip, file,
          ExtendedBlock.getLocalBlock(b));
      if (!removed) {
        return true;
      }
      if(NameNode.stateChangeLog.isDebugEnabled()) {
        NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: "
            + b + " is removed from pendingCreates");
      }
      persistBlocks(src, file, false);
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the write lock.
    getEditLog().logSync();

    return true;
  }

  /**
   * Verify that the caller holds a valid lease on the given file.
   *
   * @param src resolved file path (used in error messages)
   * @param holder expected lease holder; if non-null it must match the file's
   *        recorded client name
   * @param inode the resolved inode, possibly null
   * @param fileId inode id supplied by the client (used in error messages)
   * @return the file, guaranteed to be an existing, non-deleted,
   *         under-construction regular file
   * @throws LeaseExpiredException if the file is missing, not a regular file,
   *         not under construction, or leased to a different client
   * @throws FileNotFoundException if the file has been deleted
   */
  private INodeFile checkLease(String src, String holder, INode inode,
      long fileId) throws LeaseExpiredException, FileNotFoundException {
    assert hasReadLock();
    final String ident = src + " (inode " + fileId + ")";
    if (inode == null) {
      Lease lease = leaseManager.getLease(holder);
      throw new LeaseExpiredException(
          "No lease on " + ident + ": File does not exist. "
          + (lease != null ? lease.toString()
              : "Holder " + holder + " does not have any open files."));
    }
    if (!inode.isFile()) {
      Lease lease = leaseManager.getLease(holder);
      throw new LeaseExpiredException(
          "No lease on " + ident + ": INode is not a regular file. "
          + (lease != null ? lease.toString()
              : "Holder " + holder + " does not have any open files."));
    }
    final INodeFile file = inode.asFile();
    if (!file.isUnderConstruction()) {
      Lease lease = leaseManager.getLease(holder);
      throw new LeaseExpiredException(
          "No lease on " + ident + ": File is not open for writing. "
          + (lease != null ?
lease.toString() 3404 : "Holder " + holder + " does not have any open files.")); 3405 } 3406 // No further modification is allowed on a deleted file. 3407 // A file is considered deleted, if it is not in the inodeMap or is marked 3408 // as deleted in the snapshot feature. 3409 if (isFileDeleted(file)) { 3410 throw new FileNotFoundException(src); 3411 } 3412 String clientName = file.getFileUnderConstructionFeature().getClientName(); 3413 if (holder != null && !clientName.equals(holder)) { 3414 throw new LeaseExpiredException("Lease mismatch on " + ident + 3415 " owned by " + clientName + " but is accessed by " + holder); 3416 } 3417 return file; 3418 } 3419 3420 /** 3421 * Complete in-progress write to the given file. 3422 * @return true if successful, false if the client should continue to retry 3423 * (e.g if not all blocks have reached minimum replication yet) 3424 * @throws IOException on error (eg lease mismatch, file not open, file deleted) 3425 */ 3426 boolean completeFile(final String srcArg, String holder, 3427 ExtendedBlock last, long fileId) 3428 throws SafeModeException, UnresolvedLinkException, IOException { 3429 String src = srcArg; 3430 if (NameNode.stateChangeLog.isDebugEnabled()) { 3431 NameNode.stateChangeLog.debug("DIR* NameSystem.completeFile: " + 3432 src + " for " + holder); 3433 } 3434 checkBlock(last); 3435 boolean success = false; 3436 checkOperation(OperationCategory.WRITE); 3437 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 3438 FSPermissionChecker pc = getPermissionChecker(); 3439 waitForLoadingFSImage(); 3440 writeLock(); 3441 try { 3442 checkOperation(OperationCategory.WRITE); 3443 checkNameNodeSafeMode("Cannot complete file " + src); 3444 src = dir.resolvePath(pc, src, pathComponents); 3445 success = completeFileInternal(src, holder, 3446 ExtendedBlock.getLocalBlock(last), fileId); 3447 } finally { 3448 writeUnlock(); 3449 } 3450 getEditLog().logSync(); 3451 if (success) { 3452 
NameNode.stateChangeLog.info("DIR* completeFile: " + srcArg 3453 + " is closed by " + holder); 3454 } 3455 return success; 3456 } 3457 3458 private boolean completeFileInternal(String src, String holder, Block last, 3459 long fileId) throws IOException { 3460 assert hasWriteLock(); 3461 final INodeFile pendingFile; 3462 final INodesInPath iip; 3463 INode inode = null; 3464 try { 3465 if (fileId == INodeId.GRANDFATHER_INODE_ID) { 3466 // Older clients may not have given us an inode ID to work with. 3467 // In this case, we have to try to resolve the path and hope it 3468 // hasn't changed or been deleted since the file was opened for write. 3469 iip = dir.getINodesInPath(src, true); 3470 inode = iip.getLastINode(); 3471 } else { 3472 inode = dir.getInode(fileId); 3473 iip = INodesInPath.fromINode(inode); 3474 if (inode != null) { 3475 src = iip.getPath(); 3476 } 3477 } 3478 pendingFile = checkLease(src, holder, inode, fileId); 3479 } catch (LeaseExpiredException lee) { 3480 if (inode != null && inode.isFile() && 3481 !inode.asFile().isUnderConstruction()) { 3482 // This could be a retry RPC - i.e the client tried to close 3483 // the file, but missed the RPC response. Thus, it is trying 3484 // again to close the file. If the file still exists and 3485 // the client's view of the last block matches the actual 3486 // last block, then we'll treat it as a successful close. 3487 // See HDFS-3031. 3488 final Block realLastBlock = inode.asFile().getLastBlock(); 3489 if (Block.matchingIdAndGenStamp(last, realLastBlock)) { 3490 NameNode.stateChangeLog.info("DIR* completeFile: " + 3491 "request from " + holder + " to complete inode " + fileId + 3492 "(" + src + ") which is already closed. But, it appears to be " + 3493 "an RPC retry. Returning success"); 3494 return true; 3495 } 3496 } 3497 throw lee; 3498 } 3499 // Check the state of the penultimate block. It should be completed 3500 // before attempting to complete the last one. 
3501 if (!checkFileProgress(src, pendingFile, false)) { 3502 return false; 3503 } 3504 3505 // commit the last block and complete it if it has minimum replicas 3506 commitOrCompleteLastBlock(pendingFile, iip, last); 3507 3508 if (!checkFileProgress(src, pendingFile, true)) { 3509 return false; 3510 } 3511 3512 finalizeINodeFileUnderConstruction(src, pendingFile, 3513 Snapshot.CURRENT_STATE_ID); 3514 return true; 3515 } 3516 3517 /** 3518 * Save allocated block at the given pending filename 3519 * 3520 * @param src path to the file 3521 * @param inodesInPath representing each of the components of src. 3522 * The last INode is the INode for {@code src} file. 3523 * @param newBlock newly allocated block to be save 3524 * @param targets target datanodes where replicas of the new block is placed 3525 * @throws QuotaExceededException If addition of block exceeds space quota 3526 */ 3527 BlockInfoContiguous saveAllocatedBlock(String src, INodesInPath inodesInPath, 3528 Block newBlock, DatanodeStorageInfo[] targets) 3529 throws IOException { 3530 assert hasWriteLock(); 3531 BlockInfoContiguous b = dir.addBlock(src, inodesInPath, newBlock, targets); 3532 NameNode.stateChangeLog.info("BLOCK* allocate " + b + " for " + src); 3533 DatanodeStorageInfo.incrementBlocksScheduled(targets); 3534 return b; 3535 } 3536 3537 /** 3538 * Create new block with a unique block id and a new generation stamp. 3539 */ 3540 Block createNewBlock() throws IOException { 3541 assert hasWriteLock(); 3542 Block b = new Block(nextBlockId(), 0, 0); 3543 // Increment the generation stamp for every new block. 3544 b.setGenerationStamp(nextGenerationStamp(false)); 3545 return b; 3546 } 3547 3548 /** 3549 * Check that the indicated file's blocks are present and 3550 * replicated. If not, return false. If checkall is true, then check 3551 * all blocks, otherwise check only penultimate block. 
   */
  boolean checkFileProgress(String src, INodeFile v, boolean checkall) {
    if (checkall) {
      // check all blocks of the file.
      for (BlockInfoContiguous block: v.getBlocks()) {
        if (!isCompleteBlock(src, block, blockManager.minReplication)) {
          return false;
        }
      }
    } else {
      // check the penultimate block of this file
      BlockInfoContiguous b = v.getPenultimateBlock();
      // A null penultimate block (file has <= 1 block) counts as complete.
      if (b != null
          && !isCompleteBlock(src, b, blockManager.minReplication)) {
        return false;
      }
    }
    return true;
  }

  /**
   * @return true iff the block is in the COMPLETE state; otherwise logs the
   * block's under-construction state and replica count and returns false.
   * Assumes any non-COMPLETE block is a BlockInfoContiguousUnderConstruction
   * (the cast below).
   */
  private static boolean isCompleteBlock(String src, BlockInfoContiguous b, int minRepl) {
    if (!b.isComplete()) {
      final BlockInfoContiguousUnderConstruction uc = (BlockInfoContiguousUnderConstruction)b;
      final int numNodes = b.numNodes();
      LOG.info("BLOCK* " + b + " is not COMPLETE (ucState = "
          + uc.getBlockUCState() + ", replication# = " + numNodes
          + (numNodes < minRepl? " < ": " >= ")
          + " minimum = " + minRepl + ") in file " + src);
      return false;
    }
    return true;
  }

  ////////////////////////////////////////////////////////////////
  // Here's how to handle block-copy failure during client write:
  // -- As usual, the client's write should result in a streaming
  // backup write to a k-machine sequence.
  // -- If one of the backup machines fails, no worries.  Fail silently.
  // -- Before client is allowed to close and finalize file, make sure
  // that the blocks are backed up.  Namenode may have to issue specific backup
  // commands to make up for earlier datanode failures.  Once all copies
  // are made, edit namespace and return to client.
  ////////////////////////////////////////////////////////////////

  /**
   * Change the indicated filename.
   * @deprecated Use {@link #renameTo(String, String, boolean,
   * Options.Rename...)} instead.
   */
  @Deprecated
  boolean renameTo(String src, String dst, boolean logRetryCache)
      throws IOException {
    waitForLoadingFSImage();
    checkOperation(OperationCategory.WRITE);
    FSDirRenameOp.RenameOldResult ret = null;
    writeLock();
    try {
      // Re-check operation category under the lock (HA state may change).
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot rename " + src);
      ret = FSDirRenameOp.renameToInt(dir, src, dst, logRetryCache);
    } catch (AccessControlException e) {
      logAuditEvent(false, "rename", src, dst, null);
      throw e;
    } finally {
      writeUnlock();
    }
    boolean success = ret != null && ret.success;
    if (success) {
      // Only force the edits to disk when a rename was actually logged.
      getEditLog().logSync();
    }
    logAuditEvent(success, "rename", src, dst,
        ret == null ? null : ret.auditStat);
    return success;
  }

  /**
   * Rename {@code src} to {@code dst}, honoring the given
   * {@link Options.Rename} flags. Blocks belonging to an overwritten
   * destination file are collected under the lock and removed incrementally
   * afterwards.
   */
  void renameTo(final String src, final String dst,
                boolean logRetryCache, Options.Rename... options)
      throws IOException {
    waitForLoadingFSImage();
    checkOperation(OperationCategory.WRITE);
    Map.Entry<BlocksMapUpdateInfo, HdfsFileStatus> res = null;
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot rename " + src);
      res = FSDirRenameOp.renameToInt(dir, src, dst, logRetryCache, options);
    } catch (AccessControlException e) {
      logAuditEvent(false, "rename (options=" + Arrays.toString(options) +
          ")", src, dst, null);
      throw e;
    } finally {
      writeUnlock();
    }

    // Sync the edit log outside the FSN write lock.
    getEditLog().logSync();

    BlocksMapUpdateInfo collectedBlocks = res.getKey();
    HdfsFileStatus auditStat = res.getValue();
    if (!collectedBlocks.getToDeleteList().isEmpty()) {
      // Incremental, lock-chunked deletion of the overwritten file's blocks.
      removeBlocks(collectedBlocks);
      collectedBlocks.clear();
    }

    logAuditEvent(true, "rename (options=" + Arrays.toString(options) +
        ")", src, dst, auditStat);
  }

  /**
   * Remove the indicated file from namespace.
   *
   * @see ClientProtocol#delete(String, boolean) for detailed description and
   * description of exceptions
   */
  boolean delete(String src, boolean recursive, boolean logRetryCache)
      throws IOException {
    waitForLoadingFSImage();
    checkOperation(OperationCategory.WRITE);
    BlocksMapUpdateInfo toRemovedBlocks = null;
    writeLock();
    boolean ret = false;
    try {
      // Re-check operation category under the lock (HA state may change).
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot delete " + src);
      toRemovedBlocks = FSDirDeleteOp.delete(
          this, src, recursive, logRetryCache);
      // A null result means nothing was deleted (e.g. path did not exist).
      ret = toRemovedBlocks != null;
    } catch (AccessControlException e) {
      logAuditEvent(false, "delete", src);
      throw e;
    } finally {
      writeUnlock();
    }
    if (toRemovedBlocks != null) {
      removeBlocks(toRemovedBlocks); // Incremental deletion of blocks
    }
    logAuditEvent(true, "delete", src);
    return ret;
  }

  /** @return a permission checker for the current remote caller. */
  FSPermissionChecker getPermissionChecker()
      throws AccessControlException {
    return dir.getPermissionChecker();
  }

  /**
   * From the given list, incrementally remove the blocks from blockManager
   * Writelock is dropped and reacquired every BLOCK_DELETION_INCREMENT to
   * ensure that other waiters on the lock can get in.
See HDFS-2938
   *
   * @param blocks
   *          An instance of {@link BlocksMapUpdateInfo} which contains a list
   *          of blocks that need to be removed from blocksMap
   */
  void removeBlocks(BlocksMapUpdateInfo blocks) {
    List<Block> toDeleteList = blocks.getToDeleteList();
    Iterator<Block> iter = toDeleteList.iterator();
    while (iter.hasNext()) {
      writeLock();
      try {
        // Remove at most BLOCK_DELETION_INCREMENT blocks per lock hold so
        // that other operations can interleave with a large deletion.
        for (int i = 0; i < BLOCK_DELETION_INCREMENT && iter.hasNext(); i++) {
          blockManager.removeBlock(iter.next());
        }
      } finally {
        writeUnlock();
      }
    }
  }

  /**
   * Remove leases and inodes related to a given path
   * @param src The given path
   * @param removedINodes Containing the list of inodes to be removed from
   *                      inodesMap
   * @param acquireINodeMapLock Whether to acquire the lock for inode removal
   */
  void removeLeasesAndINodes(String src, List<INode> removedINodes,
      final boolean acquireINodeMapLock) {
    assert hasWriteLock();
    leaseManager.removeLeaseWithPrefixPath(src);
    // remove inodes from inodesMap
    if (removedINodes != null) {
      if (acquireINodeMapLock) {
        dir.writeLock();
      }
      try {
        dir.removeFromInodeMap(removedINodes);
      } finally {
        if (acquireINodeMapLock) {
          dir.writeUnlock();
        }
      }
      removedINodes.clear();
    }
  }

  /**
   * Removes the blocks from blocksmap and updates the safemode blocks total
   *
   * @param blocks
   *          An instance of {@link BlocksMapUpdateInfo} which contains a list
   *          of blocks that need to be removed from blocksMap
   */
  void removeBlocksAndUpdateSafemodeTotal(BlocksMapUpdateInfo blocks) {
    assert hasWriteLock();
    // In the case that we are a Standby tailing edits from the
    // active while in safe-mode, we need to track the total number
    // of blocks and safe blocks in the system.
    boolean trackBlockCounts = isSafeModeTrackingBlocks();
    int numRemovedComplete = 0, numRemovedSafe = 0;

    for (Block b : blocks.getToDeleteList()) {
      if (trackBlockCounts) {
        // NOTE(review): getStoredBlock() may return null if the block is
        // already gone from the blocks map; this dereference assumes the
        // caller guarantees presence -- confirm, otherwise it can NPE.
        BlockInfoContiguous bi = getStoredBlock(b);
        if (bi.isComplete()) {
          numRemovedComplete++;
          if (bi.numNodes() >= blockManager.minReplication) {
            numRemovedSafe++;
          }
        }
      }
      blockManager.removeBlock(b);
    }
    if (trackBlockCounts) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Adjusting safe-mode totals for deletion."
            + "decreasing safeBlocks by " + numRemovedSafe
            + ", totalBlocks by " + numRemovedComplete);
      }
      adjustSafeModeBlockTotals(-numRemovedSafe, -numRemovedComplete);
    }
  }

  /**
   * @see SafeModeInfo#shouldIncrementallyTrackBlocks
   */
  private boolean isSafeModeTrackingBlocks() {
    if (!haEnabled) {
      // Never track blocks incrementally in non-HA code.
      return false;
    }
    SafeModeInfo sm = this.safeMode;
    return sm != null && sm.shouldIncrementallyTrackBlocks();
  }

  /**
   * Get the file info for a specific file.
   *
   * @param src The string representation of the path to the file
   * @param resolveLink whether to throw UnresolvedLinkException
   *        if src refers to a symlink
   *
   * @throws AccessControlException if access is denied
   * @throws UnresolvedLinkException if a symlink is encountered.
   *
   * @return object containing information regarding the file
   *         or null if file not found
   * @throws StandbyException
   */
  HdfsFileStatus getFileInfo(final String src, boolean resolveLink)
    throws IOException {
    checkOperation(OperationCategory.READ);
    HdfsFileStatus stat = null;
    readLock();
    try {
      // Re-check under the lock (HA state may have changed).
      checkOperation(OperationCategory.READ);
      stat = FSDirStatAndListingOp.getFileInfo(dir, src, resolveLink);
    } catch (AccessControlException e) {
      logAuditEvent(false, "getfileinfo", src);
      throw e;
    } finally {
      readUnlock();
    }
    logAuditEvent(true, "getfileinfo", src);
    return stat;
  }

  /**
   * Returns true if the file is closed
   */
  boolean isFileClosed(final String src) throws IOException {
    checkOperation(OperationCategory.READ);
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      return FSDirStatAndListingOp.isFileClosed(dir, src);
    } catch (AccessControlException e) {
      // Only the failure is audited here; success auditing is left to the
      // caller layer (no success audit event emitted in this method).
      logAuditEvent(false, "isFileClosed", src);
      throw e;
    } finally {
      readUnlock();
    }
  }

  /**
   * Create all the necessary directories
   */
  boolean mkdirs(String src, PermissionStatus permissions,
      boolean createParent) throws IOException {
    HdfsFileStatus auditStat = null;
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot create directory " + src);
      auditStat = FSDirMkdirOp.mkdirs(this, src, permissions, createParent);
    } catch (AccessControlException e) {
      logAuditEvent(false, "mkdirs", src);
      throw e;
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the FSN write lock.
    getEditLog().logSync();
    logAuditEvent(true, "mkdirs", src, null, auditStat);
    return true;
  }

  /**
   * Get the content summary for a specific file/dir.
   *
   * @param src The string representation of the path to the file
   *
   * @throws AccessControlException if access is denied
   * @throws UnresolvedLinkException if a symlink is encountered.
   * @throws FileNotFoundException if no file exists
   * @throws StandbyException
   * @throws IOException for issues with writing to the audit log
   *
   * @return object containing information regarding the file
   *         or null if file not found
   */
  ContentSummary getContentSummary(final String src) throws IOException {
    // NOTE(review): unlike the other read operations in this class, there is
    // no checkOperation(OperationCategory.READ) here -- confirm intentional.
    readLock();
    boolean success = true;
    try {
      return FSDirStatAndListingOp.getContentSummary(dir, src);
    } catch (AccessControlException ace) {
      success = false;
      throw ace;
    } finally {
      readUnlock();
      logAuditEvent(success, "contentSummary", src);
    }
  }

  /**
   * Set the namespace quota and storage space quota for a directory.
   * See {@link ClientProtocol#setQuota(String, long, long, StorageType)} for the
   * contract.
   *
   * Note: This does not support ".inodes" relative path.
   */
  void setQuota(String src, long nsQuota, long ssQuota, StorageType type)
      throws IOException {
    checkOperation(OperationCategory.WRITE);
    writeLock();
    boolean success = false;
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set quota on " + src);
      FSDirAttrOp.setQuota(dir, src, nsQuota, ssQuota, type);
      success = true;
    } finally {
      writeUnlock();
      // Sync only when the quota edit was actually logged.
      if (success) {
        getEditLog().logSync();
      }
      logAuditEvent(success, "setQuota", src);
    }
  }

  /** Persist all metadata about this file.
   * @param src The string representation of the path
   * @param fileId The inode ID that we're fsyncing.  Older clients will pass
   *               INodeId.GRANDFATHER_INODE_ID here.
   * @param clientName The string representation of the client
   * @param lastBlockLength The length of the last block
   *                        under construction reported from client.
   * @throws IOException if path does not exist
   */
  void fsync(String src, long fileId, String clientName, long lastBlockLength)
      throws IOException {
    NameNode.stateChangeLog.info("BLOCK* fsync: " + src + " for " + clientName);
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);

    FSPermissionChecker pc = getPermissionChecker();
    waitForLoadingFSImage();
    writeLock();
    try {
      // Re-check under the lock (HA state may have changed).
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot fsync file " + src);
      src = dir.resolvePath(pc, src, pathComponents);
      final INode inode;
      if (fileId == INodeId.GRANDFATHER_INODE_ID) {
        // Older clients may not have given us an inode ID to work with.
        // In this case, we have to try to resolve the path and hope it
        // hasn't changed or been deleted since the file was opened for write.
        inode = dir.getINode(src);
      } else {
        inode = dir.getInode(fileId);
        if (inode != null) src = inode.getFullPathName();
      }
      final INodeFile pendingFile = checkLease(src, clientName, inode, fileId);
      if (lastBlockLength > 0) {
        // Record the client-reported length of the still-being-written block.
        pendingFile.getFileUnderConstructionFeature().updateLengthOfLastBlock(
            pendingFile, lastBlockLength);
      }
      persistBlocks(src, pendingFile, false);
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the FSN write lock.
    getEditLog().logSync();
  }

  /**
   * Move a file that is being written to be immutable.
   * @param src The filename
   * @param lease The lease for the client creating the file
   * @param recoveryLeaseHolder reassign lease to this holder if the last block
   *        needs recovery; keep current holder if null.
   * @throws AlreadyBeingCreatedException if file is waiting to achieve minimal
   *         replication;<br>
   *         RecoveryInProgressException if lease recovery is in progress.<br>
   *         IOException in case of an error.
   * @return true  if file has been successfully finalized and closed or
   *         false if block recovery has been initiated. Since the lease owner
   *         has been changed and logged, caller should call logSync().
   */
  boolean internalReleaseLease(Lease lease, String src, INodesInPath iip,
      String recoveryLeaseHolder) throws IOException {
    LOG.info("Recovering " + lease + ", src=" + src);
    assert !isInSafeMode();
    assert hasWriteLock();

    final INodeFile pendingFile = iip.getLastINode().asFile();
    int nrBlocks = pendingFile.numBlocks();
    BlockInfoContiguous[] blocks = pendingFile.getBlocks();

    // Count the leading run of COMPLETE blocks; after the loop, curBlock is
    // the first non-COMPLETE block (or the last block if all are COMPLETE).
    int nrCompleteBlocks;
    BlockInfoContiguous curBlock = null;
    for(nrCompleteBlocks = 0; nrCompleteBlocks < nrBlocks; nrCompleteBlocks++) {
      curBlock = blocks[nrCompleteBlocks];
      if(!curBlock.isComplete())
        break;
      assert blockManager.checkMinReplication(curBlock) :
              "A COMPLETE block is not minimally replicated in " + src;
    }

    // If there are no incomplete blocks associated with this file,
    // then reap lease immediately and close the file.
    if(nrCompleteBlocks == nrBlocks) {
      finalizeINodeFileUnderConstruction(src, pendingFile,
          iip.getLatestSnapshotId());
      NameNode.stateChangeLog.warn("BLOCK*"
        + " internalReleaseLease: All existing blocks are COMPLETE,"
        + " lease removed, file closed.");
      return true;  // closed!
    }

    // Only the last and the penultimate blocks may be in non COMPLETE state.
    // If the penultimate block is not COMPLETE, then it must be COMMITTED.
    if(nrCompleteBlocks < nrBlocks - 2 ||
       nrCompleteBlocks == nrBlocks - 2 &&
         curBlock != null &&
         curBlock.getBlockUCState() != BlockUCState.COMMITTED) {
      final String message = "DIR* NameSystem.internalReleaseLease: "
        + "attempt to release a create lock on "
        + src + " but file is already closed.";
      NameNode.stateChangeLog.warn(message);
      throw new IOException(message);
    }

    // The last block is not COMPLETE, and
    // that the penultimate block if exists is either COMPLETE or COMMITTED
    final BlockInfoContiguous lastBlock = pendingFile.getLastBlock();
    BlockUCState lastBlockState = lastBlock.getBlockUCState();
    BlockInfoContiguous penultimateBlock = pendingFile.getPenultimateBlock();

    // If penultimate block doesn't exist then its minReplication is met
    boolean penultimateBlockMinReplication = penultimateBlock == null ? true :
        blockManager.checkMinReplication(penultimateBlock);

    switch(lastBlockState) {
    case COMPLETE:
      assert false : "Already checked that the last block is incomplete";
      break;
    case COMMITTED:
      // Close file if committed blocks are minimally replicated
      if(penultimateBlockMinReplication &&
          blockManager.checkMinReplication(lastBlock)) {
        finalizeINodeFileUnderConstruction(src, pendingFile,
            iip.getLatestSnapshotId());
        NameNode.stateChangeLog.warn("BLOCK*"
          + " internalReleaseLease: Committed blocks are minimally replicated,"
          + " lease removed, file closed.");
        return true;  // closed!
      }
      // Cannot close file right now, since some blocks
      // are not yet minimally replicated.
      // This may potentially cause infinite loop in lease recovery
      // if there are no valid replicas on data-nodes.
      String message = "DIR* NameSystem.internalReleaseLease: " +
          "Failed to release lease for file " + src +
          ". Committed blocks are waiting to be minimally replicated." +
          " Try again later.";
      NameNode.stateChangeLog.warn(message);
      throw new AlreadyBeingCreatedException(message);
    case UNDER_CONSTRUCTION:
    case UNDER_RECOVERY:
      final BlockInfoContiguousUnderConstruction uc = (BlockInfoContiguousUnderConstruction)lastBlock;
      // determine if last block was intended to be truncated
      Block recoveryBlock = uc.getTruncateBlock();
      boolean truncateRecovery = recoveryBlock != null;
      // copy-on-truncate: the recovery target is a different block id than
      // the last block (the truncated data is copied into a new block).
      boolean copyOnTruncate = truncateRecovery &&
          recoveryBlock.getBlockId() != uc.getBlockId();
      assert !copyOnTruncate ||
          recoveryBlock.getBlockId() < uc.getBlockId() &&
          recoveryBlock.getGenerationStamp() < uc.getGenerationStamp() &&
          recoveryBlock.getNumBytes() > uc.getNumBytes() :
            "wrong recoveryBlock";

      // setup the last block locations from the blockManager if not known
      if (uc.getNumExpectedLocations() == 0) {
        uc.setExpectedLocations(blockManager.getStorages(lastBlock));
      }

      if (uc.getNumExpectedLocations() == 0 && uc.getNumBytes() == 0) {
        // There is no datanode reported to this block.
        // may be client have crashed before writing data to pipeline.
        // This blocks doesn't need any recovery.
        // We can remove this block and close the file.
        pendingFile.removeLastBlock(lastBlock);
        finalizeINodeFileUnderConstruction(src, pendingFile,
            iip.getLatestSnapshotId());
        NameNode.stateChangeLog.warn("BLOCK* internalReleaseLease: "
            + "Removed empty last block and closed file.");
        return true;
      }
      // start recovery of the last block for this file
      long blockRecoveryId = nextGenerationStamp(blockIdManager.isLegacyBlock(uc));
      lease = reassignLease(lease, src, recoveryLeaseHolder, pendingFile);
      if(copyOnTruncate) {
        uc.setGenerationStamp(blockRecoveryId);
      } else if(truncateRecovery) {
        recoveryBlock.setGenerationStamp(blockRecoveryId);
      }
      uc.initializeBlockRecovery(blockRecoveryId);
      leaseManager.renewLease(lease);
      // Cannot close file right now, since the last block requires recovery.
      // This may potentially cause infinite loop in lease recovery
      // if there are no valid replicas on data-nodes.
      NameNode.stateChangeLog.warn(
          "DIR* NameSystem.internalReleaseLease: " +
          "File " + src + " has not been closed." +
          " Lease recovery is in progress. " +
          "RecoveryId = " + blockRecoveryId + " for block " + lastBlock);
      break;
    }
    return false;
  }

  /**
   * Reassign the lease to {@code newHolder} (no-op when null), logging the
   * change to the edit log. The transaction is not synced here.
   */
  private Lease reassignLease(Lease lease, String src, String newHolder,
      INodeFile pendingFile) {
    assert hasWriteLock();
    if(newHolder == null)
      return lease;
    // The following transaction is not synced. Make sure it's sync'ed later.
    logReassignLease(lease.getHolder(), src, newHolder);
    return reassignLeaseInternal(lease, src, newHolder, pendingFile);
  }

  /** Apply a lease reassignment in memory, without edit logging. */
  Lease reassignLeaseInternal(Lease lease, String src, String newHolder,
      INodeFile pendingFile) {
    assert hasWriteLock();
    pendingFile.getFileUnderConstructionFeature().setClientName(newHolder);
    return leaseManager.reassignLease(lease, src, newHolder);
  }

  /**
   * Commit (and complete, if minimally replicated) the last block of an
   * under-construction file and reclaim any over-reserved space.
   */
  private void commitOrCompleteLastBlock(final INodeFile fileINode,
      final INodesInPath iip, final Block commitBlock) throws IOException {
    assert hasWriteLock();
    Preconditions.checkArgument(fileINode.isUnderConstruction());
    if (!blockManager.commitOrCompleteLastBlock(fileINode, commitBlock)) {
      return;
    }

    // Adjust disk space consumption if required
    final long diff = fileINode.getPreferredBlockSize() - commitBlock.getNumBytes();
    if (diff > 0) {
      try {
        dir.updateSpaceConsumed(iip, 0, -diff, fileINode.getFileReplication());
      } catch (IOException e) {
        LOG.warn("Unexpected exception while updating disk space.", e);
      }
    }
  }

  /**
   * Turn an under-construction file into a closed, immutable file:
   * drop the lease, record the modification against the latest snapshot,
   * strip the under-construction feature, and persist the close.
   */
  private void finalizeINodeFileUnderConstruction(String src,
      INodeFile pendingFile, int latestSnapshot) throws IOException {
    assert hasWriteLock();

    FileUnderConstructionFeature uc = pendingFile.getFileUnderConstructionFeature();
    Preconditions.checkArgument(uc != null);
    leaseManager.removeLease(uc.getClientName(), src);

    pendingFile.recordModification(latestSnapshot);

    // The file is no longer pending.
    // Create permanent INode, update blocks. No need to replace the inode here
    // since we just remove the uc feature from pendingFile
    pendingFile.toCompleteFile(now());

    waitForLoadingFSImage();
    // close file and persist block allocations for this file
    closeFile(src, pendingFile);

    blockManager.checkReplication(pendingFile);
  }

  @VisibleForTesting
  BlockInfoContiguous getStoredBlock(Block block) {
    return blockManager.getStoredBlock(block);
  }

  @Override
  public boolean isInSnapshot(BlockInfoContiguousUnderConstruction blockUC) {
    assert hasReadLock();
    final BlockCollection bc = blockUC.getBlockCollection();
    if (bc == null || !(bc instanceof INodeFile)
        || !bc.isUnderConstruction()) {
      return false;
    }

    String fullName = bc.getName();
    try {
      if (fullName != null && fullName.startsWith(Path.SEPARATOR)
          && dir.getINode(fullName) == bc) {
        // If file exists in normal path then no need to look in snapshot
        return false;
      }
    } catch (UnresolvedLinkException e) {
      LOG.error("Error while resolving the link : " + fullName, e);
      return false;
    }
    /*
     * 1. if bc is under construction and also with snapshot, and
     * bc is not in the current fsdirectory tree, bc must represent a snapshot
     * file.
     * 2. if fullName is not an absolute path, bc cannot be existent in the
     * current fsdirectory tree.
     * 3. if bc is not the current node associated with fullName, bc must be a
     * snapshot inode.
     */
    return true;
  }

  /**
   * Called by a datanode after block recovery: update (or delete) the
   * recovered last block of a file and, if requested, close the file.
   */
  void commitBlockSynchronization(ExtendedBlock oldBlock,
      long newgenerationstamp, long newlength,
      boolean closeFile, boolean deleteblock, DatanodeID[] newtargets,
      String[] newtargetstorages) throws IOException {
    LOG.info("commitBlockSynchronization(oldBlock=" + oldBlock
             + ", newgenerationstamp=" + newgenerationstamp
             + ", newlength=" + newlength
             + ", newtargets=" + Arrays.asList(newtargets)
             + ", closeFile=" + closeFile
             + ", deleteBlock=" + deleteblock
             + ")");
    checkOperation(OperationCategory.WRITE);
    String src = "";
    waitForLoadingFSImage();
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      // If a DN tries to commit to the standby, the recovery will
      // fail, and the next retry will succeed on the new NN.

      checkNameNodeSafeMode(
          "Cannot commitBlockSynchronization while in safe mode");
      final BlockInfoContiguous storedBlock = getStoredBlock(
          ExtendedBlock.getLocalBlock(oldBlock));
      if (storedBlock == null) {
        if (deleteblock) {
          // This may be a retry attempt so ignore the failure
          // to locate the block.
          if (LOG.isDebugEnabled()) {
            LOG.debug("Block (=" + oldBlock + ") not found");
          }
          return;
        } else {
          throw new IOException("Block (=" + oldBlock + ") not found");
        }
      }
      final long oldGenerationStamp = storedBlock.getGenerationStamp();
      final long oldNumBytes = storedBlock.getNumBytes();
      //
      // The implementation of delete operation (see @deleteInternal method)
      // first removes the file paths from namespace, and delays the removal
      // of blocks to later time for better performance. When
      // commitBlockSynchronization (this method) is called in between, the
      // blockCollection of storedBlock could have been assigned to null by
      // the delete operation, throw IOException here instead of NPE; if the
      // file path is already removed from namespace by the delete operation,
      // throw FileNotFoundException here, so not to proceed to the end of
      // this method to add a CloseOp to the edit log for an already deleted
      // file (See HDFS-6825).
      //
      BlockCollection blockCollection = storedBlock.getBlockCollection();
      if (blockCollection == null) {
        throw new IOException("The blockCollection of " + storedBlock
            + " is null, likely because the file owning this block was"
            + " deleted and the block removal is delayed");
      }
      INodeFile iFile = ((INode)blockCollection).asFile();
      if (isFileDeleted(iFile)) {
        throw new FileNotFoundException("File not found: "
            + iFile.getFullPathName() + ", likely due to delayed block"
            + " removal");
      }
      if ((!iFile.isUnderConstruction() || storedBlock.isComplete()) &&
          iFile.getLastBlock().isComplete()) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Unexpected block (=" + oldBlock
                    + ") since the file (=" + iFile.getLocalName()
                    + ") is not under construction");
        }
        return;
      }

      // The last block; when a copy-on-truncate recovery is in progress this
      // differs from storedBlock (detected via copyTruncate below).
      BlockInfoContiguousUnderConstruction truncatedBlock =
          (BlockInfoContiguousUnderConstruction) iFile.getLastBlock();
      long recoveryId = truncatedBlock.getBlockRecoveryId();
      boolean copyTruncate =
          truncatedBlock.getBlockId() != storedBlock.getBlockId();
      if(recoveryId != newgenerationstamp) {
        throw new IOException("The recovery id " + newgenerationstamp
                              + " does not match current recovery id "
                              + recoveryId + " for block " + oldBlock);
      }

      if (deleteblock) {
        Block blockToDel = ExtendedBlock.getLocalBlock(oldBlock);
        boolean remove = iFile.removeLastBlock(blockToDel);
        if (remove) {
          blockManager.removeBlock(storedBlock);
        }
      }
      else {
        // update last block
        if(!copyTruncate) {
          storedBlock.setGenerationStamp(newgenerationstamp);
          storedBlock.setNumBytes(newlength);
        }

        // find the DatanodeDescriptor objects
        ArrayList<DatanodeDescriptor> trimmedTargets =
            new ArrayList<DatanodeDescriptor>(newtargets.length);
        ArrayList<String> trimmedStorages =
            new ArrayList<String>(newtargets.length);
        if (newtargets.length > 0) {
          for (int i = 0; i < newtargets.length; ++i) {
            // try to get targetNode; targets unknown to the DatanodeManager
            // are silently dropped ("trimmed").
            DatanodeDescriptor targetNode =
                blockManager.getDatanodeManager().getDatanode(newtargets[i]);
            if (targetNode != null) {
              trimmedTargets.add(targetNode);
              trimmedStorages.add(newtargetstorages[i]);
            } else if (LOG.isDebugEnabled()) {
              LOG.debug("DatanodeDescriptor (=" + newtargets[i] + ") not found");
            }
          }
        }
        if ((closeFile) && !trimmedTargets.isEmpty()) {
          // the file is getting closed. Insert block locations into blockManager.
          // Otherwise fsck will report these blocks as MISSING, especially if the
          // blocksReceived from Datanodes take a long time to arrive.
          for (int i = 0; i < trimmedTargets.size(); i++) {
            DatanodeStorageInfo storageInfo =
                trimmedTargets.get(i).getStorageInfo(trimmedStorages.get(i));
            if (storageInfo != null) {
              if(copyTruncate) {
                storageInfo.addBlock(truncatedBlock);
              } else {
                storageInfo.addBlock(storedBlock);
              }
            }
          }
        }

        // add pipeline locations into the INodeUnderConstruction
        DatanodeStorageInfo[] trimmedStorageInfos =
            blockManager.getDatanodeManager().getDatanodeStorageInfos(
                trimmedTargets.toArray(new DatanodeID[trimmedTargets.size()]),
                trimmedStorages.toArray(new String[trimmedStorages.size()]));
        if(copyTruncate) {
          iFile.setLastBlock(truncatedBlock, trimmedStorageInfos);
        } else {
          iFile.setLastBlock(storedBlock, trimmedStorageInfos);
          if (closeFile) {
            // Replicas still carrying the old genstamp/length are now stale.
            blockManager.markBlockReplicasAsCorrupt(storedBlock,
                oldGenerationStamp, oldNumBytes, trimmedStorageInfos);
          }
        }
      }

      if (closeFile) {
        if(copyTruncate) {
          src = closeFileCommitBlocks(iFile, truncatedBlock);
          if(!iFile.isBlockInLatestSnapshot(storedBlock)) {
            blockManager.removeBlock(storedBlock);
          }
        } else {
          src = closeFileCommitBlocks(iFile, storedBlock);
        }
      } else {
        // If this commit does not want to close the file, persist blocks
        src = iFile.getFullPathName();
        persistBlocks(src, iFile, false);
      }
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    if (closeFile) {
      LOG.info("commitBlockSynchronization(oldBlock=" + oldBlock
          + ", file=" + src
          + ", newgenerationstamp=" + newgenerationstamp
          + ", newlength=" + newlength
          + ", newtargets=" + Arrays.asList(newtargets) + ") successful");
    } else {
      LOG.info("commitBlockSynchronization(" + oldBlock + ") successful");
    }
  }

  /**
   * @param pendingFile open file that needs to be closed
   * @param storedBlock last block
   * @return
Path of the file that was closed. 4389 * @throws IOException on error 4390 */ 4391 @VisibleForTesting 4392 String closeFileCommitBlocks(INodeFile pendingFile, BlockInfoContiguous storedBlock) 4393 throws IOException { 4394 final INodesInPath iip = INodesInPath.fromINode(pendingFile); 4395 final String src = iip.getPath(); 4396 4397 // commit the last block and complete it if it has minimum replicas 4398 commitOrCompleteLastBlock(pendingFile, iip, storedBlock); 4399 4400 //remove lease, close file 4401 finalizeINodeFileUnderConstruction(src, pendingFile, 4402 Snapshot.findLatestSnapshot(pendingFile, Snapshot.CURRENT_STATE_ID)); 4403 4404 return src; 4405 } 4406 4407 /** 4408 * Renew the lease(s) held by the given client 4409 */ 4410 void renewLease(String holder) throws IOException { 4411 checkOperation(OperationCategory.WRITE); 4412 readLock(); 4413 try { 4414 checkOperation(OperationCategory.WRITE); 4415 checkNameNodeSafeMode("Cannot renew lease for " + holder); 4416 leaseManager.renewLease(holder); 4417 } finally { 4418 readUnlock(); 4419 } 4420 } 4421 4422 /** 4423 * Get a partial listing of the indicated directory 4424 * 4425 * @param src the directory name 4426 * @param startAfter the name to start after 4427 * @param needLocation if blockLocations need to be returned 4428 * @return a partial listing starting after startAfter 4429 * 4430 * @throws AccessControlException if access is denied 4431 * @throws UnresolvedLinkException if symbolic link is encountered 4432 * @throws IOException if other I/O error occurred 4433 */ 4434 DirectoryListing getListing(String src, byte[] startAfter, 4435 boolean needLocation) 4436 throws IOException { 4437 checkOperation(OperationCategory.READ); 4438 DirectoryListing dl = null; 4439 readLock(); 4440 try { 4441 checkOperation(NameNode.OperationCategory.READ); 4442 dl = FSDirStatAndListingOp.getListingInt(dir, src, startAfter, 4443 needLocation); 4444 } catch (AccessControlException e) { 4445 logAuditEvent(false, "listStatus", 
src); 4446 throw e; 4447 } finally { 4448 readUnlock(); 4449 } 4450 logAuditEvent(true, "listStatus", src); 4451 return dl; 4452 } 4453 4454 ///////////////////////////////////////////////////////// 4455 // 4456 // These methods are called by datanodes 4457 // 4458 ///////////////////////////////////////////////////////// 4459 /** 4460 * Register Datanode. 4461 * <p> 4462 * The purpose of registration is to identify whether the new datanode 4463 * serves a new data storage, and will report new data block copies, 4464 * which the namenode was not aware of; or the datanode is a replacement 4465 * node for the data storage that was previously served by a different 4466 * or the same (in terms of host:port) datanode. 4467 * The data storages are distinguished by their storageIDs. When a new 4468 * data storage is reported the namenode issues a new unique storageID. 4469 * <p> 4470 * Finally, the namenode returns its namespaceID as the registrationID 4471 * for the datanodes. 4472 * namespaceID is a persistent attribute of the name space. 4473 * The registrationID is checked every time the datanode is communicating 4474 * with the namenode. 4475 * Datanodes with inappropriate registrationID are rejected. 4476 * If the namenode stops, and then restarts it can restore its 4477 * namespaceID and will continue serving the datanodes that has previously 4478 * registered with the namenode without restarting the whole cluster. 4479 * 4480 * @see org.apache.hadoop.hdfs.server.datanode.DataNode 4481 */ 4482 void registerDatanode(DatanodeRegistration nodeReg) throws IOException { 4483 writeLock(); 4484 try { 4485 getBlockManager().getDatanodeManager().registerDatanode(nodeReg); 4486 checkSafeMode(); 4487 } finally { 4488 writeUnlock(); 4489 } 4490 } 4491 4492 /** 4493 * Get registrationID for datanodes based on the namespaceID. 
4494 * 4495 * @see #registerDatanode(DatanodeRegistration) 4496 * @return registration ID 4497 */ 4498 String getRegistrationID() { 4499 return Storage.getRegistrationID(getFSImage().getStorage()); 4500 } 4501 4502 /** 4503 * The given node has reported in. This method should: 4504 * 1) Record the heartbeat, so the datanode isn't timed out 4505 * 2) Adjust usage stats for future block allocation 4506 * 4507 * If a substantial amount of time passed since the last datanode 4508 * heartbeat then request an immediate block report. 4509 * 4510 * @return an array of datanode commands 4511 * @throws IOException 4512 */ 4513 HeartbeatResponse handleHeartbeat(DatanodeRegistration nodeReg, 4514 StorageReport[] reports, long cacheCapacity, long cacheUsed, 4515 int xceiverCount, int xmitsInProgress, int failedVolumes, 4516 VolumeFailureSummary volumeFailureSummary) throws IOException { 4517 readLock(); 4518 try { 4519 //get datanode commands 4520 final int maxTransfer = blockManager.getMaxReplicationStreams() 4521 - xmitsInProgress; 4522 DatanodeCommand[] cmds = blockManager.getDatanodeManager().handleHeartbeat( 4523 nodeReg, reports, blockPoolId, cacheCapacity, cacheUsed, 4524 xceiverCount, maxTransfer, failedVolumes, volumeFailureSummary); 4525 4526 //create ha status 4527 final NNHAStatusHeartbeat haState = new NNHAStatusHeartbeat( 4528 haContext.getState().getServiceState(), 4529 getFSImage().getLastAppliedOrWrittenTxId()); 4530 4531 return new HeartbeatResponse(cmds, haState, rollingUpgradeInfo); 4532 } finally { 4533 readUnlock(); 4534 } 4535 } 4536 4537 /** 4538 * Returns whether or not there were available resources at the last check of 4539 * resources. 4540 * 4541 * @return true if there were sufficient resources available, false otherwise. 4542 */ 4543 boolean nameNodeHasResourcesAvailable() { 4544 return hasResourcesAvailable; 4545 } 4546 4547 /** 4548 * Perform resource checks and cache the results. 
4549 */ 4550 void checkAvailableResources() { 4551 Preconditions.checkState(nnResourceChecker != null, 4552 "nnResourceChecker not initialized"); 4553 hasResourcesAvailable = nnResourceChecker.hasAvailableDiskSpace(); 4554 } 4555 4556 /** 4557 * Persist the block list for the inode. 4558 * @param path 4559 * @param file 4560 * @param logRetryCache 4561 */ 4562 private void persistBlocks(String path, INodeFile file, 4563 boolean logRetryCache) { 4564 assert hasWriteLock(); 4565 Preconditions.checkArgument(file.isUnderConstruction()); 4566 getEditLog().logUpdateBlocks(path, file, logRetryCache); 4567 if(NameNode.stateChangeLog.isDebugEnabled()) { 4568 NameNode.stateChangeLog.debug("persistBlocks: " + path 4569 + " with " + file.getBlocks().length + " blocks is persisted to" + 4570 " the file system"); 4571 } 4572 } 4573 4574 /** 4575 * Close file. 4576 * @param path 4577 * @param file 4578 */ 4579 private void closeFile(String path, INodeFile file) { 4580 assert hasWriteLock(); 4581 waitForLoadingFSImage(); 4582 // file is closed 4583 getEditLog().logCloseFile(path, file); 4584 if (NameNode.stateChangeLog.isDebugEnabled()) { 4585 NameNode.stateChangeLog.debug("closeFile: " 4586 +path+" with "+ file.getBlocks().length 4587 +" blocks is persisted to the file system"); 4588 } 4589 } 4590 4591 /** 4592 * Periodically calls hasAvailableResources of NameNodeResourceChecker, and if 4593 * there are found to be insufficient resources available, causes the NN to 4594 * enter safe mode. If resources are later found to have returned to 4595 * acceptable levels, this daemon will cause the NN to exit safe mode. 4596 */ 4597 class NameNodeResourceMonitor implements Runnable { 4598 boolean shouldNNRmRun = true; 4599 @Override 4600 public void run () { 4601 try { 4602 while (fsRunning && shouldNNRmRun) { 4603 checkAvailableResources(); 4604 if(!nameNodeHasResourcesAvailable()) { 4605 String lowResourcesMsg = "NameNode low on available disk space. 
"; 4606 if (!isInSafeMode()) { 4607 LOG.warn(lowResourcesMsg + "Entering safe mode."); 4608 } else { 4609 LOG.warn(lowResourcesMsg + "Already in safe mode."); 4610 } 4611 enterSafeMode(true); 4612 } 4613 try { 4614 Thread.sleep(resourceRecheckInterval); 4615 } catch (InterruptedException ie) { 4616 // Deliberately ignore 4617 } 4618 } 4619 } catch (Exception e) { 4620 FSNamesystem.LOG.error("Exception in NameNodeResourceMonitor: ", e); 4621 } 4622 } 4623 4624 public void stopMonitor() { 4625 shouldNNRmRun = false; 4626 } 4627 } 4628 4629 class NameNodeEditLogRoller implements Runnable { 4630 4631 private boolean shouldRun = true; 4632 private final long rollThreshold; 4633 private final long sleepIntervalMs; 4634 4635 public NameNodeEditLogRoller(long rollThreshold, int sleepIntervalMs) { 4636 this.rollThreshold = rollThreshold; 4637 this.sleepIntervalMs = sleepIntervalMs; 4638 } 4639 4640 @Override 4641 public void run() { 4642 while (fsRunning && shouldRun) { 4643 try { 4644 FSEditLog editLog = getFSImage().getEditLog(); 4645 long numEdits = 4646 editLog.getLastWrittenTxId() - editLog.getCurSegmentTxId(); 4647 if (numEdits > rollThreshold) { 4648 FSNamesystem.LOG.info("NameNode rolling its own edit log because" 4649 + " number of edits in open segment exceeds threshold of " 4650 + rollThreshold); 4651 rollEditLog(); 4652 } 4653 } catch (Exception e) { 4654 FSNamesystem.LOG.error("Swallowing exception in " 4655 + NameNodeEditLogRoller.class.getSimpleName() + ":", e); 4656 } 4657 try { 4658 Thread.sleep(sleepIntervalMs); 4659 } catch (InterruptedException e) { 4660 FSNamesystem.LOG.info(NameNodeEditLogRoller.class.getSimpleName() 4661 + " was interrupted, exiting"); 4662 break; 4663 } 4664 } 4665 } 4666 4667 public void stop() { 4668 shouldRun = false; 4669 } 4670 } 4671 4672 /** 4673 * Daemon to periodically scan the namespace for lazyPersist files 4674 * with missing blocks and unlink them. 
4675 */ 4676 class LazyPersistFileScrubber implements Runnable { 4677 private volatile boolean shouldRun = true; 4678 final int scrubIntervalSec; 4679 public LazyPersistFileScrubber(final int scrubIntervalSec) { 4680 this.scrubIntervalSec = scrubIntervalSec; 4681 } 4682 4683 /** 4684 * Periodically go over the list of lazyPersist files with missing 4685 * blocks and unlink them from the namespace. 4686 */ 4687 private void clearCorruptLazyPersistFiles() 4688 throws SafeModeException, AccessControlException, 4689 UnresolvedLinkException, IOException { 4690 4691 BlockStoragePolicy lpPolicy = blockManager.getStoragePolicy("LAZY_PERSIST"); 4692 4693 List<BlockCollection> filesToDelete = new ArrayList<BlockCollection>(); 4694 4695 writeLock(); 4696 4697 try { 4698 final Iterator<Block> it = blockManager.getCorruptReplicaBlockIterator(); 4699 4700 while (it.hasNext()) { 4701 Block b = it.next(); 4702 BlockInfoContiguous blockInfo = blockManager.getStoredBlock(b); 4703 if (blockInfo.getBlockCollection().getStoragePolicyID() == lpPolicy.getId()) { 4704 filesToDelete.add(blockInfo.getBlockCollection()); 4705 } 4706 } 4707 4708 for (BlockCollection bc : filesToDelete) { 4709 LOG.warn("Removing lazyPersist file " + bc.getName() + " with no replicas."); 4710 BlocksMapUpdateInfo toRemoveBlocks = 4711 FSDirDeleteOp.deleteInternal( 4712 FSNamesystem.this, bc.getName(), 4713 INodesInPath.fromINode((INodeFile) bc), false); 4714 if (toRemoveBlocks != null) { 4715 removeBlocks(toRemoveBlocks); // Incremental deletion of blocks 4716 } 4717 } 4718 } finally { 4719 writeUnlock(); 4720 } 4721 } 4722 4723 @Override 4724 public void run() { 4725 while (fsRunning && shouldRun) { 4726 try { 4727 clearCorruptLazyPersistFiles(); 4728 Thread.sleep(scrubIntervalSec * 1000); 4729 } catch (InterruptedException e) { 4730 FSNamesystem.LOG.info( 4731 "LazyPersistFileScrubber was interrupted, exiting"); 4732 break; 4733 } catch (Exception e) { 4734 FSNamesystem.LOG.error( 4735 "Ignoring exception in 
LazyPersistFileScrubber:", e); 4736 } 4737 } 4738 } 4739 4740 public void stop() { 4741 shouldRun = false; 4742 } 4743 } 4744 4745 public FSImage getFSImage() { 4746 return fsImage; 4747 } 4748 4749 public FSEditLog getEditLog() { 4750 return getFSImage().getEditLog(); 4751 } 4752 4753 private void checkBlock(ExtendedBlock block) throws IOException { 4754 if (block != null && !this.blockPoolId.equals(block.getBlockPoolId())) { 4755 throw new IOException("Unexpected BlockPoolId " + block.getBlockPoolId() 4756 + " - expected " + blockPoolId); 4757 } 4758 } 4759 4760 @Metric({"MissingBlocks", "Number of missing blocks"}) 4761 public long getMissingBlocksCount() { 4762 // not locking 4763 return blockManager.getMissingBlocksCount(); 4764 } 4765 4766 @Metric({"MissingReplOneBlocks", "Number of missing blocks " + 4767 "with replication factor 1"}) 4768 public long getMissingReplOneBlocksCount() { 4769 // not locking 4770 return blockManager.getMissingReplOneBlocksCount(); 4771 } 4772 4773 @Metric({"ExpiredHeartbeats", "Number of expired heartbeats"}) 4774 public int getExpiredHeartbeats() { 4775 return datanodeStatistics.getExpiredHeartbeats(); 4776 } 4777 4778 @Metric({"TransactionsSinceLastCheckpoint", 4779 "Number of transactions since last checkpoint"}) 4780 public long getTransactionsSinceLastCheckpoint() { 4781 return getEditLog().getLastWrittenTxId() - 4782 getFSImage().getStorage().getMostRecentCheckpointTxId(); 4783 } 4784 4785 @Metric({"TransactionsSinceLastLogRoll", 4786 "Number of transactions since last edit log roll"}) 4787 public long getTransactionsSinceLastLogRoll() { 4788 if (isInStandbyState() || !getEditLog().isSegmentOpen()) { 4789 return 0; 4790 } else { 4791 return getEditLog().getLastWrittenTxId() - 4792 getEditLog().getCurSegmentTxId() + 1; 4793 } 4794 } 4795 4796 @Metric({"LastWrittenTransactionId", "Transaction ID written to the edit log"}) 4797 public long getLastWrittenTransactionId() { 4798 return getEditLog().getLastWrittenTxId(); 4799 } 
4800 4801 @Metric({"LastCheckpointTime", 4802 "Time in milliseconds since the epoch of the last checkpoint"}) 4803 public long getLastCheckpointTime() { 4804 return getFSImage().getStorage().getMostRecentCheckpointTime(); 4805 } 4806 4807 /** @see ClientProtocol#getStats() */ 4808 long[] getStats() { 4809 final long[] stats = datanodeStatistics.getStats(); 4810 stats[ClientProtocol.GET_STATS_UNDER_REPLICATED_IDX] = getUnderReplicatedBlocks(); 4811 stats[ClientProtocol.GET_STATS_CORRUPT_BLOCKS_IDX] = getCorruptReplicaBlocks(); 4812 stats[ClientProtocol.GET_STATS_MISSING_BLOCKS_IDX] = getMissingBlocksCount(); 4813 stats[ClientProtocol.GET_STATS_MISSING_REPL_ONE_BLOCKS_IDX] = 4814 getMissingReplOneBlocksCount(); 4815 return stats; 4816 } 4817 4818 @Override // FSNamesystemMBean 4819 @Metric({"CapacityTotal", 4820 "Total raw capacity of data nodes in bytes"}) 4821 public long getCapacityTotal() { 4822 return datanodeStatistics.getCapacityTotal(); 4823 } 4824 4825 @Metric({"CapacityTotalGB", 4826 "Total raw capacity of data nodes in GB"}) 4827 public float getCapacityTotalGB() { 4828 return DFSUtil.roundBytesToGB(getCapacityTotal()); 4829 } 4830 4831 @Override // FSNamesystemMBean 4832 @Metric({"CapacityUsed", 4833 "Total used capacity across all data nodes in bytes"}) 4834 public long getCapacityUsed() { 4835 return datanodeStatistics.getCapacityUsed(); 4836 } 4837 4838 @Metric({"CapacityUsedGB", 4839 "Total used capacity across all data nodes in GB"}) 4840 public float getCapacityUsedGB() { 4841 return DFSUtil.roundBytesToGB(getCapacityUsed()); 4842 } 4843 4844 @Override // FSNamesystemMBean 4845 @Metric({"CapacityRemaining", "Remaining capacity in bytes"}) 4846 public long getCapacityRemaining() { 4847 return datanodeStatistics.getCapacityRemaining(); 4848 } 4849 4850 @Metric({"CapacityRemainingGB", "Remaining capacity in GB"}) 4851 public float getCapacityRemainingGB() { 4852 return DFSUtil.roundBytesToGB(getCapacityRemaining()); 4853 } 4854 4855 
@Metric({"CapacityUsedNonDFS", 4856 "Total space used by data nodes for non DFS purposes in bytes"}) 4857 public long getCapacityUsedNonDFS() { 4858 return datanodeStatistics.getCapacityUsedNonDFS(); 4859 } 4860 4861 /** 4862 * Total number of connections. 4863 */ 4864 @Override // FSNamesystemMBean 4865 @Metric 4866 public int getTotalLoad() { 4867 return datanodeStatistics.getXceiverCount(); 4868 } 4869 4870 @Metric({ "SnapshottableDirectories", "Number of snapshottable directories" }) 4871 public int getNumSnapshottableDirs() { 4872 return this.snapshotManager.getNumSnapshottableDirs(); 4873 } 4874 4875 @Metric({ "Snapshots", "The number of snapshots" }) 4876 public int getNumSnapshots() { 4877 return this.snapshotManager.getNumSnapshots(); 4878 } 4879 4880 @Override 4881 public String getSnapshotStats() { 4882 Map<String, Object> info = new HashMap<String, Object>(); 4883 info.put("SnapshottableDirectories", this.getNumSnapshottableDirs()); 4884 info.put("Snapshots", this.getNumSnapshots()); 4885 return JSON.toString(info); 4886 } 4887 4888 int getNumberOfDatanodes(DatanodeReportType type) { 4889 readLock(); 4890 try { 4891 return getBlockManager().getDatanodeManager().getDatanodeListForReport( 4892 type).size(); 4893 } finally { 4894 readUnlock(); 4895 } 4896 } 4897 4898 DatanodeInfo[] datanodeReport(final DatanodeReportType type 4899 ) throws AccessControlException, StandbyException { 4900 checkSuperuserPrivilege(); 4901 checkOperation(OperationCategory.UNCHECKED); 4902 readLock(); 4903 try { 4904 checkOperation(OperationCategory.UNCHECKED); 4905 final DatanodeManager dm = getBlockManager().getDatanodeManager(); 4906 final List<DatanodeDescriptor> results = dm.getDatanodeListForReport(type); 4907 4908 DatanodeInfo[] arr = new DatanodeInfo[results.size()]; 4909 for (int i=0; i<arr.length; i++) { 4910 arr[i] = new DatanodeInfo(results.get(i)); 4911 } 4912 return arr; 4913 } finally { 4914 readUnlock(); 4915 } 4916 } 4917 4918 DatanodeStorageReport[] 
getDatanodeStorageReport(final DatanodeReportType type 4919 ) throws AccessControlException, StandbyException { 4920 checkSuperuserPrivilege(); 4921 checkOperation(OperationCategory.UNCHECKED); 4922 readLock(); 4923 try { 4924 checkOperation(OperationCategory.UNCHECKED); 4925 final DatanodeManager dm = getBlockManager().getDatanodeManager(); 4926 final List<DatanodeDescriptor> datanodes = dm.getDatanodeListForReport(type); 4927 4928 DatanodeStorageReport[] reports = new DatanodeStorageReport[datanodes.size()]; 4929 for (int i = 0; i < reports.length; i++) { 4930 final DatanodeDescriptor d = datanodes.get(i); 4931 reports[i] = new DatanodeStorageReport(new DatanodeInfo(d), 4932 d.getStorageReports()); 4933 } 4934 return reports; 4935 } finally { 4936 readUnlock(); 4937 } 4938 } 4939 4940 /** 4941 * Save namespace image. 4942 * This will save current namespace into fsimage file and empty edits file. 4943 * Requires superuser privilege and safe mode. 4944 * 4945 * @throws AccessControlException if superuser privilege is violated. 4946 * @throws IOException if 4947 */ 4948 void saveNamespace() throws AccessControlException, IOException { 4949 checkOperation(OperationCategory.UNCHECKED); 4950 checkSuperuserPrivilege(); 4951 4952 cpLock(); // Block if a checkpointing is in progress on standby. 4953 readLock(); 4954 try { 4955 checkOperation(OperationCategory.UNCHECKED); 4956 4957 if (!isInSafeMode()) { 4958 throw new IOException("Safe mode should be turned ON " 4959 + "in order to create namespace image."); 4960 } 4961 getFSImage().saveNamespace(this); 4962 } finally { 4963 readUnlock(); 4964 cpUnlock(); 4965 } 4966 LOG.info("New namespace image has been created"); 4967 } 4968 4969 /** 4970 * Enables/Disables/Checks restoring failed storage replicas if the storage becomes available again. 4971 * Requires superuser privilege. 4972 * 4973 * @throws AccessControlException if superuser privilege is violated. 
4974 */ 4975 boolean restoreFailedStorage(String arg) throws AccessControlException, 4976 StandbyException { 4977 checkSuperuserPrivilege(); 4978 checkOperation(OperationCategory.UNCHECKED); 4979 cpLock(); // Block if a checkpointing is in progress on standby. 4980 writeLock(); 4981 try { 4982 checkOperation(OperationCategory.UNCHECKED); 4983 4984 // if it is disabled - enable it and vice versa. 4985 if(arg.equals("check")) 4986 return getFSImage().getStorage().getRestoreFailedStorage(); 4987 4988 boolean val = arg.equals("true"); // false if not 4989 getFSImage().getStorage().setRestoreFailedStorage(val); 4990 4991 return val; 4992 } finally { 4993 writeUnlock(); 4994 cpUnlock(); 4995 } 4996 } 4997 4998 Date getStartTime() { 4999 return new Date(startTime); 5000 } 5001 5002 void finalizeUpgrade() throws IOException { 5003 checkSuperuserPrivilege(); 5004 checkOperation(OperationCategory.UNCHECKED); 5005 cpLock(); // Block if a checkpointing is in progress on standby. 5006 writeLock(); 5007 try { 5008 checkOperation(OperationCategory.UNCHECKED); 5009 getFSImage().finalizeUpgrade(this.isHaEnabled() && inActiveState()); 5010 } finally { 5011 writeUnlock(); 5012 cpUnlock(); 5013 } 5014 } 5015 5016 void refreshNodes() throws IOException { 5017 checkOperation(OperationCategory.UNCHECKED); 5018 checkSuperuserPrivilege(); 5019 getBlockManager().getDatanodeManager().refreshNodes(new HdfsConfiguration()); 5020 } 5021 5022 void setBalancerBandwidth(long bandwidth) throws IOException { 5023 checkOperation(OperationCategory.UNCHECKED); 5024 checkSuperuserPrivilege(); 5025 getBlockManager().getDatanodeManager().setBalancerBandwidth(bandwidth); 5026 } 5027 5028 /** 5029 * Persist the new block (the last block of the given file). 
5030 * @param path 5031 * @param file 5032 */ 5033 private void persistNewBlock(String path, INodeFile file) { 5034 Preconditions.checkArgument(file.isUnderConstruction()); 5035 getEditLog().logAddBlock(path, file); 5036 if (NameNode.stateChangeLog.isDebugEnabled()) { 5037 NameNode.stateChangeLog.debug("persistNewBlock: " 5038 + path + " with new block " + file.getLastBlock().toString() 5039 + ", current total block count is " + file.getBlocks().length); 5040 } 5041 } 5042 5043 /** 5044 * SafeModeInfo contains information related to the safe mode. 5045 * <p> 5046 * An instance of {@link SafeModeInfo} is created when the name node 5047 * enters safe mode. 5048 * <p> 5049 * During name node startup {@link SafeModeInfo} counts the number of 5050 * <em>safe blocks</em>, those that have at least the minimal number of 5051 * replicas, and calculates the ratio of safe blocks to the total number 5052 * of blocks in the system, which is the size of blocks in 5053 * {@link FSNamesystem#blockManager}. When the ratio reaches the 5054 * {@link #threshold} it starts the SafeModeMonitor daemon in order 5055 * to monitor whether the safe mode {@link #extension} is passed. 5056 * Then it leaves safe mode and destroys itself. 5057 * <p> 5058 * If safe mode is turned on manually then the number of safe blocks is 5059 * not tracked because the name node is not intended to leave safe mode 5060 * automatically in the case. 5061 * 5062 * @see ClientProtocol#setSafeMode(HdfsConstants.SafeModeAction, boolean) 5063 */ 5064 public class SafeModeInfo { 5065 // configuration fields 5066 /** Safe mode threshold condition %.*/ 5067 private final double threshold; 5068 /** Safe mode minimum number of datanodes alive */ 5069 private final int datanodeThreshold; 5070 /** 5071 * Safe mode extension after the threshold. 5072 * Make it volatile so that getSafeModeTip can read the latest value 5073 * without taking a lock. 
5074 */ 5075 private volatile int extension; 5076 /** Min replication required by safe mode. */ 5077 private final int safeReplication; 5078 /** threshold for populating needed replication queues */ 5079 private final double replQueueThreshold; 5080 // internal fields 5081 /** Time when threshold was reached. 5082 * <br> -1 safe mode is off 5083 * <br> 0 safe mode is on, and threshold is not reached yet 5084 * <br> >0 safe mode is on, but we are in extension period 5085 */ 5086 private long reached = -1; 5087 private long reachedTimestamp = -1; 5088 /** Total number of blocks. */ 5089 int blockTotal; 5090 /** Number of safe blocks. */ 5091 int blockSafe; 5092 /** Number of blocks needed to satisfy safe mode threshold condition */ 5093 private int blockThreshold; 5094 /** Number of blocks needed before populating replication queues */ 5095 private int blockReplQueueThreshold; 5096 /** time of the last status printout */ 5097 private long lastStatusReport = 0; 5098 /** 5099 * Was safemode entered automatically because available resources were low. 5100 * Make it volatile so that getSafeModeTip can read the latest value 5101 * without taking a lock. 5102 */ 5103 private volatile boolean resourcesLow = false; 5104 /** Should safemode adjust its block totals as blocks come in */ 5105 private boolean shouldIncrementallyTrackBlocks = false; 5106 /** counter for tracking startup progress of reported blocks */ 5107 private Counter awaitingReportedBlocksCounter; 5108 5109 /** 5110 * Creates SafeModeInfo when the name node enters 5111 * automatic safe mode at startup. 
5112 * 5113 * @param conf configuration 5114 */ 5115 private SafeModeInfo(Configuration conf) { 5116 this.threshold = conf.getFloat(DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY, 5117 DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT); 5118 if(threshold > 1.0) { 5119 LOG.warn("The threshold value should't be greater than 1, threshold: " + threshold); 5120 } 5121 this.datanodeThreshold = conf.getInt( 5122 DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY, 5123 DFS_NAMENODE_SAFEMODE_MIN_DATANODES_DEFAULT); 5124 this.extension = conf.getInt(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 0); 5125 this.safeReplication = conf.getInt(DFS_NAMENODE_REPLICATION_MIN_KEY, 5126 DFS_NAMENODE_REPLICATION_MIN_DEFAULT); 5127 5128 LOG.info(DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY + " = " + threshold); 5129 LOG.info(DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY + " = " + datanodeThreshold); 5130 LOG.info(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY + " = " + extension); 5131 5132 // default to safe mode threshold (i.e., don't populate queues before leaving safe mode) 5133 this.replQueueThreshold = 5134 conf.getFloat(DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY, 5135 (float) threshold); 5136 this.blockTotal = 0; 5137 this.blockSafe = 0; 5138 } 5139 5140 /** 5141 * In the HA case, the StandbyNode can be in safemode while the namespace 5142 * is modified by the edit log tailer. In this case, the number of total 5143 * blocks changes as edits are processed (eg blocks are added and deleted). 5144 * However, we don't want to do the incremental tracking during the 5145 * startup-time loading process -- only once the initial total has been 5146 * set after the image has been loaded. 5147 */ 5148 private boolean shouldIncrementallyTrackBlocks() { 5149 return shouldIncrementallyTrackBlocks; 5150 } 5151 5152 /** 5153 * Creates SafeModeInfo when safe mode is entered manually, or because 5154 * available resources are low. 5155 * 5156 * The {@link #threshold} is set to 1.5 so that it could never be reached. 
/**
 * Creates SafeModeInfo for a manually-entered or low-resources safe mode.
 * <p>
 * {@link #blockTotal} is set to -1 to indicate that safe mode is manual.
 *
 * @param resourcesLow true when safe mode is entered because NameNode
 *                     resources are low, false for a manual request
 * @see SafeModeInfo
 */
private SafeModeInfo(boolean resourcesLow) {
  // Thresholds are set to unreachable values so automatic exit never fires;
  // only an operator (or freed resources) can take the NN out of this mode.
  this.threshold = 1.5f; // this threshold can never be reached
  this.datanodeThreshold = Integer.MAX_VALUE;
  this.extension = Integer.MAX_VALUE;
  this.safeReplication = Short.MAX_VALUE + 1; // more than maxReplication
  this.replQueueThreshold = 1.5f; // can never be reached
  this.blockTotal = -1;
  this.blockSafe = -1;
  this.resourcesLow = resourcesLow;
  enter();
  reportStatus("STATE* Safe mode is ON.", true);
}

/**
 * Check if safe mode is on.
 * @return true if in safe mode
 */
private synchronized boolean isOn() {
  doConsistencyCheck();
  // reached < 0 means safe mode has been left; >= 0 means on (0 = threshold
  // not yet reached, > 0 = monotonic time at which the threshold was reached).
  return this.reached >= 0;
}

/**
 * Enter safe mode: mark it as ON with the threshold not yet reached.
 */
private void enter() {
  this.reached = 0;
  this.reachedTimestamp = 0;
}

/**
 * Leave safe mode.
 * <p>
 * Check for invalid, under- & over-replicated blocks in the end of startup.
 * Caller must hold the namesystem write lock (repl-queue initialization
 * requires it — see checkMode()).
 */
private synchronized void leave() {
  // if not done yet, initialize replication queues.
  // In the standby, do not populate repl queues
  if (!isPopulatingReplQueues() && shouldPopulateReplQueues()) {
    initializeReplQueues();
  }
  long timeInSafemode = now() - startTime;
  NameNode.stateChangeLog.info("STATE* Leaving safe mode after "
      + timeInSafemode/1000 + " secs");
  NameNode.getNameNodeMetrics().setSafeModeTime((int) timeInSafemode);

  //Log the following only once (when transitioning from ON -> OFF)
  if (reached >= 0) {
    NameNode.stateChangeLog.info("STATE* Safe mode is OFF");
  }
  reached = -1;
  reachedTimestamp = -1;
  // Clearing the outer volatile reference is what makes isInSafeMode()
  // observe OFF from other threads.
  safeMode = null;
  final NetworkTopology nt = blockManager.getDatanodeManager().getNetworkTopology();
  NameNode.stateChangeLog.info("STATE* Network topology has "
      + nt.getNumOfRacks() + " racks and "
      + nt.getNumOfLeaves() + " datanodes");
  NameNode.stateChangeLog.info("STATE* UnderReplicatedBlocks has "
      + blockManager.numOfUnderReplicatedBlocks() + " blocks");

  startSecretManagerIfNecessary();

  // If startup has not yet completed, end safemode phase.
  StartupProgress prog = NameNode.getStartupProgress();
  if (prog.getStatus(Phase.SAFEMODE) != Status.COMPLETE) {
    prog.endStep(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS);
    prog.endPhase(Phase.SAFEMODE);
  }
}

/**
 * Check whether we have reached the threshold for
 * initializing replication queues.
 */
private synchronized boolean canInitializeReplQueues() {
  return shouldPopulateReplQueues()
      && blockSafe >= blockReplQueueThreshold;
}

/**
 * Safe mode can be turned off iff
 * the threshold is reached and
 * the extension time have passed.
 * @return true if can leave or false otherwise.
 */
private synchronized boolean canLeave() {
  // reached == 0: block/datanode thresholds have never been satisfied yet.
  if (reached == 0) {
    return false;
  }

  // Still inside the configured extension window after reaching thresholds.
  if (monotonicNow() - reached < extension) {
    reportStatus("STATE* Safe mode ON, in safe mode extension.", false);
    return false;
  }

  // Thresholds may have regressed (e.g. datanodes died) since 'reached'.
  if (needEnter()) {
    reportStatus("STATE* Safe mode ON, thresholds not met.", false);
    return false;
  }

  return true;
}

/**
 * There is no need to enter safe mode
 * if DFS is empty or {@link #threshold} == 0.
 * Also forces safe mode when NN disk resources are unavailable.
 */
private boolean needEnter() {
  return (threshold != 0 && blockSafe < blockThreshold) ||
    (datanodeThreshold != 0 && getNumLiveDataNodes() < datanodeThreshold) ||
    (!nameNodeHasResourcesAvailable());
}

/**
 * Check and trigger safe mode if needed; drives the state machine
 * enter -> extension -> leave.
 */
private void checkMode() {
  // Have to have write-lock since leaving safemode initializes
  // repl queues, which requires write lock
  assert hasWriteLock();
  if (inTransitionToActive()) {
    return;
  }
  // if smmthread is already running, the block threshold must have been
  // reached before, there is no need to enter the safe mode again
  if (smmthread == null && needEnter()) {
    enter();
    // check if we are ready to initialize replication queues
    if (canInitializeReplQueues() && !isPopulatingReplQueues()
        && !haEnabled) {
      initializeReplQueues();
    }
    reportStatus("STATE* Safe mode ON.", false);
    return;
  }
  // the threshold is reached or was reached before
  if (!isOn() ||                          // safe mode is off
      extension <= 0 || threshold <= 0) { // don't need to wait
    this.leave(); // leave safe mode
    return;
  }
  if (reached > 0) { // threshold has already been reached before
    reportStatus("STATE* Safe mode ON.", false);
    return;
  }
  // Thresholds just became satisfied: record the time and start the monitor
  // that will leave safe mode once the extension elapses.
  reached = monotonicNow();
  reachedTimestamp = now();
  if (smmthread == null) {
    smmthread = new Daemon(new SafeModeMonitor());
    smmthread.start();
    reportStatus("STATE* Safe mode extension entered.", true);
  }

  // check if we are ready to initialize replication queues
  if (canInitializeReplQueues() && !isPopulatingReplQueues() && !haEnabled) {
    initializeReplQueues();
  }
}

/**
 * Set total number of blocks and derive both exit thresholds from it.
 */
private synchronized void setBlockTotal(int total) {
  this.blockTotal = total;
  this.blockThreshold = (int) (blockTotal * threshold);
  this.blockReplQueueThreshold =
    (int) (blockTotal * replQueueThreshold);
  if (haEnabled) {
    // After we initialize the block count, any further namespace
    // modifications done while in safe mode need to keep track
    // of the number of total blocks in the system.
    this.shouldIncrementallyTrackBlocks = true;
  }
  // blockSafe is -1 only in manual/low-resources mode; normalize it.
  if(blockSafe < 0)
    this.blockSafe = 0;
  checkMode();
}

/**
 * Increment number of safe blocks if current block has
 * reached minimal replication.
 * @param replication current replication
 */
private synchronized void incrementSafeBlockCount(short replication) {
  // Count the block exactly once: at the moment it first reaches safeReplication.
  if (replication == safeReplication) {
    this.blockSafe++;

    // Report startup progress only if we haven't completed startup yet.
    StartupProgress prog = NameNode.getStartupProgress();
    if (prog.getStatus(Phase.SAFEMODE) != Status.COMPLETE) {
      if (this.awaitingReportedBlocksCounter == null) {
        this.awaitingReportedBlocksCounter = prog.getCounter(Phase.SAFEMODE,
            STEP_AWAITING_REPORTED_BLOCKS);
      }
      this.awaitingReportedBlocksCounter.increment();
    }

    checkMode();
  }
}
/**
 * Decrement number of safe blocks if current block has
 * fallen below minimal replication.
 * @param replication current replication
 */
private synchronized void decrementSafeBlockCount(short replication) {
  // Mirror of incrementSafeBlockCount: decrement exactly once, at the
  // transition from safeReplication to safeReplication-1.
  if (replication == safeReplication-1) {
    this.blockSafe--;
    //blockSafe is set to -1 in manual / low resources safemode
    assert blockSafe >= 0 || isManual() || areResourcesLow();
    checkMode();
  }
}

/**
 * Check if safe mode was entered manually.
 * Manual mode is encoded as extension == Integer.MAX_VALUE (see setManual()).
 */
private boolean isManual() {
  return extension == Integer.MAX_VALUE;
}

/**
 * Set manual safe mode.
 */
private synchronized void setManual() {
  extension = Integer.MAX_VALUE;
}

/**
 * Check if safe mode was entered due to resources being low.
 */
private boolean areResourcesLow() {
  return resourcesLow;
}

/**
 * Set that resources are low for this instance of safe mode.
 */
private void setResourcesLow() {
  resourcesLow = true;
}

/**
 * A tip on how safe mode is to be turned off: manually or automatically.
 */
String getTurnOffTip() {
  if(!isOn()) {
    return "Safe mode is OFF.";
  }

  //Manual OR low-resource safemode. (Admin intervention required)
  String adminMsg = "It was turned on manually. ";
  if (areResourcesLow()) {
    adminMsg = "Resources are low on NN. Please add or free up more "
      + "resources then turn off safe mode manually. NOTE: If you turn off"
      + " safe mode before adding resources, "
      + "the NN will immediately return to safe mode. ";
  }
  if (isManual() || areResourcesLow()) {
    return adminMsg
      + "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off.";
  }

  // Automatic safe mode: describe each threshold and how far we are from it.
  boolean thresholdsMet = true;
  int numLive = getNumLiveDataNodes();
  String msg = "";
  if (blockSafe < blockThreshold) {
    msg += String.format(
      "The reported blocks %d needs additional %d"
      + " blocks to reach the threshold %.4f of total blocks %d.%n",
      blockSafe, (blockThreshold - blockSafe) + 1, threshold, blockTotal);
    thresholdsMet = false;
  } else {
    msg += String.format("The reported blocks %d has reached the threshold"
        + " %.4f of total blocks %d. ", blockSafe, threshold, blockTotal);
  }
  if (numLive < datanodeThreshold) {
    msg += String.format(
      "The number of live datanodes %d needs an additional %d live "
      + "datanodes to reach the minimum number %d.%n",
      numLive, (datanodeThreshold - numLive), datanodeThreshold);
    thresholdsMet = false;
  } else {
    msg += String.format("The number of live datanodes %d has reached "
        + "the minimum number %d. ",
        numLive, datanodeThreshold);
  }
  msg += (reached > 0) ? "In safe mode extension. " : "";
  msg += "Safe mode will be turned off automatically ";

  if (!thresholdsMet) {
    msg += "once the thresholds have been reached.";
  } else if (reached + extension - monotonicNow() > 0) {
    msg += ("in " + (reached + extension - monotonicNow()) / 1000 + " seconds.");
  } else {
    msg += "soon.";
  }

  return msg;
}
/**
 * Print status every 20 seconds.
 * @param msg      message to log ahead of the turn-off tip
 * @param rightNow when true, bypass the 20-second rate limit
 */
private void reportStatus(String msg, boolean rightNow) {
  long curTime = now();
  if(!rightNow && (curTime - lastStatusReport < 20 * 1000))
    return;
  NameNode.stateChangeLog.info(msg + " \n" + getTurnOffTip());
  lastStatusReport = curTime;
}

@Override
public String toString() {
  String resText = "Current safe blocks = "
    + blockSafe
    + ". Target blocks = " + blockThreshold + " for threshold = %" + threshold
    + ". Minimal replication = " + safeReplication + ".";
  if (reached > 0)
    resText += " Threshold was reached " + new Date(reachedTimestamp) + ".";
  return resText;
}

/**
 * Checks consistency of the class state.
 * This is costly so only runs if asserts are enabled.
 */
private void doConsistencyCheck() {
  // Side-effecting assert: assertsOn becomes true only when -ea is active.
  boolean assertsOn = false;
  assert assertsOn = true; // set to true if asserts are on
  if (!assertsOn) return;

  if (blockTotal == -1 && blockSafe == -1) {
    return; // manual safe mode
  }
  int activeBlocks = blockManager.getActiveBlockCount();
  if ((blockTotal != activeBlocks) &&
      !(blockSafe >= 0 && blockSafe <= blockTotal)) {
    throw new AssertionError(
        " SafeMode: Inconsistent filesystem state: "
        + "SafeMode data: blockTotal=" + blockTotal
        + " blockSafe=" + blockSafe + "; "
        + "BlockManager data: active=" + activeBlocks);
  }
}

/**
 * Adjust blockSafe/blockTotal under the HA incremental-tracking regime.
 * No-op until setBlockTotal() has enabled shouldIncrementallyTrackBlocks.
 */
private synchronized void adjustBlockTotals(int deltaSafe, int deltaTotal) {
  if (!shouldIncrementallyTrackBlocks) {
    return;
  }
  assert haEnabled;

  if (LOG.isDebugEnabled()) {
    LOG.debug("Adjusting block totals from " +
        blockSafe + "/" + blockTotal + " to " +
        (blockSafe + deltaSafe) + "/" + (blockTotal + deltaTotal));
  }
  assert blockSafe + deltaSafe >= 0 : "Can't reduce blockSafe " +
    blockSafe + " by " + deltaSafe + ": would be negative";
  assert blockTotal + deltaTotal >= 0 : "Can't reduce blockTotal " +
    blockTotal + " by " + deltaTotal + ": would be negative";

  blockSafe += deltaSafe;
  // setBlockTotal recomputes thresholds and re-runs checkMode().
  setBlockTotal(blockTotal + deltaTotal);
}
} // end class SafeModeInfo

/**
 * Periodically check whether it is time to leave safe mode.
 * This thread starts when the threshold level is reached.
 */
class SafeModeMonitor implements Runnable {
  /** interval in msec for checking safe mode: {@value} */
  private static final long recheckInterval = 1000;

  @Override
  public void run() {
    while (fsRunning) {
      // Write lock: leaving safe mode initializes replication queues.
      writeLock();
      try {
        if (safeMode == null) { // Not in safe mode.
          break;
        }
        if (safeMode.canLeave()) {
          // Leave safe mode.
          safeMode.leave();
          smmthread = null;
          break;
        }
      } finally {
        writeUnlock();
      }

      try {
        Thread.sleep(recheckInterval);
      } catch (InterruptedException ie) {
        // Ignored; loop re-checks fsRunning / safe mode state.
      }
    }
    if (!fsRunning) {
      LOG.info("NameNode is being shutdown, exit SafeModeMonitor thread");
    }
  }
}

/**
 * Handle a dfsadmin -safemode request (enter/leave/get).
 * @return whether the namesystem is in safe mode after the action
 */
boolean setSafeMode(SafeModeAction action) throws IOException {
  if (action != SafeModeAction.SAFEMODE_GET) {
    checkSuperuserPrivilege();
    switch(action) {
    case SAFEMODE_LEAVE: // leave safe mode
      leaveSafeMode();
      break;
    case SAFEMODE_ENTER: // enter safe mode
      enterSafeMode(false);
      break;
    default:
      LOG.error("Unexpected safe mode action");
    }
  }
  return isInSafeMode();
}

@Override
public void checkSafeMode() {
  // safeMode is volatile, and may be set to null at any time
  SafeModeInfo safeMode = this.safeMode;
  if (safeMode != null) {
    safeMode.checkMode();
  }
}

@Override
public boolean isInSafeMode() {
  // safeMode is volatile, and may be set to null at any time
  SafeModeInfo safeMode = this.safeMode;
  if (safeMode == null)
    return false;
  return safeMode.isOn();
}

@Override
public boolean isInStartupSafeMode() {
  // safeMode is volatile, and may be set to null at any time
  SafeModeInfo safeMode = this.safeMode;
  if (safeMode == null)
    return false;
  // If the NN is in safemode, and not due to manual / low resources, we
  // assume it must be because of startup. If the NN had low resources during
  // startup, we assume it came out of startup safemode and it is now in low
  // resources safemode
  return !safeMode.isManual() && !safeMode.areResourcesLow()
    && safeMode.isOn();
}

/**
 * Check if replication queues are to be populated
 * @return true when node is HAState.Active and not in the very first safemode
 */
@Override
public boolean isPopulatingReplQueues() {
  if (!shouldPopulateReplQueues()) {
    return false;
  }
  return initializedReplQueues;
}

// Delegates to the current HA state; false while HA context is unset.
private boolean shouldPopulateReplQueues() {
  if(haContext == null || haContext.getState() == null)
    return false;
  return haContext.getState().shouldPopulateReplQueues();
}

@Override
public void incrementSafeBlockCount(int replication) {
  // safeMode is volatile, and may be set to null at any time
  SafeModeInfo safeMode = this.safeMode;
  if (safeMode == null)
    return;
  safeMode.incrementSafeBlockCount((short)replication);
}

@Override
public void decrementSafeBlockCount(Block b) {
  // safeMode is volatile, and may be set to null at any time
  SafeModeInfo safeMode = this.safeMode;
  if (safeMode == null) // mostly true
    return;
  BlockInfoContiguous storedBlock = getStoredBlock(b);
  // Only COMPLETE blocks are counted toward the safe-block total.
  if (storedBlock.isComplete()) {
    safeMode.decrementSafeBlockCount((short)blockManager.countNodes(b).liveReplicas());
  }
}
5661 * If safe mode is not currently on, this is a no-op. 5662 * @param deltaSafe the change in number of safe blocks 5663 * @param deltaTotal the change i nnumber of total blocks expected 5664 */ 5665 @Override 5666 public void adjustSafeModeBlockTotals(int deltaSafe, int deltaTotal) { 5667 // safeMode is volatile, and may be set to null at any time 5668 SafeModeInfo safeMode = this.safeMode; 5669 if (safeMode == null) 5670 return; 5671 safeMode.adjustBlockTotals(deltaSafe, deltaTotal); 5672 } 5673 5674 /** 5675 * Set the total number of blocks in the system. 5676 */ 5677 public void setBlockTotal() { 5678 // safeMode is volatile, and may be set to null at any time 5679 SafeModeInfo safeMode = this.safeMode; 5680 if (safeMode == null) 5681 return; 5682 safeMode.setBlockTotal((int)getCompleteBlocksTotal()); 5683 } 5684 5685 /** 5686 * Get the total number of blocks in the system. 5687 */ 5688 @Override // FSNamesystemMBean 5689 @Metric 5690 public long getBlocksTotal() { 5691 return blockManager.getTotalBlocks(); 5692 } 5693 5694 /** 5695 * Get the total number of COMPLETE blocks in the system. 5696 * For safe mode only complete blocks are counted. 5697 */ 5698 private long getCompleteBlocksTotal() { 5699 // Calculate number of blocks under construction 5700 long numUCBlocks = 0; 5701 readLock(); 5702 numUCBlocks = leaseManager.getNumUnderConstructionBlocks(); 5703 try { 5704 return getBlocksTotal() - numUCBlocks; 5705 } finally { 5706 readUnlock(); 5707 } 5708 } 5709 5710 /** 5711 * Enter safe mode. If resourcesLow is false, then we assume it is manual 5712 * @throws IOException 5713 */ 5714 void enterSafeMode(boolean resourcesLow) throws IOException { 5715 writeLock(); 5716 try { 5717 // Stop the secret manager, since rolling the master key would 5718 // try to write to the edit log 5719 stopSecretManager(); 5720 5721 // Ensure that any concurrent operations have been fully synced 5722 // before entering safe mode. 
This ensures that the FSImage 5723 // is entirely stable on disk as soon as we're in safe mode. 5724 boolean isEditlogOpenForWrite = getEditLog().isOpenForWrite(); 5725 // Before Editlog is in OpenForWrite mode, editLogStream will be null. So, 5726 // logSyncAll call can be called only when Edlitlog is in OpenForWrite mode 5727 if (isEditlogOpenForWrite) { 5728 getEditLog().logSyncAll(); 5729 } 5730 if (!isInSafeMode()) { 5731 safeMode = new SafeModeInfo(resourcesLow); 5732 return; 5733 } 5734 if (resourcesLow) { 5735 safeMode.setResourcesLow(); 5736 } else { 5737 safeMode.setManual(); 5738 } 5739 if (isEditlogOpenForWrite) { 5740 getEditLog().logSyncAll(); 5741 } 5742 NameNode.stateChangeLog.info("STATE* Safe mode is ON" 5743 + safeMode.getTurnOffTip()); 5744 } finally { 5745 writeUnlock(); 5746 } 5747 } 5748 5749 /** 5750 * Leave safe mode. 5751 */ 5752 void leaveSafeMode() { 5753 writeLock(); 5754 try { 5755 if (!isInSafeMode()) { 5756 NameNode.stateChangeLog.info("STATE* Safe mode is already OFF"); 5757 return; 5758 } 5759 safeMode.leave(); 5760 } finally { 5761 writeUnlock(); 5762 } 5763 } 5764 5765 String getSafeModeTip() { 5766 // There is no need to take readLock. 5767 // Don't use isInSafeMode as this.safeMode might be set to null. 5768 // after isInSafeMode returns. 
5769 boolean inSafeMode; 5770 SafeModeInfo safeMode = this.safeMode; 5771 if (safeMode == null) { 5772 inSafeMode = false; 5773 } else { 5774 inSafeMode = safeMode.isOn(); 5775 } 5776 5777 if (!inSafeMode) { 5778 return ""; 5779 } else { 5780 return safeMode.getTurnOffTip(); 5781 } 5782 } 5783 5784 CheckpointSignature rollEditLog() throws IOException { 5785 checkSuperuserPrivilege(); 5786 checkOperation(OperationCategory.JOURNAL); 5787 writeLock(); 5788 try { 5789 checkOperation(OperationCategory.JOURNAL); 5790 checkNameNodeSafeMode("Log not rolled"); 5791 if (Server.isRpcInvocation()) { 5792 LOG.info("Roll Edit Log from " + Server.getRemoteAddress()); 5793 } 5794 return getFSImage().rollEditLog(); 5795 } finally { 5796 writeUnlock(); 5797 } 5798 } 5799 5800 NamenodeCommand startCheckpoint(NamenodeRegistration backupNode, 5801 NamenodeRegistration activeNamenode) throws IOException { 5802 checkOperation(OperationCategory.CHECKPOINT); 5803 writeLock(); 5804 try { 5805 checkOperation(OperationCategory.CHECKPOINT); 5806 checkNameNodeSafeMode("Checkpoint not started"); 5807 5808 LOG.info("Start checkpoint for " + backupNode.getAddress()); 5809 NamenodeCommand cmd = getFSImage().startCheckpoint(backupNode, 5810 activeNamenode); 5811 getEditLog().logSync(); 5812 return cmd; 5813 } finally { 5814 writeUnlock(); 5815 } 5816 } 5817 5818 public void processIncrementalBlockReport(final DatanodeID nodeID, 5819 final StorageReceivedDeletedBlocks srdb) 5820 throws IOException { 5821 writeLock(); 5822 try { 5823 blockManager.processIncrementalBlockReport(nodeID, srdb); 5824 } finally { 5825 writeUnlock(); 5826 } 5827 } 5828 5829 void endCheckpoint(NamenodeRegistration registration, 5830 CheckpointSignature sig) throws IOException { 5831 checkOperation(OperationCategory.CHECKPOINT); 5832 readLock(); 5833 try { 5834 checkOperation(OperationCategory.CHECKPOINT); 5835 checkNameNodeSafeMode("Checkpoint not ended"); 5836 LOG.info("End checkpoint for " + registration.getAddress()); 5837 
getFSImage().endCheckpoint(sig); 5838 } finally { 5839 readUnlock(); 5840 } 5841 } 5842 5843 PermissionStatus createFsOwnerPermissions(FsPermission permission) { 5844 return new PermissionStatus(fsOwner.getShortUserName(), supergroup, permission); 5845 } 5846 5847 private void checkUnreadableBySuperuser(FSPermissionChecker pc, 5848 INode inode, int snapshotId) 5849 throws IOException { 5850 if (pc.isSuperUser()) { 5851 for (XAttr xattr : FSDirXAttrOp.getXAttrs(dir, inode, snapshotId)) { 5852 if (XAttrHelper.getPrefixName(xattr). 5853 equals(SECURITY_XATTR_UNREADABLE_BY_SUPERUSER)) { 5854 throw new AccessControlException("Access is denied for " + 5855 pc.getUser() + " since the superuser is not allowed to " + 5856 "perform this operation."); 5857 } 5858 } 5859 } 5860 } 5861 5862 @Override 5863 public void checkSuperuserPrivilege() 5864 throws AccessControlException { 5865 if (isPermissionEnabled) { 5866 FSPermissionChecker pc = getPermissionChecker(); 5867 pc.checkSuperuserPrivilege(); 5868 } 5869 } 5870 5871 /** 5872 * Check to see if we have exceeded the limit on the number 5873 * of inodes. 5874 */ 5875 void checkFsObjectLimit() throws IOException { 5876 if (maxFsObjects != 0 && 5877 maxFsObjects <= dir.totalInodes() + getBlocksTotal()) { 5878 throw new IOException("Exceeded the configured number of objects " + 5879 maxFsObjects + " in the filesystem."); 5880 } 5881 } 5882 5883 /** 5884 * Get the total number of objects in the system. 5885 */ 5886 @Override // FSNamesystemMBean 5887 public long getMaxObjects() { 5888 return maxFsObjects; 5889 } 5890 5891 @Override // FSNamesystemMBean 5892 @Metric 5893 public long getFilesTotal() { 5894 // There is no need to take fSNamesystem's lock as 5895 // FSDirectory has its own lock. 
5896 return this.dir.totalInodes(); 5897 } 5898 5899 @Override // FSNamesystemMBean 5900 @Metric 5901 public long getPendingReplicationBlocks() { 5902 return blockManager.getPendingReplicationBlocksCount(); 5903 } 5904 5905 @Override // FSNamesystemMBean 5906 @Metric 5907 public long getUnderReplicatedBlocks() { 5908 return blockManager.getUnderReplicatedBlocksCount(); 5909 } 5910 5911 /** Returns number of blocks with corrupt replicas */ 5912 @Metric({"CorruptBlocks", "Number of blocks with corrupt replicas"}) 5913 public long getCorruptReplicaBlocks() { 5914 return blockManager.getCorruptReplicaBlocksCount(); 5915 } 5916 5917 @Override // FSNamesystemMBean 5918 @Metric 5919 public long getScheduledReplicationBlocks() { 5920 return blockManager.getScheduledReplicationBlocksCount(); 5921 } 5922 5923 @Override 5924 @Metric 5925 public long getPendingDeletionBlocks() { 5926 return blockManager.getPendingDeletionBlocksCount(); 5927 } 5928 5929 @Override 5930 public long getBlockDeletionStartTime() { 5931 return startTime + blockManager.getStartupDelayBlockDeletionInMs(); 5932 } 5933 5934 @Metric 5935 public long getExcessBlocks() { 5936 return blockManager.getExcessBlocksCount(); 5937 } 5938 5939 // HA-only metric 5940 @Metric 5941 public long getPostponedMisreplicatedBlocks() { 5942 return blockManager.getPostponedMisreplicatedBlocksCount(); 5943 } 5944 5945 // HA-only metric 5946 @Metric 5947 public int getPendingDataNodeMessageCount() { 5948 return blockManager.getPendingDataNodeMessageCount(); 5949 } 5950 5951 // HA-only metric 5952 @Metric 5953 public String getHAState() { 5954 return haContext.getState().toString(); 5955 } 5956 5957 // HA-only metric 5958 @Metric 5959 public long getMillisSinceLastLoadedEdits() { 5960 if (isInStandbyState() && editLogTailer != null) { 5961 return monotonicNow() - editLogTailer.getLastLoadTimeMs(); 5962 } else { 5963 return 0; 5964 } 5965 } 5966 5967 @Metric 5968 public int getBlockCapacity() { 5969 return 
blockManager.getCapacity(); 5970 } 5971 5972 @Override // FSNamesystemMBean 5973 public String getFSState() { 5974 return isInSafeMode() ? "safeMode" : "Operational"; 5975 } 5976 5977 private ObjectName mbeanName; 5978 private ObjectName mxbeanName; 5979 5980 /** 5981 * Register the FSNamesystem MBean using the name 5982 * "hadoop:service=NameNode,name=FSNamesystemState" 5983 */ 5984 private void registerMBean() { 5985 // We can only implement one MXBean interface, so we keep the old one. 5986 try { 5987 StandardMBean bean = new StandardMBean(this, FSNamesystemMBean.class); 5988 mbeanName = MBeans.register("NameNode", "FSNamesystemState", bean); 5989 } catch (NotCompliantMBeanException e) { 5990 throw new RuntimeException("Bad MBean setup", e); 5991 } 5992 5993 LOG.info("Registered FSNamesystemState MBean"); 5994 } 5995 5996 /** 5997 * shutdown FSNamesystem 5998 */ 5999 void shutdown() { 6000 if (snapshotManager != null) { 6001 snapshotManager.shutdown(); 6002 } 6003 if (mbeanName != null) { 6004 MBeans.unregister(mbeanName); 6005 mbeanName = null; 6006 } 6007 if (mxbeanName != null) { 6008 MBeans.unregister(mxbeanName); 6009 mxbeanName = null; 6010 } 6011 if (dir != null) { 6012 dir.shutdown(); 6013 } 6014 if (blockManager != null) { 6015 blockManager.shutdown(); 6016 } 6017 } 6018 6019 @Override // FSNamesystemMBean 6020 public int getNumLiveDataNodes() { 6021 return getBlockManager().getDatanodeManager().getNumLiveDataNodes(); 6022 } 6023 6024 @Override // FSNamesystemMBean 6025 public int getNumDeadDataNodes() { 6026 return getBlockManager().getDatanodeManager().getNumDeadDataNodes(); 6027 } 6028 6029 @Override // FSNamesystemMBean 6030 public int getNumDecomLiveDataNodes() { 6031 final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>(); 6032 getBlockManager().getDatanodeManager().fetchDatanodes(live, null, true); 6033 int liveDecommissioned = 0; 6034 for (DatanodeDescriptor node : live) { 6035 liveDecommissioned += node.isDecommissioned() ? 
1 : 0; 6036 } 6037 return liveDecommissioned; 6038 } 6039 6040 @Override // FSNamesystemMBean 6041 public int getNumDecomDeadDataNodes() { 6042 final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>(); 6043 getBlockManager().getDatanodeManager().fetchDatanodes(null, dead, true); 6044 int deadDecommissioned = 0; 6045 for (DatanodeDescriptor node : dead) { 6046 deadDecommissioned += node.isDecommissioned() ? 1 : 0; 6047 } 6048 return deadDecommissioned; 6049 } 6050 6051 @Override // FSNamesystemMBean 6052 public int getVolumeFailuresTotal() { 6053 List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>(); 6054 getBlockManager().getDatanodeManager().fetchDatanodes(live, null, true); 6055 int volumeFailuresTotal = 0; 6056 for (DatanodeDescriptor node: live) { 6057 volumeFailuresTotal += node.getVolumeFailures(); 6058 } 6059 return volumeFailuresTotal; 6060 } 6061 6062 @Override // FSNamesystemMBean 6063 public long getEstimatedCapacityLostTotal() { 6064 List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>(); 6065 getBlockManager().getDatanodeManager().fetchDatanodes(live, null, true); 6066 long estimatedCapacityLostTotal = 0; 6067 for (DatanodeDescriptor node: live) { 6068 VolumeFailureSummary volumeFailureSummary = node.getVolumeFailureSummary(); 6069 if (volumeFailureSummary != null) { 6070 estimatedCapacityLostTotal += 6071 volumeFailureSummary.getEstimatedCapacityLostTotal(); 6072 } 6073 } 6074 return estimatedCapacityLostTotal; 6075 } 6076 6077 @Override // FSNamesystemMBean 6078 public int getNumDecommissioningDataNodes() { 6079 return getBlockManager().getDatanodeManager().getDecommissioningNodes() 6080 .size(); 6081 } 6082 6083 @Override // FSNamesystemMBean 6084 @Metric({"StaleDataNodes", 6085 "Number of datanodes marked stale due to delayed heartbeat"}) 6086 public int getNumStaleDataNodes() { 6087 return getBlockManager().getDatanodeManager().getNumStaleNodes(); 6088 } 6089 6090 /** 6091 * Storages are marked as 
/**
 * Storages are marked as "content stale" after NN restart or fails over and
 * before NN receives the first Heartbeat followed by the first Blockreport.
 */
@Override // FSNamesystemMBean
public int getNumStaleStorages() {
  return getBlockManager().getDatanodeManager().getNumStaleStorages();
}

/**
 * Serialize the current top-user operation counts to a JSON string,
 * or null when the top metrics feature is disabled or serialization fails.
 */
@Override // FSNamesystemMBean
public String getTopUserOpCounts() {
  if (!topConf.isEnabled) {
    return null;
  }

  Date now = new Date();
  final List<RollingWindowManager.TopWindow> topWindows =
      topMetrics.getTopWindows();
  Map<String, Object> topMap = new TreeMap<String, Object>();
  topMap.put("windows", topWindows);
  topMap.put("timestamp", DFSUtil.dateToIso8601String(now));
  ObjectMapper mapper = new ObjectMapper();
  try {
    return mapper.writeValueAsString(topMap);
  } catch (IOException e) {
    LOG.warn("Failed to fetch TopUser metrics", e);
  }
  return null;
}

/**
 * Increments, logs and then returns the stamp
 */
long nextGenerationStamp(boolean legacyBlock)
    throws IOException, SafeModeException {
  assert hasWriteLock();
  checkNameNodeSafeMode("Cannot get next generation stamp");

  long gs = blockIdManager.nextGenerationStamp(legacyBlock);
  if (legacyBlock) {
    getEditLog().logGenerationStampV1(gs);
  } else {
    getEditLog().logGenerationStampV2(gs);
  }

  // NB: callers sync the log
  return gs;
}

/**
 * Increments, logs and then returns the block ID
 */
private long nextBlockId() throws IOException {
  assert hasWriteLock();
  checkNameNodeSafeMode("Cannot get next block ID");
  final long blockId = blockIdManager.nextBlockId();
  getEditLog().logAllocateBlockId(blockId);
  // NB: callers sync the log
  return blockId;
}

/**
 * Determine whether a file has effectively been deleted: removed from the
 * inode map, detached by a recursive parent deletion, or marked deleted
 * in its snapshot feature.
 */
private boolean isFileDeleted(INodeFile file) {
  // Not in the inodeMap or in the snapshot but marked deleted.
  if (dir.getInode(file.getId()) == null) {
    return true;
  }

  // look at the path hierarchy to see if one parent is deleted by recursive
  // deletion
  INode tmpChild = file;
  INodeDirectory tmpParent = file.getParent();
  while (true) {
    if (tmpParent == null) {
      return true;
    }

    INode childINode = tmpParent.getChild(tmpChild.getLocalNameBytes(),
        Snapshot.CURRENT_STATE_ID);
    if (childINode == null || !childINode.equals(tmpChild)) {
      // a newly created INode with the same name as an already deleted one
      // would be a different INode than the deleted one
      return true;
    }

    if (tmpParent.isRoot()) {
      break;
    }

    tmpChild = tmpParent;
    tmpParent = tmpParent.getParent();
  }

  if (file.isWithSnapshot() &&
      file.getFileWithSnapshotFeature().isCurrentFileDeleted()) {
    return true;
  }
  return false;
}

/**
 * Validate that a block is under construction, belongs to a live
 * under-construction file, and is leased by the given client.
 * @return the file the block belongs to
 * @throws IOException on any validation failure
 */
private INodeFile checkUCBlock(ExtendedBlock block,
    String clientName) throws IOException {
  assert hasWriteLock();
  checkNameNodeSafeMode("Cannot get a new generation stamp and an "
      + "access token for block " + block);

  // check stored block state
  BlockInfoContiguous storedBlock = getStoredBlock(ExtendedBlock.getLocalBlock(block));
  if (storedBlock == null ||
      storedBlock.getBlockUCState() != BlockUCState.UNDER_CONSTRUCTION) {
    throw new IOException(block +
        " does not exist or is not under Construction" + storedBlock);
  }

  // check file inode
  final INodeFile file = ((INode)storedBlock.getBlockCollection()).asFile();
  if (file == null || !file.isUnderConstruction() || isFileDeleted(file)) {
    throw new IOException("The file " + storedBlock +
        " belonged to does not exist or it is not under construction.");
  }

  // check lease
  if (clientName == null
      || !clientName.equals(file.getFileUnderConstructionFeature()
          .getClientName())) {
    throw new LeaseExpiredException("Lease mismatch: " + block +
        " is accessed by a non lease holder " + clientName);
  }

  return file;
}

/**
 * Client is reporting some bad block locations.
 */
void reportBadBlocks(LocatedBlock[] blocks) throws IOException {
  checkOperation(OperationCategory.WRITE);
  NameNode.stateChangeLog.info("*DIR* reportBadBlocks");
  writeLock();
  try {
    checkOperation(OperationCategory.WRITE);
    for (int i = 0; i < blocks.length; i++) {
      ExtendedBlock blk = blocks[i].getBlock();
      DatanodeInfo[] nodes = blocks[i].getLocations();
      String[] storageIDs = blocks[i].getStorageIDs();
      for (int j = 0; j < nodes.length; j++) {
        // storageIDs may be absent in reports from older clients.
        blockManager.findAndMarkBlockAsCorrupt(blk, nodes[j],
            storageIDs == null ? null: storageIDs[j],
            "client machine reported it");
      }
    }
  } finally {
    writeUnlock();
  }
}
/**
 * Get a new generation stamp together with an access token for
 * a block under construction.
 *
 * This method is called for recovering a failed pipeline or setting up
 * a pipeline to append to a block.
 *
 * @param block a block
 * @param clientName the name of a client
 * @return a located block with a new generation stamp and an access token
 * @throws IOException if any error occurs
 */
LocatedBlock updateBlockForPipeline(ExtendedBlock block,
    String clientName) throws IOException {
  LocatedBlock locatedBlock;
  checkOperation(OperationCategory.WRITE);
  writeLock();
  try {
    checkOperation(OperationCategory.WRITE);

    // check validity of parameters
    checkUCBlock(block, clientName);

    // get a new generation stamp and an access token
    block.setGenerationStamp(nextGenerationStamp(blockIdManager.isLegacyBlock(block.getLocalBlock())));
    locatedBlock = new LocatedBlock(block, new DatanodeInfo[0]);
    blockManager.setBlockToken(locatedBlock, AccessMode.WRITE);
  } finally {
    writeUnlock();
  }
  // Ensure we record the new generation stamp
  getEditLog().logSync();
  return locatedBlock;
}

/**
 * Update a pipeline for a block under construction
 *
 * @param clientName the name of the client
 * @param oldBlock and old block
 * @param newBlock a new block with a new generation stamp and length
 * @param newNodes datanodes in the pipeline
 * @throws IOException if any error occurs
 */
void updatePipeline(
    String clientName, ExtendedBlock oldBlock, ExtendedBlock newBlock,
    DatanodeID[] newNodes, String[] newStorageIDs, boolean logRetryCache)
    throws IOException {
  checkOperation(OperationCategory.WRITE);

  LOG.info("updatePipeline(" + oldBlock.getLocalBlock()
      + ", newGS=" + newBlock.getGenerationStamp()
      + ", newLength=" + newBlock.getNumBytes()
      + ", newNodes=" + Arrays.asList(newNodes)
      + ", client=" + clientName
      + ")");
  waitForLoadingFSImage();
  writeLock();
  try {
    checkOperation(OperationCategory.WRITE);
    checkNameNodeSafeMode("Pipeline not updated");
    assert newBlock.getBlockId()==oldBlock.getBlockId() : newBlock + " and "
      + oldBlock + " has different block identifier";
    updatePipelineInternal(clientName, oldBlock, newBlock, newNodes,
        newStorageIDs, logRetryCache);
  } finally {
    writeUnlock();
  }
  // Persist the pipeline update outside the write lock.
  getEditLog().logSync();
  LOG.info("updatePipeline(" + oldBlock.getLocalBlock() + " => "
      + newBlock.getLocalBlock() + ") success");
}

/**
 * Apply a pipeline update under the write lock: verify the lease, reject
 * stale generation stamps/lengths, then update the last block's metadata
 * and expected locations, and persist the blocks.
 */
private void updatePipelineInternal(String clientName, ExtendedBlock oldBlock,
    ExtendedBlock newBlock, DatanodeID[] newNodes, String[] newStorageIDs,
    boolean logRetryCache)
    throws IOException {
  assert hasWriteLock();
  // check the validity of the block and lease holder name
  final INodeFile pendingFile = checkUCBlock(oldBlock, clientName);
  final BlockInfoContiguousUnderConstruction blockinfo
      = (BlockInfoContiguousUnderConstruction)pendingFile.getLastBlock();

  // check new GS & length: this is not expected
  if (newBlock.getGenerationStamp() <= blockinfo.getGenerationStamp() ||
      newBlock.getNumBytes() < blockinfo.getNumBytes()) {
    String msg = "Update " + oldBlock + " (len = " +
      blockinfo.getNumBytes() + ") to an older state: " + newBlock +
      " (len = " + newBlock.getNumBytes() +")";
    LOG.warn(msg);
    throw new IOException(msg);
  }

  // Update old block with the new generation stamp and new length
  blockinfo.setNumBytes(newBlock.getNumBytes());
  blockinfo.setGenerationStampAndVerifyReplicas(newBlock.getGenerationStamp());

  // find the DatanodeDescriptor objects
  final DatanodeStorageInfo[] storages = blockManager.getDatanodeManager()
      .getDatanodeStorageInfos(newNodes, newStorageIDs);
  blockinfo.setExpectedLocations(storages);

  String src = pendingFile.getFullPathName();
  persistBlocks(src, pendingFile, logRetryCache);
}
  // If any part of the renamed subtree had
  // files that were being written to, update with new filename.
  void unprotectedChangeLease(String src, String dst) {
    assert hasWriteLock();
    leaseManager.changeLease(src, dst);
  }

  /**
   * Serializes leases.
   */
  void saveFilesUnderConstruction(DataOutputStream out,
      Map<Long, INodeFile> snapshotUCMap) throws IOException {
    // This is run by an inferior thread of saveNamespace, which holds a read
    // lock on our behalf. If we took the read lock here, we could block
    // for fairness if a writer is waiting on the lock.
    synchronized (leaseManager) {
      Map<String, INodeFile> nodes = leaseManager.getINodesUnderConstruction();
      for (Map.Entry<String, INodeFile> entry : nodes.entrySet()) {
        // TODO: for HDFS-5428, because of rename operations, some
        // under-construction files that are
        // in the current fs directory can also be captured in the
        // snapshotUCMap. We should remove them from the snapshotUCMap.
        snapshotUCMap.remove(entry.getValue().getId());
      }

      out.writeInt(nodes.size() + snapshotUCMap.size()); // write the size
      for (Map.Entry<String, INodeFile> entry : nodes.entrySet()) {
        FSImageSerialization.writeINodeUnderConstruction(
            out, entry.getValue(), entry.getKey());
      }
      for (Map.Entry<Long, INodeFile> entry : snapshotUCMap.entrySet()) {
        // for those snapshot INodeFileUC, we use "/.reserved/.inodes/<inodeid>"
        // as their paths
        StringBuilder b = new StringBuilder();
        b.append(FSDirectory.DOT_RESERVED_PATH_PREFIX)
            .append(Path.SEPARATOR).append(FSDirectory.DOT_INODES_STRING)
            .append(Path.SEPARATOR).append(entry.getValue().getId());
        FSImageSerialization.writeINodeUnderConstruction(
            out, entry.getValue(), b.toString());
      }
    }
  }

  /**
   * @return all the under-construction files in the lease map
   */
  Map<String, INodeFile> getFilesUnderConstruction() {
    // NOTE(review): the synchronized block only guards the lookup itself;
    // callers iterating the returned map are not protected here.
    synchronized (leaseManager) {
      return leaseManager.getINodesUnderConstruction();
    }
  }

  /**
   * Register a Backup name-node, verifying that it belongs
   * to the correct namespace, and adding it to the set of
   * active journals if necessary.
6407 * 6408 * @param bnReg registration of the new BackupNode 6409 * @param nnReg registration of this NameNode 6410 * @throws IOException if the namespace IDs do not match 6411 */ 6412 void registerBackupNode(NamenodeRegistration bnReg, 6413 NamenodeRegistration nnReg) throws IOException { 6414 writeLock(); 6415 try { 6416 if(getFSImage().getStorage().getNamespaceID() 6417 != bnReg.getNamespaceID()) 6418 throw new IOException("Incompatible namespaceIDs: " 6419 + " Namenode namespaceID = " 6420 + getFSImage().getStorage().getNamespaceID() + "; " 6421 + bnReg.getRole() + 6422 " node namespaceID = " + bnReg.getNamespaceID()); 6423 if (bnReg.getRole() == NamenodeRole.BACKUP) { 6424 getFSImage().getEditLog().registerBackupNode( 6425 bnReg, nnReg); 6426 } 6427 } finally { 6428 writeUnlock(); 6429 } 6430 } 6431 6432 /** 6433 * Release (unregister) backup node. 6434 * <p> 6435 * Find and remove the backup stream corresponding to the node. 6436 * @throws IOException 6437 */ 6438 void releaseBackupNode(NamenodeRegistration registration) 6439 throws IOException { 6440 checkOperation(OperationCategory.WRITE); 6441 writeLock(); 6442 try { 6443 checkOperation(OperationCategory.WRITE); 6444 if(getFSImage().getStorage().getNamespaceID() 6445 != registration.getNamespaceID()) 6446 throw new IOException("Incompatible namespaceIDs: " 6447 + " Namenode namespaceID = " 6448 + getFSImage().getStorage().getNamespaceID() + "; " 6449 + registration.getRole() + 6450 " node namespaceID = " + registration.getNamespaceID()); 6451 getEditLog().releaseBackupStream(registration); 6452 } finally { 6453 writeUnlock(); 6454 } 6455 } 6456 6457 static class CorruptFileBlockInfo { 6458 final String path; 6459 final Block block; 6460 6461 public CorruptFileBlockInfo(String p, Block b) { 6462 path = p; 6463 block = b; 6464 } 6465 6466 @Override 6467 public String toString() { 6468 return block.getBlockName() + "\t" + path; 6469 } 6470 } 6471 /** 6472 * @param path Restrict corrupt files to this portion 
   * of namespace.
   * @param cookieTab Support for continuation; cookieTab tells where
   *                  to start from
   * @return a list in which each entry describes a corrupt file/block
   * @throws IOException
   */
  Collection<CorruptFileBlockInfo> listCorruptFileBlocks(String path,
      String[] cookieTab) throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.READ);

    int count = 0;
    ArrayList<CorruptFileBlockInfo> corruptFiles =
        new ArrayList<CorruptFileBlockInfo>();
    if (cookieTab == null) {
      cookieTab = new String[] { null };
    }

    // Do a quick check if there are any corrupt files without taking the lock
    if (blockManager.getMissingBlocksCount() == 0) {
      if (cookieTab[0] == null) {
        // normalize a null cookie to "0" so the caller can resume later
        cookieTab[0] = String.valueOf(getIntCookie(cookieTab[0]));
      }
      LOG.info("there are no corrupt file blocks.");
      return corruptFiles;
    }

    readLock();
    try {
      checkOperation(OperationCategory.READ);
      if (!isPopulatingReplQueues()) {
        throw new IOException("Cannot run listCorruptFileBlocks because " +
                              "replication queues have not been initialized.");
      }
      // print a limited # of corrupt files per call

      final Iterator<Block> blkIterator = blockManager.getCorruptReplicaBlockIterator();

      // skip the entries already returned in previous calls (the cookie)
      int skip = getIntCookie(cookieTab[0]);
      for (int i = 0; i < skip && blkIterator.hasNext(); i++) {
        blkIterator.next();
      }

      while (blkIterator.hasNext()) {
        Block blk = blkIterator.next();
        final INode inode = (INode)blockManager.getBlockCollection(blk);
        skip++;
        // only report blocks with no live replicas that still map to a file
        if (inode != null && blockManager.countNodes(blk).liveReplicas() == 0) {
          String src = FSDirectory.getFullPathName(inode);
          if (src.startsWith(path)){
            corruptFiles.add(new CorruptFileBlockInfo(src, blk));
            count++;
            if (count >= DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED)
              break;
          }
        }
      }
      // hand the updated cursor back to the caller for the next call
      cookieTab[0] = String.valueOf(skip);
      LOG.info("list corrupt file blocks returned: " + count);
      return corruptFiles;
    } finally {
      readUnlock();
    }
  }

  /**
   * Convert string cookie to integer. Malformed or null cookies map to 0;
   * negative values are clamped to 0.
   */
  private static int getIntCookie(String cookie){
    int c;
    if(cookie == null){
      c = 0;
    } else {
      try{
        c = Integer.parseInt(cookie);
      }catch (NumberFormatException e) {
        c = 0;
      }
    }
    c = Math.max(0, c);
    return c;
  }

  /**
   * Create delegation token secret manager, configured from the key-update,
   * token-lifetime and token-renewal intervals in the given Configuration.
   */
  private DelegationTokenSecretManager createDelegationTokenSecretManager(
      Configuration conf) {
    return new DelegationTokenSecretManager(conf.getLong(
        DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY,
        DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT),
        conf.getLong(DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY,
            DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT),
        conf.getLong(DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY,
            DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT),
        DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL,
        conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY,
            DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT),
        this);
  }

  /**
   * Returns the DelegationTokenSecretManager instance in the namesystem.
6575 * @return delegation token secret manager object 6576 */ 6577 DelegationTokenSecretManager getDelegationTokenSecretManager() { 6578 return dtSecretManager; 6579 } 6580 6581 /** 6582 * @param renewer Renewer information 6583 * @return delegation toek 6584 * @throws IOException on error 6585 */ 6586 Token<DelegationTokenIdentifier> getDelegationToken(Text renewer) 6587 throws IOException { 6588 Token<DelegationTokenIdentifier> token; 6589 checkOperation(OperationCategory.WRITE); 6590 writeLock(); 6591 try { 6592 checkOperation(OperationCategory.WRITE); 6593 checkNameNodeSafeMode("Cannot issue delegation token"); 6594 if (!isAllowedDelegationTokenOp()) { 6595 throw new IOException( 6596 "Delegation Token can be issued only with kerberos or web authentication"); 6597 } 6598 if (dtSecretManager == null || !dtSecretManager.isRunning()) { 6599 LOG.warn("trying to get DT with no secret manager running"); 6600 return null; 6601 } 6602 6603 UserGroupInformation ugi = getRemoteUser(); 6604 String user = ugi.getUserName(); 6605 Text owner = new Text(user); 6606 Text realUser = null; 6607 if (ugi.getRealUser() != null) { 6608 realUser = new Text(ugi.getRealUser().getUserName()); 6609 } 6610 DelegationTokenIdentifier dtId = new DelegationTokenIdentifier(owner, 6611 renewer, realUser); 6612 token = new Token<DelegationTokenIdentifier>( 6613 dtId, dtSecretManager); 6614 long expiryTime = dtSecretManager.getTokenExpiryTime(dtId); 6615 getEditLog().logGetDelegationToken(dtId, expiryTime); 6616 } finally { 6617 writeUnlock(); 6618 } 6619 getEditLog().logSync(); 6620 return token; 6621 } 6622 6623 /** 6624 * 6625 * @param token token to renew 6626 * @return new expiryTime of the token 6627 * @throws InvalidToken if {@code token} is invalid 6628 * @throws IOException on other errors 6629 */ 6630 long renewDelegationToken(Token<DelegationTokenIdentifier> token) 6631 throws InvalidToken, IOException { 6632 long expiryTime; 6633 checkOperation(OperationCategory.WRITE); 6634 
writeLock(); 6635 try { 6636 checkOperation(OperationCategory.WRITE); 6637 6638 checkNameNodeSafeMode("Cannot renew delegation token"); 6639 if (!isAllowedDelegationTokenOp()) { 6640 throw new IOException( 6641 "Delegation Token can be renewed only with kerberos or web authentication"); 6642 } 6643 String renewer = getRemoteUser().getShortUserName(); 6644 expiryTime = dtSecretManager.renewToken(token, renewer); 6645 DelegationTokenIdentifier id = new DelegationTokenIdentifier(); 6646 ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier()); 6647 DataInputStream in = new DataInputStream(buf); 6648 id.readFields(in); 6649 getEditLog().logRenewDelegationToken(id, expiryTime); 6650 } finally { 6651 writeUnlock(); 6652 } 6653 getEditLog().logSync(); 6654 return expiryTime; 6655 } 6656 6657 /** 6658 * 6659 * @param token token to cancel 6660 * @throws IOException on error 6661 */ 6662 void cancelDelegationToken(Token<DelegationTokenIdentifier> token) 6663 throws IOException { 6664 checkOperation(OperationCategory.WRITE); 6665 writeLock(); 6666 try { 6667 checkOperation(OperationCategory.WRITE); 6668 6669 checkNameNodeSafeMode("Cannot cancel delegation token"); 6670 String canceller = getRemoteUser().getUserName(); 6671 DelegationTokenIdentifier id = dtSecretManager 6672 .cancelToken(token, canceller); 6673 getEditLog().logCancelDelegationToken(id); 6674 } finally { 6675 writeUnlock(); 6676 } 6677 getEditLog().logSync(); 6678 } 6679 6680 /** 6681 * @param out save state of the secret manager 6682 * @param sdPath String storage directory path 6683 */ 6684 void saveSecretManagerStateCompat(DataOutputStream out, String sdPath) 6685 throws IOException { 6686 dtSecretManager.saveSecretManagerStateCompat(out, sdPath); 6687 } 6688 6689 SecretManagerState saveSecretManagerState() { 6690 return dtSecretManager.saveSecretManagerState(); 6691 } 6692 6693 /** 6694 * @param in load the state of secret manager from input stream 6695 */ 6696 void 
loadSecretManagerStateCompat(DataInput in) throws IOException { 6697 dtSecretManager.loadSecretManagerStateCompat(in); 6698 } 6699 6700 void loadSecretManagerState(SecretManagerSection s, 6701 List<SecretManagerSection.DelegationKey> keys, 6702 List<SecretManagerSection.PersistToken> tokens) throws IOException { 6703 dtSecretManager.loadSecretManagerState(new SecretManagerState(s, keys, tokens)); 6704 } 6705 6706 /** 6707 * Log the updateMasterKey operation to edit logs 6708 * 6709 * @param key new delegation key. 6710 */ 6711 public void logUpdateMasterKey(DelegationKey key) { 6712 6713 assert !isInSafeMode() : 6714 "this should never be called while in safemode, since we stop " + 6715 "the DT manager before entering safemode!"; 6716 // No need to hold FSN lock since we don't access any internal 6717 // structures, and this is stopped before the FSN shuts itself 6718 // down, etc. 6719 getEditLog().logUpdateMasterKey(key); 6720 getEditLog().logSync(); 6721 } 6722 6723 /** 6724 * Log the cancellation of expired tokens to edit logs 6725 * 6726 * @param id token identifier to cancel 6727 */ 6728 public void logExpireDelegationToken(DelegationTokenIdentifier id) { 6729 assert !isInSafeMode() : 6730 "this should never be called while in safemode, since we stop " + 6731 "the DT manager before entering safemode!"; 6732 // No need to hold FSN lock since we don't access any internal 6733 // structures, and this is stopped before the FSN shuts itself 6734 // down, etc. 
6735 getEditLog().logCancelDelegationToken(id); 6736 } 6737 6738 private void logReassignLease(String leaseHolder, String src, 6739 String newHolder) { 6740 assert hasWriteLock(); 6741 getEditLog().logReassignLease(leaseHolder, src, newHolder); 6742 } 6743 6744 /** 6745 * 6746 * @return true if delegation token operation is allowed 6747 */ 6748 private boolean isAllowedDelegationTokenOp() throws IOException { 6749 AuthenticationMethod authMethod = getConnectionAuthenticationMethod(); 6750 if (UserGroupInformation.isSecurityEnabled() 6751 && (authMethod != AuthenticationMethod.KERBEROS) 6752 && (authMethod != AuthenticationMethod.KERBEROS_SSL) 6753 && (authMethod != AuthenticationMethod.CERTIFICATE)) { 6754 return false; 6755 } 6756 return true; 6757 } 6758 6759 /** 6760 * Returns authentication method used to establish the connection 6761 * @return AuthenticationMethod used to establish connection 6762 * @throws IOException 6763 */ 6764 private AuthenticationMethod getConnectionAuthenticationMethod() 6765 throws IOException { 6766 UserGroupInformation ugi = getRemoteUser(); 6767 AuthenticationMethod authMethod = ugi.getAuthenticationMethod(); 6768 if (authMethod == AuthenticationMethod.PROXY) { 6769 authMethod = ugi.getRealUser().getAuthenticationMethod(); 6770 } 6771 return authMethod; 6772 } 6773 6774 /** 6775 * Client invoked methods are invoked over RPC and will be in 6776 * RPC call context even if the client exits. 
6777 */ 6778 boolean isExternalInvocation() { 6779 return Server.isRpcInvocation() || NamenodeWebHdfsMethods.isWebHdfsInvocation(); 6780 } 6781 6782 private static InetAddress getRemoteIp() { 6783 InetAddress ip = Server.getRemoteIp(); 6784 if (ip != null) { 6785 return ip; 6786 } 6787 return NamenodeWebHdfsMethods.getRemoteIp(); 6788 } 6789 6790 // optimize ugi lookup for RPC operations to avoid a trip through 6791 // UGI.getCurrentUser which is synch'ed 6792 private static UserGroupInformation getRemoteUser() throws IOException { 6793 return NameNode.getRemoteUser(); 6794 } 6795 6796 /** 6797 * Log fsck event in the audit log 6798 */ 6799 void logFsckEvent(String src, InetAddress remoteAddress) throws IOException { 6800 if (isAuditEnabled()) { 6801 logAuditEvent(true, getRemoteUser(), 6802 remoteAddress, 6803 "fsck", src, null, null); 6804 } 6805 } 6806 /** 6807 * Register NameNodeMXBean 6808 */ 6809 private void registerMXBean() { 6810 mxbeanName = MBeans.register("NameNode", "NameNodeInfo", this); 6811 } 6812 6813 /** 6814 * Class representing Namenode information for JMX interfaces 6815 */ 6816 @Override // NameNodeMXBean 6817 public String getVersion() { 6818 return VersionInfo.getVersion() + ", r" + VersionInfo.getRevision(); 6819 } 6820 6821 @Override // NameNodeMXBean 6822 public long getUsed() { 6823 return this.getCapacityUsed(); 6824 } 6825 6826 @Override // NameNodeMXBean 6827 public long getFree() { 6828 return this.getCapacityRemaining(); 6829 } 6830 6831 @Override // NameNodeMXBean 6832 public long getTotal() { 6833 return this.getCapacityTotal(); 6834 } 6835 6836 @Override // NameNodeMXBean 6837 public String getSafemode() { 6838 if (!this.isInSafeMode()) 6839 return ""; 6840 return "Safe mode is ON. 
" + this.getSafeModeTip(); 6841 } 6842 6843 @Override // NameNodeMXBean 6844 public boolean isUpgradeFinalized() { 6845 return this.getFSImage().isUpgradeFinalized(); 6846 } 6847 6848 @Override // NameNodeMXBean 6849 public long getNonDfsUsedSpace() { 6850 return datanodeStatistics.getCapacityUsedNonDFS(); 6851 } 6852 6853 @Override // NameNodeMXBean 6854 public float getPercentUsed() { 6855 return datanodeStatistics.getCapacityUsedPercent(); 6856 } 6857 6858 @Override // NameNodeMXBean 6859 public long getBlockPoolUsedSpace() { 6860 return datanodeStatistics.getBlockPoolUsed(); 6861 } 6862 6863 @Override // NameNodeMXBean 6864 public float getPercentBlockPoolUsed() { 6865 return datanodeStatistics.getPercentBlockPoolUsed(); 6866 } 6867 6868 @Override // NameNodeMXBean 6869 public float getPercentRemaining() { 6870 return datanodeStatistics.getCapacityRemainingPercent(); 6871 } 6872 6873 @Override // NameNodeMXBean 6874 public long getCacheCapacity() { 6875 return datanodeStatistics.getCacheCapacity(); 6876 } 6877 6878 @Override // NameNodeMXBean 6879 public long getCacheUsed() { 6880 return datanodeStatistics.getCacheUsed(); 6881 } 6882 6883 @Override // NameNodeMXBean 6884 public long getTotalBlocks() { 6885 return getBlocksTotal(); 6886 } 6887 6888 @Override // NameNodeMXBean 6889 @Metric 6890 public long getTotalFiles() { 6891 return getFilesTotal(); 6892 } 6893 6894 @Override // NameNodeMXBean 6895 public long getNumberOfMissingBlocks() { 6896 return getMissingBlocksCount(); 6897 } 6898 6899 @Override // NameNodeMXBean 6900 public long getNumberOfMissingBlocksWithReplicationFactorOne() { 6901 return getMissingReplOneBlocksCount(); 6902 } 6903 6904 @Override // NameNodeMXBean 6905 public int getThreads() { 6906 return ManagementFactory.getThreadMXBean().getThreadCount(); 6907 } 6908 6909 /** 6910 * Returned information is a JSON representation of map with host name as the 6911 * key and value is a map of live node attribute keys to its values 6912 */ 6913 
  @Override // NameNodeMXBean
  public String getLiveNodes() {
    final Map<String, Map<String,Object>> info = 
      new HashMap<String, Map<String,Object>>();
    final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
    blockManager.getDatanodeManager().fetchDatanodes(live, null, true);
    for (DatanodeDescriptor node : live) {
      ImmutableMap.Builder<String, Object> innerinfo =
          ImmutableMap.<String,Object>builder();
      innerinfo
          .put("infoAddr", node.getInfoAddr())
          .put("infoSecureAddr", node.getInfoSecureAddr())
          .put("xferaddr", node.getXferAddr())
          .put("lastContact", getLastContact(node))
          .put("usedSpace", getDfsUsed(node))
          .put("adminState", node.getAdminState().toString())
          .put("nonDfsUsedSpace", node.getNonDfsUsed())
          .put("capacity", node.getCapacity())
          .put("numBlocks", node.numBlocks())
          .put("version", node.getSoftwareVersion())
          .put("used", node.getDfsUsed())
          .put("remaining", node.getRemaining())
          .put("blockScheduled", node.getBlocksScheduled())
          .put("blockPoolUsed", node.getBlockPoolUsed())
          .put("blockPoolUsedPercent", node.getBlockPoolUsedPercent())
          .put("volfails", node.getVolumeFailures());
      // volume-failure details are only present when the node reported them
      VolumeFailureSummary volumeFailureSummary = node.getVolumeFailureSummary();
      if (volumeFailureSummary != null) {
        innerinfo
            .put("failedStorageLocations",
                volumeFailureSummary.getFailedStorageLocations())
            .put("lastVolumeFailureDate",
                volumeFailureSummary.getLastVolumeFailureDate())
            .put("estimatedCapacityLostTotal",
                volumeFailureSummary.getEstimatedCapacityLostTotal());
      }
      info.put(node.getHostName() + ":" + node.getXferPort(), innerinfo.build());
    }
    return JSON.toString(info);
  }

  /**
   * Returned information is a JSON representation of map with host name as the
   * key and value is a map of dead node attribute keys to its values
   */
  @Override // NameNodeMXBean
  public String getDeadNodes() {
    final Map<String, Map<String, Object>> info = 
      new HashMap<String, Map<String, Object>>();
    final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
    blockManager.getDatanodeManager().fetchDatanodes(null, dead, true);
    for (DatanodeDescriptor node : dead) {
      Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder()
          .put("lastContact", getLastContact(node))
          .put("decommissioned", node.isDecommissioned())
          .put("xferaddr", node.getXferAddr())
          .build();
      info.put(node.getHostName() + ":" + node.getXferPort(), innerinfo);
    }
    return JSON.toString(info);
  }

  /**
   * Returned information is a JSON representation of map with host name as the
   * key and value is a map of decommissioning node attribute keys to its
   * values
   */
  @Override // NameNodeMXBean
  public String getDecomNodes() {
    final Map<String, Map<String, Object>> info = 
      new HashMap<String, Map<String, Object>>();
    final List<DatanodeDescriptor> decomNodeList = blockManager.getDatanodeManager(
        ).getDecommissioningNodes();
    for (DatanodeDescriptor node : decomNodeList) {
      Map<String, Object> innerinfo = ImmutableMap
          .<String, Object> builder()
          .put("xferaddr", node.getXferAddr())
          .put("underReplicatedBlocks",
              node.decommissioningStatus.getUnderReplicatedBlocks())
          .put("decommissionOnlyReplicas",
              node.decommissioningStatus.getDecommissionOnlyReplicas())
          .put("underReplicateInOpenFiles",
              node.decommissioningStatus.getUnderReplicatedInOpenFiles())
          .build();
      info.put(node.getHostName() + ":" + node.getXferPort(), innerinfo);
    }
    return JSON.toString(info);
  }

  // Seconds since the node's last heartbeat, from the monotonic clock.
  private long getLastContact(DatanodeDescriptor alivenode) {
    return (monotonicNow() - alivenode.getLastUpdateMonotonic())/1000;
  }

  private long getDfsUsed(DatanodeDescriptor alivenode) {
    return alivenode.getDfsUsed();
  }

  @Override // NameNodeMXBean
  public String getClusterId() {
    return getFSImage().getStorage().getClusterID();
  }

  @Override // NameNodeMXBean
  public String getBlockPoolId() {
    return blockPoolId;
  }

  @Override // NameNodeMXBean
  public String getNameDirStatuses() {
    Map<String, Map<File, StorageDirType>> statusMap =
      new HashMap<String, Map<File, StorageDirType>>();

    Map<File, StorageDirType> activeDirs = new HashMap<File, StorageDirType>();
    for (Iterator<StorageDirectory> it
        = getFSImage().getStorage().dirIterator(); it.hasNext();) {
      StorageDirectory st = it.next();
      activeDirs.put(st.getRoot(), st.getStorageDirType());
    }
    statusMap.put("active", activeDirs);

    List<Storage.StorageDirectory> removedStorageDirs
        = getFSImage().getStorage().getRemovedStorageDirs();
    Map<File, StorageDirType> failedDirs = new HashMap<File, StorageDirType>();
    for (StorageDirectory st : removedStorageDirs) {
      failedDirs.put(st.getRoot(), st.getStorageDirType());
    }
    statusMap.put("failed", failedDirs);

    return JSON.toString(statusMap);
  }

  @Override // NameNodeMXBean
  public String getNodeUsage() {
    float median = 0;
    float max = 0;
    float min = 0;
    float dev = 0;

    final Map<String, Map<String,Object>> info =
        new HashMap<String, Map<String,Object>>();
    final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
    blockManager.getDatanodeManager().fetchDatanodes(live, null, true);

    if (live.size() > 0) {
      float totalDfsUsed = 0;
      float[] usages = new float[live.size()];
      int i = 0;
      for (DatanodeDescriptor dn : live) {
        usages[i++] = dn.getDfsUsedPercent();
        totalDfsUsed += dn.getDfsUsedPercent();
      }
      // after this division totalDfsUsed holds the MEAN usage percent
      totalDfsUsed /= live.size();
      Arrays.sort(usages);
      median = usages[usages.length / 2];
      max = usages[usages.length - 1];
      min = usages[0];

      // standard deviation of usage around the mean
      for (i = 0; i < usages.length; i++) {
        dev += (usages[i] - totalDfsUsed) * (usages[i] - totalDfsUsed);
      }
      dev = (float) Math.sqrt(dev / usages.length);
    }

    final Map<String, Object> innerInfo = new HashMap<String, Object>();
    innerInfo.put("min", StringUtils.format("%.2f%%", min));
    innerInfo.put("median", StringUtils.format("%.2f%%", median));
    innerInfo.put("max", StringUtils.format("%.2f%%", max));
    innerInfo.put("stdDev", StringUtils.format("%.2f%%", dev));
    info.put("nodeUsage", innerInfo);

    return JSON.toString(info);
  }

  @Override // NameNodeMXBean
  public String getNameJournalStatus() {
    List<Map<String, String>> jasList = new ArrayList<Map<String, String>>();
    FSEditLog log = getFSImage().getEditLog();
    if (log != null) {
      boolean openForWrite = log.isOpenForWrite();
      for (JournalAndStream jas : log.getJournals()) {
        final Map<String, String> jasMap = new HashMap<String, String>();
        String manager = jas.getManager().toString();

        jasMap.put("required", String.valueOf(jas.isRequired()));
        jasMap.put("disabled", String.valueOf(jas.isDisabled()));
        jasMap.put("manager", manager);

        if (jas.isDisabled()) {
          jasMap.put("stream", "Failed");
        } else if (openForWrite) {
          EditLogOutputStream elos = jas.getCurrentStream();
          if (elos != null) {
            jasMap.put("stream", elos.generateReport());
          } else {
            jasMap.put("stream", "not currently writing");
          }
        } else {
          jasMap.put("stream", "open for read");
        }
        jasList.add(jasMap);
      }
    }
    return JSON.toString(jasList);
  }

  @Override // NameNodeMXBean
  public String getJournalTransactionInfo() {
    Map<String, String> txnIdMap = new HashMap<String, String>();
    txnIdMap.put("LastAppliedOrWrittenTxId",
        Long.toString(this.getFSImage().getLastAppliedOrWrittenTxId()));
    txnIdMap.put("MostRecentCheckpointTxId",
        Long.toString(this.getFSImage().getMostRecentCheckpointTxId()));
    return JSON.toString(txnIdMap);
  }

  @Override // NameNodeMXBean
  public String getNNStarted() {
    return getStartTime().toString();
  }

  @Override // NameNodeMXBean
  public String getCompileInfo() {
    return VersionInfo.getDate() + " by " + VersionInfo.getUser() +
        " from " + VersionInfo.getBranch();
  }

  /** @return the block manager. */
  public BlockManager getBlockManager() {
    return blockManager;
  }

  public BlockIdManager getBlockIdManager() {
    return blockIdManager;
  }

  /** @return the FSDirectory. */
  public FSDirectory getFSDirectory() {
    return dir;
  }
  /** Set the FSDirectory. */
  @VisibleForTesting
  public void setFSDirectory(FSDirectory dir) {
    this.dir = dir;
  }
  /** @return the cache manager.
*/ 7158 public CacheManager getCacheManager() { 7159 return cacheManager; 7160 } 7161 7162 @Override // NameNodeMXBean 7163 public String getCorruptFiles() { 7164 List<String> list = new ArrayList<String>(); 7165 Collection<FSNamesystem.CorruptFileBlockInfo> corruptFileBlocks; 7166 try { 7167 corruptFileBlocks = listCorruptFileBlocks("/", null); 7168 int corruptFileCount = corruptFileBlocks.size(); 7169 if (corruptFileCount != 0) { 7170 for (FSNamesystem.CorruptFileBlockInfo c : corruptFileBlocks) { 7171 list.add(c.toString()); 7172 } 7173 } 7174 } catch (IOException e) { 7175 LOG.warn("Get corrupt file blocks returned error: " + e.getMessage()); 7176 } 7177 return JSON.toString(list); 7178 } 7179 7180 @Override //NameNodeMXBean 7181 public int getDistinctVersionCount() { 7182 return blockManager.getDatanodeManager().getDatanodesSoftwareVersions() 7183 .size(); 7184 } 7185 7186 @Override //NameNodeMXBean 7187 public Map<String, Integer> getDistinctVersions() { 7188 return blockManager.getDatanodeManager().getDatanodesSoftwareVersions(); 7189 } 7190 7191 @Override //NameNodeMXBean 7192 public String getSoftwareVersion() { 7193 return VersionInfo.getVersion(); 7194 } 7195 7196 /** 7197 * Verifies that the given identifier and password are valid and match. 7198 * @param identifier Token identifier. 7199 * @param password Password in the token. 
   */
  public synchronized void verifyToken(DelegationTokenIdentifier identifier,
      byte[] password) throws InvalidToken, RetriableException {
    try {
      getDelegationTokenSecretManager().verifyToken(identifier, password);
    } catch (InvalidToken it) {
      // While transitioning to active this NN may not yet have replayed the
      // edits that created the token, so tell the client to retry instead of
      // failing outright.
      if (inTransitionToActive()) {
        throw new RetriableException(it);
      }
      throw it;
    }
  }

  @Override
  public boolean isGenStampInFuture(Block block) {
    return blockIdManager.isGenStampInFuture(block);
  }

  @VisibleForTesting
  public EditLogTailer getEditLogTailer() {
    return editLogTailer;
  }

  @VisibleForTesting
  public void setEditLogTailerForTests(EditLogTailer tailer) {
    this.editLogTailer = tailer;
  }

  @VisibleForTesting
  void setFsLockForTests(ReentrantReadWriteLock lock) {
    this.fsLock.coarseLock = lock;
  }

  @VisibleForTesting
  public ReentrantReadWriteLock getFsLockForTests() {
    return fsLock.coarseLock;
  }

  @VisibleForTesting
  public ReentrantLock getCpLockForTests() {
    return cpLock;
  }

  @VisibleForTesting
  public SafeModeInfo getSafeModeInfoForTests() {
    return safeMode;
  }

  @VisibleForTesting
  public void setNNResourceChecker(NameNodeResourceChecker nnResourceChecker) {
    this.nnResourceChecker = nnResourceChecker;
  }

  public SnapshotManager getSnapshotManager() {
    return snapshotManager;
  }

  /** Allow snapshot on a directory.
   */
  void allowSnapshot(String path) throws IOException {
    checkOperation(OperationCategory.WRITE);
    boolean success = false;
    writeLock();
    try {
      // Re-check now that the lock is held.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot allow snapshot for " + path);
      checkSuperuserPrivilege();
      FSDirSnapshotOp.allowSnapshot(dir, snapshotManager, path);
      success = true;
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the namesystem lock.
    getEditLog().logSync();
    logAuditEvent(success, "allowSnapshot", path, null, null);
  }

  /** Disallow snapshot on a directory. */
  void disallowSnapshot(String path) throws IOException {
    checkOperation(OperationCategory.WRITE);
    boolean success = false;
    writeLock();
    try {
      // Re-check now that the lock is held.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot disallow snapshot for " + path);
      checkSuperuserPrivilege();
      FSDirSnapshotOp.disallowSnapshot(dir, snapshotManager, path);
      success = true;
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    logAuditEvent(success, "disallowSnapshot", path, null, null);
  }

  /**
   * Create a snapshot
   * @param snapshotRoot The directory path where the snapshot is taken
   * @param snapshotName The name of the snapshot
   * @param logRetryCache whether to record this call in the retry cache
   * @return the path of the created snapshot, or null on failure
   */
  String createSnapshot(String snapshotRoot, String snapshotName,
      boolean logRetryCache) throws IOException {
    String snapshotPath = null;
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot create snapshot for " + snapshotRoot);
      snapshotPath = FSDirSnapshotOp.createSnapshot(dir,
          snapshotManager, snapshotRoot, snapshotName, logRetryCache);
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    // A non-null snapshotPath doubles as the success flag for auditing.
    logAuditEvent(snapshotPath != null, "createSnapshot", snapshotRoot,
        snapshotPath, null);
    return snapshotPath;
  }

  /**
   * Rename a snapshot
   * @param path The directory path where the snapshot was taken
   * @param snapshotOldName Old snapshot name
   * @param snapshotNewName New snapshot name
   * @param logRetryCache whether to record this call in the retry cache
   * @throws SafeModeException
   * @throws IOException
   */
  void renameSnapshot(
      String path, String snapshotOldName, String snapshotNewName,
      boolean logRetryCache) throws IOException {
    checkOperation(OperationCategory.WRITE);
    boolean success = false;
    writeLock();
    try {
      // Re-check now that the lock is held.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot rename snapshot for " + path);
      FSDirSnapshotOp.renameSnapshot(dir, snapshotManager, path,
          snapshotOldName, snapshotNewName, logRetryCache);
      success = true;
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    String oldSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotOldName);
    String newSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotNewName);
    logAuditEvent(success, "renameSnapshot", oldSnapshotRoot,
        newSnapshotRoot, null);
  }

  /**
   * Get the list of snapshottable directories that are owned
   * by the current user. Return all the snapshottable directories if the
   * current user is a super user.
   * @return The list of all the current snapshottable directories
   * @throws IOException
   */
  public SnapshottableDirectoryStatus[] getSnapshottableDirListing()
      throws IOException {
    SnapshottableDirectoryStatus[] status = null;
    checkOperation(OperationCategory.READ);
    boolean success = false;
    readLock();
    try {
      // Re-check now that the lock is held.
      checkOperation(OperationCategory.READ);
      status = FSDirSnapshotOp.getSnapshottableDirListing(dir, snapshotManager);
      success = true;
    } finally {
      readUnlock();
    }
    logAuditEvent(success, "listSnapshottableDirectory", null, null, null);
    return status;
  }

  /**
   * Get the difference between two snapshots (or between a snapshot and the
   * current status) of a snapshottable directory.
   *
   * @param path The full path of the snapshottable directory.
   * @param fromSnapshot Name of the snapshot to calculate the diff from. Null
   *          or empty string indicates the current tree.
   * @param toSnapshot Name of the snapshot to calculated the diff to. Null or
   *          empty string indicates the current tree.
   * @return A report about the difference between {@code fromSnapshot} and
   *         {@code toSnapshot}. Modified/deleted/created/renamed files and
   *         directories belonging to the snapshottable directories are listed
   *         and labeled as M/-/+/R respectively.
   * @throws IOException
   */
  SnapshotDiffReport getSnapshotDiffReport(String path,
      String fromSnapshot, String toSnapshot) throws IOException {
    SnapshotDiffReport diffs = null;
    checkOperation(OperationCategory.READ);
    readLock();
    try {
      // Re-check now that the lock is held.
      checkOperation(OperationCategory.READ);
      diffs = FSDirSnapshotOp.getSnapshotDiffReport(dir, snapshotManager,
          path, fromSnapshot, toSnapshot);
    } finally {
      readUnlock();
    }

    logAuditEvent(diffs != null, "computeSnapshotDiff", null, null, null);
    return diffs;
  }

  /**
   * Delete a snapshot of a snapshottable directory
   * @param snapshotRoot The snapshottable directory
   * @param snapshotName The name of the to-be-deleted snapshot
   * @param logRetryCache whether to record this call in the retry cache
   * @throws SafeModeException
   * @throws IOException
   */
  void deleteSnapshot(String snapshotRoot, String snapshotName,
      boolean logRetryCache) throws IOException {
    checkOperation(OperationCategory.WRITE);
    boolean success = false;
    writeLock();
    BlocksMapUpdateInfo blocksToBeDeleted = null;
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot delete snapshot for " + snapshotRoot);

      blocksToBeDeleted = FSDirSnapshotOp.deleteSnapshot(dir, snapshotManager,
          snapshotRoot, snapshotName, logRetryCache);
      success = true;
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();

    // Breaking the pattern as removing blocks have to happen outside of the
    // global lock
    if (blocksToBeDeleted != null) {
      removeBlocks(blocksToBeDeleted);
    }

    String rootPath = Snapshot.getSnapshotPath(snapshotRoot, snapshotName);
    logAuditEvent(success, "deleteSnapshot", rootPath, null, null);
  }

  /**
   * Remove a list of INodeDirectorySnapshottable from the SnapshotManager
   * @param toRemove the list of INodeDirectorySnapshottable to be removed
   */
  void removeSnapshottableDirs(List<INodeDirectory> toRemove) {
    if (snapshotManager != null) {
      snapshotManager.removeSnapshottable(toRemove);
    }
  }

  /**
   * Query the current rolling upgrade status, refreshing the
   * created-rollback-image flag from the FSImage first.
   * @return the current rolling upgrade info, or null if none is in progress
   */
  RollingUpgradeInfo queryRollingUpgrade() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.READ);
    readLock();
    try {
      if (rollingUpgradeInfo != null) {
        boolean hasRollbackImage = this.getFSImage().hasRollbackFSImage();
        rollingUpgradeInfo.setCreatedRollbackImages(hasRollbackImage);
      }
      return rollingUpgradeInfo;
    } finally {
      readUnlock();
    }
  }

  /**
   * Start a rolling upgrade. Idempotent: returns the existing info if an
   * upgrade is already in progress. HA and non-HA setups take different
   * preparation paths (see the two internal methods below).
   */
  RollingUpgradeInfo startRollingUpgrade() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      if (isRollingUpgrade()) {
        return rollingUpgradeInfo;
      }
      long startTime = now();
      if (!haEnabled) { // for non-HA, we require NN to be in safemode
        startRollingUpgradeInternalForNonHA(startTime);
      } else { // for HA, NN cannot be in safemode
        checkNameNodeSafeMode("Failed to start rolling upgrade");
        startRollingUpgradeInternal(startTime);
      }

      getEditLog().logStartRollingUpgrade(rollingUpgradeInfo.getStartTime());
      if (haEnabled) {
        // roll the edit log to make sure the standby NameNode can tail
        getFSImage().rollEditLog();
      }
    } finally {
      writeUnlock();
    }

    getEditLog().logSync();
    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "startRollingUpgrade", null, null, null);
    }
    return rollingUpgradeInfo;
  }

  /**
   * Update internal state to indicate that a rolling upgrade is in progress.
   * @param startTime rolling upgrade start time
   */
  void startRollingUpgradeInternal(long startTime)
      throws IOException {
    checkRollingUpgrade("start rolling upgrade");
    getFSImage().checkUpgrade(this);
    setRollingUpgradeInfo(false, startTime);
  }

  /**
   * Update internal state to indicate that a rolling upgrade is in progress for
   * non-HA setup. This requires the namesystem is in SafeMode and after doing a
   * checkpoint for rollback the namesystem will quit the safemode automatically
   */
  private void startRollingUpgradeInternalForNonHA(long startTime)
      throws IOException {
    Preconditions.checkState(!haEnabled);
    if (!isInSafeMode()) {
      throw new IOException("Safe mode should be turned ON "
          + "in order to create namespace image.");
    }
    checkRollingUpgrade("start rolling upgrade");
    getFSImage().checkUpgrade(this);
    // in non-HA setup, we do an extra checkpoint to generate a rollback image
    getFSImage().saveNamespace(this, NameNodeFile.IMAGE_ROLLBACK, null);
    LOG.info("Successfully saved namespace for preparing rolling upgrade.");

    // leave SafeMode automatically
    setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
    setRollingUpgradeInfo(true, startTime);
  }

  /** Record a new rolling upgrade as started at {@code startTime}. */
  void setRollingUpgradeInfo(boolean createdRollbackImages, long startTime) {
    rollingUpgradeInfo = new RollingUpgradeInfo(blockPoolId,
        createdRollbackImages, startTime, 0L);
  }

  /** Update the created-rollback-image flag, if an upgrade is in progress. */
  public void setCreatedRollbackImages(boolean created) {
    if (rollingUpgradeInfo != null) {
      rollingUpgradeInfo.setCreatedRollbackImages(created);
    }
  }

  public RollingUpgradeInfo getRollingUpgradeInfo() {
    return rollingUpgradeInfo;
  }

  public boolean isNeedRollbackFsImage() {
    return needRollbackFsImage;
  }

  public void setNeedRollbackFsImage(boolean needRollbackFsImage) {
    this.needRollbackFsImage = needRollbackFsImage;
  }

  /** @return rolling upgrade status as an MXBean, or null if not upgrading. */
  @Override // NameNodeMXBean
  public RollingUpgradeInfo.Bean getRollingUpgradeStatus() {
    RollingUpgradeInfo upgradeInfo = getRollingUpgradeInfo();
    if (upgradeInfo != null) {
      return new RollingUpgradeInfo.Bean(upgradeInfo);
    }
    return null;
  }

  /** Is rolling upgrade in progress? */
  public boolean isRollingUpgrade() {
    return rollingUpgradeInfo != null;
  }

  /**
   * Fail with {@link RollingUpgradeException} if a rolling upgrade is
   * already in progress.
   * @param action description of the attempted action, for the message
   */
  void checkRollingUpgrade(String action) throws RollingUpgradeException {
    if (isRollingUpgrade()) {
      throw new RollingUpgradeException("Failed to " + action
          + " since a rolling upgrade is already in progress."
          + " Existing rolling upgrade info:\n" + rollingUpgradeInfo);
    }
  }

  /**
   * Finalize the in-progress rolling upgrade: log the finalization, roll the
   * edit log (HA only), bump the storage version and promote the rollback
   * image to the regular image name.
   * @return the finalized upgrade info, or null if no upgrade was in progress
   */
  RollingUpgradeInfo finalizeRollingUpgrade() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.WRITE);
    writeLock();
    final RollingUpgradeInfo returnInfo;
    try {
      checkOperation(OperationCategory.WRITE);
      if (!isRollingUpgrade()) {
        return null;
      }
      checkNameNodeSafeMode("Failed to finalize rolling upgrade");

      returnInfo = finalizeRollingUpgradeInternal(now());
      getEditLog().logFinalizeRollingUpgrade(returnInfo.getFinalizeTime());
      if (haEnabled) {
        // roll the edit log to make sure the standby NameNode can tail
        getFSImage().rollEditLog();
      }
      getFSImage().updateStorageVersion();
      getFSImage().renameCheckpoint(NameNodeFile.IMAGE_ROLLBACK,
          NameNodeFile.IMAGE);
    } finally {
      writeUnlock();
    }

    if (!haEnabled) {
      // Sync not needed for ha since the edit was rolled after logging.
      getEditLog().logSync();
    }

    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "finalizeRollingUpgrade", null, null, null);
    }
    return returnInfo;
  }

  /** Clear the in-progress state and build the finalized info object. */
  RollingUpgradeInfo finalizeRollingUpgradeInternal(long finalizeTime)
      throws RollingUpgradeException {
    final long startTime = rollingUpgradeInfo.getStartTime();
    rollingUpgradeInfo = null;
    return new RollingUpgradeInfo(blockPoolId, false, startTime, finalizeTime);
  }

  /**
   * Add a cache directive.
   * @param directive the directive to add
   * @param flags if FORCE is absent, wait for a cache rescan first
   * @param logRetryCache whether to record this call in the retry cache
   * @return the id of the new directive, or 0 if it could not be added
   */
  long addCacheDirective(CacheDirectiveInfo directive,
      EnumSet<CacheFlag> flags, boolean logRetryCache)
      throws IOException {
    checkOperation(OperationCategory.WRITE);
    CacheDirectiveInfo effectiveDirective = null;
    if (!flags.contains(CacheFlag.FORCE)) {
      cacheManager.waitForRescanIfNeeded();
    }
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      if (isInSafeMode()) {
        throw new SafeModeException(
            "Cannot add cache directive", safeMode);
      }
      effectiveDirective = FSNDNCacheOp.addCacheDirective(this, cacheManager,
          directive, flags, logRetryCache);
    } finally {
      writeUnlock();
      boolean success = effectiveDirective != null;
      if (success) {
        getEditLog().logSync();
      }

      String effectiveDirectiveStr = effectiveDirective != null ?
          effectiveDirective.toString() : null;
      logAuditEvent(success, "addCacheDirective", effectiveDirectiveStr,
          null, null);
    }
    return effectiveDirective != null ? effectiveDirective.getId() : 0;
  }

  /**
   * Modify an existing cache directive.
   * @param directive the directive to modify; must carry a valid id
   * @param flags if FORCE is absent, wait for a cache rescan first
   * @param logRetryCache whether to record this call in the retry cache
   */
  void modifyCacheDirective(CacheDirectiveInfo directive,
      EnumSet<CacheFlag> flags, boolean logRetryCache) throws IOException {
    checkOperation(OperationCategory.WRITE);
    boolean success = false;
    if (!flags.contains(CacheFlag.FORCE)) {
      cacheManager.waitForRescanIfNeeded();
    }
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      if (isInSafeMode()) {
        // NOTE(review): message says "add" but this is the modify path —
        // looks like a copy/paste from addCacheDirective; confirm upstream
        // before changing the user-visible text.
        throw new SafeModeException(
            "Cannot add cache directive", safeMode);
      }
      FSNDNCacheOp.modifyCacheDirective(this, cacheManager, directive, flags,
          logRetryCache);
      success = true;
    } finally {
      writeUnlock();
      if (success) {
        getEditLog().logSync();
      }
      String idStr = "{id: " + directive.getId().toString() + "}";
      logAuditEvent(success, "modifyCacheDirective", idStr,
          directive.toString(), null);
    }
  }

  /**
   * Remove the cache directive with the given id.
   * @param id id of the directive to remove
   * @param logRetryCache whether to record this call in the retry cache
   */
  void removeCacheDirective(long id, boolean logRetryCache) throws IOException {
    checkOperation(OperationCategory.WRITE);
    boolean success = false;
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      if (isInSafeMode()) {
        throw new SafeModeException(
            "Cannot remove cache directives", safeMode);
      }
      FSNDNCacheOp.removeCacheDirective(this, cacheManager, id, logRetryCache);
      success = true;
    } finally {
      writeUnlock();
      String idStr = "{id: " + Long.toString(id) + "}";
      logAuditEvent(success, "removeCacheDirective", idStr, null,
          null);
    }
    getEditLog().logSync();
  }

  /**
   * List cache directives, starting after {@code startId}, optionally
   * filtered. Waits for a cache rescan before reading.
   */
  BatchedListEntries<CacheDirectiveEntry> listCacheDirectives(
      long startId, CacheDirectiveInfo filter) throws IOException {
    checkOperation(OperationCategory.READ);
    BatchedListEntries<CacheDirectiveEntry> results;
    cacheManager.waitForRescanIfNeeded();
    readLock();
    boolean success = false;
    try {
      checkOperation(OperationCategory.READ);
      results = FSNDNCacheOp.listCacheDirectives(this, cacheManager, startId,
          filter);
      success = true;
    } finally {
      readUnlock();
      logAuditEvent(success, "listCacheDirectives", filter.toString(), null,
          null);
    }
    return results;
  }

  /**
   * Add a cache pool.
   * @param req the pool to create
   * @param logRetryCache whether to record this call in the retry cache
   */
  void addCachePool(CachePoolInfo req, boolean logRetryCache)
      throws IOException {
    checkOperation(OperationCategory.WRITE);
    writeLock();
    boolean success = false;
    String poolInfoStr = null;
    try {
      checkOperation(OperationCategory.WRITE);
      if (isInSafeMode()) {
        throw new SafeModeException(
            "Cannot add cache pool " + req.getPoolName(), safeMode);
      }
      CachePoolInfo info = FSNDNCacheOp.addCachePool(this, cacheManager, req,
          logRetryCache);
      poolInfoStr = info.toString();
      success = true;
    } finally {
      writeUnlock();
      logAuditEvent(success, "addCachePool", poolInfoStr, null, null);
    }

    getEditLog().logSync();
  }

  /**
   * Modify an existing cache pool.
   * @param req the pool attributes to apply
   * @param logRetryCache whether to record this call in the retry cache
   */
  void modifyCachePool(CachePoolInfo req, boolean logRetryCache)
      throws IOException {
    checkOperation(OperationCategory.WRITE);
    writeLock();
    boolean success = false;
    try {
      checkOperation(OperationCategory.WRITE);
      if (isInSafeMode()) {
        throw new SafeModeException(
            "Cannot modify cache pool " + req.getPoolName(), safeMode);
      }
      FSNDNCacheOp.modifyCachePool(this, cacheManager, req, logRetryCache);
      success = true;
    } finally {
      writeUnlock();
      String poolNameStr = "{poolName: " +
          (req == null ? null : req.getPoolName()) + "}";
      logAuditEvent(success, "modifyCachePool", poolNameStr,
          req == null ? null : req.toString(), null);
    }

    getEditLog().logSync();
  }

  /**
   * Remove a cache pool by name.
   * @param cachePoolName name of the pool to remove
   * @param logRetryCache whether to record this call in the retry cache
   */
  void removeCachePool(String cachePoolName, boolean logRetryCache)
      throws IOException {
    checkOperation(OperationCategory.WRITE);
    writeLock();
    boolean success = false;
    try {
      checkOperation(OperationCategory.WRITE);
      if (isInSafeMode()) {
        throw new SafeModeException(
            "Cannot remove cache pool " + cachePoolName, safeMode);
      }
      FSNDNCacheOp.removeCachePool(this, cacheManager, cachePoolName,
          logRetryCache);
      success = true;
    } finally {
      writeUnlock();
      String poolNameStr = "{poolName: " + cachePoolName + "}";
      logAuditEvent(success, "removeCachePool", poolNameStr, null, null);
    }

    getEditLog().logSync();
  }

  /**
   * List cache pools, starting after {@code prevKey}. Waits for a cache
   * rescan before reading.
   */
  BatchedListEntries<CachePoolEntry> listCachePools(String prevKey)
      throws IOException {
    BatchedListEntries<CachePoolEntry> results;
    checkOperation(OperationCategory.READ);
    boolean success = false;
    cacheManager.waitForRescanIfNeeded();
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      results = FSNDNCacheOp.listCachePools(this, cacheManager, prevKey);
      success = true;
    } finally {
      readUnlock();
      logAuditEvent(success, "listCachePools", null, null, null);
    }
    return results;
  }

  /** Merge the given ACL entries into the ACL of {@code src}. */
  void modifyAclEntries(final String src, List<AclEntry> aclSpec)
      throws IOException {
    HdfsFileStatus auditStat = null;
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot modify ACL entries on " + src);
      auditStat = FSDirAclOp.modifyAclEntries(dir, src, aclSpec);
    } catch (AccessControlException e) {
      logAuditEvent(false, "modifyAclEntries", src);
      throw e;
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    logAuditEvent(true, "modifyAclEntries", src, null, auditStat);
  }

  /** Remove the given ACL entries from the ACL of {@code src}. */
  void removeAclEntries(final String src, List<AclEntry> aclSpec)
      throws IOException {
    checkOperation(OperationCategory.WRITE);
    HdfsFileStatus auditStat = null;
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot remove ACL entries on " + src);
      auditStat = FSDirAclOp.removeAclEntries(dir, src, aclSpec);
    } catch (AccessControlException e) {
      logAuditEvent(false, "removeAclEntries", src);
      throw e;
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    logAuditEvent(true, "removeAclEntries", src, null, auditStat);
  }

  /** Remove the default ACL entries of {@code src}. */
  void removeDefaultAcl(final String src) throws IOException {
    HdfsFileStatus auditStat = null;
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot remove default ACL entries on " + src);
      auditStat = FSDirAclOp.removeDefaultAcl(dir, src);
    } catch (AccessControlException e) {
      logAuditEvent(false, "removeDefaultAcl", src);
      throw e;
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    logAuditEvent(true, "removeDefaultAcl", src, null, auditStat);
  }

  /** Remove the entire ACL of {@code src}. */
  void removeAcl(final String src) throws IOException {
    HdfsFileStatus auditStat = null;
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot remove ACL on " + src);
      auditStat = FSDirAclOp.removeAcl(dir, src);
    } catch (AccessControlException e) {
      logAuditEvent(false, "removeAcl", src);
      throw e;
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    logAuditEvent(true, "removeAcl", src, null, auditStat);
  }

  /** Replace the ACL of {@code src} with the given entries. */
  void setAcl(final String src, List<AclEntry> aclSpec) throws IOException {
    HdfsFileStatus auditStat = null;
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set ACL on " + src);
      auditStat = FSDirAclOp.setAcl(dir, src, aclSpec);
    } catch (AccessControlException e) {
      logAuditEvent(false, "setAcl", src);
      throw e;
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    logAuditEvent(true, "setAcl", src, null, auditStat);
  }

  /** Get the ACL status (entries, owner, group) of {@code src}. */
  AclStatus getAclStatus(String src) throws IOException {
    checkOperation(OperationCategory.READ);
    boolean success = false;
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      final AclStatus ret = FSDirAclOp.getAclStatus(dir, src);
      success = true;
      return ret;
    } finally {
      readUnlock();
      logAuditEvent(success, "getAclStatus", src);
    }
  }

  /**
   * Create an encryption zone on directory src using the specified key.
   *
   * @param src the path of a directory which will be the root of the
   *          encryption zone. The directory must be empty.
   * @param keyName name of a key which must be present in the configured
   *          KeyProvider.
   * @throws AccessControlException if the caller is not the superuser.
   * @throws UnresolvedLinkException if the path can't be resolved.
   * @throws SafeModeException if the Namenode is in safe mode.
   */
  void createEncryptionZone(final String src, final String keyName,
      boolean logRetryCache)
      throws IOException, UnresolvedLinkException,
          SafeModeException, AccessControlException {
    try {
      // Validate provider and key before taking any lock.
      if (provider == null) {
        throw new IOException(
            "Can't create an encryption zone for " + src +
            " since no key provider is available.");
      }
      if (keyName == null || keyName.isEmpty()) {
        throw new IOException("Must specify a key name when creating an " +
            "encryption zone");
      }
      KeyProvider.Metadata metadata = provider.getMetadata(keyName);
      if (metadata == null) {
        /*
         * It would be nice if we threw something more specific than
         * IOException when the key is not found, but the KeyProvider API
         * doesn't provide for that. If that API is ever changed to throw
         * something more specific (e.g. UnknownKeyException) then we can
         * update this to match it, or better yet, just rethrow the
         * KeyProvider's exception.
         */
        throw new IOException("Key " + keyName + " doesn't exist.");
      }
      // If the provider supports pool for EDEKs, this will fill in the pool
      generateEncryptedDataEncryptionKey(keyName);
      createEncryptionZoneInt(src, metadata.getCipher(),
          keyName, logRetryCache);
    } catch (AccessControlException e) {
      logAuditEvent(false, "createEncryptionZone", src);
      throw e;
    }
  }

  /**
   * Locked portion of encryption zone creation: resolves the path, writes
   * the EZ XAttr, and logs the edit. Caller has already validated the key.
   */
  private void createEncryptionZoneInt(final String srcArg, String cipher,
      String keyName, final boolean logRetryCache) throws IOException {
    String src = srcArg;
    HdfsFileStatus resultingStat = null;
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.WRITE);
    final byte[][] pathComponents =
        FSDirectory.getPathComponentsForReservedPath(src);
    FSPermissionChecker pc = getPermissionChecker();
    writeLock();
    try {
      // Re-check privilege and state now that the lock is held.
      checkSuperuserPrivilege();
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot create encryption zone on " + src);
      src = dir.resolvePath(pc, src, pathComponents);

      final CipherSuite suite = CipherSuite.convert(cipher);
      // For now this is hardcoded, as we only support one method.
      final CryptoProtocolVersion version =
          CryptoProtocolVersion.ENCRYPTION_ZONES;
      final XAttr ezXAttr = dir.createEncryptionZone(src, suite,
          version, keyName);
      List<XAttr> xAttrs = Lists.newArrayListWithCapacity(1);
      xAttrs.add(ezXAttr);
      getEditLog().logSetXAttrs(src, xAttrs, logRetryCache);
      final INodesInPath iip = dir.getINodesInPath4Write(src, false);
      resultingStat = dir.getAuditFileInfo(iip);
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    logAuditEvent(true, "createEncryptionZone", srcArg, null, resultingStat);
  }

  /**
   * Get the encryption zone for the specified path.
   *
   * @param srcArg the path of a file or directory to get the EZ for.
   * @return the EZ of the path or null if none.
   * @throws AccessControlException if the caller is not the superuser.
   * @throws UnresolvedLinkException if the path can't be resolved.
   */
  EncryptionZone getEZForPath(final String srcArg)
      throws AccessControlException, UnresolvedLinkException, IOException {
    String src = srcArg;
    HdfsFileStatus resultingStat = null;
    final byte[][] pathComponents =
        FSDirectory.getPathComponentsForReservedPath(src);
    boolean success = false;
    final FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.READ);
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      src = dir.resolvePath(pc, src, pathComponents);
      final INodesInPath iip = dir.getINodesInPath(src, true);
      if (isPermissionEnabled) {
        dir.checkPathAccess(pc, iip, FsAction.READ);
      }
      final EncryptionZone ret = dir.getEZForPath(iip);
      resultingStat = dir.getAuditFileInfo(iip);
      success = true;
      return ret;
    } finally {
      readUnlock();
      logAuditEvent(success, "getEZForPath", srcArg, null, resultingStat);
    }
  }

  /** List encryption zones, starting after {@code prevId}. Superuser only. */
  BatchedListEntries<EncryptionZone> listEncryptionZones(long prevId)
      throws IOException {
    boolean success = false;
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.READ);
    readLock();
    try {
      // Re-check privilege and state now that the lock is held.
      checkSuperuserPrivilege();
      checkOperation(OperationCategory.READ);
      final BatchedListEntries<EncryptionZone> ret =
          dir.listEncryptionZones(prevId);
      success = true;
      return ret;
    } finally {
      readUnlock();
      logAuditEvent(success, "listEncryptionZones", null);
    }
  }

  /**
   * Set an extended attribute on {@code src}.
   * @param xAttr the attribute to set
   * @param flag CREATE/REPLACE semantics
   * @param logRetryCache whether to record this call in the retry cache
   */
  void setXAttr(String src, XAttr xAttr, EnumSet<XAttrSetFlag> flag,
      boolean logRetryCache)
      throws IOException {
    checkOperation(OperationCategory.WRITE);
    HdfsFileStatus auditStat = null;
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set XAttr on " + src);
      auditStat = FSDirXAttrOp.setXAttr(dir, src, xAttr, flag, logRetryCache);
    } catch (AccessControlException e) {
      logAuditEvent(false, "setXAttr", src);
      throw e;
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    logAuditEvent(true, "setXAttr", src, null, auditStat);
  }

  /** Get the values of the requested extended attributes of {@code src}. */
  List<XAttr> getXAttrs(final String src, List<XAttr> xAttrs)
      throws IOException {
    checkOperation(OperationCategory.READ);
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      return FSDirXAttrOp.getXAttrs(dir, src, xAttrs);
    } catch (AccessControlException e) {
      logAuditEvent(false, "getXAttrs", src);
      throw e;
    } finally {
      readUnlock();
    }
  }

  /** List the extended attributes of {@code src}. */
  List<XAttr> listXAttrs(String src) throws IOException {
    checkOperation(OperationCategory.READ);
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      return FSDirXAttrOp.listXAttrs(dir, src);
    } catch (AccessControlException e) {
      logAuditEvent(false, "listXAttrs", src);
      throw e;
    } finally {
      readUnlock();
    }
  }

  /**
   * Remove an extended attribute from {@code src}.
   * @param xAttr the attribute to remove
   * @param logRetryCache whether to record this call in the retry cache
   */
  void removeXAttr(String src, XAttr xAttr, boolean logRetryCache)
      throws IOException {
    checkOperation(OperationCategory.WRITE);
    HdfsFileStatus auditStat = null;
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot remove XAttr entry on " + src);
      auditStat = FSDirXAttrOp.removeXAttr(dir, src, xAttr, logRetryCache);
    } catch (AccessControlException e) {
      logAuditEvent(false, "removeXAttr", src);
      throw e;
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    logAuditEvent(true, "removeXAttr", src, null, auditStat);
  }

  /**
   * Check whether the current user may access {@code src} with {@code mode}.
   * Throws FileNotFoundException if the path does not resolve, or
   * AccessControlException (audited) if access is denied.
   */
  void checkAccess(String src, FsAction mode) throws IOException {
    checkOperation(OperationCategory.READ);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      final INodesInPath iip = dir.getINodesInPath(src, true);
      INode inode = iip.getLastINode();
      if (inode == null) {
        throw new FileNotFoundException("Path not found");
      }
      if (isPermissionEnabled) {
        FSPermissionChecker pc = getPermissionChecker();
        dir.checkPathAccess(pc, iip, mode);
      }
    } catch (AccessControlException e) {
      logAuditEvent(false, "checkAccess", src);
      throw e;
    } finally {
      readUnlock();
    }
  }

  /**
   * Default AuditLogger implementation; used when no access logger is
   * defined in the config file. It can also be explicitly listed in the
   * config file.
   */
  private static class DefaultAuditLogger extends HdfsAuditLogger {

    // Whether to append the delegation token tracking id to each entry.
    private boolean logTokenTrackingId;

    @Override
    public void initialize(Configuration conf) {
      logTokenTrackingId = conf.getBoolean(
          DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY,
          DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT);
    }

    /**
     * {@inheritDoc}
     * Formats one tab-separated audit line (allowed/ugi/ip/cmd/src/dst/perm,
     * optionally trackingId, then proto) into the thread-local buffer and
     * emits it.
     */
    @Override
    public void logAuditEvent(boolean succeeded, String userName,
        InetAddress addr, String cmd, String src, String dst,
        FileStatus status, UserGroupInformation ugi,
        DelegationTokenSecretManager dtSecretManager) {
      if (auditLog.isInfoEnabled()) {
        final StringBuilder sb = auditBuffer.get();
        sb.setLength(0);
        sb.append("allowed=").append(succeeded).append("\t");
        sb.append("ugi=").append(userName).append("\t");
        sb.append("ip=").append(addr).append("\t");
        sb.append("cmd=").append(cmd).append("\t");
        sb.append("src=").append(src).append("\t");
        sb.append("dst=").append(dst).append("\t");
        if (null == status) {
          sb.append("perm=null");
        } else {
          sb.append("perm=");
          sb.append(status.getOwner()).append(":");
          sb.append(status.getGroup()).append(":");
          sb.append(status.getPermission());
        }
        if (logTokenTrackingId) {
          sb.append("\t").append("trackingId=");
          String trackingId = null;
          // Only token-authenticated callers have a tracking id to report.
          if (ugi != null && dtSecretManager != null
              && ugi.getAuthenticationMethod() == AuthenticationMethod.TOKEN) {
            for (TokenIdentifier tid: ugi.getTokenIdentifiers()) {
              if (tid instanceof DelegationTokenIdentifier) {
                DelegationTokenIdentifier dtid =
                    (DelegationTokenIdentifier)tid;
                trackingId = dtSecretManager.getTokenTrackingId(dtid);
                break;
              }
            }
          }
          sb.append(trackingId);
        }
        sb.append("\t").append("proto=");
        sb.append(NamenodeWebHdfsMethods.isWebHdfsInvocation() ? "webhdfs" : "rpc");
        logAuditMessage(sb.toString());
      }
    }

    public void logAuditMessage(String message) {
      auditLog.info(message);
    }
  }

  /**
   * Wrap all configured log4j appenders of the audit logger in a single
   * AsyncAppender so audit writes do not block namesystem operations.
   * No-op (with a warning) if the audit log is not log4j-backed.
   */
  private static void enableAsyncAuditLog() {
    if (!(auditLog instanceof Log4JLogger)) {
      LOG.warn("Log4j is required to enable async auditlog");
      return;
    }
    Logger logger = ((Log4JLogger)auditLog).getLogger();
    @SuppressWarnings("unchecked")
    List<Appender> appenders = Collections.list(logger.getAllAppenders());
    // failsafe against trying to async it more than once
    if (!appenders.isEmpty() && !(appenders.get(0) instanceof AsyncAppender)) {
      AsyncAppender asyncAppender = new AsyncAppender();
      // change logger to have an async appender containing all the
      // previously configured appenders
      for (Appender appender : appenders) {
        logger.removeAppender(appender);
        asyncAppender.addAppender(appender);
      }
      logger.addAppender(asyncAppender);
    }
  }
}