001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hdfs.server.namenode; 019 020import static org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.EncryptedKeyVersion; 021import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT; 022import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY; 023import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT; 024import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY; 025import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_DEFAULT; 026import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_KEY; 027import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_DEFAULT; 028import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY; 029import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CHECKSUM_TYPE_DEFAULT; 030import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY; 031import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT; 032import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_KEY; 033import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_DEFAULT; 034import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_KEY; 035import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_DEFAULT; 036import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_KEY; 037import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT; 038import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY; 039import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOGGERS_KEY; 040import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT; 041import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY; 042import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT; 043import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY; 044import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT; 045import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY; 046import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME; 047import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT; 048import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY; 049import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT; 050import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY; 051import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT; 052import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY; 053import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT; 054import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY; 055import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY; 056import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY; 057import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS; 058import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT; 059import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD; 060import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT; 061import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT; 062import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY; 063import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC; 064import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC_DEFAULT; 065import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_INODE_ATTRIBUTES_PROVIDER_KEY; 066import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_DEFAULT; 067import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_KEY; 068import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY; 069import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_DEFAULT; 070import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY; 071import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY; 072import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT; 073import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY; 074import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT; 075import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY; 076import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT; 077import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY; 078import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY; 079import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_DEFAULT; 080import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY; 081import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT; 082import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY; 083import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY; 084import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_DEFAULT; 085import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY; 086import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT; 087import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY; 088import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_DEFAULT; 089import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY; 090import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_DEFAULT; 091import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_KEY; 092import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.SECURITY_XATTR_UNREADABLE_BY_SUPERUSER; 093import static org.apache.hadoop.util.Time.now; 094import static org.apache.hadoop.util.Time.monotonicNow; 095import static 
org.apache.hadoop.hdfs.server.namenode.top.metrics.TopMetrics.TOPMETRICS_METRICS_SOURCE_NAME; 096 097import java.io.BufferedWriter; 098import java.io.ByteArrayInputStream; 099import java.io.DataInput; 100import java.io.DataInputStream; 101import java.io.DataOutputStream; 102import java.io.File; 103import java.io.FileNotFoundException; 104import java.io.FileOutputStream; 105import java.io.IOException; 106import java.io.OutputStreamWriter; 107import java.io.PrintWriter; 108import java.io.StringWriter; 109import java.lang.management.ManagementFactory; 110import java.net.InetAddress; 111import java.net.URI; 112import java.security.GeneralSecurityException; 113import java.util.ArrayList; 114import java.util.Arrays; 115import java.util.Collection; 116import java.util.Collections; 117import java.util.Date; 118import java.util.EnumSet; 119import java.util.HashMap; 120import java.util.HashSet; 121import java.util.Iterator; 122import java.util.LinkedHashSet; 123import java.util.List; 124import java.util.Map; 125import java.util.Set; 126import java.util.TreeMap; 127import java.util.concurrent.TimeUnit; 128import java.util.concurrent.locks.Condition; 129import java.util.concurrent.locks.ReentrantLock; 130import java.util.concurrent.locks.ReentrantReadWriteLock; 131 132import javax.management.NotCompliantMBeanException; 133import javax.management.ObjectName; 134import javax.management.StandardMBean; 135 136import org.apache.commons.logging.Log; 137import org.apache.commons.logging.LogFactory; 138import org.apache.commons.logging.impl.Log4JLogger; 139import org.apache.hadoop.HadoopIllegalArgumentException; 140import org.apache.hadoop.classification.InterfaceAudience; 141import org.apache.hadoop.conf.Configuration; 142import org.apache.hadoop.crypto.CipherSuite; 143import org.apache.hadoop.crypto.CryptoProtocolVersion; 144import org.apache.hadoop.crypto.key.KeyProvider; 145import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension; 146import 
org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries; 147import org.apache.hadoop.fs.CacheFlag; 148import org.apache.hadoop.fs.ContentSummary; 149import org.apache.hadoop.fs.CreateFlag; 150import org.apache.hadoop.fs.FileAlreadyExistsException; 151import org.apache.hadoop.fs.FileEncryptionInfo; 152import org.apache.hadoop.fs.FileStatus; 153import org.apache.hadoop.fs.FileSystem; 154import org.apache.hadoop.fs.FsServerDefaults; 155import org.apache.hadoop.fs.InvalidPathException; 156import org.apache.hadoop.fs.Options; 157import org.apache.hadoop.fs.ParentNotDirectoryException; 158import org.apache.hadoop.fs.Path; 159import org.apache.hadoop.fs.UnresolvedLinkException; 160import org.apache.hadoop.fs.XAttr; 161import org.apache.hadoop.fs.XAttrSetFlag; 162import org.apache.hadoop.fs.permission.AclEntry; 163import org.apache.hadoop.fs.permission.AclStatus; 164import org.apache.hadoop.fs.permission.FsAction; 165import org.apache.hadoop.fs.permission.FsPermission; 166import org.apache.hadoop.fs.permission.PermissionStatus; 167import org.apache.hadoop.fs.StorageType; 168import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; 169import org.apache.hadoop.ha.ServiceFailedException; 170import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy; 171import org.apache.hadoop.hdfs.DFSConfigKeys; 172import org.apache.hadoop.hdfs.DFSUtil; 173import org.apache.hadoop.hdfs.HAUtil; 174import org.apache.hadoop.hdfs.HdfsConfiguration; 175import org.apache.hadoop.hdfs.UnknownCryptoProtocolVersionException; 176import org.apache.hadoop.hdfs.XAttrHelper; 177import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException; 178import org.apache.hadoop.hdfs.protocol.Block; 179import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry; 180import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo; 181import org.apache.hadoop.hdfs.protocol.CachePoolEntry; 182import org.apache.hadoop.hdfs.protocol.CachePoolInfo; 183import org.apache.hadoop.hdfs.protocol.ClientProtocol; 184import 
org.apache.hadoop.hdfs.protocol.DatanodeID; 185import org.apache.hadoop.hdfs.protocol.DatanodeInfo; 186import org.apache.hadoop.hdfs.protocol.DirectoryListing; 187import org.apache.hadoop.hdfs.protocol.EncryptionZone; 188import org.apache.hadoop.hdfs.protocol.ExtendedBlock; 189import org.apache.hadoop.hdfs.protocol.HdfsConstants; 190import org.apache.hadoop.hdfs.protocol.LastBlockWithStatus; 191import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; 192import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; 193import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; 194import org.apache.hadoop.hdfs.protocol.LocatedBlock; 195import org.apache.hadoop.hdfs.protocol.LocatedBlocks; 196import org.apache.hadoop.hdfs.protocol.QuotaExceededException; 197import org.apache.hadoop.hdfs.protocol.RecoveryInProgressException; 198import org.apache.hadoop.hdfs.protocol.RollingUpgradeException; 199import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo; 200import org.apache.hadoop.hdfs.protocol.SnapshotAccessControlException; 201import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport; 202import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus; 203import org.apache.hadoop.hdfs.protocol.datatransfer.ReplaceDatanodeOnFailure; 204import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager; 205import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager.AccessMode; 206import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; 207import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager; 208import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager.SecretManagerState; 209import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection; 210import org.apache.hadoop.hdfs.server.blockmanagement.BlockIdManager; 211import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoContiguous; 212import 
org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoContiguousUnderConstruction; 213import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; 214import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; 215import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager; 216import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStatistics; 217import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo; 218import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState; 219import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; 220import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption; 221import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; 222import org.apache.hadoop.hdfs.server.common.Storage; 223import org.apache.hadoop.hdfs.server.common.Storage.StorageDirType; 224import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; 225import org.apache.hadoop.hdfs.server.common.Util; 226import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection; 227import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo; 228import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream; 229import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease; 230import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile; 231import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; 232import org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer; 233import org.apache.hadoop.hdfs.server.namenode.ha.HAContext; 234import org.apache.hadoop.hdfs.server.namenode.ha.StandbyCheckpointer; 235import org.apache.hadoop.hdfs.server.namenode.metrics.FSNamesystemMBean; 236import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; 237import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot; 238import 
org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager; 239import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase; 240import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress; 241import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter; 242import org.apache.hadoop.hdfs.server.namenode.startupprogress.Status; 243import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step; 244import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType; 245import org.apache.hadoop.hdfs.server.namenode.top.TopAuditLogger; 246import org.apache.hadoop.hdfs.server.namenode.top.TopConf; 247import org.apache.hadoop.hdfs.server.namenode.top.metrics.TopMetrics; 248import org.apache.hadoop.hdfs.server.namenode.top.window.RollingWindowManager; 249import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods; 250import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; 251import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; 252import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport; 253import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; 254import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat; 255import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand; 256import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; 257import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; 258import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks; 259import org.apache.hadoop.hdfs.server.protocol.StorageReport; 260import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary; 261import org.apache.hadoop.io.EnumSetWritable; 262import org.apache.hadoop.io.IOUtils; 263import org.apache.hadoop.io.Text; 264import org.apache.hadoop.ipc.RetriableException; 265import org.apache.hadoop.ipc.RetryCache; 266import org.apache.hadoop.ipc.Server; 267import org.apache.hadoop.ipc.StandbyException; 268import 
org.apache.hadoop.metrics2.annotation.Metric; 269import org.apache.hadoop.metrics2.annotation.Metrics; 270import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; 271import org.apache.hadoop.metrics2.lib.MetricsRegistry; 272import org.apache.hadoop.metrics2.lib.MutableRatesWithAggregation; 273import org.apache.hadoop.metrics2.util.MBeans; 274import org.apache.hadoop.net.NetworkTopology; 275import org.apache.hadoop.net.Node; 276import org.apache.hadoop.net.NodeBase; 277import org.apache.hadoop.security.AccessControlException; 278import org.apache.hadoop.security.UserGroupInformation; 279import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; 280import org.apache.hadoop.security.token.SecretManager.InvalidToken; 281import org.apache.hadoop.security.token.Token; 282import org.apache.hadoop.security.token.TokenIdentifier; 283import org.apache.hadoop.security.token.delegation.DelegationKey; 284import org.apache.hadoop.util.ChunkedArrayList; 285import org.apache.hadoop.util.Daemon; 286import org.apache.hadoop.util.DataChecksum; 287import org.apache.hadoop.util.ReflectionUtils; 288import org.apache.hadoop.util.StringUtils; 289import org.apache.hadoop.util.VersionInfo; 290import org.apache.log4j.Appender; 291import org.apache.log4j.AsyncAppender; 292import org.apache.log4j.Logger; 293import org.codehaus.jackson.map.ObjectMapper; 294import org.mortbay.util.ajax.JSON; 295 296import com.google.common.annotations.VisibleForTesting; 297import com.google.common.base.Charsets; 298import com.google.common.base.Preconditions; 299import com.google.common.collect.ImmutableMap; 300import com.google.common.collect.Lists; 301 302/*************************************************** 303 * FSNamesystem does the actual bookkeeping work for the 304 * DataNode. 305 * 306 * It tracks several important tables. 
307 * 308 * 1) valid fsname --> blocklist (kept on disk, logged) 309 * 2) Set of all valid blocks (inverted #1) 310 * 3) block --> machinelist (kept in memory, rebuilt dynamically from reports) 311 * 4) machine --> blocklist (inverted #2) 312 * 5) LRU cache of updated-heartbeat machines 313 ***************************************************/ 314@InterfaceAudience.Private 315@Metrics(context="dfs") 316public class FSNamesystem implements Namesystem, FSNamesystemMBean, 317 NameNodeMXBean { 318 public static final Log LOG = LogFactory.getLog(FSNamesystem.class); 319 private final MetricsRegistry registry = new MetricsRegistry("FSNamesystem"); 320 @Metric final MutableRatesWithAggregation detailedLockHoldTimeMetrics = 321 registry.newRatesWithAggregation("detailedLockHoldTimeMetrics"); 322 323 private static final ThreadLocal<StringBuilder> auditBuffer = 324 new ThreadLocal<StringBuilder>() { 325 @Override 326 protected StringBuilder initialValue() { 327 return new StringBuilder(); 328 } 329 }; 330 331 private final BlockIdManager blockIdManager; 332 333 @VisibleForTesting 334 public boolean isAuditEnabled() { 335 return !isDefaultAuditLogger || auditLog.isInfoEnabled(); 336 } 337 338 private void logAuditEvent(boolean succeeded, String cmd, String src) 339 throws IOException { 340 logAuditEvent(succeeded, cmd, src, null, null); 341 } 342 343 private void logAuditEvent(boolean succeeded, String cmd, String src, 344 String dst, HdfsFileStatus stat) throws IOException { 345 if (isAuditEnabled() && isExternalInvocation()) { 346 logAuditEvent(succeeded, getRemoteUser(), getRemoteIp(), 347 cmd, src, dst, stat); 348 } 349 } 350 351 private void logAuditEvent(boolean succeeded, 352 UserGroupInformation ugi, InetAddress addr, String cmd, String src, 353 String dst, HdfsFileStatus stat) { 354 FileStatus status = null; 355 if (stat != null) { 356 Path symlink = stat.isSymlink() ? new Path(stat.getSymlink()) : null; 357 Path path = dst != null ? 
new Path(dst) : new Path(src); 358 status = new FileStatus(stat.getLen(), stat.isDir(), 359 stat.getReplication(), stat.getBlockSize(), stat.getModificationTime(), 360 stat.getAccessTime(), stat.getPermission(), stat.getOwner(), 361 stat.getGroup(), symlink, path); 362 } 363 for (AuditLogger logger : auditLoggers) { 364 if (logger instanceof HdfsAuditLogger) { 365 HdfsAuditLogger hdfsLogger = (HdfsAuditLogger) logger; 366 hdfsLogger.logAuditEvent(succeeded, ugi.toString(), addr, cmd, src, dst, 367 status, ugi, dtSecretManager); 368 } else { 369 logger.logAuditEvent(succeeded, ugi.toString(), addr, 370 cmd, src, dst, status); 371 } 372 } 373 } 374 375 /** 376 * Logger for audit events, noting successful FSNamesystem operations. Emits 377 * to FSNamesystem.audit at INFO. Each event causes a set of tab-separated 378 * <code>key=value</code> pairs to be written for the following properties: 379 * <code> 380 * ugi=<ugi in RPC> 381 * ip=<remote IP> 382 * cmd=<command> 383 * src=<src path> 384 * dst=<dst path (optional)> 385 * perm=<permissions (optional)> 386 * </code> 387 */ 388 public static final Log auditLog = LogFactory.getLog( 389 FSNamesystem.class.getName() + ".audit"); 390 391 static final int DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED = 100; 392 static int BLOCK_DELETION_INCREMENT = 1000; 393 private final boolean isPermissionEnabled; 394 private final UserGroupInformation fsOwner; 395 private final String supergroup; 396 private final boolean standbyShouldCheckpoint; 397 398 // Scan interval is not configurable. 
  // One hour, expressed in milliseconds.
  private static final long DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL =
    TimeUnit.MILLISECONDS.convert(1, TimeUnit.HOURS);
  // Also handed to HdfsAuditLogger instances when audit events are logged.
  final DelegationTokenSecretManager dtSecretManager;
  private final boolean alwaysUseDelegationTokensForTests;

  private static final Step STEP_AWAITING_REPORTED_BLOCKS =
    new Step(StepType.AWAITING_REPORTED_BLOCKS);

  // Tracks whether the default audit logger is the only configured audit
  // logger; this allows isAuditEnabled() to return false in case the
  // underlying logger is disabled, and avoid some unnecessary work.
  private final boolean isDefaultAuditLogger;
  // Every logger in this list receives each audit event.
  private final List<AuditLogger> auditLoggers;

  /** The namespace tree. */
  FSDirectory dir;
  private final BlockManager blockManager;
  private final SnapshotManager snapshotManager;
  private final CacheManager cacheManager;
  private final DatanodeStatistics datanodeStatistics;

  private String nameserviceId;

  private volatile RollingUpgradeInfo rollingUpgradeInfo = null;
  /**
   * A flag that indicates whether the checkpointer should checkpoint a rollback
   * fsimage. The edit log tailer sets this flag. The checkpoint will create a
   * rollback fsimage if the flag is true, and then change the flag to false.
   */
  private volatile boolean needRollbackFsImage;

  // Block pool ID used by this namenode
  private String blockPoolId;

  // Constructed eagerly with a reference to this namesystem.
  final LeaseManager leaseManager = new LeaseManager(this);

  volatile Daemon smmthread = null;  // SafeModeMonitor thread

  Daemon nnrmthread = null; // NamenodeResourceMonitor thread

  Daemon nnEditLogRoller = null; // NameNodeEditLogRoller thread

  // A daemon to periodically clean up corrupt lazyPersist files
  // from the name space.
  Daemon lazyPersistFileScrubber = null;
  /**
   * When an active namenode will roll its own edit log, in # edits
   */
  private final long editLogRollerThreshold;
  /**
   * Check interval of an active namenode's edit log roller thread
   */
  private final int editLogRollerInterval;

  /**
   * How frequently we scan and unlink corrupt lazyPersist files.
   * (In seconds)
   */
  private final int lazyPersistFileScrubIntervalSec;

  private volatile boolean hasResourcesAvailable = false;
  private volatile boolean fsRunning = true;

  /** The start time of the namesystem. */
  // Captured from Time.now() when this instance's field initializers run.
  private final long startTime = now();

  /** The interval of namenode checking for the disk space availability */
  private final long resourceRecheckInterval;

  // The actual resource checker instance.
  NameNodeResourceChecker nnResourceChecker;

  private final FsServerDefaults serverDefaults;
  private final boolean supportAppends;
  private final ReplaceDatanodeOnFailure dtpReplaceDatanodeOnFailure;

  private volatile SafeModeInfo safeMode;  // safe mode information

  private final long maxFsObjects;          // maximum number of fs objects

  private final long minBlockSize;         // minimum block size
  private final long maxBlocksPerFile;     // maximum # of blocks per file

  // precision of access times.
  private final long accessTimePrecision;

  /** Lock to protect FSNamesystem. */
  private final FSNamesystemLock fsLock;

  /**
   * Checkpoint lock to protect FSNamesystem modification on standby NNs.
   * Unlike fsLock, it does not affect block updates. On active NNs, this lock
   * does not provide proper protection, because there are operations that
   * modify both block and name system state.  Even on standby, fsLock is
   * used when block state changes need to be blocked.
494 */ 495 private final ReentrantLock cpLock; 496 497 /** 498 * Used when this NN is in standby state to read from the shared edit log. 499 */ 500 private EditLogTailer editLogTailer = null; 501 502 /** 503 * Used when this NN is in standby state to perform checkpoints. 504 */ 505 private StandbyCheckpointer standbyCheckpointer; 506 507 /** 508 * Reference to the NN's HAContext object. This is only set once 509 * {@link #startCommonServices(Configuration, HAContext)} is called. 510 */ 511 private HAContext haContext; 512 513 private final boolean haEnabled; 514 515 /** flag indicating whether replication queues have been initialized */ 516 boolean initializedReplQueues = false; 517 518 /** 519 * Whether the namenode is in the middle of starting the active service 520 */ 521 private volatile boolean startingActiveService = false; 522 523 private final RetryCache retryCache; 524 525 private KeyProviderCryptoExtension provider = null; 526 527 private volatile boolean imageLoaded = false; 528 private final Condition cond; 529 530 private final FSImage fsImage; 531 532 private final TopConf topConf; 533 private TopMetrics topMetrics; 534 535 private INodeAttributeProvider inodeAttributeProvider; 536 537 /** 538 * Notify that loading of this FSDirectory is complete, and 539 * it is imageLoaded for use 540 */ 541 void imageLoadComplete() { 542 Preconditions.checkState(!imageLoaded, "FSDirectory already loaded"); 543 setImageLoaded(); 544 } 545 546 void setImageLoaded() { 547 if(imageLoaded) return; 548 writeLock(); 549 try { 550 setImageLoaded(true); 551 dir.markNameCacheInitialized(); 552 cond.signalAll(); 553 } finally { 554 writeUnlock("setImageLoaded"); 555 } 556 } 557 558 //This is for testing purposes only 559 @VisibleForTesting 560 boolean isImageLoaded() { 561 return imageLoaded; 562 } 563 564 // exposed for unit tests 565 protected void setImageLoaded(boolean flag) { 566 imageLoaded = flag; 567 } 568 569 /** 570 * Block until the object is imageLoaded to be 
used. 571 */ 572 void waitForLoadingFSImage() { 573 if (!imageLoaded) { 574 writeLock(); 575 try { 576 while (!imageLoaded) { 577 try { 578 cond.await(5000, TimeUnit.MILLISECONDS); 579 } catch (InterruptedException ignored) { 580 } 581 } 582 } finally { 583 writeUnlock(); 584 } 585 } 586 } 587 588 /** 589 * Clear all loaded data 590 */ 591 void clear() { 592 dir.reset(); 593 dtSecretManager.reset(); 594 blockIdManager.clear(); 595 leaseManager.removeAllLeases(); 596 snapshotManager.clearSnapshottableDirs(); 597 cacheManager.clear(); 598 setImageLoaded(false); 599 blockManager.clear(); 600 } 601 602 @VisibleForTesting 603 LeaseManager getLeaseManager() { 604 return leaseManager; 605 } 606 607 boolean isHaEnabled() { 608 return haEnabled; 609 } 610 611 /** 612 * Check the supplied configuration for correctness. 613 * @param conf Supplies the configuration to validate. 614 * @throws IOException if the configuration could not be queried. 615 * @throws IllegalArgumentException if the configuration is invalid. 616 */ 617 private static void checkConfiguration(Configuration conf) 618 throws IOException { 619 620 final Collection<URI> namespaceDirs = 621 FSNamesystem.getNamespaceDirs(conf); 622 final Collection<URI> editsDirs = 623 FSNamesystem.getNamespaceEditsDirs(conf); 624 final Collection<URI> requiredEditsDirs = 625 FSNamesystem.getRequiredNamespaceEditsDirs(conf); 626 final Collection<URI> sharedEditsDirs = 627 FSNamesystem.getSharedEditsDirs(conf); 628 629 for (URI u : requiredEditsDirs) { 630 if (u.toString().compareTo( 631 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_DEFAULT) == 0) { 632 continue; 633 } 634 635 // Each required directory must also be in editsDirs or in 636 // sharedEditsDirs. 637 if (!editsDirs.contains(u) && 638 !sharedEditsDirs.contains(u)) { 639 throw new IllegalArgumentException( 640 "Required edits directory " + u.toString() + " not present in " + 641 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY + ". 
" + 642 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY + "=" + 643 editsDirs.toString() + "; " + 644 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY + "=" + 645 requiredEditsDirs.toString() + ". " + 646 DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY + "=" + 647 sharedEditsDirs.toString() + "."); 648 } 649 } 650 651 if (namespaceDirs.size() == 1) { 652 LOG.warn("Only one image storage directory (" 653 + DFS_NAMENODE_NAME_DIR_KEY + ") configured. Beware of data loss" 654 + " due to lack of redundant storage directories!"); 655 } 656 if (editsDirs.size() == 1) { 657 LOG.warn("Only one namespace edits storage directory (" 658 + DFS_NAMENODE_EDITS_DIR_KEY + ") configured. Beware of data loss" 659 + " due to lack of redundant storage directories!"); 660 } 661 } 662 663 /** 664 * Instantiates an FSNamesystem loaded from the image and edits 665 * directories specified in the passed Configuration. 666 * 667 * @param conf the Configuration which specifies the storage directories 668 * from which to load 669 * @return an FSNamesystem which contains the loaded namespace 670 * @throws IOException if loading fails 671 */ 672 static FSNamesystem loadFromDisk(Configuration conf) throws IOException { 673 674 checkConfiguration(conf); 675 FSImage fsImage = new FSImage(conf, 676 FSNamesystem.getNamespaceDirs(conf), 677 FSNamesystem.getNamespaceEditsDirs(conf)); 678 FSNamesystem namesystem = new FSNamesystem(conf, fsImage, false); 679 StartupOption startOpt = NameNode.getStartupOption(conf); 680 if (startOpt == StartupOption.RECOVER) { 681 namesystem.setSafeMode(SafeModeAction.SAFEMODE_ENTER); 682 } 683 684 long loadStart = monotonicNow(); 685 try { 686 namesystem.loadFSImage(startOpt); 687 } catch (IOException ioe) { 688 LOG.warn("Encountered exception loading fsimage", ioe); 689 fsImage.close(); 690 throw ioe; 691 } 692 long timeTakenToLoadFSImage = monotonicNow() - loadStart; 693 LOG.info("Finished loading FSImage in " + timeTakenToLoadFSImage + " msecs"); 694 NameNodeMetrics 
nnMetrics = NameNode.getNameNodeMetrics(); 695 if (nnMetrics != null) { 696 nnMetrics.setFsImageLoadTime((int) timeTakenToLoadFSImage); 697 } 698 return namesystem; 699 } 700 701 FSNamesystem(Configuration conf, FSImage fsImage) throws IOException { 702 this(conf, fsImage, false); 703 } 704 705 /** 706 * Create an FSNamesystem associated with the specified image. 707 * 708 * Note that this does not load any data off of disk -- if you would 709 * like that behavior, use {@link #loadFromDisk(Configuration)} 710 * 711 * @param conf configuration 712 * @param fsImage The FSImage to associate with 713 * @param ignoreRetryCache Whether or not should ignore the retry cache setup 714 * step. For Secondary NN this should be set to true. 715 * @throws IOException on bad configuration 716 */ 717 FSNamesystem(Configuration conf, FSImage fsImage, boolean ignoreRetryCache) 718 throws IOException { 719 provider = DFSUtil.createKeyProviderCryptoExtension(conf); 720 if (provider == null) { 721 LOG.info("No KeyProvider found."); 722 } else { 723 LOG.info("Found KeyProvider: " + provider.toString()); 724 } 725 if (conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY, 726 DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT)) { 727 LOG.info("Enabling async auditlog"); 728 enableAsyncAuditLog(); 729 } 730 fsLock = new FSNamesystemLock(conf, detailedLockHoldTimeMetrics); 731 cond = fsLock.newWriteLockCondition(); 732 cpLock = new ReentrantLock(); 733 734 this.fsImage = fsImage; 735 try { 736 resourceRecheckInterval = conf.getLong( 737 DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY, 738 DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT); 739 740 this.blockManager = new BlockManager(this, conf); 741 this.datanodeStatistics = blockManager.getDatanodeManager().getDatanodeStatistics(); 742 this.blockIdManager = new BlockIdManager(blockManager); 743 744 this.fsOwner = UserGroupInformation.getCurrentUser(); 745 this.supergroup = conf.get(DFS_PERMISSIONS_SUPERUSERGROUP_KEY, 746 DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT); 747 
this.isPermissionEnabled = conf.getBoolean(DFS_PERMISSIONS_ENABLED_KEY, 748 DFS_PERMISSIONS_ENABLED_DEFAULT); 749 LOG.info("fsOwner = " + fsOwner); 750 LOG.info("supergroup = " + supergroup); 751 LOG.info("isPermissionEnabled = " + isPermissionEnabled); 752 753 // block allocation has to be persisted in HA using a shared edits directory 754 // so that the standby has up-to-date namespace information 755 nameserviceId = DFSUtil.getNamenodeNameServiceId(conf); 756 this.haEnabled = HAUtil.isHAEnabled(conf, nameserviceId); 757 758 // Sanity check the HA-related config. 759 if (nameserviceId != null) { 760 LOG.info("Determined nameservice ID: " + nameserviceId); 761 } 762 LOG.info("HA Enabled: " + haEnabled); 763 if (!haEnabled && HAUtil.usesSharedEditsDir(conf)) { 764 LOG.warn("Configured NNs:\n" + DFSUtil.nnAddressesAsString(conf)); 765 throw new IOException("Invalid configuration: a shared edits dir " + 766 "must not be specified if HA is not enabled."); 767 } 768 769 // Get the checksum type from config 770 String checksumTypeStr = conf.get(DFS_CHECKSUM_TYPE_KEY, DFS_CHECKSUM_TYPE_DEFAULT); 771 DataChecksum.Type checksumType; 772 try { 773 checksumType = DataChecksum.Type.valueOf(checksumTypeStr); 774 } catch (IllegalArgumentException iae) { 775 throw new IOException("Invalid checksum type in " 776 + DFS_CHECKSUM_TYPE_KEY + ": " + checksumTypeStr); 777 } 778 779 this.serverDefaults = new FsServerDefaults( 780 conf.getLongBytes(DFS_BLOCK_SIZE_KEY, DFS_BLOCK_SIZE_DEFAULT), 781 conf.getInt(DFS_BYTES_PER_CHECKSUM_KEY, DFS_BYTES_PER_CHECKSUM_DEFAULT), 782 conf.getInt(DFS_CLIENT_WRITE_PACKET_SIZE_KEY, DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT), 783 (short) conf.getInt(DFS_REPLICATION_KEY, DFS_REPLICATION_DEFAULT), 784 conf.getInt(IO_FILE_BUFFER_SIZE_KEY, IO_FILE_BUFFER_SIZE_DEFAULT), 785 conf.getBoolean(DFS_ENCRYPT_DATA_TRANSFER_KEY, DFS_ENCRYPT_DATA_TRANSFER_DEFAULT), 786 conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT), 787 checksumType); 788 789 
this.maxFsObjects = conf.getLong(DFS_NAMENODE_MAX_OBJECTS_KEY, 790 DFS_NAMENODE_MAX_OBJECTS_DEFAULT); 791 792 this.minBlockSize = conf.getLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 793 DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_DEFAULT); 794 this.maxBlocksPerFile = conf.getLong(DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_KEY, 795 DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_DEFAULT); 796 this.accessTimePrecision = conf.getLong(DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, 797 DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT); 798 this.supportAppends = conf.getBoolean(DFS_SUPPORT_APPEND_KEY, DFS_SUPPORT_APPEND_DEFAULT); 799 LOG.info("Append Enabled: " + supportAppends); 800 801 this.dtpReplaceDatanodeOnFailure = ReplaceDatanodeOnFailure.get(conf); 802 803 this.standbyShouldCheckpoint = conf.getBoolean( 804 DFS_HA_STANDBY_CHECKPOINTS_KEY, DFS_HA_STANDBY_CHECKPOINTS_DEFAULT); 805 // # edit autoroll threshold is a multiple of the checkpoint threshold 806 this.editLogRollerThreshold = (long) 807 (conf.getFloat( 808 DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD, 809 DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT) * 810 conf.getLong( 811 DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 812 DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT)); 813 this.editLogRollerInterval = conf.getInt( 814 DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS, 815 DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT); 816 817 this.lazyPersistFileScrubIntervalSec = conf.getInt( 818 DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC, 819 DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC_DEFAULT); 820 821 if (this.lazyPersistFileScrubIntervalSec == 0) { 822 throw new IllegalArgumentException( 823 DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC + " must be non-zero."); 824 } 825 826 // For testing purposes, allow the DT secret manager to be started regardless 827 // of whether security is enabled. 
828 alwaysUseDelegationTokensForTests = conf.getBoolean( 829 DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, 830 DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT); 831 832 this.dtSecretManager = createDelegationTokenSecretManager(conf); 833 this.dir = new FSDirectory(this, conf); 834 this.snapshotManager = new SnapshotManager(dir); 835 this.cacheManager = new CacheManager(this, conf, blockManager); 836 this.safeMode = new SafeModeInfo(conf); 837 this.topConf = new TopConf(conf); 838 this.auditLoggers = initAuditLoggers(conf); 839 this.isDefaultAuditLogger = auditLoggers.size() == 1 && 840 auditLoggers.get(0) instanceof DefaultAuditLogger; 841 this.retryCache = ignoreRetryCache ? null : initRetryCache(conf); 842 Class<? extends INodeAttributeProvider> klass = conf.getClass( 843 DFS_NAMENODE_INODE_ATTRIBUTES_PROVIDER_KEY, 844 null, INodeAttributeProvider.class); 845 if (klass != null) { 846 inodeAttributeProvider = ReflectionUtils.newInstance(klass, conf); 847 LOG.info("Using INode attribute provider: " + klass.getName()); 848 } 849 } catch(IOException e) { 850 LOG.error(getClass().getSimpleName() + " initialization failed.", e); 851 close(); 852 throw e; 853 } catch (RuntimeException re) { 854 LOG.error(getClass().getSimpleName() + " initialization failed.", re); 855 close(); 856 throw re; 857 } 858 } 859 860 @VisibleForTesting 861 public List<AuditLogger> getAuditLoggers() { 862 return auditLoggers; 863 } 864 865 @VisibleForTesting 866 public RetryCache getRetryCache() { 867 return retryCache; 868 } 869 870 void lockRetryCache() { 871 if (retryCache != null) { 872 retryCache.lock(); 873 } 874 } 875 876 void unlockRetryCache() { 877 if (retryCache != null) { 878 retryCache.unlock(); 879 } 880 } 881 882 /** Whether or not retry cache is enabled */ 883 boolean hasRetryCache() { 884 return retryCache != null; 885 } 886 887 void addCacheEntryWithPayload(byte[] clientId, int callId, Object payload) { 888 if (retryCache != null) { 889 
retryCache.addCacheEntryWithPayload(clientId, callId, payload); 890 } 891 } 892 893 void addCacheEntry(byte[] clientId, int callId) { 894 if (retryCache != null) { 895 retryCache.addCacheEntry(clientId, callId); 896 } 897 } 898 899 @VisibleForTesting 900 public KeyProviderCryptoExtension getProvider() { 901 return provider; 902 } 903 904 @VisibleForTesting 905 static RetryCache initRetryCache(Configuration conf) { 906 boolean enable = conf.getBoolean(DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY, 907 DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT); 908 LOG.info("Retry cache on namenode is " + (enable ? "enabled" : "disabled")); 909 if (enable) { 910 float heapPercent = conf.getFloat( 911 DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY, 912 DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT); 913 long entryExpiryMillis = conf.getLong( 914 DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY, 915 DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT); 916 LOG.info("Retry cache will use " + heapPercent 917 + " of total heap and retry cache entry expiry time is " 918 + entryExpiryMillis + " millis"); 919 long entryExpiryNanos = entryExpiryMillis * 1000 * 1000; 920 return new RetryCache("NameNodeRetryCache", heapPercent, 921 entryExpiryNanos); 922 } 923 return null; 924 } 925 926 private List<AuditLogger> initAuditLoggers(Configuration conf) { 927 // Initialize the custom access loggers if configured. 
928 Collection<String> alClasses = conf.getStringCollection(DFS_NAMENODE_AUDIT_LOGGERS_KEY); 929 List<AuditLogger> auditLoggers = Lists.newArrayList(); 930 if (alClasses != null && !alClasses.isEmpty()) { 931 for (String className : alClasses) { 932 try { 933 AuditLogger logger; 934 if (DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME.equals(className)) { 935 logger = new DefaultAuditLogger(); 936 } else { 937 logger = (AuditLogger) Class.forName(className).newInstance(); 938 } 939 logger.initialize(conf); 940 auditLoggers.add(logger); 941 } catch (RuntimeException re) { 942 throw re; 943 } catch (Exception e) { 944 throw new RuntimeException(e); 945 } 946 } 947 } 948 949 // Make sure there is at least one logger installed. 950 if (auditLoggers.isEmpty()) { 951 auditLoggers.add(new DefaultAuditLogger()); 952 } 953 954 // Add audit logger to calculate top users 955 if (topConf.isEnabled) { 956 topMetrics = new TopMetrics(conf, topConf.nntopReportingPeriodsMs); 957 if (DefaultMetricsSystem.instance().getSource( 958 TOPMETRICS_METRICS_SOURCE_NAME) == null) { 959 DefaultMetricsSystem.instance().register(TOPMETRICS_METRICS_SOURCE_NAME, 960 "Top N operations by user", topMetrics); 961 } 962 auditLoggers.add(new TopAuditLogger(topMetrics)); 963 } 964 965 return Collections.unmodifiableList(auditLoggers); 966 } 967 968 private void loadFSImage(StartupOption startOpt) throws IOException { 969 final FSImage fsImage = getFSImage(); 970 971 // format before starting up if requested 972 if (startOpt == StartupOption.FORMAT) { 973 974 fsImage.format(this, fsImage.getStorage().determineClusterId());// reuse current id 975 976 startOpt = StartupOption.REGULAR; 977 } 978 boolean success = false; 979 writeLock(); 980 try { 981 // We shouldn't be calling saveNamespace if we've come up in standby state. 
982 MetaRecoveryContext recovery = startOpt.createRecoveryContext(); 983 final boolean staleImage 984 = fsImage.recoverTransitionRead(startOpt, this, recovery); 985 if (RollingUpgradeStartupOption.ROLLBACK.matches(startOpt) || 986 RollingUpgradeStartupOption.DOWNGRADE.matches(startOpt)) { 987 rollingUpgradeInfo = null; 988 } 989 final boolean needToSave = staleImage && !haEnabled && !isRollingUpgrade(); 990 LOG.info("Need to save fs image? " + needToSave 991 + " (staleImage=" + staleImage + ", haEnabled=" + haEnabled 992 + ", isRollingUpgrade=" + isRollingUpgrade() + ")"); 993 if (needToSave) { 994 fsImage.saveNamespace(this); 995 } else { 996 updateStorageVersionForRollingUpgrade(fsImage.getLayoutVersion(), 997 startOpt); 998 // No need to save, so mark the phase done. 999 StartupProgress prog = NameNode.getStartupProgress(); 1000 prog.beginPhase(Phase.SAVING_CHECKPOINT); 1001 prog.endPhase(Phase.SAVING_CHECKPOINT); 1002 } 1003 // This will start a new log segment and write to the seen_txid file, so 1004 // we shouldn't do it when coming up in standby state 1005 if (!haEnabled || (haEnabled && startOpt == StartupOption.UPGRADE) 1006 || (haEnabled && startOpt == StartupOption.UPGRADEONLY)) { 1007 fsImage.openEditLogForWrite(); 1008 } 1009 success = true; 1010 } finally { 1011 if (!success) { 1012 fsImage.close(); 1013 } 1014 writeUnlock("loadFSImage"); 1015 } 1016 imageLoadComplete(); 1017 } 1018 1019 private void updateStorageVersionForRollingUpgrade(final long layoutVersion, 1020 StartupOption startOpt) throws IOException { 1021 boolean rollingStarted = RollingUpgradeStartupOption.STARTED 1022 .matches(startOpt) && layoutVersion > HdfsConstants 1023 .NAMENODE_LAYOUT_VERSION; 1024 boolean rollingRollback = RollingUpgradeStartupOption.ROLLBACK 1025 .matches(startOpt); 1026 if (rollingRollback || rollingStarted) { 1027 fsImage.updateStorageVersion(); 1028 } 1029 } 1030 1031 private void startSecretManager() { 1032 if (dtSecretManager != null) { 1033 try { 1034 
dtSecretManager.startThreads(); 1035 } catch (IOException e) { 1036 // Inability to start secret manager 1037 // can't be recovered from. 1038 throw new RuntimeException(e); 1039 } 1040 } 1041 } 1042 1043 private void startSecretManagerIfNecessary() { 1044 boolean shouldRun = shouldUseDelegationTokens() && 1045 !isInSafeMode() && getEditLog().isOpenForWrite(); 1046 boolean running = dtSecretManager.isRunning(); 1047 if (shouldRun && !running) { 1048 startSecretManager(); 1049 } 1050 } 1051 1052 private void stopSecretManager() { 1053 if (dtSecretManager != null) { 1054 dtSecretManager.stopThreads(); 1055 } 1056 } 1057 1058 /** 1059 * Start services common to both active and standby states 1060 */ 1061 void startCommonServices(Configuration conf, HAContext haContext) throws IOException { 1062 this.registerMBean(); // register the MBean for the FSNamesystemState 1063 writeLock(); 1064 this.haContext = haContext; 1065 try { 1066 nnResourceChecker = new NameNodeResourceChecker(conf); 1067 checkAvailableResources(); 1068 assert safeMode != null && !isPopulatingReplQueues(); 1069 StartupProgress prog = NameNode.getStartupProgress(); 1070 prog.beginPhase(Phase.SAFEMODE); 1071 prog.setTotal(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS, 1072 getCompleteBlocksTotal()); 1073 setBlockTotal(); 1074 blockManager.activate(conf); 1075 } finally { 1076 writeUnlock("startCommonServices"); 1077 } 1078 1079 registerMXBean(); 1080 DefaultMetricsSystem.instance().register(this); 1081 if (inodeAttributeProvider != null) { 1082 inodeAttributeProvider.start(); 1083 dir.setINodeAttributeProvider(inodeAttributeProvider); 1084 } 1085 snapshotManager.registerMXBean(); 1086 } 1087 1088 /** 1089 * Stop services common to both active and standby states 1090 */ 1091 void stopCommonServices() { 1092 writeLock(); 1093 if (inodeAttributeProvider != null) { 1094 dir.setINodeAttributeProvider(null); 1095 inodeAttributeProvider.stop(); 1096 } 1097 try { 1098 if (blockManager != null) 
blockManager.close(); 1099 } finally { 1100 writeUnlock("stopCommonServices"); 1101 } 1102 RetryCache.clear(retryCache); 1103 } 1104 1105 /** 1106 * Start services required in active state 1107 * @throws IOException 1108 */ 1109 void startActiveServices() throws IOException { 1110 startingActiveService = true; 1111 LOG.info("Starting services required for active state"); 1112 writeLock(); 1113 try { 1114 FSEditLog editLog = getFSImage().getEditLog(); 1115 1116 if (!editLog.isOpenForWrite()) { 1117 // During startup, we're already open for write during initialization. 1118 editLog.initJournalsForWrite(); 1119 // May need to recover 1120 editLog.recoverUnclosedStreams(); 1121 1122 LOG.info("Catching up to latest edits from old active before " + 1123 "taking over writer role in edits logs"); 1124 editLogTailer.catchupDuringFailover(); 1125 1126 blockManager.setPostponeBlocksFromFuture(false); 1127 blockManager.getDatanodeManager().markAllDatanodesStale(); 1128 blockManager.clearQueues(); 1129 blockManager.processAllPendingDNMessages(); 1130 1131 // Only need to re-process the queue, If not in SafeMode. 1132 if (!isInSafeMode()) { 1133 LOG.info("Reprocessing replication and invalidation queues"); 1134 initializeReplQueues(); 1135 } 1136 1137 if (LOG.isDebugEnabled()) { 1138 LOG.debug("NameNode metadata after re-processing " + 1139 "replication and invalidation queues during failover:\n" + 1140 metaSaveAsString()); 1141 } 1142 1143 long nextTxId = getFSImage().getLastAppliedTxId() + 1; 1144 LOG.info("Will take over writing edit logs at txnid " + 1145 nextTxId); 1146 editLog.setNextTxId(nextTxId); 1147 1148 getFSImage().editLog.openForWrite(); 1149 } 1150 1151 // Enable quota checks. 1152 dir.enableQuotaChecks(); 1153 if (haEnabled) { 1154 // Renew all of the leases before becoming active. 1155 // This is because, while we were in standby mode, 1156 // the leases weren't getting renewed on this NN. 1157 // Give them all a fresh start here. 
1158 leaseManager.renewAllLeases(); 1159 } 1160 leaseManager.startMonitor(); 1161 startSecretManagerIfNecessary(); 1162 1163 //ResourceMonitor required only at ActiveNN. See HDFS-2914 1164 this.nnrmthread = new Daemon(new NameNodeResourceMonitor()); 1165 nnrmthread.start(); 1166 1167 nnEditLogRoller = new Daemon(new NameNodeEditLogRoller( 1168 editLogRollerThreshold, editLogRollerInterval)); 1169 nnEditLogRoller.start(); 1170 1171 if (lazyPersistFileScrubIntervalSec > 0) { 1172 lazyPersistFileScrubber = new Daemon(new LazyPersistFileScrubber( 1173 lazyPersistFileScrubIntervalSec)); 1174 lazyPersistFileScrubber.start(); 1175 } 1176 1177 cacheManager.startMonitorThread(); 1178 blockManager.getDatanodeManager().setShouldSendCachingCommands(true); 1179 } finally { 1180 startingActiveService = false; 1181 checkSafeMode(); 1182 writeUnlock("startActiveServices"); 1183 } 1184 } 1185 1186 /** 1187 * Initialize replication queues. 1188 */ 1189 private void initializeReplQueues() { 1190 LOG.info("initializing replication queues"); 1191 blockManager.processMisReplicatedBlocks(); 1192 initializedReplQueues = true; 1193 } 1194 1195 private boolean inActiveState() { 1196 return haContext != null && 1197 haContext.getState().getServiceState() == HAServiceState.ACTIVE; 1198 } 1199 1200 /** 1201 * @return Whether the namenode is transitioning to active state and is in the 1202 * middle of the {@link #startActiveServices()} 1203 */ 1204 public boolean inTransitionToActive() { 1205 return haEnabled && inActiveState() && startingActiveService; 1206 } 1207 1208 private boolean shouldUseDelegationTokens() { 1209 return UserGroupInformation.isSecurityEnabled() || 1210 alwaysUseDelegationTokensForTests; 1211 } 1212 1213 /** 1214 * Stop services required in active state 1215 */ 1216 void stopActiveServices() { 1217 LOG.info("Stopping services started for active state"); 1218 writeLock(); 1219 try { 1220 stopSecretManager(); 1221 leaseManager.stopMonitor(); 1222 if (nnrmthread != null) { 
1223 ((NameNodeResourceMonitor) nnrmthread.getRunnable()).stopMonitor(); 1224 nnrmthread.interrupt(); 1225 } 1226 if (nnEditLogRoller != null) { 1227 ((NameNodeEditLogRoller)nnEditLogRoller.getRunnable()).stop(); 1228 nnEditLogRoller.interrupt(); 1229 } 1230 if (lazyPersistFileScrubber != null) { 1231 ((LazyPersistFileScrubber) lazyPersistFileScrubber.getRunnable()).stop(); 1232 lazyPersistFileScrubber.interrupt(); 1233 } 1234 if (dir != null && getFSImage() != null) { 1235 if (getFSImage().editLog != null) { 1236 getFSImage().editLog.close(); 1237 } 1238 // Update the fsimage with the last txid that we wrote 1239 // so that the tailer starts from the right spot. 1240 getFSImage().updateLastAppliedTxIdFromWritten(); 1241 } 1242 if (cacheManager != null) { 1243 cacheManager.stopMonitorThread(); 1244 cacheManager.clearDirectiveStats(); 1245 } 1246 blockManager.getDatanodeManager().clearPendingCachingCommands(); 1247 blockManager.getDatanodeManager().setShouldSendCachingCommands(false); 1248 // Don't want to keep replication queues when not in Active. 1249 blockManager.clearQueues(); 1250 initializedReplQueues = false; 1251 } finally { 1252 writeUnlock("stopActiveServices"); 1253 } 1254 } 1255 1256 /** 1257 * Start services required in standby state 1258 * 1259 * @throws IOException 1260 */ 1261 void startStandbyServices(final Configuration conf) throws IOException { 1262 LOG.info("Starting services required for standby state"); 1263 if (!getFSImage().editLog.isOpenForRead()) { 1264 // During startup, we're already open for read. 1265 getFSImage().editLog.initSharedJournalsForRead(); 1266 } 1267 1268 blockManager.setPostponeBlocksFromFuture(true); 1269 1270 // Disable quota checks while in standby. 
1271 dir.disableQuotaChecks(); 1272 editLogTailer = new EditLogTailer(this, conf); 1273 editLogTailer.start(); 1274 if (standbyShouldCheckpoint) { 1275 standbyCheckpointer = new StandbyCheckpointer(conf, this); 1276 standbyCheckpointer.start(); 1277 } 1278 } 1279 1280 /** 1281 * Called when the NN is in Standby state and the editlog tailer tails the 1282 * OP_ROLLING_UPGRADE_START. 1283 */ 1284 void triggerRollbackCheckpoint() { 1285 setNeedRollbackFsImage(true); 1286 if (standbyCheckpointer != null) { 1287 standbyCheckpointer.triggerRollbackCheckpoint(); 1288 } 1289 } 1290 1291 /** 1292 * Called while the NN is in Standby state, but just about to be 1293 * asked to enter Active state. This cancels any checkpoints 1294 * currently being taken. 1295 */ 1296 void prepareToStopStandbyServices() throws ServiceFailedException { 1297 if (standbyCheckpointer != null) { 1298 standbyCheckpointer.cancelAndPreventCheckpoints( 1299 "About to leave standby state"); 1300 } 1301 } 1302 1303 /** Stop services required in standby state */ 1304 void stopStandbyServices() throws IOException { 1305 LOG.info("Stopping services started for standby state"); 1306 if (standbyCheckpointer != null) { 1307 standbyCheckpointer.stop(); 1308 } 1309 if (editLogTailer != null) { 1310 editLogTailer.stop(); 1311 } 1312 if (dir != null && getFSImage() != null && getFSImage().editLog != null) { 1313 getFSImage().editLog.close(); 1314 } 1315 } 1316 1317 @Override 1318 public void checkOperation(OperationCategory op) throws StandbyException { 1319 if (haContext != null) { 1320 // null in some unit tests 1321 haContext.checkOperation(op); 1322 } 1323 } 1324 1325 /** 1326 * @throws RetriableException 1327 * If 1) The NameNode is in SafeMode, 2) HA is enabled, and 3) 1328 * NameNode is in active state 1329 * @throws SafeModeException 1330 * Otherwise if NameNode is in SafeMode. 
1331 */ 1332 void checkNameNodeSafeMode(String errorMsg) 1333 throws RetriableException, SafeModeException { 1334 if (isInSafeMode()) { 1335 SafeModeException se = new SafeModeException(errorMsg, safeMode); 1336 if (haEnabled && haContext != null 1337 && haContext.getState().getServiceState() == HAServiceState.ACTIVE 1338 && shouldRetrySafeMode(this.safeMode)) { 1339 throw new RetriableException(se); 1340 } else { 1341 throw se; 1342 } 1343 } 1344 } 1345 1346 boolean isPermissionEnabled() { 1347 return isPermissionEnabled; 1348 } 1349 1350 /** 1351 * We already know that the safemode is on. We will throw a RetriableException 1352 * if the safemode is not manual or caused by low resource. 1353 */ 1354 private boolean shouldRetrySafeMode(SafeModeInfo safeMode) { 1355 if (safeMode == null) { 1356 return false; 1357 } else { 1358 return !safeMode.isManual() && !safeMode.areResourcesLow(); 1359 } 1360 } 1361 1362 public static Collection<URI> getNamespaceDirs(Configuration conf) { 1363 return getStorageDirs(conf, DFS_NAMENODE_NAME_DIR_KEY); 1364 } 1365 1366 /** 1367 * Get all edits dirs which are required. If any shared edits dirs are 1368 * configured, these are also included in the set of required dirs. 1369 * 1370 * @param conf the HDFS configuration. 1371 * @return all required dirs. 
1372 */ 1373 public static Collection<URI> getRequiredNamespaceEditsDirs(Configuration conf) { 1374 Set<URI> ret = new HashSet<URI>(); 1375 ret.addAll(getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY)); 1376 ret.addAll(getSharedEditsDirs(conf)); 1377 return ret; 1378 } 1379 1380 private static Collection<URI> getStorageDirs(Configuration conf, 1381 String propertyName) { 1382 Collection<String> dirNames = conf.getTrimmedStringCollection(propertyName); 1383 StartupOption startOpt = NameNode.getStartupOption(conf); 1384 if(startOpt == StartupOption.IMPORT) { 1385 // In case of IMPORT this will get rid of default directories 1386 // but will retain directories specified in hdfs-site.xml 1387 // When importing image from a checkpoint, the name-node can 1388 // start with empty set of storage directories. 1389 Configuration cE = new HdfsConfiguration(false); 1390 cE.addResource("core-default.xml"); 1391 cE.addResource("core-site.xml"); 1392 cE.addResource("hdfs-default.xml"); 1393 Collection<String> dirNames2 = cE.getTrimmedStringCollection(propertyName); 1394 dirNames.removeAll(dirNames2); 1395 if(dirNames.isEmpty()) 1396 LOG.warn("!!! WARNING !!!" + 1397 "\n\tThe NameNode currently runs without persistent storage." + 1398 "\n\tAny changes to the file system meta-data may be lost." + 1399 "\n\tRecommended actions:" + 1400 "\n\t\t- shutdown and restart NameNode with configured \"" 1401 + propertyName + "\" in hdfs-site.xml;" + 1402 "\n\t\t- use Backup Node as a persistent and up-to-date storage " + 1403 "of the file system meta-data."); 1404 } else if (dirNames.isEmpty()) { 1405 dirNames = Collections.singletonList( 1406 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_DEFAULT); 1407 } 1408 return Util.stringCollectionAsURIs(dirNames); 1409 } 1410 1411 /** 1412 * Return an ordered list of edits directories to write to. 1413 * The list is ordered such that all shared edits directories 1414 * are ordered before non-shared directories, and any duplicates 1415 * are removed. 
The order they are specified in the configuration 1416 * is retained. 1417 * @return Collection of shared edits directories. 1418 * @throws IOException if multiple shared edits directories are configured 1419 */ 1420 public static List<URI> getNamespaceEditsDirs(Configuration conf) 1421 throws IOException { 1422 return getNamespaceEditsDirs(conf, true); 1423 } 1424 1425 public static List<URI> getNamespaceEditsDirs(Configuration conf, 1426 boolean includeShared) 1427 throws IOException { 1428 // Use a LinkedHashSet so that order is maintained while we de-dup 1429 // the entries. 1430 LinkedHashSet<URI> editsDirs = new LinkedHashSet<URI>(); 1431 1432 if (includeShared) { 1433 List<URI> sharedDirs = getSharedEditsDirs(conf); 1434 1435 // Fail until multiple shared edits directories are supported (HDFS-2782) 1436 if (sharedDirs.size() > 1) { 1437 throw new IOException( 1438 "Multiple shared edits directories are not yet supported"); 1439 } 1440 1441 // First add the shared edits dirs. It's critical that the shared dirs 1442 // are added first, since JournalSet syncs them in the order they are listed, 1443 // and we need to make sure all edits are in place in the shared storage 1444 // before they are replicated locally. See HDFS-2874. 1445 for (URI dir : sharedDirs) { 1446 if (!editsDirs.add(dir)) { 1447 LOG.warn("Edits URI " + dir + " listed multiple times in " + 1448 DFS_NAMENODE_SHARED_EDITS_DIR_KEY + ". Ignoring duplicates."); 1449 } 1450 } 1451 } 1452 // Now add the non-shared dirs. 1453 for (URI dir : getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_KEY)) { 1454 if (!editsDirs.add(dir)) { 1455 LOG.warn("Edits URI " + dir + " listed multiple times in " + 1456 DFS_NAMENODE_SHARED_EDITS_DIR_KEY + " and " + 1457 DFS_NAMENODE_EDITS_DIR_KEY + ". Ignoring duplicates."); 1458 } 1459 } 1460 1461 if (editsDirs.isEmpty()) { 1462 // If this is the case, no edit dirs have been explicitly configured. 1463 // Image dirs are to be used for edits too. 
1464 return Lists.newArrayList(getNamespaceDirs(conf)); 1465 } else { 1466 return Lists.newArrayList(editsDirs); 1467 } 1468 } 1469 1470 /** 1471 * Returns edit directories that are shared between primary and secondary. 1472 * @param conf configuration 1473 * @return collection of edit directories from {@code conf} 1474 */ 1475 public static List<URI> getSharedEditsDirs(Configuration conf) { 1476 // don't use getStorageDirs here, because we want an empty default 1477 // rather than the dir in /tmp 1478 Collection<String> dirNames = conf.getTrimmedStringCollection( 1479 DFS_NAMENODE_SHARED_EDITS_DIR_KEY); 1480 return Util.stringCollectionAsURIs(dirNames); 1481 } 1482 1483 @Override 1484 public void readLock() { 1485 this.fsLock.readLock(); 1486 } 1487 @Override 1488 public void readLockInterruptibly() throws InterruptedException { 1489 this.fsLock.readLockInterruptibly(); 1490 } 1491 @Override 1492 public void readUnlock() { 1493 this.fsLock.readUnlock(); 1494 } 1495 public void readUnlock(String opName) { 1496 this.fsLock.readUnlock(opName); 1497 } 1498 @Override 1499 public void writeLock() { 1500 this.fsLock.writeLock(); 1501 } 1502 @Override 1503 public void writeLockInterruptibly() throws InterruptedException { 1504 this.fsLock.writeLockInterruptibly(); 1505 } 1506 @Override 1507 public void writeUnlock() { 1508 this.fsLock.writeUnlock(); 1509 } 1510 public void writeUnlock(String opName) { 1511 this.fsLock.writeUnlock(opName); 1512 } 1513 @Override 1514 public boolean hasWriteLock() { 1515 return this.fsLock.isWriteLockedByCurrentThread(); 1516 } 1517 @Override 1518 public boolean hasReadLock() { 1519 return this.fsLock.getReadHoldCount() > 0 || hasWriteLock(); 1520 } 1521 1522 public int getReadHoldCount() { 1523 return this.fsLock.getReadHoldCount(); 1524 } 1525 1526 public int getWriteHoldCount() { 1527 return this.fsLock.getWriteHoldCount(); 1528 } 1529 1530 /** Lock the checkpoint lock */ 1531 public void cpLock() { 1532 this.cpLock.lock(); 1533 } 1534 
1535 /** Lock the checkpoint lock interrupibly */ 1536 public void cpLockInterruptibly() throws InterruptedException { 1537 this.cpLock.lockInterruptibly(); 1538 } 1539 1540 /** Unlock the checkpoint lock */ 1541 public void cpUnlock() { 1542 this.cpLock.unlock(); 1543 } 1544 1545 1546 NamespaceInfo getNamespaceInfo() { 1547 readLock(); 1548 try { 1549 return unprotectedGetNamespaceInfo(); 1550 } finally { 1551 readUnlock("getNamespaceInfo"); 1552 } 1553 } 1554 1555 /** 1556 * Version of @see #getNamespaceInfo() that is not protected by a lock. 1557 */ 1558 NamespaceInfo unprotectedGetNamespaceInfo() { 1559 return new NamespaceInfo(getFSImage().getStorage().getNamespaceID(), 1560 getClusterId(), getBlockPoolId(), 1561 getFSImage().getStorage().getCTime()); 1562 } 1563 1564 /** 1565 * Close down this file system manager. 1566 * Causes heartbeat and lease daemons to stop; waits briefly for 1567 * them to finish, but a short timeout returns control back to caller. 1568 */ 1569 void close() { 1570 fsRunning = false; 1571 try { 1572 stopCommonServices(); 1573 if (smmthread != null) smmthread.interrupt(); 1574 } finally { 1575 // using finally to ensure we also wait for lease daemon 1576 try { 1577 stopActiveServices(); 1578 stopStandbyServices(); 1579 } catch (IOException ie) { 1580 } finally { 1581 IOUtils.cleanup(LOG, dir); 1582 IOUtils.cleanup(LOG, fsImage); 1583 } 1584 } 1585 } 1586 1587 @Override 1588 public boolean isRunning() { 1589 return fsRunning; 1590 } 1591 1592 @Override 1593 public boolean isInStandbyState() { 1594 if (haContext == null || haContext.getState() == null) { 1595 // We're still starting up. In this case, if HA is 1596 // on for the cluster, we always start in standby. Otherwise 1597 // start in active. 
1598 return haEnabled; 1599 } 1600 1601 return HAServiceState.STANDBY == haContext.getState().getServiceState(); 1602 } 1603 1604 /** 1605 * Dump all metadata into specified file 1606 */ 1607 void metaSave(String filename) throws IOException { 1608 checkSuperuserPrivilege(); 1609 checkOperation(OperationCategory.UNCHECKED); 1610 writeLock(); 1611 try { 1612 checkOperation(OperationCategory.UNCHECKED); 1613 File file = new File(System.getProperty("hadoop.log.dir"), filename); 1614 PrintWriter out = new PrintWriter(new BufferedWriter( 1615 new OutputStreamWriter(new FileOutputStream(file), Charsets.UTF_8))); 1616 metaSave(out); 1617 out.flush(); 1618 out.close(); 1619 } finally { 1620 writeUnlock("metaSave"); 1621 } 1622 } 1623 1624 private void metaSave(PrintWriter out) { 1625 assert hasWriteLock(); 1626 long totalInodes = this.dir.totalInodes(); 1627 long totalBlocks = this.getBlocksTotal(); 1628 out.println(totalInodes + " files and directories, " + totalBlocks 1629 + " blocks = " + (totalInodes + totalBlocks) + " total"); 1630 1631 blockManager.metaSave(out); 1632 } 1633 1634 private String metaSaveAsString() { 1635 StringWriter sw = new StringWriter(); 1636 PrintWriter pw = new PrintWriter(sw); 1637 metaSave(pw); 1638 pw.flush(); 1639 return sw.toString(); 1640 } 1641 1642 FsServerDefaults getServerDefaults() throws StandbyException { 1643 checkOperation(OperationCategory.READ); 1644 return serverDefaults; 1645 } 1646 1647 long getAccessTimePrecision() { 1648 return accessTimePrecision; 1649 } 1650 1651 private boolean isAccessTimeSupported() { 1652 return accessTimePrecision > 0; 1653 } 1654 1655 ///////////////////////////////////////////////////////// 1656 // 1657 // These methods are called by HadoopFS clients 1658 // 1659 ///////////////////////////////////////////////////////// 1660 /** 1661 * Set permissions for an existing file. 
1662 * @throws IOException 1663 */ 1664 void setPermission(String src, FsPermission permission) throws IOException { 1665 final String operationName = "setPermission"; 1666 HdfsFileStatus auditStat; 1667 checkOperation(OperationCategory.WRITE); 1668 writeLock(); 1669 try { 1670 checkOperation(OperationCategory.WRITE); 1671 checkNameNodeSafeMode("Cannot set permission for " + src); 1672 auditStat = FSDirAttrOp.setPermission(dir, src, permission); 1673 } catch (AccessControlException e) { 1674 logAuditEvent(false, operationName, src); 1675 throw e; 1676 } finally { 1677 writeUnlock(operationName); 1678 } 1679 getEditLog().logSync(); 1680 logAuditEvent(true, operationName, src, null, auditStat); 1681 } 1682 1683 /** 1684 * Set owner for an existing file. 1685 * @throws IOException 1686 */ 1687 void setOwner(String src, String username, String group) 1688 throws IOException { 1689 final String operationName = "setOwner"; 1690 HdfsFileStatus auditStat; 1691 checkOperation(OperationCategory.WRITE); 1692 writeLock(); 1693 try { 1694 checkOperation(OperationCategory.WRITE); 1695 checkNameNodeSafeMode("Cannot set owner for " + src); 1696 auditStat = FSDirAttrOp.setOwner(dir, src, username, group); 1697 } catch (AccessControlException e) { 1698 logAuditEvent(false, operationName, src); 1699 throw e; 1700 } finally { 1701 writeUnlock(operationName); 1702 } 1703 getEditLog().logSync(); 1704 logAuditEvent(true, operationName, src, null, auditStat); 1705 } 1706 1707 static class GetBlockLocationsResult { 1708 final boolean updateAccessTime; 1709 final LocatedBlocks blocks; 1710 boolean updateAccessTime() { 1711 return updateAccessTime; 1712 } 1713 private GetBlockLocationsResult( 1714 boolean updateAccessTime, LocatedBlocks blocks) { 1715 this.updateAccessTime = updateAccessTime; 1716 this.blocks = blocks; 1717 } 1718 } 1719 1720 /** 1721 * Get block locations within the specified range. 
 * @see ClientProtocol#getBlockLocations(String, long, long)
 *
 * The lookup runs under the read lock and is audited as "open". When the
 * lookup reports that the access time should be updated, the write lock is
 * taken afterwards for a best-effort update. Finally the returned blocks
 * are sorted with respect to {@code clientMachine}.
 *
 * @param clientMachine client host handed to the datanode manager's sort
 * @param srcArg path of the file as supplied by the caller
 * @param offset start offset of the requested range
 * @param length length of the requested range
 * @return the located blocks of the lookup result (may be null)
 * @throws IOException if an operation check or the lookup fails
 */
LocatedBlocks getBlockLocations(String clientMachine, String srcArg,
    long offset, long length) throws IOException {
  final String operationName = "open";
  checkOperation(OperationCategory.READ);
  GetBlockLocationsResult res = null;
  FSPermissionChecker pc = getPermissionChecker();
  readLock();
  try {
    checkOperation(OperationCategory.READ);
    res = getBlockLocations(pc, srcArg, offset, length, true, true);
  } catch (AccessControlException e) {
    logAuditEvent(false, operationName, srcArg);
    throw e;
  } finally {
    readUnlock(operationName);
  }

  logAuditEvent(true, operationName, srcArg);

  if (res.updateAccessTime()) {
    String src = srcArg;
    // This check sits outside the try below, so its exception is NOT
    // swallowed by the catch-all and propagates to the caller.
    checkOperation(OperationCategory.WRITE);
    writeLock();
    final long now = now();
    try {
      checkOperation(OperationCategory.WRITE);
      /*
       * Resolve the path again and update the atime only when the file
       * exists.
       *
       * XXX: Races can still occur even after resolving the path again.
       * For example:
       *
       * <ul>
       *   <li>Get the block location for "/a/b"</li>
       *   <li>Rename "/a/b" to "/c/b"</li>
       *   <li>The second resolution still points to "/a/b", which is
       *       wrong.</li>
       * </ul>
       *
       * The behavior is incorrect but consistent with the one before
       * HDFS-7463. A better fix is to change the edit log of SetTime to
       * use inode id instead of a path.
       */
      final INodesInPath iip = dir.resolvePath(pc, src);
      src = iip.getPath();
      INode inode = iip.getLastINode();
      // Re-evaluated under the write lock: the inode may be gone or its
      // access time may already be recent enough.
      boolean updateAccessTime = inode != null &&
          now > inode.getAccessTime() + getAccessTimePrecision();
      if (!isInSafeMode() && updateAccessTime) {
        boolean changed = FSDirAttrOp.setTimes(dir,
            inode, -1, now, false, iip.getLatestSnapshotId());
        if (changed) {
          getEditLog().logTimes(src, -1, now);
        }
      }
    } catch (Throwable e) {
      // Best effort: a failed atime update must not fail the read itself.
      LOG.warn("Failed to update the access time of " + src, e);
    } finally {
      writeUnlock(operationName);
    }
  }

  LocatedBlocks blocks = res.blocks;
  if (blocks != null) {
    blockManager.getDatanodeManager().sortLocatedBlocks(
        clientMachine, blocks.getLocatedBlocks());

    // lastBlock is not part of getLocatedBlocks(), might need to sort it too
    LocatedBlock lastBlock = blocks.getLastLocatedBlock();
    if (lastBlock != null) {
      ArrayList<LocatedBlock> lastBlockList = Lists.newArrayList(lastBlock);
      blockManager.getDatanodeManager().sortLocatedBlocks(
          clientMachine, lastBlockList);
    }
  }
  return blocks;
}

/**
 * Get block locations within the specified range.
 *
 * Validates the range, delegates to {@code getBlockLocationsInt} and, when
 * {@code checkSafeMode} is set and the namesystem is in safe mode, rejects
 * results that contain a block without any location.
 *
 * @see ClientProtocol#getBlockLocations(String, long, long)
 * @param pc permission checker of the caller
 * @param src path of the file
 * @param offset start offset; must not be negative
 * @param length range length; must not be negative
 * @param needBlockToken passed through to the block manager
 * @param checkSafeMode whether to apply the safe mode location check
 * @return the located blocks together with the access time update flag
 * @throws IOException on a failed lookup; a SafeModeException (wrapped in
 *         a RetriableException when HA is enabled and this node is active)
 *         if a block has no locations while in safe mode
 */
GetBlockLocationsResult getBlockLocations(
    FSPermissionChecker pc, String src, long offset, long length,
    boolean needBlockToken, boolean checkSafeMode) throws IOException {
  if (offset < 0) {
    throw new HadoopIllegalArgumentException(
        "Negative offset is not supported. File: " + src);
  }
  if (length < 0) {
    throw new HadoopIllegalArgumentException(
        "Negative length is not supported. File: " + src);
  }
  final GetBlockLocationsResult ret = getBlockLocationsInt(
      pc, src, offset, length, needBlockToken);

  if (checkSafeMode && isInSafeMode()) {
    for (LocatedBlock b : ret.blocks.getLocatedBlocks()) {
      // if safemode & no block locations yet then throw safemodeException
      if ((b.getLocations() == null) || (b.getLocations().length == 0)) {
        SafeModeException se = new SafeModeException(
            "Zero blocklocations for " + src, safeMode);
        if (haEnabled && haContext != null &&
            haContext.getState().getServiceState() == HAServiceState.ACTIVE) {
          throw new RetriableException(se);
        } else {
          throw se;
        }
      }
    }
  }
  return ret;
}

/**
 * Resolve the path, check read access when permissions are enabled, and
 * build the located blocks for the requested range.
 *
 * @param pc permission checker of the caller
 * @param srcArg path as supplied by the caller; also used to detect a
 *        reserved raw name, in which case no encryption info is attached
 * @param offset start offset of the requested range
 * @param length range length; capped to the snapshot file size for
 *        snapshot paths
 * @param needBlockToken passed through to the block manager
 * @return the located blocks plus whether the access time should be
 *         updated
 * @throws IOException if resolution, the file check or the permission
 *         check fails
 */
private GetBlockLocationsResult getBlockLocationsInt(
    FSPermissionChecker pc, final String srcArg, long offset, long length,
    boolean needBlockToken)
    throws IOException {
  String src = srcArg;
  final INodesInPath iip = dir.resolvePath(pc, src);
  src = iip.getPath();
  final INodeFile inode = INodeFile.valueOf(iip.getLastINode(), src);
  if (isPermissionEnabled) {
    dir.checkPathAccess(pc, iip, FsAction.READ);
    checkUnreadableBySuperuser(pc, inode, iip.getPathSnapshotId());
  }

  final long fileSize = iip.isSnapshot()
      ? inode.computeFileSize(iip.getPathSnapshotId())
      : inode.computeFileSizeNotIncludingLastUcBlock();
  boolean isUc = inode.isUnderConstruction();
  if (iip.isSnapshot()) {
    // if src indicates a snapshot file, we need to make sure the returned
    // blocks do not exceed the size of the snapshot file.
    length = Math.min(length, fileSize - offset);
    isUc = false;
  }

  final FileEncryptionInfo feInfo =
      FSDirectory.isReservedRawName(srcArg) ? null
      : dir.getFileEncryptionInfo(inode, iip.getPathSnapshotId(), iip);

  final LocatedBlocks blocks = blockManager.createLocatedBlocks(
      inode.getBlocks(iip.getPathSnapshotId()), fileSize,
      isUc, offset, length, needBlockToken, iip.isSnapshot(), feInfo);

  // Set caching information for the located blocks.
  for (LocatedBlock lb : blocks.getLocatedBlocks()) {
    cacheManager.setCachedLocations(lb);
  }

  // Only a hint for the caller: the update itself happens later under the
  // write lock, where the condition is evaluated again.
  final long now = now();
  boolean updateAccessTime = isAccessTimeSupported() && !isInSafeMode()
      && !iip.isSnapshot()
      && now > inode.getAccessTime() + getAccessTimePrecision();
  return new GetBlockLocationsResult(updateAccessTime, blocks);
}

/**
 * Moves all the blocks from {@code srcs} and appends them to {@code target}
 * To avoid rollbacks we will verify validity of ALL of the args
 * before we start actual move.
 *
 * This does not support ".inodes" relative path
 * @param target target to concat into
 * @param srcs file that will be concatenated
 * @param logRetryCache passed through to the concat operation
 * @throws IOException on error
 */
void concat(String target, String [] srcs, boolean logRetryCache)
    throws IOException {
  waitForLoadingFSImage();
  final String operationName = "concat";
  HdfsFileStatus stat = null;
  boolean success = false;
  checkOperation(OperationCategory.WRITE);
  writeLock();
  try {
    checkOperation(OperationCategory.WRITE);
    checkNameNodeSafeMode("Cannot concat " + target);
    stat = FSDirConcatOp.concat(dir, target, srcs, logRetryCache);
    success = true;
  } finally {
    writeUnlock(operationName);
    // Sync only on success; the audit event is written for every outcome,
    // not just for access control failures.
    if (success) {
      getEditLog().logSync();
    }
    logAuditEvent(success, operationName, Arrays.toString(srcs),
        target, stat);
  }
}

/**
 * stores the modification and access time for this inode.
 * The access time is precise up to an hour.
The transaction, if needed, is 1920 * written to the edits log but is not flushed. 1921 */ 1922 void setTimes(String src, long mtime, long atime) throws IOException { 1923 final String operationName = "setTimes"; 1924 HdfsFileStatus auditStat; 1925 checkOperation(OperationCategory.WRITE); 1926 writeLock(); 1927 try { 1928 checkOperation(OperationCategory.WRITE); 1929 checkNameNodeSafeMode("Cannot set times " + src); 1930 auditStat = FSDirAttrOp.setTimes(dir, src, mtime, atime); 1931 } catch (AccessControlException e) { 1932 logAuditEvent(false, operationName, src); 1933 throw e; 1934 } finally { 1935 writeUnlock(operationName); 1936 } 1937 getEditLog().logSync(); 1938 logAuditEvent(true, operationName, src, null, auditStat); 1939 } 1940 1941 /** 1942 * Create a symbolic link. 1943 */ 1944 @SuppressWarnings("deprecation") 1945 void createSymlink(String target, String link, 1946 PermissionStatus dirPerms, boolean createParent, boolean logRetryCache) 1947 throws IOException { 1948 final String operationName = "createSymlink"; 1949 if (!FileSystem.areSymlinksEnabled()) { 1950 throw new UnsupportedOperationException("Symlinks not supported"); 1951 } 1952 HdfsFileStatus auditStat = null; 1953 checkOperation(OperationCategory.WRITE); 1954 writeLock(); 1955 try { 1956 checkOperation(OperationCategory.WRITE); 1957 checkNameNodeSafeMode("Cannot create symlink " + link); 1958 auditStat = FSDirSymlinkOp.createSymlinkInt(this, target, link, dirPerms, 1959 createParent, logRetryCache); 1960 } catch (AccessControlException e) { 1961 logAuditEvent(false, operationName, link, target, null); 1962 throw e; 1963 } finally { 1964 writeUnlock(operationName); 1965 } 1966 getEditLog().logSync(); 1967 logAuditEvent(true, operationName, link, target, auditStat); 1968 } 1969 1970 /** 1971 * Set replication for an existing file. 
1972 * 1973 * The NameNode sets new replication and schedules either replication of 1974 * under-replicated data blocks or removal of the excessive block copies 1975 * if the blocks are over-replicated. 1976 * 1977 * @see ClientProtocol#setReplication(String, short) 1978 * @param src file name 1979 * @param replication new replication 1980 * @return true if successful; 1981 * false if file does not exist or is a directory 1982 */ 1983 boolean setReplication(final String src, final short replication) 1984 throws IOException { 1985 final String operationName = "setReplication"; 1986 boolean success = false; 1987 waitForLoadingFSImage(); 1988 checkOperation(OperationCategory.WRITE); 1989 writeLock(); 1990 try { 1991 checkOperation(OperationCategory.WRITE); 1992 checkNameNodeSafeMode("Cannot set replication for " + src); 1993 success = FSDirAttrOp.setReplication(dir, blockManager, src, replication); 1994 } catch (AccessControlException e) { 1995 logAuditEvent(false, operationName, src); 1996 throw e; 1997 } finally { 1998 writeUnlock(operationName); 1999 } 2000 if (success) { 2001 getEditLog().logSync(); 2002 logAuditEvent(true, operationName, src); 2003 } 2004 return success; 2005 } 2006 2007 /** 2008 * Truncate file to a lower length. 2009 * Truncate cannot be reverted / recovered from as it causes data loss. 2010 * Truncation at block boundary is atomic, otherwise it requires 2011 * block recovery to truncate the last block of the file. 2012 * 2013 * @return true if client does not need to wait for block recovery, 2014 * false if client needs to wait for block recovery. 
2015 */ 2016 boolean truncate(String src, long newLength, 2017 String clientName, String clientMachine, 2018 long mtime) 2019 throws IOException, UnresolvedLinkException { 2020 boolean ret; 2021 try { 2022 ret = truncateInt(src, newLength, clientName, clientMachine, mtime); 2023 } catch (AccessControlException e) { 2024 logAuditEvent(false, "truncate", src); 2025 throw e; 2026 } 2027 return ret; 2028 } 2029 2030 boolean truncateInt(String srcArg, long newLength, 2031 String clientName, String clientMachine, 2032 long mtime) 2033 throws IOException, UnresolvedLinkException { 2034 final String operationName = "truncate"; 2035 String src = srcArg; 2036 NameNode.stateChangeLog.debug( 2037 "DIR* NameSystem.truncate: src={} newLength={}", src, newLength); 2038 if (newLength < 0) { 2039 throw new HadoopIllegalArgumentException( 2040 "Cannot truncate to a negative file size: " + newLength + "."); 2041 } 2042 HdfsFileStatus stat = null; 2043 FSPermissionChecker pc = getPermissionChecker(); 2044 checkOperation(OperationCategory.WRITE); 2045 boolean res; 2046 writeLock(); 2047 BlocksMapUpdateInfo toRemoveBlocks = new BlocksMapUpdateInfo(); 2048 try { 2049 checkOperation(OperationCategory.WRITE); 2050 checkNameNodeSafeMode("Cannot truncate for " + src); 2051 INodesInPath iip = dir.resolvePath(pc, src); 2052 src = iip.getPath(); 2053 res = truncateInternal(src, newLength, clientName, 2054 clientMachine, mtime, pc, toRemoveBlocks); 2055 stat = dir.getAuditFileInfo(dir.getINodesInPath4Write(src, false)); 2056 } finally { 2057 writeUnlock(operationName); 2058 } 2059 getEditLog().logSync(); 2060 if (!toRemoveBlocks.getToDeleteList().isEmpty()) { 2061 removeBlocks(toRemoveBlocks); 2062 toRemoveBlocks.clear(); 2063 } 2064 logAuditEvent(true, operationName, src, null, stat); 2065 return res; 2066 } 2067 2068 /** 2069 * Truncate a file to a given size 2070 * Update the count at each ancestor directory with quota 2071 */ 2072 boolean truncateInternal(String src, long newLength, 2073 
String clientName, String clientMachine, 2074 long mtime, FSPermissionChecker pc, 2075 BlocksMapUpdateInfo toRemoveBlocks) 2076 throws IOException, UnresolvedLinkException { 2077 assert hasWriteLock(); 2078 INodesInPath iip = dir.getINodesInPath4Write(src, true); 2079 if (isPermissionEnabled) { 2080 dir.checkPathAccess(pc, iip, FsAction.WRITE); 2081 } 2082 INodeFile file = INodeFile.valueOf(iip.getLastINode(), src); 2083 final BlockStoragePolicy lpPolicy = 2084 blockManager.getStoragePolicy("LAZY_PERSIST"); 2085 2086 if (lpPolicy != null && 2087 lpPolicy.getId() == file.getStoragePolicyID()) { 2088 throw new UnsupportedOperationException( 2089 "Cannot truncate lazy persist file " + src); 2090 } 2091 2092 // Check if the file is already being truncated with the same length 2093 final BlockInfoContiguous last = file.getLastBlock(); 2094 if (last != null && last.getBlockUCState() == BlockUCState.UNDER_RECOVERY) { 2095 final Block truncateBlock 2096 = ((BlockInfoContiguousUnderConstruction)last).getTruncateBlock(); 2097 if (truncateBlock != null) { 2098 final long truncateLength = file.computeFileSize(false, false) 2099 + truncateBlock.getNumBytes(); 2100 if (newLength == truncateLength) { 2101 return false; 2102 } 2103 } 2104 } 2105 2106 // Opening an existing file for truncate. May need lease recovery. 2107 recoverLeaseInternal(RecoverLeaseOp.TRUNCATE_FILE, 2108 iip, src, clientName, clientMachine, false); 2109 // Truncate length check. 2110 long oldLength = file.computeFileSize(); 2111 if(oldLength == newLength) { 2112 return true; 2113 } 2114 if(oldLength < newLength) { 2115 throw new HadoopIllegalArgumentException( 2116 "Cannot truncate to a larger file size. Current size: " + oldLength + 2117 ", truncate size: " + newLength + "."); 2118 } 2119 // Perform INodeFile truncation. 
2120 final QuotaCounts delta = new QuotaCounts.Builder().build(); 2121 boolean onBlockBoundary = dir.truncate(iip, newLength, toRemoveBlocks, 2122 mtime, delta); 2123 Block truncateBlock = null; 2124 if(!onBlockBoundary) { 2125 // Open file for write, but don't log into edits 2126 long lastBlockDelta = file.computeFileSize() - newLength; 2127 assert lastBlockDelta > 0 : "delta is 0 only if on block bounday"; 2128 truncateBlock = prepareFileForTruncate(iip, clientName, clientMachine, 2129 lastBlockDelta, null); 2130 } 2131 2132 // update the quota: use the preferred block size for UC block 2133 dir.writeLock(); 2134 try { 2135 dir.updateCountNoQuotaCheck(iip, iip.length() - 1, delta); 2136 } finally { 2137 dir.writeUnlock(); 2138 } 2139 2140 getEditLog().logTruncate(src, clientName, clientMachine, newLength, mtime, 2141 truncateBlock); 2142 return onBlockBoundary; 2143 } 2144 2145 /** 2146 * Convert current INode to UnderConstruction. 2147 * Recreate lease. 2148 * Create new block for the truncated copy. 2149 * Schedule truncation of the replicas. 2150 * 2151 * @return the returned block will be written to editLog and passed back into 2152 * this method upon loading. 2153 */ 2154 Block prepareFileForTruncate(INodesInPath iip, 2155 String leaseHolder, 2156 String clientMachine, 2157 long lastBlockDelta, 2158 Block newBlock) 2159 throws IOException { 2160 INodeFile file = iip.getLastINode().asFile(); 2161 String src = iip.getPath(); 2162 file.recordModification(iip.getLatestSnapshotId()); 2163 file.toUnderConstruction(leaseHolder, clientMachine); 2164 assert file.isUnderConstruction() : "inode should be under construction."; 2165 leaseManager.addLease( 2166 file.getFileUnderConstructionFeature().getClientName(), src); 2167 boolean shouldRecoverNow = (newBlock == null); 2168 BlockInfoContiguous oldBlock = file.getLastBlock(); 2169 boolean shouldCopyOnTruncate = shouldCopyOnTruncate(file, oldBlock); 2170 if(newBlock == null) { 2171 newBlock = (shouldCopyOnTruncate) ? 
createNewBlock() : 2172 new Block(oldBlock.getBlockId(), oldBlock.getNumBytes(), 2173 nextGenerationStamp(blockIdManager.isLegacyBlock(oldBlock))); 2174 } 2175 2176 BlockInfoContiguousUnderConstruction truncatedBlockUC; 2177 if(shouldCopyOnTruncate) { 2178 // Add new truncateBlock into blocksMap and 2179 // use oldBlock as a source for copy-on-truncate recovery 2180 truncatedBlockUC = new BlockInfoContiguousUnderConstruction(newBlock, 2181 file.getBlockReplication()); 2182 truncatedBlockUC.setNumBytes(oldBlock.getNumBytes() - lastBlockDelta); 2183 truncatedBlockUC.setTruncateBlock(oldBlock); 2184 file.setLastBlock(truncatedBlockUC, blockManager.getStorages(oldBlock)); 2185 getBlockManager().addBlockCollection(truncatedBlockUC, file); 2186 2187 NameNode.stateChangeLog.debug( 2188 "BLOCK* prepareFileForTruncate: Scheduling copy-on-truncate to new" + 2189 " size {} new block {} old block {}", truncatedBlockUC.getNumBytes(), 2190 newBlock, truncatedBlockUC.getTruncateBlock()); 2191 } else { 2192 // Use new generation stamp for in-place truncate recovery 2193 blockManager.convertLastBlockToUnderConstruction(file, lastBlockDelta); 2194 oldBlock = file.getLastBlock(); 2195 assert !oldBlock.isComplete() : "oldBlock should be under construction"; 2196 truncatedBlockUC = (BlockInfoContiguousUnderConstruction) oldBlock; 2197 truncatedBlockUC.setTruncateBlock(new BlockInfoContiguous(oldBlock, 2198 file.getBlockReplication())); 2199 truncatedBlockUC.getTruncateBlock().setNumBytes( 2200 oldBlock.getNumBytes() - lastBlockDelta); 2201 truncatedBlockUC.getTruncateBlock().setGenerationStamp( 2202 newBlock.getGenerationStamp()); 2203 2204 NameNode.stateChangeLog.debug( 2205 "BLOCK* prepareFileForTruncate: {} Scheduling in-place block " + 2206 "truncate to new size {}", 2207 truncatedBlockUC.getTruncateBlock().getNumBytes(), truncatedBlockUC); 2208 } 2209 if (shouldRecoverNow) { 2210 truncatedBlockUC.initializeBlockRecovery(newBlock.getGenerationStamp(), 2211 true); 2212 } 2213 2214 
return newBlock; 2215 } 2216 2217 /** 2218 * Defines if a replica needs to be copied on truncate or 2219 * can be truncated in place. 2220 */ 2221 boolean shouldCopyOnTruncate(INodeFile file, BlockInfoContiguous blk) { 2222 if(!isUpgradeFinalized()) { 2223 return true; 2224 } 2225 if (isRollingUpgrade()) { 2226 return true; 2227 } 2228 return file.isBlockInLatestSnapshot(blk); 2229 } 2230 2231 /** 2232 * Set the storage policy for a file or a directory. 2233 * 2234 * @param src file/directory path 2235 * @param policyName storage policy name 2236 */ 2237 void setStoragePolicy(String src, String policyName) throws IOException { 2238 HdfsFileStatus auditStat; 2239 waitForLoadingFSImage(); 2240 checkOperation(OperationCategory.WRITE); 2241 final String operationName = "setStoragePolicy"; 2242 writeLock(); 2243 try { 2244 checkOperation(OperationCategory.WRITE); 2245 checkNameNodeSafeMode("Cannot set storage policy for " + src); 2246 auditStat = FSDirAttrOp.setStoragePolicy( 2247 dir, blockManager, src, policyName); 2248 } catch (AccessControlException e) { 2249 logAuditEvent(false, operationName, src); 2250 throw e; 2251 } finally { 2252 writeUnlock(operationName); 2253 } 2254 getEditLog().logSync(); 2255 logAuditEvent(true, operationName, src, null, auditStat); 2256 } 2257 2258 /** 2259 * @return All the existing block storage policies 2260 */ 2261 BlockStoragePolicy[] getStoragePolicies() throws IOException { 2262 checkOperation(OperationCategory.READ); 2263 waitForLoadingFSImage(); 2264 readLock(); 2265 try { 2266 checkOperation(OperationCategory.READ); 2267 return FSDirAttrOp.getStoragePolicies(blockManager); 2268 } finally { 2269 readUnlock("getStoragePolicies"); 2270 } 2271 } 2272 2273 long getPreferredBlockSize(String src) throws IOException { 2274 checkOperation(OperationCategory.READ); 2275 readLock(); 2276 try { 2277 checkOperation(OperationCategory.READ); 2278 return FSDirAttrOp.getPreferredBlockSize(dir, src); 2279 } finally { 2280 
readUnlock("getPreferredBlockSize"); 2281 } 2282 } 2283 2284 /** 2285 * If the file is within an encryption zone, select the appropriate 2286 * CryptoProtocolVersion from the list provided by the client. Since the 2287 * client may be newer, we need to handle unknown versions. 2288 * 2289 * @param zone EncryptionZone of the file 2290 * @param supportedVersions List of supported protocol versions 2291 * @return chosen protocol version 2292 * @throws IOException 2293 */ 2294 private CryptoProtocolVersion chooseProtocolVersion(EncryptionZone zone, 2295 CryptoProtocolVersion[] supportedVersions) 2296 throws UnknownCryptoProtocolVersionException, UnresolvedLinkException, 2297 SnapshotAccessControlException { 2298 Preconditions.checkNotNull(zone); 2299 Preconditions.checkNotNull(supportedVersions); 2300 // Right now, we only support a single protocol version, 2301 // so simply look for it in the list of provided options 2302 final CryptoProtocolVersion required = zone.getVersion(); 2303 2304 for (CryptoProtocolVersion c : supportedVersions) { 2305 if (c.equals(CryptoProtocolVersion.UNKNOWN)) { 2306 if (LOG.isDebugEnabled()) { 2307 LOG.debug("Ignoring unknown CryptoProtocolVersion provided by " + 2308 "client: " + c.getUnknownValue()); 2309 } 2310 continue; 2311 } 2312 if (c.equals(required)) { 2313 return c; 2314 } 2315 } 2316 throw new UnknownCryptoProtocolVersionException( 2317 "No crypto protocol versions provided by the client are supported." 2318 + " Client provided: " + Arrays.toString(supportedVersions) 2319 + " NameNode supports: " + Arrays.toString(CryptoProtocolVersion 2320 .values())); 2321 } 2322 2323 /** 2324 * Invoke KeyProvider APIs to generate an encrypted data encryption key for an 2325 * encryption zone. Should not be called with any locks held. 
2326 * 2327 * @param ezKeyName key name of an encryption zone 2328 * @return New EDEK, or null if ezKeyName is null 2329 * @throws IOException 2330 */ 2331 private EncryptedKeyVersion generateEncryptedDataEncryptionKey(String 2332 ezKeyName) throws IOException { 2333 if (ezKeyName == null) { 2334 return null; 2335 } 2336 EncryptedKeyVersion edek = null; 2337 try { 2338 edek = provider.generateEncryptedKey(ezKeyName); 2339 } catch (GeneralSecurityException e) { 2340 throw new IOException(e); 2341 } 2342 Preconditions.checkNotNull(edek); 2343 return edek; 2344 } 2345 2346 /** 2347 * Create a new file entry in the namespace. 2348 * 2349 * For description of parameters and exceptions thrown see 2350 * {@link ClientProtocol#create}, except it returns valid file status upon 2351 * success 2352 */ 2353 HdfsFileStatus startFile(String src, PermissionStatus permissions, 2354 String holder, String clientMachine, EnumSet<CreateFlag> flag, 2355 boolean createParent, short replication, long blockSize, 2356 CryptoProtocolVersion[] supportedVersions, boolean logRetryCache) 2357 throws AccessControlException, SafeModeException, 2358 FileAlreadyExistsException, UnresolvedLinkException, 2359 FileNotFoundException, ParentNotDirectoryException, IOException { 2360 2361 HdfsFileStatus status = null; 2362 try { 2363 status = startFileInt(src, permissions, holder, clientMachine, flag, 2364 createParent, replication, blockSize, supportedVersions, 2365 logRetryCache); 2366 } catch (AccessControlException e) { 2367 logAuditEvent(false, "create", src); 2368 throw e; 2369 } 2370 return status; 2371 } 2372 2373 private HdfsFileStatus startFileInt(final String srcArg, 2374 PermissionStatus permissions, String holder, String clientMachine, 2375 EnumSet<CreateFlag> flag, boolean createParent, short replication, 2376 long blockSize, CryptoProtocolVersion[] supportedVersions, 2377 boolean logRetryCache) 2378 throws AccessControlException, SafeModeException, 2379 FileAlreadyExistsException, 
UnresolvedLinkException, 2380 FileNotFoundException, ParentNotDirectoryException, IOException { 2381 String src = srcArg; 2382 final String operationName = "create"; 2383 if (NameNode.stateChangeLog.isDebugEnabled()) { 2384 StringBuilder builder = new StringBuilder(); 2385 builder.append("DIR* NameSystem.startFile: src=" + src 2386 + ", holder=" + holder 2387 + ", clientMachine=" + clientMachine 2388 + ", createParent=" + createParent 2389 + ", replication=" + replication 2390 + ", createFlag=" + flag.toString() 2391 + ", blockSize=" + blockSize); 2392 builder.append(", supportedVersions="); 2393 if (supportedVersions != null) { 2394 builder.append(Arrays.toString(supportedVersions)); 2395 } else { 2396 builder.append("null"); 2397 } 2398 NameNode.stateChangeLog.debug(builder.toString()); 2399 } 2400 if (!DFSUtil.isValidName(src)) { 2401 throw new InvalidPathException(src); 2402 } 2403 blockManager.verifyReplication(src, replication, clientMachine); 2404 2405 boolean skipSync = false; 2406 HdfsFileStatus stat = null; 2407 FSPermissionChecker pc = getPermissionChecker(); 2408 if (blockSize < minBlockSize) { 2409 throw new IOException("Specified block size is less than configured" + 2410 " minimum value (" + DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY 2411 + "): " + blockSize + " < " + minBlockSize); 2412 } 2413 boolean create = flag.contains(CreateFlag.CREATE); 2414 boolean overwrite = flag.contains(CreateFlag.OVERWRITE); 2415 boolean isLazyPersist = flag.contains(CreateFlag.LAZY_PERSIST); 2416 2417 waitForLoadingFSImage(); 2418 2419 /** 2420 * If the file is in an encryption zone, we optimistically create an 2421 * EDEK for the file by calling out to the configured KeyProvider. 2422 * Since this typically involves doing an RPC, we take the readLock 2423 * initially, then drop it to do the RPC. 
2424 * 2425 * Since the path can flip-flop between being in an encryption zone and not 2426 * in the meantime, we need to recheck the preconditions when we retake the 2427 * lock to do the create. If the preconditions are not met, we throw a 2428 * special RetryStartFileException to ask the DFSClient to try the create 2429 * again later. 2430 */ 2431 CryptoProtocolVersion protocolVersion = null; 2432 CipherSuite suite = null; 2433 String ezKeyName = null; 2434 EncryptedKeyVersion edek = null; 2435 2436 if (provider != null) { 2437 readLock(); 2438 try { 2439 INodesInPath iip = dir.resolvePathForWrite(pc, src); 2440 src = iip.getPath(); 2441 // Nothing to do if the path is not within an EZ 2442 final EncryptionZone zone = dir.getEZForPath(iip); 2443 if (zone != null) { 2444 protocolVersion = chooseProtocolVersion(zone, supportedVersions); 2445 suite = zone.getSuite(); 2446 ezKeyName = zone.getKeyName(); 2447 2448 Preconditions.checkNotNull(protocolVersion); 2449 Preconditions.checkNotNull(suite); 2450 Preconditions.checkArgument(!suite.equals(CipherSuite.UNKNOWN), 2451 "Chose an UNKNOWN CipherSuite!"); 2452 Preconditions.checkNotNull(ezKeyName); 2453 } 2454 } finally { 2455 readUnlock(operationName); 2456 } 2457 2458 Preconditions.checkState( 2459 (suite == null && ezKeyName == null) || 2460 (suite != null && ezKeyName != null), 2461 "Both suite and ezKeyName should both be null or not null"); 2462 2463 // Generate EDEK if necessary while not holding the lock 2464 edek = generateEncryptedDataEncryptionKey(ezKeyName); 2465 EncryptionFaultInjector.getInstance().startFileAfterGenerateKey(); 2466 } 2467 2468 // Proceed with the create, using the computed cipher suite and 2469 // generated EDEK 2470 BlocksMapUpdateInfo toRemoveBlocks = null; 2471 writeLock(); 2472 try { 2473 checkOperation(OperationCategory.WRITE); 2474 checkNameNodeSafeMode("Cannot create file" + src); 2475 dir.writeLock(); 2476 try { 2477 final INodesInPath iip = dir.resolvePathForWrite(pc, src); 2478 
src = iip.getPath();
        toRemoveBlocks = startFileInternal(
            pc, iip, permissions, holder,
            clientMachine, create, overwrite,
            createParent, replication, blockSize,
            isLazyPersist, suite, protocolVersion, edek,
            logRetryCache);
        stat = FSDirStatAndListingOp.getFileInfo(
            dir, src, false, FSDirectory.isReservedRawName(srcArg));
      } finally {
        dir.writeUnlock();
      }
    } catch (StandbyException se) {
      skipSync = true;
      throw se;
    } finally {
      writeUnlock(operationName);
      // There might be transactions logged while trying to recover the lease.
      // They need to be sync'ed even when an exception was thrown.
      if (!skipSync) {
        getEditLog().logSync();
        // Blocks of an overwritten file are removed only after the sync.
        if (toRemoveBlocks != null) {
          removeBlocks(toRemoveBlocks);
          toRemoveBlocks.clear();
        }
      }
    }

    logAuditEvent(true, operationName, srcArg, null, stat);
    return stat;
  }

  /**
   * Create a new file or overwrite an existing file<br>
   *
   * Once the file is created, the client then allocates a new block with the
   * next call using {@link ClientProtocol#addBlock}.
   * <p>
   * For description of parameters and exceptions thrown see
   * {@link ClientProtocol#create}
   * <p>
   * The namesystem write lock must be held (asserted on entry).
   *
   * @return the blocks collected while deleting the overwritten file, or
   *         null when no existing file was overwritten
   * @throws FileAlreadyExistsException if the path is a directory, or is an
   *         existing file and {@code overwrite} is false
   * @throws FileNotFoundException if the file does not exist and
   *         {@code create} is false
   * @throws RetryStartFileException if the path lies in an encryption zone
   *         and the supplied suite/EDEK is missing or names a different key
   */
  private BlocksMapUpdateInfo startFileInternal(FSPermissionChecker pc,
      INodesInPath iip, PermissionStatus permissions, String holder,
      String clientMachine, boolean create, boolean overwrite,
      boolean createParent, short replication, long blockSize,
      boolean isLazyPersist, CipherSuite suite, CryptoProtocolVersion version,
      EncryptedKeyVersion edek, boolean logRetryEntry)
      throws IOException {
    assert hasWriteLock();
    // Verify that the destination does not exist as a directory already.
    final INode inode = iip.getLastINode();
    final String src = iip.getPath();
    if (inode != null && inode.isDirectory()) {
      throw new FileAlreadyExistsException(src +
          " already exists as a directory");
    }

    final INodeFile myFile = INodeFile.valueOf(inode, src, true);
    if (isPermissionEnabled) {
      if (overwrite && myFile != null) {
        dir.checkPathAccess(pc, iip, FsAction.WRITE);
      }
      /*
       * To overwrite existing file, need to check 'w' permission
       * of parent (equals to ancestor in this case)
       */
      dir.checkAncestorAccess(pc, iip, FsAction.WRITE);
    }
    if (!createParent) {
      dir.verifyParentDir(iip, src);
    }

    // Stays null unless the path is inside an encryption zone.
    FileEncryptionInfo feInfo = null;

    final EncryptionZone zone = dir.getEZForPath(iip);
    if (zone != null) {
      // The path is now within an EZ, but we're missing encryption parameters
      if (suite == null || edek == null) {
        throw new RetryStartFileException();
      }
      // Path is within an EZ and we have provided encryption parameters.
      // Make sure that the generated EDEK matches the settings of the EZ.
      final String ezKeyName = zone.getKeyName();
      if (!ezKeyName.equals(edek.getEncryptionKeyName())) {
        throw new RetryStartFileException();
      }
      feInfo = new FileEncryptionInfo(suite, version,
          edek.getEncryptedKeyVersion().getMaterial(),
          edek.getEncryptedKeyIv(),
          ezKeyName, edek.getEncryptionKeyVersionName());
    }

    try {
      BlocksMapUpdateInfo toRemoveBlocks = null;
      if (myFile == null) {
        if (!create) {
          throw new FileNotFoundException("Can't overwrite non-existent " +
              src + " for client " + clientMachine);
        }
      } else {
        if (overwrite) {
          toRemoveBlocks = new BlocksMapUpdateInfo();
          List<INode> toRemoveINodes = new ChunkedArrayList<INode>();
          long ret = FSDirDeleteOp.delete(dir, iip, toRemoveBlocks,
              toRemoveINodes, now());
          if (ret >= 0) {
            // Drop the deleted inode from the resolved path before the
            // replacement file is added below.
            iip = INodesInPath.replace(iip, iip.length() - 1, null);
            FSDirDeleteOp.incrDeletedFileCount(ret);
            removeLeasesAndINodes(src, toRemoveINodes, true);
          }
        } else {
          // If lease soft limit time is expired, recover the lease
          recoverLeaseInternal(RecoverLeaseOp.CREATE_FILE,
              iip, src, holder, clientMachine, false);
          // Reached only when recoverLeaseInternal returned normally; the
          // create is rejected either way because overwrite was not asked.
          throw new FileAlreadyExistsException(src + " for client " +
              clientMachine + " already exists");
        }
      }

      checkFsObjectLimit();
      INodeFile newNode = null;

      // Always do an implicit mkdirs for parent directory tree.
      Map.Entry<INodesInPath, String> parent = FSDirMkdirOp
          .createAncestorDirectories(dir, iip, permissions);
      if (parent != null) {
        iip = dir.addFile(parent.getKey(), parent.getValue(), permissions,
            replication, blockSize, holder, clientMachine);
        newNode = iip != null ? iip.getLastINode().asFile() : null;
      }

      if (newNode == null) {
        throw new IOException("Unable to add " + src + " to namespace");
      }
      leaseManager.addLease(newNode.getFileUnderConstructionFeature()
          .getClientName(), src);

      // Set encryption attributes if necessary
      if (feInfo != null) {
        dir.setFileEncryptionInfo(src, feInfo);
        // Re-read the inode by id after the encryption info was attached.
        newNode = dir.getInode(newNode.getId()).asFile();
      }

      setNewINodeStoragePolicy(newNode, iip, isLazyPersist);

      // record file record in log, record new generation stamp
      getEditLog().logOpenFile(src, newNode, overwrite, logRetryEntry);
      NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: added {}" +
          " inode {} holder {}", src, newNode.getId(), holder);
      return toRemoveBlocks;
    } catch (IOException ie) {
      NameNode.stateChangeLog.warn("DIR* NameSystem.startFile: " + src + " " +
          ie.getMessage());
      throw ie;
    }
  }

  /**
   * Assign the storage policy id of a newly created file inode.
   * With {@code isLazyPersist} the LAZY_PERSIST policy is applied; otherwise
   * the policy currently reported by the inode is written back onto it when
   * that policy is flagged copy-on-create.
   *
   * @param inode the new file inode to update
   * @param iip resolved path, used for the latest snapshot id
   * @param isLazyPersist whether the lazy-persist flag was passed to create
   * @throws HadoopIllegalArgumentException (unchecked) if lazy persist was
   *         requested but no LAZY_PERSIST policy is available
   */
  private void setNewINodeStoragePolicy(INodeFile inode,
                                        INodesInPath iip,
                                        boolean isLazyPersist)
      throws IOException {

    if (isLazyPersist) {
      BlockStoragePolicy lpPolicy =
          blockManager.getStoragePolicy("LAZY_PERSIST");

      // Set LAZY_PERSIST storage policy if the flag was passed to
      // CreateFile.
      if (lpPolicy == null) {
        throw new HadoopIllegalArgumentException(
            "The LAZY_PERSIST storage policy has been disabled " +
            "by the administrator.");
      }
      inode.setStoragePolicyID(lpPolicy.getId(),
                                 iip.getLatestSnapshotId());
    } else {
      BlockStoragePolicy effectivePolicy =
          blockManager.getStoragePolicy(inode.getStoragePolicyID());

      if (effectivePolicy != null &&
          effectivePolicy.isCopyOnCreateFile()) {
        // Copy effective policy from ancestor directory to current file.
        inode.setStoragePolicyID(effectivePolicy.getId(),
                                 iip.getLatestSnapshotId());
      }
    }
  }

  /**
   * Append to an existing file for append.
   * <p>
   *
   * The method returns the last block of the file if this is a partial block,
   * which can still be used for writing more data. The client uses the returned
   * block locations to form the data pipeline for this block.<br>
   * The method returns null if the last block is full. The client then
   * allocates a new block with the next call using
   * {@link ClientProtocol#addBlock}.
   * <p>
   *
   * For description of parameters and exceptions thrown see
   * {@link ClientProtocol#append(String, String, EnumSetWritable)}
   * <p>
   * The namesystem write lock must be held (asserted on entry).
   *
   * @return the last block locations if the block is partial or null otherwise
   */
  private LocatedBlock appendFileInternal(FSPermissionChecker pc,
      INodesInPath iip, String holder, String clientMachine, boolean newBlock,
      boolean logRetryCache) throws IOException {
    assert hasWriteLock();
    // Verify that the destination does not exist as a directory already.
    final INode inode = iip.getLastINode();
    final String src = iip.getPath();
    if (inode != null && inode.isDirectory()) {
      throw new FileAlreadyExistsException("Cannot append to directory " + src
          + "; already exists as a directory.");
    }
    if (isPermissionEnabled) {
      dir.checkPathAccess(pc, iip, FsAction.WRITE);
    }

    try {
      if (inode == null) {
        throw new FileNotFoundException("failed to append to non-existent file "
          + src + " for client " + clientMachine);
      }
      INodeFile myFile = INodeFile.valueOf(inode, src, true);
      // Files carrying the LAZY_PERSIST policy id are rejected for append.
      final BlockStoragePolicy lpPolicy =
          blockManager.getStoragePolicy("LAZY_PERSIST");
      if (lpPolicy != null &&
          lpPolicy.getId() == myFile.getStoragePolicyID()) {
        throw new UnsupportedOperationException(
            "Cannot append to lazy persist file " + src);
      }
      // Opening an existing file for append - may need to recover lease.
      recoverLeaseInternal(RecoverLeaseOp.APPEND_FILE,
          iip, src, holder, clientMachine, false);

      final BlockInfoContiguous lastBlock = myFile.getLastBlock();
      // Check that the block has at least minimum replication.
      if(lastBlock != null && lastBlock.isComplete() &&
          !getBlockManager().isSufficientlyReplicated(lastBlock)) {
        throw new IOException("append: lastBlock=" + lastBlock +
            " of src=" + src + " is not sufficiently replicated yet.");
      }
      return prepareFileForAppend(src, iip, holder, clientMachine, newBlock,
          true, logRetryCache);
    } catch (IOException ie) {
      // Only IOExceptions are logged here; the unchecked
      // UnsupportedOperationException above propagates without this warning.
      NameNode.stateChangeLog.warn("DIR* NameSystem.append: " +ie.getMessage());
      throw ie;
    }
  }

  /**
   * Convert current node to under construction.
   * Recreate in-memory lease record.
2733 * 2734 * @param src path to the file 2735 * @param leaseHolder identifier of the lease holder on this file 2736 * @param clientMachine identifier of the client machine 2737 * @param newBlock if the data is appended to a new block 2738 * @param writeToEditLog whether to persist this change to the edit log 2739 * @param logRetryCache whether to record RPC ids in editlog for retry cache 2740 * rebuilding 2741 * @return the last block locations if the block is partial or null otherwise 2742 * @throws UnresolvedLinkException 2743 * @throws IOException 2744 */ 2745 LocatedBlock prepareFileForAppend(String src, INodesInPath iip, 2746 String leaseHolder, String clientMachine, boolean newBlock, 2747 boolean writeToEditLog, boolean logRetryCache) throws IOException { 2748 final INodeFile file = iip.getLastINode().asFile(); 2749 final QuotaCounts delta = verifyQuotaForUCBlock(file, iip); 2750 2751 file.recordModification(iip.getLatestSnapshotId()); 2752 file.toUnderConstruction(leaseHolder, clientMachine); 2753 2754 leaseManager.addLease( 2755 file.getFileUnderConstructionFeature().getClientName(), src); 2756 2757 LocatedBlock ret = null; 2758 if (!newBlock) { 2759 ret = blockManager.convertLastBlockToUnderConstruction(file, 0); 2760 if (ret != null && delta != null) { 2761 Preconditions.checkState(delta.getStorageSpace() >= 0, 2762 "appending to a block with size larger than the preferred block size"); 2763 dir.writeLock(); 2764 try { 2765 dir.updateCountNoQuotaCheck(iip, iip.length() - 1, delta); 2766 } finally { 2767 dir.writeUnlock(); 2768 } 2769 } 2770 } else { 2771 BlockInfoContiguous lastBlock = file.getLastBlock(); 2772 if (lastBlock != null) { 2773 ExtendedBlock blk = new ExtendedBlock(this.getBlockPoolId(), lastBlock); 2774 ret = new LocatedBlock(blk, new DatanodeInfo[0]); 2775 } 2776 } 2777 2778 if (writeToEditLog) { 2779 getEditLog().logAppendFile(src, file, newBlock, logRetryCache); 2780 } 2781 return ret; 2782 } 2783 2784 /** 2785 * Verify quota when using 
the preferred block size for UC block. This is 2786 * usually used by append and truncate 2787 * @throws QuotaExceededException when violating the storage quota 2788 * @return expected quota usage update. null means no change or no need to 2789 * update quota usage later 2790 */ 2791 private QuotaCounts verifyQuotaForUCBlock(INodeFile file, INodesInPath iip) 2792 throws QuotaExceededException { 2793 if (!isImageLoaded() || dir.shouldSkipQuotaChecks()) { 2794 // Do not check quota if editlog is still being processed 2795 return null; 2796 } 2797 if (file.getLastBlock() != null) { 2798 final QuotaCounts delta = computeQuotaDeltaForUCBlock(file); 2799 dir.readLock(); 2800 try { 2801 FSDirectory.verifyQuota(iip, iip.length() - 1, delta, null); 2802 return delta; 2803 } finally { 2804 dir.readUnlock(); 2805 } 2806 } 2807 return null; 2808 } 2809 2810 /** Compute quota change for converting a complete block to a UC block */ 2811 private QuotaCounts computeQuotaDeltaForUCBlock(INodeFile file) { 2812 final QuotaCounts delta = new QuotaCounts.Builder().build(); 2813 final BlockInfoContiguous lastBlock = file.getLastBlock(); 2814 if (lastBlock != null) { 2815 final long diff = file.getPreferredBlockSize() - lastBlock.getNumBytes(); 2816 final short repl = file.getBlockReplication(); 2817 delta.addStorageSpace(diff * repl); 2818 final BlockStoragePolicy policy = dir.getBlockStoragePolicySuite() 2819 .getPolicy(file.getStoragePolicyID()); 2820 List<StorageType> types = policy.chooseStorageTypes(repl); 2821 for (StorageType t : types) { 2822 if (t.supportTypeQuota()) { 2823 delta.addTypeSpace(t, diff); 2824 } 2825 } 2826 } 2827 return delta; 2828 } 2829 2830 /** 2831 * Recover lease; 2832 * Immediately revoke the lease of the current lease holder and start lease 2833 * recovery so that the file can be forced to be closed. 
2834 * 2835 * @param src the path of the file to start lease recovery 2836 * @param holder the lease holder's name 2837 * @param clientMachine the client machine's name 2838 * @return true if the file is already closed or 2839 * if the lease can be released and the file can be closed. 2840 * @throws IOException 2841 */ 2842 boolean recoverLease(String src, String holder, String clientMachine) 2843 throws IOException { 2844 if (!DFSUtil.isValidName(src)) { 2845 throw new IOException("Invalid file name: " + src); 2846 } 2847 2848 boolean skipSync = false; 2849 FSPermissionChecker pc = getPermissionChecker(); 2850 checkOperation(OperationCategory.WRITE); 2851 writeLock(); 2852 try { 2853 checkOperation(OperationCategory.WRITE); 2854 checkNameNodeSafeMode("Cannot recover the lease of " + src); 2855 final INodesInPath iip = dir.resolvePathForWrite(pc, src); 2856 src = iip.getPath(); 2857 final INodeFile inode = INodeFile.valueOf(iip.getLastINode(), src); 2858 if (!inode.isUnderConstruction()) { 2859 return true; 2860 } 2861 if (isPermissionEnabled) { 2862 dir.checkPathAccess(pc, iip, FsAction.WRITE); 2863 } 2864 2865 return recoverLeaseInternal(RecoverLeaseOp.RECOVER_LEASE, 2866 iip, src, holder, clientMachine, true); 2867 } catch (StandbyException se) { 2868 skipSync = true; 2869 throw se; 2870 } finally { 2871 writeUnlock("recoverLease"); 2872 // There might be transactions logged while trying to recover the lease. 2873 // They need to be sync'ed even when an exception was thrown. 
2874 if (!skipSync) { 2875 getEditLog().logSync(); 2876 } 2877 } 2878 } 2879 2880 private enum RecoverLeaseOp { 2881 CREATE_FILE, 2882 APPEND_FILE, 2883 TRUNCATE_FILE, 2884 RECOVER_LEASE; 2885 2886 private String getExceptionMessage(String src, String holder, 2887 String clientMachine, String reason) { 2888 return "Failed to " + this + " " + src + " for " + holder + 2889 " on " + clientMachine + " because " + reason; 2890 } 2891 } 2892 2893 boolean recoverLeaseInternal(RecoverLeaseOp op, INodesInPath iip, 2894 String src, String holder, String clientMachine, boolean force) 2895 throws IOException { 2896 assert hasWriteLock(); 2897 INodeFile file = iip.getLastINode().asFile(); 2898 if (file.isUnderConstruction()) { 2899 // 2900 // If the file is under construction , then it must be in our 2901 // leases. Find the appropriate lease record. 2902 // 2903 Lease lease = leaseManager.getLease(holder); 2904 2905 if (!force && lease != null) { 2906 Lease leaseFile = leaseManager.getLeaseByPath(src); 2907 if (leaseFile != null && leaseFile.equals(lease)) { 2908 // We found the lease for this file but the original 2909 // holder is trying to obtain it again. 2910 throw new AlreadyBeingCreatedException( 2911 op.getExceptionMessage(src, holder, clientMachine, 2912 holder + " is already the current lease holder.")); 2913 } 2914 } 2915 // 2916 // Find the original holder. 
2917 // 2918 FileUnderConstructionFeature uc = file.getFileUnderConstructionFeature(); 2919 String clientName = uc.getClientName(); 2920 lease = leaseManager.getLease(clientName); 2921 if (lease == null) { 2922 throw new AlreadyBeingCreatedException( 2923 op.getExceptionMessage(src, holder, clientMachine, 2924 "the file is under construction but no leases found.")); 2925 } 2926 if (force) { 2927 // close now: no need to wait for soft lease expiration and 2928 // close only the file src 2929 LOG.info("recoverLease: " + lease + ", src=" + src + 2930 " from client " + clientName); 2931 return internalReleaseLease(lease, src, iip, holder); 2932 } else { 2933 assert lease.getHolder().equals(clientName) : 2934 "Current lease holder " + lease.getHolder() + 2935 " does not match file creator " + clientName; 2936 // 2937 // If the original holder has not renewed in the last SOFTLIMIT 2938 // period, then start lease recovery. 2939 // 2940 if (lease.expiredSoftLimit()) { 2941 LOG.info("startFile: recover " + lease + ", src=" + src + " client " 2942 + clientName); 2943 if (internalReleaseLease(lease, src, iip, null)) { 2944 return true; 2945 } else { 2946 throw new RecoveryInProgressException( 2947 op.getExceptionMessage(src, holder, clientMachine, 2948 "lease recovery is in progress. 
Try again later.")); 2949 } 2950 } else { 2951 final BlockInfoContiguous lastBlock = file.getLastBlock(); 2952 if (lastBlock != null 2953 && lastBlock.getBlockUCState() == BlockUCState.UNDER_RECOVERY) { 2954 throw new RecoveryInProgressException( 2955 op.getExceptionMessage(src, holder, clientMachine, 2956 "another recovery is in progress by " 2957 + clientName + " on " + uc.getClientMachine())); 2958 } else { 2959 throw new AlreadyBeingCreatedException( 2960 op.getExceptionMessage(src, holder, clientMachine, 2961 "this file lease is currently owned by " 2962 + clientName + " on " + uc.getClientMachine())); 2963 } 2964 } 2965 } 2966 } else { 2967 return true; 2968 } 2969 } 2970 2971 /** 2972 * Append to an existing file in the namespace. 2973 */ 2974 LastBlockWithStatus appendFile(String src, String holder, 2975 String clientMachine, EnumSet<CreateFlag> flag, boolean logRetryCache) 2976 throws IOException { 2977 try { 2978 return appendFileInt(src, holder, clientMachine, 2979 flag.contains(CreateFlag.NEW_BLOCK), logRetryCache); 2980 } catch (AccessControlException e) { 2981 logAuditEvent(false, "append", src); 2982 throw e; 2983 } 2984 } 2985 2986 private LastBlockWithStatus appendFileInt(final String srcArg, String holder, 2987 String clientMachine, boolean newBlock, boolean logRetryCache) 2988 throws IOException { 2989 String src = srcArg; 2990 final String operationName = "append"; 2991 NameNode.stateChangeLog.debug( 2992 "DIR* NameSystem.appendFile: src={}, holder={}, clientMachine={}", 2993 src, holder, clientMachine); 2994 boolean skipSync = false; 2995 if (!supportAppends) { 2996 throw new UnsupportedOperationException( 2997 "Append is not enabled on this NameNode. 
Use the " + 2998 DFS_SUPPORT_APPEND_KEY + " configuration option to enable it."); 2999 } 3000 3001 LocatedBlock lb = null; 3002 HdfsFileStatus stat = null; 3003 FSPermissionChecker pc = getPermissionChecker(); 3004 writeLock(); 3005 try { 3006 checkOperation(OperationCategory.WRITE); 3007 checkNameNodeSafeMode("Cannot append to file" + src); 3008 final INodesInPath iip = dir.resolvePathForWrite(pc, src); 3009 src = iip.getPath(); 3010 lb = appendFileInternal(pc, iip, holder, clientMachine, newBlock, 3011 logRetryCache); 3012 stat = FSDirStatAndListingOp.getFileInfo(dir, src, false, 3013 FSDirectory.isReservedRawName(srcArg)); 3014 } catch (StandbyException se) { 3015 skipSync = true; 3016 throw se; 3017 } finally { 3018 writeUnlock(operationName); 3019 // There might be transactions logged while trying to recover the lease. 3020 // They need to be sync'ed even when an exception was thrown. 3021 if (!skipSync) { 3022 getEditLog().logSync(); 3023 } 3024 } 3025 if (lb != null) { 3026 NameNode.stateChangeLog.debug( 3027 "DIR* NameSystem.appendFile: file {} for {} at {} block {} block" + 3028 " size {}", src, holder, clientMachine, lb.getBlock(), 3029 lb.getBlock().getNumBytes()); 3030 } 3031 logAuditEvent(true, operationName, srcArg); 3032 return new LastBlockWithStatus(lb, stat); 3033 } 3034 3035 ExtendedBlock getExtendedBlock(Block blk) { 3036 return new ExtendedBlock(blockPoolId, blk); 3037 } 3038 3039 void setBlockPoolId(String bpid) { 3040 blockPoolId = bpid; 3041 blockManager.setBlockPoolId(blockPoolId); 3042 } 3043 3044 /** 3045 * The client would like to obtain an additional block for the indicated 3046 * filename (which is being written-to). Return an array that consists 3047 * of the block, plus a set of machines. The first on this list should 3048 * be where the client writes data. Subsequent items in the list must 3049 * be provided in the connection to the first datanode. 
3050 * 3051 * Make sure the previous blocks have been reported by datanodes and 3052 * are replicated. Will return an empty 2-elt array if we want the 3053 * client to "try again later". 3054 */ 3055 LocatedBlock getAdditionalBlock(String src, long fileId, String clientName, 3056 ExtendedBlock previous, Set<Node> excludedNodes, 3057 List<String> favoredNodes) throws IOException { 3058 LocatedBlock[] onRetryBlock = new LocatedBlock[1]; 3059 DatanodeStorageInfo targets[] = getNewBlockTargets(src, fileId, 3060 clientName, previous, excludedNodes, favoredNodes, onRetryBlock); 3061 if (targets == null) { 3062 assert onRetryBlock[0] != null : "Retry block is null"; 3063 // This is a retry. Just return the last block. 3064 return onRetryBlock[0]; 3065 } 3066 LocatedBlock newBlock = storeAllocatedBlock( 3067 src, fileId, clientName, previous, targets); 3068 return newBlock; 3069 } 3070 3071 /** 3072 * Part I of getAdditionalBlock(). 3073 * Analyze the state of the file under read lock to determine if the client 3074 * can add a new block, detect potential retries, lease mismatches, 3075 * and minimal replication of the penultimate block. 3076 * 3077 * Generate target DataNode locations for the new block, 3078 * but do not create the new block yet. 
*
   * @param onRetryBlock single-element output array filled in by
   *        analyzeFileState when the request is recognized as a retry
   * @return the chosen target storages, or null when the retry block already
   *         has locations and no new targets are needed
   * @throws NotReplicatedYetException if checkFileProgress reports that the
   *         file's blocks are not ready for another allocation
   */
  DatanodeStorageInfo[] getNewBlockTargets(String src, long fileId,
      String clientName, ExtendedBlock previous, Set<Node> excludedNodes,
      List<String> favoredNodes, LocatedBlock[] onRetryBlock) throws IOException {
    // Values captured under the read lock and used after it is released.
    final long blockSize;
    final int replication;
    final byte storagePolicyID;
    Node clientNode = null;
    String clientMachine = null;

    NameNode.stateChangeLog.debug("BLOCK* getAdditionalBlock: {} inodeId {}" +
        " for {}", src, fileId, clientName);

    checkOperation(OperationCategory.READ);
    FSPermissionChecker pc = getPermissionChecker();
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      INodesInPath iip = dir.resolvePath(pc, src, fileId);
      src = iip.getPath();
      FileState fileState = analyzeFileState(
          iip, fileId, clientName, previous, onRetryBlock);
      if (onRetryBlock[0] != null && onRetryBlock[0].getLocations().length > 0) {
        // This is a retry. No need to generate new locations.
        // Use the last block if it has locations.
        return null;
      }

      final INodeFile pendingFile = fileState.inode;
      if (!checkFileProgress(src, pendingFile, false)) {
        throw new NotReplicatedYetException("Not replicated yet: " + src);
      }
      src = fileState.path;

      if (pendingFile.getBlocks().length >= maxBlocksPerFile) {
        throw new IOException("File has reached the limit on maximum number of"
            + " blocks (" + DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_KEY
            + "): " + pendingFile.getBlocks().length + " >= "
            + maxBlocksPerFile);
      }
      blockSize = pendingFile.getPreferredBlockSize();
      clientMachine = pendingFile.getFileUnderConstructionFeature()
          .getClientMachine();
      clientNode = blockManager.getDatanodeManager().getDatanodeByHost(
          clientMachine);
      replication = pendingFile.getFileReplication();
      storagePolicyID = pendingFile.getStoragePolicyID();
    } finally {
      readUnlock("getNewBlockTargets");
    }

    // The writer is not a known datanode: resolve a network location for it
    // outside the lock.
    if (clientNode == null) {
      clientNode = getClientNode(clientMachine);
    }

    // choose targets for the new block to be allocated.
    return getBlockManager().chooseTarget4NewBlock(
        src, replication, clientNode, excludedNodes, blockSize, favoredNodes,
        storagePolicyID);
  }

  /**
   * Part II of getAdditionalBlock().
   * Should repeat the same analysis of the file state as in Part 1,
   * but under the write lock.
   * If the conditions still hold, then allocate a new block with
   * the new targets, add it to the INode and to the BlocksMap.
3146 */ 3147 LocatedBlock storeAllocatedBlock(String src, long fileId, String clientName, 3148 ExtendedBlock previous, DatanodeStorageInfo[] targets) throws IOException { 3149 Block newBlock = null; 3150 long offset; 3151 checkOperation(OperationCategory.WRITE); 3152 waitForLoadingFSImage(); 3153 writeLock(); 3154 try { 3155 checkOperation(OperationCategory.WRITE); 3156 // Run the full analysis again, since things could have changed 3157 // while chooseTarget() was executing. 3158 LocatedBlock[] onRetryBlock = new LocatedBlock[1]; 3159 final INodesInPath iip = dir.resolvePath(null, src, fileId); 3160 FileState fileState = 3161 analyzeFileState(iip, fileId, clientName, previous, onRetryBlock); 3162 final INodeFile pendingFile = fileState.inode; 3163 src = fileState.path; 3164 3165 if (onRetryBlock[0] != null) { 3166 if (onRetryBlock[0].getLocations().length > 0) { 3167 // This is a retry. Just return the last block if having locations. 3168 return onRetryBlock[0]; 3169 } else { 3170 // add new chosen targets to already allocated block and return 3171 BlockInfoContiguous lastBlockInFile = pendingFile.getLastBlock(); 3172 ((BlockInfoContiguousUnderConstruction) lastBlockInFile) 3173 .setExpectedLocations(targets); 3174 offset = pendingFile.computeFileSize(); 3175 return makeLocatedBlock(lastBlockInFile, targets, offset); 3176 } 3177 } 3178 3179 // commit the last block and complete it if it has minimum replicas 3180 commitOrCompleteLastBlock(pendingFile, fileState.iip, 3181 ExtendedBlock.getLocalBlock(previous)); 3182 3183 // allocate new block, record block locations in INode. 
3184 newBlock = createNewBlock(); 3185 INodesInPath inodesInPath = INodesInPath.fromINode(pendingFile); 3186 saveAllocatedBlock(src, inodesInPath, newBlock, targets); 3187 3188 persistNewBlock(src, pendingFile); 3189 offset = pendingFile.computeFileSize(); 3190 } finally { 3191 writeUnlock("storeAllocatedBlock"); 3192 } 3193 getEditLog().logSync(); 3194 3195 // Return located block 3196 return makeLocatedBlock(newBlock, targets, offset); 3197 } 3198 3199 /* 3200 * Resolve clientmachine address to get a network location path 3201 */ 3202 private Node getClientNode(String clientMachine) { 3203 List<String> hosts = new ArrayList<String>(1); 3204 hosts.add(clientMachine); 3205 List<String> rName = getBlockManager().getDatanodeManager() 3206 .resolveNetworkLocation(hosts); 3207 Node clientNode = null; 3208 if (rName != null) { 3209 // Able to resolve clientMachine mapping. 3210 // Create a temp node to findout the rack local nodes 3211 clientNode = new NodeBase(rName.get(0) + NodeBase.PATH_SEPARATOR_STR 3212 + clientMachine); 3213 } 3214 return clientNode; 3215 } 3216 3217 static class FileState { 3218 public final INodeFile inode; 3219 public final String path; 3220 public final INodesInPath iip; 3221 3222 public FileState(INodeFile inode, String fullPath, INodesInPath iip) { 3223 this.inode = inode; 3224 this.path = fullPath; 3225 this.iip = iip; 3226 } 3227 } 3228 3229 private FileState analyzeFileState( 3230 INodesInPath iip, long fileId, String clientName, 3231 ExtendedBlock previous, LocatedBlock[] onRetryBlock) 3232 throws IOException { 3233 assert hasReadLock(); 3234 String src = iip.getPath(); 3235 checkBlock(previous); 3236 onRetryBlock[0] = null; 3237 checkNameNodeSafeMode("Cannot add block to " + src); 3238 3239 // have we exceeded the configured limit of fs objects. 
3240 checkFsObjectLimit(); 3241 3242 Block previousBlock = ExtendedBlock.getLocalBlock(previous); 3243 final INodeFile pendingFile = checkLease(iip, clientName, fileId); 3244 BlockInfoContiguous lastBlockInFile = pendingFile.getLastBlock(); 3245 if (!Block.matchingIdAndGenStamp(previousBlock, lastBlockInFile)) { 3246 // The block that the client claims is the current last block 3247 // doesn't match up with what we think is the last block. There are 3248 // four possibilities: 3249 // 1) This is the first block allocation of an append() pipeline 3250 // which started appending exactly at or exceeding the block boundary. 3251 // In this case, the client isn't passed the previous block, 3252 // so it makes the allocateBlock() call with previous=null. 3253 // We can distinguish this since the last block of the file 3254 // will be exactly a full block. 3255 // 2) This is a retry from a client that missed the response of a 3256 // prior getAdditionalBlock() call, perhaps because of a network 3257 // timeout, or because of an HA failover. In that case, we know 3258 // by the fact that the client is re-issuing the RPC that it 3259 // never began to write to the old block. Hence it is safe to 3260 // to return the existing block. 3261 // 3) This is an entirely bogus request/bug -- we should error out 3262 // rather than potentially appending a new block with an empty 3263 // one in the middle, etc 3264 // 4) This is a retry from a client that timed out while 3265 // the prior getAdditionalBlock() is still being processed, 3266 // currently working on chooseTarget(). 3267 // There are no means to distinguish between the first and 3268 // the second attempts in Part I, because the first one hasn't 3269 // changed the namesystem state yet. 3270 // We run this analysis again in Part II where case 4 is impossible. 
3271 3272 BlockInfoContiguous penultimateBlock = pendingFile.getPenultimateBlock(); 3273 if (previous == null && 3274 lastBlockInFile != null && 3275 lastBlockInFile.getNumBytes() >= pendingFile.getPreferredBlockSize() && 3276 lastBlockInFile.isComplete()) { 3277 // Case 1 3278 NameNode.stateChangeLog.debug( 3279 "BLOCK* NameSystem.allocateBlock: handling block allocation" + 3280 " writing to a file with a complete previous block: src={}" + 3281 " lastBlock={}", src, lastBlockInFile); 3282 } else if (Block.matchingIdAndGenStamp(penultimateBlock, previousBlock)) { 3283 if (lastBlockInFile.getNumBytes() != 0) { 3284 throw new IOException( 3285 "Request looked like a retry to allocate block " + 3286 lastBlockInFile + " but it already contains " + 3287 lastBlockInFile.getNumBytes() + " bytes"); 3288 } 3289 3290 // Case 2 3291 // Return the last block. 3292 NameNode.stateChangeLog.info("BLOCK* allocateBlock: " + 3293 "caught retry for allocation of a new block in " + 3294 src + ". Returning previously allocated block " + lastBlockInFile); 3295 long offset = pendingFile.computeFileSize(); 3296 onRetryBlock[0] = makeLocatedBlock(lastBlockInFile, 3297 ((BlockInfoContiguousUnderConstruction)lastBlockInFile).getExpectedStorageLocations(), 3298 offset); 3299 return new FileState(pendingFile, src, iip); 3300 } else { 3301 // Case 3 3302 throw new IOException("Cannot allocate block in " + src + ": " + 3303 "passed 'previous' block " + previous + " does not match actual " + 3304 "last block in file " + lastBlockInFile); 3305 } 3306 } 3307 return new FileState(pendingFile, src, iip); 3308 } 3309 3310 LocatedBlock makeLocatedBlock(Block blk, DatanodeStorageInfo[] locs, 3311 long offset) throws IOException { 3312 LocatedBlock lBlk = new LocatedBlock( 3313 getExtendedBlock(blk), locs, offset, false); 3314 getBlockManager().setBlockToken( 3315 lBlk, BlockTokenSecretManager.AccessMode.WRITE); 3316 return lBlk; 3317 } 3318 3319 /** @see ClientProtocol#getAdditionalDatanode */ 3320 
LocatedBlock getAdditionalDatanode(String src, long fileId, 3321 final ExtendedBlock blk, final DatanodeInfo[] existings, 3322 final String[] storageIDs, 3323 final Set<Node> excludes, 3324 final int numAdditionalNodes, final String clientName 3325 ) throws IOException { 3326 //check if the feature is enabled 3327 dtpReplaceDatanodeOnFailure.checkEnabled(); 3328 3329 Node clientnode = null; 3330 String clientMachine; 3331 final long preferredblocksize; 3332 final byte storagePolicyID; 3333 final List<DatanodeStorageInfo> chosen; 3334 checkOperation(OperationCategory.READ); 3335 FSPermissionChecker pc = getPermissionChecker(); 3336 readLock(); 3337 try { 3338 checkOperation(OperationCategory.READ); 3339 //check safe mode 3340 checkNameNodeSafeMode("Cannot add datanode; src=" + src + ", blk=" + blk); 3341 final INodesInPath iip = dir.resolvePath(pc, src, fileId); 3342 src = iip.getPath(); 3343 3344 //check lease 3345 final INodeFile file = checkLease(iip, clientName, fileId); 3346 clientMachine = file.getFileUnderConstructionFeature().getClientMachine(); 3347 clientnode = blockManager.getDatanodeManager().getDatanodeByHost(clientMachine); 3348 preferredblocksize = file.getPreferredBlockSize(); 3349 storagePolicyID = file.getStoragePolicyID(); 3350 3351 //find datanode storages 3352 final DatanodeManager dm = blockManager.getDatanodeManager(); 3353 chosen = Arrays.asList(dm.getDatanodeStorageInfos(existings, storageIDs, 3354 "src=%s, fileId=%d, blk=%s, clientName=%s, clientMachine=%s", 3355 src, fileId, blk, clientName, clientMachine)); 3356 } finally { 3357 readUnlock("getAdditionalDatanode"); 3358 } 3359 3360 if (clientnode == null) { 3361 clientnode = getClientNode(clientMachine); 3362 } 3363 3364 // choose new datanodes. 
3365 final DatanodeStorageInfo[] targets = blockManager.chooseTarget4AdditionalDatanode( 3366 src, numAdditionalNodes, clientnode, chosen, 3367 excludes, preferredblocksize, storagePolicyID); 3368 final LocatedBlock lb = new LocatedBlock(blk, targets); 3369 blockManager.setBlockToken(lb, AccessMode.COPY); 3370 return lb; 3371 } 3372 3373 /** 3374 * The client would like to let go of the given block 3375 */ 3376 boolean abandonBlock(ExtendedBlock b, long fileId, String src, String holder) 3377 throws IOException { 3378 NameNode.stateChangeLog.debug( 3379 "BLOCK* NameSystem.abandonBlock: {} of file {}", b, src); 3380 checkOperation(OperationCategory.WRITE); 3381 FSPermissionChecker pc = getPermissionChecker(); 3382 waitForLoadingFSImage(); 3383 writeLock(); 3384 final INodesInPath iip = dir.resolvePath(pc, src, fileId); 3385 src = iip.getPath(); 3386 try { 3387 checkOperation(OperationCategory.WRITE); 3388 checkNameNodeSafeMode("Cannot abandon block " + b + " for file" + src); 3389 final INodeFile file = checkLease(iip, holder, fileId); 3390 3391 // Remove the block from the pending creates list 3392 boolean removed = dir.removeBlock(src, iip, file, 3393 ExtendedBlock.getLocalBlock(b)); 3394 if (!removed) { 3395 return true; 3396 } 3397 NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: {} is " + 3398 "removed from pendingCreates", b); 3399 persistBlocks(src, file, false); 3400 } finally { 3401 writeUnlock("abandonBlock"); 3402 } 3403 getEditLog().logSync(); 3404 3405 return true; 3406 } 3407 3408 private INodeFile checkLease(INodesInPath iip, String holder, long fileId) 3409 throws LeaseExpiredException, FileNotFoundException { 3410 String src = iip.getPath(); 3411 INode inode = iip.getLastINode(); 3412 assert hasReadLock(); 3413 final String ident = src + " (inode " + fileId + ")"; 3414 if (inode == null) { 3415 Lease lease = leaseManager.getLease(holder); 3416 throw new LeaseExpiredException( 3417 "No lease on " + ident + ": File does not exist. 
" 3418 + (lease != null ? lease.toString() 3419 : "Holder " + holder + " does not have any open files.")); 3420 } 3421 if (!inode.isFile()) { 3422 Lease lease = leaseManager.getLease(holder); 3423 throw new LeaseExpiredException( 3424 "No lease on " + ident + ": INode is not a regular file. " 3425 + (lease != null ? lease.toString() 3426 : "Holder " + holder + " does not have any open files.")); 3427 } 3428 final INodeFile file = inode.asFile(); 3429 if (!file.isUnderConstruction()) { 3430 Lease lease = leaseManager.getLease(holder); 3431 throw new LeaseExpiredException( 3432 "No lease on " + ident + ": File is not open for writing. " 3433 + (lease != null ? lease.toString() 3434 : "Holder " + holder + " does not have any open files.")); 3435 } 3436 // No further modification is allowed on a deleted file. 3437 // A file is considered deleted, if it is not in the inodeMap or is marked 3438 // as deleted in the snapshot feature. 3439 if (isFileDeleted(file)) { 3440 throw new FileNotFoundException(src); 3441 } 3442 String clientName = file.getFileUnderConstructionFeature().getClientName(); 3443 if (holder != null && !clientName.equals(holder)) { 3444 throw new LeaseExpiredException("Lease mismatch on " + ident + 3445 " owned by " + clientName + " but is accessed by " + holder); 3446 } 3447 return file; 3448 } 3449 3450 /** 3451 * Complete in-progress write to the given file. 
3452 * @return true if successful, false if the client should continue to retry 3453 * (e.g if not all blocks have reached minimum replication yet) 3454 * @throws IOException on error (eg lease mismatch, file not open, file deleted) 3455 */ 3456 boolean completeFile(final String srcArg, String holder, 3457 ExtendedBlock last, long fileId) 3458 throws SafeModeException, UnresolvedLinkException, IOException { 3459 String src = srcArg; 3460 NameNode.stateChangeLog.debug("DIR* NameSystem.completeFile: {} for {}", 3461 src, holder); 3462 checkBlock(last); 3463 boolean success = false; 3464 checkOperation(OperationCategory.WRITE); 3465 waitForLoadingFSImage(); 3466 writeLock(); 3467 try { 3468 checkOperation(OperationCategory.WRITE); 3469 checkNameNodeSafeMode("Cannot complete file " + src); 3470 success = completeFileInternal(src, holder, 3471 ExtendedBlock.getLocalBlock(last), fileId); 3472 } finally { 3473 writeUnlock("completeFile"); 3474 } 3475 getEditLog().logSync(); 3476 if (success) { 3477 NameNode.stateChangeLog.info("DIR* completeFile: " + srcArg 3478 + " is closed by " + holder); 3479 } 3480 return success; 3481 } 3482 3483 private boolean completeFileInternal(String src, String holder, Block last, 3484 long fileId) throws IOException { 3485 assert hasWriteLock(); 3486 final INodeFile pendingFile; 3487 FSPermissionChecker pc = getPermissionChecker(); 3488 final INodesInPath iip = dir.resolvePath(pc, src, fileId); 3489 src = iip.getPath(); 3490 INode inode = null; 3491 try { 3492 inode = iip.getLastINode(); 3493 pendingFile = checkLease(iip, holder, fileId); 3494 } catch (LeaseExpiredException lee) { 3495 if (inode != null && inode.isFile() && 3496 !inode.asFile().isUnderConstruction()) { 3497 // This could be a retry RPC - i.e the client tried to close 3498 // the file, but missed the RPC response. Thus, it is trying 3499 // again to close the file. 
If the file still exists and 3500 // the client's view of the last block matches the actual 3501 // last block, then we'll treat it as a successful close. 3502 // See HDFS-3031. 3503 final Block realLastBlock = inode.asFile().getLastBlock(); 3504 if (Block.matchingIdAndGenStamp(last, realLastBlock)) { 3505 NameNode.stateChangeLog.info("DIR* completeFile: " + 3506 "request from " + holder + " to complete inode " + fileId + 3507 "(" + src + ") which is already closed. But, it appears to be " + 3508 "an RPC retry. Returning success"); 3509 return true; 3510 } 3511 } 3512 throw lee; 3513 } 3514 // Check the state of the penultimate block. It should be completed 3515 // before attempting to complete the last one. 3516 if (!checkFileProgress(src, pendingFile, false)) { 3517 return false; 3518 } 3519 3520 // commit the last block and complete it if it has minimum replicas 3521 commitOrCompleteLastBlock(pendingFile, iip, last); 3522 3523 if (!checkFileProgress(src, pendingFile, true)) { 3524 return false; 3525 } 3526 3527 finalizeINodeFileUnderConstruction(src, pendingFile, 3528 Snapshot.CURRENT_STATE_ID); 3529 return true; 3530 } 3531 3532 /** 3533 * Save allocated block at the given pending filename 3534 * 3535 * @param src path to the file 3536 * @param inodesInPath representing each of the components of src. 3537 * The last INode is the INode for {@code src} file. 
3538 * @param newBlock newly allocated block to be save 3539 * @param targets target datanodes where replicas of the new block is placed 3540 * @throws QuotaExceededException If addition of block exceeds space quota 3541 */ 3542 BlockInfoContiguous saveAllocatedBlock(String src, INodesInPath inodesInPath, 3543 Block newBlock, DatanodeStorageInfo[] targets) 3544 throws IOException { 3545 assert hasWriteLock(); 3546 BlockInfoContiguous b = dir.addBlock(src, inodesInPath, newBlock, targets); 3547 NameNode.stateChangeLog.info("BLOCK* allocate " + b + " for " + src); 3548 DatanodeStorageInfo.incrementBlocksScheduled(targets); 3549 return b; 3550 } 3551 3552 /** 3553 * Create new block with a unique block id and a new generation stamp. 3554 */ 3555 Block createNewBlock() throws IOException { 3556 assert hasWriteLock(); 3557 Block b = new Block(nextBlockId(), 0, 0); 3558 // Increment the generation stamp for every new block. 3559 b.setGenerationStamp(nextGenerationStamp(false)); 3560 return b; 3561 } 3562 3563 /** 3564 * Check that the indicated file's blocks are present and 3565 * replicated. If not, return false. If checkall is true, then check 3566 * all blocks, otherwise check only penultimate block. 3567 */ 3568 boolean checkFileProgress(String src, INodeFile v, boolean checkall) { 3569 if (checkall) { 3570 // check all blocks of the file. 
3571 for (BlockInfoContiguous block: v.getBlocks()) { 3572 if (!isCompleteBlock(src, block, blockManager.minReplication)) { 3573 return false; 3574 } 3575 } 3576 } else { 3577 // check the penultimate block of this file 3578 BlockInfoContiguous b = v.getPenultimateBlock(); 3579 if (b != null 3580 && !isCompleteBlock(src, b, blockManager.minReplication)) { 3581 return false; 3582 } 3583 } 3584 return true; 3585 } 3586 3587 private static boolean isCompleteBlock(String src, BlockInfoContiguous b, int minRepl) { 3588 if (!b.isComplete()) { 3589 final BlockInfoContiguousUnderConstruction uc = (BlockInfoContiguousUnderConstruction)b; 3590 final int numNodes = b.numNodes(); 3591 LOG.info("BLOCK* " + b + " is not COMPLETE (ucState = " 3592 + uc.getBlockUCState() + ", replication# = " + numNodes 3593 + (numNodes < minRepl? " < ": " >= ") 3594 + " minimum = " + minRepl + ") in file " + src); 3595 return false; 3596 } 3597 return true; 3598 } 3599 3600 //////////////////////////////////////////////////////////////// 3601 // Here's how to handle block-copy failure during client write: 3602 // -- As usual, the client's write should result in a streaming 3603 // backup write to a k-machine sequence. 3604 // -- If one of the backup machines fails, no worries. Fail silently. 3605 // -- Before client is allowed to close and finalize file, make sure 3606 // that the blocks are backed up. Namenode may have to issue specific backup 3607 // commands to make up for earlier datanode failures. Once all copies 3608 // are made, edit namespace and return to client. 3609 //////////////////////////////////////////////////////////////// 3610 3611 /** 3612 * Change the indicated filename. 3613 * @deprecated Use {@link #renameTo(String, String, boolean, 3614 * Options.Rename...)} instead. 
3615 */ 3616 @Deprecated 3617 boolean renameTo(String src, String dst, boolean logRetryCache) 3618 throws IOException { 3619 final String operationName = "rename"; 3620 waitForLoadingFSImage(); 3621 FSDirRenameOp.RenameOldResult ret = null; 3622 writeLock(); 3623 try { 3624 checkOperation(OperationCategory.WRITE); 3625 checkNameNodeSafeMode("Cannot rename " + src); 3626 ret = FSDirRenameOp.renameToInt(dir, src, dst, logRetryCache); 3627 } catch (AccessControlException e) { 3628 logAuditEvent(false, operationName, src, dst, null); 3629 throw e; 3630 } finally { 3631 writeUnlock(operationName); 3632 } 3633 boolean success = ret != null && ret.success; 3634 if (success) { 3635 getEditLog().logSync(); 3636 } 3637 logAuditEvent(success, "rename", src, dst, 3638 ret == null ? null : ret.auditStat); 3639 return success; 3640 } 3641 3642 void renameTo(final String src, final String dst, 3643 boolean logRetryCache, Options.Rename... options) 3644 throws IOException { 3645 final String operationName = "rename"; 3646 waitForLoadingFSImage(); 3647 Map.Entry<BlocksMapUpdateInfo, HdfsFileStatus> res = null; 3648 writeLock(); 3649 try { 3650 checkOperation(OperationCategory.WRITE); 3651 checkNameNodeSafeMode("Cannot rename " + src); 3652 res = FSDirRenameOp.renameToInt(dir, src, dst, logRetryCache, options); 3653 } catch (AccessControlException e) { 3654 logAuditEvent(false, operationName + " (options=" + 3655 Arrays.toString(options) + ")", src, dst, null); 3656 throw e; 3657 } finally { 3658 writeUnlock(operationName); 3659 } 3660 3661 getEditLog().logSync(); 3662 3663 BlocksMapUpdateInfo collectedBlocks = res.getKey(); 3664 HdfsFileStatus auditStat = res.getValue(); 3665 if (!collectedBlocks.getToDeleteList().isEmpty()) { 3666 removeBlocks(collectedBlocks); 3667 collectedBlocks.clear(); 3668 } 3669 3670 logAuditEvent(true, operationName + " (options=" + 3671 Arrays.toString(options) + ")", src, dst, auditStat); 3672 } 3673 3674 /** 3675 * Remove the indicated file from 
namespace. 3676 * 3677 * @see ClientProtocol#delete(String, boolean) for detailed description and 3678 * description of exceptions 3679 */ 3680 boolean delete(String src, boolean recursive, boolean logRetryCache) 3681 throws IOException { 3682 waitForLoadingFSImage(); 3683 final String operationName = "delete"; 3684 BlocksMapUpdateInfo toRemovedBlocks = null; 3685 writeLock(); 3686 boolean ret = false; 3687 try { 3688 checkOperation(OperationCategory.WRITE); 3689 checkNameNodeSafeMode("Cannot delete " + src); 3690 toRemovedBlocks = FSDirDeleteOp.delete( 3691 this, src, recursive, logRetryCache); 3692 ret = toRemovedBlocks != null; 3693 } catch (AccessControlException e) { 3694 logAuditEvent(false, operationName, src); 3695 throw e; 3696 } finally { 3697 writeUnlock(operationName); 3698 } 3699 getEditLog().logSync(); 3700 if (toRemovedBlocks != null) { 3701 removeBlocks(toRemovedBlocks); // Incremental deletion of blocks 3702 } 3703 logAuditEvent(true, operationName, src); 3704 return ret; 3705 } 3706 3707 FSPermissionChecker getPermissionChecker() 3708 throws AccessControlException { 3709 return dir.getPermissionChecker(); 3710 } 3711 3712 /** 3713 * From the given list, incrementally remove the blocks from blockManager 3714 * Writelock is dropped and reacquired every BLOCK_DELETION_INCREMENT to 3715 * ensure that other waiters on the lock can get in. 
See HDFS-2938 3716 * 3717 * @param blocks 3718 * An instance of {@link BlocksMapUpdateInfo} which contains a list 3719 * of blocks that need to be removed from blocksMap 3720 */ 3721 void removeBlocks(BlocksMapUpdateInfo blocks) { 3722 List<Block> toDeleteList = blocks.getToDeleteList(); 3723 Iterator<Block> iter = toDeleteList.iterator(); 3724 while (iter.hasNext()) { 3725 writeLock(); 3726 try { 3727 for (int i = 0; i < BLOCK_DELETION_INCREMENT && iter.hasNext(); i++) { 3728 blockManager.removeBlock(iter.next()); 3729 } 3730 } finally { 3731 writeUnlock("removeBlocks"); 3732 } 3733 } 3734 } 3735 3736 /** 3737 * Remove leases and inodes related to a given path 3738 * @param src The given path 3739 * @param removedINodes Containing the list of inodes to be removed from 3740 * inodesMap 3741 * @param acquireINodeMapLock Whether to acquire the lock for inode removal 3742 */ 3743 void removeLeasesAndINodes(String src, List<INode> removedINodes, 3744 final boolean acquireINodeMapLock) { 3745 assert hasWriteLock(); 3746 leaseManager.removeLeaseWithPrefixPath(src); 3747 // remove inodes from inodesMap 3748 if (removedINodes != null) { 3749 if (acquireINodeMapLock) { 3750 dir.writeLock(); 3751 } 3752 try { 3753 dir.removeFromInodeMap(removedINodes); 3754 } finally { 3755 if (acquireINodeMapLock) { 3756 dir.writeUnlock(); 3757 } 3758 } 3759 removedINodes.clear(); 3760 } 3761 } 3762 3763 /** 3764 * Removes the blocks from blocksmap and updates the safemode blocks total 3765 * 3766 * @param blocks 3767 * An instance of {@link BlocksMapUpdateInfo} which contains a list 3768 * of blocks that need to be removed from blocksMap 3769 */ 3770 void removeBlocksAndUpdateSafemodeTotal(BlocksMapUpdateInfo blocks) { 3771 assert hasWriteLock(); 3772 // In the case that we are a Standby tailing edits from the 3773 // active while in safe-mode, we need to track the total number 3774 // of blocks and safe blocks in the system. 
3775 boolean trackBlockCounts = isSafeModeTrackingBlocks(); 3776 int numRemovedComplete = 0, numRemovedSafe = 0; 3777 3778 for (Block b : blocks.getToDeleteList()) { 3779 if (trackBlockCounts) { 3780 BlockInfoContiguous bi = getStoredBlock(b); 3781 if (bi.isComplete()) { 3782 numRemovedComplete++; 3783 if (bi.numNodes() >= blockManager.minReplication) { 3784 numRemovedSafe++; 3785 } 3786 } 3787 } 3788 blockManager.removeBlock(b); 3789 } 3790 if (trackBlockCounts) { 3791 if (LOG.isDebugEnabled()) { 3792 LOG.debug("Adjusting safe-mode totals for deletion." 3793 + "decreasing safeBlocks by " + numRemovedSafe 3794 + ", totalBlocks by " + numRemovedComplete); 3795 } 3796 adjustSafeModeBlockTotals(-numRemovedSafe, -numRemovedComplete); 3797 } 3798 } 3799 3800 /** 3801 * @see SafeModeInfo#shouldIncrementallyTrackBlocks 3802 */ 3803 private boolean isSafeModeTrackingBlocks() { 3804 if (!haEnabled) { 3805 // Never track blocks incrementally in non-HA code. 3806 return false; 3807 } 3808 SafeModeInfo sm = this.safeMode; 3809 return sm != null && sm.shouldIncrementallyTrackBlocks(); 3810 } 3811 3812 /** 3813 * Get the file info for a specific file. 3814 * 3815 * @param src The string representation of the path to the file 3816 * @param resolveLink whether to throw UnresolvedLinkException 3817 * if src refers to a symlink 3818 * 3819 * @throws AccessControlException if access is denied 3820 * @throws UnresolvedLinkException if a symlink is encountered. 
3821 * 3822 * @return object containing information regarding the file 3823 * or null if file not found 3824 * @throws StandbyException 3825 */ 3826 HdfsFileStatus getFileInfo(final String src, boolean resolveLink) 3827 throws IOException { 3828 final String operationName = "getfileinfo"; 3829 checkOperation(OperationCategory.READ); 3830 HdfsFileStatus stat = null; 3831 readLock(); 3832 try { 3833 checkOperation(OperationCategory.READ); 3834 stat = FSDirStatAndListingOp.getFileInfo(dir, src, resolveLink); 3835 } catch (AccessControlException e) { 3836 logAuditEvent(false, operationName, src); 3837 throw e; 3838 } finally { 3839 readUnlock(operationName); 3840 } 3841 logAuditEvent(true, operationName, src); 3842 return stat; 3843 } 3844 3845 /** 3846 * Returns true if the file is closed 3847 */ 3848 boolean isFileClosed(final String src) throws IOException { 3849 final String operationName = "isFileClosed"; 3850 checkOperation(OperationCategory.READ); 3851 readLock(); 3852 try { 3853 checkOperation(OperationCategory.READ); 3854 return FSDirStatAndListingOp.isFileClosed(dir, src); 3855 } catch (AccessControlException e) { 3856 logAuditEvent(false, operationName, src); 3857 throw e; 3858 } finally { 3859 readUnlock(operationName); 3860 } 3861 } 3862 3863 /** 3864 * Create all the necessary directories 3865 */ 3866 boolean mkdirs(String src, PermissionStatus permissions, 3867 boolean createParent) throws IOException { 3868 final String operationName = "mkdirs"; 3869 HdfsFileStatus auditStat = null; 3870 checkOperation(OperationCategory.WRITE); 3871 writeLock(); 3872 try { 3873 checkOperation(OperationCategory.WRITE); 3874 checkNameNodeSafeMode("Cannot create directory " + src); 3875 auditStat = FSDirMkdirOp.mkdirs(this, src, permissions, createParent); 3876 } catch (AccessControlException e) { 3877 logAuditEvent(false, operationName, src); 3878 throw e; 3879 } finally { 3880 writeUnlock(operationName); 3881 } 3882 getEditLog().logSync(); 3883 logAuditEvent(true, 
operationName, src, null, auditStat); 3884 return true; 3885 } 3886 3887 /** 3888 * Get the content summary for a specific file/dir. 3889 * 3890 * @param src The string representation of the path to the file 3891 * 3892 * @throws AccessControlException if access is denied 3893 * @throws UnresolvedLinkException if a symlink is encountered. 3894 * @throws FileNotFoundException if no file exists 3895 * @throws StandbyException 3896 * @throws IOException for issues with writing to the audit log 3897 * 3898 * @return object containing information regarding the file 3899 * or null if file not found 3900 */ 3901 ContentSummary getContentSummary(final String src) throws IOException { 3902 checkOperation(OperationCategory.READ); 3903 final String operationName = "contentSummary"; 3904 readLock(); 3905 boolean success = true; 3906 try { 3907 checkOperation(OperationCategory.READ); 3908 return FSDirStatAndListingOp.getContentSummary(dir, src); 3909 } catch (AccessControlException ace) { 3910 success = false; 3911 throw ace; 3912 } finally { 3913 readUnlock(operationName); 3914 logAuditEvent(success, operationName, src); 3915 } 3916 } 3917 3918 /** 3919 * Set the namespace quota and storage space quota for a directory. 3920 * See {@link ClientProtocol#setQuota(String, long, long, StorageType)} for the 3921 * contract. 3922 * 3923 * Note: This does not support ".inodes" relative path. 
3924 */ 3925 void setQuota(String src, long nsQuota, long ssQuota, StorageType type) 3926 throws IOException { 3927 checkOperation(OperationCategory.WRITE); 3928 final String operationName = "setQuota"; 3929 writeLock(); 3930 boolean success = false; 3931 try { 3932 checkOperation(OperationCategory.WRITE); 3933 checkNameNodeSafeMode("Cannot set quota on " + src); 3934 FSDirAttrOp.setQuota(dir, src, nsQuota, ssQuota, type); 3935 success = true; 3936 } finally { 3937 writeUnlock(operationName); 3938 if (success) { 3939 getEditLog().logSync(); 3940 } 3941 logAuditEvent(success, operationName, src); 3942 } 3943 } 3944 3945 /** Persist all metadata about this file. 3946 * @param src The string representation of the path 3947 * @param fileId The inode ID that we're fsyncing. Older clients will pass 3948 * INodeId.GRANDFATHER_INODE_ID here. 3949 * @param clientName The string representation of the client 3950 * @param lastBlockLength The length of the last block 3951 * under construction reported from client. 
3952 * @throws IOException if path does not exist 3953 */ 3954 void fsync(String src, long fileId, String clientName, long lastBlockLength) 3955 throws IOException { 3956 NameNode.stateChangeLog.info("BLOCK* fsync: " + src + " for " + clientName); 3957 checkOperation(OperationCategory.WRITE); 3958 3959 FSPermissionChecker pc = getPermissionChecker(); 3960 waitForLoadingFSImage(); 3961 writeLock(); 3962 try { 3963 checkOperation(OperationCategory.WRITE); 3964 checkNameNodeSafeMode("Cannot fsync file " + src); 3965 INodesInPath iip = dir.resolvePath(pc, src, fileId); 3966 src = iip.getPath(); 3967 final INodeFile pendingFile = checkLease(iip, clientName, fileId); 3968 if (lastBlockLength > 0) { 3969 pendingFile.getFileUnderConstructionFeature().updateLengthOfLastBlock( 3970 pendingFile, lastBlockLength); 3971 } 3972 persistBlocks(src, pendingFile, false); 3973 } finally { 3974 writeUnlock("fsync"); 3975 } 3976 getEditLog().logSync(); 3977 } 3978 3979 /** 3980 * Move a file that is being written to be immutable. 3981 * @param src The filename 3982 * @param lease The lease for the client creating the file 3983 * @param recoveryLeaseHolder reassign lease to this holder if the last block 3984 * needs recovery; keep current holder if null. 3985 * @throws AlreadyBeingCreatedException if file is waiting to achieve minimal 3986 * replication;<br> 3987 * RecoveryInProgressException if lease recovery is in progress.<br> 3988 * IOException in case of an error. 3989 * @return true if file has been successfully finalized and closed or 3990 * false if block recovery has been initiated. Since the lease owner 3991 * has been changed and logged, caller should call logSync(). 
*/
  boolean internalReleaseLease(Lease lease, String src, INodesInPath iip,
      String recoveryLeaseHolder) throws IOException {
    LOG.info("Recovering " + lease + ", src=" + src);
    assert !isInSafeMode();
    assert hasWriteLock();

    final INodeFile pendingFile = iip.getLastINode().asFile();
    int nrBlocks = pendingFile.numBlocks();
    BlockInfoContiguous[] blocks = pendingFile.getBlocks();

    // Count the leading run of COMPLETE blocks. When the loop stops early,
    // curBlock is the first block that is not COMPLETE.
    int nrCompleteBlocks;
    BlockInfoContiguous curBlock = null;
    for(nrCompleteBlocks = 0; nrCompleteBlocks < nrBlocks; nrCompleteBlocks++) {
      curBlock = blocks[nrCompleteBlocks];
      if(!curBlock.isComplete())
        break;
      assert blockManager.checkMinReplication(curBlock) :
              "A COMPLETE block is not minimally replicated in " + src;
    }

    // If there are no incomplete blocks associated with this file,
    // then reap lease immediately and close the file.
    if(nrCompleteBlocks == nrBlocks) {
      finalizeINodeFileUnderConstruction(src, pendingFile,
          iip.getLatestSnapshotId());
      NameNode.stateChangeLog.warn("BLOCK*"
        + " internalReleaseLease: All existing blocks are COMPLETE,"
        + " lease removed, file closed.");
      return true;  // closed!
    }

    // Only the last and the penultimate blocks may be in non COMPLETE state.
    // If the penultimate block is not COMPLETE, then it must be COMMITTED.
    if(nrCompleteBlocks < nrBlocks - 2 ||
       nrCompleteBlocks == nrBlocks - 2 &&
         curBlock != null &&
         curBlock.getBlockUCState() != BlockUCState.COMMITTED) {
      final String message = "DIR* NameSystem.internalReleaseLease: "
        + "attempt to release a create lock on "
        + src + " but file is already closed.";
      NameNode.stateChangeLog.warn(message);
      throw new IOException(message);
    }

    // The last block is not COMPLETE, and
    // the penultimate block, if it exists, is either COMPLETE or COMMITTED
    final BlockInfoContiguous lastBlock = pendingFile.getLastBlock();
    BlockUCState lastBlockState = lastBlock.getBlockUCState();
    BlockInfoContiguous penultimateBlock = pendingFile.getPenultimateBlock();

    // If penultimate block doesn't exist then its minReplication is met
    boolean penultimateBlockMinReplication = penultimateBlock == null ? true :
        blockManager.checkMinReplication(penultimateBlock);

    switch(lastBlockState) {
    case COMPLETE:
      assert false : "Already checked that the last block is incomplete";
      break;
    case COMMITTED:
      // Close file if committed blocks are minimally replicated
      if(penultimateBlockMinReplication &&
          blockManager.checkMinReplication(lastBlock)) {
        finalizeINodeFileUnderConstruction(src, pendingFile,
            iip.getLatestSnapshotId());
        NameNode.stateChangeLog.warn("BLOCK*"
          + " internalReleaseLease: Committed blocks are minimally replicated,"
          + " lease removed, file closed.");
        return true;  // closed!
      }
      // Cannot close file right now, since some blocks
      // are not yet minimally replicated.
      // This may potentially cause infinite loop in lease recovery
      // if there are no valid replicas on data-nodes.
      String message = "DIR* NameSystem.internalReleaseLease: " +
          "Failed to release lease for file " + src +
          ". Committed blocks are waiting to be minimally replicated." +
          " Try again later.";
      NameNode.stateChangeLog.warn(message);
      throw new AlreadyBeingCreatedException(message);
    case UNDER_CONSTRUCTION:
    case UNDER_RECOVERY:
      final BlockInfoContiguousUnderConstruction uc = (BlockInfoContiguousUnderConstruction)lastBlock;
      // determine if last block was intended to be truncated
      BlockInfoContiguous recoveryBlock = uc.getTruncateBlock();
      boolean truncateRecovery = recoveryBlock != null;
      // A truncate block with a different id than the last block means the
      // truncate is handled as a copy rather than in place.
      boolean copyOnTruncate = truncateRecovery &&
          recoveryBlock.getBlockId() != uc.getBlockId();
      assert !copyOnTruncate ||
          recoveryBlock.getBlockId() < uc.getBlockId() &&
          recoveryBlock.getGenerationStamp() < uc.getGenerationStamp() &&
          recoveryBlock.getNumBytes() > uc.getNumBytes() :
            "wrong recoveryBlock";

      // setup the last block locations from the blockManager if not known
      if (uc.getNumExpectedLocations() == 0) {
        uc.setExpectedLocations(blockManager.getStorages(lastBlock));
      }

      if (uc.getNumExpectedLocations() == 0 && uc.getNumBytes() == 0) {
        // No datanode has reported this block.
        // The client may have crashed before writing data to the pipeline.
        // This block doesn't need any recovery.
        // We can remove this block and close the file.
        pendingFile.removeLastBlock(lastBlock);
        finalizeINodeFileUnderConstruction(src, pendingFile,
            iip.getLatestSnapshotId());
        NameNode.stateChangeLog.warn("BLOCK* internalReleaseLease: "
            + "Removed empty last block and closed file.");
        return true;
      }
      // start recovery of the last block for this file
      long blockRecoveryId = nextGenerationStamp(blockIdManager.isLegacyBlock(uc));
      lease = reassignLease(lease, src, recoveryLeaseHolder, pendingFile);
      // The new recovery id becomes the generation stamp of the last block
      // (copy-on-truncate) or of the truncate block (in-place truncate).
      if(copyOnTruncate) {
        uc.setGenerationStamp(blockRecoveryId);
      } else if(truncateRecovery) {
        recoveryBlock.setGenerationStamp(blockRecoveryId);
      }
      uc.initializeBlockRecovery(blockRecoveryId, true);
      leaseManager.renewLease(lease);
      // Cannot close file right now, since the last block requires recovery.
      // This may potentially cause infinite loop in lease recovery
      // if there are no valid replicas on data-nodes.
      NameNode.stateChangeLog.warn(
                "DIR* NameSystem.internalReleaseLease: " +
                "File " + src + " has not been closed." +
               " Lease recovery is in progress. " +
                "RecoveryId = " + blockRecoveryId + " for block " + lastBlock);
      break;
    }
    return false;
  }

  /**
   * Log a lease reassignment and hand the lease of {@code src} to
   * {@code newHolder}.
   *
   * @param newHolder the new lease holder; when null nothing is logged or
   *        changed and the given lease is returned as is
   * @return the lease returned by {@link #reassignLeaseInternal}, or the
   *         original lease when {@code newHolder} is null
   */
  private Lease reassignLease(Lease lease, String src, String newHolder,
      INodeFile pendingFile) {
    assert hasWriteLock();
    if(newHolder == null)
      return lease;
    // The following transaction is not synced. Make sure it's sync'ed later.
    logReassignLease(lease.getHolder(), src, newHolder);
    return reassignLeaseInternal(lease, src, newHolder, pendingFile);
  }

  /**
   * Record {@code newHolder} as the client name on the file's
   * under-construction feature and reassign the lease in the lease manager.
   * Unlike {@link #reassignLease}, this writes nothing to the edit log.
   */
  Lease reassignLeaseInternal(Lease lease, String src, String newHolder,
      INodeFile pendingFile) {
    assert hasWriteLock();
    pendingFile.getFileUnderConstructionFeature().setClientName(newHolder);
    return leaseManager.reassignLease(lease, src, newHolder);
  }

  /**
   * Delegate to the block manager to commit or complete the last block of
   * {@code fileINode}, which must be under construction.
   */
  private void commitOrCompleteLastBlock(final INodeFile fileINode,
      final INodesInPath iip, final Block commitBlock) throws IOException {
    assert hasWriteLock();
    Preconditions.checkArgument(fileINode.isUnderConstruction());
    blockManager.commitOrCompleteLastBlock(fileINode, commitBlock, iip);
  }

  /**
   * Turn a file under construction into a complete file: record the
   * modification against {@code latestSnapshot}, drop the under-construction
   * state, remove the writer's lease, close the file and check replication.
   *
   * @throws IOException if {@code pendingFile} has no under-construction
   *         feature
   */
  private void finalizeINodeFileUnderConstruction(String src,
      INodeFile pendingFile, int latestSnapshot) throws IOException {
    assert hasWriteLock();

    // Fetched up front: the client name is still needed for the lease
    // removal after toCompleteFile() below.
    FileUnderConstructionFeature uc = pendingFile.getFileUnderConstructionFeature();
    if (uc == null) {
      throw new IOException("Cannot finalize file " + src
          + " because it is not under construction");
    }

    pendingFile.recordModification(latestSnapshot);

    // The file is no longer pending.
    // Create permanent INode, update blocks. No need to replace the inode here
    // since we just remove the uc feature from pendingFile
    pendingFile.toCompleteFile(now());

    leaseManager.removeLease(uc.getClientName(), src);

    waitForLoadingFSImage();
    // close file and persist block allocations for this file
    closeFile(src, pendingFile);

    blockManager.checkReplication(pendingFile);
  }

  /**
   * Look up the stored block for {@code block} in the block manager.
   * Callers in this class check the result for null.
   */
  @VisibleForTesting
  BlockInfoContiguous getStoredBlock(Block block) {
    return blockManager.getStoredBlock(block);
  }

  /**
   * Decide whether an under-construction block belongs to a snapshot copy of
   * a file rather than to the file in the current directory tree.
   *
   * @return false when the block has no block collection, when the
   *         collection is not an under-construction {@code INodeFile}, when
   *         the collection is the inode currently found at its full path, or
   *         when resolving that path hits a symlink; true otherwise
   */
  @Override
  public boolean isInSnapshot(BlockInfoContiguousUnderConstruction blockUC) {
    assert hasReadLock();
    final BlockCollection bc = blockUC.getBlockCollection();
    if (bc == null || !(bc instanceof INodeFile)
        || !bc.isUnderConstruction()) {
      return false;
    }

    String fullName = bc.getName();
    try {
      if (fullName != null && fullName.startsWith(Path.SEPARATOR)
          && dir.getINode(fullName) == bc) {
        // If file exists in normal path then no need to look in snapshot
        return false;
      }
    } catch (UnresolvedLinkException e) {
      LOG.error("Error while resolving the link : " + fullName, e);
      return false;
    }
    /*
     * 1. if bc is under construction and also with snapshot, and
     * bc is not in the current fsdirectory tree, bc must represent a snapshot
     * file.
     * 2. if fullName is not an absolute path, bc cannot be existent in the
     * current fsdirectory tree.
     * 3. if bc is not the current node associated with fullName, bc must be a
     * snapshot inode.
     */
    return true;
  }

  /**
   * Apply the outcome of a block recovery to the namespace.
   *
   * @param oldBlock the block that was recovered; it is looked up among the
   *        stored blocks
   * @param newgenerationstamp must equal the recovery id of the file's last
   *        block; becomes the block's generation stamp unless the block is
   *        deleted or was copied for truncate
   * @param newlength becomes the block's length under the same conditions
   * @param closeFile whether to close the file after the update; when false
   *        the blocks are persisted instead
   * @param deleteblock whether to remove the block from the file rather than
   *        update it
   * @param newtargets datanodes that hold the recovered block
   * @param newtargetstorages storage ids, indexed in parallel with
   *        {@code newtargets}
   * @throws IOException if the block is unknown (and not being deleted), its
   *         file has been deleted, or the recovery id does not match
   */
  void commitBlockSynchronization(ExtendedBlock oldBlock,
      long newgenerationstamp, long newlength,
      boolean closeFile, boolean deleteblock, DatanodeID[] newtargets,
      String[] newtargetstorages) throws IOException {
    LOG.info("commitBlockSynchronization(oldBlock=" + oldBlock
             + ", newgenerationstamp=" + newgenerationstamp
             + ", newlength=" + newlength
             + ", newtargets=" + Arrays.asList(newtargets)
             + ", closeFile=" + closeFile
             + ", deleteBlock=" + deleteblock
             + ")");
    checkOperation(OperationCategory.WRITE);
    final String src;
    waitForLoadingFSImage();
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      // If a DN tries to commit to the standby, the recovery will
      // fail, and the next retry will succeed on the new NN.

      checkNameNodeSafeMode(
          "Cannot commitBlockSynchronization while in safe mode");
      final BlockInfoContiguous storedBlock = getStoredBlock(
          ExtendedBlock.getLocalBlock(oldBlock));
      if (storedBlock == null) {
        if (deleteblock) {
          // This may be a retry attempt so ignore the failure
          // to locate the block.
          if (LOG.isDebugEnabled()) {
            LOG.debug("Block (=" + oldBlock + ") not found");
          }
          return;
        } else {
          throw new IOException("Block (=" + oldBlock + ") not found");
        }
      }
      // Remembered before the block is updated below; used when marking
      // replicas as corrupt.
      final long oldGenerationStamp = storedBlock.getGenerationStamp();
      final long oldNumBytes = storedBlock.getNumBytes();
      //
      // The implementation of delete operation (see @deleteInternal method)
      // first removes the file paths from namespace, and delays the removal
      // of blocks to later time for better performance. When
      // commitBlockSynchronization (this method) is called in between, the
      // blockCollection of storedBlock could have been assigned to null by
      // the delete operation, throw IOException here instead of NPE; if the
      // file path is already removed from namespace by the delete operation,
      // throw FileNotFoundException here, so not to proceed to the end of
      // this method to add a CloseOp to the edit log for an already deleted
      // file (See HDFS-6825).
      //
      BlockCollection blockCollection = storedBlock.getBlockCollection();
      if (blockCollection == null) {
        throw new IOException("The blockCollection of " + storedBlock
            + " is null, likely because the file owning this block was"
            + " deleted and the block removal is delayed");
      }
      INodeFile iFile = ((INode)blockCollection).asFile();
      src = iFile.getFullPathName();
      if (isFileDeleted(iFile)) {
        throw new FileNotFoundException("File not found: "
            + src + ", likely due to delayed block removal");
      }
      // Nothing to do when the file's last block is already complete and
      // either the file is closed or the stored block is complete.
      if ((!iFile.isUnderConstruction() || storedBlock.isComplete()) &&
          iFile.getLastBlock().isComplete()) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Unexpected block (=" + oldBlock
                    + ") since the file (=" + iFile.getLocalName()
                    + ") is not under construction");
        }
        return;
      }

      BlockInfoContiguousUnderConstruction truncatedBlock =
          (BlockInfoContiguousUnderConstruction) iFile.getLastBlock();
      long recoveryId = truncatedBlock.getBlockRecoveryId();
      // The file's last block differing from the stored block indicates the
      // copy-on-truncate case.
      boolean copyTruncate =
          truncatedBlock.getBlockId() != storedBlock.getBlockId();
      if(recoveryId != newgenerationstamp) {
        throw new IOException("The recovery id " + newgenerationstamp
                              + " does not match current recovery id "
                              + recoveryId + " for block " + oldBlock);
      }

      if (deleteblock) {
        Block blockToDel = ExtendedBlock.getLocalBlock(oldBlock);
        boolean remove = iFile.removeLastBlock(blockToDel);
        if (remove) {
          blockManager.removeBlock(storedBlock);
        }
      }
      else {
        // update last block
        if(!copyTruncate) {
          storedBlock.setGenerationStamp(newgenerationstamp);
          storedBlock.setNumBytes(newlength);
        }

        // find the DatanodeDescriptor objects
        // Targets the datanode manager does not know are dropped; the two
        // lists stay index-aligned.
        ArrayList<DatanodeDescriptor> trimmedTargets =
            new ArrayList<DatanodeDescriptor>(newtargets.length);
        ArrayList<String> trimmedStorages =
            new ArrayList<String>(newtargets.length);
        if (newtargets.length > 0) {
          for (int i = 0; i < newtargets.length; ++i) {
            // try to get targetNode
            DatanodeDescriptor targetNode =
                blockManager.getDatanodeManager().getDatanode(newtargets[i]);
            if (targetNode != null) {
              trimmedTargets.add(targetNode);
              trimmedStorages.add(newtargetstorages[i]);
            } else if (LOG.isDebugEnabled()) {
              LOG.debug("DatanodeDescriptor (=" + newtargets[i] + ") not found");
            }
          }
        }
        if ((closeFile) && !trimmedTargets.isEmpty()) {
          // the file is getting closed. Insert block locations into blockManager.
          // Otherwise fsck will report these blocks as MISSING, especially if the
          // blocksReceived from Datanodes take a long time to arrive.
          for (int i = 0; i < trimmedTargets.size(); i++) {
            DatanodeStorageInfo storageInfo =
                trimmedTargets.get(i).getStorageInfo(trimmedStorages.get(i));
            if (storageInfo != null) {
              if(copyTruncate) {
                storageInfo.addBlock(truncatedBlock);
              } else {
                storageInfo.addBlock(storedBlock);
              }
            }
          }
        }

        // add pipeline locations into the INodeUnderConstruction
        DatanodeStorageInfo[] trimmedStorageInfos =
            blockManager.getDatanodeManager().getDatanodeStorageInfos(
                trimmedTargets.toArray(new DatanodeID[trimmedTargets.size()]),
                trimmedStorages.toArray(new String[trimmedStorages.size()]),
                "src=%s, oldBlock=%s, newgenerationstamp=%d, newlength=%d",
                src, oldBlock, newgenerationstamp, newlength);

        if(copyTruncate) {
          iFile.setLastBlock(truncatedBlock, trimmedStorageInfos);
        } else {
          iFile.setLastBlock(storedBlock, trimmedStorageInfos);
          if (closeFile) {
            blockManager.markBlockReplicasAsCorrupt(storedBlock,
                oldGenerationStamp, oldNumBytes, trimmedStorageInfos);
          }
        }
      }

      if (closeFile) {
        if(copyTruncate) {
          closeFileCommitBlocks(src, iFile, truncatedBlock);
          // The pre-truncate block is removed unless the latest snapshot
          // still contains it.
          if(!iFile.isBlockInLatestSnapshot(storedBlock)) {
            blockManager.removeBlock(storedBlock);
          }
        } else {
          closeFileCommitBlocks(src, iFile, storedBlock);
        }
      } else {
        // If this commit does not want to close the file, persist blocks
        persistBlocks(src, iFile, false);
      }
    } finally {
      writeUnlock("commitBlockSynchronization");
    }
    getEditLog().logSync();
    if (closeFile) {
      LOG.info("commitBlockSynchronization(oldBlock=" + oldBlock
          + ", file=" + src
          + ", newgenerationstamp=" + newgenerationstamp
          + ", newlength=" + newlength
          + ", newtargets=" + Arrays.asList(newtargets) + ") successful");
    } else {
      LOG.info("commitBlockSynchronization(" + oldBlock + ") successful");
    }
  }

  /**
4393 * @param pendingFile open file that needs to be closed 4394 * @param storedBlock last block 4395 * @throws IOException on error 4396 */ 4397 @VisibleForTesting 4398 void closeFileCommitBlocks(String src, INodeFile pendingFile, 4399 BlockInfoContiguous storedBlock) throws IOException { 4400 final INodesInPath iip = INodesInPath.fromINode(pendingFile); 4401 4402 // commit the last block and complete it if it has minimum replicas 4403 commitOrCompleteLastBlock(pendingFile, iip, storedBlock); 4404 4405 //remove lease, close file 4406 finalizeINodeFileUnderConstruction(src, pendingFile, 4407 Snapshot.findLatestSnapshot(pendingFile, Snapshot.CURRENT_STATE_ID)); 4408 } 4409 4410 /** 4411 * Renew the lease(s) held by the given client 4412 */ 4413 void renewLease(String holder) throws IOException { 4414 checkOperation(OperationCategory.WRITE); 4415 readLock(); 4416 try { 4417 checkOperation(OperationCategory.WRITE); 4418 checkNameNodeSafeMode("Cannot renew lease for " + holder); 4419 leaseManager.renewLease(holder); 4420 } finally { 4421 readUnlock("renewLease"); 4422 } 4423 } 4424 4425 /** 4426 * Get a partial listing of the indicated directory 4427 * 4428 * @param src the directory name 4429 * @param startAfter the name to start after 4430 * @param needLocation if blockLocations need to be returned 4431 * @return a partial listing starting after startAfter 4432 * 4433 * @throws AccessControlException if access is denied 4434 * @throws UnresolvedLinkException if symbolic link is encountered 4435 * @throws IOException if other I/O error occurred 4436 */ 4437 DirectoryListing getListing(String src, byte[] startAfter, 4438 boolean needLocation) 4439 throws IOException { 4440 checkOperation(OperationCategory.READ); 4441 final String operationName = "listStatus"; 4442 DirectoryListing dl = null; 4443 readLock(); 4444 try { 4445 checkOperation(NameNode.OperationCategory.READ); 4446 dl = FSDirStatAndListingOp.getListingInt(dir, src, startAfter, 4447 needLocation); 4448 } 
catch (AccessControlException e) { 4449 logAuditEvent(false, operationName, src); 4450 throw e; 4451 } finally { 4452 readUnlock(operationName); 4453 } 4454 logAuditEvent(true, operationName, src); 4455 return dl; 4456 } 4457 4458 ///////////////////////////////////////////////////////// 4459 // 4460 // These methods are called by datanodes 4461 // 4462 ///////////////////////////////////////////////////////// 4463 /** 4464 * Register Datanode. 4465 * <p> 4466 * The purpose of registration is to identify whether the new datanode 4467 * serves a new data storage, and will report new data block copies, 4468 * which the namenode was not aware of; or the datanode is a replacement 4469 * node for the data storage that was previously served by a different 4470 * or the same (in terms of host:port) datanode. 4471 * The data storages are distinguished by their storageIDs. When a new 4472 * data storage is reported the namenode issues a new unique storageID. 4473 * <p> 4474 * Finally, the namenode returns its namespaceID as the registrationID 4475 * for the datanodes. 4476 * namespaceID is a persistent attribute of the name space. 4477 * The registrationID is checked every time the datanode is communicating 4478 * with the namenode. 4479 * Datanodes with inappropriate registrationID are rejected. 4480 * If the namenode stops, and then restarts it can restore its 4481 * namespaceID and will continue serving the datanodes that has previously 4482 * registered with the namenode without restarting the whole cluster. 4483 * 4484 * @see org.apache.hadoop.hdfs.server.datanode.DataNode 4485 */ 4486 void registerDatanode(DatanodeRegistration nodeReg) throws IOException { 4487 writeLock(); 4488 try { 4489 getBlockManager().getDatanodeManager().registerDatanode(nodeReg); 4490 checkSafeMode(); 4491 } finally { 4492 writeUnlock("registerDatanode"); 4493 } 4494 } 4495 4496 /** 4497 * Get registrationID for datanodes based on the namespaceID. 
4498 * 4499 * @see #registerDatanode(DatanodeRegistration) 4500 * @return registration ID 4501 */ 4502 String getRegistrationID() { 4503 return Storage.getRegistrationID(getFSImage().getStorage()); 4504 } 4505 4506 /** 4507 * The given node has reported in. This method should: 4508 * 1) Record the heartbeat, so the datanode isn't timed out 4509 * 2) Adjust usage stats for future block allocation 4510 * 4511 * If a substantial amount of time passed since the last datanode 4512 * heartbeat then request an immediate block report. 4513 * 4514 * @return an array of datanode commands 4515 * @throws IOException 4516 */ 4517 HeartbeatResponse handleHeartbeat(DatanodeRegistration nodeReg, 4518 StorageReport[] reports, long cacheCapacity, long cacheUsed, 4519 int xceiverCount, int xmitsInProgress, int failedVolumes, 4520 VolumeFailureSummary volumeFailureSummary) throws IOException { 4521 readLock(); 4522 try { 4523 //get datanode commands 4524 final int maxTransfer = blockManager.getMaxReplicationStreams() 4525 - xmitsInProgress; 4526 DatanodeCommand[] cmds = blockManager.getDatanodeManager().handleHeartbeat( 4527 nodeReg, reports, blockPoolId, cacheCapacity, cacheUsed, 4528 xceiverCount, maxTransfer, failedVolumes, volumeFailureSummary); 4529 4530 //create ha status 4531 final NNHAStatusHeartbeat haState = new NNHAStatusHeartbeat( 4532 haContext.getState().getServiceState(), 4533 getFSImage().getCorrectLastAppliedOrWrittenTxId()); 4534 4535 return new HeartbeatResponse(cmds, haState, rollingUpgradeInfo); 4536 } finally { 4537 readUnlock("handleHeartbeat"); 4538 } 4539 } 4540 4541 /** 4542 * Returns whether or not there were available resources at the last check of 4543 * resources. 4544 * 4545 * @return true if there were sufficient resources available, false otherwise. 4546 */ 4547 boolean nameNodeHasResourcesAvailable() { 4548 return hasResourcesAvailable; 4549 } 4550 4551 /** 4552 * Perform resource checks and cache the results. 
4553 */ 4554 void checkAvailableResources() { 4555 Preconditions.checkState(nnResourceChecker != null, 4556 "nnResourceChecker not initialized"); 4557 hasResourcesAvailable = nnResourceChecker.hasAvailableDiskSpace(); 4558 } 4559 4560 /** 4561 * Persist the block list for the inode. 4562 * @param path 4563 * @param file 4564 * @param logRetryCache 4565 */ 4566 private void persistBlocks(String path, INodeFile file, 4567 boolean logRetryCache) { 4568 assert hasWriteLock(); 4569 Preconditions.checkArgument(file.isUnderConstruction()); 4570 getEditLog().logUpdateBlocks(path, file, logRetryCache); 4571 NameNode.stateChangeLog.debug("persistBlocks: {} with {} blocks is" + 4572 " peristed to the file system", path, file.getBlocks().length); 4573 } 4574 4575 /** 4576 * Close file. 4577 * @param path 4578 * @param file 4579 */ 4580 private void closeFile(String path, INodeFile file) { 4581 assert hasWriteLock(); 4582 waitForLoadingFSImage(); 4583 // file is closed 4584 getEditLog().logCloseFile(path, file); 4585 NameNode.stateChangeLog.debug("closeFile: {} with {} blocks is persisted" + 4586 " to the file system", path, file.getBlocks().length); 4587 } 4588 4589 /** 4590 * Periodically calls hasAvailableResources of NameNodeResourceChecker, and if 4591 * there are found to be insufficient resources available, causes the NN to 4592 * enter safe mode. If resources are later found to have returned to 4593 * acceptable levels, this daemon will cause the NN to exit safe mode. 4594 */ 4595 class NameNodeResourceMonitor implements Runnable { 4596 boolean shouldNNRmRun = true; 4597 @Override 4598 public void run () { 4599 try { 4600 while (fsRunning && shouldNNRmRun) { 4601 checkAvailableResources(); 4602 if(!nameNodeHasResourcesAvailable()) { 4603 String lowResourcesMsg = "NameNode low on available disk space. 
"; 4604 if (!isInSafeMode()) { 4605 LOG.warn(lowResourcesMsg + "Entering safe mode."); 4606 } else { 4607 LOG.warn(lowResourcesMsg + "Already in safe mode."); 4608 } 4609 enterSafeMode(true); 4610 } 4611 try { 4612 Thread.sleep(resourceRecheckInterval); 4613 } catch (InterruptedException ie) { 4614 // Deliberately ignore 4615 } 4616 } 4617 } catch (Exception e) { 4618 FSNamesystem.LOG.error("Exception in NameNodeResourceMonitor: ", e); 4619 } 4620 } 4621 4622 public void stopMonitor() { 4623 shouldNNRmRun = false; 4624 } 4625 } 4626 4627 class NameNodeEditLogRoller implements Runnable { 4628 4629 private boolean shouldRun = true; 4630 private final long rollThreshold; 4631 private final long sleepIntervalMs; 4632 4633 public NameNodeEditLogRoller(long rollThreshold, int sleepIntervalMs) { 4634 this.rollThreshold = rollThreshold; 4635 this.sleepIntervalMs = sleepIntervalMs; 4636 } 4637 4638 @Override 4639 public void run() { 4640 while (fsRunning && shouldRun) { 4641 try { 4642 FSEditLog editLog = getFSImage().getEditLog(); 4643 long numEdits = 4644 editLog.getLastWrittenTxId() - editLog.getCurSegmentTxId(); 4645 if (numEdits > rollThreshold) { 4646 FSNamesystem.LOG.info("NameNode rolling its own edit log because" 4647 + " number of edits in open segment exceeds threshold of " 4648 + rollThreshold); 4649 rollEditLog(); 4650 } 4651 } catch (Exception e) { 4652 FSNamesystem.LOG.error("Swallowing exception in " 4653 + NameNodeEditLogRoller.class.getSimpleName() + ":", e); 4654 } 4655 try { 4656 Thread.sleep(sleepIntervalMs); 4657 } catch (InterruptedException e) { 4658 FSNamesystem.LOG.info(NameNodeEditLogRoller.class.getSimpleName() 4659 + " was interrupted, exiting"); 4660 break; 4661 } 4662 } 4663 } 4664 4665 public void stop() { 4666 shouldRun = false; 4667 } 4668 } 4669 4670 /** 4671 * Daemon to periodically scan the namespace for lazyPersist files 4672 * with missing blocks and unlink them. 
4673 */ 4674 class LazyPersistFileScrubber implements Runnable { 4675 private volatile boolean shouldRun = true; 4676 final int scrubIntervalSec; 4677 public LazyPersistFileScrubber(final int scrubIntervalSec) { 4678 this.scrubIntervalSec = scrubIntervalSec; 4679 } 4680 4681 /** 4682 * Periodically go over the list of lazyPersist files with missing 4683 * blocks and unlink them from the namespace. 4684 */ 4685 private void clearCorruptLazyPersistFiles() 4686 throws IOException { 4687 4688 BlockStoragePolicy lpPolicy = blockManager.getStoragePolicy("LAZY_PERSIST"); 4689 4690 List<BlockCollection> filesToDelete = new ArrayList<>(); 4691 boolean changed = false; 4692 writeLock(); 4693 try { 4694 final Iterator<Block> it = blockManager.getCorruptReplicaBlockIterator(); 4695 4696 while (it.hasNext()) { 4697 Block b = it.next(); 4698 BlockInfoContiguous blockInfo = blockManager.getStoredBlock(b); 4699 if (blockInfo == null) { 4700 LOG.info("Cannot find block info for block " + b); 4701 } else { 4702 if (blockInfo.getBlockCollection().getStoragePolicyID() 4703 == lpPolicy.getId()) { 4704 filesToDelete.add(blockInfo.getBlockCollection()); 4705 } 4706 } 4707 } 4708 4709 for (BlockCollection bc : filesToDelete) { 4710 LOG.warn("Removing lazyPersist file " + bc.getName() + " with no replicas."); 4711 BlocksMapUpdateInfo toRemoveBlocks = 4712 FSDirDeleteOp.deleteInternal( 4713 FSNamesystem.this, bc.getName(), 4714 INodesInPath.fromINode((INodeFile) bc), false); 4715 changed |= toRemoveBlocks != null; 4716 if (toRemoveBlocks != null) { 4717 removeBlocks(toRemoveBlocks); // Incremental deletion of blocks 4718 } 4719 } 4720 } finally { 4721 writeUnlock("clearCorruptLazyPersistFiles"); 4722 } 4723 if (changed) { 4724 getEditLog().logSync(); 4725 } 4726 } 4727 4728 @Override 4729 public void run() { 4730 while (fsRunning && shouldRun) { 4731 try { 4732 clearCorruptLazyPersistFiles(); 4733 } catch (Exception e) { 4734 FSNamesystem.LOG.error( 4735 "Ignoring exception in 
LazyPersistFileScrubber:", e); 4736 } 4737 4738 try { 4739 Thread.sleep(scrubIntervalSec * 1000); 4740 } catch (InterruptedException e) { 4741 FSNamesystem.LOG.info( 4742 "LazyPersistFileScrubber was interrupted, exiting"); 4743 break; 4744 } 4745 } 4746 } 4747 4748 public void stop() { 4749 shouldRun = false; 4750 } 4751 } 4752 4753 public FSImage getFSImage() { 4754 return fsImage; 4755 } 4756 4757 public FSEditLog getEditLog() { 4758 return getFSImage().getEditLog(); 4759 } 4760 4761 private void checkBlock(ExtendedBlock block) throws IOException { 4762 if (block != null && !this.blockPoolId.equals(block.getBlockPoolId())) { 4763 throw new IOException("Unexpected BlockPoolId " + block.getBlockPoolId() 4764 + " - expected " + blockPoolId); 4765 } 4766 } 4767 4768 @Metric({"MissingBlocks", "Number of missing blocks"}) 4769 public long getMissingBlocksCount() { 4770 // not locking 4771 return blockManager.getMissingBlocksCount(); 4772 } 4773 4774 @Metric({"MissingReplOneBlocks", "Number of missing blocks " + 4775 "with replication factor 1"}) 4776 public long getMissingReplOneBlocksCount() { 4777 // not locking 4778 return blockManager.getMissingReplOneBlocksCount(); 4779 } 4780 4781 @Metric({"ExpiredHeartbeats", "Number of expired heartbeats"}) 4782 public int getExpiredHeartbeats() { 4783 return datanodeStatistics.getExpiredHeartbeats(); 4784 } 4785 4786 @Metric({"TransactionsSinceLastCheckpoint", 4787 "Number of transactions since last checkpoint"}) 4788 public long getTransactionsSinceLastCheckpoint() { 4789 return getEditLog().getLastWrittenTxIdWithoutLock() - 4790 getFSImage().getStorage().getMostRecentCheckpointTxId(); 4791 } 4792 4793 @Metric({"TransactionsSinceLastLogRoll", 4794 "Number of transactions since last edit log roll"}) 4795 public long getTransactionsSinceLastLogRoll() { 4796 if (isInStandbyState() || !getEditLog().isSegmentOpenWithoutLock()) { 4797 return 0; 4798 } else { 4799 return getEditLog().getLastWrittenTxIdWithoutLock() - 4800 
getEditLog().getCurSegmentTxIdWithoutLock() + 1; 4801 } 4802 } 4803 4804 @Metric({"LastWrittenTransactionId", "Transaction ID written to the edit log"}) 4805 public long getLastWrittenTransactionId() { 4806 return getEditLog().getLastWrittenTxIdWithoutLock(); 4807 } 4808 4809 @Metric({"LastCheckpointTime", 4810 "Time in milliseconds since the epoch of the last checkpoint"}) 4811 public long getLastCheckpointTime() { 4812 return getFSImage().getStorage().getMostRecentCheckpointTime(); 4813 } 4814 4815 /** @see ClientProtocol#getStats() */ 4816 long[] getStats() { 4817 final long[] stats = datanodeStatistics.getStats(); 4818 stats[ClientProtocol.GET_STATS_UNDER_REPLICATED_IDX] = getUnderReplicatedBlocks(); 4819 stats[ClientProtocol.GET_STATS_CORRUPT_BLOCKS_IDX] = getCorruptReplicaBlocks(); 4820 stats[ClientProtocol.GET_STATS_MISSING_BLOCKS_IDX] = getMissingBlocksCount(); 4821 stats[ClientProtocol.GET_STATS_MISSING_REPL_ONE_BLOCKS_IDX] = 4822 getMissingReplOneBlocksCount(); 4823 return stats; 4824 } 4825 4826 @Override // FSNamesystemMBean 4827 @Metric({"CapacityTotal", 4828 "Total raw capacity of data nodes in bytes"}) 4829 public long getCapacityTotal() { 4830 return datanodeStatistics.getCapacityTotal(); 4831 } 4832 4833 @Metric({"CapacityTotalGB", 4834 "Total raw capacity of data nodes in GB"}) 4835 public float getCapacityTotalGB() { 4836 return DFSUtil.roundBytesToGB(getCapacityTotal()); 4837 } 4838 4839 @Override // FSNamesystemMBean 4840 @Metric({"CapacityUsed", 4841 "Total used capacity across all data nodes in bytes"}) 4842 public long getCapacityUsed() { 4843 return datanodeStatistics.getCapacityUsed(); 4844 } 4845 4846 @Metric({"CapacityUsedGB", 4847 "Total used capacity across all data nodes in GB"}) 4848 public float getCapacityUsedGB() { 4849 return DFSUtil.roundBytesToGB(getCapacityUsed()); 4850 } 4851 4852 @Override // FSNamesystemMBean 4853 @Metric({"CapacityRemaining", "Remaining capacity in bytes"}) 4854 public long getCapacityRemaining() { 4855 
return datanodeStatistics.getCapacityRemaining(); 4856 } 4857 4858 @Metric({"CapacityRemainingGB", "Remaining capacity in GB"}) 4859 public float getCapacityRemainingGB() { 4860 return DFSUtil.roundBytesToGB(getCapacityRemaining()); 4861 } 4862 4863 @Metric({"CapacityUsedNonDFS", 4864 "Total space used by data nodes for non DFS purposes in bytes"}) 4865 public long getCapacityUsedNonDFS() { 4866 return datanodeStatistics.getCapacityUsedNonDFS(); 4867 } 4868 4869 /** 4870 * Total number of connections. 4871 */ 4872 @Override // FSNamesystemMBean 4873 @Metric 4874 public int getTotalLoad() { 4875 return datanodeStatistics.getXceiverCount(); 4876 } 4877 4878 @Metric({ "SnapshottableDirectories", "Number of snapshottable directories" }) 4879 public int getNumSnapshottableDirs() { 4880 return this.snapshotManager.getNumSnapshottableDirs(); 4881 } 4882 4883 @Metric({ "Snapshots", "The number of snapshots" }) 4884 public int getNumSnapshots() { 4885 return this.snapshotManager.getNumSnapshots(); 4886 } 4887 4888 @Override 4889 public String getSnapshotStats() { 4890 Map<String, Object> info = new HashMap<String, Object>(); 4891 info.put("SnapshottableDirectories", this.getNumSnapshottableDirs()); 4892 info.put("Snapshots", this.getNumSnapshots()); 4893 return JSON.toString(info); 4894 } 4895 4896 @Override // FSNamesystemMBean 4897 @Metric({ "NumEncryptionZones", "The number of encryption zones" }) 4898 public int getNumEncryptionZones() { 4899 return dir.ezManager.getNumEncryptionZones(); 4900 } 4901 4902 /** 4903 * Returns the length of the wait Queue for the FSNameSystemLock. 4904 * 4905 * A larger number here indicates lots of threads are waiting for 4906 * FSNameSystemLock. 
4907 * 4908 * @return int - Number of Threads waiting to acquire FSNameSystemLock 4909 */ 4910 @Override 4911 @Metric({"LockQueueLength", "Number of threads waiting to " + 4912 "acquire FSNameSystemLock"}) 4913 public int getFsLockQueueLength() { 4914 return fsLock.getQueueLength(); 4915 } 4916 4917 int getNumberOfDatanodes(DatanodeReportType type) { 4918 readLock(); 4919 try { 4920 return getBlockManager().getDatanodeManager().getDatanodeListForReport( 4921 type).size(); 4922 } finally { 4923 readUnlock("getNumberOfDatanodes"); 4924 } 4925 } 4926 4927 DatanodeInfo[] datanodeReport(final DatanodeReportType type 4928 ) throws AccessControlException, StandbyException { 4929 checkSuperuserPrivilege(); 4930 checkOperation(OperationCategory.UNCHECKED); 4931 readLock(); 4932 try { 4933 checkOperation(OperationCategory.UNCHECKED); 4934 final DatanodeManager dm = getBlockManager().getDatanodeManager(); 4935 final List<DatanodeDescriptor> results = dm.getDatanodeListForReport(type); 4936 4937 DatanodeInfo[] arr = new DatanodeInfo[results.size()]; 4938 for (int i=0; i<arr.length; i++) { 4939 arr[i] = new DatanodeInfo(results.get(i)); 4940 } 4941 return arr; 4942 } finally { 4943 readUnlock("datanodeReport"); 4944 } 4945 } 4946 4947 DatanodeStorageReport[] getDatanodeStorageReport(final DatanodeReportType type 4948 ) throws AccessControlException, StandbyException { 4949 checkSuperuserPrivilege(); 4950 checkOperation(OperationCategory.UNCHECKED); 4951 readLock(); 4952 try { 4953 checkOperation(OperationCategory.UNCHECKED); 4954 final DatanodeManager dm = getBlockManager().getDatanodeManager(); 4955 final List<DatanodeDescriptor> datanodes = dm.getDatanodeListForReport(type); 4956 4957 DatanodeStorageReport[] reports = new DatanodeStorageReport[datanodes.size()]; 4958 for (int i = 0; i < reports.length; i++) { 4959 final DatanodeDescriptor d = datanodes.get(i); 4960 reports[i] = new DatanodeStorageReport(new DatanodeInfo(d), 4961 d.getStorageReports()); 4962 } 4963 return 
reports; 4964 } finally { 4965 readUnlock("getDatanodeStorageReport"); 4966 } 4967 } 4968 4969 /** 4970 * Save namespace image. 4971 * This will save current namespace into fsimage file and empty edits file. 4972 * Requires superuser privilege and safe mode. 4973 * 4974 * @throws AccessControlException if superuser privilege is violated. 4975 * @throws IOException if 4976 */ 4977 void saveNamespace() throws AccessControlException, IOException { 4978 checkOperation(OperationCategory.UNCHECKED); 4979 checkSuperuserPrivilege(); 4980 4981 cpLock(); // Block if a checkpointing is in progress on standby. 4982 readLock(); 4983 try { 4984 checkOperation(OperationCategory.UNCHECKED); 4985 4986 if (!isInSafeMode()) { 4987 throw new IOException("Safe mode should be turned ON " 4988 + "in order to create namespace image."); 4989 } 4990 getFSImage().saveNamespace(this); 4991 } finally { 4992 readUnlock("saveNamespace"); 4993 cpUnlock(); 4994 } 4995 LOG.info("New namespace image has been created"); 4996 } 4997 4998 /** 4999 * Enables/Disables/Checks restoring failed storage replicas if the storage becomes available again. 5000 * Requires superuser privilege. 5001 * 5002 * @throws AccessControlException if superuser privilege is violated. 5003 */ 5004 boolean restoreFailedStorage(String arg) throws AccessControlException, 5005 StandbyException { 5006 checkSuperuserPrivilege(); 5007 checkOperation(OperationCategory.UNCHECKED); 5008 cpLock(); // Block if a checkpointing is in progress on standby. 5009 writeLock(); 5010 try { 5011 checkOperation(OperationCategory.UNCHECKED); 5012 5013 // if it is disabled - enable it and vice versa. 
5014 if(arg.equals("check")) 5015 return getFSImage().getStorage().getRestoreFailedStorage(); 5016 5017 boolean val = arg.equals("true"); // false if not 5018 getFSImage().getStorage().setRestoreFailedStorage(val); 5019 5020 return val; 5021 } finally { 5022 writeUnlock("restoreFailedStorage"); 5023 cpUnlock(); 5024 } 5025 } 5026 5027 Date getStartTime() { 5028 return new Date(startTime); 5029 } 5030 5031 void finalizeUpgrade() throws IOException { 5032 checkSuperuserPrivilege(); 5033 checkOperation(OperationCategory.UNCHECKED); 5034 cpLock(); // Block if a checkpointing is in progress on standby. 5035 writeLock(); 5036 try { 5037 checkOperation(OperationCategory.UNCHECKED); 5038 getFSImage().finalizeUpgrade(this.isHaEnabled() && inActiveState()); 5039 } finally { 5040 writeUnlock("finalizeUpgrade"); 5041 cpUnlock(); 5042 } 5043 } 5044 5045 void refreshNodes() throws IOException { 5046 checkOperation(OperationCategory.UNCHECKED); 5047 checkSuperuserPrivilege(); 5048 getBlockManager().getDatanodeManager().refreshNodes(new HdfsConfiguration()); 5049 } 5050 5051 void setBalancerBandwidth(long bandwidth) throws IOException { 5052 checkOperation(OperationCategory.UNCHECKED); 5053 checkSuperuserPrivilege(); 5054 getBlockManager().getDatanodeManager().setBalancerBandwidth(bandwidth); 5055 } 5056 5057 /** 5058 * Persist the new block (the last block of the given file). 5059 * @param path 5060 * @param file 5061 */ 5062 private void persistNewBlock(String path, INodeFile file) { 5063 Preconditions.checkArgument(file.isUnderConstruction()); 5064 getEditLog().logAddBlock(path, file); 5065 NameNode.stateChangeLog.debug("persistNewBlock: {} with new block {}," + 5066 " current total block count is {}", path, 5067 file.getLastBlock().toString(), file.getBlocks().length); 5068 } 5069 5070 /** 5071 * SafeModeInfo contains information related to the safe mode. 5072 * <p> 5073 * An instance of {@link SafeModeInfo} is created when the name node 5074 * enters safe mode. 
5075 * <p> 5076 * During name node startup {@link SafeModeInfo} counts the number of 5077 * <em>safe blocks</em>, those that have at least the minimal number of 5078 * replicas, and calculates the ratio of safe blocks to the total number 5079 * of blocks in the system, which is the size of blocks in 5080 * {@link FSNamesystem#blockManager}. When the ratio reaches the 5081 * {@link #threshold} it starts the SafeModeMonitor daemon in order 5082 * to monitor whether the safe mode {@link #extension} is passed. 5083 * Then it leaves safe mode and destroys itself. 5084 * <p> 5085 * If safe mode is turned on manually then the number of safe blocks is 5086 * not tracked because the name node is not intended to leave safe mode 5087 * automatically in the case. 5088 * 5089 * @see ClientProtocol#setSafeMode(HdfsConstants.SafeModeAction, boolean) 5090 */ 5091 public class SafeModeInfo { 5092 // configuration fields 5093 /** Safe mode threshold condition %.*/ 5094 private final double threshold; 5095 /** Safe mode minimum number of datanodes alive */ 5096 private final int datanodeThreshold; 5097 /** 5098 * Safe mode extension after the threshold. 5099 * Make it volatile so that getSafeModeTip can read the latest value 5100 * without taking a lock. 5101 */ 5102 private volatile int extension; 5103 /** Min replication required by safe mode. */ 5104 private final int safeReplication; 5105 /** threshold for populating needed replication queues */ 5106 private final double replQueueThreshold; 5107 // internal fields 5108 /** Time when threshold was reached. 5109 * <br> -1 safe mode is off 5110 * <br> 0 safe mode is on, and threshold is not reached yet 5111 * <br> >0 safe mode is on, but we are in extension period 5112 */ 5113 private long reached = -1; 5114 private long reachedTimestamp = -1; 5115 /** Total number of blocks. */ 5116 int blockTotal; 5117 /** Number of safe blocks. 
*/ 5118 int blockSafe; 5119 /** Number of blocks needed to satisfy safe mode threshold condition */ 5120 private int blockThreshold; 5121 /** Number of blocks needed before populating replication queues */ 5122 private int blockReplQueueThreshold; 5123 /** time of the last status printout */ 5124 private long lastStatusReport = 0; 5125 /** 5126 * Was safemode entered automatically because available resources were low. 5127 * Make it volatile so that getSafeModeTip can read the latest value 5128 * without taking a lock. 5129 */ 5130 private volatile boolean resourcesLow = false; 5131 /** Should safemode adjust its block totals as blocks come in */ 5132 private boolean shouldIncrementallyTrackBlocks = false; 5133 /** counter for tracking startup progress of reported blocks */ 5134 private Counter awaitingReportedBlocksCounter; 5135 5136 /** 5137 * Creates SafeModeInfo when the name node enters 5138 * automatic safe mode at startup. 5139 * 5140 * @param conf configuration 5141 */ 5142 private SafeModeInfo(Configuration conf) { 5143 this.threshold = conf.getFloat(DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY, 5144 DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT); 5145 if(threshold > 1.0) { 5146 LOG.warn("The threshold value should't be greater than 1, threshold: " + threshold); 5147 } 5148 this.datanodeThreshold = conf.getInt( 5149 DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY, 5150 DFS_NAMENODE_SAFEMODE_MIN_DATANODES_DEFAULT); 5151 this.extension = conf.getInt(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 0); 5152 this.safeReplication = conf.getInt(DFS_NAMENODE_REPLICATION_MIN_KEY, 5153 DFS_NAMENODE_REPLICATION_MIN_DEFAULT); 5154 5155 LOG.info(DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY + " = " + threshold); 5156 LOG.info(DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY + " = " + datanodeThreshold); 5157 LOG.info(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY + " = " + extension); 5158 5159 // default to safe mode threshold (i.e., don't populate queues before leaving safe mode) 5160 this.replQueueThreshold = 5161 
conf.getFloat(DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY, 5162 (float) threshold); 5163 this.blockTotal = 0; 5164 this.blockSafe = 0; 5165 } 5166 5167 /** 5168 * In the HA case, the StandbyNode can be in safemode while the namespace 5169 * is modified by the edit log tailer. In this case, the number of total 5170 * blocks changes as edits are processed (eg blocks are added and deleted). 5171 * However, we don't want to do the incremental tracking during the 5172 * startup-time loading process -- only once the initial total has been 5173 * set after the image has been loaded. 5174 */ 5175 private boolean shouldIncrementallyTrackBlocks() { 5176 return shouldIncrementallyTrackBlocks; 5177 } 5178 5179 /** 5180 * Creates SafeModeInfo when safe mode is entered manually, or because 5181 * available resources are low. 5182 * 5183 * The {@link #threshold} is set to 1.5 so that it could never be reached. 5184 * {@link #blockTotal} is set to -1 to indicate that safe mode is manual. 5185 * 5186 * @see SafeModeInfo 5187 */ 5188 private SafeModeInfo(boolean resourcesLow) { 5189 this.threshold = 1.5f; // this threshold can never be reached 5190 this.datanodeThreshold = Integer.MAX_VALUE; 5191 this.extension = Integer.MAX_VALUE; 5192 this.safeReplication = Short.MAX_VALUE + 1; // more than maxReplication 5193 this.replQueueThreshold = 1.5f; // can never be reached 5194 this.blockTotal = -1; 5195 this.blockSafe = -1; 5196 this.resourcesLow = resourcesLow; 5197 enter(); 5198 reportStatus("STATE* Safe mode is ON.", true); 5199 } 5200 5201 /** 5202 * Check if safe mode is on. 5203 * @return true if in safe mode 5204 */ 5205 private synchronized boolean isOn() { 5206 doConsistencyCheck(); 5207 return this.reached >= 0; 5208 } 5209 5210 /** 5211 * Enter safe mode. 5212 */ 5213 private void enter() { 5214 this.reached = 0; 5215 this.reachedTimestamp = 0; 5216 } 5217 5218 /** 5219 * Leave safe mode. 5220 * <p> 5221 * Check for invalid, under- & over-replicated blocks in the end of startup. 
5222 */ 5223 private synchronized void leave() { 5224 // if not done yet, initialize replication queues. 5225 // In the standby, do not populate repl queues 5226 if (!isPopulatingReplQueues() && shouldPopulateReplQueues()) { 5227 initializeReplQueues(); 5228 } 5229 long timeInSafemode = now() - startTime; 5230 NameNode.stateChangeLog.info("STATE* Leaving safe mode after " 5231 + timeInSafemode/1000 + " secs"); 5232 NameNode.getNameNodeMetrics().setSafeModeTime((int) timeInSafemode); 5233 5234 //Log the following only once (when transitioning from ON -> OFF) 5235 if (reached >= 0) { 5236 NameNode.stateChangeLog.info("STATE* Safe mode is OFF"); 5237 } 5238 reached = -1; 5239 reachedTimestamp = -1; 5240 safeMode = null; 5241 final NetworkTopology nt = blockManager.getDatanodeManager().getNetworkTopology(); 5242 NameNode.stateChangeLog.info("STATE* Network topology has " 5243 + nt.getNumOfRacks() + " racks and " 5244 + nt.getNumOfLeaves() + " datanodes"); 5245 NameNode.stateChangeLog.info("STATE* UnderReplicatedBlocks has " 5246 + blockManager.numOfUnderReplicatedBlocks() + " blocks"); 5247 5248 startSecretManagerIfNecessary(); 5249 5250 // If startup has not yet completed, end safemode phase. 5251 StartupProgress prog = NameNode.getStartupProgress(); 5252 if (prog.getStatus(Phase.SAFEMODE) != Status.COMPLETE) { 5253 prog.endStep(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS); 5254 prog.endPhase(Phase.SAFEMODE); 5255 } 5256 } 5257 5258 /** 5259 * Check whether we have reached the threshold for 5260 * initializing replication queues. 5261 */ 5262 private synchronized boolean canInitializeReplQueues() { 5263 return shouldPopulateReplQueues() 5264 && blockSafe >= blockReplQueueThreshold; 5265 } 5266 5267 /** 5268 * Safe mode can be turned off iff 5269 * the threshold is reached and 5270 * the extension time have passed. 5271 * @return true if can leave or false otherwise. 
5272 */ 5273 private synchronized boolean canLeave() { 5274 if (reached == 0) { 5275 return false; 5276 } 5277 5278 if (monotonicNow() - reached < extension) { 5279 reportStatus("STATE* Safe mode ON, in safe mode extension.", false); 5280 return false; 5281 } 5282 5283 if (needEnter()) { 5284 reportStatus("STATE* Safe mode ON, thresholds not met.", false); 5285 return false; 5286 } 5287 5288 return true; 5289 } 5290 5291 /** 5292 * There is no need to enter safe mode 5293 * if DFS is empty or {@link #threshold} == 0 5294 */ 5295 private boolean needEnter() { 5296 return (threshold != 0 && blockSafe < blockThreshold) || 5297 (datanodeThreshold != 0 && getNumLiveDataNodes() < datanodeThreshold) || 5298 (!nameNodeHasResourcesAvailable()); 5299 } 5300 5301 /** 5302 * Check and trigger safe mode if needed. 5303 */ 5304 private void checkMode() { 5305 // Have to have write-lock since leaving safemode initializes 5306 // repl queues, which requires write lock 5307 assert hasWriteLock(); 5308 if (inTransitionToActive()) { 5309 return; 5310 } 5311 // if smmthread is already running, the block threshold must have been 5312 // reached before, there is no need to enter the safe mode again 5313 if (smmthread == null && needEnter()) { 5314 enter(); 5315 // check if we are ready to initialize replication queues 5316 if (canInitializeReplQueues() && !isPopulatingReplQueues() 5317 && !haEnabled) { 5318 initializeReplQueues(); 5319 } 5320 reportStatus("STATE* Safe mode ON.", false); 5321 return; 5322 } 5323 // the threshold is reached or was reached before 5324 if (!isOn() || // safe mode is off 5325 extension <= 0 || threshold <= 0) { // don't need to wait 5326 this.leave(); // leave safe mode 5327 return; 5328 } 5329 if (reached > 0) { // threshold has already been reached before 5330 reportStatus("STATE* Safe mode ON.", false); 5331 return; 5332 } 5333 // start monitor 5334 reached = monotonicNow(); 5335 reachedTimestamp = now(); 5336 if (smmthread == null) { 5337 smmthread = new 
Daemon(new SafeModeMonitor()); 5338 smmthread.start(); 5339 reportStatus("STATE* Safe mode extension entered.", true); 5340 } 5341 5342 // check if we are ready to initialize replication queues 5343 if (canInitializeReplQueues() && !isPopulatingReplQueues() && !haEnabled) { 5344 initializeReplQueues(); 5345 } 5346 } 5347 5348 /** 5349 * Set total number of blocks. 5350 */ 5351 private synchronized void setBlockTotal(int total) { 5352 this.blockTotal = total; 5353 this.blockThreshold = (int) (blockTotal * threshold); 5354 this.blockReplQueueThreshold = 5355 (int) (blockTotal * replQueueThreshold); 5356 if (haEnabled) { 5357 // After we initialize the block count, any further namespace 5358 // modifications done while in safe mode need to keep track 5359 // of the number of total blocks in the system. 5360 this.shouldIncrementallyTrackBlocks = true; 5361 } 5362 if(blockSafe < 0) 5363 this.blockSafe = 0; 5364 checkMode(); 5365 } 5366 5367 /** 5368 * Increment number of safe blocks if current block has 5369 * reached minimal replication. 5370 * @param replication current replication 5371 */ 5372 private synchronized void incrementSafeBlockCount(short replication) { 5373 if (replication == safeReplication) { 5374 this.blockSafe++; 5375 5376 // Report startup progress only if we haven't completed startup yet. 5377 StartupProgress prog = NameNode.getStartupProgress(); 5378 if (prog.getStatus(Phase.SAFEMODE) != Status.COMPLETE) { 5379 if (this.awaitingReportedBlocksCounter == null) { 5380 this.awaitingReportedBlocksCounter = prog.getCounter(Phase.SAFEMODE, 5381 STEP_AWAITING_REPORTED_BLOCKS); 5382 } 5383 this.awaitingReportedBlocksCounter.increment(); 5384 } 5385 5386 checkMode(); 5387 } 5388 } 5389 5390 /** 5391 * Decrement number of safe blocks if current block has 5392 * fallen below minimal replication. 
5393 * @param replication current replication 5394 */ 5395 private synchronized void decrementSafeBlockCount(short replication) { 5396 if (replication == safeReplication-1) { 5397 this.blockSafe--; 5398 //blockSafe is set to -1 in manual / low resources safemode 5399 assert blockSafe >= 0 || isManual() || areResourcesLow(); 5400 checkMode(); 5401 } 5402 } 5403 5404 /** 5405 * Check if safe mode was entered manually 5406 */ 5407 private boolean isManual() { 5408 return extension == Integer.MAX_VALUE; 5409 } 5410 5411 /** 5412 * Set manual safe mode. 5413 */ 5414 private synchronized void setManual() { 5415 extension = Integer.MAX_VALUE; 5416 } 5417 5418 /** 5419 * Check if safe mode was entered due to resources being low. 5420 */ 5421 private boolean areResourcesLow() { 5422 return resourcesLow; 5423 } 5424 5425 /** 5426 * Set that resources are low for this instance of safe mode. 5427 */ 5428 private void setResourcesLow() { 5429 resourcesLow = true; 5430 } 5431 5432 /** 5433 * A tip on how safe mode is to be turned off: manually or automatically. 5434 */ 5435 String getTurnOffTip() { 5436 if(!isOn()) { 5437 return "Safe mode is OFF."; 5438 } 5439 5440 //Manual OR low-resource safemode. (Admin intervention required) 5441 String adminMsg = "It was turned on manually. "; 5442 if (areResourcesLow()) { 5443 adminMsg = "Resources are low on NN. Please add or free up more " 5444 + "resources then turn off safe mode manually. NOTE: If you turn off" 5445 + " safe mode before adding resources, " 5446 + "the NN will immediately return to safe mode. 
"; 5447 } 5448 if (isManual() || areResourcesLow()) { 5449 return adminMsg 5450 + "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off."; 5451 } 5452 5453 boolean thresholdsMet = true; 5454 int numLive = getNumLiveDataNodes(); 5455 String msg = ""; 5456 if (blockSafe < blockThreshold) { 5457 msg += String.format( 5458 "The reported blocks %d needs additional %d" 5459 + " blocks to reach the threshold %.4f of total blocks %d.%n", 5460 blockSafe, (blockThreshold - blockSafe) + 1, threshold, blockTotal); 5461 thresholdsMet = false; 5462 } else { 5463 msg += String.format("The reported blocks %d has reached the threshold" 5464 + " %.4f of total blocks %d. ", blockSafe, threshold, blockTotal); 5465 } 5466 if (numLive < datanodeThreshold) { 5467 msg += String.format( 5468 "The number of live datanodes %d needs an additional %d live " 5469 + "datanodes to reach the minimum number %d.%n", 5470 numLive, (datanodeThreshold - numLive), datanodeThreshold); 5471 thresholdsMet = false; 5472 } else { 5473 msg += String.format("The number of live datanodes %d has reached " 5474 + "the minimum number %d. ", 5475 numLive, datanodeThreshold); 5476 } 5477 msg += (reached > 0) ? "In safe mode extension. " : ""; 5478 msg += "Safe mode will be turned off automatically "; 5479 5480 if (!thresholdsMet) { 5481 msg += "once the thresholds have been reached."; 5482 } else if (reached + extension - monotonicNow() > 0) { 5483 msg += ("in " + (reached + extension - monotonicNow()) / 1000 + " seconds."); 5484 } else { 5485 msg += "soon."; 5486 } 5487 5488 return msg; 5489 } 5490 5491 /** 5492 * Print status every 20 seconds. 
5493 */ 5494 private void reportStatus(String msg, boolean rightNow) { 5495 long curTime = now(); 5496 if(!rightNow && (curTime - lastStatusReport < 20 * 1000)) 5497 return; 5498 NameNode.stateChangeLog.info(msg + " \n" + getTurnOffTip()); 5499 lastStatusReport = curTime; 5500 } 5501 5502 @Override 5503 public String toString() { 5504 String resText = "Current safe blocks = " 5505 + blockSafe 5506 + ". Target blocks = " + blockThreshold + " for threshold = %" + threshold 5507 + ". Minimal replication = " + safeReplication + "."; 5508 if (reached > 0) 5509 resText += " Threshold was reached " + new Date(reachedTimestamp) + "."; 5510 return resText; 5511 } 5512 5513 /** 5514 * Checks consistency of the class state. 5515 * This is costly so only runs if asserts are enabled. 5516 */ 5517 private void doConsistencyCheck() { 5518 boolean assertsOn = false; 5519 assert assertsOn = true; // set to true if asserts are on 5520 if (!assertsOn) return; 5521 5522 if (blockTotal == -1 && blockSafe == -1) { 5523 return; // manual safe mode 5524 } 5525 int activeBlocks = blockManager.getActiveBlockCount(); 5526 if ((blockTotal != activeBlocks) && 5527 !(blockSafe >= 0 && blockSafe <= blockTotal)) { 5528 throw new AssertionError( 5529 " SafeMode: Inconsistent filesystem state: " 5530 + "SafeMode data: blockTotal=" + blockTotal 5531 + " blockSafe=" + blockSafe + "; " 5532 + "BlockManager data: active=" + activeBlocks); 5533 } 5534 } 5535 5536 private synchronized void adjustBlockTotals(int deltaSafe, int deltaTotal) { 5537 if (!shouldIncrementallyTrackBlocks) { 5538 return; 5539 } 5540 assert haEnabled; 5541 5542 if (LOG.isDebugEnabled()) { 5543 LOG.debug("Adjusting block totals from " + 5544 blockSafe + "/" + blockTotal + " to " + 5545 (blockSafe + deltaSafe) + "/" + (blockTotal + deltaTotal)); 5546 } 5547 assert blockSafe + deltaSafe >= 0 : "Can't reduce blockSafe " + 5548 blockSafe + " by " + deltaSafe + ": would be negative"; 5549 assert blockTotal + deltaTotal >= 0 : "Can't 
reduce blockTotal " + 5550 blockTotal + " by " + deltaTotal + ": would be negative"; 5551 5552 blockSafe += deltaSafe; 5553 setBlockTotal(blockTotal + deltaTotal); 5554 } 5555 } 5556 5557 /** 5558 * Periodically check whether it is time to leave safe mode. 5559 * This thread starts when the threshold level is reached. 5560 * 5561 */ 5562 class SafeModeMonitor implements Runnable { 5563 /** interval in msec for checking safe mode: {@value} */ 5564 private static final long recheckInterval = 1000; 5565 5566 /** 5567 */ 5568 @Override 5569 public void run() { 5570 while (fsRunning) { 5571 writeLock(); 5572 try { 5573 if (safeMode == null) { // Not in safe mode. 5574 break; 5575 } 5576 if (safeMode.canLeave()) { 5577 // Leave safe mode. 5578 safeMode.leave(); 5579 smmthread = null; 5580 break; 5581 } 5582 } finally { 5583 writeUnlock(); 5584 } 5585 5586 try { 5587 Thread.sleep(recheckInterval); 5588 } catch (InterruptedException ie) { 5589 // Ignored 5590 } 5591 } 5592 if (!fsRunning) { 5593 LOG.info("NameNode is being shutdown, exit SafeModeMonitor thread"); 5594 } 5595 } 5596 } 5597 5598 boolean setSafeMode(SafeModeAction action) throws IOException { 5599 if (action != SafeModeAction.SAFEMODE_GET) { 5600 checkSuperuserPrivilege(); 5601 switch(action) { 5602 case SAFEMODE_LEAVE: // leave safe mode 5603 leaveSafeMode(); 5604 break; 5605 case SAFEMODE_ENTER: // enter safe mode 5606 enterSafeMode(false); 5607 break; 5608 default: 5609 LOG.error("Unexpected safe mode action"); 5610 } 5611 } 5612 return isInSafeMode(); 5613 } 5614 5615 @Override 5616 public void checkSafeMode() { 5617 // safeMode is volatile, and may be set to null at any time 5618 SafeModeInfo safeMode = this.safeMode; 5619 if (safeMode != null) { 5620 safeMode.checkMode(); 5621 } 5622 } 5623 5624 @Override 5625 public boolean isInSafeMode() { 5626 // safeMode is volatile, and may be set to null at any time 5627 SafeModeInfo safeMode = this.safeMode; 5628 if (safeMode == null) 5629 return false; 5630 
return safeMode.isOn(); 5631 } 5632 5633 @Override 5634 public boolean isInStartupSafeMode() { 5635 // safeMode is volatile, and may be set to null at any time 5636 SafeModeInfo safeMode = this.safeMode; 5637 if (safeMode == null) 5638 return false; 5639 // If the NN is in safemode, and not due to manual / low resources, we 5640 // assume it must be because of startup. If the NN had low resources during 5641 // startup, we assume it came out of startup safemode and it is now in low 5642 // resources safemode 5643 return !safeMode.isManual() && !safeMode.areResourcesLow() 5644 && safeMode.isOn(); 5645 } 5646 5647 /** 5648 * Check if replication queues are to be populated 5649 * @return true when node is HAState.Active and not in the very first safemode 5650 */ 5651 @Override 5652 public boolean isPopulatingReplQueues() { 5653 if (!shouldPopulateReplQueues()) { 5654 return false; 5655 } 5656 return initializedReplQueues; 5657 } 5658 5659 private boolean shouldPopulateReplQueues() { 5660 if(haContext == null || haContext.getState() == null) 5661 return false; 5662 return haContext.getState().shouldPopulateReplQueues(); 5663 } 5664 5665 @Override 5666 public void incrementSafeBlockCount(int replication) { 5667 // safeMode is volatile, and may be set to null at any time 5668 SafeModeInfo safeMode = this.safeMode; 5669 if (safeMode == null) 5670 return; 5671 safeMode.incrementSafeBlockCount((short)replication); 5672 } 5673 5674 @Override 5675 public void decrementSafeBlockCount(Block b) { 5676 // safeMode is volatile, and may be set to null at any time 5677 SafeModeInfo safeMode = this.safeMode; 5678 if (safeMode == null) // mostly true 5679 return; 5680 BlockInfoContiguous storedBlock = getStoredBlock(b); 5681 if (storedBlock.isComplete()) { 5682 safeMode.decrementSafeBlockCount((short)blockManager.countNodes(b).liveReplicas()); 5683 } 5684 } 5685 5686 /** 5687 * Adjust the total number of blocks safe and expected during safe mode. 
5688 * If safe mode is not currently on, this is a no-op. 5689 * @param deltaSafe the change in number of safe blocks 5690 * @param deltaTotal the change i nnumber of total blocks expected 5691 */ 5692 @Override 5693 public void adjustSafeModeBlockTotals(int deltaSafe, int deltaTotal) { 5694 // safeMode is volatile, and may be set to null at any time 5695 SafeModeInfo safeMode = this.safeMode; 5696 if (safeMode == null) 5697 return; 5698 safeMode.adjustBlockTotals(deltaSafe, deltaTotal); 5699 } 5700 5701 /** 5702 * Set the total number of blocks in the system. 5703 */ 5704 public void setBlockTotal() { 5705 // safeMode is volatile, and may be set to null at any time 5706 SafeModeInfo safeMode = this.safeMode; 5707 if (safeMode == null) 5708 return; 5709 safeMode.setBlockTotal((int)getCompleteBlocksTotal()); 5710 } 5711 5712 /** 5713 * Get the total number of blocks in the system. 5714 */ 5715 @Override // FSNamesystemMBean 5716 @Metric 5717 public long getBlocksTotal() { 5718 return blockManager.getTotalBlocks(); 5719 } 5720 5721 /** 5722 * Get the total number of COMPLETE blocks in the system. 5723 * For safe mode only complete blocks are counted. 5724 */ 5725 private long getCompleteBlocksTotal() { 5726 // Calculate number of blocks under construction 5727 long numUCBlocks = 0; 5728 readLock(); 5729 numUCBlocks = leaseManager.getNumUnderConstructionBlocks(); 5730 try { 5731 return getBlocksTotal() - numUCBlocks; 5732 } finally { 5733 readUnlock("getCompleteBlocksTotal"); 5734 } 5735 } 5736 5737 /** 5738 * Enter safe mode. If resourcesLow is false, then we assume it is manual 5739 * @throws IOException 5740 */ 5741 void enterSafeMode(boolean resourcesLow) throws IOException { 5742 writeLock(); 5743 try { 5744 // Stop the secret manager, since rolling the master key would 5745 // try to write to the edit log 5746 stopSecretManager(); 5747 5748 // Ensure that any concurrent operations have been fully synced 5749 // before entering safe mode. 
This ensures that the FSImage 5750 // is entirely stable on disk as soon as we're in safe mode. 5751 boolean isEditlogOpenForWrite = getEditLog().isOpenForWrite(); 5752 // Before Editlog is in OpenForWrite mode, editLogStream will be null. So, 5753 // logSyncAll call can be called only when Edlitlog is in OpenForWrite mode 5754 if (isEditlogOpenForWrite) { 5755 getEditLog().logSyncAll(); 5756 } 5757 if (!isInSafeMode()) { 5758 safeMode = new SafeModeInfo(resourcesLow); 5759 return; 5760 } 5761 if (resourcesLow) { 5762 safeMode.setResourcesLow(); 5763 } else { 5764 safeMode.setManual(); 5765 } 5766 if (isEditlogOpenForWrite) { 5767 getEditLog().logSyncAll(); 5768 } 5769 NameNode.stateChangeLog.info("STATE* Safe mode is ON" 5770 + safeMode.getTurnOffTip()); 5771 } finally { 5772 writeUnlock("enterSafeMode"); 5773 } 5774 } 5775 5776 /** 5777 * Leave safe mode. 5778 */ 5779 void leaveSafeMode() { 5780 writeLock(); 5781 try { 5782 if (!isInSafeMode()) { 5783 NameNode.stateChangeLog.info("STATE* Safe mode is already OFF"); 5784 return; 5785 } 5786 safeMode.leave(); 5787 } finally { 5788 writeUnlock("leaveSafeMode"); 5789 } 5790 } 5791 5792 String getSafeModeTip() { 5793 // There is no need to take readLock. 5794 // Don't use isInSafeMode as this.safeMode might be set to null. 5795 // after isInSafeMode returns. 
5796 boolean inSafeMode; 5797 SafeModeInfo safeMode = this.safeMode; 5798 if (safeMode == null) { 5799 inSafeMode = false; 5800 } else { 5801 inSafeMode = safeMode.isOn(); 5802 } 5803 5804 if (!inSafeMode) { 5805 return ""; 5806 } else { 5807 return safeMode.getTurnOffTip(); 5808 } 5809 } 5810 5811 CheckpointSignature rollEditLog() throws IOException { 5812 checkSuperuserPrivilege(); 5813 checkOperation(OperationCategory.JOURNAL); 5814 writeLock(); 5815 try { 5816 checkOperation(OperationCategory.JOURNAL); 5817 checkNameNodeSafeMode("Log not rolled"); 5818 if (Server.isRpcInvocation()) { 5819 LOG.info("Roll Edit Log from " + Server.getRemoteAddress()); 5820 } 5821 return getFSImage().rollEditLog(); 5822 } finally { 5823 writeUnlock("rollEditLog"); 5824 } 5825 } 5826 5827 NamenodeCommand startCheckpoint(NamenodeRegistration backupNode, 5828 NamenodeRegistration activeNamenode) throws IOException { 5829 checkOperation(OperationCategory.CHECKPOINT); 5830 writeLock(); 5831 try { 5832 checkOperation(OperationCategory.CHECKPOINT); 5833 checkNameNodeSafeMode("Checkpoint not started"); 5834 5835 LOG.info("Start checkpoint for " + backupNode.getAddress()); 5836 NamenodeCommand cmd = getFSImage().startCheckpoint(backupNode, 5837 activeNamenode); 5838 getEditLog().logSync(); 5839 return cmd; 5840 } finally { 5841 writeUnlock("startCheckpoint"); 5842 } 5843 } 5844 5845 public void processIncrementalBlockReport(final DatanodeID nodeID, 5846 final StorageReceivedDeletedBlocks srdb) 5847 throws IOException { 5848 writeLock(); 5849 try { 5850 blockManager.processIncrementalBlockReport(nodeID, srdb); 5851 } finally { 5852 writeUnlock("processIncrementalBlockReport"); 5853 } 5854 } 5855 5856 void endCheckpoint(NamenodeRegistration registration, 5857 CheckpointSignature sig) throws IOException { 5858 checkOperation(OperationCategory.CHECKPOINT); 5859 readLock(); 5860 try { 5861 checkOperation(OperationCategory.CHECKPOINT); 5862 checkNameNodeSafeMode("Checkpoint not ended"); 5863 
LOG.info("End checkpoint for " + registration.getAddress()); 5864 getFSImage().endCheckpoint(sig); 5865 } finally { 5866 readUnlock("endCheckpoint"); 5867 } 5868 } 5869 5870 PermissionStatus createFsOwnerPermissions(FsPermission permission) { 5871 return new PermissionStatus(fsOwner.getShortUserName(), supergroup, permission); 5872 } 5873 5874 private void checkUnreadableBySuperuser(FSPermissionChecker pc, 5875 INode inode, int snapshotId) 5876 throws IOException { 5877 if (pc.isSuperUser()) { 5878 for (XAttr xattr : FSDirXAttrOp.getXAttrs(dir, inode, snapshotId)) { 5879 if (XAttrHelper.getPrefixName(xattr). 5880 equals(SECURITY_XATTR_UNREADABLE_BY_SUPERUSER)) { 5881 throw new AccessControlException("Access is denied for " + 5882 pc.getUser() + " since the superuser is not allowed to " + 5883 "perform this operation."); 5884 } 5885 } 5886 } 5887 } 5888 5889 @Override 5890 public void checkSuperuserPrivilege() 5891 throws AccessControlException { 5892 if (isPermissionEnabled) { 5893 FSPermissionChecker pc = getPermissionChecker(); 5894 pc.checkSuperuserPrivilege(); 5895 } 5896 } 5897 5898 /** 5899 * Check to see if we have exceeded the limit on the number 5900 * of inodes. 5901 */ 5902 void checkFsObjectLimit() throws IOException { 5903 if (maxFsObjects != 0 && 5904 maxFsObjects <= dir.totalInodes() + getBlocksTotal()) { 5905 throw new IOException("Exceeded the configured number of objects " + 5906 maxFsObjects + " in the filesystem."); 5907 } 5908 } 5909 5910 /** 5911 * Get the total number of objects in the system. 5912 */ 5913 @Override // FSNamesystemMBean 5914 public long getMaxObjects() { 5915 return maxFsObjects; 5916 } 5917 5918 @Override // FSNamesystemMBean 5919 @Metric 5920 public long getFilesTotal() { 5921 // There is no need to take fSNamesystem's lock as 5922 // FSDirectory has its own lock. 
5923 return this.dir.totalInodes(); 5924 } 5925 5926 @Override // FSNamesystemMBean 5927 @Metric 5928 public long getPendingReplicationBlocks() { 5929 return blockManager.getPendingReplicationBlocksCount(); 5930 } 5931 5932 @Override // FSNamesystemMBean 5933 @Metric 5934 public long getUnderReplicatedBlocks() { 5935 return blockManager.getUnderReplicatedBlocksCount(); 5936 } 5937 5938 /** Returns number of blocks with corrupt replicas */ 5939 @Metric({"CorruptBlocks", "Number of blocks with corrupt replicas"}) 5940 public long getCorruptReplicaBlocks() { 5941 return blockManager.getCorruptReplicaBlocksCount(); 5942 } 5943 5944 @Override // FSNamesystemMBean 5945 @Metric 5946 public long getScheduledReplicationBlocks() { 5947 return blockManager.getScheduledReplicationBlocksCount(); 5948 } 5949 5950 @Override 5951 @Metric 5952 public long getPendingDeletionBlocks() { 5953 return blockManager.getPendingDeletionBlocksCount(); 5954 } 5955 5956 @Override 5957 public long getBlockDeletionStartTime() { 5958 return startTime + blockManager.getStartupDelayBlockDeletionInMs(); 5959 } 5960 5961 @Metric 5962 public long getExcessBlocks() { 5963 return blockManager.getExcessBlocksCount(); 5964 } 5965 5966 // HA-only metric 5967 @Metric 5968 public long getPostponedMisreplicatedBlocks() { 5969 return blockManager.getPostponedMisreplicatedBlocksCount(); 5970 } 5971 5972 // HA-only metric 5973 @Metric 5974 public int getPendingDataNodeMessageCount() { 5975 return blockManager.getPendingDataNodeMessageCount(); 5976 } 5977 5978 // HA-only metric 5979 @Metric 5980 public String getHAState() { 5981 return haContext.getState().toString(); 5982 } 5983 5984 // HA-only metric 5985 @Metric 5986 public long getMillisSinceLastLoadedEdits() { 5987 if (isInStandbyState() && editLogTailer != null) { 5988 return monotonicNow() - editLogTailer.getLastLoadTimeMs(); 5989 } else { 5990 return 0; 5991 } 5992 } 5993 5994 @Metric 5995 public int getBlockCapacity() { 5996 return 
blockManager.getCapacity(); 5997 } 5998 5999 @Override // FSNamesystemMBean 6000 public String getFSState() { 6001 return isInSafeMode() ? "safeMode" : "Operational"; 6002 } 6003 6004 private ObjectName mbeanName; 6005 private ObjectName mxbeanName; 6006 6007 /** 6008 * Register the FSNamesystem MBean using the name 6009 * "hadoop:service=NameNode,name=FSNamesystemState" 6010 */ 6011 private void registerMBean() { 6012 // We can only implement one MXBean interface, so we keep the old one. 6013 try { 6014 StandardMBean bean = new StandardMBean(this, FSNamesystemMBean.class); 6015 mbeanName = MBeans.register("NameNode", "FSNamesystemState", bean); 6016 } catch (NotCompliantMBeanException e) { 6017 throw new RuntimeException("Bad MBean setup", e); 6018 } 6019 6020 LOG.info("Registered FSNamesystemState MBean"); 6021 } 6022 6023 /** 6024 * shutdown FSNamesystem 6025 */ 6026 void shutdown() { 6027 if (snapshotManager != null) { 6028 snapshotManager.shutdown(); 6029 } 6030 if (mbeanName != null) { 6031 MBeans.unregister(mbeanName); 6032 mbeanName = null; 6033 } 6034 if (mxbeanName != null) { 6035 MBeans.unregister(mxbeanName); 6036 mxbeanName = null; 6037 } 6038 if (dir != null) { 6039 dir.shutdown(); 6040 } 6041 if (blockManager != null) { 6042 blockManager.shutdown(); 6043 } 6044 } 6045 6046 @Override // FSNamesystemMBean 6047 @Metric({"NumLiveDataNodes", "Number of datanodes which are currently live"}) 6048 public int getNumLiveDataNodes() { 6049 return getBlockManager().getDatanodeManager().getNumLiveDataNodes(); 6050 } 6051 6052 @Override // FSNamesystemMBean 6053 @Metric({"NumDeadDataNodes", "Number of datanodes which are currently dead"}) 6054 public int getNumDeadDataNodes() { 6055 return getBlockManager().getDatanodeManager().getNumDeadDataNodes(); 6056 } 6057 6058 @Override // FSNamesystemMBean 6059 @Metric({"NumDecomLiveDataNodes", 6060 "Number of datanodes which have been decommissioned and are now live"}) 6061 public int getNumDecomLiveDataNodes() { 6062 
final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>(); 6063 getBlockManager().getDatanodeManager().fetchDatanodes(live, null, false); 6064 int liveDecommissioned = 0; 6065 for (DatanodeDescriptor node : live) { 6066 liveDecommissioned += node.isDecommissioned() ? 1 : 0; 6067 } 6068 return liveDecommissioned; 6069 } 6070 6071 @Override // FSNamesystemMBean 6072 @Metric({"NumDecomDeadDataNodes", 6073 "Number of datanodes which have been decommissioned and are now dead"}) 6074 public int getNumDecomDeadDataNodes() { 6075 final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>(); 6076 getBlockManager().getDatanodeManager().fetchDatanodes(null, dead, false); 6077 int deadDecommissioned = 0; 6078 for (DatanodeDescriptor node : dead) { 6079 deadDecommissioned += node.isDecommissioned() ? 1 : 0; 6080 } 6081 return deadDecommissioned; 6082 } 6083 6084 @Override // FSNamesystemMBean 6085 @Metric({"VolumeFailuresTotal", 6086 "Total number of volume failures across all Datanodes"}) 6087 public int getVolumeFailuresTotal() { 6088 List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>(); 6089 getBlockManager().getDatanodeManager().fetchDatanodes(live, null, false); 6090 int volumeFailuresTotal = 0; 6091 for (DatanodeDescriptor node: live) { 6092 volumeFailuresTotal += node.getVolumeFailures(); 6093 } 6094 return volumeFailuresTotal; 6095 } 6096 6097 @Override // FSNamesystemMBean 6098 @Metric({"EstimatedCapacityLostTotal", 6099 "An estimate of the total capacity lost due to volume failures"}) 6100 public long getEstimatedCapacityLostTotal() { 6101 List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>(); 6102 getBlockManager().getDatanodeManager().fetchDatanodes(live, null, false); 6103 long estimatedCapacityLostTotal = 0; 6104 for (DatanodeDescriptor node: live) { 6105 VolumeFailureSummary volumeFailureSummary = node.getVolumeFailureSummary(); 6106 if (volumeFailureSummary != null) { 6107 estimatedCapacityLostTotal += 
6108 volumeFailureSummary.getEstimatedCapacityLostTotal(); 6109 } 6110 } 6111 return estimatedCapacityLostTotal; 6112 } 6113 6114 @Override // FSNamesystemMBean 6115 @Metric({"NumDecommissioningDataNodes", 6116 "Number of datanodes in decommissioning state"}) 6117 public int getNumDecommissioningDataNodes() { 6118 return getBlockManager().getDatanodeManager().getDecommissioningNodes() 6119 .size(); 6120 } 6121 6122 @Override // FSNamesystemMBean 6123 @Metric({"StaleDataNodes", 6124 "Number of datanodes marked stale due to delayed heartbeat"}) 6125 public int getNumStaleDataNodes() { 6126 return getBlockManager().getDatanodeManager().getNumStaleNodes(); 6127 } 6128 6129 /** 6130 * Storages are marked as "content stale" after NN restart or fails over and 6131 * before NN receives the first Heartbeat followed by the first Blockreport. 6132 */ 6133 @Override // FSNamesystemMBean 6134 @Metric({"NumStaleStorages", 6135 "Number of storages marked as content stale"}) 6136 public int getNumStaleStorages() { 6137 return getBlockManager().getDatanodeManager().getNumStaleStorages(); 6138 } 6139 6140 @Override // FSNamesystemMBean 6141 public String getTopUserOpCounts() { 6142 if (!topConf.isEnabled) { 6143 return null; 6144 } 6145 6146 Date now = new Date(); 6147 final List<RollingWindowManager.TopWindow> topWindows = 6148 topMetrics.getTopWindows(); 6149 Map<String, Object> topMap = new TreeMap<String, Object>(); 6150 topMap.put("windows", topWindows); 6151 topMap.put("timestamp", DFSUtil.dateToIso8601String(now)); 6152 ObjectMapper mapper = new ObjectMapper(); 6153 try { 6154 return mapper.writeValueAsString(topMap); 6155 } catch (IOException e) { 6156 LOG.warn("Failed to fetch TopUser metrics", e); 6157 } 6158 return null; 6159 } 6160 6161 /** 6162 * Increments, logs and then returns the stamp 6163 */ 6164 long nextGenerationStamp(boolean legacyBlock) 6165 throws IOException, SafeModeException { 6166 assert hasWriteLock(); 6167 checkNameNodeSafeMode("Cannot get next 
generation stamp"); 6168 6169 long gs = blockIdManager.nextGenerationStamp(legacyBlock); 6170 if (legacyBlock) { 6171 getEditLog().logGenerationStampV1(gs); 6172 } else { 6173 getEditLog().logGenerationStampV2(gs); 6174 } 6175 6176 // NB: callers sync the log 6177 return gs; 6178 } 6179 6180 /** 6181 * Increments, logs and then returns the block ID 6182 */ 6183 private long nextBlockId() throws IOException { 6184 assert hasWriteLock(); 6185 checkNameNodeSafeMode("Cannot get next block ID"); 6186 final long blockId = blockIdManager.nextBlockId(); 6187 getEditLog().logAllocateBlockId(blockId); 6188 // NB: callers sync the log 6189 return blockId; 6190 } 6191 6192 private boolean isFileDeleted(INodeFile file) { 6193 // Not in the inodeMap or in the snapshot but marked deleted. 6194 if (dir.getInode(file.getId()) == null) { 6195 return true; 6196 } 6197 6198 // look at the path hierarchy to see if one parent is deleted by recursive 6199 // deletion 6200 INode tmpChild = file; 6201 INodeDirectory tmpParent = file.getParent(); 6202 while (true) { 6203 if (tmpParent == null) { 6204 return true; 6205 } 6206 6207 INode childINode = tmpParent.getChild(tmpChild.getLocalNameBytes(), 6208 Snapshot.CURRENT_STATE_ID); 6209 if (childINode == null || !childINode.equals(tmpChild)) { 6210 // a newly created INode with the same name as an already deleted one 6211 // would be a different INode than the deleted one 6212 return true; 6213 } 6214 6215 if (tmpParent.isRoot()) { 6216 break; 6217 } 6218 6219 tmpChild = tmpParent; 6220 tmpParent = tmpParent.getParent(); 6221 } 6222 6223 if (file.isWithSnapshot() && 6224 file.getFileWithSnapshotFeature().isCurrentFileDeleted()) { 6225 return true; 6226 } 6227 return false; 6228 } 6229 6230 private INodeFile checkUCBlock(ExtendedBlock block, 6231 String clientName) throws IOException { 6232 assert hasWriteLock(); 6233 checkNameNodeSafeMode("Cannot get a new generation stamp and an " 6234 + "access token for block " + block); 6235 6236 // check 
stored block state 6237 BlockInfoContiguous storedBlock = getStoredBlock(ExtendedBlock.getLocalBlock(block)); 6238 if (storedBlock == null || 6239 storedBlock.getBlockUCState() != BlockUCState.UNDER_CONSTRUCTION) { 6240 throw new IOException(block + 6241 " does not exist or is not under Construction" + storedBlock); 6242 } 6243 6244 // check file inode 6245 final INodeFile file = ((INode)storedBlock.getBlockCollection()).asFile(); 6246 if (file == null || !file.isUnderConstruction() || isFileDeleted(file)) { 6247 throw new IOException("The file " + storedBlock + 6248 " belonged to does not exist or it is not under construction."); 6249 } 6250 6251 // check lease 6252 if (clientName == null 6253 || !clientName.equals(file.getFileUnderConstructionFeature() 6254 .getClientName())) { 6255 throw new LeaseExpiredException("Lease mismatch: " + block + 6256 " is accessed by a non lease holder " + clientName); 6257 } 6258 6259 return file; 6260 } 6261 6262 /** 6263 * Client is reporting some bad block locations. 6264 */ 6265 void reportBadBlocks(LocatedBlock[] blocks) throws IOException { 6266 checkOperation(OperationCategory.WRITE); 6267 writeLock(); 6268 try { 6269 checkOperation(OperationCategory.WRITE); 6270 for (int i = 0; i < blocks.length; i++) { 6271 ExtendedBlock blk = blocks[i].getBlock(); 6272 DatanodeInfo[] nodes = blocks[i].getLocations(); 6273 String[] storageIDs = blocks[i].getStorageIDs(); 6274 for (int j = 0; j < nodes.length; j++) { 6275 NameNode.stateChangeLog.info("*DIR* reportBadBlocks for block: {} on" 6276 + " datanode: {}", blk, nodes[j].getXferAddr()); 6277 blockManager.findAndMarkBlockAsCorrupt(blk, nodes[j], 6278 storageIDs == null ? 
null: storageIDs[j], 6279 "client machine reported it"); 6280 } 6281 } 6282 } finally { 6283 writeUnlock("reportBadBlocks"); 6284 } 6285 } 6286 6287 /** 6288 * Get a new generation stamp together with an access token for 6289 * a block under construction 6290 * 6291 * This method is called for recovering a failed pipeline or setting up 6292 * a pipeline to append to a block. 6293 * 6294 * @param block a block 6295 * @param clientName the name of a client 6296 * @return a located block with a new generation stamp and an access token 6297 * @throws IOException if any error occurs 6298 */ 6299 LocatedBlock updateBlockForPipeline(ExtendedBlock block, 6300 String clientName) throws IOException { 6301 LocatedBlock locatedBlock; 6302 checkOperation(OperationCategory.WRITE); 6303 writeLock(); 6304 try { 6305 checkOperation(OperationCategory.WRITE); 6306 6307 // check vadility of parameters 6308 checkUCBlock(block, clientName); 6309 6310 // get a new generation stamp and an access token 6311 block.setGenerationStamp(nextGenerationStamp(blockIdManager.isLegacyBlock(block.getLocalBlock()))); 6312 locatedBlock = new LocatedBlock(block, new DatanodeInfo[0]); 6313 blockManager.setBlockToken(locatedBlock, AccessMode.WRITE); 6314 } finally { 6315 writeUnlock("bumpBlockGenerationStamp"); 6316 } 6317 // Ensure we record the new generation stamp 6318 getEditLog().logSync(); 6319 return locatedBlock; 6320 } 6321 6322 /** 6323 * Update a pipeline for a block under construction 6324 * 6325 * @param clientName the name of the client 6326 * @param oldBlock and old block 6327 * @param newBlock a new block with a new generation stamp and length 6328 * @param newNodes datanodes in the pipeline 6329 * @throws IOException if any error occurs 6330 */ 6331 void updatePipeline( 6332 String clientName, ExtendedBlock oldBlock, ExtendedBlock newBlock, 6333 DatanodeID[] newNodes, String[] newStorageIDs, boolean logRetryCache) 6334 throws IOException { 6335 checkOperation(OperationCategory.WRITE); 
6336 LOG.info("updatePipeline(" + oldBlock.getLocalBlock() 6337 + ", newGS=" + newBlock.getGenerationStamp() 6338 + ", newLength=" + newBlock.getNumBytes() 6339 + ", newNodes=" + Arrays.asList(newNodes) 6340 + ", client=" + clientName 6341 + ")"); 6342 waitForLoadingFSImage(); 6343 writeLock(); 6344 try { 6345 checkOperation(OperationCategory.WRITE); 6346 checkNameNodeSafeMode("Pipeline not updated"); 6347 assert newBlock.getBlockId()==oldBlock.getBlockId() : newBlock + " and " 6348 + oldBlock + " has different block identifier"; 6349 updatePipelineInternal(clientName, oldBlock, newBlock, newNodes, 6350 newStorageIDs, logRetryCache); 6351 } finally { 6352 writeUnlock("updatePipeline"); 6353 } 6354 getEditLog().logSync(); 6355 LOG.info("updatePipeline(" + oldBlock.getLocalBlock() + " => " 6356 + newBlock.getLocalBlock() + ") success"); 6357 } 6358 6359 private void updatePipelineInternal(String clientName, ExtendedBlock oldBlock, 6360 ExtendedBlock newBlock, DatanodeID[] newNodes, String[] newStorageIDs, 6361 boolean logRetryCache) 6362 throws IOException { 6363 assert hasWriteLock(); 6364 // check the vadility of the block and lease holder name 6365 final INodeFile pendingFile = checkUCBlock(oldBlock, clientName); 6366 final String src = pendingFile.getFullPathName(); 6367 final BlockInfoContiguousUnderConstruction blockinfo 6368 = (BlockInfoContiguousUnderConstruction)pendingFile.getLastBlock(); 6369 6370 // check new GS & length: this is not expected 6371 if (newBlock.getGenerationStamp() <= blockinfo.getGenerationStamp() || 6372 newBlock.getNumBytes() < blockinfo.getNumBytes()) { 6373 String msg = "Update " + oldBlock + " (len = " + 6374 blockinfo.getNumBytes() + ") to an older state: " + newBlock + 6375 " (len = " + newBlock.getNumBytes() +")"; 6376 LOG.warn(msg); 6377 throw new IOException(msg); 6378 } 6379 6380 // Update old block with the new generation stamp and new length 6381 blockManager.updateLastBlock(blockinfo, newBlock); 6382 6383 // find the 
DatanodeDescriptor objects 6384 final DatanodeStorageInfo[] storages = blockManager.getDatanodeManager() 6385 .getDatanodeStorageInfos(newNodes, newStorageIDs, 6386 "src=%s, oldBlock=%s, newBlock=%s, clientName=%s", 6387 src, oldBlock, newBlock, clientName); 6388 blockinfo.setExpectedLocations(storages); 6389 6390 persistBlocks(src, pendingFile, logRetryCache); 6391 } 6392 6393 // rename was successful. If any part of the renamed subtree had 6394 // files that were being written to, update with new filename. 6395 void unprotectedChangeLease(String src, String dst) { 6396 assert hasWriteLock(); 6397 leaseManager.changeLease(src, dst); 6398 } 6399 6400 /** 6401 * Serializes leases. 6402 */ 6403 void saveFilesUnderConstruction(DataOutputStream out, 6404 Map<Long, INodeFile> snapshotUCMap) throws IOException { 6405 // This is run by an inferior thread of saveNamespace, which holds a read 6406 // lock on our behalf. If we took the read lock here, we could block 6407 // for fairness if a writer is waiting on the lock. 6408 synchronized (leaseManager) { 6409 Map<String, INodeFile> nodes = leaseManager.getINodesUnderConstruction(); 6410 for (Map.Entry<String, INodeFile> entry : nodes.entrySet()) { 6411 // TODO: for HDFS-5428, because of rename operations, some 6412 // under-construction files that are 6413 // in the current fs directory can also be captured in the 6414 // snapshotUCMap. We should remove them from the snapshotUCMap. 
6415 snapshotUCMap.remove(entry.getValue().getId()); 6416 } 6417 6418 out.writeInt(nodes.size() + snapshotUCMap.size()); // write the size 6419 for (Map.Entry<String, INodeFile> entry : nodes.entrySet()) { 6420 FSImageSerialization.writeINodeUnderConstruction( 6421 out, entry.getValue(), entry.getKey()); 6422 } 6423 for (Map.Entry<Long, INodeFile> entry : snapshotUCMap.entrySet()) { 6424 // for those snapshot INodeFileUC, we use "/.reserved/.inodes/<inodeid>" 6425 // as their paths 6426 StringBuilder b = new StringBuilder(); 6427 b.append(FSDirectory.DOT_RESERVED_PATH_PREFIX) 6428 .append(Path.SEPARATOR).append(FSDirectory.DOT_INODES_STRING) 6429 .append(Path.SEPARATOR).append(entry.getValue().getId()); 6430 FSImageSerialization.writeINodeUnderConstruction( 6431 out, entry.getValue(), b.toString()); 6432 } 6433 } 6434 } 6435 6436 /** 6437 * @return all the under-construction files in the lease map 6438 */ 6439 Map<String, INodeFile> getFilesUnderConstruction() { 6440 synchronized (leaseManager) { 6441 return leaseManager.getINodesUnderConstruction(); 6442 } 6443 } 6444 6445 /** 6446 * Register a Backup name-node, verifying that it belongs 6447 * to the correct namespace, and adding it to the set of 6448 * active journals if necessary. 
6449 * 6450 * @param bnReg registration of the new BackupNode 6451 * @param nnReg registration of this NameNode 6452 * @throws IOException if the namespace IDs do not match 6453 */ 6454 void registerBackupNode(NamenodeRegistration bnReg, 6455 NamenodeRegistration nnReg) throws IOException { 6456 writeLock(); 6457 try { 6458 if(getFSImage().getStorage().getNamespaceID() 6459 != bnReg.getNamespaceID()) 6460 throw new IOException("Incompatible namespaceIDs: " 6461 + " Namenode namespaceID = " 6462 + getFSImage().getStorage().getNamespaceID() + "; " 6463 + bnReg.getRole() + 6464 " node namespaceID = " + bnReg.getNamespaceID()); 6465 if (bnReg.getRole() == NamenodeRole.BACKUP) { 6466 getFSImage().getEditLog().registerBackupNode( 6467 bnReg, nnReg); 6468 } 6469 } finally { 6470 writeUnlock("registerBackupNode"); 6471 } 6472 } 6473 6474 /** 6475 * Release (unregister) backup node. 6476 * <p> 6477 * Find and remove the backup stream corresponding to the node. 6478 * @throws IOException 6479 */ 6480 void releaseBackupNode(NamenodeRegistration registration) 6481 throws IOException { 6482 checkOperation(OperationCategory.WRITE); 6483 writeLock(); 6484 try { 6485 checkOperation(OperationCategory.WRITE); 6486 if(getFSImage().getStorage().getNamespaceID() 6487 != registration.getNamespaceID()) 6488 throw new IOException("Incompatible namespaceIDs: " 6489 + " Namenode namespaceID = " 6490 + getFSImage().getStorage().getNamespaceID() + "; " 6491 + registration.getRole() + 6492 " node namespaceID = " + registration.getNamespaceID()); 6493 getEditLog().releaseBackupStream(registration); 6494 } finally { 6495 writeUnlock("releaseBackupNode"); 6496 } 6497 } 6498 6499 static class CorruptFileBlockInfo { 6500 final String path; 6501 final Block block; 6502 6503 public CorruptFileBlockInfo(String p, Block b) { 6504 path = p; 6505 block = b; 6506 } 6507 6508 @Override 6509 public String toString() { 6510 return block.getBlockName() + "\t" + path; 6511 } 6512 } 6513 /** 6514 * @param path 
Restrict corrupt files to this portion of namespace. 6515 * @param cookieTab Support for continuation; cookieTab tells where 6516 * to start from 6517 * @return a list in which each entry describes a corrupt file/block 6518 * @throws IOException 6519 */ 6520 Collection<CorruptFileBlockInfo> listCorruptFileBlocks(String path, 6521 String[] cookieTab) throws IOException { 6522 checkSuperuserPrivilege(); 6523 checkOperation(OperationCategory.READ); 6524 6525 int count = 0; 6526 ArrayList<CorruptFileBlockInfo> corruptFiles = 6527 new ArrayList<CorruptFileBlockInfo>(); 6528 if (cookieTab == null) { 6529 cookieTab = new String[] { null }; 6530 } 6531 6532 // Do a quick check if there are any corrupt files without taking the lock 6533 if (blockManager.getMissingBlocksCount() == 0) { 6534 if (cookieTab[0] == null) { 6535 cookieTab[0] = String.valueOf(getIntCookie(cookieTab[0])); 6536 } 6537 if (LOG.isDebugEnabled()) { 6538 LOG.debug("there are no corrupt file blocks."); 6539 } 6540 return corruptFiles; 6541 } 6542 6543 readLock(); 6544 try { 6545 checkOperation(OperationCategory.READ); 6546 if (!isPopulatingReplQueues()) { 6547 throw new IOException("Cannot run listCorruptFileBlocks because " + 6548 "replication queues have not been initialized."); 6549 } 6550 // print a limited # of corrupt files per call 6551 6552 final Iterator<Block> blkIterator = blockManager.getCorruptReplicaBlockIterator(); 6553 6554 int skip = getIntCookie(cookieTab[0]); 6555 for (int i = 0; i < skip && blkIterator.hasNext(); i++) { 6556 blkIterator.next(); 6557 } 6558 6559 while (blkIterator.hasNext()) { 6560 Block blk = blkIterator.next(); 6561 final INode inode = (INode)blockManager.getBlockCollection(blk); 6562 skip++; 6563 if (inode != null && blockManager.countNodes(blk).liveReplicas() == 0) { 6564 String src = inode.getFullPathName(); 6565 if (src.startsWith(path)){ 6566 corruptFiles.add(new CorruptFileBlockInfo(src, blk)); 6567 count++; 6568 if (count >= 
DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED) 6569 break; 6570 } 6571 } 6572 } 6573 cookieTab[0] = String.valueOf(skip); 6574 if (LOG.isDebugEnabled()) { 6575 LOG.debug("list corrupt file blocks returned: " + count); 6576 } 6577 return corruptFiles; 6578 } finally { 6579 readUnlock("listCorruptFileBlocks"); 6580 } 6581 } 6582 6583 /** 6584 * Convert string cookie to integer. 6585 */ 6586 private static int getIntCookie(String cookie){ 6587 int c; 6588 if(cookie == null){ 6589 c = 0; 6590 } else { 6591 try{ 6592 c = Integer.parseInt(cookie); 6593 }catch (NumberFormatException e) { 6594 c = 0; 6595 } 6596 } 6597 c = Math.max(0, c); 6598 return c; 6599 } 6600 6601 /** 6602 * Create delegation token secret manager 6603 */ 6604 private DelegationTokenSecretManager createDelegationTokenSecretManager( 6605 Configuration conf) { 6606 return new DelegationTokenSecretManager(conf.getLong( 6607 DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY, 6608 DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT), 6609 conf.getLong(DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY, 6610 DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT), 6611 conf.getLong(DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY, 6612 DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT), 6613 DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL, 6614 conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY, 6615 DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT), 6616 this); 6617 } 6618 6619 /** 6620 * Returns the DelegationTokenSecretManager instance in the namesystem. 
6621 * @return delegation token secret manager object 6622 */ 6623 DelegationTokenSecretManager getDelegationTokenSecretManager() { 6624 return dtSecretManager; 6625 } 6626 6627 /** 6628 * @param renewer Renewer information 6629 * @return delegation toek 6630 * @throws IOException on error 6631 */ 6632 Token<DelegationTokenIdentifier> getDelegationToken(Text renewer) 6633 throws IOException { 6634 Token<DelegationTokenIdentifier> token; 6635 checkOperation(OperationCategory.WRITE); 6636 writeLock(); 6637 try { 6638 checkOperation(OperationCategory.WRITE); 6639 checkNameNodeSafeMode("Cannot issue delegation token"); 6640 if (!isAllowedDelegationTokenOp()) { 6641 throw new IOException( 6642 "Delegation Token can be issued only with kerberos or web authentication"); 6643 } 6644 if (dtSecretManager == null || !dtSecretManager.isRunning()) { 6645 LOG.warn("trying to get DT with no secret manager running"); 6646 return null; 6647 } 6648 6649 UserGroupInformation ugi = getRemoteUser(); 6650 String user = ugi.getUserName(); 6651 Text owner = new Text(user); 6652 Text realUser = null; 6653 if (ugi.getRealUser() != null) { 6654 realUser = new Text(ugi.getRealUser().getUserName()); 6655 } 6656 DelegationTokenIdentifier dtId = new DelegationTokenIdentifier(owner, 6657 renewer, realUser); 6658 token = new Token<DelegationTokenIdentifier>( 6659 dtId, dtSecretManager); 6660 long expiryTime = dtSecretManager.getTokenExpiryTime(dtId); 6661 getEditLog().logGetDelegationToken(dtId, expiryTime); 6662 } finally { 6663 writeUnlock("getDelegationToken"); 6664 } 6665 getEditLog().logSync(); 6666 return token; 6667 } 6668 6669 /** 6670 * 6671 * @param token token to renew 6672 * @return new expiryTime of the token 6673 * @throws InvalidToken if {@code token} is invalid 6674 * @throws IOException on other errors 6675 */ 6676 long renewDelegationToken(Token<DelegationTokenIdentifier> token) 6677 throws InvalidToken, IOException { 6678 long expiryTime; 6679 
checkOperation(OperationCategory.WRITE); 6680 writeLock(); 6681 try { 6682 checkOperation(OperationCategory.WRITE); 6683 6684 checkNameNodeSafeMode("Cannot renew delegation token"); 6685 if (!isAllowedDelegationTokenOp()) { 6686 throw new IOException( 6687 "Delegation Token can be renewed only with kerberos or web authentication"); 6688 } 6689 String renewer = getRemoteUser().getShortUserName(); 6690 expiryTime = dtSecretManager.renewToken(token, renewer); 6691 DelegationTokenIdentifier id = new DelegationTokenIdentifier(); 6692 ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier()); 6693 DataInputStream in = new DataInputStream(buf); 6694 id.readFields(in); 6695 getEditLog().logRenewDelegationToken(id, expiryTime); 6696 } finally { 6697 writeUnlock("renewDelegationToken"); 6698 } 6699 getEditLog().logSync(); 6700 return expiryTime; 6701 } 6702 6703 /** 6704 * 6705 * @param token token to cancel 6706 * @throws IOException on error 6707 */ 6708 void cancelDelegationToken(Token<DelegationTokenIdentifier> token) 6709 throws IOException { 6710 checkOperation(OperationCategory.WRITE); 6711 writeLock(); 6712 try { 6713 checkOperation(OperationCategory.WRITE); 6714 6715 checkNameNodeSafeMode("Cannot cancel delegation token"); 6716 String canceller = getRemoteUser().getUserName(); 6717 DelegationTokenIdentifier id = dtSecretManager 6718 .cancelToken(token, canceller); 6719 getEditLog().logCancelDelegationToken(id); 6720 } finally { 6721 writeUnlock("cancelDelegationToken"); 6722 } 6723 getEditLog().logSync(); 6724 } 6725 6726 /** 6727 * @param out save state of the secret manager 6728 * @param sdPath String storage directory path 6729 */ 6730 void saveSecretManagerStateCompat(DataOutputStream out, String sdPath) 6731 throws IOException { 6732 dtSecretManager.saveSecretManagerStateCompat(out, sdPath); 6733 } 6734 6735 SecretManagerState saveSecretManagerState() { 6736 return dtSecretManager.saveSecretManagerState(); 6737 } 6738 6739 /** 6740 * @param in 
load the state of secret manager from input stream 6741 */ 6742 void loadSecretManagerStateCompat(DataInput in) throws IOException { 6743 dtSecretManager.loadSecretManagerStateCompat(in); 6744 } 6745 6746 void loadSecretManagerState(SecretManagerSection s, 6747 List<SecretManagerSection.DelegationKey> keys, 6748 List<SecretManagerSection.PersistToken> tokens) throws IOException { 6749 dtSecretManager.loadSecretManagerState(new SecretManagerState(s, keys, tokens)); 6750 } 6751 6752 /** 6753 * Log the updateMasterKey operation to edit logs 6754 * 6755 * @param key new delegation key. 6756 */ 6757 public void logUpdateMasterKey(DelegationKey key) { 6758 6759 assert !isInSafeMode() : 6760 "this should never be called while in safemode, since we stop " + 6761 "the DT manager before entering safemode!"; 6762 // edit log rolling is not thread-safe and must be protected by the 6763 // fsn lock. not updating namespace so read lock is sufficient. 6764 assert hasReadLock(); 6765 getEditLog().logUpdateMasterKey(key); 6766 getEditLog().logSync(); 6767 } 6768 6769 /** 6770 * Log the cancellation of expired tokens to edit logs 6771 * 6772 * @param id token identifier to cancel 6773 */ 6774 public void logExpireDelegationToken(DelegationTokenIdentifier id) { 6775 assert !isInSafeMode() : 6776 "this should never be called while in safemode, since we stop " + 6777 "the DT manager before entering safemode!"; 6778 // edit log rolling is not thread-safe and must be protected by the 6779 // fsn lock. not updating namespace so read lock is sufficient. 
6780 assert hasReadLock(); 6781 // do not logSync so expiration edits are batched 6782 getEditLog().logCancelDelegationToken(id); 6783 } 6784 6785 private void logReassignLease(String leaseHolder, String src, 6786 String newHolder) { 6787 assert hasWriteLock(); 6788 getEditLog().logReassignLease(leaseHolder, src, newHolder); 6789 } 6790 6791 /** 6792 * 6793 * @return true if delegation token operation is allowed 6794 */ 6795 private boolean isAllowedDelegationTokenOp() throws IOException { 6796 AuthenticationMethod authMethod = getConnectionAuthenticationMethod(); 6797 if (UserGroupInformation.isSecurityEnabled() 6798 && (authMethod != AuthenticationMethod.KERBEROS) 6799 && (authMethod != AuthenticationMethod.KERBEROS_SSL) 6800 && (authMethod != AuthenticationMethod.CERTIFICATE)) { 6801 return false; 6802 } 6803 return true; 6804 } 6805 6806 /** 6807 * Returns authentication method used to establish the connection 6808 * @return AuthenticationMethod used to establish connection 6809 * @throws IOException 6810 */ 6811 private AuthenticationMethod getConnectionAuthenticationMethod() 6812 throws IOException { 6813 UserGroupInformation ugi = getRemoteUser(); 6814 AuthenticationMethod authMethod = ugi.getAuthenticationMethod(); 6815 if (authMethod == AuthenticationMethod.PROXY) { 6816 authMethod = ugi.getRealUser().getAuthenticationMethod(); 6817 } 6818 return authMethod; 6819 } 6820 6821 /** 6822 * Client invoked methods are invoked over RPC and will be in 6823 * RPC call context even if the client exits. 
6824 */ 6825 boolean isExternalInvocation() { 6826 return Server.isRpcInvocation() || NamenodeWebHdfsMethods.isWebHdfsInvocation(); 6827 } 6828 6829 private static InetAddress getRemoteIp() { 6830 InetAddress ip = Server.getRemoteIp(); 6831 if (ip != null) { 6832 return ip; 6833 } 6834 return NamenodeWebHdfsMethods.getRemoteIp(); 6835 } 6836 6837 // optimize ugi lookup for RPC operations to avoid a trip through 6838 // UGI.getCurrentUser which is synch'ed 6839 private static UserGroupInformation getRemoteUser() throws IOException { 6840 return NameNode.getRemoteUser(); 6841 } 6842 6843 /** 6844 * Log fsck event in the audit log 6845 */ 6846 void logFsckEvent(String src, InetAddress remoteAddress) throws IOException { 6847 if (isAuditEnabled()) { 6848 logAuditEvent(true, getRemoteUser(), 6849 remoteAddress, 6850 "fsck", src, null, null); 6851 } 6852 } 6853 /** 6854 * Register NameNodeMXBean 6855 */ 6856 private void registerMXBean() { 6857 mxbeanName = MBeans.register("NameNode", "NameNodeInfo", this); 6858 } 6859 6860 /** 6861 * Class representing Namenode information for JMX interfaces 6862 */ 6863 @Override // NameNodeMXBean 6864 public String getVersion() { 6865 return VersionInfo.getVersion() + ", r" + VersionInfo.getRevision(); 6866 } 6867 6868 @Override // NameNodeMXBean 6869 public long getUsed() { 6870 return this.getCapacityUsed(); 6871 } 6872 6873 @Override // NameNodeMXBean 6874 public long getFree() { 6875 return this.getCapacityRemaining(); 6876 } 6877 6878 @Override // NameNodeMXBean 6879 public long getTotal() { 6880 return this.getCapacityTotal(); 6881 } 6882 6883 @Override // NameNodeMXBean 6884 public String getSafemode() { 6885 if (!this.isInSafeMode()) 6886 return ""; 6887 return "Safe mode is ON. 
" + this.getSafeModeTip(); 6888 } 6889 6890 @Override // NameNodeMXBean 6891 public boolean isUpgradeFinalized() { 6892 return this.getFSImage().isUpgradeFinalized(); 6893 } 6894 6895 @Override // NameNodeMXBean 6896 public long getNonDfsUsedSpace() { 6897 return datanodeStatistics.getCapacityUsedNonDFS(); 6898 } 6899 6900 @Override // NameNodeMXBean 6901 public float getPercentUsed() { 6902 return datanodeStatistics.getCapacityUsedPercent(); 6903 } 6904 6905 @Override // NameNodeMXBean 6906 public long getBlockPoolUsedSpace() { 6907 return datanodeStatistics.getBlockPoolUsed(); 6908 } 6909 6910 @Override // NameNodeMXBean 6911 public float getPercentBlockPoolUsed() { 6912 return datanodeStatistics.getPercentBlockPoolUsed(); 6913 } 6914 6915 @Override // NameNodeMXBean 6916 public float getPercentRemaining() { 6917 return datanodeStatistics.getCapacityRemainingPercent(); 6918 } 6919 6920 @Override // NameNodeMXBean 6921 public long getCacheCapacity() { 6922 return datanodeStatistics.getCacheCapacity(); 6923 } 6924 6925 @Override // NameNodeMXBean 6926 public long getCacheUsed() { 6927 return datanodeStatistics.getCacheUsed(); 6928 } 6929 6930 @Override // NameNodeMXBean 6931 public long getTotalBlocks() { 6932 return getBlocksTotal(); 6933 } 6934 6935 @Override // NameNodeMXBean 6936 @Metric 6937 public long getTotalFiles() { 6938 return getFilesTotal(); 6939 } 6940 6941 @Override // NameNodeMXBean 6942 public long getNumberOfMissingBlocks() { 6943 return getMissingBlocksCount(); 6944 } 6945 6946 @Override // NameNodeMXBean 6947 public long getNumberOfMissingBlocksWithReplicationFactorOne() { 6948 return getMissingReplOneBlocksCount(); 6949 } 6950 6951 @Override // NameNodeMXBean 6952 public int getThreads() { 6953 return ManagementFactory.getThreadMXBean().getThreadCount(); 6954 } 6955 6956 /** 6957 * Returned information is a JSON representation of map with host name as the 6958 * key and value is a map of live node attribute keys to its values 6959 */ 6960 
@Override // NameNodeMXBean 6961 public String getLiveNodes() { 6962 final Map<String, Map<String,Object>> info = 6963 new HashMap<String, Map<String,Object>>(); 6964 final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>(); 6965 blockManager.getDatanodeManager().fetchDatanodes(live, null, false); 6966 for (DatanodeDescriptor node : live) { 6967 ImmutableMap.Builder<String, Object> innerinfo = 6968 ImmutableMap.<String,Object>builder(); 6969 innerinfo 6970 .put("infoAddr", node.getInfoAddr()) 6971 .put("infoSecureAddr", node.getInfoSecureAddr()) 6972 .put("xferaddr", node.getXferAddr()) 6973 .put("lastContact", getLastContact(node)) 6974 .put("usedSpace", getDfsUsed(node)) 6975 .put("adminState", node.getAdminState().toString()) 6976 .put("nonDfsUsedSpace", node.getNonDfsUsed()) 6977 .put("capacity", node.getCapacity()) 6978 .put("numBlocks", node.numBlocks()) 6979 .put("version", node.getSoftwareVersion()) 6980 .put("used", node.getDfsUsed()) 6981 .put("remaining", node.getRemaining()) 6982 .put("blockScheduled", node.getBlocksScheduled()) 6983 .put("blockPoolUsed", node.getBlockPoolUsed()) 6984 .put("blockPoolUsedPercent", node.getBlockPoolUsedPercent()) 6985 .put("volfails", node.getVolumeFailures()); 6986 VolumeFailureSummary volumeFailureSummary = node.getVolumeFailureSummary(); 6987 if (volumeFailureSummary != null) { 6988 innerinfo 6989 .put("failedStorageLocations", 6990 volumeFailureSummary.getFailedStorageLocations()) 6991 .put("lastVolumeFailureDate", 6992 volumeFailureSummary.getLastVolumeFailureDate()) 6993 .put("estimatedCapacityLostTotal", 6994 volumeFailureSummary.getEstimatedCapacityLostTotal()); 6995 } 6996 info.put(node.getHostName() + ":" + node.getXferPort(), innerinfo.build()); 6997 } 6998 return JSON.toString(info); 6999 } 7000 7001 /** 7002 * Returned information is a JSON representation of map with host name as the 7003 * key and value is a map of dead node attribute keys to its values 7004 */ 7005 @Override // 
NameNodeMXBean 7006 public String getDeadNodes() { 7007 final Map<String, Map<String, Object>> info = 7008 new HashMap<String, Map<String, Object>>(); 7009 final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>(); 7010 blockManager.getDatanodeManager().fetchDatanodes(null, dead, false); 7011 for (DatanodeDescriptor node : dead) { 7012 Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder() 7013 .put("lastContact", getLastContact(node)) 7014 .put("decommissioned", node.isDecommissioned()) 7015 .put("xferaddr", node.getXferAddr()) 7016 .build(); 7017 info.put(node.getHostName() + ":" + node.getXferPort(), innerinfo); 7018 } 7019 return JSON.toString(info); 7020 } 7021 7022 /** 7023 * Returned information is a JSON representation of map with host name as the 7024 * key and value is a map of decommissioning node attribute keys to its 7025 * values 7026 */ 7027 @Override // NameNodeMXBean 7028 public String getDecomNodes() { 7029 final Map<String, Map<String, Object>> info = 7030 new HashMap<String, Map<String, Object>>(); 7031 final List<DatanodeDescriptor> decomNodeList = blockManager.getDatanodeManager( 7032 ).getDecommissioningNodes(); 7033 for (DatanodeDescriptor node : decomNodeList) { 7034 Map<String, Object> innerinfo = ImmutableMap 7035 .<String, Object> builder() 7036 .put("xferaddr", node.getXferAddr()) 7037 .put("underReplicatedBlocks", 7038 node.decommissioningStatus.getUnderReplicatedBlocks()) 7039 .put("decommissionOnlyReplicas", 7040 node.decommissioningStatus.getDecommissionOnlyReplicas()) 7041 .put("underReplicateInOpenFiles", 7042 node.decommissioningStatus.getUnderReplicatedInOpenFiles()) 7043 .build(); 7044 info.put(node.getHostName() + ":" + node.getXferPort(), innerinfo); 7045 } 7046 return JSON.toString(info); 7047 } 7048 7049 private long getLastContact(DatanodeDescriptor alivenode) { 7050 return (monotonicNow() - alivenode.getLastUpdateMonotonic())/1000; 7051 } 7052 7053 private long getDfsUsed(DatanodeDescriptor 
alivenode) { 7054 return alivenode.getDfsUsed(); 7055 } 7056 7057 @Override // NameNodeMXBean 7058 public String getClusterId() { 7059 return getFSImage().getStorage().getClusterID(); 7060 } 7061 7062 @Override // NameNodeMXBean 7063 public String getBlockPoolId() { 7064 return blockPoolId; 7065 } 7066 7067 @Override // NameNodeMXBean 7068 public String getNameDirStatuses() { 7069 Map<String, Map<File, StorageDirType>> statusMap = 7070 new HashMap<String, Map<File, StorageDirType>>(); 7071 7072 Map<File, StorageDirType> activeDirs = new HashMap<File, StorageDirType>(); 7073 for (Iterator<StorageDirectory> it 7074 = getFSImage().getStorage().dirIterator(); it.hasNext();) { 7075 StorageDirectory st = it.next(); 7076 activeDirs.put(st.getRoot(), st.getStorageDirType()); 7077 } 7078 statusMap.put("active", activeDirs); 7079 7080 List<Storage.StorageDirectory> removedStorageDirs 7081 = getFSImage().getStorage().getRemovedStorageDirs(); 7082 Map<File, StorageDirType> failedDirs = new HashMap<File, StorageDirType>(); 7083 for (StorageDirectory st : removedStorageDirs) { 7084 failedDirs.put(st.getRoot(), st.getStorageDirType()); 7085 } 7086 statusMap.put("failed", failedDirs); 7087 7088 return JSON.toString(statusMap); 7089 } 7090 7091 @Override // NameNodeMXBean 7092 public String getNodeUsage() { 7093 float median = 0; 7094 float max = 0; 7095 float min = 0; 7096 float dev = 0; 7097 7098 final Map<String, Map<String,Object>> info = 7099 new HashMap<String, Map<String,Object>>(); 7100 final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>(); 7101 blockManager.getDatanodeManager().fetchDatanodes(live, null, true); 7102 7103 if (live.size() > 0) { 7104 float totalDfsUsed = 0; 7105 float[] usages = new float[live.size()]; 7106 int i = 0; 7107 for (DatanodeDescriptor dn : live) { 7108 usages[i++] = dn.getDfsUsedPercent(); 7109 totalDfsUsed += dn.getDfsUsedPercent(); 7110 } 7111 totalDfsUsed /= live.size(); 7112 Arrays.sort(usages); 7113 median = 
usages[usages.length / 2]; 7114 max = usages[usages.length - 1]; 7115 min = usages[0]; 7116 7117 for (i = 0; i < usages.length; i++) { 7118 dev += (usages[i] - totalDfsUsed) * (usages[i] - totalDfsUsed); 7119 } 7120 dev = (float) Math.sqrt(dev / usages.length); 7121 } 7122 7123 final Map<String, Object> innerInfo = new HashMap<String, Object>(); 7124 innerInfo.put("min", StringUtils.format("%.2f%%", min)); 7125 innerInfo.put("median", StringUtils.format("%.2f%%", median)); 7126 innerInfo.put("max", StringUtils.format("%.2f%%", max)); 7127 innerInfo.put("stdDev", StringUtils.format("%.2f%%", dev)); 7128 info.put("nodeUsage", innerInfo); 7129 7130 return JSON.toString(info); 7131 } 7132 7133 @Override // NameNodeMXBean 7134 public String getNameJournalStatus() { 7135 List<Map<String, String>> jasList = new ArrayList<Map<String, String>>(); 7136 FSEditLog log = getFSImage().getEditLog(); 7137 if (log != null) { 7138 // This flag can be false because we cannot hold a lock of FSEditLog 7139 // for metrics. 
7140 boolean openForWrite = log.isOpenForWriteWithoutLock(); 7141 for (JournalAndStream jas : log.getJournals()) { 7142 final Map<String, String> jasMap = new HashMap<String, String>(); 7143 String manager = jas.getManager().toString(); 7144 7145 jasMap.put("required", String.valueOf(jas.isRequired())); 7146 jasMap.put("disabled", String.valueOf(jas.isDisabled())); 7147 jasMap.put("manager", manager); 7148 7149 if (jas.isDisabled()) { 7150 jasMap.put("stream", "Failed"); 7151 } else if (openForWrite) { 7152 EditLogOutputStream elos = jas.getCurrentStream(); 7153 if (elos != null) { 7154 jasMap.put("stream", elos.generateReport()); 7155 } else { 7156 jasMap.put("stream", "not currently writing"); 7157 } 7158 } else { 7159 jasMap.put("stream", "open for read"); 7160 } 7161 jasList.add(jasMap); 7162 } 7163 } 7164 return JSON.toString(jasList); 7165 } 7166 7167 @Override // NameNodeMxBean 7168 public String getJournalTransactionInfo() { 7169 Map<String, String> txnIdMap = new HashMap<String, String>(); 7170 txnIdMap.put("LastAppliedOrWrittenTxId", 7171 Long.toString(this.getFSImage().getLastAppliedOrWrittenTxId())); 7172 txnIdMap.put("MostRecentCheckpointTxId", 7173 Long.toString(this.getFSImage().getMostRecentCheckpointTxId())); 7174 return JSON.toString(txnIdMap); 7175 } 7176 7177 @Override // NameNodeMXBean 7178 public String getNNStarted() { 7179 return getStartTime().toString(); 7180 } 7181 7182 @Override // NameNodeMXBean 7183 public String getCompileInfo() { 7184 return VersionInfo.getDate() + " by " + VersionInfo.getUser() + 7185 " from " + VersionInfo.getBranch(); 7186 } 7187 7188 /** @return the block manager. */ 7189 public BlockManager getBlockManager() { 7190 return blockManager; 7191 } 7192 7193 public BlockIdManager getBlockIdManager() { 7194 return blockIdManager; 7195 } 7196 7197 /** @return the FSDirectory. */ 7198 @Override 7199 public FSDirectory getFSDirectory() { 7200 return dir; 7201 } 7202 /** Set the FSDirectory. 
*/ 7203 @VisibleForTesting 7204 public void setFSDirectory(FSDirectory dir) { 7205 this.dir = dir; 7206 } 7207 /** @return the cache manager. */ 7208 public CacheManager getCacheManager() { 7209 return cacheManager; 7210 } 7211 7212 @Override // NameNodeMXBean 7213 public String getCorruptFiles() { 7214 List<String> list = new ArrayList<String>(); 7215 Collection<FSNamesystem.CorruptFileBlockInfo> corruptFileBlocks; 7216 try { 7217 corruptFileBlocks = listCorruptFileBlocks("/", null); 7218 int corruptFileCount = corruptFileBlocks.size(); 7219 if (corruptFileCount != 0) { 7220 for (FSNamesystem.CorruptFileBlockInfo c : corruptFileBlocks) { 7221 list.add(c.toString()); 7222 } 7223 } 7224 } catch (IOException e) { 7225 LOG.warn("Get corrupt file blocks returned error: " + e.getMessage()); 7226 } 7227 return JSON.toString(list); 7228 } 7229 7230 @Override //NameNodeMXBean 7231 public int getDistinctVersionCount() { 7232 return blockManager.getDatanodeManager().getDatanodesSoftwareVersions() 7233 .size(); 7234 } 7235 7236 @Override //NameNodeMXBean 7237 public Map<String, Integer> getDistinctVersions() { 7238 return blockManager.getDatanodeManager().getDatanodesSoftwareVersions(); 7239 } 7240 7241 @Override //NameNodeMXBean 7242 public String getSoftwareVersion() { 7243 return VersionInfo.getVersion(); 7244 } 7245 7246 /** 7247 * Verifies that the given identifier and password are valid and match. 7248 * @param identifier Token identifier. 7249 * @param password Password in the token. 
7250 */ 7251 public synchronized void verifyToken(DelegationTokenIdentifier identifier, 7252 byte[] password) throws InvalidToken, RetriableException { 7253 try { 7254 getDelegationTokenSecretManager().verifyToken(identifier, password); 7255 } catch (InvalidToken it) { 7256 if (inTransitionToActive()) { 7257 throw new RetriableException(it); 7258 } 7259 throw it; 7260 } 7261 } 7262 7263 @Override 7264 public boolean isGenStampInFuture(Block block) { 7265 return blockIdManager.isGenStampInFuture(block); 7266 } 7267 7268 @VisibleForTesting 7269 public EditLogTailer getEditLogTailer() { 7270 return editLogTailer; 7271 } 7272 7273 @VisibleForTesting 7274 public void setEditLogTailerForTests(EditLogTailer tailer) { 7275 this.editLogTailer = tailer; 7276 } 7277 7278 @VisibleForTesting 7279 void setFsLockForTests(ReentrantReadWriteLock lock) { 7280 this.fsLock.coarseLock = lock; 7281 } 7282 7283 @VisibleForTesting 7284 public ReentrantReadWriteLock getFsLockForTests() { 7285 return fsLock.coarseLock; 7286 } 7287 7288 @VisibleForTesting 7289 public ReentrantLock getCpLockForTests() { 7290 return cpLock; 7291 } 7292 7293 @VisibleForTesting 7294 public SafeModeInfo getSafeModeInfoForTests() { 7295 return safeMode; 7296 } 7297 7298 @VisibleForTesting 7299 public void setNNResourceChecker(NameNodeResourceChecker nnResourceChecker) { 7300 this.nnResourceChecker = nnResourceChecker; 7301 } 7302 7303 public SnapshotManager getSnapshotManager() { 7304 return snapshotManager; 7305 } 7306 7307 /** Allow snapshot on a directory. 
*/ 7308 void allowSnapshot(String path) throws IOException { 7309 checkOperation(OperationCategory.WRITE); 7310 final String operationName = "allowSnapshot"; 7311 boolean success = false; 7312 writeLock(); 7313 try { 7314 checkOperation(OperationCategory.WRITE); 7315 checkNameNodeSafeMode("Cannot allow snapshot for " + path); 7316 checkSuperuserPrivilege(); 7317 FSDirSnapshotOp.allowSnapshot(dir, snapshotManager, path); 7318 success = true; 7319 } finally { 7320 writeUnlock(operationName); 7321 } 7322 getEditLog().logSync(); 7323 logAuditEvent(success, operationName, path, null, null); 7324 } 7325 7326 /** Disallow snapshot on a directory. */ 7327 void disallowSnapshot(String path) throws IOException { 7328 checkOperation(OperationCategory.WRITE); 7329 final String operationName = "disallowSnapshot"; 7330 boolean success = false; 7331 writeLock(); 7332 try { 7333 checkOperation(OperationCategory.WRITE); 7334 checkNameNodeSafeMode("Cannot disallow snapshot for " + path); 7335 checkSuperuserPrivilege(); 7336 FSDirSnapshotOp.disallowSnapshot(dir, snapshotManager, path); 7337 success = true; 7338 } finally { 7339 writeUnlock(operationName); 7340 } 7341 getEditLog().logSync(); 7342 logAuditEvent(success, operationName, path, null, null); 7343 } 7344 7345 /** 7346 * Create a snapshot 7347 * @param snapshotRoot The directory path where the snapshot is taken 7348 * @param snapshotName The name of the snapshot 7349 */ 7350 String createSnapshot(String snapshotRoot, String snapshotName, 7351 boolean logRetryCache) throws IOException { 7352 final String operationName = "createSnapshot"; 7353 String snapshotPath = null; 7354 writeLock(); 7355 try { 7356 checkOperation(OperationCategory.WRITE); 7357 checkNameNodeSafeMode("Cannot create snapshot for " + snapshotRoot); 7358 snapshotPath = FSDirSnapshotOp.createSnapshot(dir, 7359 snapshotManager, snapshotRoot, snapshotName, logRetryCache); 7360 } finally { 7361 writeUnlock(operationName); 7362 } 7363 getEditLog().logSync(); 7364 
logAuditEvent(snapshotPath != null, operationName, snapshotRoot, 7365 snapshotPath, null); 7366 return snapshotPath; 7367 } 7368 7369 /** 7370 * Rename a snapshot 7371 * @param path The directory path where the snapshot was taken 7372 * @param snapshotOldName Old snapshot name 7373 * @param snapshotNewName New snapshot name 7374 * @throws SafeModeException 7375 * @throws IOException 7376 */ 7377 void renameSnapshot( 7378 String path, String snapshotOldName, String snapshotNewName, 7379 boolean logRetryCache) throws IOException { 7380 final String operationName = "renameSnapshot"; 7381 boolean success = false; 7382 writeLock(); 7383 try { 7384 checkOperation(OperationCategory.WRITE); 7385 checkNameNodeSafeMode("Cannot rename snapshot for " + path); 7386 FSDirSnapshotOp.renameSnapshot(dir, snapshotManager, path, 7387 snapshotOldName, snapshotNewName, logRetryCache); 7388 success = true; 7389 } finally { 7390 writeUnlock(operationName); 7391 } 7392 getEditLog().logSync(); 7393 String oldSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotOldName); 7394 String newSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotNewName); 7395 logAuditEvent(success, operationName, oldSnapshotRoot, 7396 newSnapshotRoot, null); 7397 } 7398 7399 /** 7400 * Get the list of snapshottable directories that are owned 7401 * by the current user. Return all the snapshottable directories if the 7402 * current user is a super user. 
7403 * @return The list of all the current snapshottable directories 7404 * @throws IOException 7405 */ 7406 public SnapshottableDirectoryStatus[] getSnapshottableDirListing() 7407 throws IOException { 7408 final String operationName = "listSnapshottableDirectory"; 7409 SnapshottableDirectoryStatus[] status = null; 7410 checkOperation(OperationCategory.READ); 7411 boolean success = false; 7412 readLock(); 7413 try { 7414 checkOperation(OperationCategory.READ); 7415 status = FSDirSnapshotOp.getSnapshottableDirListing(dir, snapshotManager); 7416 success = true; 7417 } finally { 7418 readUnlock(operationName); 7419 } 7420 logAuditEvent(success, operationName, null, null, null); 7421 return status; 7422 } 7423 7424 /** 7425 * Get the difference between two snapshots (or between a snapshot and the 7426 * current status) of a snapshottable directory. 7427 * 7428 * @param path The full path of the snapshottable directory. 7429 * @param fromSnapshot Name of the snapshot to calculate the diff from. Null 7430 * or empty string indicates the current tree. 7431 * @param toSnapshot Name of the snapshot to calculated the diff to. Null or 7432 * empty string indicates the current tree. 7433 * @return A report about the difference between {@code fromSnapshot} and 7434 * {@code toSnapshot}. Modified/deleted/created/renamed files and 7435 * directories belonging to the snapshottable directories are listed 7436 * and labeled as M/-/+/R respectively. 
7437 * @throws IOException 7438 */ 7439 SnapshotDiffReport getSnapshotDiffReport(String path, 7440 String fromSnapshot, String toSnapshot) throws IOException { 7441 final String operationName = "computeSnapshotDiff"; 7442 SnapshotDiffReport diffs = null; 7443 checkOperation(OperationCategory.READ); 7444 readLock(); 7445 try { 7446 checkOperation(OperationCategory.READ); 7447 diffs = FSDirSnapshotOp.getSnapshotDiffReport(dir, snapshotManager, 7448 path, fromSnapshot, toSnapshot); 7449 } finally { 7450 readUnlock(operationName); 7451 } 7452 7453 logAuditEvent(diffs != null, operationName, null, null, null); 7454 return diffs; 7455 } 7456 7457 /** 7458 * Delete a snapshot of a snapshottable directory 7459 * @param snapshotRoot The snapshottable directory 7460 * @param snapshotName The name of the to-be-deleted snapshot 7461 * @throws SafeModeException 7462 * @throws IOException 7463 */ 7464 void deleteSnapshot(String snapshotRoot, String snapshotName, 7465 boolean logRetryCache) throws IOException { 7466 final String operationName = "deleteSnapshot"; 7467 boolean success = false; 7468 checkOperation(OperationCategory.WRITE); 7469 writeLock(); 7470 BlocksMapUpdateInfo blocksToBeDeleted = null; 7471 try { 7472 checkOperation(OperationCategory.WRITE); 7473 checkNameNodeSafeMode("Cannot delete snapshot for " + snapshotRoot); 7474 7475 blocksToBeDeleted = FSDirSnapshotOp.deleteSnapshot(dir, snapshotManager, 7476 snapshotRoot, snapshotName, logRetryCache); 7477 success = true; 7478 } finally { 7479 writeUnlock(operationName); 7480 } 7481 getEditLog().logSync(); 7482 7483 // Breaking the pattern as removing blocks have to happen outside of the 7484 // global lock 7485 if (blocksToBeDeleted != null) { 7486 removeBlocks(blocksToBeDeleted); 7487 } 7488 7489 String rootPath = Snapshot.getSnapshotPath(snapshotRoot, snapshotName); 7490 logAuditEvent(success, operationName, rootPath, null, null); 7491 } 7492 7493 /** 7494 * Remove a list of INodeDirectorySnapshottable from the 
SnapshotManager 7495 * @param toRemove the list of INodeDirectorySnapshottable to be removed 7496 */ 7497 void removeSnapshottableDirs(List<INodeDirectory> toRemove) { 7498 if (snapshotManager != null) { 7499 snapshotManager.removeSnapshottable(toRemove); 7500 } 7501 } 7502 7503 RollingUpgradeInfo queryRollingUpgrade() throws IOException { 7504 checkSuperuserPrivilege(); 7505 checkOperation(OperationCategory.READ); 7506 readLock(); 7507 try { 7508 checkOperation(OperationCategory.READ); 7509 if (!isRollingUpgrade()) { 7510 return null; 7511 } 7512 Preconditions.checkNotNull(rollingUpgradeInfo); 7513 boolean hasRollbackImage = this.getFSImage().hasRollbackFSImage(); 7514 rollingUpgradeInfo.setCreatedRollbackImages(hasRollbackImage); 7515 return rollingUpgradeInfo; 7516 } finally { 7517 readUnlock("queryRollingUpgrade"); 7518 } 7519 } 7520 7521 RollingUpgradeInfo startRollingUpgrade() throws IOException { 7522 final String operationName = "startRollingUpgrade"; 7523 checkSuperuserPrivilege(); 7524 checkOperation(OperationCategory.WRITE); 7525 writeLock(); 7526 try { 7527 checkOperation(OperationCategory.WRITE); 7528 if (isRollingUpgrade()) { 7529 return rollingUpgradeInfo; 7530 } 7531 long startTime = now(); 7532 if (!haEnabled) { // for non-HA, we require NN to be in safemode 7533 startRollingUpgradeInternalForNonHA(startTime); 7534 } else { // for HA, NN cannot be in safemode 7535 checkNameNodeSafeMode("Failed to start rolling upgrade"); 7536 startRollingUpgradeInternal(startTime); 7537 } 7538 7539 getEditLog().logStartRollingUpgrade(rollingUpgradeInfo.getStartTime()); 7540 if (haEnabled) { 7541 // roll the edit log to make sure the standby NameNode can tail 7542 getFSImage().rollEditLog(); 7543 } 7544 } finally { 7545 writeUnlock(operationName); 7546 } 7547 7548 getEditLog().logSync(); 7549 if (auditLog.isInfoEnabled() && isExternalInvocation()) { 7550 logAuditEvent(true, operationName, null, null, null); 7551 } 7552 return rollingUpgradeInfo; 7553 } 7554 7555 /** 
7556 * Update internal state to indicate that a rolling upgrade is in progress. 7557 * @param startTime rolling upgrade start time 7558 */ 7559 void startRollingUpgradeInternal(long startTime) 7560 throws IOException { 7561 checkRollingUpgrade("start rolling upgrade"); 7562 getFSImage().checkUpgrade(); 7563 setRollingUpgradeInfo(false, startTime); 7564 } 7565 7566 /** 7567 * Update internal state to indicate that a rolling upgrade is in progress for 7568 * non-HA setup. This requires the namesystem is in SafeMode and after doing a 7569 * checkpoint for rollback the namesystem will quit the safemode automatically 7570 */ 7571 private void startRollingUpgradeInternalForNonHA(long startTime) 7572 throws IOException { 7573 Preconditions.checkState(!haEnabled); 7574 if (!isInSafeMode()) { 7575 throw new IOException("Safe mode should be turned ON " 7576 + "in order to create namespace image."); 7577 } 7578 checkRollingUpgrade("start rolling upgrade"); 7579 getFSImage().checkUpgrade(); 7580 // in non-HA setup, we do an extra checkpoint to generate a rollback image 7581 getFSImage().saveNamespace(this, NameNodeFile.IMAGE_ROLLBACK, null); 7582 LOG.info("Successfully saved namespace for preparing rolling upgrade."); 7583 7584 // leave SafeMode automatically 7585 setSafeMode(SafeModeAction.SAFEMODE_LEAVE); 7586 setRollingUpgradeInfo(true, startTime); 7587 } 7588 7589 void setRollingUpgradeInfo(boolean createdRollbackImages, long startTime) { 7590 rollingUpgradeInfo = new RollingUpgradeInfo(blockPoolId, 7591 createdRollbackImages, startTime, 0L); 7592 } 7593 7594 public void setCreatedRollbackImages(boolean created) { 7595 if (rollingUpgradeInfo != null) { 7596 rollingUpgradeInfo.setCreatedRollbackImages(created); 7597 } 7598 } 7599 7600 public RollingUpgradeInfo getRollingUpgradeInfo() { 7601 return rollingUpgradeInfo; 7602 } 7603 7604 public boolean isNeedRollbackFsImage() { 7605 return needRollbackFsImage; 7606 } 7607 7608 public void setNeedRollbackFsImage(boolean 
needRollbackFsImage) { 7609 this.needRollbackFsImage = needRollbackFsImage; 7610 } 7611 7612 @Override // NameNodeMXBean 7613 public RollingUpgradeInfo.Bean getRollingUpgradeStatus() { 7614 if (!isRollingUpgrade()) { 7615 return null; 7616 } 7617 RollingUpgradeInfo upgradeInfo = getRollingUpgradeInfo(); 7618 if (upgradeInfo.createdRollbackImages()) { 7619 return new RollingUpgradeInfo.Bean(upgradeInfo); 7620 } 7621 readLock(); 7622 try { 7623 // check again after acquiring the read lock. 7624 upgradeInfo = getRollingUpgradeInfo(); 7625 if (upgradeInfo == null) { 7626 return null; 7627 } 7628 if (!upgradeInfo.createdRollbackImages()) { 7629 boolean hasRollbackImage = this.getFSImage().hasRollbackFSImage(); 7630 upgradeInfo.setCreatedRollbackImages(hasRollbackImage); 7631 } 7632 } catch (IOException ioe) { 7633 LOG.warn("Encountered exception setting Rollback Image", ioe); 7634 } finally { 7635 readUnlock("getRollingUpgradeStatus"); 7636 } 7637 return new RollingUpgradeInfo.Bean(upgradeInfo); 7638 } 7639 7640 /** Is rolling upgrade in progress? */ 7641 public boolean isRollingUpgrade() { 7642 return rollingUpgradeInfo != null && !rollingUpgradeInfo.isFinalized(); 7643 } 7644 7645 void checkRollingUpgrade(String action) throws RollingUpgradeException { 7646 if (isRollingUpgrade()) { 7647 throw new RollingUpgradeException("Failed to " + action 7648 + " since a rolling upgrade is already in progress." 
7649 + " Existing rolling upgrade info:\n" + rollingUpgradeInfo); 7650 } 7651 } 7652 7653 RollingUpgradeInfo finalizeRollingUpgrade() throws IOException { 7654 final String operationName = "finalizeRollingUpgrade"; 7655 checkSuperuserPrivilege(); 7656 checkOperation(OperationCategory.WRITE); 7657 writeLock(); 7658 try { 7659 checkOperation(OperationCategory.WRITE); 7660 if (!isRollingUpgrade()) { 7661 return null; 7662 } 7663 checkNameNodeSafeMode("Failed to finalize rolling upgrade"); 7664 7665 finalizeRollingUpgradeInternal(now()); 7666 getEditLog().logFinalizeRollingUpgrade(rollingUpgradeInfo.getFinalizeTime()); 7667 if (haEnabled) { 7668 // roll the edit log to make sure the standby NameNode can tail 7669 getFSImage().rollEditLog(); 7670 } 7671 getFSImage().updateStorageVersion(); 7672 getFSImage().renameCheckpoint(NameNodeFile.IMAGE_ROLLBACK, 7673 NameNodeFile.IMAGE); 7674 } finally { 7675 writeUnlock(operationName); 7676 } 7677 7678 if (!haEnabled) { 7679 // Sync not needed for ha since the edit was rolled after logging. 
7680 getEditLog().logSync(); 7681 } 7682 7683 if (auditLog.isInfoEnabled() && isExternalInvocation()) { 7684 logAuditEvent(true, operationName, null, null, null); 7685 } 7686 return rollingUpgradeInfo; 7687 } 7688 7689 void finalizeRollingUpgradeInternal(long finalizeTime) { 7690 // Set the finalize time 7691 rollingUpgradeInfo.finalize(finalizeTime); 7692 } 7693 7694 long addCacheDirective(CacheDirectiveInfo directive, 7695 EnumSet<CacheFlag> flags, boolean logRetryCache) 7696 throws IOException { 7697 final String operationName = "addCacheDirective"; 7698 CacheDirectiveInfo effectiveDirective = null; 7699 if (!flags.contains(CacheFlag.FORCE)) { 7700 cacheManager.waitForRescanIfNeeded(); 7701 } 7702 checkOperation(OperationCategory.WRITE); 7703 writeLock(); 7704 try { 7705 checkOperation(OperationCategory.WRITE); 7706 if (isInSafeMode()) { 7707 throw new SafeModeException( 7708 "Cannot add cache directive", safeMode); 7709 } 7710 effectiveDirective = FSNDNCacheOp.addCacheDirective(this, cacheManager, 7711 directive, flags, logRetryCache); 7712 } finally { 7713 writeUnlock(operationName); 7714 boolean success = effectiveDirective != null; 7715 if (success) { 7716 getEditLog().logSync(); 7717 } 7718 7719 String effectiveDirectiveStr = effectiveDirective != null ? 7720 effectiveDirective.toString() : null; 7721 logAuditEvent(success, operationName, effectiveDirectiveStr, 7722 null, null); 7723 } 7724 return effectiveDirective != null ? 
effectiveDirective.getId() : 0; 7725 } 7726 7727 void modifyCacheDirective(CacheDirectiveInfo directive, 7728 EnumSet<CacheFlag> flags, boolean logRetryCache) throws IOException { 7729 final String operationName = "modifyCacheDirective"; 7730 boolean success = false; 7731 if (!flags.contains(CacheFlag.FORCE)) { 7732 cacheManager.waitForRescanIfNeeded(); 7733 } 7734 checkOperation(OperationCategory.WRITE); 7735 writeLock(); 7736 try { 7737 checkOperation(OperationCategory.WRITE); 7738 if (isInSafeMode()) { 7739 throw new SafeModeException( 7740 "Cannot add cache directive", safeMode); 7741 } 7742 FSNDNCacheOp.modifyCacheDirective(this, cacheManager, directive, flags, 7743 logRetryCache); 7744 success = true; 7745 } finally { 7746 writeUnlock(operationName); 7747 if (success) { 7748 getEditLog().logSync(); 7749 } 7750 String idStr = "{id: " + directive.getId().toString() + "}"; 7751 logAuditEvent(success, "modifyCacheDirective", idStr, 7752 directive.toString(), null); 7753 } 7754 } 7755 7756 void removeCacheDirective(long id, boolean logRetryCache) throws IOException { 7757 final String operationName = "removeCacheDirective"; 7758 boolean success = false; 7759 checkOperation(OperationCategory.WRITE); 7760 writeLock(); 7761 try { 7762 checkOperation(OperationCategory.WRITE); 7763 if (isInSafeMode()) { 7764 throw new SafeModeException( 7765 "Cannot remove cache directives", safeMode); 7766 } 7767 FSNDNCacheOp.removeCacheDirective(this, cacheManager, id, logRetryCache); 7768 success = true; 7769 } finally { 7770 writeUnlock(operationName); 7771 String idStr = "{id: " + Long.toString(id) + "}"; 7772 logAuditEvent(success, operationName, idStr, null, 7773 null); 7774 } 7775 getEditLog().logSync(); 7776 } 7777 7778 BatchedListEntries<CacheDirectiveEntry> listCacheDirectives( 7779 long startId, CacheDirectiveInfo filter) throws IOException { 7780 final String operationName = "listCacheDirectives"; 7781 checkOperation(OperationCategory.READ); 7782 
BatchedListEntries<CacheDirectiveEntry> results; 7783 cacheManager.waitForRescanIfNeeded(); 7784 readLock(); 7785 boolean success = false; 7786 try { 7787 checkOperation(OperationCategory.READ); 7788 results = FSNDNCacheOp.listCacheDirectives(this, cacheManager, startId, 7789 filter); 7790 success = true; 7791 } finally { 7792 readUnlock(operationName); 7793 logAuditEvent(success, operationName, filter.toString(), null, 7794 null); 7795 } 7796 return results; 7797 } 7798 7799 void addCachePool(CachePoolInfo req, boolean logRetryCache) 7800 throws IOException { 7801 final String operationName = "addCachePool"; 7802 checkOperation(OperationCategory.WRITE); 7803 writeLock(); 7804 boolean success = false; 7805 String poolInfoStr = null; 7806 try { 7807 checkOperation(OperationCategory.WRITE); 7808 if (isInSafeMode()) { 7809 throw new SafeModeException( 7810 "Cannot add cache pool " + req.getPoolName(), safeMode); 7811 } 7812 CachePoolInfo info = FSNDNCacheOp.addCachePool(this, cacheManager, req, 7813 logRetryCache); 7814 poolInfoStr = info.toString(); 7815 success = true; 7816 } finally { 7817 writeUnlock(operationName); 7818 logAuditEvent(success, operationName, poolInfoStr, null, null); 7819 } 7820 7821 getEditLog().logSync(); 7822 } 7823 7824 void modifyCachePool(CachePoolInfo req, boolean logRetryCache) 7825 throws IOException { 7826 final String operationName = "modifyCachePool"; 7827 checkOperation(OperationCategory.WRITE); 7828 writeLock(); 7829 boolean success = false; 7830 try { 7831 checkOperation(OperationCategory.WRITE); 7832 if (isInSafeMode()) { 7833 throw new SafeModeException( 7834 "Cannot modify cache pool " + req.getPoolName(), safeMode); 7835 } 7836 FSNDNCacheOp.modifyCachePool(this, cacheManager, req, logRetryCache); 7837 success = true; 7838 } finally { 7839 writeUnlock(operationName); 7840 String poolNameStr = "{poolName: " + 7841 (req == null ? 
null : req.getPoolName()) + "}"; 7842 logAuditEvent(success, operationName, poolNameStr, 7843 req == null ? null : req.toString(), null); 7844 } 7845 7846 getEditLog().logSync(); 7847 } 7848 7849 void removeCachePool(String cachePoolName, boolean logRetryCache) 7850 throws IOException { 7851 final String operationName = "removeCachePool"; 7852 checkOperation(OperationCategory.WRITE); 7853 writeLock(); 7854 boolean success = false; 7855 try { 7856 checkOperation(OperationCategory.WRITE); 7857 if (isInSafeMode()) { 7858 throw new SafeModeException( 7859 "Cannot remove cache pool " + cachePoolName, safeMode); 7860 } 7861 FSNDNCacheOp.removeCachePool(this, cacheManager, cachePoolName, 7862 logRetryCache); 7863 success = true; 7864 } finally { 7865 writeUnlock(operationName); 7866 String poolNameStr = "{poolName: " + cachePoolName + "}"; 7867 logAuditEvent(success, operationName, poolNameStr, null, null); 7868 } 7869 7870 getEditLog().logSync(); 7871 } 7872 7873 BatchedListEntries<CachePoolEntry> listCachePools(String prevKey) 7874 throws IOException { 7875 final String operationName = "listCachePools"; 7876 BatchedListEntries<CachePoolEntry> results; 7877 checkOperation(OperationCategory.READ); 7878 boolean success = false; 7879 cacheManager.waitForRescanIfNeeded(); 7880 readLock(); 7881 try { 7882 checkOperation(OperationCategory.READ); 7883 results = FSNDNCacheOp.listCachePools(this, cacheManager, prevKey); 7884 success = true; 7885 } finally { 7886 readUnlock(operationName); 7887 logAuditEvent(success, operationName, null, null, null); 7888 } 7889 return results; 7890 } 7891 7892 void modifyAclEntries(final String src, List<AclEntry> aclSpec) 7893 throws IOException { 7894 final String operationName = "modifyAclEntries"; 7895 HdfsFileStatus auditStat = null; 7896 checkOperation(OperationCategory.WRITE); 7897 writeLock(); 7898 try { 7899 checkOperation(OperationCategory.WRITE); 7900 checkNameNodeSafeMode("Cannot modify ACL entries on " + src); 7901 auditStat = 
FSDirAclOp.modifyAclEntries(dir, src, aclSpec); 7902 } catch (AccessControlException e) { 7903 logAuditEvent(false, operationName, src); 7904 throw e; 7905 } finally { 7906 writeUnlock(operationName); 7907 } 7908 getEditLog().logSync(); 7909 logAuditEvent(true, operationName, src, null, auditStat); 7910 } 7911 7912 void removeAclEntries(final String src, List<AclEntry> aclSpec) 7913 throws IOException { 7914 final String operationName = "removeAclEntries"; 7915 checkOperation(OperationCategory.WRITE); 7916 HdfsFileStatus auditStat = null; 7917 writeLock(); 7918 try { 7919 checkOperation(OperationCategory.WRITE); 7920 checkNameNodeSafeMode("Cannot remove ACL entries on " + src); 7921 auditStat = FSDirAclOp.removeAclEntries(dir, src, aclSpec); 7922 } catch (AccessControlException e) { 7923 logAuditEvent(false, operationName, src); 7924 throw e; 7925 } finally { 7926 writeUnlock(operationName); 7927 } 7928 getEditLog().logSync(); 7929 logAuditEvent(true, operationName, src, null, auditStat); 7930 } 7931 7932 void removeDefaultAcl(final String src) throws IOException { 7933 final String operationName = "removeDefaultAcl"; 7934 HdfsFileStatus auditStat = null; 7935 checkOperation(OperationCategory.WRITE); 7936 writeLock(); 7937 try { 7938 checkOperation(OperationCategory.WRITE); 7939 checkNameNodeSafeMode("Cannot remove default ACL entries on " + src); 7940 auditStat = FSDirAclOp.removeDefaultAcl(dir, src); 7941 } catch (AccessControlException e) { 7942 logAuditEvent(false, operationName, src); 7943 throw e; 7944 } finally { 7945 writeUnlock(operationName); 7946 } 7947 getEditLog().logSync(); 7948 logAuditEvent(true, operationName, src, null, auditStat); 7949 } 7950 7951 void removeAcl(final String src) throws IOException { 7952 final String operationName = "removeAcl"; 7953 HdfsFileStatus auditStat = null; 7954 checkOperation(OperationCategory.WRITE); 7955 writeLock(); 7956 try { 7957 checkOperation(OperationCategory.WRITE); 7958 checkNameNodeSafeMode("Cannot remove 
ACL on " + src); 7959 auditStat = FSDirAclOp.removeAcl(dir, src); 7960 } catch (AccessControlException e) { 7961 logAuditEvent(false, operationName, src); 7962 throw e; 7963 } finally { 7964 writeUnlock(operationName); 7965 } 7966 getEditLog().logSync(); 7967 logAuditEvent(true, operationName, src, null, auditStat); 7968 } 7969 7970 void setAcl(final String src, List<AclEntry> aclSpec) throws IOException { 7971 final String operationName = "setAcl"; 7972 HdfsFileStatus auditStat = null; 7973 checkOperation(OperationCategory.WRITE); 7974 writeLock(); 7975 try { 7976 checkOperation(OperationCategory.WRITE); 7977 checkNameNodeSafeMode("Cannot set ACL on " + src); 7978 auditStat = FSDirAclOp.setAcl(dir, src, aclSpec); 7979 } catch (AccessControlException e) { 7980 logAuditEvent(false, operationName, src); 7981 throw e; 7982 } finally { 7983 writeUnlock(operationName); 7984 } 7985 getEditLog().logSync(); 7986 logAuditEvent(true, operationName, src, null, auditStat); 7987 } 7988 7989 AclStatus getAclStatus(String src) throws IOException { 7990 final String operationName = "getAclStatus"; 7991 checkOperation(OperationCategory.READ); 7992 boolean success = false; 7993 readLock(); 7994 try { 7995 checkOperation(OperationCategory.READ); 7996 final AclStatus ret = FSDirAclOp.getAclStatus(dir, src); 7997 success = true; 7998 return ret; 7999 } finally { 8000 readUnlock(operationName); 8001 logAuditEvent(success, operationName, src); 8002 } 8003 } 8004 8005 /** 8006 * Create an encryption zone on directory src using the specified key. 8007 * 8008 * @param src the path of a directory which will be the root of the 8009 * encryption zone. The directory must be empty. 8010 * @param keyName name of a key which must be present in the configured 8011 * KeyProvider. 8012 * @throws AccessControlException if the caller is not the superuser. 8013 * @throws UnresolvedLinkException if the path can't be resolved. 8014 * @throws SafeModeException if the Namenode is in safe mode. 
8015 */ 8016 void createEncryptionZone(final String src, final String keyName, 8017 boolean logRetryCache) 8018 throws IOException, UnresolvedLinkException, 8019 SafeModeException, AccessControlException { 8020 try { 8021 if (provider == null) { 8022 throw new IOException( 8023 "Can't create an encryption zone for " + src + 8024 " since no key provider is available."); 8025 } 8026 if (keyName == null || keyName.isEmpty()) { 8027 throw new IOException("Must specify a key name when creating an " + 8028 "encryption zone"); 8029 } 8030 KeyProvider.Metadata metadata = provider.getMetadata(keyName); 8031 if (metadata == null) { 8032 /* 8033 * It would be nice if we threw something more specific than 8034 * IOException when the key is not found, but the KeyProvider API 8035 * doesn't provide for that. If that API is ever changed to throw 8036 * something more specific (e.g. UnknownKeyException) then we can 8037 * update this to match it, or better yet, just rethrow the 8038 * KeyProvider's exception. 
8039 */ 8040 throw new IOException("Key " + keyName + " doesn't exist."); 8041 } 8042 // If the provider supports pool for EDEKs, this will fill in the pool 8043 generateEncryptedDataEncryptionKey(keyName); 8044 createEncryptionZoneInt(src, metadata.getCipher(), 8045 keyName, logRetryCache); 8046 } catch (AccessControlException e) { 8047 logAuditEvent(false, "createEncryptionZone", src); 8048 throw e; 8049 } 8050 } 8051 8052 private void createEncryptionZoneInt(final String srcArg, String cipher, 8053 String keyName, final boolean logRetryCache) throws IOException { 8054 final String operationName = "createEncryptionZone"; 8055 String src = srcArg; 8056 HdfsFileStatus resultingStat = null; 8057 checkSuperuserPrivilege(); 8058 FSPermissionChecker pc = getPermissionChecker(); 8059 writeLock(); 8060 try { 8061 checkSuperuserPrivilege(); 8062 checkOperation(OperationCategory.WRITE); 8063 checkNameNodeSafeMode("Cannot create encryption zone on " + src); 8064 final INodesInPath iip = dir.resolvePathForWrite(pc, src); 8065 src = iip.getPath(); 8066 8067 final CipherSuite suite = CipherSuite.convert(cipher); 8068 // For now this is hardcoded, as we only support one method. 8069 final CryptoProtocolVersion version = 8070 CryptoProtocolVersion.ENCRYPTION_ZONES; 8071 final XAttr ezXAttr = dir.createEncryptionZone(src, suite, 8072 version, keyName); 8073 List<XAttr> xAttrs = Lists.newArrayListWithCapacity(1); 8074 xAttrs.add(ezXAttr); 8075 getEditLog().logSetXAttrs(src, xAttrs, logRetryCache); 8076 resultingStat = dir.getAuditFileInfo(iip); 8077 } finally { 8078 writeUnlock(operationName); 8079 } 8080 getEditLog().logSync(); 8081 logAuditEvent(true, operationName, srcArg, null, resultingStat); 8082 } 8083 8084 /** 8085 * Get the encryption zone for the specified path. 8086 * 8087 * @param srcArg the path of a file or directory to get the EZ for. 8088 * @return the EZ of the of the path or null if none. 8089 * @throws AccessControlException if the caller is not the superuser. 
8090 * @throws UnresolvedLinkException if the path can't be resolved. 8091 */ 8092 EncryptionZone getEZForPath(final String srcArg) 8093 throws AccessControlException, UnresolvedLinkException, IOException { 8094 String src = srcArg; 8095 final String operationName = "getEZForPath"; 8096 HdfsFileStatus resultingStat = null; 8097 boolean success = false; 8098 final FSPermissionChecker pc = getPermissionChecker(); 8099 checkOperation(OperationCategory.READ); 8100 readLock(); 8101 try { 8102 checkOperation(OperationCategory.READ); 8103 INodesInPath iip = dir.resolvePath(pc, src); 8104 if (isPermissionEnabled) { 8105 dir.checkPathAccess(pc, iip, FsAction.READ); 8106 } 8107 final EncryptionZone ret = dir.getEZForPath(iip); 8108 resultingStat = dir.getAuditFileInfo(iip); 8109 success = true; 8110 return ret; 8111 } finally { 8112 readUnlock(operationName); 8113 logAuditEvent(success, operationName, srcArg, null, resultingStat); 8114 } 8115 } 8116 8117 BatchedListEntries<EncryptionZone> listEncryptionZones(long prevId) 8118 throws IOException { 8119 final String operationName = "listEncryptionZones"; 8120 boolean success = false; 8121 checkSuperuserPrivilege(); 8122 checkOperation(OperationCategory.READ); 8123 readLock(); 8124 try { 8125 checkSuperuserPrivilege(); 8126 checkOperation(OperationCategory.READ); 8127 final BatchedListEntries<EncryptionZone> ret = 8128 dir.listEncryptionZones(prevId); 8129 success = true; 8130 return ret; 8131 } finally { 8132 readUnlock(operationName); 8133 logAuditEvent(success, operationName, null); 8134 } 8135 } 8136 8137 void setXAttr(String src, XAttr xAttr, EnumSet<XAttrSetFlag> flag, 8138 boolean logRetryCache) 8139 throws IOException { 8140 final String operationName = "setXAttr"; 8141 HdfsFileStatus auditStat = null; 8142 writeLock(); 8143 try { 8144 checkOperation(OperationCategory.WRITE); 8145 checkNameNodeSafeMode("Cannot set XAttr on " + src); 8146 auditStat = FSDirXAttrOp.setXAttr(dir, src, xAttr, flag, logRetryCache); 8147 } 
catch (AccessControlException e) { 8148 logAuditEvent(false, operationName, src); 8149 throw e; 8150 } finally { 8151 writeUnlock(operationName); 8152 } 8153 getEditLog().logSync(); 8154 logAuditEvent(true, operationName, src, null, auditStat); 8155 } 8156 8157 List<XAttr> getXAttrs(final String src, List<XAttr> xAttrs) 8158 throws IOException { 8159 final String operationName = "getXAttrs"; 8160 checkOperation(OperationCategory.READ); 8161 readLock(); 8162 try { 8163 checkOperation(OperationCategory.READ); 8164 return FSDirXAttrOp.getXAttrs(dir, src, xAttrs); 8165 } catch (AccessControlException e) { 8166 logAuditEvent(false, operationName, src); 8167 throw e; 8168 } finally { 8169 readUnlock(operationName); 8170 } 8171 } 8172 8173 List<XAttr> listXAttrs(String src) throws IOException { 8174 final String operationName = "listXAttrs"; 8175 checkOperation(OperationCategory.READ); 8176 readLock(); 8177 try { 8178 checkOperation(OperationCategory.READ); 8179 return FSDirXAttrOp.listXAttrs(dir, src); 8180 } catch (AccessControlException e) { 8181 logAuditEvent(false, operationName, src); 8182 throw e; 8183 } finally { 8184 readUnlock(operationName); 8185 } 8186 } 8187 8188 void removeXAttr(String src, XAttr xAttr, boolean logRetryCache) 8189 throws IOException { 8190 final String operationName = "removeXAttr"; 8191 HdfsFileStatus auditStat = null; 8192 writeLock(); 8193 try { 8194 checkOperation(OperationCategory.WRITE); 8195 checkNameNodeSafeMode("Cannot remove XAttr entry on " + src); 8196 auditStat = FSDirXAttrOp.removeXAttr(dir, src, xAttr, logRetryCache); 8197 } catch (AccessControlException e) { 8198 logAuditEvent(false, operationName, src); 8199 throw e; 8200 } finally { 8201 writeUnlock(operationName); 8202 } 8203 getEditLog().logSync(); 8204 logAuditEvent(true, operationName, src, null, auditStat); 8205 } 8206 8207 void checkAccess(String src, FsAction mode) throws IOException { 8208 final String operationName = "checkAccess"; 8209 
checkOperation(OperationCategory.READ); 8210 FSPermissionChecker pc = getPermissionChecker(); 8211 readLock(); 8212 try { 8213 checkOperation(OperationCategory.READ); 8214 final INodesInPath iip = dir.resolvePath(pc, src); 8215 src = iip.getPath(); 8216 INode inode = iip.getLastINode(); 8217 if (inode == null) { 8218 throw new FileNotFoundException("Path not found"); 8219 } 8220 if (isPermissionEnabled) { 8221 dir.checkPathAccess(pc, iip, mode); 8222 } 8223 } catch (AccessControlException e) { 8224 logAuditEvent(false, operationName, src); 8225 throw e; 8226 } finally { 8227 readUnlock(operationName); 8228 } 8229 } 8230 8231 /** 8232 * Default AuditLogger implementation; used when no access logger is 8233 * defined in the config file. It can also be explicitly listed in the 8234 * config file. 8235 */ 8236 private static class DefaultAuditLogger extends HdfsAuditLogger { 8237 8238 private boolean logTokenTrackingId; 8239 8240 @Override 8241 public void initialize(Configuration conf) { 8242 logTokenTrackingId = conf.getBoolean( 8243 DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY, 8244 DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT); 8245 } 8246 8247 @Override 8248 public void logAuditEvent(boolean succeeded, String userName, 8249 InetAddress addr, String cmd, String src, String dst, 8250 FileStatus status, UserGroupInformation ugi, 8251 DelegationTokenSecretManager dtSecretManager) { 8252 if (auditLog.isInfoEnabled()) { 8253 final StringBuilder sb = auditBuffer.get(); 8254 sb.setLength(0); 8255 sb.append("allowed=").append(succeeded).append("\t"); 8256 sb.append("ugi=").append(userName).append("\t"); 8257 sb.append("ip=").append(addr).append("\t"); 8258 sb.append("cmd=").append(cmd).append("\t"); 8259 sb.append("src=").append(src).append("\t"); 8260 sb.append("dst=").append(dst).append("\t"); 8261 if (null == status) { 8262 sb.append("perm=null"); 8263 } else { 8264 sb.append("perm="); 8265 sb.append(status.getOwner()).append(":"); 8266 
sb.append(status.getGroup()).append(":"); 8267 sb.append(status.getPermission()); 8268 } 8269 if (logTokenTrackingId) { 8270 sb.append("\t").append("trackingId="); 8271 String trackingId = null; 8272 if (ugi != null && dtSecretManager != null 8273 && ugi.getAuthenticationMethod() == AuthenticationMethod.TOKEN) { 8274 for (TokenIdentifier tid: ugi.getTokenIdentifiers()) { 8275 if (tid instanceof DelegationTokenIdentifier) { 8276 DelegationTokenIdentifier dtid = 8277 (DelegationTokenIdentifier)tid; 8278 trackingId = dtSecretManager.getTokenTrackingId(dtid); 8279 break; 8280 } 8281 } 8282 } 8283 sb.append(trackingId); 8284 } 8285 sb.append("\t").append("proto="); 8286 sb.append(NamenodeWebHdfsMethods.isWebHdfsInvocation() ? "webhdfs" : "rpc"); 8287 logAuditMessage(sb.toString()); 8288 } 8289 } 8290 8291 public void logAuditMessage(String message) { 8292 auditLog.info(message); 8293 } 8294 } 8295 8296 private static void enableAsyncAuditLog() { 8297 if (!(auditLog instanceof Log4JLogger)) { 8298 LOG.warn("Log4j is required to enable async auditlog"); 8299 return; 8300 } 8301 Logger logger = ((Log4JLogger)auditLog).getLogger(); 8302 @SuppressWarnings("unchecked") 8303 List<Appender> appenders = Collections.list(logger.getAllAppenders()); 8304 // failsafe against trying to async it more than once 8305 if (!appenders.isEmpty() && !(appenders.get(0) instanceof AsyncAppender)) { 8306 AsyncAppender asyncAppender = new AsyncAppender(); 8307 // change logger to have an async appender containing all the 8308 // previously configured appenders 8309 for (Appender appender : appenders) { 8310 logger.removeAppender(appender); 8311 asyncAppender.addAppender(appender); 8312 } 8313 logger.addAppender(asyncAppender); 8314 } 8315 } 8316 8317} 8318