/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.protobuf.InvalidProtocolBufferException;

import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension;
import org.apache.hadoop.fs.InvalidPathException;
import org.apache.hadoop.fs.ParentNotDirectoryException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.StorageType;
import org.apache.hadoop.fs.UnresolvedLinkException;
import org.apache.hadoop.fs.XAttr;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
import org.apache.hadoop.hdfs.protocol.FSLimitException.MaxDirectoryItemsExceededException;
import org.apache.hadoop.hdfs.protocol.FSLimitException.PathComponentTooLongException;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
import org.apache.hadoop.hdfs.protocol.SnapshotAccessControlException;
import org.apache.hadoop.hdfs.protocol.UnresolvedPathException;
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos;
import org.apache.hadoop.hdfs.protocolPB.PBHelperClient;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo.UpdatedReplicationInfo;
import org.apache.hadoop.hdfs.util.ByteArray;
import org.apache.hadoop.hdfs.util.EnumCounters;
import org.apache.hadoop.hdfs.util.ReadOnlyList;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.Time;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.RecursiveAction;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import static org.apache.hadoop.fs.CommonConfigurationKeys.FS_PROTECTED_DIRECTORIES;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_QUOTA_BY_STORAGETYPE_ENABLED_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_QUOTA_BY_STORAGETYPE_ENABLED_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_KEY;
import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.CRYPTO_XATTR_ENCRYPTION_ZONE;
import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.SECURITY_XATTR_UNREADABLE_BY_SUPERUSER;
import static org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot.CURRENT_STATE_ID;

/**
 * Both FSDirectory and FSNamesystem manage the state of the namespace.
 * FSDirectory is a pure in-memory data structure, all of whose operations
 * happen entirely in memory. In contrast, FSNamesystem persists the operations
 * to the disk.
 * @see org.apache.hadoop.hdfs.server.namenode.FSNamesystem
 **/
@InterfaceAudience.Private
public class FSDirectory implements Closeable {
  static final Logger LOG = LoggerFactory.getLogger(FSDirectory.class);

  /**
   * Build the root directory inode ("/") owned by the FS owner with 0755
   * permissions, default namespace/storage-space quotas, and the
   * snapshottable feature enabled (snapshot quota 0 — see
   * {@code setSnapshotQuota(0)} below).
   */
  private static INodeDirectory createRoot(FSNamesystem namesystem) {
    final INodeDirectory r = new INodeDirectory(
        INodeId.ROOT_INODE_ID,
        INodeDirectory.ROOT_NAME,
        namesystem.createFsOwnerPermissions(new FsPermission((short) 0755)),
        0L);
    r.addDirectoryWithQuotaFeature(
        new DirectoryWithQuotaFeature.Builder().
            nameSpaceQuota(DirectoryWithQuotaFeature.DEFAULT_NAMESPACE_QUOTA).
            storageSpaceQuota(DirectoryWithQuotaFeature.DEFAULT_STORAGE_SPACE_QUOTA).
            build());
    r.addSnapshottableFeature();
    r.setSnapshotQuota(0);
    return r;
  }

  // Reserved path names (/.reserved, /.reserved/.inodes, /.reserved/raw, "..")
  // pre-encoded as byte[] so path resolution can compare components directly.
  @VisibleForTesting
  static boolean CHECK_RESERVED_FILE_NAMES = true;
  public final static String DOT_RESERVED_STRING = ".reserved";
  public final static String DOT_RESERVED_PATH_PREFIX = Path.SEPARATOR
      + DOT_RESERVED_STRING;
  public final static byte[] DOT_RESERVED =
      DFSUtil.string2Bytes(DOT_RESERVED_STRING);
  private final static String RAW_STRING = "raw";
  private final static byte[] RAW = DFSUtil.string2Bytes(RAW_STRING);
  public final static String DOT_INODES_STRING = ".inodes";
  public final static byte[] DOT_INODES =
      DFSUtil.string2Bytes(DOT_INODES_STRING);
  private final static byte[] DOT_DOT =
      DFSUtil.string2Bytes("..");

  // Synthetic listing status for the /.reserved directory (mode 01770,
  // unspecified storage policy). Listing-only; not a real inode.
  public final static HdfsFileStatus DOT_RESERVED_STATUS =
      new HdfsFileStatus(0, true, 0, 0, 0, 0, new FsPermission((short) 01770),
          null, null, null, HdfsFileStatus.EMPTY_NAME, -1L, 0, null,
          HdfsConstants.BLOCK_STORAGE_POLICY_ID_UNSPECIFIED);

  // Synthetic listing status for a .snapshot directory entry. Listing-only.
  public final static HdfsFileStatus DOT_SNAPSHOT_DIR_STATUS =
      new HdfsFileStatus(0, true, 0, 0, 0, 0, null, null, null, null,
          HdfsFileStatus.EMPTY_NAME, -1L, 0, null,
          HdfsConstants.BLOCK_STORAGE_POLICY_ID_UNSPECIFIED);

  INodeDirectory rootDir;
  private final FSNamesystem namesystem;
  private volatile boolean skipQuotaCheck = false; //skip while consuming edits
  private final int maxComponentLength;
  private final int maxDirItems;
  private final int lsLimit; // max list limit
  private final int contentCountLimit; // max content summary counts per run
  private final long contentSleepMicroSec;
  private final INodeMap inodeMap; // Synchronized by dirLock
  private long yieldCount = 0; // keep track of lock yield count.
  // Number of threads used for the initial quota computation at startup.
  private int quotaInitThreads;

  private final int inodeXAttrsLimit; //inode xattrs max limit

  // A set of directories that have been protected using the
  // dfs.namenode.protected.directories setting. These directories cannot
  // be deleted unless they are empty.
  //
  // Each entry in this set must be a normalized path.
  private final SortedSet<String> protectedDirectories;

  // lock to protect the directory and BlockMap
  private final ReentrantReadWriteLock dirLock;

  private final boolean isPermissionEnabled;
  /**
   * Support for ACLs is controlled by a configuration flag. If the
   * configuration flag is false, then the NameNode will reject all
   * ACL-related operations.
   */
  private final boolean aclsEnabled;
  private final boolean xattrsEnabled;
  private final int xattrMaxSize;

  // precision of access times.
  private final long accessTimePrecision;
  // whether setStoragePolicy is allowed.
  private final boolean storagePolicyEnabled;
  // whether quota by storage type is allowed
  private final boolean quotaByStorageTypeEnabled;

  private final String fsOwnerShortUserName;
  private final String supergroup;
  private final INodeId inodeId;

  private final FSEditLog editLog;

  // Populated by createReservedStatuses() once the FS image is loaded; see
  // getReservedStatuses() for why this cannot be set at construction time.
  private HdfsFileStatus[] reservedStatuses;

  private INodeAttributeProvider attributeProvider;

  /** Install an optional plugin that overrides inode attribute lookup. */
  public void setINodeAttributeProvider(INodeAttributeProvider provider) {
    attributeProvider = provider;
  }

  // utility methods to acquire and release read lock and write lock
  void readLock() {
    this.dirLock.readLock().lock();
  }

  void readUnlock() {
    this.dirLock.readLock().unlock();
  }

  void writeLock() {
    this.dirLock.writeLock().lock();
  }

  void writeUnlock() {
    this.dirLock.writeLock().unlock();
  }

  boolean hasWriteLock() {
    return this.dirLock.isWriteLockedByCurrentThread();
  }

  // A holder of the write lock implicitly holds the read lock as well.
  boolean hasReadLock() {
    return this.dirLock.getReadHoldCount() > 0 || hasWriteLock();
  }

  public int getReadHoldCount() {
    return this.dirLock.getReadHoldCount();
  }

  public int getWriteHoldCount() {
    return this.dirLock.getWriteHoldCount();
  }

  @VisibleForTesting
  public final EncryptionZoneManager ezManager;

  /**
   * Caches frequently used file names used in {@link INode} to reuse
   * byte[] objects and reduce heap usage.
   */
  private final NameCache<ByteArray> nameCache;

  // used to specify path resolution type. *_LINK will return symlinks instead
  // of throwing an unresolved exception
  public enum DirOp {
    READ,
    READ_LINK,
    WRITE,  // disallows snapshot paths.
    WRITE_LINK,
    CREATE, // like write, but also blocks invalid path names.
    CREATE_LINK;
  };

  /**
   * Build the in-memory namespace: creates the root inode, the inode map,
   * and reads every directory-related limit/flag from the configuration,
   * validating xattr size/count and directory item limits via Preconditions.
   */
  FSDirectory(FSNamesystem ns, Configuration conf) throws IOException {
    this.dirLock = new ReentrantReadWriteLock(true); // fair
    this.inodeId = new INodeId();
    rootDir = createRoot(ns);
    inodeMap = INodeMap.newInstance(rootDir);
    this.isPermissionEnabled = conf.getBoolean(
      DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY,
      DFSConfigKeys.DFS_PERMISSIONS_ENABLED_DEFAULT);
    this.fsOwnerShortUserName =
      UserGroupInformation.getCurrentUser().getShortUserName();
    this.supergroup = conf.get(
      DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY,
      DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT);
    this.aclsEnabled = conf.getBoolean(
        DFSConfigKeys.DFS_NAMENODE_ACLS_ENABLED_KEY,
        DFSConfigKeys.DFS_NAMENODE_ACLS_ENABLED_DEFAULT);
    LOG.info("ACLs enabled? " + aclsEnabled);
    this.xattrsEnabled = conf.getBoolean(
        DFSConfigKeys.DFS_NAMENODE_XATTRS_ENABLED_KEY,
        DFSConfigKeys.DFS_NAMENODE_XATTRS_ENABLED_DEFAULT);
    LOG.info("XAttrs enabled? " + xattrsEnabled);
    this.xattrMaxSize = conf.getInt(
        DFSConfigKeys.DFS_NAMENODE_MAX_XATTR_SIZE_KEY,
        DFSConfigKeys.DFS_NAMENODE_MAX_XATTR_SIZE_DEFAULT);
    Preconditions.checkArgument(xattrMaxSize > 0,
        "The maximum size of an xattr should be > 0: (%s).",
        DFSConfigKeys.DFS_NAMENODE_MAX_XATTR_SIZE_KEY);
    Preconditions.checkArgument(xattrMaxSize <=
        DFSConfigKeys.DFS_NAMENODE_MAX_XATTR_SIZE_HARD_LIMIT,
        "The maximum size of an xattr should be <= maximum size"
        + " hard limit " + DFSConfigKeys.DFS_NAMENODE_MAX_XATTR_SIZE_HARD_LIMIT
        + ": (%s).", DFSConfigKeys.DFS_NAMENODE_MAX_XATTR_SIZE_KEY);

    this.accessTimePrecision = conf.getLong(
        DFS_NAMENODE_ACCESSTIME_PRECISION_KEY,
        DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT);

    this.storagePolicyEnabled =
        conf.getBoolean(DFS_STORAGE_POLICY_ENABLED_KEY,
                        DFS_STORAGE_POLICY_ENABLED_DEFAULT);

    this.quotaByStorageTypeEnabled =
        conf.getBoolean(DFS_QUOTA_BY_STORAGETYPE_ENABLED_KEY,
                        DFS_QUOTA_BY_STORAGETYPE_ENABLED_DEFAULT);

    // A non-positive configured list limit falls back to the default.
    int configuredLimit = conf.getInt(
        DFSConfigKeys.DFS_LIST_LIMIT, DFSConfigKeys.DFS_LIST_LIMIT_DEFAULT);
    this.lsLimit = configuredLimit>0 ?
        configuredLimit : DFSConfigKeys.DFS_LIST_LIMIT_DEFAULT;
    this.contentCountLimit = conf.getInt(
        DFSConfigKeys.DFS_CONTENT_SUMMARY_LIMIT_KEY,
        DFSConfigKeys.DFS_CONTENT_SUMMARY_LIMIT_DEFAULT);
    this.contentSleepMicroSec = conf.getLong(
        DFSConfigKeys.DFS_CONTENT_SUMMARY_SLEEP_MICROSEC_KEY,
        DFSConfigKeys.DFS_CONTENT_SUMMARY_SLEEP_MICROSEC_DEFAULT);

    // filesystem limits
    this.maxComponentLength = conf.getInt(
        DFSConfigKeys.DFS_NAMENODE_MAX_COMPONENT_LENGTH_KEY,
        DFSConfigKeys.DFS_NAMENODE_MAX_COMPONENT_LENGTH_DEFAULT);
    this.maxDirItems = conf.getInt(
        DFSConfigKeys.DFS_NAMENODE_MAX_DIRECTORY_ITEMS_KEY,
        DFSConfigKeys.DFS_NAMENODE_MAX_DIRECTORY_ITEMS_DEFAULT);
    this.inodeXAttrsLimit = conf.getInt(
        DFSConfigKeys.DFS_NAMENODE_MAX_XATTRS_PER_INODE_KEY,
        DFSConfigKeys.DFS_NAMENODE_MAX_XATTRS_PER_INODE_DEFAULT);

    this.protectedDirectories = parseProtectedDirectories(conf);

    Preconditions.checkArgument(this.inodeXAttrsLimit >= 0,
        "Cannot set a negative limit on the number of xattrs per inode (%s).",
        DFSConfigKeys.DFS_NAMENODE_MAX_XATTRS_PER_INODE_KEY);
    // We need a maximum maximum because by default, PB limits message sizes
    // to 64MB. This means we can only store approximately 6.7 million entries
    // per directory, but let's use 6.4 million for some safety.
    final int MAX_DIR_ITEMS = 64 * 100 * 1000;
    Preconditions.checkArgument(
        maxDirItems > 0 && maxDirItems <= MAX_DIR_ITEMS, "Cannot set "
            + DFSConfigKeys.DFS_NAMENODE_MAX_DIRECTORY_ITEMS_KEY
            + " to a value less than 1 or greater than " + MAX_DIR_ITEMS);

    int threshold = conf.getInt(
        DFSConfigKeys.DFS_NAMENODE_NAME_CACHE_THRESHOLD_KEY,
        DFSConfigKeys.DFS_NAMENODE_NAME_CACHE_THRESHOLD_DEFAULT);
    NameNode.LOG.info("Caching file names occurring more than " + threshold
        + " times");
    nameCache = new NameCache<ByteArray>(threshold);
    namesystem = ns;
    this.editLog = ns.getEditLog();
    ezManager = new EncryptionZoneManager(this, conf);

    this.quotaInitThreads = conf.getInt(
        DFSConfigKeys.DFS_NAMENODE_QUOTA_INIT_THREADS_KEY,
        DFSConfigKeys.DFS_NAMENODE_QUOTA_INIT_THREADS_DEFAULT);
  }

  /**
   * Get HdfsFileStatuses of the reserved paths: .inodes and raw.
   *
   * @return Array of HdfsFileStatus
   */
  HdfsFileStatus[] getReservedStatuses() {
    Preconditions.checkNotNull(reservedStatuses, "reservedStatuses should "
        + " not be null. It is populated when FSNamesystem loads FS image."
        + " It has to be set at this time instead of initialization time"
        + " because CTime is loaded during FSNamesystem#loadFromDisk.");
    return reservedStatuses;
  }

  /**
   * Create HdfsFileStatuses of the reserved paths: .inodes and raw.
   * These statuses are solely for listing purpose. All other operations
   * on the reserved dirs are disallowed.
   * Operations on sub directories are resolved by
   * {@link FSDirectory#resolvePath(String, byte[][], FSDirectory)}
   * and conducted directly, without the need to check the reserved dirs.
   *
   * This method should only be invoked once during namenode initialization.
   *
   * @param cTime CTime of the file system
   * @return Array of HdfsFileStatus
   */
  void createReservedStatuses(long cTime) {
    HdfsFileStatus inodes = new HdfsFileStatus(0, true, 0, 0, cTime, cTime,
        new FsPermission((short) 0770), null, supergroup, null,
        DOT_INODES, -1L, 0, null,
        HdfsConstants.BLOCK_STORAGE_POLICY_ID_UNSPECIFIED);
    HdfsFileStatus raw = new HdfsFileStatus(0, true, 0, 0, cTime, cTime,
        new FsPermission((short) 0770), null, supergroup, null, RAW, -1L,
        0, null, HdfsConstants.BLOCK_STORAGE_POLICY_ID_UNSPECIFIED);
    reservedStatuses = new HdfsFileStatus[] { inodes, raw };
  }

  FSNamesystem getFSNamesystem() {
    return namesystem;
  }

  /**
   * Parse configuration setting dfs.namenode.protected.directories to
   * retrieve the set of protected directories.
   *
   * @param conf
   * @return a TreeSet
   */
  @VisibleForTesting
  static SortedSet<String> parseProtectedDirectories(Configuration conf) {
    // Normalize each input path to guard against administrator error.
    return new TreeSet<>(normalizePaths(
        conf.getTrimmedStringCollection(FS_PROTECTED_DIRECTORIES),
        FS_PROTECTED_DIRECTORIES));
  }

  SortedSet<String> getProtectedDirectories() {
    return protectedDirectories;
  }

  BlockManager getBlockManager() {
    return getFSNamesystem().getBlockManager();
  }

  KeyProviderCryptoExtension getProvider() {
    return getFSNamesystem().getProvider();
  }

  /** @return the root directory inode.
   */
  public INodeDirectory getRoot() {
    return rootDir;
  }

  public BlockStoragePolicySuite getBlockStoragePolicySuite() {
    return getBlockManager().getStoragePolicySuite();
  }

  boolean isPermissionEnabled() {
    return isPermissionEnabled;
  }
  boolean isAclsEnabled() {
    return aclsEnabled;
  }
  boolean isXattrsEnabled() {
    return xattrsEnabled;
  }
  int getXattrMaxSize() { return xattrMaxSize; }
  boolean isStoragePolicyEnabled() {
    return storagePolicyEnabled;
  }
  boolean isAccessTimeSupported() {
    return accessTimePrecision > 0;
  }
  long getAccessTimePrecision() {
    return accessTimePrecision;
  }
  boolean isQuotaByStorageTypeEnabled() {
    return quotaByStorageTypeEnabled;
  }


  int getLsLimit() {
    return lsLimit;
  }

  int getContentCountLimit() {
    return contentCountLimit;
  }

  long getContentSleepMicroSec() {
    return contentSleepMicroSec;
  }

  int getInodeXAttrsLimit() {
    return inodeXAttrsLimit;
  }

  FSEditLog getEditLog() {
    return editLog;
  }

  /**
   * Shutdown the filestore
   */
  @Override
  public void close() throws IOException {}

  void markNameCacheInitialized() {
    writeLock();
    try {
      nameCache.initialized();
    } finally {
      writeUnlock();
    }
  }

  boolean shouldSkipQuotaChecks() {
    return skipQuotaCheck;
  }

  /** Enable quota verification */
  void enableQuotaChecks() {
    skipQuotaCheck = false;
  }

  /** Disable quota verification */
  void disableQuotaChecks() {
    skipQuotaCheck = true;
  }

  /**
   * Resolves a given path into an INodesInPath. All ancestor inodes that
   * exist are validated as traversable directories. Symlinks in the ancestry
   * will generate an UnresolvedLinkException. The returned IIP will be an
   * accessible path that also passed additional sanity checks based on how
   * the path will be used as specified by the DirOp.
   *   READ:   Expands reserved paths and performs permission checks
   *           during traversal. Raw paths are only accessible by a superuser.
   *   WRITE:  In addition to READ checks, ensures the path is not a
   *           snapshot path.
   *   CREATE: In addition to WRITE checks, ensures path does not contain
   *           illegal character sequences.
   *
   * @param pc A permission checker for traversal checks. Pass null for
   *           no permission checks.
   * @param src The path to resolve.
   * @param dirOp The {@link DirOp} that controls additional checks.
   * @return the INodesInPath for {@code src}, with reserved paths
   *         (/.reserved/.inodes, /.reserved/raw) already expanded to the
   *         corresponding real path components.
   * @throws FileNotFoundException
   * @throws AccessControlException
   * @throws ParentNotDirectoryException
   * @throws UnresolvedLinkException
   */
  @VisibleForTesting
  public INodesInPath resolvePath(FSPermissionChecker pc, String src,
      DirOp dirOp) throws UnresolvedLinkException, FileNotFoundException,
      AccessControlException, ParentNotDirectoryException {
    boolean isCreate = (dirOp == DirOp.CREATE || dirOp == DirOp.CREATE_LINK);
    // prevent creation of new invalid paths
    if (isCreate && !DFSUtil.isValidName(src)) {
      throw new InvalidPathException("Invalid file name: " + src);
    }

    byte[][] components = INode.getPathComponents(src);
    // raw paths require superuser privilege; checked before any resolution
    boolean isRaw = isReservedRawName(components);
    if (isPermissionEnabled && pc != null && isRaw) {
      pc.checkSuperuserPrivilege();
    }
    components = resolveComponents(components, this);
    INodesInPath iip = INodesInPath.resolve(rootDir, components, isRaw);
    // verify all ancestors are dirs and traversable. note that only
    // methods that create new namespace items have the signature to throw
    // PNDE
    try {
      checkTraverse(pc, iip, dirOp);
    } catch (ParentNotDirectoryException pnde) {
      if (!isCreate) {
        throw new AccessControlException(pnde.getMessage());
      }
      throw pnde;
    }
    return iip;
  }

  INodesInPath resolvePath(FSPermissionChecker pc, String src, long fileId)
      throws UnresolvedLinkException, FileNotFoundException,
      AccessControlException, ParentNotDirectoryException {
    // Older clients may not have given us an inode ID to work with.
    // In this case, we have to try to resolve the path and hope it
    // hasn't changed or been deleted since the file was opened for write.
    INodesInPath iip;
    if (fileId == HdfsConstants.GRANDFATHER_INODE_ID) {
      iip = resolvePath(pc, src, DirOp.WRITE);
    } else {
      INode inode = getInode(fileId);
      if (inode == null) {
        iip = INodesInPath.fromComponents(INode.getPathComponents(src));
      } else {
        iip = INodesInPath.fromINode(inode);
      }
    }
    return iip;
  }

  // this method can be removed after IIP is used more extensively
  static String resolvePath(String src,
      FSDirectory fsd) throws FileNotFoundException {
    byte[][] pathComponents = INode.getPathComponents(src);
    pathComponents = resolveComponents(pathComponents, fsd);
    return DFSUtil.byteArray2PathString(pathComponents);
  }

  /**
   * @return true if the path is a non-empty directory; otherwise, return false.
   */
  boolean isNonEmptyDirectory(INodesInPath inodesInPath) {
    readLock();
    try {
      final INode inode = inodesInPath.getLastINode();
      if (inode == null || !inode.isDirectory()) {
        //not found or not a directory
        return false;
      }
      final int s = inodesInPath.getPathSnapshotId();
      return !inode.asDirectory().getChildrenList(s).isEmpty();
    } finally {
      readUnlock();
    }
  }

  /**
   * Check whether the filepath could be created
   * @throws SnapshotAccessControlException if path is in RO snapshot
   */
  boolean isValidToCreate(String src, INodesInPath iip)
      throws SnapshotAccessControlException {
    String srcs = normalizePath(src);
    return srcs.startsWith("/") && !srcs.endsWith("/") &&
        iip.getLastINode() == null;
  }

  /**
   * Tell the block manager to update the replication factors when delete
   * happens. Deleting a file or a snapshot might decrease the replication
   * factor of the blocks as the blocks are always replicated to the highest
   * replication factor among all snapshots.
   */
  void updateReplicationFactor(Collection<UpdatedReplicationInfo> blocks) {
    BlockManager bm = getBlockManager();
    for (UpdatedReplicationInfo e : blocks) {
      BlockInfo b = e.block();
      bm.setReplication(b.getReplication(), e.targetReplication(), b);
    }
  }

  /**
   * Update the count of each directory with quota in the namespace.
   * A directory's count is defined as the total number inodes in the tree
   * rooted at the directory.
   *
   * This is an update of existing state of the filesystem and does not
   * throw QuotaExceededException.
   */
  void updateCountForQuota(int initThreads) {
    writeLock();
    try {
      // clamp to at least one worker thread
      int threads = (initThreads < 1) ? 1 : initThreads;
      LOG.info("Initializing quota with " + threads + " thread(s)");
      long start = Time.now();
      QuotaCounts counts = new QuotaCounts.Builder().build();
      ForkJoinPool p = new ForkJoinPool(threads);
      RecursiveAction task = new InitQuotaTask(getBlockStoragePolicySuite(),
          rootDir.getStoragePolicyID(), rootDir, counts);
      p.execute(task);
      task.join();
      p.shutdown();
      LOG.info("Quota initialization completed in " + (Time.now() - start) +
          " milliseconds\n" + counts);
    } finally {
      writeUnlock();
    }
  }

  void updateCountForQuota() {
    updateCountForQuota(quotaInitThreads);
  }

  /**
   * parallel initialization using fork-join.
657 */ 658 private static class InitQuotaTask extends RecursiveAction { 659 private final INodeDirectory dir; 660 private final QuotaCounts counts; 661 private final BlockStoragePolicySuite bsps; 662 private final byte blockStoragePolicyId; 663 664 public InitQuotaTask(BlockStoragePolicySuite bsps, 665 byte blockStoragePolicyId, INodeDirectory dir, QuotaCounts counts) { 666 this.dir = dir; 667 this.counts = counts; 668 this.bsps = bsps; 669 this.blockStoragePolicyId = blockStoragePolicyId; 670 } 671 672 public void compute() { 673 QuotaCounts myCounts = new QuotaCounts.Builder().build(); 674 dir.computeQuotaUsage4CurrentDirectory(bsps, blockStoragePolicyId, 675 myCounts); 676 677 ReadOnlyList<INode> children = 678 dir.getChildrenList(CURRENT_STATE_ID); 679 680 if (children.size() > 0) { 681 List<InitQuotaTask> subtasks = new ArrayList<InitQuotaTask>(); 682 for (INode child : children) { 683 final byte childPolicyId = 684 child.getStoragePolicyIDForQuota(blockStoragePolicyId); 685 if (child.isDirectory()) { 686 subtasks.add(new InitQuotaTask(bsps, childPolicyId, 687 child.asDirectory(), myCounts)); 688 } else { 689 // file or symlink. count using the local counts variable 690 myCounts.add(child.computeQuotaUsage(bsps, childPolicyId, false, 691 CURRENT_STATE_ID)); 692 } 693 } 694 // invoke and wait for completion 695 invokeAll(subtasks); 696 } 697 698 if (dir.isQuotaSet()) { 699 // check if quota is violated. It indicates a software bug. 
700 final QuotaCounts q = dir.getQuotaCounts(); 701 702 final long nsConsumed = myCounts.getNameSpace(); 703 final long nsQuota = q.getNameSpace(); 704 if (Quota.isViolated(nsQuota, nsConsumed)) { 705 LOG.warn("Namespace quota violation in image for " 706 + dir.getFullPathName() 707 + " quota = " + nsQuota + " < consumed = " + nsConsumed); 708 } 709 710 final long ssConsumed = myCounts.getStorageSpace(); 711 final long ssQuota = q.getStorageSpace(); 712 if (Quota.isViolated(ssQuota, ssConsumed)) { 713 LOG.warn("Storagespace quota violation in image for " 714 + dir.getFullPathName() 715 + " quota = " + ssQuota + " < consumed = " + ssConsumed); 716 } 717 718 final EnumCounters<StorageType> tsConsumed = myCounts.getTypeSpaces(); 719 for (StorageType t : StorageType.getTypesSupportingQuota()) { 720 final long typeSpace = tsConsumed.get(t); 721 final long typeQuota = q.getTypeSpaces().get(t); 722 if (Quota.isViolated(typeQuota, typeSpace)) { 723 LOG.warn("Storage type quota violation in image for " 724 + dir.getFullPathName() 725 + " type = " + t.toString() + " quota = " 726 + typeQuota + " < consumed " + typeSpace); 727 } 728 } 729 if (LOG.isDebugEnabled()) { 730 LOG.debug("Setting quota for " + dir + "\n" + myCounts); 731 } 732 dir.getDirectoryWithQuotaFeature().setSpaceConsumed(nsConsumed, 733 ssConsumed, tsConsumed); 734 } 735 736 synchronized(counts) { 737 counts.add(myCounts); 738 } 739 } 740 } 741 742 /** Updates namespace, storagespace and typespaces consumed for all 743 * directories until the parent directory of file represented by path. 
744 * 745 * @param iip the INodesInPath instance containing all the INodes for 746 * updating quota usage 747 * @param nsDelta the delta change of namespace 748 * @param ssDelta the delta change of storage space consumed without replication 749 * @param replication the replication factor of the block consumption change 750 * @throws QuotaExceededException if the new count violates any quota limit 751 * @throws FileNotFoundException if path does not exist. 752 */ 753 void updateSpaceConsumed(INodesInPath iip, long nsDelta, long ssDelta, short replication) 754 throws QuotaExceededException, FileNotFoundException, 755 UnresolvedLinkException, SnapshotAccessControlException { 756 writeLock(); 757 try { 758 if (iip.getLastINode() == null) { 759 throw new FileNotFoundException("Path not found: " + iip.getPath()); 760 } 761 updateCount(iip, nsDelta, ssDelta, replication, true); 762 } finally { 763 writeUnlock(); 764 } 765 } 766 767 public void updateCount(INodesInPath iip, INode.QuotaDelta quotaDelta, 768 boolean check) throws QuotaExceededException { 769 QuotaCounts counts = quotaDelta.getCountsCopy(); 770 updateCount(iip, iip.length() - 1, counts.negation(), check); 771 Map<INode, QuotaCounts> deltaInOtherPaths = quotaDelta.getUpdateMap(); 772 for (Map.Entry<INode, QuotaCounts> entry : deltaInOtherPaths.entrySet()) { 773 INodesInPath path = INodesInPath.fromINode(entry.getKey()); 774 updateCount(path, path.length() - 1, entry.getValue().negation(), check); 775 } 776 for (Map.Entry<INodeDirectory, QuotaCounts> entry : 777 quotaDelta.getQuotaDirMap().entrySet()) { 778 INodeDirectory quotaDir = entry.getKey(); 779 quotaDir.getDirectoryWithQuotaFeature().addSpaceConsumed2Cache( 780 entry.getValue().negation()); 781 } 782 } 783 784 /** 785 * Update the quota usage after deletion. The quota update is only necessary 786 * when image/edits have been loaded and the file/dir to be deleted is not 787 * contained in snapshots. 
788 */ 789 void updateCountForDelete(final INode inode, final INodesInPath iip) { 790 if (getFSNamesystem().isImageLoaded() && 791 !inode.isInLatestSnapshot(iip.getLatestSnapshotId())) { 792 QuotaCounts counts = inode.computeQuotaUsage(getBlockStoragePolicySuite()); 793 unprotectedUpdateCount(iip, iip.length() - 1, counts.negation()); 794 } 795 } 796 797 /** 798 * Update usage count without replication factor change 799 */ 800 void updateCount(INodesInPath iip, long nsDelta, long ssDelta, short replication, 801 boolean checkQuota) throws QuotaExceededException { 802 final INodeFile fileINode = iip.getLastINode().asFile(); 803 EnumCounters<StorageType> typeSpaceDeltas = 804 getStorageTypeDeltas(fileINode.getStoragePolicyID(), ssDelta, 805 replication, replication);; 806 updateCount(iip, iip.length() - 1, 807 new QuotaCounts.Builder().nameSpace(nsDelta).storageSpace(ssDelta * replication). 808 typeSpaces(typeSpaceDeltas).build(), 809 checkQuota); 810 } 811 812 /** 813 * Update usage count with replication factor change due to setReplication 814 */ 815 void updateCount(INodesInPath iip, long nsDelta, long ssDelta, short oldRep, 816 short newRep, boolean checkQuota) throws QuotaExceededException { 817 final INodeFile fileINode = iip.getLastINode().asFile(); 818 EnumCounters<StorageType> typeSpaceDeltas = 819 getStorageTypeDeltas(fileINode.getStoragePolicyID(), ssDelta, oldRep, newRep); 820 updateCount(iip, iip.length() - 1, 821 new QuotaCounts.Builder().nameSpace(nsDelta). 822 storageSpace(ssDelta * (newRep - oldRep)). 
823 typeSpaces(typeSpaceDeltas).build(), 824 checkQuota); 825 } 826 827 /** update count of each inode with quota 828 * 829 * @param iip inodes in a path 830 * @param numOfINodes the number of inodes to update starting from index 0 831 * @param counts the count of space/namespace/type usage to be update 832 * @param checkQuota if true then check if quota is exceeded 833 * @throws QuotaExceededException if the new count violates any quota limit 834 */ 835 void updateCount(INodesInPath iip, int numOfINodes, 836 QuotaCounts counts, boolean checkQuota) 837 throws QuotaExceededException { 838 assert hasWriteLock(); 839 if (!namesystem.isImageLoaded()) { 840 //still initializing. do not check or update quotas. 841 return; 842 } 843 if (numOfINodes > iip.length()) { 844 numOfINodes = iip.length(); 845 } 846 if (checkQuota && !skipQuotaCheck) { 847 verifyQuota(iip, numOfINodes, counts, null); 848 } 849 unprotectedUpdateCount(iip, numOfINodes, counts); 850 } 851 852 /** 853 * update quota of each inode and check to see if quota is exceeded. 
 * See {@link #updateCount(INodesInPath, int, QuotaCounts, boolean)}
 */
void updateCountNoQuotaCheck(INodesInPath inodesInPath,
    int numOfINodes, QuotaCounts counts) {
  assert hasWriteLock();
  try {
    updateCount(inodesInPath, numOfINodes, counts, false);
  } catch (QuotaExceededException e) {
    // Cannot happen when checkQuota is false; log loudly if it ever does.
    NameNode.LOG.error("BUG: unexpected exception ", e);
  }
}

/**
 * updates quota without verification
 * callers responsibility is to make sure quota is not exceeded
 */
static void unprotectedUpdateCount(INodesInPath inodesInPath,
    int numOfINodes, QuotaCounts counts) {
  // Apply the deltas to every ancestor (indexes 0 .. numOfINodes-1) that has
  // a quota feature set; others are skipped.
  for(int i=0; i < numOfINodes; i++) {
    if (inodesInPath.getINode(i).isQuotaSet()) { // a directory with quota
      inodesInPath.getINode(i).asDirectory().getDirectoryWithQuotaFeature()
          .addSpaceConsumed2Cache(counts);
    }
  }
}

/**
 * Update the cached quota space for a block that is being completed.
 * Must only be called once, as the block is being completed.
 * @param completeBlk - Completed block for which to update space
 * @param inodes - INodes in path to file containing completeBlk; if null
 *                 this will be resolved internally
 */
public void updateSpaceForCompleteBlock(BlockInfo completeBlk,
    INodesInPath inodes) throws IOException {
  assert namesystem.hasWriteLock();
  INodesInPath iip = inodes != null ? inodes :
      INodesInPath.fromINode(namesystem.getBlockCollection(completeBlk));
  INodeFile fileINode = iip.getLastINode().asFile();
  // Adjust disk space consumption if required: diff is the unused tail of the
  // preferred block size that was charged up-front and must be released.
  final long diff =
      fileINode.getPreferredBlockSize() - completeBlk.getNumBytes();
  if (diff > 0) {
    try {
      updateSpaceConsumed(iip, 0, -diff, fileINode.getFileReplication());
    } catch (IOException e) {
      LOG.warn("Unexpected exception while updating disk space.", e);
    }
  }
}

/**
 * Compute per-storage-type space deltas for a replication/space change.
 *
 * @param storagePolicyID block storage policy of the file; when unspecified,
 *                        no per-type deltas are produced
 * @param dsDelta storage space delta per replica (0 means an empty file)
 * @param oldRep old replication factor
 * @param newRep new replication factor
 * @return per-storage-type deltas (only for types supporting type quota)
 */
public EnumCounters<StorageType> getStorageTypeDeltas(byte storagePolicyID,
    long dsDelta, short oldRep, short newRep) {
  EnumCounters<StorageType> typeSpaceDeltas =
      new EnumCounters<StorageType>(StorageType.class);
  // empty file
  if(dsDelta == 0){
    return typeSpaceDeltas;
  }
  // Storage type and its quota are only available when storage policy is set
  if (storagePolicyID != HdfsConstants.BLOCK_STORAGE_POLICY_ID_UNSPECIFIED) {
    BlockStoragePolicy storagePolicy = getBlockManager().getStoragePolicy(storagePolicyID);

    if (oldRep != newRep) {
      // Replication changed: release the space charged under the old
      // replication's chosen storage types ...
      List<StorageType> oldChosenStorageTypes =
          storagePolicy.chooseStorageTypes(oldRep);

      for (StorageType t : oldChosenStorageTypes) {
        if (!t.supportTypeQuota()) {
          continue;
        }
        // NOTE(review): negative deltas appear to be disallowed on this path
        // when replication changes — TODO confirm against callers.
        Preconditions.checkArgument(dsDelta > 0);
        typeSpaceDeltas.add(t, -dsDelta);
      }
    }

    // ... then charge it under the new replication's chosen storage types.
    List<StorageType> newChosenStorageTypes =
        storagePolicy.chooseStorageTypes(newRep);

    for (StorageType t : newChosenStorageTypes) {
      if (!t.supportTypeQuota()) {
        continue;
      }
      typeSpaceDeltas.add(t, dsDelta);
    }
  }
  return typeSpaceDeltas;
}

/**
 * Add the given child to the namespace.
 * @param existing the INodesInPath containing all the ancestral INodes
 * @param child the new INode to add
 * @return a new INodesInPath instance containing the new child INode. Null
 *         if the adding fails.
949 * @throws QuotaExceededException is thrown if it violates quota limit 950 */ 951 INodesInPath addINode(INodesInPath existing, INode child) 952 throws QuotaExceededException, UnresolvedLinkException { 953 cacheName(child); 954 writeLock(); 955 try { 956 return addLastINode(existing, child, true); 957 } finally { 958 writeUnlock(); 959 } 960 } 961 962 /** 963 * Verify quota for adding or moving a new INode with required 964 * namespace and storagespace to a given position. 965 * 966 * @param iip INodes corresponding to a path 967 * @param pos position where a new INode will be added 968 * @param deltas needed namespace, storagespace and storage types 969 * @param commonAncestor Last node in inodes array that is a common ancestor 970 * for a INode that is being moved from one location to the other. 971 * Pass null if a node is not being moved. 972 * @throws QuotaExceededException if quota limit is exceeded. 973 */ 974 static void verifyQuota(INodesInPath iip, int pos, QuotaCounts deltas, 975 INode commonAncestor) throws QuotaExceededException { 976 if (deltas.getNameSpace() <= 0 && deltas.getStorageSpace() <= 0 977 && deltas.getTypeSpaces().allLessOrEqual(0L)) { 978 // if quota is being freed or not being consumed 979 return; 980 } 981 982 // check existing components in the path 983 for(int i = (pos > iip.length() ? iip.length(): pos) - 1; i >= 0; i--) { 984 if (commonAncestor == iip.getINode(i)) { 985 // Stop checking for quota when common ancestor is reached 986 return; 987 } 988 final DirectoryWithQuotaFeature q 989 = iip.getINode(i).asDirectory().getDirectoryWithQuotaFeature(); 990 if (q != null) { // a directory with quota 991 try { 992 q.verifyQuota(deltas); 993 } catch (QuotaExceededException e) { 994 e.setPathName(iip.getPath(i)); 995 throw e; 996 } 997 } 998 } 999 } 1000 1001 /** Verify if the inode name is legal. 
*/ 1002 void verifyINodeName(byte[] childName) throws HadoopIllegalArgumentException { 1003 if (Arrays.equals(HdfsServerConstants.DOT_SNAPSHOT_DIR_BYTES, childName)) { 1004 String s = "\"" + HdfsConstants.DOT_SNAPSHOT_DIR + "\" is a reserved name."; 1005 if (!namesystem.isImageLoaded()) { 1006 s += " Please rename it before upgrade."; 1007 } 1008 throw new HadoopIllegalArgumentException(s); 1009 } 1010 } 1011 1012 /** 1013 * Verify child's name for fs limit. 1014 * 1015 * @param childName byte[] containing new child name 1016 * @param parentPath String containing parent path 1017 * @throws PathComponentTooLongException child's name is too long. 1018 */ 1019 void verifyMaxComponentLength(byte[] childName, String parentPath) 1020 throws PathComponentTooLongException { 1021 if (maxComponentLength == 0) { 1022 return; 1023 } 1024 1025 final int length = childName.length; 1026 if (length > maxComponentLength) { 1027 final PathComponentTooLongException e = new PathComponentTooLongException( 1028 maxComponentLength, length, parentPath, 1029 DFSUtil.bytes2String(childName)); 1030 if (namesystem.isImageLoaded()) { 1031 throw e; 1032 } else { 1033 // Do not throw if edits log is still being processed 1034 NameNode.LOG.error("ERROR in FSDirectory.verifyINodeName", e); 1035 } 1036 } 1037 } 1038 1039 /** 1040 * Verify children size for fs limit. 1041 * 1042 * @throws MaxDirectoryItemsExceededException too many children. 
 */
void verifyMaxDirItems(INodeDirectory parent, String parentPath)
    throws MaxDirectoryItemsExceededException {
  final int count = parent.getChildrenList(CURRENT_STATE_ID).size();
  if (count >= maxDirItems) {
    final MaxDirectoryItemsExceededException e
        = new MaxDirectoryItemsExceededException(parentPath, maxDirItems,
            count);
    if (namesystem.isImageLoaded()) {
      throw e;
    } else {
      // Do not throw if edits log is still being processed
      NameNode.LOG.error("FSDirectory.verifyMaxDirItems: "
          + e.getLocalizedMessage());
    }
  }
}

/**
 * Add a child to the end of the path specified by INodesInPath.
 * @return an INodesInPath instance containing the new INode
 */
@VisibleForTesting
public INodesInPath addLastINode(INodesInPath existing, INode inode,
    boolean checkQuota) throws QuotaExceededException {
  assert existing.getLastINode() != null &&
      existing.getLastINode().isDirectory();

  final int pos = existing.length();
  // Disallow creation of /.reserved. This may be created when loading
  // editlog/fsimage during upgrade since /.reserved was a valid name in older
  // release. This may also be called when a user tries to create a file
  // or directory /.reserved.
  if (pos == 1 && existing.getINode(0) == rootDir && isReservedName(inode)) {
    throw new HadoopIllegalArgumentException(
        "File name \"" + inode.getLocalName() + "\" is reserved and cannot "
            + "be created. If this is during upgrade change the name of the "
            + "existing file or directory to another name before upgrading "
            + "to the new release.");
  }
  final INodeDirectory parent = existing.getINode(pos - 1).asDirectory();
  // The filesystem limits are not really quotas, so this check may appear
  // odd. It's because a rename operation deletes the src, tries to add
  // to the dest, if that fails, re-adds the src from whence it came.
  // The rename code disables the quota when it's restoring to the
  // original location because a quota violation would cause the item
  // to go "poof". The fs limits must be bypassed for the same reason.
  if (checkQuota) {
    final String parentPath = existing.getPath();
    verifyMaxComponentLength(inode.getLocalNameBytes(), parentPath);
    verifyMaxDirItems(parent, parentPath);
  }
  // always verify inode name
  verifyINodeName(inode.getLocalNameBytes());

  // Charge the quota first; rolled back below if the add fails.
  final QuotaCounts counts = inode.computeQuotaUsage(getBlockStoragePolicySuite());
  updateCount(existing, pos, counts, checkQuota);

  // A non-null parent means the inode is being moved, not newly created.
  boolean isRename = (inode.getParent() != null);
  boolean added;
  try {
    added = parent.addChild(inode, true, existing.getLatestSnapshotId());
  } catch (QuotaExceededException e) {
    // Roll back the quota charged above before propagating.
    updateCountNoQuotaCheck(existing, pos, counts.negation());
    throw e;
  }
  if (!added) {
    updateCountNoQuotaCheck(existing, pos, counts.negation());
    return null;
  } else {
    if (!isRename) {
      AclStorage.copyINodeDefaultAcl(inode);
    }
    addToInodeMap(inode);
  }
  return INodesInPath.append(existing, inode, inode.getLocalNameBytes());
}

// Best-effort variant of addLastINode: a quota violation is logged as a bug
// (callers guarantee it cannot happen) and null is returned on failure.
INodesInPath addLastINodeNoQuotaCheck(INodesInPath existing, INode i) {
  try {
    return addLastINode(existing, i, false);
  } catch (QuotaExceededException e) {
    NameNode.LOG.warn("FSDirectory.addChildNoQuotaCheck - unexpected", e);
  }
  return null;
}

/**
 * Remove the last inode in the path from the namespace.
 * Note: the caller needs to update the ancestors' quota count.
 *
 * @return -1 for failing to remove;
 *         0 for removing a reference whose referred inode has other
 *         reference nodes;
 *         1 otherwise.
1138 */ 1139 @VisibleForTesting 1140 public long removeLastINode(final INodesInPath iip) { 1141 final int latestSnapshot = iip.getLatestSnapshotId(); 1142 final INode last = iip.getLastINode(); 1143 final INodeDirectory parent = iip.getINode(-2).asDirectory(); 1144 if (!parent.removeChild(last, latestSnapshot)) { 1145 return -1; 1146 } 1147 1148 return (!last.isInLatestSnapshot(latestSnapshot) 1149 && INodeReference.tryRemoveReference(last) > 0) ? 0 : 1; 1150 } 1151 1152 /** 1153 * Return a new collection of normalized paths from the given input 1154 * collection. The input collection is unmodified. 1155 * 1156 * Reserved paths, relative paths and paths with scheme are ignored. 1157 * 1158 * @param paths collection whose contents are to be normalized. 1159 * @return collection with all input paths normalized. 1160 */ 1161 static Collection<String> normalizePaths(Collection<String> paths, 1162 String errorString) { 1163 if (paths.isEmpty()) { 1164 return paths; 1165 } 1166 final Collection<String> normalized = new ArrayList<>(paths.size()); 1167 for (String dir : paths) { 1168 if (isReservedName(dir)) { 1169 LOG.error("{} ignoring reserved path {}", errorString, dir); 1170 } else { 1171 final Path path = new Path(dir); 1172 if (!path.isAbsolute()) { 1173 LOG.error("{} ignoring relative path {}", errorString, dir); 1174 } else if (path.toUri().getScheme() != null) { 1175 LOG.error("{} ignoring path {} with scheme", errorString, dir); 1176 } else { 1177 normalized.add(path.toString()); 1178 } 1179 } 1180 } 1181 return normalized; 1182 } 1183 1184 static String normalizePath(String src) { 1185 if (src.length() > 1 && src.endsWith("/")) { 1186 src = src.substring(0, src.length() - 1); 1187 } 1188 return src; 1189 } 1190 1191 @VisibleForTesting 1192 public long getYieldCount() { 1193 return yieldCount; 1194 } 1195 1196 void addYieldCount(long value) { 1197 yieldCount += value; 1198 } 1199 1200 public INodeMap getINodeMap() { 1201 return inodeMap; 1202 } 1203 1204 /** 
1205 * This method is always called with writeLock of FSDirectory held. 1206 */ 1207 public final void addToInodeMap(INode inode) { 1208 if (inode instanceof INodeWithAdditionalFields) { 1209 inodeMap.put(inode); 1210 if (!inode.isSymlink()) { 1211 final XAttrFeature xaf = inode.getXAttrFeature(); 1212 addEncryptionZone((INodeWithAdditionalFields) inode, xaf); 1213 } 1214 } 1215 } 1216 1217 private void addEncryptionZone(INodeWithAdditionalFields inode, 1218 XAttrFeature xaf) { 1219 if (xaf == null) { 1220 return; 1221 } 1222 XAttr xattr = xaf.getXAttr(CRYPTO_XATTR_ENCRYPTION_ZONE); 1223 if (xattr == null) { 1224 return; 1225 } 1226 try { 1227 final HdfsProtos.ZoneEncryptionInfoProto ezProto = 1228 HdfsProtos.ZoneEncryptionInfoProto.parseFrom( 1229 xattr.getValue()); 1230 ezManager.unprotectedAddEncryptionZone(inode.getId(), 1231 PBHelperClient.convert(ezProto.getSuite()), 1232 PBHelperClient.convert(ezProto.getCryptoProtocolVersion()), 1233 ezProto.getKeyName()); 1234 } catch (InvalidProtocolBufferException e) { 1235 NameNode.LOG.warn("Error parsing protocol buffer of " + 1236 "EZ XAttr " + xattr.getName() + " dir:" + inode.getFullPathName()); 1237 } 1238 } 1239 1240 /** 1241 * This is to handle encryption zone for rootDir when loading from 1242 * fsimage, and should only be called during NN restart. 1243 */ 1244 public final void addRootDirToEncryptionZone(XAttrFeature xaf) { 1245 addEncryptionZone(rootDir, xaf); 1246 } 1247 1248 /** 1249 * This method is always called with writeLock of FSDirectory held. 1250 */ 1251 public final void removeFromInodeMap(List<? extends INode> inodes) { 1252 if (inodes != null) { 1253 for (INode inode : inodes) { 1254 if (inode != null && inode instanceof INodeWithAdditionalFields) { 1255 inodeMap.remove(inode); 1256 ezManager.removeEncryptionZone(inode.getId()); 1257 } 1258 } 1259 } 1260 } 1261 1262 /** 1263 * Get the inode from inodeMap based on its inode id. 
 * @param id The given id
 * @return The inode associated with the given id
 */
public INode getInode(long id) {
  readLock();
  try {
    return inodeMap.get(id);
  } finally {
    readUnlock();
  }
}

@VisibleForTesting
int getInodeMapSize() {
  return inodeMap.size();
}

long totalInodes() {
  return getInodeMapSize();
}

/**
 * Reset the entire namespace tree.
 */
void reset() {
  writeLock();
  try {
    rootDir = createRoot(getFSNamesystem());
    inodeMap.clear();
    addToInodeMap(rootDir);
    nameCache.reset();
    inodeId.setCurrentValue(INodeId.LAST_RESERVED_ID);
  } finally {
    writeUnlock();
  }
}

/** Resolve the last inode of the path, failing if it does not exist. */
static INode resolveLastINode(INodesInPath iip) throws FileNotFoundException {
  INode inode = iip.getLastINode();
  if (inode == null) {
    throw new FileNotFoundException("cannot find " + iip.getPath());
  }
  return inode;
}

/**
 * Caches frequently used file names to reuse file name objects and
 * reduce heap size.
 */
void cacheName(INode inode) {
  // Name is cached only for files
  if (!inode.isFile()) {
    return;
  }
  ByteArray name = new ByteArray(inode.getLocalNameBytes());
  name = nameCache.put(name);
  if (name != null) {
    // Cache hit: share the canonical byte[] for this name.
    inode.setLocalName(name.getBytes());
  }
}

void shutdown() {
  nameCache.reset();
  inodeMap.clear();
}

/**
 * Given an INode get all the path components leading to it from the root.
 * If an Inode corresponding to C is given in /A/B/C, the returned
 * path components will be {root, A, B, C}.
 * Note that this method cannot handle scenarios where the inode is in a
 * snapshot.
 */
public static byte[][] getPathComponents(INode inode) {
  List<byte[]> components = new ArrayList<byte[]>();
  components.add(0, inode.getLocalNameBytes());
  // Walk up the parent chain, prepending each ancestor's name.
  while(inode.getParent() != null) {
    components.add(0, inode.getParent().getLocalNameBytes());
    inode = inode.getParent();
  }
  return components.toArray(new byte[components.size()][]);
}

/** Check if a given inode name is reserved */
public static boolean isReservedName(INode inode) {
  return CHECK_RESERVED_FILE_NAMES
      && Arrays.equals(inode.getLocalNameBytes(), DOT_RESERVED);
}

/** Check if a given path is reserved */
public static boolean isReservedName(String src) {
  return src.startsWith(DOT_RESERVED_PATH_PREFIX + Path.SEPARATOR);
}

public static boolean isExactReservedName(String src) {
  return CHECK_RESERVED_FILE_NAMES && src.equals(DOT_RESERVED_PATH_PREFIX);
}

public static boolean isExactReservedName(byte[][] components) {
  return CHECK_RESERVED_FILE_NAMES &&
      (components.length == 2) &&
      isReservedName(components);
}

static boolean isReservedRawName(String src) {
  return src.startsWith(DOT_RESERVED_PATH_PREFIX +
      Path.SEPARATOR + RAW_STRING);
}

static boolean isReservedInodesName(String src) {
  return src.startsWith(DOT_RESERVED_PATH_PREFIX +
      Path.SEPARATOR + DOT_INODES_STRING);
}

static boolean isReservedName(byte[][] components) {
  return (components.length > 1) &&
      Arrays.equals(INodeDirectory.ROOT_NAME, components[0]) &&
      Arrays.equals(DOT_RESERVED, components[1]);
}

static boolean isReservedRawName(byte[][] components) {
  return (components.length > 2) &&
      isReservedName(components) &&
      Arrays.equals(RAW, components[2]);
}

/**
 * Resolve a /.reserved/... path to a non-reserved path.
 * <p/>
 * There are two special hierarchies under /.reserved/:
 * <p/>
 * /.reserved/.inodes/&lt;inodeid&gt; performs a path lookup by inodeid,
 * <p/>
 * /.reserved/raw/... returns the encrypted (raw) bytes of a file in an
 * encryption zone. For instance, if /ezone is an encryption zone, then
 * /ezone/a refers to the decrypted file and /.reserved/raw/ezone/a refers to
 * the encrypted (raw) bytes of /ezone/a.
 * <p/>
 * Pathnames in the /.reserved/raw directory that resolve to files not in an
 * encryption zone are equivalent to the corresponding non-raw path. Hence,
 * if /a/b/c refers to a file that is not in an encryption zone, then
 * /.reserved/raw/a/b/c is equivalent (they both refer to the same
 * unencrypted file).
 *
 * @param pathComponents to be resolved
 * @param fsd FSDirectory
 * @return if the path indicates an inode, return path after replacing up to
 *         &lt;inodeid&gt; with the corresponding path of the inode, else the path
 *         in {@code pathComponents} as is. If the path refers to a path in
 *         the "raw" directory, return the non-raw pathname.
 * @throws FileNotFoundException if inodeid is invalid
 */
static byte[][] resolveComponents(byte[][] pathComponents,
    FSDirectory fsd) throws FileNotFoundException {
  final int nComponents = pathComponents.length;
  if (nComponents < 3 || !isReservedName(pathComponents)) {
    /* This is not a /.reserved/ path so do nothing. */
  } else if (Arrays.equals(DOT_INODES, pathComponents[2])) {
    /* It's a /.reserved/.inodes path. */
    if (nComponents > 3) {
      pathComponents = resolveDotInodesPath(pathComponents, fsd);
    }
  } else if (Arrays.equals(RAW, pathComponents[2])) {
    /* It's /.reserved/raw so strip off the /.reserved/raw prefix. */
    if (nComponents == 3) {
      // Bare /.reserved/raw resolves to the root directory.
      pathComponents = new byte[][]{INodeDirectory.ROOT_NAME};
    } else {
      if (nComponents == 4
          && Arrays.equals(DOT_RESERVED, pathComponents[3])) {
        /* It's /.reserved/raw/.reserved so don't strip */
      } else {
        pathComponents = constructRemainingPath(
            new byte[][]{INodeDirectory.ROOT_NAME}, pathComponents, 3);
      }
    }
  }
  return pathComponents;
}

/**
 * Resolve /.reserved/.inodes/&lt;inodeid&gt;/... into the inode's real path
 * plus any remaining components.
 *
 * @throws FileNotFoundException if the id is malformed or no inode has it
 */
private static byte[][] resolveDotInodesPath(
    byte[][] pathComponents, FSDirectory fsd)
    throws FileNotFoundException {
  // Component layout: [ "", ".reserved", ".inodes", <inodeid>, ... ]
  final String inodeId = DFSUtil.bytes2String(pathComponents[3]);
  final long id;
  try {
    id = Long.parseLong(inodeId);
  } catch (NumberFormatException e) {
    throw new FileNotFoundException("Invalid inode path: " +
        DFSUtil.byteArray2PathString(pathComponents));
  }
  if (id == INodeId.ROOT_INODE_ID && pathComponents.length == 4) {
    return new byte[][]{INodeDirectory.ROOT_NAME};
  }
  INode inode = fsd.getInode(id);
  if (inode == null) {
    throw new FileNotFoundException(
        "File for given inode path does not exist: " +
            DFSUtil.byteArray2PathString(pathComponents));
  }

  // Handle single ".." for NFS lookup support.
  if ((pathComponents.length > 4)
      && Arrays.equals(pathComponents[4], DOT_DOT)) {
    INode parent = inode.getParent();
    if (parent == null || parent.getId() == INodeId.ROOT_INODE_ID) {
      // inode is root, or its parent is root.
      return new byte[][]{INodeDirectory.ROOT_NAME};
    }
    return parent.getPathComponents();
  }
  return constructRemainingPath(
      inode.getPathComponents(), pathComponents, 4);
}

/**
 * Append extraComponents[startAt..] onto components and return the result.
 */
private static byte[][] constructRemainingPath(byte[][] components,
    byte[][] extraComponents, int startAt) {
  int remainder = extraComponents.length - startAt;
  if (remainder > 0) {
    // grow the array and copy in the remaining components
    int pos = components.length;
    components = Arrays.copyOf(components, pos + remainder);
    System.arraycopy(extraComponents, startAt, components, pos, remainder);
  }
  if (NameNode.LOG.isDebugEnabled()) {
    NameNode.LOG.debug(
        "Resolved path is " + DFSUtil.byteArray2PathString(components));
  }
  return components;
}

/**
 * Return the snapshottable directory a ".snapshot" path refers to,
 * or null when the parent is missing, not a directory, or not snapshottable.
 */
INode getINode4DotSnapshot(INodesInPath iip) throws UnresolvedLinkException {
  Preconditions.checkArgument(
      iip.isDotSnapshotDir(), "%s does not end with %s",
      iip.getPath(), HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR);

  final INode node = iip.getINode(-2);
  if (node != null && node.isDirectory()
      && node.asDirectory().isSnapshottable()) {
    return node;
  }
  return null;
}

/**
 * Resolves the given path into inodes. Reserved paths are not handled and
 * permissions are not verified. Client supplied paths should be
 * resolved via {@link #resolvePath(FSPermissionChecker, String, DirOp)}.
 * This method should only be used by internal methods.
 * @return the {@link INodesInPath} containing all inodes in the path.
1513 * @throws UnresolvedLinkException 1514 * @throws ParentNotDirectoryException 1515 * @throws AccessControlException 1516 */ 1517 public INodesInPath getINodesInPath(String src, DirOp dirOp) 1518 throws UnresolvedLinkException, AccessControlException, 1519 ParentNotDirectoryException { 1520 return getINodesInPath(INode.getPathComponents(src), dirOp); 1521 } 1522 1523 public INodesInPath getINodesInPath(byte[][] components, DirOp dirOp) 1524 throws UnresolvedLinkException, AccessControlException, 1525 ParentNotDirectoryException { 1526 INodesInPath iip = INodesInPath.resolve(rootDir, components); 1527 checkTraverse(null, iip, dirOp); 1528 return iip; 1529 } 1530 1531 /** 1532 * Get {@link INode} associated with the file / directory. 1533 * See {@link #getINode(String, DirOp)} 1534 */ 1535 @VisibleForTesting // should be removed after a lot of tests are updated 1536 public INode getINode(String src) throws UnresolvedLinkException, 1537 AccessControlException, ParentNotDirectoryException { 1538 return getINode(src, DirOp.READ); 1539 } 1540 1541 /** 1542 * Get {@link INode} associated with the file / directory. 1543 * See {@link #getINode(String, DirOp)} 1544 */ 1545 @VisibleForTesting // should be removed after a lot of tests are updated 1546 public INode getINode4Write(String src) throws UnresolvedLinkException, 1547 AccessControlException, FileNotFoundException, 1548 ParentNotDirectoryException { 1549 return getINode(src, DirOp.WRITE); 1550 } 1551 1552 /** 1553 * Get {@link INode} associated with the file / directory. 
 */
public INode getINode(String src, DirOp dirOp) throws UnresolvedLinkException,
    AccessControlException, ParentNotDirectoryException {
  return getINodesInPath(src, dirOp).getLastINode();
}

/** Build a permission checker for the current remote user. */
FSPermissionChecker getPermissionChecker()
    throws AccessControlException {
  try {
    return getPermissionChecker(fsOwnerShortUserName, supergroup,
        NameNode.getRemoteUser());
  } catch (IOException e) {
    throw new AccessControlException(e);
  }
}

@VisibleForTesting
FSPermissionChecker getPermissionChecker(String fsOwner, String superGroup,
    UserGroupInformation ugi) throws AccessControlException {
  return new FSPermissionChecker(
      fsOwner, superGroup, ugi, attributeProvider);
}

/** Verify ownership of the last inode of the path; it must exist. */
void checkOwner(FSPermissionChecker pc, INodesInPath iip)
    throws AccessControlException, FileNotFoundException {
  if (iip.getLastINode() == null) {
    throw new FileNotFoundException(
        "Directory/File does not exist " + iip.getPath());
  }
  checkPermission(pc, iip, true, null, null, null, null);
}

/** Check the given access on the path's last inode. */
void checkPathAccess(FSPermissionChecker pc, INodesInPath iip,
    FsAction access) throws AccessControlException {
  checkPermission(pc, iip, false, null, null, access, null);
}

/** Check the given access on the path's parent inode. */
void checkParentAccess(FSPermissionChecker pc, INodesInPath iip,
    FsAction access) throws AccessControlException {
  checkPermission(pc, iip, false, null, access, null, null);
}

/** Check the given access on the path's last existing ancestor. */
void checkAncestorAccess(FSPermissionChecker pc, INodesInPath iip,
    FsAction access) throws AccessControlException {
  checkPermission(pc, iip, false, access, null, null, null);
}

void checkTraverse(FSPermissionChecker pc, INodesInPath iip,
    boolean resolveLink) throws AccessControlException,
    UnresolvedPathException, ParentNotDirectoryException {
  // When permissions are disabled, a null checker skips the access checks.
  FSPermissionChecker.checkTraverse(
      isPermissionEnabled ? pc : null, iip, resolveLink);
}

void checkTraverse(FSPermissionChecker pc, INodesInPath iip,
    DirOp dirOp) throws AccessControlException, UnresolvedPathException,
    ParentNotDirectoryException {
  // Link operations must not resolve the final symlink component.
  final boolean resolveLink;
  switch (dirOp) {
    case READ_LINK:
    case WRITE_LINK:
    case CREATE_LINK:
      resolveLink = false;
      break;
    default:
      resolveLink = true;
      break;
  }
  checkTraverse(pc, iip, resolveLink);
  // Snapshots are read-only: only READ-type ops may target them.
  boolean allowSnapshot = (dirOp == DirOp.READ || dirOp == DirOp.READ_LINK);
  if (!allowSnapshot && iip.isSnapshot()) {
    throw new SnapshotAccessControlException(
        "Modification on a read-only snapshot is disallowed");
  }
}

/**
 * Check whether the current user has permission to access the path. For
 * more details of the parameters, see
 * {@link FSPermissionChecker#checkPermission}.
 */
void checkPermission(FSPermissionChecker pc, INodesInPath iip,
    boolean doCheckOwner, FsAction ancestorAccess, FsAction parentAccess,
    FsAction access, FsAction subAccess)
    throws AccessControlException {
  checkPermission(pc, iip, doCheckOwner, ancestorAccess,
      parentAccess, access, subAccess, false);
}

/**
 * Check whether current user have permissions to access the path. For more
 * details of the parameters, see
 * {@link FSPermissionChecker#checkPermission}.
 */
void checkPermission(FSPermissionChecker pc, INodesInPath iip,
    boolean doCheckOwner, FsAction ancestorAccess, FsAction parentAccess,
    FsAction access, FsAction subAccess, boolean ignoreEmptyDir)
    throws AccessControlException {
  // Superusers bypass permission checks entirely.
  if (!pc.isSuperUser()) {
    readLock();
    try {
      pc.checkPermission(iip, doCheckOwner, ancestorAccess,
          parentAccess, access, subAccess, ignoreEmptyDir);
    } finally {
      readUnlock();
    }
  }
}

/**
 * Deny the superuser access to inodes marked with the
 * "unreadable by superuser" xattr.
 */
void checkUnreadableBySuperuser(FSPermissionChecker pc, INodesInPath iip)
    throws IOException {
  if (pc.isSuperUser()) {
    if (FSDirXAttrOp.getXAttrByPrefixedName(this, iip,
        SECURITY_XATTR_UNREADABLE_BY_SUPERUSER) != null) {
      throw new AccessControlException(
          "Access is denied for " + pc.getUser() + " since the superuser "
              + "is not allowed to perform this operation.");
    }
  }
}

/** File status for audit logging; null when audit logging does not apply. */
HdfsFileStatus getAuditFileInfo(INodesInPath iip)
    throws IOException {
  return (namesystem.isAuditEnabled() && namesystem.isExternalInvocation())
      ? FSDirStatAndListingOp.getFileInfo(this, iip, false) : null;
}

/**
 * Verify that parent directory of src exists.
 */
void verifyParentDir(INodesInPath iip)
    throws FileNotFoundException, ParentNotDirectoryException {
  // length <= 2 means the parent is the root, which always exists.
  if (iip.length() > 2) {
    final INode parentNode = iip.getINode(-2);
    if (parentNode == null) {
      throw new FileNotFoundException("Parent directory doesn't exist: "
          + iip.getParentPath());
    } else if (!parentNode.isDirectory()) {
      throw new ParentNotDirectoryException("Parent path is not a directory: "
          + iip.getParentPath());
    }
  }
}

/** Allocate a new inode ID. */
long allocateNewInodeId() {
  return inodeId.nextValue();
}

/** @return the last inode ID. */
public long getLastInodeId() {
  return inodeId.getCurrentValue();
}

/**
 * Set the last allocated inode id when fsimage or editlog is loaded.
 */
void resetLastInodeId(long newValue) throws IOException {
  try {
    inodeId.skipTo(newValue);
  } catch(IllegalStateException ise) {
    throw new IOException(ise);
  }
}

/** Should only be used for tests to reset to any value */
void resetLastInodeIdWithoutChecking(long newValue) {
  inodeId.setCurrentValue(newValue);
}

/** Attributes of the path's last inode, possibly from a snapshot or an
 * external attribute provider; the inode must exist. */
INodeAttributes getAttributes(INodesInPath iip)
    throws FileNotFoundException {
  INode node = FSDirectory.resolveLastINode(iip);
  int snapshot = iip.getPathSnapshotId();
  INodeAttributes nodeAttrs = node.getSnapshotINode(snapshot);
  if (attributeProvider != null) {
    // permission checking sends the full components array including the
    // first empty component for the root. however file status
    // related calls are expected to strip out the root component according
    // to TestINodeAttributeProvider.
    byte[][] components = iip.getPathComponents();
    components = Arrays.copyOfRange(components, 1, components.length);
    nodeAttrs = attributeProvider.getAttributes(components, nodeAttrs);
  }
  return nodeAttrs;
}

}