001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.server.namenode;
019
020import com.google.common.annotations.VisibleForTesting;
021import com.google.common.base.Preconditions;
022import com.google.protobuf.InvalidProtocolBufferException;
023
024import org.apache.hadoop.HadoopIllegalArgumentException;
025import org.apache.hadoop.classification.InterfaceAudience;
026import org.apache.hadoop.conf.Configuration;
027import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension;
028import org.apache.hadoop.fs.InvalidPathException;
029import org.apache.hadoop.fs.ParentNotDirectoryException;
030import org.apache.hadoop.fs.Path;
031import org.apache.hadoop.fs.StorageType;
032import org.apache.hadoop.fs.UnresolvedLinkException;
033import org.apache.hadoop.fs.XAttr;
034import org.apache.hadoop.fs.permission.FsAction;
035import org.apache.hadoop.fs.permission.FsPermission;
036import org.apache.hadoop.hdfs.DFSConfigKeys;
037import org.apache.hadoop.hdfs.DFSUtil;
038import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
039import org.apache.hadoop.hdfs.protocol.FSLimitException.MaxDirectoryItemsExceededException;
040import org.apache.hadoop.hdfs.protocol.FSLimitException.PathComponentTooLongException;
041import org.apache.hadoop.hdfs.protocol.HdfsConstants;
042import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
043import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
044import org.apache.hadoop.hdfs.protocol.SnapshotAccessControlException;
045import org.apache.hadoop.hdfs.protocol.UnresolvedPathException;
046import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos;
047import org.apache.hadoop.hdfs.protocolPB.PBHelperClient;
048import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
049import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
050import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite;
051import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
052import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo.UpdatedReplicationInfo;
053import org.apache.hadoop.hdfs.util.ByteArray;
054import org.apache.hadoop.hdfs.util.EnumCounters;
055import org.apache.hadoop.hdfs.util.ReadOnlyList;
056import org.apache.hadoop.security.AccessControlException;
057import org.apache.hadoop.security.UserGroupInformation;
058import org.apache.hadoop.util.Time;
059import org.slf4j.Logger;
060import org.slf4j.LoggerFactory;
061
062import java.io.Closeable;
063import java.io.FileNotFoundException;
064import java.io.IOException;
065import java.util.ArrayList;
066import java.util.Arrays;
067import java.util.Collection;
068import java.util.concurrent.ForkJoinPool;
069import java.util.concurrent.RecursiveAction;
070import java.util.List;
071import java.util.Map;
072import java.util.SortedSet;
073import java.util.TreeSet;
074import java.util.concurrent.locks.ReentrantReadWriteLock;
075
076import static org.apache.hadoop.fs.CommonConfigurationKeys.FS_PROTECTED_DIRECTORIES;
077import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT;
078import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY;
079import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_QUOTA_BY_STORAGETYPE_ENABLED_DEFAULT;
080import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_QUOTA_BY_STORAGETYPE_ENABLED_KEY;
081import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_DEFAULT;
082import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_KEY;
083import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.CRYPTO_XATTR_ENCRYPTION_ZONE;
084import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.SECURITY_XATTR_UNREADABLE_BY_SUPERUSER;
085import static org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot.CURRENT_STATE_ID;
086
087/**
088 * Both FSDirectory and FSNamesystem manage the state of the namespace.
089 * FSDirectory is a pure in-memory data structure, all of whose operations
090 * happen entirely in memory. In contrast, FSNamesystem persists the operations
091 * to the disk.
092 * @see org.apache.hadoop.hdfs.server.namenode.FSNamesystem
093 **/
094@InterfaceAudience.Private
095public class FSDirectory implements Closeable {
096  static final Logger LOG = LoggerFactory.getLogger(FSDirectory.class);
097
098  private static INodeDirectory createRoot(FSNamesystem namesystem) {
099    final INodeDirectory r = new INodeDirectory(
100        INodeId.ROOT_INODE_ID,
101        INodeDirectory.ROOT_NAME,
102        namesystem.createFsOwnerPermissions(new FsPermission((short) 0755)),
103        0L);
104    r.addDirectoryWithQuotaFeature(
105        new DirectoryWithQuotaFeature.Builder().
106            nameSpaceQuota(DirectoryWithQuotaFeature.DEFAULT_NAMESPACE_QUOTA).
107            storageSpaceQuota(DirectoryWithQuotaFeature.DEFAULT_STORAGE_SPACE_QUOTA).
108            build());
109    r.addSnapshottableFeature();
110    r.setSnapshotQuota(0);
111    return r;
112  }
113
  // When true, user paths that collide with the reserved names below are
  // rejected; tests may flip this off.
  @VisibleForTesting
  static boolean CHECK_RESERVED_FILE_NAMES = true;
  // Name of the reserved root-level virtual directory, "/.reserved".
  public final static String DOT_RESERVED_STRING = ".reserved";
  public final static String DOT_RESERVED_PATH_PREFIX = Path.SEPARATOR
      + DOT_RESERVED_STRING;
  public final static byte[] DOT_RESERVED = 
      DFSUtil.string2Bytes(DOT_RESERVED_STRING);
  // Name (and byte form) of the "raw" entry under /.reserved.
  private final static String RAW_STRING = "raw";
  private final static byte[] RAW = DFSUtil.string2Bytes(RAW_STRING);
  // Name (and byte form) of the ".inodes" entry under /.reserved, used to
  // address files by inode id.
  public final static String DOT_INODES_STRING = ".inodes";
  public final static byte[] DOT_INODES = 
      DFSUtil.string2Bytes(DOT_INODES_STRING);
  // Byte form of the parent-directory path component "..".
  private final static byte[] DOT_DOT =
      DFSUtil.string2Bytes("..");

  // Synthetic status for /.reserved itself (mode 1770, i.e. sticky bit set);
  // used for listing only.
  public final static HdfsFileStatus DOT_RESERVED_STATUS =
      new HdfsFileStatus(0, true, 0, 0, 0, 0, new FsPermission((short) 01770),
          null, null, null, HdfsFileStatus.EMPTY_NAME, -1L, 0, null,
          HdfsConstants.BLOCK_STORAGE_POLICY_ID_UNSPECIFIED);

  // Synthetic status for a ".snapshot" directory entry; used for listing only.
  public final static HdfsFileStatus DOT_SNAPSHOT_DIR_STATUS =
      new HdfsFileStatus(0, true, 0, 0, 0, 0, null, null, null, null,
          HdfsFileStatus.EMPTY_NAME, -1L, 0, null,
          HdfsConstants.BLOCK_STORAGE_POLICY_ID_UNSPECIFIED);
138
  // Root of the in-memory namespace tree.
  INodeDirectory rootDir;
  private final FSNamesystem namesystem;
  private volatile boolean skipQuotaCheck = false; //skip while consuming edits
  private final int maxComponentLength; // max bytes per path component
  private final int maxDirItems; // max entries per directory
  private final int lsLimit;  // max list limit
  private final int contentCountLimit; // max content summary counts per run
  private final long contentSleepMicroSec; // sleep between summary batches
  private final INodeMap inodeMap; // Synchronized by dirLock
  private long yieldCount = 0; // keep track of lock yield count.
  private int quotaInitThreads; // threads used by updateCountForQuota()

  private final int inodeXAttrsLimit; //inode xattrs max limit

  // A set of directories that have been protected using the
  // dfs.namenode.protected.directories setting. These directories cannot
  // be deleted unless they are empty.
  //
  // Each entry in this set must be a normalized path.
  private final SortedSet<String> protectedDirectories;

  // lock to protect the directory and BlockMap
  private final ReentrantReadWriteLock dirLock;

  private final boolean isPermissionEnabled;
  /**
   * Support for ACLs is controlled by a configuration flag. If the
   * configuration flag is false, then the NameNode will reject all
   * ACL-related operations.
   */
  private final boolean aclsEnabled;
  private final boolean xattrsEnabled;
  private final int xattrMaxSize; // max bytes for a single xattr name+value

  // precision of access times.
  private final long accessTimePrecision;
  // whether setStoragePolicy is allowed.
  private final boolean storagePolicyEnabled;
  // whether quota by storage type is allowed
  private final boolean quotaByStorageTypeEnabled;

  private final String fsOwnerShortUserName;
  private final String supergroup;
  private final INodeId inodeId; // generator for new inode ids

  private final FSEditLog editLog;

  // Statuses of /.reserved/.inodes and /.reserved/raw; populated by
  // createReservedStatuses(long) once the FS image's cTime is known.
  private HdfsFileStatus[] reservedStatuses;

  // Optional plugin that supplies inode attributes; null when unset.
  private INodeAttributeProvider attributeProvider;

  /** Install an external provider of inode attributes (may be null). */
  public void setINodeAttributeProvider(INodeAttributeProvider provider) {
    attributeProvider = provider;
  }
193
  // utility methods to acquire and release read lock and write lock
  /** Acquire the directory read lock (blocks until available). */
  void readLock() {
    this.dirLock.readLock().lock();
  }

  /** Release one hold of the directory read lock. */
  void readUnlock() {
    this.dirLock.readLock().unlock();
  }

  /** Acquire the exclusive directory write lock (blocks until available). */
  void writeLock() {
    this.dirLock.writeLock().lock();
  }

  /** Release one hold of the directory write lock. */
  void writeUnlock() {
    this.dirLock.writeLock().unlock();
  }

  /** @return true iff the current thread holds the write lock. */
  boolean hasWriteLock() {
    return this.dirLock.isWriteLockedByCurrentThread();
  }

  /** @return true iff the current thread holds the read or the write lock. */
  boolean hasReadLock() {
    return this.dirLock.getReadHoldCount() > 0 || hasWriteLock();
  }

  /** @return the current thread's read-lock hold count. */
  public int getReadHoldCount() {
    return this.dirLock.getReadHoldCount();
  }

  /** @return the current thread's write-lock hold count. */
  public int getWriteHoldCount() {
    return this.dirLock.getWriteHoldCount();
  }
226
  // Manager for encryption zones; visible for tests.
  @VisibleForTesting
  public final EncryptionZoneManager ezManager;

  /**
   * Caches frequently used file names used in {@link INode} to reuse 
   * byte[] objects and reduce heap usage.
   */
  private final NameCache<ByteArray> nameCache;

  // used to specify path resolution type. *_LINK will return symlinks instead
  // of throwing an unresolved exception
  public enum DirOp {
    READ,
    READ_LINK,
    WRITE,  // disallows snapshot paths.
    WRITE_LINK,
    CREATE, // like write, but also blocks invalid path names.
    CREATE_LINK;
  };
246
  /**
   * Build the in-memory namespace manager: creates the root inode and inode
   * map, then reads all feature flags and limits from configuration, and
   * wires up the name cache and encryption zone manager.
   *
   * @param ns   the owning namesystem (also supplies the edit log)
   * @param conf configuration to read flags and limits from
   * @throws IOException if the current user cannot be determined
   */
  FSDirectory(FSNamesystem ns, Configuration conf) throws IOException {
    this.dirLock = new ReentrantReadWriteLock(true); // fair
    this.inodeId = new INodeId();
    rootDir = createRoot(ns);
    inodeMap = INodeMap.newInstance(rootDir);
    // Permission / ACL / xattr feature flags.
    this.isPermissionEnabled = conf.getBoolean(
      DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY,
      DFSConfigKeys.DFS_PERMISSIONS_ENABLED_DEFAULT);
    this.fsOwnerShortUserName =
      UserGroupInformation.getCurrentUser().getShortUserName();
    this.supergroup = conf.get(
      DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY,
      DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT);
    this.aclsEnabled = conf.getBoolean(
        DFSConfigKeys.DFS_NAMENODE_ACLS_ENABLED_KEY,
        DFSConfigKeys.DFS_NAMENODE_ACLS_ENABLED_DEFAULT);
    LOG.info("ACLs enabled? " + aclsEnabled);
    this.xattrsEnabled = conf.getBoolean(
        DFSConfigKeys.DFS_NAMENODE_XATTRS_ENABLED_KEY,
        DFSConfigKeys.DFS_NAMENODE_XATTRS_ENABLED_DEFAULT);
    LOG.info("XAttrs enabled? " + xattrsEnabled);
    this.xattrMaxSize = conf.getInt(
        DFSConfigKeys.DFS_NAMENODE_MAX_XATTR_SIZE_KEY,
        DFSConfigKeys.DFS_NAMENODE_MAX_XATTR_SIZE_DEFAULT);
    // The configured xattr size must be positive and within the hard limit.
    Preconditions.checkArgument(xattrMaxSize > 0,
        "The maximum size of an xattr should be > 0: (%s).",
        DFSConfigKeys.DFS_NAMENODE_MAX_XATTR_SIZE_KEY);
    Preconditions.checkArgument(xattrMaxSize <=
        DFSConfigKeys.DFS_NAMENODE_MAX_XATTR_SIZE_HARD_LIMIT,
        "The maximum size of an xattr should be <= maximum size"
        + " hard limit " + DFSConfigKeys.DFS_NAMENODE_MAX_XATTR_SIZE_HARD_LIMIT
        + ": (%s).", DFSConfigKeys.DFS_NAMENODE_MAX_XATTR_SIZE_KEY);

    this.accessTimePrecision = conf.getLong(
        DFS_NAMENODE_ACCESSTIME_PRECISION_KEY,
        DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT);

    this.storagePolicyEnabled =
        conf.getBoolean(DFS_STORAGE_POLICY_ENABLED_KEY,
                        DFS_STORAGE_POLICY_ENABLED_DEFAULT);

    this.quotaByStorageTypeEnabled =
        conf.getBoolean(DFS_QUOTA_BY_STORAGETYPE_ENABLED_KEY,
                        DFS_QUOTA_BY_STORAGETYPE_ENABLED_DEFAULT);

    // Listing / content-summary throttles; a non-positive configured list
    // limit falls back to the default.
    int configuredLimit = conf.getInt(
        DFSConfigKeys.DFS_LIST_LIMIT, DFSConfigKeys.DFS_LIST_LIMIT_DEFAULT);
    this.lsLimit = configuredLimit>0 ?
        configuredLimit : DFSConfigKeys.DFS_LIST_LIMIT_DEFAULT;
    this.contentCountLimit = conf.getInt(
        DFSConfigKeys.DFS_CONTENT_SUMMARY_LIMIT_KEY,
        DFSConfigKeys.DFS_CONTENT_SUMMARY_LIMIT_DEFAULT);
    this.contentSleepMicroSec = conf.getLong(
        DFSConfigKeys.DFS_CONTENT_SUMMARY_SLEEP_MICROSEC_KEY,
        DFSConfigKeys.DFS_CONTENT_SUMMARY_SLEEP_MICROSEC_DEFAULT);

    // filesystem limits
    this.maxComponentLength = conf.getInt(
        DFSConfigKeys.DFS_NAMENODE_MAX_COMPONENT_LENGTH_KEY,
        DFSConfigKeys.DFS_NAMENODE_MAX_COMPONENT_LENGTH_DEFAULT);
    this.maxDirItems = conf.getInt(
        DFSConfigKeys.DFS_NAMENODE_MAX_DIRECTORY_ITEMS_KEY,
        DFSConfigKeys.DFS_NAMENODE_MAX_DIRECTORY_ITEMS_DEFAULT);
    this.inodeXAttrsLimit = conf.getInt(
        DFSConfigKeys.DFS_NAMENODE_MAX_XATTRS_PER_INODE_KEY,
        DFSConfigKeys.DFS_NAMENODE_MAX_XATTRS_PER_INODE_DEFAULT);

    this.protectedDirectories = parseProtectedDirectories(conf);

    Preconditions.checkArgument(this.inodeXAttrsLimit >= 0,
        "Cannot set a negative limit on the number of xattrs per inode (%s).",
        DFSConfigKeys.DFS_NAMENODE_MAX_XATTRS_PER_INODE_KEY);
    // We need a maximum maximum because by default, PB limits message sizes
    // to 64MB. This means we can only store approximately 6.7 million entries
    // per directory, but let's use 6.4 million for some safety.
    final int MAX_DIR_ITEMS = 64 * 100 * 1000;
    Preconditions.checkArgument(
        maxDirItems > 0 && maxDirItems <= MAX_DIR_ITEMS, "Cannot set "
            + DFSConfigKeys.DFS_NAMENODE_MAX_DIRECTORY_ITEMS_KEY
            + " to a value less than 1 or greater than " + MAX_DIR_ITEMS);

    // Name cache: frequently occurring file names are interned to save heap.
    int threshold = conf.getInt(
        DFSConfigKeys.DFS_NAMENODE_NAME_CACHE_THRESHOLD_KEY,
        DFSConfigKeys.DFS_NAMENODE_NAME_CACHE_THRESHOLD_DEFAULT);
    NameNode.LOG.info("Caching file names occurring more than " + threshold
        + " times");
    nameCache = new NameCache<ByteArray>(threshold);
    namesystem = ns;
    this.editLog = ns.getEditLog();
    ezManager = new EncryptionZoneManager(this, conf);

    this.quotaInitThreads = conf.getInt(
        DFSConfigKeys.DFS_NAMENODE_QUOTA_INIT_THREADS_KEY,
        DFSConfigKeys.DFS_NAMENODE_QUOTA_INIT_THREADS_DEFAULT);
  }
342
343  /**
344   * Get HdfsFileStatuses of the reserved paths: .inodes and raw.
345   *
346   * @return Array of HdfsFileStatus
347   */
348  HdfsFileStatus[] getReservedStatuses() {
349    Preconditions.checkNotNull(reservedStatuses, "reservedStatuses should "
350        + " not be null. It is populated when FSNamesystem loads FS image."
351        + " It has to be set at this time instead of initialization time"
352        + " because CTime is loaded during FSNamesystem#loadFromDisk.");
353    return reservedStatuses;
354  }
355
356  /**
357   * Create HdfsFileStatuses of the reserved paths: .inodes and raw.
358   * These statuses are solely for listing purpose. All other operations
359   * on the reserved dirs are disallowed.
360   * Operations on sub directories are resolved by
361   * {@link FSDirectory#resolvePath(String, byte[][], FSDirectory)}
362   * and conducted directly, without the need to check the reserved dirs.
363   *
364   * This method should only be invoked once during namenode initialization.
365   *
366   * @param cTime CTime of the file system
367   * @return Array of HdfsFileStatus
368   */
369  void createReservedStatuses(long cTime) {
370    HdfsFileStatus inodes = new HdfsFileStatus(0, true, 0, 0, cTime, cTime,
371        new FsPermission((short) 0770), null, supergroup, null,
372        DOT_INODES, -1L, 0, null,
373        HdfsConstants.BLOCK_STORAGE_POLICY_ID_UNSPECIFIED);
374    HdfsFileStatus raw = new HdfsFileStatus(0, true, 0, 0, cTime, cTime,
375        new FsPermission((short) 0770), null, supergroup, null, RAW, -1L,
376        0, null, HdfsConstants.BLOCK_STORAGE_POLICY_ID_UNSPECIFIED);
377    reservedStatuses = new HdfsFileStatus[] { inodes, raw };
378  }
379
  /** @return the owning namesystem. */
  FSNamesystem getFSNamesystem() {
    return namesystem;
  }
383
384  /**
385   * Parse configuration setting dfs.namenode.protected.directories to
386   * retrieve the set of protected directories.
387   *
388   * @param conf
389   * @return a TreeSet
390   */
391  @VisibleForTesting
392  static SortedSet<String> parseProtectedDirectories(Configuration conf) {
393    // Normalize each input path to guard against administrator error.
394    return new TreeSet<>(normalizePaths(
395        conf.getTrimmedStringCollection(FS_PROTECTED_DIRECTORIES),
396        FS_PROTECTED_DIRECTORIES));
397  }
398
  /** @return the set of protected directories (normalized paths). */
  SortedSet<String> getProtectedDirectories() {
    return protectedDirectories;
  }

  /** @return the block manager of the owning namesystem. */
  BlockManager getBlockManager() {
    return getFSNamesystem().getBlockManager();
  }

  /** @return the key provider of the owning namesystem. */
  KeyProviderCryptoExtension getProvider() {
    return getFSNamesystem().getProvider();
  }

  /** @return the root directory inode. */
  public INodeDirectory getRoot() {
    return rootDir;
  }

  /** @return the block storage policy suite from the block manager. */
  public BlockStoragePolicySuite getBlockStoragePolicySuite() {
    return getBlockManager().getStoragePolicySuite();
  }

  // Simple accessors for the configuration-derived flags and limits.
  boolean isPermissionEnabled() {
    return isPermissionEnabled;
  }
  boolean isAclsEnabled() {
    return aclsEnabled;
  }
  boolean isXattrsEnabled() {
    return xattrsEnabled;
  }
  int getXattrMaxSize() { return xattrMaxSize; }
  boolean isStoragePolicyEnabled() {
    return storagePolicyEnabled;
  }
  // Access times are only recorded when the precision is positive.
  boolean isAccessTimeSupported() {
    return accessTimePrecision > 0;
  }
  long getAccessTimePrecision() {
    return accessTimePrecision;
  }
  boolean isQuotaByStorageTypeEnabled() {
    return quotaByStorageTypeEnabled;
  }


  int getLsLimit() {
    return lsLimit;
  }

  int getContentCountLimit() {
    return contentCountLimit;
  }

  long getContentSleepMicroSec() {
    return contentSleepMicroSec;
  }

  int getInodeXAttrsLimit() {
    return inodeXAttrsLimit;
  }

  FSEditLog getEditLog() {
    return editLog;
  }
463
464  /**
465   * Shutdown the filestore
466   */
467  @Override
468  public void close() throws IOException {}
469
470  void markNameCacheInitialized() {
471    writeLock();
472    try {
473      nameCache.initialized();
474    } finally {
475      writeUnlock();
476    }
477  }
478
  /** @return true if quota verification is currently disabled
   *  (e.g. while consuming edits). */
  boolean shouldSkipQuotaChecks() {
    return skipQuotaCheck;
  }

  /** Enable quota verification */
  void enableQuotaChecks() {
    skipQuotaCheck = false;
  }

  /** Disable quota verification */
  void disableQuotaChecks() {
    skipQuotaCheck = true;
  }
492
493  /**
494   * Resolves a given path into an INodesInPath.  All ancestor inodes that
495   * exist are validated as traversable directories.  Symlinks in the ancestry
496   * will generate an UnresolvedLinkException.  The returned IIP will be an
497   * accessible path that also passed additional sanity checks based on how
498   * the path will be used as specified by the DirOp.
499   *   READ:   Expands reserved paths and performs permission checks
500   *           during traversal.  Raw paths are only accessible by a superuser.
501   *   WRITE:  In addition to READ checks, ensures the path is not a
502   *           snapshot path.
503   *   CREATE: In addition to WRITE checks, ensures path does not contain
504   *           illegal character sequences.
505   *
506   * @param pc  A permission checker for traversal checks.  Pass null for
507   *            no permission checks.
508   * @param src The path to resolve.
509   * @param dirOp The {@link DirOp} that controls additional checks.
510   * @param resolveLink If false, only ancestor symlinks will be checked.  If
511   *         true, the last inode will also be checked.
512   * @return if the path indicates an inode, return path after replacing up to
513   *         <inodeid> with the corresponding path of the inode, else the path
514   *         in {@code src} as is. If the path refers to a path in the "raw"
515   *         directory, return the non-raw pathname.
516   * @throws FileNotFoundException
517   * @throws AccessControlException
518   * @throws ParentNotDirectoryException
519   * @throws UnresolvedLinkException
520   */
521  @VisibleForTesting
522  public INodesInPath resolvePath(FSPermissionChecker pc, String src,
523      DirOp dirOp) throws UnresolvedLinkException, FileNotFoundException,
524      AccessControlException, ParentNotDirectoryException {
525    boolean isCreate = (dirOp == DirOp.CREATE || dirOp == DirOp.CREATE_LINK);
526    // prevent creation of new invalid paths
527    if (isCreate && !DFSUtil.isValidName(src)) {
528      throw new InvalidPathException("Invalid file name: " + src);
529    }
530
531    byte[][] components = INode.getPathComponents(src);
532    boolean isRaw = isReservedRawName(components);
533    if (isPermissionEnabled && pc != null && isRaw) {
534      pc.checkSuperuserPrivilege();
535    }
536    components = resolveComponents(components, this);
537    INodesInPath iip = INodesInPath.resolve(rootDir, components, isRaw);
538    // verify all ancestors are dirs and traversable.  note that only
539    // methods that create new namespace items have the signature to throw
540    // PNDE
541    try {
542      checkTraverse(pc, iip, dirOp);
543    } catch (ParentNotDirectoryException pnde) {
544      if (!isCreate) {
545        throw new AccessControlException(pnde.getMessage());
546      }
547      throw pnde;
548    }
549    return iip;
550  }
551
552  INodesInPath resolvePath(FSPermissionChecker pc, String src, long fileId)
553      throws UnresolvedLinkException, FileNotFoundException,
554      AccessControlException, ParentNotDirectoryException {
555    // Older clients may not have given us an inode ID to work with.
556    // In this case, we have to try to resolve the path and hope it
557    // hasn't changed or been deleted since the file was opened for write.
558    INodesInPath iip;
559    if (fileId == HdfsConstants.GRANDFATHER_INODE_ID) {
560      iip = resolvePath(pc, src, DirOp.WRITE);
561    } else {
562      INode inode = getInode(fileId);
563      if (inode == null) {
564        iip = INodesInPath.fromComponents(INode.getPathComponents(src));
565      } else {
566        iip = INodesInPath.fromINode(inode);
567      }
568    }
569    return iip;
570  }
571
572  // this method can be removed after IIP is used more extensively
573  static String resolvePath(String src,
574      FSDirectory fsd) throws FileNotFoundException {
575    byte[][] pathComponents = INode.getPathComponents(src);
576    pathComponents = resolveComponents(pathComponents, fsd);
577    return DFSUtil.byteArray2PathString(pathComponents);
578  }
579
580  /**
581   * @return true if the path is a non-empty directory; otherwise, return false.
582   */
583  boolean isNonEmptyDirectory(INodesInPath inodesInPath) {
584    readLock();
585    try {
586      final INode inode = inodesInPath.getLastINode();
587      if (inode == null || !inode.isDirectory()) {
588        //not found or not a directory
589        return false;
590      }
591      final int s = inodesInPath.getPathSnapshotId();
592      return !inode.asDirectory().getChildrenList(s).isEmpty();
593    } finally {
594      readUnlock();
595    }
596  }
597
598  /**
599   * Check whether the filepath could be created
600   * @throws SnapshotAccessControlException if path is in RO snapshot
601   */
602  boolean isValidToCreate(String src, INodesInPath iip)
603      throws SnapshotAccessControlException {
604    String srcs = normalizePath(src);
605    return srcs.startsWith("/") && !srcs.endsWith("/") &&
606        iip.getLastINode() == null;
607  }
608
609  /**
610   * Tell the block manager to update the replication factors when delete
611   * happens. Deleting a file or a snapshot might decrease the replication
612   * factor of the blocks as the blocks are always replicated to the highest
613   * replication factor among all snapshots.
614   */
615  void updateReplicationFactor(Collection<UpdatedReplicationInfo> blocks) {
616    BlockManager bm = getBlockManager();
617    for (UpdatedReplicationInfo e : blocks) {
618      BlockInfo b = e.block();
619      bm.setReplication(b.getReplication(), e.targetReplication(), b);
620    }
621  }
622
623  /**
624   * Update the count of each directory with quota in the namespace.
625   * A directory's count is defined as the total number inodes in the tree
626   * rooted at the directory.
627   *
628   * This is an update of existing state of the filesystem and does not
629   * throw QuotaExceededException.
630   */
631  void updateCountForQuota(int initThreads) {
632    writeLock();
633    try {
634      int threads = (initThreads < 1) ? 1 : initThreads;
635      LOG.info("Initializing quota with " + threads + " thread(s)");
636      long start = Time.now();
637      QuotaCounts counts = new QuotaCounts.Builder().build();
638      ForkJoinPool p = new ForkJoinPool(threads);
639      RecursiveAction task = new InitQuotaTask(getBlockStoragePolicySuite(),
640          rootDir.getStoragePolicyID(), rootDir, counts);
641      p.execute(task);
642      task.join();
643      p.shutdown();
644      LOG.info("Quota initialization completed in " + (Time.now() - start) +
645          " milliseconds\n" + counts);
646    } finally {
647      writeUnlock();
648    }
649  }
650
651  void updateCountForQuota() {
652    updateCountForQuota(quotaInitThreads);
653  }
654
655  /**
656   * parallel initialization using fork-join.
657   */
658  private static class InitQuotaTask extends RecursiveAction {
659    private final INodeDirectory dir;
660    private final QuotaCounts counts;
661    private final BlockStoragePolicySuite bsps;
662    private final byte blockStoragePolicyId;
663
664    public InitQuotaTask(BlockStoragePolicySuite bsps,
665        byte blockStoragePolicyId, INodeDirectory dir, QuotaCounts counts) {
666      this.dir = dir;
667      this.counts = counts;
668      this.bsps = bsps;
669      this.blockStoragePolicyId = blockStoragePolicyId;
670    }
671
672    public void compute() {
673      QuotaCounts myCounts =  new QuotaCounts.Builder().build();
674      dir.computeQuotaUsage4CurrentDirectory(bsps, blockStoragePolicyId,
675          myCounts);
676
677      ReadOnlyList<INode> children =
678          dir.getChildrenList(CURRENT_STATE_ID);
679
680      if (children.size() > 0) {
681        List<InitQuotaTask> subtasks = new ArrayList<InitQuotaTask>();
682        for (INode child : children) {
683          final byte childPolicyId =
684              child.getStoragePolicyIDForQuota(blockStoragePolicyId);
685          if (child.isDirectory()) {
686            subtasks.add(new InitQuotaTask(bsps, childPolicyId,
687                child.asDirectory(), myCounts));
688          } else {
689            // file or symlink. count using the local counts variable
690            myCounts.add(child.computeQuotaUsage(bsps, childPolicyId, false,
691                CURRENT_STATE_ID));
692          }
693        }
694        // invoke and wait for completion
695        invokeAll(subtasks);
696      }
697
698      if (dir.isQuotaSet()) {
699        // check if quota is violated. It indicates a software bug.
700        final QuotaCounts q = dir.getQuotaCounts();
701
702        final long nsConsumed = myCounts.getNameSpace();
703        final long nsQuota = q.getNameSpace();
704        if (Quota.isViolated(nsQuota, nsConsumed)) {
705          LOG.warn("Namespace quota violation in image for "
706              + dir.getFullPathName()
707              + " quota = " + nsQuota + " < consumed = " + nsConsumed);
708        }
709
710        final long ssConsumed = myCounts.getStorageSpace();
711        final long ssQuota = q.getStorageSpace();
712        if (Quota.isViolated(ssQuota, ssConsumed)) {
713          LOG.warn("Storagespace quota violation in image for "
714              + dir.getFullPathName()
715              + " quota = " + ssQuota + " < consumed = " + ssConsumed);
716        }
717
718        final EnumCounters<StorageType> tsConsumed = myCounts.getTypeSpaces();
719        for (StorageType t : StorageType.getTypesSupportingQuota()) {
720          final long typeSpace = tsConsumed.get(t);
721          final long typeQuota = q.getTypeSpaces().get(t);
722          if (Quota.isViolated(typeQuota, typeSpace)) {
723            LOG.warn("Storage type quota violation in image for "
724                + dir.getFullPathName()
725                + " type = " + t.toString() + " quota = "
726                + typeQuota + " < consumed " + typeSpace);
727          }
728        }
729        if (LOG.isDebugEnabled()) {
730          LOG.debug("Setting quota for " + dir + "\n" + myCounts);
731        }
732        dir.getDirectoryWithQuotaFeature().setSpaceConsumed(nsConsumed,
733            ssConsumed, tsConsumed);
734      }
735
736      synchronized(counts) {
737        counts.add(myCounts);
738      }
739    }
740  }
741
742  /** Updates namespace, storagespace and typespaces consumed for all
743   * directories until the parent directory of file represented by path.
744   *
745   * @param iip the INodesInPath instance containing all the INodes for
746   *            updating quota usage
747   * @param nsDelta the delta change of namespace
748   * @param ssDelta the delta change of storage space consumed without replication
749   * @param replication the replication factor of the block consumption change
750   * @throws QuotaExceededException if the new count violates any quota limit
751   * @throws FileNotFoundException if path does not exist.
752   */
753  void updateSpaceConsumed(INodesInPath iip, long nsDelta, long ssDelta, short replication)
754    throws QuotaExceededException, FileNotFoundException,
755    UnresolvedLinkException, SnapshotAccessControlException {
756    writeLock();
757    try {
758      if (iip.getLastINode() == null) {
759        throw new FileNotFoundException("Path not found: " + iip.getPath());
760      }
761      updateCount(iip, nsDelta, ssDelta, replication, true);
762    } finally {
763      writeUnlock();
764    }
765  }
766
  /**
   * Apply the quota changes described by {@code quotaDelta}: the main path's
   * counts, per-inode deltas on other paths, and cached consumption on quota
   * directories. Each delta is negated before being applied.
   * NOTE(review): the negation suggests quotaDelta records space being
   * released (e.g. by a delete) — confirm against callers.
   *
   * @param iip the primary path whose ancestors' counts are updated
   * @param quotaDelta aggregated quota changes to apply
   * @param check whether to verify quota limits while updating
   * @throws QuotaExceededException if {@code check} is set and a limit is hit
   */
  public void updateCount(INodesInPath iip, INode.QuotaDelta quotaDelta,
      boolean check) throws QuotaExceededException {
    QuotaCounts counts = quotaDelta.getCountsCopy();
    updateCount(iip, iip.length() - 1, counts.negation(), check);
    // Apply per-inode deltas recorded against paths other than iip.
    Map<INode, QuotaCounts> deltaInOtherPaths = quotaDelta.getUpdateMap();
    for (Map.Entry<INode, QuotaCounts> entry : deltaInOtherPaths.entrySet()) {
      INodesInPath path = INodesInPath.fromINode(entry.getKey());
      updateCount(path, path.length() - 1, entry.getValue().negation(), check);
    }
    // Update the cached consumption of each affected quota directory.
    for (Map.Entry<INodeDirectory, QuotaCounts> entry :
        quotaDelta.getQuotaDirMap().entrySet()) {
      INodeDirectory quotaDir = entry.getKey();
      quotaDir.getDirectoryWithQuotaFeature().addSpaceConsumed2Cache(
          entry.getValue().negation());
    }
  }
783
784  /**
785   * Update the quota usage after deletion. The quota update is only necessary
786   * when image/edits have been loaded and the file/dir to be deleted is not
787   * contained in snapshots.
788   */
789  void updateCountForDelete(final INode inode, final INodesInPath iip) {
790    if (getFSNamesystem().isImageLoaded() &&
791        !inode.isInLatestSnapshot(iip.getLatestSnapshotId())) {
792      QuotaCounts counts = inode.computeQuotaUsage(getBlockStoragePolicySuite());
793      unprotectedUpdateCount(iip, iip.length() - 1, counts.negation());
794    }
795  }
796
797  /**
798   * Update usage count without replication factor change
799   */
800  void updateCount(INodesInPath iip, long nsDelta, long ssDelta, short replication,
801      boolean checkQuota) throws QuotaExceededException {
802    final INodeFile fileINode = iip.getLastINode().asFile();
803    EnumCounters<StorageType> typeSpaceDeltas =
804      getStorageTypeDeltas(fileINode.getStoragePolicyID(), ssDelta,
805          replication, replication);;
806    updateCount(iip, iip.length() - 1,
807      new QuotaCounts.Builder().nameSpace(nsDelta).storageSpace(ssDelta * replication).
808          typeSpaces(typeSpaceDeltas).build(),
809        checkQuota);
810  }
811
812  /**
813   * Update usage count with replication factor change due to setReplication
814   */
815  void updateCount(INodesInPath iip, long nsDelta, long ssDelta, short oldRep,
816      short newRep, boolean checkQuota) throws QuotaExceededException {
817    final INodeFile fileINode = iip.getLastINode().asFile();
818    EnumCounters<StorageType> typeSpaceDeltas =
819        getStorageTypeDeltas(fileINode.getStoragePolicyID(), ssDelta, oldRep, newRep);
820    updateCount(iip, iip.length() - 1,
821        new QuotaCounts.Builder().nameSpace(nsDelta).
822            storageSpace(ssDelta * (newRep - oldRep)).
823            typeSpaces(typeSpaceDeltas).build(),
824        checkQuota);
825  }
826
827  /** update count of each inode with quota
828   * 
829   * @param iip inodes in a path
830   * @param numOfINodes the number of inodes to update starting from index 0
831   * @param counts the count of space/namespace/type usage to be update
832   * @param checkQuota if true then check if quota is exceeded
833   * @throws QuotaExceededException if the new count violates any quota limit
834   */
835  void updateCount(INodesInPath iip, int numOfINodes,
836                    QuotaCounts counts, boolean checkQuota)
837                    throws QuotaExceededException {
838    assert hasWriteLock();
839    if (!namesystem.isImageLoaded()) {
840      //still initializing. do not check or update quotas.
841      return;
842    }
843    if (numOfINodes > iip.length()) {
844      numOfINodes = iip.length();
845    }
846    if (checkQuota && !skipQuotaCheck) {
847      verifyQuota(iip, numOfINodes, counts, null);
848    }
849    unprotectedUpdateCount(iip, numOfINodes, counts);
850  }
851  
852  /** 
853   * update quota of each inode and check to see if quota is exceeded. 
854   * See {@link #updateCount(INodesInPath, int, QuotaCounts, boolean)}
855   */ 
856   void updateCountNoQuotaCheck(INodesInPath inodesInPath,
857      int numOfINodes, QuotaCounts counts) {
858    assert hasWriteLock();
859    try {
860      updateCount(inodesInPath, numOfINodes, counts, false);
861    } catch (QuotaExceededException e) {
862      NameNode.LOG.error("BUG: unexpected exception ", e);
863    }
864  }
865  
866  /**
867   * updates quota without verification
868   * callers responsibility is to make sure quota is not exceeded
869   */
870  static void unprotectedUpdateCount(INodesInPath inodesInPath,
871      int numOfINodes, QuotaCounts counts) {
872    for(int i=0; i < numOfINodes; i++) {
873      if (inodesInPath.getINode(i).isQuotaSet()) { // a directory with quota
874        inodesInPath.getINode(i).asDirectory().getDirectoryWithQuotaFeature()
875            .addSpaceConsumed2Cache(counts);
876      }
877    }
878  }
879
880  /**
881   * Update the cached quota space for a block that is being completed.
882   * Must only be called once, as the block is being completed.
883   * @param completeBlk - Completed block for which to update space
884   * @param inodes - INodes in path to file containing completeBlk; if null
885   *                 this will be resolved internally
886   */
887  public void updateSpaceForCompleteBlock(BlockInfo completeBlk,
888      INodesInPath inodes) throws IOException {
889    assert namesystem.hasWriteLock();
890    INodesInPath iip = inodes != null ? inodes :
891        INodesInPath.fromINode(namesystem.getBlockCollection(completeBlk));
892    INodeFile fileINode = iip.getLastINode().asFile();
893    // Adjust disk space consumption if required
894    final long diff =
895        fileINode.getPreferredBlockSize() - completeBlk.getNumBytes();
896    if (diff > 0) {
897      try {
898        updateSpaceConsumed(iip, 0, -diff, fileINode.getFileReplication());
899      } catch (IOException e) {
900        LOG.warn("Unexpected exception while updating disk space.", e);
901      }
902    }
903  }
904
905  public EnumCounters<StorageType> getStorageTypeDeltas(byte storagePolicyID,
906      long dsDelta, short oldRep, short newRep) {
907    EnumCounters<StorageType> typeSpaceDeltas =
908        new EnumCounters<StorageType>(StorageType.class);
909    // empty file
910    if(dsDelta == 0){
911      return typeSpaceDeltas;
912    }
913    // Storage type and its quota are only available when storage policy is set
914    if (storagePolicyID != HdfsConstants.BLOCK_STORAGE_POLICY_ID_UNSPECIFIED) {
915      BlockStoragePolicy storagePolicy = getBlockManager().getStoragePolicy(storagePolicyID);
916
917      if (oldRep != newRep) {
918        List<StorageType> oldChosenStorageTypes =
919            storagePolicy.chooseStorageTypes(oldRep);
920
921        for (StorageType t : oldChosenStorageTypes) {
922          if (!t.supportTypeQuota()) {
923            continue;
924          }
925          Preconditions.checkArgument(dsDelta > 0);
926          typeSpaceDeltas.add(t, -dsDelta);
927        }
928      }
929
930      List<StorageType> newChosenStorageTypes =
931          storagePolicy.chooseStorageTypes(newRep);
932
933      for (StorageType t : newChosenStorageTypes) {
934        if (!t.supportTypeQuota()) {
935          continue;
936        }
937        typeSpaceDeltas.add(t, dsDelta);
938      }
939    }
940    return typeSpaceDeltas;
941  }
942
943  /**
944   * Add the given child to the namespace.
945   * @param existing the INodesInPath containing all the ancestral INodes
946   * @param child the new INode to add
947   * @return a new INodesInPath instance containing the new child INode. Null
948   * if the adding fails.
949   * @throws QuotaExceededException is thrown if it violates quota limit
950   */
951  INodesInPath addINode(INodesInPath existing, INode child)
952      throws QuotaExceededException, UnresolvedLinkException {
953    cacheName(child);
954    writeLock();
955    try {
956      return addLastINode(existing, child, true);
957    } finally {
958      writeUnlock();
959    }
960  }
961
962  /**
963   * Verify quota for adding or moving a new INode with required 
964   * namespace and storagespace to a given position.
965   *  
966   * @param iip INodes corresponding to a path
967   * @param pos position where a new INode will be added
968   * @param deltas needed namespace, storagespace and storage types
969   * @param commonAncestor Last node in inodes array that is a common ancestor
970   *          for a INode that is being moved from one location to the other.
971   *          Pass null if a node is not being moved.
972   * @throws QuotaExceededException if quota limit is exceeded.
973   */
974  static void verifyQuota(INodesInPath iip, int pos, QuotaCounts deltas,
975                          INode commonAncestor) throws QuotaExceededException {
976    if (deltas.getNameSpace() <= 0 && deltas.getStorageSpace() <= 0
977        && deltas.getTypeSpaces().allLessOrEqual(0L)) {
978      // if quota is being freed or not being consumed
979      return;
980    }
981
982    // check existing components in the path
983    for(int i = (pos > iip.length() ? iip.length(): pos) - 1; i >= 0; i--) {
984      if (commonAncestor == iip.getINode(i)) {
985        // Stop checking for quota when common ancestor is reached
986        return;
987      }
988      final DirectoryWithQuotaFeature q
989          = iip.getINode(i).asDirectory().getDirectoryWithQuotaFeature();
990      if (q != null) { // a directory with quota
991        try {
992          q.verifyQuota(deltas);
993        } catch (QuotaExceededException e) {
994          e.setPathName(iip.getPath(i));
995          throw e;
996        }
997      }
998    }
999  }
1000
1001  /** Verify if the inode name is legal. */
1002  void verifyINodeName(byte[] childName) throws HadoopIllegalArgumentException {
1003    if (Arrays.equals(HdfsServerConstants.DOT_SNAPSHOT_DIR_BYTES, childName)) {
1004      String s = "\"" + HdfsConstants.DOT_SNAPSHOT_DIR + "\" is a reserved name.";
1005      if (!namesystem.isImageLoaded()) {
1006        s += "  Please rename it before upgrade.";
1007      }
1008      throw new HadoopIllegalArgumentException(s);
1009    }
1010  }
1011
1012  /**
1013   * Verify child's name for fs limit.
1014   *
1015   * @param childName byte[] containing new child name
1016   * @param parentPath String containing parent path
1017   * @throws PathComponentTooLongException child's name is too long.
1018   */
1019  void verifyMaxComponentLength(byte[] childName, String parentPath)
1020      throws PathComponentTooLongException {
1021    if (maxComponentLength == 0) {
1022      return;
1023    }
1024
1025    final int length = childName.length;
1026    if (length > maxComponentLength) {
1027      final PathComponentTooLongException e = new PathComponentTooLongException(
1028          maxComponentLength, length, parentPath,
1029          DFSUtil.bytes2String(childName));
1030      if (namesystem.isImageLoaded()) {
1031        throw e;
1032      } else {
1033        // Do not throw if edits log is still being processed
1034        NameNode.LOG.error("ERROR in FSDirectory.verifyINodeName", e);
1035      }
1036    }
1037  }
1038
1039  /**
1040   * Verify children size for fs limit.
1041   *
1042   * @throws MaxDirectoryItemsExceededException too many children.
1043   */
1044  void verifyMaxDirItems(INodeDirectory parent, String parentPath)
1045      throws MaxDirectoryItemsExceededException {
1046    final int count = parent.getChildrenList(CURRENT_STATE_ID).size();
1047    if (count >= maxDirItems) {
1048      final MaxDirectoryItemsExceededException e
1049          = new MaxDirectoryItemsExceededException(parentPath, maxDirItems,
1050          count);
1051      if (namesystem.isImageLoaded()) {
1052        throw e;
1053      } else {
1054        // Do not throw if edits log is still being processed
1055        NameNode.LOG.error("FSDirectory.verifyMaxDirItems: "
1056            + e.getLocalizedMessage());
1057      }
1058    }
1059  }
1060
1061  /**
1062   * Add a child to the end of the path specified by INodesInPath.
1063   * @return an INodesInPath instance containing the new INode
1064   */
1065  @VisibleForTesting
1066  public INodesInPath addLastINode(INodesInPath existing, INode inode,
1067      boolean checkQuota) throws QuotaExceededException {
1068    assert existing.getLastINode() != null &&
1069        existing.getLastINode().isDirectory();
1070
1071    final int pos = existing.length();
1072    // Disallow creation of /.reserved. This may be created when loading
1073    // editlog/fsimage during upgrade since /.reserved was a valid name in older
1074    // release. This may also be called when a user tries to create a file
1075    // or directory /.reserved.
1076    if (pos == 1 && existing.getINode(0) == rootDir && isReservedName(inode)) {
1077      throw new HadoopIllegalArgumentException(
1078          "File name \"" + inode.getLocalName() + "\" is reserved and cannot "
1079              + "be created. If this is during upgrade change the name of the "
1080              + "existing file or directory to another name before upgrading "
1081              + "to the new release.");
1082    }
1083    final INodeDirectory parent = existing.getINode(pos - 1).asDirectory();
1084    // The filesystem limits are not really quotas, so this check may appear
1085    // odd. It's because a rename operation deletes the src, tries to add
1086    // to the dest, if that fails, re-adds the src from whence it came.
1087    // The rename code disables the quota when it's restoring to the
1088    // original location because a quota violation would cause the the item
1089    // to go "poof".  The fs limits must be bypassed for the same reason.
1090    if (checkQuota) {
1091      final String parentPath = existing.getPath();
1092      verifyMaxComponentLength(inode.getLocalNameBytes(), parentPath);
1093      verifyMaxDirItems(parent, parentPath);
1094    }
1095    // always verify inode name
1096    verifyINodeName(inode.getLocalNameBytes());
1097
1098    final QuotaCounts counts = inode.computeQuotaUsage(getBlockStoragePolicySuite());
1099    updateCount(existing, pos, counts, checkQuota);
1100
1101    boolean isRename = (inode.getParent() != null);
1102    boolean added;
1103    try {
1104      added = parent.addChild(inode, true, existing.getLatestSnapshotId());
1105    } catch (QuotaExceededException e) {
1106      updateCountNoQuotaCheck(existing, pos, counts.negation());
1107      throw e;
1108    }
1109    if (!added) {
1110      updateCountNoQuotaCheck(existing, pos, counts.negation());
1111      return null;
1112    } else {
1113      if (!isRename) {
1114        AclStorage.copyINodeDefaultAcl(inode);
1115      }
1116      addToInodeMap(inode);
1117    }
1118    return INodesInPath.append(existing, inode, inode.getLocalNameBytes());
1119  }
1120
1121  INodesInPath addLastINodeNoQuotaCheck(INodesInPath existing, INode i) {
1122    try {
1123      return addLastINode(existing, i, false);
1124    } catch (QuotaExceededException e) {
1125      NameNode.LOG.warn("FSDirectory.addChildNoQuotaCheck - unexpected", e);
1126    }
1127    return null;
1128  }
1129
1130  /**
1131   * Remove the last inode in the path from the namespace.
1132   * Note: the caller needs to update the ancestors' quota count.
1133   *
1134   * @return -1 for failing to remove;
1135   *          0 for removing a reference whose referred inode has other 
1136   *            reference nodes;
1137   *          1 otherwise.
1138   */
1139  @VisibleForTesting
1140  public long removeLastINode(final INodesInPath iip) {
1141    final int latestSnapshot = iip.getLatestSnapshotId();
1142    final INode last = iip.getLastINode();
1143    final INodeDirectory parent = iip.getINode(-2).asDirectory();
1144    if (!parent.removeChild(last, latestSnapshot)) {
1145      return -1;
1146    }
1147
1148    return (!last.isInLatestSnapshot(latestSnapshot)
1149        && INodeReference.tryRemoveReference(last) > 0) ? 0 : 1;
1150  }
1151
1152  /**
1153   * Return a new collection of normalized paths from the given input
1154   * collection. The input collection is unmodified.
1155   *
1156   * Reserved paths, relative paths and paths with scheme are ignored.
1157   *
1158   * @param paths collection whose contents are to be normalized.
1159   * @return collection with all input paths normalized.
1160   */
1161  static Collection<String> normalizePaths(Collection<String> paths,
1162                                           String errorString) {
1163    if (paths.isEmpty()) {
1164      return paths;
1165    }
1166    final Collection<String> normalized = new ArrayList<>(paths.size());
1167    for (String dir : paths) {
1168      if (isReservedName(dir)) {
1169        LOG.error("{} ignoring reserved path {}", errorString, dir);
1170      } else {
1171        final Path path = new Path(dir);
1172        if (!path.isAbsolute()) {
1173          LOG.error("{} ignoring relative path {}", errorString, dir);
1174        } else if (path.toUri().getScheme() != null) {
1175          LOG.error("{} ignoring path {} with scheme", errorString, dir);
1176        } else {
1177          normalized.add(path.toString());
1178        }
1179      }
1180    }
1181    return normalized;
1182  }
1183
1184  static String normalizePath(String src) {
1185    if (src.length() > 1 && src.endsWith("/")) {
1186      src = src.substring(0, src.length() - 1);
1187    }
1188    return src;
1189  }
1190
  /** @return the current yield count. */
  @VisibleForTesting
  public long getYieldCount() {
    return yieldCount;
  }
1195
  /** Increment the yield count by the given value. */
  void addYieldCount(long value) {
    yieldCount += value;
  }
1199
  /** @return the map from inode id to INode. */
  public INodeMap getINodeMap() {
    return inodeMap;
  }
1203
1204  /**
1205   * This method is always called with writeLock of FSDirectory held.
1206   */
1207  public final void addToInodeMap(INode inode) {
1208    if (inode instanceof INodeWithAdditionalFields) {
1209      inodeMap.put(inode);
1210      if (!inode.isSymlink()) {
1211        final XAttrFeature xaf = inode.getXAttrFeature();
1212        addEncryptionZone((INodeWithAdditionalFields) inode, xaf);
1213      }
1214    }
1215  }
1216
1217  private void addEncryptionZone(INodeWithAdditionalFields inode,
1218      XAttrFeature xaf) {
1219    if (xaf == null) {
1220      return;
1221    }
1222    XAttr xattr = xaf.getXAttr(CRYPTO_XATTR_ENCRYPTION_ZONE);
1223    if (xattr == null) {
1224      return;
1225    }
1226    try {
1227      final HdfsProtos.ZoneEncryptionInfoProto ezProto =
1228          HdfsProtos.ZoneEncryptionInfoProto.parseFrom(
1229              xattr.getValue());
1230      ezManager.unprotectedAddEncryptionZone(inode.getId(),
1231          PBHelperClient.convert(ezProto.getSuite()),
1232          PBHelperClient.convert(ezProto.getCryptoProtocolVersion()),
1233          ezProto.getKeyName());
1234    } catch (InvalidProtocolBufferException e) {
1235      NameNode.LOG.warn("Error parsing protocol buffer of " +
1236          "EZ XAttr " + xattr.getName() + " dir:" + inode.getFullPathName());
1237    }
1238  }
1239  
1240  /**
1241   * This is to handle encryption zone for rootDir when loading from
1242   * fsimage, and should only be called during NN restart.
1243   */
1244  public final void addRootDirToEncryptionZone(XAttrFeature xaf) {
1245    addEncryptionZone(rootDir, xaf);
1246  }
1247
1248  /**
1249   * This method is always called with writeLock of FSDirectory held.
1250   */
1251  public final void removeFromInodeMap(List<? extends INode> inodes) {
1252    if (inodes != null) {
1253      for (INode inode : inodes) {
1254        if (inode != null && inode instanceof INodeWithAdditionalFields) {
1255          inodeMap.remove(inode);
1256          ezManager.removeEncryptionZone(inode.getId());
1257        }
1258      }
1259    }
1260  }
1261  
1262  /**
1263   * Get the inode from inodeMap based on its inode id.
1264   * @param id The given id
1265   * @return The inode associated with the given id
1266   */
1267  public INode getInode(long id) {
1268    readLock();
1269    try {
1270      return inodeMap.get(id);
1271    } finally {
1272      readUnlock();
1273    }
1274  }
1275  
  /** @return the number of entries in the inode map. */
  @VisibleForTesting
  int getInodeMapSize() {
    return inodeMap.size();
  }
1280
  /** @return the total number of inodes, i.e. the inode map size. */
  long totalInodes() {
    return getInodeMapSize();
  }
1284
1285  /**
1286   * Reset the entire namespace tree.
1287   */
1288  void reset() {
1289    writeLock();
1290    try {
1291      rootDir = createRoot(getFSNamesystem());
1292      inodeMap.clear();
1293      addToInodeMap(rootDir);
1294      nameCache.reset();
1295      inodeId.setCurrentValue(INodeId.LAST_RESERVED_ID);
1296    } finally {
1297      writeUnlock();
1298    }
1299  }
1300
1301  static INode resolveLastINode(INodesInPath iip) throws FileNotFoundException {
1302    INode inode = iip.getLastINode();
1303    if (inode == null) {
1304      throw new FileNotFoundException("cannot find " + iip.getPath());
1305    }
1306    return inode;
1307  }
1308
1309  /**
1310   * Caches frequently used file names to reuse file name objects and
1311   * reduce heap size.
1312   */
1313  void cacheName(INode inode) {
1314    // Name is cached only for files
1315    if (!inode.isFile()) {
1316      return;
1317    }
1318    ByteArray name = new ByteArray(inode.getLocalNameBytes());
1319    name = nameCache.put(name);
1320    if (name != null) {
1321      inode.setLocalName(name.getBytes());
1322    }
1323  }
1324  
  /** Release name cache and inode map contents on shutdown. */
  void shutdown() {
    nameCache.reset();
    inodeMap.clear();
  }
1329  
1330  /**
1331   * Given an INode get all the path complents leading to it from the root.
1332   * If an Inode corresponding to C is given in /A/B/C, the returned
1333   * patch components will be {root, A, B, C}.
1334   * Note that this method cannot handle scenarios where the inode is in a
1335   * snapshot.
1336   */
1337  public static byte[][] getPathComponents(INode inode) {
1338    List<byte[]> components = new ArrayList<byte[]>();
1339    components.add(0, inode.getLocalNameBytes());
1340    while(inode.getParent() != null) {
1341      components.add(0, inode.getParent().getLocalNameBytes());
1342      inode = inode.getParent();
1343    }
1344    return components.toArray(new byte[components.size()][]);
1345  }
1346
1347  /** Check if a given inode name is reserved */
1348  public static boolean isReservedName(INode inode) {
1349    return CHECK_RESERVED_FILE_NAMES
1350            && Arrays.equals(inode.getLocalNameBytes(), DOT_RESERVED);
1351  }
1352
1353  /** Check if a given path is reserved */
1354  public static boolean isReservedName(String src) {
1355    return src.startsWith(DOT_RESERVED_PATH_PREFIX + Path.SEPARATOR);
1356  }
1357
  /** @return true if {@code src} is exactly the reserved path prefix. */
  public static boolean isExactReservedName(String src) {
    return CHECK_RESERVED_FILE_NAMES && src.equals(DOT_RESERVED_PATH_PREFIX);
  }
1361
1362  public static boolean isExactReservedName(byte[][] components) {
1363    return CHECK_RESERVED_FILE_NAMES &&
1364           (components.length == 2) &&
1365           isReservedName(components);
1366  }
1367
1368  static boolean isReservedRawName(String src) {
1369    return src.startsWith(DOT_RESERVED_PATH_PREFIX +
1370        Path.SEPARATOR + RAW_STRING);
1371  }
1372
1373  static boolean isReservedInodesName(String src) {
1374    return src.startsWith(DOT_RESERVED_PATH_PREFIX +
1375        Path.SEPARATOR + DOT_INODES_STRING);
1376  }
1377
1378  static boolean isReservedName(byte[][] components) {
1379    return (components.length > 1) &&
1380            Arrays.equals(INodeDirectory.ROOT_NAME, components[0]) &&
1381            Arrays.equals(DOT_RESERVED, components[1]);
1382  }
1383
1384  static boolean isReservedRawName(byte[][] components) {
1385    return (components.length > 2) &&
1386           isReservedName(components) &&
1387           Arrays.equals(RAW, components[2]);
1388  }
1389
1390  /**
1391   * Resolve a /.reserved/... path to a non-reserved path.
1392   * <p/>
1393   * There are two special hierarchies under /.reserved/:
1394   * <p/>
1395   * /.reserved/.inodes/<inodeid> performs a path lookup by inodeid,
1396   * <p/>
1397   * /.reserved/raw/... returns the encrypted (raw) bytes of a file in an
1398   * encryption zone. For instance, if /ezone is an encryption zone, then
1399   * /ezone/a refers to the decrypted file and /.reserved/raw/ezone/a refers to
1400   * the encrypted (raw) bytes of /ezone/a.
1401   * <p/>
1402   * Pathnames in the /.reserved/raw directory that resolve to files not in an
1403   * encryption zone are equivalent to the corresponding non-raw path. Hence,
1404   * if /a/b/c refers to a file that is not in an encryption zone, then
1405   * /.reserved/raw/a/b/c is equivalent (they both refer to the same
1406   * unencrypted file).
1407   * 
1408   * @param pathComponents to be resolved
1409   * @param fsd FSDirectory
1410   * @return if the path indicates an inode, return path after replacing up to
1411   *         <inodeid> with the corresponding path of the inode, else the path
1412   *         in {@code pathComponents} as is. If the path refers to a path in
1413   *         the "raw" directory, return the non-raw pathname.
1414   * @throws FileNotFoundException if inodeid is invalid
1415   */
1416  static byte[][] resolveComponents(byte[][] pathComponents,
1417      FSDirectory fsd) throws FileNotFoundException {
1418    final int nComponents = pathComponents.length;
1419    if (nComponents < 3 || !isReservedName(pathComponents)) {
1420      /* This is not a /.reserved/ path so do nothing. */
1421    } else if (Arrays.equals(DOT_INODES, pathComponents[2])) {
1422      /* It's a /.reserved/.inodes path. */
1423      if (nComponents > 3) {
1424        pathComponents = resolveDotInodesPath(pathComponents, fsd);
1425      }
1426    } else if (Arrays.equals(RAW, pathComponents[2])) {
1427      /* It's /.reserved/raw so strip off the /.reserved/raw prefix. */
1428      if (nComponents == 3) {
1429        pathComponents = new byte[][]{INodeDirectory.ROOT_NAME};
1430      } else {
1431        if (nComponents == 4
1432            && Arrays.equals(DOT_RESERVED, pathComponents[3])) {
1433          /* It's /.reserved/raw/.reserved so don't strip */
1434        } else {
1435          pathComponents = constructRemainingPath(
1436              new byte[][]{INodeDirectory.ROOT_NAME}, pathComponents, 3);
1437        }
1438      }
1439    }
1440    return pathComponents;
1441  }
1442
  /**
   * Resolve a /.reserved/.inodes/&lt;inodeid&gt;/... path into the real path
   * of the inode, keeping any remaining trailing components.
   *
   * @param pathComponents components of the /.reserved/.inodes path
   * @param fsd directory used to look up the inode by id
   * @return the resolved path components
   * @throws FileNotFoundException if the inode id is malformed or unknown
   */
  private static byte[][] resolveDotInodesPath(
      byte[][] pathComponents, FSDirectory fsd)
      throws FileNotFoundException {
    // Component 3 (after /.reserved/.inodes) must be a numeric inode id.
    final String inodeId = DFSUtil.bytes2String(pathComponents[3]);
    final long id;
    try {
      id = Long.parseLong(inodeId);
    } catch (NumberFormatException e) {
      throw new FileNotFoundException("Invalid inode path: " +
          DFSUtil.byteArray2PathString(pathComponents));
    }
    if (id == INodeId.ROOT_INODE_ID && pathComponents.length == 4) {
      return new byte[][]{INodeDirectory.ROOT_NAME};
    }
    INode inode = fsd.getInode(id);
    if (inode == null) {
      throw new FileNotFoundException(
          "File for given inode path does not exist: " +
              DFSUtil.byteArray2PathString(pathComponents));
    }

    // Handle single ".." for NFS lookup support.
    if ((pathComponents.length > 4)
        && Arrays.equals(pathComponents[4], DOT_DOT)) {
      INode parent = inode.getParent();
      if (parent == null || parent.getId() == INodeId.ROOT_INODE_ID) {
        // inode is root, or its parent is root.
        return new byte[][]{INodeDirectory.ROOT_NAME};
      }
      return parent.getPathComponents();
    }
    // Replace the /.reserved/.inodes/<id> prefix with the inode's real path
    // and append any remaining components.
    return constructRemainingPath(
        inode.getPathComponents(), pathComponents, 4);
  }
1477
1478  private static byte[][] constructRemainingPath(byte[][] components,
1479      byte[][] extraComponents, int startAt) {
1480    int remainder = extraComponents.length - startAt;
1481    if (remainder > 0) {
1482      // grow the array and copy in the remaining components
1483      int pos = components.length;
1484      components = Arrays.copyOf(components, pos + remainder);
1485      System.arraycopy(extraComponents, startAt, components, pos, remainder);
1486    }
1487    if (NameNode.LOG.isDebugEnabled()) {
1488      NameNode.LOG.debug(
1489          "Resolved path is " + DFSUtil.byteArray2PathString(components));
1490    }
1491    return components;
1492  }
1493
1494  INode getINode4DotSnapshot(INodesInPath iip) throws UnresolvedLinkException {
1495    Preconditions.checkArgument(
1496        iip.isDotSnapshotDir(), "%s does not end with %s",
1497        iip.getPath(), HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR);
1498
1499    final INode node = iip.getINode(-2);
1500    if (node != null && node.isDirectory()
1501        && node.asDirectory().isSnapshottable()) {
1502      return node;
1503    }
1504    return null;
1505  }
1506
1507  /**
1508   * Resolves the given path into inodes.  Reserved paths are not handled and
1509   * permissions are not verified.  Client supplied paths should be
1510   * resolved via {@link #resolvePath(FSPermissionChecker, String, DirOp)}.
1511   * This method should only be used by internal methods.
1512   * @return the {@link INodesInPath} containing all inodes in the path.
1513   * @throws UnresolvedLinkException
1514   * @throws ParentNotDirectoryException
1515   * @throws AccessControlException
1516   */
1517  public INodesInPath getINodesInPath(String src, DirOp dirOp)
1518      throws UnresolvedLinkException, AccessControlException,
1519      ParentNotDirectoryException {
1520    return getINodesInPath(INode.getPathComponents(src), dirOp);
1521  }
1522
1523  public INodesInPath getINodesInPath(byte[][] components, DirOp dirOp)
1524      throws UnresolvedLinkException, AccessControlException,
1525      ParentNotDirectoryException {
1526    INodesInPath iip = INodesInPath.resolve(rootDir, components);
1527    checkTraverse(null, iip, dirOp);
1528    return iip;
1529  }
1530
1531  /**
1532   * Get {@link INode} associated with the file / directory.
1533   * See {@link #getINode(String, DirOp)}
1534   */
1535  @VisibleForTesting // should be removed after a lot of tests are updated
1536  public INode getINode(String src) throws UnresolvedLinkException,
1537      AccessControlException, ParentNotDirectoryException {
1538    return getINode(src, DirOp.READ);
1539  }
1540
1541  /**
1542   * Get {@link INode} associated with the file / directory.
1543   * See {@link #getINode(String, DirOp)}
1544   */
1545  @VisibleForTesting // should be removed after a lot of tests are updated
1546  public INode getINode4Write(String src) throws UnresolvedLinkException,
1547      AccessControlException, FileNotFoundException,
1548      ParentNotDirectoryException {
1549    return getINode(src, DirOp.WRITE);
1550  }
1551
1552  /**
1553   * Get {@link INode} associated with the file / directory.
1554   */
1555  public INode getINode(String src, DirOp dirOp) throws UnresolvedLinkException,
1556      AccessControlException, ParentNotDirectoryException {
1557    return getINodesInPath(src, dirOp).getLastINode();
1558  }
1559
1560  FSPermissionChecker getPermissionChecker()
1561    throws AccessControlException {
1562    try {
1563      return getPermissionChecker(fsOwnerShortUserName, supergroup,
1564          NameNode.getRemoteUser());
1565    } catch (IOException e) {
1566      throw new AccessControlException(e);
1567    }
1568  }
1569
  /**
   * Construct an {@link FSPermissionChecker} for the given fs owner,
   * supergroup and calling user, backed by the configured attribute provider.
   */
  @VisibleForTesting
  FSPermissionChecker getPermissionChecker(String fsOwner, String superGroup,
      UserGroupInformation ugi) throws AccessControlException {
    return new FSPermissionChecker(
        fsOwner, superGroup, ugi, attributeProvider);
  }
1576
1577  void checkOwner(FSPermissionChecker pc, INodesInPath iip)
1578      throws AccessControlException, FileNotFoundException {
1579    if (iip.getLastINode() == null) {
1580      throw new FileNotFoundException(
1581          "Directory/File does not exist " + iip.getPath());
1582    }
1583    checkPermission(pc, iip, true, null, null, null, null);
1584  }
1585
  /** Check that the caller has {@code access} on the last inode of the path. */
  void checkPathAccess(FSPermissionChecker pc, INodesInPath iip,
      FsAction access) throws AccessControlException {
    checkPermission(pc, iip, false, null, null, access, null);
  }
  /** Check {@code access}, passed in the parentAccess slot, for the path. */
  void checkParentAccess(FSPermissionChecker pc, INodesInPath iip,
      FsAction access) throws AccessControlException {
    checkPermission(pc, iip, false, null, access, null, null);
  }
1594
1595  void checkAncestorAccess(FSPermissionChecker pc, INodesInPath iip,
1596      FsAction access) throws AccessControlException {
1597    checkPermission(pc, iip, false, access, null, null, null);
1598  }
1599
1600  void checkTraverse(FSPermissionChecker pc, INodesInPath iip,
1601      boolean resolveLink) throws AccessControlException,
1602        UnresolvedPathException, ParentNotDirectoryException {
1603    FSPermissionChecker.checkTraverse(
1604        isPermissionEnabled ? pc : null, iip, resolveLink);
1605  }
1606
1607  void checkTraverse(FSPermissionChecker pc, INodesInPath iip,
1608      DirOp dirOp) throws AccessControlException, UnresolvedPathException,
1609          ParentNotDirectoryException {
1610    final boolean resolveLink;
1611    switch (dirOp) {
1612      case READ_LINK:
1613      case WRITE_LINK:
1614      case CREATE_LINK:
1615        resolveLink = false;
1616        break;
1617      default:
1618        resolveLink = true;
1619        break;
1620    }
1621    checkTraverse(pc, iip, resolveLink);
1622    boolean allowSnapshot = (dirOp == DirOp.READ || dirOp == DirOp.READ_LINK);
1623    if (!allowSnapshot && iip.isSnapshot()) {
1624      throw new SnapshotAccessControlException(
1625          "Modification on a read-only snapshot is disallowed");
1626    }
1627  }
1628
1629  /**
1630   * Check whether current user have permissions to access the path. For more
1631   * details of the parameters, see
1632   * {@link FSPermissionChecker#checkPermission}.
1633   */
1634  void checkPermission(FSPermissionChecker pc, INodesInPath iip,
1635      boolean doCheckOwner, FsAction ancestorAccess, FsAction parentAccess,
1636      FsAction access, FsAction subAccess)
1637    throws AccessControlException {
1638    checkPermission(pc, iip, doCheckOwner, ancestorAccess,
1639        parentAccess, access, subAccess, false);
1640  }
1641
1642  /**
1643   * Check whether current user have permissions to access the path. For more
1644   * details of the parameters, see
1645   * {@link FSPermissionChecker#checkPermission}.
1646   */
1647  void checkPermission(FSPermissionChecker pc, INodesInPath iip,
1648      boolean doCheckOwner, FsAction ancestorAccess, FsAction parentAccess,
1649      FsAction access, FsAction subAccess, boolean ignoreEmptyDir)
1650      throws AccessControlException {
1651    if (!pc.isSuperUser()) {
1652      readLock();
1653      try {
1654        pc.checkPermission(iip, doCheckOwner, ancestorAccess,
1655            parentAccess, access, subAccess, ignoreEmptyDir);
1656      } finally {
1657        readUnlock();
1658      }
1659    }
1660  }
1661
1662  void checkUnreadableBySuperuser(FSPermissionChecker pc, INodesInPath iip)
1663      throws IOException {
1664    if (pc.isSuperUser()) {
1665      if (FSDirXAttrOp.getXAttrByPrefixedName(this, iip,
1666          SECURITY_XATTR_UNREADABLE_BY_SUPERUSER) != null) {
1667        throw new AccessControlException(
1668            "Access is denied for " + pc.getUser() + " since the superuser "
1669            + "is not allowed to perform this operation.");
1670      }
1671    }
1672  }
1673
1674  HdfsFileStatus getAuditFileInfo(INodesInPath iip)
1675      throws IOException {
1676    return (namesystem.isAuditEnabled() && namesystem.isExternalInvocation())
1677        ? FSDirStatAndListingOp.getFileInfo(this, iip, false) : null;
1678  }
1679
1680  /**
1681   * Verify that parent directory of src exists.
1682   */
1683  void verifyParentDir(INodesInPath iip)
1684      throws FileNotFoundException, ParentNotDirectoryException {
1685    if (iip.length() > 2) {
1686      final INode parentNode = iip.getINode(-2);
1687      if (parentNode == null) {
1688        throw new FileNotFoundException("Parent directory doesn't exist: "
1689            + iip.getParentPath());
1690      } else if (!parentNode.isDirectory()) {
1691        throw new ParentNotDirectoryException("Parent path is not a directory: "
1692            + iip.getParentPath());
1693      }
1694    }
1695  }
1696
1697  /** Allocate a new inode ID. */
1698  long allocateNewInodeId() {
1699    return inodeId.nextValue();
1700  }
1701
  /** @return the most recently allocated inode ID. */
  public long getLastInodeId() {
    return inodeId.getCurrentValue();
  }
1706
1707  /**
1708   * Set the last allocated inode id when fsimage or editlog is loaded.
1709   */
1710  void resetLastInodeId(long newValue) throws IOException {
1711    try {
1712      inodeId.skipTo(newValue);
1713    } catch(IllegalStateException ise) {
1714      throw new IOException(ise);
1715    }
1716  }
1717
  /**
   * Reset the inode ID counter to an arbitrary value, bypassing the
   * validity check done by {@link #resetLastInodeId}.
   * Should only be used for tests to reset to any value.
   */
  void resetLastInodeIdWithoutChecking(long newValue) {
    inodeId.setCurrentValue(newValue);
  }
1722
1723  INodeAttributes getAttributes(INodesInPath iip)
1724      throws FileNotFoundException {
1725    INode node = FSDirectory.resolveLastINode(iip);
1726    int snapshot = iip.getPathSnapshotId();
1727    INodeAttributes nodeAttrs = node.getSnapshotINode(snapshot);
1728    if (attributeProvider != null) {
1729      // permission checking sends the full components array including the
1730      // first empty component for the root.  however file status
1731      // related calls are expected to strip out the root component according
1732      // to TestINodeAttributeProvider.
1733      byte[][] components = iip.getPathComponents();
1734      components = Arrays.copyOfRange(components, 1, components.length);
1735      nodeAttrs = attributeProvider.getAttributes(components, nodeAttrs);
1736    }
1737    return nodeAttrs;
1738  }
1739
1740}