001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.hdfs.server.namenode;
019    
020    import static org.apache.hadoop.util.Time.now;
021    
022    import java.io.Closeable;
023    import java.io.FileNotFoundException;
024    import java.io.IOException;
025    import java.util.ArrayList;
026    import java.util.Arrays;
027    import java.util.List;
028    import java.util.concurrent.TimeUnit;
029    import java.util.concurrent.locks.Condition;
030    import java.util.concurrent.locks.ReentrantReadWriteLock;
031    
032    import org.apache.hadoop.HadoopIllegalArgumentException;
033    import org.apache.hadoop.conf.Configuration;
034    import org.apache.hadoop.fs.ContentSummary;
035    import org.apache.hadoop.fs.FileAlreadyExistsException;
036    import org.apache.hadoop.fs.Options;
037    import org.apache.hadoop.fs.Options.Rename;
038    import org.apache.hadoop.fs.ParentNotDirectoryException;
039    import org.apache.hadoop.fs.Path;
040    import org.apache.hadoop.fs.PathIsNotDirectoryException;
041    import org.apache.hadoop.fs.UnresolvedLinkException;
042    import org.apache.hadoop.fs.permission.AclEntry;
043    import org.apache.hadoop.fs.permission.AclStatus;
044    import org.apache.hadoop.fs.permission.FsAction;
045    import org.apache.hadoop.fs.permission.FsPermission;
046    import org.apache.hadoop.fs.permission.PermissionStatus;
047    import org.apache.hadoop.hdfs.DFSConfigKeys;
048    import org.apache.hadoop.hdfs.DFSUtil;
049    import org.apache.hadoop.hdfs.DistributedFileSystem;
050    import org.apache.hadoop.hdfs.protocol.AclException;
051    import org.apache.hadoop.hdfs.protocol.Block;
052    import org.apache.hadoop.hdfs.protocol.ClientProtocol;
053    import org.apache.hadoop.hdfs.protocol.DirectoryListing;
054    import org.apache.hadoop.hdfs.protocol.FSLimitException.MaxDirectoryItemsExceededException;
055    import org.apache.hadoop.hdfs.protocol.FSLimitException.PathComponentTooLongException;
056    import org.apache.hadoop.hdfs.protocol.FsAclPermission;
057    import org.apache.hadoop.hdfs.protocol.HdfsConstants;
058    import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
059    import org.apache.hadoop.hdfs.protocol.HdfsLocatedFileStatus;
060    import org.apache.hadoop.hdfs.protocol.LocatedBlock;
061    import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
062    import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
063    import org.apache.hadoop.hdfs.protocol.SnapshotAccessControlException;
064    import org.apache.hadoop.hdfs.protocol.SnapshotException;
065    import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
066    import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
067    import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
068    import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
069    import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
070    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
071    import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
072    import org.apache.hadoop.hdfs.server.namenode.INodeReference.WithCount;
073    import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable;
074    import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
075    import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot.Root;
076    import org.apache.hadoop.hdfs.util.ByteArray;
077    import org.apache.hadoop.hdfs.util.ChunkedArrayList;
078    import org.apache.hadoop.hdfs.util.ReadOnlyList;
079    
080    import com.google.common.annotations.VisibleForTesting;
081    import com.google.common.base.Preconditions;
082    
083    /*************************************************
084     * FSDirectory stores the filesystem directory state.
085     * It handles writing/loading values to disk, and logging
086     * changes as we go.
087     *
088     * It keeps the filename->blockset mapping always-current
089     * and logged to disk.
090     * 
091     *************************************************/
092    public class FSDirectory implements Closeable {
093      private static INodeDirectorySnapshottable createRoot(FSNamesystem namesystem) {
094        final INodeDirectory r = new INodeDirectory(
095            INodeId.ROOT_INODE_ID,
096            INodeDirectory.ROOT_NAME,
097            namesystem.createFsOwnerPermissions(new FsPermission((short) 0755)),
098            0L);
099        r.addDirectoryWithQuotaFeature(
100            DirectoryWithQuotaFeature.DEFAULT_NAMESPACE_QUOTA,
101            DirectoryWithQuotaFeature.DEFAULT_DISKSPACE_QUOTA);
102        final INodeDirectorySnapshottable s = new INodeDirectorySnapshottable(r);
103        s.setSnapshotQuota(0);
104        return s;
105      }
106    
  // Mutable so tests can toggle it; NOTE(review): presumably controls
  // whether reserved file names are rejected -- confirm against its users.
  @VisibleForTesting
  static boolean CHECK_RESERVED_FILE_NAMES = true;
  /** Name of the reserved path component. */
  public final static String DOT_RESERVED_STRING = ".reserved";
  /** The reserved component prefixed with the path separator. */
  public final static String DOT_RESERVED_PATH_PREFIX = Path.SEPARATOR
      + DOT_RESERVED_STRING;
  /** Byte form of {@link #DOT_RESERVED_STRING}. */
  public final static byte[] DOT_RESERVED = 
      DFSUtil.string2Bytes(DOT_RESERVED_STRING);
  /** Name of the ".inodes" path component. */
  public final static String DOT_INODES_STRING = ".inodes";
  /** Byte form of {@link #DOT_INODES_STRING}. */
  public final static byte[] DOT_INODES = 
      DFSUtil.string2Bytes(DOT_INODES_STRING);
  // root of the directory tree; created in the constructor
  INodeDirectory rootDir;
  // image whose edit log records the changes made through this class
  FSImage fsImage;  
  private final FSNamesystem namesystem;
  // set once loading is complete; waitForReady() blocks until it is true
  private volatile boolean ready = false;
  // limits read from the configuration in the constructor
  private final int maxComponentLength;
  private final int maxDirItems;
  private final int lsLimit;  // max list limit
  private final int contentCountLimit; // max content summary counts per run
  private final INodeMap inodeMap; // Synchronized by dirLock
  private long yieldCount = 0; // keep track of lock yield count.

  // lock to protect the directory and BlockMap
  private final ReentrantReadWriteLock dirLock;
  // condition of dirLock's write lock; signalled when the directory becomes
  // ready
  private final Condition cond;
131    
132      // utility methods to acquire and release read lock and write lock
133      void readLock() {
134        this.dirLock.readLock().lock();
135      }
136    
137      void readUnlock() {
138        this.dirLock.readLock().unlock();
139      }
140    
141      void writeLock() {
142        this.dirLock.writeLock().lock();
143      }
144    
145      void writeUnlock() {
146        this.dirLock.writeLock().unlock();
147      }
148    
149      boolean hasWriteLock() {
150        return this.dirLock.isWriteLockedByCurrentThread();
151      }
152    
153      boolean hasReadLock() {
154        return this.dirLock.getReadHoldCount() > 0;
155      }
156    
157      public int getReadHoldCount() {
158        return this.dirLock.getReadHoldCount();
159      }
160    
161      public int getWriteHoldCount() {
162        return this.dirLock.getWriteHoldCount();
163      }
164    
  /**
   * Caches frequently used file names used in {@link INode} to reuse
   * byte[] objects and reduce heap usage. The cache is created in the
   * constructor with a threshold read from the configuration, and
   * {@code initialized()} is called on it when the directory becomes ready.
   */
  private final NameCache<ByteArray> nameCache;
170    
171      FSDirectory(FSImage fsImage, FSNamesystem ns, Configuration conf) {
172        this.dirLock = new ReentrantReadWriteLock(true); // fair
173        this.cond = dirLock.writeLock().newCondition();
174        rootDir = createRoot(ns);
175        inodeMap = INodeMap.newInstance(rootDir);
176        this.fsImage = fsImage;
177        int configuredLimit = conf.getInt(
178            DFSConfigKeys.DFS_LIST_LIMIT, DFSConfigKeys.DFS_LIST_LIMIT_DEFAULT);
179        this.lsLimit = configuredLimit>0 ?
180            configuredLimit : DFSConfigKeys.DFS_LIST_LIMIT_DEFAULT;
181        this.contentCountLimit = conf.getInt(
182            DFSConfigKeys.DFS_CONTENT_SUMMARY_LIMIT_KEY,
183            DFSConfigKeys.DFS_CONTENT_SUMMARY_LIMIT_DEFAULT);
184        
185        // filesystem limits
186        this.maxComponentLength = conf.getInt(
187            DFSConfigKeys.DFS_NAMENODE_MAX_COMPONENT_LENGTH_KEY,
188            DFSConfigKeys.DFS_NAMENODE_MAX_COMPONENT_LENGTH_DEFAULT);
189        this.maxDirItems = conf.getInt(
190            DFSConfigKeys.DFS_NAMENODE_MAX_DIRECTORY_ITEMS_KEY,
191            DFSConfigKeys.DFS_NAMENODE_MAX_DIRECTORY_ITEMS_DEFAULT);
192        // We need a maximum maximum because by default, PB limits message sizes
193        // to 64MB. This means we can only store approximately 6.7 million entries
194        // per directory, but let's use 6.4 million for some safety.
195        final int MAX_DIR_ITEMS = 64 * 100 * 1000;
196        Preconditions.checkArgument(
197            maxDirItems > 0 && maxDirItems <= MAX_DIR_ITEMS, "Cannot set "
198                + DFSConfigKeys.DFS_NAMENODE_MAX_DIRECTORY_ITEMS_KEY
199                + " to a value less than 0 or greater than " + MAX_DIR_ITEMS);
200    
201        int threshold = conf.getInt(
202            DFSConfigKeys.DFS_NAMENODE_NAME_CACHE_THRESHOLD_KEY,
203            DFSConfigKeys.DFS_NAMENODE_NAME_CACHE_THRESHOLD_DEFAULT);
204        NameNode.LOG.info("Caching file names occuring more than " + threshold
205            + " times");
206        nameCache = new NameCache<ByteArray>(threshold);
207        namesystem = ns;
208      }
209        
210      private FSNamesystem getFSNamesystem() {
211        return namesystem;
212      }
213    
214      private BlockManager getBlockManager() {
215        return getFSNamesystem().getBlockManager();
216      }
217    
218      /** @return the root directory inode. */
219      public INodeDirectory getRoot() {
220        return rootDir;
221      }
222    
223      /**
224       * Notify that loading of this FSDirectory is complete, and
225       * it is ready for use 
226       */
227      void imageLoadComplete() {
228        Preconditions.checkState(!ready, "FSDirectory already loaded");
229        setReady();
230      }
231    
232      void setReady() {
233        if(ready) return;
234        writeLock();
235        try {
236          setReady(true);
237          this.nameCache.initialized();
238          cond.signalAll();
239        } finally {
240          writeUnlock();
241        }
242      }
243      
244      //This is for testing purposes only
245      @VisibleForTesting
246      boolean isReady() {
247        return ready;
248      }
249    
250      // exposed for unit tests
251      protected void setReady(boolean flag) {
252        ready = flag;
253      }
254    
255      private void incrDeletedFileCount(long count) {
256        if (getFSNamesystem() != null)
257          NameNode.getNameNodeMetrics().incrFilesDeleted(count);
258      }
259        
260      /**
261       * Shutdown the filestore
262       */
263      @Override
264      public void close() throws IOException {
265        fsImage.close();
266      }
267    
268      /**
269       * Block until the object is ready to be used.
270       */
271      void waitForReady() {
272        if (!ready) {
273          writeLock();
274          try {
275            while (!ready) {
276              try {
277                cond.await(5000, TimeUnit.MILLISECONDS);
278              } catch (InterruptedException ie) {
279              }
280            }
281          } finally {
282            writeUnlock();
283          }
284        }
285      }
286    
287      /**
288       * Add the given filename to the fs.
289       * @throws FileAlreadyExistsException
290       * @throws QuotaExceededException
291       * @throws UnresolvedLinkException
292       * @throws SnapshotAccessControlException 
293       */
294      INodeFile addFile(String path, PermissionStatus permissions,
295          short replication, long preferredBlockSize, String clientName,
296          String clientMachine, DatanodeDescriptor clientNode)
297        throws FileAlreadyExistsException, QuotaExceededException,
298          UnresolvedLinkException, SnapshotAccessControlException, AclException {
299        waitForReady();
300    
301        // Always do an implicit mkdirs for parent directory tree.
302        long modTime = now();
303        
304        Path parent = new Path(path).getParent();
305        if (parent == null) {
306          // Trying to add "/" as a file - this path has no
307          // parent -- avoids an NPE below.
308          return null;
309        }
310        
311        if (!mkdirs(parent.toString(), permissions, true, modTime)) {
312          return null;
313        }
314        INodeFile newNode = new INodeFile(namesystem.allocateNewInodeId(), null,
315            permissions, modTime, modTime, BlockInfo.EMPTY_ARRAY, replication,
316            preferredBlockSize);
317        newNode.toUnderConstruction(clientName, clientMachine, clientNode);
318    
319        boolean added = false;
320        writeLock();
321        try {
322          added = addINode(path, newNode);
323        } finally {
324          writeUnlock();
325        }
326        if (!added) {
327          NameNode.stateChangeLog.info("DIR* addFile: failed to add " + path);
328          return null;
329        }
330    
331        if(NameNode.stateChangeLog.isDebugEnabled()) {
332          NameNode.stateChangeLog.debug("DIR* addFile: " + path + " is added");
333        }
334        return newNode;
335      }
336    
337      INodeFile unprotectedAddFile( long id,
338                                String path, 
339                                PermissionStatus permissions,
340                                List<AclEntry> aclEntries,
341                                short replication,
342                                long modificationTime,
343                                long atime,
344                                long preferredBlockSize,
345                                boolean underConstruction,
346                                String clientName,
347                                String clientMachine) {
348        final INodeFile newNode;
349        assert hasWriteLock();
350        if (underConstruction) {
351          newNode = new INodeFile(id, null, permissions, modificationTime,
352              modificationTime, BlockInfo.EMPTY_ARRAY, replication,
353              preferredBlockSize);
354          newNode.toUnderConstruction(clientName, clientMachine, null);
355    
356        } else {
357          newNode = new INodeFile(id, null, permissions, modificationTime, atime,
358              BlockInfo.EMPTY_ARRAY, replication, preferredBlockSize);
359        }
360    
361        try {
362          if (addINode(path, newNode)) {
363            if (aclEntries != null) {
364              AclStorage.updateINodeAcl(newNode, aclEntries,
365                Snapshot.CURRENT_STATE_ID);
366            }
367            return newNode;
368          }
369        } catch (IOException e) {
370          if(NameNode.stateChangeLog.isDebugEnabled()) {
371            NameNode.stateChangeLog.debug(
372                "DIR* FSDirectory.unprotectedAddFile: exception when add " + path
373                    + " to the file system", e);
374          }
375        }
376        return null;
377      }
378    
379      /**
380       * Add a block to the file. Returns a reference to the added block.
381       */
382      BlockInfo addBlock(String path, INodesInPath inodesInPath, Block block,
383          DatanodeStorageInfo[] targets) throws IOException {
384        waitForReady();
385    
386        writeLock();
387        try {
388          final INodeFile fileINode = inodesInPath.getLastINode().asFile();
389          Preconditions.checkState(fileINode.isUnderConstruction());
390    
391          // check quota limits and updated space consumed
392          updateCount(inodesInPath, 0, fileINode.getBlockDiskspace(), true);
393    
394          // associate new last block for the file
395          BlockInfoUnderConstruction blockInfo =
396            new BlockInfoUnderConstruction(
397                block,
398                fileINode.getFileReplication(),
399                BlockUCState.UNDER_CONSTRUCTION,
400                targets);
401          getBlockManager().addBlockCollection(blockInfo, fileINode);
402          fileINode.addBlock(blockInfo);
403    
404          if(NameNode.stateChangeLog.isDebugEnabled()) {
405            NameNode.stateChangeLog.debug("DIR* FSDirectory.addBlock: "
406                + path + " with " + block
407                + " block is added to the in-memory "
408                + "file system");
409          }
410          return blockInfo;
411        } finally {
412          writeUnlock();
413        }
414      }
415    
416      /**
417       * Persist the block list for the inode.
418       */
419      void persistBlocks(String path, INodeFile file, boolean logRetryCache) {
420        Preconditions.checkArgument(file.isUnderConstruction());
421        waitForReady();
422    
423        writeLock();
424        try {
425          fsImage.getEditLog().logUpdateBlocks(path, file, logRetryCache);
426          if(NameNode.stateChangeLog.isDebugEnabled()) {
427            NameNode.stateChangeLog.debug("DIR* FSDirectory.persistBlocks: "
428                +path+" with "+ file.getBlocks().length 
429                +" blocks is persisted to the file system");
430          }
431        } finally {
432          writeUnlock();
433        }
434      }
435      
436      /**
437       * Persist the new block (the last block of the given file).
438       */
439      void persistNewBlock(String path, INodeFile file) {
440        Preconditions.checkArgument(file.isUnderConstruction());
441        waitForReady();
442    
443        writeLock();
444        try {
445          fsImage.getEditLog().logAddBlock(path, file);
446        } finally {
447          writeUnlock();
448        }
449        if (NameNode.stateChangeLog.isDebugEnabled()) {
450          NameNode.stateChangeLog.debug("DIR* FSDirectory.persistNewBlock: "
451              + path + " with new block " + file.getLastBlock().toString()
452              + ", current total block count is " + file.getBlocks().length);
453        }
454      }
455      
456      /**
457       * Close file.
458       */
459      void closeFile(String path, INodeFile file) {
460        waitForReady();
461        writeLock();
462        try {
463          // file is closed
464          fsImage.getEditLog().logCloseFile(path, file);
465          if (NameNode.stateChangeLog.isDebugEnabled()) {
466            NameNode.stateChangeLog.debug("DIR* FSDirectory.closeFile: "
467                +path+" with "+ file.getBlocks().length 
468                +" blocks is persisted to the file system");
469          }
470        } finally {
471          writeUnlock();
472        }
473      }
474    
475      /**
476       * Remove a block from the file.
477       * @return Whether the block exists in the corresponding file
478       */
479      boolean removeBlock(String path, INodeFile fileNode, Block block)
480          throws IOException {
481        Preconditions.checkArgument(fileNode.isUnderConstruction());
482        waitForReady();
483    
484        writeLock();
485        try {
486          return unprotectedRemoveBlock(path, fileNode, block);
487        } finally {
488          writeUnlock();
489        }
490      }
491      
492      boolean unprotectedRemoveBlock(String path,
493          INodeFile fileNode, Block block) throws IOException {
494        // modify file-> block and blocksMap
495        // fileNode should be under construction
496        boolean removed = fileNode.removeLastBlock(block);
497        if (!removed) {
498          return false;
499        }
500        getBlockManager().removeBlockFromMap(block);
501    
502        if(NameNode.stateChangeLog.isDebugEnabled()) {
503          NameNode.stateChangeLog.debug("DIR* FSDirectory.removeBlock: "
504              +path+" with "+block
505              +" block is removed from the file system");
506        }
507    
508        // update space consumed
509        final INodesInPath iip = rootDir.getINodesInPath4Write(path, true);
510        updateCount(iip, 0, -fileNode.getBlockDiskspace(), true);
511        return true;
512      }
513    
514      /**
515       * @throws SnapshotAccessControlException 
516       * @see #unprotectedRenameTo(String, String, long)
517       * @deprecated Use {@link #renameTo(String, String, Rename...)} instead.
518       */
519      @Deprecated
520      boolean renameTo(String src, String dst, boolean logRetryCache) 
521          throws QuotaExceededException, UnresolvedLinkException, 
522          FileAlreadyExistsException, SnapshotAccessControlException, IOException {
523        if (NameNode.stateChangeLog.isDebugEnabled()) {
524          NameNode.stateChangeLog.debug("DIR* FSDirectory.renameTo: "
525              +src+" to "+dst);
526        }
527        waitForReady();
528        long now = now();
529        writeLock();
530        try {
531          if (!unprotectedRenameTo(src, dst, now))
532            return false;
533        } finally {
534          writeUnlock();
535        }
536        fsImage.getEditLog().logRename(src, dst, now, logRetryCache);
537        return true;
538      }
539    
540      /**
541       * @see #unprotectedRenameTo(String, String, long, Options.Rename...)
542       */
543      void renameTo(String src, String dst, boolean logRetryCache, 
544          Options.Rename... options)
545          throws FileAlreadyExistsException, FileNotFoundException,
546          ParentNotDirectoryException, QuotaExceededException,
547          UnresolvedLinkException, IOException {
548        if (NameNode.stateChangeLog.isDebugEnabled()) {
549          NameNode.stateChangeLog.debug("DIR* FSDirectory.renameTo: " + src
550              + " to " + dst);
551        }
552        waitForReady();
553        long now = now();
554        writeLock();
555        try {
556          if (unprotectedRenameTo(src, dst, now, options)) {
557            incrDeletedFileCount(1);
558          }
559        } finally {
560          writeUnlock();
561        }
562        fsImage.getEditLog().logRename(src, dst, now, logRetryCache, options);
563      }
564    
  /**
   * Change a path name. The caller must hold the write lock (checked by an
   * assertion); this method neither waits for readiness nor writes to the
   * edit log.
   *
   * If dst is an existing directory, the last component of src is appended
   * to it first. The rename is refused (a warning is logged and false is
   * returned) when the source does not exist, the source is the root, the
   * destination lies under the source, the destination already exists, the
   * destination's parent does not exist, the source cannot be removed, or
   * the source cannot be added at the destination. In the last case the
   * source is put back where it was.
   * 
   * @param src source path
   * @param dst destination path
   * @param timestamp modification time set on the parents of src and dst
   *          when the rename succeeds
   * @return true if rename succeeds, or if dst equals src; false otherwise
   * @throws QuotaExceededException if the operation violates any quota limit
   * @throws FileAlreadyExistsException if the src is a symlink that points to dst
   * @throws SnapshotAccessControlException if path is in RO snapshot
   * @deprecated See
   *     {@link #renameTo(String, String, boolean, Options.Rename...)}
   */
  @Deprecated
  boolean unprotectedRenameTo(String src, String dst, long timestamp)
    throws QuotaExceededException, UnresolvedLinkException, 
    FileAlreadyExistsException, SnapshotAccessControlException, IOException {
    assert hasWriteLock();
    INodesInPath srcIIP = rootDir.getINodesInPath4Write(src, false);
    final INode srcInode = srcIIP.getLastINode();
    
    // check the validation of the source
    if (srcInode == null) {
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
          + "failed to rename " + src + " to " + dst
          + " because source does not exist");
      return false;
    } 
    // a resolved path of length one is treated as the root itself
    if (srcIIP.getINodes().length == 1) {
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
          +"failed to rename "+src+" to "+dst+ " because source is the root");
      return false;
    }
    
    // srcInode and its subtree cannot contain snapshottable directories with
    // snapshots
    List<INodeDirectorySnapshottable> snapshottableDirs = 
        new ArrayList<INodeDirectorySnapshottable>();
    checkSnapshot(srcInode, snapshottableDirs);
    
    // renaming into an existing directory keeps the source's own name
    if (isDir(dst)) {
      dst += Path.SEPARATOR + new Path(src).getName();
    }
    
    // check the validity of the destination
    if (dst.equals(src)) {
      return true;
    }
    if (srcInode.isSymlink() && 
        dst.equals(srcInode.asSymlink().getSymlinkString())) {
      throw new FileAlreadyExistsException(
          "Cannot rename symlink "+src+" to its target "+dst);
    }
    
    // dst cannot be directory or a file under src
    // (dst != src was established above, so when dst starts with src it is
    // strictly longer and charAt(src.length()) is in range)
    if (dst.startsWith(src) && 
        dst.charAt(src.length()) == Path.SEPARATOR_CHAR) {
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
          + "failed to rename " + src + " to " + dst
          + " because destination starts with src");
      return false;
    }
    
    byte[][] dstComponents = INode.getPathComponents(dst);
    INodesInPath dstIIP = getExistingPathINodes(dstComponents);
    if (dstIIP.isSnapshot()) {
      throw new SnapshotAccessControlException(
          "Modification on RO snapshot is disallowed");
    }
    if (dstIIP.getLastINode() != null) {
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
                                   +"failed to rename "+src+" to "+dst+ 
                                   " because destination exists");
      return false;
    }
    // index -2 is the second to last inode of the path, i.e. the parent
    INode dstParent = dstIIP.getINode(-2);
    if (dstParent == null) {
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
          +"failed to rename "+src+" to "+dst+ 
          " because destination's parent does not exist");
      return false;
    }
    
    // Ensure dst has quota to accommodate rename
    verifyFsLimitsForRename(srcIIP, dstIIP);
    verifyQuotaForRename(srcIIP.getINodes(), dstIIP.getINodes());
    
    // Captured before anything is modified; the finally block below uses
    // these to restore the source if the rename does not complete.
    boolean added = false;
    INode srcChild = srcIIP.getLastINode();
    final byte[] srcChildName = srcChild.getLocalNameBytes();
    final boolean isSrcInSnapshot = srcChild.isInLatestSnapshot(
        srcIIP.getLatestSnapshotId());
    final boolean srcChildIsReference = srcChild.isReference();
    
    // Record the snapshot on srcChild. After the rename, before any new 
    // snapshot is taken on the dst tree, changes will be recorded in the latest
    // snapshot of the src tree.
    if (isSrcInSnapshot) {
      srcChild = srcChild.recordModification(srcIIP.getLatestSnapshotId());
      srcIIP.setLastINode(srcChild);
    }
    
    // check srcChild for reference
    // withCount stays null when the source is neither in the latest snapshot
    // nor a reference; in that case the inode itself is moved.
    final INodeReference.WithCount withCount;
    Quota.Counts oldSrcCounts = Quota.Counts.newInstance();
    int srcRefDstSnapshot = srcChildIsReference ? srcChild.asReference()
        .getDstSnapshotId() : Snapshot.CURRENT_STATE_ID;
    if (isSrcInSnapshot) {
      final INodeReference.WithName withName = 
          srcIIP.getINode(-2).asDirectory().replaceChild4ReferenceWithName(
              srcChild, srcIIP.getLatestSnapshotId()); 
      withCount = (INodeReference.WithCount) withName.getReferredINode();
      srcChild = withName;
      srcIIP.setLastINode(srcChild);
      // get the counts before rename
      withCount.getReferredINode().computeQuotaUsage(oldSrcCounts, true);
    } else if (srcChildIsReference) {
      // srcChild is reference but srcChild is not in latest snapshot
      withCount = (WithCount) srcChild.asReference().getReferredINode();
    } else {
      withCount = null;
    }

    try {
      // remove src
      final long removedSrc = removeLastINode(srcIIP);
      if (removedSrc == -1) {
        NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
            + "failed to rename " + src + " to " + dst
            + " because the source can not be removed");
        return false;
      }
      
      if (dstParent.getParent() == null) {
        // src and dst file/dir are in the same directory, and the dstParent has
        // been replaced when we removed the src. Refresh the dstIIP and
        // dstParent.
        dstIIP = getExistingPathINodes(dstComponents);
        dstParent = dstIIP.getINode(-2);
      }
      
      // add src to the destination
      
      srcChild = srcIIP.getLastINode();
      final byte[] dstChildName = dstIIP.getLastLocalName();
      final INode toDst;
      if (withCount == null) {
        // plain inode: rename it in place and move it
        srcChild.setLocalName(dstChildName);
        toDst = srcChild;
      } else {
        // referenced inode: rename the referred inode and place a new
        // DstReference under the destination parent
        withCount.getReferredINode().setLocalName(dstChildName);
        int dstSnapshotId = dstIIP.getLatestSnapshotId();
        final INodeReference.DstReference ref = new INodeReference.DstReference(
            dstParent.asDirectory(), withCount, dstSnapshotId);
        toDst = ref;
      }
      
      added = addLastINodeNoQuotaCheck(dstIIP, toDst);
      if (added) {
        if (NameNode.stateChangeLog.isDebugEnabled()) {
          NameNode.stateChangeLog.debug("DIR* FSDirectory.unprotectedRenameTo: " 
              + src + " is renamed to " + dst);
        }
        // update modification time of dst and the parent of src
        final INode srcParent = srcIIP.getINode(-2);
        srcParent.updateModificationTime(timestamp, srcIIP.getLatestSnapshotId());
        dstParent = dstIIP.getINode(-2); // refresh dstParent
        dstParent.updateModificationTime(timestamp, dstIIP.getLatestSnapshotId());
        // update moved leases with new filename
        getFSNamesystem().unprotectedChangeLease(src, dst);     

        // update the quota usage in src tree
        if (isSrcInSnapshot) {
          // get the counts after rename
          Quota.Counts newSrcCounts = srcChild.computeQuotaUsage(
              Quota.Counts.newInstance(), false);
          newSrcCounts.subtract(oldSrcCounts);
          srcParent.addSpaceConsumed(newSrcCounts.get(Quota.NAMESPACE),
              newSrcCounts.get(Quota.DISKSPACE), false);
        }
        
        return true;
      }
    } finally {
      // Undo path: runs whenever the source was not added at the destination,
      // whether the try block returned false, fell through or threw.
      if (!added) {
        final INodeDirectory srcParent = srcIIP.getINode(-2).asDirectory();
        final INode oldSrcChild = srcChild;
        // put it back
        if (withCount == null) {
          srcChild.setLocalName(srcChildName);
        } else if (!srcChildIsReference) { // src must be in snapshot
          // the withCount node will no longer be used thus no need to update
          // its reference number here
          final INode originalChild = withCount.getReferredINode();
          srcChild = originalChild;
          srcChild.setLocalName(srcChildName);
        } else {
          // the source was a reference to begin with: rebuild a reference
          // under the source parent with the snapshot id recorded earlier
          withCount.removeReference(oldSrcChild.asReference());
          final INodeReference originalRef = new INodeReference.DstReference(
              srcParent, withCount, srcRefDstSnapshot);
          srcChild = originalRef;
          withCount.getReferredINode().setLocalName(srcChildName);
        }
        
        if (isSrcInSnapshot) {
          // srcParent must have snapshot feature since isSrcInSnapshot is true
          // and src node has been removed from srcParent 
          srcParent.undoRename4ScrParent(oldSrcChild.asReference(), srcChild);
        } else {
          // original srcChild is not in latest snapshot, we only need to add
          // the srcChild back
          addLastINodeNoQuotaCheck(srcIIP, srcChild);
        }
      }
    }
    // reached only when adding the source at the destination returned false
    NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
        +"failed to rename "+src+" to "+dst);
    return false;
  }
782    
  /**
   * Rename src to dst.
   * See {@link DistributedFileSystem#rename(Path, Path, Options.Rename...)}
   * for details related to rename semantics and exceptions.
   * <p>
   * The caller must hold the write lock (asserted). The method validates the
   * source and the destination, removes the source inode from its parent,
   * removes an existing destination (only reachable when
   * {@link Rename#OVERWRITE} was passed) and then adds the source inode under
   * the destination name. When that final add does not succeed, the finally
   * block puts the source (and a removed destination) back and an
   * {@link IOException} is thrown.
   * 
   * @param src source path
   * @param dst destination path
   * @param timestamp modification time
   * @param options Rename options
   * @return true only if an existing destination inode was removed as part of
   *         the rename and the namespace count reported by its cleanSubtree
   *         call is non-negative; false if the rename succeeded without
   *         removing a destination
   * @throws FileNotFoundException if src does not resolve to an inode or the
   *         parent of dst does not exist
   * @throws FileAlreadyExistsException if src equals dst, if src is a symlink
   *         whose target string equals dst, or if dst exists and OVERWRITE
   *         was not given
   * @throws ParentNotDirectoryException if the parent of dst is not a
   *         directory
   * @throws IOException for the remaining validation failures (root as src or
   *         dst, dst under src, directory/non-directory mismatch, non-empty
   *         destination directory), when src cannot be removed, or when the
   *         rename could not be completed
   */
  boolean unprotectedRenameTo(String src, String dst, long timestamp,
      Options.Rename... options) throws FileAlreadyExistsException,
      FileNotFoundException, ParentNotDirectoryException,
      QuotaExceededException, UnresolvedLinkException, IOException {
    assert hasWriteLock();
    // Only OVERWRITE is inspected here; any other option has no effect on
    // this method.
    boolean overwrite = false;
    if (null != options) {
      for (Rename option : options) {
        if (option == Rename.OVERWRITE) {
          overwrite = true;
        }
      }
    }
    String error = null;
    final INodesInPath srcIIP = rootDir.getINodesInPath4Write(src, false);
    final INode srcInode = srcIIP.getLastINode();
    // validate source
    if (srcInode == null) {
      error = "rename source " + src + " is not found.";
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
          + error);
      throw new FileNotFoundException(error);
    }
    // a resolved path with a single inode is treated as the root
    if (srcIIP.getINodes().length == 1) {
      error = "rename source cannot be the root";
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
          + error);
      throw new IOException(error);
    }
    // srcInode and its subtree cannot contain snapshottable directories with
    // snapshots
    checkSnapshot(srcInode, null);
    
    // validate the destination
    if (dst.equals(src)) {
      throw new FileAlreadyExistsException(
          "The source "+src+" and destination "+dst+" are the same");
    }
    if (srcInode.isSymlink() && 
        dst.equals(srcInode.asSymlink().getSymlinkString())) {
      throw new FileAlreadyExistsException(
          "Cannot rename symlink "+src+" to its target "+dst);
    }
    // dst cannot be a directory or a file under src
    // (dst.equals(src) was rejected above, so whenever dst starts with src it
    // is strictly longer and charAt(src.length()) is within bounds)
    if (dst.startsWith(src) && 
        dst.charAt(src.length()) == Path.SEPARATOR_CHAR) {
      error = "Rename destination " + dst
          + " is a directory or file under source " + src;
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
          + error);
      throw new IOException(error);
    }
    INodesInPath dstIIP = rootDir.getINodesInPath4Write(dst, false);
    if (dstIIP.getINodes().length == 1) {
      error = "rename destination cannot be the root";
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
          + error);
      throw new IOException(error);
    }

    final INode dstInode = dstIIP.getLastINode();
    // Filled by checkSnapshot(dstInode, ...) below and handed to
    // namesystem.removeSnapshottableDirs once the rename has succeeded.
    List<INodeDirectorySnapshottable> snapshottableDirs = 
        new ArrayList<INodeDirectorySnapshottable>();
    if (dstInode != null) { // Destination exists
      // It's OK to rename a file to a symlink and vice versa
      if (dstInode.isDirectory() != srcInode.isDirectory()) {
        error = "Source " + src + " and destination " + dst
            + " must both be directories";
        NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
            + error);
        throw new IOException(error);
      }
      if (!overwrite) { // If destination exists, overwrite flag must be true
        error = "rename destination " + dst + " already exists";
        NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
            + error);
        throw new FileAlreadyExistsException(error);
      }
      // an existing destination directory may only be replaced when its
      // current state has no children
      if (dstInode.isDirectory()) {
        final ReadOnlyList<INode> children = dstInode.asDirectory()
            .getChildrenList(Snapshot.CURRENT_STATE_ID);
        if (!children.isEmpty()) {
          error = "rename destination directory is not empty: " + dst;
          NameNode.stateChangeLog.warn(
              "DIR* FSDirectory.unprotectedRenameTo: " + error);
          throw new IOException(error);
        }
      }
      checkSnapshot(dstInode, snapshottableDirs);
    }

    INode dstParent = dstIIP.getINode(-2);
    if (dstParent == null) {
      error = "rename destination parent " + dst + " not found.";
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
          + error);
      throw new FileNotFoundException(error);
    }
    if (!dstParent.isDirectory()) {
      error = "rename destination parent " + dst + " is a file.";
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
          + error);
      throw new ParentNotDirectoryException(error);
    }

    // Ensure dst has quota to accommodate rename
    verifyFsLimitsForRename(srcIIP, dstIIP);
    verifyQuotaForRename(srcIIP.getINodes(), dstIIP.getINodes());

    // Everything captured from here on (original name, snapshot membership,
    // reference state) is what the finally block uses to restore src.
    INode srcChild = srcIIP.getLastINode();
    final byte[] srcChildName = srcChild.getLocalNameBytes();
    final boolean isSrcInSnapshot = srcChild.isInLatestSnapshot(
        srcIIP.getLatestSnapshotId());
    final boolean srcChildIsReference = srcChild.isReference();
    
    // Record the snapshot on srcChild. After the rename, before any new 
    // snapshot is taken on the dst tree, changes will be recorded in the latest
    // snapshot of the src tree.
    if (isSrcInSnapshot) {
      srcChild = srcChild.recordModification(srcIIP.getLatestSnapshotId());
      srcIIP.setLastINode(srcChild);
    }
    
    // check srcChild for reference
    // withCount stays null when src is neither in the latest snapshot nor a
    // reference; in that case the inode itself is moved.
    final INodeReference.WithCount withCount;
    int srcRefDstSnapshot = srcChildIsReference ? srcChild.asReference()
        .getDstSnapshotId() : Snapshot.CURRENT_STATE_ID;
    Quota.Counts oldSrcCounts = Quota.Counts.newInstance();    
    if (isSrcInSnapshot) {
      final INodeReference.WithName withName = srcIIP.getINode(-2).asDirectory()
          .replaceChild4ReferenceWithName(srcChild, srcIIP.getLatestSnapshotId()); 
      withCount = (INodeReference.WithCount) withName.getReferredINode();
      srcChild = withName;
      srcIIP.setLastINode(srcChild);
      // get the counts before rename
      withCount.getReferredINode().computeQuotaUsage(oldSrcCounts, true);
    } else if (srcChildIsReference) {
      // srcChild is reference but srcChild is not in latest snapshot
      withCount = (WithCount) srcChild.asReference().getReferredINode();
    } else {
      withCount = null;
    }
    
    // Stays true until src has been added under dst, so any failure inside
    // the try block below triggers the restore in the finally block.
    boolean undoRemoveSrc = true;
    final long removedSrc = removeLastINode(srcIIP);
    if (removedSrc == -1) {
      // thrown before the try block is entered, so no undo is attempted
      error = "Failed to rename " + src + " to " + dst
          + " because the source can not be removed";
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
          + error);
      throw new IOException(error);
    }
    
    if (dstParent.getParent() == null) {
      // src and dst file/dir are in the same directory, and the dstParent has
      // been replaced when we removed the src. Refresh the dstIIP and
      // dstParent.
      dstIIP = rootDir.getINodesInPath4Write(dst, false);
    }
    
    // Set only after an existing destination has actually been removed;
    // cleared again once that removal has been made permanent.
    boolean undoRemoveDst = false;
    INode removedDst = null;
    try {
      if (dstInode != null) { // dst exists remove it
        if (removeLastINode(dstIIP) != -1) {
          removedDst = dstIIP.getLastINode();
          undoRemoveDst = true;
        }
      }
      
      srcChild = srcIIP.getLastINode();

      final byte[] dstChildName = dstIIP.getLastLocalName();
      final INode toDst;
      if (withCount == null) {
        // plain inode: rename it in place and move it
        srcChild.setLocalName(dstChildName);
        toDst = srcChild;
      } else {
        // referenced inode: rename the referred inode and attach it to the
        // destination parent through a new DstReference
        withCount.getReferredINode().setLocalName(dstChildName);
        int dstSnapshotId = dstIIP.getLatestSnapshotId();
        final INodeReference.DstReference ref = new INodeReference.DstReference(
            dstIIP.getINode(-2).asDirectory(), withCount, dstSnapshotId);
        toDst = ref;
      }

      // add src as dst to complete rename
      if (addLastINodeNoQuotaCheck(dstIIP, toDst)) {
        undoRemoveSrc = false;
        if (NameNode.stateChangeLog.isDebugEnabled()) {
          NameNode.stateChangeLog.debug(
              "DIR* FSDirectory.unprotectedRenameTo: " + src
              + " is renamed to " + dst);
        }

        // update modification time of the parents of src and dst
        final INode srcParent = srcIIP.getINode(-2);
        srcParent.updateModificationTime(timestamp, srcIIP.getLatestSnapshotId());
        dstParent = dstIIP.getINode(-2);
        dstParent.updateModificationTime(timestamp, dstIIP.getLatestSnapshotId());
        // update moved lease with new filename
        getFSNamesystem().unprotectedChangeLease(src, dst);

        // Collect the blocks and remove the lease for previous dst
        // filesDeleted stays -1 unless an existing destination was removed.
        long filesDeleted = -1;
        if (removedDst != null) {
          undoRemoveDst = false;
          BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
          List<INode> removedINodes = new ChunkedArrayList<INode>();
          filesDeleted = removedDst.cleanSubtree(Snapshot.CURRENT_STATE_ID,
              dstIIP.getLatestSnapshotId(), collectedBlocks, removedINodes, true)
              .get(Quota.NAMESPACE);
          // NOTE(review): src (not dst) is passed here although the blocks
          // and inodes belong to the removed destination - confirm intended.
          getFSNamesystem().removePathAndBlocks(src, collectedBlocks,
              removedINodes);
        }

        if (snapshottableDirs.size() > 0) {
          // There are snapshottable directories (without snapshots) to be
          // deleted. Need to update the SnapshotManager.
          namesystem.removeSnapshottableDirs(snapshottableDirs);
        }
        
        // update the quota usage in src tree
        if (isSrcInSnapshot) {
          // get the counts after rename
          Quota.Counts newSrcCounts = srcChild.computeQuotaUsage(
              Quota.Counts.newInstance(), false);
          newSrcCounts.subtract(oldSrcCounts);
          srcParent.addSpaceConsumed(newSrcCounts.get(Quota.NAMESPACE),
              newSrcCounts.get(Quota.DISKSPACE), false);
        }
        
        return filesDeleted >= 0;
      }
    } finally {
      if (undoRemoveSrc) {
        // Rename failed - restore src
        final INodeDirectory srcParent = srcIIP.getINode(-2).asDirectory();
        final INode oldSrcChild = srcChild;
        // put it back
        if (withCount == null) {
          srcChild.setLocalName(srcChildName);
        } else if (!srcChildIsReference) { // src must be in snapshot
          // the withCount node will no longer be used thus no need to update
          // its reference number here
          final INode originalChild = withCount.getReferredINode();
          srcChild = originalChild;
          srcChild.setLocalName(srcChildName);
        } else {
          // src was a reference before the rename: drop the current reference
          // and rebuild a DstReference with the originally recorded dst
          // snapshot id
          withCount.removeReference(oldSrcChild.asReference());
          final INodeReference originalRef = new INodeReference.DstReference(
              srcParent, withCount, srcRefDstSnapshot);
          srcChild = originalRef;
          withCount.getReferredINode().setLocalName(srcChildName);
        }
        
        if (srcParent.isWithSnapshot()) {
          srcParent.undoRename4ScrParent(oldSrcChild.asReference(), srcChild);
        } else {
          // srcParent is not an INodeDirectoryWithSnapshot, we only need to add
          // the srcChild back
          addLastINodeNoQuotaCheck(srcIIP, srcChild);
        }
      }
      if (undoRemoveDst) {
        // Rename failed - restore dst
        if (dstParent.isDirectory() && dstParent.asDirectory().isWithSnapshot()) {
          dstParent.asDirectory().undoRename4DstParent(removedDst,
              dstIIP.getLatestSnapshotId());
        } else {
          addLastINodeNoQuotaCheck(dstIIP, removedDst);
        }
        // re-register the restored destination reference with its WithCount
        if (removedDst.isReference()) {
          final INodeReference removedDstRef = removedDst.asReference();
          final INodeReference.WithCount wc = 
              (WithCount) removedDstRef.getReferredINode().asReference();
          wc.addReference(removedDstRef);
        }
      }
    }
    // Reached only when adding src under dst returned false (the success path
    // returns from inside the try block).
    NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
        + "failed to rename " + src + " to " + dst);
    throw new IOException("rename from " + src + " to " + dst + " failed.");
  }
1075      
1076      /**
1077       * Set file replication
1078       * 
1079       * @param src file name
1080       * @param replication new replication
1081       * @param blockRepls block replications - output parameter
1082       * @return array of file blocks
1083       * @throws QuotaExceededException
1084       * @throws SnapshotAccessControlException 
1085       */
1086      Block[] setReplication(String src, short replication, short[] blockRepls)
1087          throws QuotaExceededException, UnresolvedLinkException,
1088          SnapshotAccessControlException {
1089        waitForReady();
1090        writeLock();
1091        try {
1092          final Block[] fileBlocks = unprotectedSetReplication(
1093              src, replication, blockRepls);
1094          if (fileBlocks != null)  // log replication change
1095            fsImage.getEditLog().logSetReplication(src, replication);
1096          return fileBlocks;
1097        } finally {
1098          writeUnlock();
1099        }
1100      }
1101    
1102      Block[] unprotectedSetReplication(String src, short replication,
1103          short[] blockRepls) throws QuotaExceededException,
1104          UnresolvedLinkException, SnapshotAccessControlException {
1105        assert hasWriteLock();
1106    
1107        final INodesInPath iip = rootDir.getINodesInPath4Write(src, true);
1108        final INode inode = iip.getLastINode();
1109        if (inode == null || !inode.isFile()) {
1110          return null;
1111        }
1112        INodeFile file = inode.asFile();
1113        final short oldBR = file.getBlockReplication();
1114    
1115        // before setFileReplication, check for increasing block replication.
1116        // if replication > oldBR, then newBR == replication.
1117        // if replication < oldBR, we don't know newBR yet. 
1118        if (replication > oldBR) {
1119          long dsDelta = (replication - oldBR)*(file.diskspaceConsumed()/oldBR);
1120          updateCount(iip, 0, dsDelta, true);
1121        }
1122    
1123        file = file.setFileReplication(replication, iip.getLatestSnapshotId(),
1124            inodeMap);
1125        
1126        final short newBR = file.getBlockReplication(); 
1127        // check newBR < oldBR case. 
1128        if (newBR < oldBR) {
1129          long dsDelta = (newBR - oldBR)*(file.diskspaceConsumed()/newBR);
1130          updateCount(iip, 0, dsDelta, true);
1131        }
1132    
1133        if (blockRepls != null) {
1134          blockRepls[0] = oldBR;
1135          blockRepls[1] = newBR;
1136        }
1137        return file.getBlocks();
1138      }
1139    
1140      /**
1141       * @param path the file path
1142       * @return the block size of the file. 
1143       */
1144      long getPreferredBlockSize(String path) throws UnresolvedLinkException,
1145          FileNotFoundException, IOException {
1146        readLock();
1147        try {
1148          return INodeFile.valueOf(rootDir.getNode(path, false), path
1149              ).getPreferredBlockSize();
1150        } finally {
1151          readUnlock();
1152        }
1153      }
1154    
1155      boolean exists(String src) throws UnresolvedLinkException {
1156        src = normalizePath(src);
1157        readLock();
1158        try {
1159          INode inode = rootDir.getNode(src, false);
1160          if (inode == null) {
1161             return false;
1162          }
1163          return !inode.isFile() || inode.asFile().getBlocks() != null;
1164        } finally {
1165          readUnlock();
1166        }
1167      }
1168      
  /**
   * Set the permission of a path under the write lock and then record the
   * change in the edit log.
   * 
   * @param src the path whose permission is changed
   * @param permission the new permission
   */
  void setPermission(String src, FsPermission permission)
      throws FileNotFoundException, UnresolvedLinkException,
      QuotaExceededException, SnapshotAccessControlException {
    writeLock();
    try {
      unprotectedSetPermission(src, permission);
    } finally {
      writeUnlock();
    }
    // Logged after the write lock has been released; not reached if the
    // update above threw.
    fsImage.getEditLog().logSetPermissions(src, permission);
  }
1180      
1181      void unprotectedSetPermission(String src, FsPermission permissions)
1182          throws FileNotFoundException, UnresolvedLinkException,
1183          QuotaExceededException, SnapshotAccessControlException {
1184        assert hasWriteLock();
1185        final INodesInPath inodesInPath = rootDir.getINodesInPath4Write(src, true);
1186        final INode inode = inodesInPath.getLastINode();
1187        if (inode == null) {
1188          throw new FileNotFoundException("File does not exist: " + src);
1189        }
1190        int snapshotId = inodesInPath.getLatestSnapshotId();
1191        inode.setPermission(permissions, snapshotId);
1192      }
1193    
  /**
   * Set the owner and/or group of a path under the write lock and then record
   * the change in the edit log.
   * 
   * @param src the path whose ownership is changed
   * @param username the new user name, passed through to
   *                 {@link #unprotectedSetOwner(String, String, String)}
   * @param groupname the new group name, passed through to
   *                  {@link #unprotectedSetOwner(String, String, String)}
   */
  void setOwner(String src, String username, String groupname)
      throws FileNotFoundException, UnresolvedLinkException,
      QuotaExceededException, SnapshotAccessControlException {
    writeLock();
    try {
      unprotectedSetOwner(src, username, groupname);
    } finally {
      writeUnlock();
    }
    // Logged after the write lock has been released; not reached if the
    // update above threw.
    fsImage.getEditLog().logSetOwner(src, username, groupname);
  }
1205    
1206      void unprotectedSetOwner(String src, String username, String groupname)
1207          throws FileNotFoundException, UnresolvedLinkException,
1208          QuotaExceededException, SnapshotAccessControlException {
1209        assert hasWriteLock();
1210        final INodesInPath inodesInPath = rootDir.getINodesInPath4Write(src, true);
1211        INode inode = inodesInPath.getLastINode();
1212        if (inode == null) {
1213          throw new FileNotFoundException("File does not exist: " + src);
1214        }
1215        if (username != null) {
1216          inode = inode.setUser(username, inodesInPath.getLatestSnapshotId());
1217        }
1218        if (groupname != null) {
1219          inode.setGroup(groupname, inodesInPath.getLatestSnapshotId());
1220        }
1221      }
1222    
1223      /**
1224       * Concat all the blocks from srcs to trg and delete the srcs files
1225       */
1226      void concat(String target, String [] srcs, boolean supportRetryCache) 
1227          throws UnresolvedLinkException, QuotaExceededException,
1228          SnapshotAccessControlException, SnapshotException {
1229        writeLock();
1230        try {
1231          // actual move
1232          waitForReady();
1233          long timestamp = now();
1234          unprotectedConcat(target, srcs, timestamp);
1235          // do the commit
1236          fsImage.getEditLog().logConcat(target, srcs, timestamp, 
1237              supportRetryCache);
1238        } finally {
1239          writeUnlock();
1240        }
1241      }
1242    
1243      /**
1244       * Concat all the blocks from srcs to trg and delete the srcs files
1245       * @param target target file to move the blocks to
1246       * @param srcs list of file to move the blocks from
1247       */
1248      void unprotectedConcat(String target, String [] srcs, long timestamp) 
1249          throws UnresolvedLinkException, QuotaExceededException,
1250          SnapshotAccessControlException, SnapshotException {
1251        assert hasWriteLock();
1252        if (NameNode.stateChangeLog.isDebugEnabled()) {
1253          NameNode.stateChangeLog.debug("DIR* FSNamesystem.concat to "+target);
1254        }
1255        // do the move
1256        
1257        final INodesInPath trgIIP = rootDir.getINodesInPath4Write(target, true);
1258        final INode[] trgINodes = trgIIP.getINodes();
1259        final INodeFile trgInode = trgIIP.getLastINode().asFile();
1260        INodeDirectory trgParent = trgINodes[trgINodes.length-2].asDirectory();
1261        final int trgLatestSnapshot = trgIIP.getLatestSnapshotId();
1262        
1263        final INodeFile [] allSrcInodes = new INodeFile[srcs.length];
1264        for(int i = 0; i < srcs.length; i++) {
1265          final INodesInPath iip = getINodesInPath4Write(srcs[i]);
1266          final int latest = iip.getLatestSnapshotId();
1267          final INode inode = iip.getLastINode();
1268    
1269          // check if the file in the latest snapshot
1270          if (inode.isInLatestSnapshot(latest)) {
1271            throw new SnapshotException("Concat: the source file " + srcs[i]
1272                + " is in snapshot " + latest);
1273          }
1274    
1275          // check if the file has other references.
1276          if (inode.isReference() && ((INodeReference.WithCount)
1277              inode.asReference().getReferredINode()).getReferenceCount() > 1) {
1278            throw new SnapshotException("Concat: the source file " + srcs[i]
1279                + " is referred by some other reference in some snapshot.");
1280          }
1281    
1282          allSrcInodes[i] = inode.asFile();
1283        }
1284        trgInode.concatBlocks(allSrcInodes);
1285        
1286        // since we are in the same dir - we can use same parent to remove files
1287        int count = 0;
1288        for(INodeFile nodeToRemove: allSrcInodes) {
1289          if(nodeToRemove == null) continue;
1290          
1291          nodeToRemove.setBlocks(null);
1292          trgParent.removeChild(nodeToRemove, trgLatestSnapshot);
1293          inodeMap.remove(nodeToRemove);
1294          count++;
1295        }
1296        
1297        // update inodeMap
1298        removeFromInodeMap(Arrays.asList(allSrcInodes));
1299        
1300        trgInode.setModificationTime(timestamp, trgLatestSnapshot);
1301        trgParent.updateModificationTime(timestamp, trgLatestSnapshot);
1302        // update quota on the parent directory ('count' files removed, 0 space)
1303        unprotectedUpdateCount(trgIIP, trgINodes.length-1, -count, 0);
1304      }
1305    
1306      /**
1307       * Delete the target directory and collect the blocks under it
1308       * 
1309       * @param src Path of a directory to delete
1310       * @param collectedBlocks Blocks under the deleted directory
1311       * @param removedINodes INodes that should be removed from {@link #inodeMap}
1312       * @param logRetryCache Whether to record RPC IDs in editlog to support retry
1313       *                      cache rebuilding.
1314       * @return true on successful deletion; else false
1315       */
1316      boolean delete(String src, BlocksMapUpdateInfo collectedBlocks,
1317          List<INode> removedINodes, boolean logRetryCache) throws IOException {
1318        if (NameNode.stateChangeLog.isDebugEnabled()) {
1319          NameNode.stateChangeLog.debug("DIR* FSDirectory.delete: " + src);
1320        }
1321        waitForReady();
1322        long now = now();
1323        final long filesRemoved;
1324        writeLock();
1325        try {
1326          final INodesInPath inodesInPath = rootDir.getINodesInPath4Write(
1327              normalizePath(src), false);
1328          if (!deleteAllowed(inodesInPath, src) ) {
1329            filesRemoved = -1;
1330          } else {
1331            List<INodeDirectorySnapshottable> snapshottableDirs = 
1332                new ArrayList<INodeDirectorySnapshottable>();
1333            checkSnapshot(inodesInPath.getLastINode(), snapshottableDirs);
1334            filesRemoved = unprotectedDelete(inodesInPath, collectedBlocks,
1335                removedINodes, now);
1336            namesystem.removeSnapshottableDirs(snapshottableDirs);
1337          }
1338        } finally {
1339          writeUnlock();
1340        }
1341        if (filesRemoved < 0) {
1342          return false;
1343        }
1344        fsImage.getEditLog().logDelete(src, now, logRetryCache);
1345        incrDeletedFileCount(filesRemoved);
1346        // Blocks/INodes will be handled later by the caller of this method
1347        getFSNamesystem().removePathAndBlocks(src, null, null);
1348        return true;
1349      }
1350      
1351      private static boolean deleteAllowed(final INodesInPath iip,
1352          final String src) {
1353        final INode[] inodes = iip.getINodes(); 
1354        if (inodes == null || inodes.length == 0
1355            || inodes[inodes.length - 1] == null) {
1356          if(NameNode.stateChangeLog.isDebugEnabled()) {
1357            NameNode.stateChangeLog.debug("DIR* FSDirectory.unprotectedDelete: "
1358                + "failed to remove " + src + " because it does not exist");
1359          }
1360          return false;
1361        } else if (inodes.length == 1) { // src is the root
1362          NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedDelete: "
1363              + "failed to remove " + src
1364              + " because the root is not allowed to be deleted");
1365          return false;
1366        }
1367        return true;
1368      }
1369      
1370      /**
1371       * @return true if the path is a non-empty directory; otherwise, return false.
1372       */
1373      boolean isNonEmptyDirectory(String path) throws UnresolvedLinkException {
1374        readLock();
1375        try {
1376          final INodesInPath inodesInPath = rootDir.getLastINodeInPath(path, false);
1377          final INode inode = inodesInPath.getINode(0);
1378          if (inode == null || !inode.isDirectory()) {
1379            //not found or not a directory
1380            return false;
1381          }
1382          final int s = inodesInPath.getPathSnapshotId();
1383          return !inode.asDirectory().getChildrenList(s).isEmpty();
1384        } finally {
1385          readUnlock();
1386        }
1387      }
1388    
1389      /**
1390       * Delete a path from the name space
1391       * Update the count at each ancestor directory with quota
1392       * <br>
1393       * Note: This is to be used by {@link FSEditLog} only.
1394       * <br>
1395       * @param src a string representation of a path to an inode
1396       * @param mtime the time the inode is removed
1397       * @throws SnapshotAccessControlException if path is in RO snapshot
1398       */
1399      void unprotectedDelete(String src, long mtime) throws UnresolvedLinkException,
1400          QuotaExceededException, SnapshotAccessControlException, IOException {
1401        assert hasWriteLock();
1402        BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
1403        List<INode> removedINodes = new ChunkedArrayList<INode>();
1404    
1405        final INodesInPath inodesInPath = rootDir.getINodesInPath4Write(
1406            normalizePath(src), false);
1407        long filesRemoved = -1;
1408        if (deleteAllowed(inodesInPath, src)) {
1409          List<INodeDirectorySnapshottable> snapshottableDirs = 
1410              new ArrayList<INodeDirectorySnapshottable>();
1411          checkSnapshot(inodesInPath.getLastINode(), snapshottableDirs);
1412          filesRemoved = unprotectedDelete(inodesInPath, collectedBlocks,
1413              removedINodes, mtime);
1414          namesystem.removeSnapshottableDirs(snapshottableDirs); 
1415        }
1416    
1417        if (filesRemoved >= 0) {
1418          getFSNamesystem().removePathAndBlocks(src, collectedBlocks, 
1419              removedINodes);
1420        }
1421      }
1422      
1423      /**
1424       * Delete a path from the name space
1425       * Update the count at each ancestor directory with quota
1426       * @param iip the inodes resolved from the path
1427       * @param collectedBlocks blocks collected from the deleted path
1428       * @param removedINodes inodes that should be removed from {@link #inodeMap}
1429       * @param mtime the time the inode is removed
1430       * @return the number of inodes deleted; 0 if no inodes are deleted.
1431       */ 
1432      long unprotectedDelete(INodesInPath iip, BlocksMapUpdateInfo collectedBlocks,
1433          List<INode> removedINodes, long mtime) throws QuotaExceededException {
1434        assert hasWriteLock();
1435    
1436        // check if target node exists
1437        INode targetNode = iip.getLastINode();
1438        if (targetNode == null) {
1439          return -1;
1440        }
1441    
1442        // record modification
1443        final int latestSnapshot = iip.getLatestSnapshotId();
1444        targetNode = targetNode.recordModification(latestSnapshot);
1445        iip.setLastINode(targetNode);
1446    
1447        // Remove the node from the namespace
1448        long removed = removeLastINode(iip);
1449        if (removed == -1) {
1450          return -1;
1451        }
1452    
1453        // set the parent's modification time
1454        final INodeDirectory parent = targetNode.getParent();
1455        parent.updateModificationTime(mtime, latestSnapshot);
1456        if (removed == 0) {
1457          return 0;
1458        }
1459        
1460        // collect block
1461        if (!targetNode.isInLatestSnapshot(latestSnapshot)) {
1462          targetNode.destroyAndCollectBlocks(collectedBlocks, removedINodes);
1463        } else {
1464          Quota.Counts counts = targetNode.cleanSubtree(Snapshot.CURRENT_STATE_ID,
1465              latestSnapshot, collectedBlocks, removedINodes, true);
1466          parent.addSpaceConsumed(-counts.get(Quota.NAMESPACE),
1467              -counts.get(Quota.DISKSPACE), true);
1468          removed = counts.get(Quota.NAMESPACE);
1469        }
1470        if (NameNode.stateChangeLog.isDebugEnabled()) {
1471          NameNode.stateChangeLog.debug("DIR* FSDirectory.unprotectedDelete: "
1472              + targetNode.getFullPathName() + " is removed");
1473        }
1474        return removed;
1475      }
1476      
1477      /**
1478       * Check if the given INode (or one of its descendants) is snapshottable and
1479       * already has snapshots.
1480       * 
1481       * @param target The given INode
1482       * @param snapshottableDirs The list of directories that are snapshottable 
1483       *                          but do not have snapshots yet
1484       */
1485      private static void checkSnapshot(INode target,
1486          List<INodeDirectorySnapshottable> snapshottableDirs) throws IOException {
1487        if (target.isDirectory()) {
1488          INodeDirectory targetDir = target.asDirectory();
1489          if (targetDir.isSnapshottable()) {
1490            INodeDirectorySnapshottable ssTargetDir = 
1491                (INodeDirectorySnapshottable) targetDir;
1492            if (ssTargetDir.getNumSnapshots() > 0) {
1493              throw new IOException("The directory " + ssTargetDir.getFullPathName()
1494                  + " cannot be deleted since " + ssTargetDir.getFullPathName()
1495                  + " is snapshottable and already has snapshots");
1496            } else {
1497              if (snapshottableDirs != null) {
1498                snapshottableDirs.add(ssTargetDir);
1499              }
1500            }
1501          } 
1502          for (INode child : targetDir.getChildrenList(Snapshot.CURRENT_STATE_ID)) {
1503            checkSnapshot(child, snapshottableDirs);
1504          }
1505        }
1506      }
1507    
1508      /**
1509       * Get a partial listing of the indicated directory
1510       *
1511       * We will stop when any of the following conditions is met:
1512       * 1) this.lsLimit files have been added
1513       * 2) needLocation is true AND enough files have been added such
1514       * that at least this.lsLimit block locations are in the response
1515       *
1516       * @param src the directory name
1517       * @param startAfter the name to start listing after
1518       * @param needLocation if block locations are returned
1519       * @return a partial listing starting after startAfter
1520       */
1521      DirectoryListing getListing(String src, byte[] startAfter,
1522          boolean needLocation) throws UnresolvedLinkException, IOException {
1523        String srcs = normalizePath(src);
1524    
1525        readLock();
1526        try {
1527          if (srcs.endsWith(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR)) {
1528            return getSnapshotsListing(srcs, startAfter);
1529          }
1530          final INodesInPath inodesInPath = rootDir.getLastINodeInPath(srcs, true);
1531          final int snapshot = inodesInPath.getPathSnapshotId();
1532          final INode targetNode = inodesInPath.getINode(0);
1533          if (targetNode == null)
1534            return null;
1535          
1536          if (!targetNode.isDirectory()) {
1537            return new DirectoryListing(
1538                new HdfsFileStatus[]{createFileStatus(HdfsFileStatus.EMPTY_NAME,
1539                    targetNode, needLocation, snapshot)}, 0);
1540          }
1541    
1542          final INodeDirectory dirInode = targetNode.asDirectory();
1543          final ReadOnlyList<INode> contents = dirInode.getChildrenList(snapshot);
1544          int startChild = INodeDirectory.nextChild(contents, startAfter);
1545          int totalNumChildren = contents.size();
1546          int numOfListing = Math.min(totalNumChildren-startChild, this.lsLimit);
1547          int locationBudget = this.lsLimit;
1548          int listingCnt = 0;
1549          HdfsFileStatus listing[] = new HdfsFileStatus[numOfListing];
1550          for (int i=0; i<numOfListing && locationBudget>0; i++) {
1551            INode cur = contents.get(startChild+i);
1552            listing[i] = createFileStatus(cur.getLocalNameBytes(), cur,
1553                needLocation, snapshot);
1554            listingCnt++;
1555            if (needLocation) {
1556                // Once we  hit lsLimit locations, stop.
1557                // This helps to prevent excessively large response payloads.
1558                // Approximate #locations with locatedBlockCount() * repl_factor
1559                LocatedBlocks blks = 
1560                    ((HdfsLocatedFileStatus)listing[i]).getBlockLocations();
1561                locationBudget -= (blks == null) ? 0 :
1562                   blks.locatedBlockCount() * listing[i].getReplication();
1563            }
1564          }
1565          // truncate return array if necessary
1566          if (listingCnt < numOfListing) {
1567              listing = Arrays.copyOf(listing, listingCnt);
1568          }
1569          return new DirectoryListing(
1570              listing, totalNumChildren-startChild-listingCnt);
1571        } finally {
1572          readUnlock();
1573        }
1574      }
1575      
1576      /**
1577       * Get a listing of all the snapshots of a snapshottable directory
1578       */
1579      private DirectoryListing getSnapshotsListing(String src, byte[] startAfter)
1580          throws UnresolvedLinkException, IOException {
1581        Preconditions.checkState(hasReadLock());
1582        Preconditions.checkArgument(
1583            src.endsWith(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR), 
1584            "%s does not end with %s", src, HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR);
1585        
1586        final String dirPath = normalizePath(src.substring(0,
1587            src.length() - HdfsConstants.DOT_SNAPSHOT_DIR.length()));
1588        
1589        final INode node = this.getINode(dirPath);
1590        final INodeDirectorySnapshottable dirNode = INodeDirectorySnapshottable
1591            .valueOf(node, dirPath);
1592        final ReadOnlyList<Snapshot> snapshots = dirNode.getSnapshotList();
1593        int skipSize = ReadOnlyList.Util.binarySearch(snapshots, startAfter);
1594        skipSize = skipSize < 0 ? -skipSize - 1 : skipSize + 1;
1595        int numOfListing = Math.min(snapshots.size() - skipSize, this.lsLimit);
1596        final HdfsFileStatus listing[] = new HdfsFileStatus[numOfListing];
1597        for (int i = 0; i < numOfListing; i++) {
1598          Root sRoot = snapshots.get(i + skipSize).getRoot();
1599          listing[i] = createFileStatus(sRoot.getLocalNameBytes(), sRoot,
1600              Snapshot.CURRENT_STATE_ID);
1601        }
1602        return new DirectoryListing(
1603            listing, snapshots.size() - skipSize - numOfListing);
1604      }
1605    
1606      /** Get the file info for a specific file.
1607       * @param src The string representation of the path to the file
1608       * @param resolveLink whether to throw UnresolvedLinkException 
1609       * @return object containing information regarding the file
1610       *         or null if file not found
1611       */
1612      HdfsFileStatus getFileInfo(String src, boolean resolveLink) 
1613          throws UnresolvedLinkException {
1614        String srcs = normalizePath(src);
1615        readLock();
1616        try {
1617          if (srcs.endsWith(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR)) {
1618            return getFileInfo4DotSnapshot(srcs);
1619          }
1620          final INodesInPath inodesInPath = rootDir.getLastINodeInPath(srcs, resolveLink);
1621          final INode i = inodesInPath.getINode(0);
1622          return i == null? null: createFileStatus(HdfsFileStatus.EMPTY_NAME, i,
1623              inodesInPath.getPathSnapshotId());
1624        } finally {
1625          readUnlock();
1626        }
1627      }
1628      
1629      /**
1630       * Currently we only support "ls /xxx/.snapshot" which will return all the
1631       * snapshots of a directory. The FSCommand Ls will first call getFileInfo to
1632       * make sure the file/directory exists (before the real getListing call).
1633       * Since we do not have a real INode for ".snapshot", we return an empty
1634       * non-null HdfsFileStatus here.
1635       */
1636      private HdfsFileStatus getFileInfo4DotSnapshot(String src)
1637          throws UnresolvedLinkException {
1638        if (getINode4DotSnapshot(src) != null) {
1639          return new HdfsFileStatus(0, true, 0, 0, 0, 0, null, null, null, null,
1640              HdfsFileStatus.EMPTY_NAME, -1L, 0);
1641        }
1642        return null;
1643      }
1644    
1645      private INode getINode4DotSnapshot(String src) throws UnresolvedLinkException {
1646        Preconditions.checkArgument(
1647            src.endsWith(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR), 
1648            "%s does not end with %s", src, HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR);
1649        
1650        final String dirPath = normalizePath(src.substring(0,
1651            src.length() - HdfsConstants.DOT_SNAPSHOT_DIR.length()));
1652        
1653        final INode node = this.getINode(dirPath);
1654        if (node != null
1655            && node.isDirectory()
1656            && node.asDirectory() instanceof INodeDirectorySnapshottable) {
1657          return node;
1658        }
1659        return null;
1660      }
1661    
1662      /**
1663       * Get the blocks associated with the file.
1664       */
1665      Block[] getFileBlocks(String src) throws UnresolvedLinkException {
1666        waitForReady();
1667        readLock();
1668        try {
1669          final INode i = rootDir.getNode(src, false);
1670          return i != null && i.isFile()? i.asFile().getBlocks(): null;
1671        } finally {
1672          readUnlock();
1673        }
1674      }
1675    
1676    
1677      INodesInPath getExistingPathINodes(byte[][] components)
1678          throws UnresolvedLinkException {
1679        return INodesInPath.resolve(rootDir, components);
1680      }
1681    
1682      /**
1683       * Get {@link INode} associated with the file / directory.
1684       */
1685      public INode getINode(String src) throws UnresolvedLinkException {
1686        return getLastINodeInPath(src).getINode(0);
1687      }
1688    
1689      /**
1690       * Get {@link INode} associated with the file / directory.
1691       */
1692      public INodesInPath getLastINodeInPath(String src)
1693           throws UnresolvedLinkException {
1694        readLock();
1695        try {
1696          return rootDir.getLastINodeInPath(src, true);
1697        } finally {
1698          readUnlock();
1699        }
1700      }
1701      
1702      /**
1703       * Get {@link INode} associated with the file / directory.
1704       */
1705      public INodesInPath getINodesInPath4Write(String src
1706          ) throws UnresolvedLinkException, SnapshotAccessControlException {
1707        readLock();
1708        try {
1709          return rootDir.getINodesInPath4Write(src, true);
1710        } finally {
1711          readUnlock();
1712        }
1713      }
1714    
1715      /**
1716       * Get {@link INode} associated with the file / directory.
1717       * @throws SnapshotAccessControlException if path is in RO snapshot
1718       */
1719      public INode getINode4Write(String src) throws UnresolvedLinkException,
1720          SnapshotAccessControlException {
1721        readLock();
1722        try {
1723          return rootDir.getINode4Write(src, true);
1724        } finally {
1725          readUnlock();
1726        }
1727      }
1728    
1729      /** 
1730       * Check whether the filepath could be created
1731       * @throws SnapshotAccessControlException if path is in RO snapshot
1732       */
1733      boolean isValidToCreate(String src) throws UnresolvedLinkException,
1734          SnapshotAccessControlException {
1735        String srcs = normalizePath(src);
1736        readLock();
1737        try {
1738          if (srcs.startsWith("/") && !srcs.endsWith("/")
1739              && rootDir.getINode4Write(srcs, false) == null) {
1740            return true;
1741          } else {
1742            return false;
1743          }
1744        } finally {
1745          readUnlock();
1746        }
1747      }
1748    
1749      /**
1750       * Check whether the path specifies a directory
1751       */
1752      boolean isDir(String src) throws UnresolvedLinkException {
1753        src = normalizePath(src);
1754        readLock();
1755        try {
1756          INode node = rootDir.getNode(src, false);
1757          return node != null && node.isDirectory();
1758        } finally {
1759          readUnlock();
1760        }
1761      }
1762      
1763      /**
1764       * Check whether the path specifies a directory
1765       * @throws SnapshotAccessControlException if path is in RO snapshot
1766       */
1767      boolean isDirMutable(String src) throws UnresolvedLinkException,
1768          SnapshotAccessControlException {
1769        src = normalizePath(src);
1770        readLock();
1771        try {
1772          INode node = rootDir.getINode4Write(src, false);
1773          return node != null && node.isDirectory();
1774        } finally {
1775          readUnlock();
1776        }
1777      }
1778    
1779      /** Updates namespace and diskspace consumed for all
1780       * directories until the parent directory of file represented by path.
1781       * 
1782       * @param path path for the file.
1783       * @param nsDelta the delta change of namespace
1784       * @param dsDelta the delta change of diskspace
1785       * @throws QuotaExceededException if the new count violates any quota limit
1786       * @throws FileNotFoundException if path does not exist.
1787       */
1788      void updateSpaceConsumed(String path, long nsDelta, long dsDelta)
1789          throws QuotaExceededException, FileNotFoundException,
1790              UnresolvedLinkException, SnapshotAccessControlException {
1791        writeLock();
1792        try {
1793          final INodesInPath iip = rootDir.getINodesInPath4Write(path, false);
1794          if (iip.getLastINode() == null) {
1795            throw new FileNotFoundException("Path not found: " + path);
1796          }
1797          updateCount(iip, nsDelta, dsDelta, true);
1798        } finally {
1799          writeUnlock();
1800        }
1801      }
1802      
1803      private void updateCount(INodesInPath iip, long nsDelta, long dsDelta,
1804          boolean checkQuota) throws QuotaExceededException {
1805        updateCount(iip, iip.getINodes().length - 1, nsDelta, dsDelta, checkQuota);
1806      }
1807    
1808      /** update count of each inode with quota
1809       * 
1810       * @param iip inodes in a path
1811       * @param numOfINodes the number of inodes to update starting from index 0
1812       * @param nsDelta the delta change of namespace
1813       * @param dsDelta the delta change of diskspace
1814       * @param checkQuota if true then check if quota is exceeded
1815       * @throws QuotaExceededException if the new count violates any quota limit
1816       */
1817      private void updateCount(INodesInPath iip, int numOfINodes, 
1818                               long nsDelta, long dsDelta, boolean checkQuota)
1819                               throws QuotaExceededException {
1820        assert hasWriteLock();
1821        if (!ready) {
1822          //still initializing. do not check or update quotas.
1823          return;
1824        }
1825        final INode[] inodes = iip.getINodes();
1826        if (numOfINodes > inodes.length) {
1827          numOfINodes = inodes.length;
1828        }
1829        if (checkQuota) {
1830          verifyQuota(inodes, numOfINodes, nsDelta, dsDelta, null);
1831        }
1832        unprotectedUpdateCount(iip, numOfINodes, nsDelta, dsDelta);
1833      }
1834      
1835      /** 
1836       * update quota of each inode and check to see if quota is exceeded. 
1837       * See {@link #updateCount(INode[], int, long, long, boolean)}
1838       */ 
1839      private void updateCountNoQuotaCheck(INodesInPath inodesInPath,
1840          int numOfINodes, long nsDelta, long dsDelta) {
1841        assert hasWriteLock();
1842        try {
1843          updateCount(inodesInPath, numOfINodes, nsDelta, dsDelta, false);
1844        } catch (QuotaExceededException e) {
1845          NameNode.LOG.error("BUG: unexpected exception ", e);
1846        }
1847      }
1848      
1849      /**
1850       * updates quota without verification
1851       * callers responsibility is to make sure quota is not exceeded
1852       */
1853      private static void unprotectedUpdateCount(INodesInPath inodesInPath,
1854          int numOfINodes, long nsDelta, long dsDelta) {
1855        final INode[] inodes = inodesInPath.getINodes();
1856        for(int i=0; i < numOfINodes; i++) {
1857          if (inodes[i].isQuotaSet()) { // a directory with quota
1858            inodes[i].asDirectory().getDirectoryWithQuotaFeature()
1859                .addSpaceConsumed2Cache(nsDelta, dsDelta);
1860          }
1861        }
1862      }
1863      
1864      /** Return the name of the path represented by inodes at [0, pos] */
1865      static String getFullPathName(INode[] inodes, int pos) {
1866        StringBuilder fullPathName = new StringBuilder();
1867        if (inodes[0].isRoot()) {
1868          if (pos == 0) return Path.SEPARATOR;
1869        } else {
1870          fullPathName.append(inodes[0].getLocalName());
1871        }
1872        
1873        for (int i=1; i<=pos; i++) {
1874          fullPathName.append(Path.SEPARATOR_CHAR).append(inodes[i].getLocalName());
1875        }
1876        return fullPathName.toString();
1877      }
1878    
1879      /**
1880       * @return the relative path of an inode from one of its ancestors,
1881       *         represented by an array of inodes.
1882       */
1883      private static INode[] getRelativePathINodes(INode inode, INode ancestor) {
1884        // calculate the depth of this inode from the ancestor
1885        int depth = 0;
1886        for (INode i = inode; i != null && !i.equals(ancestor); i = i.getParent()) {
1887          depth++;
1888        }
1889        INode[] inodes = new INode[depth];
1890    
1891        // fill up the inodes in the path from this inode to root
1892        for (int i = 0; i < depth; i++) {
1893          if (inode == null) {
1894            NameNode.stateChangeLog.warn("Could not get full path."
1895                + " Corresponding file might have deleted already.");
1896            return null;
1897          }
1898          inodes[depth-i-1] = inode;
1899          inode = inode.getParent();
1900        }
1901        return inodes;
1902      }
1903      
1904      private static INode[] getFullPathINodes(INode inode) {
1905        return getRelativePathINodes(inode, null);
1906      }
1907      
1908      /** Return the full path name of the specified inode */
1909      static String getFullPathName(INode inode) {
1910        INode[] inodes = getFullPathINodes(inode);
1911        // inodes can be null only when its called without holding lock
1912        return inodes == null ? "" : getFullPathName(inodes, inodes.length - 1);
1913      }
1914      
1915      /**
1916       * Create a directory 
1917       * If ancestor directories do not exist, automatically create them.
1918    
1919       * @param src string representation of the path to the directory
1920       * @param permissions the permission of the directory
1921       * @param isAutocreate if the permission of the directory should inherit
1922       *                          from its parent or not. u+wx is implicitly added to
1923       *                          the automatically created directories, and to the
1924       *                          given directory if inheritPermission is true
1925       * @param now creation time
1926       * @return true if the operation succeeds false otherwise
1927       * @throws FileNotFoundException if an ancestor or itself is a file
1928       * @throws QuotaExceededException if directory creation violates 
1929       *                                any quota limit
1930       * @throws UnresolvedLinkException if a symlink is encountered in src.                      
1931       * @throws SnapshotAccessControlException if path is in RO snapshot
1932       */
1933      boolean mkdirs(String src, PermissionStatus permissions,
1934          boolean inheritPermission, long now)
1935          throws FileAlreadyExistsException, QuotaExceededException, 
1936                 UnresolvedLinkException, SnapshotAccessControlException,
1937                 AclException {
1938        src = normalizePath(src);
1939        String[] names = INode.getPathNames(src);
1940        byte[][] components = INode.getPathComponents(names);
1941        final int lastInodeIndex = components.length - 1;
1942    
1943        writeLock();
1944        try {
1945          INodesInPath iip = getExistingPathINodes(components);
1946          if (iip.isSnapshot()) {
1947            throw new SnapshotAccessControlException(
1948                "Modification on RO snapshot is disallowed");
1949          }
1950          INode[] inodes = iip.getINodes();
1951    
1952          // find the index of the first null in inodes[]
1953          StringBuilder pathbuilder = new StringBuilder();
1954          int i = 1;
1955          for(; i < inodes.length && inodes[i] != null; i++) {
1956            pathbuilder.append(Path.SEPARATOR).append(names[i]);
1957            if (!inodes[i].isDirectory()) {
1958              throw new FileAlreadyExistsException("Parent path is not a directory: "
1959                  + pathbuilder+ " "+inodes[i].getLocalName());
1960            }
1961          }
1962    
1963          // default to creating parent dirs with the given perms
1964          PermissionStatus parentPermissions = permissions;
1965    
1966          // if not inheriting and it's the last inode, there's no use in
1967          // computing perms that won't be used
1968          if (inheritPermission || (i < lastInodeIndex)) {
1969            // if inheriting (ie. creating a file or symlink), use the parent dir,
1970            // else the supplied permissions
1971            // NOTE: the permissions of the auto-created directories violate posix
1972            FsPermission parentFsPerm = inheritPermission
1973                ? inodes[i-1].getFsPermission() : permissions.getPermission();
1974            
1975            // ensure that the permissions allow user write+execute
1976            if (!parentFsPerm.getUserAction().implies(FsAction.WRITE_EXECUTE)) {
1977              parentFsPerm = new FsPermission(
1978                  parentFsPerm.getUserAction().or(FsAction.WRITE_EXECUTE),
1979                  parentFsPerm.getGroupAction(),
1980                  parentFsPerm.getOtherAction()
1981              );
1982            }
1983            
1984            if (!parentPermissions.getPermission().equals(parentFsPerm)) {
1985              parentPermissions = new PermissionStatus(
1986                  parentPermissions.getUserName(),
1987                  parentPermissions.getGroupName(),
1988                  parentFsPerm
1989              );
1990              // when inheriting, use same perms for entire path
1991              if (inheritPermission) permissions = parentPermissions;
1992            }
1993          }
1994          
1995          // create directories beginning from the first null index
1996          for(; i < inodes.length; i++) {
1997            pathbuilder.append(Path.SEPARATOR + names[i]);
1998            unprotectedMkdir(namesystem.allocateNewInodeId(), iip, i,
1999                components[i], (i < lastInodeIndex) ? parentPermissions
2000                    : permissions, null, now);
2001            if (inodes[i] == null) {
2002              return false;
2003            }
2004            // Directory creation also count towards FilesCreated
2005            // to match count of FilesDeleted metric.
2006            if (getFSNamesystem() != null)
2007              NameNode.getNameNodeMetrics().incrFilesCreated();
2008    
2009            final String cur = pathbuilder.toString();
2010            fsImage.getEditLog().logMkDir(cur, inodes[i]);
2011            if(NameNode.stateChangeLog.isDebugEnabled()) {
2012              NameNode.stateChangeLog.debug(
2013                  "DIR* FSDirectory.mkdirs: created directory " + cur);
2014            }
2015          }
2016        } finally {
2017          writeUnlock();
2018        }
2019        return true;
2020      }
2021    
2022      INode unprotectedMkdir(long inodeId, String src, PermissionStatus permissions,
2023                              List<AclEntry> aclEntries, long timestamp)
2024          throws QuotaExceededException, UnresolvedLinkException, AclException {
2025        assert hasWriteLock();
2026        byte[][] components = INode.getPathComponents(src);
2027        INodesInPath iip = getExistingPathINodes(components);
2028        INode[] inodes = iip.getINodes();
2029        final int pos = inodes.length - 1;
2030        unprotectedMkdir(inodeId, iip, pos, components[pos], permissions, aclEntries,
2031            timestamp);
2032        return inodes[pos];
2033      }
2034    
2035      /** create a directory at index pos.
2036       * The parent path to the directory is at [0, pos-1].
2037       * All ancestors exist. Newly created one stored at index pos.
2038       */
2039      private void unprotectedMkdir(long inodeId, INodesInPath inodesInPath,
2040          int pos, byte[] name, PermissionStatus permission,
2041          List<AclEntry> aclEntries, long timestamp)
2042          throws QuotaExceededException, AclException {
2043        assert hasWriteLock();
2044        final INodeDirectory dir = new INodeDirectory(inodeId, name, permission,
2045            timestamp);
2046        if (addChild(inodesInPath, pos, dir, true)) {
2047          if (aclEntries != null) {
2048            AclStorage.updateINodeAcl(dir, aclEntries, Snapshot.CURRENT_STATE_ID);
2049          }
2050          inodesInPath.setINode(pos, dir);
2051        }
2052      }
2053      
2054      /**
2055       * Add the given child to the namespace.
2056       * @param src The full path name of the child node.
2057       * @throw QuotaExceededException is thrown if it violates quota limit
2058       */
2059      private boolean addINode(String src, INode child
2060          ) throws QuotaExceededException, UnresolvedLinkException {
2061        byte[][] components = INode.getPathComponents(src);
2062        child.setLocalName(components[components.length-1]);
2063        cacheName(child);
2064        writeLock();
2065        try {
2066          return addLastINode(getExistingPathINodes(components), child, true);
2067        } finally {
2068          writeUnlock();
2069        }
2070      }
2071    
2072      /**
2073       * Verify quota for adding or moving a new INode with required 
2074       * namespace and diskspace to a given position.
2075       *  
2076       * @param inodes INodes corresponding to a path
2077       * @param pos position where a new INode will be added
2078       * @param nsDelta needed namespace
2079       * @param dsDelta needed diskspace
2080       * @param commonAncestor Last node in inodes array that is a common ancestor
2081       *          for a INode that is being moved from one location to the other.
2082       *          Pass null if a node is not being moved.
2083       * @throws QuotaExceededException if quota limit is exceeded.
2084       */
2085      private static void verifyQuota(INode[] inodes, int pos, long nsDelta,
2086          long dsDelta, INode commonAncestor) throws QuotaExceededException {
2087        if (nsDelta <= 0 && dsDelta <= 0) {
2088          // if quota is being freed or not being consumed
2089          return;
2090        }
2091    
2092        // check existing components in the path
2093        for(int i = (pos > inodes.length? inodes.length: pos) - 1; i >= 0; i--) {
2094          if (commonAncestor == inodes[i]) {
2095            // Stop checking for quota when common ancestor is reached
2096            return;
2097          }
2098          final DirectoryWithQuotaFeature q
2099              = inodes[i].asDirectory().getDirectoryWithQuotaFeature();
2100          if (q != null) { // a directory with quota
2101            try {
2102              q.verifyQuota(nsDelta, dsDelta);
2103            } catch (QuotaExceededException e) {
2104              e.setPathName(getFullPathName(inodes, i));
2105              throw e;
2106            }
2107          }
2108        }
2109      }
2110      
2111      /**
2112       * Verify quota for rename operation where srcInodes[srcInodes.length-1] moves
2113       * dstInodes[dstInodes.length-1]
2114       * 
2115       * @param src directory from where node is being moved.
2116       * @param dst directory to where node is moved to.
2117       * @throws QuotaExceededException if quota limit is exceeded.
2118       */
2119      private void verifyQuotaForRename(INode[] src, INode[] dst)
2120          throws QuotaExceededException {
2121        if (!ready) {
2122          // Do not check quota if edits log is still being processed
2123          return;
2124        }
2125        int i = 0;
2126        for(; src[i] == dst[i]; i++);
2127        // src[i - 1] is the last common ancestor.
2128    
2129        final Quota.Counts delta = src[src.length - 1].computeQuotaUsage();
2130        
2131        // Reduce the required quota by dst that is being removed
2132        final int dstIndex = dst.length - 1;
2133        if (dst[dstIndex] != null) {
2134          delta.subtract(dst[dstIndex].computeQuotaUsage());
2135        }
2136        verifyQuota(dst, dstIndex, delta.get(Quota.NAMESPACE),
2137            delta.get(Quota.DISKSPACE), src[i - 1]);
2138      }
2139    
2140      /**
2141       * Checks file system limits (max component length and max directory items)
2142       * during a rename operation.
2143       *
2144       * @param srcIIP INodesInPath containing every inode in the rename source
2145       * @param dstIIP INodesInPath containing every inode in the rename destination
2146       * @throws PathComponentTooLongException child's name is too long.
2147       * @throws MaxDirectoryItemsExceededException too many children.
2148       */
2149      private void verifyFsLimitsForRename(INodesInPath srcIIP, INodesInPath dstIIP)
2150          throws PathComponentTooLongException, MaxDirectoryItemsExceededException {
2151        byte[] dstChildName = dstIIP.getLastLocalName();
2152        INode[] dstInodes = dstIIP.getINodes();
2153        int pos = dstInodes.length - 1;
2154        verifyMaxComponentLength(dstChildName, dstInodes, pos);
2155        // Do not enforce max directory items if renaming within same directory.
2156        if (srcIIP.getINode(-2) != dstIIP.getINode(-2)) {
2157          verifyMaxDirItems(dstInodes, pos);
2158        }
2159      }
2160    
2161      /** Verify if the snapshot name is legal. */
2162      void verifySnapshotName(String snapshotName, String path)
2163          throws PathComponentTooLongException {
2164        if (snapshotName.contains(Path.SEPARATOR)) {
2165          throw new HadoopIllegalArgumentException(
2166              "Snapshot name cannot contain \"" + Path.SEPARATOR + "\"");
2167        }
2168        final byte[] bytes = DFSUtil.string2Bytes(snapshotName);
2169        verifyINodeName(bytes);
2170        verifyMaxComponentLength(bytes, path, 0);
2171      }
2172      
2173      /** Verify if the inode name is legal. */
2174      void verifyINodeName(byte[] childName) throws HadoopIllegalArgumentException {
2175        if (Arrays.equals(HdfsConstants.DOT_SNAPSHOT_DIR_BYTES, childName)) {
2176          String s = "\"" + HdfsConstants.DOT_SNAPSHOT_DIR + "\" is a reserved name.";
2177          if (!ready) {
2178            s += "  Please rename it before upgrade.";
2179          }
2180          throw new HadoopIllegalArgumentException(s);
2181        }
2182      }
2183    
2184      /**
2185       * Verify child's name for fs limit.
2186       *
2187       * @param childName byte[] containing new child name
2188       * @param parentPath Object either INode[] or String containing parent path
2189       * @param pos int position of new child in path
2190       * @throws PathComponentTooLongException child's name is too long.
2191       */
2192      private void verifyMaxComponentLength(byte[] childName, Object parentPath,
2193          int pos) throws PathComponentTooLongException {
2194        if (maxComponentLength == 0) {
2195          return;
2196        }
2197    
2198        final int length = childName.length;
2199        if (length > maxComponentLength) {
2200          final String p = parentPath instanceof INode[]?
2201              getFullPathName((INode[])parentPath, pos - 1): (String)parentPath;
2202          final PathComponentTooLongException e = new PathComponentTooLongException(
2203              maxComponentLength, length, p, DFSUtil.bytes2String(childName));
2204          if (ready) {
2205            throw e;
2206          } else {
2207            // Do not throw if edits log is still being processed
2208            NameNode.LOG.error("ERROR in FSDirectory.verifyINodeName", e);
2209          }
2210        }
2211      }
2212    
2213      /**
2214       * Verify children size for fs limit.
2215       *
2216       * @param pathComponents INode[] containing full path of inodes to new child
2217       * @param pos int position of new child in pathComponents
2218       * @throws MaxDirectoryItemsExceededException too many children.
2219       */
2220      private void verifyMaxDirItems(INode[] pathComponents, int pos)
2221          throws MaxDirectoryItemsExceededException {
2222    
2223        final INodeDirectory parent = pathComponents[pos-1].asDirectory();
2224        final int count = parent.getChildrenList(Snapshot.CURRENT_STATE_ID).size();
2225        if (count >= maxDirItems) {
2226          final MaxDirectoryItemsExceededException e
2227              = new MaxDirectoryItemsExceededException(maxDirItems, count);
2228          if (ready) {
2229            e.setPathName(getFullPathName(pathComponents, pos - 1));
2230            throw e;
2231          } else {
2232            // Do not throw if edits log is still being processed
2233            NameNode.LOG.error("FSDirectory.verifyMaxDirItems: "
2234                + e.getLocalizedMessage());
2235          }
2236        }
2237      }
2238      
2239      /**
2240       * The same as {@link #addChild(INodesInPath, int, INode, boolean)}
2241       * with pos = length - 1.
2242       */
2243      private boolean addLastINode(INodesInPath inodesInPath,
2244          INode inode, boolean checkQuota) throws QuotaExceededException {
2245        final int pos = inodesInPath.getINodes().length - 1;
2246        return addChild(inodesInPath, pos, inode, checkQuota);
2247      }
2248    
2249      /** Add a node child to the inodes at index pos. 
2250       * Its ancestors are stored at [0, pos-1].
2251       * @return false if the child with this name already exists; 
2252       *         otherwise return true;
2253       * @throw QuotaExceededException is thrown if it violates quota limit
2254       */
2255      private boolean addChild(INodesInPath iip, int pos,
2256          INode child, boolean checkQuota) throws QuotaExceededException {
2257        final INode[] inodes = iip.getINodes();
2258        // Disallow creation of /.reserved. This may be created when loading
2259        // editlog/fsimage during upgrade since /.reserved was a valid name in older
2260        // release. This may also be called when a user tries to create a file
2261        // or directory /.reserved.
2262        if (pos == 1 && inodes[0] == rootDir && isReservedName(child)) {
2263          throw new HadoopIllegalArgumentException(
2264              "File name \"" + child.getLocalName() + "\" is reserved and cannot "
2265                  + "be created. If this is during upgrade change the name of the "
2266                  + "existing file or directory to another name before upgrading "
2267                  + "to the new release.");
2268        }
2269        // The filesystem limits are not really quotas, so this check may appear
2270        // odd. It's because a rename operation deletes the src, tries to add
2271        // to the dest, if that fails, re-adds the src from whence it came.
2272        // The rename code disables the quota when it's restoring to the
2273        // original location becase a quota violation would cause the the item
2274        // to go "poof".  The fs limits must be bypassed for the same reason.
2275        if (checkQuota) {
2276          verifyMaxComponentLength(child.getLocalNameBytes(), inodes, pos);
2277          verifyMaxDirItems(inodes, pos);
2278        }
2279        // always verify inode name
2280        verifyINodeName(child.getLocalNameBytes());
2281        
2282        final Quota.Counts counts = child.computeQuotaUsage();
2283        updateCount(iip, pos,
2284            counts.get(Quota.NAMESPACE), counts.get(Quota.DISKSPACE), checkQuota);
2285        boolean isRename = (child.getParent() != null);
2286        final INodeDirectory parent = inodes[pos-1].asDirectory();
2287        boolean added = false;
2288        try {
2289          added = parent.addChild(child, true, iip.getLatestSnapshotId());
2290        } catch (QuotaExceededException e) {
2291          updateCountNoQuotaCheck(iip, pos,
2292              -counts.get(Quota.NAMESPACE), -counts.get(Quota.DISKSPACE));
2293          throw e;
2294        }
2295        if (!added) {
2296          updateCountNoQuotaCheck(iip, pos,
2297              -counts.get(Quota.NAMESPACE), -counts.get(Quota.DISKSPACE));
2298        } else {
2299          iip.setINode(pos - 1, child.getParent());
2300          if (!isRename) {
2301            AclStorage.copyINodeDefaultAcl(child);
2302          }
2303          addToInodeMap(child);
2304        }
2305        return added;
2306      }
2307      
2308      private boolean addLastINodeNoQuotaCheck(INodesInPath inodesInPath, INode i) {
2309        try {
2310          return addLastINode(inodesInPath, i, false);
2311        } catch (QuotaExceededException e) {
2312          NameNode.LOG.warn("FSDirectory.addChildNoQuotaCheck - unexpected", e);
2313        }
2314        return false;
2315      }
2316      
2317      /**
2318       * Remove the last inode in the path from the namespace.
2319       * Count of each ancestor with quota is also updated.
2320       * @return -1 for failing to remove;
2321       *          0 for removing a reference whose referred inode has other 
2322       *            reference nodes;
2323       *         >0 otherwise. 
2324       */
2325      private long removeLastINode(final INodesInPath iip)
2326          throws QuotaExceededException {
2327        final int latestSnapshot = iip.getLatestSnapshotId();
2328        final INode last = iip.getLastINode();
2329        final INodeDirectory parent = iip.getINode(-2).asDirectory();
2330        if (!parent.removeChild(last, latestSnapshot)) {
2331          return -1;
2332        }
2333        INodeDirectory newParent = last.getParent();
2334        if (parent != newParent) {
2335          iip.setINode(-2, newParent);
2336        }
2337        
2338        if (!last.isInLatestSnapshot(latestSnapshot)) {
2339          final Quota.Counts counts = last.computeQuotaUsage();
2340          updateCountNoQuotaCheck(iip, iip.getINodes().length - 1,
2341              -counts.get(Quota.NAMESPACE), -counts.get(Quota.DISKSPACE));
2342    
2343          if (INodeReference.tryRemoveReference(last) > 0) {
2344            return 0;
2345          } else {
2346            return counts.get(Quota.NAMESPACE);
2347          }
2348        }
2349        return 1;
2350      }
2351      
2352      /**
2353       */
2354      String normalizePath(String src) {
2355        if (src.length() > 1 && src.endsWith("/")) {
2356          src = src.substring(0, src.length() - 1);
2357        }
2358        return src;
2359      }
2360    
2361      ContentSummary getContentSummary(String src) 
2362        throws FileNotFoundException, UnresolvedLinkException {
2363        String srcs = normalizePath(src);
2364        readLock();
2365        try {
2366          INode targetNode = rootDir.getNode(srcs, false);
2367          if (targetNode == null) {
2368            throw new FileNotFoundException("File does not exist: " + srcs);
2369          }
2370          else {
2371            // Make it relinquish locks everytime contentCountLimit entries are
2372            // processed. 0 means disabled. I.e. blocking for the entire duration.
2373            ContentSummaryComputationContext cscc =
2374    
2375                new ContentSummaryComputationContext(this, getFSNamesystem(),
2376                contentCountLimit);
2377            ContentSummary cs = targetNode.computeAndConvertContentSummary(cscc);
2378            yieldCount += cscc.getYieldCount();
2379            return cs;
2380          }
2381        } finally {
2382          readUnlock();
2383        }
2384      }
2385    
2386      @VisibleForTesting
2387      public long getYieldCount() {
2388        return yieldCount;
2389      }
2390    
2391      public INodeMap getINodeMap() {
2392        return inodeMap;
2393      }
2394      
2395      /**
2396       * This method is always called with writeLock of FSDirectory held.
2397       */
2398      public final void addToInodeMap(INode inode) {
2399        if (inode instanceof INodeWithAdditionalFields) {
2400          inodeMap.put((INodeWithAdditionalFields)inode);
2401        }
2402      }
2403    
2404      
2405      /**
2406       * This method is always called with writeLock of FSDirectory held.
2407       */
2408      public final void removeFromInodeMap(List<? extends INode> inodes) {
2409        if (inodes != null) {
2410          for (INode inode : inodes) {
2411            if (inode != null && inode instanceof INodeWithAdditionalFields) {
2412              inodeMap.remove(inode);
2413            }
2414          }
2415        }
2416      }
2417      
2418      /**
2419       * Get the inode from inodeMap based on its inode id.
2420       * @param id The given id
2421       * @return The inode associated with the given id
2422       */
2423      public INode getInode(long id) {
2424        readLock();
2425        try {
2426          return inodeMap.get(id);
2427        } finally {
2428          readUnlock();
2429        }
2430      }
2431      
2432      @VisibleForTesting
2433      int getInodeMapSize() {
2434        return inodeMap.size();
2435      }
2436      
2437      /**
2438       * See {@link ClientProtocol#setQuota(String, long, long)} for the contract.
2439       * Sets quota for for a directory.
2440       * @returns INodeDirectory if any of the quotas have changed. null other wise.
2441       * @throws FileNotFoundException if the path does not exist.
2442       * @throws PathIsNotDirectoryException if the path is not a directory.
2443       * @throws QuotaExceededException if the directory tree size is 
2444       *                                greater than the given quota
2445       * @throws UnresolvedLinkException if a symlink is encountered in src.
2446       * @throws SnapshotAccessControlException if path is in RO snapshot
2447       */
2448      INodeDirectory unprotectedSetQuota(String src, long nsQuota, long dsQuota)
2449          throws FileNotFoundException, PathIsNotDirectoryException,
2450          QuotaExceededException, UnresolvedLinkException,
2451          SnapshotAccessControlException {
2452        assert hasWriteLock();
2453        // sanity check
2454        if ((nsQuota < 0 && nsQuota != HdfsConstants.QUOTA_DONT_SET && 
2455             nsQuota < HdfsConstants.QUOTA_RESET) || 
2456            (dsQuota < 0 && dsQuota != HdfsConstants.QUOTA_DONT_SET && 
2457              dsQuota < HdfsConstants.QUOTA_RESET)) {
2458          throw new IllegalArgumentException("Illegal value for nsQuota or " +
2459                                             "dsQuota : " + nsQuota + " and " +
2460                                             dsQuota);
2461        }
2462        
2463        String srcs = normalizePath(src);
2464        final INodesInPath iip = rootDir.getINodesInPath4Write(srcs, true);
2465        INodeDirectory dirNode = INodeDirectory.valueOf(iip.getLastINode(), srcs);
2466        if (dirNode.isRoot() && nsQuota == HdfsConstants.QUOTA_RESET) {
2467          throw new IllegalArgumentException("Cannot clear namespace quota on root.");
2468        } else { // a directory inode
2469          final Quota.Counts oldQuota = dirNode.getQuotaCounts();
2470          final long oldNsQuota = oldQuota.get(Quota.NAMESPACE);
2471          final long oldDsQuota = oldQuota.get(Quota.DISKSPACE);
2472          if (nsQuota == HdfsConstants.QUOTA_DONT_SET) {
2473            nsQuota = oldNsQuota;
2474          }
2475          if (dsQuota == HdfsConstants.QUOTA_DONT_SET) {
2476            dsQuota = oldDsQuota;
2477          }        
2478          if (oldNsQuota == nsQuota && oldDsQuota == dsQuota) {
2479            return null;
2480          }
2481    
2482          final int latest = iip.getLatestSnapshotId();
2483          dirNode = dirNode.recordModification(latest);
2484          dirNode.setQuota(nsQuota, dsQuota);
2485          return dirNode;
2486        }
2487      }
2488      
2489      /**
2490       * See {@link ClientProtocol#setQuota(String, long, long)} for the contract.
2491       * @throws SnapshotAccessControlException if path is in RO snapshot
2492       * @see #unprotectedSetQuota(String, long, long)
2493       */
2494      void setQuota(String src, long nsQuota, long dsQuota) 
2495          throws FileNotFoundException, PathIsNotDirectoryException,
2496          QuotaExceededException, UnresolvedLinkException,
2497          SnapshotAccessControlException {
2498        writeLock();
2499        try {
2500          INodeDirectory dir = unprotectedSetQuota(src, nsQuota, dsQuota);
2501          if (dir != null) {
2502            final Quota.Counts q = dir.getQuotaCounts();
2503            fsImage.getEditLog().logSetQuota(src,
2504                q.get(Quota.NAMESPACE), q.get(Quota.DISKSPACE));
2505          }
2506        } finally {
2507          writeUnlock();
2508        }
2509      }
2510      
2511      long totalInodes() {
2512        readLock();
2513        try {
2514          return rootDir.getDirectoryWithQuotaFeature().getSpaceConsumed()
2515              .get(Quota.NAMESPACE);
2516        } finally {
2517          readUnlock();
2518        }
2519      }
2520    
2521      /**
2522       * Sets the access time on the file/directory. Logs it in the transaction log.
2523       */
2524      void setTimes(String src, INode inode, long mtime, long atime, boolean force,
2525          int latestSnapshotId) throws QuotaExceededException {
2526        boolean status = false;
2527        writeLock();
2528        try {
2529          status = unprotectedSetTimes(inode, mtime, atime, force, latestSnapshotId);
2530        } finally {
2531          writeUnlock();
2532        }
2533        if (status) {
2534          fsImage.getEditLog().logTimes(src, mtime, atime);
2535        }
2536      }
2537    
2538      boolean unprotectedSetTimes(String src, long mtime, long atime, boolean force) 
2539          throws UnresolvedLinkException, QuotaExceededException {
2540        assert hasWriteLock();
2541        final INodesInPath i = getLastINodeInPath(src); 
2542        return unprotectedSetTimes(i.getLastINode(), mtime, atime, force,
2543            i.getLatestSnapshotId());
2544      }
2545    
2546      private boolean unprotectedSetTimes(INode inode, long mtime,
2547          long atime, boolean force, int latest) throws QuotaExceededException {
2548        assert hasWriteLock();
2549        boolean status = false;
2550        if (mtime != -1) {
2551          inode = inode.setModificationTime(mtime, latest);
2552          status = true;
2553        }
2554        if (atime != -1) {
2555          long inodeTime = inode.getAccessTime();
2556    
2557          // if the last access time update was within the last precision interval, then
2558          // no need to store access time
2559          if (atime <= inodeTime + getFSNamesystem().getAccessTimePrecision() && !force) {
2560            status =  false;
2561          } else {
2562            inode.setAccessTime(atime, latest);
2563            status = true;
2564          }
2565        } 
2566        return status;
2567      }
2568    
  /**
   * Reset the entire namespace tree.
   * <p>
   * Under the write lock this marks the directory as not ready, replaces the
   * root with a newly created one, clears the inode map and re-registers the
   * new root in it, and resets the name cache.
   */
  void reset() {
    writeLock();
    try {
      setReady(false);
      rootDir = createRoot(getFSNamesystem());
      // The map must be cleared before the new root is registered in it.
      inodeMap.clear();
      addToInodeMap(rootDir);
      nameCache.reset();
    } finally {
      writeUnlock();
    }
  }
2584    
2585      /**
2586       * create an hdfs file status from an inode
2587       * 
2588       * @param path the local name
2589       * @param node inode
2590       * @param needLocation if block locations need to be included or not
2591       * @return a file status
2592       * @throws IOException if any error occurs
2593       */
2594      private HdfsFileStatus createFileStatus(byte[] path, INode node,
2595          boolean needLocation, int snapshot) throws IOException {
2596        if (needLocation) {
2597          return createLocatedFileStatus(path, node, snapshot);
2598        } else {
2599          return createFileStatus(path, node, snapshot);
2600        }
2601      }
2602      /**
2603       * Create FileStatus by file INode 
2604       */
2605       HdfsFileStatus createFileStatus(byte[] path, INode node,
2606           int snapshot) {
2607         long size = 0;     // length is zero for directories
2608         short replication = 0;
2609         long blocksize = 0;
2610         if (node.isFile()) {
2611           final INodeFile fileNode = node.asFile();
2612           size = fileNode.computeFileSize(snapshot);
2613           replication = fileNode.getFileReplication(snapshot);
2614           blocksize = fileNode.getPreferredBlockSize();
2615         }
2616         int childrenNum = node.isDirectory() ? 
2617             node.asDirectory().getChildrenNum(snapshot) : 0;
2618             
2619         return new HdfsFileStatus(
2620            size, 
2621            node.isDirectory(), 
2622            replication, 
2623            blocksize,
2624            node.getModificationTime(snapshot),
2625            node.getAccessTime(snapshot),
2626            getPermissionForFileStatus(node, snapshot),
2627            node.getUserName(snapshot),
2628            node.getGroupName(snapshot),
2629            node.isSymlink() ? node.asSymlink().getSymlink() : null,
2630            path,
2631            node.getId(),
2632            childrenNum);
2633      }
2634    
2635      /**
2636       * Create FileStatus with location info by file INode
2637       */
2638      private HdfsLocatedFileStatus createLocatedFileStatus(byte[] path,
2639          INode node, int snapshot) throws IOException {
2640        assert hasReadLock();
2641        long size = 0; // length is zero for directories
2642        short replication = 0;
2643        long blocksize = 0;
2644        LocatedBlocks loc = null;
2645        if (node.isFile()) {
2646          final INodeFile fileNode = node.asFile();
2647          size = fileNode.computeFileSize(snapshot);
2648          replication = fileNode.getFileReplication(snapshot);
2649          blocksize = fileNode.getPreferredBlockSize();
2650    
2651          final boolean inSnapshot = snapshot != Snapshot.CURRENT_STATE_ID; 
2652          final boolean isUc = inSnapshot ? false : fileNode.isUnderConstruction();
2653          final long fileSize = !inSnapshot && isUc ? 
2654              fileNode.computeFileSizeNotIncludingLastUcBlock() : size;
2655          loc = getFSNamesystem().getBlockManager().createLocatedBlocks(
2656              fileNode.getBlocks(), fileSize, isUc, 0L, size, false,
2657              inSnapshot);
2658          if (loc == null) {
2659            loc = new LocatedBlocks();
2660          }
2661        }
2662        int childrenNum = node.isDirectory() ? 
2663            node.asDirectory().getChildrenNum(snapshot) : 0;
2664            
2665        HdfsLocatedFileStatus status =
2666            new HdfsLocatedFileStatus(size, node.isDirectory(), replication,
2667              blocksize, node.getModificationTime(snapshot),
2668              node.getAccessTime(snapshot),
2669              getPermissionForFileStatus(node, snapshot),
2670              node.getUserName(snapshot), node.getGroupName(snapshot),
2671              node.isSymlink() ? node.asSymlink().getSymlink() : null, path,
2672              node.getId(), loc, childrenNum);
2673            // Set caching information for the located blocks.
2674        if (loc != null) {
2675          CacheManager cacheManager = namesystem.getCacheManager();
2676          for (LocatedBlock lb: loc.getLocatedBlocks()) {
2677            cacheManager.setCachedLocations(lb);
2678          }
2679        }
2680        return status;
2681      }
2682    
2683      /**
2684       * Returns an inode's FsPermission for use in an outbound FileStatus.  If the
2685       * inode has an ACL, then this method will convert to a FsAclPermission.
2686       *
2687       * @param node INode to check
2688       * @param snapshot int snapshot ID
2689       * @return FsPermission from inode, with ACL bit on if the inode has an ACL
2690       */
2691      private static FsPermission getPermissionForFileStatus(INode node,
2692          int snapshot) {
2693        FsPermission perm = node.getFsPermission(snapshot);
2694        if (node.getAclFeature(snapshot) != null) {
2695          perm = new FsAclPermission(perm);
2696        }
2697        return perm;
2698      }
2699        
2700      /**
2701       * Add the given symbolic link to the fs. Record it in the edits log.
2702       */
2703      INodeSymlink addSymlink(String path, String target,
2704          PermissionStatus dirPerms, boolean createParent, boolean logRetryCache)
2705          throws UnresolvedLinkException, FileAlreadyExistsException,
2706          QuotaExceededException, SnapshotAccessControlException, AclException {
2707        waitForReady();
2708    
2709        final long modTime = now();
2710        if (createParent) {
2711          final String parent = new Path(path).getParent().toString();
2712          if (!mkdirs(parent, dirPerms, true, modTime)) {
2713            return null;
2714          }
2715        }
2716        final String userName = dirPerms.getUserName();
2717        INodeSymlink newNode  = null;
2718        long id = namesystem.allocateNewInodeId();
2719        writeLock();
2720        try {
2721          newNode = unprotectedAddSymlink(id, path, target, modTime, modTime,
2722              new PermissionStatus(userName, null, FsPermission.getDefault()));
2723        } finally {
2724          writeUnlock();
2725        }
2726        if (newNode == null) {
2727          NameNode.stateChangeLog.info("DIR* addSymlink: failed to add " + path);
2728          return null;
2729        }
2730        fsImage.getEditLog().logSymlink(path, target, modTime, modTime, newNode,
2731            logRetryCache);
2732        
2733        if(NameNode.stateChangeLog.isDebugEnabled()) {
2734          NameNode.stateChangeLog.debug("DIR* addSymlink: " + path + " is added");
2735        }
2736        return newNode;
2737      }
2738    
2739      /**
2740       * Add the specified path into the namespace. Invoked from edit log processing.
2741       */
2742      INodeSymlink unprotectedAddSymlink(long id, String path, String target,
2743          long mtime, long atime, PermissionStatus perm)
2744          throws UnresolvedLinkException, QuotaExceededException {
2745        assert hasWriteLock();
2746        final INodeSymlink symlink = new INodeSymlink(id, null, perm, mtime, atime,
2747            target);
2748        return addINode(path, symlink) ? symlink : null;
2749      }
2750    
2751      void modifyAclEntries(String src, List<AclEntry> aclSpec) throws IOException {
2752        writeLock();
2753        try {
2754          List<AclEntry> newAcl = unprotectedModifyAclEntries(src, aclSpec);
2755          fsImage.getEditLog().logSetAcl(src, newAcl);
2756        } finally {
2757          writeUnlock();
2758        }
2759      }
2760    
2761      private List<AclEntry> unprotectedModifyAclEntries(String src,
2762          List<AclEntry> aclSpec) throws IOException {
2763        assert hasWriteLock();
2764        INodesInPath iip = rootDir.getINodesInPath4Write(normalizePath(src), true);
2765        INode inode = resolveLastINode(src, iip);
2766        int snapshotId = iip.getLatestSnapshotId();
2767        List<AclEntry> existingAcl = AclStorage.readINodeLogicalAcl(inode);
2768        List<AclEntry> newAcl = AclTransformation.mergeAclEntries(existingAcl,
2769          aclSpec);
2770        AclStorage.updateINodeAcl(inode, newAcl, snapshotId);
2771        return newAcl;
2772      }
2773    
2774      void removeAclEntries(String src, List<AclEntry> aclSpec) throws IOException {
2775        writeLock();
2776        try {
2777          List<AclEntry> newAcl = unprotectedRemoveAclEntries(src, aclSpec);
2778          fsImage.getEditLog().logSetAcl(src, newAcl);
2779        } finally {
2780          writeUnlock();
2781        }
2782      }
2783    
2784      private List<AclEntry> unprotectedRemoveAclEntries(String src,
2785          List<AclEntry> aclSpec) throws IOException {
2786        assert hasWriteLock();
2787        INodesInPath iip = rootDir.getINodesInPath4Write(normalizePath(src), true);
2788        INode inode = resolveLastINode(src, iip);
2789        int snapshotId = iip.getLatestSnapshotId();
2790        List<AclEntry> existingAcl = AclStorage.readINodeLogicalAcl(inode);
2791        List<AclEntry> newAcl = AclTransformation.filterAclEntriesByAclSpec(
2792          existingAcl, aclSpec);
2793        AclStorage.updateINodeAcl(inode, newAcl, snapshotId);
2794        return newAcl;
2795      }
2796    
2797      void removeDefaultAcl(String src) throws IOException {
2798        writeLock();
2799        try {
2800          List<AclEntry> newAcl = unprotectedRemoveDefaultAcl(src);
2801          fsImage.getEditLog().logSetAcl(src, newAcl);
2802        } finally {
2803          writeUnlock();
2804        }
2805      }
2806    
2807      private List<AclEntry> unprotectedRemoveDefaultAcl(String src)
2808          throws IOException {
2809        assert hasWriteLock();
2810        INodesInPath iip = rootDir.getINodesInPath4Write(normalizePath(src), true);
2811        INode inode = resolveLastINode(src, iip);
2812        int snapshotId = iip.getLatestSnapshotId();
2813        List<AclEntry> existingAcl = AclStorage.readINodeLogicalAcl(inode);
2814        List<AclEntry> newAcl = AclTransformation.filterDefaultAclEntries(
2815          existingAcl);
2816        AclStorage.updateINodeAcl(inode, newAcl, snapshotId);
2817        return newAcl;
2818      }
2819    
2820      void removeAcl(String src) throws IOException {
2821        writeLock();
2822        try {
2823          unprotectedRemoveAcl(src);
2824          fsImage.getEditLog().logSetAcl(src, AclFeature.EMPTY_ENTRY_LIST);
2825        } finally {
2826          writeUnlock();
2827        }
2828      }
2829    
2830      private void unprotectedRemoveAcl(String src) throws IOException {
2831        assert hasWriteLock();
2832        INodesInPath iip = rootDir.getINodesInPath4Write(normalizePath(src), true);
2833        INode inode = resolveLastINode(src, iip);
2834        int snapshotId = iip.getLatestSnapshotId();
2835        AclStorage.removeINodeAcl(inode, snapshotId);
2836      }
2837    
2838      void setAcl(String src, List<AclEntry> aclSpec) throws IOException {
2839        writeLock();
2840        try {
2841          List<AclEntry> newAcl = unprotectedSetAcl(src, aclSpec);
2842          fsImage.getEditLog().logSetAcl(src, newAcl);
2843        } finally {
2844          writeUnlock();
2845        }
2846      }
2847    
2848      List<AclEntry> unprotectedSetAcl(String src, List<AclEntry> aclSpec)
2849          throws IOException {
2850        // ACL removal is logged to edits as OP_SET_ACL with an empty list.
2851        if (aclSpec.isEmpty()) {
2852          unprotectedRemoveAcl(src);
2853          return AclFeature.EMPTY_ENTRY_LIST;
2854        }
2855    
2856        assert hasWriteLock();
2857        INodesInPath iip = rootDir.getINodesInPath4Write(normalizePath(src), true);
2858        INode inode = resolveLastINode(src, iip);
2859        int snapshotId = iip.getLatestSnapshotId();
2860        List<AclEntry> existingAcl = AclStorage.readINodeLogicalAcl(inode);
2861        List<AclEntry> newAcl = AclTransformation.replaceAclEntries(existingAcl,
2862          aclSpec);
2863        AclStorage.updateINodeAcl(inode, newAcl, snapshotId);
2864        return newAcl;
2865      }
2866    
2867      AclStatus getAclStatus(String src) throws IOException {
2868        String srcs = normalizePath(src);
2869        readLock();
2870        try {
2871          // There is no real inode for the path ending in ".snapshot", so return a
2872          // non-null, unpopulated AclStatus.  This is similar to getFileInfo.
2873          if (srcs.endsWith(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR) &&
2874              getINode4DotSnapshot(srcs) != null) {
2875            return new AclStatus.Builder().owner("").group("").build();
2876          }
2877          INodesInPath iip = rootDir.getLastINodeInPath(srcs, true);
2878          INode inode = resolveLastINode(src, iip);
2879          int snapshotId = iip.getPathSnapshotId();
2880          List<AclEntry> acl = AclStorage.readINodeAcl(inode, snapshotId);
2881          return new AclStatus.Builder()
2882              .owner(inode.getUserName()).group(inode.getGroupName())
2883              .stickyBit(inode.getFsPermission(snapshotId).getStickyBit())
2884              .addEntries(acl).build();
2885        } finally {
2886          readUnlock();
2887        }
2888      }
2889    
2890      private static INode resolveLastINode(String src, INodesInPath iip)
2891          throws FileNotFoundException {
2892        INode inode = iip.getLastINode();
2893        if (inode == null)
2894          throw new FileNotFoundException("cannot find " + src);
2895        return inode;
2896      }
2897    
2898      /**
2899       * Caches frequently used file names to reuse file name objects and
2900       * reduce heap size.
2901       */
2902      void cacheName(INode inode) {
2903        // Name is cached only for files
2904        if (!inode.isFile()) {
2905          return;
2906        }
2907        ByteArray name = new ByteArray(inode.getLocalNameBytes());
2908        name = nameCache.put(name);
2909        if (name != null) {
2910          inode.setLocalName(name.getBytes());
2911        }
2912      }
2913      
  /**
   * Resets the name cache and then clears the inode map.
   */
  void shutdown() {
    nameCache.reset();
    inodeMap.clear();
  }
2918      
  /**
   * Given an INode, get all the path components leading to it from the root.
   * If an INode corresponding to C is given in /A/B/C, the returned
   * path components will be {root, A, B, C}.
   */
2924      static byte[][] getPathComponents(INode inode) {
2925        List<byte[]> components = new ArrayList<byte[]>();
2926        components.add(0, inode.getLocalNameBytes());
2927        while(inode.getParent() != null) {
2928          components.add(0, inode.getParent().getLocalNameBytes());
2929          inode = inode.getParent();
2930        }
2931        return components.toArray(new byte[components.size()][]);
2932      }
2933      
2934      /**
2935       * @return path components for reserved path, else null.
2936       */
2937      static byte[][] getPathComponentsForReservedPath(String src) {
2938        return !isReservedName(src) ? null : INode.getPathComponents(src);
2939      }
2940      
  /**
   * Resolve a path of the form {@code /.reserved/.inodes/<inodeid>/...} to a
   * regular path.
   * 
   * @param src path that is being processed
   * @param pathComponents path components corresponding to the path
   * @param fsd FSDirectory
   * @return if the path indicates an inode, return the path after replacing
   *         everything up to and including {@code <inodeid>} with the
   *         corresponding path of the inode, else the path in {@code src}
   *         as is.
   * @throws FileNotFoundException if inodeid is invalid
   */
2952      static String resolvePath(String src, byte[][] pathComponents, FSDirectory fsd)
2953          throws FileNotFoundException {
2954        if (pathComponents == null || pathComponents.length <= 3) {
2955          return src;
2956        }
2957        // Not /.reserved/.inodes
2958        if (!Arrays.equals(DOT_RESERVED, pathComponents[1])
2959            || !Arrays.equals(DOT_INODES, pathComponents[2])) { // Not .inodes path
2960          return src;
2961        }
2962        final String inodeId = DFSUtil.bytes2String(pathComponents[3]);
2963        long id = 0;
2964        try {
2965          id = Long.valueOf(inodeId);
2966        } catch (NumberFormatException e) {
2967          throw new FileNotFoundException("Invalid inode path: " + src);
2968        }
2969        if (id == INodeId.ROOT_INODE_ID && pathComponents.length == 4) {
2970          return Path.SEPARATOR;
2971        }
2972        INode inode = fsd.getInode(id);
2973        if (inode == null) {
2974          throw new FileNotFoundException(
2975              "File for given inode path does not exist: " + src);
2976        }
2977        
2978        // Handle single ".." for NFS lookup support.
2979        if ((pathComponents.length > 4)
2980            && DFSUtil.bytes2String(pathComponents[4]).equals("..")) {
2981          INode parent = inode.getParent();
2982          if (parent == null || parent.getId() == INodeId.ROOT_INODE_ID) {
2983            // inode is root, or its parent is root.
2984            return Path.SEPARATOR;
2985          } else {
2986            return parent.getFullPathName();
2987          }
2988        }
2989    
2990        StringBuilder path = id == INodeId.ROOT_INODE_ID ? new StringBuilder()
2991            : new StringBuilder(inode.getFullPathName());
2992        for (int i = 4; i < pathComponents.length; i++) {
2993          path.append(Path.SEPARATOR).append(DFSUtil.bytes2String(pathComponents[i]));
2994        }
2995        if (NameNode.LOG.isDebugEnabled()) {
2996          NameNode.LOG.debug("Resolved path is " + path);
2997        }
2998        return path.toString();
2999      }
3000      
3001      /** Check if a given inode name is reserved */
3002      public static boolean isReservedName(INode inode) {
3003        return CHECK_RESERVED_FILE_NAMES
3004            && Arrays.equals(inode.getLocalNameBytes(), DOT_RESERVED);
3005      }
3006      
3007      /** Check if a given path is reserved */
3008      public static boolean isReservedName(String src) {
3009        return src.startsWith(DOT_RESERVED_PATH_PREFIX);
3010      }
3011    }