001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.hdfs.server.namenode;
019    
020    import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT;
021    import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY;
022    import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT;
023    import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY;
024    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_DEFAULT;
025    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_KEY;
026    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_DEFAULT;
027    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY;
028    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CHECKSUM_TYPE_DEFAULT;
029    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY;
030    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT;
031    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_KEY;
032    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_DEFAULT;
033    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_KEY;
034    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_DEFAULT;
035    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_KEY;
036    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT;
037    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY;
038    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOGGERS_KEY;
039    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT;
040    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY;
041    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT;
042    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY;
043    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT;
044    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY;
045    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME;
046    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT;
047    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY;
048    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT;
049    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY;
050    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT;
051    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY;
052    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT;
053    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY;
054    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY;
055    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY;
056    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS;
057    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT;
058    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD;
059    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT;
060    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT;
061    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY;
062    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_DEFAULT;
063    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_KEY;
064    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY;
065    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_DEFAULT;
066    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY;
067    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY;
068    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT;
069    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY;
070    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT;
071    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY;
072    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT;
073    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY;
074    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY;
075    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_DEFAULT;
076    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY;
077    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT;
078    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY;
079    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY;
080    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_DEFAULT;
081    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY;
082    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT;
083    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY;
084    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_DEFAULT;
085    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY;
086    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_DEFAULT;
087    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_KEY;
088    import static org.apache.hadoop.util.Time.now;
089    
090    import java.io.BufferedWriter;
091    import java.io.ByteArrayInputStream;
092    import java.io.DataInput;
093    import java.io.DataInputStream;
094    import java.io.File;
095    import java.io.FileNotFoundException;
096    import java.io.FileOutputStream;
097    import java.io.IOException;
098    import java.io.OutputStreamWriter;
099    import java.io.PrintWriter;
100    import java.io.StringWriter;
101    import java.lang.management.ManagementFactory;
102    import java.net.InetAddress;
103    import java.net.URI;
104    import java.util.ArrayList;
105    import java.util.Arrays;
106    import java.util.Collection;
107    import java.util.Collections;
108    import java.util.Date;
109    import java.util.EnumSet;
110    import java.util.HashMap;
111    import java.util.HashSet;
112    import java.util.Iterator;
113    import java.util.LinkedHashSet;
114    import java.util.List;
115    import java.util.Map;
116    import java.util.Set;
117    import java.util.concurrent.TimeUnit;
118    import java.util.concurrent.locks.ReentrantLock;
119    import java.util.concurrent.locks.ReentrantReadWriteLock;
120    
121    import javax.management.NotCompliantMBeanException;
122    import javax.management.ObjectName;
123    import javax.management.StandardMBean;
124    
125    import org.apache.commons.logging.Log;
126    import org.apache.commons.logging.LogFactory;
127    import org.apache.commons.logging.impl.Log4JLogger;
128    import org.apache.hadoop.HadoopIllegalArgumentException;
129    import org.apache.hadoop.classification.InterfaceAudience;
130    import org.apache.hadoop.conf.Configuration;
131    import org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries;
132    import org.apache.hadoop.fs.CacheFlag;
133    import org.apache.hadoop.fs.ContentSummary;
134    import org.apache.hadoop.fs.CreateFlag;
135    import org.apache.hadoop.fs.DirectoryListingStartAfterNotFoundException;
136    import org.apache.hadoop.fs.FileAlreadyExistsException;
137    import org.apache.hadoop.fs.FileStatus;
138    import org.apache.hadoop.fs.FileSystem;
139    import org.apache.hadoop.fs.FsServerDefaults;
140    import org.apache.hadoop.fs.InvalidPathException;
141    import org.apache.hadoop.fs.Options;
142    import org.apache.hadoop.fs.Options.Rename;
143    import org.apache.hadoop.fs.ParentNotDirectoryException;
144    import org.apache.hadoop.fs.Path;
145    import org.apache.hadoop.fs.UnresolvedLinkException;
146    import org.apache.hadoop.fs.permission.AclEntry;
147    import org.apache.hadoop.fs.permission.AclStatus;
148    import org.apache.hadoop.fs.permission.FsAction;
149    import org.apache.hadoop.fs.permission.FsPermission;
150    import org.apache.hadoop.fs.permission.PermissionStatus;
151    import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
152    import org.apache.hadoop.ha.ServiceFailedException;
153    import org.apache.hadoop.hdfs.DFSConfigKeys;
154    import org.apache.hadoop.hdfs.DFSUtil;
155    import org.apache.hadoop.hdfs.HAUtil;
156    import org.apache.hadoop.hdfs.HdfsConfiguration;
157    import org.apache.hadoop.hdfs.StorageType;
158    import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
159    import org.apache.hadoop.hdfs.protocol.Block;
160    import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
161    import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
162    import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
163    import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
164    import org.apache.hadoop.hdfs.protocol.ClientProtocol;
165    import org.apache.hadoop.hdfs.protocol.DatanodeID;
166    import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
167    import org.apache.hadoop.hdfs.protocol.DirectoryListing;
168    import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
169    import org.apache.hadoop.hdfs.protocol.HdfsConstants;
170    import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
171    import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
172    import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
173    import org.apache.hadoop.hdfs.protocol.LocatedBlock;
174    import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
175    import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
176    import org.apache.hadoop.hdfs.protocol.RecoveryInProgressException;
177    import org.apache.hadoop.hdfs.protocol.RollingUpgradeException;
178    import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo;
179    import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
180    import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffReportEntry;
181    import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
182    import org.apache.hadoop.hdfs.protocol.datatransfer.ReplaceDatanodeOnFailure;
183    import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager;
184    import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager.AccessMode;
185    import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
186    import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
187    import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager.SecretManagerState;
188    import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection;
189    import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
190    import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
191    import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
192    import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
193    import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
194    import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStatistics;
195    import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
196    import org.apache.hadoop.hdfs.server.blockmanagement.OutOfV1GenerationStampsException;
197    import org.apache.hadoop.hdfs.server.common.GenerationStamp;
198    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
199    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
200    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption;
201    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
202    import org.apache.hadoop.hdfs.server.common.Storage;
203    import org.apache.hadoop.hdfs.server.common.Storage.StorageDirType;
204    import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
205    import org.apache.hadoop.hdfs.server.common.Util;
206    import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection;
207    import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
208    import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream;
209    import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
210    import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
211    import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory;
212    import org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer;
213    import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
214    import org.apache.hadoop.hdfs.server.namenode.ha.StandbyCheckpointer;
215    import org.apache.hadoop.hdfs.server.namenode.metrics.FSNamesystemMBean;
216    import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
217    import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable;
218    import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable.SnapshotDiffInfo;
219    import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
220    import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager;
221    import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
222    import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
223    import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
224    import org.apache.hadoop.hdfs.server.namenode.startupprogress.Status;
225    import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
226    import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;
227    import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods;
228    import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
229    import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
230    import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
231    import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat;
232    import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand;
233    import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
234    import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
235    import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
236    import org.apache.hadoop.hdfs.server.protocol.StorageReport;
237    import org.apache.hadoop.hdfs.util.ChunkedArrayList;
238    import org.apache.hadoop.io.IOUtils;
239    import org.apache.hadoop.io.Text;
240    import org.apache.hadoop.ipc.RetriableException;
241    import org.apache.hadoop.ipc.RetryCache;
242    import org.apache.hadoop.ipc.RetryCache.CacheEntry;
243    import org.apache.hadoop.ipc.RetryCache.CacheEntryWithPayload;
244    import org.apache.hadoop.ipc.Server;
245    import org.apache.hadoop.ipc.StandbyException;
246    import org.apache.hadoop.metrics2.annotation.Metric;
247    import org.apache.hadoop.metrics2.annotation.Metrics;
248    import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
249    import org.apache.hadoop.metrics2.util.MBeans;
250    import org.apache.hadoop.net.NetworkTopology;
251    import org.apache.hadoop.net.Node;
252    import org.apache.hadoop.security.AccessControlException;
253    import org.apache.hadoop.security.UserGroupInformation;
254    import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
255    import org.apache.hadoop.security.token.SecretManager.InvalidToken;
256    import org.apache.hadoop.security.token.Token;
257    import org.apache.hadoop.security.token.TokenIdentifier;
258    import org.apache.hadoop.security.token.delegation.DelegationKey;
259    import org.apache.hadoop.util.Daemon;
260    import org.apache.hadoop.util.DataChecksum;
261    import org.apache.hadoop.util.StringUtils;
262    import org.apache.hadoop.util.Time;
263    import org.apache.hadoop.util.VersionInfo;
264    import org.apache.log4j.Appender;
265    import org.apache.log4j.AsyncAppender;
266    import org.apache.log4j.Logger;
267    import org.mortbay.util.ajax.JSON;
268    
269    import com.google.common.annotations.VisibleForTesting;
270    import com.google.common.base.Charsets;
271    import com.google.common.base.Preconditions;
272    import com.google.common.collect.ImmutableMap;
273    import com.google.common.collect.Lists;
274    
275    /***************************************************
276     * FSNamesystem does the actual bookkeeping work for the
277     * DataNode.
278     *
279     * It tracks several important tables.
280     *
281     * 1)  valid fsname --> blocklist  (kept on disk, logged)
282     * 2)  Set of all valid blocks (inverted #1)
283     * 3)  block --> machinelist (kept in memory, rebuilt dynamically from reports)
284     * 4)  machine --> blocklist (inverted #2)
285     * 5)  LRU cache of updated-heartbeat machines
286     ***************************************************/
287    @InterfaceAudience.Private
288    @Metrics(context="dfs")
289    public class FSNamesystem implements Namesystem, FSClusterStats,
290        FSNamesystemMBean, NameNodeMXBean {
  public static final Log LOG = LogFactory.getLog(FSNamesystem.class);

  // Per-thread reusable StringBuilder, so audit-record formatting does not
  // allocate a fresh buffer on every audited operation.
  private static final ThreadLocal<StringBuilder> auditBuffer =
    new ThreadLocal<StringBuilder>() {
      @Override
      protected StringBuilder initialValue() {
        return new StringBuilder();
      }
  };
300    
301      @VisibleForTesting
302      public boolean isAuditEnabled() {
303        return !isDefaultAuditLogger || auditLog.isInfoEnabled();
304      }
305    
306      private HdfsFileStatus getAuditFileInfo(String path, boolean resolveSymlink)
307          throws IOException {
308        return (isAuditEnabled() && isExternalInvocation())
309            ? dir.getFileInfo(path, resolveSymlink) : null;
310      }
311      
  /**
   * Audit-log an operation that has no destination path and no resulting
   * file status; delegates to the five-argument overload with nulls.
   */
  private void logAuditEvent(boolean succeeded, String cmd, String src)
      throws IOException {
    logAuditEvent(succeeded, cmd, src, null, null);
  }
316      
317      private void logAuditEvent(boolean succeeded, String cmd, String src,
318          String dst, HdfsFileStatus stat) throws IOException {
319        if (isAuditEnabled() && isExternalInvocation()) {
320          logAuditEvent(succeeded, getRemoteUser(), getRemoteIp(),
321                        cmd, src, dst, stat);
322        }
323      }
324    
325      private void logAuditEvent(boolean succeeded,
326          UserGroupInformation ugi, InetAddress addr, String cmd, String src,
327          String dst, HdfsFileStatus stat) {
328        FileStatus status = null;
329        if (stat != null) {
330          Path symlink = stat.isSymlink() ? new Path(stat.getSymlink()) : null;
331          Path path = dst != null ? new Path(dst) : new Path(src);
332          status = new FileStatus(stat.getLen(), stat.isDir(),
333              stat.getReplication(), stat.getBlockSize(), stat.getModificationTime(),
334              stat.getAccessTime(), stat.getPermission(), stat.getOwner(),
335              stat.getGroup(), symlink, path);
336        }
337        for (AuditLogger logger : auditLoggers) {
338          if (logger instanceof HdfsAuditLogger) {
339            HdfsAuditLogger hdfsLogger = (HdfsAuditLogger) logger;
340            hdfsLogger.logAuditEvent(succeeded, ugi.toString(), addr, cmd, src, dst,
341                status, ugi, dtSecretManager);
342          } else {
343            logger.logAuditEvent(succeeded, ugi.toString(), addr,
344                cmd, src, dst, status);
345          }
346        }
347      }
348    
349      /**
350       * Logger for audit events, noting successful FSNamesystem operations. Emits
351       * to FSNamesystem.audit at INFO. Each event causes a set of tab-separated
352       * <code>key=value</code> pairs to be written for the following properties:
353       * <code>
354       * ugi=&lt;ugi in RPC&gt;
355       * ip=&lt;remote IP&gt;
356       * cmd=&lt;command&gt;
357       * src=&lt;src path&gt;
358       * dst=&lt;dst path (optional)&gt;
359       * perm=&lt;permissions (optional)&gt;
360       * </code>
361       */
362      public static final Log auditLog = LogFactory.getLog(
363          FSNamesystem.class.getName() + ".audit");
364    
  static final int DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED = 100;
  // Deliberately non-final; NOTE(review): presumably adjusted by tests —
  // confirm before making final.
  static int BLOCK_DELETION_INCREMENT = 1000;
  // Whether permission checking is enforced.
  private final boolean isPermissionEnabled;
  // Identity the namesystem runs as (the filesystem owner).
  private final UserGroupInformation fsOwner;
  private final String fsOwnerShortUserName;
  // Name of the superuser group.
  private final String supergroup;
  // Whether a standby NameNode should perform checkpoints (HA setups).
  private final boolean standbyShouldCheckpoint;

  // Scan interval is not configurable.
  private static final long DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL =
    TimeUnit.MILLISECONDS.convert(1, TimeUnit.HOURS);
  final DelegationTokenSecretManager dtSecretManager;
  // Test-only knob: force delegation token usage regardless of security mode.
  private final boolean alwaysUseDelegationTokensForTests;

  // Startup-progress step reported while waiting for datanode block reports.
  private static final Step STEP_AWAITING_REPORTED_BLOCKS =
    new Step(StepType.AWAITING_REPORTED_BLOCKS);

  // Tracks whether the default audit logger is the only configured audit
  // logger; this allows isAuditEnabled() to return false in case the
  // underlying logger is disabled, and avoid some unnecessary work.
  private final boolean isDefaultAuditLogger;
  private final List<AuditLogger> auditLoggers;
387    
  /** The namespace tree. */
  FSDirectory dir;
  private final BlockManager blockManager;
  private final SnapshotManager snapshotManager;
  private final CacheManager cacheManager;
  private final DatanodeStatistics datanodeStatistics;

  // Null when no rolling upgrade is in progress.
  private RollingUpgradeInfo rollingUpgradeInfo = null;
  /**
   * A flag that indicates whether the checkpointer should checkpoint a rollback
   * fsimage. The edit log tailer sets this flag. The checkpoint will create a
   * rollback fsimage if the flag is true, and then change the flag to false.
   */
  private volatile boolean needRollbackFsImage;

  // Block pool ID used by this namenode
  private String blockPoolId;

  final LeaseManager leaseManager = new LeaseManager(this); 

  volatile Daemon smmthread = null;  // SafeModeMonitor thread
  
  Daemon nnrmthread = null; // NamenodeResourceMonitor thread

  Daemon nnEditLogRoller = null; // NameNodeEditLogRoller thread
  /**
   * When an active namenode will roll its own edit log, in # edits
   */
  private final long editLogRollerThreshold;
  /**
   * Check interval of an active namenode's edit log roller thread 
   */
  private final int editLogRollerInterval;

  // volatile: read and written from different threads.
  private volatile boolean hasResourcesAvailable = false;
  // volatile: cleared on shutdown, checked by daemon threads.
  private volatile boolean fsRunning = true;
  
  /** The start time of the namesystem. */
  private final long startTime = now();

  /** The interval of namenode checking for the disk space availability */
  private final long resourceRecheckInterval;

  // The actual resource checker instance.
  NameNodeResourceChecker nnResourceChecker;

  // Defaults handed back to clients (block size, checksum type, etc.).
  private final FsServerDefaults serverDefaults;
  private final boolean supportAppends;
  private final ReplaceDatanodeOnFailure dtpReplaceDatanodeOnFailure;

  private volatile SafeModeInfo safeMode;  // safe mode information

  private final long maxFsObjects;          // maximum number of fs objects

  private final long minBlockSize;         // minimum block size
  private final long maxBlocksPerFile;     // maximum # of blocks per file
444    
445      /**
446       * The global generation stamp for legacy blocks with randomly
447       * generated block IDs.
448       */
449      private final GenerationStamp generationStampV1 = new GenerationStamp();
450    
451      /**
452       * The global generation stamp for this file system.
453       */
454      private final GenerationStamp generationStampV2 = new GenerationStamp();
455    
456      /**
457       * The value of the generation stamp when the first switch to sequential
458       * block IDs was made. Blocks with generation stamps below this value
459       * have randomly allocated block IDs. Blocks with generation stamps above
460       * this value had sequentially allocated block IDs. Read from the fsImage
461       * (or initialized as an offset from the V1 (legacy) generation stamp on
462       * upgrade).
463       */
464      private long generationStampV1Limit =
465          GenerationStamp.GRANDFATHER_GENERATION_STAMP;
466    
467      /**
468       * The global block ID space for this file system.
469       */
470      @VisibleForTesting
471      private final SequentialBlockIdGenerator blockIdGenerator;
472    
473      // precision of access times.
474      private final long accessTimePrecision;
475    
476      /** Lock to protect FSNamesystem. */
477      private final FSNamesystemLock fsLock;
478    
479      /**
480       * Used when this NN is in standby state to read from the shared edit log.
481       */
482      private EditLogTailer editLogTailer = null;
483    
484      /**
485       * Used when this NN is in standby state to perform checkpoints.
486       */
487      private StandbyCheckpointer standbyCheckpointer;
488    
489      /**
490       * Reference to the NN's HAContext object. This is only set once
491       * {@link #startCommonServices(Configuration, HAContext)} is called. 
492       */
493      private HAContext haContext;
494    
495      private final boolean haEnabled;
496    
497      /** flag indicating whether replication queues have been initialized */
498      boolean initializedReplQueues = false;
499    
500      /**
501       * Whether the namenode is in the middle of starting the active service
502       */
503      private volatile boolean startingActiveService = false;
504        
505      private INodeId inodeId;
506      
507      private final RetryCache retryCache;
508    
509      private final AclConfigFlag aclConfigFlag;
510    
511      /**
512       * Set the last allocated inode id when fsimage or editlog is loaded. 
513       */
514      public void resetLastInodeId(long newValue) throws IOException {
515        try {
516          inodeId.skipTo(newValue);
517        } catch(IllegalStateException ise) {
518          throw new IOException(ise);
519        }
520      }
521    
  /** Should only be used for tests to reset to any value */
  void resetLastInodeIdWithoutChecking(long newValue) {
    // Unlike resetLastInodeId(), no validity check is performed here.
    inodeId.setCurrentValue(newValue);
  }
526      
  /** @return the most recently allocated inode ID. */
  public long getLastInodeId() {
    return inodeId.getCurrentValue();
  }
531    
  /** Allocate and return the next sequential inode ID. */
  public long allocateNewInodeId() {
    return inodeId.nextValue();
  }
536      
537      /**
538       * Clear all loaded data
539       */
540      void clear() {
541        dir.reset();
542        dtSecretManager.reset();
543        generationStampV1.setCurrentValue(GenerationStamp.LAST_RESERVED_STAMP);
544        generationStampV2.setCurrentValue(GenerationStamp.LAST_RESERVED_STAMP);
545        blockIdGenerator.setCurrentValue(
546            SequentialBlockIdGenerator.LAST_RESERVED_BLOCK_ID);
547        generationStampV1Limit = GenerationStamp.GRANDFATHER_GENERATION_STAMP;
548        leaseManager.removeAllLeases();
549        inodeId.setCurrentValue(INodeId.LAST_RESERVED_ID);
550        snapshotManager.clearSnapshottableDirs();
551        cacheManager.clear();
552      }
553    
  /** @return the lease manager (exposed for tests only). */
  @VisibleForTesting
  LeaseManager getLeaseManager() {
    return leaseManager;
  }
558      
  /** @return true if this namesystem was configured for HA operation. */
  boolean isHaEnabled() {
    return haEnabled;
  }
562      
563      /**
564       * Check the supplied configuration for correctness.
565       * @param conf Supplies the configuration to validate.
566       * @throws IOException if the configuration could not be queried.
567       * @throws IllegalArgumentException if the configuration is invalid.
568       */
569      private static void checkConfiguration(Configuration conf)
570          throws IOException {
571    
572        final Collection<URI> namespaceDirs =
573            FSNamesystem.getNamespaceDirs(conf);
574        final Collection<URI> editsDirs =
575            FSNamesystem.getNamespaceEditsDirs(conf);
576        final Collection<URI> requiredEditsDirs =
577            FSNamesystem.getRequiredNamespaceEditsDirs(conf);
578        final Collection<URI> sharedEditsDirs =
579            FSNamesystem.getSharedEditsDirs(conf);
580    
581        for (URI u : requiredEditsDirs) {
582          if (u.toString().compareTo(
583                  DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_DEFAULT) == 0) {
584            continue;
585          }
586    
587          // Each required directory must also be in editsDirs or in
588          // sharedEditsDirs.
589          if (!editsDirs.contains(u) &&
590              !sharedEditsDirs.contains(u)) {
591            throw new IllegalArgumentException(
592                "Required edits directory " + u.toString() + " not present in " +
593                DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY + ". " +
594                DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY + "=" +
595                editsDirs.toString() + "; " +
596                DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY + "=" +
597                requiredEditsDirs.toString() + ". " +
598                DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY + "=" +
599                sharedEditsDirs.toString() + ".");
600          }
601        }
602    
603        if (namespaceDirs.size() == 1) {
604          LOG.warn("Only one image storage directory ("
605              + DFS_NAMENODE_NAME_DIR_KEY + ") configured. Beware of dataloss"
606              + " due to lack of redundant storage directories!");
607        }
608        if (editsDirs.size() == 1) {
609          LOG.warn("Only one namespace edits storage directory ("
610              + DFS_NAMENODE_EDITS_DIR_KEY + ") configured. Beware of dataloss"
611              + " due to lack of redundant storage directories!");
612        }
613      }
614    
615      /**
616       * Instantiates an FSNamesystem loaded from the image and edits
617       * directories specified in the passed Configuration.
618       *
619       * @param conf the Configuration which specifies the storage directories
620       *             from which to load
621       * @return an FSNamesystem which contains the loaded namespace
622       * @throws IOException if loading fails
623       */
624      static FSNamesystem loadFromDisk(Configuration conf) throws IOException {
625    
626        checkConfiguration(conf);
627        FSImage fsImage = new FSImage(conf,
628            FSNamesystem.getNamespaceDirs(conf),
629            FSNamesystem.getNamespaceEditsDirs(conf));
630        FSNamesystem namesystem = new FSNamesystem(conf, fsImage, false);
631        StartupOption startOpt = NameNode.getStartupOption(conf);
632        if (startOpt == StartupOption.RECOVER) {
633          namesystem.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
634        }
635    
636        long loadStart = now();
637        try {
638          namesystem.loadFSImage(startOpt);
639        } catch (IOException ioe) {
640          LOG.warn("Encountered exception loading fsimage", ioe);
641          fsImage.close();
642          throw ioe;
643        }
644        long timeTakenToLoadFSImage = now() - loadStart;
645        LOG.info("Finished loading FSImage in " + timeTakenToLoadFSImage + " msecs");
646        NameNodeMetrics nnMetrics = NameNode.getNameNodeMetrics();
647        if (nnMetrics != null) {
648          nnMetrics.setFsImageLoadTime((int) timeTakenToLoadFSImage);
649        }
650        return namesystem;
651      }
652      
  /**
   * Convenience constructor: same as the three-argument form with
   * {@code ignoreRetryCache} set to false, i.e. the retry cache is set up.
   */
  FSNamesystem(Configuration conf, FSImage fsImage) throws IOException {
    this(conf, fsImage, false);
  }
656      
657      /**
658       * Create an FSNamesystem associated with the specified image.
659       * 
660       * Note that this does not load any data off of disk -- if you would
661       * like that behavior, use {@link #loadFromDisk(Configuration)}
662       *
663       * @param conf configuration
664       * @param fsImage The FSImage to associate with
665       * @param ignoreRetryCache Whether or not should ignore the retry cache setup
666       *                         step. For Secondary NN this should be set to true.
667       * @throws IOException on bad configuration
668       */
669      FSNamesystem(Configuration conf, FSImage fsImage, boolean ignoreRetryCache)
670          throws IOException {
671        if (conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY,
672                            DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT)) {
673          LOG.info("Enabling async auditlog");
674          enableAsyncAuditLog();
675        }
676        boolean fair = conf.getBoolean("dfs.namenode.fslock.fair", true);
677        LOG.info("fsLock is fair:" + fair);
678        fsLock = new FSNamesystemLock(fair);
679        try {
680          resourceRecheckInterval = conf.getLong(
681              DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY,
682              DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT);
683    
684          this.blockManager = new BlockManager(this, this, conf);
685          this.datanodeStatistics = blockManager.getDatanodeManager().getDatanodeStatistics();
686          this.blockIdGenerator = new SequentialBlockIdGenerator(this.blockManager);
687    
688          this.fsOwner = UserGroupInformation.getCurrentUser();
689          this.fsOwnerShortUserName = fsOwner.getShortUserName();
690          this.supergroup = conf.get(DFS_PERMISSIONS_SUPERUSERGROUP_KEY, 
691                                     DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT);
692          this.isPermissionEnabled = conf.getBoolean(DFS_PERMISSIONS_ENABLED_KEY,
693                                                     DFS_PERMISSIONS_ENABLED_DEFAULT);
694          LOG.info("fsOwner             = " + fsOwner);
695          LOG.info("supergroup          = " + supergroup);
696          LOG.info("isPermissionEnabled = " + isPermissionEnabled);
697    
698          // block allocation has to be persisted in HA using a shared edits directory
699          // so that the standby has up-to-date namespace information
700          String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf);
701          this.haEnabled = HAUtil.isHAEnabled(conf, nameserviceId);  
702          
703          // Sanity check the HA-related config.
704          if (nameserviceId != null) {
705            LOG.info("Determined nameservice ID: " + nameserviceId);
706          }
707          LOG.info("HA Enabled: " + haEnabled);
708          if (!haEnabled && HAUtil.usesSharedEditsDir(conf)) {
709            LOG.warn("Configured NNs:\n" + DFSUtil.nnAddressesAsString(conf));
710            throw new IOException("Invalid configuration: a shared edits dir " +
711                "must not be specified if HA is not enabled.");
712          }
713    
714          // Get the checksum type from config
715          String checksumTypeStr = conf.get(DFS_CHECKSUM_TYPE_KEY, DFS_CHECKSUM_TYPE_DEFAULT);
716          DataChecksum.Type checksumType;
717          try {
718             checksumType = DataChecksum.Type.valueOf(checksumTypeStr);
719          } catch (IllegalArgumentException iae) {
720             throw new IOException("Invalid checksum type in "
721                + DFS_CHECKSUM_TYPE_KEY + ": " + checksumTypeStr);
722          }
723    
724          this.serverDefaults = new FsServerDefaults(
725              conf.getLongBytes(DFS_BLOCK_SIZE_KEY, DFS_BLOCK_SIZE_DEFAULT),
726              conf.getInt(DFS_BYTES_PER_CHECKSUM_KEY, DFS_BYTES_PER_CHECKSUM_DEFAULT),
727              conf.getInt(DFS_CLIENT_WRITE_PACKET_SIZE_KEY, DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT),
728              (short) conf.getInt(DFS_REPLICATION_KEY, DFS_REPLICATION_DEFAULT),
729              conf.getInt(IO_FILE_BUFFER_SIZE_KEY, IO_FILE_BUFFER_SIZE_DEFAULT),
730              conf.getBoolean(DFS_ENCRYPT_DATA_TRANSFER_KEY, DFS_ENCRYPT_DATA_TRANSFER_DEFAULT),
731              conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT),
732              checksumType);
733          
734          this.maxFsObjects = conf.getLong(DFS_NAMENODE_MAX_OBJECTS_KEY, 
735                                           DFS_NAMENODE_MAX_OBJECTS_DEFAULT);
736    
737          this.minBlockSize = conf.getLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY,
738              DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_DEFAULT);
739          this.maxBlocksPerFile = conf.getLong(DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_KEY,
740              DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_DEFAULT);
741          this.accessTimePrecision = conf.getLong(DFS_NAMENODE_ACCESSTIME_PRECISION_KEY,
742              DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT);
743          this.supportAppends = conf.getBoolean(DFS_SUPPORT_APPEND_KEY, DFS_SUPPORT_APPEND_DEFAULT);
744          LOG.info("Append Enabled: " + supportAppends);
745    
746          this.dtpReplaceDatanodeOnFailure = ReplaceDatanodeOnFailure.get(conf);
747          
748          this.standbyShouldCheckpoint = conf.getBoolean(
749              DFS_HA_STANDBY_CHECKPOINTS_KEY, DFS_HA_STANDBY_CHECKPOINTS_DEFAULT);
750          // # edit autoroll threshold is a multiple of the checkpoint threshold 
751          this.editLogRollerThreshold = (long)
752              (conf.getFloat(
753                  DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD,
754                  DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT) *
755              conf.getLong(
756                  DFS_NAMENODE_CHECKPOINT_TXNS_KEY,
757                  DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT));
758          this.editLogRollerInterval = conf.getInt(
759              DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS,
760              DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT);
761          this.inodeId = new INodeId();
762          
763          // For testing purposes, allow the DT secret manager to be started regardless
764          // of whether security is enabled.
765          alwaysUseDelegationTokensForTests = conf.getBoolean(
766              DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY,
767              DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT);
768    
769          this.dtSecretManager = createDelegationTokenSecretManager(conf);
770          this.dir = new FSDirectory(fsImage, this, conf);
771          this.snapshotManager = new SnapshotManager(dir);
772          this.cacheManager = new CacheManager(this, conf, blockManager);
773          this.safeMode = new SafeModeInfo(conf);
774          this.auditLoggers = initAuditLoggers(conf);
775          this.isDefaultAuditLogger = auditLoggers.size() == 1 &&
776            auditLoggers.get(0) instanceof DefaultAuditLogger;
777          this.retryCache = ignoreRetryCache ? null : initRetryCache(conf);
778          this.aclConfigFlag = new AclConfigFlag(conf);
779        } catch(IOException e) {
780          LOG.error(getClass().getSimpleName() + " initialization failed.", e);
781          close();
782          throw e;
783        } catch (RuntimeException re) {
784          LOG.error(getClass().getSimpleName() + " initialization failed.", re);
785          close();
786          throw re;
787        }
788      }
789      
790      @VisibleForTesting
791      public RetryCache getRetryCache() {
792        return retryCache;
793      }
794    
795      void lockRetryCache() {
796        if (retryCache != null) {
797          retryCache.lock();
798        }
799      }
800    
801      void unlockRetryCache() {
802        if (retryCache != null) {
803          retryCache.unlock();
804        }
805      }
806    
807      /** Whether or not retry cache is enabled */
808      boolean hasRetryCache() {
809        return retryCache != null;
810      }
811      
812      void addCacheEntryWithPayload(byte[] clientId, int callId, Object payload) {
813        if (retryCache != null) {
814          retryCache.addCacheEntryWithPayload(clientId, callId, payload);
815        }
816      }
817      
818      void addCacheEntry(byte[] clientId, int callId) {
819        if (retryCache != null) {
820          retryCache.addCacheEntry(clientId, callId);
821        }
822      }
823    
824      @VisibleForTesting
825      static RetryCache initRetryCache(Configuration conf) {
826        boolean enable = conf.getBoolean(DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY,
827            DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT);
828        LOG.info("Retry cache on namenode is " + (enable ? "enabled" : "disabled"));
829        if (enable) {
830          float heapPercent = conf.getFloat(
831              DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY,
832              DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT);
833          long entryExpiryMillis = conf.getLong(
834              DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY,
835              DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT);
836          LOG.info("Retry cache will use " + heapPercent
837              + " of total heap and retry cache entry expiry time is "
838              + entryExpiryMillis + " millis");
839          long entryExpiryNanos = entryExpiryMillis * 1000 * 1000;
840          return new RetryCache("NameNodeRetryCache", heapPercent,
841              entryExpiryNanos);
842        }
843        return null;
844      }
845    
846      private List<AuditLogger> initAuditLoggers(Configuration conf) {
847        // Initialize the custom access loggers if configured.
848        Collection<String> alClasses = conf.getStringCollection(DFS_NAMENODE_AUDIT_LOGGERS_KEY);
849        List<AuditLogger> auditLoggers = Lists.newArrayList();
850        if (alClasses != null && !alClasses.isEmpty()) {
851          for (String className : alClasses) {
852            try {
853              AuditLogger logger;
854              if (DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME.equals(className)) {
855                logger = new DefaultAuditLogger();
856              } else {
857                logger = (AuditLogger) Class.forName(className).newInstance();
858              }
859              logger.initialize(conf);
860              auditLoggers.add(logger);
861            } catch (RuntimeException re) {
862              throw re;
863            } catch (Exception e) {
864              throw new RuntimeException(e);
865            }
866          }
867        }
868    
869        // Make sure there is at least one logger installed.
870        if (auditLoggers.isEmpty()) {
871          auditLoggers.add(new DefaultAuditLogger());
872        }
873        return Collections.unmodifiableList(auditLoggers);
874      }
875    
  /**
   * Load (and, if requested, format) the namespace from disk, saving a new
   * image when the loaded one is stale, and open the edit log for write
   * unless coming up as an HA standby.
   */
  private void loadFSImage(StartupOption startOpt) throws IOException {
    final FSImage fsImage = getFSImage();

    // format before starting up if requested
    if (startOpt == StartupOption.FORMAT) {
      
      fsImage.format(this, fsImage.getStorage().determineClusterId());// reuse current id

      // After formatting, continue the startup path as a regular start.
      startOpt = StartupOption.REGULAR;
    }
    boolean success = false;
    writeLock();
    try {
      // We shouldn't be calling saveNamespace if we've come up in standby state.
      MetaRecoveryContext recovery = startOpt.createRecoveryContext();
      final boolean staleImage
          = fsImage.recoverTransitionRead(startOpt, this, recovery);
      if (RollingUpgradeStartupOption.ROLLBACK.matches(startOpt)) {
        rollingUpgradeInfo = null;
      }
      // Only save when the image is stale AND we are a non-HA active node
      // not in the middle of a rolling upgrade.
      final boolean needToSave = staleImage && !haEnabled && !isRollingUpgrade(); 
      LOG.info("Need to save fs image? " + needToSave
          + " (staleImage=" + staleImage + ", haEnabled=" + haEnabled
          + ", isRollingUpgrade=" + isRollingUpgrade() + ")");
      if (needToSave) {
        fsImage.saveNamespace(this);
      } else {
        // No need to save, so mark the phase done.
        StartupProgress prog = NameNode.getStartupProgress();
        prog.beginPhase(Phase.SAVING_CHECKPOINT);
        prog.endPhase(Phase.SAVING_CHECKPOINT);
      }
      // This will start a new log segment and write to the seen_txid file, so
      // we shouldn't do it when coming up in standby state
      if (!haEnabled || (haEnabled && startOpt == StartupOption.UPGRADE)) {
        fsImage.openEditLogForWrite();
      }
      success = true;
    } finally {
      // On failure, release storage locks before unlocking the namesystem.
      if (!success) {
        fsImage.close();
      }
      writeUnlock();
    }
    dir.imageLoadComplete();
  }
922    
923      private void startSecretManager() {
924        if (dtSecretManager != null) {
925          try {
926            dtSecretManager.startThreads();
927          } catch (IOException e) {
928            // Inability to start secret manager
929            // can't be recovered from.
930            throw new RuntimeException(e);
931          }
932        }
933      }
934      
935      private void startSecretManagerIfNecessary() {
936        boolean shouldRun = shouldUseDelegationTokens() &&
937          !isInSafeMode() && getEditLog().isOpenForWrite();
938        boolean running = dtSecretManager.isRunning();
939        if (shouldRun && !running) {
940          startSecretManager();
941        }
942      }
943    
944      private void stopSecretManager() {
945        if (dtSecretManager != null) {
946          dtSecretManager.stopThreads();
947        }
948      }
949      
950      /** 
951       * Start services common to both active and standby states
952       * @param haContext 
953       * @throws IOException
954       */
955      void startCommonServices(Configuration conf, HAContext haContext) throws IOException {
956        this.registerMBean(); // register the MBean for the FSNamesystemState
957        writeLock();
958        this.haContext = haContext;
959        try {
960          nnResourceChecker = new NameNodeResourceChecker(conf);
961          checkAvailableResources();
962          assert safeMode != null && !isPopulatingReplQueues();
963          StartupProgress prog = NameNode.getStartupProgress();
964          prog.beginPhase(Phase.SAFEMODE);
965          prog.setTotal(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS,
966            getCompleteBlocksTotal());
967          setBlockTotal();
968          blockManager.activate(conf);
969        } finally {
970          writeUnlock();
971        }
972        
973        registerMXBean();
974        DefaultMetricsSystem.instance().register(this);
975      }
976      
977      /** 
978       * Stop services common to both active and standby states
979       * @throws IOException
980       */
981      void stopCommonServices() {
982        writeLock();
983        try {
984          if (blockManager != null) blockManager.close();
985        } finally {
986          writeUnlock();
987        }
988        RetryCache.clear(retryCache);
989      }
990      
991      /**
992       * Start services required in active state
993       * @throws IOException
994       */
995      void startActiveServices() throws IOException {
996        startingActiveService = true;
997        LOG.info("Starting services required for active state");
998        writeLock();
999        try {
1000          FSEditLog editLog = dir.fsImage.getEditLog();
1001          
1002          if (!editLog.isOpenForWrite()) {
1003            // During startup, we're already open for write during initialization.
1004            editLog.initJournalsForWrite();
1005            // May need to recover
1006            editLog.recoverUnclosedStreams();
1007            
1008            LOG.info("Catching up to latest edits from old active before " +
1009                "taking over writer role in edits logs");
1010            editLogTailer.catchupDuringFailover();
1011            
1012            blockManager.setPostponeBlocksFromFuture(false);
1013            blockManager.getDatanodeManager().markAllDatanodesStale();
1014            blockManager.clearQueues();
1015            blockManager.processAllPendingDNMessages();
1016    
1017            // Only need to re-process the queue, If not in SafeMode.
1018            if (!isInSafeMode()) {
1019              LOG.info("Reprocessing replication and invalidation queues");
1020              initializeReplQueues();
1021            }
1022    
1023            if (LOG.isDebugEnabled()) {
1024              LOG.debug("NameNode metadata after re-processing " +
1025                  "replication and invalidation queues during failover:\n" +
1026                  metaSaveAsString());
1027            }
1028            
1029            long nextTxId = dir.fsImage.getLastAppliedTxId() + 1;
1030            LOG.info("Will take over writing edit logs at txnid " + 
1031                nextTxId);
1032            editLog.setNextTxId(nextTxId);
1033    
1034            dir.fsImage.editLog.openForWrite();
1035          }
1036          
1037          if (haEnabled) {
1038            // Renew all of the leases before becoming active.
1039            // This is because, while we were in standby mode,
1040            // the leases weren't getting renewed on this NN.
1041            // Give them all a fresh start here.
1042            leaseManager.renewAllLeases();
1043          }
1044          leaseManager.startMonitor();
1045          startSecretManagerIfNecessary();
1046    
1047          //ResourceMonitor required only at ActiveNN. See HDFS-2914
1048          this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
1049          nnrmthread.start();
1050    
1051          nnEditLogRoller = new Daemon(new NameNodeEditLogRoller(
1052              editLogRollerThreshold, editLogRollerInterval));
1053          nnEditLogRoller.start();
1054    
1055          cacheManager.startMonitorThread();
1056          blockManager.getDatanodeManager().setShouldSendCachingCommands(true);
1057        } finally {
1058          writeUnlock();
1059          startingActiveService = false;
1060        }
1061      }
1062    
1063      /**
1064       * Initialize replication queues.
1065       */
1066      private void initializeReplQueues() {
1067        LOG.info("initializing replication queues");
1068        blockManager.processMisReplicatedBlocks();
1069        initializedReplQueues = true;
1070      }
1071    
1072      private boolean inActiveState() {
1073        return haContext != null &&
1074            haContext.getState().getServiceState() == HAServiceState.ACTIVE;
1075      }
1076    
1077      /**
1078       * @return Whether the namenode is transitioning to active state and is in the
1079       *         middle of the {@link #startActiveServices()}
1080       */
1081      public boolean inTransitionToActive() {
1082        return haEnabled && inActiveState() && startingActiveService;
1083      }
1084    
1085      private boolean shouldUseDelegationTokens() {
1086        return UserGroupInformation.isSecurityEnabled() ||
1087          alwaysUseDelegationTokensForTests;
1088      }
1089    
1090      /** 
1091       * Stop services required in active state
1092       * @throws InterruptedException
1093       */
1094      void stopActiveServices() {
1095        LOG.info("Stopping services started for active state");
1096        writeLock();
1097        try {
1098          stopSecretManager();
1099          if (leaseManager != null) {
1100            leaseManager.stopMonitor();
1101          }
1102          if (nnrmthread != null) {
1103            ((NameNodeResourceMonitor) nnrmthread.getRunnable()).stopMonitor();
1104            nnrmthread.interrupt();
1105          }
1106          if (nnEditLogRoller != null) {
1107            ((NameNodeEditLogRoller)nnEditLogRoller.getRunnable()).stop();
1108            nnEditLogRoller.interrupt();
1109          }
1110          if (dir != null && dir.fsImage != null) {
1111            if (dir.fsImage.editLog != null) {
1112              dir.fsImage.editLog.close();
1113            }
1114            // Update the fsimage with the last txid that we wrote
1115            // so that the tailer starts from the right spot.
1116            dir.fsImage.updateLastAppliedTxIdFromWritten();
1117          }
1118          cacheManager.stopMonitorThread();
1119          cacheManager.clearDirectiveStats();
1120          blockManager.getDatanodeManager().clearPendingCachingCommands();
1121          blockManager.getDatanodeManager().setShouldSendCachingCommands(false);
1122          // Don't want to keep replication queues when not in Active.
1123          blockManager.clearQueues();
1124          initializedReplQueues = false;
1125        } finally {
1126          writeUnlock();
1127        }
1128      }
1129      
1130      /**
1131       * Start services required in standby state 
1132       * 
1133       * @throws IOException
1134       */
1135      void startStandbyServices(final Configuration conf) throws IOException {
1136        LOG.info("Starting services required for standby state");
1137        if (!dir.fsImage.editLog.isOpenForRead()) {
1138          // During startup, we're already open for read.
1139          dir.fsImage.editLog.initSharedJournalsForRead();
1140        }
1141        
1142        blockManager.setPostponeBlocksFromFuture(true);
1143    
1144        editLogTailer = new EditLogTailer(this, conf);
1145        editLogTailer.start();
1146        if (standbyShouldCheckpoint) {
1147          standbyCheckpointer = new StandbyCheckpointer(conf, this);
1148          standbyCheckpointer.start();
1149        }
1150      }
1151    
1152      /**
1153       * Called when the NN is in Standby state and the editlog tailer tails the
1154       * OP_ROLLING_UPGRADE_START.
1155       */
1156      void triggerRollbackCheckpoint() {
1157        setNeedRollbackFsImage(true);
1158        if (standbyCheckpointer != null) {
1159          standbyCheckpointer.triggerRollbackCheckpoint();
1160        }
1161      }
1162    
1163      /**
1164       * Called while the NN is in Standby state, but just about to be
1165       * asked to enter Active state. This cancels any checkpoints
1166       * currently being taken.
1167       */
1168      void prepareToStopStandbyServices() throws ServiceFailedException {
1169        if (standbyCheckpointer != null) {
1170          standbyCheckpointer.cancelAndPreventCheckpoints(
1171              "About to leave standby state");
1172        }
1173      }
1174    
1175      /** Stop services required in standby state */
1176      void stopStandbyServices() throws IOException {
1177        LOG.info("Stopping services started for standby state");
1178        if (standbyCheckpointer != null) {
1179          standbyCheckpointer.stop();
1180        }
1181        if (editLogTailer != null) {
1182          editLogTailer.stop();
1183        }
1184        if (dir != null && dir.fsImage != null && dir.fsImage.editLog != null) {
1185          dir.fsImage.editLog.close();
1186        }
1187      }
1188      
1189      @Override
1190      public void checkOperation(OperationCategory op) throws StandbyException {
1191        if (haContext != null) {
1192          // null in some unit tests
1193          haContext.checkOperation(op);
1194        }
1195      }
1196      
1197      /**
1198       * @throws RetriableException
1199       *           If 1) The NameNode is in SafeMode, 2) HA is enabled, and 3)
1200       *           NameNode is in active state
1201       * @throws SafeModeException
1202       *           Otherwise if NameNode is in SafeMode.
1203       */
1204      private void checkNameNodeSafeMode(String errorMsg)
1205          throws RetriableException, SafeModeException {
1206        if (isInSafeMode()) {
1207          SafeModeException se = new SafeModeException(errorMsg, safeMode);
1208          if (haEnabled && haContext != null
1209              && haContext.getState().getServiceState() == HAServiceState.ACTIVE
1210              && shouldRetrySafeMode(this.safeMode)) {
1211            throw new RetriableException(se);
1212          } else {
1213            throw se;
1214          }
1215        }
1216      }
1217      
1218      /**
1219       * We already know that the safemode is on. We will throw a RetriableException
1220       * if the safemode is not manual or caused by low resource.
1221       */
1222      private boolean shouldRetrySafeMode(SafeModeInfo safeMode) {
1223        if (safeMode == null) {
1224          return false;
1225        } else {
1226          return !safeMode.isManual() && !safeMode.areResourcesLow();
1227        }
1228      }
1229      
  /** @return the configured image (name) storage directories as URIs. */
  public static Collection<URI> getNamespaceDirs(Configuration conf) {
    return getStorageDirs(conf, DFS_NAMENODE_NAME_DIR_KEY);
  }
1233    
1234      /**
1235       * Get all edits dirs which are required. If any shared edits dirs are
1236       * configured, these are also included in the set of required dirs.
1237       * 
1238       * @param conf the HDFS configuration.
1239       * @return all required dirs.
1240       */
1241      public static Collection<URI> getRequiredNamespaceEditsDirs(Configuration conf) {
1242        Set<URI> ret = new HashSet<URI>();
1243        ret.addAll(getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY));
1244        ret.addAll(getSharedEditsDirs(conf));
1245        return ret;
1246      }
1247    
1248      private static Collection<URI> getStorageDirs(Configuration conf,
1249                                                    String propertyName) {
1250        Collection<String> dirNames = conf.getTrimmedStringCollection(propertyName);
1251        StartupOption startOpt = NameNode.getStartupOption(conf);
1252        if(startOpt == StartupOption.IMPORT) {
1253          // In case of IMPORT this will get rid of default directories 
1254          // but will retain directories specified in hdfs-site.xml
1255          // When importing image from a checkpoint, the name-node can
1256          // start with empty set of storage directories.
1257          Configuration cE = new HdfsConfiguration(false);
1258          cE.addResource("core-default.xml");
1259          cE.addResource("core-site.xml");
1260          cE.addResource("hdfs-default.xml");
1261          Collection<String> dirNames2 = cE.getTrimmedStringCollection(propertyName);
1262          dirNames.removeAll(dirNames2);
1263          if(dirNames.isEmpty())
1264            LOG.warn("!!! WARNING !!!" +
1265              "\n\tThe NameNode currently runs without persistent storage." +
1266              "\n\tAny changes to the file system meta-data may be lost." +
1267              "\n\tRecommended actions:" +
1268              "\n\t\t- shutdown and restart NameNode with configured \"" 
1269              + propertyName + "\" in hdfs-site.xml;" +
1270              "\n\t\t- use Backup Node as a persistent and up-to-date storage " +
1271              "of the file system meta-data.");
1272        } else if (dirNames.isEmpty()) {
1273          dirNames = Collections.singletonList(
1274              DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_DEFAULT);
1275        }
1276        return Util.stringCollectionAsURIs(dirNames);
1277      }
1278    
1279      /**
1280       * Return an ordered list of edits directories to write to.
1281       * The list is ordered such that all shared edits directories
1282       * are ordered before non-shared directories, and any duplicates
1283       * are removed. The order they are specified in the configuration
1284       * is retained.
1285       * @return Collection of shared edits directories.
1286       * @throws IOException if multiple shared edits directories are configured
1287       */
1288      public static List<URI> getNamespaceEditsDirs(Configuration conf)
1289          throws IOException {
1290        return getNamespaceEditsDirs(conf, true);
1291      }
1292      
  /**
   * Return an ordered, de-duplicated list of edits directories, optionally
   * with the shared (HA) edits directories placed first.
   *
   * @param conf configuration to read the directory keys from
   * @param includeShared whether to include the shared edits dirs ahead of
   *        the local ones
   * @return list of edits directory URIs; falls back to the image (name)
   *         directories when no edits dirs are configured at all
   * @throws IOException if multiple shared edits directories are configured
   */
  public static List<URI> getNamespaceEditsDirs(Configuration conf,
      boolean includeShared)
      throws IOException {
    // Use a LinkedHashSet so that order is maintained while we de-dup
    // the entries.
    LinkedHashSet<URI> editsDirs = new LinkedHashSet<URI>();
    
    if (includeShared) {
      List<URI> sharedDirs = getSharedEditsDirs(conf);
  
      // Fail until multiple shared edits directories are supported (HDFS-2782)
      if (sharedDirs.size() > 1) {
        throw new IOException(
            "Multiple shared edits directories are not yet supported");
      }
  
      // First add the shared edits dirs. It's critical that the shared dirs
      // are added first, since JournalSet syncs them in the order they are listed,
      // and we need to make sure all edits are in place in the shared storage
      // before they are replicated locally. See HDFS-2874.
      for (URI dir : sharedDirs) {
        if (!editsDirs.add(dir)) {
          LOG.warn("Edits URI " + dir + " listed multiple times in " + 
              DFS_NAMENODE_SHARED_EDITS_DIR_KEY + ". Ignoring duplicates.");
        }
      }
    }    
    // Now add the non-shared dirs. Duplicates against the shared set are
    // dropped (the shared copy wins, preserving the ordering guarantee).
    for (URI dir : getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_KEY)) {
      if (!editsDirs.add(dir)) {
        LOG.warn("Edits URI " + dir + " listed multiple times in " + 
            DFS_NAMENODE_SHARED_EDITS_DIR_KEY + " and " +
            DFS_NAMENODE_EDITS_DIR_KEY + ". Ignoring duplicates.");
      }
    }

    if (editsDirs.isEmpty()) {
      // If this is the case, no edit dirs have been explicitly configured.
      // Image dirs are to be used for edits too.
      return Lists.newArrayList(getNamespaceDirs(conf));
    } else {
      return Lists.newArrayList(editsDirs);
    }
  }
1337      
1338      /**
1339       * Returns edit directories that are shared between primary and secondary.
1340       * @param conf
1341       * @return Collection of edit directories.
1342       */
1343      public static List<URI> getSharedEditsDirs(Configuration conf) {
1344        // don't use getStorageDirs here, because we want an empty default
1345        // rather than the dir in /tmp
1346        Collection<String> dirNames = conf.getTrimmedStringCollection(
1347            DFS_NAMENODE_SHARED_EDITS_DIR_KEY);
1348        return Util.stringCollectionAsURIs(dirNames);
1349      }
1350    
1351      @Override
1352      public void readLock() {
1353        this.fsLock.readLock().lock();
1354      }
1355      @Override
1356      public void longReadLockInterruptibly() throws InterruptedException {
1357        this.fsLock.longReadLock().lockInterruptibly();
1358        try {
1359          this.fsLock.readLock().lockInterruptibly();
1360        } catch (InterruptedException ie) {
1361          // In the event we're interrupted while getting the normal FSNS read lock,
1362          // release the long read lock.
1363          this.fsLock.longReadLock().unlock();
1364          throw ie;
1365        }
1366      }
1367      @Override
1368      public void longReadUnlock() {
1369        this.fsLock.readLock().unlock();
1370        this.fsLock.longReadLock().unlock();
1371      }
1372      @Override
1373      public void readUnlock() {
1374        this.fsLock.readLock().unlock();
1375      }
1376      @Override
1377      public void writeLock() {
1378        this.fsLock.longReadLock().lock();
1379        this.fsLock.writeLock().lock();
1380      }
1381      @Override
1382      public void writeLockInterruptibly() throws InterruptedException {
1383        this.fsLock.longReadLock().lockInterruptibly();
1384        try {
1385          this.fsLock.writeLock().lockInterruptibly();
1386        } catch (InterruptedException ie) {
1387          // In the event we're interrupted while getting the normal FSNS write
1388          // lock, release the long read lock.
1389          this.fsLock.longReadLock().unlock();
1390          throw ie;
1391        }
1392      }
1393      @Override
1394      public void writeUnlock() {
1395        this.fsLock.writeLock().unlock();
1396        this.fsLock.longReadLock().unlock();
1397      }
1398      @Override
1399      public boolean hasWriteLock() {
1400        return this.fsLock.isWriteLockedByCurrentThread();
1401      }
1402      @Override
1403      public boolean hasReadLock() {
1404        return this.fsLock.getReadHoldCount() > 0 || hasWriteLock();
1405      }
1406    
1407      public int getReadHoldCount() {
1408        return this.fsLock.getReadHoldCount();
1409      }
1410    
1411      public int getWriteHoldCount() {
1412        return this.fsLock.getWriteHoldCount();
1413      }
1414    
  /** @return a consistent {@link NamespaceInfo} snapshot, taken under the read lock. */
  NamespaceInfo getNamespaceInfo() {
    readLock();
    try {
      return unprotectedGetNamespaceInfo();
    } finally {
      readUnlock();
    }
  }
1423    
1424      /**
1425       * Version of @see #getNamespaceInfo() that is not protected by a lock.
1426       */
1427      NamespaceInfo unprotectedGetNamespaceInfo() {
1428        return new NamespaceInfo(dir.fsImage.getStorage().getNamespaceID(),
1429            getClusterId(), getBlockPoolId(),
1430            dir.fsImage.getStorage().getCTime());
1431      }
1432    
1433      /**
1434       * Close down this file system manager.
1435       * Causes heartbeat and lease daemons to stop; waits briefly for
1436       * them to finish, but a short timeout returns control back to caller.
1437       */
1438      void close() {
1439        fsRunning = false;
1440        try {
1441          stopCommonServices();
1442          if (smmthread != null) smmthread.interrupt();
1443        } finally {
1444          // using finally to ensure we also wait for lease daemon
1445          try {
1446            stopActiveServices();
1447            stopStandbyServices();
1448            if (dir != null) {
1449              dir.close();
1450            }
1451          } catch (IOException ie) {
1452            LOG.error("Error closing FSDirectory", ie);
1453            IOUtils.cleanup(LOG, dir);
1454          }
1455        }
1456      }
1457    
  /** @return true until {@link #close()} has been called. */
  @Override
  public boolean isRunning() {
    return fsRunning;
  }
1462      
1463      @Override
1464      public boolean isInStandbyState() {
1465        if (haContext == null || haContext.getState() == null) {
1466          // We're still starting up. In this case, if HA is
1467          // on for the cluster, we always start in standby. Otherwise
1468          // start in active.
1469          return haEnabled;
1470        }
1471    
1472        return HAServiceState.STANDBY == haContext.getState().getServiceState();
1473      }
1474    
1475      /**
1476       * Dump all metadata into specified file
1477       */
1478      void metaSave(String filename) throws IOException {
1479        checkSuperuserPrivilege();
1480        checkOperation(OperationCategory.UNCHECKED);
1481        writeLock();
1482        try {
1483          checkOperation(OperationCategory.UNCHECKED);
1484          File file = new File(System.getProperty("hadoop.log.dir"), filename);
1485          PrintWriter out = new PrintWriter(new BufferedWriter(
1486              new OutputStreamWriter(new FileOutputStream(file), Charsets.UTF_8)));
1487          metaSave(out);
1488          out.flush();
1489          out.close();
1490        } finally {
1491          writeUnlock();
1492        }
1493      }
1494    
1495      private void metaSave(PrintWriter out) {
1496        assert hasWriteLock();
1497        long totalInodes = this.dir.totalInodes();
1498        long totalBlocks = this.getBlocksTotal();
1499        out.println(totalInodes + " files and directories, " + totalBlocks
1500            + " blocks = " + (totalInodes + totalBlocks) + " total");
1501    
1502        blockManager.metaSave(out);
1503      }
1504    
1505      private String metaSaveAsString() {
1506        StringWriter sw = new StringWriter();
1507        PrintWriter pw = new PrintWriter(sw);
1508        metaSave(pw);
1509        pw.flush();
1510        return sw.toString();
1511      }
1512      
1513    
  /** @return the default block size from the server defaults. */
  long getDefaultBlockSize() {
    return serverDefaults.getBlockSize();
  }
1517    
  /**
   * @return the server defaults for clients.
   * @throws StandbyException if this NameNode may not serve reads
   */
  FsServerDefaults getServerDefaults() throws StandbyException {
    checkOperation(OperationCategory.READ);
    return serverDefaults;
  }
1522    
  /** @return the configured access-time precision, in milliseconds. */
  long getAccessTimePrecision() {
    return accessTimePrecision;
  }
1526    
  /** @return true iff access-time tracking is enabled (precision &gt; 0). */
  private boolean isAccessTimeSupported() {
    return accessTimePrecision > 0;
  }
1530    
1531      /////////////////////////////////////////////////////////
1532      //
1533      // These methods are called by HadoopFS clients
1534      //
1535      /////////////////////////////////////////////////////////
1536      /**
1537       * Set permissions for an existing file.
1538       * @throws IOException
1539       */
1540      void setPermission(String src, FsPermission permission)
1541          throws AccessControlException, FileNotFoundException, SafeModeException,
1542          UnresolvedLinkException, IOException {
1543        try {
1544          setPermissionInt(src, permission);
1545        } catch (AccessControlException e) {
1546          logAuditEvent(false, "setPermission", src);
1547          throw e;
1548        }
1549      }
1550    
  /**
   * Internal worker for {@link #setPermission}: resolves the path, verifies
   * ownership, applies the permission under the write lock, then syncs the
   * edit log and writes the success audit entry.
   */
  private void setPermissionInt(String src, FsPermission permission)
      throws AccessControlException, FileNotFoundException, SafeModeException,
      UnresolvedLinkException, IOException {
    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      // Re-check after acquiring the lock: the HA state may have changed.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set permission for " + src);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      // Only the owner (or a superuser) may change permissions.
      checkOwner(pc, src);
      dir.setPermission(src, permission);
      resultingStat = getAuditFileInfo(src, false);
    } finally {
      writeUnlock();
    }
    // Sync outside the lock to avoid blocking other operations.
    getEditLog().logSync();
    logAuditEvent(true, "setPermission", src, null, resultingStat);
  }
1572    
1573      /**
1574       * Set owner for an existing file.
1575       * @throws IOException
1576       */
1577      void setOwner(String src, String username, String group)
1578          throws AccessControlException, FileNotFoundException, SafeModeException,
1579          UnresolvedLinkException, IOException {
1580        try {
1581          setOwnerInt(src, username, group);
1582        } catch (AccessControlException e) {
1583          logAuditEvent(false, "setOwner", src);
1584          throw e;
1585        } 
1586      }
1587    
  /**
   * Internal worker for {@link #setOwner}: resolves the path, enforces the
   * ownership-change rules, applies the change under the write lock, then
   * syncs the edit log and writes the success audit entry.
   */
  private void setOwnerInt(String src, String username, String group)
      throws AccessControlException, FileNotFoundException, SafeModeException,
      UnresolvedLinkException, IOException {
    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      // Re-check after acquiring the lock: the HA state may have changed.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set owner for " + src);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      checkOwner(pc, src);
      if (!pc.isSuperUser()) {
        // Non-superusers may not transfer ownership, and may only assign
        // the file to a group they belong to.
        if (username != null && !pc.getUser().equals(username)) {
          throw new AccessControlException("Non-super user cannot change owner");
        }
        if (group != null && !pc.containsGroup(group)) {
          throw new AccessControlException("User does not belong to " + group);
        }
      }
      dir.setOwner(src, username, group);
      resultingStat = getAuditFileInfo(src, false);
    } finally {
      writeUnlock();
    }
    // Sync outside the lock to avoid blocking other operations.
    getEditLog().logSync();
    logAuditEvent(true, "setOwner", src, null, resultingStat);
  }
1617    
1618      /**
1619       * Get block locations within the specified range.
1620       * @see ClientProtocol#getBlockLocations(String, long, long)
1621       */
1622      LocatedBlocks getBlockLocations(String clientMachine, String src,
1623          long offset, long length) throws AccessControlException,
1624          FileNotFoundException, UnresolvedLinkException, IOException {
1625        LocatedBlocks blocks = getBlockLocations(src, offset, length, true, true,
1626            true);
1627        if (blocks != null) {
1628          blockManager.getDatanodeManager().sortLocatedBlocks(
1629              clientMachine, blocks.getLocatedBlocks());
1630          
1631          LocatedBlock lastBlock = blocks.getLastLocatedBlock();
1632          if (lastBlock != null) {
1633            ArrayList<LocatedBlock> lastBlockList = new ArrayList<LocatedBlock>();
1634            lastBlockList.add(lastBlock);
1635            blockManager.getDatanodeManager().sortLocatedBlocks(
1636                                  clientMachine, lastBlockList);
1637          }
1638        }
1639        return blocks;
1640      }
1641    
1642      /**
1643       * Get block locations within the specified range.
1644       * @see ClientProtocol#getBlockLocations(String, long, long)
1645       * @throws FileNotFoundException, UnresolvedLinkException, IOException
1646       */
1647      LocatedBlocks getBlockLocations(String src, long offset, long length,
1648          boolean doAccessTime, boolean needBlockToken, boolean checkSafeMode)
1649          throws FileNotFoundException, UnresolvedLinkException, IOException {
1650        try {
1651          return getBlockLocationsInt(src, offset, length, doAccessTime,
1652                                      needBlockToken, checkSafeMode);
1653        } catch (AccessControlException e) {
1654          logAuditEvent(false, "open", src);
1655          throw e;
1656        }
1657      }
1658    
  /**
   * Internal worker for {@link #getBlockLocations}: validates the range,
   * fetches locations (updating access time if needed), audits the open,
   * and optionally rejects location-less blocks while in safe mode.
   */
  private LocatedBlocks getBlockLocationsInt(String src, long offset,
      long length, boolean doAccessTime, boolean needBlockToken,
      boolean checkSafeMode)
      throws FileNotFoundException, UnresolvedLinkException, IOException {
    if (offset < 0) {
      throw new HadoopIllegalArgumentException(
          "Negative offset is not supported. File: " + src);
    }
    if (length < 0) {
      throw new HadoopIllegalArgumentException(
          "Negative length is not supported. File: " + src);
    }
    final LocatedBlocks ret = getBlockLocationsUpdateTimes(src,
        offset, length, doAccessTime, needBlockToken);  
    logAuditEvent(true, "open", src);
    if (checkSafeMode && isInSafeMode()) {
      for (LocatedBlock b : ret.getLocatedBlocks()) {
        // if safemode & no block locations yet then throw safemodeException
        if ((b.getLocations() == null) || (b.getLocations().length == 0)) {
          SafeModeException se = new SafeModeException(
              "Zero blocklocations for " + src, safeMode);
          // On an active HA NameNode the client should retry, since block
          // reports may still be arriving; otherwise fail outright.
          if (haEnabled && haContext != null && 
              haContext.getState().getServiceState() == HAServiceState.ACTIVE) {
            throw new RetriableException(se);
          } else {
            throw se;
          }
        }
      }
    }
    return ret;
  }
1691    
1692      /*
1693       * Get block locations within the specified range, updating the
1694       * access times if necessary. 
1695       */
1696      private LocatedBlocks getBlockLocationsUpdateTimes(String src, long offset,
1697          long length, boolean doAccessTime, boolean needBlockToken)
1698          throws FileNotFoundException,
1699          UnresolvedLinkException, IOException {
1700        FSPermissionChecker pc = getPermissionChecker();
1701        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
1702        for (int attempt = 0; attempt < 2; attempt++) {
1703          boolean isReadOp = (attempt == 0);
1704          if (isReadOp) { // first attempt is with readlock
1705            checkOperation(OperationCategory.READ);
1706            readLock();
1707          }  else { // second attempt is with  write lock
1708            checkOperation(OperationCategory.WRITE);
1709            writeLock(); // writelock is needed to set accesstime
1710          }
1711          src = FSDirectory.resolvePath(src, pathComponents, dir);
1712          try {
1713            if (isReadOp) {
1714              checkOperation(OperationCategory.READ);
1715            } else {
1716              checkOperation(OperationCategory.WRITE);
1717            }
1718            if (isPermissionEnabled) {
1719              checkPathAccess(pc, src, FsAction.READ);
1720            }
1721    
1722            // if the namenode is in safemode, then do not update access time
1723            if (isInSafeMode()) {
1724              doAccessTime = false;
1725            }
1726    
1727            final INodesInPath iip = dir.getLastINodeInPath(src);
1728            final INodeFile inode = INodeFile.valueOf(iip.getLastINode(), src);
1729            if (!iip.isSnapshot() //snapshots are readonly, so don't update atime.
1730                && doAccessTime && isAccessTimeSupported()) {
1731              final long now = now();
1732              if (now > inode.getAccessTime() + getAccessTimePrecision()) {
1733                // if we have to set access time but we only have the readlock, then
1734                // restart this entire operation with the writeLock.
1735                if (isReadOp) {
1736                  continue;
1737                }
1738                dir.setTimes(src, inode, -1, now, false, iip.getLatestSnapshotId());
1739              }
1740            }
1741            final long fileSize = iip.isSnapshot() ?
1742                inode.computeFileSize(iip.getPathSnapshotId())
1743                : inode.computeFileSizeNotIncludingLastUcBlock();
1744            boolean isUc = inode.isUnderConstruction();
1745            if (iip.isSnapshot()) {
1746              // if src indicates a snapshot file, we need to make sure the returned
1747              // blocks do not exceed the size of the snapshot file.
1748              length = Math.min(length, fileSize - offset);
1749              isUc = false;
1750            }
1751            LocatedBlocks blocks =
1752              blockManager.createLocatedBlocks(inode.getBlocks(), fileSize,
1753                isUc, offset, length, needBlockToken, iip.isSnapshot());
1754            // Set caching information for the located blocks.
1755            for (LocatedBlock lb: blocks.getLocatedBlocks()) {
1756              cacheManager.setCachedLocations(lb);
1757            }
1758            return blocks;
1759          } finally {
1760            if (isReadOp) {
1761              readUnlock();
1762            } else {
1763              writeUnlock();
1764            }
1765          }
1766        }
1767        return null; // can never reach here
1768      }
1769    
1770      /**
1771       * Moves all the blocks from srcs and appends them to trg
1772       * To avoid rollbacks we will verify validitity of ALL of the args
1773       * before we start actual move.
1774       * 
1775       * This does not support ".inodes" relative path
1776       * @param target
1777       * @param srcs
1778       * @throws IOException
1779       */
1780      void concat(String target, String [] srcs) 
1781          throws IOException, UnresolvedLinkException {
1782        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
1783        if (cacheEntry != null && cacheEntry.isSuccess()) {
1784          return; // Return previous response
1785        }
1786        
1787        // Either there is no previous request in progres or it has failed
1788        if(FSNamesystem.LOG.isDebugEnabled()) {
1789          FSNamesystem.LOG.debug("concat " + Arrays.toString(srcs) +
1790              " to " + target);
1791        }
1792        
1793        boolean success = false;
1794        try {
1795          concatInt(target, srcs, cacheEntry != null);
1796          success = true;
1797        } catch (AccessControlException e) {
1798          logAuditEvent(false, "concat", Arrays.toString(srcs), target, null);
1799          throw e;
1800        } finally {
1801          RetryCache.setState(cacheEntry, success);
1802        }
1803      }
1804    
  /**
   * Internal worker for {@link #concat}: validates the argument shapes
   * (non-empty target, sources in the same directory as the target), then
   * performs the move under the write lock and audits the result.
   */
  private void concatInt(String target, String [] srcs, 
      boolean logRetryCache) throws IOException, UnresolvedLinkException {
    // verify args
    if(target.isEmpty()) {
      throw new IllegalArgumentException("Target file name is empty");
    }
    if(srcs == null || srcs.length == 0) {
      throw new IllegalArgumentException("No sources given");
    }
    
    // We require all files be in the same directory
    String trgParent = 
      target.substring(0, target.lastIndexOf(Path.SEPARATOR_CHAR));
    for (String s : srcs) {
      String srcParent = s.substring(0, s.lastIndexOf(Path.SEPARATOR_CHAR));
      if (!srcParent.equals(trgParent)) {
        throw new IllegalArgumentException(
           "Sources and target are not in the same directory");
      }
    }

    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      // Re-check after acquiring the lock: the HA state may have changed.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot concat " + target);
      concatInternal(pc, target, srcs, logRetryCache);
      resultingStat = getAuditFileInfo(target, false);
    } finally {
      writeUnlock();
    }
    // Sync outside the lock to avoid blocking other operations.
    getEditLog().logSync();
    logAuditEvent(true, "concat", Arrays.toString(srcs), target, resultingStat);
  }
1841    
1842      /** See {@link #concat(String, String[])} */
1843      private void concatInternal(FSPermissionChecker pc, String target,
1844          String[] srcs, boolean logRetryCache) throws IOException,
1845          UnresolvedLinkException {
1846        assert hasWriteLock();
1847    
1848        // write permission for the target
1849        if (isPermissionEnabled) {
1850          checkPathAccess(pc, target, FsAction.WRITE);
1851    
1852          // and srcs
1853          for(String aSrc: srcs) {
1854            checkPathAccess(pc, aSrc, FsAction.READ); // read the file
1855            checkParentAccess(pc, aSrc, FsAction.WRITE); // for delete 
1856          }
1857        }
1858    
1859        // to make sure no two files are the same
1860        Set<INode> si = new HashSet<INode>();
1861    
1862        // we put the following prerequisite for the operation
1863        // replication and blocks sizes should be the same for ALL the blocks
1864    
1865        // check the target
1866        final INodeFile trgInode = INodeFile.valueOf(dir.getINode4Write(target),
1867            target);
1868        if(trgInode.isUnderConstruction()) {
1869          throw new HadoopIllegalArgumentException("concat: target file "
1870              + target + " is under construction");
1871        }
1872        // per design target shouldn't be empty and all the blocks same size
1873        if(trgInode.numBlocks() == 0) {
1874          throw new HadoopIllegalArgumentException("concat: target file "
1875              + target + " is empty");
1876        }
1877        if (trgInode.isWithSnapshot()) {
1878          throw new HadoopIllegalArgumentException("concat: target file "
1879              + target + " is in a snapshot");
1880        }
1881    
1882        long blockSize = trgInode.getPreferredBlockSize();
1883    
1884        // check the end block to be full
1885        final BlockInfo last = trgInode.getLastBlock();
1886        if(blockSize != last.getNumBytes()) {
1887          throw new HadoopIllegalArgumentException("The last block in " + target
1888              + " is not full; last block size = " + last.getNumBytes()
1889              + " but file block size = " + blockSize);
1890        }
1891    
1892        si.add(trgInode);
1893        final short repl = trgInode.getFileReplication();
1894    
1895        // now check the srcs
1896        boolean endSrc = false; // final src file doesn't have to have full end block
1897        for(int i=0; i<srcs.length; i++) {
1898          String src = srcs[i];
1899          if(i==srcs.length-1)
1900            endSrc=true;
1901    
1902          final INodeFile srcInode = INodeFile.valueOf(dir.getINode4Write(src), src);
1903          if(src.isEmpty() 
1904              || srcInode.isUnderConstruction()
1905              || srcInode.numBlocks() == 0) {
1906            throw new HadoopIllegalArgumentException("concat: source file " + src
1907                + " is invalid or empty or underConstruction");
1908          }
1909    
1910          // check replication and blocks size
1911          if(repl != srcInode.getBlockReplication()) {
1912            throw new HadoopIllegalArgumentException("concat: the soruce file "
1913                + src + " and the target file " + target
1914                + " should have the same replication: source replication is "
1915                + srcInode.getBlockReplication()
1916                + " but target replication is " + repl);
1917          }
1918    
1919          //boolean endBlock=false;
1920          // verify that all the blocks are of the same length as target
1921          // should be enough to check the end blocks
1922          final BlockInfo[] srcBlocks = srcInode.getBlocks();
1923          int idx = srcBlocks.length-1;
1924          if(endSrc)
1925            idx = srcBlocks.length-2; // end block of endSrc is OK not to be full
1926          if(idx >= 0 && srcBlocks[idx].getNumBytes() != blockSize) {
1927            throw new HadoopIllegalArgumentException("concat: the soruce file "
1928                + src + " and the target file " + target
1929                + " should have the same blocks sizes: target block size is "
1930                + blockSize + " but the size of source block " + idx + " is "
1931                + srcBlocks[idx].getNumBytes());
1932          }
1933    
1934          si.add(srcInode);
1935        }
1936    
1937        // make sure no two files are the same
1938        if(si.size() < srcs.length+1) { // trg + srcs
1939          // it means at least two files are the same
1940          throw new HadoopIllegalArgumentException(
1941              "concat: at least two of the source files are the same");
1942        }
1943    
1944        if(NameNode.stateChangeLog.isDebugEnabled()) {
1945          NameNode.stateChangeLog.debug("DIR* NameSystem.concat: " + 
1946              Arrays.toString(srcs) + " to " + target);
1947        }
1948    
1949        dir.concat(target,srcs, logRetryCache);
1950      }
1951      
1952      /**
1953       * stores the modification and access time for this inode. 
1954       * The access time is precise upto an hour. The transaction, if needed, is
1955       * written to the edits log but is not flushed.
1956       */
1957      void setTimes(String src, long mtime, long atime) 
1958          throws IOException, UnresolvedLinkException {
1959        if (!isAccessTimeSupported() && atime != -1) {
1960          throw new IOException("Access time for hdfs is not configured. " +
1961                                " Please set " + DFS_NAMENODE_ACCESSTIME_PRECISION_KEY + " configuration parameter.");
1962        }
1963        try {
1964          setTimesInt(src, mtime, atime);
1965        } catch (AccessControlException e) {
1966          logAuditEvent(false, "setTimes", src);
1967          throw e;
1968        }
1969      }
1970    
  /**
   * Internal worker for {@link #setTimes}: resolves the path, checks write
   * access, and updates the inode's times under the write lock. The edit is
   * logged but deliberately not synced here (see {@link #setTimes} javadoc).
   */
  private void setTimesInt(String src, long mtime, long atime) 
    throws IOException, UnresolvedLinkException {
    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      // Re-check after acquiring the lock: the HA state may have changed.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set times " + src);
      src = FSDirectory.resolvePath(src, pathComponents, dir);

      // Write access is required to set access and modification times
      if (isPermissionEnabled) {
        checkPathAccess(pc, src, FsAction.WRITE);
      }
      final INodesInPath iip = dir.getINodesInPath4Write(src);
      final INode inode = iip.getLastINode();
      if (inode != null) {
        dir.setTimes(src, inode, mtime, atime, true, iip.getLatestSnapshotId());
        resultingStat = getAuditFileInfo(src, false);
      } else {
        throw new FileNotFoundException("File/Directory " + src + " does not exist.");
      }
    } finally {
      writeUnlock();
    }
    logAuditEvent(true, "setTimes", src, null, resultingStat);
  }
2000    
2001      /**
2002       * Create a symbolic link.
2003       */
2004      @SuppressWarnings("deprecation")
2005      void createSymlink(String target, String link,
2006          PermissionStatus dirPerms, boolean createParent) 
2007          throws IOException, UnresolvedLinkException {
2008        if (!FileSystem.areSymlinksEnabled()) {
2009          throw new UnsupportedOperationException("Symlinks not supported");
2010        }
2011        if (!DFSUtil.isValidName(link)) {
2012          throw new InvalidPathException("Invalid link name: " + link);
2013        }
2014        if (FSDirectory.isReservedName(target)) {
2015          throw new InvalidPathException("Invalid target name: " + target);
2016        }
2017        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
2018        if (cacheEntry != null && cacheEntry.isSuccess()) {
2019          return; // Return previous response
2020        }
2021        boolean success = false;
2022        try {
2023          createSymlinkInt(target, link, dirPerms, createParent, cacheEntry != null);
2024          success = true;
2025        } catch (AccessControlException e) {
2026          logAuditEvent(false, "createSymlink", link, target, null);
2027          throw e;
2028        } finally {
2029          RetryCache.setState(cacheEntry, success);
2030        }
2031      }
2032    
  /**
   * Internal worker for {@link #createSymlink}: resolves the link path,
   * validates the parent/ancestor state and permissions, checks the inode
   * quota, and adds the symlink under the write lock before syncing the
   * edit log and auditing.
   */
  private void createSymlinkInt(String target, String link,
      PermissionStatus dirPerms, boolean createParent, boolean logRetryCache) 
      throws IOException, UnresolvedLinkException {
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* NameSystem.createSymlink: target="
          + target + " link=" + link);
    }
    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(link);
    writeLock();
    try {
      // Re-check after acquiring the lock: the HA state may have changed.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot create symlink " + link);
      link = FSDirectory.resolvePath(link, pathComponents, dir);
      if (!createParent) {
        // Without createParent the immediate parent must already exist.
        verifyParentDir(link);
      }
      if (!dir.isValidToCreate(link)) {
        throw new IOException("failed to create link " + link 
            +" either because the filename is invalid or the file exists");
      }
      if (isPermissionEnabled) {
        checkAncestorAccess(pc, link, FsAction.WRITE);
      }
      // validate that we have enough inodes.
      checkFsObjectLimit();

      // add symbolic link to namespace
      dir.addSymlink(link, target, dirPerms, createParent, logRetryCache);
      resultingStat = getAuditFileInfo(link, false);
    } finally {
      writeUnlock();
    }
    // Sync outside the lock to avoid blocking other operations.
    getEditLog().logSync();
    logAuditEvent(true, "createSymlink", link, target, resultingStat);
  }
2071    
2072      /**
2073       * Set replication for an existing file.
2074       * 
2075       * The NameNode sets new replication and schedules either replication of 
2076       * under-replicated data blocks or removal of the excessive block copies 
2077       * if the blocks are over-replicated.
2078       * 
2079       * @see ClientProtocol#setReplication(String, short)
2080       * @param src file name
2081       * @param replication new replication
2082       * @return true if successful; 
2083       *         false if file does not exist or is a directory
2084       */
2085      boolean setReplication(final String src, final short replication)
2086          throws IOException {
2087        try {
2088          return setReplicationInt(src, replication);
2089        } catch (AccessControlException e) {
2090          logAuditEvent(false, "setReplication", src);
2091          throw e;
2092        }
2093      }
2094    
  /**
   * Internal implementation of setReplication: updates the stored replication
   * factor under the write lock and hands the affected blocks to the block
   * manager so it can schedule replication work.
   *
   * @param src file path
   * @param replication new replication factor
   * @return true if src names an existing file; false otherwise
   * @throws IOException on invalid replication, safe mode, or access denial
   */
  private boolean setReplicationInt(String src, final short replication)
      throws IOException {
    // Reject out-of-range replication factors before taking any locks.
    blockManager.verifyReplication(src, replication, null);
    final boolean isFile;
    FSPermissionChecker pc = getPermissionChecker();
    // Pre-check outside the lock to fail fast on a standby NameNode.
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      // Re-check under the lock: HA state may have changed since the pre-check.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set replication for " + src);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      if (isPermissionEnabled) {
        checkPathAccess(pc, src, FsAction.WRITE);
      }

      final short[] blockRepls = new short[2]; // 0: old, 1: new
      // Returns the file's blocks, or null if src is not an existing file.
      final Block[] blocks = dir.setReplication(src, replication, blockRepls);
      isFile = blocks != null;
      if (isFile) {
        // Let the block manager schedule re-replication or excess-replica
        // removal based on the old/new factors.
        blockManager.setReplication(blockRepls[0], blockRepls[1], src, blocks);
      }
    } finally {
      writeUnlock();
    }

    // Sync the edit log outside the write lock to reduce lock hold time.
    getEditLog().logSync();
    if (isFile) {
      logAuditEvent(true, "setReplication", src);
    }
    return isFile;
  }
2127    
  /**
   * Get the preferred (configured) block size for the given file.
   *
   * @param filename path of the file
   * @return the file's preferred block size in bytes
   * @throws IOException if the path cannot be resolved or access is denied
   */
  long getPreferredBlockSize(String filename) 
      throws IOException, UnresolvedLinkException {
    FSPermissionChecker pc = getPermissionChecker();
    // Pre-check outside the lock to fail fast on a standby NameNode.
    checkOperation(OperationCategory.READ);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(filename);
    readLock();
    try {
      // Re-check under the lock: HA state may have changed since the pre-check.
      checkOperation(OperationCategory.READ);
      filename = FSDirectory.resolvePath(filename, pathComponents, dir);
      if (isPermissionEnabled) {
        // Reading metadata only requires traverse access along the path.
        checkTraverse(pc, filename);
      }
      return dir.getPreferredBlockSize(filename);
    } finally {
      readUnlock();
    }
  }
2145    
2146      /**
2147       * Verify that parent directory of src exists.
2148       */
2149      private void verifyParentDir(String src) throws FileNotFoundException,
2150          ParentNotDirectoryException, UnresolvedLinkException {
2151        assert hasReadLock();
2152        Path parent = new Path(src).getParent();
2153        if (parent != null) {
2154          final INode parentNode = dir.getINode(parent.toString());
2155          if (parentNode == null) {
2156            throw new FileNotFoundException("Parent directory doesn't exist: "
2157                + parent);
2158          } else if (!parentNode.isDirectory() && !parentNode.isSymlink()) {
2159            throw new ParentNotDirectoryException("Parent path is not a directory: "
2160                + parent);
2161          }
2162        }
2163      }
2164      
2165      /**
2166       * Create a new file entry in the namespace.
2167       * 
2168       * For description of parameters and exceptions thrown see
2169       * {@link ClientProtocol#create()}, except it returns valid file status upon
2170       * success
2171       * 
2172       * For retryCache handling details see -
2173       * {@link #getFileStatus(boolean, CacheEntryWithPayload)}
2174       * 
2175       */
2176      HdfsFileStatus startFile(String src, PermissionStatus permissions,
2177          String holder, String clientMachine, EnumSet<CreateFlag> flag,
2178          boolean createParent, short replication, long blockSize)
2179          throws AccessControlException, SafeModeException,
2180          FileAlreadyExistsException, UnresolvedLinkException,
2181          FileNotFoundException, ParentNotDirectoryException, IOException {
2182        HdfsFileStatus status = null;
2183        CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
2184            null);
2185        if (cacheEntry != null && cacheEntry.isSuccess()) {
2186          return (HdfsFileStatus) cacheEntry.getPayload();
2187        }
2188        
2189        try {
2190          status = startFileInt(src, permissions, holder, clientMachine, flag,
2191              createParent, replication, blockSize, cacheEntry != null);
2192        } catch (AccessControlException e) {
2193          logAuditEvent(false, "create", src);
2194          throw e;
2195        } finally {
2196          RetryCache.setState(cacheEntry, status != null, status);
2197        }
2198        return status;
2199      }
2200    
  /**
   * Internal implementation of startFile: validates the request, creates
   * the file under the write lock, and syncs the edit log afterwards.
   *
   * @param logRetryCache whether to record the RPC id in the edit log for
   *                      retry-cache rebuilding
   * @return the status of the newly created file
   */
  private HdfsFileStatus startFileInt(String src, PermissionStatus permissions,
      String holder, String clientMachine, EnumSet<CreateFlag> flag,
      boolean createParent, short replication, long blockSize,
      boolean logRetryCache) throws AccessControlException, SafeModeException,
      FileAlreadyExistsException, UnresolvedLinkException,
      FileNotFoundException, ParentNotDirectoryException, IOException {
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: src=" + src
          + ", holder=" + holder
          + ", clientMachine=" + clientMachine
          + ", createParent=" + createParent
          + ", replication=" + replication
          + ", createFlag=" + flag.toString());
    }
    if (!DFSUtil.isValidName(src)) {
      throw new InvalidPathException(src);
    }
    // Validate the requested replication factor before taking any locks.
    blockManager.verifyReplication(src, replication, clientMachine);

    boolean skipSync = false;
    HdfsFileStatus stat = null;
    FSPermissionChecker pc = getPermissionChecker();
    // Pre-check outside the lock to fail fast on a standby NameNode.
    checkOperation(OperationCategory.WRITE);
    if (blockSize < minBlockSize) {
      throw new IOException("Specified block size is less than configured" +
          " minimum value (" + DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY
          + "): " + blockSize + " < " + minBlockSize);
    }
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    boolean create = flag.contains(CreateFlag.CREATE);
    boolean overwrite = flag.contains(CreateFlag.OVERWRITE);
    writeLock();
    try {
      // Re-check under the lock: HA state may have changed since the pre-check.
      checkOperation(OperationCategory.WRITE);
      // NOTE(review): message lacks a space before src ("Cannot create
      // file/path"); paths begin with '/' so it still reads, but confirm
      // whether this wording is intentional.
      checkNameNodeSafeMode("Cannot create file" + src);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      startFileInternal(pc, src, permissions, holder, clientMachine, create,
          overwrite, createParent, replication, blockSize, logRetryCache);
      stat = dir.getFileInfo(src, false);
    } catch (StandbyException se) {
      // On standby fallover nothing was logged; skip the edit log sync.
      skipSync = true;
      throw se;
    } finally {
      writeUnlock();
      // There might be transactions logged while trying to recover the lease.
      // They need to be sync'ed even when an exception was thrown.
      if (!skipSync) {
        getEditLog().logSync();
      }
    } 
    logAuditEvent(true, "create", src, null, stat);
    return stat;
  }
2254    
2255      /**
2256       * Create a new file or overwrite an existing file<br>
2257       * 
2258       * Once the file is create the client then allocates a new block with the next
2259       * call using {@link NameNode#addBlock()}.
2260       * <p>
2261       * For description of parameters and exceptions thrown see
2262       * {@link ClientProtocol#create()}
2263       */
2264      private void startFileInternal(FSPermissionChecker pc, String src,
2265          PermissionStatus permissions, String holder, String clientMachine,
2266          boolean create, boolean overwrite, boolean createParent,
2267          short replication, long blockSize, boolean logRetryEntry)
2268          throws FileAlreadyExistsException, AccessControlException,
2269          UnresolvedLinkException, FileNotFoundException,
2270          ParentNotDirectoryException, IOException {
2271        assert hasWriteLock();
2272        // Verify that the destination does not exist as a directory already.
2273        final INodesInPath iip = dir.getINodesInPath4Write(src);
2274        final INode inode = iip.getLastINode();
2275        if (inode != null && inode.isDirectory()) {
2276          throw new FileAlreadyExistsException(src +
2277              " already exists as a directory");
2278        }
2279        final INodeFile myFile = INodeFile.valueOf(inode, src, true);
2280        if (isPermissionEnabled) {
2281          if (overwrite && myFile != null) {
2282            checkPathAccess(pc, src, FsAction.WRITE);
2283          } else {
2284            checkAncestorAccess(pc, src, FsAction.WRITE);
2285          }
2286        }
2287    
2288        if (!createParent) {
2289          verifyParentDir(src);
2290        }
2291    
2292        try {
2293          if (myFile == null) {
2294            if (!create) {
2295              throw new FileNotFoundException("Can't overwrite non-existent " +
2296                  src + " for client " + clientMachine);
2297            }
2298          } else {
2299            if (overwrite) {
2300              try {
2301                deleteInt(src, true, false); // File exists - delete if overwrite
2302              } catch (AccessControlException e) {
2303                logAuditEvent(false, "delete", src);
2304                throw e;
2305              }
2306            } else {
2307              // If lease soft limit time is expired, recover the lease
2308              recoverLeaseInternal(myFile, src, holder, clientMachine, false);
2309              throw new FileAlreadyExistsException(src + " for client " +
2310                  clientMachine + " already exists");
2311            }
2312          }
2313    
2314          checkFsObjectLimit();
2315          final DatanodeDescriptor clientNode = 
2316              blockManager.getDatanodeManager().getDatanodeByHost(clientMachine);
2317    
2318          INodeFile newNode = dir.addFile(src, permissions, replication, blockSize,
2319              holder, clientMachine, clientNode);
2320          if (newNode == null) {
2321            throw new IOException("Unable to add " + src +  " to namespace");
2322          }
2323          leaseManager.addLease(newNode.getFileUnderConstructionFeature()
2324              .getClientName(), src);
2325    
2326          // record file record in log, record new generation stamp
2327          getEditLog().logOpenFile(src, newNode, logRetryEntry);
2328          if (NameNode.stateChangeLog.isDebugEnabled()) {
2329            NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: added " +
2330                src + " inode " + newNode.getId() + " " + holder);
2331          }
2332        } catch (IOException ie) {
2333          NameNode.stateChangeLog.warn("DIR* NameSystem.startFile: " + src + " " +
2334              ie.getMessage());
2335          throw ie;
2336        }
2337      }
2338      
2339      /**
2340       * Append to an existing file for append.
2341       * <p>
2342       * 
2343       * The method returns the last block of the file if this is a partial block,
2344       * which can still be used for writing more data. The client uses the returned
2345       * block locations to form the data pipeline for this block.<br>
2346       * The method returns null if the last block is full. The client then
2347       * allocates a new block with the next call using {@link NameNode#addBlock()}.
2348       * <p>
2349       * 
2350       * For description of parameters and exceptions thrown see
2351       * {@link ClientProtocol#append(String, String)}
2352       * 
2353       * @return the last block locations if the block is partial or null otherwise
2354       */
2355      private LocatedBlock appendFileInternal(FSPermissionChecker pc, String src,
2356          String holder, String clientMachine, boolean logRetryCache)
2357          throws AccessControlException, UnresolvedLinkException,
2358          FileNotFoundException, IOException {
2359        assert hasWriteLock();
2360        // Verify that the destination does not exist as a directory already.
2361        final INodesInPath iip = dir.getINodesInPath4Write(src);
2362        final INode inode = iip.getLastINode();
2363        if (inode != null && inode.isDirectory()) {
2364          throw new FileAlreadyExistsException("Cannot append to directory " + src
2365              + "; already exists as a directory.");
2366        }
2367        if (isPermissionEnabled) {
2368          checkPathAccess(pc, src, FsAction.WRITE);
2369        }
2370    
2371        try {
2372          if (inode == null) {
2373            throw new FileNotFoundException("failed to append to non-existent file "
2374              + src + " for client " + clientMachine);
2375          }
2376          INodeFile myFile = INodeFile.valueOf(inode, src, true);
2377          // Opening an existing file for write - may need to recover lease.
2378          recoverLeaseInternal(myFile, src, holder, clientMachine, false);
2379          
2380          // recoverLeaseInternal may create a new InodeFile via 
2381          // finalizeINodeFileUnderConstruction so we need to refresh 
2382          // the referenced file.  
2383          myFile = INodeFile.valueOf(dir.getINode(src), src, true);
2384          final BlockInfo lastBlock = myFile.getLastBlock();
2385          // Check that the block has at least minimum replication.
2386          if(lastBlock != null && lastBlock.isComplete() &&
2387              !getBlockManager().isSufficientlyReplicated(lastBlock)) {
2388            throw new IOException("append: lastBlock=" + lastBlock +
2389                " of src=" + src + " is not sufficiently replicated yet.");
2390          }
2391          final DatanodeDescriptor clientNode = 
2392              blockManager.getDatanodeManager().getDatanodeByHost(clientMachine);
2393          return prepareFileForWrite(src, myFile, holder, clientMachine, clientNode,
2394              true, iip.getLatestSnapshotId(), logRetryCache);
2395        } catch (IOException ie) {
2396          NameNode.stateChangeLog.warn("DIR* NameSystem.append: " +ie.getMessage());
2397          throw ie;
2398        }
2399      }
2400      
2401      /**
2402       * Replace current node with a INodeUnderConstruction.
2403       * Recreate in-memory lease record.
2404       * 
2405       * @param src path to the file
2406       * @param file existing file object
2407       * @param leaseHolder identifier of the lease holder on this file
2408       * @param clientMachine identifier of the client machine
2409       * @param clientNode if the client is collocated with a DN, that DN's descriptor
2410       * @param writeToEditLog whether to persist this change to the edit log
2411       * @param logRetryCache whether to record RPC ids in editlog for retry cache
2412       *                      rebuilding
2413       * @return the last block locations if the block is partial or null otherwise
2414       * @throws UnresolvedLinkException
2415       * @throws IOException
2416       */
2417      LocatedBlock prepareFileForWrite(String src, INodeFile file,
2418          String leaseHolder, String clientMachine, DatanodeDescriptor clientNode,
2419          boolean writeToEditLog, int latestSnapshot, boolean logRetryCache)
2420          throws IOException {
2421        file = file.recordModification(latestSnapshot);
2422        final INodeFile cons = file.toUnderConstruction(leaseHolder, clientMachine,
2423            clientNode);
2424    
2425        leaseManager.addLease(cons.getFileUnderConstructionFeature()
2426            .getClientName(), src);
2427        
2428        LocatedBlock ret = blockManager.convertLastBlockToUnderConstruction(cons);
2429        if (writeToEditLog) {
2430          getEditLog().logOpenFile(src, cons, logRetryCache);
2431        }
2432        return ret;
2433      }
2434    
2435      /**
2436       * Recover lease;
2437       * Immediately revoke the lease of the current lease holder and start lease
2438       * recovery so that the file can be forced to be closed.
2439       * 
2440       * @param src the path of the file to start lease recovery
2441       * @param holder the lease holder's name
2442       * @param clientMachine the client machine's name
2443       * @return true if the file is already closed
2444       * @throws IOException
2445       */
2446      boolean recoverLease(String src, String holder, String clientMachine)
2447          throws IOException {
2448        if (!DFSUtil.isValidName(src)) {
2449          throw new IOException("Invalid file name: " + src);
2450        }
2451      
2452        boolean skipSync = false;
2453        FSPermissionChecker pc = getPermissionChecker();
2454        checkOperation(OperationCategory.WRITE);
2455        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2456        writeLock();
2457        try {
2458          checkOperation(OperationCategory.WRITE);
2459          checkNameNodeSafeMode("Cannot recover the lease of " + src);
2460          src = FSDirectory.resolvePath(src, pathComponents, dir);
2461          final INodeFile inode = INodeFile.valueOf(dir.getINode(src), src);
2462          if (!inode.isUnderConstruction()) {
2463            return true;
2464          }
2465          if (isPermissionEnabled) {
2466            checkPathAccess(pc, src, FsAction.WRITE);
2467          }
2468      
2469          recoverLeaseInternal(inode, src, holder, clientMachine, true);
2470        } catch (StandbyException se) {
2471          skipSync = true;
2472          throw se;
2473        } finally {
2474          writeUnlock();
2475          // There might be transactions logged while trying to recover the lease.
2476          // They need to be sync'ed even when an exception was thrown.
2477          if (!skipSync) {
2478            getEditLog().logSync();
2479          }
2480        }
2481        return false;
2482      }
2483    
  /**
   * Core lease-recovery logic shared by recoverLease, startFile and append.
   * No-op unless the file is under construction. With force == true the
   * current holder's lease is released immediately; otherwise recovery only
   * starts if the holder's soft lease limit has expired.
   *
   * @param fileInode the file whose lease may need recovery (may be null)
   * @param src path of the file
   * @param holder the client requesting access to the file
   * @param clientMachine the requesting client's machine
   * @param force if true, release the lease immediately without waiting for
   *              soft-limit expiry
   * @throws AlreadyBeingCreatedException if the same holder re-creates the
   *         file or another client holds an unexpired lease
   * @throws RecoveryInProgressException if recovery was started but the file
   *         could not yet be closed
   */
  private void recoverLeaseInternal(INodeFile fileInode, 
      String src, String holder, String clientMachine, boolean force)
      throws IOException {
    assert hasWriteLock();
    if (fileInode != null && fileInode.isUnderConstruction()) {
      //
      // If the file is under construction , then it must be in our
      // leases. Find the appropriate lease record.
      //
      Lease lease = leaseManager.getLease(holder);
      //
      // We found the lease for this file. And surprisingly the original
      // holder is trying to recreate this file. This should never occur.
      //
      if (!force && lease != null) {
        Lease leaseFile = leaseManager.getLeaseByPath(src);
        // NOTE(review): lease was fetched by holder, so
        // lease.getHolder().equals(holder) appears always true here — the
        // whole condition reduces to (lease != null); confirm intent.
        if ((leaseFile != null && leaseFile.equals(lease)) ||
            lease.getHolder().equals(holder)) { 
          throw new AlreadyBeingCreatedException(
            "failed to create file " + src + " for " + holder +
            " for client " + clientMachine +
            " because current leaseholder is trying to recreate file.");
        }
      }
      //
      // Find the original holder.
      //
      FileUnderConstructionFeature uc = fileInode.getFileUnderConstructionFeature();
      String clientName = uc.getClientName();
      lease = leaseManager.getLease(clientName);
      if (lease == null) {
        // Inconsistent state: under construction but no lease recorded.
        throw new AlreadyBeingCreatedException(
          "failed to create file " + src + " for " + holder +
          " for client " + clientMachine +
          " because pendingCreates is non-null but no leases found.");
      }
      if (force) {
        // close now: no need to wait for soft lease expiration and 
        // close only the file src
        LOG.info("recoverLease: " + lease + ", src=" + src +
          " from client " + clientName);
        internalReleaseLease(lease, src, holder);
      } else {
        assert lease.getHolder().equals(clientName) :
          "Current lease holder " + lease.getHolder() +
          " does not match file creator " + clientName;
        //
        // If the original holder has not renewed in the last SOFTLIMIT 
        // period, then start lease recovery.
        //
        if (lease.expiredSoftLimit()) {
          LOG.info("startFile: recover " + lease + ", src=" + src + " client "
              + clientName);
          boolean isClosed = internalReleaseLease(lease, src, null);
          if(!isClosed)
            throw new RecoveryInProgressException(
                "Failed to close file " + src +
                ". Lease recovery is in progress. Try again later.");
        } else {
          final BlockInfo lastBlock = fileInode.getLastBlock();
          if (lastBlock != null
              && lastBlock.getBlockUCState() == BlockUCState.UNDER_RECOVERY) {
            // Block recovery is already underway; caller must retry later.
            throw new RecoveryInProgressException("Recovery in progress, file ["
                + src + "], " + "lease owner [" + lease.getHolder() + "]");
          } else {
            // Lease still valid and held by another writer.
            throw new AlreadyBeingCreatedException("Failed to create file ["
                + src + "] for [" + holder + "] for client [" + clientMachine
                + "], because this file is already being created by ["
                + clientName + "] on ["
                + uc.getClientMachine() + "]");
          }
        }
      }
    }
  }
2559    
2560      /**
2561       * Append to an existing file in the namespace.
2562       */
2563      LocatedBlock appendFile(String src, String holder, String clientMachine)
2564          throws AccessControlException, SafeModeException,
2565          FileAlreadyExistsException, FileNotFoundException,
2566          ParentNotDirectoryException, IOException {
2567        LocatedBlock lb = null;
2568        CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
2569            null);
2570        if (cacheEntry != null && cacheEntry.isSuccess()) {
2571          return (LocatedBlock) cacheEntry.getPayload();
2572        }
2573          
2574        boolean success = false;
2575        try {
2576          lb = appendFileInt(src, holder, clientMachine, cacheEntry != null);
2577          success = true;
2578          return lb;
2579        } catch (AccessControlException e) {
2580          logAuditEvent(false, "append", src);
2581          throw e;
2582        } finally {
2583          RetryCache.setState(cacheEntry, success, lb);
2584        }
2585      }
2586    
  /**
   * Internal implementation of appendFile: re-opens the file for write under
   * the write lock and returns the last block's locations if it is partial.
   *
   * @param logRetryCache whether to record the RPC id in the edit log for
   *                      retry-cache rebuilding
   * @return the last block locations if the block is partial or null otherwise
   */
  private LocatedBlock appendFileInt(String src, String holder,
      String clientMachine, boolean logRetryCache)
      throws AccessControlException, SafeModeException,
      FileAlreadyExistsException, FileNotFoundException,
      ParentNotDirectoryException, IOException {
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: src=" + src
          + ", holder=" + holder
          + ", clientMachine=" + clientMachine);
    }
    boolean skipSync = false;
    if (!supportAppends) {
      // Append support is an opt-in configuration on this NameNode.
      throw new UnsupportedOperationException(
          "Append is not enabled on this NameNode. Use the " +
          DFS_SUPPORT_APPEND_KEY + " configuration option to enable it.");
    }

    LocatedBlock lb = null;
    FSPermissionChecker pc = getPermissionChecker();
    // Pre-check outside the lock to fail fast on a standby NameNode.
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      // Re-check under the lock: HA state may have changed since the pre-check.
      checkOperation(OperationCategory.WRITE);
      // NOTE(review): message lacks a space before src ("Cannot append to
      // file/path"); confirm whether this wording is intentional.
      checkNameNodeSafeMode("Cannot append to file" + src);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      lb = appendFileInternal(pc, src, holder, clientMachine, logRetryCache);
    } catch (StandbyException se) {
      // On standby fallover nothing was logged; skip the edit log sync.
      skipSync = true;
      throw se;
    } finally {
      writeUnlock();
      // There might be transactions logged while trying to recover the lease.
      // They need to be sync'ed even when an exception was thrown.
      if (!skipSync) {
        getEditLog().logSync();
      }
    }
    if (lb != null) {
      if (NameNode.stateChangeLog.isDebugEnabled()) {
        NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: file "
            +src+" for "+holder+" at "+clientMachine
            +" block " + lb.getBlock()
            +" block size " + lb.getBlock().getNumBytes());
      }
    }
    logAuditEvent(true, "append", src);
    return lb;
  }
2636    
2637      ExtendedBlock getExtendedBlock(Block blk) {
2638        return new ExtendedBlock(blockPoolId, blk);
2639      }
2640      
2641      void setBlockPoolId(String bpid) {
2642        blockPoolId = bpid;
2643        blockManager.setBlockPoolId(blockPoolId);
2644      }
2645    
2646      /**
2647       * The client would like to obtain an additional block for the indicated
2648       * filename (which is being written-to).  Return an array that consists
2649       * of the block, plus a set of machines.  The first on this list should
2650       * be where the client writes data.  Subsequent items in the list must
2651       * be provided in the connection to the first datanode.
2652       *
2653       * Make sure the previous blocks have been reported by datanodes and
2654       * are replicated.  Will return an empty 2-elt array if we want the
2655       * client to "try again later".
2656       */
2657      LocatedBlock getAdditionalBlock(String src, long fileId, String clientName,
2658          ExtendedBlock previous, Set<Node> excludedNodes, 
2659          List<String> favoredNodes)
2660          throws LeaseExpiredException, NotReplicatedYetException,
2661          QuotaExceededException, SafeModeException, UnresolvedLinkException,
2662          IOException {
2663        long blockSize;
2664        int replication;
2665        DatanodeDescriptor clientNode = null;
2666    
2667        if(NameNode.stateChangeLog.isDebugEnabled()) {
2668          NameNode.stateChangeLog.debug("BLOCK* NameSystem.getAdditionalBlock: "
2669              + src + " inodeId " +  fileId  + " for " + clientName);
2670        }
2671    
2672        // Part I. Analyze the state of the file with respect to the input data.
2673        checkOperation(OperationCategory.READ);
2674        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2675        readLock();
2676        try {
2677          checkOperation(OperationCategory.READ);
2678          src = FSDirectory.resolvePath(src, pathComponents, dir);
2679          LocatedBlock[] onRetryBlock = new LocatedBlock[1];
2680          final INode[] inodes = analyzeFileState(
2681              src, fileId, clientName, previous, onRetryBlock).getINodes();
2682          final INodeFile pendingFile = inodes[inodes.length - 1].asFile();
2683    
2684          if (onRetryBlock[0] != null && onRetryBlock[0].getLocations().length > 0) {
2685            // This is a retry. Just return the last block if having locations.
2686            return onRetryBlock[0];
2687          }
2688          if (pendingFile.getBlocks().length >= maxBlocksPerFile) {
2689            throw new IOException("File has reached the limit on maximum number of"
2690                + " blocks (" + DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_KEY
2691                + "): " + pendingFile.getBlocks().length + " >= "
2692                + maxBlocksPerFile);
2693          }
2694          blockSize = pendingFile.getPreferredBlockSize();
2695          clientNode = pendingFile.getFileUnderConstructionFeature().getClientNode();
2696          replication = pendingFile.getFileReplication();
2697        } finally {
2698          readUnlock();
2699        }
2700    
2701        // choose targets for the new block to be allocated.
2702        final DatanodeStorageInfo targets[] = getBlockManager().chooseTarget( 
2703            src, replication, clientNode, excludedNodes, blockSize, favoredNodes);
2704    
2705        // Part II.
2706        // Allocate a new block, add it to the INode and the BlocksMap. 
2707        Block newBlock = null;
2708        long offset;
2709        checkOperation(OperationCategory.WRITE);
2710        writeLock();
2711        try {
2712          checkOperation(OperationCategory.WRITE);
2713          // Run the full analysis again, since things could have changed
2714          // while chooseTarget() was executing.
2715          LocatedBlock[] onRetryBlock = new LocatedBlock[1];
2716          INodesInPath inodesInPath =
2717              analyzeFileState(src, fileId, clientName, previous, onRetryBlock);
2718          final INode[] inodes = inodesInPath.getINodes();
2719          final INodeFile pendingFile = inodes[inodes.length - 1].asFile();
2720    
2721          if (onRetryBlock[0] != null) {
2722            if (onRetryBlock[0].getLocations().length > 0) {
2723              // This is a retry. Just return the last block if having locations.
2724              return onRetryBlock[0];
2725            } else {
2726              // add new chosen targets to already allocated block and return
2727              BlockInfo lastBlockInFile = pendingFile.getLastBlock();
2728              ((BlockInfoUnderConstruction) lastBlockInFile)
2729                  .setExpectedLocations(targets);
2730              offset = pendingFile.computeFileSize();
2731              return makeLocatedBlock(lastBlockInFile, targets, offset);
2732            }
2733          }
2734    
2735          // commit the last block and complete it if it has minimum replicas
2736          commitOrCompleteLastBlock(pendingFile,
2737                                    ExtendedBlock.getLocalBlock(previous));
2738    
2739          // allocate new block, record block locations in INode.
2740          newBlock = createNewBlock();
2741          saveAllocatedBlock(src, inodesInPath, newBlock, targets);
2742    
2743          dir.persistNewBlock(src, pendingFile);
2744          offset = pendingFile.computeFileSize();
2745        } finally {
2746          writeUnlock();
2747        }
2748        getEditLog().logSync();
2749    
2750        // Return located block
2751        return makeLocatedBlock(newBlock, targets, offset);
2752      }
2753    
  /**
   * Analyze the state of the file under construction before allocating a
   * new block. Validates the client's view of the file's last block
   * ({@code previous}) against the namesystem's, and detects retried
   * getAdditionalBlock RPCs so they can be answered idempotently.
   *
   * Must be called with (at least) the read lock held.
   *
   * @param src path of the file under construction
   * @param fileId expected inode id of the file (checked against the
   *        resolved inode)
   * @param clientName lease holder that must own the file
   * @param previous the block the client believes is the current last block
   *        of the file; may be null on the first block of an append
   * @param onRetryBlock out-parameter: element 0 is set to the previously
   *        allocated last block when the call is recognized as a retry,
   *        and null otherwise
   * @return the resolved INodesInPath for src
   * @throws IOException on safe mode, fs-object limit, lease, block
   *         mismatch, or replication-progress failures
   */
  INodesInPath analyzeFileState(String src,
                                long fileId,
                                String clientName,
                                ExtendedBlock previous,
                                LocatedBlock[] onRetryBlock)
          throws IOException  {
    assert hasReadLock();

    checkBlock(previous);
    onRetryBlock[0] = null;
    checkOperation(OperationCategory.WRITE);
    checkNameNodeSafeMode("Cannot add block to " + src);

    // have we exceeded the configured limit of fs objects.
    checkFsObjectLimit();

    Block previousBlock = ExtendedBlock.getLocalBlock(previous);
    final INodesInPath iip = dir.getINodesInPath4Write(src);
    final INodeFile pendingFile
        = checkLease(src, fileId, clientName, iip.getLastINode());
    BlockInfo lastBlockInFile = pendingFile.getLastBlock();
    if (!Block.matchingIdAndGenStamp(previousBlock, lastBlockInFile)) {
      // The block that the client claims is the current last block
      // doesn't match up with what we think is the last block. There are
      // four possibilities:
      // 1) This is the first block allocation of an append() pipeline
      //    which started appending exactly at a block boundary.
      //    In this case, the client isn't passed the previous block,
      //    so it makes the allocateBlock() call with previous=null.
      //    We can distinguish this since the last block of the file
      //    will be exactly a full block.
      // 2) This is a retry from a client that missed the response of a
      //    prior getAdditionalBlock() call, perhaps because of a network
      //    timeout, or because of an HA failover. In that case, we know
      //    by the fact that the client is re-issuing the RPC that it
      //    never began to write to the old block. Hence it is safe to
      //    return the existing block.
      // 3) This is an entirely bogus request/bug -- we should error out
      //    rather than potentially appending a new block with an empty
      //    one in the middle, etc
      // 4) This is a retry from a client that timed out while
      //    the prior getAdditionalBlock() is still being processed,
      //    currently working on chooseTarget(). 
      //    There are no means to distinguish between the first and 
      //    the second attempts in Part I, because the first one hasn't
      //    changed the namesystem state yet.
      //    We run this analysis again in Part II where case 4 is impossible.

      BlockInfo penultimateBlock = pendingFile.getPenultimateBlock();
      if (previous == null &&
          lastBlockInFile != null &&
          lastBlockInFile.getNumBytes() == pendingFile.getPreferredBlockSize() &&
          lastBlockInFile.isComplete()) {
        // Case 1
        if (NameNode.stateChangeLog.isDebugEnabled()) {
           NameNode.stateChangeLog.debug(
               "BLOCK* NameSystem.allocateBlock: handling block allocation" +
               " writing to a file with a complete previous block: src=" +
               src + " lastBlock=" + lastBlockInFile);
        }
      } else if (Block.matchingIdAndGenStamp(penultimateBlock, previousBlock)) {
        // The client's "previous" is our penultimate block: a retry of the
        // allocation that created the current last block. It must still be
        // empty, or the client has already written into it and this request
        // makes no sense.
        if (lastBlockInFile.getNumBytes() != 0) {
          throw new IOException(
              "Request looked like a retry to allocate block " +
              lastBlockInFile + " but it already contains " +
              lastBlockInFile.getNumBytes() + " bytes");
        }

        // Case 2
        // Return the last block.
        NameNode.stateChangeLog.info("BLOCK* allocateBlock: " +
            "caught retry for allocation of a new block in " +
            src + ". Returning previously allocated block " + lastBlockInFile);
        long offset = pendingFile.computeFileSize();
        onRetryBlock[0] = makeLocatedBlock(lastBlockInFile,
            ((BlockInfoUnderConstruction)lastBlockInFile).getExpectedStorageLocations(),
            offset);
        return iip;
      } else {
        // Case 3
        throw new IOException("Cannot allocate block in " + src + ": " +
            "passed 'previous' block " + previous + " does not match actual " +
            "last block in file " + lastBlockInFile);
      }
    }

    // Check if the penultimate block is minimally replicated
    if (!checkFileProgress(pendingFile, false)) {
      throw new NotReplicatedYetException("Not replicated yet: " + src);
    }
    return iip;
  }
2846    
2847      LocatedBlock makeLocatedBlock(Block blk, DatanodeStorageInfo[] locs,
2848                                            long offset) throws IOException {
2849        LocatedBlock lBlk = new LocatedBlock(
2850            getExtendedBlock(blk), locs, offset, false);
2851        getBlockManager().setBlockToken(
2852            lBlk, BlockTokenSecretManager.AccessMode.WRITE);
2853        return lBlk;
2854      }
2855    
2856      /** @see NameNode#getAdditionalDatanode(String, ExtendedBlock, DatanodeInfo[], DatanodeInfo[], int, String) */
2857      LocatedBlock getAdditionalDatanode(String src, final ExtendedBlock blk,
2858          final DatanodeInfo[] existings, final String[] storageIDs,
2859          final Set<Node> excludes,
2860          final int numAdditionalNodes, final String clientName
2861          ) throws IOException {
2862        //check if the feature is enabled
2863        dtpReplaceDatanodeOnFailure.checkEnabled();
2864    
2865        final DatanodeDescriptor clientnode;
2866        final long preferredblocksize;
2867        final List<DatanodeStorageInfo> chosen;
2868        checkOperation(OperationCategory.READ);
2869        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2870        readLock();
2871        try {
2872          checkOperation(OperationCategory.READ);
2873          //check safe mode
2874          checkNameNodeSafeMode("Cannot add datanode; src=" + src + ", blk=" + blk);
2875          src = FSDirectory.resolvePath(src, pathComponents, dir);
2876    
2877          //check lease
2878          final INodeFile file = checkLease(src, clientName);
2879          clientnode = file.getFileUnderConstructionFeature().getClientNode();
2880          preferredblocksize = file.getPreferredBlockSize();
2881    
2882          //find datanode storages
2883          final DatanodeManager dm = blockManager.getDatanodeManager();
2884          chosen = Arrays.asList(dm.getDatanodeStorageInfos(existings, storageIDs));
2885        } finally {
2886          readUnlock();
2887        }
2888    
2889        // choose new datanodes.
2890        final DatanodeStorageInfo[] targets = blockManager.getBlockPlacementPolicy(
2891            ).chooseTarget(src, numAdditionalNodes, clientnode, chosen, true,
2892                // TODO: get storage type from the file
2893            excludes, preferredblocksize, StorageType.DEFAULT);
2894        final LocatedBlock lb = new LocatedBlock(blk, targets);
2895        blockManager.setBlockToken(lb, AccessMode.COPY);
2896        return lb;
2897      }
2898    
2899      /**
2900       * The client would like to let go of the given block
2901       */
2902      boolean abandonBlock(ExtendedBlock b, String src, String holder)
2903          throws LeaseExpiredException, FileNotFoundException,
2904          UnresolvedLinkException, IOException {
2905        if(NameNode.stateChangeLog.isDebugEnabled()) {
2906          NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: " + b
2907              + "of file " + src);
2908        }
2909        checkOperation(OperationCategory.WRITE);
2910        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2911        writeLock();
2912        try {
2913          checkOperation(OperationCategory.WRITE);
2914          checkNameNodeSafeMode("Cannot abandon block " + b + " for fle" + src);
2915          src = FSDirectory.resolvePath(src, pathComponents, dir);
2916    
2917          //
2918          // Remove the block from the pending creates list
2919          //
2920          INodeFile file = checkLease(src, holder);
2921          boolean removed = dir.removeBlock(src, file,
2922              ExtendedBlock.getLocalBlock(b));
2923          if (!removed) {
2924            return true;
2925          }
2926          if(NameNode.stateChangeLog.isDebugEnabled()) {
2927            NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: "
2928                                          + b + " is removed from pendingCreates");
2929          }
2930          dir.persistBlocks(src, file, false);
2931        } finally {
2932          writeUnlock();
2933        }
2934        getEditLog().logSync();
2935    
2936        return true;
2937      }
2938      
  /**
   * Make sure that we still have the lease on this file.
   * Convenience overload that resolves the inode from {@code src} and skips
   * the inode-id check (uses {@link INodeId#GRANDFATHER_INODE_ID}).
   */
  private INodeFile checkLease(String src, String holder)
      throws LeaseExpiredException, UnresolvedLinkException,
      FileNotFoundException {
    return checkLease(src, INodeId.GRANDFATHER_INODE_ID, holder,
        dir.getINode(src));
  }
2946      
  /**
   * Verify that {@code holder} still holds the lease on the file at
   * {@code src}, resolved to {@code inode}.
   *
   * @param src path of the file, used in exception messages
   * @param fileId expected inode id, validated via INodeId.checkId
   * @param holder client name claiming the lease; if null the holder
   *        check is skipped
   * @param inode the resolved inode for src (may be null if absent)
   * @return the file's INodeFile, known to be under construction
   * @throws LeaseExpiredException if the file is missing, not open for
   *         writing, or leased by a different client
   * @throws FileNotFoundException if the file has been deleted
   */
  private INodeFile checkLease(String src, long fileId, String holder,
      INode inode) throws LeaseExpiredException, FileNotFoundException {
    assert hasReadLock();
    // The path must resolve to an existing regular file.
    if (inode == null || !inode.isFile()) {
      Lease lease = leaseManager.getLease(holder);
      throw new LeaseExpiredException(
          "No lease on " + src + ": File does not exist. "
          + (lease != null ? lease.toString()
              : "Holder " + holder + " does not have any open files."));
    }
    final INodeFile file = inode.asFile();
    // Only files under construction carry a lease.
    if (!file.isUnderConstruction()) {
      Lease lease = leaseManager.getLease(holder);
      throw new LeaseExpiredException(
          "No lease on " + src + ": File is not open for writing. "
          + (lease != null ? lease.toString()
              : "Holder " + holder + " does not have any open files."));
    }
    // No further modification is allowed on a deleted file.
    // A file is considered deleted, if it has no parent or is marked
    // as deleted in the snapshot feature.
    if (file.getParent() == null || (file.isWithSnapshot() &&
        file.getFileWithSnapshotFeature().isCurrentFileDeleted())) {
      throw new FileNotFoundException(src);
    }
    // The lease must belong to the caller.
    String clientName = file.getFileUnderConstructionFeature().getClientName();
    if (holder != null && !clientName.equals(holder)) {
      throw new LeaseExpiredException("Lease mismatch on " + src + " owned by "
          + clientName + " but is accessed by " + holder);
    }
    INodeId.checkId(fileId, file);
    return file;
  }
2980     
2981      /**
2982       * Complete in-progress write to the given file.
2983       * @return true if successful, false if the client should continue to retry
2984       *         (e.g if not all blocks have reached minimum replication yet)
2985       * @throws IOException on error (eg lease mismatch, file not open, file deleted)
2986       */
2987      boolean completeFile(String src, String holder,
2988                           ExtendedBlock last, long fileId)
2989        throws SafeModeException, UnresolvedLinkException, IOException {
2990        if (NameNode.stateChangeLog.isDebugEnabled()) {
2991          NameNode.stateChangeLog.debug("DIR* NameSystem.completeFile: " +
2992              src + " for " + holder);
2993        }
2994        checkBlock(last);
2995        boolean success = false;
2996        checkOperation(OperationCategory.WRITE);
2997        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2998        writeLock();
2999        try {
3000          checkOperation(OperationCategory.WRITE);
3001          checkNameNodeSafeMode("Cannot complete file " + src);
3002          src = FSDirectory.resolvePath(src, pathComponents, dir);
3003          success = completeFileInternal(src, holder,
3004            ExtendedBlock.getLocalBlock(last), fileId);
3005        } finally {
3006          writeUnlock();
3007        }
3008        getEditLog().logSync();
3009        if (success) {
3010          NameNode.stateChangeLog.info("DIR* completeFile: " + src
3011              + " is closed by " + holder);
3012        }
3013        return success;
3014      }
3015    
  /**
   * Do the work of completing a file under the write lock: verify the
   * lease (tolerating retried close RPCs per HDFS-3031), check replication
   * progress, commit the last block, and finalize the inode.
   *
   * @return true if the file was (or already had been) closed; false if the
   *         client should retry because blocks lack minimum replication
   */
  private boolean completeFileInternal(String src, 
      String holder, Block last, long fileId) throws SafeModeException,
      UnresolvedLinkException, IOException {
    assert hasWriteLock();
    final INodesInPath iip = dir.getLastINodeInPath(src);
    final INodeFile pendingFile;
    try {
      pendingFile = checkLease(src, fileId, holder, iip.getINode(0));
    } catch (LeaseExpiredException lee) {
      final INode inode = dir.getINode(src);
      if (inode != null
          && inode.isFile()
          && !inode.asFile().isUnderConstruction()) {
        // This could be a retry RPC - i.e the client tried to close
        // the file, but missed the RPC response. Thus, it is trying
        // again to close the file. If the file still exists and
        // the client's view of the last block matches the actual
        // last block, then we'll treat it as a successful close.
        // See HDFS-3031.
        final Block realLastBlock = inode.asFile().getLastBlock();
        if (Block.matchingIdAndGenStamp(last, realLastBlock)) {
          NameNode.stateChangeLog.info("DIR* completeFile: " +
              "request from " + holder + " to complete " + src +
              " which is already closed. But, it appears to be an RPC " +
              "retry. Returning success");
          return true;
        }
      }
      // Not a recognized retry: surface the original lease failure.
      throw lee;
    }
    // Check the state of the penultimate block. It should be completed
    // before attempting to complete the last one.
    if (!checkFileProgress(pendingFile, false)) {
      return false;
    }

    // commit the last block and complete it if it has minimum replicas
    commitOrCompleteLastBlock(pendingFile, last);

    // Re-check all blocks now that the last one has been committed.
    if (!checkFileProgress(pendingFile, true)) {
      return false;
    }

    finalizeINodeFileUnderConstruction(src, pendingFile,
        iip.getLatestSnapshotId());
    return true;
  }
3063    
3064      /**
3065       * Save allocated block at the given pending filename
3066       * 
3067       * @param src path to the file
3068       * @param inodesInPath representing each of the components of src. 
3069       *                     The last INode is the INode for the file.
3070       * @throws QuotaExceededException If addition of block exceeds space quota
3071       */
3072      BlockInfo saveAllocatedBlock(String src, INodesInPath inodes,
3073          Block newBlock, DatanodeStorageInfo[] targets)
3074              throws IOException {
3075        assert hasWriteLock();
3076        BlockInfo b = dir.addBlock(src, inodes, newBlock, targets);
3077        NameNode.stateChangeLog.info("BLOCK* allocateBlock: " + src + ". "
3078            + getBlockPoolId() + " " + b);
3079        DatanodeStorageInfo.incrementBlocksScheduled(targets);
3080        return b;
3081      }
3082    
3083      /**
3084       * Create new block with a unique block id and a new generation stamp.
3085       */
3086      Block createNewBlock() throws IOException {
3087        assert hasWriteLock();
3088        Block b = new Block(nextBlockId(), 0, 0);
3089        // Increment the generation stamp for every new block.
3090        b.setGenerationStamp(nextGenerationStamp(false));
3091        return b;
3092      }
3093    
3094      /**
3095       * Check that the indicated file's blocks are present and
3096       * replicated.  If not, return false. If checkall is true, then check
3097       * all blocks, otherwise check only penultimate block.
3098       */
3099      boolean checkFileProgress(INodeFile v, boolean checkall) {
3100        readLock();
3101        try {
3102          if (checkall) {
3103            //
3104            // check all blocks of the file.
3105            //
3106            for (BlockInfo block: v.getBlocks()) {
3107              if (!block.isComplete()) {
3108                LOG.info("BLOCK* checkFileProgress: " + block
3109                    + " has not reached minimal replication "
3110                    + blockManager.minReplication);
3111                return false;
3112              }
3113            }
3114          } else {
3115            //
3116            // check the penultimate block of this file
3117            //
3118            BlockInfo b = v.getPenultimateBlock();
3119            if (b != null && !b.isComplete()) {
3120              LOG.warn("BLOCK* checkFileProgress: " + b
3121                  + " has not reached minimal replication "
3122                  + blockManager.minReplication);
3123              return false;
3124            }
3125          }
3126          return true;
3127        } finally {
3128          readUnlock();
3129        }
3130      }
3131    
3132      ////////////////////////////////////////////////////////////////
3133      // Here's how to handle block-copy failure during client write:
3134      // -- As usual, the client's write should result in a streaming
3135      // backup write to a k-machine sequence.
3136      // -- If one of the backup machines fails, no worries.  Fail silently.
3137      // -- Before client is allowed to close and finalize file, make sure
3138      // that the blocks are backed up.  Namenode may have to issue specific backup
3139      // commands to make up for earlier datanode failures.  Once all copies
3140      // are made, edit namespace and return to client.
3141      ////////////////////////////////////////////////////////////////
3142    
3143      /** 
3144       * Change the indicated filename. 
3145       * @deprecated Use {@link #renameTo(String, String, Options.Rename...)} instead.
3146       */
3147      @Deprecated
3148      boolean renameTo(String src, String dst) 
3149          throws IOException, UnresolvedLinkException {
3150        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
3151        if (cacheEntry != null && cacheEntry.isSuccess()) {
3152          return true; // Return previous response
3153        }
3154        boolean ret = false;
3155        try {
3156          ret = renameToInt(src, dst, cacheEntry != null);
3157        } catch (AccessControlException e) {
3158          logAuditEvent(false, "rename", src, dst, null);
3159          throw e;
3160        } finally {
3161          RetryCache.setState(cacheEntry, ret);
3162        }
3163        return ret;
3164      }
3165    
  /**
   * Rename {@code src} to {@code dst} under the namesystem write lock and
   * audit-log the result.
   *
   * @param logRetryCache whether the operation should be logged to the edit
   *        log for retry-cache replay
   * @return true if the rename succeeded
   * @throws IOException if {@code dst} is not a valid name or the rename fails
   */
  private boolean renameToInt(String src, String dst, boolean logRetryCache) 
    throws IOException, UnresolvedLinkException {
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: " + src +
          " to " + dst);
    }
    if (!DFSUtil.isValidName(dst)) {
      throw new IOException("Invalid name: " + dst);
    }
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] srcComponents = FSDirectory.getPathComponentsForReservedPath(src);
    byte[][] dstComponents = FSDirectory.getPathComponentsForReservedPath(dst);
    boolean status = false;
    HdfsFileStatus resultingStat = null;
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot rename " + src);
      src = FSDirectory.resolvePath(src, srcComponents, dir);
      dst = FSDirectory.resolvePath(dst, dstComponents, dir);
      checkOperation(OperationCategory.WRITE);
      status = renameToInternal(pc, src, dst, logRetryCache);
      if (status) {
        // Capture the destination's status for the audit log.
        resultingStat = getAuditFileInfo(dst, false);
      }
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the namesystem lock.
    getEditLog().logSync();
    if (status) {
      logAuditEvent(true, "rename", src, dst, resultingStat);
    }
    return status;
  }
3201    
3202      /** @deprecated See {@link #renameTo(String, String)} */
3203      @Deprecated
3204      private boolean renameToInternal(FSPermissionChecker pc, String src,
3205          String dst, boolean logRetryCache) throws IOException,
3206          UnresolvedLinkException {
3207        assert hasWriteLock();
3208        if (isPermissionEnabled) {
3209          //We should not be doing this.  This is move() not renameTo().
3210          //but for now,
3211          //NOTE: yes, this is bad!  it's assuming much lower level behavior
3212          //      of rewriting the dst
3213          String actualdst = dir.isDir(dst)?
3214              dst + Path.SEPARATOR + new Path(src).getName(): dst;
3215          // Rename does not operates on link targets
3216          // Do not resolveLink when checking permissions of src and dst
3217          // Check write access to parent of src
3218          checkPermission(pc, src, false, null, FsAction.WRITE, null, null, false);
3219          // Check write access to ancestor of dst
3220          checkPermission(pc, actualdst, false, FsAction.WRITE, null, null, null,
3221              false);
3222        }
3223    
3224        if (dir.renameTo(src, dst, logRetryCache)) {
3225          return true;
3226        }
3227        return false;
3228      }
3229      
3230    
  /**
   * Rename src to dst with the given {@link Options.Rename} options
   * (e.g. OVERWRITE). Unlike the deprecated two-argument renameTo,
   * failure is reported by exception rather than a boolean return.
   */
  void renameTo(String src, String dst, Options.Rename... options)
      throws IOException, UnresolvedLinkException {
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: with options - "
          + src + " to " + dst);
    }
    if (!DFSUtil.isValidName(dst)) {
      throw new InvalidPathException("Invalid name: " + dst);
    }
    final FSPermissionChecker pc = getPermissionChecker();
    
    checkOperation(OperationCategory.WRITE);
    // Short-circuit if this RPC already completed (retry-cache hit).
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    byte[][] srcComponents = FSDirectory.getPathComponentsForReservedPath(src);
    byte[][] dstComponents = FSDirectory.getPathComponentsForReservedPath(dst);
    HdfsFileStatus resultingStat = null;
    boolean success = false;
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot rename " + src);
      src = FSDirectory.resolvePath(src, srcComponents, dir);
      dst = FSDirectory.resolvePath(dst, dstComponents, dir);
      renameToInternal(pc, src, dst, cacheEntry != null, options);
      resultingStat = getAuditFileInfo(dst, false);
      success = true;
    } finally {
      writeUnlock();
      // Record the outcome for future retries of this RPC.
      RetryCache.setState(cacheEntry, success);
    }
    // Sync the edit log outside the namesystem lock.
    getEditLog().logSync();
    if (resultingStat != null) {
      StringBuilder cmd = new StringBuilder("rename options=");
      for (Rename option : options) {
        cmd.append(option.value()).append(" ");
      }
      logAuditEvent(true, cmd.toString(), src, dst, resultingStat);
    }
  }
3274    
3275      private void renameToInternal(FSPermissionChecker pc, String src, String dst,
3276          boolean logRetryCache, Options.Rename... options) throws IOException {
3277        assert hasWriteLock();
3278        if (isPermissionEnabled) {
3279          // Rename does not operates on link targets
3280          // Do not resolveLink when checking permissions of src and dst
3281          // Check write access to parent of src
3282          checkPermission(pc, src, false, null, FsAction.WRITE, null, null, false);
3283          // Check write access to ancestor of dst
3284          checkPermission(pc, dst, false, FsAction.WRITE, null, null, null, false);
3285        }
3286    
3287        dir.renameTo(src, dst, logRetryCache, options);
3288      }
3289      
3290      /**
3291       * Remove the indicated file from namespace.
3292       * 
3293       * @see ClientProtocol#delete(String, boolean) for detailed description and 
3294       * description of exceptions
3295       */
3296      boolean delete(String src, boolean recursive)
3297          throws AccessControlException, SafeModeException,
3298          UnresolvedLinkException, IOException {
3299        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
3300        if (cacheEntry != null && cacheEntry.isSuccess()) {
3301          return true; // Return previous response
3302        }
3303        boolean ret = false;
3304        try {
3305          ret = deleteInt(src, recursive, cacheEntry != null);
3306        } catch (AccessControlException e) {
3307          logAuditEvent(false, "delete", src);
3308          throw e;
3309        } finally {
3310          RetryCache.setState(cacheEntry, ret);
3311        }
3312        return ret;
3313      }
3314          
3315      private boolean deleteInt(String src, boolean recursive, boolean logRetryCache)
3316          throws AccessControlException, SafeModeException,
3317          UnresolvedLinkException, IOException {
3318        if (NameNode.stateChangeLog.isDebugEnabled()) {
3319          NameNode.stateChangeLog.debug("DIR* NameSystem.delete: " + src);
3320        }
3321        boolean status = deleteInternal(src, recursive, true, logRetryCache);
3322        if (status) {
3323          logAuditEvent(true, "delete", src);
3324        }
3325        return status;
3326      }
3327        
3328      private FSPermissionChecker getPermissionChecker()
3329          throws AccessControlException {
3330        try {
3331          return new FSPermissionChecker(fsOwnerShortUserName, supergroup, getRemoteUser());
3332        } catch (IOException ioe) {
3333          throw new AccessControlException(ioe);
3334        }
3335      }
3336      
3337      /**
3338       * Remove a file/directory from the namespace.
3339       * <p>
3340       * For large directories, deletion is incremental. The blocks under
3341       * the directory are collected and deleted a small number at a time holding
3342       * the {@link FSNamesystem} lock.
3343       * <p>
3344       * For small directory or file the deletion is done in one shot.
3345       * 
3346       * @see ClientProtocol#delete(String, boolean) for description of exceptions
3347       */
3348      private boolean deleteInternal(String src, boolean recursive,
3349          boolean enforcePermission, boolean logRetryCache)
3350          throws AccessControlException, SafeModeException, UnresolvedLinkException,
3351                 IOException {
3352        BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
3353        List<INode> removedINodes = new ChunkedArrayList<INode>();
3354        FSPermissionChecker pc = getPermissionChecker();
3355        checkOperation(OperationCategory.WRITE);
3356        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
3357        boolean ret = false;
3358        writeLock();
3359        try {
3360          checkOperation(OperationCategory.WRITE);
3361          checkNameNodeSafeMode("Cannot delete " + src);
3362          src = FSDirectory.resolvePath(src, pathComponents, dir);
3363          if (!recursive && dir.isNonEmptyDirectory(src)) {
3364            throw new IOException(src + " is non empty");
3365          }
3366          if (enforcePermission && isPermissionEnabled) {
3367            checkPermission(pc, src, false, null, FsAction.WRITE, null,
3368                FsAction.ALL, false);
3369          }
3370          // Unlink the target directory from directory tree
3371          if (!dir.delete(src, collectedBlocks, removedINodes, logRetryCache)) {
3372            return false;
3373          }
3374          ret = true;
3375        } finally {
3376          writeUnlock();
3377        }
3378        getEditLog().logSync(); 
3379        removeBlocks(collectedBlocks); // Incremental deletion of blocks
3380        collectedBlocks.clear();
3381    
3382        dir.writeLock();
3383        try {
3384          dir.removeFromInodeMap(removedINodes);
3385        } finally {
3386          dir.writeUnlock();
3387        }
3388        removedINodes.clear();
3389        if (NameNode.stateChangeLog.isDebugEnabled()) {
3390          NameNode.stateChangeLog.debug("DIR* Namesystem.delete: "
3391            + src +" is removed");
3392        }
3393        return ret;
3394      }
3395    
3396      /**
3397       * From the given list, incrementally remove the blocks from blockManager
3398       * Writelock is dropped and reacquired every BLOCK_DELETION_INCREMENT to
3399       * ensure that other waiters on the lock can get in. See HDFS-2938
3400       * 
3401       * @param blocks
3402       *          An instance of {@link BlocksMapUpdateInfo} which contains a list
3403       *          of blocks that need to be removed from blocksMap
3404       */
3405      void removeBlocks(BlocksMapUpdateInfo blocks) {
3406        List<Block> toDeleteList = blocks.getToDeleteList();
3407        Iterator<Block> iter = toDeleteList.iterator();
3408        while (iter.hasNext()) {
3409          writeLock();
3410          try {
3411            for (int i = 0; i < BLOCK_DELETION_INCREMENT && iter.hasNext(); i++) {
3412              blockManager.removeBlock(iter.next());
3413            }
3414          } finally {
3415            writeUnlock();
3416          }
3417        }
3418      }
3419      
3420      /**
3421       * Remove leases, inodes and blocks related to a given path
3422       * @param src The given path
3423       * @param blocks Containing the list of blocks to be deleted from blocksMap
3424       * @param removedINodes Containing the list of inodes to be removed from 
3425       *                      inodesMap
3426       */
3427      void removePathAndBlocks(String src, BlocksMapUpdateInfo blocks,
3428          List<INode> removedINodes) {
3429        assert hasWriteLock();
3430        leaseManager.removeLeaseWithPrefixPath(src);
3431        // remove inodes from inodesMap
3432        if (removedINodes != null) {
3433          dir.removeFromInodeMap(removedINodes);
3434          removedINodes.clear();
3435        }
3436        if (blocks == null) {
3437          return;
3438        }
3439        
3440        removeBlocksAndUpdateSafemodeTotal(blocks);
3441      }
3442    
3443      /**
3444       * Removes the blocks from blocksmap and updates the safemode blocks total
3445       * 
3446       * @param blocks
3447       *          An instance of {@link BlocksMapUpdateInfo} which contains a list
3448       *          of blocks that need to be removed from blocksMap
3449       */
3450      void removeBlocksAndUpdateSafemodeTotal(BlocksMapUpdateInfo blocks) {
3451        assert hasWriteLock();
3452        // In the case that we are a Standby tailing edits from the
3453        // active while in safe-mode, we need to track the total number
3454        // of blocks and safe blocks in the system.
3455        boolean trackBlockCounts = isSafeModeTrackingBlocks();
3456        int numRemovedComplete = 0, numRemovedSafe = 0;
3457    
3458        for (Block b : blocks.getToDeleteList()) {
3459          if (trackBlockCounts) {
3460            BlockInfo bi = getStoredBlock(b);
3461            if (bi.isComplete()) {
3462              numRemovedComplete++;
3463              if (bi.numNodes() >= blockManager.minReplication) {
3464                numRemovedSafe++;
3465              }
3466            }
3467          }
3468          blockManager.removeBlock(b);
3469        }
3470        if (trackBlockCounts) {
3471          if (LOG.isDebugEnabled()) {
3472            LOG.debug("Adjusting safe-mode totals for deletion."
3473                + "decreasing safeBlocks by " + numRemovedSafe
3474                + ", totalBlocks by " + numRemovedComplete);
3475          }
3476          adjustSafeModeBlockTotals(-numRemovedSafe, -numRemovedComplete);
3477        }
3478      }
3479    
3480      /**
3481       * @see SafeModeInfo#shouldIncrementallyTrackBlocks
3482       */
3483      private boolean isSafeModeTrackingBlocks() {
3484        if (!haEnabled) {
3485          // Never track blocks incrementally in non-HA code.
3486          return false;
3487        }
3488        SafeModeInfo sm = this.safeMode;
3489        return sm != null && sm.shouldIncrementallyTrackBlocks();
3490      }
3491    
3492      /**
3493       * Get the file info for a specific file.
3494       *
3495       * @param src The string representation of the path to the file
3496       * @param resolveLink whether to throw UnresolvedLinkException 
3497       *        if src refers to a symlink
3498       *
3499       * @throws AccessControlException if access is denied
3500       * @throws UnresolvedLinkException if a symlink is encountered.
3501       *
3502       * @return object containing information regarding the file
3503       *         or null if file not found
3504       * @throws StandbyException 
3505       */
3506      HdfsFileStatus getFileInfo(String src, boolean resolveLink) 
3507        throws AccessControlException, UnresolvedLinkException,
3508               StandbyException, IOException {
3509        if (!DFSUtil.isValidName(src)) {
3510          throw new InvalidPathException("Invalid file name: " + src);
3511        }
3512        HdfsFileStatus stat = null;
3513        FSPermissionChecker pc = getPermissionChecker();
3514        checkOperation(OperationCategory.READ);
3515        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
3516        readLock();
3517        try {
3518          checkOperation(OperationCategory.READ);
3519          src = FSDirectory.resolvePath(src, pathComponents, dir);
3520          if (isPermissionEnabled) {
3521            checkPermission(pc, src, false, null, null, null, null, resolveLink);
3522          }
3523          stat = dir.getFileInfo(src, resolveLink);
3524        } catch (AccessControlException e) {
3525          logAuditEvent(false, "getfileinfo", src);
3526          throw e;
3527        } finally {
3528          readUnlock();
3529        }
3530        logAuditEvent(true, "getfileinfo", src);
3531        return stat;
3532      }
3533      
3534      /**
3535       * Returns true if the file is closed
3536       */
3537      boolean isFileClosed(String src) 
3538          throws AccessControlException, UnresolvedLinkException,
3539          StandbyException, IOException {
3540        FSPermissionChecker pc = getPermissionChecker();  
3541        checkOperation(OperationCategory.READ);
3542        readLock();
3543        try {
3544          checkOperation(OperationCategory.READ);
3545          if (isPermissionEnabled) {
3546            checkTraverse(pc, src);
3547          }
3548          return !INodeFile.valueOf(dir.getINode(src), src).isUnderConstruction();
3549        } catch (AccessControlException e) {
3550          if (isAuditEnabled() && isExternalInvocation()) {
3551            logAuditEvent(false, "isFileClosed", src);
3552          }
3553          throw e;
3554        } finally {
3555          readUnlock();
3556        }
3557      }
3558    
3559      /**
3560       * Create all the necessary directories
3561       */
3562      boolean mkdirs(String src, PermissionStatus permissions,
3563          boolean createParent) throws IOException, UnresolvedLinkException {
3564        boolean ret = false;
3565        try {
3566          ret = mkdirsInt(src, permissions, createParent);
3567        } catch (AccessControlException e) {
3568          logAuditEvent(false, "mkdirs", src);
3569          throw e;
3570        }
3571        return ret;
3572      }
3573    
3574      private boolean mkdirsInt(String src, PermissionStatus permissions,
3575          boolean createParent) throws IOException, UnresolvedLinkException {
3576        if(NameNode.stateChangeLog.isDebugEnabled()) {
3577          NameNode.stateChangeLog.debug("DIR* NameSystem.mkdirs: " + src);
3578        }
3579        if (!DFSUtil.isValidName(src)) {
3580          throw new InvalidPathException(src);
3581        }
3582        FSPermissionChecker pc = getPermissionChecker();
3583        checkOperation(OperationCategory.WRITE);
3584        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
3585        HdfsFileStatus resultingStat = null;
3586        boolean status = false;
3587        writeLock();
3588        try {
3589          checkOperation(OperationCategory.WRITE);   
3590          checkNameNodeSafeMode("Cannot create directory " + src);
3591          src = FSDirectory.resolvePath(src, pathComponents, dir);
3592          status = mkdirsInternal(pc, src, permissions, createParent);
3593          if (status) {
3594            resultingStat = dir.getFileInfo(src, false);
3595          }
3596        } finally {
3597          writeUnlock();
3598        }
3599        getEditLog().logSync();
3600        if (status) {
3601          logAuditEvent(true, "mkdirs", src, null, resultingStat);
3602        }
3603        return status;
3604      }
3605        
3606      /**
3607       * Create all the necessary directories
3608       */
3609      private boolean mkdirsInternal(FSPermissionChecker pc, String src,
3610          PermissionStatus permissions, boolean createParent) 
3611          throws IOException, UnresolvedLinkException {
3612        assert hasWriteLock();
3613        if (isPermissionEnabled) {
3614          checkTraverse(pc, src);
3615        }
3616        if (dir.isDirMutable(src)) {
3617          // all the users of mkdirs() are used to expect 'true' even if
3618          // a new directory is not created.
3619          return true;
3620        }
3621        if (isPermissionEnabled) {
3622          checkAncestorAccess(pc, src, FsAction.WRITE);
3623        }
3624        if (!createParent) {
3625          verifyParentDir(src);
3626        }
3627    
3628        // validate that we have enough inodes. This is, at best, a 
3629        // heuristic because the mkdirs() operation might need to 
3630        // create multiple inodes.
3631        checkFsObjectLimit();
3632    
3633        if (!dir.mkdirs(src, permissions, false, now())) {
3634          throw new IOException("Failed to create directory: " + src);
3635        }
3636        return true;
3637      }
3638    
3639      /**
3640       * Get the content summary for a specific file/dir.
3641       *
3642       * @param src The string representation of the path to the file
3643       *
3644       * @throws AccessControlException if access is denied
3645       * @throws UnresolvedLinkException if a symlink is encountered.
3646       * @throws FileNotFoundException if no file exists
3647       * @throws StandbyException
3648       * @throws IOException for issues with writing to the audit log
3649       *
3650       * @return object containing information regarding the file
3651       *         or null if file not found
3652       */
3653      ContentSummary getContentSummary(String src) throws IOException {
3654        FSPermissionChecker pc = getPermissionChecker();
3655        checkOperation(OperationCategory.READ);
3656        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
3657        readLock();
3658        boolean success = true;
3659        try {
3660          checkOperation(OperationCategory.READ);
3661          src = FSDirectory.resolvePath(src, pathComponents, dir);
3662          if (isPermissionEnabled) {
3663            checkPermission(pc, src, false, null, null, null, FsAction.READ_EXECUTE);
3664          }
3665          return dir.getContentSummary(src);
3666    
3667        } catch (AccessControlException ace) {
3668          success = false;
3669          throw ace;
3670        } finally {
3671          readUnlock();
3672          logAuditEvent(success, "contentSummary", src);
3673        }
3674      }
3675    
3676      /**
3677       * Set the namespace quota and diskspace quota for a directory.
3678       * See {@link ClientProtocol#setQuota(String, long, long)} for the 
3679       * contract.
3680       * 
3681       * Note: This does not support ".inodes" relative path.
3682       */
3683      void setQuota(String path, long nsQuota, long dsQuota) 
3684          throws IOException, UnresolvedLinkException {
3685        checkSuperuserPrivilege();
3686        checkOperation(OperationCategory.WRITE);
3687        writeLock();
3688        try {
3689          checkOperation(OperationCategory.WRITE);
3690          checkNameNodeSafeMode("Cannot set quota on " + path);
3691          dir.setQuota(path, nsQuota, dsQuota);
3692        } finally {
3693          writeUnlock();
3694        }
3695        getEditLog().logSync();
3696      }
3697    
3698      /** Persist all metadata about this file.
3699       * @param src The string representation of the path
3700       * @param clientName The string representation of the client
3701       * @param lastBlockLength The length of the last block 
3702       *                        under construction reported from client.
3703       * @throws IOException if path does not exist
3704       */
3705      void fsync(String src, String clientName, long lastBlockLength) 
3706          throws IOException, UnresolvedLinkException {
3707        NameNode.stateChangeLog.info("BLOCK* fsync: " + src + " for " + clientName);
3708        checkOperation(OperationCategory.WRITE);
3709        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
3710        writeLock();
3711        try {
3712          checkOperation(OperationCategory.WRITE);
3713          checkNameNodeSafeMode("Cannot fsync file " + src);
3714          src = FSDirectory.resolvePath(src, pathComponents, dir);
3715          INodeFile pendingFile  = checkLease(src, clientName);
3716          if (lastBlockLength > 0) {
3717            pendingFile.getFileUnderConstructionFeature().updateLengthOfLastBlock(
3718                pendingFile, lastBlockLength);
3719          }
3720          dir.persistBlocks(src, pendingFile, false);
3721        } finally {
3722          writeUnlock();
3723        }
3724        getEditLog().logSync();
3725      }
3726    
3727      /**
3728       * Move a file that is being written to be immutable.
3729       * @param src The filename
3730       * @param lease The lease for the client creating the file
3731       * @param recoveryLeaseHolder reassign lease to this holder if the last block
3732       *        needs recovery; keep current holder if null.
3733       * @throws AlreadyBeingCreatedException if file is waiting to achieve minimal
3734       *         replication;<br>
3735       *         RecoveryInProgressException if lease recovery is in progress.<br>
3736       *         IOException in case of an error.
3737       * @return true  if file has been successfully finalized and closed or 
3738       *         false if block recovery has been initiated. Since the lease owner
3739       *         has been changed and logged, caller should call logSync().
3740       */
  boolean internalReleaseLease(Lease lease, String src, 
      String recoveryLeaseHolder) throws AlreadyBeingCreatedException, 
      IOException, UnresolvedLinkException {
    LOG.info("Recovering " + lease + ", src=" + src);
    assert !isInSafeMode();
    assert hasWriteLock();

    final INodesInPath iip = dir.getLastINodeInPath(src);
    final INodeFile pendingFile = iip.getINode(0).asFile();
    int nrBlocks = pendingFile.numBlocks();
    BlockInfo[] blocks = pendingFile.getBlocks();

    // Count the leading run of COMPLETE blocks; on loop exit curBlock is
    // the first incomplete block, if one exists.
    int nrCompleteBlocks;
    BlockInfo curBlock = null;
    for(nrCompleteBlocks = 0; nrCompleteBlocks < nrBlocks; nrCompleteBlocks++) {
      curBlock = blocks[nrCompleteBlocks];
      if(!curBlock.isComplete())
        break;
      assert blockManager.checkMinReplication(curBlock) :
              "A COMPLETE block is not minimally replicated in " + src;
    }

    // If there are no incomplete blocks associated with this file,
    // then reap lease immediately and close the file.
    if(nrCompleteBlocks == nrBlocks) {
      finalizeINodeFileUnderConstruction(src, pendingFile,
          iip.getLatestSnapshotId());
      NameNode.stateChangeLog.warn("BLOCK*"
        + " internalReleaseLease: All existing blocks are COMPLETE,"
        + " lease removed, file closed.");
      return true;  // closed!
    }

    // Only the last and the penultimate blocks may be in non COMPLETE state.
    // If the penultimate block is not COMPLETE, then it must be COMMITTED.
    if(nrCompleteBlocks < nrBlocks - 2 ||
       nrCompleteBlocks == nrBlocks - 2 &&
         curBlock != null &&
         curBlock.getBlockUCState() != BlockUCState.COMMITTED) {
      final String message = "DIR* NameSystem.internalReleaseLease: "
        + "attempt to release a create lock on "
        + src + " but file is already closed.";
      NameNode.stateChangeLog.warn(message);
      throw new IOException(message);
    }

    // The last block is not COMPLETE, and
    // that the penultimate block if exists is either COMPLETE or COMMITTED
    final BlockInfo lastBlock = pendingFile.getLastBlock();
    BlockUCState lastBlockState = lastBlock.getBlockUCState();
    BlockInfo penultimateBlock = pendingFile.getPenultimateBlock();
    boolean penultimateBlockMinReplication;
    BlockUCState penultimateBlockState;
    if (penultimateBlock == null) {
      penultimateBlockState = BlockUCState.COMPLETE;
      // If penultimate block doesn't exist then its minReplication is met
      penultimateBlockMinReplication = true;
    } else {
      // NOTE(review): the state is assigned COMMITTED here rather than read
      // from the block itself, so the assert below cannot fail for this
      // branch — confirm this is intended.
      penultimateBlockState = BlockUCState.COMMITTED;
      penultimateBlockMinReplication = 
        blockManager.checkMinReplication(penultimateBlock);
    }
    assert penultimateBlockState == BlockUCState.COMPLETE ||
           penultimateBlockState == BlockUCState.COMMITTED :
           "Unexpected state of penultimate block in " + src;

    switch(lastBlockState) {
    case COMPLETE:
      assert false : "Already checked that the last block is incomplete";
      break;
    case COMMITTED:
      // Close file if committed blocks are minimally replicated
      if(penultimateBlockMinReplication &&
          blockManager.checkMinReplication(lastBlock)) {
        finalizeINodeFileUnderConstruction(src, pendingFile,
            iip.getLatestSnapshotId());
        NameNode.stateChangeLog.warn("BLOCK*"
          + " internalReleaseLease: Committed blocks are minimally replicated,"
          + " lease removed, file closed.");
        return true;  // closed!
      }
      // Cannot close file right now, since some blocks 
      // are not yet minimally replicated.
      // This may potentially cause infinite loop in lease recovery
      // if there are no valid replicas on data-nodes.
      String message = "DIR* NameSystem.internalReleaseLease: " +
          "Failed to release lease for file " + src +
          ". Committed blocks are waiting to be minimally replicated." +
          " Try again later.";
      NameNode.stateChangeLog.warn(message);
      throw new AlreadyBeingCreatedException(message);
    case UNDER_CONSTRUCTION:
    case UNDER_RECOVERY:
      final BlockInfoUnderConstruction uc = (BlockInfoUnderConstruction)lastBlock;
      // setup the last block locations from the blockManager if not known
      if (uc.getNumExpectedLocations() == 0) {
        uc.setExpectedLocations(blockManager.getStorages(lastBlock));
      }

      if (uc.getNumExpectedLocations() == 0 && uc.getNumBytes() == 0) {
        // There is no datanode reported to this block.
        // may be client have crashed before writing data to pipeline.
        // This blocks doesn't need any recovery.
        // We can remove this block and close the file.
        pendingFile.removeLastBlock(lastBlock);
        finalizeINodeFileUnderConstruction(src, pendingFile,
            iip.getLatestSnapshotId());
        NameNode.stateChangeLog.warn("BLOCK* internalReleaseLease: "
            + "Removed empty last block and closed file.");
        return true;
      }
      // start recovery of the last block for this file
      long blockRecoveryId = nextGenerationStamp(isLegacyBlock(uc));
      lease = reassignLease(lease, src, recoveryLeaseHolder, pendingFile);
      uc.initializeBlockRecovery(blockRecoveryId);
      // Renew the (possibly reassigned) lease so it stays current while
      // block recovery is in progress.
      leaseManager.renewLease(lease);
      // Cannot close file right now, since the last block requires recovery.
      // This may potentially cause infinite loop in lease recovery
      // if there are no valid replicas on data-nodes.
      NameNode.stateChangeLog.warn(
                "DIR* NameSystem.internalReleaseLease: " +
                "File " + src + " has not been closed." +
               " Lease recovery is in progress. " +
                "RecoveryId = " + blockRecoveryId + " for block " + lastBlock);
      break;
    }
    return false;
  }
3869    
3870      private Lease reassignLease(Lease lease, String src, String newHolder,
3871          INodeFile pendingFile) {
3872        assert hasWriteLock();
3873        if(newHolder == null)
3874          return lease;
3875        // The following transaction is not synced. Make sure it's sync'ed later.
3876        logReassignLease(lease.getHolder(), src, newHolder);
3877        return reassignLeaseInternal(lease, src, newHolder, pendingFile);
3878      }
3879      
  /**
   * Reassigns the lease on {@code src} to {@code newHolder} and records the
   * new client name on the file's under-construction feature.
   * Caller must hold the FSNamesystem write lock.
   */
  Lease reassignLeaseInternal(Lease lease, String src, String newHolder,
      INodeFile pendingFile) {
    assert hasWriteLock();
    pendingFile.getFileUnderConstructionFeature().setClientName(newHolder);
    return leaseManager.reassignLease(lease, src, newHolder);
  }
3886    
3887      private void commitOrCompleteLastBlock(final INodeFile fileINode,
3888          final Block commitBlock) throws IOException {
3889        assert hasWriteLock();
3890        Preconditions.checkArgument(fileINode.isUnderConstruction());
3891        if (!blockManager.commitOrCompleteLastBlock(fileINode, commitBlock)) {
3892          return;
3893        }
3894    
3895        // Adjust disk space consumption if required
3896        final long diff = fileINode.getPreferredBlockSize() - commitBlock.getNumBytes();    
3897        if (diff > 0) {
3898          try {
3899            String path = fileINode.getFullPathName();
3900            dir.updateSpaceConsumed(path, 0, -diff*fileINode.getFileReplication());
3901          } catch (IOException e) {
3902            LOG.warn("Unexpected exception while updating disk space.", e);
3903          }
3904        }
3905      }
3906    
3907      private void finalizeINodeFileUnderConstruction(String src,
3908          INodeFile pendingFile, int latestSnapshot) throws IOException,
3909          UnresolvedLinkException {
3910        assert hasWriteLock();
3911        FileUnderConstructionFeature uc = pendingFile.getFileUnderConstructionFeature();
3912        Preconditions.checkArgument(uc != null);
3913        leaseManager.removeLease(uc.getClientName(), src);
3914        
3915        pendingFile = pendingFile.recordModification(latestSnapshot);
3916    
3917        // The file is no longer pending.
3918        // Create permanent INode, update blocks. No need to replace the inode here
3919        // since we just remove the uc feature from pendingFile
3920        final INodeFile newFile = pendingFile.toCompleteFile(now());
3921    
3922        // close file and persist block allocations for this file
3923        dir.closeFile(src, newFile);
3924    
3925        blockManager.checkReplication(newFile);
3926      }
3927    
  /** @return the stored {@link BlockInfo} for the given block, looked up in
   *          the blockManager. */
  @VisibleForTesting
  BlockInfo getStoredBlock(Block block) {
    return blockManager.getStoredBlock(block);
  }
3932      
3933      @Override
3934      public boolean isInSnapshot(BlockInfoUnderConstruction blockUC) {
3935        assert hasReadLock();
3936        final BlockCollection bc = blockUC.getBlockCollection();
3937        if (bc == null || !(bc instanceof INodeFile)
3938            || !((INodeFile) bc).isUnderConstruction()) {
3939          return false;
3940        }
3941    
3942        INodeFile inodeUC = (INodeFile) bc;
3943        String fullName = inodeUC.getName();
3944        try {
3945          if (fullName != null && fullName.startsWith(Path.SEPARATOR)
3946              && dir.getINode(fullName) == inodeUC) {
3947            // If file exists in normal path then no need to look in snapshot
3948            return false;
3949          }
3950        } catch (UnresolvedLinkException e) {
3951          LOG.error("Error while resolving the link : " + fullName, e);
3952          return false;
3953        }
3954        /*
3955         * 1. if bc is an instance of INodeFileUnderConstructionWithSnapshot, and
3956         * bc is not in the current fsdirectory tree, bc must represent a snapshot
3957         * file. 
3958         * 2. if fullName is not an absolute path, bc cannot be existent in the 
3959         * current fsdirectory tree. 
3960         * 3. if bc is not the current node associated with fullName, bc must be a
3961         * snapshot inode.
3962         */
3963        return true;
3964      }
3965    
  void commitBlockSynchronization(ExtendedBlock lastblock,
      long newgenerationstamp, long newlength,
      boolean closeFile, boolean deleteblock, DatanodeID[] newtargets,
      String[] newtargetstorages)
      throws IOException, UnresolvedLinkException {
    // NOTE(review): assumes newtargets is non-null — Arrays.asList would
    // throw an NPE here otherwise; confirm callers guarantee this.
    LOG.info("commitBlockSynchronization(lastblock=" + lastblock
             + ", newgenerationstamp=" + newgenerationstamp
             + ", newlength=" + newlength
             + ", newtargets=" + Arrays.asList(newtargets)
             + ", closeFile=" + closeFile
             + ", deleteBlock=" + deleteblock
             + ")");
    checkOperation(OperationCategory.WRITE);
    // Path of the recovered file; resolved under the lock, logged afterwards.
    String src = "";
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      // If a DN tries to commit to the standby, the recovery will
      // fail, and the next retry will succeed on the new NN.
  
      checkNameNodeSafeMode(
          "Cannot commitBlockSynchronization while in safe mode");
      final BlockInfo storedBlock = getStoredBlock(
          ExtendedBlock.getLocalBlock(lastblock));
      if (storedBlock == null) {
        if (deleteblock) {
          // This may be a retry attempt so ignore the failure
          // to locate the block.
          if (LOG.isDebugEnabled()) {
            LOG.debug("Block (=" + lastblock + ") not found");
          }
          return;
        } else {
          throw new IOException("Block (=" + lastblock + ") not found");
        }
      }
      INodeFile iFile = ((INode)storedBlock.getBlockCollection()).asFile();
      if (!iFile.isUnderConstruction() || storedBlock.isComplete()) {
        // Likely a retry of an already-applied commit; nothing to do.
        if (LOG.isDebugEnabled()) {
          LOG.debug("Unexpected block (=" + lastblock
                    + ") since the file (=" + iFile.getLocalName()
                    + ") is not under construction");
        }
        return;
      }

      // Reject commits from stale recovery attempts: the reported
      // generation stamp must match the recovery id issued when block
      // recovery was initiated.
      long recoveryId =
        ((BlockInfoUnderConstruction)storedBlock).getBlockRecoveryId();
      if(recoveryId != newgenerationstamp) {
        throw new IOException("The recovery id " + newgenerationstamp
                              + " does not match current recovery id "
                              + recoveryId + " for block " + lastblock); 
      }

      if (deleteblock) {
        // Recovery determined the block should be discarded entirely.
        Block blockToDel = ExtendedBlock.getLocalBlock(lastblock);
        boolean remove = iFile.removeLastBlock(blockToDel);
        if (remove) {
          blockManager.removeBlockFromMap(storedBlock);
        }
      }
      else {
        // update last block
        storedBlock.setGenerationStamp(newgenerationstamp);
        storedBlock.setNumBytes(newlength);

        // find the DatanodeDescriptor objects
        // There should be no locations in the blockManager till now because the
        // file is underConstruction
        ArrayList<DatanodeDescriptor> trimmedTargets =
            new ArrayList<DatanodeDescriptor>(newtargets.length);
        ArrayList<String> trimmedStorages =
            new ArrayList<String>(newtargets.length);
        if (newtargets.length > 0) {
          for (int i = 0; i < newtargets.length; ++i) {
            // try to get targetNode
            DatanodeDescriptor targetNode =
                blockManager.getDatanodeManager().getDatanode(newtargets[i]);
            if (targetNode != null) {
              trimmedTargets.add(targetNode);
              trimmedStorages.add(newtargetstorages[i]);
            } else if (LOG.isDebugEnabled()) {
              LOG.debug("DatanodeDescriptor (=" + newtargets[i] + ") not found");
            }
          }
        }
        if ((closeFile) && !trimmedTargets.isEmpty()) {
          // the file is getting closed. Insert block locations into blockManager.
          // Otherwise fsck will report these blocks as MISSING, especially if the
          // blocksReceived from Datanodes take a long time to arrive.
          for (int i = 0; i < trimmedTargets.size(); i++) {
            trimmedTargets.get(i).addBlock(
              trimmedStorages.get(i), storedBlock);
          }
        }

        // add pipeline locations into the INodeUnderConstruction
        DatanodeStorageInfo[] trimmedStorageInfos =
            blockManager.getDatanodeManager().getDatanodeStorageInfos(
                trimmedTargets.toArray(new DatanodeID[trimmedTargets.size()]),
                trimmedStorages.toArray(new String[trimmedStorages.size()]));
        iFile.setLastBlock(storedBlock, trimmedStorageInfos);
      }

      if (closeFile) {
        src = closeFileCommitBlocks(iFile, storedBlock);
      } else {
        // If this commit does not want to close the file, persist blocks
        src = persistBlocks(iFile, false);
      }
    } finally {
      writeUnlock();
    }
    // Make the edits durable before reporting success.
    getEditLog().logSync();
    if (closeFile) {
      LOG.info("commitBlockSynchronization(newblock=" + lastblock
          + ", file=" + src
          + ", newgenerationstamp=" + newgenerationstamp
          + ", newlength=" + newlength
          + ", newtargets=" + Arrays.asList(newtargets) + ") successful");
    } else {
      LOG.info("commitBlockSynchronization(" + lastblock + ") successful");
    }
  }
4090    
4091      /**
4092       *
4093       * @param pendingFile
4094       * @param storedBlock
4095       * @return Path of the file that was closed.
4096       * @throws IOException
4097       */
4098      @VisibleForTesting
4099      String closeFileCommitBlocks(INodeFile pendingFile, BlockInfo storedBlock)
4100          throws IOException {
4101        String src = pendingFile.getFullPathName();
4102    
4103        // commit the last block and complete it if it has minimum replicas
4104        commitOrCompleteLastBlock(pendingFile, storedBlock);
4105    
4106        //remove lease, close file
4107        finalizeINodeFileUnderConstruction(src, pendingFile,
4108            Snapshot.findLatestSnapshot(pendingFile, Snapshot.CURRENT_STATE_ID));
4109    
4110        return src;
4111      }
4112    
4113      /**
4114       * Persist the block list for the given file.
4115       *
4116       * @param pendingFile
4117       * @return Path to the given file.
4118       * @throws IOException
4119       */
4120      @VisibleForTesting
4121      String persistBlocks(INodeFile pendingFile, boolean logRetryCache)
4122          throws IOException {
4123        String src = pendingFile.getFullPathName();
4124        dir.persistBlocks(src, pendingFile, logRetryCache);
4125        return src;
4126      }
4127    
4128      /**
4129       * Renew the lease(s) held by the given client
4130       */
4131      void renewLease(String holder) throws IOException {
4132        checkOperation(OperationCategory.WRITE);
4133        readLock();
4134        try {
4135          checkOperation(OperationCategory.WRITE);
4136          checkNameNodeSafeMode("Cannot renew lease for " + holder);
4137          leaseManager.renewLease(holder);
4138        } finally {
4139          readUnlock();
4140        }
4141      }
4142    
4143      /**
4144       * Get a partial listing of the indicated directory
4145       *
4146       * @param src the directory name
4147       * @param startAfter the name to start after
4148       * @param needLocation if blockLocations need to be returned
4149       * @return a partial listing starting after startAfter
4150       * 
4151       * @throws AccessControlException if access is denied
4152       * @throws UnresolvedLinkException if symbolic link is encountered
4153       * @throws IOException if other I/O error occurred
4154       */
4155      DirectoryListing getListing(String src, byte[] startAfter,
4156          boolean needLocation) 
4157          throws AccessControlException, UnresolvedLinkException, IOException {
4158        try {
4159          return getListingInt(src, startAfter, needLocation);
4160        } catch (AccessControlException e) {
4161          logAuditEvent(false, "listStatus", src);
4162          throw e;
4163        }
4164      }
4165    
4166      private DirectoryListing getListingInt(String src, byte[] startAfter,
4167          boolean needLocation) 
4168        throws AccessControlException, UnresolvedLinkException, IOException {
4169        DirectoryListing dl;
4170        FSPermissionChecker pc = getPermissionChecker();
4171        checkOperation(OperationCategory.READ);
4172        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
4173        String startAfterString = new String(startAfter);
4174        readLock();
4175        try {
4176          checkOperation(OperationCategory.READ);
4177          src = FSDirectory.resolvePath(src, pathComponents, dir);
4178    
4179          // Get file name when startAfter is an INodePath
4180          if (FSDirectory.isReservedName(startAfterString)) {
4181            byte[][] startAfterComponents = FSDirectory
4182                .getPathComponentsForReservedPath(startAfterString);
4183            try {
4184              String tmp = FSDirectory.resolvePath(src, startAfterComponents, dir);
4185              byte[][] regularPath = INode.getPathComponents(tmp);
4186              startAfter = regularPath[regularPath.length - 1];
4187            } catch (IOException e) {
4188              // Possibly the inode is deleted
4189              throw new DirectoryListingStartAfterNotFoundException(
4190                  "Can't find startAfter " + startAfterString);
4191            }
4192          }
4193          
4194          if (isPermissionEnabled) {
4195            if (dir.isDir(src)) {
4196              checkPathAccess(pc, src, FsAction.READ_EXECUTE);
4197            } else {
4198              checkTraverse(pc, src);
4199            }
4200          }
4201          logAuditEvent(true, "listStatus", src);
4202          dl = dir.getListing(src, startAfter, needLocation);
4203        } finally {
4204          readUnlock();
4205        }
4206        return dl;
4207      }
4208    
4209      /////////////////////////////////////////////////////////
4210      //
4211      // These methods are called by datanodes
4212      //
4213      /////////////////////////////////////////////////////////
4214      /**
4215       * Register Datanode.
4216       * <p>
4217       * The purpose of registration is to identify whether the new datanode
4218       * serves a new data storage, and will report new data block copies,
4219       * which the namenode was not aware of; or the datanode is a replacement
4220       * node for the data storage that was previously served by a different
4221       * or the same (in terms of host:port) datanode.
4222       * The data storages are distinguished by their storageIDs. When a new
4223       * data storage is reported the namenode issues a new unique storageID.
4224       * <p>
4225       * Finally, the namenode returns its namespaceID as the registrationID
4226       * for the datanodes. 
4227       * namespaceID is a persistent attribute of the name space.
4228       * The registrationID is checked every time the datanode is communicating
4229       * with the namenode. 
4230       * Datanodes with inappropriate registrationID are rejected.
4231       * If the namenode stops, and then restarts it can restore its 
4232       * namespaceID and will continue serving the datanodes that has previously
4233       * registered with the namenode without restarting the whole cluster.
4234       * 
4235       * @see org.apache.hadoop.hdfs.server.datanode.DataNode
4236       */
4237      void registerDatanode(DatanodeRegistration nodeReg) throws IOException {
4238        writeLock();
4239        try {
4240          getBlockManager().getDatanodeManager().registerDatanode(nodeReg);
4241          checkSafeMode();
4242        } finally {
4243          writeUnlock();
4244        }
4245      }
4246      
4247      /**
4248       * Get registrationID for datanodes based on the namespaceID.
4249       * 
4250       * @see #registerDatanode(DatanodeRegistration)
4251       * @return registration ID
4252       */
4253      String getRegistrationID() {
4254        return Storage.getRegistrationID(dir.fsImage.getStorage());
4255      }
4256    
4257      /**
4258       * The given node has reported in.  This method should:
4259       * 1) Record the heartbeat, so the datanode isn't timed out
4260       * 2) Adjust usage stats for future block allocation
4261       * 
4262       * If a substantial amount of time passed since the last datanode 
4263       * heartbeat then request an immediate block report.  
4264       * 
4265       * @return an array of datanode commands 
4266       * @throws IOException
4267       */
4268      HeartbeatResponse handleHeartbeat(DatanodeRegistration nodeReg,
4269          StorageReport[] reports, long cacheCapacity, long cacheUsed,
4270          int xceiverCount, int xmitsInProgress, int failedVolumes)
4271            throws IOException {
4272        readLock();
4273        try {
4274          //get datanode commands
4275          final int maxTransfer = blockManager.getMaxReplicationStreams()
4276              - xmitsInProgress;
4277          DatanodeCommand[] cmds = blockManager.getDatanodeManager().handleHeartbeat(
4278              nodeReg, reports, blockPoolId, cacheCapacity, cacheUsed,
4279              xceiverCount, maxTransfer, failedVolumes);
4280          
4281          //create ha status
4282          final NNHAStatusHeartbeat haState = new NNHAStatusHeartbeat(
4283              haContext.getState().getServiceState(),
4284              getFSImage().getLastAppliedOrWrittenTxId());
4285    
4286          return new HeartbeatResponse(cmds, haState, rollingUpgradeInfo);
4287        } finally {
4288          readUnlock();
4289        }
4290      }
4291    
4292      /**
4293       * Returns whether or not there were available resources at the last check of
4294       * resources.
4295       *
4296       * @return true if there were sufficient resources available, false otherwise.
4297       */
4298      boolean nameNodeHasResourcesAvailable() {
4299        return hasResourcesAvailable;
4300      }
4301    
4302      /**
4303       * Perform resource checks and cache the results.
4304       * @throws IOException
4305       */
4306      void checkAvailableResources() {
4307        Preconditions.checkState(nnResourceChecker != null,
4308            "nnResourceChecker not initialized");
4309        hasResourcesAvailable = nnResourceChecker.hasAvailableDiskSpace();
4310      }
4311    
4312      /**
4313       * Periodically calls hasAvailableResources of NameNodeResourceChecker, and if
4314       * there are found to be insufficient resources available, causes the NN to
4315       * enter safe mode. If resources are later found to have returned to
4316       * acceptable levels, this daemon will cause the NN to exit safe mode.
4317       */
4318      class NameNodeResourceMonitor implements Runnable  {
4319        boolean shouldNNRmRun = true;
4320        @Override
4321        public void run () {
4322          try {
4323            while (fsRunning && shouldNNRmRun) {
4324              checkAvailableResources();
4325              if(!nameNodeHasResourcesAvailable()) {
4326                String lowResourcesMsg = "NameNode low on available disk space. ";
4327                if (!isInSafeMode()) {
4328                  FSNamesystem.LOG.warn(lowResourcesMsg + "Entering safe mode.");
4329                } else {
4330                  FSNamesystem.LOG.warn(lowResourcesMsg + "Already in safe mode.");
4331                }
4332                enterSafeMode(true);
4333              }
4334              try {
4335                Thread.sleep(resourceRecheckInterval);
4336              } catch (InterruptedException ie) {
4337                // Deliberately ignore
4338              }
4339            }
4340          } catch (Exception e) {
4341            FSNamesystem.LOG.error("Exception in NameNodeResourceMonitor: ", e);
4342          }
4343        }
4344    
4345        public void stopMonitor() {
4346          shouldNNRmRun = false;
4347        }
4348     }
4349    
4350      class NameNodeEditLogRoller implements Runnable {
4351    
4352        private boolean shouldRun = true;
4353        private final long rollThreshold;
4354        private final long sleepIntervalMs;
4355    
4356        public NameNodeEditLogRoller(long rollThreshold, int sleepIntervalMs) {
4357            this.rollThreshold = rollThreshold;
4358            this.sleepIntervalMs = sleepIntervalMs;
4359        }
4360    
4361        @Override
4362        public void run() {
4363          while (fsRunning && shouldRun) {
4364            try {
4365              FSEditLog editLog = getFSImage().getEditLog();
4366              long numEdits =
4367                  editLog.getLastWrittenTxId() - editLog.getCurSegmentTxId();
4368              if (numEdits > rollThreshold) {
4369                FSNamesystem.LOG.info("NameNode rolling its own edit log because"
4370                    + " number of edits in open segment exceeds threshold of "
4371                    + rollThreshold);
4372                rollEditLog();
4373              }
4374              Thread.sleep(sleepIntervalMs);
4375            } catch (InterruptedException e) {
4376              FSNamesystem.LOG.info(NameNodeEditLogRoller.class.getSimpleName()
4377                  + " was interrupted, exiting");
4378              break;
4379            } catch (Exception e) {
4380              FSNamesystem.LOG.error("Swallowing exception in "
4381                  + NameNodeEditLogRoller.class.getSimpleName() + ":", e);
4382            }
4383          }
4384        }
4385    
4386        public void stop() {
4387          shouldRun = false;
4388        }
4389      }
4390    
  /** @return the FSImage backing this namesystem's directory. */
  public FSImage getFSImage() {
    return dir.fsImage;
  }
4394    
  /** @return the edit log of the current FSImage. */
  public FSEditLog getEditLog() {
    return getFSImage().getEditLog();
  }    
4398    
4399      private void checkBlock(ExtendedBlock block) throws IOException {
4400        if (block != null && !this.blockPoolId.equals(block.getBlockPoolId())) {
4401          throw new IOException("Unexpected BlockPoolId " + block.getBlockPoolId()
4402              + " - expected " + blockPoolId);
4403        }
4404      }
4405    
  /** Number of missing blocks, as tracked by the block manager. */
  @Metric({"MissingBlocks", "Number of missing blocks"})
  public long getMissingBlocksCount() {
    // not locking: a slightly stale value is acceptable for a metric
    return blockManager.getMissingBlocksCount();
  }
4411      
  /** Number of expired datanode heartbeats (from datanode statistics). */
  @Metric({"ExpiredHeartbeats", "Number of expired heartbeats"})
  public int getExpiredHeartbeats() {
    return datanodeStatistics.getExpiredHeartbeats();
  }
4416      
  /** Transactions written since the most recent checkpoint. */
  @Metric({"TransactionsSinceLastCheckpoint",
      "Number of transactions since last checkpoint"})
  public long getTransactionsSinceLastCheckpoint() {
    return getEditLog().getLastWrittenTxId() -
        getFSImage().getStorage().getMostRecentCheckpointTxId();
  }
4423      
  /**
   * Transactions in the currently open edit log segment; 0 on a standby or
   * when no segment is open.
   */
  @Metric({"TransactionsSinceLastLogRoll",
      "Number of transactions since last edit log roll"})
  public long getTransactionsSinceLastLogRoll() {
    if (isInStandbyState() || !getEditLog().isSegmentOpen()) {
      return 0;
    } else {
      // +1: the count is inclusive of the segment's first transaction.
      return getEditLog().getLastWrittenTxId() -
        getEditLog().getCurSegmentTxId() + 1;
    }
  }
4434      
  /** Highest transaction ID written to the edit log so far. */
  @Metric({"LastWrittenTransactionId", "Transaction ID written to the edit log"})
  public long getLastWrittenTransactionId() {
    return getEditLog().getLastWrittenTxId();
  }
4439      
  /** Epoch time (ms) of the most recent checkpoint, from fsimage storage. */
  @Metric({"LastCheckpointTime",
      "Time in milliseconds since the epoch of the last checkpoint"})
  public long getLastCheckpointTime() {
    return getFSImage().getStorage().getMostRecentCheckpointTime();
  }
4445    
4446      /** @see ClientProtocol#getStats() */
4447      long[] getStats() {
4448        final long[] stats = datanodeStatistics.getStats();
4449        stats[ClientProtocol.GET_STATS_UNDER_REPLICATED_IDX] = getUnderReplicatedBlocks();
4450        stats[ClientProtocol.GET_STATS_CORRUPT_BLOCKS_IDX] = getCorruptReplicaBlocks();
4451        stats[ClientProtocol.GET_STATS_MISSING_BLOCKS_IDX] = getMissingBlocksCount();
4452        return stats;
4453      }
4454    
  /** Total raw capacity reported by datanodes, in bytes. */
  @Override // FSNamesystemMBean
  @Metric({"CapacityTotal",
      "Total raw capacity of data nodes in bytes"})
  public long getCapacityTotal() {
    return datanodeStatistics.getCapacityTotal();
  }
4461    
  /** Total raw capacity reported by datanodes, rounded to GB. */
  @Metric({"CapacityTotalGB",
      "Total raw capacity of data nodes in GB"})
  public float getCapacityTotalGB() {
    return DFSUtil.roundBytesToGB(getCapacityTotal());
  }
4467    
  /** Total DFS-used capacity across all datanodes, in bytes. */
  @Override // FSNamesystemMBean
  @Metric({"CapacityUsed",
      "Total used capacity across all data nodes in bytes"})
  public long getCapacityUsed() {
    return datanodeStatistics.getCapacityUsed();
  }
4474    
  /** Total DFS-used capacity across all datanodes, rounded to GB. */
  @Metric({"CapacityUsedGB",
      "Total used capacity across all data nodes in GB"})
  public float getCapacityUsedGB() {
    return DFSUtil.roundBytesToGB(getCapacityUsed());
  }
4480    
  /** Remaining capacity across all datanodes, in bytes. */
  @Override // FSNamesystemMBean
  @Metric({"CapacityRemaining", "Remaining capacity in bytes"})
  public long getCapacityRemaining() {
    return datanodeStatistics.getCapacityRemaining();
  }
4486    
  /** Remaining capacity across all datanodes, rounded to GB. */
  @Metric({"CapacityRemainingGB", "Remaining capacity in GB"})
  public float getCapacityRemainingGB() {
    return DFSUtil.roundBytesToGB(getCapacityRemaining());
  }
4491    
  /** Space used on datanodes for non-DFS purposes, in bytes. */
  @Metric({"CapacityUsedNonDFS",
      "Total space used by data nodes for non DFS purposes in bytes"})
  public long getCapacityUsedNonDFS() {
    return datanodeStatistics.getCapacityUsedNonDFS();
  }
4497    
4498      /**
4499       * Total number of connections.
4500       */
4501      @Override // FSNamesystemMBean
4502      @Metric
4503      public int getTotalLoad() {
4504        return datanodeStatistics.getXceiverCount();
4505      }
4506      
  /** Number of snapshottable directories, from the snapshot manager. */
  @Metric({ "SnapshottableDirectories", "Number of snapshottable directories" })
  public int getNumSnapshottableDirs() {
    return this.snapshotManager.getNumSnapshottableDirs();
  }
4511    
  /** Number of snapshots, from the snapshot manager. */
  @Metric({ "Snapshots", "The number of snapshots" })
  public int getNumSnapshots() {
    return this.snapshotManager.getNumSnapshots();
  }
4516    
  /** @return snapshot statistics (directory and snapshot counts) as JSON. */
  @Override
  public String getSnapshotStats() {
    Map<String, Object> info = new HashMap<String, Object>();
    info.put("SnapshottableDirectories", this.getNumSnapshottableDirs());
    info.put("Snapshots", this.getNumSnapshots());
    return JSON.toString(info);
  }
4524    
4525      int getNumberOfDatanodes(DatanodeReportType type) {
4526        readLock();
4527        try {
4528          return getBlockManager().getDatanodeManager().getDatanodeListForReport(
4529              type).size(); 
4530        } finally {
4531          readUnlock();
4532        }
4533      }
4534    
4535      DatanodeInfo[] datanodeReport(final DatanodeReportType type
4536          ) throws AccessControlException, StandbyException {
4537        checkSuperuserPrivilege();
4538        checkOperation(OperationCategory.UNCHECKED);
4539        readLock();
4540        try {
4541          checkOperation(OperationCategory.UNCHECKED);
4542          final DatanodeManager dm = getBlockManager().getDatanodeManager();      
4543          final List<DatanodeDescriptor> results = dm.getDatanodeListForReport(type);
4544    
4545          DatanodeInfo[] arr = new DatanodeInfo[results.size()];
4546          for (int i=0; i<arr.length; i++) {
4547            arr[i] = new DatanodeInfo(results.get(i));
4548          }
4549          return arr;
4550        } finally {
4551          readUnlock();
4552        }
4553      }
4554    
4555      /**
4556       * Save namespace image.
4557       * This will save current namespace into fsimage file and empty edits file.
4558       * Requires superuser privilege and safe mode.
4559       * 
4560       * @throws AccessControlException if superuser privilege is violated.
4561       * @throws IOException if 
4562       */
4563      void saveNamespace() throws AccessControlException, IOException {
4564        checkOperation(OperationCategory.UNCHECKED);
4565        checkSuperuserPrivilege();
4566        
4567        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
4568        if (cacheEntry != null && cacheEntry.isSuccess()) {
4569          return; // Return previous response
4570        }
4571        boolean success = false;
4572        readLock();
4573        try {
4574          checkOperation(OperationCategory.UNCHECKED);
4575    
4576          if (!isInSafeMode()) {
4577            throw new IOException("Safe mode should be turned ON "
4578                + "in order to create namespace image.");
4579          }
4580          getFSImage().saveNamespace(this);
4581          success = true;
4582        } finally {
4583          readUnlock();
4584          RetryCache.setState(cacheEntry, success);
4585        }
4586        LOG.info("New namespace image has been created");
4587      }
4588      
4589      /**
4590       * Enables/Disables/Checks restoring failed storage replicas if the storage becomes available again.
4591       * Requires superuser privilege.
4592       * 
4593       * @throws AccessControlException if superuser privilege is violated.
4594       */
4595      boolean restoreFailedStorage(String arg) throws AccessControlException,
4596          StandbyException {
4597        checkSuperuserPrivilege();
4598        checkOperation(OperationCategory.UNCHECKED);
4599        writeLock();
4600        try {
4601          checkOperation(OperationCategory.UNCHECKED);
4602          
4603          // if it is disabled - enable it and vice versa.
4604          if(arg.equals("check"))
4605            return getFSImage().getStorage().getRestoreFailedStorage();
4606          
4607          boolean val = arg.equals("true");  // false if not
4608          getFSImage().getStorage().setRestoreFailedStorage(val);
4609          
4610          return val;
4611        } finally {
4612          writeUnlock();
4613        }
4614      }
4615    
  /** @return the NameNode start time as a fresh {@link Date} instance. */
  Date getStartTime() {
    return new Date(startTime); 
  }
4619        
  /**
   * Finalizes the current upgrade via the FSImage.
   * Requires superuser privilege.
   *
   * @throws IOException if finalization fails
   */
  void finalizeUpgrade() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.UNCHECKED);
    writeLock();
    try {
      checkOperation(OperationCategory.UNCHECKED);
      getFSImage().finalizeUpgrade(this.isHaEnabled() && inActiveState());
    } finally {
      writeUnlock();
    }
  }
4631    
  /**
   * Re-reads datanode host configuration from a freshly loaded
   * HdfsConfiguration and updates datanode membership accordingly.
   * Requires superuser privilege.
   *
   * @throws IOException if the refresh fails
   */
  void refreshNodes() throws IOException {
    checkOperation(OperationCategory.UNCHECKED);
    checkSuperuserPrivilege();
    getBlockManager().getDatanodeManager().refreshNodes(new HdfsConfiguration());
  }
4637    
  /**
   * Tells datanodes (via the DatanodeManager) to use a new balancer
   * bandwidth value. Requires superuser privilege.
   *
   * @param bandwidth the new balancer bandwidth value
   * @throws IOException if the update fails
   */
  void setBalancerBandwidth(long bandwidth) throws IOException {
    checkOperation(OperationCategory.UNCHECKED);
    checkSuperuserPrivilege();
    getBlockManager().getDatanodeManager().setBalancerBandwidth(bandwidth);
  }
4643    
4644      /**
4645       * SafeModeInfo contains information related to the safe mode.
4646       * <p>
4647       * An instance of {@link SafeModeInfo} is created when the name node
4648       * enters safe mode.
4649       * <p>
4650       * During name node startup {@link SafeModeInfo} counts the number of
4651       * <em>safe blocks</em>, those that have at least the minimal number of
4652       * replicas, and calculates the ratio of safe blocks to the total number
4653       * of blocks in the system, which is the size of blocks in
4654       * {@link FSNamesystem#blockManager}. When the ratio reaches the
4655       * {@link #threshold} it starts the SafeModeMonitor daemon in order
4656       * to monitor whether the safe mode {@link #extension} is passed.
4657       * Then it leaves safe mode and destroys itself.
4658       * <p>
4659       * If safe mode is turned on manually then the number of safe blocks is
4660       * not tracked because the name node is not intended to leave safe mode
4661       * automatically in the case.
4662       *
4663       * @see ClientProtocol#setSafeMode(HdfsConstants.SafeModeAction, boolean)
4664       */
4665      public class SafeModeInfo {
    // configuration fields
    /** Safe mode threshold condition %.*/
    private final double threshold;
    /** Safe mode minimum number of datanodes alive */
    private final int datanodeThreshold;
    /** Safe mode extension after the threshold (ms). */
    private int extension;
    /** Min replication required by safe mode. */
    private final int safeReplication;
    /** threshold for populating needed replication queues */
    private final double replQueueThreshold;
    // internal fields
    /** Time when threshold was reached.
     * <br> -1 safe mode is off
     * <br> 0 safe mode is on, and threshold is not reached yet
     * <br> >0 safe mode is on, but we are in extension period 
     */
    private long reached = -1;  
    /** Total number of blocks; -1 when safe mode was entered manually. */
    int blockTotal; 
    /** Number of safe blocks (with at least the minimum replica count). */
    int blockSafe;
    /** Number of blocks needed to satisfy safe mode threshold condition */
    private int blockThreshold;
    /** Number of blocks needed before populating replication queues */
    private int blockReplQueueThreshold;
    /** time of the last status printout */
    private long lastStatusReport = 0;
    /** Was safemode entered automatically because available resources were low. */
    private boolean resourcesLow = false;
    /** Should safemode adjust its block totals as blocks come in */
    private boolean shouldIncrementallyTrackBlocks = false;
    /** counter for tracking startup progress of reported blocks */
    private Counter awaitingReportedBlocksCounter;
4700        
4701        /**
4702         * Creates SafeModeInfo when the name node enters
4703         * automatic safe mode at startup.
4704         *  
4705         * @param conf configuration
4706         */
4707        private SafeModeInfo(Configuration conf) {
4708          this.threshold = conf.getFloat(DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY,
4709              DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT);
4710          if(threshold > 1.0) {
4711            LOG.warn("The threshold value should't be greater than 1, threshold: " + threshold);
4712          }
4713          this.datanodeThreshold = conf.getInt(
4714            DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY,
4715            DFS_NAMENODE_SAFEMODE_MIN_DATANODES_DEFAULT);
4716          this.extension = conf.getInt(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 0);
4717          this.safeReplication = conf.getInt(DFS_NAMENODE_REPLICATION_MIN_KEY, 
4718                                             DFS_NAMENODE_REPLICATION_MIN_DEFAULT);
4719          
4720          LOG.info(DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY + " = " + threshold);
4721          LOG.info(DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY + " = " + datanodeThreshold);
4722          LOG.info(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY + "     = " + extension);
4723    
4724          // default to safe mode threshold (i.e., don't populate queues before leaving safe mode)
4725          this.replQueueThreshold = 
4726            conf.getFloat(DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY,
4727                          (float) threshold);
4728          this.blockTotal = 0; 
4729          this.blockSafe = 0;
4730        }
4731    
4732        /**
4733         * In the HA case, the StandbyNode can be in safemode while the namespace
4734         * is modified by the edit log tailer. In this case, the number of total
4735         * blocks changes as edits are processed (eg blocks are added and deleted).
4736         * However, we don't want to do the incremental tracking during the
4737         * startup-time loading process -- only once the initial total has been
4738         * set after the image has been loaded.
4739         */
4740        private boolean shouldIncrementallyTrackBlocks() {
4741          return shouldIncrementallyTrackBlocks;
4742        }
4743    
4744        /**
4745         * Creates SafeModeInfo when safe mode is entered manually, or because
4746         * available resources are low.
4747         *
4748         * The {@link #threshold} is set to 1.5 so that it could never be reached.
4749         * {@link #blockTotal} is set to -1 to indicate that safe mode is manual.
4750         * 
4751         * @see SafeModeInfo
4752         */
4753        private SafeModeInfo(boolean resourcesLow) {
4754          this.threshold = 1.5f;  // this threshold can never be reached
4755          this.datanodeThreshold = Integer.MAX_VALUE;
4756          this.extension = Integer.MAX_VALUE;
4757          this.safeReplication = Short.MAX_VALUE + 1; // more than maxReplication
4758          this.replQueueThreshold = 1.5f; // can never be reached
4759          this.blockTotal = -1;
4760          this.blockSafe = -1;
4761          this.resourcesLow = resourcesLow;
4762          enter();
4763          reportStatus("STATE* Safe mode is ON.", true);
4764        }
4765          
4766        /**
4767         * Check if safe mode is on.
4768         * @return true if in safe mode
4769         */
4770        private synchronized boolean isOn() {
4771          doConsistencyCheck();
4772          return this.reached >= 0;
4773        }
4774          
4775        /**
4776         * Enter safe mode.
4777         */
4778        private void enter() {
4779          this.reached = 0;
4780        }
4781          
4782        /**
4783         * Leave safe mode.
4784         * <p>
4785         * Check for invalid, under- & over-replicated blocks in the end of startup.
4786         */
4787        private synchronized void leave() {
4788          // if not done yet, initialize replication queues.
4789          // In the standby, do not populate repl queues
4790          if (!isPopulatingReplQueues() && shouldPopulateReplQueues()) {
4791            initializeReplQueues();
4792          }
4793          long timeInSafemode = now() - startTime;
4794          NameNode.stateChangeLog.info("STATE* Leaving safe mode after " 
4795                                        + timeInSafemode/1000 + " secs");
4796          NameNode.getNameNodeMetrics().setSafeModeTime((int) timeInSafemode);
4797    
4798          //Log the following only once (when transitioning from ON -> OFF)
4799          if (reached >= 0) {
4800            NameNode.stateChangeLog.info("STATE* Safe mode is OFF"); 
4801          }
4802          reached = -1;
4803          safeMode = null;
4804          final NetworkTopology nt = blockManager.getDatanodeManager().getNetworkTopology();
4805          NameNode.stateChangeLog.info("STATE* Network topology has "
4806              + nt.getNumOfRacks() + " racks and "
4807              + nt.getNumOfLeaves() + " datanodes");
4808          NameNode.stateChangeLog.info("STATE* UnderReplicatedBlocks has "
4809              + blockManager.numOfUnderReplicatedBlocks() + " blocks");
4810    
4811          startSecretManagerIfNecessary();
4812    
4813          // If startup has not yet completed, end safemode phase.
4814          StartupProgress prog = NameNode.getStartupProgress();
4815          if (prog.getStatus(Phase.SAFEMODE) != Status.COMPLETE) {
4816            prog.endStep(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS);
4817            prog.endPhase(Phase.SAFEMODE);
4818          }
4819        }
4820    
4821        /**
4822         * Check whether we have reached the threshold for 
4823         * initializing replication queues.
4824         */
4825        private synchronized boolean canInitializeReplQueues() {
4826          return shouldPopulateReplQueues()
4827              && blockSafe >= blockReplQueueThreshold;
4828        }
4829          
4830        /** 
4831         * Safe mode can be turned off iff 
4832         * the threshold is reached and 
4833         * the extension time have passed.
4834         * @return true if can leave or false otherwise.
4835         */
4836        private synchronized boolean canLeave() {
4837          if (reached == 0) {
4838            return false;
4839          }
4840    
4841          if (now() - reached < extension) {
4842            reportStatus("STATE* Safe mode ON, in safe mode extension.", false);
4843            return false;
4844          }
4845    
4846          if (needEnter()) {
4847            reportStatus("STATE* Safe mode ON, thresholds not met.", false);
4848            return false;
4849          }
4850    
4851          return true;
4852        }
4853          
4854        /** 
4855         * There is no need to enter safe mode 
4856         * if DFS is empty or {@link #threshold} == 0
4857         */
4858        private boolean needEnter() {
4859          return (threshold != 0 && blockSafe < blockThreshold) ||
4860            (datanodeThreshold != 0 && getNumLiveDataNodes() < datanodeThreshold) ||
4861            (!nameNodeHasResourcesAvailable());
4862        }
4863          
4864        /**
4865         * Check and trigger safe mode if needed. 
4866         */
4867        private void checkMode() {
4868          // Have to have write-lock since leaving safemode initializes
4869          // repl queues, which requires write lock
4870          assert hasWriteLock();
4871          // if smmthread is already running, the block threshold must have been 
4872          // reached before, there is no need to enter the safe mode again
4873          if (smmthread == null && needEnter()) {
4874            enter();
4875            // check if we are ready to initialize replication queues
4876            if (canInitializeReplQueues() && !isPopulatingReplQueues()
4877                && !haEnabled) {
4878              initializeReplQueues();
4879            }
4880            reportStatus("STATE* Safe mode ON.", false);
4881            return;
4882          }
4883          // the threshold is reached or was reached before
4884          if (!isOn() ||                           // safe mode is off
4885              extension <= 0 || threshold <= 0) {  // don't need to wait
4886            this.leave(); // leave safe mode
4887            return;
4888          }
4889          if (reached > 0) {  // threshold has already been reached before
4890            reportStatus("STATE* Safe mode ON.", false);
4891            return;
4892          }
4893          // start monitor
4894          reached = now();
4895          if (smmthread == null) {
4896            smmthread = new Daemon(new SafeModeMonitor());
4897            smmthread.start();
4898            reportStatus("STATE* Safe mode extension entered.", true);
4899          }
4900    
4901          // check if we are ready to initialize replication queues
4902          if (canInitializeReplQueues() && !isPopulatingReplQueues() && !haEnabled) {
4903            initializeReplQueues();
4904          }
4905        }
4906          
4907        /**
4908         * Set total number of blocks.
4909         */
4910        private synchronized void setBlockTotal(int total) {
4911          this.blockTotal = total;
4912          this.blockThreshold = (int) (blockTotal * threshold);
4913          this.blockReplQueueThreshold = 
4914            (int) (blockTotal * replQueueThreshold);
4915          if (haEnabled) {
4916            // After we initialize the block count, any further namespace
4917            // modifications done while in safe mode need to keep track
4918            // of the number of total blocks in the system.
4919            this.shouldIncrementallyTrackBlocks = true;
4920          }
4921          if(blockSafe < 0)
4922            this.blockSafe = 0;
4923          checkMode();
4924        }
4925          
4926        /**
4927         * Increment number of safe blocks if current block has 
4928         * reached minimal replication.
4929         * @param replication current replication 
4930         */
4931        private synchronized void incrementSafeBlockCount(short replication) {
4932          if (replication == safeReplication) {
4933            this.blockSafe++;
4934    
4935            // Report startup progress only if we haven't completed startup yet.
4936            StartupProgress prog = NameNode.getStartupProgress();
4937            if (prog.getStatus(Phase.SAFEMODE) != Status.COMPLETE) {
4938              if (this.awaitingReportedBlocksCounter == null) {
4939                this.awaitingReportedBlocksCounter = prog.getCounter(Phase.SAFEMODE,
4940                  STEP_AWAITING_REPORTED_BLOCKS);
4941              }
4942              this.awaitingReportedBlocksCounter.increment();
4943            }
4944    
4945            checkMode();
4946          }
4947        }
4948          
4949        /**
4950         * Decrement number of safe blocks if current block has 
4951         * fallen below minimal replication.
4952         * @param replication current replication 
4953         */
4954        private synchronized void decrementSafeBlockCount(short replication) {
4955          if (replication == safeReplication-1) {
4956            this.blockSafe--;
4957            //blockSafe is set to -1 in manual / low resources safemode
4958            assert blockSafe >= 0 || isManual() || areResourcesLow();
4959            checkMode();
4960          }
4961        }
4962    
4963        /**
4964         * Check if safe mode was entered manually
4965         */
4966        private boolean isManual() {
4967          return extension == Integer.MAX_VALUE;
4968        }
4969    
    /**
     * Set manual safe mode.
     * Encoded by pinning the extension to Integer.MAX_VALUE so the
     * automatic extension countdown can never elapse (see isManual()).
     */
    private synchronized void setManual() {
      extension = Integer.MAX_VALUE;
    }
4976    
4977        /**
4978         * Check if safe mode was entered due to resources being low.
4979         */
4980        private boolean areResourcesLow() {
4981          return resourcesLow;
4982        }
4983    
    /**
     * Set that resources are low for this instance of safe mode.
     * Read back via areResourcesLow() to distinguish this from
     * manual/startup safe mode in user-facing messages.
     */
    private void setResourcesLow() {
      resourcesLow = true;
    }
4990    
4991        /**
4992         * A tip on how safe mode is to be turned off: manually or automatically.
4993         */
4994        String getTurnOffTip() {
4995          if(!isOn()) {
4996            return "Safe mode is OFF.";
4997          }
4998    
4999          //Manual OR low-resource safemode. (Admin intervention required)
5000          String adminMsg = "It was turned on manually. ";
5001          if (areResourcesLow()) {
5002            adminMsg = "Resources are low on NN. Please add or free up more "
5003              + "resources then turn off safe mode manually. NOTE:  If you turn off"
5004              + " safe mode before adding resources, "
5005              + "the NN will immediately return to safe mode. ";
5006          }
5007          if (isManual() || areResourcesLow()) {
5008            return adminMsg
5009              + "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off.";
5010          }
5011    
5012          boolean thresholdsMet = true;
5013          int numLive = getNumLiveDataNodes();
5014          String msg = "";
5015          if (blockSafe < blockThreshold) {
5016            msg += String.format(
5017              "The reported blocks %d needs additional %d"
5018              + " blocks to reach the threshold %.4f of total blocks %d.\n",
5019              blockSafe, (blockThreshold - blockSafe) + 1, threshold, blockTotal);
5020            thresholdsMet = false;
5021          } else {
5022            msg += String.format("The reported blocks %d has reached the threshold"
5023                + " %.4f of total blocks %d. ", blockSafe, threshold, blockTotal);
5024          }
5025          if (numLive < datanodeThreshold) {
5026            msg += String.format(
5027              "The number of live datanodes %d needs an additional %d live "
5028              + "datanodes to reach the minimum number %d.\n",
5029              numLive, (datanodeThreshold - numLive), datanodeThreshold);
5030            thresholdsMet = false;
5031          } else {
5032            msg += String.format("The number of live datanodes %d has reached "
5033                + "the minimum number %d. ",
5034                numLive, datanodeThreshold);
5035          }
5036          msg += (reached > 0) ? "In safe mode extension. " : "";
5037          msg += "Safe mode will be turned off automatically ";
5038    
5039          if (!thresholdsMet) {
5040            msg += "once the thresholds have been reached.";
5041          } else if (reached + extension - now() > 0) {
5042            msg += ("in " + (reached + extension - now()) / 1000 + " seconds.");
5043          } else {
5044            msg += "soon.";
5045          }
5046    
5047          return msg;
5048        }
5049    
5050        /**
5051         * Print status every 20 seconds.
5052         */
5053        private void reportStatus(String msg, boolean rightNow) {
5054          long curTime = now();
5055          if(!rightNow && (curTime - lastStatusReport < 20 * 1000))
5056            return;
5057          NameNode.stateChangeLog.info(msg + " \n" + getTurnOffTip());
5058          lastStatusReport = curTime;
5059        }
5060    
5061        @Override
5062        public String toString() {
5063          String resText = "Current safe blocks = " 
5064            + blockSafe 
5065            + ". Target blocks = " + blockThreshold + " for threshold = %" + threshold
5066            + ". Minimal replication = " + safeReplication + ".";
5067          if (reached > 0) 
5068            resText += " Threshold was reached " + new Date(reached) + ".";
5069          return resText;
5070        }
5071          
    /**
     * Checks consistency of the class state.
     * This is costly so only runs if asserts are enabled.
     */
    private void doConsistencyCheck() {
      // Standard idiom to detect whether -ea is on: the assignment in the
      // assert only executes when assertions are enabled.
      boolean assertsOn = false;
      assert assertsOn = true; // set to true if asserts are on
      if (!assertsOn) return;
      
      if (blockTotal == -1 && blockSafe == -1) {
        return; // manual safe mode
      }
      int activeBlocks = blockManager.getActiveBlockCount();
      // Tolerate blockTotal != activeBlocks as long as blockSafe is still
      // within [0, blockTotal]; anything else indicates corrupted counters.
      if ((blockTotal != activeBlocks) &&
          !(blockSafe >= 0 && blockSafe <= blockTotal)) {
        throw new AssertionError(
            " SafeMode: Inconsistent filesystem state: "
        + "SafeMode data: blockTotal=" + blockTotal
        + " blockSafe=" + blockSafe + "; "
        + "BlockManager data: active="  + activeBlocks);
      }
    }
5094    
    /**
     * Adjust the safe and total block counts while incrementally tracking
     * blocks. No-op unless incremental tracking was enabled by
     * setBlockTotal() (HA only: the standby tails edits during safe mode).
     * @param deltaSafe change in the number of safe blocks
     * @param deltaTotal change in the total number of blocks
     */
    private synchronized void adjustBlockTotals(int deltaSafe, int deltaTotal) {
      if (!shouldIncrementallyTrackBlocks) {
        return;
      }
      assert haEnabled;
      
      if (LOG.isDebugEnabled()) {
        LOG.debug("Adjusting block totals from " +
            blockSafe + "/" + blockTotal + " to " +
            (blockSafe + deltaSafe) + "/" + (blockTotal + deltaTotal));
      }
      assert blockSafe + deltaSafe >= 0 : "Can't reduce blockSafe " +
        blockSafe + " by " + deltaSafe + ": would be negative";
      assert blockTotal + deltaTotal >= 0 : "Can't reduce blockTotal " +
        blockTotal + " by " + deltaTotal + ": would be negative";
      
      blockSafe += deltaSafe;
      // setBlockTotal recomputes the thresholds and calls checkMode().
      setBlockTotal(blockTotal + deltaTotal);
    }
5114      }
5115        
5116      /**
5117       * Periodically check whether it is time to leave safe mode.
5118       * This thread starts when the threshold level is reached.
5119       *
5120       */
5121      class SafeModeMonitor implements Runnable {
5122        /** interval in msec for checking safe mode: {@value} */
5123        private static final long recheckInterval = 1000;
5124          
5125        /**
5126         */
5127        @Override
5128        public void run() {
5129          while (fsRunning) {
5130            writeLock();
5131            try {
5132              if (safeMode == null) { // Not in safe mode.
5133                break;
5134              }
5135              if (safeMode.canLeave()) {
5136                // Leave safe mode.
5137                safeMode.leave();
5138                smmthread = null;
5139                break;
5140              }
5141            } finally {
5142              writeUnlock();
5143            }
5144    
5145            try {
5146              Thread.sleep(recheckInterval);
5147            } catch (InterruptedException ie) {
5148              // Ignored
5149            }
5150          }
5151          if (!fsRunning) {
5152            LOG.info("NameNode is being shutdown, exit SafeModeMonitor thread");
5153          }
5154        }
5155      }
5156        
5157      boolean setSafeMode(SafeModeAction action) throws IOException {
5158        if (action != SafeModeAction.SAFEMODE_GET) {
5159          checkSuperuserPrivilege();
5160          switch(action) {
5161          case SAFEMODE_LEAVE: // leave safe mode
5162            leaveSafeMode();
5163            break;
5164          case SAFEMODE_ENTER: // enter safe mode
5165            enterSafeMode(false);
5166            break;
5167          default:
5168            LOG.error("Unexpected safe mode action");
5169          }
5170        }
5171        return isInSafeMode();
5172      }
5173    
5174      @Override
5175      public void checkSafeMode() {
5176        // safeMode is volatile, and may be set to null at any time
5177        SafeModeInfo safeMode = this.safeMode;
5178        if (safeMode != null) {
5179          safeMode.checkMode();
5180        }
5181      }
5182    
5183      @Override
5184      public boolean isInSafeMode() {
5185        // safeMode is volatile, and may be set to null at any time
5186        SafeModeInfo safeMode = this.safeMode;
5187        if (safeMode == null)
5188          return false;
5189        return safeMode.isOn();
5190      }
5191    
5192      @Override
5193      public boolean isInStartupSafeMode() {
5194        // safeMode is volatile, and may be set to null at any time
5195        SafeModeInfo safeMode = this.safeMode;
5196        if (safeMode == null)
5197          return false;
5198        // If the NN is in safemode, and not due to manual / low resources, we
5199        // assume it must be because of startup. If the NN had low resources during
5200        // startup, we assume it came out of startup safemode and it is now in low
5201        // resources safemode
5202        return !safeMode.isManual() && !safeMode.areResourcesLow()
5203          && safeMode.isOn();
5204      }
5205    
5206      /**
5207       * Check if replication queues are to be populated
5208       * @return true when node is HAState.Active and not in the very first safemode
5209       */
5210      @Override
5211      public boolean isPopulatingReplQueues() {
5212        if (!shouldPopulateReplQueues()) {
5213          return false;
5214        }
5215        return initializedReplQueues;
5216      }
5217    
5218      private boolean shouldPopulateReplQueues() {
5219        if(haContext == null || haContext.getState() == null)
5220          return false;
5221        return haContext.getState().shouldPopulateReplQueues();
5222      }
5223    
5224      @Override
5225      public void incrementSafeBlockCount(int replication) {
5226        // safeMode is volatile, and may be set to null at any time
5227        SafeModeInfo safeMode = this.safeMode;
5228        if (safeMode == null)
5229          return;
5230        safeMode.incrementSafeBlockCount((short)replication);
5231      }
5232    
5233      @Override
5234      public void decrementSafeBlockCount(Block b) {
5235        // safeMode is volatile, and may be set to null at any time
5236        SafeModeInfo safeMode = this.safeMode;
5237        if (safeMode == null) // mostly true
5238          return;
5239        BlockInfo storedBlock = getStoredBlock(b);
5240        if (storedBlock.isComplete()) {
5241          safeMode.decrementSafeBlockCount((short)blockManager.countNodes(b).liveReplicas());
5242        }
5243      }
5244      
5245      /**
5246       * Adjust the total number of blocks safe and expected during safe mode.
5247       * If safe mode is not currently on, this is a no-op.
5248       * @param deltaSafe the change in number of safe blocks
5249       * @param deltaTotal the change i nnumber of total blocks expected
5250       */
5251      @Override
5252      public void adjustSafeModeBlockTotals(int deltaSafe, int deltaTotal) {
5253        // safeMode is volatile, and may be set to null at any time
5254        SafeModeInfo safeMode = this.safeMode;
5255        if (safeMode == null)
5256          return;
5257        safeMode.adjustBlockTotals(deltaSafe, deltaTotal);
5258      }
5259    
5260      /**
5261       * Set the total number of blocks in the system. 
5262       */
5263      public void setBlockTotal() {
5264        // safeMode is volatile, and may be set to null at any time
5265        SafeModeInfo safeMode = this.safeMode;
5266        if (safeMode == null)
5267          return;
5268        safeMode.setBlockTotal((int)getCompleteBlocksTotal());
5269      }
5270    
5271      /**
5272       * Get the total number of blocks in the system. 
5273       */
5274      @Override // FSNamesystemMBean
5275      @Metric
5276      public long getBlocksTotal() {
5277        return blockManager.getTotalBlocks();
5278      }
5279    
  /**
   * Get the total number of COMPLETE blocks in the system.
   * For safe mode only complete blocks are counted.
   * Computed as total blocks minus the under-construction blocks of all
   * files currently under lease; holds the read lock for the duration.
   */
  private long getCompleteBlocksTotal() {
    // Calculate number of blocks under construction
    long numUCBlocks = 0;
    readLock();
    try {
      for (Lease lease : leaseManager.getSortedLeases()) {
        for (String path : lease.getPaths()) {
          final INodeFile cons;
          try {
            cons = dir.getINode(path).asFile();
            // Every leased path must be a file under construction.
            Preconditions.checkState(cons.isUnderConstruction());
          } catch (UnresolvedLinkException e) {
            // Lease paths are stored fully resolved, so a symlink here
            // indicates internal inconsistency rather than bad input.
            throw new AssertionError("Lease files should reside on this FS");
          }
          BlockInfo[] blocks = cons.getBlocks();
          if(blocks == null)
            continue;
          for(BlockInfo b : blocks) {
            // Typically only the last block of a UC file is incomplete.
            if(!b.isComplete())
              numUCBlocks++;
          }
        }
      }
      LOG.info("Number of blocks under construction: " + numUCBlocks);
      return getBlocksTotal() - numUCBlocks;
    } finally {
      readUnlock();
    }
  }
5313    
  /**
   * Enter safe mode. If resourcesLow is false, then we assume it is manual
   * @param resourcesLow true when triggered by the NN resource checker,
   *        false for an administrator request
   * @throws IOException
   */
  void enterSafeMode(boolean resourcesLow) throws IOException {
    writeLock();
    try {
      // Stop the secret manager, since rolling the master key would
      // try to write to the edit log
      stopSecretManager();

      // Ensure that any concurrent operations have been fully synced
      // before entering safe mode. This ensures that the FSImage
      // is entirely stable on disk as soon as we're in safe mode.
      boolean isEditlogOpenForWrite = getEditLog().isOpenForWrite();
      // Before Editlog is in OpenForWrite mode, editLogStream will be null. So,
      // logSyncAll call can be called only when Edlitlog is in OpenForWrite mode
      if (isEditlogOpenForWrite) {
        getEditLog().logSyncAll();
      }
      if (!isInSafeMode()) {
        // Fresh entry: the SafeModeInfo(resourcesLow) constructor encodes
        // manual mode when resourcesLow is false.
        safeMode = new SafeModeInfo(resourcesLow);
        return;
      }
      // Already in safe mode: just upgrade/refresh the reason flags.
      if (resourcesLow) {
        safeMode.setResourcesLow();
      } else {
        safeMode.setManual();
      }
      // Sync again so edits logged between the first sync and the flag
      // change are also on disk while in safe mode.
      if (isEditlogOpenForWrite) {
        getEditLog().logSyncAll();
      }
      // NOTE(review): no separator between "ON" and the tip text — the
      // logged line reads e.g. "...is ONIt was turned on manually."; confirm
      // whether a ". " separator was intended.
      NameNode.stateChangeLog.info("STATE* Safe mode is ON"
          + safeMode.getTurnOffTip());
    } finally {
      writeUnlock();
    }
  }
5352    
5353      /**
5354       * Leave safe mode.
5355       * @throws IOException
5356       */
5357      void leaveSafeMode() {
5358        writeLock();
5359        try {
5360          if (!isInSafeMode()) {
5361            NameNode.stateChangeLog.info("STATE* Safe mode is already OFF"); 
5362            return;
5363          }
5364          safeMode.leave();
5365        } finally {
5366          writeUnlock();
5367        }
5368      }
5369        
5370      String getSafeModeTip() {
5371        readLock();
5372        try {
5373          if (!isInSafeMode()) {
5374            return "";
5375          }
5376          return safeMode.getTurnOffTip();
5377        } finally {
5378          readUnlock();
5379        }
5380      }
5381    
  /**
   * Roll the edit log: close the current segment and open a new one.
   * Superuser-only; refused while in safe mode.
   * @return the checkpoint signature of the rolled state
   * @throws IOException on journal errors or while in safe mode
   */
  CheckpointSignature rollEditLog() throws IOException {
    checkSuperuserPrivilege();
    // Checked again under the lock below, since the HA state may change
    // while waiting for the write lock.
    checkOperation(OperationCategory.JOURNAL);
    writeLock();
    try {
      checkOperation(OperationCategory.JOURNAL);
      checkNameNodeSafeMode("Log not rolled");
      if (Server.isRpcInvocation()) {
        LOG.info("Roll Edit Log from " + Server.getRemoteAddress());
      }
      return getFSImage().rollEditLog();
    } finally {
      writeUnlock();
    }
  }
5397    
  /**
   * Start a checkpoint on behalf of a backup/checkpoint node. Uses the
   * retry cache so a retried RPC returns the original command instead of
   * starting a second checkpoint.
   * @param backupNode registration of the node performing the checkpoint
   * @param activeNamenode registration of this namenode
   * @return the command describing the checkpoint to perform
   * @throws IOException while in safe mode or on image/journal errors
   */
  NamenodeCommand startCheckpoint(NamenodeRegistration backupNode,
      NamenodeRegistration activeNamenode) throws IOException {
    checkOperation(OperationCategory.CHECKPOINT);
    CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
        null);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      // Retried request: replay the previously computed command.
      return (NamenodeCommand) cacheEntry.getPayload();
    }
    writeLock();
    NamenodeCommand cmd = null;
    try {
      // Re-check under the lock: HA state may have changed meanwhile.
      checkOperation(OperationCategory.CHECKPOINT);
      checkNameNodeSafeMode("Checkpoint not started");
      
      LOG.info("Start checkpoint for " + backupNode.getAddress());
      cmd = getFSImage().startCheckpoint(backupNode, activeNamenode);
      getEditLog().logSync();
      return cmd;
    } finally {
      writeUnlock();
      // Record the outcome (cmd != null on success) for future retries.
      RetryCache.setState(cacheEntry, cmd != null, cmd);
    }
  }
5421    
  /**
   * Process an incremental (received/deleted) block report from a datanode
   * under the write lock, delegating to the block manager.
   * @param nodeID the reporting datanode
   * @param poolId the block pool id (accepted but not used here; the block
   *        manager resolves the pool — NOTE(review): confirm intentional)
   * @param srdb the received/deleted block deltas per storage
   * @throws IOException on processing errors
   */
  public void processIncrementalBlockReport(final DatanodeID nodeID,
      final String poolId, final StorageReceivedDeletedBlocks srdb)
      throws IOException {
    writeLock();
    try {
      blockManager.processIncrementalBlockReport(nodeID, srdb);
    } finally {
      writeUnlock();
    }
  }
5432      
  /**
   * Finalize a checkpoint previously begun via startCheckpoint(). Uses the
   * retry cache so a retried RPC is a no-op.
   * Only the read lock is taken: ending a checkpoint validates the
   * signature and finalizes image state without mutating the namespace.
   * @param registration registration of the checkpointing node
   * @param sig signature returned by the corresponding startCheckpoint
   * @throws IOException while in safe mode or on signature mismatch
   */
  void endCheckpoint(NamenodeRegistration registration,
                            CheckpointSignature sig) throws IOException {
    checkOperation(OperationCategory.CHECKPOINT);
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    boolean success = false;
    readLock();
    try {
      // Re-check under the lock: HA state may have changed meanwhile.
      checkOperation(OperationCategory.CHECKPOINT);

      checkNameNodeSafeMode("Checkpoint not ended");
      LOG.info("End checkpoint for " + registration.getAddress());
      getFSImage().endCheckpoint(sig);
      success = true;
    } finally {
      readUnlock();
      // Record the outcome for future retries of the same call.
      RetryCache.setState(cacheEntry, success);
    }
  }
5454    
5455      PermissionStatus createFsOwnerPermissions(FsPermission permission) {
5456        return new PermissionStatus(fsOwner.getShortUserName(), supergroup, permission);
5457      }
5458    
  /** Verify that the caller owns {@code path}. */
  private void checkOwner(FSPermissionChecker pc, String path)
      throws AccessControlException, UnresolvedLinkException {
    // doCheckOwner=true; no ancestor/parent/path/sub-access checks.
    checkPermission(pc, path, true, null, null, null, null);
  }
5463    
  /** Verify the caller has {@code access} on the inode at {@code path} itself. */
  private void checkPathAccess(FSPermissionChecker pc,
      String path, FsAction access) throws AccessControlException,
      UnresolvedLinkException {
    // Only the path-access slot is set; no owner/ancestor/parent/sub checks.
    checkPermission(pc, path, false, null, null, access, null);
  }
5469    
  /** Verify the caller has {@code access} on the parent directory of {@code path}. */
  private void checkParentAccess(FSPermissionChecker pc,
      String path, FsAction access) throws AccessControlException,
      UnresolvedLinkException {
    // Only the parent-access slot is set.
    checkPermission(pc, path, false, null, access, null, null);
  }
5475    
  /** Verify the caller has {@code access} on the deepest existing ancestor of {@code path}. */
  private void checkAncestorAccess(FSPermissionChecker pc,
      String path, FsAction access) throws AccessControlException,
      UnresolvedLinkException {
    // Only the ancestor-access slot is set.
    checkPermission(pc, path, false, access, null, null, null);
  }
5481    
  /** Verify the caller may traverse (execute) every directory on {@code path}. */
  private void checkTraverse(FSPermissionChecker pc, String path)
      throws AccessControlException, UnresolvedLinkException {
    // All access slots null: only the implicit traverse check applies.
    checkPermission(pc, path, false, null, null, null, null);
  }
5486    
5487      @Override
5488      public void checkSuperuserPrivilege()
5489          throws AccessControlException {
5490        if (isPermissionEnabled) {
5491          FSPermissionChecker pc = getPermissionChecker();
5492          pc.checkSuperuserPrivilege();
5493        }
5494      }
5495    
5496      /**
5497       * Check whether current user have permissions to access the path. For more
5498       * details of the parameters, see
5499       * {@link FSPermissionChecker#checkPermission()}.
5500       */
5501      private void checkPermission(FSPermissionChecker pc,
5502          String path, boolean doCheckOwner, FsAction ancestorAccess,
5503          FsAction parentAccess, FsAction access, FsAction subAccess)
5504          throws AccessControlException, UnresolvedLinkException {
5505            checkPermission(pc, path, doCheckOwner, ancestorAccess,
5506                parentAccess, access, subAccess, true);
5507      }
5508    
5509      /**
5510       * Check whether current user have permissions to access the path. For more
5511       * details of the parameters, see
5512       * {@link FSPermissionChecker#checkPermission()}.
5513       */
5514      private void checkPermission(FSPermissionChecker pc,
5515          String path, boolean doCheckOwner, FsAction ancestorAccess,
5516          FsAction parentAccess, FsAction access, FsAction subAccess,
5517          boolean resolveLink)
5518          throws AccessControlException, UnresolvedLinkException {
5519        if (!pc.isSuperUser()) {
5520          dir.waitForReady();
5521          readLock();
5522          try {
5523            pc.checkPermission(path, dir.rootDir, doCheckOwner, ancestorAccess,
5524                parentAccess, access, subAccess, resolveLink);
5525          } finally {
5526            readUnlock();
5527          }
5528        }
5529      }
5530      
5531      /**
5532       * Check to see if we have exceeded the limit on the number
5533       * of inodes.
5534       */
5535      void checkFsObjectLimit() throws IOException {
5536        if (maxFsObjects != 0 &&
5537            maxFsObjects <= dir.totalInodes() + getBlocksTotal()) {
5538          throw new IOException("Exceeded the configured number of objects " +
5539                                 maxFsObjects + " in the filesystem.");
5540        }
5541      }
5542    
5543      /**
5544       * Get the total number of objects in the system. 
5545       */
5546      @Override // FSNamesystemMBean
5547      public long getMaxObjects() {
5548        return maxFsObjects;
5549      }
5550    
5551      @Override // FSNamesystemMBean
5552      @Metric
5553      public long getFilesTotal() {
5554        readLock();
5555        try {
5556          return this.dir.totalInodes();
5557        } finally {
5558          readUnlock();
5559        }
5560      }
5561    
5562      @Override // FSNamesystemMBean
5563      @Metric
5564      public long getPendingReplicationBlocks() {
5565        return blockManager.getPendingReplicationBlocksCount();
5566      }
5567    
5568      @Override // FSNamesystemMBean
5569      @Metric
5570      public long getUnderReplicatedBlocks() {
5571        return blockManager.getUnderReplicatedBlocksCount();
5572      }
5573    
5574      /** Returns number of blocks with corrupt replicas */
5575      @Metric({"CorruptBlocks", "Number of blocks with corrupt replicas"})
5576      public long getCorruptReplicaBlocks() {
5577        return blockManager.getCorruptReplicaBlocksCount();
5578      }
5579    
5580      @Override // FSNamesystemMBean
5581      @Metric
5582      public long getScheduledReplicationBlocks() {
5583        return blockManager.getScheduledReplicationBlocksCount();
5584      }
5585    
5586      @Override
5587      @Metric
5588      public long getPendingDeletionBlocks() {
5589        return blockManager.getPendingDeletionBlocksCount();
5590      }
5591    
5592      @Metric
5593      public long getExcessBlocks() {
5594        return blockManager.getExcessBlocksCount();
5595      }
5596      
5597      // HA-only metric
5598      @Metric
5599      public long getPostponedMisreplicatedBlocks() {
5600        return blockManager.getPostponedMisreplicatedBlocksCount();
5601      }
5602    
5603      // HA-only metric
5604      @Metric
5605      public int getPendingDataNodeMessageCount() {
5606        return blockManager.getPendingDataNodeMessageCount();
5607      }
5608      
5609      // HA-only metric
5610      @Metric
5611      public String getHAState() {
5612        return haContext.getState().toString();
5613      }
5614    
5615      // HA-only metric
5616      @Metric
5617      public long getMillisSinceLastLoadedEdits() {
5618        if (isInStandbyState() && editLogTailer != null) {
5619          return now() - editLogTailer.getLastLoadTimestamp();
5620        } else {
5621          return 0;
5622        }
5623      }
5624      
5625      @Metric
5626      public int getBlockCapacity() {
5627        return blockManager.getCapacity();
5628      }
5629    
5630      @Override // FSNamesystemMBean
5631      public String getFSState() {
5632        return isInSafeMode() ? "safeMode" : "Operational";
5633      }
5634      
  // JMX object names for the registered beans; non-null only while
  // registered, cleared again in shutdown().
  private ObjectName mbeanName;
  private ObjectName mxbeanName;
5637    
  /**
   * Register the FSNamesystem MBean using the name
   *        "hadoop:service=NameNode,name=FSNamesystemState"
   * Wraps this object in a StandardMBean exposing only the
   * FSNamesystemMBean interface.
   */
  private void registerMBean() {
    // We can only implement one MXBean interface, so we keep the old one.
    try {
      StandardMBean bean = new StandardMBean(this, FSNamesystemMBean.class);
      mbeanName = MBeans.register("NameNode", "FSNamesystemState", bean);
    } catch (NotCompliantMBeanException e) {
      // Indicates a programming error in the interface definition.
      throw new RuntimeException("Bad MBean setup", e);
    }

    LOG.info("Registered FSNamesystemState MBean");
  }
5653    
  /**
   * shutdown FSNamesystem: unregister the JMX beans and shut down the
   * directory and block-manager subsystems. Safe to call when parts
   * were never initialized (all steps are null-guarded).
   */
  void shutdown() {
    if (mbeanName != null) {
      MBeans.unregister(mbeanName);
      mbeanName = null;
    }
    if (mxbeanName != null) {
      MBeans.unregister(mxbeanName);
      mxbeanName = null;
    }
    if (dir != null) {
      dir.shutdown();
    }
    if (blockManager != null) {
      blockManager.shutdown();
    }
  }
5673      
5674    
5675      @Override // FSNamesystemMBean
5676      public int getNumLiveDataNodes() {
5677        return getBlockManager().getDatanodeManager().getNumLiveDataNodes();
5678      }
5679    
5680      @Override // FSNamesystemMBean
5681      public int getNumDeadDataNodes() {
5682        return getBlockManager().getDatanodeManager().getNumDeadDataNodes();
5683      }
5684      
5685      @Override // FSNamesystemMBean
5686      public int getNumDecomLiveDataNodes() {
5687        final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
5688        getBlockManager().getDatanodeManager().fetchDatanodes(live, null, true);
5689        int liveDecommissioned = 0;
5690        for (DatanodeDescriptor node : live) {
5691          liveDecommissioned += node.isDecommissioned() ? 1 : 0;
5692        }
5693        return liveDecommissioned;
5694      }
5695    
5696      @Override // FSNamesystemMBean
5697      public int getNumDecomDeadDataNodes() {
5698        final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
5699        getBlockManager().getDatanodeManager().fetchDatanodes(null, dead, true);
5700        int deadDecommissioned = 0;
5701        for (DatanodeDescriptor node : dead) {
5702          deadDecommissioned += node.isDecommissioned() ? 1 : 0;
5703        }
5704        return deadDecommissioned;
5705      }
5706    
  /** @return number of datanodes currently in the decommissioning state. */
  @Override // FSNamesystemMBean
  public int getNumDecommissioningDataNodes() {
    return getBlockManager().getDatanodeManager().getDecommissioningNodes()
        .size();
  }
5712    
  /** @return number of datanodes marked stale because of delayed heartbeats. */
  @Override // FSNamesystemMBean
  @Metric({"StaleDataNodes", 
    "Number of datanodes marked stale due to delayed heartbeat"})
  public int getNumStaleDataNodes() {
    return getBlockManager().getDatanodeManager().getNumStaleNodes();
  }
5719    
5720      /**
5721       * Sets the current generation stamp for legacy blocks
5722       */
5723      void setGenerationStampV1(long stamp) {
5724        generationStampV1.setCurrentValue(stamp);
5725      }
5726    
5727      /**
5728       * Gets the current generation stamp for legacy blocks
5729       */
5730      long getGenerationStampV1() {
5731        return generationStampV1.getCurrentValue();
5732      }
5733    
5734      /**
5735       * Gets the current generation stamp for this filesystem
5736       */
5737      void setGenerationStampV2(long stamp) {
5738        generationStampV2.setCurrentValue(stamp);
5739      }
5740    
5741      /**
5742       * Gets the current generation stamp for this filesystem
5743       */
5744      long getGenerationStampV2() {
5745        return generationStampV2.getCurrentValue();
5746      }
5747    
5748      /**
5749       * Upgrades the generation stamp for the filesystem
5750       * by reserving a sufficient range for all existing blocks.
5751       * Should be invoked only during the first upgrade to
5752       * sequential block IDs.
5753       */
5754      long upgradeGenerationStampToV2() {
5755        Preconditions.checkState(generationStampV2.getCurrentValue() ==
5756            GenerationStamp.LAST_RESERVED_STAMP);
5757    
5758        generationStampV2.skipTo(
5759            generationStampV1.getCurrentValue() +
5760            HdfsConstants.RESERVED_GENERATION_STAMPS_V1);
5761    
5762        generationStampV1Limit = generationStampV2.getCurrentValue();
5763        return generationStampV2.getCurrentValue();
5764      }
5765    
5766      /**
5767       * Sets the generation stamp that delineates random and sequentially
5768       * allocated block IDs.
5769       * @param stamp
5770       */
5771      void setGenerationStampV1Limit(long stamp) {
5772        Preconditions.checkState(generationStampV1Limit ==
5773                                 GenerationStamp.GRANDFATHER_GENERATION_STAMP);
5774        generationStampV1Limit = stamp;
5775      }
5776    
5777      /**
5778       * Gets the value of the generation stamp that delineates sequential
5779       * and random block IDs.
5780       */
5781      long getGenerationStampAtblockIdSwitch() {
5782        return generationStampV1Limit;
5783      }
5784    
  /** @return the sequential block ID generator (exposed for tests). */
  @VisibleForTesting
  SequentialBlockIdGenerator getBlockIdGenerator() {
    return blockIdGenerator;
  }
5789    
5790      /**
5791       * Sets the maximum allocated block ID for this filesystem. This is
5792       * the basis for allocating new block IDs.
5793       */
5794      void setLastAllocatedBlockId(long blockId) {
5795        blockIdGenerator.skipTo(blockId);
5796      }
5797    
5798      /**
5799       * Gets the maximum sequentially allocated block ID for this filesystem
5800       */
5801      long getLastAllocatedBlockId() {
5802        return blockIdGenerator.getCurrentValue();
5803      }
5804    
5805      /**
5806       * Increments, logs and then returns the stamp
5807       */
5808      long nextGenerationStamp(boolean legacyBlock)
5809          throws IOException, SafeModeException {
5810        assert hasWriteLock();
5811        checkNameNodeSafeMode("Cannot get next generation stamp");
5812    
5813        long gs;
5814        if (legacyBlock) {
5815          gs = getNextGenerationStampV1();
5816          getEditLog().logGenerationStampV1(gs);
5817        } else {
5818          gs = getNextGenerationStampV2();
5819          getEditLog().logGenerationStampV2(gs);
5820        }
5821    
5822        // NB: callers sync the log
5823        return gs;
5824      }
5825    
5826      @VisibleForTesting
5827      long getNextGenerationStampV1() throws IOException {
5828        long genStampV1 = generationStampV1.nextValue();
5829    
5830        if (genStampV1 >= generationStampV1Limit) {
5831          // We ran out of generation stamps for legacy blocks. In practice, it
5832          // is extremely unlikely as we reserved 1T v1 generation stamps. The
5833          // result is that we can no longer append to the legacy blocks that
5834          // were created before the upgrade to sequential block IDs.
5835          throw new OutOfV1GenerationStampsException();
5836        }
5837    
5838        return genStampV1;
5839      }
5840    
  /** @return the next generation stamp from the v2 (post-upgrade) sequence. */
  @VisibleForTesting
  long getNextGenerationStampV2() {
    return generationStampV2.nextValue();
  }
5845    
  /** @return generation stamp marking the v1/v2 (random/sequential) split. */
  long getGenerationStampV1Limit() {
    return generationStampV1Limit;
  }
5849    
5850      /**
5851       * Determine whether the block ID was randomly generated (legacy) or
5852       * sequentially generated. The generation stamp value is used to
5853       * make the distinction.
5854       * @param block
5855       * @return true if the block ID was randomly generated, false otherwise.
5856       */
5857      boolean isLegacyBlock(Block block) {
5858        return block.getGenerationStamp() < getGenerationStampV1Limit();
5859      }
5860    
5861      /**
5862       * Increments, logs and then returns the block ID
5863       */
5864      private long nextBlockId() throws IOException {
5865        assert hasWriteLock();
5866        checkNameNodeSafeMode("Cannot get next block ID");
5867        final long blockId = blockIdGenerator.nextValue();
5868        getEditLog().logAllocateBlockId(blockId);
5869        // NB: callers sync the log
5870        return blockId;
5871      }
5872    
5873      private INodeFile checkUCBlock(ExtendedBlock block,
5874          String clientName) throws IOException {
5875        assert hasWriteLock();
5876        checkNameNodeSafeMode("Cannot get a new generation stamp and an "
5877            + "access token for block " + block);
5878        
5879        // check stored block state
5880        BlockInfo storedBlock = getStoredBlock(ExtendedBlock.getLocalBlock(block));
5881        if (storedBlock == null || 
5882            storedBlock.getBlockUCState() != BlockUCState.UNDER_CONSTRUCTION) {
5883            throw new IOException(block + 
5884                " does not exist or is not under Construction" + storedBlock);
5885        }
5886        
5887        // check file inode
5888        final INodeFile file = ((INode)storedBlock.getBlockCollection()).asFile();
5889        if (file == null || !file.isUnderConstruction()) {
5890          throw new IOException("The file " + storedBlock + 
5891              " belonged to does not exist or it is not under construction.");
5892        }
5893        
5894        // check lease
5895        if (clientName == null
5896            || !clientName.equals(file.getFileUnderConstructionFeature()
5897                .getClientName())) {
5898          throw new LeaseExpiredException("Lease mismatch: " + block + 
5899              " is accessed by a non lease holder " + clientName); 
5900        }
5901    
5902        return file;
5903      }
5904      
5905      /**
5906       * Client is reporting some bad block locations.
5907       */
5908      void reportBadBlocks(LocatedBlock[] blocks) throws IOException {
5909        checkOperation(OperationCategory.WRITE);
5910        NameNode.stateChangeLog.info("*DIR* reportBadBlocks");
5911        writeLock();
5912        try {
5913          checkOperation(OperationCategory.WRITE);
5914          for (int i = 0; i < blocks.length; i++) {
5915            ExtendedBlock blk = blocks[i].getBlock();
5916            DatanodeInfo[] nodes = blocks[i].getLocations();
5917            String[] storageIDs = blocks[i].getStorageIDs();
5918            for (int j = 0; j < nodes.length; j++) {
5919              blockManager.findAndMarkBlockAsCorrupt(blk, nodes[j],
5920                  storageIDs == null ? null: storageIDs[j], 
5921                  "client machine reported it");
5922            }
5923          }
5924        } finally {
5925          writeUnlock();
5926        }
5927      }
5928    
5929      /**
5930       * Get a new generation stamp together with an access token for 
5931       * a block under construction
5932       * 
5933       * This method is called for recovering a failed pipeline or setting up
5934       * a pipeline to append to a block.
5935       * 
5936       * @param block a block
5937       * @param clientName the name of a client
5938       * @return a located block with a new generation stamp and an access token
5939       * @throws IOException if any error occurs
5940       */
5941      LocatedBlock updateBlockForPipeline(ExtendedBlock block, 
5942          String clientName) throws IOException {
5943        LocatedBlock locatedBlock;
5944        checkOperation(OperationCategory.WRITE);
5945        writeLock();
5946        try {
5947          checkOperation(OperationCategory.WRITE);
5948    
5949          // check vadility of parameters
5950          checkUCBlock(block, clientName);
5951      
5952          // get a new generation stamp and an access token
5953          block.setGenerationStamp(
5954              nextGenerationStamp(isLegacyBlock(block.getLocalBlock())));
5955          locatedBlock = new LocatedBlock(block, new DatanodeInfo[0]);
5956          blockManager.setBlockToken(locatedBlock, AccessMode.WRITE);
5957        } finally {
5958          writeUnlock();
5959        }
5960        // Ensure we record the new generation stamp
5961        getEditLog().logSync();
5962        return locatedBlock;
5963      }
5964      
5965      /**
5966       * Update a pipeline for a block under construction
5967       * 
5968       * @param clientName the name of the client
5969       * @param oldBlock and old block
5970       * @param newBlock a new block with a new generation stamp and length
5971       * @param newNodes datanodes in the pipeline
5972       * @throws IOException if any error occurs
5973       */
5974      void updatePipeline(String clientName, ExtendedBlock oldBlock, 
5975          ExtendedBlock newBlock, DatanodeID[] newNodes, String[] newStorageIDs)
5976          throws IOException {
5977        checkOperation(OperationCategory.WRITE);
5978        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
5979        if (cacheEntry != null && cacheEntry.isSuccess()) {
5980          return; // Return previous response
5981        }
5982        LOG.info("updatePipeline(block=" + oldBlock
5983                 + ", newGenerationStamp=" + newBlock.getGenerationStamp()
5984                 + ", newLength=" + newBlock.getNumBytes()
5985                 + ", newNodes=" + Arrays.asList(newNodes)
5986                 + ", clientName=" + clientName
5987                 + ")");
5988        writeLock();
5989        boolean success = false;
5990        try {
5991          checkOperation(OperationCategory.WRITE);
5992          checkNameNodeSafeMode("Pipeline not updated");
5993          assert newBlock.getBlockId()==oldBlock.getBlockId() : newBlock + " and "
5994            + oldBlock + " has different block identifier";
5995          updatePipelineInternal(clientName, oldBlock, newBlock, newNodes,
5996              newStorageIDs, cacheEntry != null);
5997          success = true;
5998        } finally {
5999          writeUnlock();
6000          RetryCache.setState(cacheEntry, success);
6001        }
6002        getEditLog().logSync();
6003        LOG.info("updatePipeline(" + oldBlock + ") successfully to " + newBlock);
6004      }
6005    
  /** @see #updatePipeline(String, ExtendedBlock, ExtendedBlock, DatanodeID[]) */
  private void updatePipelineInternal(String clientName, ExtendedBlock oldBlock, 
      ExtendedBlock newBlock, DatanodeID[] newNodes, String[] newStorageIDs,
      boolean logRetryCache)
      throws IOException {
    assert hasWriteLock();
    // check the validity of the block and lease holder name
    final INodeFile pendingFile = checkUCBlock(oldBlock, clientName);
    final BlockInfoUnderConstruction blockinfo
        = (BlockInfoUnderConstruction)pendingFile.getLastBlock();

    // check new GS & length: this is not expected — the new block must have
    // a strictly newer stamp and must not be shorter than the stored one
    if (newBlock.getGenerationStamp() <= blockinfo.getGenerationStamp() ||
        newBlock.getNumBytes() < blockinfo.getNumBytes()) {
      String msg = "Update " + oldBlock + " (len = " + 
        blockinfo.getNumBytes() + ") to an older state: " + newBlock + 
        " (len = " + newBlock.getNumBytes() +")";
      LOG.warn(msg);
      throw new IOException(msg);
    }

    // Update old block with the new generation stamp and new length
    blockinfo.setNumBytes(newBlock.getNumBytes());
    blockinfo.setGenerationStampAndVerifyReplicas(newBlock.getGenerationStamp());

    // find the DatanodeDescriptor objects
    final DatanodeStorageInfo[] storages = blockManager.getDatanodeManager()
        .getDatanodeStorageInfos(newNodes, newStorageIDs);
    blockinfo.setExpectedLocations(storages);

    // persist the updated block list of the file to the edit log
    String src = pendingFile.getFullPathName();
    dir.persistBlocks(src, pendingFile, logRetryCache);
  }
6039    
  /**
   * Rename was successful. If any part of the renamed subtree had files that
   * were being written to, move their leases to the new path. Caller must
   * hold the write lock; no edit-log entry is written here.
   */
  void unprotectedChangeLease(String src, String dst) {
    assert hasWriteLock();
    leaseManager.changeLease(src, dst);
  }
6046    
6047      /**
6048       * @return all the under-construction files in the lease map
6049       */
6050      Map<String, INodeFile> getFilesUnderConstruction() {
6051        synchronized (leaseManager) {
6052          return leaseManager.getINodesUnderConstruction();
6053        }
6054      }
6055    
6056      /**
6057       * Register a Backup name-node, verifying that it belongs
6058       * to the correct namespace, and adding it to the set of
6059       * active journals if necessary.
6060       * 
6061       * @param bnReg registration of the new BackupNode
6062       * @param nnReg registration of this NameNode
6063       * @throws IOException if the namespace IDs do not match
6064       */
6065      void registerBackupNode(NamenodeRegistration bnReg,
6066          NamenodeRegistration nnReg) throws IOException {
6067        writeLock();
6068        try {
6069          if(getFSImage().getStorage().getNamespaceID() 
6070             != bnReg.getNamespaceID())
6071            throw new IOException("Incompatible namespaceIDs: "
6072                + " Namenode namespaceID = "
6073                + getFSImage().getStorage().getNamespaceID() + "; "
6074                + bnReg.getRole() +
6075                " node namespaceID = " + bnReg.getNamespaceID());
6076          if (bnReg.getRole() == NamenodeRole.BACKUP) {
6077            getFSImage().getEditLog().registerBackupNode(
6078                bnReg, nnReg);
6079          }
6080        } finally {
6081          writeUnlock();
6082        }
6083      }
6084    
6085      /**
6086       * Release (unregister) backup node.
6087       * <p>
6088       * Find and remove the backup stream corresponding to the node.
6089       * @param registration
6090       * @throws IOException
6091       */
6092      void releaseBackupNode(NamenodeRegistration registration)
6093        throws IOException {
6094        checkOperation(OperationCategory.WRITE);
6095        writeLock();
6096        try {
6097          checkOperation(OperationCategory.WRITE);
6098          if(getFSImage().getStorage().getNamespaceID()
6099             != registration.getNamespaceID())
6100            throw new IOException("Incompatible namespaceIDs: "
6101                + " Namenode namespaceID = "
6102                + getFSImage().getStorage().getNamespaceID() + "; "
6103                + registration.getRole() +
6104                " node namespaceID = " + registration.getNamespaceID());
6105          getEditLog().releaseBackupStream(registration);
6106        } finally {
6107          writeUnlock();
6108        }
6109      }
6110    
6111      static class CorruptFileBlockInfo {
6112        final String path;
6113        final Block block;
6114        
6115        public CorruptFileBlockInfo(String p, Block b) {
6116          path = p;
6117          block = b;
6118        }
6119        
6120        @Override
6121        public String toString() {
6122          return block.getBlockName() + "\t" + path;
6123        }
6124      }
6125      /**
6126       * @param path Restrict corrupt files to this portion of namespace.
6127       * @param startBlockAfter Support for continuation; the set of files we return
6128       *  back is ordered by blockid; startBlockAfter tells where to start from
6129       * @return a list in which each entry describes a corrupt file/block
6130       * @throws AccessControlException
6131       * @throws IOException
6132       */
6133      Collection<CorruptFileBlockInfo> listCorruptFileBlocks(String path,
6134      String[] cookieTab) throws IOException {
6135        checkSuperuserPrivilege();
6136        checkOperation(OperationCategory.READ);
6137        readLock();
6138        try {
6139          checkOperation(OperationCategory.READ);
6140          if (!isPopulatingReplQueues()) {
6141            throw new IOException("Cannot run listCorruptFileBlocks because " +
6142                                  "replication queues have not been initialized.");
6143          }
6144          // print a limited # of corrupt files per call
6145          int count = 0;
6146          ArrayList<CorruptFileBlockInfo> corruptFiles = new ArrayList<CorruptFileBlockInfo>();
6147    
6148          final Iterator<Block> blkIterator = blockManager.getCorruptReplicaBlockIterator();
6149    
6150          if (cookieTab == null) {
6151            cookieTab = new String[] { null };
6152          }
6153          int skip = getIntCookie(cookieTab[0]);
6154          for (int i = 0; i < skip && blkIterator.hasNext(); i++) {
6155            blkIterator.next();
6156          }
6157    
6158          while (blkIterator.hasNext()) {
6159            Block blk = blkIterator.next();
6160            final INode inode = (INode)blockManager.getBlockCollection(blk);
6161            skip++;
6162            if (inode != null && blockManager.countNodes(blk).liveReplicas() == 0) {
6163              String src = FSDirectory.getFullPathName(inode);
6164              if (src.startsWith(path)){
6165                corruptFiles.add(new CorruptFileBlockInfo(src, blk));
6166                count++;
6167                if (count >= DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED)
6168                  break;
6169              }
6170            }
6171          }
6172          cookieTab[0] = String.valueOf(skip);
6173          LOG.info("list corrupt file blocks returned: " + count);
6174          return corruptFiles;
6175        } finally {
6176          readUnlock();
6177        }
6178      }
6179    
6180      /**
6181       * Convert string cookie to integer.
6182       */
6183      private static int getIntCookie(String cookie){
6184        int c;
6185        if(cookie == null){
6186          c = 0;
6187        } else {
6188          try{
6189            c = Integer.parseInt(cookie);
6190          }catch (NumberFormatException e) {
6191            c = 0;
6192          }
6193        }
6194        c = Math.max(0, c);
6195        return c;
6196      }
6197    
6198      /**
6199       * Create delegation token secret manager
6200       */
6201      private DelegationTokenSecretManager createDelegationTokenSecretManager(
6202          Configuration conf) {
6203        return new DelegationTokenSecretManager(conf.getLong(
6204            DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY,
6205            DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT),
6206            conf.getLong(DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY,
6207                DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT),
6208            conf.getLong(DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY,
6209                DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT),
6210            DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL,
6211            conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY,
6212                DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT),
6213            this);
6214      }
6215    
6216      /**
6217       * Returns the DelegationTokenSecretManager instance in the namesystem.
6218       * @return delegation token secret manager object
6219       */
6220      DelegationTokenSecretManager getDelegationTokenSecretManager() {
6221        return dtSecretManager;
6222      }
6223    
6224      /**
6225       * @param renewer
6226       * @return Token<DelegationTokenIdentifier>
6227       * @throws IOException
6228       */
6229      Token<DelegationTokenIdentifier> getDelegationToken(Text renewer)
6230          throws IOException {
6231        Token<DelegationTokenIdentifier> token;
6232        checkOperation(OperationCategory.WRITE);
6233        writeLock();
6234        try {
6235          checkOperation(OperationCategory.WRITE);
6236          checkNameNodeSafeMode("Cannot issue delegation token");
6237          if (!isAllowedDelegationTokenOp()) {
6238            throw new IOException(
6239              "Delegation Token can be issued only with kerberos or web authentication");
6240          }
6241          if (dtSecretManager == null || !dtSecretManager.isRunning()) {
6242            LOG.warn("trying to get DT with no secret manager running");
6243            return null;
6244          }
6245    
6246          UserGroupInformation ugi = getRemoteUser();
6247          String user = ugi.getUserName();
6248          Text owner = new Text(user);
6249          Text realUser = null;
6250          if (ugi.getRealUser() != null) {
6251            realUser = new Text(ugi.getRealUser().getUserName());
6252          }
6253          DelegationTokenIdentifier dtId = new DelegationTokenIdentifier(owner,
6254            renewer, realUser);
6255          token = new Token<DelegationTokenIdentifier>(
6256            dtId, dtSecretManager);
6257          long expiryTime = dtSecretManager.getTokenExpiryTime(dtId);
6258          getEditLog().logGetDelegationToken(dtId, expiryTime);
6259        } finally {
6260          writeUnlock();
6261        }
6262        getEditLog().logSync();
6263        return token;
6264      }
6265    
6266      /**
6267       * 
6268       * @param token
6269       * @return New expiryTime of the token
6270       * @throws InvalidToken
6271       * @throws IOException
6272       */
6273      long renewDelegationToken(Token<DelegationTokenIdentifier> token)
6274          throws InvalidToken, IOException {
6275        long expiryTime;
6276        checkOperation(OperationCategory.WRITE);
6277        writeLock();
6278        try {
6279          checkOperation(OperationCategory.WRITE);
6280    
6281          checkNameNodeSafeMode("Cannot renew delegation token");
6282          if (!isAllowedDelegationTokenOp()) {
6283            throw new IOException(
6284                "Delegation Token can be renewed only with kerberos or web authentication");
6285          }
6286          String renewer = getRemoteUser().getShortUserName();
6287          expiryTime = dtSecretManager.renewToken(token, renewer);
6288          DelegationTokenIdentifier id = new DelegationTokenIdentifier();
6289          ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier());
6290          DataInputStream in = new DataInputStream(buf);
6291          id.readFields(in);
6292          getEditLog().logRenewDelegationToken(id, expiryTime);
6293        } finally {
6294          writeUnlock();
6295        }
6296        getEditLog().logSync();
6297        return expiryTime;
6298      }
6299    
6300      /**
6301       * 
6302       * @param token
6303       * @throws IOException
6304       */
6305      void cancelDelegationToken(Token<DelegationTokenIdentifier> token)
6306          throws IOException {
6307        checkOperation(OperationCategory.WRITE);
6308        writeLock();
6309        try {
6310          checkOperation(OperationCategory.WRITE);
6311    
6312          checkNameNodeSafeMode("Cannot cancel delegation token");
6313          String canceller = getRemoteUser().getUserName();
6314          DelegationTokenIdentifier id = dtSecretManager
6315            .cancelToken(token, canceller);
6316          getEditLog().logCancelDelegationToken(id);
6317        } finally {
6318          writeUnlock();
6319        }
6320        getEditLog().logSync();
6321      }
6322    
  /** @return the current state of the delegation-token secret manager. */
  SecretManagerState saveSecretManagerState() {
    return dtSecretManager.saveSecretManagerState();
  }
6326    
6327      /**
6328       * @param in load the state of secret manager from input stream
6329       */
6330      void loadSecretManagerStateCompat(DataInput in) throws IOException {
6331        dtSecretManager.loadSecretManagerStateCompat(in);
6332      }
6333    
  /**
   * Load the secret manager state from its section of the fsimage.
   *
   * @param s the secret manager section header
   * @param keys persisted delegation keys
   * @param tokens persisted delegation tokens
   */
  void loadSecretManagerState(SecretManagerSection s,
      List<SecretManagerSection.DelegationKey> keys,
      List<SecretManagerSection.PersistToken> tokens) throws IOException {
    dtSecretManager.loadSecretManagerState(new SecretManagerState(s, keys, tokens));
  }
6339    
6340      /**
6341       * Log the updateMasterKey operation to edit logs
6342       * 
6343       * @param key new delegation key.
6344       */
6345      public void logUpdateMasterKey(DelegationKey key) {
6346        
6347        assert !isInSafeMode() :
6348          "this should never be called while in safemode, since we stop " +
6349          "the DT manager before entering safemode!";
6350        // No need to hold FSN lock since we don't access any internal
6351        // structures, and this is stopped before the FSN shuts itself
6352        // down, etc.
6353        getEditLog().logUpdateMasterKey(key);
6354        getEditLog().logSync();
6355      }
6356      
6357      /**
6358       * Log the cancellation of expired tokens to edit logs
6359       * 
6360       * @param id token identifier to cancel
6361       */
6362      public void logExpireDelegationToken(DelegationTokenIdentifier id) {
6363        assert !isInSafeMode() :
6364          "this should never be called while in safemode, since we stop " +
6365          "the DT manager before entering safemode!";
6366        // No need to hold FSN lock since we don't access any internal
6367        // structures, and this is stopped before the FSN shuts itself
6368        // down, etc.
6369        getEditLog().logCancelDelegationToken(id);
6370      }  
6371      
  /**
   * Log a lease reassignment to the edit log. Caller must hold the write
   * lock; callers are responsible for syncing the log.
   */
  private void logReassignLease(String leaseHolder, String src,
      String newHolder) {
    assert hasWriteLock();
    getEditLog().logReassignLease(leaseHolder, src, newHolder);
  }
6377      
6378      /**
6379       * 
6380       * @return true if delegation token operation is allowed
6381       */
6382      private boolean isAllowedDelegationTokenOp() throws IOException {
6383        AuthenticationMethod authMethod = getConnectionAuthenticationMethod();
6384        if (UserGroupInformation.isSecurityEnabled()
6385            && (authMethod != AuthenticationMethod.KERBEROS)
6386            && (authMethod != AuthenticationMethod.KERBEROS_SSL)
6387            && (authMethod != AuthenticationMethod.CERTIFICATE)) {
6388          return false;
6389        }
6390        return true;
6391      }
6392      
6393      /**
6394       * Returns authentication method used to establish the connection
6395       * @return AuthenticationMethod used to establish connection
6396       * @throws IOException
6397       */
6398      private AuthenticationMethod getConnectionAuthenticationMethod()
6399          throws IOException {
6400        UserGroupInformation ugi = getRemoteUser();
6401        AuthenticationMethod authMethod = ugi.getAuthenticationMethod();
6402        if (authMethod == AuthenticationMethod.PROXY) {
6403          authMethod = ugi.getRealUser().getAuthenticationMethod();
6404        }
6405        return authMethod;
6406      }
6407      
6408      /**
6409       * Client invoked methods are invoked over RPC and will be in 
6410       * RPC call context even if the client exits.
6411       */
6412      private boolean isExternalInvocation() {
6413        return Server.isRpcInvocation() || NamenodeWebHdfsMethods.isWebHdfsInvocation();
6414      }
6415    
6416      private static InetAddress getRemoteIp() {
6417        InetAddress ip = Server.getRemoteIp();
6418        if (ip != null) {
6419          return ip;
6420        }
6421        return NamenodeWebHdfsMethods.getRemoteIp();
6422      }
6423      
  // Optimized ugi lookup for RPC operations: delegate to NameNode, which
  // avoids a trip through the synchronized UGI.getCurrentUser.
  private static UserGroupInformation getRemoteUser() throws IOException {
    return NameNode.getRemoteUser();
  }
6429      
6430      /**
6431       * Log fsck event in the audit log 
6432       */
6433      void logFsckEvent(String src, InetAddress remoteAddress) throws IOException {
6434        if (isAuditEnabled()) {
6435          logAuditEvent(true, getRemoteUser(),
6436                        remoteAddress,
6437                        "fsck", src, null, null);
6438        }
6439      }
6440      /**
6441       * Register NameNodeMXBean
6442       */
6443      private void registerMXBean() {
6444        mxbeanName = MBeans.register("NameNode", "NameNodeInfo", this);
6445      }
6446    
6447      /**
6448       * Class representing Namenode information for JMX interfaces
6449       */
6450      @Override // NameNodeMXBean
6451      public String getVersion() {
6452        return VersionInfo.getVersion() + ", r" + VersionInfo.getRevision();
6453      }
6454    
  /** @return used DFS capacity, per {@link #getCapacityUsed()}. */
  @Override // NameNodeMXBean
  public long getUsed() {
    return this.getCapacityUsed();
  }
6459    
  /** @return remaining DFS capacity, per {@link #getCapacityRemaining()}. */
  @Override // NameNodeMXBean
  public long getFree() {
    return this.getCapacityRemaining();
  }
6464    
  /** @return total DFS capacity, per {@link #getCapacityTotal()}. */
  @Override // NameNodeMXBean
  public long getTotal() {
    return this.getCapacityTotal();
  }
6469    
6470      @Override // NameNodeMXBean
6471      public String getSafemode() {
6472        if (!this.isInSafeMode())
6473          return "";
6474        return "Safe mode is ON. " + this.getSafeModeTip();
6475      }
6476    
  /** @return whether the fsimage reports the layout upgrade as finalized. */
  @Override // NameNodeMXBean
  public boolean isUpgradeFinalized() {
    return this.getFSImage().isUpgradeFinalized();
  }
6481    
  /** @return non-DFS capacity used on datanodes. */
  @Override // NameNodeMXBean
  public long getNonDfsUsedSpace() {
    return datanodeStatistics.getCapacityUsedNonDFS();
  }
6486    
  /** @return percentage of DFS capacity currently used. */
  @Override // NameNodeMXBean
  public float getPercentUsed() {
    return datanodeStatistics.getCapacityUsedPercent();
  }
6491    
  /** @return space used by this namenode's block pool. */
  @Override // NameNodeMXBean
  public long getBlockPoolUsedSpace() {
    return datanodeStatistics.getBlockPoolUsed();
  }
6496    
  /** @return percentage of capacity used by this block pool. */
  @Override // NameNodeMXBean
  public float getPercentBlockPoolUsed() {
    return datanodeStatistics.getPercentBlockPoolUsed();
  }
6501    
6502      @Override // NameNodeMXBean
6503      public float getPercentRemaining() {
6504        return datanodeStatistics.getCapacityRemainingPercent();
6505      }
6506    
6507      @Override // NameNodeMXBean
6508      public long getCacheCapacity() {
6509        return datanodeStatistics.getCacheCapacity();
6510      }
6511    
6512      @Override // NameNodeMXBean
6513      public long getCacheUsed() {
6514        return datanodeStatistics.getCacheUsed();
6515      }
6516    
6517      @Override // NameNodeMXBean
6518      public long getTotalBlocks() {
6519        return getBlocksTotal();
6520      }
6521    
6522      @Override // NameNodeMXBean
6523      @Metric
6524      public long getTotalFiles() {
6525        return getFilesTotal();
6526      }
6527    
6528      @Override // NameNodeMXBean
6529      public long getNumberOfMissingBlocks() {
6530        return getMissingBlocksCount();
6531      }
6532      
6533      @Override // NameNodeMXBean
6534      public int getThreads() {
6535        return ManagementFactory.getThreadMXBean().getThreadCount();
6536      }
6537    
6538      /**
6539       * Returned information is a JSON representation of map with host name as the
6540       * key and value is a map of live node attribute keys to its values
6541       */
6542      @Override // NameNodeMXBean
6543      public String getLiveNodes() {
6544        final Map<String, Map<String,Object>> info = 
6545          new HashMap<String, Map<String,Object>>();
6546        final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
6547        blockManager.getDatanodeManager().fetchDatanodes(live, null, true);
6548        for (DatanodeDescriptor node : live) {
6549          Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder()
6550              .put("infoAddr", node.getInfoAddr())
6551              .put("infoSecureAddr", node.getInfoSecureAddr())
6552              .put("xferaddr", node.getXferAddr())
6553              .put("lastContact", getLastContact(node))
6554              .put("usedSpace", getDfsUsed(node))
6555              .put("adminState", node.getAdminState().toString())
6556              .put("nonDfsUsedSpace", node.getNonDfsUsed())
6557              .put("capacity", node.getCapacity())
6558              .put("numBlocks", node.numBlocks())
6559              .put("version", node.getSoftwareVersion())
6560              .put("used", node.getDfsUsed())
6561              .put("remaining", node.getRemaining())
6562              .put("blockScheduled", node.getBlocksScheduled())
6563              .put("blockPoolUsed", node.getBlockPoolUsed())
6564              .put("blockPoolUsedPercent", node.getBlockPoolUsedPercent())
6565              .put("volfails", node.getVolumeFailures())
6566              .build();
6567    
6568          info.put(node.getHostName(), innerinfo);
6569        }
6570        return JSON.toString(info);
6571      }
6572    
6573      /**
6574       * Returned information is a JSON representation of map with host name as the
6575       * key and value is a map of dead node attribute keys to its values
6576       */
6577      @Override // NameNodeMXBean
6578      public String getDeadNodes() {
6579        final Map<String, Map<String, Object>> info = 
6580          new HashMap<String, Map<String, Object>>();
6581        final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
6582        blockManager.getDatanodeManager().fetchDatanodes(null, dead, true);
6583        for (DatanodeDescriptor node : dead) {
6584          Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder()
6585              .put("lastContact", getLastContact(node))
6586              .put("decommissioned", node.isDecommissioned())
6587              .put("xferaddr", node.getXferAddr())
6588              .build();
6589          info.put(node.getHostName(), innerinfo);
6590        }
6591        return JSON.toString(info);
6592      }
6593    
6594      /**
6595       * Returned information is a JSON representation of map with host name as the
6596       * key and value is a map of decomisioning node attribute keys to its values
6597       */
6598      @Override // NameNodeMXBean
6599      public String getDecomNodes() {
6600        final Map<String, Map<String, Object>> info = 
6601          new HashMap<String, Map<String, Object>>();
6602        final List<DatanodeDescriptor> decomNodeList = blockManager.getDatanodeManager(
6603            ).getDecommissioningNodes();
6604        for (DatanodeDescriptor node : decomNodeList) {
6605          Map<String, Object> innerinfo = ImmutableMap
6606              .<String, Object> builder()
6607              .put("xferaddr", node.getXferAddr())
6608              .put("underReplicatedBlocks",
6609                  node.decommissioningStatus.getUnderReplicatedBlocks())
6610              .put("decommissionOnlyReplicas",
6611                  node.decommissioningStatus.getDecommissionOnlyReplicas())
6612              .put("underReplicateInOpenFiles",
6613                  node.decommissioningStatus.getUnderReplicatedInOpenFiles())
6614              .build();
6615          info.put(node.getHostName(), innerinfo);
6616        }
6617        return JSON.toString(info);
6618      }
6619    
6620      private long getLastContact(DatanodeDescriptor alivenode) {
6621        return (Time.now() - alivenode.getLastUpdate())/1000;
6622      }
6623    
6624      private long getDfsUsed(DatanodeDescriptor alivenode) {
6625        return alivenode.getDfsUsed();
6626      }
6627    
6628      @Override  // NameNodeMXBean
6629      public String getClusterId() {
6630        return dir.fsImage.getStorage().getClusterID();
6631      }
6632      
6633      @Override  // NameNodeMXBean
6634      public String getBlockPoolId() {
6635        return blockPoolId;
6636      }
6637      
6638      @Override  // NameNodeMXBean
6639      public String getNameDirStatuses() {
6640        Map<String, Map<File, StorageDirType>> statusMap =
6641          new HashMap<String, Map<File, StorageDirType>>();
6642        
6643        Map<File, StorageDirType> activeDirs = new HashMap<File, StorageDirType>();
6644        for (Iterator<StorageDirectory> it
6645            = getFSImage().getStorage().dirIterator(); it.hasNext();) {
6646          StorageDirectory st = it.next();
6647          activeDirs.put(st.getRoot(), st.getStorageDirType());
6648        }
6649        statusMap.put("active", activeDirs);
6650        
6651        List<Storage.StorageDirectory> removedStorageDirs
6652            = getFSImage().getStorage().getRemovedStorageDirs();
6653        Map<File, StorageDirType> failedDirs = new HashMap<File, StorageDirType>();
6654        for (StorageDirectory st : removedStorageDirs) {
6655          failedDirs.put(st.getRoot(), st.getStorageDirType());
6656        }
6657        statusMap.put("failed", failedDirs);
6658        
6659        return JSON.toString(statusMap);
6660      }
6661    
6662      @Override // NameNodeMXBean
6663      public String getNodeUsage() {
6664        float median = 0;
6665        float max = 0;
6666        float min = 0;
6667        float dev = 0;
6668    
6669        final Map<String, Map<String,Object>> info =
6670            new HashMap<String, Map<String,Object>>();
6671        final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
6672        blockManager.getDatanodeManager().fetchDatanodes(live, null, true);
6673    
6674        if (live.size() > 0) {
6675          float totalDfsUsed = 0;
6676          float[] usages = new float[live.size()];
6677          int i = 0;
6678          for (DatanodeDescriptor dn : live) {
6679            usages[i++] = dn.getDfsUsedPercent();
6680            totalDfsUsed += dn.getDfsUsedPercent();
6681          }
6682          totalDfsUsed /= live.size();
6683          Arrays.sort(usages);
6684          median = usages[usages.length / 2];
6685          max = usages[usages.length - 1];
6686          min = usages[0];
6687    
6688          for (i = 0; i < usages.length; i++) {
6689            dev += (usages[i] - totalDfsUsed) * (usages[i] - totalDfsUsed);
6690          }
6691          dev = (float) Math.sqrt(dev / usages.length);
6692        }
6693    
6694        final Map<String, Object> innerInfo = new HashMap<String, Object>();
6695        innerInfo.put("min", StringUtils.format("%.2f%%", min));
6696        innerInfo.put("median", StringUtils.format("%.2f%%", median));
6697        innerInfo.put("max", StringUtils.format("%.2f%%", max));
6698        innerInfo.put("stdDev", StringUtils.format("%.2f%%", dev));
6699        info.put("nodeUsage", innerInfo);
6700    
6701        return JSON.toString(info);
6702      }
6703    
  /**
   * Report each edit-log journal as a JSON list of maps with keys
   * "required", "disabled", "manager" and "stream".  The "stream" value
   * describes the journal's current output stream state.
   */
  @Override  // NameNodeMXBean
  public String getNameJournalStatus() {
    List<Map<String, String>> jasList = new ArrayList<Map<String, String>>();
    FSEditLog log = getFSImage().getEditLog();
    if (log != null) {
      boolean openForWrite = log.isOpenForWrite();
      for (JournalAndStream jas : log.getJournals()) {
        final Map<String, String> jasMap = new HashMap<String, String>();
        String manager = jas.getManager().toString();

        jasMap.put("required", String.valueOf(jas.isRequired()));
        jasMap.put("disabled", String.valueOf(jas.isDisabled()));
        jasMap.put("manager", manager);

        if (jas.isDisabled()) {
          // Journal was disabled after failures.
          jasMap.put("stream", "Failed");
        } else if (openForWrite) {
          EditLogOutputStream elos = jas.getCurrentStream();
          if (elos != null) {
            jasMap.put("stream", elos.generateReport());
          } else {
            jasMap.put("stream", "not currently writing");
          }
        } else {
          jasMap.put("stream", "open for read");
        }
        jasList.add(jasMap);
      }
    }
    return JSON.toString(jasList);
  }
6735    
  /**
   * @return JSON with the last applied/written transaction ID and the
   *         transaction ID of the most recent checkpoint.
   */
  @Override // NameNodeMXBean
  public String getJournalTransactionInfo() {
    Map<String, String> txnIdMap = new HashMap<String, String>();
    txnIdMap.put("LastAppliedOrWrittenTxId",
        Long.toString(this.getFSImage().getLastAppliedOrWrittenTxId()));
    txnIdMap.put("MostRecentCheckpointTxId",
        Long.toString(this.getFSImage().getMostRecentCheckpointTxId()));
    return JSON.toString(txnIdMap);
  }
6745      
6746      @Override  // NameNodeMXBean
6747      public String getNNStarted() {
6748        return getStartTime().toString();
6749      }
6750    
6751      @Override  // NameNodeMXBean
6752      public String getCompileInfo() {
6753        return VersionInfo.getDate() + " by " + VersionInfo.getUser() +
6754            " from " + VersionInfo.getBranch();
6755      }
6756    
6757      /** @return the block manager. */
6758      public BlockManager getBlockManager() {
6759        return blockManager;
6760      }
6761      /** @return the FSDirectory. */
6762      public FSDirectory getFSDirectory() {
6763        return dir;
6764      }
6765      /** @return the cache manager. */
6766      public CacheManager getCacheManager() {
6767        return cacheManager;
6768      }
6769    
6770      @Override  // NameNodeMXBean
6771      public String getCorruptFiles() {
6772        List<String> list = new ArrayList<String>();
6773        Collection<FSNamesystem.CorruptFileBlockInfo> corruptFileBlocks;
6774        try {
6775          corruptFileBlocks = listCorruptFileBlocks("/", null);
6776          int corruptFileCount = corruptFileBlocks.size();
6777          if (corruptFileCount != 0) {
6778            for (FSNamesystem.CorruptFileBlockInfo c : corruptFileBlocks) {
6779              list.add(c.toString());
6780            }
6781          }
6782        } catch (IOException e) {
6783          LOG.warn("Get corrupt file blocks returned error: " + e.getMessage());
6784        }
6785        return JSON.toString(list);
6786      }
6787    
6788      @Override  //NameNodeMXBean
6789      public int getDistinctVersionCount() {
6790        return blockManager.getDatanodeManager().getDatanodesSoftwareVersions()
6791          .size();
6792      }
6793    
6794      @Override  //NameNodeMXBean
6795      public Map<String, Integer> getDistinctVersions() {
6796        return blockManager.getDatanodeManager().getDatanodesSoftwareVersions();
6797      }
6798    
6799      @Override  //NameNodeMXBean
6800      public String getSoftwareVersion() {
6801        return VersionInfo.getVersion();
6802      }
6803    
6804      /**
6805       * Verifies that the given identifier and password are valid and match.
6806       * @param identifier Token identifier.
6807       * @param password Password in the token.
6808       */
6809      public synchronized void verifyToken(DelegationTokenIdentifier identifier,
6810          byte[] password) throws InvalidToken, RetriableException {
6811        try {
6812          getDelegationTokenSecretManager().verifyToken(identifier, password);
6813        } catch (InvalidToken it) {
6814          if (inTransitionToActive()) {
6815            throw new RetriableException(it);
6816          }
6817          throw it;
6818        }
6819      }
6820      
6821      @Override
6822      public boolean isGenStampInFuture(Block block) {
6823        if (isLegacyBlock(block)) {
6824          return block.getGenerationStamp() > getGenerationStampV1();
6825        } else {
6826          return block.getGenerationStamp() > getGenerationStampV2();
6827        }
6828      }
6829    
6830      @VisibleForTesting
6831      public EditLogTailer getEditLogTailer() {
6832        return editLogTailer;
6833      }
6834      
6835      @VisibleForTesting
6836      public void setEditLogTailerForTests(EditLogTailer tailer) {
6837        this.editLogTailer = tailer;
6838      }
6839      
6840      @VisibleForTesting
6841      void setFsLockForTests(ReentrantReadWriteLock lock) {
6842        this.fsLock.coarseLock = lock;
6843      }
6844      
6845      @VisibleForTesting
6846      public ReentrantReadWriteLock getFsLockForTests() {
6847        return fsLock.coarseLock;
6848      }
6849      
6850      @VisibleForTesting
6851      public ReentrantLock getLongReadLockForTests() {
6852        return fsLock.longReadLock;
6853      }
6854    
6855      @VisibleForTesting
6856      public SafeModeInfo getSafeModeInfoForTests() {
6857        return safeMode;
6858      }
6859      
6860      @VisibleForTesting
6861      public void setNNResourceChecker(NameNodeResourceChecker nnResourceChecker) {
6862        this.nnResourceChecker = nnResourceChecker;
6863      }
6864    
6865      @Override
6866      public boolean isAvoidingStaleDataNodesForWrite() {
6867        return this.blockManager.getDatanodeManager()
6868            .shouldAvoidStaleDataNodesForWrite();
6869      }
6870    
6871      @Override // FSClusterStats
6872      public int getNumDatanodesInService() {
6873        return getNumLiveDataNodes() - getNumDecomLiveDataNodes();
6874      }
6875    
6876      public SnapshotManager getSnapshotManager() {
6877        return snapshotManager;
6878      }
6879      
  /**
   * Allow snapshots to be taken on a directory.  Superuser-only; logged to
   * the edit log and audited.
   */
  void allowSnapshot(String path) throws SafeModeException, IOException {
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      // Re-check under the write lock: HA state / safe mode may have changed
      // between the first check and lock acquisition.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot allow snapshot for " + path);
      checkSuperuserPrivilege();

      // FSDirectory has its own lock, always taken inside the FSN write lock.
      dir.writeLock();
      try {
        snapshotManager.setSnapshottable(path, true);
      } finally {
        dir.writeUnlock();
      }
      getEditLog().logAllowSnapshot(path);
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the write lock.
    getEditLog().logSync();

    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "allowSnapshot", path, null, null);
    }
  }
6905      
  /**
   * Disallow snapshots on a directory.  Superuser-only; logged to the edit
   * log and audited.
   */
  void disallowSnapshot(String path) throws SafeModeException, IOException {
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      // Re-check under the write lock in case the HA/safe-mode state changed.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot disallow snapshot for " + path);
      checkSuperuserPrivilege();

      // FSDirectory lock nests inside the FSN write lock.
      dir.writeLock();
      try {
        snapshotManager.resetSnapshottable(path);
      } finally {
        dir.writeUnlock();
      }
      getEditLog().logDisallowSnapshot(path);
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the write lock.
    getEditLog().logSync();
    
    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "disallowSnapshot", path, null, null);
    }
  }
6931      
6932      /**
6933       * Create a snapshot
6934       * @param snapshotRoot The directory path where the snapshot is taken
6935       * @param snapshotName The name of the snapshot
6936       */
6937      String createSnapshot(String snapshotRoot, String snapshotName)
6938          throws SafeModeException, IOException {
6939        checkOperation(OperationCategory.WRITE);
6940        final FSPermissionChecker pc = getPermissionChecker();
6941        CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
6942            null);
6943        if (cacheEntry != null && cacheEntry.isSuccess()) {
6944          return (String) cacheEntry.getPayload();
6945        }
6946        String snapshotPath = null;
6947        writeLock();
6948        try {
6949          checkOperation(OperationCategory.WRITE);
6950          checkNameNodeSafeMode("Cannot create snapshot for " + snapshotRoot);
6951          if (isPermissionEnabled) {
6952            checkOwner(pc, snapshotRoot);
6953          }
6954    
6955          if (snapshotName == null || snapshotName.isEmpty()) {
6956            snapshotName = Snapshot.generateDefaultSnapshotName();
6957          }
6958          if(snapshotName != null){
6959            if (!DFSUtil.isValidNameForComponent(snapshotName)) {
6960                throw new InvalidPathException("Invalid snapshot name: "
6961                    + snapshotName);
6962            }
6963          }
6964          dir.verifySnapshotName(snapshotName, snapshotRoot);
6965          dir.writeLock();
6966          try {
6967            snapshotPath = snapshotManager.createSnapshot(snapshotRoot, snapshotName);
6968          } finally {
6969            dir.writeUnlock();
6970          }
6971          getEditLog().logCreateSnapshot(snapshotRoot, snapshotName,
6972              cacheEntry != null);
6973        } finally {
6974          writeUnlock();
6975          RetryCache.setState(cacheEntry, snapshotPath != null, snapshotPath);
6976        }
6977        getEditLog().logSync();
6978        
6979        if (auditLog.isInfoEnabled() && isExternalInvocation()) {
6980          logAuditEvent(true, "createSnapshot", snapshotRoot, snapshotPath, null);
6981        }
6982        return snapshotPath;
6983      }
6984      
6985      /**
6986       * Rename a snapshot
6987       * @param path The directory path where the snapshot was taken
6988       * @param snapshotOldName Old snapshot name
6989       * @param snapshotNewName New snapshot name
6990       * @throws SafeModeException
6991       * @throws IOException 
6992       */
6993      void renameSnapshot(String path, String snapshotOldName,
6994          String snapshotNewName) throws SafeModeException, IOException {
6995        checkOperation(OperationCategory.WRITE);
6996        final FSPermissionChecker pc = getPermissionChecker();
6997        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
6998        if (cacheEntry != null && cacheEntry.isSuccess()) {
6999          return; // Return previous response
7000        }
7001        writeLock();
7002        boolean success = false;
7003        try {
7004          checkOperation(OperationCategory.WRITE);
7005          checkNameNodeSafeMode("Cannot rename snapshot for " + path);
7006          if (isPermissionEnabled) {
7007            checkOwner(pc, path);
7008          }
7009          dir.verifySnapshotName(snapshotNewName, path);
7010          
7011          snapshotManager.renameSnapshot(path, snapshotOldName, snapshotNewName);
7012          getEditLog().logRenameSnapshot(path, snapshotOldName, snapshotNewName,
7013              cacheEntry != null);
7014          success = true;
7015        } finally {
7016          writeUnlock();
7017          RetryCache.setState(cacheEntry, success);
7018        }
7019        getEditLog().logSync();
7020        
7021        if (auditLog.isInfoEnabled() && isExternalInvocation()) {
7022          String oldSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotOldName);
7023          String newSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotNewName);
7024          logAuditEvent(true, "renameSnapshot", oldSnapshotRoot, newSnapshotRoot, null);
7025        }
7026      }
7027      
7028      /**
7029       * Get the list of snapshottable directories that are owned 
7030       * by the current user. Return all the snapshottable directories if the 
7031       * current user is a super user.
7032       * @return The list of all the current snapshottable directories
7033       * @throws IOException
7034       */
7035      public SnapshottableDirectoryStatus[] getSnapshottableDirListing()
7036          throws IOException {
7037        SnapshottableDirectoryStatus[] status = null;
7038        checkOperation(OperationCategory.READ);
7039        final FSPermissionChecker checker = getPermissionChecker();
7040        readLock();
7041        try {
7042          checkOperation(OperationCategory.READ);
7043          final String user = checker.isSuperUser()? null : checker.getUser();
7044          status = snapshotManager.getSnapshottableDirListing(user);
7045        } finally {
7046          readUnlock();
7047        }
7048        if (auditLog.isInfoEnabled() && isExternalInvocation()) {
7049          logAuditEvent(true, "listSnapshottableDirectory", null, null, null);
7050        }
7051        return status;
7052      }
7053      
7054      /**
7055       * Get the difference between two snapshots (or between a snapshot and the
7056       * current status) of a snapshottable directory.
7057       * 
7058       * @param path The full path of the snapshottable directory.
7059       * @param fromSnapshot Name of the snapshot to calculate the diff from. Null
7060       *          or empty string indicates the current tree.
7061       * @param toSnapshot Name of the snapshot to calculated the diff to. Null or
7062       *          empty string indicates the current tree.
7063       * @return A report about the difference between {@code fromSnapshot} and 
7064       *         {@code toSnapshot}. Modified/deleted/created/renamed files and 
7065       *         directories belonging to the snapshottable directories are listed 
7066       *         and labeled as M/-/+/R respectively. 
7067       * @throws IOException
7068       */
7069      SnapshotDiffReport getSnapshotDiffReport(String path,
7070          String fromSnapshot, String toSnapshot) throws IOException {
7071        SnapshotDiffInfo diffs = null;
7072        checkOperation(OperationCategory.READ);
7073        final FSPermissionChecker pc = getPermissionChecker();
7074        readLock();
7075        try {
7076          checkOperation(OperationCategory.READ);
7077          if (isPermissionEnabled) {
7078            checkSubtreeReadPermission(pc, path, fromSnapshot);
7079            checkSubtreeReadPermission(pc, path, toSnapshot);
7080          }
7081          diffs = snapshotManager.diff(path, fromSnapshot, toSnapshot);
7082        } finally {
7083          readUnlock();
7084        }
7085        
7086        if (auditLog.isInfoEnabled() && isExternalInvocation()) {
7087          logAuditEvent(true, "computeSnapshotDiff", null, null, null);
7088        }
7089        return diffs != null ? diffs.generateReport() : new SnapshotDiffReport(
7090            path, fromSnapshot, toSnapshot,
7091            Collections.<DiffReportEntry> emptyList());
7092      }
7093      
7094      private void checkSubtreeReadPermission(final FSPermissionChecker pc,
7095          final String snapshottablePath, final String snapshot)
7096              throws AccessControlException, UnresolvedLinkException {
7097        final String fromPath = snapshot == null?
7098            snapshottablePath: Snapshot.getSnapshotPath(snapshottablePath, snapshot);
7099        checkPermission(pc, fromPath, false, null, null, FsAction.READ, FsAction.READ);
7100      }
7101      
7102      /**
7103       * Delete a snapshot of a snapshottable directory
7104       * @param snapshotRoot The snapshottable directory
7105       * @param snapshotName The name of the to-be-deleted snapshot
7106       * @throws SafeModeException
7107       * @throws IOException
7108       */
7109      void deleteSnapshot(String snapshotRoot, String snapshotName)
7110          throws SafeModeException, IOException {
7111        checkOperation(OperationCategory.WRITE);
7112        final FSPermissionChecker pc = getPermissionChecker();
7113        
7114        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
7115        if (cacheEntry != null && cacheEntry.isSuccess()) {
7116          return; // Return previous response
7117        }
7118        boolean success = false;
7119        BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
7120        writeLock();
7121        try {
7122          checkOperation(OperationCategory.WRITE);
7123          checkNameNodeSafeMode("Cannot delete snapshot for " + snapshotRoot);
7124          if (isPermissionEnabled) {
7125            checkOwner(pc, snapshotRoot);
7126          }
7127    
7128          List<INode> removedINodes = new ChunkedArrayList<INode>();
7129          dir.writeLock();
7130          try {
7131            snapshotManager.deleteSnapshot(snapshotRoot, snapshotName,
7132                collectedBlocks, removedINodes);
7133            dir.removeFromInodeMap(removedINodes);
7134          } finally {
7135            dir.writeUnlock();
7136          }
7137          removedINodes.clear();
7138          getEditLog().logDeleteSnapshot(snapshotRoot, snapshotName,
7139              cacheEntry != null);
7140          success = true;
7141        } finally {
7142          writeUnlock();
7143          RetryCache.setState(cacheEntry, success);
7144        }
7145        getEditLog().logSync();
7146    
7147        removeBlocks(collectedBlocks);
7148        collectedBlocks.clear();
7149    
7150        if (auditLog.isInfoEnabled() && isExternalInvocation()) {
7151          String rootPath = Snapshot.getSnapshotPath(snapshotRoot, snapshotName);
7152          logAuditEvent(true, "deleteSnapshot", rootPath, null, null);
7153        }
7154      }
7155    
7156      /**
7157       * Remove a list of INodeDirectorySnapshottable from the SnapshotManager
7158       * @param toRemove the list of INodeDirectorySnapshottable to be removed
7159       */
7160      void removeSnapshottableDirs(List<INodeDirectorySnapshottable> toRemove) {
7161        if (snapshotManager != null) {
7162          snapshotManager.removeSnapshottable(toRemove);
7163        }
7164      }
7165    
  /**
   * Query the state of an ongoing rolling upgrade.  Superuser-only.
   * @return the current rolling-upgrade info (with its rollback-image flag
   *         refreshed), or null when no rolling upgrade is in progress.
   */
  RollingUpgradeInfo queryRollingUpgrade() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.READ);
    readLock();
    try {
      if (rollingUpgradeInfo != null) {
        // Refresh the flag: a rollback image may have been created since
        // the upgrade started.
        boolean hasRollbackImage = this.getFSImage().hasRollbackFSImage();
        rollingUpgradeInfo.setCreatedRollbackImages(hasRollbackImage);
      }
      return rollingUpgradeInfo;
    } finally {
      readUnlock();
    }
  }
7180    
  /**
   * Start a rolling upgrade.  Superuser-only.  For non-HA the NameNode must
   * be in safe mode (an extra rollback checkpoint is taken); for HA it must
   * NOT be in safe mode.
   * @return the new rolling-upgrade info
   */
  RollingUpgradeInfo startRollingUpgrade() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      // Re-check under the write lock; state may have changed meanwhile.
      checkOperation(OperationCategory.WRITE);
      long startTime = now();
      if (!haEnabled) { // for non-HA, we require NN to be in safemode
        startRollingUpgradeInternalForNonHA(startTime);
      } else { // for HA, NN cannot be in safemode
        checkNameNodeSafeMode("Failed to start rolling upgrade");
        startRollingUpgradeInternal(startTime);
      }

      getEditLog().logStartRollingUpgrade(rollingUpgradeInfo.getStartTime());
      if (haEnabled) {
        // roll the edit log to make sure the standby NameNode can tail
        getFSImage().rollEditLog();
      }
    } finally {
      writeUnlock();
    }

    getEditLog().logSync();
    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "startRollingUpgrade", null, null, null);
    }
    return rollingUpgradeInfo;
  }
7210    
7211      /**
7212       * Update internal state to indicate that a rolling upgrade is in progress.
7213       * @param startTime
7214       */
7215      void startRollingUpgradeInternal(long startTime)
7216          throws IOException {
7217        checkRollingUpgrade("start rolling upgrade");
7218        getFSImage().checkUpgrade(this);
7219        setRollingUpgradeInfo(false, startTime);
7220      }
7221    
7222      /**
7223       * Update internal state to indicate that a rolling upgrade is in progress for
7224       * non-HA setup. This requires the namesystem is in SafeMode and after doing a
7225       * checkpoint for rollback the namesystem will quit the safemode automatically 
7226       */
7227      private void startRollingUpgradeInternalForNonHA(long startTime)
7228          throws IOException {
7229        Preconditions.checkState(!haEnabled);
7230        if (!isInSafeMode()) {
7231          throw new IOException("Safe mode should be turned ON "
7232              + "in order to create namespace image.");
7233        }
7234        checkRollingUpgrade("start rolling upgrade");
7235        getFSImage().checkUpgrade(this);
7236        // in non-HA setup, we do an extra ckpt to generate a rollback image
7237        getFSImage().saveNamespace(this, NameNodeFile.IMAGE_ROLLBACK, null);
7238        LOG.info("Successfully saved namespace for preparing rolling upgrade.");
7239    
7240        // leave SafeMode automatically
7241        setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
7242        setRollingUpgradeInfo(true, startTime);
7243      }
7244    
7245      void setRollingUpgradeInfo(boolean createdRollbackImages, long startTime) {
7246        rollingUpgradeInfo = new RollingUpgradeInfo(blockPoolId,
7247            createdRollbackImages, startTime, 0L);
7248      }
7249    
7250      public void setCreatedRollbackImages(boolean created) {
7251        if (rollingUpgradeInfo != null) {
7252          rollingUpgradeInfo.setCreatedRollbackImages(created);
7253        }
7254      }
7255    
7256      public RollingUpgradeInfo getRollingUpgradeInfo() {
7257        return rollingUpgradeInfo;
7258      }
7259    
7260      public boolean isNeedRollbackFsImage() {
7261        return needRollbackFsImage;
7262      }
7263    
7264      public void setNeedRollbackFsImage(boolean needRollbackFsImage) {
7265        this.needRollbackFsImage = needRollbackFsImage;
7266      }
7267    
7268      @Override  // NameNodeMXBean
7269      public RollingUpgradeInfo.Bean getRollingUpgradeStatus() {
7270        readLock();
7271        try {
7272          RollingUpgradeInfo upgradeInfo = getRollingUpgradeInfo();
7273          if (upgradeInfo != null) {
7274            return new RollingUpgradeInfo.Bean(upgradeInfo);
7275          }
7276          return null;
7277        } finally {
7278          readUnlock();
7279        }
7280      }
7281    
  /** @return true if a rolling upgrade is in progress (upgrade info is set). */
  public boolean isRollingUpgrade() {
    return rollingUpgradeInfo != null;
  }
7286    
7287      void checkRollingUpgrade(String action) throws RollingUpgradeException {
7288        if (isRollingUpgrade()) {
7289          throw new RollingUpgradeException("Failed to " + action
7290              + " since a rolling upgrade is already in progress."
7291              + " Existing rolling upgrade info:\n" + rollingUpgradeInfo);
7292        }
7293      }
7294    
  /**
   * Finalize the rolling upgrade currently in progress: log the finalization
   * to the edit log, save a fresh namespace image, and promote the rollback
   * image to a regular checkpoint image.
   *
   * @return the finalized rolling upgrade info (with finalize time set)
   * @throws IOException if the caller is not a superuser, the namesystem is in
   *           safe mode, or no rolling upgrade is in progress
   */
  RollingUpgradeInfo finalizeRollingUpgrade() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.WRITE);
    writeLock();
    final RollingUpgradeInfo returnInfo;
    try {
      // Re-check under the lock: state may have changed while waiting.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Failed to finalize rolling upgrade");

      returnInfo = finalizeRollingUpgradeInternal(now());
      getEditLog().logFinalizeRollingUpgrade(returnInfo.getFinalizeTime());
      getFSImage().saveNamespace(this);
      // The rollback image becomes the latest regular checkpoint image.
      getFSImage().renameCheckpoint(NameNodeFile.IMAGE_ROLLBACK,
          NameNodeFile.IMAGE);
    } finally {
      writeUnlock();
    }

    // getEditLog().logSync() is not needed since it does saveNamespace 

    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "finalizeRollingUpgrade", null, null, null);
    }
    return returnInfo;
  }
7320    
7321      RollingUpgradeInfo finalizeRollingUpgradeInternal(long finalizeTime)
7322          throws RollingUpgradeException {
7323        if (!isRollingUpgrade()) {
7324          throw new RollingUpgradeException(
7325              "Failed to finalize rolling upgrade since there is no rolling upgrade in progress.");
7326        }
7327    
7328        final long startTime = rollingUpgradeInfo.getStartTime();
7329        rollingUpgradeInfo = null;
7330        return new RollingUpgradeInfo(blockPoolId, false, startTime, finalizeTime);
7331      }
7332    
  /**
   * Add a new cache directive. The caller must not supply an ID; a new one is
   * assigned and returned. Idempotent across RPC retries via the retry cache:
   * a previously successful attempt returns its recorded ID.
   *
   * @param directive the directive to add; its ID field must be null
   * @param flags if FORCE is absent, waits for a cache rescan first
   * @return the ID assigned to the new directive
   * @throws IOException if in safe mode or an ID was supplied
   */
  long addCacheDirective(CacheDirectiveInfo directive, EnumSet<CacheFlag> flags)
      throws IOException {
    checkOperation(OperationCategory.WRITE);
    final FSPermissionChecker pc = isPermissionEnabled ?
        getPermissionChecker() : null;
    CacheEntryWithPayload cacheEntry =
        RetryCache.waitForCompletion(retryCache, null);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      // Retried RPC that already succeeded; return the ID recorded earlier.
      return (Long) cacheEntry.getPayload();
    }
    boolean success = false;
    if (!flags.contains(CacheFlag.FORCE)) {
      cacheManager.waitForRescanIfNeeded();
    }
    writeLock();
    Long result = null;
    try {
      // Re-check under the lock: state may have changed while waiting.
      checkOperation(OperationCategory.WRITE);
      if (isInSafeMode()) {
        throw new SafeModeException(
            "Cannot add cache directive", safeMode);
      }
      if (directive.getId() != null) {
        throw new IOException("addDirective: you cannot specify an ID " +
            "for this operation.");
      }
      CacheDirectiveInfo effectiveDirective = 
          cacheManager.addDirective(directive, pc, flags);
      getEditLog().logAddCacheDirectiveInfo(effectiveDirective,
          cacheEntry != null);
      result = effectiveDirective.getId();
      success = true;
    } finally {
      writeUnlock();
      if (success) {
        // Sync outside the write lock to avoid blocking other operations.
        getEditLog().logSync();
      }
      if (isAuditEnabled() && isExternalInvocation()) {
        logAuditEvent(success, "addCacheDirective", null, null, null);
      }
      RetryCache.setState(cacheEntry, success, result);
    }
    return result;
  }
7377    
7378      void modifyCacheDirective(CacheDirectiveInfo directive,
7379          EnumSet<CacheFlag> flags) throws IOException {
7380        checkOperation(OperationCategory.WRITE);
7381        final FSPermissionChecker pc = isPermissionEnabled ?
7382            getPermissionChecker() : null;
7383        boolean success = false;
7384        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
7385        if (cacheEntry != null && cacheEntry.isSuccess()) {
7386          return;
7387        }
7388        if (!flags.contains(CacheFlag.FORCE)) {
7389          cacheManager.waitForRescanIfNeeded();
7390        }
7391        writeLock();
7392        try {
7393          checkOperation(OperationCategory.WRITE);
7394          if (isInSafeMode()) {
7395            throw new SafeModeException(
7396                "Cannot add cache directive", safeMode);
7397          }
7398          cacheManager.modifyDirective(directive, pc, flags);
7399          getEditLog().logModifyCacheDirectiveInfo(directive,
7400              cacheEntry != null);
7401          success = true;
7402        } finally {
7403          writeUnlock();
7404          if (success) {
7405            getEditLog().logSync();
7406          }
7407          if (isAuditEnabled() && isExternalInvocation()) {
7408            logAuditEvent(success, "modifyCacheDirective", null, null, null);
7409          }
7410          RetryCache.setState(cacheEntry, success);
7411        }
7412      }
7413    
  /**
   * Remove the cache directive with the given ID. Idempotent across RPC
   * retries via the retry cache.
   *
   * @param id ID of the directive to remove
   * @throws IOException if in safe mode or removal fails
   */
  void removeCacheDirective(Long id) throws IOException {
    checkOperation(OperationCategory.WRITE);
    final FSPermissionChecker pc = isPermissionEnabled ?
        getPermissionChecker() : null;
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      // Retried RPC that already succeeded.
      return;
    }
    boolean success = false;
    writeLock();
    try {
      // Re-check under the lock: state may have changed while waiting.
      checkOperation(OperationCategory.WRITE);
      if (isInSafeMode()) {
        throw new SafeModeException(
            "Cannot remove cache directives", safeMode);
      }
      cacheManager.removeDirective(id, pc);
      getEditLog().logRemoveCacheDirectiveInfo(id, cacheEntry != null);
      success = true;
    } finally {
      writeUnlock();
      if (isAuditEnabled() && isExternalInvocation()) {
        logAuditEvent(success, "removeCacheDirective", null, null,
            null);
      }
      RetryCache.setState(cacheEntry, success);
    }
    // NOTE(review): unlike addCacheDirective/modifyCacheDirective, logSync()
    // runs unconditionally here, even when the removal failed — confirm
    // whether this asymmetry is intentional.
    getEditLog().logSync();
  }
7443    
7444      BatchedListEntries<CacheDirectiveEntry> listCacheDirectives(
7445          long startId, CacheDirectiveInfo filter) throws IOException {
7446        checkOperation(OperationCategory.READ);
7447        final FSPermissionChecker pc = isPermissionEnabled ?
7448            getPermissionChecker() : null;
7449        BatchedListEntries<CacheDirectiveEntry> results;
7450        cacheManager.waitForRescanIfNeeded();
7451        readLock();
7452        boolean success = false;
7453        try {
7454          checkOperation(OperationCategory.READ);
7455          results =
7456              cacheManager.listCacheDirectives(startId, filter, pc);
7457          success = true;
7458        } finally {
7459          readUnlock();
7460          if (isAuditEnabled() && isExternalInvocation()) {
7461            logAuditEvent(success, "listCacheDirectives", null, null,
7462                null);
7463          }
7464        }
7465        return results;
7466      }
7467    
7468      public void addCachePool(CachePoolInfo req) throws IOException {
7469        checkOperation(OperationCategory.WRITE);
7470        final FSPermissionChecker pc = isPermissionEnabled ?
7471            getPermissionChecker() : null;
7472        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
7473        if (cacheEntry != null && cacheEntry.isSuccess()) {
7474          return; // Return previous response
7475        }
7476        writeLock();
7477        boolean success = false;
7478        try {
7479          checkOperation(OperationCategory.WRITE);
7480          if (isInSafeMode()) {
7481            throw new SafeModeException(
7482                "Cannot add cache pool " + req.getPoolName(), safeMode);
7483          }
7484          if (pc != null) {
7485            pc.checkSuperuserPrivilege();
7486          }
7487          CachePoolInfo info = cacheManager.addCachePool(req);
7488          getEditLog().logAddCachePool(info, cacheEntry != null);
7489          success = true;
7490        } finally {
7491          writeUnlock();
7492          if (isAuditEnabled() && isExternalInvocation()) {
7493            logAuditEvent(success, "addCachePool", req.getPoolName(), null, null);
7494          }
7495          RetryCache.setState(cacheEntry, success);
7496        }
7497        
7498        getEditLog().logSync();
7499      }
7500    
7501      public void modifyCachePool(CachePoolInfo req) throws IOException {
7502        checkOperation(OperationCategory.WRITE);
7503        final FSPermissionChecker pc =
7504            isPermissionEnabled ? getPermissionChecker() : null;
7505        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
7506        if (cacheEntry != null && cacheEntry.isSuccess()) {
7507          return; // Return previous response
7508        }
7509        writeLock();
7510        boolean success = false;
7511        try {
7512          checkOperation(OperationCategory.WRITE);
7513          if (isInSafeMode()) {
7514            throw new SafeModeException(
7515                "Cannot modify cache pool " + req.getPoolName(), safeMode);
7516          }
7517          if (pc != null) {
7518            pc.checkSuperuserPrivilege();
7519          }
7520          cacheManager.modifyCachePool(req);
7521          getEditLog().logModifyCachePool(req, cacheEntry != null);
7522          success = true;
7523        } finally {
7524          writeUnlock();
7525          if (isAuditEnabled() && isExternalInvocation()) {
7526            logAuditEvent(success, "modifyCachePool", req.getPoolName(), null, null);
7527          }
7528          RetryCache.setState(cacheEntry, success);
7529        }
7530    
7531        getEditLog().logSync();
7532      }
7533    
7534      public void removeCachePool(String cachePoolName) throws IOException {
7535        checkOperation(OperationCategory.WRITE);
7536        final FSPermissionChecker pc =
7537            isPermissionEnabled ? getPermissionChecker() : null;
7538        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
7539        if (cacheEntry != null && cacheEntry.isSuccess()) {
7540          return; // Return previous response
7541        }
7542        writeLock();
7543        boolean success = false;
7544        try {
7545          checkOperation(OperationCategory.WRITE);
7546          if (isInSafeMode()) {
7547            throw new SafeModeException(
7548                "Cannot remove cache pool " + cachePoolName, safeMode);
7549          }
7550          if (pc != null) {
7551            pc.checkSuperuserPrivilege();
7552          }
7553          cacheManager.removeCachePool(cachePoolName);
7554          getEditLog().logRemoveCachePool(cachePoolName, cacheEntry != null);
7555          success = true;
7556        } finally {
7557          writeUnlock();
7558          if (isAuditEnabled() && isExternalInvocation()) {
7559            logAuditEvent(success, "removeCachePool", cachePoolName, null, null);
7560          }
7561          RetryCache.setState(cacheEntry, success);
7562        }
7563        
7564        getEditLog().logSync();
7565      }
7566    
7567      public BatchedListEntries<CachePoolEntry> listCachePools(String prevKey)
7568          throws IOException {
7569        final FSPermissionChecker pc =
7570            isPermissionEnabled ? getPermissionChecker() : null;
7571        BatchedListEntries<CachePoolEntry> results;
7572        checkOperation(OperationCategory.READ);
7573        boolean success = false;
7574        cacheManager.waitForRescanIfNeeded();
7575        readLock();
7576        try {
7577          checkOperation(OperationCategory.READ);
7578          results = cacheManager.listCachePools(pc, prevKey);
7579          success = true;
7580        } finally {
7581          readUnlock();
7582          if (isAuditEnabled() && isExternalInvocation()) {
7583            logAuditEvent(success, "listCachePools", null, null, null);
7584          }
7585        }
7586        return results;
7587      }
7588    
7589      void modifyAclEntries(String src, List<AclEntry> aclSpec) throws IOException {
7590        aclConfigFlag.checkForApiCall();
7591        HdfsFileStatus resultingStat = null;
7592        FSPermissionChecker pc = getPermissionChecker();
7593        checkOperation(OperationCategory.WRITE);
7594        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
7595        writeLock();
7596        try {
7597          checkOperation(OperationCategory.WRITE);
7598          checkNameNodeSafeMode("Cannot modify ACL entries on " + src);
7599          src = FSDirectory.resolvePath(src, pathComponents, dir);
7600          checkOwner(pc, src);
7601          dir.modifyAclEntries(src, aclSpec);
7602          resultingStat = getAuditFileInfo(src, false);
7603        } finally {
7604          writeUnlock();
7605        }
7606        getEditLog().logSync();
7607        logAuditEvent(true, "modifyAclEntries", src, null, resultingStat);
7608      }
7609    
7610      void removeAclEntries(String src, List<AclEntry> aclSpec) throws IOException {
7611        aclConfigFlag.checkForApiCall();
7612        HdfsFileStatus resultingStat = null;
7613        FSPermissionChecker pc = getPermissionChecker();
7614        checkOperation(OperationCategory.WRITE);
7615        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
7616        writeLock();
7617        try {
7618          checkOperation(OperationCategory.WRITE);
7619          checkNameNodeSafeMode("Cannot remove ACL entries on " + src);
7620          src = FSDirectory.resolvePath(src, pathComponents, dir);
7621          checkOwner(pc, src);
7622          dir.removeAclEntries(src, aclSpec);
7623          resultingStat = getAuditFileInfo(src, false);
7624        } finally {
7625          writeUnlock();
7626        }
7627        getEditLog().logSync();
7628        logAuditEvent(true, "removeAclEntries", src, null, resultingStat);
7629      }
7630    
7631      void removeDefaultAcl(String src) throws IOException {
7632        aclConfigFlag.checkForApiCall();
7633        HdfsFileStatus resultingStat = null;
7634        FSPermissionChecker pc = getPermissionChecker();
7635        checkOperation(OperationCategory.WRITE);
7636        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
7637        writeLock();
7638        try {
7639          checkOperation(OperationCategory.WRITE);
7640          checkNameNodeSafeMode("Cannot remove default ACL entries on " + src);
7641          src = FSDirectory.resolvePath(src, pathComponents, dir);
7642          checkOwner(pc, src);
7643          dir.removeDefaultAcl(src);
7644          resultingStat = getAuditFileInfo(src, false);
7645        } finally {
7646          writeUnlock();
7647        }
7648        getEditLog().logSync();
7649        logAuditEvent(true, "removeDefaultAcl", src, null, resultingStat);
7650      }
7651    
7652      void removeAcl(String src) throws IOException {
7653        aclConfigFlag.checkForApiCall();
7654        HdfsFileStatus resultingStat = null;
7655        FSPermissionChecker pc = getPermissionChecker();
7656        checkOperation(OperationCategory.WRITE);
7657        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
7658        writeLock();
7659        try {
7660          checkOperation(OperationCategory.WRITE);
7661          checkNameNodeSafeMode("Cannot remove ACL on " + src);
7662          src = FSDirectory.resolvePath(src, pathComponents, dir);
7663          checkOwner(pc, src);
7664          dir.removeAcl(src);
7665          resultingStat = getAuditFileInfo(src, false);
7666        } finally {
7667          writeUnlock();
7668        }
7669        getEditLog().logSync();
7670        logAuditEvent(true, "removeAcl", src, null, resultingStat);
7671      }
7672    
7673      void setAcl(String src, List<AclEntry> aclSpec) throws IOException {
7674        aclConfigFlag.checkForApiCall();
7675        HdfsFileStatus resultingStat = null;
7676        FSPermissionChecker pc = getPermissionChecker();
7677        checkOperation(OperationCategory.WRITE);
7678        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
7679        writeLock();
7680        try {
7681          checkOperation(OperationCategory.WRITE);
7682          checkNameNodeSafeMode("Cannot set ACL on " + src);
7683          src = FSDirectory.resolvePath(src, pathComponents, dir);
7684          checkOwner(pc, src);
7685          dir.setAcl(src, aclSpec);
7686          resultingStat = getAuditFileInfo(src, false);
7687        } finally {
7688          writeUnlock();
7689        }
7690        getEditLog().logSync();
7691        logAuditEvent(true, "setAcl", src, null, resultingStat);
7692      }
7693    
  /**
   * Get the ACL status of the given path.
   *
   * @param src path to query
   * @return the ACL status of src
   * @throws IOException if ACLs are disabled or on permission failure
   */
  AclStatus getAclStatus(String src) throws IOException {
    aclConfigFlag.checkForApiCall();
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.READ);
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      if (isPermissionEnabled) {
        checkPermission(pc, src, false, null, null, null, null);
      }
      return dir.getAclStatus(src);
    } finally {
      readUnlock();
    }
  }
7709    
7710      /**
7711       * Default AuditLogger implementation; used when no access logger is
7712       * defined in the config file. It can also be explicitly listed in the
7713       * config file.
7714       */
7715      private static class DefaultAuditLogger extends HdfsAuditLogger {
7716    
7717        private boolean logTokenTrackingId;
7718    
7719        @Override
7720        public void initialize(Configuration conf) {
7721          logTokenTrackingId = conf.getBoolean(
7722              DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY,
7723              DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT);
7724        }
7725    
7726        @Override
7727        public void logAuditEvent(boolean succeeded, String userName,
7728            InetAddress addr, String cmd, String src, String dst,
7729            FileStatus status, UserGroupInformation ugi,
7730            DelegationTokenSecretManager dtSecretManager) {
7731          if (auditLog.isInfoEnabled()) {
7732            final StringBuilder sb = auditBuffer.get();
7733            sb.setLength(0);
7734            sb.append("allowed=").append(succeeded).append("\t");
7735            sb.append("ugi=").append(userName).append("\t");
7736            sb.append("ip=").append(addr).append("\t");
7737            sb.append("cmd=").append(cmd).append("\t");
7738            sb.append("src=").append(src).append("\t");
7739            sb.append("dst=").append(dst).append("\t");
7740            if (null == status) {
7741              sb.append("perm=null");
7742            } else {
7743              sb.append("perm=");
7744              sb.append(status.getOwner()).append(":");
7745              sb.append(status.getGroup()).append(":");
7746              sb.append(status.getPermission());
7747            }
7748            if (logTokenTrackingId) {
7749              sb.append("\t").append("trackingId=");
7750              String trackingId = null;
7751              if (ugi != null && dtSecretManager != null
7752                  && ugi.getAuthenticationMethod() == AuthenticationMethod.TOKEN) {
7753                for (TokenIdentifier tid: ugi.getTokenIdentifiers()) {
7754                  if (tid instanceof DelegationTokenIdentifier) {
7755                    DelegationTokenIdentifier dtid =
7756                        (DelegationTokenIdentifier)tid;
7757                    trackingId = dtSecretManager.getTokenTrackingId(dtid);
7758                    break;
7759                  }
7760                }
7761              }
7762              sb.append(trackingId);
7763            }
7764            logAuditMessage(sb.toString());
7765          }
7766        }
7767    
7768        public void logAuditMessage(String message) {
7769          auditLog.info(message);
7770        }
7771      }
7772    
7773      private static void enableAsyncAuditLog() {
7774        if (!(auditLog instanceof Log4JLogger)) {
7775          LOG.warn("Log4j is required to enable async auditlog");
7776          return;
7777        }
7778        Logger logger = ((Log4JLogger)auditLog).getLogger();
7779        @SuppressWarnings("unchecked")
7780        List<Appender> appenders = Collections.list(logger.getAllAppenders());
7781        // failsafe against trying to async it more than once
7782        if (!appenders.isEmpty() && !(appenders.get(0) instanceof AsyncAppender)) {
7783          AsyncAppender asyncAppender = new AsyncAppender();
7784          // change logger to have an async appender containing all the
7785          // previously configured appenders
7786          for (Appender appender : appenders) {
7787            logger.removeAppender(appender);
7788            asyncAppender.addAppender(appender);
7789          }
7790          logger.addAppender(asyncAppender);        
7791        }
7792      }
7793    
7794    }
7795