001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.hdfs.server.namenode;
019
020 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT;
021 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY;
022 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT;
023 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY;
024 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_DEFAULT;
025 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_KEY;
026 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_DEFAULT;
027 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY;
028 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CHECKSUM_TYPE_DEFAULT;
029 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY;
030 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT;
031 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_KEY;
032 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_DEFAULT;
033 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_KEY;
034 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_DEFAULT;
035 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_KEY;
036 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT;
037 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY;
038 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOGGERS_KEY;
039 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT;
040 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY;
041 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT;
042 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY;
043 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT;
044 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY;
045 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME;
046 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT;
047 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY;
048 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT;
049 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY;
050 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT;
051 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY;
052 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT;
053 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY;
054 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY;
055 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY;
056 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS;
057 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT;
058 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD;
059 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT;
060 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT;
061 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY;
062 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_DEFAULT;
063 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_KEY;
064 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY;
065 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_DEFAULT;
066 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY;
067 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY;
068 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT;
069 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY;
070 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT;
071 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY;
072 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT;
073 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY;
074 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY;
075 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_DEFAULT;
076 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY;
077 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT;
078 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY;
079 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY;
080 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_DEFAULT;
081 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY;
082 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT;
083 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY;
084 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_DEFAULT;
085 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY;
086 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_DEFAULT;
087 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_KEY;
088 import static org.apache.hadoop.util.Time.now;
089
090 import java.io.BufferedWriter;
091 import java.io.ByteArrayInputStream;
092 import java.io.DataInput;
093 import java.io.DataInputStream;
094 import java.io.File;
095 import java.io.FileNotFoundException;
096 import java.io.FileOutputStream;
097 import java.io.IOException;
098 import java.io.OutputStreamWriter;
099 import java.io.PrintWriter;
100 import java.io.StringWriter;
101 import java.lang.management.ManagementFactory;
102 import java.net.InetAddress;
103 import java.net.URI;
104 import java.util.ArrayList;
105 import java.util.Arrays;
106 import java.util.Collection;
107 import java.util.Collections;
108 import java.util.Date;
109 import java.util.EnumSet;
110 import java.util.HashMap;
111 import java.util.HashSet;
112 import java.util.Iterator;
113 import java.util.LinkedHashSet;
114 import java.util.List;
115 import java.util.Map;
116 import java.util.Set;
117 import java.util.concurrent.TimeUnit;
118 import java.util.concurrent.locks.ReentrantLock;
119 import java.util.concurrent.locks.ReentrantReadWriteLock;
120
121 import javax.management.NotCompliantMBeanException;
122 import javax.management.ObjectName;
123 import javax.management.StandardMBean;
124
125 import org.apache.commons.logging.Log;
126 import org.apache.commons.logging.LogFactory;
127 import org.apache.commons.logging.impl.Log4JLogger;
128 import org.apache.hadoop.HadoopIllegalArgumentException;
129 import org.apache.hadoop.classification.InterfaceAudience;
130 import org.apache.hadoop.conf.Configuration;
131 import org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries;
132 import org.apache.hadoop.fs.CacheFlag;
133 import org.apache.hadoop.fs.ContentSummary;
134 import org.apache.hadoop.fs.CreateFlag;
135 import org.apache.hadoop.fs.DirectoryListingStartAfterNotFoundException;
136 import org.apache.hadoop.fs.FileAlreadyExistsException;
137 import org.apache.hadoop.fs.FileStatus;
138 import org.apache.hadoop.fs.FileSystem;
139 import org.apache.hadoop.fs.FsServerDefaults;
140 import org.apache.hadoop.fs.InvalidPathException;
141 import org.apache.hadoop.fs.Options;
142 import org.apache.hadoop.fs.Options.Rename;
143 import org.apache.hadoop.fs.ParentNotDirectoryException;
144 import org.apache.hadoop.fs.Path;
145 import org.apache.hadoop.fs.UnresolvedLinkException;
146 import org.apache.hadoop.fs.permission.AclEntry;
147 import org.apache.hadoop.fs.permission.AclStatus;
148 import org.apache.hadoop.fs.permission.FsAction;
149 import org.apache.hadoop.fs.permission.FsPermission;
150 import org.apache.hadoop.fs.permission.PermissionStatus;
151 import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
152 import org.apache.hadoop.ha.ServiceFailedException;
153 import org.apache.hadoop.hdfs.DFSConfigKeys;
154 import org.apache.hadoop.hdfs.DFSUtil;
155 import org.apache.hadoop.hdfs.HAUtil;
156 import org.apache.hadoop.hdfs.HdfsConfiguration;
157 import org.apache.hadoop.hdfs.StorageType;
158 import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
159 import org.apache.hadoop.hdfs.protocol.Block;
160 import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
161 import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
162 import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
163 import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
164 import org.apache.hadoop.hdfs.protocol.ClientProtocol;
165 import org.apache.hadoop.hdfs.protocol.DatanodeID;
166 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
167 import org.apache.hadoop.hdfs.protocol.DirectoryListing;
168 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
169 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
170 import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
171 import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
172 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
173 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
174 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
175 import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
176 import org.apache.hadoop.hdfs.protocol.RecoveryInProgressException;
177 import org.apache.hadoop.hdfs.protocol.RollingUpgradeException;
178 import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo;
179 import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
180 import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffReportEntry;
181 import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
182 import org.apache.hadoop.hdfs.protocol.datatransfer.ReplaceDatanodeOnFailure;
183 import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager;
184 import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager.AccessMode;
185 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
186 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
187 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager.SecretManagerState;
188 import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection;
189 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
190 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
191 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
192 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
193 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
194 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStatistics;
195 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
196 import org.apache.hadoop.hdfs.server.blockmanagement.OutOfV1GenerationStampsException;
197 import org.apache.hadoop.hdfs.server.common.GenerationStamp;
198 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
199 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
200 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption;
201 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
202 import org.apache.hadoop.hdfs.server.common.Storage;
203 import org.apache.hadoop.hdfs.server.common.Storage.StorageDirType;
204 import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
205 import org.apache.hadoop.hdfs.server.common.Util;
206 import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection;
207 import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
208 import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream;
209 import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
210 import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
211 import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory;
212 import org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer;
213 import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
214 import org.apache.hadoop.hdfs.server.namenode.ha.StandbyCheckpointer;
215 import org.apache.hadoop.hdfs.server.namenode.metrics.FSNamesystemMBean;
216 import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
217 import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable;
218 import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable.SnapshotDiffInfo;
219 import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
220 import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager;
221 import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
222 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
223 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
224 import org.apache.hadoop.hdfs.server.namenode.startupprogress.Status;
225 import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
226 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;
227 import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods;
228 import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
229 import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
230 import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
231 import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat;
232 import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand;
233 import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
234 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
235 import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
236 import org.apache.hadoop.hdfs.server.protocol.StorageReport;
237 import org.apache.hadoop.hdfs.util.ChunkedArrayList;
238 import org.apache.hadoop.io.IOUtils;
239 import org.apache.hadoop.io.Text;
240 import org.apache.hadoop.ipc.RetriableException;
241 import org.apache.hadoop.ipc.RetryCache;
242 import org.apache.hadoop.ipc.RetryCache.CacheEntry;
243 import org.apache.hadoop.ipc.RetryCache.CacheEntryWithPayload;
244 import org.apache.hadoop.ipc.Server;
245 import org.apache.hadoop.ipc.StandbyException;
246 import org.apache.hadoop.metrics2.annotation.Metric;
247 import org.apache.hadoop.metrics2.annotation.Metrics;
248 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
249 import org.apache.hadoop.metrics2.util.MBeans;
250 import org.apache.hadoop.net.NetworkTopology;
251 import org.apache.hadoop.net.Node;
252 import org.apache.hadoop.security.AccessControlException;
253 import org.apache.hadoop.security.UserGroupInformation;
254 import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
255 import org.apache.hadoop.security.token.SecretManager.InvalidToken;
256 import org.apache.hadoop.security.token.Token;
257 import org.apache.hadoop.security.token.TokenIdentifier;
258 import org.apache.hadoop.security.token.delegation.DelegationKey;
259 import org.apache.hadoop.util.Daemon;
260 import org.apache.hadoop.util.DataChecksum;
261 import org.apache.hadoop.util.StringUtils;
262 import org.apache.hadoop.util.Time;
263 import org.apache.hadoop.util.VersionInfo;
264 import org.apache.log4j.Appender;
265 import org.apache.log4j.AsyncAppender;
266 import org.apache.log4j.Logger;
267 import org.mortbay.util.ajax.JSON;
268
269 import com.google.common.annotations.VisibleForTesting;
270 import com.google.common.base.Charsets;
271 import com.google.common.base.Preconditions;
272 import com.google.common.collect.ImmutableMap;
273 import com.google.common.collect.Lists;
274
275 /***************************************************
276 * FSNamesystem does the actual bookkeeping work for the
277 * DataNode.
278 *
279 * It tracks several important tables.
280 *
281 * 1) valid fsname --> blocklist (kept on disk, logged)
282 * 2) Set of all valid blocks (inverted #1)
283 * 3) block --> machinelist (kept in memory, rebuilt dynamically from reports)
284 * 4) machine --> blocklist (inverted #2)
285 * 5) LRU cache of updated-heartbeat machines
286 ***************************************************/
287 @InterfaceAudience.Private
288 @Metrics(context="dfs")
289 public class FSNamesystem implements Namesystem, FSClusterStats,
290 FSNamesystemMBean, NameNodeMXBean {
291 public static final Log LOG = LogFactory.getLog(FSNamesystem.class);
292
  // Per-thread reusable buffer for formatting audit log messages, so that
  // building an audit record does not allocate a new StringBuilder per event.
  private static final ThreadLocal<StringBuilder> auditBuffer =
      new ThreadLocal<StringBuilder>() {
        @Override
        protected StringBuilder initialValue() {
          return new StringBuilder();
        }
      };
300
301 @VisibleForTesting
302 public boolean isAuditEnabled() {
303 return !isDefaultAuditLogger || auditLog.isInfoEnabled();
304 }
305
306 private HdfsFileStatus getAuditFileInfo(String path, boolean resolveSymlink)
307 throws IOException {
308 return (isAuditEnabled() && isExternalInvocation())
309 ? dir.getFileInfo(path, resolveSymlink) : null;
310 }
311
  /**
   * Convenience overload: log an audit event with no destination path and no
   * file status. Delegates to the five-argument form.
   */
  private void logAuditEvent(boolean succeeded, String cmd, String src)
      throws IOException {
    logAuditEvent(succeeded, cmd, src, null, null);
  }
316
317 private void logAuditEvent(boolean succeeded, String cmd, String src,
318 String dst, HdfsFileStatus stat) throws IOException {
319 if (isAuditEnabled() && isExternalInvocation()) {
320 logAuditEvent(succeeded, getRemoteUser(), getRemoteIp(),
321 cmd, src, dst, stat);
322 }
323 }
324
325 private void logAuditEvent(boolean succeeded,
326 UserGroupInformation ugi, InetAddress addr, String cmd, String src,
327 String dst, HdfsFileStatus stat) {
328 FileStatus status = null;
329 if (stat != null) {
330 Path symlink = stat.isSymlink() ? new Path(stat.getSymlink()) : null;
331 Path path = dst != null ? new Path(dst) : new Path(src);
332 status = new FileStatus(stat.getLen(), stat.isDir(),
333 stat.getReplication(), stat.getBlockSize(), stat.getModificationTime(),
334 stat.getAccessTime(), stat.getPermission(), stat.getOwner(),
335 stat.getGroup(), symlink, path);
336 }
337 for (AuditLogger logger : auditLoggers) {
338 if (logger instanceof HdfsAuditLogger) {
339 HdfsAuditLogger hdfsLogger = (HdfsAuditLogger) logger;
340 hdfsLogger.logAuditEvent(succeeded, ugi.toString(), addr, cmd, src, dst,
341 status, ugi, dtSecretManager);
342 } else {
343 logger.logAuditEvent(succeeded, ugi.toString(), addr,
344 cmd, src, dst, status);
345 }
346 }
347 }
348
  /**
   * Logger for audit events, noting successful FSNamesystem operations. Emits
   * to FSNamesystem.audit at INFO. Each event causes a set of tab-separated
   * <code>key=value</code> pairs to be written for the following properties:
   * <code>
   * ugi=&lt;ugi in RPC&gt;
   * ip=&lt;remote IP&gt;
   * cmd=&lt;command&gt;
   * src=&lt;src path&gt;
   * dst=&lt;dst path (optional)&gt;
   * perm=&lt;permissions (optional)&gt;
   * </code>
   */
  public static final Log auditLog = LogFactory.getLog(
      FSNamesystem.class.getName() + ".audit");

  // Cap on the number of corrupt file blocks returned in a single listing.
  static final int DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED = 100;
  // Number of blocks deleted per iteration while holding the write lock.
  // NOTE(review): non-final — presumably so tests can tune it; confirm.
  static int BLOCK_DELETION_INCREMENT = 1000;
  private final boolean isPermissionEnabled;
  private final UserGroupInformation fsOwner;
  private final String fsOwnerShortUserName;
  private final String supergroup;
  private final boolean standbyShouldCheckpoint;

  // Scan interval is not configurable.
  private static final long DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL =
      TimeUnit.MILLISECONDS.convert(1, TimeUnit.HOURS);
  final DelegationTokenSecretManager dtSecretManager;
  private final boolean alwaysUseDelegationTokensForTests;

  // Startup-progress step used while waiting for datanode block reports.
  private static final Step STEP_AWAITING_REPORTED_BLOCKS =
      new Step(StepType.AWAITING_REPORTED_BLOCKS);

  // Tracks whether the default audit logger is the only configured audit
  // logger; this allows isAuditEnabled() to return false in case the
  // underlying logger is disabled, and avoid some unnecessary work.
  private final boolean isDefaultAuditLogger;
  private final List<AuditLogger> auditLoggers;
387
  /** The namespace tree. */
  FSDirectory dir;
  // Manages block replication, reports and invalidation.
  private final BlockManager blockManager;
  // Manages snapshottable directories and snapshots.
  private final SnapshotManager snapshotManager;
  // Manages cache directives and cache pools.
  private final CacheManager cacheManager;
  // Aggregated statistics over registered datanodes.
  private final DatanodeStatistics datanodeStatistics;

  // Non-null only while a rolling upgrade is in progress.
  private RollingUpgradeInfo rollingUpgradeInfo = null;
  /**
   * A flag that indicates whether the checkpointer should checkpoint a rollback
   * fsimage. The edit log tailer sets this flag. The checkpoint will create a
   * rollback fsimage if the flag is true, and then change the flag to false.
   */
  private volatile boolean needRollbackFsImage;

  // Block pool ID used by this namenode
  private String blockPoolId;

  // Tracks leases for files under construction.
  final LeaseManager leaseManager = new LeaseManager(this);

  volatile Daemon smmthread = null;  // SafeModeMonitor thread

  Daemon nnrmthread = null; // NamenodeResourceMonitor thread

  Daemon nnEditLogRoller = null; // NameNodeEditLogRoller thread
  /**
   * When an active namenode will roll its own edit log, in # edits
   */
  private final long editLogRollerThreshold;
  /**
   * Check interval of an active namenode's edit log roller thread
   */
  private final int editLogRollerInterval;

  // Updated by the NameNodeResourceMonitor thread.
  private volatile boolean hasResourcesAvailable = false;
  // Cleared on shutdown to stop monitor daemons.
  private volatile boolean fsRunning = true;

  /** The start time of the namesystem. */
  private final long startTime = now();

  /** The interval of namenode checking for the disk space availability */
  private final long resourceRecheckInterval;

  // The actual resource checker instance.
  NameNodeResourceChecker nnResourceChecker;

  // Defaults (block size, replication, etc.) advertised to clients.
  private final FsServerDefaults serverDefaults;
  private final boolean supportAppends;
  private final ReplaceDatanodeOnFailure dtpReplaceDatanodeOnFailure;

  private volatile SafeModeInfo safeMode; // safe mode information

  private final long maxFsObjects; // maximum number of fs objects

  private final long minBlockSize; // minimum block size
  private final long maxBlocksPerFile; // maximum # of blocks per file

  /**
   * The global generation stamp for legacy blocks with randomly
   * generated block IDs.
   */
  private final GenerationStamp generationStampV1 = new GenerationStamp();

  /**
   * The global generation stamp for this file system.
   */
  private final GenerationStamp generationStampV2 = new GenerationStamp();

  /**
   * The value of the generation stamp when the first switch to sequential
   * block IDs was made. Blocks with generation stamps below this value
   * have randomly allocated block IDs. Blocks with generation stamps above
   * this value had sequentially allocated block IDs. Read from the fsImage
   * (or initialized as an offset from the V1 (legacy) generation stamp on
   * upgrade).
   */
  private long generationStampV1Limit =
      GenerationStamp.GRANDFATHER_GENERATION_STAMP;

  /**
   * The global block ID space for this file system.
   */
  @VisibleForTesting
  private final SequentialBlockIdGenerator blockIdGenerator;

  // precision of access times.
  private final long accessTimePrecision;

  /** Lock to protect FSNamesystem. */
  private final FSNamesystemLock fsLock;

  /**
   * Used when this NN is in standby state to read from the shared edit log.
   */
  private EditLogTailer editLogTailer = null;

  /**
   * Used when this NN is in standby state to perform checkpoints.
   */
  private StandbyCheckpointer standbyCheckpointer;

  /**
   * Reference to the NN's HAContext object. This is only set once
   * {@link #startCommonServices(Configuration, HAContext)} is called.
   */
  private HAContext haContext;

  private final boolean haEnabled;

  /** flag indicating whether replication queues have been initialized */
  boolean initializedReplQueues = false;

  /**
   * Whether the namenode is in the middle of starting the active service
   */
  private volatile boolean startingActiveService = false;

  // Generator for monotonically increasing inode IDs.
  private INodeId inodeId;

  // Caches responses of retried non-idempotent RPCs.
  private final RetryCache retryCache;

  // Whether the ACL feature is enabled in the configuration.
  private final AclConfigFlag aclConfigFlag;
510
  /**
   * Set the last allocated inode id when fsimage or editlog is loaded.
   *
   * @param newValue the inode id to advance to
   * @throws IOException if the counter cannot be advanced (e.g. the new
   *         value would move the id counter backwards)
   */
  public void resetLastInodeId(long newValue) throws IOException {
    try {
      inodeId.skipTo(newValue);
    } catch(IllegalStateException ise) {
      // Wrap so callers handle a checked IOException rather than an
      // unchecked IllegalStateException from the id generator.
      throw new IOException(ise);
    }
  }
521
  /** Should only be used for tests to reset to any value
   *  (bypasses the monotonicity check done by {@link #resetLastInodeId}). */
  void resetLastInodeIdWithoutChecking(long newValue) {
    inodeId.setCurrentValue(newValue);
  }
526
  /** @return the last inode ID that was allocated. */
  public long getLastInodeId() {
    return inodeId.getCurrentValue();
  }
531
  /** Allocate a new inode ID. @return the newly allocated id. */
  public long allocateNewInodeId() {
    return inodeId.nextValue();
  }
536
  /**
   * Clear all loaded data. Resets the directory tree, token secret manager,
   * generation stamps, block/inode id generators, leases, snapshots and the
   * cache manager back to their pre-load state, so a fresh fsimage/editlog
   * can be loaded.
   */
  void clear() {
    dir.reset();
    dtSecretManager.reset();
    generationStampV1.setCurrentValue(GenerationStamp.LAST_RESERVED_STAMP);
    generationStampV2.setCurrentValue(GenerationStamp.LAST_RESERVED_STAMP);
    blockIdGenerator.setCurrentValue(
        SequentialBlockIdGenerator.LAST_RESERVED_BLOCK_ID);
    generationStampV1Limit = GenerationStamp.GRANDFATHER_GENERATION_STAMP;
    leaseManager.removeAllLeases();
    inodeId.setCurrentValue(INodeId.LAST_RESERVED_ID);
    snapshotManager.clearSnapshottableDirs();
    cacheManager.clear();
  }
553
  /** @return the lease manager; exposed for tests only. */
  @VisibleForTesting
  LeaseManager getLeaseManager() {
    return leaseManager;
  }
558
  /** @return true if this namenode is configured for HA operation. */
  boolean isHaEnabled() {
    return haEnabled;
  }
562
563 /**
564 * Check the supplied configuration for correctness.
565 * @param conf Supplies the configuration to validate.
566 * @throws IOException if the configuration could not be queried.
567 * @throws IllegalArgumentException if the configuration is invalid.
568 */
569 private static void checkConfiguration(Configuration conf)
570 throws IOException {
571
572 final Collection<URI> namespaceDirs =
573 FSNamesystem.getNamespaceDirs(conf);
574 final Collection<URI> editsDirs =
575 FSNamesystem.getNamespaceEditsDirs(conf);
576 final Collection<URI> requiredEditsDirs =
577 FSNamesystem.getRequiredNamespaceEditsDirs(conf);
578 final Collection<URI> sharedEditsDirs =
579 FSNamesystem.getSharedEditsDirs(conf);
580
581 for (URI u : requiredEditsDirs) {
582 if (u.toString().compareTo(
583 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_DEFAULT) == 0) {
584 continue;
585 }
586
587 // Each required directory must also be in editsDirs or in
588 // sharedEditsDirs.
589 if (!editsDirs.contains(u) &&
590 !sharedEditsDirs.contains(u)) {
591 throw new IllegalArgumentException(
592 "Required edits directory " + u.toString() + " not present in " +
593 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY + ". " +
594 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY + "=" +
595 editsDirs.toString() + "; " +
596 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY + "=" +
597 requiredEditsDirs.toString() + ". " +
598 DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY + "=" +
599 sharedEditsDirs.toString() + ".");
600 }
601 }
602
603 if (namespaceDirs.size() == 1) {
604 LOG.warn("Only one image storage directory ("
605 + DFS_NAMENODE_NAME_DIR_KEY + ") configured. Beware of dataloss"
606 + " due to lack of redundant storage directories!");
607 }
608 if (editsDirs.size() == 1) {
609 LOG.warn("Only one namespace edits storage directory ("
610 + DFS_NAMENODE_EDITS_DIR_KEY + ") configured. Beware of dataloss"
611 + " due to lack of redundant storage directories!");
612 }
613 }
614
615 /**
616 * Instantiates an FSNamesystem loaded from the image and edits
617 * directories specified in the passed Configuration.
618 *
619 * @param conf the Configuration which specifies the storage directories
620 * from which to load
621 * @return an FSNamesystem which contains the loaded namespace
622 * @throws IOException if loading fails
623 */
624 static FSNamesystem loadFromDisk(Configuration conf) throws IOException {
625
626 checkConfiguration(conf);
627 FSImage fsImage = new FSImage(conf,
628 FSNamesystem.getNamespaceDirs(conf),
629 FSNamesystem.getNamespaceEditsDirs(conf));
630 FSNamesystem namesystem = new FSNamesystem(conf, fsImage, false);
631 StartupOption startOpt = NameNode.getStartupOption(conf);
632 if (startOpt == StartupOption.RECOVER) {
633 namesystem.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
634 }
635
636 long loadStart = now();
637 try {
638 namesystem.loadFSImage(startOpt);
639 } catch (IOException ioe) {
640 LOG.warn("Encountered exception loading fsimage", ioe);
641 fsImage.close();
642 throw ioe;
643 }
644 long timeTakenToLoadFSImage = now() - loadStart;
645 LOG.info("Finished loading FSImage in " + timeTakenToLoadFSImage + " msecs");
646 NameNodeMetrics nnMetrics = NameNode.getNameNodeMetrics();
647 if (nnMetrics != null) {
648 nnMetrics.setFsImageLoadTime((int) timeTakenToLoadFSImage);
649 }
650 return namesystem;
651 }
652
  /**
   * Convenience constructor: creates an FSNamesystem with the retry cache
   * enabled (ignoreRetryCache = false).
   */
  FSNamesystem(Configuration conf, FSImage fsImage) throws IOException {
    this(conf, fsImage, false);
  }
656
657 /**
658 * Create an FSNamesystem associated with the specified image.
659 *
660 * Note that this does not load any data off of disk -- if you would
661 * like that behavior, use {@link #loadFromDisk(Configuration)}
662 *
663 * @param conf configuration
664 * @param fsImage The FSImage to associate with
665 * @param ignoreRetryCache Whether or not should ignore the retry cache setup
666 * step. For Secondary NN this should be set to true.
667 * @throws IOException on bad configuration
668 */
669 FSNamesystem(Configuration conf, FSImage fsImage, boolean ignoreRetryCache)
670 throws IOException {
671 if (conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY,
672 DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT)) {
673 LOG.info("Enabling async auditlog");
674 enableAsyncAuditLog();
675 }
676 boolean fair = conf.getBoolean("dfs.namenode.fslock.fair", true);
677 LOG.info("fsLock is fair:" + fair);
678 fsLock = new FSNamesystemLock(fair);
679 try {
680 resourceRecheckInterval = conf.getLong(
681 DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY,
682 DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT);
683
684 this.blockManager = new BlockManager(this, this, conf);
685 this.datanodeStatistics = blockManager.getDatanodeManager().getDatanodeStatistics();
686 this.blockIdGenerator = new SequentialBlockIdGenerator(this.blockManager);
687
688 this.fsOwner = UserGroupInformation.getCurrentUser();
689 this.fsOwnerShortUserName = fsOwner.getShortUserName();
690 this.supergroup = conf.get(DFS_PERMISSIONS_SUPERUSERGROUP_KEY,
691 DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT);
692 this.isPermissionEnabled = conf.getBoolean(DFS_PERMISSIONS_ENABLED_KEY,
693 DFS_PERMISSIONS_ENABLED_DEFAULT);
694 LOG.info("fsOwner = " + fsOwner);
695 LOG.info("supergroup = " + supergroup);
696 LOG.info("isPermissionEnabled = " + isPermissionEnabled);
697
698 // block allocation has to be persisted in HA using a shared edits directory
699 // so that the standby has up-to-date namespace information
700 String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf);
701 this.haEnabled = HAUtil.isHAEnabled(conf, nameserviceId);
702
703 // Sanity check the HA-related config.
704 if (nameserviceId != null) {
705 LOG.info("Determined nameservice ID: " + nameserviceId);
706 }
707 LOG.info("HA Enabled: " + haEnabled);
708 if (!haEnabled && HAUtil.usesSharedEditsDir(conf)) {
709 LOG.warn("Configured NNs:\n" + DFSUtil.nnAddressesAsString(conf));
710 throw new IOException("Invalid configuration: a shared edits dir " +
711 "must not be specified if HA is not enabled.");
712 }
713
714 // Get the checksum type from config
715 String checksumTypeStr = conf.get(DFS_CHECKSUM_TYPE_KEY, DFS_CHECKSUM_TYPE_DEFAULT);
716 DataChecksum.Type checksumType;
717 try {
718 checksumType = DataChecksum.Type.valueOf(checksumTypeStr);
719 } catch (IllegalArgumentException iae) {
720 throw new IOException("Invalid checksum type in "
721 + DFS_CHECKSUM_TYPE_KEY + ": " + checksumTypeStr);
722 }
723
724 this.serverDefaults = new FsServerDefaults(
725 conf.getLongBytes(DFS_BLOCK_SIZE_KEY, DFS_BLOCK_SIZE_DEFAULT),
726 conf.getInt(DFS_BYTES_PER_CHECKSUM_KEY, DFS_BYTES_PER_CHECKSUM_DEFAULT),
727 conf.getInt(DFS_CLIENT_WRITE_PACKET_SIZE_KEY, DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT),
728 (short) conf.getInt(DFS_REPLICATION_KEY, DFS_REPLICATION_DEFAULT),
729 conf.getInt(IO_FILE_BUFFER_SIZE_KEY, IO_FILE_BUFFER_SIZE_DEFAULT),
730 conf.getBoolean(DFS_ENCRYPT_DATA_TRANSFER_KEY, DFS_ENCRYPT_DATA_TRANSFER_DEFAULT),
731 conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT),
732 checksumType);
733
734 this.maxFsObjects = conf.getLong(DFS_NAMENODE_MAX_OBJECTS_KEY,
735 DFS_NAMENODE_MAX_OBJECTS_DEFAULT);
736
737 this.minBlockSize = conf.getLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY,
738 DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_DEFAULT);
739 this.maxBlocksPerFile = conf.getLong(DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_KEY,
740 DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_DEFAULT);
741 this.accessTimePrecision = conf.getLong(DFS_NAMENODE_ACCESSTIME_PRECISION_KEY,
742 DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT);
743 this.supportAppends = conf.getBoolean(DFS_SUPPORT_APPEND_KEY, DFS_SUPPORT_APPEND_DEFAULT);
744 LOG.info("Append Enabled: " + supportAppends);
745
746 this.dtpReplaceDatanodeOnFailure = ReplaceDatanodeOnFailure.get(conf);
747
748 this.standbyShouldCheckpoint = conf.getBoolean(
749 DFS_HA_STANDBY_CHECKPOINTS_KEY, DFS_HA_STANDBY_CHECKPOINTS_DEFAULT);
750 // # edit autoroll threshold is a multiple of the checkpoint threshold
751 this.editLogRollerThreshold = (long)
752 (conf.getFloat(
753 DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD,
754 DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT) *
755 conf.getLong(
756 DFS_NAMENODE_CHECKPOINT_TXNS_KEY,
757 DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT));
758 this.editLogRollerInterval = conf.getInt(
759 DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS,
760 DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT);
761 this.inodeId = new INodeId();
762
763 // For testing purposes, allow the DT secret manager to be started regardless
764 // of whether security is enabled.
765 alwaysUseDelegationTokensForTests = conf.getBoolean(
766 DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY,
767 DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT);
768
769 this.dtSecretManager = createDelegationTokenSecretManager(conf);
770 this.dir = new FSDirectory(fsImage, this, conf);
771 this.snapshotManager = new SnapshotManager(dir);
772 this.cacheManager = new CacheManager(this, conf, blockManager);
773 this.safeMode = new SafeModeInfo(conf);
774 this.auditLoggers = initAuditLoggers(conf);
775 this.isDefaultAuditLogger = auditLoggers.size() == 1 &&
776 auditLoggers.get(0) instanceof DefaultAuditLogger;
777 this.retryCache = ignoreRetryCache ? null : initRetryCache(conf);
778 this.aclConfigFlag = new AclConfigFlag(conf);
779 } catch(IOException e) {
780 LOG.error(getClass().getSimpleName() + " initialization failed.", e);
781 close();
782 throw e;
783 } catch (RuntimeException re) {
784 LOG.error(getClass().getSimpleName() + " initialization failed.", re);
785 close();
786 throw re;
787 }
788 }
789
/** @return the namesystem's RetryCache, or null when the cache is disabled. */
@VisibleForTesting
public RetryCache getRetryCache() {
  return retryCache;
}
794
795 void lockRetryCache() {
796 if (retryCache != null) {
797 retryCache.lock();
798 }
799 }
800
801 void unlockRetryCache() {
802 if (retryCache != null) {
803 retryCache.unlock();
804 }
805 }
806
/** Whether or not retry cache is enabled */
boolean hasRetryCache() {
  return retryCache != null;
}
811
812 void addCacheEntryWithPayload(byte[] clientId, int callId, Object payload) {
813 if (retryCache != null) {
814 retryCache.addCacheEntryWithPayload(clientId, callId, payload);
815 }
816 }
817
818 void addCacheEntry(byte[] clientId, int callId) {
819 if (retryCache != null) {
820 retryCache.addCacheEntry(clientId, callId);
821 }
822 }
823
824 @VisibleForTesting
825 static RetryCache initRetryCache(Configuration conf) {
826 boolean enable = conf.getBoolean(DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY,
827 DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT);
828 LOG.info("Retry cache on namenode is " + (enable ? "enabled" : "disabled"));
829 if (enable) {
830 float heapPercent = conf.getFloat(
831 DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY,
832 DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT);
833 long entryExpiryMillis = conf.getLong(
834 DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY,
835 DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT);
836 LOG.info("Retry cache will use " + heapPercent
837 + " of total heap and retry cache entry expiry time is "
838 + entryExpiryMillis + " millis");
839 long entryExpiryNanos = entryExpiryMillis * 1000 * 1000;
840 return new RetryCache("NameNodeRetryCache", heapPercent,
841 entryExpiryNanos);
842 }
843 return null;
844 }
845
846 private List<AuditLogger> initAuditLoggers(Configuration conf) {
847 // Initialize the custom access loggers if configured.
848 Collection<String> alClasses = conf.getStringCollection(DFS_NAMENODE_AUDIT_LOGGERS_KEY);
849 List<AuditLogger> auditLoggers = Lists.newArrayList();
850 if (alClasses != null && !alClasses.isEmpty()) {
851 for (String className : alClasses) {
852 try {
853 AuditLogger logger;
854 if (DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME.equals(className)) {
855 logger = new DefaultAuditLogger();
856 } else {
857 logger = (AuditLogger) Class.forName(className).newInstance();
858 }
859 logger.initialize(conf);
860 auditLoggers.add(logger);
861 } catch (RuntimeException re) {
862 throw re;
863 } catch (Exception e) {
864 throw new RuntimeException(e);
865 }
866 }
867 }
868
869 // Make sure there is at least one logger installed.
870 if (auditLoggers.isEmpty()) {
871 auditLoggers.add(new DefaultAuditLogger());
872 }
873 return Collections.unmodifiableList(auditLoggers);
874 }
875
/**
 * Load the namespace from disk according to the given startup option,
 * optionally formatting first, saving a merged image when required, and
 * opening the edit log for write when this NN will be the writer.
 *
 * @param startOpt how the NameNode was started (FORMAT, REGULAR, RECOVER, ...)
 * @throws IOException if the image or edits cannot be read or written
 */
private void loadFSImage(StartupOption startOpt) throws IOException {
  final FSImage fsImage = getFSImage();

  // format before starting up if requested
  if (startOpt == StartupOption.FORMAT) {

    fsImage.format(this, fsImage.getStorage().determineClusterId());// reuse current id

    // After formatting, continue as a regular startup.
    startOpt = StartupOption.REGULAR;
  }
  boolean success = false;
  writeLock();
  try {
    // We shouldn't be calling saveNamespace if we've come up in standby state.
    MetaRecoveryContext recovery = startOpt.createRecoveryContext();
    final boolean staleImage
        = fsImage.recoverTransitionRead(startOpt, this, recovery);
    if (RollingUpgradeStartupOption.ROLLBACK.matches(startOpt)) {
      // Rolling-upgrade rollback discards in-memory upgrade state.
      rollingUpgradeInfo = null;
    }
    // Only the (sole) active of a non-HA, non-upgrading NN saves a
    // compacted image here; a standby must not write shared state.
    final boolean needToSave = staleImage && !haEnabled && !isRollingUpgrade();
    LOG.info("Need to save fs image? " + needToSave
        + " (staleImage=" + staleImage + ", haEnabled=" + haEnabled
        + ", isRollingUpgrade=" + isRollingUpgrade() + ")");
    if (needToSave) {
      fsImage.saveNamespace(this);
    } else {
      // No need to save, so mark the phase done.
      StartupProgress prog = NameNode.getStartupProgress();
      prog.beginPhase(Phase.SAVING_CHECKPOINT);
      prog.endPhase(Phase.SAVING_CHECKPOINT);
    }
    // This will start a new log segment and write to the seen_txid file, so
    // we shouldn't do it when coming up in standby state
    if (!haEnabled || (haEnabled && startOpt == StartupOption.UPGRADE)) {
      fsImage.openEditLogForWrite();
    }
    success = true;
  } finally {
    if (!success) {
      // Release storage directories on any failure path.
      fsImage.close();
    }
    writeUnlock();
  }
  dir.imageLoadComplete();
}
922
923 private void startSecretManager() {
924 if (dtSecretManager != null) {
925 try {
926 dtSecretManager.startThreads();
927 } catch (IOException e) {
928 // Inability to start secret manager
929 // can't be recovered from.
930 throw new RuntimeException(e);
931 }
932 }
933 }
934
/**
 * Start the DT secret manager when tokens are in use (security on, or the
 * test override set), we are out of safe mode, and this NN owns the edit
 * log for write — i.e. only the active NN issues tokens.
 */
private void startSecretManagerIfNecessary() {
  boolean shouldRun = shouldUseDelegationTokens() &&
      !isInSafeMode() && getEditLog().isOpenForWrite();
  boolean running = dtSecretManager.isRunning();
  if (shouldRun && !running) {
    startSecretManager();
  }
}
943
944 private void stopSecretManager() {
945 if (dtSecretManager != null) {
946 dtSecretManager.stopThreads();
947 }
948 }
949
950 /**
951 * Start services common to both active and standby states
952 * @param haContext
953 * @throws IOException
954 */
955 void startCommonServices(Configuration conf, HAContext haContext) throws IOException {
956 this.registerMBean(); // register the MBean for the FSNamesystemState
957 writeLock();
958 this.haContext = haContext;
959 try {
960 nnResourceChecker = new NameNodeResourceChecker(conf);
961 checkAvailableResources();
962 assert safeMode != null && !isPopulatingReplQueues();
963 StartupProgress prog = NameNode.getStartupProgress();
964 prog.beginPhase(Phase.SAFEMODE);
965 prog.setTotal(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS,
966 getCompleteBlocksTotal());
967 setBlockTotal();
968 blockManager.activate(conf);
969 } finally {
970 writeUnlock();
971 }
972
973 registerMXBean();
974 DefaultMetricsSystem.instance().register(this);
975 }
976
977 /**
978 * Stop services common to both active and standby states
979 * @throws IOException
980 */
981 void stopCommonServices() {
982 writeLock();
983 try {
984 if (blockManager != null) blockManager.close();
985 } finally {
986 writeUnlock();
987 }
988 RetryCache.clear(retryCache);
989 }
990
991 /**
992 * Start services required in active state
993 * @throws IOException
994 */
995 void startActiveServices() throws IOException {
996 startingActiveService = true;
997 LOG.info("Starting services required for active state");
998 writeLock();
999 try {
1000 FSEditLog editLog = dir.fsImage.getEditLog();
1001
1002 if (!editLog.isOpenForWrite()) {
1003 // During startup, we're already open for write during initialization.
1004 editLog.initJournalsForWrite();
1005 // May need to recover
1006 editLog.recoverUnclosedStreams();
1007
1008 LOG.info("Catching up to latest edits from old active before " +
1009 "taking over writer role in edits logs");
1010 editLogTailer.catchupDuringFailover();
1011
1012 blockManager.setPostponeBlocksFromFuture(false);
1013 blockManager.getDatanodeManager().markAllDatanodesStale();
1014 blockManager.clearQueues();
1015 blockManager.processAllPendingDNMessages();
1016
1017 // Only need to re-process the queue, If not in SafeMode.
1018 if (!isInSafeMode()) {
1019 LOG.info("Reprocessing replication and invalidation queues");
1020 initializeReplQueues();
1021 }
1022
1023 if (LOG.isDebugEnabled()) {
1024 LOG.debug("NameNode metadata after re-processing " +
1025 "replication and invalidation queues during failover:\n" +
1026 metaSaveAsString());
1027 }
1028
1029 long nextTxId = dir.fsImage.getLastAppliedTxId() + 1;
1030 LOG.info("Will take over writing edit logs at txnid " +
1031 nextTxId);
1032 editLog.setNextTxId(nextTxId);
1033
1034 dir.fsImage.editLog.openForWrite();
1035 }
1036
1037 if (haEnabled) {
1038 // Renew all of the leases before becoming active.
1039 // This is because, while we were in standby mode,
1040 // the leases weren't getting renewed on this NN.
1041 // Give them all a fresh start here.
1042 leaseManager.renewAllLeases();
1043 }
1044 leaseManager.startMonitor();
1045 startSecretManagerIfNecessary();
1046
1047 //ResourceMonitor required only at ActiveNN. See HDFS-2914
1048 this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
1049 nnrmthread.start();
1050
1051 nnEditLogRoller = new Daemon(new NameNodeEditLogRoller(
1052 editLogRollerThreshold, editLogRollerInterval));
1053 nnEditLogRoller.start();
1054
1055 cacheManager.startMonitorThread();
1056 blockManager.getDatanodeManager().setShouldSendCachingCommands(true);
1057 } finally {
1058 writeUnlock();
1059 startingActiveService = false;
1060 }
1061 }
1062
1063 /**
1064 * Initialize replication queues.
1065 */
1066 private void initializeReplQueues() {
1067 LOG.info("initializing replication queues");
1068 blockManager.processMisReplicatedBlocks();
1069 initializedReplQueues = true;
1070 }
1071
/** @return true iff the HA context reports this NN is in ACTIVE state. */
private boolean inActiveState() {
  return haContext != null &&
      haContext.getState().getServiceState() == HAServiceState.ACTIVE;
}
1076
1077 /**
1078 * @return Whether the namenode is transitioning to active state and is in the
1079 * middle of the {@link #startActiveServices()}
1080 */
1081 public boolean inTransitionToActive() {
1082 return haEnabled && inActiveState() && startingActiveService;
1083 }
1084
/**
 * @return true when delegation tokens are in use: either security is on,
 *         or the test-only always-use override is set.
 */
private boolean shouldUseDelegationTokens() {
  return UserGroupInformation.isSecurityEnabled() ||
      alwaysUseDelegationTokensForTests;
}
1089
1090 /**
1091 * Stop services required in active state
1092 * @throws InterruptedException
1093 */
1094 void stopActiveServices() {
1095 LOG.info("Stopping services started for active state");
1096 writeLock();
1097 try {
1098 stopSecretManager();
1099 if (leaseManager != null) {
1100 leaseManager.stopMonitor();
1101 }
1102 if (nnrmthread != null) {
1103 ((NameNodeResourceMonitor) nnrmthread.getRunnable()).stopMonitor();
1104 nnrmthread.interrupt();
1105 }
1106 if (nnEditLogRoller != null) {
1107 ((NameNodeEditLogRoller)nnEditLogRoller.getRunnable()).stop();
1108 nnEditLogRoller.interrupt();
1109 }
1110 if (dir != null && dir.fsImage != null) {
1111 if (dir.fsImage.editLog != null) {
1112 dir.fsImage.editLog.close();
1113 }
1114 // Update the fsimage with the last txid that we wrote
1115 // so that the tailer starts from the right spot.
1116 dir.fsImage.updateLastAppliedTxIdFromWritten();
1117 }
1118 cacheManager.stopMonitorThread();
1119 cacheManager.clearDirectiveStats();
1120 blockManager.getDatanodeManager().clearPendingCachingCommands();
1121 blockManager.getDatanodeManager().setShouldSendCachingCommands(false);
1122 // Don't want to keep replication queues when not in Active.
1123 blockManager.clearQueues();
1124 initializedReplQueues = false;
1125 } finally {
1126 writeUnlock();
1127 }
1128 }
1129
1130 /**
1131 * Start services required in standby state
1132 *
1133 * @throws IOException
1134 */
1135 void startStandbyServices(final Configuration conf) throws IOException {
1136 LOG.info("Starting services required for standby state");
1137 if (!dir.fsImage.editLog.isOpenForRead()) {
1138 // During startup, we're already open for read.
1139 dir.fsImage.editLog.initSharedJournalsForRead();
1140 }
1141
1142 blockManager.setPostponeBlocksFromFuture(true);
1143
1144 editLogTailer = new EditLogTailer(this, conf);
1145 editLogTailer.start();
1146 if (standbyShouldCheckpoint) {
1147 standbyCheckpointer = new StandbyCheckpointer(conf, this);
1148 standbyCheckpointer.start();
1149 }
1150 }
1151
1152 /**
1153 * Called when the NN is in Standby state and the editlog tailer tails the
1154 * OP_ROLLING_UPGRADE_START.
1155 */
1156 void triggerRollbackCheckpoint() {
1157 setNeedRollbackFsImage(true);
1158 if (standbyCheckpointer != null) {
1159 standbyCheckpointer.triggerRollbackCheckpoint();
1160 }
1161 }
1162
1163 /**
1164 * Called while the NN is in Standby state, but just about to be
1165 * asked to enter Active state. This cancels any checkpoints
1166 * currently being taken.
1167 */
1168 void prepareToStopStandbyServices() throws ServiceFailedException {
1169 if (standbyCheckpointer != null) {
1170 standbyCheckpointer.cancelAndPreventCheckpoints(
1171 "About to leave standby state");
1172 }
1173 }
1174
/** Stop services required in standby state */
void stopStandbyServices() throws IOException {
  LOG.info("Stopping services started for standby state");
  // Stop the checkpointer first so it doesn't race with the tailer shutdown.
  if (standbyCheckpointer != null) {
    standbyCheckpointer.stop();
  }
  if (editLogTailer != null) {
    editLogTailer.stop();
  }
  if (dir != null && dir.fsImage != null && dir.fsImage.editLog != null) {
    dir.fsImage.editLog.close();
  }
}
1188
/**
 * Delegate the operation-category check (READ/WRITE/UNCHECKED) to the HA
 * context, which rejects operations not allowed in the current HA state.
 *
 * @throws StandbyException if the operation is not permitted in this state
 */
@Override
public void checkOperation(OperationCategory op) throws StandbyException {
  if (haContext != null) {
    // null in some unit tests
    haContext.checkOperation(op);
  }
}
1196
1197 /**
1198 * @throws RetriableException
1199 * If 1) The NameNode is in SafeMode, 2) HA is enabled, and 3)
1200 * NameNode is in active state
1201 * @throws SafeModeException
1202 * Otherwise if NameNode is in SafeMode.
1203 */
1204 private void checkNameNodeSafeMode(String errorMsg)
1205 throws RetriableException, SafeModeException {
1206 if (isInSafeMode()) {
1207 SafeModeException se = new SafeModeException(errorMsg, safeMode);
1208 if (haEnabled && haContext != null
1209 && haContext.getState().getServiceState() == HAServiceState.ACTIVE
1210 && shouldRetrySafeMode(this.safeMode)) {
1211 throw new RetriableException(se);
1212 } else {
1213 throw se;
1214 }
1215 }
1216 }
1217
1218 /**
1219 * We already know that the safemode is on. We will throw a RetriableException
1220 * if the safemode is not manual or caused by low resource.
1221 */
1222 private boolean shouldRetrySafeMode(SafeModeInfo safeMode) {
1223 if (safeMode == null) {
1224 return false;
1225 } else {
1226 return !safeMode.isManual() && !safeMode.areResourcesLow();
1227 }
1228 }
1229
/** @return the configured name (fsimage) directories as URIs. */
public static Collection<URI> getNamespaceDirs(Configuration conf) {
  return getStorageDirs(conf, DFS_NAMENODE_NAME_DIR_KEY);
}
1233
1234 /**
1235 * Get all edits dirs which are required. If any shared edits dirs are
1236 * configured, these are also included in the set of required dirs.
1237 *
1238 * @param conf the HDFS configuration.
1239 * @return all required dirs.
1240 */
1241 public static Collection<URI> getRequiredNamespaceEditsDirs(Configuration conf) {
1242 Set<URI> ret = new HashSet<URI>();
1243 ret.addAll(getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY));
1244 ret.addAll(getSharedEditsDirs(conf));
1245 return ret;
1246 }
1247
/**
 * Resolve a storage-directory property to URIs, with special handling for
 * IMPORT startup (drop default dirs so a checkpoint can be imported into
 * an empty storage set) and an empty configuration (fall back to the
 * default edits dir).
 */
private static Collection<URI> getStorageDirs(Configuration conf,
                                              String propertyName) {
  Collection<String> dirNames = conf.getTrimmedStringCollection(propertyName);
  StartupOption startOpt = NameNode.getStartupOption(conf);
  if(startOpt == StartupOption.IMPORT) {
    // In case of IMPORT this will get rid of default directories
    // but will retain directories specified in hdfs-site.xml
    // When importing image from a checkpoint, the name-node can
    // start with empty set of storage directories.
    // Load only the default resources (not hdfs-site.xml) to learn which
    // entries came from defaults rather than the user.
    Configuration cE = new HdfsConfiguration(false);
    cE.addResource("core-default.xml");
    cE.addResource("core-site.xml");
    cE.addResource("hdfs-default.xml");
    Collection<String> dirNames2 = cE.getTrimmedStringCollection(propertyName);
    dirNames.removeAll(dirNames2);
    if(dirNames.isEmpty())
      LOG.warn("!!! WARNING !!!" +
          "\n\tThe NameNode currently runs without persistent storage." +
          "\n\tAny changes to the file system meta-data may be lost." +
          "\n\tRecommended actions:" +
          "\n\t\t- shutdown and restart NameNode with configured \""
          + propertyName + "\" in hdfs-site.xml;" +
          "\n\t\t- use Backup Node as a persistent and up-to-date storage " +
          "of the file system meta-data.");
  } else if (dirNames.isEmpty()) {
    // Nothing configured: fall back to the default edits directory.
    dirNames = Collections.singletonList(
        DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_DEFAULT);
  }
  return Util.stringCollectionAsURIs(dirNames);
}
1278
1279 /**
1280 * Return an ordered list of edits directories to write to.
1281 * The list is ordered such that all shared edits directories
1282 * are ordered before non-shared directories, and any duplicates
1283 * are removed. The order they are specified in the configuration
1284 * is retained.
1285 * @return Collection of shared edits directories.
1286 * @throws IOException if multiple shared edits directories are configured
1287 */
1288 public static List<URI> getNamespaceEditsDirs(Configuration conf)
1289 throws IOException {
1290 return getNamespaceEditsDirs(conf, true);
1291 }
1292
/**
 * Return the ordered, de-duplicated list of edits directories, optionally
 * including shared (HA) directories. Shared dirs come first because
 * JournalSet syncs journals in list order and shared storage must receive
 * edits before local copies (see HDFS-2874). Falls back to the image dirs
 * when no edits dirs are configured at all.
 *
 * @param conf the HDFS configuration
 * @param includeShared whether shared edits dirs are included in the result
 * @throws IOException if more than one shared edits dir is configured
 */
public static List<URI> getNamespaceEditsDirs(Configuration conf,
                                              boolean includeShared)
    throws IOException {
  // Use a LinkedHashSet so that order is maintained while we de-dup
  // the entries.
  LinkedHashSet<URI> editsDirs = new LinkedHashSet<URI>();

  if (includeShared) {
    List<URI> sharedDirs = getSharedEditsDirs(conf);

    // Fail until multiple shared edits directories are supported (HDFS-2782)
    if (sharedDirs.size() > 1) {
      throw new IOException(
          "Multiple shared edits directories are not yet supported");
    }

    // First add the shared edits dirs. It's critical that the shared dirs
    // are added first, since JournalSet syncs them in the order they are listed,
    // and we need to make sure all edits are in place in the shared storage
    // before they are replicated locally. See HDFS-2874.
    for (URI dir : sharedDirs) {
      if (!editsDirs.add(dir)) {
        LOG.warn("Edits URI " + dir + " listed multiple times in " +
            DFS_NAMENODE_SHARED_EDITS_DIR_KEY + ". Ignoring duplicates.");
      }
    }
  }
  // Now add the non-shared dirs.
  for (URI dir : getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_KEY)) {
    if (!editsDirs.add(dir)) {
      LOG.warn("Edits URI " + dir + " listed multiple times in " +
          DFS_NAMENODE_SHARED_EDITS_DIR_KEY + " and " +
          DFS_NAMENODE_EDITS_DIR_KEY + ". Ignoring duplicates.");
    }
  }

  if (editsDirs.isEmpty()) {
    // If this is the case, no edit dirs have been explicitly configured.
    // Image dirs are to be used for edits too.
    return Lists.newArrayList(getNamespaceDirs(conf));
  } else {
    return Lists.newArrayList(editsDirs);
  }
}
1337
1338 /**
1339 * Returns edit directories that are shared between primary and secondary.
1340 * @param conf
1341 * @return Collection of edit directories.
1342 */
1343 public static List<URI> getSharedEditsDirs(Configuration conf) {
1344 // don't use getStorageDirs here, because we want an empty default
1345 // rather than the dir in /tmp
1346 Collection<String> dirNames = conf.getTrimmedStringCollection(
1347 DFS_NAMENODE_SHARED_EDITS_DIR_KEY);
1348 return Util.stringCollectionAsURIs(dirNames);
1349 }
1350
/** Acquire the namesystem read lock (shared with other readers). */
@Override
public void readLock() {
  this.fsLock.readLock().lock();
}
/**
 * Acquire the long-read lock followed by the regular read lock, for
 * long-running read operations. The long-read lock is released if the
 * second acquisition is interrupted, so no lock is leaked.
 *
 * @throws InterruptedException if interrupted while waiting for either lock
 */
@Override
public void longReadLockInterruptibly() throws InterruptedException {
  this.fsLock.longReadLock().lockInterruptibly();
  try {
    this.fsLock.readLock().lockInterruptibly();
  } catch (InterruptedException ie) {
    // In the event we're interrupted while getting the normal FSNS read lock,
    // release the long read lock.
    this.fsLock.longReadLock().unlock();
    throw ie;
  }
}
/** Release both locks taken by {@link #longReadLockInterruptibly()}. */
@Override
public void longReadUnlock() {
  this.fsLock.readLock().unlock();
  this.fsLock.longReadLock().unlock();
}
/** Release the namesystem read lock. */
@Override
public void readUnlock() {
  this.fsLock.readLock().unlock();
}
/**
 * Acquire the namesystem write lock. The long-read lock is taken first so
 * writers wait for in-flight long reads to finish.
 */
@Override
public void writeLock() {
  this.fsLock.longReadLock().lock();
  this.fsLock.writeLock().lock();
}
/**
 * Interruptible variant of {@link #writeLock()}. The long-read lock is
 * released if the write-lock acquisition is interrupted, so no lock leaks.
 *
 * @throws InterruptedException if interrupted while waiting for either lock
 */
@Override
public void writeLockInterruptibly() throws InterruptedException {
  this.fsLock.longReadLock().lockInterruptibly();
  try {
    this.fsLock.writeLock().lockInterruptibly();
  } catch (InterruptedException ie) {
    // In the event we're interrupted while getting the normal FSNS write
    // lock, release the long read lock.
    this.fsLock.longReadLock().unlock();
    throw ie;
  }
}
/** Release both locks taken by {@link #writeLock()}. */
@Override
public void writeUnlock() {
  this.fsLock.writeLock().unlock();
  this.fsLock.longReadLock().unlock();
}
/** @return true iff the current thread holds the namesystem write lock. */
@Override
public boolean hasWriteLock() {
  return this.fsLock.isWriteLockedByCurrentThread();
}
/**
 * @return true iff the current thread holds the read lock (the write lock
 *         counts, since it implies read access).
 */
@Override
public boolean hasReadLock() {
  return this.fsLock.getReadHoldCount() > 0 || hasWriteLock();
}
1406
/** @return the current thread's read-lock hold count. */
public int getReadHoldCount() {
  return this.fsLock.getReadHoldCount();
}
1410
/** @return the current thread's write-lock hold count. */
public int getWriteHoldCount() {
  return this.fsLock.getWriteHoldCount();
}
1414
/** @return a snapshot of this namespace's identity, taken under the read lock. */
NamespaceInfo getNamespaceInfo() {
  readLock();
  try {
    return unprotectedGetNamespaceInfo();
  } finally {
    readUnlock();
  }
}
1423
1424 /**
1425 * Version of @see #getNamespaceInfo() that is not protected by a lock.
1426 */
1427 NamespaceInfo unprotectedGetNamespaceInfo() {
1428 return new NamespaceInfo(dir.fsImage.getStorage().getNamespaceID(),
1429 getClusterId(), getBlockPoolId(),
1430 dir.fsImage.getStorage().getCTime());
1431 }
1432
1433 /**
1434 * Close down this file system manager.
1435 * Causes heartbeat and lease daemons to stop; waits briefly for
1436 * them to finish, but a short timeout returns control back to caller.
1437 */
1438 void close() {
1439 fsRunning = false;
1440 try {
1441 stopCommonServices();
1442 if (smmthread != null) smmthread.interrupt();
1443 } finally {
1444 // using finally to ensure we also wait for lease daemon
1445 try {
1446 stopActiveServices();
1447 stopStandbyServices();
1448 if (dir != null) {
1449 dir.close();
1450 }
1451 } catch (IOException ie) {
1452 LOG.error("Error closing FSDirectory", ie);
1453 IOUtils.cleanup(LOG, dir);
1454 }
1455 }
1456 }
1457
/** @return true until {@link #close()} has been invoked. */
@Override
public boolean isRunning() {
  return fsRunning;
}
1462
/**
 * @return true when this NN should behave as a standby: either the HA
 *         context reports STANDBY, or we are still starting up and HA is
 *         enabled (HA NNs always boot into standby).
 */
@Override
public boolean isInStandbyState() {
  if (haContext == null || haContext.getState() == null) {
    // We're still starting up. In this case, if HA is
    // on for the cluster, we always start in standby. Otherwise
    // start in active.
    return haEnabled;
  }

  return HAServiceState.STANDBY == haContext.getState().getServiceState();
}
1474
1475 /**
1476 * Dump all metadata into specified file
1477 */
1478 void metaSave(String filename) throws IOException {
1479 checkSuperuserPrivilege();
1480 checkOperation(OperationCategory.UNCHECKED);
1481 writeLock();
1482 try {
1483 checkOperation(OperationCategory.UNCHECKED);
1484 File file = new File(System.getProperty("hadoop.log.dir"), filename);
1485 PrintWriter out = new PrintWriter(new BufferedWriter(
1486 new OutputStreamWriter(new FileOutputStream(file), Charsets.UTF_8)));
1487 metaSave(out);
1488 out.flush();
1489 out.close();
1490 } finally {
1491 writeUnlock();
1492 }
1493 }
1494
1495 private void metaSave(PrintWriter out) {
1496 assert hasWriteLock();
1497 long totalInodes = this.dir.totalInodes();
1498 long totalBlocks = this.getBlocksTotal();
1499 out.println(totalInodes + " files and directories, " + totalBlocks
1500 + " blocks = " + (totalInodes + totalBlocks) + " total");
1501
1502 blockManager.metaSave(out);
1503 }
1504
1505 private String metaSaveAsString() {
1506 StringWriter sw = new StringWriter();
1507 PrintWriter pw = new PrintWriter(sw);
1508 metaSave(pw);
1509 pw.flush();
1510 return sw.toString();
1511 }
1512
1513
/** @return the default block size handed out to clients, in bytes. */
long getDefaultBlockSize() {
  return serverDefaults.getBlockSize();
}
1517
/**
 * @return the server defaults advertised to clients
 * @throws StandbyException if READ operations are not allowed in this state
 */
FsServerDefaults getServerDefaults() throws StandbyException {
  checkOperation(OperationCategory.READ);
  return serverDefaults;
}
1522
/** @return the configured access-time precision in milliseconds (0 disables atime). */
long getAccessTimePrecision() {
  return accessTimePrecision;
}
1526
/** @return true when access-time tracking is enabled (precision &gt; 0). */
private boolean isAccessTimeSupported() {
  return accessTimePrecision > 0;
}
1530
1531 /////////////////////////////////////////////////////////
1532 //
1533 // These methods are called by HadoopFS clients
1534 //
1535 /////////////////////////////////////////////////////////
1536 /**
1537 * Set permissions for an existing file.
1538 * @throws IOException
1539 */
1540 void setPermission(String src, FsPermission permission)
1541 throws AccessControlException, FileNotFoundException, SafeModeException,
1542 UnresolvedLinkException, IOException {
1543 try {
1544 setPermissionInt(src, permission);
1545 } catch (AccessControlException e) {
1546 logAuditEvent(false, "setPermission", src);
1547 throw e;
1548 }
1549 }
1550
/**
 * Internal worker for {@link #setPermission}: verifies safe mode and
 * ownership under the write lock, applies the change, then syncs the edit
 * log and logs a success audit event outside the lock.
 */
private void setPermissionInt(String src, FsPermission permission)
    throws AccessControlException, FileNotFoundException, SafeModeException,
    UnresolvedLinkException, IOException {
  HdfsFileStatus resultingStat = null;
  FSPermissionChecker pc = getPermissionChecker();
  checkOperation(OperationCategory.WRITE);
  byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
  writeLock();
  try {
    // Re-check under the lock: HA state may have changed since the
    // unlocked check above.
    checkOperation(OperationCategory.WRITE);
    checkNameNodeSafeMode("Cannot set permission for " + src);
    src = FSDirectory.resolvePath(src, pathComponents, dir);
    checkOwner(pc, src);
    dir.setPermission(src, permission);
    resultingStat = getAuditFileInfo(src, false);
  } finally {
    writeUnlock();
  }
  // Persist the edit before acknowledging to the client.
  getEditLog().logSync();
  logAuditEvent(true, "setPermission", src, null, resultingStat);
}
1572
1573 /**
1574 * Set owner for an existing file.
1575 * @throws IOException
1576 */
1577 void setOwner(String src, String username, String group)
1578 throws AccessControlException, FileNotFoundException, SafeModeException,
1579 UnresolvedLinkException, IOException {
1580 try {
1581 setOwnerInt(src, username, group);
1582 } catch (AccessControlException e) {
1583 logAuditEvent(false, "setOwner", src);
1584 throw e;
1585 }
1586 }
1587
/**
 * Internal worker for {@link #setOwner}: under the write lock, verifies
 * safe mode and ownership, and enforces that non-superusers can neither
 * transfer ownership nor assign a group they don't belong to; then syncs
 * the edit log and logs a success audit event outside the lock.
 */
private void setOwnerInt(String src, String username, String group)
    throws AccessControlException, FileNotFoundException, SafeModeException,
    UnresolvedLinkException, IOException {
  HdfsFileStatus resultingStat = null;
  FSPermissionChecker pc = getPermissionChecker();
  checkOperation(OperationCategory.WRITE);
  byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
  writeLock();
  try {
    checkOperation(OperationCategory.WRITE);
    checkNameNodeSafeMode("Cannot set owner for " + src);
    src = FSDirectory.resolvePath(src, pathComponents, dir);
    checkOwner(pc, src);
    if (!pc.isSuperUser()) {
      // Non-superusers may only "change" the owner to themselves, and only
      // to a group they are a member of.
      if (username != null && !pc.getUser().equals(username)) {
        throw new AccessControlException("Non-super user cannot change owner");
      }
      if (group != null && !pc.containsGroup(group)) {
        throw new AccessControlException("User does not belong to " + group);
      }
    }
    dir.setOwner(src, username, group);
    resultingStat = getAuditFileInfo(src, false);
  } finally {
    writeUnlock();
  }
  getEditLog().logSync();
  logAuditEvent(true, "setOwner", src, null, resultingStat);
}
1617
1618 /**
1619 * Get block locations within the specified range.
1620 * @see ClientProtocol#getBlockLocations(String, long, long)
1621 */
1622 LocatedBlocks getBlockLocations(String clientMachine, String src,
1623 long offset, long length) throws AccessControlException,
1624 FileNotFoundException, UnresolvedLinkException, IOException {
1625 LocatedBlocks blocks = getBlockLocations(src, offset, length, true, true,
1626 true);
1627 if (blocks != null) {
1628 blockManager.getDatanodeManager().sortLocatedBlocks(
1629 clientMachine, blocks.getLocatedBlocks());
1630
1631 LocatedBlock lastBlock = blocks.getLastLocatedBlock();
1632 if (lastBlock != null) {
1633 ArrayList<LocatedBlock> lastBlockList = new ArrayList<LocatedBlock>();
1634 lastBlockList.add(lastBlock);
1635 blockManager.getDatanodeManager().sortLocatedBlocks(
1636 clientMachine, lastBlockList);
1637 }
1638 }
1639 return blocks;
1640 }
1641
1642 /**
1643 * Get block locations within the specified range.
1644 * @see ClientProtocol#getBlockLocations(String, long, long)
1645 * @throws FileNotFoundException, UnresolvedLinkException, IOException
1646 */
1647 LocatedBlocks getBlockLocations(String src, long offset, long length,
1648 boolean doAccessTime, boolean needBlockToken, boolean checkSafeMode)
1649 throws FileNotFoundException, UnresolvedLinkException, IOException {
1650 try {
1651 return getBlockLocationsInt(src, offset, length, doAccessTime,
1652 needBlockToken, checkSafeMode);
1653 } catch (AccessControlException e) {
1654 logAuditEvent(false, "open", src);
1655 throw e;
1656 }
1657 }
1658
1659 private LocatedBlocks getBlockLocationsInt(String src, long offset,
1660 long length, boolean doAccessTime, boolean needBlockToken,
1661 boolean checkSafeMode)
1662 throws FileNotFoundException, UnresolvedLinkException, IOException {
1663 if (offset < 0) {
1664 throw new HadoopIllegalArgumentException(
1665 "Negative offset is not supported. File: " + src);
1666 }
1667 if (length < 0) {
1668 throw new HadoopIllegalArgumentException(
1669 "Negative length is not supported. File: " + src);
1670 }
1671 final LocatedBlocks ret = getBlockLocationsUpdateTimes(src,
1672 offset, length, doAccessTime, needBlockToken);
1673 logAuditEvent(true, "open", src);
1674 if (checkSafeMode && isInSafeMode()) {
1675 for (LocatedBlock b : ret.getLocatedBlocks()) {
1676 // if safemode & no block locations yet then throw safemodeException
1677 if ((b.getLocations() == null) || (b.getLocations().length == 0)) {
1678 SafeModeException se = new SafeModeException(
1679 "Zero blocklocations for " + src, safeMode);
1680 if (haEnabled && haContext != null &&
1681 haContext.getState().getServiceState() == HAServiceState.ACTIVE) {
1682 throw new RetriableException(se);
1683 } else {
1684 throw se;
1685 }
1686 }
1687 }
1688 }
1689 return ret;
1690 }
1691
1692 /*
1693 * Get block locations within the specified range, updating the
1694 * access times if necessary.
1695 */
1696 private LocatedBlocks getBlockLocationsUpdateTimes(String src, long offset,
1697 long length, boolean doAccessTime, boolean needBlockToken)
1698 throws FileNotFoundException,
1699 UnresolvedLinkException, IOException {
1700 FSPermissionChecker pc = getPermissionChecker();
1701 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
1702 for (int attempt = 0; attempt < 2; attempt++) {
1703 boolean isReadOp = (attempt == 0);
1704 if (isReadOp) { // first attempt is with readlock
1705 checkOperation(OperationCategory.READ);
1706 readLock();
1707 } else { // second attempt is with write lock
1708 checkOperation(OperationCategory.WRITE);
1709 writeLock(); // writelock is needed to set accesstime
1710 }
1711 src = FSDirectory.resolvePath(src, pathComponents, dir);
1712 try {
1713 if (isReadOp) {
1714 checkOperation(OperationCategory.READ);
1715 } else {
1716 checkOperation(OperationCategory.WRITE);
1717 }
1718 if (isPermissionEnabled) {
1719 checkPathAccess(pc, src, FsAction.READ);
1720 }
1721
1722 // if the namenode is in safemode, then do not update access time
1723 if (isInSafeMode()) {
1724 doAccessTime = false;
1725 }
1726
1727 final INodesInPath iip = dir.getLastINodeInPath(src);
1728 final INodeFile inode = INodeFile.valueOf(iip.getLastINode(), src);
1729 if (!iip.isSnapshot() //snapshots are readonly, so don't update atime.
1730 && doAccessTime && isAccessTimeSupported()) {
1731 final long now = now();
1732 if (now > inode.getAccessTime() + getAccessTimePrecision()) {
1733 // if we have to set access time but we only have the readlock, then
1734 // restart this entire operation with the writeLock.
1735 if (isReadOp) {
1736 continue;
1737 }
1738 dir.setTimes(src, inode, -1, now, false, iip.getLatestSnapshotId());
1739 }
1740 }
1741 final long fileSize = iip.isSnapshot() ?
1742 inode.computeFileSize(iip.getPathSnapshotId())
1743 : inode.computeFileSizeNotIncludingLastUcBlock();
1744 boolean isUc = inode.isUnderConstruction();
1745 if (iip.isSnapshot()) {
1746 // if src indicates a snapshot file, we need to make sure the returned
1747 // blocks do not exceed the size of the snapshot file.
1748 length = Math.min(length, fileSize - offset);
1749 isUc = false;
1750 }
1751 LocatedBlocks blocks =
1752 blockManager.createLocatedBlocks(inode.getBlocks(), fileSize,
1753 isUc, offset, length, needBlockToken, iip.isSnapshot());
1754 // Set caching information for the located blocks.
1755 for (LocatedBlock lb: blocks.getLocatedBlocks()) {
1756 cacheManager.setCachedLocations(lb);
1757 }
1758 return blocks;
1759 } finally {
1760 if (isReadOp) {
1761 readUnlock();
1762 } else {
1763 writeUnlock();
1764 }
1765 }
1766 }
1767 return null; // can never reach here
1768 }
1769
  /**
   * Moves all the blocks from srcs and appends them to trg
   * To avoid rollbacks we will verify validity of ALL of the args
   * before we start actual move.
   *
   * This does not support ".inodes" relative path
   * @param target file that the source blocks are appended to
   * @param srcs files whose blocks are moved into the target
   * @throws IOException if validation or the move itself fails
   */
  void concat(String target, String [] srcs)
      throws IOException, UnresolvedLinkException {
    // Retry-cache lookup: a duplicate of an already-successful RPC returns
    // immediately without re-executing.
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }

    // Either there is no previous request in progress or it has failed
    if(FSNamesystem.LOG.isDebugEnabled()) {
      FSNamesystem.LOG.debug("concat " + Arrays.toString(srcs) +
          " to " + target);
    }

    boolean success = false;
    try {
      concatInt(target, srcs, cacheEntry != null);
      success = true;
    } catch (AccessControlException e) {
      logAuditEvent(false, "concat", Arrays.toString(srcs), target, null);
      throw e;
    } finally {
      // Record the outcome so retried RPCs get a consistent answer.
      RetryCache.setState(cacheEntry, success);
    }
  }
1804
  /**
   * Internal implementation of {@link #concat(String, String[])}: performs
   * lock-free argument validation, then runs the actual move under the
   * write lock.
   *
   * @param target non-empty target path; must share a parent with all srcs
   * @param srcs non-empty list of source paths
   * @param logRetryCache whether to record the RPC id in the edit log for
   *        retry-cache rebuilding
   */
  private void concatInt(String target, String [] srcs,
      boolean logRetryCache) throws IOException, UnresolvedLinkException {
    // verify args
    if(target.isEmpty()) {
      throw new IllegalArgumentException("Target file name is empty");
    }
    if(srcs == null || srcs.length == 0) {
      throw new IllegalArgumentException("No sources given");
    }

    // We require all files be in the same directory
    String trgParent =
      target.substring(0, target.lastIndexOf(Path.SEPARATOR_CHAR));
    for (String s : srcs) {
      String srcParent = s.substring(0, s.lastIndexOf(Path.SEPARATOR_CHAR));
      if (!srcParent.equals(trgParent)) {
        throw new IllegalArgumentException(
           "Sources and target are not in the same directory");
      }
    }

    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      // Re-check after acquiring the lock: the HA state may have changed.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot concat " + target);
      concatInternal(pc, target, srcs, logRetryCache);
      resultingStat = getAuditFileInfo(target, false);
    } finally {
      writeUnlock();
    }
    // Flush the edit-log transaction before reporting success.
    getEditLog().logSync();
    logAuditEvent(true, "concat", Arrays.toString(srcs), target, resultingStat);
  }
1841
1842 /** See {@link #concat(String, String[])} */
1843 private void concatInternal(FSPermissionChecker pc, String target,
1844 String[] srcs, boolean logRetryCache) throws IOException,
1845 UnresolvedLinkException {
1846 assert hasWriteLock();
1847
1848 // write permission for the target
1849 if (isPermissionEnabled) {
1850 checkPathAccess(pc, target, FsAction.WRITE);
1851
1852 // and srcs
1853 for(String aSrc: srcs) {
1854 checkPathAccess(pc, aSrc, FsAction.READ); // read the file
1855 checkParentAccess(pc, aSrc, FsAction.WRITE); // for delete
1856 }
1857 }
1858
1859 // to make sure no two files are the same
1860 Set<INode> si = new HashSet<INode>();
1861
1862 // we put the following prerequisite for the operation
1863 // replication and blocks sizes should be the same for ALL the blocks
1864
1865 // check the target
1866 final INodeFile trgInode = INodeFile.valueOf(dir.getINode4Write(target),
1867 target);
1868 if(trgInode.isUnderConstruction()) {
1869 throw new HadoopIllegalArgumentException("concat: target file "
1870 + target + " is under construction");
1871 }
1872 // per design target shouldn't be empty and all the blocks same size
1873 if(trgInode.numBlocks() == 0) {
1874 throw new HadoopIllegalArgumentException("concat: target file "
1875 + target + " is empty");
1876 }
1877 if (trgInode.isWithSnapshot()) {
1878 throw new HadoopIllegalArgumentException("concat: target file "
1879 + target + " is in a snapshot");
1880 }
1881
1882 long blockSize = trgInode.getPreferredBlockSize();
1883
1884 // check the end block to be full
1885 final BlockInfo last = trgInode.getLastBlock();
1886 if(blockSize != last.getNumBytes()) {
1887 throw new HadoopIllegalArgumentException("The last block in " + target
1888 + " is not full; last block size = " + last.getNumBytes()
1889 + " but file block size = " + blockSize);
1890 }
1891
1892 si.add(trgInode);
1893 final short repl = trgInode.getFileReplication();
1894
1895 // now check the srcs
1896 boolean endSrc = false; // final src file doesn't have to have full end block
1897 for(int i=0; i<srcs.length; i++) {
1898 String src = srcs[i];
1899 if(i==srcs.length-1)
1900 endSrc=true;
1901
1902 final INodeFile srcInode = INodeFile.valueOf(dir.getINode4Write(src), src);
1903 if(src.isEmpty()
1904 || srcInode.isUnderConstruction()
1905 || srcInode.numBlocks() == 0) {
1906 throw new HadoopIllegalArgumentException("concat: source file " + src
1907 + " is invalid or empty or underConstruction");
1908 }
1909
1910 // check replication and blocks size
1911 if(repl != srcInode.getBlockReplication()) {
1912 throw new HadoopIllegalArgumentException("concat: the soruce file "
1913 + src + " and the target file " + target
1914 + " should have the same replication: source replication is "
1915 + srcInode.getBlockReplication()
1916 + " but target replication is " + repl);
1917 }
1918
1919 //boolean endBlock=false;
1920 // verify that all the blocks are of the same length as target
1921 // should be enough to check the end blocks
1922 final BlockInfo[] srcBlocks = srcInode.getBlocks();
1923 int idx = srcBlocks.length-1;
1924 if(endSrc)
1925 idx = srcBlocks.length-2; // end block of endSrc is OK not to be full
1926 if(idx >= 0 && srcBlocks[idx].getNumBytes() != blockSize) {
1927 throw new HadoopIllegalArgumentException("concat: the soruce file "
1928 + src + " and the target file " + target
1929 + " should have the same blocks sizes: target block size is "
1930 + blockSize + " but the size of source block " + idx + " is "
1931 + srcBlocks[idx].getNumBytes());
1932 }
1933
1934 si.add(srcInode);
1935 }
1936
1937 // make sure no two files are the same
1938 if(si.size() < srcs.length+1) { // trg + srcs
1939 // it means at least two files are the same
1940 throw new HadoopIllegalArgumentException(
1941 "concat: at least two of the source files are the same");
1942 }
1943
1944 if(NameNode.stateChangeLog.isDebugEnabled()) {
1945 NameNode.stateChangeLog.debug("DIR* NameSystem.concat: " +
1946 Arrays.toString(srcs) + " to " + target);
1947 }
1948
1949 dir.concat(target,srcs, logRetryCache);
1950 }
1951
1952 /**
1953 * stores the modification and access time for this inode.
1954 * The access time is precise upto an hour. The transaction, if needed, is
1955 * written to the edits log but is not flushed.
1956 */
1957 void setTimes(String src, long mtime, long atime)
1958 throws IOException, UnresolvedLinkException {
1959 if (!isAccessTimeSupported() && atime != -1) {
1960 throw new IOException("Access time for hdfs is not configured. " +
1961 " Please set " + DFS_NAMENODE_ACCESSTIME_PRECISION_KEY + " configuration parameter.");
1962 }
1963 try {
1964 setTimesInt(src, mtime, atime);
1965 } catch (AccessControlException e) {
1966 logAuditEvent(false, "setTimes", src);
1967 throw e;
1968 }
1969 }
1970
1971 private void setTimesInt(String src, long mtime, long atime)
1972 throws IOException, UnresolvedLinkException {
1973 HdfsFileStatus resultingStat = null;
1974 FSPermissionChecker pc = getPermissionChecker();
1975 checkOperation(OperationCategory.WRITE);
1976 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
1977 writeLock();
1978 try {
1979 checkOperation(OperationCategory.WRITE);
1980 checkNameNodeSafeMode("Cannot set times " + src);
1981 src = FSDirectory.resolvePath(src, pathComponents, dir);
1982
1983 // Write access is required to set access and modification times
1984 if (isPermissionEnabled) {
1985 checkPathAccess(pc, src, FsAction.WRITE);
1986 }
1987 final INodesInPath iip = dir.getINodesInPath4Write(src);
1988 final INode inode = iip.getLastINode();
1989 if (inode != null) {
1990 dir.setTimes(src, inode, mtime, atime, true, iip.getLatestSnapshotId());
1991 resultingStat = getAuditFileInfo(src, false);
1992 } else {
1993 throw new FileNotFoundException("File/Directory " + src + " does not exist.");
1994 }
1995 } finally {
1996 writeUnlock();
1997 }
1998 logAuditEvent(true, "setTimes", src, null, resultingStat);
1999 }
2000
  /**
   * Create a symbolic link.
   *
   * @param target the path the new link points to (not validated for
   *        existence; must not be a reserved name)
   * @param link the path of the symlink to create
   * @param dirPerms permissions to apply to the link (and created parents)
   * @param createParent whether to create missing parent directories
   * @throws UnsupportedOperationException if symlinks are disabled
   * @throws InvalidPathException if the link or target name is invalid
   */
  @SuppressWarnings("deprecation")
  void createSymlink(String target, String link,
      PermissionStatus dirPerms, boolean createParent)
      throws IOException, UnresolvedLinkException {
    if (!FileSystem.areSymlinksEnabled()) {
      throw new UnsupportedOperationException("Symlinks not supported");
    }
    if (!DFSUtil.isValidName(link)) {
      throw new InvalidPathException("Invalid link name: " + link);
    }
    if (FSDirectory.isReservedName(target)) {
      throw new InvalidPathException("Invalid target name: " + target);
    }
    // Retry-cache lookup: duplicates of an already-successful RPC return
    // immediately without re-executing.
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    boolean success = false;
    try {
      createSymlinkInt(target, link, dirPerms, createParent, cacheEntry != null);
      success = true;
    } catch (AccessControlException e) {
      logAuditEvent(false, "createSymlink", link, target, null);
      throw e;
    } finally {
      // Record the outcome so retried RPCs get a consistent answer.
      RetryCache.setState(cacheEntry, success);
    }
  }
2032
  /**
   * Internal implementation of {@link #createSymlink}: adds the symlink to
   * the namespace under the write lock, syncs the edit log, and audits
   * success.
   *
   * @param logRetryCache whether to record the RPC id in the edit log for
   *        retry-cache rebuilding
   */
  private void createSymlinkInt(String target, String link,
      PermissionStatus dirPerms, boolean createParent, boolean logRetryCache)
      throws IOException, UnresolvedLinkException {
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* NameSystem.createSymlink: target="
          + target + " link=" + link);
    }
    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(link);
    writeLock();
    try {
      // Re-check after acquiring the lock: the HA state may have changed.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot create symlink " + link);
      link = FSDirectory.resolvePath(link, pathComponents, dir);
      if (!createParent) {
        verifyParentDir(link);
      }
      // NOTE(review): existence is checked before the permission check below,
      // so an unauthorized caller can distinguish "exists" from "denied" —
      // confirm whether this ordering is intentional.
      if (!dir.isValidToCreate(link)) {
        throw new IOException("failed to create link " + link
            +" either because the filename is invalid or the file exists");
      }
      if (isPermissionEnabled) {
        checkAncestorAccess(pc, link, FsAction.WRITE);
      }
      // validate that we have enough inodes.
      checkFsObjectLimit();

      // add symbolic link to namespace
      dir.addSymlink(link, target, dirPerms, createParent, logRetryCache);
      resultingStat = getAuditFileInfo(link, false);
    } finally {
      writeUnlock();
    }
    // Flush the edit-log transaction before reporting success.
    getEditLog().logSync();
    logAuditEvent(true, "createSymlink", link, target, resultingStat);
  }
2071
2072 /**
2073 * Set replication for an existing file.
2074 *
2075 * The NameNode sets new replication and schedules either replication of
2076 * under-replicated data blocks or removal of the excessive block copies
2077 * if the blocks are over-replicated.
2078 *
2079 * @see ClientProtocol#setReplication(String, short)
2080 * @param src file name
2081 * @param replication new replication
2082 * @return true if successful;
2083 * false if file does not exist or is a directory
2084 */
2085 boolean setReplication(final String src, final short replication)
2086 throws IOException {
2087 try {
2088 return setReplicationInt(src, replication);
2089 } catch (AccessControlException e) {
2090 logAuditEvent(false, "setReplication", src);
2091 throw e;
2092 }
2093 }
2094
  /**
   * Internal implementation of {@link #setReplication}: validates the new
   * factor, updates the namespace under the write lock, hands the affected
   * blocks to the BlockManager, syncs the edit log, and audits success.
   *
   * @return true if {@code src} resolved to a file and was updated;
   *         false otherwise (e.g. it is a directory)
   */
  private boolean setReplicationInt(String src, final short replication)
      throws IOException {
    // Validate against the configured min/max replication before locking.
    blockManager.verifyReplication(src, replication, null);
    final boolean isFile;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      // Re-check after acquiring the lock: the HA state may have changed.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set replication for " + src);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      if (isPermissionEnabled) {
        checkPathAccess(pc, src, FsAction.WRITE);
      }

      final short[] blockRepls = new short[2]; // 0: old, 1: new
      // Returns the file's blocks, or null if src is not a file.
      final Block[] blocks = dir.setReplication(src, replication, blockRepls);
      isFile = blocks != null;
      if (isFile) {
        // Let the BlockManager schedule replication or excess-replica
        // removal for the affected blocks.
        blockManager.setReplication(blockRepls[0], blockRepls[1], src, blocks);
      }
    } finally {
      writeUnlock();
    }

    // Flush the edit-log transaction before reporting success.
    getEditLog().logSync();
    if (isFile) {
      logAuditEvent(true, "setReplication", src);
    }
    return isFile;
  }
2127
2128 long getPreferredBlockSize(String filename)
2129 throws IOException, UnresolvedLinkException {
2130 FSPermissionChecker pc = getPermissionChecker();
2131 checkOperation(OperationCategory.READ);
2132 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(filename);
2133 readLock();
2134 try {
2135 checkOperation(OperationCategory.READ);
2136 filename = FSDirectory.resolvePath(filename, pathComponents, dir);
2137 if (isPermissionEnabled) {
2138 checkTraverse(pc, filename);
2139 }
2140 return dir.getPreferredBlockSize(filename);
2141 } finally {
2142 readUnlock();
2143 }
2144 }
2145
2146 /**
2147 * Verify that parent directory of src exists.
2148 */
2149 private void verifyParentDir(String src) throws FileNotFoundException,
2150 ParentNotDirectoryException, UnresolvedLinkException {
2151 assert hasReadLock();
2152 Path parent = new Path(src).getParent();
2153 if (parent != null) {
2154 final INode parentNode = dir.getINode(parent.toString());
2155 if (parentNode == null) {
2156 throw new FileNotFoundException("Parent directory doesn't exist: "
2157 + parent);
2158 } else if (!parentNode.isDirectory() && !parentNode.isSymlink()) {
2159 throw new ParentNotDirectoryException("Parent path is not a directory: "
2160 + parent);
2161 }
2162 }
2163 }
2164
  /**
   * Create a new file entry in the namespace.
   *
   * For description of parameters and exceptions thrown see
   * {@link ClientProtocol#create()}, except it returns valid file status
   * upon success.
   *
   * Retries of an already-successful RPC return the cached
   * {@link HdfsFileStatus} payload instead of re-executing.
   */
  HdfsFileStatus startFile(String src, PermissionStatus permissions,
      String holder, String clientMachine, EnumSet<CreateFlag> flag,
      boolean createParent, short replication, long blockSize)
      throws AccessControlException, SafeModeException,
      FileAlreadyExistsException, UnresolvedLinkException,
      FileNotFoundException, ParentNotDirectoryException, IOException {
    HdfsFileStatus status = null;
    CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
        null);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      // Duplicate of a completed RPC: hand back the recorded result.
      return (HdfsFileStatus) cacheEntry.getPayload();
    }

    try {
      status = startFileInt(src, permissions, holder, clientMachine, flag,
          createParent, replication, blockSize, cacheEntry != null);
    } catch (AccessControlException e) {
      logAuditEvent(false, "create", src);
      throw e;
    } finally {
      // Success is defined by having produced a status; record it (with the
      // payload) for future retries of this RPC.
      RetryCache.setState(cacheEntry, status != null, status);
    }
    return status;
  }
2200
2201 private HdfsFileStatus startFileInt(String src, PermissionStatus permissions,
2202 String holder, String clientMachine, EnumSet<CreateFlag> flag,
2203 boolean createParent, short replication, long blockSize,
2204 boolean logRetryCache) throws AccessControlException, SafeModeException,
2205 FileAlreadyExistsException, UnresolvedLinkException,
2206 FileNotFoundException, ParentNotDirectoryException, IOException {
2207 if (NameNode.stateChangeLog.isDebugEnabled()) {
2208 NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: src=" + src
2209 + ", holder=" + holder
2210 + ", clientMachine=" + clientMachine
2211 + ", createParent=" + createParent
2212 + ", replication=" + replication
2213 + ", createFlag=" + flag.toString());
2214 }
2215 if (!DFSUtil.isValidName(src)) {
2216 throw new InvalidPathException(src);
2217 }
2218 blockManager.verifyReplication(src, replication, clientMachine);
2219
2220 boolean skipSync = false;
2221 HdfsFileStatus stat = null;
2222 FSPermissionChecker pc = getPermissionChecker();
2223 checkOperation(OperationCategory.WRITE);
2224 if (blockSize < minBlockSize) {
2225 throw new IOException("Specified block size is less than configured" +
2226 " minimum value (" + DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY
2227 + "): " + blockSize + " < " + minBlockSize);
2228 }
2229 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2230 boolean create = flag.contains(CreateFlag.CREATE);
2231 boolean overwrite = flag.contains(CreateFlag.OVERWRITE);
2232 writeLock();
2233 try {
2234 checkOperation(OperationCategory.WRITE);
2235 checkNameNodeSafeMode("Cannot create file" + src);
2236 src = FSDirectory.resolvePath(src, pathComponents, dir);
2237 startFileInternal(pc, src, permissions, holder, clientMachine, create,
2238 overwrite, createParent, replication, blockSize, logRetryCache);
2239 stat = dir.getFileInfo(src, false);
2240 } catch (StandbyException se) {
2241 skipSync = true;
2242 throw se;
2243 } finally {
2244 writeUnlock();
2245 // There might be transactions logged while trying to recover the lease.
2246 // They need to be sync'ed even when an exception was thrown.
2247 if (!skipSync) {
2248 getEditLog().logSync();
2249 }
2250 }
2251 logAuditEvent(true, "create", src, null, stat);
2252 return stat;
2253 }
2254
  /**
   * Create a new file or overwrite an existing file<br>
   *
   * Once the file is created the client then allocates a new block with the
   * next call using {@link NameNode#addBlock()}.
   * <p>
   * For description of parameters and exceptions thrown see
   * {@link ClientProtocol#create()}
   *
   * Must be called with the write lock held; the caller is responsible for
   * syncing the edit log afterwards.
   */
  private void startFileInternal(FSPermissionChecker pc, String src,
      PermissionStatus permissions, String holder, String clientMachine,
      boolean create, boolean overwrite, boolean createParent,
      short replication, long blockSize, boolean logRetryEntry)
      throws FileAlreadyExistsException, AccessControlException,
      UnresolvedLinkException, FileNotFoundException,
      ParentNotDirectoryException, IOException {
    assert hasWriteLock();
    // Verify that the destination does not exist as a directory already.
    final INodesInPath iip = dir.getINodesInPath4Write(src);
    final INode inode = iip.getLastINode();
    if (inode != null && inode.isDirectory()) {
      throw new FileAlreadyExistsException(src +
          " already exists as a directory");
    }
    // null when the path does not exist yet (acceptAll == true).
    final INodeFile myFile = INodeFile.valueOf(inode, src, true);
    if (isPermissionEnabled) {
      if (overwrite && myFile != null) {
        // Overwriting an existing file requires write access on the file.
        checkPathAccess(pc, src, FsAction.WRITE);
      } else {
        // Creating a new entry requires write access on the nearest
        // existing ancestor.
        checkAncestorAccess(pc, src, FsAction.WRITE);
      }
    }

    if (!createParent) {
      verifyParentDir(src);
    }

    try {
      if (myFile == null) {
        if (!create) {
          throw new FileNotFoundException("Can't overwrite non-existent " +
              src + " for client " + clientMachine);
        }
      } else {
        if (overwrite) {
          try {
            deleteInt(src, true, false); // File exists - delete if overwrite
          } catch (AccessControlException e) {
            logAuditEvent(false, "delete", src);
            throw e;
          }
        } else {
          // If lease soft limit time is expired, recover the lease
          recoverLeaseInternal(myFile, src, holder, clientMachine, false);
          throw new FileAlreadyExistsException(src + " for client " +
              clientMachine + " already exists");
        }
      }

      // validate that we have enough inodes/blocks available.
      checkFsObjectLimit();
      // clientNode is null when the client is not collocated with a DN.
      final DatanodeDescriptor clientNode =
          blockManager.getDatanodeManager().getDatanodeByHost(clientMachine);

      INodeFile newNode = dir.addFile(src, permissions, replication, blockSize,
          holder, clientMachine, clientNode);
      if (newNode == null) {
        throw new IOException("Unable to add " + src + " to namespace");
      }
      // Track the writer's lease on the new under-construction file.
      leaseManager.addLease(newNode.getFileUnderConstructionFeature()
          .getClientName(), src);

      // record file record in log, record new generation stamp
      getEditLog().logOpenFile(src, newNode, logRetryEntry);
      if (NameNode.stateChangeLog.isDebugEnabled()) {
        NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: added " +
            src + " inode " + newNode.getId() + " " + holder);
      }
    } catch (IOException ie) {
      NameNode.stateChangeLog.warn("DIR* NameSystem.startFile: " + src + " " +
          ie.getMessage());
      throw ie;
    }
  }
2338
  /**
   * Append to an existing file for append.
   * <p>
   *
   * The method returns the last block of the file if this is a partial block,
   * which can still be used for writing more data. The client uses the returned
   * block locations to form the data pipeline for this block.<br>
   * The method returns null if the last block is full. The client then
   * allocates a new block with the next call using {@link NameNode#addBlock()}.
   * <p>
   *
   * For description of parameters and exceptions thrown see
   * {@link ClientProtocol#append(String, String)}
   *
   * Must be called with the write lock held.
   *
   * @return the last block locations if the block is partial or null otherwise
   */
  private LocatedBlock appendFileInternal(FSPermissionChecker pc, String src,
      String holder, String clientMachine, boolean logRetryCache)
      throws AccessControlException, UnresolvedLinkException,
      FileNotFoundException, IOException {
    assert hasWriteLock();
    // Verify that the destination does not exist as a directory already.
    final INodesInPath iip = dir.getINodesInPath4Write(src);
    final INode inode = iip.getLastINode();
    if (inode != null && inode.isDirectory()) {
      throw new FileAlreadyExistsException("Cannot append to directory " + src
          + "; already exists as a directory.");
    }
    if (isPermissionEnabled) {
      checkPathAccess(pc, src, FsAction.WRITE);
    }

    try {
      if (inode == null) {
        throw new FileNotFoundException("failed to append to non-existent file "
          + src + " for client " + clientMachine);
      }
      INodeFile myFile = INodeFile.valueOf(inode, src, true);
      // Opening an existing file for write - may need to recover lease.
      recoverLeaseInternal(myFile, src, holder, clientMachine, false);

      // recoverLeaseInternal may create a new InodeFile via
      // finalizeINodeFileUnderConstruction so we need to refresh
      // the referenced file.
      myFile = INodeFile.valueOf(dir.getINode(src), src, true);
      final BlockInfo lastBlock = myFile.getLastBlock();
      // Check that the block has at least minimum replication.
      if(lastBlock != null && lastBlock.isComplete() &&
          !getBlockManager().isSufficientlyReplicated(lastBlock)) {
        throw new IOException("append: lastBlock=" + lastBlock +
            " of src=" + src + " is not sufficiently replicated yet.");
      }
      // clientNode is null when the client is not collocated with a DN.
      final DatanodeDescriptor clientNode =
          blockManager.getDatanodeManager().getDatanodeByHost(clientMachine);
      return prepareFileForWrite(src, myFile, holder, clientMachine, clientNode,
          true, iip.getLatestSnapshotId(), logRetryCache);
    } catch (IOException ie) {
      NameNode.stateChangeLog.warn("DIR* NameSystem.append: " +ie.getMessage());
      throw ie;
    }
  }
2400
2401 /**
2402 * Replace current node with a INodeUnderConstruction.
2403 * Recreate in-memory lease record.
2404 *
2405 * @param src path to the file
2406 * @param file existing file object
2407 * @param leaseHolder identifier of the lease holder on this file
2408 * @param clientMachine identifier of the client machine
2409 * @param clientNode if the client is collocated with a DN, that DN's descriptor
2410 * @param writeToEditLog whether to persist this change to the edit log
2411 * @param logRetryCache whether to record RPC ids in editlog for retry cache
2412 * rebuilding
2413 * @return the last block locations if the block is partial or null otherwise
2414 * @throws UnresolvedLinkException
2415 * @throws IOException
2416 */
2417 LocatedBlock prepareFileForWrite(String src, INodeFile file,
2418 String leaseHolder, String clientMachine, DatanodeDescriptor clientNode,
2419 boolean writeToEditLog, int latestSnapshot, boolean logRetryCache)
2420 throws IOException {
2421 file = file.recordModification(latestSnapshot);
2422 final INodeFile cons = file.toUnderConstruction(leaseHolder, clientMachine,
2423 clientNode);
2424
2425 leaseManager.addLease(cons.getFileUnderConstructionFeature()
2426 .getClientName(), src);
2427
2428 LocatedBlock ret = blockManager.convertLastBlockToUnderConstruction(cons);
2429 if (writeToEditLog) {
2430 getEditLog().logOpenFile(src, cons, logRetryCache);
2431 }
2432 return ret;
2433 }
2434
  /**
   * Recover lease;
   * Immediately revoke the lease of the current lease holder and start lease
   * recovery so that the file can be forced to be closed.
   *
   * @param src the path of the file to start lease recovery
   * @param holder the lease holder's name
   * @param clientMachine the client machine's name
   * @return true if the file is already closed
   * @throws IOException
   */
  boolean recoverLease(String src, String holder, String clientMachine)
      throws IOException {
    if (!DFSUtil.isValidName(src)) {
      throw new IOException("Invalid file name: " + src);
    }

    boolean skipSync = false;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      // Re-check after acquiring the lock: the HA state may have changed.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot recover the lease of " + src);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      final INodeFile inode = INodeFile.valueOf(dir.getINode(src), src);
      if (!inode.isUnderConstruction()) {
        // Not under construction means no lease to recover: already closed.
        return true;
      }
      if (isPermissionEnabled) {
        checkPathAccess(pc, src, FsAction.WRITE);
      }

      // force == true: revoke the current holder's lease immediately.
      recoverLeaseInternal(inode, src, holder, clientMachine, true);
    } catch (StandbyException se) {
      // Don't sync on a standby: it must not write the edit log.
      skipSync = true;
      throw se;
    } finally {
      writeUnlock();
      // There might be transactions logged while trying to recover the lease.
      // They need to be sync'ed even when an exception was thrown.
      if (!skipSync) {
        getEditLog().logSync();
      }
    }
    return false;
  }
2483
  /**
   * Core lease-recovery logic, shared by recoverLease (force=true) and by
   * the create/append path (force=false).
   * Must be called with the write lock held.
   *
   * @param fileInode the file whose lease should be recovered; ignored if
   *        null or not under construction
   * @param src path of the file
   * @param holder the client requesting recovery (or re-creation)
   * @param clientMachine the requesting client's machine, used in messages
   * @param force if true, revoke the current lease and recover immediately;
   *        if false, recover only once the holder's soft limit has expired
   * @throws AlreadyBeingCreatedException if the file is being written by
   *         another (or the same) live client
   * @throws RecoveryInProgressException if recovery has started but the
   *         file could not yet be closed
   */
  private void recoverLeaseInternal(INodeFile fileInode,
      String src, String holder, String clientMachine, boolean force)
      throws IOException {
    assert hasWriteLock();
    if (fileInode != null && fileInode.isUnderConstruction()) {
      //
      // If the file is under construction , then it must be in our
      // leases. Find the appropriate lease record.
      //
      Lease lease = leaseManager.getLease(holder);
      //
      // We found the lease for this file. And surprisingly the original
      // holder is trying to recreate this file. This should never occur.
      //
      if (!force && lease != null) {
        Lease leaseFile = leaseManager.getLeaseByPath(src);
        // NOTE(review): lease was looked up by this holder, so
        // lease.getHolder().equals(holder) appears to always be true here
        // when lease != null — confirm whether the first clause is needed.
        if ((leaseFile != null && leaseFile.equals(lease)) ||
            lease.getHolder().equals(holder)) {
          throw new AlreadyBeingCreatedException(
            "failed to create file " + src + " for " + holder +
            " for client " + clientMachine +
            " because current leaseholder is trying to recreate file.");
        }
      }
      //
      // Find the original holder.
      //
      FileUnderConstructionFeature uc = fileInode.getFileUnderConstructionFeature();
      String clientName = uc.getClientName();
      lease = leaseManager.getLease(clientName);
      if (lease == null) {
        // Inconsistent state: UC file with no lease record.
        throw new AlreadyBeingCreatedException(
          "failed to create file " + src + " for " + holder +
          " for client " + clientMachine +
          " because pendingCreates is non-null but no leases found.");
      }
      if (force) {
        // close now: no need to wait for soft lease expiration and
        // close only the file src
        LOG.info("recoverLease: " + lease + ", src=" + src +
          " from client " + clientName);
        internalReleaseLease(lease, src, holder);
      } else {
        assert lease.getHolder().equals(clientName) :
          "Current lease holder " + lease.getHolder() +
          " does not match file creator " + clientName;
        //
        // If the original holder has not renewed in the last SOFTLIMIT
        // period, then start lease recovery.
        //
        if (lease.expiredSoftLimit()) {
          LOG.info("startFile: recover " + lease + ", src=" + src + " client "
              + clientName);
          boolean isClosed = internalReleaseLease(lease, src, null);
          if(!isClosed)
            throw new RecoveryInProgressException(
                "Failed to close file " + src +
                ". Lease recovery is in progress. Try again later.");
        } else {
          // Soft limit not yet expired: the original writer is presumed
          // alive, so reject this request with a descriptive error.
          final BlockInfo lastBlock = fileInode.getLastBlock();
          if (lastBlock != null
              && lastBlock.getBlockUCState() == BlockUCState.UNDER_RECOVERY) {
            throw new RecoveryInProgressException("Recovery in progress, file ["
                + src + "], " + "lease owner [" + lease.getHolder() + "]");
          } else {
            throw new AlreadyBeingCreatedException("Failed to create file ["
                + src + "] for [" + holder + "] for client [" + clientMachine
                + "], because this file is already being created by ["
                + clientName + "] on ["
                + uc.getClientMachine() + "]");
          }
        }
      }
    }
  }
2559
  /**
   * Append to an existing file in the namespace.
   * Retry-cache wrapper: a duplicate RPC replays the previously returned
   * {@link LocatedBlock} payload; otherwise the work is delegated to
   * {@code appendFileInt} and the outcome (with payload) is recorded in the
   * retry cache.
   */
  LocatedBlock appendFile(String src, String holder, String clientMachine)
      throws AccessControlException, SafeModeException,
      FileAlreadyExistsException, FileNotFoundException,
      ParentNotDirectoryException, IOException {
    LocatedBlock lb = null;
    CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
        null);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      // A previous attempt of this RPC already succeeded; replay its result.
      return (LocatedBlock) cacheEntry.getPayload();
    }

    boolean success = false;
    try {
      lb = appendFileInt(src, holder, clientMachine, cacheEntry != null);
      success = true;
      return lb;
    } catch (AccessControlException e) {
      logAuditEvent(false, "append", src);
      throw e;
    } finally {
      // Record the outcome so a retried RPC can be answered from cache.
      RetryCache.setState(cacheEntry, success, lb);
    }
  }
2586
2587 private LocatedBlock appendFileInt(String src, String holder,
2588 String clientMachine, boolean logRetryCache)
2589 throws AccessControlException, SafeModeException,
2590 FileAlreadyExistsException, FileNotFoundException,
2591 ParentNotDirectoryException, IOException {
2592 if (NameNode.stateChangeLog.isDebugEnabled()) {
2593 NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: src=" + src
2594 + ", holder=" + holder
2595 + ", clientMachine=" + clientMachine);
2596 }
2597 boolean skipSync = false;
2598 if (!supportAppends) {
2599 throw new UnsupportedOperationException(
2600 "Append is not enabled on this NameNode. Use the " +
2601 DFS_SUPPORT_APPEND_KEY + " configuration option to enable it.");
2602 }
2603
2604 LocatedBlock lb = null;
2605 FSPermissionChecker pc = getPermissionChecker();
2606 checkOperation(OperationCategory.WRITE);
2607 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2608 writeLock();
2609 try {
2610 checkOperation(OperationCategory.WRITE);
2611 checkNameNodeSafeMode("Cannot append to file" + src);
2612 src = FSDirectory.resolvePath(src, pathComponents, dir);
2613 lb = appendFileInternal(pc, src, holder, clientMachine, logRetryCache);
2614 } catch (StandbyException se) {
2615 skipSync = true;
2616 throw se;
2617 } finally {
2618 writeUnlock();
2619 // There might be transactions logged while trying to recover the lease.
2620 // They need to be sync'ed even when an exception was thrown.
2621 if (!skipSync) {
2622 getEditLog().logSync();
2623 }
2624 }
2625 if (lb != null) {
2626 if (NameNode.stateChangeLog.isDebugEnabled()) {
2627 NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: file "
2628 +src+" for "+holder+" at "+clientMachine
2629 +" block " + lb.getBlock()
2630 +" block size " + lb.getBlock().getNumBytes());
2631 }
2632 }
2633 logAuditEvent(true, "append", src);
2634 return lb;
2635 }
2636
2637 ExtendedBlock getExtendedBlock(Block blk) {
2638 return new ExtendedBlock(blockPoolId, blk);
2639 }
2640
2641 void setBlockPoolId(String bpid) {
2642 blockPoolId = bpid;
2643 blockManager.setBlockPoolId(blockPoolId);
2644 }
2645
  /**
   * The client would like to obtain an additional block for the indicated
   * filename (which is being written-to). Return an array that consists
   * of the block, plus a set of machines. The first on this list should
   * be where the client writes data. Subsequent items in the list must
   * be provided in the connection to the first datanode.
   *
   * Make sure the previous blocks have been reported by datanodes and
   * are replicated. Will return an empty 2-elt array if we want the
   * client to "try again later".
   *
   * @param src path of the file being written
   * @param fileId inode id of the file, used to re-identify it across retries
   * @param clientName lease holder requesting the new block
   * @param previous the block the client believes is the current last block
   * @param excludedNodes datanodes that must not be chosen as targets
   * @param favoredNodes datanodes the client prefers as targets, may be null
   * @return the newly allocated block with its chosen target storages
   */
  LocatedBlock getAdditionalBlock(String src, long fileId, String clientName,
      ExtendedBlock previous, Set<Node> excludedNodes,
      List<String> favoredNodes)
      throws LeaseExpiredException, NotReplicatedYetException,
      QuotaExceededException, SafeModeException, UnresolvedLinkException,
      IOException {
    long blockSize;
    int replication;
    DatanodeDescriptor clientNode = null;

    if(NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("BLOCK* NameSystem.getAdditionalBlock: "
          + src + " inodeId " + fileId + " for " + clientName);
    }

    // Part I. Analyze the state of the file with respect to the input data.
    // Performed under the read lock only; chooseTarget() below runs with no
    // lock held, so the analysis is repeated under the write lock in Part II.
    checkOperation(OperationCategory.READ);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      LocatedBlock[] onRetryBlock = new LocatedBlock[1];
      final INode[] inodes = analyzeFileState(
          src, fileId, clientName, previous, onRetryBlock).getINodes();
      final INodeFile pendingFile = inodes[inodes.length - 1].asFile();

      if (onRetryBlock[0] != null && onRetryBlock[0].getLocations().length > 0) {
        // This is a retry. Just return the last block if having locations.
        return onRetryBlock[0];
      }
      if (pendingFile.getBlocks().length >= maxBlocksPerFile) {
        throw new IOException("File has reached the limit on maximum number of"
            + " blocks (" + DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_KEY
            + "): " + pendingFile.getBlocks().length + " >= "
            + maxBlocksPerFile);
      }
      blockSize = pendingFile.getPreferredBlockSize();
      clientNode = pendingFile.getFileUnderConstructionFeature().getClientNode();
      replication = pendingFile.getFileReplication();
    } finally {
      readUnlock();
    }

    // choose targets for the new block to be allocated.
    // Deliberately done outside the namesystem lock: target selection can
    // be slow (topology-aware placement).
    final DatanodeStorageInfo targets[] = getBlockManager().chooseTarget(
        src, replication, clientNode, excludedNodes, blockSize, favoredNodes);

    // Part II.
    // Allocate a new block, add it to the INode and the BlocksMap.
    Block newBlock = null;
    long offset;
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      // Run the full analysis again, since things could have changed
      // while chooseTarget() was executing.
      LocatedBlock[] onRetryBlock = new LocatedBlock[1];
      INodesInPath inodesInPath =
          analyzeFileState(src, fileId, clientName, previous, onRetryBlock);
      final INode[] inodes = inodesInPath.getINodes();
      final INodeFile pendingFile = inodes[inodes.length - 1].asFile();

      if (onRetryBlock[0] != null) {
        if (onRetryBlock[0].getLocations().length > 0) {
          // This is a retry. Just return the last block if having locations.
          return onRetryBlock[0];
        } else {
          // add new chosen targets to already allocated block and return
          BlockInfo lastBlockInFile = pendingFile.getLastBlock();
          ((BlockInfoUnderConstruction) lastBlockInFile)
              .setExpectedLocations(targets);
          offset = pendingFile.computeFileSize();
          return makeLocatedBlock(lastBlockInFile, targets, offset);
        }
      }

      // commit the last block and complete it if it has minimum replicas
      commitOrCompleteLastBlock(pendingFile,
          ExtendedBlock.getLocalBlock(previous));

      // allocate new block, record block locations in INode.
      newBlock = createNewBlock();
      saveAllocatedBlock(src, inodesInPath, newBlock, targets);

      dir.persistNewBlock(src, pendingFile);
      offset = pendingFile.computeFileSize();
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();

    // Return located block
    return makeLocatedBlock(newBlock, targets, offset);
  }
2753
  /**
   * Verify that the file is in a valid state for a new block allocation and
   * classify the request (fresh allocation vs. various retry scenarios).
   * Must be called with at least the read lock held (asserted below).
   *
   * @param src resolved path of the file being written
   * @param fileId inode id of the file
   * @param clientName lease holder making the request
   * @param previous the block the client believes is the current last block
   * @param onRetryBlock out-parameter: set to the already-allocated last
   *        block when the call is recognized as a retry (case 2 below),
   *        otherwise left null
   * @return the INodesInPath for src; its last inode is the pending file
   * @throws NotReplicatedYetException if the penultimate block has not yet
   *         reached minimal replication
   */
  INodesInPath analyzeFileState(String src,
                                long fileId,
                                String clientName,
                                ExtendedBlock previous,
                                LocatedBlock[] onRetryBlock)
          throws IOException {
    assert hasReadLock();

    checkBlock(previous);
    onRetryBlock[0] = null;
    checkOperation(OperationCategory.WRITE);
    checkNameNodeSafeMode("Cannot add block to " + src);

    // have we exceeded the configured limit of fs objects.
    checkFsObjectLimit();

    Block previousBlock = ExtendedBlock.getLocalBlock(previous);
    final INodesInPath iip = dir.getINodesInPath4Write(src);
    final INodeFile pendingFile
        = checkLease(src, fileId, clientName, iip.getLastINode());
    BlockInfo lastBlockInFile = pendingFile.getLastBlock();
    if (!Block.matchingIdAndGenStamp(previousBlock, lastBlockInFile)) {
      // The block that the client claims is the current last block
      // doesn't match up with what we think is the last block. There are
      // four possibilities:
      // 1) This is the first block allocation of an append() pipeline
      //    which started appending exactly at a block boundary.
      //    In this case, the client isn't passed the previous block,
      //    so it makes the allocateBlock() call with previous=null.
      //    We can distinguish this since the last block of the file
      //    will be exactly a full block.
      // 2) This is a retry from a client that missed the response of a
      //    prior getAdditionalBlock() call, perhaps because of a network
      //    timeout, or because of an HA failover. In that case, we know
      //    by the fact that the client is re-issuing the RPC that it
      //    never began to write to the old block. Hence it is safe to
      //    to return the existing block.
      // 3) This is an entirely bogus request/bug -- we should error out
      //    rather than potentially appending a new block with an empty
      //    one in the middle, etc
      // 4) This is a retry from a client that timed out while
      //    the prior getAdditionalBlock() is still being processed,
      //    currently working on chooseTarget().
      //    There are no means to distinguish between the first and
      //    the second attempts in Part I, because the first one hasn't
      //    changed the namesystem state yet.
      //    We run this analysis again in Part II where case 4 is impossible.

      BlockInfo penultimateBlock = pendingFile.getPenultimateBlock();
      if (previous == null &&
          lastBlockInFile != null &&
          lastBlockInFile.getNumBytes() == pendingFile.getPreferredBlockSize() &&
          lastBlockInFile.isComplete()) {
        // Case 1
        if (NameNode.stateChangeLog.isDebugEnabled()) {
          NameNode.stateChangeLog.debug(
              "BLOCK* NameSystem.allocateBlock: handling block allocation" +
              " writing to a file with a complete previous block: src=" +
              src + " lastBlock=" + lastBlockInFile);
        }
      } else if (Block.matchingIdAndGenStamp(penultimateBlock, previousBlock)) {
        if (lastBlockInFile.getNumBytes() != 0) {
          throw new IOException(
              "Request looked like a retry to allocate block " +
              lastBlockInFile + " but it already contains " +
              lastBlockInFile.getNumBytes() + " bytes");
        }

        // Case 2
        // Return the last block.
        NameNode.stateChangeLog.info("BLOCK* allocateBlock: " +
            "caught retry for allocation of a new block in " +
            src + ". Returning previously allocated block " + lastBlockInFile);
        long offset = pendingFile.computeFileSize();
        onRetryBlock[0] = makeLocatedBlock(lastBlockInFile,
            ((BlockInfoUnderConstruction)lastBlockInFile).getExpectedStorageLocations(),
            offset);
        return iip;
      } else {
        // Case 3
        throw new IOException("Cannot allocate block in " + src + ": " +
            "passed 'previous' block " + previous + " does not match actual " +
            "last block in file " + lastBlockInFile);
      }
    }

    // Check if the penultimate block is minimally replicated
    if (!checkFileProgress(pendingFile, false)) {
      throw new NotReplicatedYetException("Not replicated yet: " + src);
    }
    return iip;
  }
2846
2847 LocatedBlock makeLocatedBlock(Block blk, DatanodeStorageInfo[] locs,
2848 long offset) throws IOException {
2849 LocatedBlock lBlk = new LocatedBlock(
2850 getExtendedBlock(blk), locs, offset, false);
2851 getBlockManager().setBlockToken(
2852 lBlk, BlockTokenSecretManager.AccessMode.WRITE);
2853 return lBlk;
2854 }
2855
  /** @see NameNode#getAdditionalDatanode(String, ExtendedBlock, DatanodeInfo[], DatanodeInfo[], int, String) */
  LocatedBlock getAdditionalDatanode(String src, final ExtendedBlock blk,
      final DatanodeInfo[] existings, final String[] storageIDs,
      final Set<Node> excludes,
      final int numAdditionalNodes, final String clientName
      ) throws IOException {
    //check if the feature is enabled
    dtpReplaceDatanodeOnFailure.checkEnabled();

    final DatanodeDescriptor clientnode;
    final long preferredblocksize;
    final List<DatanodeStorageInfo> chosen;
    checkOperation(OperationCategory.READ);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      //check safe mode
      checkNameNodeSafeMode("Cannot add datanode; src=" + src + ", blk=" + blk);
      src = FSDirectory.resolvePath(src, pathComponents, dir);

      //check lease
      final INodeFile file = checkLease(src, clientName);
      clientnode = file.getFileUnderConstructionFeature().getClientNode();
      preferredblocksize = file.getPreferredBlockSize();

      //find datanode storages
      final DatanodeManager dm = blockManager.getDatanodeManager();
      chosen = Arrays.asList(dm.getDatanodeStorageInfos(existings, storageIDs));
    } finally {
      readUnlock();
    }

    // choose new datanodes.
    // Done outside the lock, like getAdditionalBlock: placement can be slow.
    final DatanodeStorageInfo[] targets = blockManager.getBlockPlacementPolicy(
        ).chooseTarget(src, numAdditionalNodes, clientnode, chosen, true,
        // TODO: get storage type from the file
        excludes, preferredblocksize, StorageType.DEFAULT);
    // COPY-mode token: the client copies data to the new node(s).
    final LocatedBlock lb = new LocatedBlock(blk, targets);
    blockManager.setBlockToken(lb, AccessMode.COPY);
    return lb;
  }
2898
2899 /**
2900 * The client would like to let go of the given block
2901 */
2902 boolean abandonBlock(ExtendedBlock b, String src, String holder)
2903 throws LeaseExpiredException, FileNotFoundException,
2904 UnresolvedLinkException, IOException {
2905 if(NameNode.stateChangeLog.isDebugEnabled()) {
2906 NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: " + b
2907 + "of file " + src);
2908 }
2909 checkOperation(OperationCategory.WRITE);
2910 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2911 writeLock();
2912 try {
2913 checkOperation(OperationCategory.WRITE);
2914 checkNameNodeSafeMode("Cannot abandon block " + b + " for fle" + src);
2915 src = FSDirectory.resolvePath(src, pathComponents, dir);
2916
2917 //
2918 // Remove the block from the pending creates list
2919 //
2920 INodeFile file = checkLease(src, holder);
2921 boolean removed = dir.removeBlock(src, file,
2922 ExtendedBlock.getLocalBlock(b));
2923 if (!removed) {
2924 return true;
2925 }
2926 if(NameNode.stateChangeLog.isDebugEnabled()) {
2927 NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: "
2928 + b + " is removed from pendingCreates");
2929 }
2930 dir.persistBlocks(src, file, false);
2931 } finally {
2932 writeUnlock();
2933 }
2934 getEditLog().logSync();
2935
2936 return true;
2937 }
2938
  /**
   * Make sure that we still have the lease on this file.
   * Convenience overload that loads the inode for {@code src} itself and
   * passes {@link INodeId#GRANDFATHER_INODE_ID} as the file id
   * (presumably a sentinel that relaxes the inode-id match in
   * {@code INodeId.checkId} — verify against that method).
   */
  private INodeFile checkLease(String src, String holder)
      throws LeaseExpiredException, UnresolvedLinkException,
      FileNotFoundException {
    return checkLease(src, INodeId.GRANDFATHER_INODE_ID, holder,
        dir.getINode(src));
  }
2946
  /**
   * Verify that {@code holder} still holds the lease on the file at
   * {@code src}, and that the file is in a writable state.
   * Must be called with at least the read lock held (asserted below).
   *
   * @param src path of the file, used only for error messages
   * @param fileId expected inode id, validated via {@code INodeId.checkId}
   * @param holder client claiming the lease; if null, the holder/creator
   *        match is skipped
   * @param inode the inode resolved for src, may be null
   * @return the file inode, guaranteed under construction and not deleted
   * @throws LeaseExpiredException if the file is missing, closed, or leased
   *         to a different client
   * @throws FileNotFoundException if the file has been deleted
   */
  private INodeFile checkLease(String src, long fileId, String holder,
      INode inode) throws LeaseExpiredException, FileNotFoundException {
    assert hasReadLock();
    if (inode == null || !inode.isFile()) {
      Lease lease = leaseManager.getLease(holder);
      throw new LeaseExpiredException(
          "No lease on " + src + ": File does not exist. "
          + (lease != null ? lease.toString()
              : "Holder " + holder + " does not have any open files."));
    }
    final INodeFile file = inode.asFile();
    if (!file.isUnderConstruction()) {
      Lease lease = leaseManager.getLease(holder);
      throw new LeaseExpiredException(
          "No lease on " + src + ": File is not open for writing. "
          + (lease != null ? lease.toString()
              : "Holder " + holder + " does not have any open files."));
    }
    // No further modification is allowed on a deleted file.
    // A file is considered deleted, if it has no parent or is marked
    // as deleted in the snapshot feature.
    if (file.getParent() == null || (file.isWithSnapshot() &&
        file.getFileWithSnapshotFeature().isCurrentFileDeleted())) {
      throw new FileNotFoundException(src);
    }
    String clientName = file.getFileUnderConstructionFeature().getClientName();
    if (holder != null && !clientName.equals(holder)) {
      throw new LeaseExpiredException("Lease mismatch on " + src + " owned by "
          + clientName + " but is accessed by " + holder);
    }
    INodeId.checkId(fileId, file);
    return file;
  }
2980
  /**
   * Complete in-progress write to the given file.
   * @param src path of the file being closed
   * @param holder lease holder (client name) closing the file
   * @param last the block the client believes to be the file's last block
   * @param fileId inode id of the file, checked against the resolved inode
   * @return true if successful, false if the client should continue to retry
   *         (e.g if not all blocks have reached minimum replication yet)
   * @throws IOException on error (eg lease mismatch, file not open, file deleted)
   */
  boolean completeFile(String src, String holder,
                       ExtendedBlock last, long fileId)
    throws SafeModeException, UnresolvedLinkException, IOException {
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* NameSystem.completeFile: " +
          src + " for " + holder);
    }
    checkBlock(last);
    boolean success = false;
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot complete file " + src);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      success = completeFileInternal(src, holder,
        ExtendedBlock.getLocalBlock(last), fileId);
    } finally {
      writeUnlock();
    }
    // Sync outside the lock; the close must be durable before we report it.
    getEditLog().logSync();
    if (success) {
      NameNode.stateChangeLog.info("DIR* completeFile: " + src
          + " is closed by " + holder);
    }
    return success;
  }
3015
  /**
   * Implementation of completeFile; must be called with the write lock held
   * (asserted below).
   *
   * @param src resolved path of the file
   * @param holder lease holder closing the file
   * @param last the client's view of the last block (local form)
   * @param fileId inode id of the file for lease validation
   * @return true if the file was (or already is) closed; false if the client
   *         should retry because blocks lack minimal replication
   */
  private boolean completeFileInternal(String src,
      String holder, Block last, long fileId) throws SafeModeException,
      UnresolvedLinkException, IOException {
    assert hasWriteLock();
    final INodesInPath iip = dir.getLastINodeInPath(src);
    final INodeFile pendingFile;
    try {
      pendingFile = checkLease(src, fileId, holder, iip.getINode(0));
    } catch (LeaseExpiredException lee) {
      final INode inode = dir.getINode(src);
      if (inode != null
          && inode.isFile()
          && !inode.asFile().isUnderConstruction()) {
        // This could be a retry RPC - i.e the client tried to close
        // the file, but missed the RPC response. Thus, it is trying
        // again to close the file. If the file still exists and
        // the client's view of the last block matches the actual
        // last block, then we'll treat it as a successful close.
        // See HDFS-3031.
        final Block realLastBlock = inode.asFile().getLastBlock();
        if (Block.matchingIdAndGenStamp(last, realLastBlock)) {
          NameNode.stateChangeLog.info("DIR* completeFile: " +
              "request from " + holder + " to complete " + src +
              " which is already closed. But, it appears to be an RPC " +
              "retry. Returning success");
          return true;
        }
      }
      throw lee;
    }
    // Check the state of the penultimate block. It should be completed
    // before attempting to complete the last one.
    if (!checkFileProgress(pendingFile, false)) {
      return false;
    }

    // commit the last block and complete it if it has minimum replicas
    commitOrCompleteLastBlock(pendingFile, last);

    // Re-check: commit above may not have completed the last block yet.
    if (!checkFileProgress(pendingFile, true)) {
      return false;
    }

    finalizeINodeFileUnderConstruction(src, pendingFile,
        iip.getLatestSnapshotId());
    return true;
  }
3063
3064 /**
3065 * Save allocated block at the given pending filename
3066 *
3067 * @param src path to the file
3068 * @param inodesInPath representing each of the components of src.
3069 * The last INode is the INode for the file.
3070 * @throws QuotaExceededException If addition of block exceeds space quota
3071 */
3072 BlockInfo saveAllocatedBlock(String src, INodesInPath inodes,
3073 Block newBlock, DatanodeStorageInfo[] targets)
3074 throws IOException {
3075 assert hasWriteLock();
3076 BlockInfo b = dir.addBlock(src, inodes, newBlock, targets);
3077 NameNode.stateChangeLog.info("BLOCK* allocateBlock: " + src + ". "
3078 + getBlockPoolId() + " " + b);
3079 DatanodeStorageInfo.incrementBlocksScheduled(targets);
3080 return b;
3081 }
3082
3083 /**
3084 * Create new block with a unique block id and a new generation stamp.
3085 */
3086 Block createNewBlock() throws IOException {
3087 assert hasWriteLock();
3088 Block b = new Block(nextBlockId(), 0, 0);
3089 // Increment the generation stamp for every new block.
3090 b.setGenerationStamp(nextGenerationStamp(false));
3091 return b;
3092 }
3093
3094 /**
3095 * Check that the indicated file's blocks are present and
3096 * replicated. If not, return false. If checkall is true, then check
3097 * all blocks, otherwise check only penultimate block.
3098 */
3099 boolean checkFileProgress(INodeFile v, boolean checkall) {
3100 readLock();
3101 try {
3102 if (checkall) {
3103 //
3104 // check all blocks of the file.
3105 //
3106 for (BlockInfo block: v.getBlocks()) {
3107 if (!block.isComplete()) {
3108 LOG.info("BLOCK* checkFileProgress: " + block
3109 + " has not reached minimal replication "
3110 + blockManager.minReplication);
3111 return false;
3112 }
3113 }
3114 } else {
3115 //
3116 // check the penultimate block of this file
3117 //
3118 BlockInfo b = v.getPenultimateBlock();
3119 if (b != null && !b.isComplete()) {
3120 LOG.warn("BLOCK* checkFileProgress: " + b
3121 + " has not reached minimal replication "
3122 + blockManager.minReplication);
3123 return false;
3124 }
3125 }
3126 return true;
3127 } finally {
3128 readUnlock();
3129 }
3130 }
3131
3132 ////////////////////////////////////////////////////////////////
3133 // Here's how to handle block-copy failure during client write:
3134 // -- As usual, the client's write should result in a streaming
3135 // backup write to a k-machine sequence.
3136 // -- If one of the backup machines fails, no worries. Fail silently.
3137 // -- Before client is allowed to close and finalize file, make sure
3138 // that the blocks are backed up. Namenode may have to issue specific backup
3139 // commands to make up for earlier datanode failures. Once all copies
3140 // are made, edit namespace and return to client.
3141 ////////////////////////////////////////////////////////////////
3142
3143 /**
3144 * Change the indicated filename.
3145 * @deprecated Use {@link #renameTo(String, String, Options.Rename...)} instead.
3146 */
3147 @Deprecated
3148 boolean renameTo(String src, String dst)
3149 throws IOException, UnresolvedLinkException {
3150 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
3151 if (cacheEntry != null && cacheEntry.isSuccess()) {
3152 return true; // Return previous response
3153 }
3154 boolean ret = false;
3155 try {
3156 ret = renameToInt(src, dst, cacheEntry != null);
3157 } catch (AccessControlException e) {
3158 logAuditEvent(false, "rename", src, dst, null);
3159 throw e;
3160 } finally {
3161 RetryCache.setState(cacheEntry, ret);
3162 }
3163 return ret;
3164 }
3165
  /**
   * Implementation behind the deprecated two-argument rename.
   *
   * @param src source path
   * @param dst destination path; must be a valid name
   * @param logRetryCache whether to record the RPC id in the edit log for
   *        retry-cache rebuilding
   * @return true if the rename succeeded
   */
  private boolean renameToInt(String src, String dst, boolean logRetryCache)
    throws IOException, UnresolvedLinkException {
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: " + src +
          " to " + dst);
    }
    if (!DFSUtil.isValidName(dst)) {
      throw new IOException("Invalid name: " + dst);
    }
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] srcComponents = FSDirectory.getPathComponentsForReservedPath(src);
    byte[][] dstComponents = FSDirectory.getPathComponentsForReservedPath(dst);
    boolean status = false;
    HdfsFileStatus resultingStat = null;
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot rename " + src);
      src = FSDirectory.resolvePath(src, srcComponents, dir);
      dst = FSDirectory.resolvePath(dst, dstComponents, dir);
      checkOperation(OperationCategory.WRITE);
      status = renameToInternal(pc, src, dst, logRetryCache);
      if (status) {
        // Capture the post-rename status for the audit log while still
        // holding the lock.
        resultingStat = getAuditFileInfo(dst, false);
      }
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    if (status) {
      logAuditEvent(true, "rename", src, dst, resultingStat);
    }
    return status;
  }
3201
3202 /** @deprecated See {@link #renameTo(String, String)} */
3203 @Deprecated
3204 private boolean renameToInternal(FSPermissionChecker pc, String src,
3205 String dst, boolean logRetryCache) throws IOException,
3206 UnresolvedLinkException {
3207 assert hasWriteLock();
3208 if (isPermissionEnabled) {
3209 //We should not be doing this. This is move() not renameTo().
3210 //but for now,
3211 //NOTE: yes, this is bad! it's assuming much lower level behavior
3212 // of rewriting the dst
3213 String actualdst = dir.isDir(dst)?
3214 dst + Path.SEPARATOR + new Path(src).getName(): dst;
3215 // Rename does not operates on link targets
3216 // Do not resolveLink when checking permissions of src and dst
3217 // Check write access to parent of src
3218 checkPermission(pc, src, false, null, FsAction.WRITE, null, null, false);
3219 // Check write access to ancestor of dst
3220 checkPermission(pc, actualdst, false, FsAction.WRITE, null, null, null,
3221 false);
3222 }
3223
3224 if (dir.renameTo(src, dst, logRetryCache)) {
3225 return true;
3226 }
3227 return false;
3228 }
3229
3230
  /**
   * Rename src to dst, honoring the given {@link Options.Rename} flags.
   * Retry-cache aware: a duplicate RPC returns immediately; the outcome is
   * recorded in the retry cache under the write lock's finally block.
   */
  void renameTo(String src, String dst, Options.Rename... options)
      throws IOException, UnresolvedLinkException {
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: with options - "
          + src + " to " + dst);
    }
    if (!DFSUtil.isValidName(dst)) {
      throw new InvalidPathException("Invalid name: " + dst);
    }
    final FSPermissionChecker pc = getPermissionChecker();

    checkOperation(OperationCategory.WRITE);
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    byte[][] srcComponents = FSDirectory.getPathComponentsForReservedPath(src);
    byte[][] dstComponents = FSDirectory.getPathComponentsForReservedPath(dst);
    HdfsFileStatus resultingStat = null;
    boolean success = false;
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot rename " + src);
      src = FSDirectory.resolvePath(src, srcComponents, dir);
      dst = FSDirectory.resolvePath(dst, dstComponents, dir);
      renameToInternal(pc, src, dst, cacheEntry != null, options);
      // Capture the post-rename status for the audit log while still
      // holding the lock.
      resultingStat = getAuditFileInfo(dst, false);
      success = true;
    } finally {
      writeUnlock();
      RetryCache.setState(cacheEntry, success);
    }
    getEditLog().logSync();
    if (resultingStat != null) {
      // Include the rename options in the audit command string.
      StringBuilder cmd = new StringBuilder("rename options=");
      for (Rename option : options) {
        cmd.append(option.value()).append(" ");
      }
      logAuditEvent(true, cmd.toString(), src, dst, resultingStat);
    }
  }
3274
  /**
   * Perform the rename under the write lock after checking permissions.
   * Caller handles edit-log sync and audit logging.
   */
  private void renameToInternal(FSPermissionChecker pc, String src, String dst,
      boolean logRetryCache, Options.Rename... options) throws IOException {
    assert hasWriteLock();
    if (isPermissionEnabled) {
      // Rename does not operate on link targets, so do not resolve links
      // when checking permissions of src and dst.
      // Check write access to parent of src
      checkPermission(pc, src, false, null, FsAction.WRITE, null, null, false);
      // Check write access to ancestor of dst
      checkPermission(pc, dst, false, FsAction.WRITE, null, null, null, false);
    }

    dir.renameTo(src, dst, logRetryCache, options);
  }
3289
3290 /**
3291 * Remove the indicated file from namespace.
3292 *
3293 * @see ClientProtocol#delete(String, boolean) for detailed description and
3294 * description of exceptions
3295 */
3296 boolean delete(String src, boolean recursive)
3297 throws AccessControlException, SafeModeException,
3298 UnresolvedLinkException, IOException {
3299 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
3300 if (cacheEntry != null && cacheEntry.isSuccess()) {
3301 return true; // Return previous response
3302 }
3303 boolean ret = false;
3304 try {
3305 ret = deleteInt(src, recursive, cacheEntry != null);
3306 } catch (AccessControlException e) {
3307 logAuditEvent(false, "delete", src);
3308 throw e;
3309 } finally {
3310 RetryCache.setState(cacheEntry, ret);
3311 }
3312 return ret;
3313 }
3314
3315 private boolean deleteInt(String src, boolean recursive, boolean logRetryCache)
3316 throws AccessControlException, SafeModeException,
3317 UnresolvedLinkException, IOException {
3318 if (NameNode.stateChangeLog.isDebugEnabled()) {
3319 NameNode.stateChangeLog.debug("DIR* NameSystem.delete: " + src);
3320 }
3321 boolean status = deleteInternal(src, recursive, true, logRetryCache);
3322 if (status) {
3323 logAuditEvent(true, "delete", src);
3324 }
3325 return status;
3326 }
3327
3328 private FSPermissionChecker getPermissionChecker()
3329 throws AccessControlException {
3330 try {
3331 return new FSPermissionChecker(fsOwnerShortUserName, supergroup, getRemoteUser());
3332 } catch (IOException ioe) {
3333 throw new AccessControlException(ioe);
3334 }
3335 }
3336
3337 /**
3338 * Remove a file/directory from the namespace.
3339 * <p>
3340 * For large directories, deletion is incremental. The blocks under
3341 * the directory are collected and deleted a small number at a time holding
3342 * the {@link FSNamesystem} lock.
3343 * <p>
3344 * For small directory or file the deletion is done in one shot.
3345 *
3346 * @see ClientProtocol#delete(String, boolean) for description of exceptions
3347 */
  private boolean deleteInternal(String src, boolean recursive,
      boolean enforcePermission, boolean logRetryCache)
      throws AccessControlException, SafeModeException, UnresolvedLinkException,
             IOException {
    BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
    List<INode> removedINodes = new ChunkedArrayList<INode>();
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    boolean ret = false;
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot delete " + src);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      // Refuse to delete a non-empty directory unless recursive was asked for.
      if (!recursive && dir.isNonEmptyDirectory(src)) {
        throw new IOException(src + " is non empty");
      }
      if (enforcePermission && isPermissionEnabled) {
        // WRITE on the parent plus full access to the subtree being removed.
        checkPermission(pc, src, false, null, FsAction.WRITE, null,
            FsAction.ALL, false);
      }
      // Unlink the target directory from directory tree
      if (!dir.delete(src, collectedBlocks, removedINodes, logRetryCache)) {
        return false;
      }
      ret = true;
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    // Blocks are removed outside the FSN lock, a batch at a time, so a
    // large deletion does not starve other operations.
    removeBlocks(collectedBlocks); // Incremental deletion of blocks
    collectedBlocks.clear();

    // Drop the removed inodes from the inode map under the directory lock.
    dir.writeLock();
    try {
      dir.removeFromInodeMap(removedINodes);
    } finally {
      dir.writeUnlock();
    }
    removedINodes.clear();
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* Namesystem.delete: "
        + src +" is removed");
    }
    return ret;
  }
3395
3396 /**
3397 * From the given list, incrementally remove the blocks from blockManager
3398 * Writelock is dropped and reacquired every BLOCK_DELETION_INCREMENT to
3399 * ensure that other waiters on the lock can get in. See HDFS-2938
3400 *
3401 * @param blocks
3402 * An instance of {@link BlocksMapUpdateInfo} which contains a list
3403 * of blocks that need to be removed from blocksMap
3404 */
3405 void removeBlocks(BlocksMapUpdateInfo blocks) {
3406 List<Block> toDeleteList = blocks.getToDeleteList();
3407 Iterator<Block> iter = toDeleteList.iterator();
3408 while (iter.hasNext()) {
3409 writeLock();
3410 try {
3411 for (int i = 0; i < BLOCK_DELETION_INCREMENT && iter.hasNext(); i++) {
3412 blockManager.removeBlock(iter.next());
3413 }
3414 } finally {
3415 writeUnlock();
3416 }
3417 }
3418 }
3419
3420 /**
3421 * Remove leases, inodes and blocks related to a given path
3422 * @param src The given path
3423 * @param blocks Containing the list of blocks to be deleted from blocksMap
3424 * @param removedINodes Containing the list of inodes to be removed from
3425 * inodesMap
3426 */
3427 void removePathAndBlocks(String src, BlocksMapUpdateInfo blocks,
3428 List<INode> removedINodes) {
3429 assert hasWriteLock();
3430 leaseManager.removeLeaseWithPrefixPath(src);
3431 // remove inodes from inodesMap
3432 if (removedINodes != null) {
3433 dir.removeFromInodeMap(removedINodes);
3434 removedINodes.clear();
3435 }
3436 if (blocks == null) {
3437 return;
3438 }
3439
3440 removeBlocksAndUpdateSafemodeTotal(blocks);
3441 }
3442
3443 /**
3444 * Removes the blocks from blocksmap and updates the safemode blocks total
3445 *
3446 * @param blocks
3447 * An instance of {@link BlocksMapUpdateInfo} which contains a list
3448 * of blocks that need to be removed from blocksMap
3449 */
3450 void removeBlocksAndUpdateSafemodeTotal(BlocksMapUpdateInfo blocks) {
3451 assert hasWriteLock();
3452 // In the case that we are a Standby tailing edits from the
3453 // active while in safe-mode, we need to track the total number
3454 // of blocks and safe blocks in the system.
3455 boolean trackBlockCounts = isSafeModeTrackingBlocks();
3456 int numRemovedComplete = 0, numRemovedSafe = 0;
3457
3458 for (Block b : blocks.getToDeleteList()) {
3459 if (trackBlockCounts) {
3460 BlockInfo bi = getStoredBlock(b);
3461 if (bi.isComplete()) {
3462 numRemovedComplete++;
3463 if (bi.numNodes() >= blockManager.minReplication) {
3464 numRemovedSafe++;
3465 }
3466 }
3467 }
3468 blockManager.removeBlock(b);
3469 }
3470 if (trackBlockCounts) {
3471 if (LOG.isDebugEnabled()) {
3472 LOG.debug("Adjusting safe-mode totals for deletion."
3473 + "decreasing safeBlocks by " + numRemovedSafe
3474 + ", totalBlocks by " + numRemovedComplete);
3475 }
3476 adjustSafeModeBlockTotals(-numRemovedSafe, -numRemovedComplete);
3477 }
3478 }
3479
3480 /**
3481 * @see SafeModeInfo#shouldIncrementallyTrackBlocks
3482 */
3483 private boolean isSafeModeTrackingBlocks() {
3484 if (!haEnabled) {
3485 // Never track blocks incrementally in non-HA code.
3486 return false;
3487 }
3488 SafeModeInfo sm = this.safeMode;
3489 return sm != null && sm.shouldIncrementallyTrackBlocks();
3490 }
3491
3492 /**
3493 * Get the file info for a specific file.
3494 *
3495 * @param src The string representation of the path to the file
3496 * @param resolveLink whether to throw UnresolvedLinkException
3497 * if src refers to a symlink
3498 *
3499 * @throws AccessControlException if access is denied
3500 * @throws UnresolvedLinkException if a symlink is encountered.
3501 *
3502 * @return object containing information regarding the file
3503 * or null if file not found
3504 * @throws StandbyException
3505 */
  HdfsFileStatus getFileInfo(String src, boolean resolveLink)
      throws AccessControlException, UnresolvedLinkException,
             StandbyException, IOException {
    if (!DFSUtil.isValidName(src)) {
      throw new InvalidPathException("Invalid file name: " + src);
    }
    HdfsFileStatus stat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.READ);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      if (isPermissionEnabled) {
        // Traverse-only check: no READ/WRITE bit required on the target.
        checkPermission(pc, src, false, null, null, null, null, resolveLink);
      }
      stat = dir.getFileInfo(src, resolveLink);
    } catch (AccessControlException e) {
      // Audit the denial; the success path is audited after the lock is
      // released.
      logAuditEvent(false, "getfileinfo", src);
      throw e;
    } finally {
      readUnlock();
    }
    logAuditEvent(true, "getfileinfo", src);
    return stat;
  }
3533
3534 /**
3535 * Returns true if the file is closed
3536 */
3537 boolean isFileClosed(String src)
3538 throws AccessControlException, UnresolvedLinkException,
3539 StandbyException, IOException {
3540 FSPermissionChecker pc = getPermissionChecker();
3541 checkOperation(OperationCategory.READ);
3542 readLock();
3543 try {
3544 checkOperation(OperationCategory.READ);
3545 if (isPermissionEnabled) {
3546 checkTraverse(pc, src);
3547 }
3548 return !INodeFile.valueOf(dir.getINode(src), src).isUnderConstruction();
3549 } catch (AccessControlException e) {
3550 if (isAuditEnabled() && isExternalInvocation()) {
3551 logAuditEvent(false, "isFileClosed", src);
3552 }
3553 throw e;
3554 } finally {
3555 readUnlock();
3556 }
3557 }
3558
3559 /**
3560 * Create all the necessary directories
3561 */
3562 boolean mkdirs(String src, PermissionStatus permissions,
3563 boolean createParent) throws IOException, UnresolvedLinkException {
3564 boolean ret = false;
3565 try {
3566 ret = mkdirsInt(src, permissions, createParent);
3567 } catch (AccessControlException e) {
3568 logAuditEvent(false, "mkdirs", src);
3569 throw e;
3570 }
3571 return ret;
3572 }
3573
  /**
   * Lock-managing wrapper for mkdirs: validates the path, applies the
   * change under the write lock, syncs the edit log, and audits success.
   */
  private boolean mkdirsInt(String src, PermissionStatus permissions,
      boolean createParent) throws IOException, UnresolvedLinkException {
    if(NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* NameSystem.mkdirs: " + src);
    }
    if (!DFSUtil.isValidName(src)) {
      throw new InvalidPathException(src);
    }
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    HdfsFileStatus resultingStat = null;
    boolean status = false;
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot create directory " + src);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      status = mkdirsInternal(pc, src, permissions, createParent);
      if (status) {
        // Capture the stat for the audit log while still under the lock.
        resultingStat = dir.getFileInfo(src, false);
      }
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    if (status) {
      logAuditEvent(true, "mkdirs", src, null, resultingStat);
    }
    return status;
  }
3605
3606 /**
3607 * Create all the necessary directories
3608 */
3609 private boolean mkdirsInternal(FSPermissionChecker pc, String src,
3610 PermissionStatus permissions, boolean createParent)
3611 throws IOException, UnresolvedLinkException {
3612 assert hasWriteLock();
3613 if (isPermissionEnabled) {
3614 checkTraverse(pc, src);
3615 }
3616 if (dir.isDirMutable(src)) {
3617 // all the users of mkdirs() are used to expect 'true' even if
3618 // a new directory is not created.
3619 return true;
3620 }
3621 if (isPermissionEnabled) {
3622 checkAncestorAccess(pc, src, FsAction.WRITE);
3623 }
3624 if (!createParent) {
3625 verifyParentDir(src);
3626 }
3627
3628 // validate that we have enough inodes. This is, at best, a
3629 // heuristic because the mkdirs() operation might need to
3630 // create multiple inodes.
3631 checkFsObjectLimit();
3632
3633 if (!dir.mkdirs(src, permissions, false, now())) {
3634 throw new IOException("Failed to create directory: " + src);
3635 }
3636 return true;
3637 }
3638
3639 /**
3640 * Get the content summary for a specific file/dir.
3641 *
3642 * @param src The string representation of the path to the file
3643 *
3644 * @throws AccessControlException if access is denied
3645 * @throws UnresolvedLinkException if a symlink is encountered.
3646 * @throws FileNotFoundException if no file exists
3647 * @throws StandbyException
3648 * @throws IOException for issues with writing to the audit log
3649 *
3650 * @return object containing information regarding the file
3651 * or null if file not found
3652 */
  ContentSummary getContentSummary(String src) throws IOException {
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.READ);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    readLock();
    // Assume success; flipped to false only on an access-control failure so
    // the audit entry in the finally block records the right outcome.
    boolean success = true;
    try {
      checkOperation(OperationCategory.READ);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      if (isPermissionEnabled) {
        // READ_EXECUTE is required on the whole subtree being summarized.
        checkPermission(pc, src, false, null, null, null, FsAction.READ_EXECUTE);
      }
      return dir.getContentSummary(src);

    } catch (AccessControlException ace) {
      success = false;
      throw ace;
    } finally {
      readUnlock();
      logAuditEvent(success, "contentSummary", src);
    }
  }
3675
3676 /**
3677 * Set the namespace quota and diskspace quota for a directory.
3678 * See {@link ClientProtocol#setQuota(String, long, long)} for the
3679 * contract.
3680 *
3681 * Note: This does not support ".inodes" relative path.
3682 */
  void setQuota(String path, long nsQuota, long dsQuota)
      throws IOException, UnresolvedLinkException {
    // Quota changes are restricted to the superuser.
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set quota on " + path);
      dir.setQuota(path, nsQuota, dsQuota);
    } finally {
      writeUnlock();
    }
    // Make the quota change durable before returning to the client.
    getEditLog().logSync();
  }
3697
  /** Persist all metadata about this file.
   * @param src The string representation of the path
   * @param clientName The string representation of the client
   * @param lastBlockLength The length of the last block
   *                        under construction reported from client.
   * @throws IOException if path does not exist
   */
  void fsync(String src, String clientName, long lastBlockLength)
      throws IOException, UnresolvedLinkException {
    NameNode.stateChangeLog.info("BLOCK* fsync: " + src + " for " + clientName);
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot fsync file " + src);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      // Verifies the caller still holds the lease on the file.
      INodeFile pendingFile = checkLease(src, clientName);
      if (lastBlockLength > 0) {
        // Update the NN's view of the in-progress last block's length.
        pendingFile.getFileUnderConstructionFeature().updateLengthOfLastBlock(
            pendingFile, lastBlockLength);
      }
      dir.persistBlocks(src, pendingFile, false);
    } finally {
      writeUnlock();
    }
    // Durably record the block list before acknowledging the fsync.
    getEditLog().logSync();
  }
3726
3727 /**
3728 * Move a file that is being written to be immutable.
3729 * @param src The filename
3730 * @param lease The lease for the client creating the file
3731 * @param recoveryLeaseHolder reassign lease to this holder if the last block
3732 * needs recovery; keep current holder if null.
3733 * @throws AlreadyBeingCreatedException if file is waiting to achieve minimal
3734 * replication;<br>
3735 * RecoveryInProgressException if lease recovery is in progress.<br>
3736 * IOException in case of an error.
3737 * @return true if file has been successfully finalized and closed or
3738 * false if block recovery has been initiated. Since the lease owner
3739 * has been changed and logged, caller should call logSync().
3740 */
  boolean internalReleaseLease(Lease lease, String src,
      String recoveryLeaseHolder) throws AlreadyBeingCreatedException,
      IOException, UnresolvedLinkException {
    LOG.info("Recovering " + lease + ", src=" + src);
    assert !isInSafeMode();
    assert hasWriteLock();

    final INodesInPath iip = dir.getLastINodeInPath(src);
    final INodeFile pendingFile = iip.getINode(0).asFile();
    int nrBlocks = pendingFile.numBlocks();
    BlockInfo[] blocks = pendingFile.getBlocks();

    // Count the leading run of COMPLETE blocks. Each COMPLETE block is
    // expected to already meet minimum replication.
    int nrCompleteBlocks;
    BlockInfo curBlock = null;
    for(nrCompleteBlocks = 0; nrCompleteBlocks < nrBlocks; nrCompleteBlocks++) {
      curBlock = blocks[nrCompleteBlocks];
      if(!curBlock.isComplete())
        break;
      assert blockManager.checkMinReplication(curBlock) :
        "A COMPLETE block is not minimally replicated in " + src;
    }

    // If there are no incomplete blocks associated with this file,
    // then reap lease immediately and close the file.
    if(nrCompleteBlocks == nrBlocks) {
      finalizeINodeFileUnderConstruction(src, pendingFile,
          iip.getLatestSnapshotId());
      NameNode.stateChangeLog.warn("BLOCK*"
        + " internalReleaseLease: All existing blocks are COMPLETE,"
        + " lease removed, file closed.");
      return true;  // closed!
    }

    // Only the last and the penultimate blocks may be in non COMPLETE state.
    // If the penultimate block is not COMPLETE, then it must be COMMITTED.
    if(nrCompleteBlocks < nrBlocks - 2 ||
       nrCompleteBlocks == nrBlocks - 2 &&
         curBlock != null &&
         curBlock.getBlockUCState() != BlockUCState.COMMITTED) {
      final String message = "DIR* NameSystem.internalReleaseLease: "
        + "attempt to release a create lock on "
        + src + " but file is already closed.";
      NameNode.stateChangeLog.warn(message);
      throw new IOException(message);
    }

    // The last block is not COMPLETE, and
    // that the penultimate block if exists is either COMPLETE or COMMITTED
    final BlockInfo lastBlock = pendingFile.getLastBlock();
    BlockUCState lastBlockState = lastBlock.getBlockUCState();
    BlockInfo penultimateBlock = pendingFile.getPenultimateBlock();
    boolean penultimateBlockMinReplication;
    BlockUCState penultimateBlockState;
    if (penultimateBlock == null) {
      penultimateBlockState = BlockUCState.COMPLETE;
      // If penultimate block doesn't exist then its minReplication is met
      penultimateBlockMinReplication = true;
    } else {
      // NOTE(review): the state is hard-coded to COMMITTED here instead of
      // reading penultimateBlock.getBlockUCState(); the guard above only
      // ensures COMPLETE-or-COMMITTED, so the assert below is vacuous when
      // the block is actually COMPLETE — confirm this is intended.
      penultimateBlockState = BlockUCState.COMMITTED;
      penultimateBlockMinReplication =
        blockManager.checkMinReplication(penultimateBlock);
    }
    assert penultimateBlockState == BlockUCState.COMPLETE ||
           penultimateBlockState == BlockUCState.COMMITTED :
           "Unexpected state of penultimate block in " + src;

    switch(lastBlockState) {
    case COMPLETE:
      assert false : "Already checked that the last block is incomplete";
      break;
    case COMMITTED:
      // Close file if committed blocks are minimally replicated
      if(penultimateBlockMinReplication &&
          blockManager.checkMinReplication(lastBlock)) {
        finalizeINodeFileUnderConstruction(src, pendingFile,
            iip.getLatestSnapshotId());
        NameNode.stateChangeLog.warn("BLOCK*"
          + " internalReleaseLease: Committed blocks are minimally replicated,"
          + " lease removed, file closed.");
        return true;  // closed!
      }
      // Cannot close file right now, since some blocks
      // are not yet minimally replicated.
      // This may potentially cause infinite loop in lease recovery
      // if there are no valid replicas on data-nodes.
      String message = "DIR* NameSystem.internalReleaseLease: " +
          "Failed to release lease for file " + src +
          ". Committed blocks are waiting to be minimally replicated." +
          " Try again later.";
      NameNode.stateChangeLog.warn(message);
      throw new AlreadyBeingCreatedException(message);
    case UNDER_CONSTRUCTION:
    case UNDER_RECOVERY:
      final BlockInfoUnderConstruction uc = (BlockInfoUnderConstruction)lastBlock;
      // setup the last block locations from the blockManager if not known
      if (uc.getNumExpectedLocations() == 0) {
        uc.setExpectedLocations(blockManager.getStorages(lastBlock));
      }

      if (uc.getNumExpectedLocations() == 0 && uc.getNumBytes() == 0) {
        // There is no datanode reported to this block.
        // may be client have crashed before writing data to pipeline.
        // This blocks doesn't need any recovery.
        // We can remove this block and close the file.
        pendingFile.removeLastBlock(lastBlock);
        finalizeINodeFileUnderConstruction(src, pendingFile,
            iip.getLatestSnapshotId());
        NameNode.stateChangeLog.warn("BLOCK* internalReleaseLease: "
            + "Removed empty last block and closed file.");
        return true;
      }
      // start recovery of the last block for this file
      long blockRecoveryId = nextGenerationStamp(isLegacyBlock(uc));
      lease = reassignLease(lease, src, recoveryLeaseHolder, pendingFile);
      uc.initializeBlockRecovery(blockRecoveryId);
      leaseManager.renewLease(lease);
      // Cannot close file right now, since the last block requires recovery.
      // This may potentially cause infinite loop in lease recovery
      // if there are no valid replicas on data-nodes.
      NameNode.stateChangeLog.warn(
                "DIR* NameSystem.internalReleaseLease: " +
                "File " + src + " has not been closed." +
               " Lease recovery is in progress. " +
                "RecoveryId = " + blockRecoveryId + " for block " + lastBlock);
      break;
    }
    return false;
  }
3869
3870 private Lease reassignLease(Lease lease, String src, String newHolder,
3871 INodeFile pendingFile) {
3872 assert hasWriteLock();
3873 if(newHolder == null)
3874 return lease;
3875 // The following transaction is not synced. Make sure it's sync'ed later.
3876 logReassignLease(lease.getHolder(), src, newHolder);
3877 return reassignLeaseInternal(lease, src, newHolder, pendingFile);
3878 }
3879
  /** Update the file's client name and move the lease to the new holder. */
  Lease reassignLeaseInternal(Lease lease, String src, String newHolder,
      INodeFile pendingFile) {
    assert hasWriteLock();
    // Keep the under-construction feature's client in step with the lease.
    pendingFile.getFileUnderConstructionFeature().setClientName(newHolder);
    return leaseManager.reassignLease(lease, src, newHolder);
  }
3886
  /**
   * Commit (and complete, if minimally replicated) the last block of an
   * under-construction file, then return any over-reserved disk space.
   */
  private void commitOrCompleteLastBlock(final INodeFile fileINode,
      final Block commitBlock) throws IOException {
    assert hasWriteLock();
    Preconditions.checkArgument(fileINode.isUnderConstruction());
    if (!blockManager.commitOrCompleteLastBlock(fileINode, commitBlock)) {
      return;
    }

    // Adjust disk space consumption if required
    // (quota was charged for a full preferred-size block; give back the
    // unused tail now that the actual length is known).
    final long diff = fileINode.getPreferredBlockSize() - commitBlock.getNumBytes();
    if (diff > 0) {
      try {
        String path = fileINode.getFullPathName();
        dir.updateSpaceConsumed(path, 0, -diff*fileINode.getFileReplication());
      } catch (IOException e) {
        // Best effort: a quota bookkeeping failure should not fail the commit.
        LOG.warn("Unexpected exception while updating disk space.", e);
      }
    }
  }
3906
  /**
   * Turn an under-construction file into a finalized one: remove its lease,
   * convert the inode, persist the close in the edit log, and trigger a
   * replication check for the new file.
   */
  private void finalizeINodeFileUnderConstruction(String src,
      INodeFile pendingFile, int latestSnapshot) throws IOException,
      UnresolvedLinkException {
    assert hasWriteLock();
    FileUnderConstructionFeature uc = pendingFile.getFileUnderConstructionFeature();
    Preconditions.checkArgument(uc != null);
    leaseManager.removeLease(uc.getClientName(), src);

    // Record the modification against the latest snapshot, if any.
    pendingFile = pendingFile.recordModification(latestSnapshot);

    // The file is no longer pending.
    // Create permanent INode, update blocks. No need to replace the inode here
    // since we just remove the uc feature from pendingFile
    final INodeFile newFile = pendingFile.toCompleteFile(now());

    // close file and persist block allocations for this file
    dir.closeFile(src, newFile);

    blockManager.checkReplication(newFile);
  }
3927
  /**
   * Look up the stored {@link BlockInfo} for the given block; may return
   * null when the block is not in the blocks map (see callers that
   * null-check the result).
   */
  @VisibleForTesting
  BlockInfo getStoredBlock(Block block) {
    return blockManager.getStoredBlock(block);
  }
3932
3933 @Override
3934 public boolean isInSnapshot(BlockInfoUnderConstruction blockUC) {
3935 assert hasReadLock();
3936 final BlockCollection bc = blockUC.getBlockCollection();
3937 if (bc == null || !(bc instanceof INodeFile)
3938 || !((INodeFile) bc).isUnderConstruction()) {
3939 return false;
3940 }
3941
3942 INodeFile inodeUC = (INodeFile) bc;
3943 String fullName = inodeUC.getName();
3944 try {
3945 if (fullName != null && fullName.startsWith(Path.SEPARATOR)
3946 && dir.getINode(fullName) == inodeUC) {
3947 // If file exists in normal path then no need to look in snapshot
3948 return false;
3949 }
3950 } catch (UnresolvedLinkException e) {
3951 LOG.error("Error while resolving the link : " + fullName, e);
3952 return false;
3953 }
3954 /*
3955 * 1. if bc is an instance of INodeFileUnderConstructionWithSnapshot, and
3956 * bc is not in the current fsdirectory tree, bc must represent a snapshot
3957 * file.
3958 * 2. if fullName is not an absolute path, bc cannot be existent in the
3959 * current fsdirectory tree.
3960 * 3. if bc is not the current node associated with fullName, bc must be a
3961 * snapshot inode.
3962 */
3963 return true;
3964 }
3965
  /**
   * Called (via the DatanodeProtocol) when block recovery finishes: update
   * the recovered last block with its new generation stamp and length (or
   * remove it when {@code deleteblock} is set) and optionally close the file.
   */
  void commitBlockSynchronization(ExtendedBlock lastblock,
      long newgenerationstamp, long newlength,
      boolean closeFile, boolean deleteblock, DatanodeID[] newtargets,
      String[] newtargetstorages)
      throws IOException, UnresolvedLinkException {
    LOG.info("commitBlockSynchronization(lastblock=" + lastblock
             + ", newgenerationstamp=" + newgenerationstamp
             + ", newlength=" + newlength
             + ", newtargets=" + Arrays.asList(newtargets)
             + ", closeFile=" + closeFile
             + ", deleteBlock=" + deleteblock
             + ")");
    checkOperation(OperationCategory.WRITE);
    String src = "";
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      // If a DN tries to commit to the standby, the recovery will
      // fail, and the next retry will succeed on the new NN.

      checkNameNodeSafeMode(
          "Cannot commitBlockSynchronization while in safe mode");
      final BlockInfo storedBlock = getStoredBlock(
          ExtendedBlock.getLocalBlock(lastblock));
      if (storedBlock == null) {
        if (deleteblock) {
          // This may be a retry attempt so ignore the failure
          // to locate the block.
          if (LOG.isDebugEnabled()) {
            LOG.debug("Block (=" + lastblock + ") not found");
          }
          return;
        } else {
          throw new IOException("Block (=" + lastblock + ") not found");
        }
      }
      INodeFile iFile = ((INode)storedBlock.getBlockCollection()).asFile();
      if (!iFile.isUnderConstruction() || storedBlock.isComplete()) {
        // Likely a duplicate/stale commit: the file has already been closed
        // or the block already completed; nothing to do.
        if (LOG.isDebugEnabled()) {
          LOG.debug("Unexpected block (=" + lastblock
                    + ") since the file (=" + iFile.getLocalName()
                    + ") is not under construction");
        }
        return;
      }

      // The new generation stamp must match the recovery id issued when
      // block recovery was initiated; otherwise this commit is stale.
      long recoveryId =
        ((BlockInfoUnderConstruction)storedBlock).getBlockRecoveryId();
      if(recoveryId != newgenerationstamp) {
        throw new IOException("The recovery id " + newgenerationstamp
                              + " does not match current recovery id "
                              + recoveryId + " for block " + lastblock);
      }

      if (deleteblock) {
        Block blockToDel = ExtendedBlock.getLocalBlock(lastblock);
        boolean remove = iFile.removeLastBlock(blockToDel);
        if (remove) {
          blockManager.removeBlockFromMap(storedBlock);
        }
      }
      else {
        // update last block
        storedBlock.setGenerationStamp(newgenerationstamp);
        storedBlock.setNumBytes(newlength);

        // find the DatanodeDescriptor objects
        // There should be no locations in the blockManager till now because the
        // file is underConstruction
        ArrayList<DatanodeDescriptor> trimmedTargets =
            new ArrayList<DatanodeDescriptor>(newtargets.length);
        ArrayList<String> trimmedStorages =
            new ArrayList<String>(newtargets.length);
        if (newtargets.length > 0) {
          for (int i = 0; i < newtargets.length; ++i) {
            // try to get targetNode; skip targets the NN no longer knows
            DatanodeDescriptor targetNode =
                blockManager.getDatanodeManager().getDatanode(newtargets[i]);
            if (targetNode != null) {
              trimmedTargets.add(targetNode);
              trimmedStorages.add(newtargetstorages[i]);
            } else if (LOG.isDebugEnabled()) {
              LOG.debug("DatanodeDescriptor (=" + newtargets[i] + ") not found");
            }
          }
        }
        if ((closeFile) && !trimmedTargets.isEmpty()) {
          // the file is getting closed. Insert block locations into blockManager.
          // Otherwise fsck will report these blocks as MISSING, especially if the
          // blocksReceived from Datanodes take a long time to arrive.
          for (int i = 0; i < trimmedTargets.size(); i++) {
            trimmedTargets.get(i).addBlock(
              trimmedStorages.get(i), storedBlock);
          }
        }

        // add pipeline locations into the INodeUnderConstruction
        DatanodeStorageInfo[] trimmedStorageInfos =
            blockManager.getDatanodeManager().getDatanodeStorageInfos(
                trimmedTargets.toArray(new DatanodeID[trimmedTargets.size()]),
                trimmedStorages.toArray(new String[trimmedStorages.size()]));
        iFile.setLastBlock(storedBlock, trimmedStorageInfos);
      }

      if (closeFile) {
        src = closeFileCommitBlocks(iFile, storedBlock);
      } else {
        // If this commit does not want to close the file, persist blocks
        src = persistBlocks(iFile, false);
      }
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    if (closeFile) {
      LOG.info("commitBlockSynchronization(newblock=" + lastblock
          + ", file=" + src
          + ", newgenerationstamp=" + newgenerationstamp
          + ", newlength=" + newlength
          + ", newtargets=" + Arrays.asList(newtargets) + ") successful");
    } else {
      LOG.info("commitBlockSynchronization(" + lastblock + ") successful");
    }
  }
4090
4091 /**
4092 *
4093 * @param pendingFile
4094 * @param storedBlock
4095 * @return Path of the file that was closed.
4096 * @throws IOException
4097 */
4098 @VisibleForTesting
4099 String closeFileCommitBlocks(INodeFile pendingFile, BlockInfo storedBlock)
4100 throws IOException {
4101 String src = pendingFile.getFullPathName();
4102
4103 // commit the last block and complete it if it has minimum replicas
4104 commitOrCompleteLastBlock(pendingFile, storedBlock);
4105
4106 //remove lease, close file
4107 finalizeINodeFileUnderConstruction(src, pendingFile,
4108 Snapshot.findLatestSnapshot(pendingFile, Snapshot.CURRENT_STATE_ID));
4109
4110 return src;
4111 }
4112
4113 /**
4114 * Persist the block list for the given file.
4115 *
4116 * @param pendingFile
4117 * @return Path to the given file.
4118 * @throws IOException
4119 */
4120 @VisibleForTesting
4121 String persistBlocks(INodeFile pendingFile, boolean logRetryCache)
4122 throws IOException {
4123 String src = pendingFile.getFullPathName();
4124 dir.persistBlocks(src, pendingFile, logRetryCache);
4125 return src;
4126 }
4127
4128 /**
4129 * Renew the lease(s) held by the given client
4130 */
4131 void renewLease(String holder) throws IOException {
4132 checkOperation(OperationCategory.WRITE);
4133 readLock();
4134 try {
4135 checkOperation(OperationCategory.WRITE);
4136 checkNameNodeSafeMode("Cannot renew lease for " + holder);
4137 leaseManager.renewLease(holder);
4138 } finally {
4139 readUnlock();
4140 }
4141 }
4142
4143 /**
4144 * Get a partial listing of the indicated directory
4145 *
4146 * @param src the directory name
4147 * @param startAfter the name to start after
4148 * @param needLocation if blockLocations need to be returned
4149 * @return a partial listing starting after startAfter
4150 *
4151 * @throws AccessControlException if access is denied
4152 * @throws UnresolvedLinkException if symbolic link is encountered
4153 * @throws IOException if other I/O error occurred
4154 */
4155 DirectoryListing getListing(String src, byte[] startAfter,
4156 boolean needLocation)
4157 throws AccessControlException, UnresolvedLinkException, IOException {
4158 try {
4159 return getListingInt(src, startAfter, needLocation);
4160 } catch (AccessControlException e) {
4161 logAuditEvent(false, "listStatus", src);
4162 throw e;
4163 }
4164 }
4165
4166 private DirectoryListing getListingInt(String src, byte[] startAfter,
4167 boolean needLocation)
4168 throws AccessControlException, UnresolvedLinkException, IOException {
4169 DirectoryListing dl;
4170 FSPermissionChecker pc = getPermissionChecker();
4171 checkOperation(OperationCategory.READ);
4172 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
4173 String startAfterString = new String(startAfter);
4174 readLock();
4175 try {
4176 checkOperation(OperationCategory.READ);
4177 src = FSDirectory.resolvePath(src, pathComponents, dir);
4178
4179 // Get file name when startAfter is an INodePath
4180 if (FSDirectory.isReservedName(startAfterString)) {
4181 byte[][] startAfterComponents = FSDirectory
4182 .getPathComponentsForReservedPath(startAfterString);
4183 try {
4184 String tmp = FSDirectory.resolvePath(src, startAfterComponents, dir);
4185 byte[][] regularPath = INode.getPathComponents(tmp);
4186 startAfter = regularPath[regularPath.length - 1];
4187 } catch (IOException e) {
4188 // Possibly the inode is deleted
4189 throw new DirectoryListingStartAfterNotFoundException(
4190 "Can't find startAfter " + startAfterString);
4191 }
4192 }
4193
4194 if (isPermissionEnabled) {
4195 if (dir.isDir(src)) {
4196 checkPathAccess(pc, src, FsAction.READ_EXECUTE);
4197 } else {
4198 checkTraverse(pc, src);
4199 }
4200 }
4201 logAuditEvent(true, "listStatus", src);
4202 dl = dir.getListing(src, startAfter, needLocation);
4203 } finally {
4204 readUnlock();
4205 }
4206 return dl;
4207 }
4208
4209 /////////////////////////////////////////////////////////
4210 //
4211 // These methods are called by datanodes
4212 //
4213 /////////////////////////////////////////////////////////
4214 /**
4215 * Register Datanode.
4216 * <p>
4217 * The purpose of registration is to identify whether the new datanode
4218 * serves a new data storage, and will report new data block copies,
4219 * which the namenode was not aware of; or the datanode is a replacement
4220 * node for the data storage that was previously served by a different
4221 * or the same (in terms of host:port) datanode.
4222 * The data storages are distinguished by their storageIDs. When a new
4223 * data storage is reported the namenode issues a new unique storageID.
4224 * <p>
4225 * Finally, the namenode returns its namespaceID as the registrationID
4226 * for the datanodes.
4227 * namespaceID is a persistent attribute of the name space.
4228 * The registrationID is checked every time the datanode is communicating
4229 * with the namenode.
4230 * Datanodes with inappropriate registrationID are rejected.
4231 * If the namenode stops, and then restarts it can restore its
4232 * namespaceID and will continue serving the datanodes that has previously
4233 * registered with the namenode without restarting the whole cluster.
4234 *
4235 * @see org.apache.hadoop.hdfs.server.datanode.DataNode
4236 */
4237 void registerDatanode(DatanodeRegistration nodeReg) throws IOException {
4238 writeLock();
4239 try {
4240 getBlockManager().getDatanodeManager().registerDatanode(nodeReg);
4241 checkSafeMode();
4242 } finally {
4243 writeUnlock();
4244 }
4245 }
4246
4247 /**
4248 * Get registrationID for datanodes based on the namespaceID.
4249 *
4250 * @see #registerDatanode(DatanodeRegistration)
4251 * @return registration ID
4252 */
4253 String getRegistrationID() {
4254 return Storage.getRegistrationID(dir.fsImage.getStorage());
4255 }
4256
4257 /**
4258 * The given node has reported in. This method should:
4259 * 1) Record the heartbeat, so the datanode isn't timed out
4260 * 2) Adjust usage stats for future block allocation
4261 *
4262 * If a substantial amount of time passed since the last datanode
4263 * heartbeat then request an immediate block report.
4264 *
4265 * @return an array of datanode commands
4266 * @throws IOException
4267 */
4268 HeartbeatResponse handleHeartbeat(DatanodeRegistration nodeReg,
4269 StorageReport[] reports, long cacheCapacity, long cacheUsed,
4270 int xceiverCount, int xmitsInProgress, int failedVolumes)
4271 throws IOException {
4272 readLock();
4273 try {
4274 //get datanode commands
4275 final int maxTransfer = blockManager.getMaxReplicationStreams()
4276 - xmitsInProgress;
4277 DatanodeCommand[] cmds = blockManager.getDatanodeManager().handleHeartbeat(
4278 nodeReg, reports, blockPoolId, cacheCapacity, cacheUsed,
4279 xceiverCount, maxTransfer, failedVolumes);
4280
4281 //create ha status
4282 final NNHAStatusHeartbeat haState = new NNHAStatusHeartbeat(
4283 haContext.getState().getServiceState(),
4284 getFSImage().getLastAppliedOrWrittenTxId());
4285
4286 return new HeartbeatResponse(cmds, haState, rollingUpgradeInfo);
4287 } finally {
4288 readUnlock();
4289 }
4290 }
4291
4292 /**
4293 * Returns whether or not there were available resources at the last check of
4294 * resources.
4295 *
4296 * @return true if there were sufficient resources available, false otherwise.
4297 */
4298 boolean nameNodeHasResourcesAvailable() {
4299 return hasResourcesAvailable;
4300 }
4301
4302 /**
4303 * Perform resource checks and cache the results.
4304 * @throws IOException
4305 */
4306 void checkAvailableResources() {
4307 Preconditions.checkState(nnResourceChecker != null,
4308 "nnResourceChecker not initialized");
4309 hasResourcesAvailable = nnResourceChecker.hasAvailableDiskSpace();
4310 }
4311
4312 /**
4313 * Periodically calls hasAvailableResources of NameNodeResourceChecker, and if
4314 * there are found to be insufficient resources available, causes the NN to
4315 * enter safe mode. If resources are later found to have returned to
4316 * acceptable levels, this daemon will cause the NN to exit safe mode.
4317 */
4318 class NameNodeResourceMonitor implements Runnable {
4319 boolean shouldNNRmRun = true;
4320 @Override
4321 public void run () {
4322 try {
4323 while (fsRunning && shouldNNRmRun) {
4324 checkAvailableResources();
4325 if(!nameNodeHasResourcesAvailable()) {
4326 String lowResourcesMsg = "NameNode low on available disk space. ";
4327 if (!isInSafeMode()) {
4328 FSNamesystem.LOG.warn(lowResourcesMsg + "Entering safe mode.");
4329 } else {
4330 FSNamesystem.LOG.warn(lowResourcesMsg + "Already in safe mode.");
4331 }
4332 enterSafeMode(true);
4333 }
4334 try {
4335 Thread.sleep(resourceRecheckInterval);
4336 } catch (InterruptedException ie) {
4337 // Deliberately ignore
4338 }
4339 }
4340 } catch (Exception e) {
4341 FSNamesystem.LOG.error("Exception in NameNodeResourceMonitor: ", e);
4342 }
4343 }
4344
4345 public void stopMonitor() {
4346 shouldNNRmRun = false;
4347 }
4348 }
4349
4350 class NameNodeEditLogRoller implements Runnable {
4351
4352 private boolean shouldRun = true;
4353 private final long rollThreshold;
4354 private final long sleepIntervalMs;
4355
4356 public NameNodeEditLogRoller(long rollThreshold, int sleepIntervalMs) {
4357 this.rollThreshold = rollThreshold;
4358 this.sleepIntervalMs = sleepIntervalMs;
4359 }
4360
4361 @Override
4362 public void run() {
4363 while (fsRunning && shouldRun) {
4364 try {
4365 FSEditLog editLog = getFSImage().getEditLog();
4366 long numEdits =
4367 editLog.getLastWrittenTxId() - editLog.getCurSegmentTxId();
4368 if (numEdits > rollThreshold) {
4369 FSNamesystem.LOG.info("NameNode rolling its own edit log because"
4370 + " number of edits in open segment exceeds threshold of "
4371 + rollThreshold);
4372 rollEditLog();
4373 }
4374 Thread.sleep(sleepIntervalMs);
4375 } catch (InterruptedException e) {
4376 FSNamesystem.LOG.info(NameNodeEditLogRoller.class.getSimpleName()
4377 + " was interrupted, exiting");
4378 break;
4379 } catch (Exception e) {
4380 FSNamesystem.LOG.error("Swallowing exception in "
4381 + NameNodeEditLogRoller.class.getSimpleName() + ":", e);
4382 }
4383 }
4384 }
4385
4386 public void stop() {
4387 shouldRun = false;
4388 }
4389 }
4390
  /** @return the FSImage backing this namesystem. */
  public FSImage getFSImage() {
    return dir.fsImage;
  }
4394
  /** @return the edit log of the backing FSImage. */
  public FSEditLog getEditLog() {
    return getFSImage().getEditLog();
  }
4398
4399 private void checkBlock(ExtendedBlock block) throws IOException {
4400 if (block != null && !this.blockPoolId.equals(block.getBlockPoolId())) {
4401 throw new IOException("Unexpected BlockPoolId " + block.getBlockPoolId()
4402 + " - expected " + blockPoolId);
4403 }
4404 }
4405
  /** @return the number of missing blocks (delegates to the block manager). */
  @Metric({"MissingBlocks", "Number of missing blocks"})
  public long getMissingBlocksCount() {
    // not locking
    return blockManager.getMissingBlocksCount();
  }
4411
  /** @return the number of expired datanode heartbeats. */
  @Metric({"ExpiredHeartbeats", "Number of expired heartbeats"})
  public int getExpiredHeartbeats() {
    return datanodeStatistics.getExpiredHeartbeats();
  }
4416
  /**
   * @return the number of transactions written since the most recent
   *         checkpoint (last written txid minus the checkpoint txid).
   */
  @Metric({"TransactionsSinceLastCheckpoint",
      "Number of transactions since last checkpoint"})
  public long getTransactionsSinceLastCheckpoint() {
    return getEditLog().getLastWrittenTxId() -
        getFSImage().getStorage().getMostRecentCheckpointTxId();
  }
4423
4424 @Metric({"TransactionsSinceLastLogRoll",
4425 "Number of transactions since last edit log roll"})
4426 public long getTransactionsSinceLastLogRoll() {
4427 if (isInStandbyState() || !getEditLog().isSegmentOpen()) {
4428 return 0;
4429 } else {
4430 return getEditLog().getLastWrittenTxId() -
4431 getEditLog().getCurSegmentTxId() + 1;
4432 }
4433 }
4434
  /** @return the last transaction ID written to the edit log. */
  @Metric({"LastWrittenTransactionId", "Transaction ID written to the edit log"})
  public long getLastWrittenTransactionId() {
    return getEditLog().getLastWrittenTxId();
  }
4439
  /** @return the time (ms since the epoch) of the most recent checkpoint. */
  @Metric({"LastCheckpointTime",
      "Time in milliseconds since the epoch of the last checkpoint"})
  public long getLastCheckpointTime() {
    return getFSImage().getStorage().getMostRecentCheckpointTime();
  }
4445
  /**
   * Aggregate filesystem statistics: datanode capacity stats overlaid with
   * block-level counters at the well-known ClientProtocol indices.
   * @see ClientProtocol#getStats()
   */
  long[] getStats() {
    final long[] stats = datanodeStatistics.getStats();
    stats[ClientProtocol.GET_STATS_UNDER_REPLICATED_IDX] = getUnderReplicatedBlocks();
    stats[ClientProtocol.GET_STATS_CORRUPT_BLOCKS_IDX] = getCorruptReplicaBlocks();
    stats[ClientProtocol.GET_STATS_MISSING_BLOCKS_IDX] = getMissingBlocksCount();
    return stats;
  }
4454
  /** @return total raw capacity of all data nodes, in bytes. */
  @Override // FSNamesystemMBean
  @Metric({"CapacityTotal",
      "Total raw capacity of data nodes in bytes"})
  public long getCapacityTotal() {
    return datanodeStatistics.getCapacityTotal();
  }
4461
  /** @return total raw capacity of all data nodes, rounded to GB. */
  @Metric({"CapacityTotalGB",
      "Total raw capacity of data nodes in GB"})
  public float getCapacityTotalGB() {
    return DFSUtil.roundBytesToGB(getCapacityTotal());
  }
4467
  /** @return total used capacity across all data nodes, in bytes. */
  @Override // FSNamesystemMBean
  @Metric({"CapacityUsed",
      "Total used capacity across all data nodes in bytes"})
  public long getCapacityUsed() {
    return datanodeStatistics.getCapacityUsed();
  }
4474
  /** @return total used capacity across all data nodes, rounded to GB. */
  @Metric({"CapacityUsedGB",
      "Total used capacity across all data nodes in GB"})
  public float getCapacityUsedGB() {
    return DFSUtil.roundBytesToGB(getCapacityUsed());
  }
4480
  /** @return remaining capacity across all data nodes, in bytes. */
  @Override // FSNamesystemMBean
  @Metric({"CapacityRemaining", "Remaining capacity in bytes"})
  public long getCapacityRemaining() {
    return datanodeStatistics.getCapacityRemaining();
  }
4486
  /** @return remaining capacity across all data nodes, rounded to GB. */
  @Metric({"CapacityRemainingGB", "Remaining capacity in GB"})
  public float getCapacityRemainingGB() {
    return DFSUtil.roundBytesToGB(getCapacityRemaining());
  }
4491
  /** @return bytes used by data nodes for non-DFS purposes. */
  @Metric({"CapacityUsedNonDFS",
      "Total space used by data nodes for non DFS purposes in bytes"})
  public long getCapacityUsedNonDFS() {
    return datanodeStatistics.getCapacityUsedNonDFS();
  }
4497
4498 /**
4499 * Total number of connections.
4500 */
4501 @Override // FSNamesystemMBean
4502 @Metric
4503 public int getTotalLoad() {
4504 return datanodeStatistics.getXceiverCount();
4505 }
4506
  /** @return the number of snapshottable directories. */
  @Metric({ "SnapshottableDirectories", "Number of snapshottable directories" })
  public int getNumSnapshottableDirs() {
    return this.snapshotManager.getNumSnapshottableDirs();
  }
4511
  /** @return the number of snapshots in the namespace. */
  @Metric({ "Snapshots", "The number of snapshots" })
  public int getNumSnapshots() {
    return this.snapshotManager.getNumSnapshots();
  }
4516
4517 @Override
4518 public String getSnapshotStats() {
4519 Map<String, Object> info = new HashMap<String, Object>();
4520 info.put("SnapshottableDirectories", this.getNumSnapshottableDirs());
4521 info.put("Snapshots", this.getNumSnapshots());
4522 return JSON.toString(info);
4523 }
4524
  /** @return the number of datanodes matching the given report type. */
  int getNumberOfDatanodes(DatanodeReportType type) {
    readLock();
    try {
      return getBlockManager().getDatanodeManager().getDatanodeListForReport(
          type).size();
    } finally {
      readUnlock();
    }
  }
4534
4535 DatanodeInfo[] datanodeReport(final DatanodeReportType type
4536 ) throws AccessControlException, StandbyException {
4537 checkSuperuserPrivilege();
4538 checkOperation(OperationCategory.UNCHECKED);
4539 readLock();
4540 try {
4541 checkOperation(OperationCategory.UNCHECKED);
4542 final DatanodeManager dm = getBlockManager().getDatanodeManager();
4543 final List<DatanodeDescriptor> results = dm.getDatanodeListForReport(type);
4544
4545 DatanodeInfo[] arr = new DatanodeInfo[results.size()];
4546 for (int i=0; i<arr.length; i++) {
4547 arr[i] = new DatanodeInfo(results.get(i));
4548 }
4549 return arr;
4550 } finally {
4551 readUnlock();
4552 }
4553 }
4554
4555 /**
4556 * Save namespace image.
4557 * This will save current namespace into fsimage file and empty edits file.
4558 * Requires superuser privilege and safe mode.
4559 *
4560 * @throws AccessControlException if superuser privilege is violated.
4561 * @throws IOException if
4562 */
4563 void saveNamespace() throws AccessControlException, IOException {
4564 checkOperation(OperationCategory.UNCHECKED);
4565 checkSuperuserPrivilege();
4566
4567 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
4568 if (cacheEntry != null && cacheEntry.isSuccess()) {
4569 return; // Return previous response
4570 }
4571 boolean success = false;
4572 readLock();
4573 try {
4574 checkOperation(OperationCategory.UNCHECKED);
4575
4576 if (!isInSafeMode()) {
4577 throw new IOException("Safe mode should be turned ON "
4578 + "in order to create namespace image.");
4579 }
4580 getFSImage().saveNamespace(this);
4581 success = true;
4582 } finally {
4583 readUnlock();
4584 RetryCache.setState(cacheEntry, success);
4585 }
4586 LOG.info("New namespace image has been created");
4587 }
4588
4589 /**
4590 * Enables/Disables/Checks restoring failed storage replicas if the storage becomes available again.
4591 * Requires superuser privilege.
4592 *
4593 * @throws AccessControlException if superuser privilege is violated.
4594 */
4595 boolean restoreFailedStorage(String arg) throws AccessControlException,
4596 StandbyException {
4597 checkSuperuserPrivilege();
4598 checkOperation(OperationCategory.UNCHECKED);
4599 writeLock();
4600 try {
4601 checkOperation(OperationCategory.UNCHECKED);
4602
4603 // if it is disabled - enable it and vice versa.
4604 if(arg.equals("check"))
4605 return getFSImage().getStorage().getRestoreFailedStorage();
4606
4607 boolean val = arg.equals("true"); // false if not
4608 getFSImage().getStorage().setRestoreFailedStorage(val);
4609
4610 return val;
4611 } finally {
4612 writeUnlock();
4613 }
4614 }
4615
  /** @return the namesystem start time, as a freshly allocated Date. */
  Date getStartTime() {
    return new Date(startTime);
  }
4619
  /**
   * Finalize a pending upgrade of the on-disk storage layout.
   * Requires superuser privilege; allowed in any HA state (UNCHECKED).
   */
  void finalizeUpgrade() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.UNCHECKED);
    writeLock();
    try {
      checkOperation(OperationCategory.UNCHECKED);
      getFSImage().finalizeUpgrade(this.isHaEnabled() && inActiveState());
    } finally {
      writeUnlock();
    }
  }
4631
  /**
   * Re-read the include/exclude host configuration and refresh the datanode
   * set accordingly. Note that a fresh HdfsConfiguration is loaded here, so
   * on-disk config changes take effect. Requires superuser privilege.
   */
  void refreshNodes() throws IOException {
    checkOperation(OperationCategory.UNCHECKED);
    checkSuperuserPrivilege();
    getBlockManager().getDatanodeManager().refreshNodes(new HdfsConfiguration());
  }
4637
  /**
   * Set the balancer bandwidth (bytes/sec) on all datanodes.
   * Requires superuser privilege.
   */
  void setBalancerBandwidth(long bandwidth) throws IOException {
    checkOperation(OperationCategory.UNCHECKED);
    checkSuperuserPrivilege();
    getBlockManager().getDatanodeManager().setBalancerBandwidth(bandwidth);
  }
4643
4644 /**
4645 * SafeModeInfo contains information related to the safe mode.
4646 * <p>
4647 * An instance of {@link SafeModeInfo} is created when the name node
4648 * enters safe mode.
4649 * <p>
4650 * During name node startup {@link SafeModeInfo} counts the number of
4651 * <em>safe blocks</em>, those that have at least the minimal number of
4652 * replicas, and calculates the ratio of safe blocks to the total number
4653 * of blocks in the system, which is the size of blocks in
4654 * {@link FSNamesystem#blockManager}. When the ratio reaches the
4655 * {@link #threshold} it starts the SafeModeMonitor daemon in order
4656 * to monitor whether the safe mode {@link #extension} is passed.
4657 * Then it leaves safe mode and destroys itself.
4658 * <p>
4659 * If safe mode is turned on manually then the number of safe blocks is
4660 * not tracked because the name node is not intended to leave safe mode
4661 * automatically in the case.
4662 *
4663 * @see ClientProtocol#setSafeMode(HdfsConstants.SafeModeAction, boolean)
4664 */
4665 public class SafeModeInfo {
4666 // configuration fields
4667 /** Safe mode threshold condition %.*/
4668 private final double threshold;
4669 /** Safe mode minimum number of datanodes alive */
4670 private final int datanodeThreshold;
4671 /** Safe mode extension after the threshold. */
4672 private int extension;
4673 /** Min replication required by safe mode. */
4674 private final int safeReplication;
4675 /** threshold for populating needed replication queues */
4676 private final double replQueueThreshold;
4677 // internal fields
4678 /** Time when threshold was reached.
4679 * <br> -1 safe mode is off
4680 * <br> 0 safe mode is on, and threshold is not reached yet
4681 * <br> >0 safe mode is on, but we are in extension period
4682 */
4683 private long reached = -1;
4684 /** Total number of blocks. */
4685 int blockTotal;
4686 /** Number of safe blocks. */
4687 int blockSafe;
4688 /** Number of blocks needed to satisfy safe mode threshold condition */
4689 private int blockThreshold;
4690 /** Number of blocks needed before populating replication queues */
4691 private int blockReplQueueThreshold;
4692 /** time of the last status printout */
4693 private long lastStatusReport = 0;
4694 /** Was safemode entered automatically because available resources were low. */
4695 private boolean resourcesLow = false;
4696 /** Should safemode adjust its block totals as blocks come in */
4697 private boolean shouldIncrementallyTrackBlocks = false;
4698 /** counter for tracking startup progress of reported blocks */
4699 private Counter awaitingReportedBlocksCounter;
4700
4701 /**
4702 * Creates SafeModeInfo when the name node enters
4703 * automatic safe mode at startup.
4704 *
4705 * @param conf configuration
4706 */
4707 private SafeModeInfo(Configuration conf) {
4708 this.threshold = conf.getFloat(DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY,
4709 DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT);
4710 if(threshold > 1.0) {
4711 LOG.warn("The threshold value should't be greater than 1, threshold: " + threshold);
4712 }
4713 this.datanodeThreshold = conf.getInt(
4714 DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY,
4715 DFS_NAMENODE_SAFEMODE_MIN_DATANODES_DEFAULT);
4716 this.extension = conf.getInt(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 0);
4717 this.safeReplication = conf.getInt(DFS_NAMENODE_REPLICATION_MIN_KEY,
4718 DFS_NAMENODE_REPLICATION_MIN_DEFAULT);
4719
4720 LOG.info(DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY + " = " + threshold);
4721 LOG.info(DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY + " = " + datanodeThreshold);
4722 LOG.info(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY + " = " + extension);
4723
4724 // default to safe mode threshold (i.e., don't populate queues before leaving safe mode)
4725 this.replQueueThreshold =
4726 conf.getFloat(DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY,
4727 (float) threshold);
4728 this.blockTotal = 0;
4729 this.blockSafe = 0;
4730 }
4731
4732 /**
4733 * In the HA case, the StandbyNode can be in safemode while the namespace
4734 * is modified by the edit log tailer. In this case, the number of total
4735 * blocks changes as edits are processed (eg blocks are added and deleted).
4736 * However, we don't want to do the incremental tracking during the
4737 * startup-time loading process -- only once the initial total has been
4738 * set after the image has been loaded.
4739 */
4740 private boolean shouldIncrementallyTrackBlocks() {
4741 return shouldIncrementallyTrackBlocks;
4742 }
4743
4744 /**
4745 * Creates SafeModeInfo when safe mode is entered manually, or because
4746 * available resources are low.
4747 *
4748 * The {@link #threshold} is set to 1.5 so that it could never be reached.
4749 * {@link #blockTotal} is set to -1 to indicate that safe mode is manual.
4750 *
4751 * @see SafeModeInfo
4752 */
4753 private SafeModeInfo(boolean resourcesLow) {
4754 this.threshold = 1.5f; // this threshold can never be reached
4755 this.datanodeThreshold = Integer.MAX_VALUE;
4756 this.extension = Integer.MAX_VALUE;
4757 this.safeReplication = Short.MAX_VALUE + 1; // more than maxReplication
4758 this.replQueueThreshold = 1.5f; // can never be reached
4759 this.blockTotal = -1;
4760 this.blockSafe = -1;
4761 this.resourcesLow = resourcesLow;
4762 enter();
4763 reportStatus("STATE* Safe mode is ON.", true);
4764 }
4765
4766 /**
4767 * Check if safe mode is on.
4768 * @return true if in safe mode
4769 */
4770 private synchronized boolean isOn() {
4771 doConsistencyCheck();
4772 return this.reached >= 0;
4773 }
4774
4775 /**
4776 * Enter safe mode.
4777 */
4778 private void enter() {
4779 this.reached = 0;
4780 }
4781
4782 /**
4783 * Leave safe mode.
4784 * <p>
4785 * Check for invalid, under- & over-replicated blocks in the end of startup.
4786 */
4787 private synchronized void leave() {
4788 // if not done yet, initialize replication queues.
4789 // In the standby, do not populate repl queues
4790 if (!isPopulatingReplQueues() && shouldPopulateReplQueues()) {
4791 initializeReplQueues();
4792 }
4793 long timeInSafemode = now() - startTime;
4794 NameNode.stateChangeLog.info("STATE* Leaving safe mode after "
4795 + timeInSafemode/1000 + " secs");
4796 NameNode.getNameNodeMetrics().setSafeModeTime((int) timeInSafemode);
4797
4798 //Log the following only once (when transitioning from ON -> OFF)
4799 if (reached >= 0) {
4800 NameNode.stateChangeLog.info("STATE* Safe mode is OFF");
4801 }
4802 reached = -1;
4803 safeMode = null;
4804 final NetworkTopology nt = blockManager.getDatanodeManager().getNetworkTopology();
4805 NameNode.stateChangeLog.info("STATE* Network topology has "
4806 + nt.getNumOfRacks() + " racks and "
4807 + nt.getNumOfLeaves() + " datanodes");
4808 NameNode.stateChangeLog.info("STATE* UnderReplicatedBlocks has "
4809 + blockManager.numOfUnderReplicatedBlocks() + " blocks");
4810
4811 startSecretManagerIfNecessary();
4812
4813 // If startup has not yet completed, end safemode phase.
4814 StartupProgress prog = NameNode.getStartupProgress();
4815 if (prog.getStatus(Phase.SAFEMODE) != Status.COMPLETE) {
4816 prog.endStep(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS);
4817 prog.endPhase(Phase.SAFEMODE);
4818 }
4819 }
4820
4821 /**
4822 * Check whether we have reached the threshold for
4823 * initializing replication queues.
4824 */
4825 private synchronized boolean canInitializeReplQueues() {
4826 return shouldPopulateReplQueues()
4827 && blockSafe >= blockReplQueueThreshold;
4828 }
4829
4830 /**
4831 * Safe mode can be turned off iff
4832 * the threshold is reached and
4833 * the extension time have passed.
4834 * @return true if can leave or false otherwise.
4835 */
4836 private synchronized boolean canLeave() {
4837 if (reached == 0) {
4838 return false;
4839 }
4840
4841 if (now() - reached < extension) {
4842 reportStatus("STATE* Safe mode ON, in safe mode extension.", false);
4843 return false;
4844 }
4845
4846 if (needEnter()) {
4847 reportStatus("STATE* Safe mode ON, thresholds not met.", false);
4848 return false;
4849 }
4850
4851 return true;
4852 }
4853
4854 /**
4855 * There is no need to enter safe mode
4856 * if DFS is empty or {@link #threshold} == 0
4857 */
4858 private boolean needEnter() {
4859 return (threshold != 0 && blockSafe < blockThreshold) ||
4860 (datanodeThreshold != 0 && getNumLiveDataNodes() < datanodeThreshold) ||
4861 (!nameNodeHasResourcesAvailable());
4862 }
4863
4864 /**
4865 * Check and trigger safe mode if needed.
4866 */
4867 private void checkMode() {
4868 // Have to have write-lock since leaving safemode initializes
4869 // repl queues, which requires write lock
4870 assert hasWriteLock();
4871 // if smmthread is already running, the block threshold must have been
4872 // reached before, there is no need to enter the safe mode again
4873 if (smmthread == null && needEnter()) {
4874 enter();
4875 // check if we are ready to initialize replication queues
4876 if (canInitializeReplQueues() && !isPopulatingReplQueues()
4877 && !haEnabled) {
4878 initializeReplQueues();
4879 }
4880 reportStatus("STATE* Safe mode ON.", false);
4881 return;
4882 }
4883 // the threshold is reached or was reached before
4884 if (!isOn() || // safe mode is off
4885 extension <= 0 || threshold <= 0) { // don't need to wait
4886 this.leave(); // leave safe mode
4887 return;
4888 }
4889 if (reached > 0) { // threshold has already been reached before
4890 reportStatus("STATE* Safe mode ON.", false);
4891 return;
4892 }
4893 // start monitor
4894 reached = now();
4895 if (smmthread == null) {
4896 smmthread = new Daemon(new SafeModeMonitor());
4897 smmthread.start();
4898 reportStatus("STATE* Safe mode extension entered.", true);
4899 }
4900
4901 // check if we are ready to initialize replication queues
4902 if (canInitializeReplQueues() && !isPopulatingReplQueues() && !haEnabled) {
4903 initializeReplQueues();
4904 }
4905 }
4906
4907 /**
4908 * Set total number of blocks.
4909 */
4910 private synchronized void setBlockTotal(int total) {
4911 this.blockTotal = total;
4912 this.blockThreshold = (int) (blockTotal * threshold);
4913 this.blockReplQueueThreshold =
4914 (int) (blockTotal * replQueueThreshold);
4915 if (haEnabled) {
4916 // After we initialize the block count, any further namespace
4917 // modifications done while in safe mode need to keep track
4918 // of the number of total blocks in the system.
4919 this.shouldIncrementallyTrackBlocks = true;
4920 }
4921 if(blockSafe < 0)
4922 this.blockSafe = 0;
4923 checkMode();
4924 }
4925
4926 /**
4927 * Increment number of safe blocks if current block has
4928 * reached minimal replication.
4929 * @param replication current replication
4930 */
4931 private synchronized void incrementSafeBlockCount(short replication) {
4932 if (replication == safeReplication) {
4933 this.blockSafe++;
4934
4935 // Report startup progress only if we haven't completed startup yet.
4936 StartupProgress prog = NameNode.getStartupProgress();
4937 if (prog.getStatus(Phase.SAFEMODE) != Status.COMPLETE) {
4938 if (this.awaitingReportedBlocksCounter == null) {
4939 this.awaitingReportedBlocksCounter = prog.getCounter(Phase.SAFEMODE,
4940 STEP_AWAITING_REPORTED_BLOCKS);
4941 }
4942 this.awaitingReportedBlocksCounter.increment();
4943 }
4944
4945 checkMode();
4946 }
4947 }
4948
4949 /**
4950 * Decrement number of safe blocks if current block has
4951 * fallen below minimal replication.
4952 * @param replication current replication
4953 */
4954 private synchronized void decrementSafeBlockCount(short replication) {
4955 if (replication == safeReplication-1) {
4956 this.blockSafe--;
4957 //blockSafe is set to -1 in manual / low resources safemode
4958 assert blockSafe >= 0 || isManual() || areResourcesLow();
4959 checkMode();
4960 }
4961 }
4962
4963 /**
4964 * Check if safe mode was entered manually
4965 */
4966 private boolean isManual() {
4967 return extension == Integer.MAX_VALUE;
4968 }
4969
4970 /**
4971 * Set manual safe mode.
4972 */
4973 private synchronized void setManual() {
4974 extension = Integer.MAX_VALUE;
4975 }
4976
4977 /**
4978 * Check if safe mode was entered due to resources being low.
4979 */
4980 private boolean areResourcesLow() {
4981 return resourcesLow;
4982 }
4983
4984 /**
4985 * Set that resources are low for this instance of safe mode.
4986 */
4987 private void setResourcesLow() {
4988 resourcesLow = true;
4989 }
4990
4991 /**
4992 * A tip on how safe mode is to be turned off: manually or automatically.
4993 */
4994 String getTurnOffTip() {
4995 if(!isOn()) {
4996 return "Safe mode is OFF.";
4997 }
4998
4999 //Manual OR low-resource safemode. (Admin intervention required)
5000 String adminMsg = "It was turned on manually. ";
5001 if (areResourcesLow()) {
5002 adminMsg = "Resources are low on NN. Please add or free up more "
5003 + "resources then turn off safe mode manually. NOTE: If you turn off"
5004 + " safe mode before adding resources, "
5005 + "the NN will immediately return to safe mode. ";
5006 }
5007 if (isManual() || areResourcesLow()) {
5008 return adminMsg
5009 + "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off.";
5010 }
5011
5012 boolean thresholdsMet = true;
5013 int numLive = getNumLiveDataNodes();
5014 String msg = "";
5015 if (blockSafe < blockThreshold) {
5016 msg += String.format(
5017 "The reported blocks %d needs additional %d"
5018 + " blocks to reach the threshold %.4f of total blocks %d.\n",
5019 blockSafe, (blockThreshold - blockSafe) + 1, threshold, blockTotal);
5020 thresholdsMet = false;
5021 } else {
5022 msg += String.format("The reported blocks %d has reached the threshold"
5023 + " %.4f of total blocks %d. ", blockSafe, threshold, blockTotal);
5024 }
5025 if (numLive < datanodeThreshold) {
5026 msg += String.format(
5027 "The number of live datanodes %d needs an additional %d live "
5028 + "datanodes to reach the minimum number %d.\n",
5029 numLive, (datanodeThreshold - numLive), datanodeThreshold);
5030 thresholdsMet = false;
5031 } else {
5032 msg += String.format("The number of live datanodes %d has reached "
5033 + "the minimum number %d. ",
5034 numLive, datanodeThreshold);
5035 }
5036 msg += (reached > 0) ? "In safe mode extension. " : "";
5037 msg += "Safe mode will be turned off automatically ";
5038
5039 if (!thresholdsMet) {
5040 msg += "once the thresholds have been reached.";
5041 } else if (reached + extension - now() > 0) {
5042 msg += ("in " + (reached + extension - now()) / 1000 + " seconds.");
5043 } else {
5044 msg += "soon.";
5045 }
5046
5047 return msg;
5048 }
5049
5050 /**
5051 * Print status every 20 seconds.
5052 */
5053 private void reportStatus(String msg, boolean rightNow) {
5054 long curTime = now();
5055 if(!rightNow && (curTime - lastStatusReport < 20 * 1000))
5056 return;
5057 NameNode.stateChangeLog.info(msg + " \n" + getTurnOffTip());
5058 lastStatusReport = curTime;
5059 }
5060
5061 @Override
5062 public String toString() {
5063 String resText = "Current safe blocks = "
5064 + blockSafe
5065 + ". Target blocks = " + blockThreshold + " for threshold = %" + threshold
5066 + ". Minimal replication = " + safeReplication + ".";
5067 if (reached > 0)
5068 resText += " Threshold was reached " + new Date(reached) + ".";
5069 return resText;
5070 }
5071
5072 /**
5073 * Checks consistency of the class state.
5074 * This is costly so only runs if asserts are enabled.
5075 */
5076 private void doConsistencyCheck() {
5077 boolean assertsOn = false;
5078 assert assertsOn = true; // set to true if asserts are on
5079 if (!assertsOn) return;
5080
5081 if (blockTotal == -1 && blockSafe == -1) {
5082 return; // manual safe mode
5083 }
5084 int activeBlocks = blockManager.getActiveBlockCount();
5085 if ((blockTotal != activeBlocks) &&
5086 !(blockSafe >= 0 && blockSafe <= blockTotal)) {
5087 throw new AssertionError(
5088 " SafeMode: Inconsistent filesystem state: "
5089 + "SafeMode data: blockTotal=" + blockTotal
5090 + " blockSafe=" + blockSafe + "; "
5091 + "BlockManager data: active=" + activeBlocks);
5092 }
5093 }
5094
  /**
   * Adjust the safe and total block counters by the given deltas.
   * No-op unless blocks are being incrementally tracked; asserts that the
   * adjustment never drives either counter negative.
   * @param deltaSafe change in the number of safe blocks
   * @param deltaTotal change in the expected total number of blocks
   */
  private synchronized void adjustBlockTotals(int deltaSafe, int deltaTotal) {
    if (!shouldIncrementallyTrackBlocks) {
      return;
    }
    // Incremental tracking is only enabled with HA (see flag setup elsewhere).
    assert haEnabled;

    if (LOG.isDebugEnabled()) {
      LOG.debug("Adjusting block totals from " +
          blockSafe + "/" + blockTotal + " to " +
          (blockSafe + deltaSafe) + "/" + (blockTotal + deltaTotal));
    }
    assert blockSafe + deltaSafe >= 0 : "Can't reduce blockSafe " +
      blockSafe + " by " + deltaSafe + ": would be negative";
    assert blockTotal + deltaTotal >= 0 : "Can't reduce blockTotal " +
      blockTotal + " by " + deltaTotal + ": would be negative";

    blockSafe += deltaSafe;
    // Route the total through setBlockTotal so dependent thresholds update.
    setBlockTotal(blockTotal + deltaTotal);
  }
5114 }
5115
5116 /**
5117 * Periodically check whether it is time to leave safe mode.
5118 * This thread starts when the threshold level is reached.
5119 *
5120 */
5121 class SafeModeMonitor implements Runnable {
5122 /** interval in msec for checking safe mode: {@value} */
5123 private static final long recheckInterval = 1000;
5124
5125 /**
5126 */
5127 @Override
5128 public void run() {
5129 while (fsRunning) {
5130 writeLock();
5131 try {
5132 if (safeMode == null) { // Not in safe mode.
5133 break;
5134 }
5135 if (safeMode.canLeave()) {
5136 // Leave safe mode.
5137 safeMode.leave();
5138 smmthread = null;
5139 break;
5140 }
5141 } finally {
5142 writeUnlock();
5143 }
5144
5145 try {
5146 Thread.sleep(recheckInterval);
5147 } catch (InterruptedException ie) {
5148 // Ignored
5149 }
5150 }
5151 if (!fsRunning) {
5152 LOG.info("NameNode is being shutdown, exit SafeModeMonitor thread");
5153 }
5154 }
5155 }
5156
5157 boolean setSafeMode(SafeModeAction action) throws IOException {
5158 if (action != SafeModeAction.SAFEMODE_GET) {
5159 checkSuperuserPrivilege();
5160 switch(action) {
5161 case SAFEMODE_LEAVE: // leave safe mode
5162 leaveSafeMode();
5163 break;
5164 case SAFEMODE_ENTER: // enter safe mode
5165 enterSafeMode(false);
5166 break;
5167 default:
5168 LOG.error("Unexpected safe mode action");
5169 }
5170 }
5171 return isInSafeMode();
5172 }
5173
5174 @Override
5175 public void checkSafeMode() {
5176 // safeMode is volatile, and may be set to null at any time
5177 SafeModeInfo safeMode = this.safeMode;
5178 if (safeMode != null) {
5179 safeMode.checkMode();
5180 }
5181 }
5182
5183 @Override
5184 public boolean isInSafeMode() {
5185 // safeMode is volatile, and may be set to null at any time
5186 SafeModeInfo safeMode = this.safeMode;
5187 if (safeMode == null)
5188 return false;
5189 return safeMode.isOn();
5190 }
5191
5192 @Override
5193 public boolean isInStartupSafeMode() {
5194 // safeMode is volatile, and may be set to null at any time
5195 SafeModeInfo safeMode = this.safeMode;
5196 if (safeMode == null)
5197 return false;
5198 // If the NN is in safemode, and not due to manual / low resources, we
5199 // assume it must be because of startup. If the NN had low resources during
5200 // startup, we assume it came out of startup safemode and it is now in low
5201 // resources safemode
5202 return !safeMode.isManual() && !safeMode.areResourcesLow()
5203 && safeMode.isOn();
5204 }
5205
5206 /**
5207 * Check if replication queues are to be populated
5208 * @return true when node is HAState.Active and not in the very first safemode
5209 */
5210 @Override
5211 public boolean isPopulatingReplQueues() {
5212 if (!shouldPopulateReplQueues()) {
5213 return false;
5214 }
5215 return initializedReplQueues;
5216 }
5217
5218 private boolean shouldPopulateReplQueues() {
5219 if(haContext == null || haContext.getState() == null)
5220 return false;
5221 return haContext.getState().shouldPopulateReplQueues();
5222 }
5223
5224 @Override
5225 public void incrementSafeBlockCount(int replication) {
5226 // safeMode is volatile, and may be set to null at any time
5227 SafeModeInfo safeMode = this.safeMode;
5228 if (safeMode == null)
5229 return;
5230 safeMode.incrementSafeBlockCount((short)replication);
5231 }
5232
5233 @Override
5234 public void decrementSafeBlockCount(Block b) {
5235 // safeMode is volatile, and may be set to null at any time
5236 SafeModeInfo safeMode = this.safeMode;
5237 if (safeMode == null) // mostly true
5238 return;
5239 BlockInfo storedBlock = getStoredBlock(b);
5240 if (storedBlock.isComplete()) {
5241 safeMode.decrementSafeBlockCount((short)blockManager.countNodes(b).liveReplicas());
5242 }
5243 }
5244
5245 /**
5246 * Adjust the total number of blocks safe and expected during safe mode.
5247 * If safe mode is not currently on, this is a no-op.
5248 * @param deltaSafe the change in number of safe blocks
5249 * @param deltaTotal the change i nnumber of total blocks expected
5250 */
5251 @Override
5252 public void adjustSafeModeBlockTotals(int deltaSafe, int deltaTotal) {
5253 // safeMode is volatile, and may be set to null at any time
5254 SafeModeInfo safeMode = this.safeMode;
5255 if (safeMode == null)
5256 return;
5257 safeMode.adjustBlockTotals(deltaSafe, deltaTotal);
5258 }
5259
5260 /**
5261 * Set the total number of blocks in the system.
5262 */
5263 public void setBlockTotal() {
5264 // safeMode is volatile, and may be set to null at any time
5265 SafeModeInfo safeMode = this.safeMode;
5266 if (safeMode == null)
5267 return;
5268 safeMode.setBlockTotal((int)getCompleteBlocksTotal());
5269 }
5270
5271 /**
5272 * Get the total number of blocks in the system.
5273 */
5274 @Override // FSNamesystemMBean
5275 @Metric
5276 public long getBlocksTotal() {
5277 return blockManager.getTotalBlocks();
5278 }
5279
5280 /**
5281 * Get the total number of COMPLETE blocks in the system.
5282 * For safe mode only complete blocks are counted.
5283 */
5284 private long getCompleteBlocksTotal() {
5285 // Calculate number of blocks under construction
5286 long numUCBlocks = 0;
5287 readLock();
5288 try {
5289 for (Lease lease : leaseManager.getSortedLeases()) {
5290 for (String path : lease.getPaths()) {
5291 final INodeFile cons;
5292 try {
5293 cons = dir.getINode(path).asFile();
5294 Preconditions.checkState(cons.isUnderConstruction());
5295 } catch (UnresolvedLinkException e) {
5296 throw new AssertionError("Lease files should reside on this FS");
5297 }
5298 BlockInfo[] blocks = cons.getBlocks();
5299 if(blocks == null)
5300 continue;
5301 for(BlockInfo b : blocks) {
5302 if(!b.isComplete())
5303 numUCBlocks++;
5304 }
5305 }
5306 }
5307 LOG.info("Number of blocks under construction: " + numUCBlocks);
5308 return getBlocksTotal() - numUCBlocks;
5309 } finally {
5310 readUnlock();
5311 }
5312 }
5313
5314 /**
5315 * Enter safe mode. If resourcesLow is false, then we assume it is manual
5316 * @throws IOException
5317 */
5318 void enterSafeMode(boolean resourcesLow) throws IOException {
5319 writeLock();
5320 try {
5321 // Stop the secret manager, since rolling the master key would
5322 // try to write to the edit log
5323 stopSecretManager();
5324
5325 // Ensure that any concurrent operations have been fully synced
5326 // before entering safe mode. This ensures that the FSImage
5327 // is entirely stable on disk as soon as we're in safe mode.
5328 boolean isEditlogOpenForWrite = getEditLog().isOpenForWrite();
5329 // Before Editlog is in OpenForWrite mode, editLogStream will be null. So,
5330 // logSyncAll call can be called only when Edlitlog is in OpenForWrite mode
5331 if (isEditlogOpenForWrite) {
5332 getEditLog().logSyncAll();
5333 }
5334 if (!isInSafeMode()) {
5335 safeMode = new SafeModeInfo(resourcesLow);
5336 return;
5337 }
5338 if (resourcesLow) {
5339 safeMode.setResourcesLow();
5340 } else {
5341 safeMode.setManual();
5342 }
5343 if (isEditlogOpenForWrite) {
5344 getEditLog().logSyncAll();
5345 }
5346 NameNode.stateChangeLog.info("STATE* Safe mode is ON"
5347 + safeMode.getTurnOffTip());
5348 } finally {
5349 writeUnlock();
5350 }
5351 }
5352
5353 /**
5354 * Leave safe mode.
5355 * @throws IOException
5356 */
5357 void leaveSafeMode() {
5358 writeLock();
5359 try {
5360 if (!isInSafeMode()) {
5361 NameNode.stateChangeLog.info("STATE* Safe mode is already OFF");
5362 return;
5363 }
5364 safeMode.leave();
5365 } finally {
5366 writeUnlock();
5367 }
5368 }
5369
5370 String getSafeModeTip() {
5371 readLock();
5372 try {
5373 if (!isInSafeMode()) {
5374 return "";
5375 }
5376 return safeMode.getTurnOffTip();
5377 } finally {
5378 readUnlock();
5379 }
5380 }
5381
  /**
   * Roll the edit log (close the current segment and open a new one).
   * Requires superuser privilege; refused while in safe mode.
   * @return the checkpoint signature after the roll
   * @throws IOException if in safe mode or the roll fails
   */
  CheckpointSignature rollEditLog() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.JOURNAL);
    writeLock();
    try {
      // Re-check under the lock: state may have changed since the first check.
      checkOperation(OperationCategory.JOURNAL);
      checkNameNodeSafeMode("Log not rolled");
      if (Server.isRpcInvocation()) {
        LOG.info("Roll Edit Log from " + Server.getRemoteAddress());
      }
      return getFSImage().rollEditLog();
    } finally {
      writeUnlock();
    }
  }
5397
  /**
   * Start a checkpoint on behalf of a backup node. The result is memoized
   * in the retry cache so a retried RPC replays the original command
   * instead of starting a second checkpoint.
   * @param backupNode registration of the requesting backup node
   * @param activeNamenode registration of this name-node
   * @return the command the backup node should execute
   * @throws IOException if in safe mode or the checkpoint cannot start
   */
  NamenodeCommand startCheckpoint(NamenodeRegistration backupNode,
      NamenodeRegistration activeNamenode) throws IOException {
    checkOperation(OperationCategory.CHECKPOINT);
    CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
        null);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      // A previous attempt of this RPC already succeeded; replay its result.
      return (NamenodeCommand) cacheEntry.getPayload();
    }
    writeLock();
    NamenodeCommand cmd = null;
    try {
      checkOperation(OperationCategory.CHECKPOINT);
      checkNameNodeSafeMode("Checkpoint not started");

      LOG.info("Start checkpoint for " + backupNode.getAddress());
      cmd = getFSImage().startCheckpoint(backupNode, activeNamenode);
      getEditLog().logSync();
      return cmd;
    } finally {
      writeUnlock();
      // cmd != null doubles as the success flag for the retry cache.
      RetryCache.setState(cacheEntry, cmd != null, cmd);
    }
  }
5421
  /**
   * Apply an incremental block report (received/deleted blocks) from a
   * datanode by delegating to the BlockManager under the write lock.
   * NOTE(review): poolId is not used in this method -- confirm whether it
   * is validated by the caller.
   */
  public void processIncrementalBlockReport(final DatanodeID nodeID,
      final String poolId, final StorageReceivedDeletedBlocks srdb)
      throws IOException {
    writeLock();
    try {
      blockManager.processIncrementalBlockReport(nodeID, srdb);
    } finally {
      writeUnlock();
    }
  }
5432
  /**
   * Finish a checkpoint previously started by a backup node.
   * Idempotent via the retry cache: a retried RPC returns immediately.
   * @param registration registration of the backup node
   * @param sig signature identifying the checkpoint being finalized
   * @throws IOException if in safe mode or the checkpoint cannot be ended
   */
  void endCheckpoint(NamenodeRegistration registration,
      CheckpointSignature sig) throws IOException {
    checkOperation(OperationCategory.CHECKPOINT);
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    boolean success = false;
    readLock();
    try {
      checkOperation(OperationCategory.CHECKPOINT);

      checkNameNodeSafeMode("Checkpoint not ended");
      LOG.info("End checkpoint for " + registration.getAddress());
      getFSImage().endCheckpoint(sig);
      success = true;
    } finally {
      readUnlock();
      RetryCache.setState(cacheEntry, success);
    }
  }
5454
5455 PermissionStatus createFsOwnerPermissions(FsPermission permission) {
5456 return new PermissionStatus(fsOwner.getShortUserName(), supergroup, permission);
5457 }
5458
  /** Require that the caller owns {@code path}. */
  private void checkOwner(FSPermissionChecker pc, String path)
      throws AccessControlException, UnresolvedLinkException {
    checkPermission(pc, path, true, null, null, null, null);
  }

  /** Require {@code access} permission on the inode at {@code path} itself. */
  private void checkPathAccess(FSPermissionChecker pc,
      String path, FsAction access) throws AccessControlException,
      UnresolvedLinkException {
    checkPermission(pc, path, false, null, null, access, null);
  }

  /** Require {@code access} permission on the parent of {@code path}. */
  private void checkParentAccess(FSPermissionChecker pc,
      String path, FsAction access) throws AccessControlException,
      UnresolvedLinkException {
    checkPermission(pc, path, false, null, access, null, null);
  }

  /** Require {@code access} permission on the ancestor of {@code path}. */
  private void checkAncestorAccess(FSPermissionChecker pc,
      String path, FsAction access) throws AccessControlException,
      UnresolvedLinkException {
    checkPermission(pc, path, false, access, null, null, null);
  }

  /** Require only traverse permission (all access arguments null). */
  private void checkTraverse(FSPermissionChecker pc, String path)
      throws AccessControlException, UnresolvedLinkException {
    checkPermission(pc, path, false, null, null, null, null);
  }
5486
5487 @Override
5488 public void checkSuperuserPrivilege()
5489 throws AccessControlException {
5490 if (isPermissionEnabled) {
5491 FSPermissionChecker pc = getPermissionChecker();
5492 pc.checkSuperuserPrivilege();
5493 }
5494 }
5495
5496 /**
5497 * Check whether current user have permissions to access the path. For more
5498 * details of the parameters, see
5499 * {@link FSPermissionChecker#checkPermission()}.
5500 */
5501 private void checkPermission(FSPermissionChecker pc,
5502 String path, boolean doCheckOwner, FsAction ancestorAccess,
5503 FsAction parentAccess, FsAction access, FsAction subAccess)
5504 throws AccessControlException, UnresolvedLinkException {
5505 checkPermission(pc, path, doCheckOwner, ancestorAccess,
5506 parentAccess, access, subAccess, true);
5507 }
5508
5509 /**
5510 * Check whether current user have permissions to access the path. For more
5511 * details of the parameters, see
5512 * {@link FSPermissionChecker#checkPermission()}.
5513 */
5514 private void checkPermission(FSPermissionChecker pc,
5515 String path, boolean doCheckOwner, FsAction ancestorAccess,
5516 FsAction parentAccess, FsAction access, FsAction subAccess,
5517 boolean resolveLink)
5518 throws AccessControlException, UnresolvedLinkException {
5519 if (!pc.isSuperUser()) {
5520 dir.waitForReady();
5521 readLock();
5522 try {
5523 pc.checkPermission(path, dir.rootDir, doCheckOwner, ancestorAccess,
5524 parentAccess, access, subAccess, resolveLink);
5525 } finally {
5526 readUnlock();
5527 }
5528 }
5529 }
5530
5531 /**
5532 * Check to see if we have exceeded the limit on the number
5533 * of inodes.
5534 */
5535 void checkFsObjectLimit() throws IOException {
5536 if (maxFsObjects != 0 &&
5537 maxFsObjects <= dir.totalInodes() + getBlocksTotal()) {
5538 throw new IOException("Exceeded the configured number of objects " +
5539 maxFsObjects + " in the filesystem.");
5540 }
5541 }
5542
5543 /**
5544 * Get the total number of objects in the system.
5545 */
5546 @Override // FSNamesystemMBean
5547 public long getMaxObjects() {
5548 return maxFsObjects;
5549 }
5550
5551 @Override // FSNamesystemMBean
5552 @Metric
5553 public long getFilesTotal() {
5554 readLock();
5555 try {
5556 return this.dir.totalInodes();
5557 } finally {
5558 readUnlock();
5559 }
5560 }
5561
5562 @Override // FSNamesystemMBean
5563 @Metric
5564 public long getPendingReplicationBlocks() {
5565 return blockManager.getPendingReplicationBlocksCount();
5566 }
5567
5568 @Override // FSNamesystemMBean
5569 @Metric
5570 public long getUnderReplicatedBlocks() {
5571 return blockManager.getUnderReplicatedBlocksCount();
5572 }
5573
5574 /** Returns number of blocks with corrupt replicas */
5575 @Metric({"CorruptBlocks", "Number of blocks with corrupt replicas"})
5576 public long getCorruptReplicaBlocks() {
5577 return blockManager.getCorruptReplicaBlocksCount();
5578 }
5579
5580 @Override // FSNamesystemMBean
5581 @Metric
5582 public long getScheduledReplicationBlocks() {
5583 return blockManager.getScheduledReplicationBlocksCount();
5584 }
5585
5586 @Override
5587 @Metric
5588 public long getPendingDeletionBlocks() {
5589 return blockManager.getPendingDeletionBlocksCount();
5590 }
5591
5592 @Metric
5593 public long getExcessBlocks() {
5594 return blockManager.getExcessBlocksCount();
5595 }
5596
5597 // HA-only metric
5598 @Metric
5599 public long getPostponedMisreplicatedBlocks() {
5600 return blockManager.getPostponedMisreplicatedBlocksCount();
5601 }
5602
5603 // HA-only metric
5604 @Metric
5605 public int getPendingDataNodeMessageCount() {
5606 return blockManager.getPendingDataNodeMessageCount();
5607 }
5608
5609 // HA-only metric
5610 @Metric
5611 public String getHAState() {
5612 return haContext.getState().toString();
5613 }
5614
5615 // HA-only metric
5616 @Metric
5617 public long getMillisSinceLastLoadedEdits() {
5618 if (isInStandbyState() && editLogTailer != null) {
5619 return now() - editLogTailer.getLastLoadTimestamp();
5620 } else {
5621 return 0;
5622 }
5623 }
5624
5625 @Metric
5626 public int getBlockCapacity() {
5627 return blockManager.getCapacity();
5628 }
5629
5630 @Override // FSNamesystemMBean
5631 public String getFSState() {
5632 return isInSafeMode() ? "safeMode" : "Operational";
5633 }
5634
  // JMX name of the FSNamesystemState MBean; null until registered.
  private ObjectName mbeanName;
  // JMX name of the MXBean; not set in registerMBean() -- presumably
  // registered elsewhere, but unregistered in shutdown().
  private ObjectName mxbeanName;

  /**
   * Register the FSNamesystem MBean using the name
   * "hadoop:service=NameNode,name=FSNamesystemState"
   */
  private void registerMBean() {
    // We can only implement one MXBean interface, so we keep the old one.
    try {
      StandardMBean bean = new StandardMBean(this, FSNamesystemMBean.class);
      mbeanName = MBeans.register("NameNode", "FSNamesystemState", bean);
    } catch (NotCompliantMBeanException e) {
      throw new RuntimeException("Bad MBean setup", e);
    }

    LOG.info("Registered FSNamesystemState MBean");
  }
5653
5654 /**
5655 * shutdown FSNamesystem
5656 */
5657 void shutdown() {
5658 if (mbeanName != null) {
5659 MBeans.unregister(mbeanName);
5660 mbeanName = null;
5661 }
5662 if (mxbeanName != null) {
5663 MBeans.unregister(mxbeanName);
5664 mxbeanName = null;
5665 }
5666 if (dir != null) {
5667 dir.shutdown();
5668 }
5669 if (blockManager != null) {
5670 blockManager.shutdown();
5671 }
5672 }
5673
5674
  /** Number of datanodes currently considered live. */
  @Override // FSNamesystemMBean
  public int getNumLiveDataNodes() {
    return getBlockManager().getDatanodeManager().getNumLiveDataNodes();
  }

  /** Number of datanodes currently considered dead. */
  @Override // FSNamesystemMBean
  public int getNumDeadDataNodes() {
    return getBlockManager().getDatanodeManager().getNumDeadDataNodes();
  }
5684
5685 @Override // FSNamesystemMBean
5686 public int getNumDecomLiveDataNodes() {
5687 final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
5688 getBlockManager().getDatanodeManager().fetchDatanodes(live, null, true);
5689 int liveDecommissioned = 0;
5690 for (DatanodeDescriptor node : live) {
5691 liveDecommissioned += node.isDecommissioned() ? 1 : 0;
5692 }
5693 return liveDecommissioned;
5694 }
5695
5696 @Override // FSNamesystemMBean
5697 public int getNumDecomDeadDataNodes() {
5698 final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
5699 getBlockManager().getDatanodeManager().fetchDatanodes(null, dead, true);
5700 int deadDecommissioned = 0;
5701 for (DatanodeDescriptor node : dead) {
5702 deadDecommissioned += node.isDecommissioned() ? 1 : 0;
5703 }
5704 return deadDecommissioned;
5705 }
5706
  /** Number of datanodes currently being decommissioned. */
  @Override // FSNamesystemMBean
  public int getNumDecommissioningDataNodes() {
    return getBlockManager().getDatanodeManager().getDecommissioningNodes()
        .size();
  }

  /** Number of datanodes marked stale due to delayed heartbeat. */
  @Override // FSNamesystemMBean
  @Metric({"StaleDataNodes",
    "Number of datanodes marked stale due to delayed heartbeat"})
  public int getNumStaleDataNodes() {
    return getBlockManager().getDatanodeManager().getNumStaleNodes();
  }
5719
5720 /**
5721 * Sets the current generation stamp for legacy blocks
5722 */
5723 void setGenerationStampV1(long stamp) {
5724 generationStampV1.setCurrentValue(stamp);
5725 }
5726
5727 /**
5728 * Gets the current generation stamp for legacy blocks
5729 */
5730 long getGenerationStampV1() {
5731 return generationStampV1.getCurrentValue();
5732 }
5733
5734 /**
5735 * Gets the current generation stamp for this filesystem
5736 */
5737 void setGenerationStampV2(long stamp) {
5738 generationStampV2.setCurrentValue(stamp);
5739 }
5740
5741 /**
5742 * Gets the current generation stamp for this filesystem
5743 */
5744 long getGenerationStampV2() {
5745 return generationStampV2.getCurrentValue();
5746 }
5747
5748 /**
5749 * Upgrades the generation stamp for the filesystem
5750 * by reserving a sufficient range for all existing blocks.
5751 * Should be invoked only during the first upgrade to
5752 * sequential block IDs.
5753 */
5754 long upgradeGenerationStampToV2() {
5755 Preconditions.checkState(generationStampV2.getCurrentValue() ==
5756 GenerationStamp.LAST_RESERVED_STAMP);
5757
5758 generationStampV2.skipTo(
5759 generationStampV1.getCurrentValue() +
5760 HdfsConstants.RESERVED_GENERATION_STAMPS_V1);
5761
5762 generationStampV1Limit = generationStampV2.getCurrentValue();
5763 return generationStampV2.getCurrentValue();
5764 }
5765
5766 /**
5767 * Sets the generation stamp that delineates random and sequentially
5768 * allocated block IDs.
5769 * @param stamp
5770 */
5771 void setGenerationStampV1Limit(long stamp) {
5772 Preconditions.checkState(generationStampV1Limit ==
5773 GenerationStamp.GRANDFATHER_GENERATION_STAMP);
5774 generationStampV1Limit = stamp;
5775 }
5776
5777 /**
5778 * Gets the value of the generation stamp that delineates sequential
5779 * and random block IDs.
5780 */
5781 long getGenerationStampAtblockIdSwitch() {
5782 return generationStampV1Limit;
5783 }
5784
  // Exposed only for unit tests.
  @VisibleForTesting
  SequentialBlockIdGenerator getBlockIdGenerator() {
    return blockIdGenerator;
  }

  /**
   * Sets the maximum allocated block ID for this filesystem. This is
   * the basis for allocating new block IDs.
   */
  void setLastAllocatedBlockId(long blockId) {
    blockIdGenerator.skipTo(blockId);
  }

  /**
   * Gets the maximum sequentially allocated block ID for this filesystem
   */
  long getLastAllocatedBlockId() {
    return blockIdGenerator.getCurrentValue();
  }
5804
5805 /**
5806 * Increments, logs and then returns the stamp
5807 */
5808 long nextGenerationStamp(boolean legacyBlock)
5809 throws IOException, SafeModeException {
5810 assert hasWriteLock();
5811 checkNameNodeSafeMode("Cannot get next generation stamp");
5812
5813 long gs;
5814 if (legacyBlock) {
5815 gs = getNextGenerationStampV1();
5816 getEditLog().logGenerationStampV1(gs);
5817 } else {
5818 gs = getNextGenerationStampV2();
5819 getEditLog().logGenerationStampV2(gs);
5820 }
5821
5822 // NB: callers sync the log
5823 return gs;
5824 }
5825
5826 @VisibleForTesting
5827 long getNextGenerationStampV1() throws IOException {
5828 long genStampV1 = generationStampV1.nextValue();
5829
5830 if (genStampV1 >= generationStampV1Limit) {
5831 // We ran out of generation stamps for legacy blocks. In practice, it
5832 // is extremely unlikely as we reserved 1T v1 generation stamps. The
5833 // result is that we can no longer append to the legacy blocks that
5834 // were created before the upgrade to sequential block IDs.
5835 throw new OutOfV1GenerationStampsException();
5836 }
5837
5838 return genStampV1;
5839 }
5840
  /** Allocate the next v2 (sequential-era) generation stamp. */
  @VisibleForTesting
  long getNextGenerationStampV2() {
    return generationStampV2.nextValue();
  }

  /** Stamps below this limit belong to legacy (randomly-ID'd) blocks. */
  long getGenerationStampV1Limit() {
    return generationStampV1Limit;
  }
5849
5850 /**
5851 * Determine whether the block ID was randomly generated (legacy) or
5852 * sequentially generated. The generation stamp value is used to
5853 * make the distinction.
5854 * @param block
5855 * @return true if the block ID was randomly generated, false otherwise.
5856 */
5857 boolean isLegacyBlock(Block block) {
5858 return block.getGenerationStamp() < getGenerationStampV1Limit();
5859 }
5860
5861 /**
5862 * Increments, logs and then returns the block ID
5863 */
5864 private long nextBlockId() throws IOException {
5865 assert hasWriteLock();
5866 checkNameNodeSafeMode("Cannot get next block ID");
5867 final long blockId = blockIdGenerator.nextValue();
5868 getEditLog().logAllocateBlockId(blockId);
5869 // NB: callers sync the log
5870 return blockId;
5871 }
5872
  /**
   * Validate that {@code block} is under construction and that
   * {@code clientName} holds its lease; returns the owning file.
   * @throws IOException if in safe mode, the block is unknown or not
   *         under construction, or the file is missing
   * @throws LeaseExpiredException if the client does not hold the lease
   */
  private INodeFile checkUCBlock(ExtendedBlock block,
      String clientName) throws IOException {
    assert hasWriteLock();
    checkNameNodeSafeMode("Cannot get a new generation stamp and an "
        + "access token for block " + block);

    // check stored block state
    BlockInfo storedBlock = getStoredBlock(ExtendedBlock.getLocalBlock(block));
    if (storedBlock == null ||
        storedBlock.getBlockUCState() != BlockUCState.UNDER_CONSTRUCTION) {
      // NOTE(review): storedBlock is appended without a separator, so the
      // message reads oddly (and prints "null" for unknown blocks).
      throw new IOException(block +
          " does not exist or is not under Construction" + storedBlock);
    }

    // check file inode
    final INodeFile file = ((INode)storedBlock.getBlockCollection()).asFile();
    if (file == null || !file.isUnderConstruction()) {
      throw new IOException("The file " + storedBlock +
          " belonged to does not exist or it is not under construction.");
    }

    // check lease
    if (clientName == null
        || !clientName.equals(file.getFileUnderConstructionFeature()
            .getClientName())) {
      throw new LeaseExpiredException("Lease mismatch: " + block +
          " is accessed by a non lease holder " + clientName);
    }

    return file;
  }
5904
5905 /**
5906 * Client is reporting some bad block locations.
5907 */
5908 void reportBadBlocks(LocatedBlock[] blocks) throws IOException {
5909 checkOperation(OperationCategory.WRITE);
5910 NameNode.stateChangeLog.info("*DIR* reportBadBlocks");
5911 writeLock();
5912 try {
5913 checkOperation(OperationCategory.WRITE);
5914 for (int i = 0; i < blocks.length; i++) {
5915 ExtendedBlock blk = blocks[i].getBlock();
5916 DatanodeInfo[] nodes = blocks[i].getLocations();
5917 String[] storageIDs = blocks[i].getStorageIDs();
5918 for (int j = 0; j < nodes.length; j++) {
5919 blockManager.findAndMarkBlockAsCorrupt(blk, nodes[j],
5920 storageIDs == null ? null: storageIDs[j],
5921 "client machine reported it");
5922 }
5923 }
5924 } finally {
5925 writeUnlock();
5926 }
5927 }
5928
5929 /**
5930 * Get a new generation stamp together with an access token for
5931 * a block under construction
5932 *
5933 * This method is called for recovering a failed pipeline or setting up
5934 * a pipeline to append to a block.
5935 *
5936 * @param block a block
5937 * @param clientName the name of a client
5938 * @return a located block with a new generation stamp and an access token
5939 * @throws IOException if any error occurs
5940 */
5941 LocatedBlock updateBlockForPipeline(ExtendedBlock block,
5942 String clientName) throws IOException {
5943 LocatedBlock locatedBlock;
5944 checkOperation(OperationCategory.WRITE);
5945 writeLock();
5946 try {
5947 checkOperation(OperationCategory.WRITE);
5948
5949 // check vadility of parameters
5950 checkUCBlock(block, clientName);
5951
5952 // get a new generation stamp and an access token
5953 block.setGenerationStamp(
5954 nextGenerationStamp(isLegacyBlock(block.getLocalBlock())));
5955 locatedBlock = new LocatedBlock(block, new DatanodeInfo[0]);
5956 blockManager.setBlockToken(locatedBlock, AccessMode.WRITE);
5957 } finally {
5958 writeUnlock();
5959 }
5960 // Ensure we record the new generation stamp
5961 getEditLog().logSync();
5962 return locatedBlock;
5963 }
5964
5965 /**
5966 * Update a pipeline for a block under construction
5967 *
5968 * @param clientName the name of the client
5969 * @param oldBlock and old block
5970 * @param newBlock a new block with a new generation stamp and length
5971 * @param newNodes datanodes in the pipeline
5972 * @throws IOException if any error occurs
5973 */
5974 void updatePipeline(String clientName, ExtendedBlock oldBlock,
5975 ExtendedBlock newBlock, DatanodeID[] newNodes, String[] newStorageIDs)
5976 throws IOException {
5977 checkOperation(OperationCategory.WRITE);
5978 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
5979 if (cacheEntry != null && cacheEntry.isSuccess()) {
5980 return; // Return previous response
5981 }
5982 LOG.info("updatePipeline(block=" + oldBlock
5983 + ", newGenerationStamp=" + newBlock.getGenerationStamp()
5984 + ", newLength=" + newBlock.getNumBytes()
5985 + ", newNodes=" + Arrays.asList(newNodes)
5986 + ", clientName=" + clientName
5987 + ")");
5988 writeLock();
5989 boolean success = false;
5990 try {
5991 checkOperation(OperationCategory.WRITE);
5992 checkNameNodeSafeMode("Pipeline not updated");
5993 assert newBlock.getBlockId()==oldBlock.getBlockId() : newBlock + " and "
5994 + oldBlock + " has different block identifier";
5995 updatePipelineInternal(clientName, oldBlock, newBlock, newNodes,
5996 newStorageIDs, cacheEntry != null);
5997 success = true;
5998 } finally {
5999 writeUnlock();
6000 RetryCache.setState(cacheEntry, success);
6001 }
6002 getEditLog().logSync();
6003 LOG.info("updatePipeline(" + oldBlock + ") successfully to " + newBlock);
6004 }
6005
  /** @see #updatePipeline(String, ExtendedBlock, ExtendedBlock, DatanodeID[])
   * Performs the actual pipeline update under the write lock: validates the
   * lease, rejects stale updates, then records the new stamp/length/locations.
   */
  private void updatePipelineInternal(String clientName, ExtendedBlock oldBlock,
      ExtendedBlock newBlock, DatanodeID[] newNodes, String[] newStorageIDs,
      boolean logRetryCache)
      throws IOException {
    assert hasWriteLock();
    // check the validity of the block and lease holder name
    final INodeFile pendingFile = checkUCBlock(oldBlock, clientName);
    final BlockInfoUnderConstruction blockinfo
        = (BlockInfoUnderConstruction)pendingFile.getLastBlock();

    // check new GS & length: this is not expected
    // (the new stamp must advance and the length must not shrink)
    if (newBlock.getGenerationStamp() <= blockinfo.getGenerationStamp() ||
        newBlock.getNumBytes() < blockinfo.getNumBytes()) {
      String msg = "Update " + oldBlock + " (len = " +
        blockinfo.getNumBytes() + ") to an older state: " + newBlock +
        " (len = " + newBlock.getNumBytes() +")";
      LOG.warn(msg);
      throw new IOException(msg);
    }

    // Update old block with the new generation stamp and new length
    blockinfo.setNumBytes(newBlock.getNumBytes());
    blockinfo.setGenerationStampAndVerifyReplicas(newBlock.getGenerationStamp());

    // find the DatanodeDescriptor objects
    final DatanodeStorageInfo[] storages = blockManager.getDatanodeManager()
        .getDatanodeStorageInfos(newNodes, newStorageIDs);
    blockinfo.setExpectedLocations(storages);

    // Persist the block list of the file being written.
    String src = pendingFile.getFullPathName();
    dir.persistBlocks(src, pendingFile, logRetryCache);
  }
6039
  // rename was successful. If any part of the renamed subtree had
  // files that were being written to, update with new filename.
  // Caller must hold the write lock (asserted below).
  void unprotectedChangeLease(String src, String dst) {
    assert hasWriteLock();
    leaseManager.changeLease(src, dst);
  }
6046
6047 /**
6048 * @return all the under-construction files in the lease map
6049 */
6050 Map<String, INodeFile> getFilesUnderConstruction() {
6051 synchronized (leaseManager) {
6052 return leaseManager.getINodesUnderConstruction();
6053 }
6054 }
6055
6056 /**
6057 * Register a Backup name-node, verifying that it belongs
6058 * to the correct namespace, and adding it to the set of
6059 * active journals if necessary.
6060 *
6061 * @param bnReg registration of the new BackupNode
6062 * @param nnReg registration of this NameNode
6063 * @throws IOException if the namespace IDs do not match
6064 */
6065 void registerBackupNode(NamenodeRegistration bnReg,
6066 NamenodeRegistration nnReg) throws IOException {
6067 writeLock();
6068 try {
6069 if(getFSImage().getStorage().getNamespaceID()
6070 != bnReg.getNamespaceID())
6071 throw new IOException("Incompatible namespaceIDs: "
6072 + " Namenode namespaceID = "
6073 + getFSImage().getStorage().getNamespaceID() + "; "
6074 + bnReg.getRole() +
6075 " node namespaceID = " + bnReg.getNamespaceID());
6076 if (bnReg.getRole() == NamenodeRole.BACKUP) {
6077 getFSImage().getEditLog().registerBackupNode(
6078 bnReg, nnReg);
6079 }
6080 } finally {
6081 writeUnlock();
6082 }
6083 }
6084
6085 /**
6086 * Release (unregister) backup node.
6087 * <p>
6088 * Find and remove the backup stream corresponding to the node.
6089 * @param registration
6090 * @throws IOException
6091 */
6092 void releaseBackupNode(NamenodeRegistration registration)
6093 throws IOException {
6094 checkOperation(OperationCategory.WRITE);
6095 writeLock();
6096 try {
6097 checkOperation(OperationCategory.WRITE);
6098 if(getFSImage().getStorage().getNamespaceID()
6099 != registration.getNamespaceID())
6100 throw new IOException("Incompatible namespaceIDs: "
6101 + " Namenode namespaceID = "
6102 + getFSImage().getStorage().getNamespaceID() + "; "
6103 + registration.getRole() +
6104 " node namespaceID = " + registration.getNamespaceID());
6105 getEditLog().releaseBackupStream(registration);
6106 } finally {
6107 writeUnlock();
6108 }
6109 }
6110
  /**
   * Pairs a corrupt block with the full path of the file it belongs to.
   * Result element type of {@link #listCorruptFileBlocks}.
   */
  static class CorruptFileBlockInfo {
    // full path of the file containing the corrupt block
    final String path;
    // the corrupt block itself
    final Block block;

    public CorruptFileBlockInfo(String p, Block b) {
      path = p;
      block = b;
    }

    // tab-separated "blockName<TAB>path", consumed by fsck-style listings
    @Override
    public String toString() {
      return block.getBlockName() + "\t" + path;
    }
  }
6125 /**
6126 * @param path Restrict corrupt files to this portion of namespace.
6127 * @param startBlockAfter Support for continuation; the set of files we return
6128 * back is ordered by blockid; startBlockAfter tells where to start from
6129 * @return a list in which each entry describes a corrupt file/block
6130 * @throws AccessControlException
6131 * @throws IOException
6132 */
6133 Collection<CorruptFileBlockInfo> listCorruptFileBlocks(String path,
6134 String[] cookieTab) throws IOException {
6135 checkSuperuserPrivilege();
6136 checkOperation(OperationCategory.READ);
6137 readLock();
6138 try {
6139 checkOperation(OperationCategory.READ);
6140 if (!isPopulatingReplQueues()) {
6141 throw new IOException("Cannot run listCorruptFileBlocks because " +
6142 "replication queues have not been initialized.");
6143 }
6144 // print a limited # of corrupt files per call
6145 int count = 0;
6146 ArrayList<CorruptFileBlockInfo> corruptFiles = new ArrayList<CorruptFileBlockInfo>();
6147
6148 final Iterator<Block> blkIterator = blockManager.getCorruptReplicaBlockIterator();
6149
6150 if (cookieTab == null) {
6151 cookieTab = new String[] { null };
6152 }
6153 int skip = getIntCookie(cookieTab[0]);
6154 for (int i = 0; i < skip && blkIterator.hasNext(); i++) {
6155 blkIterator.next();
6156 }
6157
6158 while (blkIterator.hasNext()) {
6159 Block blk = blkIterator.next();
6160 final INode inode = (INode)blockManager.getBlockCollection(blk);
6161 skip++;
6162 if (inode != null && blockManager.countNodes(blk).liveReplicas() == 0) {
6163 String src = FSDirectory.getFullPathName(inode);
6164 if (src.startsWith(path)){
6165 corruptFiles.add(new CorruptFileBlockInfo(src, blk));
6166 count++;
6167 if (count >= DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED)
6168 break;
6169 }
6170 }
6171 }
6172 cookieTab[0] = String.valueOf(skip);
6173 LOG.info("list corrupt file blocks returned: " + count);
6174 return corruptFiles;
6175 } finally {
6176 readUnlock();
6177 }
6178 }
6179
6180 /**
6181 * Convert string cookie to integer.
6182 */
6183 private static int getIntCookie(String cookie){
6184 int c;
6185 if(cookie == null){
6186 c = 0;
6187 } else {
6188 try{
6189 c = Integer.parseInt(cookie);
6190 }catch (NumberFormatException e) {
6191 c = 0;
6192 }
6193 }
6194 c = Math.max(0, c);
6195 return c;
6196 }
6197
6198 /**
6199 * Create delegation token secret manager
6200 */
6201 private DelegationTokenSecretManager createDelegationTokenSecretManager(
6202 Configuration conf) {
6203 return new DelegationTokenSecretManager(conf.getLong(
6204 DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY,
6205 DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT),
6206 conf.getLong(DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY,
6207 DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT),
6208 conf.getLong(DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY,
6209 DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT),
6210 DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL,
6211 conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY,
6212 DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT),
6213 this);
6214 }
6215
6216 /**
6217 * Returns the DelegationTokenSecretManager instance in the namesystem.
6218 * @return delegation token secret manager object
6219 */
6220 DelegationTokenSecretManager getDelegationTokenSecretManager() {
6221 return dtSecretManager;
6222 }
6223
6224 /**
6225 * @param renewer
6226 * @return Token<DelegationTokenIdentifier>
6227 * @throws IOException
6228 */
6229 Token<DelegationTokenIdentifier> getDelegationToken(Text renewer)
6230 throws IOException {
6231 Token<DelegationTokenIdentifier> token;
6232 checkOperation(OperationCategory.WRITE);
6233 writeLock();
6234 try {
6235 checkOperation(OperationCategory.WRITE);
6236 checkNameNodeSafeMode("Cannot issue delegation token");
6237 if (!isAllowedDelegationTokenOp()) {
6238 throw new IOException(
6239 "Delegation Token can be issued only with kerberos or web authentication");
6240 }
6241 if (dtSecretManager == null || !dtSecretManager.isRunning()) {
6242 LOG.warn("trying to get DT with no secret manager running");
6243 return null;
6244 }
6245
6246 UserGroupInformation ugi = getRemoteUser();
6247 String user = ugi.getUserName();
6248 Text owner = new Text(user);
6249 Text realUser = null;
6250 if (ugi.getRealUser() != null) {
6251 realUser = new Text(ugi.getRealUser().getUserName());
6252 }
6253 DelegationTokenIdentifier dtId = new DelegationTokenIdentifier(owner,
6254 renewer, realUser);
6255 token = new Token<DelegationTokenIdentifier>(
6256 dtId, dtSecretManager);
6257 long expiryTime = dtSecretManager.getTokenExpiryTime(dtId);
6258 getEditLog().logGetDelegationToken(dtId, expiryTime);
6259 } finally {
6260 writeUnlock();
6261 }
6262 getEditLog().logSync();
6263 return token;
6264 }
6265
6266 /**
6267 *
6268 * @param token
6269 * @return New expiryTime of the token
6270 * @throws InvalidToken
6271 * @throws IOException
6272 */
6273 long renewDelegationToken(Token<DelegationTokenIdentifier> token)
6274 throws InvalidToken, IOException {
6275 long expiryTime;
6276 checkOperation(OperationCategory.WRITE);
6277 writeLock();
6278 try {
6279 checkOperation(OperationCategory.WRITE);
6280
6281 checkNameNodeSafeMode("Cannot renew delegation token");
6282 if (!isAllowedDelegationTokenOp()) {
6283 throw new IOException(
6284 "Delegation Token can be renewed only with kerberos or web authentication");
6285 }
6286 String renewer = getRemoteUser().getShortUserName();
6287 expiryTime = dtSecretManager.renewToken(token, renewer);
6288 DelegationTokenIdentifier id = new DelegationTokenIdentifier();
6289 ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier());
6290 DataInputStream in = new DataInputStream(buf);
6291 id.readFields(in);
6292 getEditLog().logRenewDelegationToken(id, expiryTime);
6293 } finally {
6294 writeUnlock();
6295 }
6296 getEditLog().logSync();
6297 return expiryTime;
6298 }
6299
6300 /**
6301 *
6302 * @param token
6303 * @throws IOException
6304 */
6305 void cancelDelegationToken(Token<DelegationTokenIdentifier> token)
6306 throws IOException {
6307 checkOperation(OperationCategory.WRITE);
6308 writeLock();
6309 try {
6310 checkOperation(OperationCategory.WRITE);
6311
6312 checkNameNodeSafeMode("Cannot cancel delegation token");
6313 String canceller = getRemoteUser().getUserName();
6314 DelegationTokenIdentifier id = dtSecretManager
6315 .cancelToken(token, canceller);
6316 getEditLog().logCancelDelegationToken(id);
6317 } finally {
6318 writeUnlock();
6319 }
6320 getEditLog().logSync();
6321 }
6322
  /** @return the current secret manager state, for fsimage saving. */
  SecretManagerState saveSecretManagerState() {
    return dtSecretManager.saveSecretManagerState();
  }

  /**
   * Load the secret manager state from a legacy-format fsimage stream.
   *
   * @param in input stream to read the serialized state from
   * @throws IOException on a malformed or unreadable stream
   */
  void loadSecretManagerStateCompat(DataInput in) throws IOException {
    dtSecretManager.loadSecretManagerStateCompat(in);
  }

  /**
   * Load the secret manager state from a protobuf-format fsimage section.
   *
   * @param s the secret manager section header
   * @param keys persisted delegation keys
   * @param tokens persisted delegation tokens
   * @throws IOException if the state cannot be loaded
   */
  void loadSecretManagerState(SecretManagerSection s,
      List<SecretManagerSection.DelegationKey> keys,
      List<SecretManagerSection.PersistToken> tokens) throws IOException {
    dtSecretManager.loadSecretManagerState(new SecretManagerState(s, keys, tokens));
  }
6339
6340 /**
6341 * Log the updateMasterKey operation to edit logs
6342 *
6343 * @param key new delegation key.
6344 */
6345 public void logUpdateMasterKey(DelegationKey key) {
6346
6347 assert !isInSafeMode() :
6348 "this should never be called while in safemode, since we stop " +
6349 "the DT manager before entering safemode!";
6350 // No need to hold FSN lock since we don't access any internal
6351 // structures, and this is stopped before the FSN shuts itself
6352 // down, etc.
6353 getEditLog().logUpdateMasterKey(key);
6354 getEditLog().logSync();
6355 }
6356
6357 /**
6358 * Log the cancellation of expired tokens to edit logs
6359 *
6360 * @param id token identifier to cancel
6361 */
6362 public void logExpireDelegationToken(DelegationTokenIdentifier id) {
6363 assert !isInSafeMode() :
6364 "this should never be called while in safemode, since we stop " +
6365 "the DT manager before entering safemode!";
6366 // No need to hold FSN lock since we don't access any internal
6367 // structures, and this is stopped before the FSN shuts itself
6368 // down, etc.
6369 getEditLog().logCancelDelegationToken(id);
6370 }
6371
6372 private void logReassignLease(String leaseHolder, String src,
6373 String newHolder) {
6374 assert hasWriteLock();
6375 getEditLog().logReassignLease(leaseHolder, src, newHolder);
6376 }
6377
6378 /**
6379 *
6380 * @return true if delegation token operation is allowed
6381 */
6382 private boolean isAllowedDelegationTokenOp() throws IOException {
6383 AuthenticationMethod authMethod = getConnectionAuthenticationMethod();
6384 if (UserGroupInformation.isSecurityEnabled()
6385 && (authMethod != AuthenticationMethod.KERBEROS)
6386 && (authMethod != AuthenticationMethod.KERBEROS_SSL)
6387 && (authMethod != AuthenticationMethod.CERTIFICATE)) {
6388 return false;
6389 }
6390 return true;
6391 }
6392
6393 /**
6394 * Returns authentication method used to establish the connection
6395 * @return AuthenticationMethod used to establish connection
6396 * @throws IOException
6397 */
6398 private AuthenticationMethod getConnectionAuthenticationMethod()
6399 throws IOException {
6400 UserGroupInformation ugi = getRemoteUser();
6401 AuthenticationMethod authMethod = ugi.getAuthenticationMethod();
6402 if (authMethod == AuthenticationMethod.PROXY) {
6403 authMethod = ugi.getRealUser().getAuthenticationMethod();
6404 }
6405 return authMethod;
6406 }
6407
6408 /**
6409 * Client invoked methods are invoked over RPC and will be in
6410 * RPC call context even if the client exits.
6411 */
6412 private boolean isExternalInvocation() {
6413 return Server.isRpcInvocation() || NamenodeWebHdfsMethods.isWebHdfsInvocation();
6414 }
6415
6416 private static InetAddress getRemoteIp() {
6417 InetAddress ip = Server.getRemoteIp();
6418 if (ip != null) {
6419 return ip;
6420 }
6421 return NamenodeWebHdfsMethods.getRemoteIp();
6422 }
6423
  // optimize ugi lookup for RPC operations to avoid a trip through
  // UGI.getCurrentUser which is synch'ed
  /** @return the user associated with the current RPC/WebHDFS call. */
  private static UserGroupInformation getRemoteUser() throws IOException {
    return NameNode.getRemoteUser();
  }
6429
6430 /**
6431 * Log fsck event in the audit log
6432 */
6433 void logFsckEvent(String src, InetAddress remoteAddress) throws IOException {
6434 if (isAuditEnabled()) {
6435 logAuditEvent(true, getRemoteUser(),
6436 remoteAddress,
6437 "fsck", src, null, null);
6438 }
6439 }
6440 /**
6441 * Register NameNodeMXBean
6442 */
6443 private void registerMXBean() {
6444 mxbeanName = MBeans.register("NameNode", "NameNodeInfo", this);
6445 }
6446
6447 /**
6448 * Class representing Namenode information for JMX interfaces
6449 */
6450 @Override // NameNodeMXBean
6451 public String getVersion() {
6452 return VersionInfo.getVersion() + ", r" + VersionInfo.getRevision();
6453 }
6454
  /** @return total DFS capacity used, in bytes. */
  @Override // NameNodeMXBean
  public long getUsed() {
    return this.getCapacityUsed();
  }

  /** @return total DFS capacity remaining, in bytes. */
  @Override // NameNodeMXBean
  public long getFree() {
    return this.getCapacityRemaining();
  }

  /** @return total configured DFS capacity, in bytes. */
  @Override // NameNodeMXBean
  public long getTotal() {
    return this.getCapacityTotal();
  }
6469
6470 @Override // NameNodeMXBean
6471 public String getSafemode() {
6472 if (!this.isInSafeMode())
6473 return "";
6474 return "Safe mode is ON. " + this.getSafeModeTip();
6475 }
6476
  /** @return true when the last layout upgrade has been finalized. */
  @Override // NameNodeMXBean
  public boolean isUpgradeFinalized() {
    return this.getFSImage().isUpgradeFinalized();
  }

  /** @return capacity used by non-DFS data across live datanodes, bytes. */
  @Override // NameNodeMXBean
  public long getNonDfsUsedSpace() {
    return datanodeStatistics.getCapacityUsedNonDFS();
  }

  /** @return percentage of total capacity used by DFS. */
  @Override // NameNodeMXBean
  public float getPercentUsed() {
    return datanodeStatistics.getCapacityUsedPercent();
  }

  /** @return bytes used by this block pool across live datanodes. */
  @Override // NameNodeMXBean
  public long getBlockPoolUsedSpace() {
    return datanodeStatistics.getBlockPoolUsed();
  }

  /** @return percentage of total capacity used by this block pool. */
  @Override // NameNodeMXBean
  public float getPercentBlockPoolUsed() {
    return datanodeStatistics.getPercentBlockPoolUsed();
  }

  /** @return percentage of total capacity still remaining. */
  @Override // NameNodeMXBean
  public float getPercentRemaining() {
    return datanodeStatistics.getCapacityRemainingPercent();
  }

  /** @return total cache capacity across live datanodes, bytes. */
  @Override // NameNodeMXBean
  public long getCacheCapacity() {
    return datanodeStatistics.getCacheCapacity();
  }

  /** @return total cache used across live datanodes, bytes. */
  @Override // NameNodeMXBean
  public long getCacheUsed() {
    return datanodeStatistics.getCacheUsed();
  }

  /** @return total number of blocks in the namespace. */
  @Override // NameNodeMXBean
  public long getTotalBlocks() {
    return getBlocksTotal();
  }

  /** @return total number of files and directories in the namespace. */
  @Override // NameNodeMXBean
  @Metric
  public long getTotalFiles() {
    return getFilesTotal();
  }

  /** @return number of blocks with no replicas anywhere. */
  @Override // NameNodeMXBean
  public long getNumberOfMissingBlocks() {
    return getMissingBlocksCount();
  }

  /** @return live thread count of this JVM. */
  @Override // NameNodeMXBean
  public int getThreads() {
    return ManagementFactory.getThreadMXBean().getThreadCount();
  }
6537
6538 /**
6539 * Returned information is a JSON representation of map with host name as the
6540 * key and value is a map of live node attribute keys to its values
6541 */
6542 @Override // NameNodeMXBean
6543 public String getLiveNodes() {
6544 final Map<String, Map<String,Object>> info =
6545 new HashMap<String, Map<String,Object>>();
6546 final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
6547 blockManager.getDatanodeManager().fetchDatanodes(live, null, true);
6548 for (DatanodeDescriptor node : live) {
6549 Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder()
6550 .put("infoAddr", node.getInfoAddr())
6551 .put("infoSecureAddr", node.getInfoSecureAddr())
6552 .put("xferaddr", node.getXferAddr())
6553 .put("lastContact", getLastContact(node))
6554 .put("usedSpace", getDfsUsed(node))
6555 .put("adminState", node.getAdminState().toString())
6556 .put("nonDfsUsedSpace", node.getNonDfsUsed())
6557 .put("capacity", node.getCapacity())
6558 .put("numBlocks", node.numBlocks())
6559 .put("version", node.getSoftwareVersion())
6560 .put("used", node.getDfsUsed())
6561 .put("remaining", node.getRemaining())
6562 .put("blockScheduled", node.getBlocksScheduled())
6563 .put("blockPoolUsed", node.getBlockPoolUsed())
6564 .put("blockPoolUsedPercent", node.getBlockPoolUsedPercent())
6565 .put("volfails", node.getVolumeFailures())
6566 .build();
6567
6568 info.put(node.getHostName(), innerinfo);
6569 }
6570 return JSON.toString(info);
6571 }
6572
6573 /**
6574 * Returned information is a JSON representation of map with host name as the
6575 * key and value is a map of dead node attribute keys to its values
6576 */
6577 @Override // NameNodeMXBean
6578 public String getDeadNodes() {
6579 final Map<String, Map<String, Object>> info =
6580 new HashMap<String, Map<String, Object>>();
6581 final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
6582 blockManager.getDatanodeManager().fetchDatanodes(null, dead, true);
6583 for (DatanodeDescriptor node : dead) {
6584 Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder()
6585 .put("lastContact", getLastContact(node))
6586 .put("decommissioned", node.isDecommissioned())
6587 .put("xferaddr", node.getXferAddr())
6588 .build();
6589 info.put(node.getHostName(), innerinfo);
6590 }
6591 return JSON.toString(info);
6592 }
6593
6594 /**
6595 * Returned information is a JSON representation of map with host name as the
6596 * key and value is a map of decomisioning node attribute keys to its values
6597 */
6598 @Override // NameNodeMXBean
6599 public String getDecomNodes() {
6600 final Map<String, Map<String, Object>> info =
6601 new HashMap<String, Map<String, Object>>();
6602 final List<DatanodeDescriptor> decomNodeList = blockManager.getDatanodeManager(
6603 ).getDecommissioningNodes();
6604 for (DatanodeDescriptor node : decomNodeList) {
6605 Map<String, Object> innerinfo = ImmutableMap
6606 .<String, Object> builder()
6607 .put("xferaddr", node.getXferAddr())
6608 .put("underReplicatedBlocks",
6609 node.decommissioningStatus.getUnderReplicatedBlocks())
6610 .put("decommissionOnlyReplicas",
6611 node.decommissioningStatus.getDecommissionOnlyReplicas())
6612 .put("underReplicateInOpenFiles",
6613 node.decommissioningStatus.getUnderReplicatedInOpenFiles())
6614 .build();
6615 info.put(node.getHostName(), innerinfo);
6616 }
6617 return JSON.toString(info);
6618 }
6619
  /** @return seconds elapsed since the given node's last heartbeat. */
  private long getLastContact(DatanodeDescriptor alivenode) {
    return (Time.now() - alivenode.getLastUpdate())/1000;
  }

  /** @return bytes of DFS storage used on the given node. */
  private long getDfsUsed(DatanodeDescriptor alivenode) {
    return alivenode.getDfsUsed();
  }
6627
  /** @return the cluster id recorded in the namenode's storage. */
  @Override // NameNodeMXBean
  public String getClusterId() {
    return dir.fsImage.getStorage().getClusterID();
  }

  /** @return the block pool id this namenode serves. */
  @Override // NameNodeMXBean
  public String getBlockPoolId() {
    return blockPoolId;
  }
6637
6638 @Override // NameNodeMXBean
6639 public String getNameDirStatuses() {
6640 Map<String, Map<File, StorageDirType>> statusMap =
6641 new HashMap<String, Map<File, StorageDirType>>();
6642
6643 Map<File, StorageDirType> activeDirs = new HashMap<File, StorageDirType>();
6644 for (Iterator<StorageDirectory> it
6645 = getFSImage().getStorage().dirIterator(); it.hasNext();) {
6646 StorageDirectory st = it.next();
6647 activeDirs.put(st.getRoot(), st.getStorageDirType());
6648 }
6649 statusMap.put("active", activeDirs);
6650
6651 List<Storage.StorageDirectory> removedStorageDirs
6652 = getFSImage().getStorage().getRemovedStorageDirs();
6653 Map<File, StorageDirType> failedDirs = new HashMap<File, StorageDirType>();
6654 for (StorageDirectory st : removedStorageDirs) {
6655 failedDirs.put(st.getRoot(), st.getStorageDirType());
6656 }
6657 statusMap.put("failed", failedDirs);
6658
6659 return JSON.toString(statusMap);
6660 }
6661
  /**
   * @return JSON summary of per-datanode DFS usage percentages:
   *         min, median, max, and population standard deviation.
   */
  @Override // NameNodeMXBean
  public String getNodeUsage() {
    float median = 0;
    float max = 0;
    float min = 0;
    float dev = 0;

    final Map<String, Map<String,Object>> info =
      new HashMap<String, Map<String,Object>>();
    final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
    blockManager.getDatanodeManager().fetchDatanodes(live, null, true);

    if (live.size() > 0) {
      float totalDfsUsed = 0;
      float[] usages = new float[live.size()];
      int i = 0;
      for (DatanodeDescriptor dn : live) {
        usages[i++] = dn.getDfsUsedPercent();
        totalDfsUsed += dn.getDfsUsedPercent();
      }
      // totalDfsUsed now holds the mean usage percentage
      totalDfsUsed /= live.size();
      Arrays.sort(usages);
      // upper median for even-sized arrays
      median = usages[usages.length / 2];
      max = usages[usages.length - 1];
      min = usages[0];

      // population standard deviation around the mean
      for (i = 0; i < usages.length; i++) {
        dev += (usages[i] - totalDfsUsed) * (usages[i] - totalDfsUsed);
      }
      dev = (float) Math.sqrt(dev / usages.length);
    }

    final Map<String, Object> innerInfo = new HashMap<String, Object>();
    innerInfo.put("min", StringUtils.format("%.2f%%", min));
    innerInfo.put("median", StringUtils.format("%.2f%%", median));
    innerInfo.put("max", StringUtils.format("%.2f%%", max));
    innerInfo.put("stdDev", StringUtils.format("%.2f%%", dev));
    info.put("nodeUsage", innerInfo);

    return JSON.toString(info);
  }
6703
  /**
   * @return JSON list describing each edit-log journal: its manager,
   *         whether it is required/disabled, and its current stream state.
   */
  @Override // NameNodeMXBean
  public String getNameJournalStatus() {
    List<Map<String, String>> jasList = new ArrayList<Map<String, String>>();
    FSEditLog log = getFSImage().getEditLog();
    if (log != null) {
      boolean openForWrite = log.isOpenForWrite();
      for (JournalAndStream jas : log.getJournals()) {
        final Map<String, String> jasMap = new HashMap<String, String>();
        String manager = jas.getManager().toString();

        jasMap.put("required", String.valueOf(jas.isRequired()));
        jasMap.put("disabled", String.valueOf(jas.isDisabled()));
        jasMap.put("manager", manager);

        // stream state: failed, writing, idle, or read-only
        if (jas.isDisabled()) {
          jasMap.put("stream", "Failed");
        } else if (openForWrite) {
          EditLogOutputStream elos = jas.getCurrentStream();
          if (elos != null) {
            jasMap.put("stream", elos.generateReport());
          } else {
            jasMap.put("stream", "not currently writing");
          }
        } else {
          jasMap.put("stream", "open for read");
        }
        jasList.add(jasMap);
      }
    }
    return JSON.toString(jasList);
  }
6735
  /**
   * @return JSON map with the last applied/written transaction id and the
   *         transaction id of the most recent checkpoint.
   */
  @Override // NameNodeMXBean
  public String getJournalTransactionInfo() {
    Map<String, String> txnIdMap = new HashMap<String, String>();
    txnIdMap.put("LastAppliedOrWrittenTxId",
        Long.toString(this.getFSImage().getLastAppliedOrWrittenTxId()));
    txnIdMap.put("MostRecentCheckpointTxId",
        Long.toString(this.getFSImage().getMostRecentCheckpointTxId()));
    return JSON.toString(txnIdMap);
  }
6745
  /** @return the namenode start time as a human-readable string. */
  @Override // NameNodeMXBean
  public String getNNStarted() {
    return getStartTime().toString();
  }

  /** @return build date, user, and branch of this software version. */
  @Override // NameNodeMXBean
  public String getCompileInfo() {
    return VersionInfo.getDate() + " by " + VersionInfo.getUser() +
        " from " + VersionInfo.getBranch();
  }
6756
  /** @return the block manager. */
  public BlockManager getBlockManager() {
    return blockManager;
  }
  /** @return the FSDirectory. */
  public FSDirectory getFSDirectory() {
    return dir;
  }
  /** @return the cache manager. */
  public CacheManager getCacheManager() {
    return cacheManager;
  }
6769
6770 @Override // NameNodeMXBean
6771 public String getCorruptFiles() {
6772 List<String> list = new ArrayList<String>();
6773 Collection<FSNamesystem.CorruptFileBlockInfo> corruptFileBlocks;
6774 try {
6775 corruptFileBlocks = listCorruptFileBlocks("/", null);
6776 int corruptFileCount = corruptFileBlocks.size();
6777 if (corruptFileCount != 0) {
6778 for (FSNamesystem.CorruptFileBlockInfo c : corruptFileBlocks) {
6779 list.add(c.toString());
6780 }
6781 }
6782 } catch (IOException e) {
6783 LOG.warn("Get corrupt file blocks returned error: " + e.getMessage());
6784 }
6785 return JSON.toString(list);
6786 }
6787
  /** @return the number of distinct datanode software versions in use. */
  @Override //NameNodeMXBean
  public int getDistinctVersionCount() {
    return blockManager.getDatanodeManager().getDatanodesSoftwareVersions()
      .size();
  }

  /** @return map of datanode software version to node count. */
  @Override //NameNodeMXBean
  public Map<String, Integer> getDistinctVersions() {
    return blockManager.getDatanodeManager().getDatanodesSoftwareVersions();
  }

  /** @return this namenode's own software version string. */
  @Override //NameNodeMXBean
  public String getSoftwareVersion() {
    return VersionInfo.getVersion();
  }
6803
6804 /**
6805 * Verifies that the given identifier and password are valid and match.
6806 * @param identifier Token identifier.
6807 * @param password Password in the token.
6808 */
6809 public synchronized void verifyToken(DelegationTokenIdentifier identifier,
6810 byte[] password) throws InvalidToken, RetriableException {
6811 try {
6812 getDelegationTokenSecretManager().verifyToken(identifier, password);
6813 } catch (InvalidToken it) {
6814 if (inTransitionToActive()) {
6815 throw new RetriableException(it);
6816 }
6817 throw it;
6818 }
6819 }
6820
6821 @Override
6822 public boolean isGenStampInFuture(Block block) {
6823 if (isLegacyBlock(block)) {
6824 return block.getGenerationStamp() > getGenerationStampV1();
6825 } else {
6826 return block.getGenerationStamp() > getGenerationStampV2();
6827 }
6828 }
6829
  /** @return the standby-state edit log tailer; for tests. */
  @VisibleForTesting
  public EditLogTailer getEditLogTailer() {
    return editLogTailer;
  }

  /** Replace the edit log tailer; for tests only. */
  @VisibleForTesting
  public void setEditLogTailerForTests(EditLogTailer tailer) {
    this.editLogTailer = tailer;
  }

  /** Replace the coarse-grained namesystem lock; for tests only. */
  @VisibleForTesting
  void setFsLockForTests(ReentrantReadWriteLock lock) {
    this.fsLock.coarseLock = lock;
  }

  /** @return the coarse-grained namesystem lock; for tests. */
  @VisibleForTesting
  public ReentrantReadWriteLock getFsLockForTests() {
    return fsLock.coarseLock;
  }

  /** @return the long-read lock used for lengthy read operations; for tests. */
  @VisibleForTesting
  public ReentrantLock getLongReadLockForTests() {
    return fsLock.longReadLock;
  }

  /** @return the current safe mode information, or null; for tests. */
  @VisibleForTesting
  public SafeModeInfo getSafeModeInfoForTests() {
    return safeMode;
  }

  /** Replace the resource checker; for tests only. */
  @VisibleForTesting
  public void setNNResourceChecker(NameNodeResourceChecker nnResourceChecker) {
    this.nnResourceChecker = nnResourceChecker;
  }
6864
  /** @return whether writes currently avoid stale datanodes. */
  @Override
  public boolean isAvoidingStaleDataNodesForWrite() {
    return this.blockManager.getDatanodeManager()
        .shouldAvoidStaleDataNodesForWrite();
  }

  /** @return live datanodes not being decommissioned. */
  @Override // FSClusterStats
  public int getNumDatanodesInService() {
    return getNumLiveDataNodes() - getNumDecomLiveDataNodes();
  }

  /** @return the snapshot manager. */
  public SnapshotManager getSnapshotManager() {
    return snapshotManager;
  }
6879
  /**
   * Allow snapshots to be taken on a directory. Superuser only.
   *
   * @param path directory to mark snapshottable
   * @throws SafeModeException if the namenode is in safe mode
   * @throws IOException if the directory cannot be made snapshottable
   */
  void allowSnapshot(String path) throws SafeModeException, IOException {
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      // re-check under the lock in case of an HA state transition
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot allow snapshot for " + path);
      checkSuperuserPrivilege();

      dir.writeLock();
      try {
        snapshotManager.setSnapshottable(path, true);
      } finally {
        dir.writeUnlock();
      }
      getEditLog().logAllowSnapshot(path);
    } finally {
      writeUnlock();
    }
    // sync the edit log outside the write lock
    getEditLog().logSync();

    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "allowSnapshot", path, null, null);
    }
  }
6905
  /**
   * Disallow snapshots on a directory. Superuser only.
   *
   * @param path directory to stop being snapshottable
   * @throws SafeModeException if the namenode is in safe mode
   * @throws IOException if the directory still has snapshots or the
   *         reset otherwise fails
   */
  void disallowSnapshot(String path) throws SafeModeException, IOException {
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      // re-check under the lock in case of an HA state transition
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot disallow snapshot for " + path);
      checkSuperuserPrivilege();

      dir.writeLock();
      try {
        snapshotManager.resetSnapshottable(path);
      } finally {
        dir.writeUnlock();
      }
      getEditLog().logDisallowSnapshot(path);
    } finally {
      writeUnlock();
    }
    // sync the edit log outside the write lock
    getEditLog().logSync();

    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "disallowSnapshot", path, null, null);
    }
  }
6931
6932 /**
6933 * Create a snapshot
6934 * @param snapshotRoot The directory path where the snapshot is taken
6935 * @param snapshotName The name of the snapshot
6936 */
6937 String createSnapshot(String snapshotRoot, String snapshotName)
6938 throws SafeModeException, IOException {
6939 checkOperation(OperationCategory.WRITE);
6940 final FSPermissionChecker pc = getPermissionChecker();
6941 CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
6942 null);
6943 if (cacheEntry != null && cacheEntry.isSuccess()) {
6944 return (String) cacheEntry.getPayload();
6945 }
6946 String snapshotPath = null;
6947 writeLock();
6948 try {
6949 checkOperation(OperationCategory.WRITE);
6950 checkNameNodeSafeMode("Cannot create snapshot for " + snapshotRoot);
6951 if (isPermissionEnabled) {
6952 checkOwner(pc, snapshotRoot);
6953 }
6954
6955 if (snapshotName == null || snapshotName.isEmpty()) {
6956 snapshotName = Snapshot.generateDefaultSnapshotName();
6957 }
6958 if(snapshotName != null){
6959 if (!DFSUtil.isValidNameForComponent(snapshotName)) {
6960 throw new InvalidPathException("Invalid snapshot name: "
6961 + snapshotName);
6962 }
6963 }
6964 dir.verifySnapshotName(snapshotName, snapshotRoot);
6965 dir.writeLock();
6966 try {
6967 snapshotPath = snapshotManager.createSnapshot(snapshotRoot, snapshotName);
6968 } finally {
6969 dir.writeUnlock();
6970 }
6971 getEditLog().logCreateSnapshot(snapshotRoot, snapshotName,
6972 cacheEntry != null);
6973 } finally {
6974 writeUnlock();
6975 RetryCache.setState(cacheEntry, snapshotPath != null, snapshotPath);
6976 }
6977 getEditLog().logSync();
6978
6979 if (auditLog.isInfoEnabled() && isExternalInvocation()) {
6980 logAuditEvent(true, "createSnapshot", snapshotRoot, snapshotPath, null);
6981 }
6982 return snapshotPath;
6983 }
6984
6985 /**
6986 * Rename a snapshot
6987 * @param path The directory path where the snapshot was taken
6988 * @param snapshotOldName Old snapshot name
6989 * @param snapshotNewName New snapshot name
6990 * @throws SafeModeException
6991 * @throws IOException
6992 */
6993 void renameSnapshot(String path, String snapshotOldName,
6994 String snapshotNewName) throws SafeModeException, IOException {
6995 checkOperation(OperationCategory.WRITE);
6996 final FSPermissionChecker pc = getPermissionChecker();
6997 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
6998 if (cacheEntry != null && cacheEntry.isSuccess()) {
6999 return; // Return previous response
7000 }
7001 writeLock();
7002 boolean success = false;
7003 try {
7004 checkOperation(OperationCategory.WRITE);
7005 checkNameNodeSafeMode("Cannot rename snapshot for " + path);
7006 if (isPermissionEnabled) {
7007 checkOwner(pc, path);
7008 }
7009 dir.verifySnapshotName(snapshotNewName, path);
7010
7011 snapshotManager.renameSnapshot(path, snapshotOldName, snapshotNewName);
7012 getEditLog().logRenameSnapshot(path, snapshotOldName, snapshotNewName,
7013 cacheEntry != null);
7014 success = true;
7015 } finally {
7016 writeUnlock();
7017 RetryCache.setState(cacheEntry, success);
7018 }
7019 getEditLog().logSync();
7020
7021 if (auditLog.isInfoEnabled() && isExternalInvocation()) {
7022 String oldSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotOldName);
7023 String newSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotNewName);
7024 logAuditEvent(true, "renameSnapshot", oldSnapshotRoot, newSnapshotRoot, null);
7025 }
7026 }
7027
7028 /**
7029 * Get the list of snapshottable directories that are owned
7030 * by the current user. Return all the snapshottable directories if the
7031 * current user is a super user.
7032 * @return The list of all the current snapshottable directories
7033 * @throws IOException
7034 */
7035 public SnapshottableDirectoryStatus[] getSnapshottableDirListing()
7036 throws IOException {
7037 SnapshottableDirectoryStatus[] status = null;
7038 checkOperation(OperationCategory.READ);
7039 final FSPermissionChecker checker = getPermissionChecker();
7040 readLock();
7041 try {
7042 checkOperation(OperationCategory.READ);
7043 final String user = checker.isSuperUser()? null : checker.getUser();
7044 status = snapshotManager.getSnapshottableDirListing(user);
7045 } finally {
7046 readUnlock();
7047 }
7048 if (auditLog.isInfoEnabled() && isExternalInvocation()) {
7049 logAuditEvent(true, "listSnapshottableDirectory", null, null, null);
7050 }
7051 return status;
7052 }
7053
7054 /**
7055 * Get the difference between two snapshots (or between a snapshot and the
7056 * current status) of a snapshottable directory.
7057 *
7058 * @param path The full path of the snapshottable directory.
7059 * @param fromSnapshot Name of the snapshot to calculate the diff from. Null
7060 * or empty string indicates the current tree.
7061 * @param toSnapshot Name of the snapshot to calculated the diff to. Null or
7062 * empty string indicates the current tree.
7063 * @return A report about the difference between {@code fromSnapshot} and
7064 * {@code toSnapshot}. Modified/deleted/created/renamed files and
7065 * directories belonging to the snapshottable directories are listed
7066 * and labeled as M/-/+/R respectively.
7067 * @throws IOException
7068 */
7069 SnapshotDiffReport getSnapshotDiffReport(String path,
7070 String fromSnapshot, String toSnapshot) throws IOException {
7071 SnapshotDiffInfo diffs = null;
7072 checkOperation(OperationCategory.READ);
7073 final FSPermissionChecker pc = getPermissionChecker();
7074 readLock();
7075 try {
7076 checkOperation(OperationCategory.READ);
7077 if (isPermissionEnabled) {
7078 checkSubtreeReadPermission(pc, path, fromSnapshot);
7079 checkSubtreeReadPermission(pc, path, toSnapshot);
7080 }
7081 diffs = snapshotManager.diff(path, fromSnapshot, toSnapshot);
7082 } finally {
7083 readUnlock();
7084 }
7085
7086 if (auditLog.isInfoEnabled() && isExternalInvocation()) {
7087 logAuditEvent(true, "computeSnapshotDiff", null, null, null);
7088 }
7089 return diffs != null ? diffs.generateReport() : new SnapshotDiffReport(
7090 path, fromSnapshot, toSnapshot,
7091 Collections.<DiffReportEntry> emptyList());
7092 }
7093
7094 private void checkSubtreeReadPermission(final FSPermissionChecker pc,
7095 final String snapshottablePath, final String snapshot)
7096 throws AccessControlException, UnresolvedLinkException {
7097 final String fromPath = snapshot == null?
7098 snapshottablePath: Snapshot.getSnapshotPath(snapshottablePath, snapshot);
7099 checkPermission(pc, fromPath, false, null, null, FsAction.READ, FsAction.READ);
7100 }
7101
7102 /**
7103 * Delete a snapshot of a snapshottable directory
7104 * @param snapshotRoot The snapshottable directory
7105 * @param snapshotName The name of the to-be-deleted snapshot
7106 * @throws SafeModeException
7107 * @throws IOException
7108 */
7109 void deleteSnapshot(String snapshotRoot, String snapshotName)
7110 throws SafeModeException, IOException {
7111 checkOperation(OperationCategory.WRITE);
7112 final FSPermissionChecker pc = getPermissionChecker();
7113
7114 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
7115 if (cacheEntry != null && cacheEntry.isSuccess()) {
7116 return; // Return previous response
7117 }
7118 boolean success = false;
7119 BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
7120 writeLock();
7121 try {
7122 checkOperation(OperationCategory.WRITE);
7123 checkNameNodeSafeMode("Cannot delete snapshot for " + snapshotRoot);
7124 if (isPermissionEnabled) {
7125 checkOwner(pc, snapshotRoot);
7126 }
7127
7128 List<INode> removedINodes = new ChunkedArrayList<INode>();
7129 dir.writeLock();
7130 try {
7131 snapshotManager.deleteSnapshot(snapshotRoot, snapshotName,
7132 collectedBlocks, removedINodes);
7133 dir.removeFromInodeMap(removedINodes);
7134 } finally {
7135 dir.writeUnlock();
7136 }
7137 removedINodes.clear();
7138 getEditLog().logDeleteSnapshot(snapshotRoot, snapshotName,
7139 cacheEntry != null);
7140 success = true;
7141 } finally {
7142 writeUnlock();
7143 RetryCache.setState(cacheEntry, success);
7144 }
7145 getEditLog().logSync();
7146
7147 removeBlocks(collectedBlocks);
7148 collectedBlocks.clear();
7149
7150 if (auditLog.isInfoEnabled() && isExternalInvocation()) {
7151 String rootPath = Snapshot.getSnapshotPath(snapshotRoot, snapshotName);
7152 logAuditEvent(true, "deleteSnapshot", rootPath, null, null);
7153 }
7154 }
7155
7156 /**
7157 * Remove a list of INodeDirectorySnapshottable from the SnapshotManager
7158 * @param toRemove the list of INodeDirectorySnapshottable to be removed
7159 */
7160 void removeSnapshottableDirs(List<INodeDirectorySnapshottable> toRemove) {
7161 if (snapshotManager != null) {
7162 snapshotManager.removeSnapshottable(toRemove);
7163 }
7164 }
7165
7166 RollingUpgradeInfo queryRollingUpgrade() throws IOException {
7167 checkSuperuserPrivilege();
7168 checkOperation(OperationCategory.READ);
7169 readLock();
7170 try {
7171 if (rollingUpgradeInfo != null) {
7172 boolean hasRollbackImage = this.getFSImage().hasRollbackFSImage();
7173 rollingUpgradeInfo.setCreatedRollbackImages(hasRollbackImage);
7174 }
7175 return rollingUpgradeInfo;
7176 } finally {
7177 readUnlock();
7178 }
7179 }
7180
  /**
   * Start a rolling upgrade. In a non-HA setup the NameNode must be in
   * safemode (an extra rollback checkpoint is taken); in an HA setup it must
   * NOT be in safemode, and the edit log is rolled so the standby can tail
   * the start-upgrade record.
   * @return info describing the newly started rolling upgrade
   */
  RollingUpgradeInfo startRollingUpgrade() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      long startTime = now();
      if (!haEnabled) { // for non-HA, we require NN to be in safemode
        startRollingUpgradeInternalForNonHA(startTime);
      } else { // for HA, NN cannot be in safemode
        checkNameNodeSafeMode("Failed to start rolling upgrade");
        startRollingUpgradeInternal(startTime);
      }

      getEditLog().logStartRollingUpgrade(rollingUpgradeInfo.getStartTime());
      if (haEnabled) {
        // roll the edit log to make sure the standby NameNode can tail
        getFSImage().rollEditLog();
      }
    } finally {
      writeUnlock();
    }

    getEditLog().logSync();
    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "startRollingUpgrade", null, null, null);
    }
    return rollingUpgradeInfo;
  }
7210
7211 /**
7212 * Update internal state to indicate that a rolling upgrade is in progress.
7213 * @param startTime
7214 */
7215 void startRollingUpgradeInternal(long startTime)
7216 throws IOException {
7217 checkRollingUpgrade("start rolling upgrade");
7218 getFSImage().checkUpgrade(this);
7219 setRollingUpgradeInfo(false, startTime);
7220 }
7221
7222 /**
7223 * Update internal state to indicate that a rolling upgrade is in progress for
7224 * non-HA setup. This requires the namesystem is in SafeMode and after doing a
7225 * checkpoint for rollback the namesystem will quit the safemode automatically
7226 */
7227 private void startRollingUpgradeInternalForNonHA(long startTime)
7228 throws IOException {
7229 Preconditions.checkState(!haEnabled);
7230 if (!isInSafeMode()) {
7231 throw new IOException("Safe mode should be turned ON "
7232 + "in order to create namespace image.");
7233 }
7234 checkRollingUpgrade("start rolling upgrade");
7235 getFSImage().checkUpgrade(this);
7236 // in non-HA setup, we do an extra ckpt to generate a rollback image
7237 getFSImage().saveNamespace(this, NameNodeFile.IMAGE_ROLLBACK, null);
7238 LOG.info("Successfully saved namespace for preparing rolling upgrade.");
7239
7240 // leave SafeMode automatically
7241 setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
7242 setRollingUpgradeInfo(true, startTime);
7243 }
7244
  /**
   * Record that a rolling upgrade is in progress (finalize time 0L = not yet
   * finalized).
   * @param createdRollbackImages whether a rollback image has been created
   * @param startTime start time of the upgrade, in milliseconds
   */
  void setRollingUpgradeInfo(boolean createdRollbackImages, long startTime) {
    rollingUpgradeInfo = new RollingUpgradeInfo(blockPoolId,
        createdRollbackImages, startTime, 0L);
  }
7249
  /**
   * Update the created-rollback-image flag; a no-op when no rolling upgrade
   * is in progress.
   */
  public void setCreatedRollbackImages(boolean created) {
    if (rollingUpgradeInfo != null) {
      rollingUpgradeInfo.setCreatedRollbackImages(created);
    }
  }
7255
  /** @return the current rolling upgrade info, or null if none is running. */
  public RollingUpgradeInfo getRollingUpgradeInfo() {
    return rollingUpgradeInfo;
  }
7259
  /** @return whether a rollback fsimage is needed. */
  public boolean isNeedRollbackFsImage() {
    return needRollbackFsImage;
  }
7263
  /** Set whether a rollback fsimage is needed. */
  public void setNeedRollbackFsImage(boolean needRollbackFsImage) {
    this.needRollbackFsImage = needRollbackFsImage;
  }
7267
7268 @Override // NameNodeMXBean
7269 public RollingUpgradeInfo.Bean getRollingUpgradeStatus() {
7270 readLock();
7271 try {
7272 RollingUpgradeInfo upgradeInfo = getRollingUpgradeInfo();
7273 if (upgradeInfo != null) {
7274 return new RollingUpgradeInfo.Bean(upgradeInfo);
7275 }
7276 return null;
7277 } finally {
7278 readUnlock();
7279 }
7280 }
7281
  /** Is rolling upgrade in progress? */
  public boolean isRollingUpgrade() {
    // A non-null info object is the marker of an in-progress rolling upgrade.
    return rollingUpgradeInfo != null;
  }
7286
7287 void checkRollingUpgrade(String action) throws RollingUpgradeException {
7288 if (isRollingUpgrade()) {
7289 throw new RollingUpgradeException("Failed to " + action
7290 + " since a rolling upgrade is already in progress."
7291 + " Existing rolling upgrade info:\n" + rollingUpgradeInfo);
7292 }
7293 }
7294
  /**
   * Finalize the rolling upgrade currently in progress: clear the in-memory
   * upgrade state, log the finalization, save a fresh namespace image, and
   * rename the rollback checkpoint to the regular image name.
   * @return info describing the finalized upgrade
   */
  RollingUpgradeInfo finalizeRollingUpgrade() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.WRITE);
    writeLock();
    final RollingUpgradeInfo returnInfo;
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Failed to finalize rolling upgrade");

      returnInfo = finalizeRollingUpgradeInternal(now());
      getEditLog().logFinalizeRollingUpgrade(returnInfo.getFinalizeTime());
      getFSImage().saveNamespace(this);
      getFSImage().renameCheckpoint(NameNodeFile.IMAGE_ROLLBACK,
          NameNodeFile.IMAGE);
    } finally {
      writeUnlock();
    }

    // getEditLog().logSync() is not needed since it does saveNamespace

    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "finalizeRollingUpgrade", null, null, null);
    }
    return returnInfo;
  }
7320
7321 RollingUpgradeInfo finalizeRollingUpgradeInternal(long finalizeTime)
7322 throws RollingUpgradeException {
7323 if (!isRollingUpgrade()) {
7324 throw new RollingUpgradeException(
7325 "Failed to finalize rolling upgrade since there is no rolling upgrade in progress.");
7326 }
7327
7328 final long startTime = rollingUpgradeInfo.getStartTime();
7329 rollingUpgradeInfo = null;
7330 return new RollingUpgradeInfo(blockPoolId, false, startTime, finalizeTime);
7331 }
7332
  /**
   * Add a new cache directive.
   * @param directive the directive to add; must not carry an ID (the cache
   *          manager assigns one)
   * @param flags if FORCE is absent, wait for a cache rescan before adding
   * @return the ID assigned to the new directive
   */
  long addCacheDirective(CacheDirectiveInfo directive, EnumSet<CacheFlag> flags)
      throws IOException {
    checkOperation(OperationCategory.WRITE);
    final FSPermissionChecker pc = isPermissionEnabled ?
        getPermissionChecker() : null;
    CacheEntryWithPayload cacheEntry =
        RetryCache.waitForCompletion(retryCache, null);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      // Retried RPC: return the ID computed by the earlier attempt.
      return (Long) cacheEntry.getPayload();
    }
    boolean success = false;
    if (!flags.contains(CacheFlag.FORCE)) {
      cacheManager.waitForRescanIfNeeded();
    }
    writeLock();
    Long result = null;
    try {
      checkOperation(OperationCategory.WRITE);
      if (isInSafeMode()) {
        throw new SafeModeException(
            "Cannot add cache directive", safeMode);
      }
      if (directive.getId() != null) {
        throw new IOException("addDirective: you cannot specify an ID " +
            "for this operation.");
      }
      CacheDirectiveInfo effectiveDirective =
          cacheManager.addDirective(directive, pc, flags);
      getEditLog().logAddCacheDirectiveInfo(effectiveDirective,
          cacheEntry != null);
      result = effectiveDirective.getId();
      success = true;
    } finally {
      writeUnlock();
      if (success) {
        getEditLog().logSync();
      }
      if (isAuditEnabled() && isExternalInvocation()) {
        logAuditEvent(success, "addCacheDirective", null, null, null);
      }
      // Store the assigned ID as the payload so retried RPCs can return it.
      RetryCache.setState(cacheEntry, success, result);
    }
    return result;
  }
7377
7378 void modifyCacheDirective(CacheDirectiveInfo directive,
7379 EnumSet<CacheFlag> flags) throws IOException {
7380 checkOperation(OperationCategory.WRITE);
7381 final FSPermissionChecker pc = isPermissionEnabled ?
7382 getPermissionChecker() : null;
7383 boolean success = false;
7384 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
7385 if (cacheEntry != null && cacheEntry.isSuccess()) {
7386 return;
7387 }
7388 if (!flags.contains(CacheFlag.FORCE)) {
7389 cacheManager.waitForRescanIfNeeded();
7390 }
7391 writeLock();
7392 try {
7393 checkOperation(OperationCategory.WRITE);
7394 if (isInSafeMode()) {
7395 throw new SafeModeException(
7396 "Cannot add cache directive", safeMode);
7397 }
7398 cacheManager.modifyDirective(directive, pc, flags);
7399 getEditLog().logModifyCacheDirectiveInfo(directive,
7400 cacheEntry != null);
7401 success = true;
7402 } finally {
7403 writeUnlock();
7404 if (success) {
7405 getEditLog().logSync();
7406 }
7407 if (isAuditEnabled() && isExternalInvocation()) {
7408 logAuditEvent(success, "modifyCacheDirective", null, null, null);
7409 }
7410 RetryCache.setState(cacheEntry, success);
7411 }
7412 }
7413
7414 void removeCacheDirective(Long id) throws IOException {
7415 checkOperation(OperationCategory.WRITE);
7416 final FSPermissionChecker pc = isPermissionEnabled ?
7417 getPermissionChecker() : null;
7418 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
7419 if (cacheEntry != null && cacheEntry.isSuccess()) {
7420 return;
7421 }
7422 boolean success = false;
7423 writeLock();
7424 try {
7425 checkOperation(OperationCategory.WRITE);
7426 if (isInSafeMode()) {
7427 throw new SafeModeException(
7428 "Cannot remove cache directives", safeMode);
7429 }
7430 cacheManager.removeDirective(id, pc);
7431 getEditLog().logRemoveCacheDirectiveInfo(id, cacheEntry != null);
7432 success = true;
7433 } finally {
7434 writeUnlock();
7435 if (isAuditEnabled() && isExternalInvocation()) {
7436 logAuditEvent(success, "removeCacheDirective", null, null,
7437 null);
7438 }
7439 RetryCache.setState(cacheEntry, success);
7440 }
7441 getEditLog().logSync();
7442 }
7443
7444 BatchedListEntries<CacheDirectiveEntry> listCacheDirectives(
7445 long startId, CacheDirectiveInfo filter) throws IOException {
7446 checkOperation(OperationCategory.READ);
7447 final FSPermissionChecker pc = isPermissionEnabled ?
7448 getPermissionChecker() : null;
7449 BatchedListEntries<CacheDirectiveEntry> results;
7450 cacheManager.waitForRescanIfNeeded();
7451 readLock();
7452 boolean success = false;
7453 try {
7454 checkOperation(OperationCategory.READ);
7455 results =
7456 cacheManager.listCacheDirectives(startId, filter, pc);
7457 success = true;
7458 } finally {
7459 readUnlock();
7460 if (isAuditEnabled() && isExternalInvocation()) {
7461 logAuditEvent(success, "listCacheDirectives", null, null,
7462 null);
7463 }
7464 }
7465 return results;
7466 }
7467
  /**
   * Create a new cache pool. Requires superuser privilege when permissions
   * are enabled.
   * @param req the pool to create
   */
  public void addCachePool(CachePoolInfo req) throws IOException {
    checkOperation(OperationCategory.WRITE);
    final FSPermissionChecker pc = isPermissionEnabled ?
        getPermissionChecker() : null;
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    writeLock();
    boolean success = false;
    try {
      checkOperation(OperationCategory.WRITE);
      if (isInSafeMode()) {
        throw new SafeModeException(
            "Cannot add cache pool " + req.getPoolName(), safeMode);
      }
      if (pc != null) {
        pc.checkSuperuserPrivilege();
      }
      CachePoolInfo info = cacheManager.addCachePool(req);
      // Log the pool info returned by the cache manager, not the raw request.
      getEditLog().logAddCachePool(info, cacheEntry != null);
      success = true;
    } finally {
      writeUnlock();
      if (isAuditEnabled() && isExternalInvocation()) {
        logAuditEvent(success, "addCachePool", req.getPoolName(), null, null);
      }
      RetryCache.setState(cacheEntry, success);
    }

    getEditLog().logSync();
  }
7500
7501 public void modifyCachePool(CachePoolInfo req) throws IOException {
7502 checkOperation(OperationCategory.WRITE);
7503 final FSPermissionChecker pc =
7504 isPermissionEnabled ? getPermissionChecker() : null;
7505 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
7506 if (cacheEntry != null && cacheEntry.isSuccess()) {
7507 return; // Return previous response
7508 }
7509 writeLock();
7510 boolean success = false;
7511 try {
7512 checkOperation(OperationCategory.WRITE);
7513 if (isInSafeMode()) {
7514 throw new SafeModeException(
7515 "Cannot modify cache pool " + req.getPoolName(), safeMode);
7516 }
7517 if (pc != null) {
7518 pc.checkSuperuserPrivilege();
7519 }
7520 cacheManager.modifyCachePool(req);
7521 getEditLog().logModifyCachePool(req, cacheEntry != null);
7522 success = true;
7523 } finally {
7524 writeUnlock();
7525 if (isAuditEnabled() && isExternalInvocation()) {
7526 logAuditEvent(success, "modifyCachePool", req.getPoolName(), null, null);
7527 }
7528 RetryCache.setState(cacheEntry, success);
7529 }
7530
7531 getEditLog().logSync();
7532 }
7533
  /**
   * Remove a cache pool and its directives. Requires superuser privilege
   * when permissions are enabled.
   * @param cachePoolName name of the pool to remove
   */
  public void removeCachePool(String cachePoolName) throws IOException {
    checkOperation(OperationCategory.WRITE);
    final FSPermissionChecker pc =
        isPermissionEnabled ? getPermissionChecker() : null;
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    writeLock();
    boolean success = false;
    try {
      checkOperation(OperationCategory.WRITE);
      if (isInSafeMode()) {
        throw new SafeModeException(
            "Cannot remove cache pool " + cachePoolName, safeMode);
      }
      if (pc != null) {
        pc.checkSuperuserPrivilege();
      }
      cacheManager.removeCachePool(cachePoolName);
      getEditLog().logRemoveCachePool(cachePoolName, cacheEntry != null);
      success = true;
    } finally {
      writeUnlock();
      if (isAuditEnabled() && isExternalInvocation()) {
        logAuditEvent(success, "removeCachePool", cachePoolName, null, null);
      }
      RetryCache.setState(cacheEntry, success);
    }

    getEditLog().logSync();
  }
7566
7567 public BatchedListEntries<CachePoolEntry> listCachePools(String prevKey)
7568 throws IOException {
7569 final FSPermissionChecker pc =
7570 isPermissionEnabled ? getPermissionChecker() : null;
7571 BatchedListEntries<CachePoolEntry> results;
7572 checkOperation(OperationCategory.READ);
7573 boolean success = false;
7574 cacheManager.waitForRescanIfNeeded();
7575 readLock();
7576 try {
7577 checkOperation(OperationCategory.READ);
7578 results = cacheManager.listCachePools(pc, prevKey);
7579 success = true;
7580 } finally {
7581 readUnlock();
7582 if (isAuditEnabled() && isExternalInvocation()) {
7583 logAuditEvent(success, "listCachePools", null, null, null);
7584 }
7585 }
7586 return results;
7587 }
7588
  /**
   * Apply the given ACL entries to the path's ACL. The caller must be the
   * owner of the path; the change is logged to the edit log and audited.
   * @param src path whose ACL is modified
   * @param aclSpec entries to apply
   */
  void modifyAclEntries(String src, List<AclEntry> aclSpec) throws IOException {
    aclConfigFlag.checkForApiCall();
    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    // Pre-compute components for resolving /.reserved paths under the lock.
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot modify ACL entries on " + src);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      checkOwner(pc, src);
      dir.modifyAclEntries(src, aclSpec);
      resultingStat = getAuditFileInfo(src, false);
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    logAuditEvent(true, "modifyAclEntries", src, null, resultingStat);
  }
7609
7610 void removeAclEntries(String src, List<AclEntry> aclSpec) throws IOException {
7611 aclConfigFlag.checkForApiCall();
7612 HdfsFileStatus resultingStat = null;
7613 FSPermissionChecker pc = getPermissionChecker();
7614 checkOperation(OperationCategory.WRITE);
7615 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
7616 writeLock();
7617 try {
7618 checkOperation(OperationCategory.WRITE);
7619 checkNameNodeSafeMode("Cannot remove ACL entries on " + src);
7620 src = FSDirectory.resolvePath(src, pathComponents, dir);
7621 checkOwner(pc, src);
7622 dir.removeAclEntries(src, aclSpec);
7623 resultingStat = getAuditFileInfo(src, false);
7624 } finally {
7625 writeUnlock();
7626 }
7627 getEditLog().logSync();
7628 logAuditEvent(true, "removeAclEntries", src, null, resultingStat);
7629 }
7630
7631 void removeDefaultAcl(String src) throws IOException {
7632 aclConfigFlag.checkForApiCall();
7633 HdfsFileStatus resultingStat = null;
7634 FSPermissionChecker pc = getPermissionChecker();
7635 checkOperation(OperationCategory.WRITE);
7636 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
7637 writeLock();
7638 try {
7639 checkOperation(OperationCategory.WRITE);
7640 checkNameNodeSafeMode("Cannot remove default ACL entries on " + src);
7641 src = FSDirectory.resolvePath(src, pathComponents, dir);
7642 checkOwner(pc, src);
7643 dir.removeDefaultAcl(src);
7644 resultingStat = getAuditFileInfo(src, false);
7645 } finally {
7646 writeUnlock();
7647 }
7648 getEditLog().logSync();
7649 logAuditEvent(true, "removeDefaultAcl", src, null, resultingStat);
7650 }
7651
  /**
   * Remove the ACL of the given path. The caller must be the owner of the
   * path; the change is logged to the edit log and audited.
   * @param src path whose ACL is removed
   */
  void removeAcl(String src) throws IOException {
    aclConfigFlag.checkForApiCall();
    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    // Pre-compute components for resolving /.reserved paths under the lock.
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot remove ACL on " + src);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      checkOwner(pc, src);
      dir.removeAcl(src);
      resultingStat = getAuditFileInfo(src, false);
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    logAuditEvent(true, "removeAcl", src, null, resultingStat);
  }
7672
7673 void setAcl(String src, List<AclEntry> aclSpec) throws IOException {
7674 aclConfigFlag.checkForApiCall();
7675 HdfsFileStatus resultingStat = null;
7676 FSPermissionChecker pc = getPermissionChecker();
7677 checkOperation(OperationCategory.WRITE);
7678 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
7679 writeLock();
7680 try {
7681 checkOperation(OperationCategory.WRITE);
7682 checkNameNodeSafeMode("Cannot set ACL on " + src);
7683 src = FSDirectory.resolvePath(src, pathComponents, dir);
7684 checkOwner(pc, src);
7685 dir.setAcl(src, aclSpec);
7686 resultingStat = getAuditFileInfo(src, false);
7687 } finally {
7688 writeUnlock();
7689 }
7690 getEditLog().logSync();
7691 logAuditEvent(true, "setAcl", src, null, resultingStat);
7692 }
7693
7694 AclStatus getAclStatus(String src) throws IOException {
7695 aclConfigFlag.checkForApiCall();
7696 FSPermissionChecker pc = getPermissionChecker();
7697 checkOperation(OperationCategory.READ);
7698 readLock();
7699 try {
7700 checkOperation(OperationCategory.READ);
7701 if (isPermissionEnabled) {
7702 checkPermission(pc, src, false, null, null, null, null);
7703 }
7704 return dir.getAclStatus(src);
7705 } finally {
7706 readUnlock();
7707 }
7708 }
7709
7710 /**
7711 * Default AuditLogger implementation; used when no access logger is
7712 * defined in the config file. It can also be explicitly listed in the
7713 * config file.
7714 */
7715 private static class DefaultAuditLogger extends HdfsAuditLogger {
7716
7717 private boolean logTokenTrackingId;
7718
7719 @Override
7720 public void initialize(Configuration conf) {
7721 logTokenTrackingId = conf.getBoolean(
7722 DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY,
7723 DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT);
7724 }
7725
7726 @Override
7727 public void logAuditEvent(boolean succeeded, String userName,
7728 InetAddress addr, String cmd, String src, String dst,
7729 FileStatus status, UserGroupInformation ugi,
7730 DelegationTokenSecretManager dtSecretManager) {
7731 if (auditLog.isInfoEnabled()) {
7732 final StringBuilder sb = auditBuffer.get();
7733 sb.setLength(0);
7734 sb.append("allowed=").append(succeeded).append("\t");
7735 sb.append("ugi=").append(userName).append("\t");
7736 sb.append("ip=").append(addr).append("\t");
7737 sb.append("cmd=").append(cmd).append("\t");
7738 sb.append("src=").append(src).append("\t");
7739 sb.append("dst=").append(dst).append("\t");
7740 if (null == status) {
7741 sb.append("perm=null");
7742 } else {
7743 sb.append("perm=");
7744 sb.append(status.getOwner()).append(":");
7745 sb.append(status.getGroup()).append(":");
7746 sb.append(status.getPermission());
7747 }
7748 if (logTokenTrackingId) {
7749 sb.append("\t").append("trackingId=");
7750 String trackingId = null;
7751 if (ugi != null && dtSecretManager != null
7752 && ugi.getAuthenticationMethod() == AuthenticationMethod.TOKEN) {
7753 for (TokenIdentifier tid: ugi.getTokenIdentifiers()) {
7754 if (tid instanceof DelegationTokenIdentifier) {
7755 DelegationTokenIdentifier dtid =
7756 (DelegationTokenIdentifier)tid;
7757 trackingId = dtSecretManager.getTokenTrackingId(dtid);
7758 break;
7759 }
7760 }
7761 }
7762 sb.append(trackingId);
7763 }
7764 logAuditMessage(sb.toString());
7765 }
7766 }
7767
7768 public void logAuditMessage(String message) {
7769 auditLog.info(message);
7770 }
7771 }
7772
  /**
   * Wrap the audit log's existing Log4j appenders in a single AsyncAppender.
   * No-op (with a warning) when the audit log is not backed by Log4j, and a
   * no-op when async wrapping has already been applied.
   */
  private static void enableAsyncAuditLog() {
    if (!(auditLog instanceof Log4JLogger)) {
      LOG.warn("Log4j is required to enable async auditlog");
      return;
    }
    Logger logger = ((Log4JLogger)auditLog).getLogger();
    @SuppressWarnings("unchecked")
    List<Appender> appenders = Collections.list(logger.getAllAppenders());
    // failsafe against trying to async it more than once
    if (!appenders.isEmpty() && !(appenders.get(0) instanceof AsyncAppender)) {
      AsyncAppender asyncAppender = new AsyncAppender();
      // change logger to have an async appender containing all the
      // previously configured appenders
      for (Appender appender : appenders) {
        logger.removeAppender(appender);
        asyncAppender.addAppender(appender);
      }
      logger.addAppender(asyncAppender);
    }
  }
7793
7794 }
7795