001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.hdfs.server.namenode;
019
020 import static org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.EncryptedKeyVersion;
021 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT;
022 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY;
023 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT;
024 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY;
025 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_DEFAULT;
026 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_KEY;
027 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_DEFAULT;
028 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY;
029 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CHECKSUM_TYPE_DEFAULT;
030 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY;
031 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT;
032 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_KEY;
033 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_DEFAULT;
034 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_KEY;
035 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_DEFAULT;
036 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_KEY;
037 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT;
038 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY;
039 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOGGERS_KEY;
040 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT;
041 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY;
042 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT;
043 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY;
044 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT;
045 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY;
046 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME;
047 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT;
048 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY;
049 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT;
050 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY;
051 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT;
052 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY;
053 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT;
054 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY;
055 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY;
056 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY;
057 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS;
058 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT;
059 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD;
060 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT;
061 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT;
062 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY;
063 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC;
064 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC_DEFAULT;
065 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_DEFAULT;
066 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_KEY;
067 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY;
068 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_DEFAULT;
069 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY;
070 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY;
071 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT;
072 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY;
073 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT;
074 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY;
075 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT;
076 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY;
077 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY;
078 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_DEFAULT;
079 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY;
080 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT;
081 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY;
082 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY;
083 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_DEFAULT;
084 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY;
085 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT;
086 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY;
087 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_DEFAULT;
088 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY;
089 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_KEY;
090 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_DEFAULT;
091 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_DEFAULT;
092 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_KEY;
093 import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.SECURITY_XATTR_UNREADABLE_BY_SUPERUSER;
094 import static org.apache.hadoop.util.Time.now;
095
096 import java.io.BufferedWriter;
097 import java.io.ByteArrayInputStream;
098 import java.io.DataInput;
099 import java.io.DataInputStream;
100 import java.io.DataOutputStream;
101 import java.io.File;
102 import java.io.FileNotFoundException;
103 import java.io.FileOutputStream;
104 import java.io.IOException;
105 import java.io.OutputStreamWriter;
106 import java.io.PrintWriter;
107 import java.io.StringWriter;
108 import java.lang.management.ManagementFactory;
109 import java.net.InetAddress;
110 import java.net.URI;
111 import java.security.GeneralSecurityException;
112 import java.security.NoSuchAlgorithmException;
113 import java.util.ArrayList;
114 import java.util.Arrays;
115 import java.util.Collection;
116 import java.util.Collections;
117 import java.util.Date;
118 import java.util.EnumSet;
119 import java.util.HashMap;
120 import java.util.HashSet;
121 import java.util.Iterator;
122 import java.util.LinkedHashSet;
123 import java.util.List;
124 import java.util.Map;
125 import java.util.Set;
126 import java.util.UUID;
127 import java.util.concurrent.TimeUnit;
128 import java.util.concurrent.locks.Condition;
129 import java.util.concurrent.locks.ReentrantLock;
130 import java.util.concurrent.locks.ReentrantReadWriteLock;
131
132 import javax.management.NotCompliantMBeanException;
133 import javax.management.ObjectName;
134 import javax.management.StandardMBean;
135
136 import org.apache.commons.logging.Log;
137 import org.apache.commons.logging.LogFactory;
138 import org.apache.commons.logging.impl.Log4JLogger;
139 import org.apache.hadoop.HadoopIllegalArgumentException;
140 import org.apache.hadoop.classification.InterfaceAudience;
141 import org.apache.hadoop.conf.Configuration;
142 import org.apache.hadoop.crypto.CipherSuite;
143 import org.apache.hadoop.crypto.CryptoProtocolVersion;
144 import org.apache.hadoop.crypto.key.KeyProvider;
145 import org.apache.hadoop.crypto.CryptoCodec;
146 import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension;
147 import org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries;
148 import org.apache.hadoop.fs.CacheFlag;
149 import org.apache.hadoop.fs.ContentSummary;
150 import org.apache.hadoop.fs.CreateFlag;
151 import org.apache.hadoop.fs.DirectoryListingStartAfterNotFoundException;
152 import org.apache.hadoop.fs.FileAlreadyExistsException;
153 import org.apache.hadoop.fs.FileEncryptionInfo;
154 import org.apache.hadoop.fs.FileStatus;
155 import org.apache.hadoop.fs.FileSystem;
156 import org.apache.hadoop.fs.FsServerDefaults;
157 import org.apache.hadoop.fs.InvalidPathException;
158 import org.apache.hadoop.fs.Options;
159 import org.apache.hadoop.fs.Options.Rename;
160 import org.apache.hadoop.fs.ParentNotDirectoryException;
161 import org.apache.hadoop.fs.Path;
162 import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException;
163 import org.apache.hadoop.fs.UnresolvedLinkException;
164 import org.apache.hadoop.fs.XAttr;
165 import org.apache.hadoop.fs.XAttrSetFlag;
166 import org.apache.hadoop.fs.permission.AclEntry;
167 import org.apache.hadoop.fs.permission.AclStatus;
168 import org.apache.hadoop.fs.permission.FsAction;
169 import org.apache.hadoop.fs.permission.FsPermission;
170 import org.apache.hadoop.fs.permission.PermissionStatus;
171 import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
172 import org.apache.hadoop.ha.ServiceFailedException;
173 import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
174 import org.apache.hadoop.hdfs.DFSConfigKeys;
175 import org.apache.hadoop.hdfs.DFSUtil;
176 import org.apache.hadoop.hdfs.HAUtil;
177 import org.apache.hadoop.hdfs.HdfsConfiguration;
178 import org.apache.hadoop.hdfs.UnknownCryptoProtocolVersionException;
179 import org.apache.hadoop.hdfs.XAttrHelper;
180 import org.apache.hadoop.hdfs.protocol.AclException;
181 import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
182 import org.apache.hadoop.hdfs.protocol.Block;
183 import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
184 import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
185 import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
186 import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
187 import org.apache.hadoop.hdfs.protocol.ClientProtocol;
188 import org.apache.hadoop.hdfs.protocol.DatanodeID;
189 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
190 import org.apache.hadoop.hdfs.protocol.DirectoryListing;
191 import org.apache.hadoop.hdfs.protocol.EncryptionZone;
192 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
193 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
194 import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
195 import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
196 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
197 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
198 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
199 import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
200 import org.apache.hadoop.hdfs.protocol.RecoveryInProgressException;
201 import org.apache.hadoop.hdfs.protocol.RollingUpgradeException;
202 import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo;
203 import org.apache.hadoop.hdfs.protocol.SnapshotAccessControlException;
204 import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
205 import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
206 import org.apache.hadoop.hdfs.protocol.datatransfer.ReplaceDatanodeOnFailure;
207 import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager;
208 import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager.AccessMode;
209 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
210 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
211 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager.SecretManagerState;
212 import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection;
213 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
214 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
215 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
216 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
217 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
218 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStatistics;
219 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
220 import org.apache.hadoop.hdfs.server.blockmanagement.OutOfV1GenerationStampsException;
221 import org.apache.hadoop.hdfs.server.common.GenerationStamp;
222 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
223 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
224 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption;
225 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
226 import org.apache.hadoop.hdfs.server.common.Storage;
227 import org.apache.hadoop.hdfs.server.common.Storage.StorageDirType;
228 import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
229 import org.apache.hadoop.hdfs.server.common.Util;
230 import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection;
231 import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
232 import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream;
233 import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
234 import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
235 import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory;
236 import org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer;
237 import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
238 import org.apache.hadoop.hdfs.server.namenode.ha.StandbyCheckpointer;
239 import org.apache.hadoop.hdfs.server.namenode.metrics.FSNamesystemMBean;
240 import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
241 import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
242 import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager;
243 import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
244 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
245 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
246 import org.apache.hadoop.hdfs.server.namenode.startupprogress.Status;
247 import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
248 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;
249 import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods;
250 import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
251 import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
252 import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport;
253 import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
254 import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat;
255 import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand;
256 import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
257 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
258 import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
259 import org.apache.hadoop.hdfs.server.protocol.StorageReport;
260 import org.apache.hadoop.hdfs.util.ChunkedArrayList;
261 import org.apache.hadoop.io.IOUtils;
262 import org.apache.hadoop.io.Text;
263 import org.apache.hadoop.ipc.RetriableException;
264 import org.apache.hadoop.ipc.RetryCache;
265 import org.apache.hadoop.ipc.RetryCache.CacheEntry;
266 import org.apache.hadoop.ipc.RetryCache.CacheEntryWithPayload;
267 import org.apache.hadoop.ipc.Server;
268 import org.apache.hadoop.ipc.StandbyException;
269 import org.apache.hadoop.metrics2.annotation.Metric;
270 import org.apache.hadoop.metrics2.annotation.Metrics;
271 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
272 import org.apache.hadoop.metrics2.util.MBeans;
273 import org.apache.hadoop.net.NetworkTopology;
274 import org.apache.hadoop.net.Node;
275 import org.apache.hadoop.net.NodeBase;
276 import org.apache.hadoop.security.AccessControlException;
277 import org.apache.hadoop.security.UserGroupInformation;
278 import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
279 import org.apache.hadoop.security.token.SecretManager.InvalidToken;
280 import org.apache.hadoop.security.token.Token;
281 import org.apache.hadoop.security.token.TokenIdentifier;
282 import org.apache.hadoop.security.token.delegation.DelegationKey;
283 import org.apache.hadoop.util.Daemon;
284 import org.apache.hadoop.util.DataChecksum;
285 import org.apache.hadoop.util.StringUtils;
286 import org.apache.hadoop.util.Time;
287 import org.apache.hadoop.util.VersionInfo;
288 import org.apache.log4j.Appender;
289 import org.apache.log4j.AsyncAppender;
290 import org.apache.log4j.Logger;
291 import org.mortbay.util.ajax.JSON;
292
293 import com.google.common.annotations.VisibleForTesting;
294 import com.google.common.base.Charsets;
295 import com.google.common.base.Preconditions;
296 import com.google.common.collect.ImmutableMap;
297 import com.google.common.collect.Lists;
298
/***************************************************
 * FSNamesystem does the actual bookkeeping work for the
 * NameNode.
302 *
303 * It tracks several important tables.
304 *
305 * 1) valid fsname --> blocklist (kept on disk, logged)
306 * 2) Set of all valid blocks (inverted #1)
307 * 3) block --> machinelist (kept in memory, rebuilt dynamically from reports)
308 * 4) machine --> blocklist (inverted #2)
309 * 5) LRU cache of updated-heartbeat machines
310 ***************************************************/
311 @InterfaceAudience.Private
312 @Metrics(context="dfs")
313 public class FSNamesystem implements Namesystem, FSClusterStats,
314 FSNamesystemMBean, NameNodeMXBean {
315 public static final Log LOG = LogFactory.getLog(FSNamesystem.class);
316
  // One scratch StringBuilder per thread, created lazily on first access.
  // NOTE(review): presumably used to assemble audit log lines without
  // per-event allocation; the consuming code is outside this chunk -- confirm.
  private static final ThreadLocal<StringBuilder> auditBuffer =
      new ThreadLocal<StringBuilder>() {
        @Override
        protected StringBuilder initialValue() {
          return new StringBuilder();
        }
      };
324
325 @VisibleForTesting
326 public boolean isAuditEnabled() {
327 return !isDefaultAuditLogger || auditLog.isInfoEnabled();
328 }
329
330 private HdfsFileStatus getAuditFileInfo(String path, boolean resolveSymlink)
331 throws IOException {
332 return (isAuditEnabled() && isExternalInvocation())
333 ? dir.getFileInfo(path, resolveSymlink, false, false) : null;
334 }
335
  /**
   * Convenience overload: audit an operation that has no destination path
   * and no resulting file status.
   *
   * @param succeeded whether the operation succeeded
   * @param cmd the audited command name
   * @param src the source path the operation acted on
   */
  private void logAuditEvent(boolean succeeded, String cmd, String src)
      throws IOException {
    logAuditEvent(succeeded, cmd, src, null, null);
  }
340
341 private void logAuditEvent(boolean succeeded, String cmd, String src,
342 String dst, HdfsFileStatus stat) throws IOException {
343 if (isAuditEnabled() && isExternalInvocation()) {
344 logAuditEvent(succeeded, getRemoteUser(), getRemoteIp(),
345 cmd, src, dst, stat);
346 }
347 }
348
349 private void logAuditEvent(boolean succeeded,
350 UserGroupInformation ugi, InetAddress addr, String cmd, String src,
351 String dst, HdfsFileStatus stat) {
352 FileStatus status = null;
353 if (stat != null) {
354 Path symlink = stat.isSymlink() ? new Path(stat.getSymlink()) : null;
355 Path path = dst != null ? new Path(dst) : new Path(src);
356 status = new FileStatus(stat.getLen(), stat.isDir(),
357 stat.getReplication(), stat.getBlockSize(), stat.getModificationTime(),
358 stat.getAccessTime(), stat.getPermission(), stat.getOwner(),
359 stat.getGroup(), symlink, path);
360 }
361 for (AuditLogger logger : auditLoggers) {
362 if (logger instanceof HdfsAuditLogger) {
363 HdfsAuditLogger hdfsLogger = (HdfsAuditLogger) logger;
364 hdfsLogger.logAuditEvent(succeeded, ugi.toString(), addr, cmd, src, dst,
365 status, ugi, dtSecretManager);
366 } else {
367 logger.logAuditEvent(succeeded, ugi.toString(), addr,
368 cmd, src, dst, status);
369 }
370 }
371 }
372
373 /**
374 * Logger for audit events, noting successful FSNamesystem operations. Emits
375 * to FSNamesystem.audit at INFO. Each event causes a set of tab-separated
376 * <code>key=value</code> pairs to be written for the following properties:
377 * <code>
378 * ugi=<ugi in RPC>
379 * ip=<remote IP>
380 * cmd=<command>
381 * src=<src path>
382 * dst=<dst path (optional)>
383 * perm=<permissions (optional)>
384 * </code>
385 */
386 public static final Log auditLog = LogFactory.getLog(
387 FSNamesystem.class.getName() + ".audit");
388
  static final int DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED = 100;
  // Batch size used for incremental block deletion.  Deliberately non-final;
  // NOTE(review): presumably tuned by tests outside this chunk -- confirm
  // before making it final.
  static int BLOCK_DELETION_INCREMENT = 1000;
  // Whether permission checking is enforced.
  private final boolean isPermissionEnabled;
  // The user this namesystem runs as, and its short user name.
  private final UserGroupInformation fsOwner;
  private final String fsOwnerShortUserName;
  // Name of the configured superuser group.
  private final String supergroup;
  // Whether a standby NameNode should perform checkpoints.
  private final boolean standbyShouldCheckpoint;
396
397 // Scan interval is not configurable.
398 private static final long DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL =
399 TimeUnit.MILLISECONDS.convert(1, TimeUnit.HOURS);
400 final DelegationTokenSecretManager dtSecretManager;
401 private final boolean alwaysUseDelegationTokensForTests;
402
403 private static final Step STEP_AWAITING_REPORTED_BLOCKS =
404 new Step(StepType.AWAITING_REPORTED_BLOCKS);
405
406 // Tracks whether the default audit logger is the only configured audit
407 // logger; this allows isAuditEnabled() to return false in case the
408 // underlying logger is disabled, and avoid some unnecessary work.
409 private final boolean isDefaultAuditLogger;
410 private final List<AuditLogger> auditLoggers;
411
412 /** The namespace tree. */
413 FSDirectory dir;
414 private final BlockManager blockManager;
415 private final SnapshotManager snapshotManager;
416 private final CacheManager cacheManager;
417 private final DatanodeStatistics datanodeStatistics;
418
419 // whether setStoragePolicy is allowed.
420 private final boolean isStoragePolicyEnabled;
421
422 private String nameserviceId;
423
424 private RollingUpgradeInfo rollingUpgradeInfo = null;
425 /**
426 * A flag that indicates whether the checkpointer should checkpoint a rollback
427 * fsimage. The edit log tailer sets this flag. The checkpoint will create a
428 * rollback fsimage if the flag is true, and then change the flag to false.
429 */
430 private volatile boolean needRollbackFsImage;
431
432 // Block pool ID used by this namenode
433 private String blockPoolId;
434
435 final LeaseManager leaseManager = new LeaseManager(this);
436
437 volatile Daemon smmthread = null; // SafeModeMonitor thread
438
439 Daemon nnrmthread = null; // NamenodeResourceMonitor thread
440
441 Daemon nnEditLogRoller = null; // NameNodeEditLogRoller thread
442
443 // A daemon to periodically clean up corrupt lazyPersist files
444 // from the name space.
445 Daemon lazyPersistFileScrubber = null;
446 /**
447 * When an active namenode will roll its own edit log, in # edits
448 */
449 private final long editLogRollerThreshold;
450 /**
451 * Check interval of an active namenode's edit log roller thread
452 */
453 private final int editLogRollerInterval;
454
455 /**
456 * How frequently we scan and unlink corrupt lazyPersist files.
457 * (In seconds)
458 */
459 private final int lazyPersistFileScrubIntervalSec;
460
461 private volatile boolean hasResourcesAvailable = false;
462 private volatile boolean fsRunning = true;
463
464 /** The start time of the namesystem. */
465 private final long startTime = now();
466
467 /** The interval of namenode checking for the disk space availability */
468 private final long resourceRecheckInterval;
469
470 // The actual resource checker instance.
471 NameNodeResourceChecker nnResourceChecker;
472
473 private final FsServerDefaults serverDefaults;
474 private final boolean supportAppends;
475 private final ReplaceDatanodeOnFailure dtpReplaceDatanodeOnFailure;
476
477 private volatile SafeModeInfo safeMode; // safe mode information
478
479 private final long maxFsObjects; // maximum number of fs objects
480
481 private final long minBlockSize; // minimum block size
482 private final long maxBlocksPerFile; // maximum # of blocks per file
483
484 /**
485 * The global generation stamp for legacy blocks with randomly
486 * generated block IDs.
487 */
488 private final GenerationStamp generationStampV1 = new GenerationStamp();
489
490 /**
491 * The global generation stamp for this file system.
492 */
493 private final GenerationStamp generationStampV2 = new GenerationStamp();
494
495 /**
496 * The value of the generation stamp when the first switch to sequential
497 * block IDs was made. Blocks with generation stamps below this value
498 * have randomly allocated block IDs. Blocks with generation stamps above
499 * this value had sequentially allocated block IDs. Read from the fsImage
500 * (or initialized as an offset from the V1 (legacy) generation stamp on
501 * upgrade).
502 */
503 private long generationStampV1Limit =
504 GenerationStamp.GRANDFATHER_GENERATION_STAMP;
505
506 /**
507 * The global block ID space for this file system.
508 */
509 @VisibleForTesting
510 private final SequentialBlockIdGenerator blockIdGenerator;
511
512 // precision of access times.
513 private final long accessTimePrecision;
514
515 /** Lock to protect FSNamesystem. */
516 private final FSNamesystemLock fsLock;
517
518 /**
519 * Used when this NN is in standby state to read from the shared edit log.
520 */
521 private EditLogTailer editLogTailer = null;
522
523 /**
524 * Used when this NN is in standby state to perform checkpoints.
525 */
526 private StandbyCheckpointer standbyCheckpointer;
527
528 /**
529 * Reference to the NN's HAContext object. This is only set once
530 * {@link #startCommonServices(Configuration, HAContext)} is called.
531 */
532 private HAContext haContext;
533
534 private final boolean haEnabled;
535
536 /** flag indicating whether replication queues have been initialized */
537 boolean initializedReplQueues = false;
538
539 /**
540 * Whether the namenode is in the middle of starting the active service
541 */
542 private volatile boolean startingActiveService = false;
543
544 private INodeId inodeId;
545
546 private final RetryCache retryCache;
547
548 private final NNConf nnConf;
549
550 private KeyProviderCryptoExtension provider = null;
551 private KeyProvider.Options providerOptions = null;
552
553 private final CryptoCodec codec;
554
555 private volatile boolean imageLoaded = false;
556 private final Condition cond;
557
558 private final FSImage fsImage;
559
  /**
   * Notify that loading of this FSDirectory is complete, and
   * it is imageLoaded for use.  May be called at most once per instance;
   * a second call trips the {@code Preconditions} check.
   */
  void imageLoadComplete() {
    Preconditions.checkState(!imageLoaded, "FSDirectory already loaded");
    setImageLoaded();
  }
568
  /**
   * Mark the image as loaded and wake up all threads blocked in
   * {@link #waitForLoadingFSImage()}.  Idempotent: a second call returns on
   * the unlocked fast-path check; the unsynchronized read is a benign race
   * because the state transition under the lock only ever sets the flag.
   */
  void setImageLoaded() {
    if(imageLoaded) return; // fast path, no lock taken
    writeLock();
    try {
      setImageLoaded(true);
      dir.markNameCacheInitialized();
      cond.signalAll(); // release waiters parked on the image-loaded condition
    } finally {
      writeUnlock();
    }
  }
580
  /**
   * @return whether the fsimage has finished loading.
   * This is for testing purposes only.
   */
  @VisibleForTesting
  boolean isImageLoaded() {
    return imageLoaded;
  }
586
  /**
   * Directly set the image-loaded flag without locking or signalling.
   * Exposed for unit tests; production code should go through
   * {@link #setImageLoaded()}.
   */
  protected void setImageLoaded(boolean flag) {
    imageLoaded = flag;
  }
591
592 /**
593 * Block until the object is imageLoaded to be used.
594 */
595 void waitForLoadingFSImage() {
596 if (!imageLoaded) {
597 writeLock();
598 try {
599 while (!imageLoaded) {
600 try {
601 cond.await(5000, TimeUnit.MILLISECONDS);
602 } catch (InterruptedException ignored) {
603 }
604 }
605 } finally {
606 writeUnlock();
607 }
608 }
609 }
610
  /**
   * Set the last allocated inode id when fsimage or editlog is loaded.
   *
   * @param newValue the value to advance the inode-id generator to
   * @throws IOException if the generator rejects the value (wrapped
   *         {@link IllegalStateException}, e.g. an attempt to move the
   *         counter backwards)
   */
  public void resetLastInodeId(long newValue) throws IOException {
    try {
      inodeId.skipTo(newValue);
    } catch(IllegalStateException ise) {
      // Translate into the checked exception image/edit-log loaders expect.
      throw new IOException(ise);
    }
  }
621
  /**
   * Force the inode-id generator to an arbitrary value, bypassing the
   * monotonicity check in {@link #resetLastInodeId(long)}.
   * Should only be used for tests to reset to any value.
   */
  void resetLastInodeIdWithoutChecking(long newValue) {
    inodeId.setCurrentValue(newValue);
  }
626
  /** @return the last inode ID handed out by this namesystem. */
  public long getLastInodeId() {
    return inodeId.getCurrentValue();
  }
631
  /** Allocate a new inode ID by advancing the sequential generator. */
  public long allocateNewInodeId() {
    return inodeId.nextValue();
  }
636
  /**
   * Clear all loaded data: the directory tree, delegation-token secrets,
   * both generation-stamp spaces, the block-id and inode-id generators,
   * leases, snapshottable directories and cache state, finally marking the
   * image as not loaded so users must wait for a fresh load.
   */
  void clear() {
    dir.reset();
    dtSecretManager.reset();
    // Roll both generation-stamp spaces back to their reserved floor.
    generationStampV1.setCurrentValue(GenerationStamp.LAST_RESERVED_STAMP);
    generationStampV2.setCurrentValue(GenerationStamp.LAST_RESERVED_STAMP);
    blockIdGenerator.setCurrentValue(
        SequentialBlockIdGenerator.LAST_RESERVED_BLOCK_ID);
    generationStampV1Limit = GenerationStamp.GRANDFATHER_GENERATION_STAMP;
    leaseManager.removeAllLeases();
    inodeId.setCurrentValue(INodeId.LAST_RESERVED_ID);
    snapshotManager.clearSnapshottableDirs();
    cacheManager.clear();
    setImageLoaded(false);
  }
654
  /** @return the lease manager; exposed for tests. */
  @VisibleForTesting
  LeaseManager getLeaseManager() {
    return leaseManager;
  }
659
  /** @return whether this namesystem was configured for HA. */
  boolean isHaEnabled() {
    return haEnabled;
  }
663
664 /**
665 * Check the supplied configuration for correctness.
666 * @param conf Supplies the configuration to validate.
667 * @throws IOException if the configuration could not be queried.
668 * @throws IllegalArgumentException if the configuration is invalid.
669 */
670 private static void checkConfiguration(Configuration conf)
671 throws IOException {
672
673 final Collection<URI> namespaceDirs =
674 FSNamesystem.getNamespaceDirs(conf);
675 final Collection<URI> editsDirs =
676 FSNamesystem.getNamespaceEditsDirs(conf);
677 final Collection<URI> requiredEditsDirs =
678 FSNamesystem.getRequiredNamespaceEditsDirs(conf);
679 final Collection<URI> sharedEditsDirs =
680 FSNamesystem.getSharedEditsDirs(conf);
681
682 for (URI u : requiredEditsDirs) {
683 if (u.toString().compareTo(
684 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_DEFAULT) == 0) {
685 continue;
686 }
687
688 // Each required directory must also be in editsDirs or in
689 // sharedEditsDirs.
690 if (!editsDirs.contains(u) &&
691 !sharedEditsDirs.contains(u)) {
692 throw new IllegalArgumentException(
693 "Required edits directory " + u.toString() + " not present in " +
694 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY + ". " +
695 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY + "=" +
696 editsDirs.toString() + "; " +
697 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY + "=" +
698 requiredEditsDirs.toString() + ". " +
699 DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY + "=" +
700 sharedEditsDirs.toString() + ".");
701 }
702 }
703
704 if (namespaceDirs.size() == 1) {
705 LOG.warn("Only one image storage directory ("
706 + DFS_NAMENODE_NAME_DIR_KEY + ") configured. Beware of data loss"
707 + " due to lack of redundant storage directories!");
708 }
709 if (editsDirs.size() == 1) {
710 LOG.warn("Only one namespace edits storage directory ("
711 + DFS_NAMENODE_EDITS_DIR_KEY + ") configured. Beware of data loss"
712 + " due to lack of redundant storage directories!");
713 }
714 }
715
/**
 * Instantiates an FSNamesystem loaded from the image and edits
 * directories specified in the passed Configuration.
 *
 * @param conf the Configuration which specifies the storage directories
 * from which to load
 * @return an FSNamesystem which contains the loaded namespace
 * @throws IOException if loading fails
 */
static FSNamesystem loadFromDisk(Configuration conf) throws IOException {

  checkConfiguration(conf);
  FSImage fsImage = new FSImage(conf,
      FSNamesystem.getNamespaceDirs(conf),
      FSNamesystem.getNamespaceEditsDirs(conf));
  FSNamesystem namesystem = new FSNamesystem(conf, fsImage, false);
  StartupOption startOpt = NameNode.getStartupOption(conf);
  if (startOpt == StartupOption.RECOVER) {
    // Metadata recovery mutates the namespace; keep clients out by
    // entering safemode for the duration.
    namesystem.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
  }

  long loadStart = now();
  try {
    namesystem.loadFSImage(startOpt);
  } catch (IOException ioe) {
    // Release storage directory locks before propagating the failure.
    LOG.warn("Encountered exception loading fsimage", ioe);
    fsImage.close();
    throw ioe;
  }
  long timeTakenToLoadFSImage = now() - loadStart;
  LOG.info("Finished loading FSImage in " + timeTakenToLoadFSImage + " msecs");
  NameNodeMetrics nnMetrics = NameNode.getNameNodeMetrics();
  if (nnMetrics != null) {
    // Metrics may not be initialized yet (e.g. in some tests).
    nnMetrics.setFsImageLoadTime((int) timeTakenToLoadFSImage);
  }
  return namesystem;
}
753
/**
 * Convenience constructor; equivalent to
 * {@code FSNamesystem(conf, fsImage, false)}, i.e. the retry cache
 * setup step is NOT skipped.
 */
FSNamesystem(Configuration conf, FSImage fsImage) throws IOException {
  this(conf, fsImage, false);
}
757
/**
 * Create an FSNamesystem associated with the specified image.
 *
 * Note that this does not load any data off of disk -- if you would
 * like that behavior, use {@link #loadFromDisk(Configuration)}
 *
 * @param conf configuration
 * @param fsImage The FSImage to associate with
 * @param ignoreRetryCache Whether or not should ignore the retry cache setup
 *                         step. For Secondary NN this should be set to true.
 * @throws IOException on bad configuration
 */
FSNamesystem(Configuration conf, FSImage fsImage, boolean ignoreRetryCache)
    throws IOException {
  // Encryption-at-rest support: the key provider is optional.
  provider = DFSUtil.createKeyProviderCryptoExtension(conf);
  if (provider == null) {
    LOG.info("No KeyProvider found.");
  } else {
    LOG.info("Found KeyProvider: " + provider.toString());
  }
  providerOptions = KeyProvider.options(conf);
  this.codec = CryptoCodec.getInstance(conf);
  if (conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY,
                      DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT)) {
    LOG.info("Enabling async auditlog");
    enableAsyncAuditLog();
  }
  // Lock fairness is configurable; note the key is not in DFSConfigKeys.
  boolean fair = conf.getBoolean("dfs.namenode.fslock.fair", true);
  LOG.info("fsLock is fair:" + fair);
  fsLock = new FSNamesystemLock(fair);
  cond = fsLock.writeLock().newCondition();
  this.fsImage = fsImage;
  try {
    resourceRecheckInterval = conf.getLong(
        DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY,
        DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT);

    this.blockManager = new BlockManager(this, this, conf);
    this.datanodeStatistics = blockManager.getDatanodeManager().getDatanodeStatistics();
    this.blockIdGenerator = new SequentialBlockIdGenerator(this.blockManager);

    this.isStoragePolicyEnabled =
        conf.getBoolean(DFS_STORAGE_POLICY_ENABLED_KEY,
                        DFS_STORAGE_POLICY_ENABLED_DEFAULT);

    this.fsOwner = UserGroupInformation.getCurrentUser();
    this.fsOwnerShortUserName = fsOwner.getShortUserName();
    this.supergroup = conf.get(DFS_PERMISSIONS_SUPERUSERGROUP_KEY,
                               DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT);
    this.isPermissionEnabled = conf.getBoolean(DFS_PERMISSIONS_ENABLED_KEY,
                                               DFS_PERMISSIONS_ENABLED_DEFAULT);
    LOG.info("fsOwner = " + fsOwner);
    LOG.info("supergroup = " + supergroup);
    LOG.info("isPermissionEnabled = " + isPermissionEnabled);

    // block allocation has to be persisted in HA using a shared edits directory
    // so that the standby has up-to-date namespace information
    nameserviceId = DFSUtil.getNamenodeNameServiceId(conf);
    this.haEnabled = HAUtil.isHAEnabled(conf, nameserviceId);

    // Sanity check the HA-related config.
    if (nameserviceId != null) {
      LOG.info("Determined nameservice ID: " + nameserviceId);
    }
    LOG.info("HA Enabled: " + haEnabled);
    if (!haEnabled && HAUtil.usesSharedEditsDir(conf)) {
      LOG.warn("Configured NNs:\n" + DFSUtil.nnAddressesAsString(conf));
      throw new IOException("Invalid configuration: a shared edits dir " +
          "must not be specified if HA is not enabled.");
    }

    // Get the checksum type from config
    String checksumTypeStr = conf.get(DFS_CHECKSUM_TYPE_KEY, DFS_CHECKSUM_TYPE_DEFAULT);
    DataChecksum.Type checksumType;
    try {
      checksumType = DataChecksum.Type.valueOf(checksumTypeStr);
    } catch (IllegalArgumentException iae) {
      throw new IOException("Invalid checksum type in "
          + DFS_CHECKSUM_TYPE_KEY + ": " + checksumTypeStr);
    }

    // Server-side defaults handed to clients that don't override them.
    this.serverDefaults = new FsServerDefaults(
        conf.getLongBytes(DFS_BLOCK_SIZE_KEY, DFS_BLOCK_SIZE_DEFAULT),
        conf.getInt(DFS_BYTES_PER_CHECKSUM_KEY, DFS_BYTES_PER_CHECKSUM_DEFAULT),
        conf.getInt(DFS_CLIENT_WRITE_PACKET_SIZE_KEY, DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT),
        (short) conf.getInt(DFS_REPLICATION_KEY, DFS_REPLICATION_DEFAULT),
        conf.getInt(IO_FILE_BUFFER_SIZE_KEY, IO_FILE_BUFFER_SIZE_DEFAULT),
        conf.getBoolean(DFS_ENCRYPT_DATA_TRANSFER_KEY, DFS_ENCRYPT_DATA_TRANSFER_DEFAULT),
        conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT),
        checksumType);

    this.maxFsObjects = conf.getLong(DFS_NAMENODE_MAX_OBJECTS_KEY,
                                     DFS_NAMENODE_MAX_OBJECTS_DEFAULT);

    this.minBlockSize = conf.getLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY,
        DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_DEFAULT);
    this.maxBlocksPerFile = conf.getLong(DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_KEY,
        DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_DEFAULT);
    this.accessTimePrecision = conf.getLong(DFS_NAMENODE_ACCESSTIME_PRECISION_KEY,
        DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT);
    this.supportAppends = conf.getBoolean(DFS_SUPPORT_APPEND_KEY, DFS_SUPPORT_APPEND_DEFAULT);
    LOG.info("Append Enabled: " + supportAppends);

    this.dtpReplaceDatanodeOnFailure = ReplaceDatanodeOnFailure.get(conf);

    this.standbyShouldCheckpoint = conf.getBoolean(
        DFS_HA_STANDBY_CHECKPOINTS_KEY, DFS_HA_STANDBY_CHECKPOINTS_DEFAULT);
    // # edit autoroll threshold is a multiple of the checkpoint threshold
    this.editLogRollerThreshold = (long)
        (conf.getFloat(
            DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD,
            DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT) *
         conf.getLong(
            DFS_NAMENODE_CHECKPOINT_TXNS_KEY,
            DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT));
    this.editLogRollerInterval = conf.getInt(
        DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS,
        DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT);
    this.inodeId = new INodeId();

    this.lazyPersistFileScrubIntervalSec = conf.getInt(
        DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC,
        DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC_DEFAULT);

    // Zero would silently disable scrubbing; reject it explicitly.
    if (this.lazyPersistFileScrubIntervalSec == 0) {
      throw new IllegalArgumentException(
          DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC + " must be non-zero.");
    }

    // For testing purposes, allow the DT secret manager to be started regardless
    // of whether security is enabled.
    alwaysUseDelegationTokensForTests = conf.getBoolean(
        DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY,
        DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT);

    this.dtSecretManager = createDelegationTokenSecretManager(conf);
    this.dir = new FSDirectory(this, conf);
    this.snapshotManager = new SnapshotManager(dir);
    this.cacheManager = new CacheManager(this, conf, blockManager);
    this.safeMode = new SafeModeInfo(conf);
    this.auditLoggers = initAuditLoggers(conf);
    this.isDefaultAuditLogger = auditLoggers.size() == 1 &&
        auditLoggers.get(0) instanceof DefaultAuditLogger;
    this.retryCache = ignoreRetryCache ? null : initRetryCache(conf);
    this.nnConf = new NNConf(conf);
  } catch(IOException e) {
    // Partial initialization must not leak resources; close() tolerates
    // half-constructed state.
    LOG.error(getClass().getSimpleName() + " initialization failed.", e);
    close();
    throw e;
  } catch (RuntimeException re) {
    LOG.error(getClass().getSimpleName() + " initialization failed.", re);
    close();
    throw re;
  }
}
913
/** @return the RPC retry cache, or null if it is disabled; for tests. */
@VisibleForTesting
public RetryCache getRetryCache() {
  return retryCache;
}
918
919 void lockRetryCache() {
920 if (retryCache != null) {
921 retryCache.lock();
922 }
923 }
924
925 void unlockRetryCache() {
926 if (retryCache != null) {
927 retryCache.unlock();
928 }
929 }
930
/** @return whether the retry cache is enabled on this NameNode. */
boolean hasRetryCache() {
  return retryCache != null;
}
935
936 void addCacheEntryWithPayload(byte[] clientId, int callId, Object payload) {
937 if (retryCache != null) {
938 retryCache.addCacheEntryWithPayload(clientId, callId, payload);
939 }
940 }
941
942 void addCacheEntry(byte[] clientId, int callId) {
943 if (retryCache != null) {
944 retryCache.addCacheEntry(clientId, callId);
945 }
946 }
947
/** @return the crypto key provider, or null if none configured; for tests. */
@VisibleForTesting
public KeyProviderCryptoExtension getProvider() {
  return provider;
}
952
953 @VisibleForTesting
954 static RetryCache initRetryCache(Configuration conf) {
955 boolean enable = conf.getBoolean(DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY,
956 DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT);
957 LOG.info("Retry cache on namenode is " + (enable ? "enabled" : "disabled"));
958 if (enable) {
959 float heapPercent = conf.getFloat(
960 DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY,
961 DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT);
962 long entryExpiryMillis = conf.getLong(
963 DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY,
964 DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT);
965 LOG.info("Retry cache will use " + heapPercent
966 + " of total heap and retry cache entry expiry time is "
967 + entryExpiryMillis + " millis");
968 long entryExpiryNanos = entryExpiryMillis * 1000 * 1000;
969 return new RetryCache("NameNodeRetryCache", heapPercent,
970 entryExpiryNanos);
971 }
972 return null;
973 }
974
975 private List<AuditLogger> initAuditLoggers(Configuration conf) {
976 // Initialize the custom access loggers if configured.
977 Collection<String> alClasses = conf.getStringCollection(DFS_NAMENODE_AUDIT_LOGGERS_KEY);
978 List<AuditLogger> auditLoggers = Lists.newArrayList();
979 if (alClasses != null && !alClasses.isEmpty()) {
980 for (String className : alClasses) {
981 try {
982 AuditLogger logger;
983 if (DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME.equals(className)) {
984 logger = new DefaultAuditLogger();
985 } else {
986 logger = (AuditLogger) Class.forName(className).newInstance();
987 }
988 logger.initialize(conf);
989 auditLoggers.add(logger);
990 } catch (RuntimeException re) {
991 throw re;
992 } catch (Exception e) {
993 throw new RuntimeException(e);
994 }
995 }
996 }
997
998 // Make sure there is at least one logger installed.
999 if (auditLoggers.isEmpty()) {
1000 auditLoggers.add(new DefaultAuditLogger());
1001 }
1002 return Collections.unmodifiableList(auditLoggers);
1003 }
1004
/**
 * Load the namespace from the FSImage/edits, honoring the startup option
 * (format, recovery, rolling upgrade, HA standby), and save a new image
 * if the one read was stale. Called once at startup, under the write lock.
 *
 * @param startOpt how the NameNode was started
 * @throws IOException if the image or edits cannot be read/written
 */
private void loadFSImage(StartupOption startOpt) throws IOException {
  final FSImage fsImage = getFSImage();

  // format before starting up if requested
  if (startOpt == StartupOption.FORMAT) {

    fsImage.format(this, fsImage.getStorage().determineClusterId());// reuse current id

    startOpt = StartupOption.REGULAR;
  }
  boolean success = false;
  writeLock();
  try {
    // We shouldn't be calling saveNamespace if we've come up in standby state.
    MetaRecoveryContext recovery = startOpt.createRecoveryContext();
    final boolean staleImage
        = fsImage.recoverTransitionRead(startOpt, this, recovery);
    if (RollingUpgradeStartupOption.ROLLBACK.matches(startOpt) ||
        RollingUpgradeStartupOption.DOWNGRADE.matches(startOpt)) {
      // Rolling upgrade is over once we roll back or downgrade.
      rollingUpgradeInfo = null;
    }
    final boolean needToSave = staleImage && !haEnabled && !isRollingUpgrade();
    LOG.info("Need to save fs image? " + needToSave
        + " (staleImage=" + staleImage + ", haEnabled=" + haEnabled
        + ", isRollingUpgrade=" + isRollingUpgrade() + ")");
    if (needToSave) {
      fsImage.saveNamespace(this);
    } else {
      updateStorageVersionForRollingUpgrade(fsImage.getLayoutVersion(),
          startOpt);
      // No need to save, so mark the phase done.
      StartupProgress prog = NameNode.getStartupProgress();
      prog.beginPhase(Phase.SAVING_CHECKPOINT);
      prog.endPhase(Phase.SAVING_CHECKPOINT);
    }
    // This will start a new log segment and write to the seen_txid file, so
    // we shouldn't do it when coming up in standby state
    if (!haEnabled || (haEnabled && startOpt == StartupOption.UPGRADE)
        || (haEnabled && startOpt == StartupOption.UPGRADEONLY)) {
      fsImage.openEditLogForWrite();
    }
    success = true;
  } finally {
    if (!success) {
      // On failure, release storage locks before rethrowing.
      fsImage.close();
    }
    writeUnlock();
  }
  imageLoadComplete();
}
1055
1056 private void updateStorageVersionForRollingUpgrade(final long layoutVersion,
1057 StartupOption startOpt) throws IOException {
1058 boolean rollingStarted = RollingUpgradeStartupOption.STARTED
1059 .matches(startOpt) && layoutVersion > HdfsConstants
1060 .NAMENODE_LAYOUT_VERSION;
1061 boolean rollingRollback = RollingUpgradeStartupOption.ROLLBACK
1062 .matches(startOpt);
1063 if (rollingRollback || rollingStarted) {
1064 fsImage.updateStorageVersion();
1065 }
1066 }
1067
1068 private void startSecretManager() {
1069 if (dtSecretManager != null) {
1070 try {
1071 dtSecretManager.startThreads();
1072 } catch (IOException e) {
1073 // Inability to start secret manager
1074 // can't be recovered from.
1075 throw new RuntimeException(e);
1076 }
1077 }
1078 }
1079
1080 private void startSecretManagerIfNecessary() {
1081 boolean shouldRun = shouldUseDelegationTokens() &&
1082 !isInSafeMode() && getEditLog().isOpenForWrite();
1083 boolean running = dtSecretManager.isRunning();
1084 if (shouldRun && !running) {
1085 startSecretManager();
1086 }
1087 }
1088
1089 private void stopSecretManager() {
1090 if (dtSecretManager != null) {
1091 dtSecretManager.stopThreads();
1092 }
1093 }
1094
/**
 * Start services common to both active and standby states: resource
 * checking, safemode block accounting, the block manager, and the
 * various MBean/metrics registrations.
 *
 * @param conf the configuration
 * @param haContext HA context (stored for later state checks)
 * @throws IOException if the resource checker cannot be created
 */
void startCommonServices(Configuration conf, HAContext haContext) throws IOException {
  this.registerMBean(); // register the MBean for the FSNamesystemState
  writeLock();
  this.haContext = haContext;
  try {
    nnResourceChecker = new NameNodeResourceChecker(conf);
    checkAvailableResources();
    assert safeMode != null && !isPopulatingReplQueues();
    StartupProgress prog = NameNode.getStartupProgress();
    prog.beginPhase(Phase.SAFEMODE);
    prog.setTotal(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS,
        getCompleteBlocksTotal());
    // Seed safemode's expected block count before datanodes report in.
    setBlockTotal();
    blockManager.activate(conf);
  } finally {
    writeUnlock();
  }

  // MXBean/metrics registration happens outside the lock.
  registerMXBean();
  DefaultMetricsSystem.instance().register(this);
  snapshotManager.registerMXBean();
}
1120
1121 /**
1122 * Stop services common to both active and standby states
1123 */
1124 void stopCommonServices() {
1125 writeLock();
1126 try {
1127 if (blockManager != null) blockManager.close();
1128 } finally {
1129 writeUnlock();
1130 }
1131 RetryCache.clear(retryCache);
1132 }
1133
/**
 * Start services required in active state: take over the edit log writer
 * role (catching up to the old active's edits first), enable quota checks,
 * and launch the lease monitor, resource monitor, edit log roller,
 * lazy-persist scrubber and cache monitor daemons.
 *
 * @throws IOException if the edit log cannot be recovered or opened
 */
void startActiveServices() throws IOException {
  startingActiveService = true;
  LOG.info("Starting services required for active state");
  writeLock();
  try {
    FSEditLog editLog = getFSImage().getEditLog();

    if (!editLog.isOpenForWrite()) {
      // During startup, we're already open for write during initialization.
      editLog.initJournalsForWrite();
      // May need to recover
      editLog.recoverUnclosedStreams();

      LOG.info("Catching up to latest edits from old active before " +
          "taking over writer role in edits logs");
      editLogTailer.catchupDuringFailover();

      // Blocks "from the future" were postponed while standby; now that
      // we've caught up, reprocess datanode state from scratch.
      blockManager.setPostponeBlocksFromFuture(false);
      blockManager.getDatanodeManager().markAllDatanodesStale();
      blockManager.clearQueues();
      blockManager.processAllPendingDNMessages();

      // Only need to re-process the queue, If not in SafeMode.
      if (!isInSafeMode()) {
        LOG.info("Reprocessing replication and invalidation queues");
        initializeReplQueues();
      }

      if (LOG.isDebugEnabled()) {
        LOG.debug("NameNode metadata after re-processing " +
            "replication and invalidation queues during failover:\n" +
            metaSaveAsString());
      }

      long nextTxId = getFSImage().getLastAppliedTxId() + 1;
      LOG.info("Will take over writing edit logs at txnid " +
          nextTxId);
      editLog.setNextTxId(nextTxId);

      getFSImage().editLog.openForWrite();
    }

    // Enable quota checks.
    dir.enableQuotaChecks();
    if (haEnabled) {
      // Renew all of the leases before becoming active.
      // This is because, while we were in standby mode,
      // the leases weren't getting renewed on this NN.
      // Give them all a fresh start here.
      leaseManager.renewAllLeases();
    }
    leaseManager.startMonitor();
    startSecretManagerIfNecessary();

    //ResourceMonitor required only at ActiveNN. See HDFS-2914
    this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
    nnrmthread.start();

    nnEditLogRoller = new Daemon(new NameNodeEditLogRoller(
        editLogRollerThreshold, editLogRollerInterval));
    nnEditLogRoller.start();

    if (lazyPersistFileScrubIntervalSec > 0) {
      lazyPersistFileScrubber = new Daemon(new LazyPersistFileScrubber(
          lazyPersistFileScrubIntervalSec));
      lazyPersistFileScrubber.start();
    }

    cacheManager.startMonitorThread();
    blockManager.getDatanodeManager().setShouldSendCachingCommands(true);
  } finally {
    // Re-evaluate safemode now that the transition has finished, still
    // under the write lock.
    startingActiveService = false;
    checkSafeMode();
    writeUnlock();
  }
}
1214
/**
 * Initialize replication queues by scanning for mis-replicated blocks,
 * then mark the queues as initialized.
 */
private void initializeReplQueues() {
  LOG.info("initializing replication queues");
  blockManager.processMisReplicatedBlocks();
  initializedReplQueues = true;
}
1223
1224 private boolean inActiveState() {
1225 return haContext != null &&
1226 haContext.getState().getServiceState() == HAServiceState.ACTIVE;
1227 }
1228
/**
 * @return Whether the namenode is transitioning to active state and is in the
 *         middle of the {@link #startActiveServices()}
 */
public boolean inTransitionToActive() {
  return haEnabled && inActiveState() && startingActiveService;
}
1236
/**
 * @return true when delegation tokens should be used: security is enabled,
 *         or tests have forced them on.
 */
private boolean shouldUseDelegationTokens() {
  return UserGroupInformation.isSecurityEnabled() ||
      alwaysUseDelegationTokensForTests;
}
1241
/**
 * Stop services required in active state: secret manager, lease monitor,
 * the resource/edit-roller/scrubber daemons, the edit log writer, the
 * cache monitor, and all replication/caching queues. Runs under the
 * write lock; each step tolerates services that were never started.
 */
void stopActiveServices() {
  LOG.info("Stopping services started for active state");
  writeLock();
  try {
    stopSecretManager();
    leaseManager.stopMonitor();
    if (nnrmthread != null) {
      ((NameNodeResourceMonitor) nnrmthread.getRunnable()).stopMonitor();
      nnrmthread.interrupt();
    }
    if (nnEditLogRoller != null) {
      ((NameNodeEditLogRoller)nnEditLogRoller.getRunnable()).stop();
      nnEditLogRoller.interrupt();
    }
    if (lazyPersistFileScrubber != null) {
      ((LazyPersistFileScrubber) lazyPersistFileScrubber.getRunnable()).stop();
      lazyPersistFileScrubber.interrupt();
    }
    if (dir != null && getFSImage() != null) {
      if (getFSImage().editLog != null) {
        getFSImage().editLog.close();
      }
      // Update the fsimage with the last txid that we wrote
      // so that the tailer starts from the right spot.
      getFSImage().updateLastAppliedTxIdFromWritten();
    }
    if (cacheManager != null) {
      cacheManager.stopMonitorThread();
      cacheManager.clearDirectiveStats();
    }
    blockManager.getDatanodeManager().clearPendingCachingCommands();
    blockManager.getDatanodeManager().setShouldSendCachingCommands(false);
    // Don't want to keep replication queues when not in Active.
    blockManager.clearQueues();
    initializedReplQueues = false;
  } finally {
    writeUnlock();
  }
}
1284
/**
 * Start services required in standby state: open shared journals for
 * read, postpone processing of future blocks, disable quota checks, and
 * launch the edit log tailer (and, optionally, the standby checkpointer).
 *
 * @param conf the configuration
 * @throws IOException if the shared journals cannot be opened
 */
void startStandbyServices(final Configuration conf) throws IOException {
  LOG.info("Starting services required for standby state");
  if (!getFSImage().editLog.isOpenForRead()) {
    // During startup, we're already open for read.
    getFSImage().editLog.initSharedJournalsForRead();
  }

  // Standby may see blocks from edits it hasn't applied yet; postpone them.
  blockManager.setPostponeBlocksFromFuture(true);

  // Disable quota checks while in standby.
  dir.disableQuotaChecks();
  editLogTailer = new EditLogTailer(this, conf);
  editLogTailer.start();
  if (standbyShouldCheckpoint) {
    standbyCheckpointer = new StandbyCheckpointer(conf, this);
    standbyCheckpointer.start();
  }
}
1308
1309 /**
1310 * Called when the NN is in Standby state and the editlog tailer tails the
1311 * OP_ROLLING_UPGRADE_START.
1312 */
1313 void triggerRollbackCheckpoint() {
1314 setNeedRollbackFsImage(true);
1315 if (standbyCheckpointer != null) {
1316 standbyCheckpointer.triggerRollbackCheckpoint();
1317 }
1318 }
1319
1320 /**
1321 * Called while the NN is in Standby state, but just about to be
1322 * asked to enter Active state. This cancels any checkpoints
1323 * currently being taken.
1324 */
1325 void prepareToStopStandbyServices() throws ServiceFailedException {
1326 if (standbyCheckpointer != null) {
1327 standbyCheckpointer.cancelAndPreventCheckpoints(
1328 "About to leave standby state");
1329 }
1330 }
1331
1332 /** Stop services required in standby state */
1333 void stopStandbyServices() throws IOException {
1334 LOG.info("Stopping services started for standby state");
1335 if (standbyCheckpointer != null) {
1336 standbyCheckpointer.stop();
1337 }
1338 if (editLogTailer != null) {
1339 editLogTailer.stop();
1340 }
1341 if (dir != null && getFSImage() != null && getFSImage().editLog != null) {
1342 getFSImage().editLog.close();
1343 }
1344 }
1345
1346 @Override
1347 public void checkOperation(OperationCategory op) throws StandbyException {
1348 if (haContext != null) {
1349 // null in some unit tests
1350 haContext.checkOperation(op);
1351 }
1352 }
1353
1354 /**
1355 * @throws RetriableException
1356 * If 1) The NameNode is in SafeMode, 2) HA is enabled, and 3)
1357 * NameNode is in active state
1358 * @throws SafeModeException
1359 * Otherwise if NameNode is in SafeMode.
1360 */
1361 private void checkNameNodeSafeMode(String errorMsg)
1362 throws RetriableException, SafeModeException {
1363 if (isInSafeMode()) {
1364 SafeModeException se = new SafeModeException(errorMsg, safeMode);
1365 if (haEnabled && haContext != null
1366 && haContext.getState().getServiceState() == HAServiceState.ACTIVE
1367 && shouldRetrySafeMode(this.safeMode)) {
1368 throw new RetriableException(se);
1369 } else {
1370 throw se;
1371 }
1372 }
1373 }
1374
1375 /**
1376 * We already know that the safemode is on. We will throw a RetriableException
1377 * if the safemode is not manual or caused by low resource.
1378 */
1379 private boolean shouldRetrySafeMode(SafeModeInfo safeMode) {
1380 if (safeMode == null) {
1381 return false;
1382 } else {
1383 return !safeMode.isManual() && !safeMode.areResourcesLow();
1384 }
1385 }
1386
/** @return the configured fsimage (name) storage directories as URIs. */
public static Collection<URI> getNamespaceDirs(Configuration conf) {
  return getStorageDirs(conf, DFS_NAMENODE_NAME_DIR_KEY);
}
1390
1391 /**
1392 * Get all edits dirs which are required. If any shared edits dirs are
1393 * configured, these are also included in the set of required dirs.
1394 *
1395 * @param conf the HDFS configuration.
1396 * @return all required dirs.
1397 */
1398 public static Collection<URI> getRequiredNamespaceEditsDirs(Configuration conf) {
1399 Set<URI> ret = new HashSet<URI>();
1400 ret.addAll(getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY));
1401 ret.addAll(getSharedEditsDirs(conf));
1402 return ret;
1403 }
1404
1405 private static Collection<URI> getStorageDirs(Configuration conf,
1406 String propertyName) {
1407 Collection<String> dirNames = conf.getTrimmedStringCollection(propertyName);
1408 StartupOption startOpt = NameNode.getStartupOption(conf);
1409 if(startOpt == StartupOption.IMPORT) {
1410 // In case of IMPORT this will get rid of default directories
1411 // but will retain directories specified in hdfs-site.xml
1412 // When importing image from a checkpoint, the name-node can
1413 // start with empty set of storage directories.
1414 Configuration cE = new HdfsConfiguration(false);
1415 cE.addResource("core-default.xml");
1416 cE.addResource("core-site.xml");
1417 cE.addResource("hdfs-default.xml");
1418 Collection<String> dirNames2 = cE.getTrimmedStringCollection(propertyName);
1419 dirNames.removeAll(dirNames2);
1420 if(dirNames.isEmpty())
1421 LOG.warn("!!! WARNING !!!" +
1422 "\n\tThe NameNode currently runs without persistent storage." +
1423 "\n\tAny changes to the file system meta-data may be lost." +
1424 "\n\tRecommended actions:" +
1425 "\n\t\t- shutdown and restart NameNode with configured \""
1426 + propertyName + "\" in hdfs-site.xml;" +
1427 "\n\t\t- use Backup Node as a persistent and up-to-date storage " +
1428 "of the file system meta-data.");
1429 } else if (dirNames.isEmpty()) {
1430 dirNames = Collections.singletonList(
1431 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_DEFAULT);
1432 }
1433 return Util.stringCollectionAsURIs(dirNames);
1434 }
1435
/**
 * Return an ordered list of edits directories to write to.
 * The list is ordered such that all shared edits directories
 * are ordered before non-shared directories, and any duplicates
 * are removed. The order they are specified in the configuration
 * is retained.
 * @return all edits directories (shared first, then local), de-duplicated.
 * @throws IOException if multiple shared edits directories are configured
 */
public static List<URI> getNamespaceEditsDirs(Configuration conf)
    throws IOException {
  return getNamespaceEditsDirs(conf, true);
}
1449
/**
 * Return an ordered, de-duplicated list of edits directories, optionally
 * including shared edits dirs (which then come first). Falls back to the
 * image directories when no edits dirs are configured at all.
 *
 * @param conf the configuration
 * @param includeShared whether shared edits dirs are included
 * @throws IOException if multiple shared edits directories are configured
 */
public static List<URI> getNamespaceEditsDirs(Configuration conf,
    boolean includeShared)
    throws IOException {
  // Use a LinkedHashSet so that order is maintained while we de-dup
  // the entries.
  LinkedHashSet<URI> editsDirs = new LinkedHashSet<URI>();

  if (includeShared) {
    List<URI> sharedDirs = getSharedEditsDirs(conf);

    // Fail until multiple shared edits directories are supported (HDFS-2782)
    if (sharedDirs.size() > 1) {
      throw new IOException(
          "Multiple shared edits directories are not yet supported");
    }

    // First add the shared edits dirs. It's critical that the shared dirs
    // are added first, since JournalSet syncs them in the order they are listed,
    // and we need to make sure all edits are in place in the shared storage
    // before they are replicated locally. See HDFS-2874.
    for (URI dir : sharedDirs) {
      if (!editsDirs.add(dir)) {
        LOG.warn("Edits URI " + dir + " listed multiple times in " +
            DFS_NAMENODE_SHARED_EDITS_DIR_KEY + ". Ignoring duplicates.");
      }
    }
  }
  // Now add the non-shared dirs.
  for (URI dir : getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_KEY)) {
    if (!editsDirs.add(dir)) {
      LOG.warn("Edits URI " + dir + " listed multiple times in " +
          DFS_NAMENODE_SHARED_EDITS_DIR_KEY + " and " +
          DFS_NAMENODE_EDITS_DIR_KEY + ". Ignoring duplicates.");
    }
  }

  if (editsDirs.isEmpty()) {
    // If this is the case, no edit dirs have been explicitly configured.
    // Image dirs are to be used for edits too.
    return Lists.newArrayList(getNamespaceDirs(conf));
  } else {
    return Lists.newArrayList(editsDirs);
  }
}
1494
1495 /**
1496 * Returns edit directories that are shared between primary and secondary.
1497 * @param conf configuration
1498 * @return collection of edit directories from {@code conf}
1499 */
1500 public static List<URI> getSharedEditsDirs(Configuration conf) {
1501 // don't use getStorageDirs here, because we want an empty default
1502 // rather than the dir in /tmp
1503 Collection<String> dirNames = conf.getTrimmedStringCollection(
1504 DFS_NAMENODE_SHARED_EDITS_DIR_KEY);
1505 return Util.stringCollectionAsURIs(dirNames);
1506 }
1507
/** Acquire the namesystem read lock (blocking). */
@Override
public void readLock() {
  this.fsLock.readLock().lock();
}
/**
 * Interruptibly acquire the long read lock followed by the regular read
 * lock, in that order. Pair with {@link #longReadUnlock()}.
 */
@Override
public void longReadLockInterruptibly() throws InterruptedException {
  this.fsLock.longReadLock().lockInterruptibly();
  try {
    this.fsLock.readLock().lockInterruptibly();
  } catch (InterruptedException ie) {
    // In the event we're interrupted while getting the normal FSNS read lock,
    // release the long read lock.
    this.fsLock.longReadLock().unlock();
    throw ie;
  }
}
/** Release the read lock and then the long read lock (reverse of acquire). */
@Override
public void longReadUnlock() {
  this.fsLock.readLock().unlock();
  this.fsLock.longReadLock().unlock();
}
/** Release the namesystem read lock. */
@Override
public void readUnlock() {
  this.fsLock.readLock().unlock();
}
/**
 * Acquire the write lock. The long read lock is taken first so that a
 * writer also excludes long-running readers; this acquisition order must
 * match {@link #writeUnlock()}'s release order.
 */
@Override
public void writeLock() {
  this.fsLock.longReadLock().lock();
  this.fsLock.writeLock().lock();
}
/**
 * Interruptibly acquire the long read lock then the write lock, in the
 * same order as {@link #writeLock()}.
 */
@Override
public void writeLockInterruptibly() throws InterruptedException {
  this.fsLock.longReadLock().lockInterruptibly();
  try {
    this.fsLock.writeLock().lockInterruptibly();
  } catch (InterruptedException ie) {
    // In the event we're interrupted while getting the normal FSNS write
    // lock, release the long read lock.
    this.fsLock.longReadLock().unlock();
    throw ie;
  }
}
/** Release the write lock, then the long read lock (reverse of acquire). */
@Override
public void writeUnlock() {
  this.fsLock.writeLock().unlock();
  this.fsLock.longReadLock().unlock();
}
/** @return true if the current thread holds the write lock. */
@Override
public boolean hasWriteLock() {
  return this.fsLock.isWriteLockedByCurrentThread();
}
/** @return true if the current thread holds the read or write lock. */
@Override
public boolean hasReadLock() {
  return this.fsLock.getReadHoldCount() > 0 || hasWriteLock();
}
1563
/** @return the current thread's read-lock hold count. */
public int getReadHoldCount() {
  return this.fsLock.getReadHoldCount();
}
1567
/** @return the current thread's write-lock hold count. */
public int getWriteHoldCount() {
  return this.fsLock.getWriteHoldCount();
}
1571
/** @return this namespace's info, built under the read lock. */
NamespaceInfo getNamespaceInfo() {
  readLock();
  try {
    return unprotectedGetNamespaceInfo();
  } finally {
    readUnlock();
  }
}
1580
1581 /**
1582 * Version of @see #getNamespaceInfo() that is not protected by a lock.
1583 */
1584 NamespaceInfo unprotectedGetNamespaceInfo() {
1585 return new NamespaceInfo(getFSImage().getStorage().getNamespaceID(),
1586 getClusterId(), getBlockPoolId(),
1587 getFSImage().getStorage().getCTime());
1588 }
1589
1590 /**
1591 * Close down this file system manager.
1592 * Causes heartbeat and lease daemons to stop; waits briefly for
1593 * them to finish, but a short timeout returns control back to caller.
1594 */
1595 void close() {
1596 fsRunning = false;
1597 try {
1598 stopCommonServices();
1599 if (smmthread != null) smmthread.interrupt();
1600 } finally {
1601 // using finally to ensure we also wait for lease daemon
1602 try {
1603 stopActiveServices();
1604 stopStandbyServices();
1605 } catch (IOException ie) {
1606 } finally {
1607 IOUtils.cleanup(LOG, dir);
1608 IOUtils.cleanup(LOG, fsImage);
1609 }
1610 }
1611 }
1612
/** @return true until {@link #close()} has been invoked. */
@Override
public boolean isRunning() {
  return fsRunning;
}
1617
1618 @Override
1619 public boolean isInStandbyState() {
1620 if (haContext == null || haContext.getState() == null) {
1621 // We're still starting up. In this case, if HA is
1622 // on for the cluster, we always start in standby. Otherwise
1623 // start in active.
1624 return haEnabled;
1625 }
1626
1627 return HAServiceState.STANDBY == haContext.getState().getServiceState();
1628 }
1629
1630 /**
1631 * Dump all metadata into specified file
1632 */
1633 void metaSave(String filename) throws IOException {
1634 checkSuperuserPrivilege();
1635 checkOperation(OperationCategory.UNCHECKED);
1636 writeLock();
1637 try {
1638 checkOperation(OperationCategory.UNCHECKED);
1639 File file = new File(System.getProperty("hadoop.log.dir"), filename);
1640 PrintWriter out = new PrintWriter(new BufferedWriter(
1641 new OutputStreamWriter(new FileOutputStream(file), Charsets.UTF_8)));
1642 metaSave(out);
1643 out.flush();
1644 out.close();
1645 } finally {
1646 writeUnlock();
1647 }
1648 }
1649
1650 private void metaSave(PrintWriter out) {
1651 assert hasWriteLock();
1652 long totalInodes = this.dir.totalInodes();
1653 long totalBlocks = this.getBlocksTotal();
1654 out.println(totalInodes + " files and directories, " + totalBlocks
1655 + " blocks = " + (totalInodes + totalBlocks) + " total");
1656
1657 blockManager.metaSave(out);
1658 }
1659
1660 private String metaSaveAsString() {
1661 StringWriter sw = new StringWriter();
1662 PrintWriter pw = new PrintWriter(sw);
1663 metaSave(pw);
1664 pw.flush();
1665 return sw.toString();
1666 }
1667
1668
/** @return the configured default block size from the server defaults. */
long getDefaultBlockSize() {
  return serverDefaults.getBlockSize();
}
1672
/**
 * @return the server defaults handed to clients.
 * @throws StandbyException if this namenode cannot serve READ operations
 */
FsServerDefaults getServerDefaults() throws StandbyException {
  checkOperation(OperationCategory.READ);
  return serverDefaults;
}
1677
/** @return the configured access-time precision; 0 disables atime updates. */
long getAccessTimePrecision() {
  return accessTimePrecision;
}
1681
/** @return true if access-time tracking is enabled (precision > 0). */
private boolean isAccessTimeSupported() {
  return accessTimePrecision > 0;
}
1685
1686 /////////////////////////////////////////////////////////
1687 //
1688 // These methods are called by HadoopFS clients
1689 //
1690 /////////////////////////////////////////////////////////
1691 /**
1692 * Set permissions for an existing file.
1693 * @throws IOException
1694 */
1695 void setPermission(String src, FsPermission permission)
1696 throws AccessControlException, FileNotFoundException, SafeModeException,
1697 UnresolvedLinkException, IOException {
1698 try {
1699 setPermissionInt(src, permission);
1700 } catch (AccessControlException e) {
1701 logAuditEvent(false, "setPermission", src);
1702 throw e;
1703 }
1704 }
1705
/**
 * Worker for {@link #setPermission}: applies the new permission under the
 * write lock, logs the edit, syncs the log, then audits with the original
 * (unresolved) path.
 */
private void setPermissionInt(final String srcArg, FsPermission permission)
    throws AccessControlException, FileNotFoundException, SafeModeException,
    UnresolvedLinkException, IOException {
  String src = srcArg;
  HdfsFileStatus resultingStat = null;
  FSPermissionChecker pc = getPermissionChecker();
  checkOperation(OperationCategory.WRITE);
  byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
  writeLock();
  try {
    // Re-check after taking the lock: HA state may have changed.
    checkOperation(OperationCategory.WRITE);
    checkNameNodeSafeMode("Cannot set permission for " + src);
    src = resolvePath(src, pathComponents);
    // Only the owner (or a superuser) may change permissions.
    checkOwner(pc, src);
    dir.setPermission(src, permission);
    getEditLog().logSetPermissions(src, permission);
    resultingStat = getAuditFileInfo(src, false);
  } finally {
    writeUnlock();
  }
  // Sync the edit log outside the lock to reduce lock hold time.
  getEditLog().logSync();
  logAuditEvent(true, "setPermission", srcArg, null, resultingStat);
}
1729
1730 /**
1731 * Set owner for an existing file.
1732 * @throws IOException
1733 */
1734 void setOwner(String src, String username, String group)
1735 throws AccessControlException, FileNotFoundException, SafeModeException,
1736 UnresolvedLinkException, IOException {
1737 try {
1738 setOwnerInt(src, username, group);
1739 } catch (AccessControlException e) {
1740 logAuditEvent(false, "setOwner", src);
1741 throw e;
1742 }
1743 }
1744
/**
 * Worker for {@link #setOwner}: validates the caller's rights to change
 * owner/group, applies the change under the write lock, logs and syncs the
 * edit, then audits with the original path.
 */
private void setOwnerInt(final String srcArg, String username, String group)
    throws AccessControlException, FileNotFoundException, SafeModeException,
    UnresolvedLinkException, IOException {
  String src = srcArg;
  HdfsFileStatus resultingStat = null;
  FSPermissionChecker pc = getPermissionChecker();
  checkOperation(OperationCategory.WRITE);
  byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
  writeLock();
  try {
    // Re-check after taking the lock: HA state may have changed.
    checkOperation(OperationCategory.WRITE);
    checkNameNodeSafeMode("Cannot set owner for " + src);
    src = resolvePath(src, pathComponents);
    checkOwner(pc, src);
    if (!pc.isSuperUser()) {
      // Non-superusers may only "change" owner to themselves, and only to a
      // group they belong to.
      if (username != null && !pc.getUser().equals(username)) {
        throw new AccessControlException("Non-super user cannot change owner");
      }
      if (group != null && !pc.containsGroup(group)) {
        throw new AccessControlException("User does not belong to " + group);
      }
    }
    dir.setOwner(src, username, group);
    getEditLog().logSetOwner(src, username, group);
    resultingStat = getAuditFileInfo(src, false);
  } finally {
    writeUnlock();
  }
  // Sync the edit log outside the lock to reduce lock hold time.
  getEditLog().logSync();
  logAuditEvent(true, "setOwner", srcArg, null, resultingStat);
}
1776
1777 /**
1778 * Get block locations within the specified range.
1779 * @see ClientProtocol#getBlockLocations(String, long, long)
1780 */
1781 LocatedBlocks getBlockLocations(String clientMachine, String src,
1782 long offset, long length) throws AccessControlException,
1783 FileNotFoundException, UnresolvedLinkException, IOException {
1784 LocatedBlocks blocks = getBlockLocations(src, offset, length, true, true,
1785 true);
1786 if (blocks != null) {
1787 blockManager.getDatanodeManager().sortLocatedBlocks(clientMachine,
1788 blocks.getLocatedBlocks());
1789
1790 // lastBlock is not part of getLocatedBlocks(), might need to sort it too
1791 LocatedBlock lastBlock = blocks.getLastLocatedBlock();
1792 if (lastBlock != null) {
1793 ArrayList<LocatedBlock> lastBlockList =
1794 Lists.newArrayListWithCapacity(1);
1795 lastBlockList.add(lastBlock);
1796 blockManager.getDatanodeManager().sortLocatedBlocks(clientMachine,
1797 lastBlockList);
1798 }
1799 }
1800 return blocks;
1801 }
1802
1803 /**
1804 * Get block locations within the specified range.
1805 * @see ClientProtocol#getBlockLocations(String, long, long)
1806 * @throws FileNotFoundException, UnresolvedLinkException, IOException
1807 */
1808 LocatedBlocks getBlockLocations(String src, long offset, long length,
1809 boolean doAccessTime, boolean needBlockToken, boolean checkSafeMode)
1810 throws FileNotFoundException, UnresolvedLinkException, IOException {
1811 try {
1812 return getBlockLocationsInt(src, offset, length, doAccessTime,
1813 needBlockToken, checkSafeMode);
1814 } catch (AccessControlException e) {
1815 logAuditEvent(false, "open", src);
1816 throw e;
1817 }
1818 }
1819
/**
 * Worker for {@link #getBlockLocations}: validates the range, fetches the
 * locations (updating atime when allowed), and optionally converts
 * location-less results in safemode into (retriable) SafeModeExceptions.
 */
private LocatedBlocks getBlockLocationsInt(String src, long offset,
    long length, boolean doAccessTime, boolean needBlockToken,
    boolean checkSafeMode)
    throws FileNotFoundException, UnresolvedLinkException, IOException {
  if (offset < 0) {
    throw new HadoopIllegalArgumentException(
        "Negative offset is not supported. File: " + src);
  }
  if (length < 0) {
    throw new HadoopIllegalArgumentException(
        "Negative length is not supported. File: " + src);
  }
  final LocatedBlocks ret = getBlockLocationsUpdateTimes(src,
      offset, length, doAccessTime, needBlockToken);
  logAuditEvent(true, "open", src);
  if (checkSafeMode && isInSafeMode()) {
    for (LocatedBlock b : ret.getLocatedBlocks()) {
      // if safemode & no block locations yet then throw safemodeException
      if ((b.getLocations() == null) || (b.getLocations().length == 0)) {
        SafeModeException se = new SafeModeException(
            "Zero blocklocations for " + src, safeMode);
        // On an HA-active namenode the client should retry rather than
        // fail: safemode is expected to clear as block reports arrive.
        if (haEnabled && haContext != null &&
            haContext.getState().getServiceState() == HAServiceState.ACTIVE) {
          throw new RetriableException(se);
        } else {
          throw se;
        }
      }
    }
  }
  return ret;
}
1852
1853 /*
1854 * Get block locations within the specified range, updating the
1855 * access times if necessary.
1856 */
1857 private LocatedBlocks getBlockLocationsUpdateTimes(final String srcArg,
1858 long offset, long length, boolean doAccessTime, boolean needBlockToken)
1859 throws FileNotFoundException,
1860 UnresolvedLinkException, IOException {
1861 String src = srcArg;
1862 FSPermissionChecker pc = getPermissionChecker();
1863 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
1864 for (int attempt = 0; attempt < 2; attempt++) {
1865 boolean isReadOp = (attempt == 0);
1866 if (isReadOp) { // first attempt is with readlock
1867 checkOperation(OperationCategory.READ);
1868 readLock();
1869 } else { // second attempt is with write lock
1870 checkOperation(OperationCategory.WRITE);
1871 writeLock(); // writelock is needed to set accesstime
1872 }
1873 try {
1874 src = resolvePath(src, pathComponents);
1875 if (isReadOp) {
1876 checkOperation(OperationCategory.READ);
1877 } else {
1878 checkOperation(OperationCategory.WRITE);
1879 }
1880 if (isPermissionEnabled) {
1881 checkPathAccess(pc, src, FsAction.READ);
1882 }
1883
1884 // if the namenode is in safemode, then do not update access time
1885 if (isInSafeMode()) {
1886 doAccessTime = false;
1887 }
1888
1889 final INodesInPath iip = dir.getINodesInPath(src, true);
1890 final INode[] inodes = iip.getINodes();
1891 final INodeFile inode = INodeFile.valueOf(
1892 inodes[inodes.length - 1], src);
1893 if (isPermissionEnabled) {
1894 checkUnreadableBySuperuser(pc, inode, iip.getPathSnapshotId());
1895 }
1896 if (!iip.isSnapshot() //snapshots are readonly, so don't update atime.
1897 && doAccessTime && isAccessTimeSupported()) {
1898 final long now = now();
1899 if (now > inode.getAccessTime() + getAccessTimePrecision()) {
1900 // if we have to set access time but we only have the readlock, then
1901 // restart this entire operation with the writeLock.
1902 if (isReadOp) {
1903 continue;
1904 }
1905 boolean changed = dir.setTimes(inode, -1, now, false,
1906 iip.getLatestSnapshotId());
1907 if (changed) {
1908 getEditLog().logTimes(src, -1, now);
1909 }
1910 }
1911 }
1912 final long fileSize = iip.isSnapshot() ?
1913 inode.computeFileSize(iip.getPathSnapshotId())
1914 : inode.computeFileSizeNotIncludingLastUcBlock();
1915 boolean isUc = inode.isUnderConstruction();
1916 if (iip.isSnapshot()) {
1917 // if src indicates a snapshot file, we need to make sure the returned
1918 // blocks do not exceed the size of the snapshot file.
1919 length = Math.min(length, fileSize - offset);
1920 isUc = false;
1921 }
1922
1923 final FileEncryptionInfo feInfo =
1924 FSDirectory.isReservedRawName(srcArg) ?
1925 null : dir.getFileEncryptionInfo(inode, iip.getPathSnapshotId(),
1926 iip);
1927
1928 final LocatedBlocks blocks =
1929 blockManager.createLocatedBlocks(inode.getBlocks(), fileSize,
1930 isUc, offset, length, needBlockToken, iip.isSnapshot(), feInfo);
1931 // Set caching information for the located blocks.
1932 for (LocatedBlock lb: blocks.getLocatedBlocks()) {
1933 cacheManager.setCachedLocations(lb);
1934 }
1935 return blocks;
1936 } finally {
1937 if (isReadOp) {
1938 readUnlock();
1939 } else {
1940 writeUnlock();
1941 }
1942 }
1943 }
1944 return null; // can never reach here
1945 }
1946
1947 /**
1948 * Moves all the blocks from {@code srcs} and appends them to {@code target}
1949 * To avoid rollbacks we will verify validity of ALL of the args
1950 * before we start actual move.
1951 *
1952 * This does not support ".inodes" relative path
1953 * @param target target to concat into
1954 * @param srcs file that will be concatenated
1955 * @throws IOException on error
1956 */
1957 void concat(String target, String [] srcs)
1958 throws IOException, UnresolvedLinkException {
1959 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
1960 if (cacheEntry != null && cacheEntry.isSuccess()) {
1961 return; // Return previous response
1962 }
1963
1964 // Either there is no previous request in progress or it has failed
1965 if(FSNamesystem.LOG.isDebugEnabled()) {
1966 FSNamesystem.LOG.debug("concat " + Arrays.toString(srcs) +
1967 " to " + target);
1968 }
1969
1970 boolean success = false;
1971 try {
1972 concatInt(target, srcs, cacheEntry != null);
1973 success = true;
1974 } catch (AccessControlException e) {
1975 logAuditEvent(false, "concat", Arrays.toString(srcs), target, null);
1976 throw e;
1977 } finally {
1978 RetryCache.setState(cacheEntry, success);
1979 }
1980 }
1981
/**
 * Worker for {@link #concat}: cheap argument validation outside the lock,
 * then the locked concat, edit-log sync, and audit.
 */
private void concatInt(String target, String [] srcs,
    boolean logRetryCache) throws IOException, UnresolvedLinkException {
  // verify args
  if(target.isEmpty()) {
    throw new IllegalArgumentException("Target file name is empty");
  }
  if(srcs == null || srcs.length == 0) {
    throw new IllegalArgumentException("No sources given");
  }

  // We require all files be in the same directory
  String trgParent =
      target.substring(0, target.lastIndexOf(Path.SEPARATOR_CHAR));
  for (String s : srcs) {
    String srcParent = s.substring(0, s.lastIndexOf(Path.SEPARATOR_CHAR));
    if (!srcParent.equals(trgParent)) {
      throw new IllegalArgumentException(
          "Sources and target are not in the same directory");
    }
  }

  HdfsFileStatus resultingStat = null;
  FSPermissionChecker pc = getPermissionChecker();
  checkOperation(OperationCategory.WRITE);
  waitForLoadingFSImage();
  writeLock();
  try {
    // Re-check after taking the lock: HA state may have changed.
    checkOperation(OperationCategory.WRITE);
    checkNameNodeSafeMode("Cannot concat " + target);
    concatInternal(pc, target, srcs, logRetryCache);
    resultingStat = getAuditFileInfo(target, false);
  } finally {
    writeUnlock();
  }
  // Sync the edit log outside the lock to reduce lock hold time.
  getEditLog().logSync();
  logAuditEvent(true, "concat", Arrays.toString(srcs), target, resultingStat);
}
2019
/**
 * See {@link #concat(String, String[])}.
 * Validates ALL preconditions (permissions, same directory, matching
 * replication and block sizes, full end blocks, no duplicates) before
 * performing the move, so no rollback is ever required.
 * Caller must hold the write lock.
 */
private void concatInternal(FSPermissionChecker pc, String target,
    String[] srcs, boolean logRetryCache) throws IOException,
    UnresolvedLinkException {
  assert hasWriteLock();

  // write permission for the target
  if (isPermissionEnabled) {
    checkPathAccess(pc, target, FsAction.WRITE);

    // and srcs
    for(String aSrc: srcs) {
      checkPathAccess(pc, aSrc, FsAction.READ); // read the file
      checkParentAccess(pc, aSrc, FsAction.WRITE); // for delete
    }
  }

  // to make sure no two files are the same
  Set<INode> si = new HashSet<INode>();

  // we put the following prerequisite for the operation
  // replication and blocks sizes should be the same for ALL the blocks

  // check the target
  final INodesInPath trgIip = dir.getINodesInPath4Write(target);
  // Refuse files in an encryption zone entirely.
  if (dir.getEZForPath(trgIip) != null) {
    throw new HadoopIllegalArgumentException(
        "concat can not be called for files in an encryption zone.");
  }
  final INodeFile trgInode = INodeFile.valueOf(trgIip.getLastINode(),
      target);
  if(trgInode.isUnderConstruction()) {
    throw new HadoopIllegalArgumentException("concat: target file "
        + target + " is under construction");
  }
  // per design target shouldn't be empty and all the blocks same size
  if(trgInode.numBlocks() == 0) {
    throw new HadoopIllegalArgumentException("concat: target file "
        + target + " is empty");
  }
  if (trgInode.isWithSnapshot()) {
    throw new HadoopIllegalArgumentException("concat: target file "
        + target + " is in a snapshot");
  }

  long blockSize = trgInode.getPreferredBlockSize();

  // check the end block to be full
  final BlockInfo last = trgInode.getLastBlock();
  if(blockSize != last.getNumBytes()) {
    throw new HadoopIllegalArgumentException("The last block in " + target
        + " is not full; last block size = " + last.getNumBytes()
        + " but file block size = " + blockSize);
  }

  si.add(trgInode);
  final short repl = trgInode.getFileReplication();

  // now check the srcs
  boolean endSrc = false; // final src file doesn't have to have full end block
  for(int i=0; i<srcs.length; i++) {
    String src = srcs[i];
    if(i==srcs.length-1)
      endSrc=true;

    final INodeFile srcInode = INodeFile.valueOf(dir.getINode4Write(src), src);
    if(src.isEmpty()
        || srcInode.isUnderConstruction()
        || srcInode.numBlocks() == 0) {
      throw new HadoopIllegalArgumentException("concat: source file " + src
          + " is invalid or empty or underConstruction");
    }

    // check replication and blocks size
    if(repl != srcInode.getBlockReplication()) {
      throw new HadoopIllegalArgumentException("concat: the source file "
          + src + " and the target file " + target
          + " should have the same replication: source replication is "
          + srcInode.getBlockReplication()
          + " but target replication is " + repl);
    }

    // verify that all the blocks are of the same length as target
    // should be enough to check the end blocks
    final BlockInfo[] srcBlocks = srcInode.getBlocks();
    int idx = srcBlocks.length-1;
    if(endSrc)
      idx = srcBlocks.length-2; // end block of endSrc is OK not to be full
    if(idx >= 0 && srcBlocks[idx].getNumBytes() != blockSize) {
      throw new HadoopIllegalArgumentException("concat: the source file "
          + src + " and the target file " + target
          + " should have the same blocks sizes: target block size is "
          + blockSize + " but the size of source block " + idx + " is "
          + srcBlocks[idx].getNumBytes());
    }

    si.add(srcInode);
  }

  // make sure no two files are the same
  if(si.size() < srcs.length+1) { // trg + srcs
    // it means at least two files are the same
    throw new HadoopIllegalArgumentException(
        "concat: at least two of the source files are the same");
  }

  if(NameNode.stateChangeLog.isDebugEnabled()) {
    NameNode.stateChangeLog.debug("DIR* NameSystem.concat: " +
        Arrays.toString(srcs) + " to " + target);
  }

  long timestamp = now();
  // All checks passed: perform the move and record it in the edit log.
  dir.concat(target, srcs, timestamp);
  getEditLog().logConcat(target, srcs, timestamp, logRetryCache);
}
2136
2137 /**
2138 * stores the modification and access time for this inode.
2139 * The access time is precise up to an hour. The transaction, if needed, is
2140 * written to the edits log but is not flushed.
2141 */
2142 void setTimes(String src, long mtime, long atime)
2143 throws IOException, UnresolvedLinkException {
2144 if (!isAccessTimeSupported() && atime != -1) {
2145 throw new IOException("Access time for hdfs is not configured. " +
2146 " Please set " + DFS_NAMENODE_ACCESSTIME_PRECISION_KEY + " configuration parameter.");
2147 }
2148 try {
2149 setTimesInt(src, mtime, atime);
2150 } catch (AccessControlException e) {
2151 logAuditEvent(false, "setTimes", src);
2152 throw e;
2153 }
2154 }
2155
/**
 * Worker for {@link #setTimes}: applies the new times under the write lock
 * and logs the edit.
 *
 * NOTE(review): unlike the other mutators in this file (e.g.
 * setPermissionInt), there is no getEditLog().logSync() after writeUnlock()
 * here — confirm whether this is intentional (the javadoc on setTimes says
 * the transaction "is not flushed") or a missed sync.
 */
private void setTimesInt(final String srcArg, long mtime, long atime)
    throws IOException, UnresolvedLinkException {
  String src = srcArg;
  HdfsFileStatus resultingStat = null;
  FSPermissionChecker pc = getPermissionChecker();
  checkOperation(OperationCategory.WRITE);
  byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
  writeLock();
  try {
    // Re-check after taking the lock: HA state may have changed.
    checkOperation(OperationCategory.WRITE);
    checkNameNodeSafeMode("Cannot set times " + src);
    src = resolvePath(src, pathComponents);

    // Write access is required to set access and modification times
    if (isPermissionEnabled) {
      checkPathAccess(pc, src, FsAction.WRITE);
    }
    final INodesInPath iip = dir.getINodesInPath4Write(src);
    final INode inode = iip.getLastINode();
    if (inode != null) {
      boolean changed = dir.setTimes(inode, mtime, atime, true,
          iip.getLatestSnapshotId());
      if (changed) {
        getEditLog().logTimes(src, mtime, atime);
      }
      resultingStat = getAuditFileInfo(src, false);
    } else {
      throw new FileNotFoundException("File/Directory " + src + " does not exist.");
    }
  } finally {
    writeUnlock();
  }
  logAuditEvent(true, "setTimes", srcArg, null, resultingStat);
}
2190
2191 /**
2192 * Create a symbolic link.
2193 */
2194 @SuppressWarnings("deprecation")
2195 void createSymlink(String target, String link,
2196 PermissionStatus dirPerms, boolean createParent)
2197 throws IOException, UnresolvedLinkException {
2198 if (!FileSystem.areSymlinksEnabled()) {
2199 throw new UnsupportedOperationException("Symlinks not supported");
2200 }
2201 if (!DFSUtil.isValidName(link)) {
2202 throw new InvalidPathException("Invalid link name: " + link);
2203 }
2204 if (FSDirectory.isReservedName(target)) {
2205 throw new InvalidPathException("Invalid target name: " + target);
2206 }
2207 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
2208 if (cacheEntry != null && cacheEntry.isSuccess()) {
2209 return; // Return previous response
2210 }
2211 boolean success = false;
2212 try {
2213 createSymlinkInt(target, link, dirPerms, createParent, cacheEntry != null);
2214 success = true;
2215 } catch (AccessControlException e) {
2216 logAuditEvent(false, "createSymlink", link, target, null);
2217 throw e;
2218 } finally {
2219 RetryCache.setState(cacheEntry, success);
2220 }
2221 }
2222
/**
 * Worker for {@link #createSymlink}: creates the link under the write lock,
 * logs and syncs the edit, then audits with the original link path.
 */
private void createSymlinkInt(String target, final String linkArg,
    PermissionStatus dirPerms, boolean createParent, boolean logRetryCache)
    throws IOException, UnresolvedLinkException {
  String link = linkArg;
  if (NameNode.stateChangeLog.isDebugEnabled()) {
    NameNode.stateChangeLog.debug("DIR* NameSystem.createSymlink: target="
        + target + " link=" + link);
  }
  HdfsFileStatus resultingStat = null;
  FSPermissionChecker pc = getPermissionChecker();
  checkOperation(OperationCategory.WRITE);
  byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(link);
  writeLock();
  try {
    // Re-check after taking the lock: HA state may have changed.
    checkOperation(OperationCategory.WRITE);
    checkNameNodeSafeMode("Cannot create symlink " + link);
    link = resolvePath(link, pathComponents);
    if (!createParent) {
      verifyParentDir(link);
    }
    if (!dir.isValidToCreate(link)) {
      throw new IOException("failed to create link " + link
          +" either because the filename is invalid or the file exists");
    }
    if (isPermissionEnabled) {
      checkAncestorAccess(pc, link, FsAction.WRITE);
    }
    // validate that we have enough inodes.
    checkFsObjectLimit();

    // add symbolic link to namespace
    addSymlink(link, target, dirPerms, createParent, logRetryCache);
    resultingStat = getAuditFileInfo(link, false);
  } finally {
    writeUnlock();
  }
  // Sync the edit log outside the lock to reduce lock hold time.
  getEditLog().logSync();
  logAuditEvent(true, "createSymlink", linkArg, target, resultingStat);
}
2262
2263 /**
2264 * Set replication for an existing file.
2265 *
2266 * The NameNode sets new replication and schedules either replication of
2267 * under-replicated data blocks or removal of the excessive block copies
2268 * if the blocks are over-replicated.
2269 *
2270 * @see ClientProtocol#setReplication(String, short)
2271 * @param src file name
2272 * @param replication new replication
2273 * @return true if successful;
2274 * false if file does not exist or is a directory
2275 */
2276 boolean setReplication(final String src, final short replication)
2277 throws IOException {
2278 try {
2279 return setReplicationInt(src, replication);
2280 } catch (AccessControlException e) {
2281 logAuditEvent(false, "setReplication", src);
2282 throw e;
2283 }
2284 }
2285
/**
 * Worker for {@link #setReplication}: verifies the requested factor,
 * applies it under the write lock, and lets the block manager schedule
 * replication/deletion work.
 *
 * @return true if src named an existing file; false otherwise
 */
private boolean setReplicationInt(final String srcArg,
    final short replication) throws IOException {
  String src = srcArg;
  // Validate the replication factor against configured min/max first.
  blockManager.verifyReplication(src, replication, null);
  final boolean isFile;
  FSPermissionChecker pc = getPermissionChecker();
  checkOperation(OperationCategory.WRITE);
  byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
  waitForLoadingFSImage();
  writeLock();
  try {
    // Re-check after taking the lock: HA state may have changed.
    checkOperation(OperationCategory.WRITE);
    checkNameNodeSafeMode("Cannot set replication for " + src);
    src = resolvePath(src, pathComponents);
    if (isPermissionEnabled) {
      checkPathAccess(pc, src, FsAction.WRITE);
    }

    final short[] blockRepls = new short[2]; // 0: old, 1: new
    // dir.setReplication returns null when src is absent or a directory.
    final Block[] blocks = dir.setReplication(src, replication, blockRepls);
    isFile = blocks != null;
    if (isFile) {
      getEditLog().logSetReplication(src, replication);
      blockManager.setReplication(blockRepls[0], blockRepls[1], src, blocks);
    }
  } finally {
    writeUnlock();
  }

  // Sync the edit log outside the lock to reduce lock hold time.
  getEditLog().logSync();
  if (isFile) {
    logAuditEvent(true, "setReplication", srcArg);
  }
  return isFile;
}
2321
2322 /**
2323 * Set the storage policy for a file or a directory.
2324 *
2325 * @param src file/directory path
2326 * @param policyName storage policy name
2327 */
2328 void setStoragePolicy(String src, final String policyName)
2329 throws IOException {
2330 try {
2331 setStoragePolicyInt(src, policyName);
2332 } catch (AccessControlException e) {
2333 logAuditEvent(false, "setStoragePolicy", src);
2334 throw e;
2335 }
2336 }
2337
/**
 * Worker for {@link #setStoragePolicy}: resolves the named policy and
 * applies its id to the path under the write lock.
 *
 * NOTE(review): the permission check here runs on the UNRESOLVED src
 * (resolvePath is called after checkPermission), which is the opposite
 * order from the other mutators in this file — verify this is intended.
 */
private void setStoragePolicyInt(String src, final String policyName)
    throws IOException, UnresolvedLinkException, AccessControlException {

  if (!isStoragePolicyEnabled) {
    throw new IOException("Failed to set storage policy since "
        + DFS_STORAGE_POLICY_ENABLED_KEY + " is set to false.");
  }
  FSPermissionChecker pc = null;
  if (isPermissionEnabled) {
    pc = getPermissionChecker();
  }

  checkOperation(OperationCategory.WRITE);
  byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
  waitForLoadingFSImage();
  HdfsFileStatus fileStat;
  writeLock();
  try {
    // Re-check after taking the lock: HA state may have changed.
    checkOperation(OperationCategory.WRITE);
    checkNameNodeSafeMode("Cannot set storage policy for " + src);

    if (pc != null) {
      checkPermission(pc, src, false, null, null, FsAction.WRITE, null,
          false, true);
    }

    src = FSDirectory.resolvePath(src, pathComponents, dir);

    // get the corresponding policy and make sure the policy name is valid
    BlockStoragePolicy policy = blockManager.getStoragePolicy(policyName);
    if (policy == null) {
      throw new HadoopIllegalArgumentException(
          "Cannot find a block policy with the name " + policyName);
    }
    dir.setStoragePolicy(src, policy.getId());
    getEditLog().logSetStoragePolicy(src, policy.getId());
    fileStat = getAuditFileInfo(src, false);
  } finally {
    writeUnlock();
  }

  // Sync the edit log outside the lock to reduce lock hold time.
  getEditLog().logSync();
  logAuditEvent(true, "setStoragePolicy", src, null, fileStat);
}
2382
2383 /**
2384 * @return All the existing block storage policies
2385 */
2386 BlockStoragePolicy[] getStoragePolicies() throws IOException {
2387 checkOperation(OperationCategory.READ);
2388 waitForLoadingFSImage();
2389 readLock();
2390 try {
2391 checkOperation(OperationCategory.READ);
2392 return blockManager.getStoragePolicies();
2393 } finally {
2394 readUnlock();
2395 }
2396 }
2397
2398 long getPreferredBlockSize(String filename)
2399 throws IOException, UnresolvedLinkException {
2400 FSPermissionChecker pc = getPermissionChecker();
2401 checkOperation(OperationCategory.READ);
2402 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(filename);
2403 readLock();
2404 try {
2405 checkOperation(OperationCategory.READ);
2406 filename = resolvePath(filename, pathComponents);
2407 if (isPermissionEnabled) {
2408 checkTraverse(pc, filename);
2409 }
2410 return dir.getPreferredBlockSize(filename);
2411 } finally {
2412 readUnlock();
2413 }
2414 }
2415
2416 /**
2417 * Verify that parent directory of src exists.
2418 */
2419 private void verifyParentDir(String src) throws FileNotFoundException,
2420 ParentNotDirectoryException, UnresolvedLinkException {
2421 assert hasReadLock();
2422 Path parent = new Path(src).getParent();
2423 if (parent != null) {
2424 final INode parentNode = dir.getINode(parent.toString());
2425 if (parentNode == null) {
2426 throw new FileNotFoundException("Parent directory doesn't exist: "
2427 + parent);
2428 } else if (!parentNode.isDirectory() && !parentNode.isSymlink()) {
2429 throw new ParentNotDirectoryException("Parent path is not a directory: "
2430 + parent);
2431 }
2432 }
2433 }
2434
2435 /**
2436 * If the file is within an encryption zone, select the appropriate
2437 * CryptoProtocolVersion from the list provided by the client. Since the
2438 * client may be newer, we need to handle unknown versions.
2439 *
2440 * @param zone EncryptionZone of the file
2441 * @param supportedVersions List of supported protocol versions
2442 * @return chosen protocol version
2443 * @throws IOException
2444 */
2445 private CryptoProtocolVersion chooseProtocolVersion(EncryptionZone zone,
2446 CryptoProtocolVersion[] supportedVersions)
2447 throws UnknownCryptoProtocolVersionException, UnresolvedLinkException,
2448 SnapshotAccessControlException {
2449 Preconditions.checkNotNull(zone);
2450 Preconditions.checkNotNull(supportedVersions);
2451 // Right now, we only support a single protocol version,
2452 // so simply look for it in the list of provided options
2453 final CryptoProtocolVersion required = zone.getVersion();
2454
2455 for (CryptoProtocolVersion c : supportedVersions) {
2456 if (c.equals(CryptoProtocolVersion.UNKNOWN)) {
2457 if (LOG.isDebugEnabled()) {
2458 LOG.debug("Ignoring unknown CryptoProtocolVersion provided by " +
2459 "client: " + c.getUnknownValue());
2460 }
2461 continue;
2462 }
2463 if (c.equals(required)) {
2464 return c;
2465 }
2466 }
2467 throw new UnknownCryptoProtocolVersionException(
2468 "No crypto protocol versions provided by the client are supported."
2469 + " Client provided: " + Arrays.toString(supportedVersions)
2470 + " NameNode supports: " + Arrays.toString(CryptoProtocolVersion
2471 .values()));
2472 }
2473
2474 /**
2475 * Invoke KeyProvider APIs to generate an encrypted data encryption key for an
2476 * encryption zone. Should not be called with any locks held.
2477 *
2478 * @param ezKeyName key name of an encryption zone
2479 * @return New EDEK, or null if ezKeyName is null
2480 * @throws IOException
2481 */
2482 private EncryptedKeyVersion generateEncryptedDataEncryptionKey(String
2483 ezKeyName) throws IOException {
2484 if (ezKeyName == null) {
2485 return null;
2486 }
2487 EncryptedKeyVersion edek = null;
2488 try {
2489 edek = provider.generateEncryptedKey(ezKeyName);
2490 } catch (GeneralSecurityException e) {
2491 throw new IOException(e);
2492 }
2493 Preconditions.checkNotNull(edek);
2494 return edek;
2495 }
2496
2497 /**
2498 * Create a new file entry in the namespace.
2499 *
2500 * For description of parameters and exceptions thrown see
2501 * {@link ClientProtocol#create}, except it returns valid file status upon
2502 * success
2503 */
2504 HdfsFileStatus startFile(String src, PermissionStatus permissions,
2505 String holder, String clientMachine, EnumSet<CreateFlag> flag,
2506 boolean createParent, short replication, long blockSize,
2507 CryptoProtocolVersion[] supportedVersions)
2508 throws AccessControlException, SafeModeException,
2509 FileAlreadyExistsException, UnresolvedLinkException,
2510 FileNotFoundException, ParentNotDirectoryException, IOException {
2511 HdfsFileStatus status = null;
2512 CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
2513 null);
2514 if (cacheEntry != null && cacheEntry.isSuccess()) {
2515 return (HdfsFileStatus) cacheEntry.getPayload();
2516 }
2517
2518 try {
2519 status = startFileInt(src, permissions, holder, clientMachine, flag,
2520 createParent, replication, blockSize, supportedVersions,
2521 cacheEntry != null);
2522 } catch (AccessControlException e) {
2523 logAuditEvent(false, "create", src);
2524 throw e;
2525 } finally {
2526 RetryCache.setState(cacheEntry, status != null, status);
2527 }
2528 return status;
2529 }
2530
2531 private HdfsFileStatus startFileInt(final String srcArg,
2532 PermissionStatus permissions, String holder, String clientMachine,
2533 EnumSet<CreateFlag> flag, boolean createParent, short replication,
2534 long blockSize, CryptoProtocolVersion[] supportedVersions,
2535 boolean logRetryCache)
2536 throws AccessControlException, SafeModeException,
2537 FileAlreadyExistsException, UnresolvedLinkException,
2538 FileNotFoundException, ParentNotDirectoryException, IOException {
2539 String src = srcArg;
2540 if (NameNode.stateChangeLog.isDebugEnabled()) {
2541 StringBuilder builder = new StringBuilder();
2542 builder.append("DIR* NameSystem.startFile: src=" + src
2543 + ", holder=" + holder
2544 + ", clientMachine=" + clientMachine
2545 + ", createParent=" + createParent
2546 + ", replication=" + replication
2547 + ", createFlag=" + flag.toString()
2548 + ", blockSize=" + blockSize);
2549 builder.append(", supportedVersions=");
2550 if (supportedVersions != null) {
2551 builder.append(Arrays.toString(supportedVersions));
2552 } else {
2553 builder.append("null");
2554 }
2555 NameNode.stateChangeLog.debug(builder.toString());
2556 }
2557 if (!DFSUtil.isValidName(src)) {
2558 throw new InvalidPathException(src);
2559 }
2560 blockManager.verifyReplication(src, replication, clientMachine);
2561
2562 boolean skipSync = false;
2563 HdfsFileStatus stat = null;
2564 FSPermissionChecker pc = getPermissionChecker();
2565 checkOperation(OperationCategory.WRITE);
2566 if (blockSize < minBlockSize) {
2567 throw new IOException("Specified block size is less than configured" +
2568 " minimum value (" + DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY
2569 + "): " + blockSize + " < " + minBlockSize);
2570 }
2571 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2572 boolean create = flag.contains(CreateFlag.CREATE);
2573 boolean overwrite = flag.contains(CreateFlag.OVERWRITE);
2574 boolean isLazyPersist = flag.contains(CreateFlag.LAZY_PERSIST);
2575
2576 waitForLoadingFSImage();
2577
2578 /**
2579 * If the file is in an encryption zone, we optimistically create an
2580 * EDEK for the file by calling out to the configured KeyProvider.
2581 * Since this typically involves doing an RPC, we take the readLock
2582 * initially, then drop it to do the RPC.
2583 *
2584 * Since the path can flip-flop between being in an encryption zone and not
2585 * in the meantime, we need to recheck the preconditions when we retake the
2586 * lock to do the create. If the preconditions are not met, we throw a
2587 * special RetryStartFileException to ask the DFSClient to try the create
2588 * again later.
2589 */
2590 CryptoProtocolVersion protocolVersion = null;
2591 CipherSuite suite = null;
2592 String ezKeyName = null;
2593 readLock();
2594 try {
2595 src = resolvePath(src, pathComponents);
2596 INodesInPath iip = dir.getINodesInPath4Write(src);
2597 // Nothing to do if the path is not within an EZ
2598 if (dir.isInAnEZ(iip)) {
2599 EncryptionZone zone = dir.getEZForPath(iip);
2600 protocolVersion = chooseProtocolVersion(zone, supportedVersions);
2601 suite = zone.getSuite();
2602 ezKeyName = dir.getKeyName(iip);
2603
2604 Preconditions.checkNotNull(protocolVersion);
2605 Preconditions.checkNotNull(suite);
2606 Preconditions.checkArgument(!suite.equals(CipherSuite.UNKNOWN),
2607 "Chose an UNKNOWN CipherSuite!");
2608 Preconditions.checkNotNull(ezKeyName);
2609 }
2610 } finally {
2611 readUnlock();
2612 }
2613
2614 Preconditions.checkState(
2615 (suite == null && ezKeyName == null) ||
2616 (suite != null && ezKeyName != null),
2617 "Both suite and ezKeyName should both be null or not null");
2618
2619 // Generate EDEK if necessary while not holding the lock
2620 EncryptedKeyVersion edek =
2621 generateEncryptedDataEncryptionKey(ezKeyName);
2622 EncryptionFaultInjector.getInstance().startFileAfterGenerateKey();
2623
2624 // Proceed with the create, using the computed cipher suite and
2625 // generated EDEK
2626 BlocksMapUpdateInfo toRemoveBlocks = null;
2627 writeLock();
2628 try {
2629 checkOperation(OperationCategory.WRITE);
2630 checkNameNodeSafeMode("Cannot create file" + src);
2631 src = resolvePath(src, pathComponents);
2632 toRemoveBlocks = startFileInternal(pc, src, permissions, holder,
2633 clientMachine, create, overwrite, createParent, replication,
2634 blockSize, isLazyPersist, suite, protocolVersion, edek, logRetryCache);
2635 stat = dir.getFileInfo(src, false,
2636 FSDirectory.isReservedRawName(srcArg), true);
2637 } catch (StandbyException se) {
2638 skipSync = true;
2639 throw se;
2640 } finally {
2641 writeUnlock();
2642 // There might be transactions logged while trying to recover the lease.
2643 // They need to be sync'ed even when an exception was thrown.
2644 if (!skipSync) {
2645 getEditLog().logSync();
2646 if (toRemoveBlocks != null) {
2647 removeBlocks(toRemoveBlocks);
2648 toRemoveBlocks.clear();
2649 }
2650 }
2651 }
2652
2653 logAuditEvent(true, "create", srcArg, null, stat);
2654 return stat;
2655 }
2656
  /**
   * Create a new file or overwrite an existing file<br>
   *
   * Once the file is created the client then allocates a new block with the
   * next call using {@link ClientProtocol#addBlock}.
   * <p>
   * For description of parameters and exceptions thrown see
   * {@link ClientProtocol#create}
   *
   * @return when OVERWRITE removed an existing file, the blocks of that file
   *         for the caller to remove from the blocks map after syncing the
   *         edit log; null otherwise
   */
  private BlocksMapUpdateInfo startFileInternal(FSPermissionChecker pc,
      String src, PermissionStatus permissions, String holder,
      String clientMachine, boolean create, boolean overwrite,
      boolean createParent, short replication, long blockSize,
      boolean isLazyPersist, CipherSuite suite, CryptoProtocolVersion version,
      EncryptedKeyVersion edek, boolean logRetryEntry)
      throws FileAlreadyExistsException, AccessControlException,
      UnresolvedLinkException, FileNotFoundException,
      ParentNotDirectoryException, RetryStartFileException, IOException {
    assert hasWriteLock();
    // Verify that the destination does not exist as a directory already.
    final INodesInPath iip = dir.getINodesInPath4Write(src);
    final INode inode = iip.getLastINode();
    if (inode != null && inode.isDirectory()) {
      throw new FileAlreadyExistsException(src +
          " already exists as a directory");
    }

    FileEncryptionInfo feInfo = null;
    if (dir.isInAnEZ(iip)) {
      // The path is now within an EZ, but we're missing encryption parameters
      // (they were computed without the lock held — ask the client to retry).
      if (suite == null || edek == null) {
        throw new RetryStartFileException();
      }
      // Path is within an EZ and we have provided encryption parameters.
      // Make sure that the generated EDEK matches the settings of the EZ.
      String ezKeyName = dir.getKeyName(iip);
      if (!ezKeyName.equals(edek.getEncryptionKeyName())) {
        throw new RetryStartFileException();
      }
      feInfo = new FileEncryptionInfo(suite, version,
          edek.getEncryptedKeyVersion().getMaterial(),
          edek.getEncryptedKeyIv(),
          ezKeyName, edek.getEncryptionKeyVersionName());
      Preconditions.checkNotNull(feInfo);
    }

    final INodeFile myFile = INodeFile.valueOf(inode, src, true);
    if (isPermissionEnabled) {
      if (overwrite && myFile != null) {
        // Overwriting an existing file additionally requires 'w' on the file
        // itself.
        checkPathAccess(pc, src, FsAction.WRITE);
      }
      /*
       * To overwrite existing file, need to check 'w' permission
       * of parent (equals to ancestor in this case)
       */
      checkAncestorAccess(pc, src, FsAction.WRITE);
    }

    if (!createParent) {
      verifyParentDir(src);
    }

    try {
      BlocksMapUpdateInfo toRemoveBlocks = null;
      if (myFile == null) {
        if (!create) {
          throw new FileNotFoundException("Can't overwrite non-existent " +
              src + " for client " + clientMachine);
        }
      } else {
        if (overwrite) {
          // OVERWRITE: delete the existing file now, collecting its blocks
          // and inodes; the caller removes the blocks after the log sync.
          toRemoveBlocks = new BlocksMapUpdateInfo();
          List<INode> toRemoveINodes = new ChunkedArrayList<INode>();
          long ret = dir.delete(src, toRemoveBlocks, toRemoveINodes, now());
          if (ret >= 0) {
            incrDeletedFileCount(ret);
            removePathAndBlocks(src, null, toRemoveINodes, true);
          }
        } else {
          // If lease soft limit time is expired, recover the lease
          recoverLeaseInternal(myFile, src, holder, clientMachine, false);
          throw new FileAlreadyExistsException(src + " for client " +
              clientMachine + " already exists");
        }
      }

      checkFsObjectLimit();
      INodeFile newNode = null;

      // Always do an implicit mkdirs for parent directory tree.
      Path parent = new Path(src).getParent();
      if (parent != null && mkdirsRecursively(parent.toString(),
          permissions, true, now())) {
        newNode = dir.addFile(src, permissions, replication, blockSize,
            holder, clientMachine);
      }

      if (newNode == null) {
        throw new IOException("Unable to add " + src + " to namespace");
      }
      leaseManager.addLease(newNode.getFileUnderConstructionFeature()
          .getClientName(), src);

      // Set encryption attributes if necessary
      if (feInfo != null) {
        dir.setFileEncryptionInfo(src, feInfo);
        // Re-fetch the inode after attaching the encryption info, since the
        // stored inode may have been replaced.
        newNode = dir.getInode(newNode.getId()).asFile();
      }

      setNewINodeStoragePolicy(newNode, iip, isLazyPersist);

      // record file record in log, record new generation stamp
      getEditLog().logOpenFile(src, newNode, overwrite, logRetryEntry);
      if (NameNode.stateChangeLog.isDebugEnabled()) {
        NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: added " +
            src + " inode " + newNode.getId() + " " + holder);
      }
      return toRemoveBlocks;
    } catch (IOException ie) {
      NameNode.stateChangeLog.warn("DIR* NameSystem.startFile: " + src + " " +
          ie.getMessage());
      throw ie;
    }
  }
2781
2782 private void setNewINodeStoragePolicy(INodeFile inode,
2783 INodesInPath iip,
2784 boolean isLazyPersist)
2785 throws IOException {
2786
2787 if (isLazyPersist) {
2788 BlockStoragePolicy lpPolicy =
2789 blockManager.getStoragePolicy("LAZY_PERSIST");
2790
2791 // Set LAZY_PERSIST storage policy if the flag was passed to
2792 // CreateFile.
2793 if (lpPolicy == null) {
2794 throw new HadoopIllegalArgumentException(
2795 "The LAZY_PERSIST storage policy has been disabled " +
2796 "by the administrator.");
2797 }
2798 inode.setStoragePolicyID(lpPolicy.getId(),
2799 iip.getLatestSnapshotId());
2800 } else {
2801 BlockStoragePolicy effectivePolicy =
2802 blockManager.getStoragePolicy(inode.getStoragePolicyID());
2803
2804 if (effectivePolicy != null &&
2805 effectivePolicy.isCopyOnCreateFile()) {
2806 // Copy effective policy from ancestor directory to current file.
2807 inode.setStoragePolicyID(effectivePolicy.getId(),
2808 iip.getLatestSnapshotId());
2809 }
2810 }
2811 }
2812
  /**
   * Append to an existing file.
   * <p>
   *
   * The method returns the last block of the file if this is a partial block,
   * which can still be used for writing more data. The client uses the returned
   * block locations to form the data pipeline for this block.<br>
   * The method returns null if the last block is full. The client then
   * allocates a new block with the next call using
   * {@link ClientProtocol#addBlock}.
   * <p>
   *
   * For description of parameters and exceptions thrown see
   * {@link ClientProtocol#append(String, String)}
   *
   * @return the last block locations if the block is partial or null otherwise
   */
  private LocatedBlock appendFileInternal(FSPermissionChecker pc, String src,
      String holder, String clientMachine, boolean logRetryCache)
      throws AccessControlException, UnresolvedLinkException,
      FileNotFoundException, IOException {
    assert hasWriteLock();
    // Verify that the destination does not exist as a directory already.
    final INodesInPath iip = dir.getINodesInPath4Write(src);
    final INode inode = iip.getLastINode();
    if (inode != null && inode.isDirectory()) {
      throw new FileAlreadyExistsException("Cannot append to directory " + src
          + "; already exists as a directory.");
    }
    if (isPermissionEnabled) {
      checkPathAccess(pc, src, FsAction.WRITE);
    }

    try {
      if (inode == null) {
        throw new FileNotFoundException("failed to append to non-existent file "
            + src + " for client " + clientMachine);
      }
      INodeFile myFile = INodeFile.valueOf(inode, src, true);
      final BlockStoragePolicy lpPolicy =
          blockManager.getStoragePolicy("LAZY_PERSIST");

      // Files with the LAZY_PERSIST storage policy cannot be appended to.
      if (lpPolicy != null &&
          lpPolicy.getId() == myFile.getStoragePolicyID()) {
        throw new UnsupportedOperationException(
            "Cannot append to lazy persist file " + src);
      }
      // Opening an existing file for write - may need to recover lease.
      recoverLeaseInternal(myFile, src, holder, clientMachine, false);

      // recoverLeaseInternal may create a new InodeFile via
      // finalizeINodeFileUnderConstruction so we need to refresh
      // the referenced file.
      myFile = INodeFile.valueOf(dir.getINode(src), src, true);
      final BlockInfo lastBlock = myFile.getLastBlock();
      // Check that the block has at least minimum replication.
      if (lastBlock != null && lastBlock.isComplete() &&
          !getBlockManager().isSufficientlyReplicated(lastBlock)) {
        throw new IOException("append: lastBlock=" + lastBlock +
            " of src=" + src + " is not sufficiently replicated yet.");
      }
      return prepareFileForWrite(src, myFile, holder, clientMachine, true,
          iip.getLatestSnapshotId(), logRetryCache);
    } catch (IOException ie) {
      NameNode.stateChangeLog.warn("DIR* NameSystem.append: " +ie.getMessage());
      throw ie;
    }
  }
2881
  /**
   * Replace current node with a INodeUnderConstruction.
   * Recreate in-memory lease record.
   *
   * @param src path to the file
   * @param file existing file object
   * @param leaseHolder identifier of the lease holder on this file
   * @param clientMachine identifier of the client machine
   * @param writeToEditLog whether to persist this change to the edit log
   * @param latestSnapshot id of the latest snapshot covering this file
   * @param logRetryCache whether to record RPC ids in editlog for retry cache
   *                      rebuilding
   * @return the last block locations if the block is partial or null otherwise
   * @throws UnresolvedLinkException
   * @throws IOException
   */
  LocatedBlock prepareFileForWrite(String src, INodeFile file,
                                   String leaseHolder, String clientMachine,
                                   boolean writeToEditLog,
                                   int latestSnapshot, boolean logRetryCache)
      throws IOException {
    // Record the change against the latest snapshot before converting the
    // file to under-construction state.
    file.recordModification(latestSnapshot);
    final INodeFile cons = file.toUnderConstruction(leaseHolder, clientMachine);

    // Re-create the in-memory lease record for the new writer.
    leaseManager.addLease(cons.getFileUnderConstructionFeature()
        .getClientName(), src);

    LocatedBlock ret = blockManager.convertLastBlockToUnderConstruction(cons);
    if (ret != null) {
      // update the quota: use the preferred block size for UC block
      final long diff = file.getPreferredBlockSize() - ret.getBlockSize();
      dir.updateSpaceConsumed(src, 0, diff * file.getBlockReplication());
    }

    if (writeToEditLog) {
      getEditLog().logOpenFile(src, cons, false, logRetryCache);
    }
    return ret;
  }
2920
  /**
   * Recover lease;
   * Immediately revoke the lease of the current lease holder and start lease
   * recovery so that the file can be forced to be closed.
   *
   * @param src the path of the file to start lease recovery
   * @param holder the lease holder's name
   * @param clientMachine the client machine's name
   * @return true if the file is already closed
   * @throws IOException
   */
  boolean recoverLease(String src, String holder, String clientMachine)
      throws IOException {
    if (!DFSUtil.isValidName(src)) {
      throw new IOException("Invalid file name: " + src);
    }

    boolean skipSync = false;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot recover the lease of " + src);
      src = resolvePath(src, pathComponents);
      final INodeFile inode = INodeFile.valueOf(dir.getINode(src), src);
      // A file that is no longer under construction needs no recovery.
      if (!inode.isUnderConstruction()) {
        return true;
      }
      if (isPermissionEnabled) {
        checkPathAccess(pc, src, FsAction.WRITE);
      }

      // force=true: revoke the current holder's lease immediately rather
      // than waiting for the soft limit to expire.
      recoverLeaseInternal(inode, src, holder, clientMachine, true);
    } catch (StandbyException se) {
      skipSync = true;
      throw se;
    } finally {
      writeUnlock();
      // There might be transactions logged while trying to recover the lease.
      // They need to be sync'ed even when an exception was thrown.
      if (!skipSync) {
        getEditLog().logSync();
      }
    }
    return false;
  }
2969
  /**
   * Lease-recovery logic shared by create, append and recoverLease. Does
   * nothing unless the file is currently under construction.
   *
   * @param fileInode inode of the target file; may be null
   * @param src path to the file
   * @param holder name of the client requesting the operation
   * @param clientMachine identifier of the requesting client's machine
   * @param force if true, immediately release the current holder's lease;
   *              otherwise only recover when the soft lease limit has expired
   * @throws IOException if recovery is in progress or another live client is
   *                     already writing the file
   */
  private void recoverLeaseInternal(INodeFile fileInode,
      String src, String holder, String clientMachine, boolean force)
      throws IOException {
    assert hasWriteLock();
    if (fileInode != null && fileInode.isUnderConstruction()) {
      //
      // If the file is under construction , then it must be in our
      // leases. Find the appropriate lease record.
      //
      Lease lease = leaseManager.getLease(holder);
      //
      // We found the lease for this file. And surprisingly the original
      // holder is trying to recreate this file. This should never occur.
      //

      if (!force && lease != null) {
        Lease leaseFile = leaseManager.getLeaseByPath(src);
        if (leaseFile != null && leaseFile.equals(lease)) {
          throw new AlreadyBeingCreatedException(
              "failed to create file " + src + " for " + holder +
              " for client " + clientMachine +
              " because current leaseholder is trying to recreate file.");
        }
      }
      //
      // Find the original holder.
      //
      FileUnderConstructionFeature uc = fileInode.getFileUnderConstructionFeature();
      String clientName = uc.getClientName();
      lease = leaseManager.getLease(clientName);
      if (lease == null) {
        throw new AlreadyBeingCreatedException(
            "failed to create file " + src + " for " + holder +
            " for client " + clientMachine +
            " because pendingCreates is non-null but no leases found.");
      }
      if (force) {
        // close now: no need to wait for soft lease expiration and
        // close only the file src
        LOG.info("recoverLease: " + lease + ", src=" + src +
            " from client " + clientName);
        internalReleaseLease(lease, src, holder);
      } else {
        assert lease.getHolder().equals(clientName) :
            "Current lease holder " + lease.getHolder() +
            " does not match file creator " + clientName;
        //
        // If the original holder has not renewed in the last SOFTLIMIT
        // period, then start lease recovery.
        //
        if (lease.expiredSoftLimit()) {
          LOG.info("startFile: recover " + lease + ", src=" + src + " client "
              + clientName);
          boolean isClosed = internalReleaseLease(lease, src, null);
          if (!isClosed)
            throw new RecoveryInProgressException(
                "Failed to close file " + src +
                ". Lease recovery is in progress. Try again later.");
        } else {
          final BlockInfo lastBlock = fileInode.getLastBlock();
          if (lastBlock != null
              && lastBlock.getBlockUCState() == BlockUCState.UNDER_RECOVERY) {
            // Block recovery is already underway; tell the caller to retry.
            throw new RecoveryInProgressException("Recovery in progress, file ["
                + src + "], " + "lease owner [" + lease.getHolder() + "]");
          } else {
            // Another client still holds a live lease on this file.
            throw new AlreadyBeingCreatedException("Failed to create file ["
                + src + "] for [" + holder + "] for client [" + clientMachine
                + "], because this file is already being created by ["
                + clientName + "] on ["
                + uc.getClientMachine() + "]");
          }
        }
      }
    }
  }
3045
3046 /**
3047 * Append to an existing file in the namespace.
3048 */
3049 LocatedBlock appendFile(String src, String holder, String clientMachine)
3050 throws AccessControlException, SafeModeException,
3051 FileAlreadyExistsException, FileNotFoundException,
3052 ParentNotDirectoryException, IOException {
3053 LocatedBlock lb = null;
3054 CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
3055 null);
3056 if (cacheEntry != null && cacheEntry.isSuccess()) {
3057 return (LocatedBlock) cacheEntry.getPayload();
3058 }
3059
3060 boolean success = false;
3061 try {
3062 lb = appendFileInt(src, holder, clientMachine, cacheEntry != null);
3063 success = true;
3064 return lb;
3065 } catch (AccessControlException e) {
3066 logAuditEvent(false, "append", src);
3067 throw e;
3068 } finally {
3069 RetryCache.setState(cacheEntry, success, lb);
3070 }
3071 }
3072
3073 private LocatedBlock appendFileInt(final String srcArg, String holder,
3074 String clientMachine, boolean logRetryCache)
3075 throws AccessControlException, SafeModeException,
3076 FileAlreadyExistsException, FileNotFoundException,
3077 ParentNotDirectoryException, IOException {
3078 String src = srcArg;
3079 if (NameNode.stateChangeLog.isDebugEnabled()) {
3080 NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: src=" + src
3081 + ", holder=" + holder
3082 + ", clientMachine=" + clientMachine);
3083 }
3084 boolean skipSync = false;
3085 if (!supportAppends) {
3086 throw new UnsupportedOperationException(
3087 "Append is not enabled on this NameNode. Use the " +
3088 DFS_SUPPORT_APPEND_KEY + " configuration option to enable it.");
3089 }
3090
3091 LocatedBlock lb = null;
3092 FSPermissionChecker pc = getPermissionChecker();
3093 checkOperation(OperationCategory.WRITE);
3094 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
3095 writeLock();
3096 try {
3097 checkOperation(OperationCategory.WRITE);
3098 checkNameNodeSafeMode("Cannot append to file" + src);
3099 src = resolvePath(src, pathComponents);
3100 lb = appendFileInternal(pc, src, holder, clientMachine, logRetryCache);
3101 } catch (StandbyException se) {
3102 skipSync = true;
3103 throw se;
3104 } finally {
3105 writeUnlock();
3106 // There might be transactions logged while trying to recover the lease.
3107 // They need to be sync'ed even when an exception was thrown.
3108 if (!skipSync) {
3109 getEditLog().logSync();
3110 }
3111 }
3112 if (lb != null) {
3113 if (NameNode.stateChangeLog.isDebugEnabled()) {
3114 NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: file "
3115 +src+" for "+holder+" at "+clientMachine
3116 +" block " + lb.getBlock()
3117 +" block size " + lb.getBlock().getNumBytes());
3118 }
3119 }
3120 logAuditEvent(true, "append", srcArg);
3121 return lb;
3122 }
3123
  /** Wrap the given block with this namesystem's block pool id. */
  ExtendedBlock getExtendedBlock(Block blk) {
    return new ExtendedBlock(blockPoolId, blk);
  }
3127
  /**
   * Set the block pool id for this namespace and propagate it to the block
   * manager.
   */
  void setBlockPoolId(String bpid) {
    blockPoolId = bpid;
    blockManager.setBlockPoolId(blockPoolId);
  }
3132
  /**
   * The client would like to obtain an additional block for the indicated
   * filename (which is being written-to). Return an array that consists
   * of the block, plus a set of machines. The first on this list should
   * be where the client writes data. Subsequent items in the list must
   * be provided in the connection to the first datanode.
   *
   * Make sure the previous blocks have been reported by datanodes and
   * are replicated. Will return an empty 2-elt array if we want the
   * client to "try again later".
   */
  LocatedBlock getAdditionalBlock(String src, long fileId, String clientName,
      ExtendedBlock previous, Set<Node> excludedNodes,
      List<String> favoredNodes)
      throws LeaseExpiredException, NotReplicatedYetException,
      QuotaExceededException, SafeModeException, UnresolvedLinkException,
      IOException {
    final long blockSize;
    final int replication;
    final byte storagePolicyID;
    Node clientNode = null;
    String clientMachine = null;

    if(NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("BLOCK* NameSystem.getAdditionalBlock: "
          + src + " inodeId " + fileId + " for " + clientName);
    }

    // Part I. Analyze the state of the file with respect to the input data.
    // Only the read lock is held here; target selection below is done without
    // any lock, so the analysis is re-run in Part II.
    checkOperation(OperationCategory.READ);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      src = resolvePath(src, pathComponents);
      LocatedBlock[] onRetryBlock = new LocatedBlock[1];
      FileState fileState = analyzeFileState(
          src, fileId, clientName, previous, onRetryBlock);
      final INodeFile pendingFile = fileState.inode;
      src = fileState.path;

      if (onRetryBlock[0] != null && onRetryBlock[0].getLocations().length > 0) {
        // This is a retry. Just return the last block if having locations.
        return onRetryBlock[0];
      }
      if (pendingFile.getBlocks().length >= maxBlocksPerFile) {
        throw new IOException("File has reached the limit on maximum number of"
            + " blocks (" + DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_KEY
            + "): " + pendingFile.getBlocks().length + " >= "
            + maxBlocksPerFile);
      }
      // Snapshot the parameters needed for target selection while we still
      // hold the read lock.
      blockSize = pendingFile.getPreferredBlockSize();
      clientMachine = pendingFile.getFileUnderConstructionFeature()
          .getClientMachine();
      clientNode = blockManager.getDatanodeManager().getDatanodeByHost(
          clientMachine);
      replication = pendingFile.getFileReplication();
      storagePolicyID = pendingFile.getStoragePolicyID();
    } finally {
      readUnlock();
    }

    if (clientNode == null) {
      // Client is not a datanode; resolve its network location instead.
      clientNode = getClientNode(clientMachine);
    }

    // choose targets for the new block to be allocated.
    final DatanodeStorageInfo targets[] = getBlockManager().chooseTarget4NewBlock(
        src, replication, clientNode, excludedNodes, blockSize, favoredNodes,
        storagePolicyID);

    // Part II.
    // Allocate a new block, add it to the INode and the BlocksMap.
    Block newBlock = null;
    long offset;
    checkOperation(OperationCategory.WRITE);
    waitForLoadingFSImage();
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      // Run the full analysis again, since things could have changed
      // while chooseTarget() was executing.
      LocatedBlock[] onRetryBlock = new LocatedBlock[1];
      FileState fileState =
          analyzeFileState(src, fileId, clientName, previous, onRetryBlock);
      final INodeFile pendingFile = fileState.inode;
      src = fileState.path;

      if (onRetryBlock[0] != null) {
        if (onRetryBlock[0].getLocations().length > 0) {
          // This is a retry. Just return the last block if having locations.
          return onRetryBlock[0];
        } else {
          // add new chosen targets to already allocated block and return
          BlockInfo lastBlockInFile = pendingFile.getLastBlock();
          ((BlockInfoUnderConstruction) lastBlockInFile)
              .setExpectedLocations(targets);
          offset = pendingFile.computeFileSize();
          return makeLocatedBlock(lastBlockInFile, targets, offset);
        }
      }

      // commit the last block and complete it if it has minimum replicas
      commitOrCompleteLastBlock(pendingFile,
          ExtendedBlock.getLocalBlock(previous));

      // allocate new block, record block locations in INode.
      newBlock = createNewBlock();
      INodesInPath inodesInPath = INodesInPath.fromINode(pendingFile);
      saveAllocatedBlock(src, inodesInPath, newBlock, targets);

      persistNewBlock(src, pendingFile);
      offset = pendingFile.computeFileSize();
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();

    // Return located block
    return makeLocatedBlock(newBlock, targets, offset);
  }
3254
3255 /*
3256 * Resolve clientmachine address to get a network location path
3257 */
3258 private Node getClientNode(String clientMachine) {
3259 List<String> hosts = new ArrayList<String>(1);
3260 hosts.add(clientMachine);
3261 List<String> rName = getBlockManager().getDatanodeManager()
3262 .resolveNetworkLocation(hosts);
3263 Node clientNode = null;
3264 if (rName != null) {
3265 // Able to resolve clientMachine mapping.
3266 // Create a temp node to findout the rack local nodes
3267 clientNode = new NodeBase(rName.get(0) + NodeBase.PATH_SEPARATOR_STR
3268 + clientMachine);
3269 }
3270 return clientNode;
3271 }
3272
  /**
   * Result of {@link #analyzeFileState}: the file-under-construction inode
   * paired with the resolved full path to it.
   */
  static class FileState {
    // Inode of the file being written.
    public final INodeFile inode;
    // Full path to the inode; may differ from the caller-supplied path when
    // the file was looked up by inode id.
    public final String path;

    public FileState(INodeFile inode, String fullPath) {
      this.inode = inode;
      this.path = fullPath;
    }
  }
3282
/**
 * Part I of block allocation: under the read lock, validate that a new
 * block may be added to the file, and detect retried allocation RPCs.
 *
 * @param src path of the file being written; replaced by the full path
 *     resolved from the inode when the client supplied an inode id
 * @param fileId inode id from the client, or
 *     {@link INodeId#GRANDFATHER_INODE_ID} for older clients that only
 *     identify the file by path
 * @param clientName lease holder making the request
 * @param previous the block the client believes is the file's last block
 * @param onRetryBlock out-parameter; {@code onRetryBlock[0]} is set to the
 *     previously allocated last block when the request is recognized as a
 *     retry (case 2 below), otherwise left {@code null}
 * @return the file's inode together with its resolved path
 * @throws IOException if the namenode is in safe mode, fs object limits are
 *     exceeded, the lease check fails, or the request is inconsistent with
 *     the file's actual last block (case 3 below)
 */
FileState analyzeFileState(String src,
    long fileId,
    String clientName,
    ExtendedBlock previous,
    LocatedBlock[] onRetryBlock)
    throws IOException {
  assert hasReadLock();

  checkBlock(previous);
  onRetryBlock[0] = null;
  checkOperation(OperationCategory.WRITE);
  checkNameNodeSafeMode("Cannot add block to " + src);

  // have we exceeded the configured limit of fs objects.
  checkFsObjectLimit();

  Block previousBlock = ExtendedBlock.getLocalBlock(previous);
  INode inode;
  if (fileId == INodeId.GRANDFATHER_INODE_ID) {
    // Older clients may not have given us an inode ID to work with.
    // In this case, we have to try to resolve the path and hope it
    // hasn't changed or been deleted since the file was opened for write.
    final INodesInPath iip = dir.getINodesInPath4Write(src);
    inode = iip.getLastINode();
  } else {
    // Newer clients pass the inode ID, so we can just get the inode
    // directly.
    inode = dir.getInode(fileId);
    if (inode != null) src = inode.getFullPathName();
  }
  final INodeFile pendingFile = checkLease(src, clientName, inode, fileId);
  BlockInfo lastBlockInFile = pendingFile.getLastBlock();
  if (!Block.matchingIdAndGenStamp(previousBlock, lastBlockInFile)) {
    // The block that the client claims is the current last block
    // doesn't match up with what we think is the last block. There are
    // four possibilities:
    // 1) This is the first block allocation of an append() pipeline
    //    which started appending exactly at a block boundary.
    //    In this case, the client isn't passed the previous block,
    //    so it makes the allocateBlock() call with previous=null.
    //    We can distinguish this since the last block of the file
    //    will be exactly a full block.
    // 2) This is a retry from a client that missed the response of a
    //    prior getAdditionalBlock() call, perhaps because of a network
    //    timeout, or because of an HA failover. In that case, we know
    //    by the fact that the client is re-issuing the RPC that it
    //    never began to write to the old block. Hence it is safe to
    //    to return the existing block.
    // 3) This is an entirely bogus request/bug -- we should error out
    //    rather than potentially appending a new block with an empty
    //    one in the middle, etc
    // 4) This is a retry from a client that timed out while
    //    the prior getAdditionalBlock() is still being processed,
    //    currently working on chooseTarget().
    //    There are no means to distinguish between the first and
    //    the second attempts in Part I, because the first one hasn't
    //    changed the namesystem state yet.
    // We run this analysis again in Part II where case 4 is impossible.

    BlockInfo penultimateBlock = pendingFile.getPenultimateBlock();
    if (previous == null &&
        lastBlockInFile != null &&
        lastBlockInFile.getNumBytes() == pendingFile.getPreferredBlockSize() &&
        lastBlockInFile.isComplete()) {
      // Case 1
      if (NameNode.stateChangeLog.isDebugEnabled()) {
        NameNode.stateChangeLog.debug(
            "BLOCK* NameSystem.allocateBlock: handling block allocation" +
            " writing to a file with a complete previous block: src=" +
            src + " lastBlock=" + lastBlockInFile);
      }
    } else if (Block.matchingIdAndGenStamp(penultimateBlock, previousBlock)) {
      // Client's 'previous' matches our penultimate block: either a retry
      // (last block still empty) or a corrupt request (bytes written).
      if (lastBlockInFile.getNumBytes() != 0) {
        throw new IOException(
            "Request looked like a retry to allocate block " +
            lastBlockInFile + " but it already contains " +
            lastBlockInFile.getNumBytes() + " bytes");
      }

      // Case 2
      // Return the last block.
      NameNode.stateChangeLog.info("BLOCK* allocateBlock: " +
          "caught retry for allocation of a new block in " +
          src + ". Returning previously allocated block " + lastBlockInFile);
      long offset = pendingFile.computeFileSize();
      onRetryBlock[0] = makeLocatedBlock(lastBlockInFile,
          ((BlockInfoUnderConstruction)lastBlockInFile).getExpectedStorageLocations(),
          offset);
      return new FileState(pendingFile, src);
    } else {
      // Case 3
      throw new IOException("Cannot allocate block in " + src + ": " +
          "passed 'previous' block " + previous + " does not match actual " +
          "last block in file " + lastBlockInFile);
    }
  }

  // Check if the penultimate block is minimally replicated
  if (!checkFileProgress(pendingFile, false)) {
    throw new NotReplicatedYetException("Not replicated yet: " + src);
  }
  return new FileState(pendingFile, src);
}
3386
3387 LocatedBlock makeLocatedBlock(Block blk, DatanodeStorageInfo[] locs,
3388 long offset) throws IOException {
3389 LocatedBlock lBlk = new LocatedBlock(
3390 getExtendedBlock(blk), locs, offset, false);
3391 getBlockManager().setBlockToken(
3392 lBlk, BlockTokenSecretManager.AccessMode.WRITE);
3393 return lBlk;
3394 }
3395
/**
 * Pick additional datanodes to extend an existing write pipeline after a
 * datanode failure. File metadata is captured under the read lock; target
 * selection then runs outside the lock.
 *
 * @see ClientProtocol#getAdditionalDatanode
 */
LocatedBlock getAdditionalDatanode(String src, long fileId,
    final ExtendedBlock blk, final DatanodeInfo[] existings,
    final String[] storageIDs,
    final Set<Node> excludes,
    final int numAdditionalNodes, final String clientName
    ) throws IOException {
  //check if the feature is enabled
  dtpReplaceDatanodeOnFailure.checkEnabled();

  // State captured under the read lock and used for placement afterwards.
  Node clientnode = null;
  String clientMachine;
  final long preferredblocksize;
  final byte storagePolicyID;
  final List<DatanodeStorageInfo> chosen;
  checkOperation(OperationCategory.READ);
  byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
  readLock();
  try {
    checkOperation(OperationCategory.READ);
    //check safe mode
    checkNameNodeSafeMode("Cannot add datanode; src=" + src + ", blk=" + blk);
    src = resolvePath(src, pathComponents);

    //check lease
    final INode inode;
    if (fileId == INodeId.GRANDFATHER_INODE_ID) {
      // Older clients may not have given us an inode ID to work with.
      // In this case, we have to try to resolve the path and hope it
      // hasn't changed or been deleted since the file was opened for write.
      inode = dir.getINode(src);
    } else {
      inode = dir.getInode(fileId);
      if (inode != null) src = inode.getFullPathName();
    }
    final INodeFile file = checkLease(src, clientName, inode, fileId);
    clientMachine = file.getFileUnderConstructionFeature().getClientMachine();
    clientnode = blockManager.getDatanodeManager().getDatanodeByHost(clientMachine);
    preferredblocksize = file.getPreferredBlockSize();
    storagePolicyID = file.getStoragePolicyID();

    //find datanode storages
    final DatanodeManager dm = blockManager.getDatanodeManager();
    chosen = Arrays.asList(dm.getDatanodeStorageInfos(existings, storageIDs));
  } finally {
    readUnlock();
  }

  // Client is not running on a cluster datanode; resolve its network
  // location separately for topology-aware placement.
  if (clientnode == null) {
    clientnode = getClientNode(clientMachine);
  }

  // choose new datanodes.
  final DatanodeStorageInfo[] targets = blockManager.chooseTarget4AdditionalDatanode(
      src, numAdditionalNodes, clientnode, chosen,
      excludes, preferredblocksize, storagePolicyID);
  // COPY token: the new datanodes receive the replica via transfer.
  final LocatedBlock lb = new LocatedBlock(blk, targets);
  blockManager.setBlockToken(lb, AccessMode.COPY);
  return lb;
}
3456
3457 /**
3458 * The client would like to let go of the given block
3459 */
3460 boolean abandonBlock(ExtendedBlock b, long fileId, String src, String holder)
3461 throws LeaseExpiredException, FileNotFoundException,
3462 UnresolvedLinkException, IOException {
3463 if(NameNode.stateChangeLog.isDebugEnabled()) {
3464 NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: " + b
3465 + "of file " + src);
3466 }
3467 checkOperation(OperationCategory.WRITE);
3468 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
3469 waitForLoadingFSImage();
3470 writeLock();
3471 try {
3472 checkOperation(OperationCategory.WRITE);
3473 checkNameNodeSafeMode("Cannot abandon block " + b + " for file" + src);
3474 src = resolvePath(src, pathComponents);
3475
3476 final INode inode;
3477 if (fileId == INodeId.GRANDFATHER_INODE_ID) {
3478 // Older clients may not have given us an inode ID to work with.
3479 // In this case, we have to try to resolve the path and hope it
3480 // hasn't changed or been deleted since the file was opened for write.
3481 inode = dir.getINode(src);
3482 } else {
3483 inode = dir.getInode(fileId);
3484 if (inode != null) src = inode.getFullPathName();
3485 }
3486 final INodeFile file = checkLease(src, holder, inode, fileId);
3487
3488 //
3489 // Remove the block from the pending creates list
3490 //
3491 boolean removed = dir.removeBlock(src, file,
3492 ExtendedBlock.getLocalBlock(b));
3493 if (!removed) {
3494 return true;
3495 }
3496 if(NameNode.stateChangeLog.isDebugEnabled()) {
3497 NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: "
3498 + b + " is removed from pendingCreates");
3499 }
3500 persistBlocks(src, file, false);
3501 } finally {
3502 writeUnlock();
3503 }
3504 getEditLog().logSync();
3505
3506 return true;
3507 }
3508
3509 private INodeFile checkLease(String src, String holder, INode inode,
3510 long fileId)
3511 throws LeaseExpiredException, FileNotFoundException {
3512 assert hasReadLock();
3513 final String ident = src + " (inode " + fileId + ")";
3514 if (inode == null) {
3515 Lease lease = leaseManager.getLease(holder);
3516 throw new LeaseExpiredException(
3517 "No lease on " + ident + ": File does not exist. "
3518 + (lease != null ? lease.toString()
3519 : "Holder " + holder + " does not have any open files."));
3520 }
3521 if (!inode.isFile()) {
3522 Lease lease = leaseManager.getLease(holder);
3523 throw new LeaseExpiredException(
3524 "No lease on " + ident + ": INode is not a regular file. "
3525 + (lease != null ? lease.toString()
3526 : "Holder " + holder + " does not have any open files."));
3527 }
3528 final INodeFile file = inode.asFile();
3529 if (!file.isUnderConstruction()) {
3530 Lease lease = leaseManager.getLease(holder);
3531 throw new LeaseExpiredException(
3532 "No lease on " + ident + ": File is not open for writing. "
3533 + (lease != null ? lease.toString()
3534 : "Holder " + holder + " does not have any open files."));
3535 }
3536 // No further modification is allowed on a deleted file.
3537 // A file is considered deleted, if it is not in the inodeMap or is marked
3538 // as deleted in the snapshot feature.
3539 if (isFileDeleted(file)) {
3540 throw new FileNotFoundException(src);
3541 }
3542 String clientName = file.getFileUnderConstructionFeature().getClientName();
3543 if (holder != null && !clientName.equals(holder)) {
3544 throw new LeaseExpiredException("Lease mismatch on " + ident +
3545 " owned by " + clientName + " but is accessed by " + holder);
3546 }
3547 return file;
3548 }
3549
/**
 * Complete in-progress write to the given file.
 * @param srcArg path of the file being closed
 * @param holder the lease holder (client name)
 * @param last the client's view of the file's last block; may be null
 * @param fileId inode id, or INodeId.GRANDFATHER_INODE_ID for old clients
 * @return true if successful, false if the client should continue to retry
 *         (e.g if not all blocks have reached minimum replication yet)
 * @throws IOException on error (eg lease mismatch, file not open, file deleted)
 */
boolean completeFile(final String srcArg, String holder,
    ExtendedBlock last, long fileId)
    throws SafeModeException, UnresolvedLinkException, IOException {
  String src = srcArg;
  if (NameNode.stateChangeLog.isDebugEnabled()) {
    NameNode.stateChangeLog.debug("DIR* NameSystem.completeFile: " +
        src + " for " + holder);
  }
  checkBlock(last);
  boolean success = false;
  checkOperation(OperationCategory.WRITE);
  byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
  waitForLoadingFSImage();
  writeLock();
  try {
    checkOperation(OperationCategory.WRITE);
    checkNameNodeSafeMode("Cannot complete file " + src);
    src = resolvePath(src, pathComponents);
    success = completeFileInternal(src, holder,
        ExtendedBlock.getLocalBlock(last), fileId);
  } finally {
    writeUnlock();
  }
  // Sync the journal outside the namesystem lock.
  getEditLog().logSync();
  if (success) {
    NameNode.stateChangeLog.info("DIR* completeFile: " + srcArg
        + " is closed by " + holder);
  }
  return success;
}
3586
/**
 * Do the real work of completeFile() while holding the write lock:
 * resolve the inode, re-validate the lease (tolerating retried RPCs for
 * already-closed files, see HDFS-3031), commit the last block and
 * finalize the file if all blocks are minimally replicated.
 *
 * @return true if the file was closed (or was already closed by a prior
 *     attempt of the same RPC); false if the client should retry because
 *     some block has not yet reached minimum replication
 */
private boolean completeFileInternal(String src,
    String holder, Block last, long fileId) throws SafeModeException,
    UnresolvedLinkException, IOException {
  assert hasWriteLock();
  final INodeFile pendingFile;
  try {
    final INode inode;
    if (fileId == INodeId.GRANDFATHER_INODE_ID) {
      // Older clients may not have given us an inode ID to work with.
      // In this case, we have to try to resolve the path and hope it
      // hasn't changed or been deleted since the file was opened for write.
      final INodesInPath iip = dir.getLastINodeInPath(src);
      inode = iip.getINode(0);
    } else {
      inode = dir.getInode(fileId);
      if (inode != null) src = inode.getFullPathName();
    }
    pendingFile = checkLease(src, holder, inode, fileId);
  } catch (LeaseExpiredException lee) {
    final INode inode = dir.getINode(src);
    if (inode != null
        && inode.isFile()
        && !inode.asFile().isUnderConstruction()) {
      // This could be a retry RPC - i.e the client tried to close
      // the file, but missed the RPC response. Thus, it is trying
      // again to close the file. If the file still exists and
      // the client's view of the last block matches the actual
      // last block, then we'll treat it as a successful close.
      // See HDFS-3031.
      final Block realLastBlock = inode.asFile().getLastBlock();
      if (Block.matchingIdAndGenStamp(last, realLastBlock)) {
        NameNode.stateChangeLog.info("DIR* completeFile: " +
            "request from " + holder + " to complete inode " + fileId +
            "(" + src + ") which is already closed. But, it appears to be " +
            "an RPC retry. Returning success");
        return true;
      }
    }
    throw lee;
  }
  // Check the state of the penultimate block. It should be completed
  // before attempting to complete the last one.
  if (!checkFileProgress(pendingFile, false)) {
    return false;
  }

  // commit the last block and complete it if it has minimum replicas
  commitOrCompleteLastBlock(pendingFile, last);

  // All blocks (including the just-committed last one) must be complete
  // before the file can be finalized.
  if (!checkFileProgress(pendingFile, true)) {
    return false;
  }

  finalizeINodeFileUnderConstruction(src, pendingFile,
      Snapshot.CURRENT_STATE_ID);
  return true;
}
3644
3645 /**
3646 * Save allocated block at the given pending filename
3647 *
3648 * @param src path to the file
3649 * @param inodesInPath representing each of the components of src.
3650 * The last INode is the INode for {@code src} file.
3651 * @param newBlock newly allocated block to be save
3652 * @param targets target datanodes where replicas of the new block is placed
3653 * @throws QuotaExceededException If addition of block exceeds space quota
3654 */
3655 BlockInfo saveAllocatedBlock(String src, INodesInPath inodes,
3656 Block newBlock, DatanodeStorageInfo[] targets)
3657 throws IOException {
3658 assert hasWriteLock();
3659 BlockInfo b = dir.addBlock(src, inodes, newBlock, targets);
3660 NameNode.stateChangeLog.info("BLOCK* allocateBlock: " + src + ". "
3661 + getBlockPoolId() + " " + b);
3662 DatanodeStorageInfo.incrementBlocksScheduled(targets);
3663 return b;
3664 }
3665
3666 /**
3667 * Create new block with a unique block id and a new generation stamp.
3668 */
3669 Block createNewBlock() throws IOException {
3670 assert hasWriteLock();
3671 Block b = new Block(nextBlockId(), 0, 0);
3672 // Increment the generation stamp for every new block.
3673 b.setGenerationStamp(nextGenerationStamp(false));
3674 return b;
3675 }
3676
3677 /**
3678 * Check that the indicated file's blocks are present and
3679 * replicated. If not, return false. If checkall is true, then check
3680 * all blocks, otherwise check only penultimate block.
3681 */
3682 boolean checkFileProgress(INodeFile v, boolean checkall) {
3683 readLock();
3684 try {
3685 if (checkall) {
3686 //
3687 // check all blocks of the file.
3688 //
3689 for (BlockInfo block: v.getBlocks()) {
3690 if (!block.isComplete()) {
3691 LOG.info("BLOCK* checkFileProgress: " + block
3692 + " has not reached minimal replication "
3693 + blockManager.minReplication);
3694 return false;
3695 }
3696 }
3697 } else {
3698 //
3699 // check the penultimate block of this file
3700 //
3701 BlockInfo b = v.getPenultimateBlock();
3702 if (b != null && !b.isComplete()) {
3703 LOG.warn("BLOCK* checkFileProgress: " + b
3704 + " has not reached minimal replication "
3705 + blockManager.minReplication);
3706 return false;
3707 }
3708 }
3709 return true;
3710 } finally {
3711 readUnlock();
3712 }
3713 }
3714
3715 ////////////////////////////////////////////////////////////////
3716 // Here's how to handle block-copy failure during client write:
3717 // -- As usual, the client's write should result in a streaming
3718 // backup write to a k-machine sequence.
3719 // -- If one of the backup machines fails, no worries. Fail silently.
3720 // -- Before client is allowed to close and finalize file, make sure
3721 // that the blocks are backed up. Namenode may have to issue specific backup
3722 // commands to make up for earlier datanode failures. Once all copies
3723 // are made, edit namespace and return to client.
3724 ////////////////////////////////////////////////////////////////
3725
3726 /**
3727 * Change the indicated filename.
3728 * @deprecated Use {@link #renameTo(String, String, Options.Rename...)} instead.
3729 */
3730 @Deprecated
3731 boolean renameTo(String src, String dst)
3732 throws IOException, UnresolvedLinkException {
3733 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
3734 if (cacheEntry != null && cacheEntry.isSuccess()) {
3735 return true; // Return previous response
3736 }
3737 boolean ret = false;
3738 try {
3739 ret = renameToInt(src, dst, cacheEntry != null);
3740 } catch (AccessControlException e) {
3741 logAuditEvent(false, "rename", src, dst, null);
3742 throw e;
3743 } finally {
3744 RetryCache.setState(cacheEntry, ret);
3745 }
3746 return ret;
3747 }
3748
/**
 * Implementation of the deprecated two-argument rename: validates the
 * destination name, performs the rename under the write lock and emits the
 * audit event with the caller-supplied (unresolved) paths.
 *
 * @param srcArg source path as given by the client
 * @param dstArg destination path as given by the client
 * @param logRetryCache whether to record this op in the edit log for
 *     retry-cache reconstruction on restart/failover
 * @return true if the rename succeeded
 */
private boolean renameToInt(final String srcArg, final String dstArg,
    boolean logRetryCache)
    throws IOException, UnresolvedLinkException {
  String src = srcArg;
  String dst = dstArg;
  if (NameNode.stateChangeLog.isDebugEnabled()) {
    NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: " + src +
        " to " + dst);
  }
  if (!DFSUtil.isValidName(dst)) {
    throw new IOException("Invalid name: " + dst);
  }
  FSPermissionChecker pc = getPermissionChecker();
  checkOperation(OperationCategory.WRITE);
  byte[][] srcComponents = FSDirectory.getPathComponentsForReservedPath(src);
  byte[][] dstComponents = FSDirectory.getPathComponentsForReservedPath(dst);
  boolean status = false;
  HdfsFileStatus resultingStat = null;
  writeLock();
  try {
    checkOperation(OperationCategory.WRITE);
    checkNameNodeSafeMode("Cannot rename " + src);
    waitForLoadingFSImage();
    src = resolvePath(src, srcComponents);
    dst = resolvePath(dst, dstComponents);
    checkOperation(OperationCategory.WRITE);
    status = renameToInternal(pc, src, dst, logRetryCache);
    if (status) {
      // Capture the post-rename status for the audit log.
      resultingStat = getAuditFileInfo(dst, false);
    }
  } finally {
    writeUnlock();
  }
  getEditLog().logSync();
  if (status) {
    // Audit with the original (client-visible) paths, not resolved ones.
    logAuditEvent(true, "rename", srcArg, dstArg, resultingStat);
  }
  return status;
}
3788
/**
 * Permission-check and perform the deprecated rename under the write lock.
 * @deprecated See {@link #renameTo(String, String)}
 */
@Deprecated
private boolean renameToInternal(FSPermissionChecker pc, String src,
    String dst, boolean logRetryCache) throws IOException,
    UnresolvedLinkException {
  assert hasWriteLock();
  if (isPermissionEnabled) {
    //We should not be doing this. This is move() not renameTo().
    //but for now,
    //NOTE: yes, this is bad! it's assuming much lower level behavior
    //      of rewriting the dst
    String actualdst = dir.isDir(dst)?
        dst + Path.SEPARATOR + new Path(src).getName(): dst;
    // Rename does not operate on link targets
    // Do not resolveLink when checking permissions of src and dst
    // Check write access to parent of src
    checkPermission(pc, src, false, null, FsAction.WRITE, null, null,
        false, false);
    // Check write access to ancestor of dst
    checkPermission(pc, actualdst, false, FsAction.WRITE, null, null, null,
        false, false);
  }

  long mtime = now();
  // dir.renameTo returns false (rather than throwing) on failure.
  if (dir.renameTo(src, dst, mtime)) {
    getEditLog().logRename(src, dst, mtime, logRetryCache);
    return true;
  }
  return false;
}
3819
3820
/**
 * Rename src to dst with the given {@link Options.Rename} semantics
 * (e.g. OVERWRITE). Uses the retry cache, and defers the deletion of any
 * blocks released by an OVERWRITE until after the lock is dropped.
 */
void renameTo(final String srcArg, final String dstArg,
    Options.Rename... options) throws IOException, UnresolvedLinkException {
  String src = srcArg;
  String dst = dstArg;
  if (NameNode.stateChangeLog.isDebugEnabled()) {
    NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: with options - "
        + src + " to " + dst);
  }
  if (!DFSUtil.isValidName(dst)) {
    throw new InvalidPathException("Invalid name: " + dst);
  }
  final FSPermissionChecker pc = getPermissionChecker();

  checkOperation(OperationCategory.WRITE);
  // Short-circuit a retried RPC that already succeeded.
  CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
  if (cacheEntry != null && cacheEntry.isSuccess()) {
    return; // Return previous response
  }
  byte[][] srcComponents = FSDirectory.getPathComponentsForReservedPath(src);
  byte[][] dstComponents = FSDirectory.getPathComponentsForReservedPath(dst);
  HdfsFileStatus resultingStat = null;
  boolean success = false;
  writeLock();
  // Collects blocks of a dst file replaced by an OVERWRITE rename.
  BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
  try {
    checkOperation(OperationCategory.WRITE);
    checkNameNodeSafeMode("Cannot rename " + src);
    src = resolvePath(src, srcComponents);
    dst = resolvePath(dst, dstComponents);
    renameToInternal(pc, src, dst, cacheEntry != null,
        collectedBlocks, options);
    resultingStat = getAuditFileInfo(dst, false);
    success = true;
  } finally {
    writeUnlock();
    RetryCache.setState(cacheEntry, success);
  }
  getEditLog().logSync();
  // Remove replaced blocks incrementally, outside the namesystem lock.
  if (!collectedBlocks.getToDeleteList().isEmpty()) {
    removeBlocks(collectedBlocks);
    collectedBlocks.clear();
  }
  if (resultingStat != null) {
    StringBuilder cmd = new StringBuilder("rename options=");
    for (Rename option : options) {
      cmd.append(option.value()).append(" ");
    }
    logAuditEvent(true, cmd.toString(), srcArg, dstArg, resultingStat);
  }
}
3872
/**
 * Permission-check and perform the options-based rename under the write
 * lock; journals the rename with its options. Throws on failure (unlike
 * the deprecated variant, which returns false).
 *
 * @param collectedBlocks receives blocks of any dst file replaced by an
 *     OVERWRITE rename; the caller deletes them after releasing the lock
 */
private void renameToInternal(FSPermissionChecker pc, String src,
    String dst, boolean logRetryCache, BlocksMapUpdateInfo collectedBlocks,
    Options.Rename... options) throws IOException {
  assert hasWriteLock();
  if (isPermissionEnabled) {
    // Rename does not operates on link targets
    // Do not resolveLink when checking permissions of src and dst
    // Check write access to parent of src
    checkPermission(pc, src, false, null, FsAction.WRITE, null, null, false,
        false);
    // Check write access to ancestor of dst
    checkPermission(pc, dst, false, FsAction.WRITE, null, null, null, false,
        false);
  }

  waitForLoadingFSImage();
  long mtime = now();
  dir.renameTo(src, dst, mtime, collectedBlocks, options);
  getEditLog().logRename(src, dst, mtime, logRetryCache, options);
}
3893
3894 /**
3895 * Remove the indicated file from namespace.
3896 *
3897 * @see ClientProtocol#delete(String, boolean) for detailed description and
3898 * description of exceptions
3899 */
3900 boolean delete(String src, boolean recursive)
3901 throws AccessControlException, SafeModeException,
3902 UnresolvedLinkException, IOException {
3903 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
3904 if (cacheEntry != null && cacheEntry.isSuccess()) {
3905 return true; // Return previous response
3906 }
3907 boolean ret = false;
3908 try {
3909 ret = deleteInt(src, recursive, cacheEntry != null);
3910 } catch (AccessControlException e) {
3911 logAuditEvent(false, "delete", src);
3912 throw e;
3913 } finally {
3914 RetryCache.setState(cacheEntry, ret);
3915 }
3916 return ret;
3917 }
3918
3919 private boolean deleteInt(String src, boolean recursive, boolean logRetryCache)
3920 throws AccessControlException, SafeModeException,
3921 UnresolvedLinkException, IOException {
3922 if (NameNode.stateChangeLog.isDebugEnabled()) {
3923 NameNode.stateChangeLog.debug("DIR* NameSystem.delete: " + src);
3924 }
3925 boolean status = deleteInternal(src, recursive, true, logRetryCache);
3926 if (status) {
3927 logAuditEvent(true, "delete", src);
3928 }
3929 return status;
3930 }
3931
3932 private FSPermissionChecker getPermissionChecker()
3933 throws AccessControlException {
3934 try {
3935 return new FSPermissionChecker(fsOwnerShortUserName, supergroup, getRemoteUser());
3936 } catch (IOException ioe) {
3937 throw new AccessControlException(ioe);
3938 }
3939 }
3940
/**
 * Remove a file/directory from the namespace.
 * <p>
 * For large directories, deletion is incremental. The blocks under
 * the directory are collected and deleted a small number at a time holding
 * the {@link FSNamesystem} lock.
 * <p>
 * For small directory or file the deletion is done in one shot.
 *
 * @param enforcePermission whether to check WRITE permission on the parent
 *     (and ALL on the subtree); internal callers may skip this
 * @param logRetryCache whether to record this op for retry-cache
 *     reconstruction in the edit log
 * @see ClientProtocol#delete(String, boolean) for description of exceptions
 */
private boolean deleteInternal(String src, boolean recursive,
    boolean enforcePermission, boolean logRetryCache)
    throws AccessControlException, SafeModeException, UnresolvedLinkException,
    IOException {
  BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
  List<INode> removedINodes = new ChunkedArrayList<INode>();
  FSPermissionChecker pc = getPermissionChecker();
  checkOperation(OperationCategory.WRITE);
  byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
  boolean ret = false;

  waitForLoadingFSImage();
  writeLock();
  try {
    checkOperation(OperationCategory.WRITE);
    checkNameNodeSafeMode("Cannot delete " + src);
    src = resolvePath(src, pathComponents);
    if (!recursive && dir.isNonEmptyDirectory(src)) {
      throw new PathIsNotEmptyDirectoryException(src + " is non empty");
    }
    if (enforcePermission && isPermissionEnabled) {
      checkPermission(pc, src, false, null, FsAction.WRITE, null,
          FsAction.ALL, true, false);
    }

    long mtime = now();
    // Unlink the target directory from directory tree
    long filesRemoved = dir.delete(src, collectedBlocks, removedINodes,
        mtime);
    // Negative count signals that nothing was deleted.
    if (filesRemoved < 0) {
      return false;
    }
    getEditLog().logDelete(src, mtime, logRetryCache);
    incrDeletedFileCount(filesRemoved);
    // Blocks/INodes will be handled later
    removePathAndBlocks(src, null, removedINodes, true);
    ret = true;
  } finally {
    writeUnlock();
  }
  getEditLog().logSync();
  removeBlocks(collectedBlocks); // Incremental deletion of blocks
  collectedBlocks.clear();

  if (NameNode.stateChangeLog.isDebugEnabled()) {
    NameNode.stateChangeLog.debug("DIR* Namesystem.delete: "
        + src +" is removed");
  }
  return ret;
}
4002
4003 /**
4004 * From the given list, incrementally remove the blocks from blockManager
4005 * Writelock is dropped and reacquired every BLOCK_DELETION_INCREMENT to
4006 * ensure that other waiters on the lock can get in. See HDFS-2938
4007 *
4008 * @param blocks
4009 * An instance of {@link BlocksMapUpdateInfo} which contains a list
4010 * of blocks that need to be removed from blocksMap
4011 */
4012 void removeBlocks(BlocksMapUpdateInfo blocks) {
4013 List<Block> toDeleteList = blocks.getToDeleteList();
4014 Iterator<Block> iter = toDeleteList.iterator();
4015 while (iter.hasNext()) {
4016 writeLock();
4017 try {
4018 for (int i = 0; i < BLOCK_DELETION_INCREMENT && iter.hasNext(); i++) {
4019 blockManager.removeBlock(iter.next());
4020 }
4021 } finally {
4022 writeUnlock();
4023 }
4024 }
4025 }
4026
4027 /**
4028 * Remove leases, inodes and blocks related to a given path
4029 * @param src The given path
4030 * @param blocks Containing the list of blocks to be deleted from blocksMap
4031 * @param removedINodes Containing the list of inodes to be removed from
4032 * inodesMap
4033 * @param acquireINodeMapLock Whether to acquire the lock for inode removal
4034 */
4035 void removePathAndBlocks(String src, BlocksMapUpdateInfo blocks,
4036 List<INode> removedINodes, final boolean acquireINodeMapLock) {
4037 assert hasWriteLock();
4038 leaseManager.removeLeaseWithPrefixPath(src);
4039 // remove inodes from inodesMap
4040 if (removedINodes != null) {
4041 if (acquireINodeMapLock) {
4042 dir.writeLock();
4043 }
4044 try {
4045 dir.removeFromInodeMap(removedINodes);
4046 } finally {
4047 if (acquireINodeMapLock) {
4048 dir.writeUnlock();
4049 }
4050 }
4051 removedINodes.clear();
4052 }
4053 if (blocks == null) {
4054 return;
4055 }
4056
4057 removeBlocksAndUpdateSafemodeTotal(blocks);
4058 }
4059
/**
 * Removes the blocks from blocksmap and updates the safemode blocks total
 *
 * @param blocks
 *          An instance of {@link BlocksMapUpdateInfo} which contains a list
 *          of blocks that need to be removed from blocksMap
 */
void removeBlocksAndUpdateSafemodeTotal(BlocksMapUpdateInfo blocks) {
  assert hasWriteLock();
  // In the case that we are a Standby tailing edits from the
  // active while in safe-mode, we need to track the total number
  // of blocks and safe blocks in the system.
  boolean trackBlockCounts = isSafeModeTrackingBlocks();
  int numRemovedComplete = 0, numRemovedSafe = 0;

  for (Block b : blocks.getToDeleteList()) {
    if (trackBlockCounts) {
      // NOTE(review): getStoredBlock() could presumably return null if b is
      // no longer in the blocksMap, which would NPE below -- verify that
      // every block in the to-delete list is still stored at this point.
      BlockInfo bi = getStoredBlock(b);
      if (bi.isComplete()) {
        numRemovedComplete++;
        // A block counts as "safe" only if it has reached min replication.
        if (bi.numNodes() >= blockManager.minReplication) {
          numRemovedSafe++;
        }
      }
    }
    blockManager.removeBlock(b);
  }
  if (trackBlockCounts) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Adjusting safe-mode totals for deletion."
          + "decreasing safeBlocks by " + numRemovedSafe
          + ", totalBlocks by " + numRemovedComplete);
    }
    adjustSafeModeBlockTotals(-numRemovedSafe, -numRemovedComplete);
  }
}
4096
4097 /**
4098 * @see SafeModeInfo#shouldIncrementallyTrackBlocks
4099 */
4100 private boolean isSafeModeTrackingBlocks() {
4101 if (!haEnabled) {
4102 // Never track blocks incrementally in non-HA code.
4103 return false;
4104 }
4105 SafeModeInfo sm = this.safeMode;
4106 return sm != null && sm.shouldIncrementallyTrackBlocks();
4107 }
4108
/**
 * Get the file info for a specific file.
 *
 * @param srcArg The string representation of the path to the file
 * @param resolveLink whether to throw UnresolvedLinkException
 *        if src refers to a symlink
 *
 * @throws AccessControlException if access is denied
 * @throws UnresolvedLinkException if a symlink is encountered.
 *
 * @return object containing information regarding the file
 *         or null if file not found
 * @throws StandbyException
 */
HdfsFileStatus getFileInfo(final String srcArg, boolean resolveLink)
    throws AccessControlException, UnresolvedLinkException,
    StandbyException, IOException {
  String src = srcArg;
  if (!DFSUtil.isValidName(src)) {
    throw new InvalidPathException("Invalid file name: " + src);
  }
  HdfsFileStatus stat = null;
  FSPermissionChecker pc = getPermissionChecker();
  checkOperation(OperationCategory.READ);
  byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
  readLock();
  try {
    checkOperation(OperationCategory.READ);
    src = resolvePath(src, pathComponents);
    // Permission checks are skipped entirely when permissions are disabled;
    // in that case the caller is treated as a superuser.
    boolean isSuperUser = true;
    if (isPermissionEnabled) {
      checkPermission(pc, src, false, null, null, null, null, false,
          resolveLink);
      isSuperUser = pc.isSuperUser();
    }
    stat = dir.getFileInfo(src, resolveLink,
        FSDirectory.isReservedRawName(srcArg), isSuperUser);
  } catch (AccessControlException e) {
    logAuditEvent(false, "getfileinfo", srcArg);
    throw e;
  } finally {
    readUnlock();
  }
  logAuditEvent(true, "getfileinfo", srcArg);
  return stat;
}
4155
/**
 * Returns true if the file is closed
 *
 * @param srcArg path of the file to check
 * @throws FileNotFoundException (via INodeFile.valueOf) if the path does
 *     not refer to a regular file
 */
boolean isFileClosed(final String srcArg)
    throws AccessControlException, UnresolvedLinkException,
    StandbyException, IOException {
  String src = srcArg;
  FSPermissionChecker pc = getPermissionChecker();
  checkOperation(OperationCategory.READ);
  byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
  readLock();
  try {
    src = resolvePath(src, pathComponents);
    checkOperation(OperationCategory.READ);
    if (isPermissionEnabled) {
      checkTraverse(pc, src);
    }
    // Closed means: exists, is a file, and is not under construction.
    return !INodeFile.valueOf(dir.getINode(src), src).isUnderConstruction();
  } catch (AccessControlException e) {
    if (isAuditEnabled() && isExternalInvocation()) {
      logAuditEvent(false, "isFileClosed", srcArg);
    }
    throw e;
  } finally {
    readUnlock();
  }
}
4183
4184 /**
4185 * Create all the necessary directories
4186 */
4187 boolean mkdirs(String src, PermissionStatus permissions,
4188 boolean createParent) throws IOException, UnresolvedLinkException {
4189 boolean ret = false;
4190 try {
4191 ret = mkdirsInt(src, permissions, createParent);
4192 } catch (AccessControlException e) {
4193 logAuditEvent(false, "mkdirs", src);
4194 throw e;
4195 }
4196 return ret;
4197 }
4198
4199 private boolean mkdirsInt(final String srcArg, PermissionStatus permissions,
4200 boolean createParent) throws IOException, UnresolvedLinkException {
4201 String src = srcArg;
4202 if(NameNode.stateChangeLog.isDebugEnabled()) {
4203 NameNode.stateChangeLog.debug("DIR* NameSystem.mkdirs: " + src);
4204 }
4205 if (!DFSUtil.isValidName(src)) {
4206 throw new InvalidPathException(src);
4207 }
4208 FSPermissionChecker pc = getPermissionChecker();
4209 checkOperation(OperationCategory.WRITE);
4210 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
4211 HdfsFileStatus resultingStat = null;
4212 boolean status = false;
4213 writeLock();
4214 try {
4215 checkOperation(OperationCategory.WRITE);
4216 checkNameNodeSafeMode("Cannot create directory " + src);
4217 src = resolvePath(src, pathComponents);
4218 status = mkdirsInternal(pc, src, permissions, createParent);
4219 if (status) {
4220 resultingStat = getAuditFileInfo(src, false);
4221 }
4222 } finally {
4223 writeUnlock();
4224 }
4225 getEditLog().logSync();
4226 if (status) {
4227 logAuditEvent(true, "mkdirs", srcArg, null, resultingStat);
4228 }
4229 return status;
4230 }
4231
4232 /**
4233 * Create all the necessary directories
4234 */
4235 private boolean mkdirsInternal(FSPermissionChecker pc, String src,
4236 PermissionStatus permissions, boolean createParent)
4237 throws IOException, UnresolvedLinkException {
4238 assert hasWriteLock();
4239 if (isPermissionEnabled) {
4240 checkTraverse(pc, src);
4241 }
4242 if (dir.isDirMutable(src)) {
4243 // all the users of mkdirs() are used to expect 'true' even if
4244 // a new directory is not created.
4245 return true;
4246 }
4247 if (isPermissionEnabled) {
4248 checkAncestorAccess(pc, src, FsAction.WRITE);
4249 }
4250 if (!createParent) {
4251 verifyParentDir(src);
4252 }
4253
4254 // validate that we have enough inodes. This is, at best, a
4255 // heuristic because the mkdirs() operation might need to
4256 // create multiple inodes.
4257 checkFsObjectLimit();
4258
4259 if (!mkdirsRecursively(src, permissions, false, now())) {
4260 throw new IOException("Failed to create directory: " + src);
4261 }
4262 return true;
4263 }
4264
4265 /**
4266 * Create a directory
4267 * If ancestor directories do not exist, automatically create them.
4268
4269 * @param src string representation of the path to the directory
4270 * @param permissions the permission of the directory
4271 * @param inheritPermission if the permission of the directory should inherit
4272 * from its parent or not. u+wx is implicitly added to
4273 * the automatically created directories, and to the
4274 * given directory if inheritPermission is true
4275 * @param now creation time
4276 * @return true if the operation succeeds false otherwise
4277 * @throws QuotaExceededException if directory creation violates
4278 * any quota limit
4279 * @throws UnresolvedLinkException if a symlink is encountered in src.
4280 * @throws SnapshotAccessControlException if path is in RO snapshot
4281 */
4282 private boolean mkdirsRecursively(String src, PermissionStatus permissions,
4283 boolean inheritPermission, long now)
4284 throws FileAlreadyExistsException, QuotaExceededException,
4285 UnresolvedLinkException, SnapshotAccessControlException,
4286 AclException {
4287 src = FSDirectory.normalizePath(src);
4288 String[] names = INode.getPathNames(src);
4289 byte[][] components = INode.getPathComponents(names);
4290 final int lastInodeIndex = components.length - 1;
4291
4292 dir.writeLock();
4293 try {
4294 INodesInPath iip = dir.getExistingPathINodes(components);
4295 if (iip.isSnapshot()) {
4296 throw new SnapshotAccessControlException(
4297 "Modification on RO snapshot is disallowed");
4298 }
4299 INode[] inodes = iip.getINodes();
4300
4301 // find the index of the first null in inodes[]
4302 StringBuilder pathbuilder = new StringBuilder();
4303 int i = 1;
4304 for(; i < inodes.length && inodes[i] != null; i++) {
4305 pathbuilder.append(Path.SEPARATOR).append(names[i]);
4306 if (!inodes[i].isDirectory()) {
4307 throw new FileAlreadyExistsException(
4308 "Parent path is not a directory: "
4309 + pathbuilder + " "+inodes[i].getLocalName());
4310 }
4311 }
4312
4313 // default to creating parent dirs with the given perms
4314 PermissionStatus parentPermissions = permissions;
4315
4316 // if not inheriting and it's the last inode, there's no use in
4317 // computing perms that won't be used
4318 if (inheritPermission || (i < lastInodeIndex)) {
4319 // if inheriting (ie. creating a file or symlink), use the parent dir,
4320 // else the supplied permissions
4321 // NOTE: the permissions of the auto-created directories violate posix
4322 FsPermission parentFsPerm = inheritPermission
4323 ? inodes[i-1].getFsPermission() : permissions.getPermission();
4324
4325 // ensure that the permissions allow user write+execute
4326 if (!parentFsPerm.getUserAction().implies(FsAction.WRITE_EXECUTE)) {
4327 parentFsPerm = new FsPermission(
4328 parentFsPerm.getUserAction().or(FsAction.WRITE_EXECUTE),
4329 parentFsPerm.getGroupAction(),
4330 parentFsPerm.getOtherAction()
4331 );
4332 }
4333
4334 if (!parentPermissions.getPermission().equals(parentFsPerm)) {
4335 parentPermissions = new PermissionStatus(
4336 parentPermissions.getUserName(),
4337 parentPermissions.getGroupName(),
4338 parentFsPerm
4339 );
4340 // when inheriting, use same perms for entire path
4341 if (inheritPermission) permissions = parentPermissions;
4342 }
4343 }
4344
4345 // create directories beginning from the first null index
4346 for(; i < inodes.length; i++) {
4347 pathbuilder.append(Path.SEPARATOR).append(names[i]);
4348 dir.unprotectedMkdir(allocateNewInodeId(), iip, i, components[i],
4349 (i < lastInodeIndex) ? parentPermissions : permissions, null,
4350 now);
4351 if (inodes[i] == null) {
4352 return false;
4353 }
4354 // Directory creation also count towards FilesCreated
4355 // to match count of FilesDeleted metric.
4356 NameNode.getNameNodeMetrics().incrFilesCreated();
4357
4358 final String cur = pathbuilder.toString();
4359 getEditLog().logMkDir(cur, inodes[i]);
4360 if(NameNode.stateChangeLog.isDebugEnabled()) {
4361 NameNode.stateChangeLog.debug(
4362 "mkdirs: created directory " + cur);
4363 }
4364 }
4365 } finally {
4366 dir.writeUnlock();
4367 }
4368 return true;
4369 }
4370
4371 /**
4372 * Get the content summary for a specific file/dir.
4373 *
4374 * @param srcArg The string representation of the path to the file
4375 *
4376 * @throws AccessControlException if access is denied
4377 * @throws UnresolvedLinkException if a symlink is encountered.
4378 * @throws FileNotFoundException if no file exists
4379 * @throws StandbyException
4380 * @throws IOException for issues with writing to the audit log
4381 *
4382 * @return object containing information regarding the file
4383 * or null if file not found
4384 */
4385 ContentSummary getContentSummary(final String srcArg) throws IOException {
4386 String src = srcArg;
4387 FSPermissionChecker pc = getPermissionChecker();
4388 checkOperation(OperationCategory.READ);
4389 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
4390 readLock();
4391 boolean success = true;
4392 try {
4393 checkOperation(OperationCategory.READ);
4394 src = resolvePath(src, pathComponents);
4395 if (isPermissionEnabled) {
4396 checkPermission(pc, src, false, null, null, null, FsAction.READ_EXECUTE);
4397 }
4398 return dir.getContentSummary(src);
4399
4400 } catch (AccessControlException ace) {
4401 success = false;
4402 throw ace;
4403 } finally {
4404 readUnlock();
4405 logAuditEvent(success, "contentSummary", srcArg);
4406 }
4407 }
4408
4409 /**
4410 * Set the namespace quota and diskspace quota for a directory.
4411 * See {@link ClientProtocol#setQuota(String, long, long)} for the
4412 * contract.
4413 *
4414 * Note: This does not support ".inodes" relative path.
4415 */
4416 void setQuota(String path, long nsQuota, long dsQuota)
4417 throws IOException, UnresolvedLinkException {
4418 checkSuperuserPrivilege();
4419 checkOperation(OperationCategory.WRITE);
4420 writeLock();
4421 try {
4422 checkOperation(OperationCategory.WRITE);
4423 checkNameNodeSafeMode("Cannot set quota on " + path);
4424 INodeDirectory changed = dir.setQuota(path, nsQuota, dsQuota);
4425 if (changed != null) {
4426 final Quota.Counts q = changed.getQuotaCounts();
4427 getEditLog().logSetQuota(path,
4428 q.get(Quota.NAMESPACE), q.get(Quota.DISKSPACE));
4429 }
4430 } finally {
4431 writeUnlock();
4432 }
4433 getEditLog().logSync();
4434 }
4435
  /** Persist all metadata about this file.
   * @param src The string representation of the path
   * @param fileId The inode ID that we're fsyncing. Older clients will pass
   *               INodeId.GRANDFATHER_INODE_ID here.
   * @param clientName The string representation of the client
   * @param lastBlockLength The length of the last block
   *                        under construction reported from client.
   * @throws IOException if path does not exist
   */
  void fsync(String src, long fileId, String clientName, long lastBlockLength)
      throws IOException, UnresolvedLinkException {
    NameNode.stateChangeLog.info("BLOCK* fsync: " + src + " for " + clientName);
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);

    waitForLoadingFSImage();
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot fsync file " + src);
      src = resolvePath(src, pathComponents);
      final INode inode;
      if (fileId == INodeId.GRANDFATHER_INODE_ID) {
        // Older clients may not have given us an inode ID to work with.
        // In this case, we have to try to resolve the path and hope it
        // hasn't changed or been deleted since the file was opened for write.
        inode = dir.getINode(src);
      } else {
        // Newer clients pass the inode ID, which is stable across renames;
        // prefer it and recompute the current full path from the inode.
        inode = dir.getInode(fileId);
        if (inode != null) src = inode.getFullPathName();
      }
      // Verify the caller still holds the lease before touching the file.
      final INodeFile pendingFile = checkLease(src, clientName, inode, fileId);
      if (lastBlockLength > 0) {
        pendingFile.getFileUnderConstructionFeature().updateLengthOfLastBlock(
            pendingFile, lastBlockLength);
      }
      // Log the block list under the lock; the sync happens below, outside.
      persistBlocks(src, pendingFile, false);
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
  }
4478
4479 /**
4480 * Move a file that is being written to be immutable.
4481 * @param src The filename
4482 * @param lease The lease for the client creating the file
4483 * @param recoveryLeaseHolder reassign lease to this holder if the last block
4484 * needs recovery; keep current holder if null.
4485 * @throws AlreadyBeingCreatedException if file is waiting to achieve minimal
4486 * replication;<br>
4487 * RecoveryInProgressException if lease recovery is in progress.<br>
4488 * IOException in case of an error.
4489 * @return true if file has been successfully finalized and closed or
4490 * false if block recovery has been initiated. Since the lease owner
4491 * has been changed and logged, caller should call logSync().
4492 */
4493 boolean internalReleaseLease(Lease lease, String src,
4494 String recoveryLeaseHolder) throws AlreadyBeingCreatedException,
4495 IOException, UnresolvedLinkException {
4496 LOG.info("Recovering " + lease + ", src=" + src);
4497 assert !isInSafeMode();
4498 assert hasWriteLock();
4499
4500 final INodesInPath iip = dir.getLastINodeInPath(src);
4501 final INodeFile pendingFile = iip.getINode(0).asFile();
4502 int nrBlocks = pendingFile.numBlocks();
4503 BlockInfo[] blocks = pendingFile.getBlocks();
4504
4505 int nrCompleteBlocks;
4506 BlockInfo curBlock = null;
4507 for(nrCompleteBlocks = 0; nrCompleteBlocks < nrBlocks; nrCompleteBlocks++) {
4508 curBlock = blocks[nrCompleteBlocks];
4509 if(!curBlock.isComplete())
4510 break;
4511 assert blockManager.checkMinReplication(curBlock) :
4512 "A COMPLETE block is not minimally replicated in " + src;
4513 }
4514
4515 // If there are no incomplete blocks associated with this file,
4516 // then reap lease immediately and close the file.
4517 if(nrCompleteBlocks == nrBlocks) {
4518 finalizeINodeFileUnderConstruction(src, pendingFile,
4519 iip.getLatestSnapshotId());
4520 NameNode.stateChangeLog.warn("BLOCK*"
4521 + " internalReleaseLease: All existing blocks are COMPLETE,"
4522 + " lease removed, file closed.");
4523 return true; // closed!
4524 }
4525
4526 // Only the last and the penultimate blocks may be in non COMPLETE state.
4527 // If the penultimate block is not COMPLETE, then it must be COMMITTED.
4528 if(nrCompleteBlocks < nrBlocks - 2 ||
4529 nrCompleteBlocks == nrBlocks - 2 &&
4530 curBlock != null &&
4531 curBlock.getBlockUCState() != BlockUCState.COMMITTED) {
4532 final String message = "DIR* NameSystem.internalReleaseLease: "
4533 + "attempt to release a create lock on "
4534 + src + " but file is already closed.";
4535 NameNode.stateChangeLog.warn(message);
4536 throw new IOException(message);
4537 }
4538
4539 // The last block is not COMPLETE, and
4540 // that the penultimate block if exists is either COMPLETE or COMMITTED
4541 final BlockInfo lastBlock = pendingFile.getLastBlock();
4542 BlockUCState lastBlockState = lastBlock.getBlockUCState();
4543 BlockInfo penultimateBlock = pendingFile.getPenultimateBlock();
4544
4545 // If penultimate block doesn't exist then its minReplication is met
4546 boolean penultimateBlockMinReplication = penultimateBlock == null ? true :
4547 blockManager.checkMinReplication(penultimateBlock);
4548
4549 switch(lastBlockState) {
4550 case COMPLETE:
4551 assert false : "Already checked that the last block is incomplete";
4552 break;
4553 case COMMITTED:
4554 // Close file if committed blocks are minimally replicated
4555 if(penultimateBlockMinReplication &&
4556 blockManager.checkMinReplication(lastBlock)) {
4557 finalizeINodeFileUnderConstruction(src, pendingFile,
4558 iip.getLatestSnapshotId());
4559 NameNode.stateChangeLog.warn("BLOCK*"
4560 + " internalReleaseLease: Committed blocks are minimally replicated,"
4561 + " lease removed, file closed.");
4562 return true; // closed!
4563 }
4564 // Cannot close file right now, since some blocks
4565 // are not yet minimally replicated.
4566 // This may potentially cause infinite loop in lease recovery
4567 // if there are no valid replicas on data-nodes.
4568 String message = "DIR* NameSystem.internalReleaseLease: " +
4569 "Failed to release lease for file " + src +
4570 ". Committed blocks are waiting to be minimally replicated." +
4571 " Try again later.";
4572 NameNode.stateChangeLog.warn(message);
4573 throw new AlreadyBeingCreatedException(message);
4574 case UNDER_CONSTRUCTION:
4575 case UNDER_RECOVERY:
4576 final BlockInfoUnderConstruction uc = (BlockInfoUnderConstruction)lastBlock;
4577 // setup the last block locations from the blockManager if not known
4578 if (uc.getNumExpectedLocations() == 0) {
4579 uc.setExpectedLocations(blockManager.getStorages(lastBlock));
4580 }
4581
4582 if (uc.getNumExpectedLocations() == 0 && uc.getNumBytes() == 0) {
4583 // There is no datanode reported to this block.
4584 // may be client have crashed before writing data to pipeline.
4585 // This blocks doesn't need any recovery.
4586 // We can remove this block and close the file.
4587 pendingFile.removeLastBlock(lastBlock);
4588 finalizeINodeFileUnderConstruction(src, pendingFile,
4589 iip.getLatestSnapshotId());
4590 NameNode.stateChangeLog.warn("BLOCK* internalReleaseLease: "
4591 + "Removed empty last block and closed file.");
4592 return true;
4593 }
4594 // start recovery of the last block for this file
4595 long blockRecoveryId = nextGenerationStamp(isLegacyBlock(uc));
4596 lease = reassignLease(lease, src, recoveryLeaseHolder, pendingFile);
4597 uc.initializeBlockRecovery(blockRecoveryId);
4598 leaseManager.renewLease(lease);
4599 // Cannot close file right now, since the last block requires recovery.
4600 // This may potentially cause infinite loop in lease recovery
4601 // if there are no valid replicas on data-nodes.
4602 NameNode.stateChangeLog.warn(
4603 "DIR* NameSystem.internalReleaseLease: " +
4604 "File " + src + " has not been closed." +
4605 " Lease recovery is in progress. " +
4606 "RecoveryId = " + blockRecoveryId + " for block " + lastBlock);
4607 break;
4608 }
4609 return false;
4610 }
4611
4612 private Lease reassignLease(Lease lease, String src, String newHolder,
4613 INodeFile pendingFile) {
4614 assert hasWriteLock();
4615 if(newHolder == null)
4616 return lease;
4617 // The following transaction is not synced. Make sure it's sync'ed later.
4618 logReassignLease(lease.getHolder(), src, newHolder);
4619 return reassignLeaseInternal(lease, src, newHolder, pendingFile);
4620 }
4621
  /**
   * In-memory half of a lease reassignment: record the new client name on
   * the under-construction file and move the lease in the lease manager.
   * Does not touch the edit log (see reassignLease for the logged path).
   *
   * @return the lease now associated with {@code newHolder}
   */
  Lease reassignLeaseInternal(Lease lease, String src, String newHolder,
      INodeFile pendingFile) {
    assert hasWriteLock();
    pendingFile.getFileUnderConstructionFeature().setClientName(newHolder);
    return leaseManager.reassignLease(lease, src, newHolder);
  }
4628
  /**
   * Commit (or complete, when the block manager reports enough replicas)
   * the last block of an under-construction file, then release any disk
   * space that was over-reserved for it.
   */
  private void commitOrCompleteLastBlock(final INodeFile fileINode,
      final Block commitBlock) throws IOException {
    assert hasWriteLock();
    Preconditions.checkArgument(fileINode.isUnderConstruction());
    // Nothing more to do if the block manager made no state change.
    if (!blockManager.commitOrCompleteLastBlock(fileINode, commitBlock)) {
      return;
    }

    // Adjust disk space consumption if required
    // Space was accounted at the full preferred block size; give back the
    // unused portion (scaled by replication) now that the final length is
    // known.
    final long diff = fileINode.getPreferredBlockSize() - commitBlock.getNumBytes();
    if (diff > 0) {
      try {
        String path = fileINode.getFullPathName();
        dir.updateSpaceConsumed(path, 0, -diff*fileINode.getFileReplication());
      } catch (IOException e) {
        // Best-effort accounting fix-up; failure must not fail the commit.
        LOG.warn("Unexpected exception while updating disk space.", e);
      }
    }
  }
4648
  /**
   * Turn an under-construction file into a finalized one: release its
   * lease, record the modification against the latest snapshot, strip the
   * under-construction feature, and persist the close to the edit log.
   */
  private void finalizeINodeFileUnderConstruction(String src,
      INodeFile pendingFile, int latestSnapshot) throws IOException,
      UnresolvedLinkException {
    assert hasWriteLock();

    FileUnderConstructionFeature uc = pendingFile.getFileUnderConstructionFeature();
    Preconditions.checkArgument(uc != null);
    leaseManager.removeLease(uc.getClientName(), src);

    pendingFile.recordModification(latestSnapshot);

    // The file is no longer pending.
    // Create permanent INode, update blocks. No need to replace the inode here
    // since we just remove the uc feature from pendingFile
    final INodeFile newFile = pendingFile.toCompleteFile(now());

    waitForLoadingFSImage();
    // close file and persist block allocations for this file
    closeFile(src, newFile);

    blockManager.checkReplication(newFile);
  }
4671
4672 @VisibleForTesting
4673 BlockInfo getStoredBlock(Block block) {
4674 return blockManager.getStoredBlock(block);
4675 }
4676
4677 @Override
4678 public boolean isInSnapshot(BlockInfoUnderConstruction blockUC) {
4679 assert hasReadLock();
4680 final BlockCollection bc = blockUC.getBlockCollection();
4681 if (bc == null || !(bc instanceof INodeFile)
4682 || !bc.isUnderConstruction()) {
4683 return false;
4684 }
4685
4686 INodeFile inodeUC = (INodeFile) bc;
4687 String fullName = inodeUC.getName();
4688 try {
4689 if (fullName != null && fullName.startsWith(Path.SEPARATOR)
4690 && dir.getINode(fullName) == inodeUC) {
4691 // If file exists in normal path then no need to look in snapshot
4692 return false;
4693 }
4694 } catch (UnresolvedLinkException e) {
4695 LOG.error("Error while resolving the link : " + fullName, e);
4696 return false;
4697 }
4698 /*
4699 * 1. if bc is an instance of INodeFileUnderConstructionWithSnapshot, and
4700 * bc is not in the current fsdirectory tree, bc must represent a snapshot
4701 * file.
4702 * 2. if fullName is not an absolute path, bc cannot be existent in the
4703 * current fsdirectory tree.
4704 * 3. if bc is not the current node associated with fullName, bc must be a
4705 * snapshot inode.
4706 */
4707 return true;
4708 }
4709
  /**
   * Datanode callback delivering the outcome of block recovery for the
   * last block of a file: update (or delete) the recovered block's
   * generation stamp and length, record the surviving replica locations,
   * and optionally commit the block and close the file. The edit log is
   * synced after the namesystem lock is released.
   */
  void commitBlockSynchronization(ExtendedBlock lastblock,
      long newgenerationstamp, long newlength,
      boolean closeFile, boolean deleteblock, DatanodeID[] newtargets,
      String[] newtargetstorages)
      throws IOException, UnresolvedLinkException {
    LOG.info("commitBlockSynchronization(lastblock=" + lastblock
             + ", newgenerationstamp=" + newgenerationstamp
             + ", newlength=" + newlength
             + ", newtargets=" + Arrays.asList(newtargets)
             + ", closeFile=" + closeFile
             + ", deleteBlock=" + deleteblock
             + ")");
    checkOperation(OperationCategory.WRITE);
    String src = "";
    waitForLoadingFSImage();
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      // If a DN tries to commit to the standby, the recovery will
      // fail, and the next retry will succeed on the new NN.

      checkNameNodeSafeMode(
          "Cannot commitBlockSynchronization while in safe mode");
      final BlockInfo storedBlock = getStoredBlock(
          ExtendedBlock.getLocalBlock(lastblock));
      if (storedBlock == null) {
        if (deleteblock) {
          // This may be a retry attempt so ignore the failure
          // to locate the block.
          if (LOG.isDebugEnabled()) {
            LOG.debug("Block (=" + lastblock + ") not found");
          }
          return;
        } else {
          throw new IOException("Block (=" + lastblock + ") not found");
        }
      }
      //
      // The implementation of delete operation (see @deleteInternal method)
      // first removes the file paths from namespace, and delays the removal
      // of blocks to later time for better performance. When
      // commitBlockSynchronization (this method) is called in between, the
      // blockCollection of storedBlock could have been assigned to null by
      // the delete operation, throw IOException here instead of NPE; if the
      // file path is already removed from namespace by the delete operation,
      // throw FileNotFoundException here, so not to proceed to the end of
      // this method to add a CloseOp to the edit log for an already deleted
      // file (See HDFS-6825).
      //
      BlockCollection blockCollection = storedBlock.getBlockCollection();
      if (blockCollection == null) {
        throw new IOException("The blockCollection of " + storedBlock
            + " is null, likely because the file owning this block was"
            + " deleted and the block removal is delayed");
      }
      INodeFile iFile = ((INode)blockCollection).asFile();
      if (isFileDeleted(iFile)) {
        throw new FileNotFoundException("File not found: "
            + iFile.getFullPathName() + ", likely due to delayed block"
            + " removal");
      }
      if (!iFile.isUnderConstruction() || storedBlock.isComplete()) {
        // Likely a late or duplicate callback: the file was already closed.
        if (LOG.isDebugEnabled()) {
          LOG.debug("Unexpected block (=" + lastblock
                    + ") since the file (=" + iFile.getLocalName()
                    + ") is not under construction");
        }
        return;
      }

      // The reported generation stamp must match the recovery id issued
      // when recovery was initiated for this block.
      long recoveryId =
          ((BlockInfoUnderConstruction)storedBlock).getBlockRecoveryId();
      if(recoveryId != newgenerationstamp) {
        throw new IOException("The recovery id " + newgenerationstamp
            + " does not match current recovery id "
            + recoveryId + " for block " + lastblock);
      }

      if (deleteblock) {
        Block blockToDel = ExtendedBlock.getLocalBlock(lastblock);
        boolean remove = iFile.removeLastBlock(blockToDel);
        if (remove) {
          blockManager.removeBlockFromMap(storedBlock);
        }
      }
      else {
        // update last block
        storedBlock.setGenerationStamp(newgenerationstamp);
        storedBlock.setNumBytes(newlength);

        // find the DatanodeDescriptor objects
        // There should be no locations in the blockManager till now because the
        // file is underConstruction
        ArrayList<DatanodeDescriptor> trimmedTargets =
            new ArrayList<DatanodeDescriptor>(newtargets.length);
        ArrayList<String> trimmedStorages =
            new ArrayList<String>(newtargets.length);
        if (newtargets.length > 0) {
          for (int i = 0; i < newtargets.length; ++i) {
            // try to get targetNode
            DatanodeDescriptor targetNode =
                blockManager.getDatanodeManager().getDatanode(newtargets[i]);
            if (targetNode != null) {
              trimmedTargets.add(targetNode);
              trimmedStorages.add(newtargetstorages[i]);
            } else if (LOG.isDebugEnabled()) {
              LOG.debug("DatanodeDescriptor (=" + newtargets[i] + ") not found");
            }
          }
        }
        if ((closeFile) && !trimmedTargets.isEmpty()) {
          // the file is getting closed. Insert block locations into blockManager.
          // Otherwise fsck will report these blocks as MISSING, especially if the
          // blocksReceived from Datanodes take a long time to arrive.
          for (int i = 0; i < trimmedTargets.size(); i++) {
            DatanodeStorageInfo storageInfo =
                trimmedTargets.get(i).getStorageInfo(trimmedStorages.get(i));
            if (storageInfo != null) {
              storageInfo.addBlock(storedBlock);
            }
          }
        }

        // add pipeline locations into the INodeUnderConstruction
        DatanodeStorageInfo[] trimmedStorageInfos =
            blockManager.getDatanodeManager().getDatanodeStorageInfos(
                trimmedTargets.toArray(new DatanodeID[trimmedTargets.size()]),
                trimmedStorages.toArray(new String[trimmedStorages.size()]));
        iFile.setLastBlock(storedBlock, trimmedStorageInfos);
      }

      if (closeFile) {
        src = closeFileCommitBlocks(iFile, storedBlock);
      } else {
        // If this commit does not want to close the file, persist blocks
        src = iFile.getFullPathName();
        persistBlocks(src, iFile, false);
      }
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    if (closeFile) {
      LOG.info("commitBlockSynchronization(newblock=" + lastblock
          + ", file=" + src
          + ", newgenerationstamp=" + newgenerationstamp
          + ", newlength=" + newlength
          + ", newtargets=" + Arrays.asList(newtargets) + ") successful");
    } else {
      LOG.info("commitBlockSynchronization(" + lastblock + ") successful");
    }
  }
4862
4863 /**
4864 * @param pendingFile open file that needs to be closed
4865 * @param storedBlock last block
4866 * @return Path of the file that was closed.
4867 * @throws IOException on error
4868 */
4869 @VisibleForTesting
4870 String closeFileCommitBlocks(INodeFile pendingFile, BlockInfo storedBlock)
4871 throws IOException {
4872 String src = pendingFile.getFullPathName();
4873
4874 // commit the last block and complete it if it has minimum replicas
4875 commitOrCompleteLastBlock(pendingFile, storedBlock);
4876
4877 //remove lease, close file
4878 finalizeINodeFileUnderConstruction(src, pendingFile,
4879 Snapshot.findLatestSnapshot(pendingFile, Snapshot.CURRENT_STATE_ID));
4880
4881 return src;
4882 }
4883
4884 /**
4885 * Renew the lease(s) held by the given client
4886 */
4887 void renewLease(String holder) throws IOException {
4888 checkOperation(OperationCategory.WRITE);
4889 readLock();
4890 try {
4891 checkOperation(OperationCategory.WRITE);
4892 checkNameNodeSafeMode("Cannot renew lease for " + holder);
4893 leaseManager.renewLease(holder);
4894 } finally {
4895 readUnlock();
4896 }
4897 }
4898
4899 /**
4900 * Get a partial listing of the indicated directory
4901 *
4902 * @param src the directory name
4903 * @param startAfter the name to start after
4904 * @param needLocation if blockLocations need to be returned
4905 * @return a partial listing starting after startAfter
4906 *
4907 * @throws AccessControlException if access is denied
4908 * @throws UnresolvedLinkException if symbolic link is encountered
4909 * @throws IOException if other I/O error occurred
4910 */
4911 DirectoryListing getListing(String src, byte[] startAfter,
4912 boolean needLocation)
4913 throws AccessControlException, UnresolvedLinkException, IOException {
4914 try {
4915 return getListingInt(src, startAfter, needLocation);
4916 } catch (AccessControlException e) {
4917 logAuditEvent(false, "listStatus", src);
4918 throw e;
4919 }
4920 }
4921
4922 private DirectoryListing getListingInt(final String srcArg, byte[] startAfter,
4923 boolean needLocation)
4924 throws AccessControlException, UnresolvedLinkException, IOException {
4925 String src = srcArg;
4926 DirectoryListing dl;
4927 FSPermissionChecker pc = getPermissionChecker();
4928 checkOperation(OperationCategory.READ);
4929 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
4930 String startAfterString = new String(startAfter);
4931 readLock();
4932 try {
4933 checkOperation(OperationCategory.READ);
4934 src = resolvePath(src, pathComponents);
4935
4936 // Get file name when startAfter is an INodePath
4937 if (FSDirectory.isReservedName(startAfterString)) {
4938 byte[][] startAfterComponents = FSDirectory
4939 .getPathComponentsForReservedPath(startAfterString);
4940 try {
4941 String tmp = FSDirectory.resolvePath(src, startAfterComponents, dir);
4942 byte[][] regularPath = INode.getPathComponents(tmp);
4943 startAfter = regularPath[regularPath.length - 1];
4944 } catch (IOException e) {
4945 // Possibly the inode is deleted
4946 throw new DirectoryListingStartAfterNotFoundException(
4947 "Can't find startAfter " + startAfterString);
4948 }
4949 }
4950
4951 boolean isSuperUser = true;
4952 if (isPermissionEnabled) {
4953 if (dir.isDir(src)) {
4954 checkPathAccess(pc, src, FsAction.READ_EXECUTE);
4955 } else {
4956 checkTraverse(pc, src);
4957 }
4958 isSuperUser = pc.isSuperUser();
4959 }
4960 logAuditEvent(true, "listStatus", srcArg);
4961 dl = dir.getListing(src, startAfter, needLocation, isSuperUser);
4962 } finally {
4963 readUnlock();
4964 }
4965 return dl;
4966 }
4967
4968 /////////////////////////////////////////////////////////
4969 //
4970 // These methods are called by datanodes
4971 //
4972 /////////////////////////////////////////////////////////
4973 /**
4974 * Register Datanode.
4975 * <p>
4976 * The purpose of registration is to identify whether the new datanode
4977 * serves a new data storage, and will report new data block copies,
4978 * which the namenode was not aware of; or the datanode is a replacement
4979 * node for the data storage that was previously served by a different
4980 * or the same (in terms of host:port) datanode.
4981 * The data storages are distinguished by their storageIDs. When a new
4982 * data storage is reported the namenode issues a new unique storageID.
4983 * <p>
4984 * Finally, the namenode returns its namespaceID as the registrationID
4985 * for the datanodes.
4986 * namespaceID is a persistent attribute of the name space.
4987 * The registrationID is checked every time the datanode is communicating
4988 * with the namenode.
4989 * Datanodes with inappropriate registrationID are rejected.
4990 * If the namenode stops, and then restarts it can restore its
4991 * namespaceID and will continue serving the datanodes that has previously
4992 * registered with the namenode without restarting the whole cluster.
4993 *
4994 * @see org.apache.hadoop.hdfs.server.datanode.DataNode
4995 */
4996 void registerDatanode(DatanodeRegistration nodeReg) throws IOException {
4997 writeLock();
4998 try {
4999 getBlockManager().getDatanodeManager().registerDatanode(nodeReg);
5000 checkSafeMode();
5001 } finally {
5002 writeUnlock();
5003 }
5004 }
5005
5006 /**
5007 * Get registrationID for datanodes based on the namespaceID.
5008 *
5009 * @see #registerDatanode(DatanodeRegistration)
5010 * @return registration ID
5011 */
5012 String getRegistrationID() {
5013 return Storage.getRegistrationID(getFSImage().getStorage());
5014 }
5015
5016 /**
5017 * The given node has reported in. This method should:
5018 * 1) Record the heartbeat, so the datanode isn't timed out
5019 * 2) Adjust usage stats for future block allocation
5020 *
5021 * If a substantial amount of time passed since the last datanode
5022 * heartbeat then request an immediate block report.
5023 *
5024 * @return an array of datanode commands
5025 * @throws IOException
5026 */
5027 HeartbeatResponse handleHeartbeat(DatanodeRegistration nodeReg,
5028 StorageReport[] reports, long cacheCapacity, long cacheUsed,
5029 int xceiverCount, int xmitsInProgress, int failedVolumes)
5030 throws IOException {
5031 readLock();
5032 try {
5033 //get datanode commands
5034 final int maxTransfer = blockManager.getMaxReplicationStreams()
5035 - xmitsInProgress;
5036 DatanodeCommand[] cmds = blockManager.getDatanodeManager().handleHeartbeat(
5037 nodeReg, reports, blockPoolId, cacheCapacity, cacheUsed,
5038 xceiverCount, maxTransfer, failedVolumes);
5039
5040 //create ha status
5041 final NNHAStatusHeartbeat haState = new NNHAStatusHeartbeat(
5042 haContext.getState().getServiceState(),
5043 getFSImage().getLastAppliedOrWrittenTxId());
5044
5045 return new HeartbeatResponse(cmds, haState, rollingUpgradeInfo);
5046 } finally {
5047 readUnlock();
5048 }
5049 }
5050
5051 /**
5052 * Returns whether or not there were available resources at the last check of
5053 * resources.
5054 *
5055 * @return true if there were sufficient resources available, false otherwise.
5056 */
5057 boolean nameNodeHasResourcesAvailable() {
5058 return hasResourcesAvailable;
5059 }
5060
5061 /**
5062 * Perform resource checks and cache the results.
5063 */
5064 void checkAvailableResources() {
5065 Preconditions.checkState(nnResourceChecker != null,
5066 "nnResourceChecker not initialized");
5067 hasResourcesAvailable = nnResourceChecker.hasAvailableDiskSpace();
5068 }
5069
5070 /**
5071 * Persist the block list for the inode.
5072 * @param path
5073 * @param file
5074 * @param logRetryCache
5075 */
5076 private void persistBlocks(String path, INodeFile file,
5077 boolean logRetryCache) {
5078 assert hasWriteLock();
5079 Preconditions.checkArgument(file.isUnderConstruction());
5080 getEditLog().logUpdateBlocks(path, file, logRetryCache);
5081 if(NameNode.stateChangeLog.isDebugEnabled()) {
5082 NameNode.stateChangeLog.debug("persistBlocks: " + path
5083 + " with " + file.getBlocks().length + " blocks is persisted to" +
5084 " the file system");
5085 }
5086 }
5087
  /** Bump the files-deleted metric by {@code count}. */
  void incrDeletedFileCount(long count) {
    NameNode.getNameNodeMetrics().incrFilesDeleted(count);
  }
5091
5092 /**
5093 * Close file.
5094 * @param path
5095 * @param file
5096 */
5097 private void closeFile(String path, INodeFile file) {
5098 assert hasWriteLock();
5099 waitForLoadingFSImage();
5100 // file is closed
5101 getEditLog().logCloseFile(path, file);
5102 if (NameNode.stateChangeLog.isDebugEnabled()) {
5103 NameNode.stateChangeLog.debug("closeFile: "
5104 +path+" with "+ file.getBlocks().length
5105 +" blocks is persisted to the file system");
5106 }
5107 }
5108
5109 /**
5110 * Add the given symbolic link to the fs. Record it in the edits log.
5111 */
5112 private INodeSymlink addSymlink(String path, String target,
5113 PermissionStatus dirPerms,
5114 boolean createParent, boolean logRetryCache)
5115 throws UnresolvedLinkException, FileAlreadyExistsException,
5116 QuotaExceededException, SnapshotAccessControlException, AclException {
5117 waitForLoadingFSImage();
5118
5119 final long modTime = now();
5120 if (createParent) {
5121 final String parent = new Path(path).getParent().toString();
5122 if (!mkdirsRecursively(parent, dirPerms, true, modTime)) {
5123 return null;
5124 }
5125 }
5126 final String userName = dirPerms.getUserName();
5127 long id = allocateNewInodeId();
5128 INodeSymlink newNode = dir.addSymlink(id, path, target, modTime, modTime,
5129 new PermissionStatus(userName, null, FsPermission.getDefault()));
5130 if (newNode == null) {
5131 NameNode.stateChangeLog.info("addSymlink: failed to add " + path);
5132 return null;
5133 }
5134 getEditLog().logSymlink(path, target, modTime, modTime, newNode,
5135 logRetryCache);
5136
5137 if(NameNode.stateChangeLog.isDebugEnabled()) {
5138 NameNode.stateChangeLog.debug("addSymlink: " + path + " is added");
5139 }
5140 return newNode;
5141 }
5142
5143 /**
5144 * Periodically calls hasAvailableResources of NameNodeResourceChecker, and if
5145 * there are found to be insufficient resources available, causes the NN to
5146 * enter safe mode. If resources are later found to have returned to
5147 * acceptable levels, this daemon will cause the NN to exit safe mode.
5148 */
5149 class NameNodeResourceMonitor implements Runnable {
5150 boolean shouldNNRmRun = true;
5151 @Override
5152 public void run () {
5153 try {
5154 while (fsRunning && shouldNNRmRun) {
5155 checkAvailableResources();
5156 if(!nameNodeHasResourcesAvailable()) {
5157 String lowResourcesMsg = "NameNode low on available disk space. ";
5158 if (!isInSafeMode()) {
5159 FSNamesystem.LOG.warn(lowResourcesMsg + "Entering safe mode.");
5160 } else {
5161 FSNamesystem.LOG.warn(lowResourcesMsg + "Already in safe mode.");
5162 }
5163 enterSafeMode(true);
5164 }
5165 try {
5166 Thread.sleep(resourceRecheckInterval);
5167 } catch (InterruptedException ie) {
5168 // Deliberately ignore
5169 }
5170 }
5171 } catch (Exception e) {
5172 FSNamesystem.LOG.error("Exception in NameNodeResourceMonitor: ", e);
5173 }
5174 }
5175
5176 public void stopMonitor() {
5177 shouldNNRmRun = false;
5178 }
5179 }
5180
5181 class NameNodeEditLogRoller implements Runnable {
5182
5183 private boolean shouldRun = true;
5184 private final long rollThreshold;
5185 private final long sleepIntervalMs;
5186
5187 public NameNodeEditLogRoller(long rollThreshold, int sleepIntervalMs) {
5188 this.rollThreshold = rollThreshold;
5189 this.sleepIntervalMs = sleepIntervalMs;
5190 }
5191
5192 @Override
5193 public void run() {
5194 while (fsRunning && shouldRun) {
5195 try {
5196 FSEditLog editLog = getFSImage().getEditLog();
5197 long numEdits =
5198 editLog.getLastWrittenTxId() - editLog.getCurSegmentTxId();
5199 if (numEdits > rollThreshold) {
5200 FSNamesystem.LOG.info("NameNode rolling its own edit log because"
5201 + " number of edits in open segment exceeds threshold of "
5202 + rollThreshold);
5203 rollEditLog();
5204 }
5205 Thread.sleep(sleepIntervalMs);
5206 } catch (InterruptedException e) {
5207 FSNamesystem.LOG.info(NameNodeEditLogRoller.class.getSimpleName()
5208 + " was interrupted, exiting");
5209 break;
5210 } catch (Exception e) {
5211 FSNamesystem.LOG.error("Swallowing exception in "
5212 + NameNodeEditLogRoller.class.getSimpleName() + ":", e);
5213 }
5214 }
5215 }
5216
5217 public void stop() {
5218 shouldRun = false;
5219 }
5220 }
5221
5222 /**
5223 * Daemon to periodically scan the namespace for lazyPersist files
5224 * with missing blocks and unlink them.
5225 */
5226 class LazyPersistFileScrubber implements Runnable {
5227 private volatile boolean shouldRun = true;
5228 final int scrubIntervalSec;
5229 public LazyPersistFileScrubber(final int scrubIntervalSec) {
5230 this.scrubIntervalSec = scrubIntervalSec;
5231 }
5232
5233 /**
5234 * Periodically go over the list of lazyPersist files with missing
5235 * blocks and unlink them from the namespace.
5236 */
5237 private void clearCorruptLazyPersistFiles()
5238 throws SafeModeException, AccessControlException,
5239 UnresolvedLinkException, IOException {
5240
5241 BlockStoragePolicy lpPolicy = blockManager.getStoragePolicy("LAZY_PERSIST");
5242
5243 List<BlockCollection> filesToDelete = new ArrayList<BlockCollection>();
5244
5245 writeLock();
5246
5247 try {
5248 final Iterator<Block> it = blockManager.getCorruptReplicaBlockIterator();
5249
5250 while (it.hasNext()) {
5251 Block b = it.next();
5252 BlockInfo blockInfo = blockManager.getStoredBlock(b);
5253 if (blockInfo.getBlockCollection().getStoragePolicyID() == lpPolicy.getId()) {
5254 filesToDelete.add(blockInfo.getBlockCollection());
5255 }
5256 }
5257
5258 for (BlockCollection bc : filesToDelete) {
5259 LOG.warn("Removing lazyPersist file " + bc.getName() + " with no replicas.");
5260 deleteInternal(bc.getName(), false, false, false);
5261 }
5262 } finally {
5263 writeUnlock();
5264 }
5265 }
5266
5267 @Override
5268 public void run() {
5269 while (fsRunning && shouldRun) {
5270 try {
5271 clearCorruptLazyPersistFiles();
5272 Thread.sleep(scrubIntervalSec * 1000);
5273 } catch (InterruptedException e) {
5274 FSNamesystem.LOG.info(
5275 "LazyPersistFileScrubber was interrupted, exiting");
5276 break;
5277 } catch (Exception e) {
5278 FSNamesystem.LOG.error(
5279 "Ignoring exception in LazyPersistFileScrubber:", e);
5280 }
5281 }
5282 }
5283
5284 public void stop() {
5285 shouldRun = false;
5286 }
5287 }
5288
  /** @return the FSImage backing this namesystem. */
  public FSImage getFSImage() {
    return fsImage;
  }
5292
  /** @return the edit log of the backing FSImage. */
  public FSEditLog getEditLog() {
    return getFSImage().getEditLog();
  }
5296
5297 private void checkBlock(ExtendedBlock block) throws IOException {
5298 if (block != null && !this.blockPoolId.equals(block.getBlockPoolId())) {
5299 throw new IOException("Unexpected BlockPoolId " + block.getBlockPoolId()
5300 + " - expected " + blockPoolId);
5301 }
5302 }
5303
  @Metric({"MissingBlocks", "Number of missing blocks"})
  public long getMissingBlocksCount() {
    // not locking; a slightly stale value is acceptable for a metric
    return blockManager.getMissingBlocksCount();
  }
5309
  /** Number of datanode heartbeats that have expired. */
  @Metric({"ExpiredHeartbeats", "Number of expired heartbeats"})
  public int getExpiredHeartbeats() {
    return datanodeStatistics.getExpiredHeartbeats();
  }
5314
  @Metric({"TransactionsSinceLastCheckpoint",
      "Number of transactions since last checkpoint"})
  public long getTransactionsSinceLastCheckpoint() {
    // Delta between the newest written txid and the last checkpointed one.
    return getEditLog().getLastWrittenTxId() -
        getFSImage().getStorage().getMostRecentCheckpointTxId();
  }
5321
5322 @Metric({"TransactionsSinceLastLogRoll",
5323 "Number of transactions since last edit log roll"})
5324 public long getTransactionsSinceLastLogRoll() {
5325 if (isInStandbyState() || !getEditLog().isSegmentOpen()) {
5326 return 0;
5327 } else {
5328 return getEditLog().getLastWrittenTxId() -
5329 getEditLog().getCurSegmentTxId() + 1;
5330 }
5331 }
5332
  /** Most recent transaction id written to the edit log. */
  @Metric({"LastWrittenTransactionId", "Transaction ID written to the edit log"})
  public long getLastWrittenTransactionId() {
    return getEditLog().getLastWrittenTxId();
  }
5337
  @Metric({"LastCheckpointTime",
      "Time in milliseconds since the epoch of the last checkpoint"})
  public long getLastCheckpointTime() {
    return getFSImage().getStorage().getMostRecentCheckpointTime();
  }
5343
  /** @see ClientProtocol#getStats() */
  long[] getStats() {
    final long[] stats = datanodeStatistics.getStats();
    // Overlay block-level counters onto the datanode capacity stats array.
    stats[ClientProtocol.GET_STATS_UNDER_REPLICATED_IDX] = getUnderReplicatedBlocks();
    stats[ClientProtocol.GET_STATS_CORRUPT_BLOCKS_IDX] = getCorruptReplicaBlocks();
    stats[ClientProtocol.GET_STATS_MISSING_BLOCKS_IDX] = getMissingBlocksCount();
    return stats;
  }
5352
  @Override // FSNamesystemMBean
  @Metric({"CapacityTotal",
      "Total raw capacity of data nodes in bytes"})
  public long getCapacityTotal() {
    return datanodeStatistics.getCapacityTotal();
  }
5359
  @Metric({"CapacityTotalGB",
      "Total raw capacity of data nodes in GB"})
  public float getCapacityTotalGB() {
    return DFSUtil.roundBytesToGB(getCapacityTotal());
  }
5365
  @Override // FSNamesystemMBean
  @Metric({"CapacityUsed",
      "Total used capacity across all data nodes in bytes"})
  public long getCapacityUsed() {
    return datanodeStatistics.getCapacityUsed();
  }
5372
  @Metric({"CapacityUsedGB",
      "Total used capacity across all data nodes in GB"})
  public float getCapacityUsedGB() {
    return DFSUtil.roundBytesToGB(getCapacityUsed());
  }
5378
  @Override // FSNamesystemMBean
  @Metric({"CapacityRemaining", "Remaining capacity in bytes"})
  public long getCapacityRemaining() {
    return datanodeStatistics.getCapacityRemaining();
  }
5384
  @Metric({"CapacityRemainingGB", "Remaining capacity in GB"})
  public float getCapacityRemainingGB() {
    return DFSUtil.roundBytesToGB(getCapacityRemaining());
  }
5389
  @Metric({"CapacityUsedNonDFS",
      "Total space used by data nodes for non DFS purposes in bytes"})
  public long getCapacityUsedNonDFS() {
    return datanodeStatistics.getCapacityUsedNonDFS();
  }
5395
5396 /**
5397 * Total number of connections.
5398 */
5399 @Override // FSNamesystemMBean
5400 @Metric
5401 public int getTotalLoad() {
5402 return datanodeStatistics.getXceiverCount();
5403 }
5404
  @Metric({ "SnapshottableDirectories", "Number of snapshottable directories" })
  public int getNumSnapshottableDirs() {
    return this.snapshotManager.getNumSnapshottableDirs();
  }
5409
  @Metric({ "Snapshots", "The number of snapshots" })
  public int getNumSnapshots() {
    return this.snapshotManager.getNumSnapshots();
  }
5414
  @Override
  public String getSnapshotStats() {
    // Snapshot counters serialized as a JSON map for the overridden
    // stats interface.
    Map<String, Object> info = new HashMap<String, Object>();
    info.put("SnapshottableDirectories", this.getNumSnapshottableDirs());
    info.put("Snapshots", this.getNumSnapshots());
    return JSON.toString(info);
  }
5422
  /** @return the number of datanodes matching the given report type. */
  int getNumberOfDatanodes(DatanodeReportType type) {
    readLock();
    try {
      return getBlockManager().getDatanodeManager().getDatanodeListForReport(
          type).size();
    } finally {
      readUnlock();
    }
  }
5432
5433 DatanodeInfo[] datanodeReport(final DatanodeReportType type
5434 ) throws AccessControlException, StandbyException {
5435 checkSuperuserPrivilege();
5436 checkOperation(OperationCategory.UNCHECKED);
5437 readLock();
5438 try {
5439 checkOperation(OperationCategory.UNCHECKED);
5440 final DatanodeManager dm = getBlockManager().getDatanodeManager();
5441 final List<DatanodeDescriptor> results = dm.getDatanodeListForReport(type);
5442
5443 DatanodeInfo[] arr = new DatanodeInfo[results.size()];
5444 for (int i=0; i<arr.length; i++) {
5445 arr[i] = new DatanodeInfo(results.get(i));
5446 }
5447 return arr;
5448 } finally {
5449 readUnlock();
5450 }
5451 }
5452
5453 DatanodeStorageReport[] getDatanodeStorageReport(final DatanodeReportType type
5454 ) throws AccessControlException, StandbyException {
5455 checkSuperuserPrivilege();
5456 checkOperation(OperationCategory.UNCHECKED);
5457 readLock();
5458 try {
5459 checkOperation(OperationCategory.UNCHECKED);
5460 final DatanodeManager dm = getBlockManager().getDatanodeManager();
5461 final List<DatanodeDescriptor> datanodes = dm.getDatanodeListForReport(type);
5462
5463 DatanodeStorageReport[] reports = new DatanodeStorageReport[datanodes.size()];
5464 for (int i = 0; i < reports.length; i++) {
5465 final DatanodeDescriptor d = datanodes.get(i);
5466 reports[i] = new DatanodeStorageReport(new DatanodeInfo(d),
5467 d.getStorageReports());
5468 }
5469 return reports;
5470 } finally {
5471 readUnlock();
5472 }
5473 }
5474
5475 /**
5476 * Save namespace image.
5477 * This will save current namespace into fsimage file and empty edits file.
5478 * Requires superuser privilege and safe mode.
5479 *
5480 * @throws AccessControlException if superuser privilege is violated.
5481 * @throws IOException if
5482 */
5483 void saveNamespace() throws AccessControlException, IOException {
5484 checkOperation(OperationCategory.UNCHECKED);
5485 checkSuperuserPrivilege();
5486
5487 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
5488 if (cacheEntry != null && cacheEntry.isSuccess()) {
5489 return; // Return previous response
5490 }
5491 boolean success = false;
5492 readLock();
5493 try {
5494 checkOperation(OperationCategory.UNCHECKED);
5495
5496 if (!isInSafeMode()) {
5497 throw new IOException("Safe mode should be turned ON "
5498 + "in order to create namespace image.");
5499 }
5500 getFSImage().saveNamespace(this);
5501 success = true;
5502 } finally {
5503 readUnlock();
5504 RetryCache.setState(cacheEntry, success);
5505 }
5506 LOG.info("New namespace image has been created");
5507 }
5508
5509 /**
5510 * Enables/Disables/Checks restoring failed storage replicas if the storage becomes available again.
5511 * Requires superuser privilege.
5512 *
5513 * @throws AccessControlException if superuser privilege is violated.
5514 */
5515 boolean restoreFailedStorage(String arg) throws AccessControlException,
5516 StandbyException {
5517 checkSuperuserPrivilege();
5518 checkOperation(OperationCategory.UNCHECKED);
5519 writeLock();
5520 try {
5521 checkOperation(OperationCategory.UNCHECKED);
5522
5523 // if it is disabled - enable it and vice versa.
5524 if(arg.equals("check"))
5525 return getFSImage().getStorage().getRestoreFailedStorage();
5526
5527 boolean val = arg.equals("true"); // false if not
5528 getFSImage().getStorage().setRestoreFailedStorage(val);
5529
5530 return val;
5531 } finally {
5532 writeUnlock();
5533 }
5534 }
5535
  /** @return the namenode start time as a fresh Date instance. */
  Date getStartTime() {
    return new Date(startTime);
  }
5539
  /** Finalize a cluster upgrade. Requires superuser privilege. */
  void finalizeUpgrade() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.UNCHECKED);
    writeLock();
    try {
      checkOperation(OperationCategory.UNCHECKED);
      // In an HA setup only an active NN should drive finalization.
      getFSImage().finalizeUpgrade(this.isHaEnabled() && inActiveState());
    } finally {
      writeUnlock();
    }
  }
5551
  /** Re-read the include/exclude host lists. Requires superuser privilege. */
  void refreshNodes() throws IOException {
    checkOperation(OperationCategory.UNCHECKED);
    checkSuperuserPrivilege();
    // A fresh HdfsConfiguration picks up updated host files from disk.
    getBlockManager().getDatanodeManager().refreshNodes(new HdfsConfiguration());
  }
5557
  /**
   * Update the balancer bandwidth pushed to all datanodes.
   * Requires superuser privilege.
   */
  void setBalancerBandwidth(long bandwidth) throws IOException {
    checkOperation(OperationCategory.UNCHECKED);
    checkSuperuserPrivilege();
    getBlockManager().getDatanodeManager().setBalancerBandwidth(bandwidth);
  }
5563
5564 /**
5565 * Persist the new block (the last block of the given file).
5566 * @param path
5567 * @param file
5568 */
5569 private void persistNewBlock(String path, INodeFile file) {
5570 Preconditions.checkArgument(file.isUnderConstruction());
5571 getEditLog().logAddBlock(path, file);
5572 if (NameNode.stateChangeLog.isDebugEnabled()) {
5573 NameNode.stateChangeLog.debug("persistNewBlock: "
5574 + path + " with new block " + file.getLastBlock().toString()
5575 + ", current total block count is " + file.getBlocks().length);
5576 }
5577 }
5578
5579 /**
5580 * SafeModeInfo contains information related to the safe mode.
5581 * <p>
5582 * An instance of {@link SafeModeInfo} is created when the name node
5583 * enters safe mode.
5584 * <p>
5585 * During name node startup {@link SafeModeInfo} counts the number of
5586 * <em>safe blocks</em>, those that have at least the minimal number of
5587 * replicas, and calculates the ratio of safe blocks to the total number
5588 * of blocks in the system, which is the size of blocks in
5589 * {@link FSNamesystem#blockManager}. When the ratio reaches the
5590 * {@link #threshold} it starts the SafeModeMonitor daemon in order
5591 * to monitor whether the safe mode {@link #extension} is passed.
5592 * Then it leaves safe mode and destroys itself.
5593 * <p>
5594 * If safe mode is turned on manually then the number of safe blocks is
5595 * not tracked because the name node is not intended to leave safe mode
5596 * automatically in the case.
5597 *
5598 * @see ClientProtocol#setSafeMode(HdfsConstants.SafeModeAction, boolean)
5599 */
5600 public class SafeModeInfo {
5601 // configuration fields
5602 /** Safe mode threshold condition %.*/
5603 private final double threshold;
5604 /** Safe mode minimum number of datanodes alive */
5605 private final int datanodeThreshold;
5606 /**
5607 * Safe mode extension after the threshold.
5608 * Make it volatile so that getSafeModeTip can read the latest value
5609 * without taking a lock.
5610 */
5611 private volatile int extension;
5612 /** Min replication required by safe mode. */
5613 private final int safeReplication;
5614 /** threshold for populating needed replication queues */
5615 private final double replQueueThreshold;
5616 // internal fields
5617 /** Time when threshold was reached.
5618 * <br> -1 safe mode is off
5619 * <br> 0 safe mode is on, and threshold is not reached yet
5620 * <br> >0 safe mode is on, but we are in extension period
5621 */
5622 private long reached = -1;
5623 /** Total number of blocks. */
5624 int blockTotal;
5625 /** Number of safe blocks. */
5626 int blockSafe;
5627 /** Number of blocks needed to satisfy safe mode threshold condition */
5628 private int blockThreshold;
5629 /** Number of blocks needed before populating replication queues */
5630 private int blockReplQueueThreshold;
5631 /** time of the last status printout */
5632 private long lastStatusReport = 0;
5633 /**
5634 * Was safemode entered automatically because available resources were low.
5635 * Make it volatile so that getSafeModeTip can read the latest value
5636 * without taking a lock.
5637 */
5638 private volatile boolean resourcesLow = false;
5639 /** Should safemode adjust its block totals as blocks come in */
5640 private boolean shouldIncrementallyTrackBlocks = false;
5641 /** counter for tracking startup progress of reported blocks */
5642 private Counter awaitingReportedBlocksCounter;
5643
5644 /**
5645 * Creates SafeModeInfo when the name node enters
5646 * automatic safe mode at startup.
5647 *
5648 * @param conf configuration
5649 */
5650 private SafeModeInfo(Configuration conf) {
5651 this.threshold = conf.getFloat(DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY,
5652 DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT);
5653 if(threshold > 1.0) {
5654 LOG.warn("The threshold value should't be greater than 1, threshold: " + threshold);
5655 }
5656 this.datanodeThreshold = conf.getInt(
5657 DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY,
5658 DFS_NAMENODE_SAFEMODE_MIN_DATANODES_DEFAULT);
5659 this.extension = conf.getInt(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 0);
5660 this.safeReplication = conf.getInt(DFS_NAMENODE_REPLICATION_MIN_KEY,
5661 DFS_NAMENODE_REPLICATION_MIN_DEFAULT);
5662
5663 LOG.info(DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY + " = " + threshold);
5664 LOG.info(DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY + " = " + datanodeThreshold);
5665 LOG.info(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY + " = " + extension);
5666
5667 // default to safe mode threshold (i.e., don't populate queues before leaving safe mode)
5668 this.replQueueThreshold =
5669 conf.getFloat(DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY,
5670 (float) threshold);
5671 this.blockTotal = 0;
5672 this.blockSafe = 0;
5673 }
5674
5675 /**
5676 * In the HA case, the StandbyNode can be in safemode while the namespace
5677 * is modified by the edit log tailer. In this case, the number of total
5678 * blocks changes as edits are processed (eg blocks are added and deleted).
5679 * However, we don't want to do the incremental tracking during the
5680 * startup-time loading process -- only once the initial total has been
5681 * set after the image has been loaded.
5682 */
5683 private boolean shouldIncrementallyTrackBlocks() {
5684 return shouldIncrementallyTrackBlocks;
5685 }
5686
5687 /**
5688 * Creates SafeModeInfo when safe mode is entered manually, or because
5689 * available resources are low.
5690 *
5691 * The {@link #threshold} is set to 1.5 so that it could never be reached.
5692 * {@link #blockTotal} is set to -1 to indicate that safe mode is manual.
5693 *
5694 * @see SafeModeInfo
5695 */
5696 private SafeModeInfo(boolean resourcesLow) {
5697 this.threshold = 1.5f; // this threshold can never be reached
5698 this.datanodeThreshold = Integer.MAX_VALUE;
5699 this.extension = Integer.MAX_VALUE;
5700 this.safeReplication = Short.MAX_VALUE + 1; // more than maxReplication
5701 this.replQueueThreshold = 1.5f; // can never be reached
5702 this.blockTotal = -1;
5703 this.blockSafe = -1;
5704 this.resourcesLow = resourcesLow;
5705 enter();
5706 reportStatus("STATE* Safe mode is ON.", true);
5707 }
5708
5709 /**
5710 * Check if safe mode is on.
5711 * @return true if in safe mode
5712 */
5713 private synchronized boolean isOn() {
5714 doConsistencyCheck();
5715 return this.reached >= 0;
5716 }
5717
5718 /**
5719 * Enter safe mode.
5720 */
5721 private void enter() {
5722 this.reached = 0;
5723 }
5724
5725 /**
5726 * Leave safe mode.
5727 * <p>
5728 * Check for invalid, under- & over-replicated blocks in the end of startup.
5729 */
5730 private synchronized void leave() {
5731 // if not done yet, initialize replication queues.
5732 // In the standby, do not populate repl queues
5733 if (!isPopulatingReplQueues() && shouldPopulateReplQueues()) {
5734 initializeReplQueues();
5735 }
5736 long timeInSafemode = now() - startTime;
5737 NameNode.stateChangeLog.info("STATE* Leaving safe mode after "
5738 + timeInSafemode/1000 + " secs");
5739 NameNode.getNameNodeMetrics().setSafeModeTime((int) timeInSafemode);
5740
5741 //Log the following only once (when transitioning from ON -> OFF)
5742 if (reached >= 0) {
5743 NameNode.stateChangeLog.info("STATE* Safe mode is OFF");
5744 }
5745 reached = -1;
5746 safeMode = null;
5747 final NetworkTopology nt = blockManager.getDatanodeManager().getNetworkTopology();
5748 NameNode.stateChangeLog.info("STATE* Network topology has "
5749 + nt.getNumOfRacks() + " racks and "
5750 + nt.getNumOfLeaves() + " datanodes");
5751 NameNode.stateChangeLog.info("STATE* UnderReplicatedBlocks has "
5752 + blockManager.numOfUnderReplicatedBlocks() + " blocks");
5753
5754 startSecretManagerIfNecessary();
5755
5756 // If startup has not yet completed, end safemode phase.
5757 StartupProgress prog = NameNode.getStartupProgress();
5758 if (prog.getStatus(Phase.SAFEMODE) != Status.COMPLETE) {
5759 prog.endStep(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS);
5760 prog.endPhase(Phase.SAFEMODE);
5761 }
5762 }
5763
5764 /**
5765 * Check whether we have reached the threshold for
5766 * initializing replication queues.
5767 */
5768 private synchronized boolean canInitializeReplQueues() {
5769 return shouldPopulateReplQueues()
5770 && blockSafe >= blockReplQueueThreshold;
5771 }
5772
5773 /**
5774 * Safe mode can be turned off iff
5775 * the threshold is reached and
5776 * the extension time have passed.
5777 * @return true if can leave or false otherwise.
5778 */
5779 private synchronized boolean canLeave() {
5780 if (reached == 0) {
5781 return false;
5782 }
5783
5784 if (now() - reached < extension) {
5785 reportStatus("STATE* Safe mode ON, in safe mode extension.", false);
5786 return false;
5787 }
5788
5789 if (needEnter()) {
5790 reportStatus("STATE* Safe mode ON, thresholds not met.", false);
5791 return false;
5792 }
5793
5794 return true;
5795 }
5796
5797 /**
5798 * There is no need to enter safe mode
5799 * if DFS is empty or {@link #threshold} == 0
5800 */
5801 private boolean needEnter() {
5802 return (threshold != 0 && blockSafe < blockThreshold) ||
5803 (datanodeThreshold != 0 && getNumLiveDataNodes() < datanodeThreshold) ||
5804 (!nameNodeHasResourcesAvailable());
5805 }
5806
5807 /**
5808 * Check and trigger safe mode if needed.
5809 */
5810 private void checkMode() {
5811 // Have to have write-lock since leaving safemode initializes
5812 // repl queues, which requires write lock
5813 assert hasWriteLock();
5814 if (inTransitionToActive()) {
5815 return;
5816 }
5817 // if smmthread is already running, the block threshold must have been
5818 // reached before, there is no need to enter the safe mode again
5819 if (smmthread == null && needEnter()) {
5820 enter();
5821 // check if we are ready to initialize replication queues
5822 if (canInitializeReplQueues() && !isPopulatingReplQueues()
5823 && !haEnabled) {
5824 initializeReplQueues();
5825 }
5826 reportStatus("STATE* Safe mode ON.", false);
5827 return;
5828 }
5829 // the threshold is reached or was reached before
5830 if (!isOn() || // safe mode is off
5831 extension <= 0 || threshold <= 0) { // don't need to wait
5832 this.leave(); // leave safe mode
5833 return;
5834 }
5835 if (reached > 0) { // threshold has already been reached before
5836 reportStatus("STATE* Safe mode ON.", false);
5837 return;
5838 }
5839 // start monitor
5840 reached = now();
5841 if (smmthread == null) {
5842 smmthread = new Daemon(new SafeModeMonitor());
5843 smmthread.start();
5844 reportStatus("STATE* Safe mode extension entered.", true);
5845 }
5846
5847 // check if we are ready to initialize replication queues
5848 if (canInitializeReplQueues() && !isPopulatingReplQueues() && !haEnabled) {
5849 initializeReplQueues();
5850 }
5851 }
5852
5853 /**
5854 * Set total number of blocks.
5855 */
5856 private synchronized void setBlockTotal(int total) {
5857 this.blockTotal = total;
5858 this.blockThreshold = (int) (blockTotal * threshold);
5859 this.blockReplQueueThreshold =
5860 (int) (blockTotal * replQueueThreshold);
5861 if (haEnabled) {
5862 // After we initialize the block count, any further namespace
5863 // modifications done while in safe mode need to keep track
5864 // of the number of total blocks in the system.
5865 this.shouldIncrementallyTrackBlocks = true;
5866 }
5867 if(blockSafe < 0)
5868 this.blockSafe = 0;
5869 checkMode();
5870 }
5871
5872 /**
5873 * Increment number of safe blocks if current block has
5874 * reached minimal replication.
5875 * @param replication current replication
5876 */
5877 private synchronized void incrementSafeBlockCount(short replication) {
5878 if (replication == safeReplication) {
5879 this.blockSafe++;
5880
5881 // Report startup progress only if we haven't completed startup yet.
5882 StartupProgress prog = NameNode.getStartupProgress();
5883 if (prog.getStatus(Phase.SAFEMODE) != Status.COMPLETE) {
5884 if (this.awaitingReportedBlocksCounter == null) {
5885 this.awaitingReportedBlocksCounter = prog.getCounter(Phase.SAFEMODE,
5886 STEP_AWAITING_REPORTED_BLOCKS);
5887 }
5888 this.awaitingReportedBlocksCounter.increment();
5889 }
5890
5891 checkMode();
5892 }
5893 }
5894
5895 /**
5896 * Decrement number of safe blocks if current block has
5897 * fallen below minimal replication.
5898 * @param replication current replication
5899 */
5900 private synchronized void decrementSafeBlockCount(short replication) {
5901 if (replication == safeReplication-1) {
5902 this.blockSafe--;
5903 //blockSafe is set to -1 in manual / low resources safemode
5904 assert blockSafe >= 0 || isManual() || areResourcesLow();
5905 checkMode();
5906 }
5907 }
5908
5909 /**
5910 * Check if safe mode was entered manually
5911 */
5912 private boolean isManual() {
5913 return extension == Integer.MAX_VALUE;
5914 }
5915
5916 /**
5917 * Set manual safe mode.
5918 */
5919 private synchronized void setManual() {
5920 extension = Integer.MAX_VALUE;
5921 }
5922
5923 /**
5924 * Check if safe mode was entered due to resources being low.
5925 */
5926 private boolean areResourcesLow() {
5927 return resourcesLow;
5928 }
5929
5930 /**
5931 * Set that resources are low for this instance of safe mode.
5932 */
5933 private void setResourcesLow() {
5934 resourcesLow = true;
5935 }
5936
5937 /**
5938 * A tip on how safe mode is to be turned off: manually or automatically.
5939 */
5940 String getTurnOffTip() {
5941 if(!isOn()) {
5942 return "Safe mode is OFF.";
5943 }
5944
5945 //Manual OR low-resource safemode. (Admin intervention required)
5946 String adminMsg = "It was turned on manually. ";
5947 if (areResourcesLow()) {
5948 adminMsg = "Resources are low on NN. Please add or free up more "
5949 + "resources then turn off safe mode manually. NOTE: If you turn off"
5950 + " safe mode before adding resources, "
5951 + "the NN will immediately return to safe mode. ";
5952 }
5953 if (isManual() || areResourcesLow()) {
5954 return adminMsg
5955 + "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off.";
5956 }
5957
5958 boolean thresholdsMet = true;
5959 int numLive = getNumLiveDataNodes();
5960 String msg = "";
5961 if (blockSafe < blockThreshold) {
5962 msg += String.format(
5963 "The reported blocks %d needs additional %d"
5964 + " blocks to reach the threshold %.4f of total blocks %d.%n",
5965 blockSafe, (blockThreshold - blockSafe) + 1, threshold, blockTotal);
5966 thresholdsMet = false;
5967 } else {
5968 msg += String.format("The reported blocks %d has reached the threshold"
5969 + " %.4f of total blocks %d. ", blockSafe, threshold, blockTotal);
5970 }
5971 if (numLive < datanodeThreshold) {
5972 msg += String.format(
5973 "The number of live datanodes %d needs an additional %d live "
5974 + "datanodes to reach the minimum number %d.%n",
5975 numLive, (datanodeThreshold - numLive), datanodeThreshold);
5976 thresholdsMet = false;
5977 } else {
5978 msg += String.format("The number of live datanodes %d has reached "
5979 + "the minimum number %d. ",
5980 numLive, datanodeThreshold);
5981 }
5982 msg += (reached > 0) ? "In safe mode extension. " : "";
5983 msg += "Safe mode will be turned off automatically ";
5984
5985 if (!thresholdsMet) {
5986 msg += "once the thresholds have been reached.";
5987 } else if (reached + extension - now() > 0) {
5988 msg += ("in " + (reached + extension - now()) / 1000 + " seconds.");
5989 } else {
5990 msg += "soon.";
5991 }
5992
5993 return msg;
5994 }
5995
5996 /**
5997 * Print status every 20 seconds.
5998 */
5999 private void reportStatus(String msg, boolean rightNow) {
6000 long curTime = now();
6001 if(!rightNow && (curTime - lastStatusReport < 20 * 1000))
6002 return;
6003 NameNode.stateChangeLog.info(msg + " \n" + getTurnOffTip());
6004 lastStatusReport = curTime;
6005 }
6006
6007 @Override
6008 public String toString() {
6009 String resText = "Current safe blocks = "
6010 + blockSafe
6011 + ". Target blocks = " + blockThreshold + " for threshold = %" + threshold
6012 + ". Minimal replication = " + safeReplication + ".";
6013 if (reached > 0)
6014 resText += " Threshold was reached " + new Date(reached) + ".";
6015 return resText;
6016 }
6017
    /**
     * Checks consistency of the class state.
     * This is costly so only runs if asserts are enabled.
     */
    private void doConsistencyCheck() {
      // Standard trick to detect whether -ea is in effect: the assignment
      // inside the assert only executes when assertions are enabled.
      boolean assertsOn = false;
      assert assertsOn = true; // set to true if asserts are on
      if (!assertsOn) return;

      if (blockTotal == -1 && blockSafe == -1) {
        return; // manual safe mode
      }
      int activeBlocks = blockManager.getActiveBlockCount();
      // A total/active mismatch is tolerated as long as blockSafe is still
      // inside the valid range [0, blockTotal]; otherwise the safe-mode
      // counters have diverged from the BlockManager's view.
      if ((blockTotal != activeBlocks) &&
          !(blockSafe >= 0 && blockSafe <= blockTotal)) {
        throw new AssertionError(
            " SafeMode: Inconsistent filesystem state: "
            + "SafeMode data: blockTotal=" + blockTotal
            + " blockSafe=" + blockSafe + "; "
            + "BlockManager data: active=" + activeBlocks);
      }
    }
6040
    /**
     * Adjust the safe-mode block counters by the given deltas.
     * No-op unless blocks are tracked incrementally; the assert below
     * documents that incremental tracking only occurs with HA enabled.
     * @param deltaSafe  change to apply to blockSafe (may be negative)
     * @param deltaTotal change to apply to blockTotal (may be negative)
     */
    private synchronized void adjustBlockTotals(int deltaSafe, int deltaTotal) {
      if (!shouldIncrementallyTrackBlocks) {
        return;
      }
      assert haEnabled;

      if (LOG.isDebugEnabled()) {
        LOG.debug("Adjusting block totals from " +
            blockSafe + "/" + blockTotal + " to " +
            (blockSafe + deltaSafe) + "/" + (blockTotal + deltaTotal));
      }
      // Both counters must remain non-negative after the adjustment.
      assert blockSafe + deltaSafe >= 0 : "Can't reduce blockSafe " +
        blockSafe + " by " + deltaSafe + ": would be negative";
      assert blockTotal + deltaTotal >= 0 : "Can't reduce blockTotal " +
        blockTotal + " by " + deltaTotal + ": would be negative";

      blockSafe += deltaSafe;
      // Route the total through setBlockTotal so any dependent thresholds
      // are recomputed consistently.
      setBlockTotal(blockTotal + deltaTotal);
    }
6060 }
6061
  /**
   * Periodically check whether it is time to leave safe mode.
   * This thread starts when the threshold level is reached.
   *
   */
  class SafeModeMonitor implements Runnable {
    /** interval in msec for checking safe mode: {@value} */
    private static final long recheckInterval = 1000;

    /**
     * Poll once per recheckInterval until either safe mode can be left,
     * safe mode has been cleared externally, or the namesystem shuts down.
     */
    @Override
    public void run() {
      while (fsRunning) {
        writeLock();
        try {
          if (safeMode == null) { // Not in safe mode.
            break;
          }
          if (safeMode.canLeave()) {
            // Leave safe mode.
            safeMode.leave();
            // Clear the monitor reference before exiting the loop so a new
            // monitor can be started for a future safe-mode episode.
            smmthread = null;
            break;
          }
        } finally {
          writeUnlock();
        }

        try {
          Thread.sleep(recheckInterval);
        } catch (InterruptedException ie) {
          // Ignored deliberately: the loop re-checks fsRunning/safeMode on
          // the next iteration. NOTE(review): the interrupt flag is not
          // restored here — presumably intentional, since re-interrupting
          // would make every subsequent sleep throw immediately; confirm.
        }
      }
      if (!fsRunning) {
        LOG.info("NameNode is being shutdown, exit SafeModeMonitor thread");
      }
    }
  }
6102
6103 boolean setSafeMode(SafeModeAction action) throws IOException {
6104 if (action != SafeModeAction.SAFEMODE_GET) {
6105 checkSuperuserPrivilege();
6106 switch(action) {
6107 case SAFEMODE_LEAVE: // leave safe mode
6108 leaveSafeMode();
6109 break;
6110 case SAFEMODE_ENTER: // enter safe mode
6111 enterSafeMode(false);
6112 break;
6113 default:
6114 LOG.error("Unexpected safe mode action");
6115 }
6116 }
6117 return isInSafeMode();
6118 }
6119
6120 @Override
6121 public void checkSafeMode() {
6122 // safeMode is volatile, and may be set to null at any time
6123 SafeModeInfo safeMode = this.safeMode;
6124 if (safeMode != null) {
6125 safeMode.checkMode();
6126 }
6127 }
6128
6129 @Override
6130 public boolean isInSafeMode() {
6131 // safeMode is volatile, and may be set to null at any time
6132 SafeModeInfo safeMode = this.safeMode;
6133 if (safeMode == null)
6134 return false;
6135 return safeMode.isOn();
6136 }
6137
6138 @Override
6139 public boolean isInStartupSafeMode() {
6140 // safeMode is volatile, and may be set to null at any time
6141 SafeModeInfo safeMode = this.safeMode;
6142 if (safeMode == null)
6143 return false;
6144 // If the NN is in safemode, and not due to manual / low resources, we
6145 // assume it must be because of startup. If the NN had low resources during
6146 // startup, we assume it came out of startup safemode and it is now in low
6147 // resources safemode
6148 return !safeMode.isManual() && !safeMode.areResourcesLow()
6149 && safeMode.isOn();
6150 }
6151
6152 /**
6153 * Check if replication queues are to be populated
6154 * @return true when node is HAState.Active and not in the very first safemode
6155 */
6156 @Override
6157 public boolean isPopulatingReplQueues() {
6158 if (!shouldPopulateReplQueues()) {
6159 return false;
6160 }
6161 return initializedReplQueues;
6162 }
6163
6164 private boolean shouldPopulateReplQueues() {
6165 if(haContext == null || haContext.getState() == null)
6166 return false;
6167 return haContext.getState().shouldPopulateReplQueues();
6168 }
6169
6170 @Override
6171 public void incrementSafeBlockCount(int replication) {
6172 // safeMode is volatile, and may be set to null at any time
6173 SafeModeInfo safeMode = this.safeMode;
6174 if (safeMode == null)
6175 return;
6176 safeMode.incrementSafeBlockCount((short)replication);
6177 }
6178
6179 @Override
6180 public void decrementSafeBlockCount(Block b) {
6181 // safeMode is volatile, and may be set to null at any time
6182 SafeModeInfo safeMode = this.safeMode;
6183 if (safeMode == null) // mostly true
6184 return;
6185 BlockInfo storedBlock = getStoredBlock(b);
6186 if (storedBlock.isComplete()) {
6187 safeMode.decrementSafeBlockCount((short)blockManager.countNodes(b).liveReplicas());
6188 }
6189 }
6190
6191 /**
6192 * Adjust the total number of blocks safe and expected during safe mode.
6193 * If safe mode is not currently on, this is a no-op.
6194 * @param deltaSafe the change in number of safe blocks
6195 * @param deltaTotal the change i nnumber of total blocks expected
6196 */
6197 @Override
6198 public void adjustSafeModeBlockTotals(int deltaSafe, int deltaTotal) {
6199 // safeMode is volatile, and may be set to null at any time
6200 SafeModeInfo safeMode = this.safeMode;
6201 if (safeMode == null)
6202 return;
6203 safeMode.adjustBlockTotals(deltaSafe, deltaTotal);
6204 }
6205
6206 /**
6207 * Set the total number of blocks in the system.
6208 */
6209 public void setBlockTotal() {
6210 // safeMode is volatile, and may be set to null at any time
6211 SafeModeInfo safeMode = this.safeMode;
6212 if (safeMode == null)
6213 return;
6214 safeMode.setBlockTotal((int)getCompleteBlocksTotal());
6215 }
6216
  /**
   * Get the total number of blocks in the system, as tracked by the
   * BlockManager. Includes under-construction blocks (which
   * getCompleteBlocksTotal() subtracts back out).
   */
  @Override // FSNamesystemMBean
  @Metric
  public long getBlocksTotal() {
    return blockManager.getTotalBlocks();
  }
6225
6226 /**
6227 * Get the total number of COMPLETE blocks in the system.
6228 * For safe mode only complete blocks are counted.
6229 */
6230 private long getCompleteBlocksTotal() {
6231 // Calculate number of blocks under construction
6232 long numUCBlocks = 0;
6233 readLock();
6234 try {
6235 for (Lease lease : leaseManager.getSortedLeases()) {
6236 for (String path : lease.getPaths()) {
6237 final INodeFile cons;
6238 try {
6239 cons = dir.getINode(path).asFile();
6240 Preconditions.checkState(cons.isUnderConstruction());
6241 } catch (UnresolvedLinkException e) {
6242 throw new AssertionError("Lease files should reside on this FS");
6243 }
6244 BlockInfo[] blocks = cons.getBlocks();
6245 if(blocks == null)
6246 continue;
6247 for(BlockInfo b : blocks) {
6248 if(!b.isComplete())
6249 numUCBlocks++;
6250 }
6251 }
6252 }
6253 LOG.info("Number of blocks under construction: " + numUCBlocks);
6254 return getBlocksTotal() - numUCBlocks;
6255 } finally {
6256 readUnlock();
6257 }
6258 }
6259
  /**
   * Enter safe mode. If resourcesLow is false, then we assume it is manual
   * @param resourcesLow true when triggered by the resource checker,
   *                     false for an admin-initiated (manual) request
   * @throws IOException if syncing the edit log fails
   */
  void enterSafeMode(boolean resourcesLow) throws IOException {
    writeLock();
    try {
      // Stop the secret manager, since rolling the master key would
      // try to write to the edit log
      stopSecretManager();

      // Ensure that any concurrent operations have been fully synced
      // before entering safe mode. This ensures that the FSImage
      // is entirely stable on disk as soon as we're in safe mode.
      boolean isEditlogOpenForWrite = getEditLog().isOpenForWrite();
      // Before Editlog is in OpenForWrite mode, editLogStream will be null. So,
      // logSyncAll call can be called only when Editlog is in OpenForWrite mode
      if (isEditlogOpenForWrite) {
        getEditLog().logSyncAll();
      }
      if (!isInSafeMode()) {
        // Not currently in safe mode: create a fresh SafeModeInfo carrying
        // the reason, and return without logging the "ON" message.
        safeMode = new SafeModeInfo(resourcesLow);
        return;
      }
      // Already in safe mode: record the (possibly new) reason on the
      // existing instance instead of replacing it.
      if (resourcesLow) {
        safeMode.setResourcesLow();
      } else {
        safeMode.setManual();
      }
      // Sync again so the reason change is durable before announcing it.
      if (isEditlogOpenForWrite) {
        getEditLog().logSyncAll();
      }
      NameNode.stateChangeLog.info("STATE* Safe mode is ON"
          + safeMode.getTurnOffTip());
    } finally {
      writeUnlock();
    }
  }
6298
6299 /**
6300 * Leave safe mode.
6301 */
6302 void leaveSafeMode() {
6303 writeLock();
6304 try {
6305 if (!isInSafeMode()) {
6306 NameNode.stateChangeLog.info("STATE* Safe mode is already OFF");
6307 return;
6308 }
6309 safeMode.leave();
6310 } finally {
6311 writeUnlock();
6312 }
6313 }
6314
6315 String getSafeModeTip() {
6316 // There is no need to take readLock.
6317 // Don't use isInSafeMode as this.safeMode might be set to null.
6318 // after isInSafeMode returns.
6319 boolean inSafeMode;
6320 SafeModeInfo safeMode = this.safeMode;
6321 if (safeMode == null) {
6322 inSafeMode = false;
6323 } else {
6324 inSafeMode = safeMode.isOn();
6325 }
6326
6327 if (!inSafeMode) {
6328 return "";
6329 } else {
6330 return safeMode.getTurnOffTip();
6331 }
6332 }
6333
  /**
   * Roll the edit log on behalf of a checkpointing node.
   * Superuser-only; refused while in safe mode.
   * @return the signature of the image/edits state after the roll
   */
  CheckpointSignature rollEditLog() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.JOURNAL);
    writeLock();
    try {
      // Re-check under the write lock: state may have changed since the
      // unlocked check above.
      checkOperation(OperationCategory.JOURNAL);
      checkNameNodeSafeMode("Log not rolled");
      if (Server.isRpcInvocation()) {
        LOG.info("Roll Edit Log from " + Server.getRemoteAddress());
      }
      return getFSImage().rollEditLog();
    } finally {
      writeUnlock();
    }
  }
6349
  /**
   * Start a checkpoint on behalf of a backup/secondary NameNode.
   * Uses the RetryCache so a retried RPC replays the previously computed
   * NamenodeCommand instead of starting a second checkpoint.
   */
  NamenodeCommand startCheckpoint(NamenodeRegistration backupNode,
      NamenodeRegistration activeNamenode) throws IOException {
    checkOperation(OperationCategory.CHECKPOINT);
    CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
        null);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      // Duplicate RPC: return the cached payload from the first attempt.
      return (NamenodeCommand) cacheEntry.getPayload();
    }
    writeLock();
    NamenodeCommand cmd = null;
    try {
      checkOperation(OperationCategory.CHECKPOINT);
      checkNameNodeSafeMode("Checkpoint not started");

      LOG.info("Start checkpoint for " + backupNode.getAddress());
      cmd = getFSImage().startCheckpoint(backupNode, activeNamenode);
      getEditLog().logSync();
      return cmd;
    } finally {
      writeUnlock();
      // Record outcome (success iff cmd != null) plus the payload so a
      // retried RPC can be answered from the cache.
      RetryCache.setState(cacheEntry, cmd != null, cmd);
    }
  }
6373
  /**
   * Forward an incremental block report (received/deleted replicas) from a
   * datanode to the BlockManager, under the namesystem write lock.
   * @param nodeID the reporting datanode
   * @param srdb the per-storage received/deleted block list
   */
  public void processIncrementalBlockReport(final DatanodeID nodeID,
      final StorageReceivedDeletedBlocks srdb)
      throws IOException {
    writeLock();
    try {
      blockManager.processIncrementalBlockReport(nodeID, srdb);
    } finally {
      writeUnlock();
    }
  }
6384
  /**
   * Complete a checkpoint previously started via startCheckpoint().
   * Idempotent through the RetryCache: a retried RPC returns immediately.
   * NOTE(review): this takes the readLock while startCheckpoint takes the
   * writeLock — presumably endCheckpoint does not mutate namespace state;
   * confirm before relying on that.
   */
  void endCheckpoint(NamenodeRegistration registration,
      CheckpointSignature sig) throws IOException {
    checkOperation(OperationCategory.CHECKPOINT);
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    boolean success = false;
    readLock();
    try {
      checkOperation(OperationCategory.CHECKPOINT);

      checkNameNodeSafeMode("Checkpoint not ended");
      LOG.info("End checkpoint for " + registration.getAddress());
      getFSImage().endCheckpoint(sig);
      success = true;
    } finally {
      readUnlock();
      // Record the outcome so a retried RPC is answered from the cache.
      RetryCache.setState(cacheEntry, success);
    }
  }
6406
6407 PermissionStatus createFsOwnerPermissions(FsPermission permission) {
6408 return new PermissionStatus(fsOwner.getShortUserName(), supergroup, permission);
6409 }
6410
  // The delegates below all funnel into the 7-arg checkPermission(pc, path,
  // doCheckOwner, ancestorAccess, parentAccess, access, subAccess); each one
  // fills in exactly the slot its name suggests and leaves the rest null.

  /** Require that the caller owns {@code path}. */
  private void checkOwner(FSPermissionChecker pc, String path)
      throws AccessControlException, UnresolvedLinkException {
    checkPermission(pc, path, true, null, null, null, null);
  }

  /** Require {@code access} on the path's own inode. */
  private void checkPathAccess(FSPermissionChecker pc,
      String path, FsAction access) throws AccessControlException,
      UnresolvedLinkException {
    checkPermission(pc, path, false, null, null, access, null);
  }

  /**
   * Deny superusers access to inodes carrying the
   * "unreadable by superuser" xattr; inverts the usual privilege order.
   */
  private void checkUnreadableBySuperuser(FSPermissionChecker pc,
      INode inode, int snapshotId)
      throws IOException {
    for (XAttr xattr : dir.getXAttrs(inode, snapshotId)) {
      if (XAttrHelper.getPrefixName(xattr).
          equals(SECURITY_XATTR_UNREADABLE_BY_SUPERUSER)) {
        if (pc.isSuperUser()) {
          throw new AccessControlException("Access is denied for " +
              pc.getUser() + " since the superuser is not allowed to " +
              "perform this operation.");
        }
      }
    }
  }

  /** Require {@code access} on the path's immediate parent directory. */
  private void checkParentAccess(FSPermissionChecker pc,
      String path, FsAction access) throws AccessControlException,
      UnresolvedLinkException {
    checkPermission(pc, path, false, null, access, null, null);
  }

  /** Require {@code access} on the deepest existing ancestor of the path. */
  private void checkAncestorAccess(FSPermissionChecker pc,
      String path, FsAction access) throws AccessControlException,
      UnresolvedLinkException {
    checkPermission(pc, path, false, access, null, null, null);
  }

  /** Require only traverse (directory-walk) permission along the path. */
  private void checkTraverse(FSPermissionChecker pc, String path)
      throws AccessControlException, UnresolvedLinkException {
    checkPermission(pc, path, false, null, null, null, null);
  }
6453
  /**
   * This is a wrapper for FSDirectory.resolvePath(). If the path passed
   * is prefixed with /.reserved/raw, then it checks to ensure that the caller
   * has super user privs.
   *
   * @param path The path to resolve.
   * @param pathComponents path components corresponding to the path
   * @return if the path indicates an inode, return path after replacing up to
   *         <inodeid> with the corresponding path of the inode, else the path
   *         in {@code src} as is. If the path refers to a path in the "raw"
   *         directory, return the non-raw pathname.
   * @throws FileNotFoundException
   * @throws AccessControlException if a /.reserved/raw path is given by a
   *         non-superuser
   */
  private String resolvePath(String path, byte[][] pathComponents)
      throws FileNotFoundException, AccessControlException {
    // Raw paths expose encryption metadata, so gate them on superuser.
    if (FSDirectory.isReservedRawName(path)) {
      checkSuperuserPrivilege();
    }
    return FSDirectory.resolvePath(path, pathComponents, dir);
  }
6475
6476 @Override
6477 public void checkSuperuserPrivilege()
6478 throws AccessControlException {
6479 if (isPermissionEnabled) {
6480 FSPermissionChecker pc = getPermissionChecker();
6481 pc.checkSuperuserPrivilege();
6482 }
6483 }
6484
  /**
   * Check whether current user have permissions to access the path. For more
   * details of the parameters, see
   * {@link FSPermissionChecker#checkPermission}.
   * Convenience overload: defaults ignoreEmptyDir=false, resolveLink=true.
   */
  private void checkPermission(FSPermissionChecker pc,
      String path, boolean doCheckOwner, FsAction ancestorAccess,
      FsAction parentAccess, FsAction access, FsAction subAccess)
      throws AccessControlException, UnresolvedLinkException {
    checkPermission(pc, path, doCheckOwner, ancestorAccess,
        parentAccess, access, subAccess, false, true);
  }

  /**
   * Check whether current user have permissions to access the path. For more
   * details of the parameters, see
   * {@link FSPermissionChecker#checkPermission}.
   * Superusers bypass the check entirely; everyone else is checked under
   * the read lock after the FSImage has finished loading.
   */
  private void checkPermission(FSPermissionChecker pc,
      String path, boolean doCheckOwner, FsAction ancestorAccess,
      FsAction parentAccess, FsAction access, FsAction subAccess,
      boolean ignoreEmptyDir, boolean resolveLink)
      throws AccessControlException, UnresolvedLinkException {
    if (!pc.isSuperUser()) {
      // Permission data lives in the namespace; wait until it is loaded.
      waitForLoadingFSImage();
      readLock();
      try {
        pc.checkPermission(path, dir, doCheckOwner, ancestorAccess,
            parentAccess, access, subAccess, ignoreEmptyDir, resolveLink);
      } finally {
        readUnlock();
      }
    }
  }
6519
6520 /**
6521 * Check to see if we have exceeded the limit on the number
6522 * of inodes.
6523 */
6524 void checkFsObjectLimit() throws IOException {
6525 if (maxFsObjects != 0 &&
6526 maxFsObjects <= dir.totalInodes() + getBlocksTotal()) {
6527 throw new IOException("Exceeded the configured number of objects " +
6528 maxFsObjects + " in the filesystem.");
6529 }
6530 }
6531
  /**
   * Get the total number of objects in the system.
   * @return the configured cap on inodes + blocks (0 means unlimited,
   *         per checkFsObjectLimit())
   */
  @Override // FSNamesystemMBean
  public long getMaxObjects() {
    return maxFsObjects;
  }

  /** @return the total number of inodes, straight from FSDirectory. */
  @Override // FSNamesystemMBean
  @Metric
  public long getFilesTotal() {
    // There is no need to take fSNamesystem's lock as
    // FSDirectory has its own lock.
    return this.dir.totalInodes();
  }
6547
  /** Number of blocks queued for replication, from the BlockManager. */
  @Override // FSNamesystemMBean
  @Metric
  public long getPendingReplicationBlocks() {
    return blockManager.getPendingReplicationBlocksCount();
  }

  /** Number of under-replicated blocks, from the BlockManager. */
  @Override // FSNamesystemMBean
  @Metric
  public long getUnderReplicatedBlocks() {
    return blockManager.getUnderReplicatedBlocksCount();
  }

  /** Returns number of blocks with corrupt replicas */
  @Metric({"CorruptBlocks", "Number of blocks with corrupt replicas"})
  public long getCorruptReplicaBlocks() {
    return blockManager.getCorruptReplicaBlocksCount();
  }

  /** Number of blocks with replication currently scheduled. */
  @Override // FSNamesystemMBean
  @Metric
  public long getScheduledReplicationBlocks() {
    return blockManager.getScheduledReplicationBlocksCount();
  }

  /** Number of blocks queued for deletion on datanodes. */
  @Override
  @Metric
  public long getPendingDeletionBlocks() {
    return blockManager.getPendingDeletionBlocksCount();
  }

  /** Time at which block deletion becomes eligible after startup. */
  @Override
  public long getBlockDeletionStartTime() {
    return startTime + blockManager.getStartupDelayBlockDeletionInMs();
  }

  /** Number of over-replicated (excess) block replicas. */
  @Metric
  public long getExcessBlocks() {
    return blockManager.getExcessBlocksCount();
  }

  // HA-only metric
  @Metric
  public long getPostponedMisreplicatedBlocks() {
    return blockManager.getPostponedMisreplicatedBlocksCount();
  }

  // HA-only metric
  @Metric
  public int getPendingDataNodeMessageCount() {
    return blockManager.getPendingDataNodeMessageCount();
  }

  // HA-only metric
  @Metric
  public String getHAState() {
    return haContext.getState().toString();
  }

  // HA-only metric: 0 unless this NN is in standby state tailing edits.
  @Metric
  public long getMillisSinceLastLoadedEdits() {
    if (isInStandbyState() && editLogTailer != null) {
      return now() - editLogTailer.getLastLoadTimestamp();
    } else {
      return 0;
    }
  }

  /** Capacity of the BlockManager's blocks map. */
  @Metric
  public int getBlockCapacity() {
    return blockManager.getCapacity();
  }

  /** @return "safeMode" while in safe mode, otherwise "Operational". */
  @Override // FSNamesystemMBean
  public String getFSState() {
    return isInSafeMode() ? "safeMode" : "Operational";
  }
6625
  // JMX ObjectNames for the registered beans; unregistered and cleared
  // again in shutdown().
  private ObjectName mbeanName;
  private ObjectName mxbeanName;

  /**
   * Register the FSNamesystem MBean using the name
   * "hadoop:service=NameNode,name=FSNamesystemState"
   */
  private void registerMBean() {
    // We can only implement one MXBean interface, so we keep the old one.
    try {
      StandardMBean bean = new StandardMBean(this, FSNamesystemMBean.class);
      mbeanName = MBeans.register("NameNode", "FSNamesystemState", bean);
    } catch (NotCompliantMBeanException e) {
      // Indicates a programming error in the FSNamesystemMBean interface.
      throw new RuntimeException("Bad MBean setup", e);
    }

    LOG.info("Registered FSNamesystemState MBean");
  }
6644
  /**
   * shutdown FSNamesystem
   *
   * Tears down, in order: the snapshot manager, both JMX beans, the
   * directory, and the block manager. Every step is null-guarded so
   * shutdown is safe on a partially-constructed namesystem.
   */
  void shutdown() {
    if (snapshotManager != null) {
      snapshotManager.shutdown();
    }
    if (mbeanName != null) {
      MBeans.unregister(mbeanName);
      mbeanName = null;
    }
    if (mxbeanName != null) {
      MBeans.unregister(mxbeanName);
      mxbeanName = null;
    }
    if (dir != null) {
      dir.shutdown();
    }
    if (blockManager != null) {
      blockManager.shutdown();
    }
  }
6667
  /** Number of datanodes currently considered live. */
  @Override // FSNamesystemMBean
  public int getNumLiveDataNodes() {
    return getBlockManager().getDatanodeManager().getNumLiveDataNodes();
  }

  /** Number of datanodes currently considered dead. */
  @Override // FSNamesystemMBean
  public int getNumDeadDataNodes() {
    return getBlockManager().getDatanodeManager().getNumDeadDataNodes();
  }
6677
6678 @Override // FSNamesystemMBean
6679 public int getNumDecomLiveDataNodes() {
6680 final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
6681 getBlockManager().getDatanodeManager().fetchDatanodes(live, null, true);
6682 int liveDecommissioned = 0;
6683 for (DatanodeDescriptor node : live) {
6684 liveDecommissioned += node.isDecommissioned() ? 1 : 0;
6685 }
6686 return liveDecommissioned;
6687 }
6688
6689 @Override // FSNamesystemMBean
6690 public int getNumDecomDeadDataNodes() {
6691 final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
6692 getBlockManager().getDatanodeManager().fetchDatanodes(null, dead, true);
6693 int deadDecommissioned = 0;
6694 for (DatanodeDescriptor node : dead) {
6695 deadDecommissioned += node.isDecommissioned() ? 1 : 0;
6696 }
6697 return deadDecommissioned;
6698 }
6699
  /** Number of datanodes currently in the decommissioning state. */
  @Override // FSNamesystemMBean
  public int getNumDecommissioningDataNodes() {
    return getBlockManager().getDatanodeManager().getDecommissioningNodes()
        .size();
  }

  @Override // FSNamesystemMBean
  @Metric({"StaleDataNodes",
    "Number of datanodes marked stale due to delayed heartbeat"})
  public int getNumStaleDataNodes() {
    return getBlockManager().getDatanodeManager().getNumStaleNodes();
  }

  /**
   * Storages are marked as "content stale" after NN restart or fails over and
   * before NN receives the first Heartbeat followed by the first Blockreport.
   */
  @Override // FSNamesystemMBean
  public int getNumStaleStorages() {
    return getBlockManager().getDatanodeManager().getNumStaleStorages();
  }
6721
  /**
   * Sets the current generation stamp for legacy blocks
   */
  void setGenerationStampV1(long stamp) {
    generationStampV1.setCurrentValue(stamp);
  }

  /**
   * Gets the current generation stamp for legacy blocks
   */
  long getGenerationStampV1() {
    return generationStampV1.getCurrentValue();
  }

  /**
   * Sets the current generation stamp for this filesystem
   */
  void setGenerationStampV2(long stamp) {
    generationStampV2.setCurrentValue(stamp);
  }

  /**
   * Gets the current generation stamp for this filesystem
   */
  long getGenerationStampV2() {
    return generationStampV2.getCurrentValue();
  }
6749
  /**
   * Upgrades the generation stamp for the filesystem
   * by reserving a sufficient range for all existing blocks.
   * Should be invoked only during the first upgrade to
   * sequential block IDs.
   * @return the new current V2 generation stamp
   */
  long upgradeGenerationStampToV2() {
    // Must only run once: V2 must still be at its initial reserved value.
    Preconditions.checkState(generationStampV2.getCurrentValue() ==
        GenerationStamp.LAST_RESERVED_STAMP);

    // Jump V2 past every stamp a legacy (V1) block could ever receive.
    generationStampV2.skipTo(
        generationStampV1.getCurrentValue() +
        HdfsConstants.RESERVED_GENERATION_STAMPS_V1);

    // Record the boundary: stamps below this limit belong to legacy blocks
    // (see isLegacyBlock()).
    generationStampV1Limit = generationStampV2.getCurrentValue();
    return generationStampV2.getCurrentValue();
  }
6767
  /**
   * Sets the generation stamp that delineates random and sequentially
   * allocated block IDs.
   * @param stamp set generation stamp limit to this value
   */
  void setGenerationStampV1Limit(long stamp) {
    // The limit may only be set once, from its "unset" sentinel value.
    Preconditions.checkState(generationStampV1Limit ==
        GenerationStamp.GRANDFATHER_GENERATION_STAMP);
    generationStampV1Limit = stamp;
  }

  /**
   * Gets the value of the generation stamp that delineates sequential
   * and random block IDs.
   */
  long getGenerationStampAtblockIdSwitch() {
    return generationStampV1Limit;
  }

  /** Exposes the sequential block-ID generator for tests only. */
  @VisibleForTesting
  SequentialBlockIdGenerator getBlockIdGenerator() {
    return blockIdGenerator;
  }
6791
  /**
   * Sets the maximum allocated block ID for this filesystem. This is
   * the basis for allocating new block IDs.
   */
  void setLastAllocatedBlockId(long blockId) {
    blockIdGenerator.skipTo(blockId);
  }

  /**
   * Gets the maximum sequentially allocated block ID for this filesystem
   */
  long getLastAllocatedBlockId() {
    return blockIdGenerator.getCurrentValue();
  }
6806
  /**
   * Increments, logs and then returns the stamp
   * @param legacyBlock true to draw from the legacy (V1) stamp sequence,
   *                    false for the current (V2) sequence
   * @throws SafeModeException if the NN is in safe mode
   */
  long nextGenerationStamp(boolean legacyBlock)
      throws IOException, SafeModeException {
    assert hasWriteLock();
    checkNameNodeSafeMode("Cannot get next generation stamp");

    long gs;
    if (legacyBlock) {
      gs = getNextGenerationStampV1();
      getEditLog().logGenerationStampV1(gs);
    } else {
      gs = getNextGenerationStampV2();
      getEditLog().logGenerationStampV2(gs);
    }

    // NB: callers sync the log
    return gs;
  }
6827
6828 @VisibleForTesting
6829 long getNextGenerationStampV1() throws IOException {
6830 long genStampV1 = generationStampV1.nextValue();
6831
6832 if (genStampV1 >= generationStampV1Limit) {
6833 // We ran out of generation stamps for legacy blocks. In practice, it
6834 // is extremely unlikely as we reserved 1T v1 generation stamps. The
6835 // result is that we can no longer append to the legacy blocks that
6836 // were created before the upgrade to sequential block IDs.
6837 throw new OutOfV1GenerationStampsException();
6838 }
6839
6840 return genStampV1;
6841 }
6842
  /** Draw the next current-format (V2) generation stamp. */
  @VisibleForTesting
  long getNextGenerationStampV2() {
    return generationStampV2.nextValue();
  }

  /** @return the boundary stamp separating legacy and sequential blocks. */
  long getGenerationStampV1Limit() {
    return generationStampV1Limit;
  }
6851
6852 /**
6853 * Determine whether the block ID was randomly generated (legacy) or
6854 * sequentially generated. The generation stamp value is used to
6855 * make the distinction.
6856 * @return true if the block ID was randomly generated, false otherwise.
6857 */
6858 boolean isLegacyBlock(Block block) {
6859 return block.getGenerationStamp() < getGenerationStampV1Limit();
6860 }
6861
  /**
   * Increments, logs and then returns the block ID
   * @throws SafeModeException (as IOException) if the NN is in safe mode
   */
  private long nextBlockId() throws IOException {
    assert hasWriteLock();
    checkNameNodeSafeMode("Cannot get next block ID");
    final long blockId = blockIdGenerator.nextValue();
    // Persist the allocation so block IDs never repeat after a restart.
    getEditLog().logAllocateBlockId(blockId);
    // NB: callers sync the log
    return blockId;
  }
6873
  /**
   * Whether the given file is effectively deleted: gone from the inodeMap,
   * detached from the directory tree by a recursive delete of an ancestor,
   * or surviving only in a snapshot with its current copy marked deleted.
   */
  private boolean isFileDeleted(INodeFile file) {
    // Not in the inodeMap or in the snapshot but marked deleted.
    if (dir.getInode(file.getId()) == null) {
      return true;
    }

    // look at the path hierarchy to see if one parent is deleted by recursive
    // deletion
    INode tmpChild = file;
    INodeDirectory tmpParent = file.getParent();
    while (true) {
      if (tmpParent == null ||
          tmpParent.searchChildren(tmpChild.getLocalNameBytes()) < 0) {
        // An ancestor no longer links this child: detached from the tree.
        return true;
      }
      if (tmpParent.isRoot()) {
        // Reached the root with every parent->child link intact.
        break;
      }
      tmpChild = tmpParent;
      tmpParent = tmpParent.getParent();
    }

    // Attached to the tree, but the current (non-snapshot) copy may still
    // have been deleted.
    if (file.isWithSnapshot() &&
        file.getFileWithSnapshotFeature().isCurrentFileDeleted()) {
      return true;
    }
    return false;
  }
6902
  /**
   * Validate that {@code block} is under construction, belongs to a live
   * under-construction file, and is leased by {@code clientName}.
   * @return the file the block belongs to
   * @throws IOException if the block is missing or not under construction,
   *         or its file is gone
   * @throws LeaseExpiredException if the caller does not hold the lease
   */
  private INodeFile checkUCBlock(ExtendedBlock block,
      String clientName) throws IOException {
    assert hasWriteLock();
    checkNameNodeSafeMode("Cannot get a new generation stamp and an "
        + "access token for block " + block);

    // check stored block state
    BlockInfo storedBlock = getStoredBlock(ExtendedBlock.getLocalBlock(block));
    if (storedBlock == null ||
        storedBlock.getBlockUCState() != BlockUCState.UNDER_CONSTRUCTION) {
      // NOTE(review): when storedBlock is null the message ends in "null" —
      // harmless, but the concatenation reads oddly; confirm if worth fixing.
      throw new IOException(block +
          " does not exist or is not under Construction" + storedBlock);
    }

    // check file inode
    final INodeFile file = ((INode)storedBlock.getBlockCollection()).asFile();
    if (file == null || !file.isUnderConstruction() || isFileDeleted(file)) {
      throw new IOException("The file " + storedBlock +
          " belonged to does not exist or it is not under construction.");
    }

    // check lease: only the client holding the file's lease may recover
    // or extend this block's pipeline.
    if (clientName == null
        || !clientName.equals(file.getFileUnderConstructionFeature()
            .getClientName())) {
      throw new LeaseExpiredException("Lease mismatch: " + block +
          " is accessed by a non lease holder " + clientName);
    }

    return file;
  }
6934
6935 /**
6936 * Client is reporting some bad block locations.
6937 */
6938 void reportBadBlocks(LocatedBlock[] blocks) throws IOException {
6939 checkOperation(OperationCategory.WRITE);
6940 NameNode.stateChangeLog.info("*DIR* reportBadBlocks");
6941 writeLock();
6942 try {
6943 checkOperation(OperationCategory.WRITE);
6944 for (int i = 0; i < blocks.length; i++) {
6945 ExtendedBlock blk = blocks[i].getBlock();
6946 DatanodeInfo[] nodes = blocks[i].getLocations();
6947 String[] storageIDs = blocks[i].getStorageIDs();
6948 for (int j = 0; j < nodes.length; j++) {
6949 blockManager.findAndMarkBlockAsCorrupt(blk, nodes[j],
6950 storageIDs == null ? null: storageIDs[j],
6951 "client machine reported it");
6952 }
6953 }
6954 } finally {
6955 writeUnlock();
6956 }
6957 }
6958
  /**
   * Get a new generation stamp together with an access token for
   * a block under construction
   *
   * This method is called for recovering a failed pipeline or setting up
   * a pipeline to append to a block.
   *
   * @param block a block
   * @param clientName the name of a client
   * @return a located block with a new generation stamp and an access token
   * @throws IOException if any error occurs
   */
  LocatedBlock updateBlockForPipeline(ExtendedBlock block,
      String clientName) throws IOException {
    LocatedBlock locatedBlock;
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);

      // check validity of parameters (block exists, is UC, lease matches)
      checkUCBlock(block, clientName);

      // get a new generation stamp and an access token; legacy blocks draw
      // from the V1 stamp sequence, others from V2
      block.setGenerationStamp(
          nextGenerationStamp(isLegacyBlock(block.getLocalBlock())));
      // Empty location list: the client re-establishes the pipeline itself.
      locatedBlock = new LocatedBlock(block, new DatanodeInfo[0]);
      blockManager.setBlockToken(locatedBlock, AccessMode.WRITE);
    } finally {
      writeUnlock();
    }
    // Ensure we record the new generation stamp
    getEditLog().logSync();
    return locatedBlock;
  }
6994
/**
 * Update a pipeline for a block under construction.
 *
 * @param clientName the name of the client
 * @param oldBlock an old block (the currently recorded state)
 * @param newBlock a new block with a new generation stamp and length
 * @param newNodes datanodes in the pipeline
 * @param newStorageIDs storage IDs for the new pipeline, parallel to
 *                      {@code newNodes}
 * @throws IOException if any error occurs
 */
void updatePipeline(String clientName, ExtendedBlock oldBlock,
    ExtendedBlock newBlock, DatanodeID[] newNodes, String[] newStorageIDs)
    throws IOException {
  checkOperation(OperationCategory.WRITE);
  // If this is a retried RPC that already succeeded, do not repeat the work.
  CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
  if (cacheEntry != null && cacheEntry.isSuccess()) {
    return; // Return previous response
  }
  LOG.info("updatePipeline(block=" + oldBlock
      + ", newGenerationStamp=" + newBlock.getGenerationStamp()
      + ", newLength=" + newBlock.getNumBytes()
      + ", newNodes=" + Arrays.asList(newNodes)
      + ", clientName=" + clientName
      + ")");
  waitForLoadingFSImage();
  writeLock();
  boolean success = false;
  try {
    checkOperation(OperationCategory.WRITE);
    checkNameNodeSafeMode("Pipeline not updated");
    assert newBlock.getBlockId()==oldBlock.getBlockId() : newBlock + " and "
      + oldBlock + " has different block identifier";
    updatePipelineInternal(clientName, oldBlock, newBlock, newNodes,
        newStorageIDs, cacheEntry != null);
    success = true;
  } finally {
    writeUnlock();
    // Record the outcome so a retried RPC can return the cached response.
    RetryCache.setState(cacheEntry, success);
  }
  getEditLog().logSync();
  LOG.info("updatePipeline(" + oldBlock + ") successfully to " + newBlock);
}
7036
/**
 * Does the actual pipeline update under the write lock: validates the
 * block/lease, applies the new generation stamp, length and expected
 * locations, and persists the change.
 *
 * @see #updatePipeline(String, ExtendedBlock, ExtendedBlock, DatanodeID[], String[])
 */
private void updatePipelineInternal(String clientName, ExtendedBlock oldBlock,
    ExtendedBlock newBlock, DatanodeID[] newNodes, String[] newStorageIDs,
    boolean logRetryCache)
    throws IOException {
  assert hasWriteLock();
  // check the validity of the block and lease holder name
  final INodeFile pendingFile = checkUCBlock(oldBlock, clientName);
  final BlockInfoUnderConstruction blockinfo
      = (BlockInfoUnderConstruction)pendingFile.getLastBlock();

  // check new GS & length: this is not expected — the new state must have a
  // strictly newer generation stamp and must not shrink the block
  if (newBlock.getGenerationStamp() <= blockinfo.getGenerationStamp() ||
      newBlock.getNumBytes() < blockinfo.getNumBytes()) {
    String msg = "Update " + oldBlock + " (len = " +
      blockinfo.getNumBytes() + ") to an older state: " + newBlock +
      " (len = " + newBlock.getNumBytes() +")";
    LOG.warn(msg);
    throw new IOException(msg);
  }

  // Update old block with the new generation stamp and new length
  blockinfo.setNumBytes(newBlock.getNumBytes());
  blockinfo.setGenerationStampAndVerifyReplicas(newBlock.getGenerationStamp());

  // find the DatanodeDescriptor objects for the new pipeline
  final DatanodeStorageInfo[] storages = blockManager.getDatanodeManager()
      .getDatanodeStorageInfos(newNodes, newStorageIDs);
  blockinfo.setExpectedLocations(storages);

  String src = pendingFile.getFullPathName();
  persistBlocks(src, pendingFile, logRetryCache);
}
7072
// rename was successful. If any part of the renamed subtree had
// files that were being written to, update with new filename.
// Caller must hold the FSNamesystem write lock.
void unprotectedChangeLease(String src, String dst) {
  assert hasWriteLock();
  leaseManager.changeLease(src, dst);
}
7079
/**
 * Serializes leases (files under construction) to the fsimage stream.
 *
 * @param out output stream to write the under-construction file records to
 * @param snapshotUCMap under-construction files captured only in snapshots,
 *                      keyed by inode id; entries also present in the lease
 *                      map are removed before writing
 */
void saveFilesUnderConstruction(DataOutputStream out,
    Map<Long, INodeFile> snapshotUCMap) throws IOException {
  // This is run by an inferior thread of saveNamespace, which holds a read
  // lock on our behalf. If we took the read lock here, we could block
  // for fairness if a writer is waiting on the lock.
  synchronized (leaseManager) {
    Map<String, INodeFile> nodes = leaseManager.getINodesUnderConstruction();
    for (Map.Entry<String, INodeFile> entry : nodes.entrySet()) {
      // TODO: for HDFS-5428, because of rename operations, some
      // under-construction files that are
      // in the current fs directory can also be captured in the
      // snapshotUCMap. We should remove them from the snapshotUCMap.
      snapshotUCMap.remove(entry.getValue().getId());
    }

    // write the size first: count of lease-map entries plus the remaining
    // snapshot-only entries
    out.writeInt(nodes.size() + snapshotUCMap.size()); // write the size
    for (Map.Entry<String, INodeFile> entry : nodes.entrySet()) {
      FSImageSerialization.writeINodeUnderConstruction(
          out, entry.getValue(), entry.getKey());
    }
    for (Map.Entry<Long, INodeFile> entry : snapshotUCMap.entrySet()) {
      // for those snapshot INodeFileUC, we use "/.reserved/.inodes/<inodeid>"
      // as their paths
      StringBuilder b = new StringBuilder();
      b.append(FSDirectory.DOT_RESERVED_PATH_PREFIX)
          .append(Path.SEPARATOR).append(FSDirectory.DOT_INODES_STRING)
          .append(Path.SEPARATOR).append(entry.getValue().getId());
      FSImageSerialization.writeINodeUnderConstruction(
          out, entry.getValue(), b.toString());
    }
  }
}
7115
/**
 * @return all the under-construction files in the lease map, keyed by path
 */
Map<String, INodeFile> getFilesUnderConstruction() {
  // synchronize on the lease manager to get a consistent view
  synchronized (leaseManager) {
    return leaseManager.getINodesUnderConstruction();
  }
}
7124
7125 /**
7126 * Register a Backup name-node, verifying that it belongs
7127 * to the correct namespace, and adding it to the set of
7128 * active journals if necessary.
7129 *
7130 * @param bnReg registration of the new BackupNode
7131 * @param nnReg registration of this NameNode
7132 * @throws IOException if the namespace IDs do not match
7133 */
7134 void registerBackupNode(NamenodeRegistration bnReg,
7135 NamenodeRegistration nnReg) throws IOException {
7136 writeLock();
7137 try {
7138 if(getFSImage().getStorage().getNamespaceID()
7139 != bnReg.getNamespaceID())
7140 throw new IOException("Incompatible namespaceIDs: "
7141 + " Namenode namespaceID = "
7142 + getFSImage().getStorage().getNamespaceID() + "; "
7143 + bnReg.getRole() +
7144 " node namespaceID = " + bnReg.getNamespaceID());
7145 if (bnReg.getRole() == NamenodeRole.BACKUP) {
7146 getFSImage().getEditLog().registerBackupNode(
7147 bnReg, nnReg);
7148 }
7149 } finally {
7150 writeUnlock();
7151 }
7152 }
7153
7154 /**
7155 * Release (unregister) backup node.
7156 * <p>
7157 * Find and remove the backup stream corresponding to the node.
7158 * @throws IOException
7159 */
7160 void releaseBackupNode(NamenodeRegistration registration)
7161 throws IOException {
7162 checkOperation(OperationCategory.WRITE);
7163 writeLock();
7164 try {
7165 checkOperation(OperationCategory.WRITE);
7166 if(getFSImage().getStorage().getNamespaceID()
7167 != registration.getNamespaceID())
7168 throw new IOException("Incompatible namespaceIDs: "
7169 + " Namenode namespaceID = "
7170 + getFSImage().getStorage().getNamespaceID() + "; "
7171 + registration.getRole() +
7172 " node namespaceID = " + registration.getNamespaceID());
7173 getEditLog().releaseBackupStream(registration);
7174 } finally {
7175 writeUnlock();
7176 }
7177 }
7178
7179 static class CorruptFileBlockInfo {
7180 final String path;
7181 final Block block;
7182
7183 public CorruptFileBlockInfo(String p, Block b) {
7184 path = p;
7185 block = b;
7186 }
7187
7188 @Override
7189 public String toString() {
7190 return block.getBlockName() + "\t" + path;
7191 }
7192 }
/**
 * @param path Restrict corrupt files to this portion of namespace.
 * @param cookieTab Support for continuation; cookieTab tells where
 *                  to start from. On return, cookieTab[0] holds the
 *                  position to resume from on the next call.
 * @return a list in which each entry describes a corrupt file/block
 * @throws IOException
 */
Collection<CorruptFileBlockInfo> listCorruptFileBlocks(String path,
	String[] cookieTab) throws IOException {
  checkSuperuserPrivilege();
  checkOperation(OperationCategory.READ);

  int count = 0;
  ArrayList<CorruptFileBlockInfo> corruptFiles =
      new ArrayList<CorruptFileBlockInfo>();
  if (cookieTab == null) {
    cookieTab = new String[] { null };
  }

  // Do a quick check if there are any corrupt files without taking the lock
  if (blockManager.getMissingBlocksCount() == 0) {
    if (cookieTab[0] == null) {
      // normalize a null cookie to "0" for the caller
      cookieTab[0] = String.valueOf(getIntCookie(cookieTab[0]));
    }
    LOG.info("there are no corrupt file blocks.");
    return corruptFiles;
  }

  readLock();
  try {
    checkOperation(OperationCategory.READ);
    if (!isPopulatingReplQueues()) {
      throw new IOException("Cannot run listCorruptFileBlocks because " +
                            "replication queues have not been initialized.");
    }
    // print a limited # of corrupt files per call

    final Iterator<Block> blkIterator = blockManager.getCorruptReplicaBlockIterator();

    // advance past the entries already returned in previous calls
    int skip = getIntCookie(cookieTab[0]);
    for (int i = 0; i < skip && blkIterator.hasNext(); i++) {
      blkIterator.next();
    }

    while (blkIterator.hasNext()) {
      Block blk = blkIterator.next();
      final INode inode = (INode)blockManager.getBlockCollection(blk);
      skip++;
      // report only blocks with no live replicas, under the requested path
      if (inode != null && blockManager.countNodes(blk).liveReplicas() == 0) {
        String src = FSDirectory.getFullPathName(inode);
        if (src.startsWith(path)){
          corruptFiles.add(new CorruptFileBlockInfo(src, blk));
          count++;
          if (count >= DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED)
            break;
        }
      }
    }
    // hand the continuation position back to the caller
    cookieTab[0] = String.valueOf(skip);
    LOG.info("list corrupt file blocks returned: " + count);
    return corruptFiles;
  } finally {
    readUnlock();
  }
}
7258
7259 /**
7260 * Convert string cookie to integer.
7261 */
7262 private static int getIntCookie(String cookie){
7263 int c;
7264 if(cookie == null){
7265 c = 0;
7266 } else {
7267 try{
7268 c = Integer.parseInt(cookie);
7269 }catch (NumberFormatException e) {
7270 c = 0;
7271 }
7272 }
7273 c = Math.max(0, c);
7274 return c;
7275 }
7276
/**
 * Create the delegation token secret manager, configured from the
 * key-update, token-max-lifetime, token-renew-interval and
 * audit-log-tracking-id settings.
 *
 * @param conf configuration to read the intervals and flags from
 * @return a new DelegationTokenSecretManager bound to this namesystem
 */
private DelegationTokenSecretManager createDelegationTokenSecretManager(
    Configuration conf) {
  return new DelegationTokenSecretManager(conf.getLong(
      DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY,
      DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT),
      conf.getLong(DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY,
          DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT),
      conf.getLong(DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY,
          DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT),
      DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL,
      conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY,
          DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT),
      this);
}
7294
/**
 * Returns the DelegationTokenSecretManager instance in the namesystem.
 * @return delegation token secret manager object
 */
DelegationTokenSecretManager getDelegationTokenSecretManager() {
  return dtSecretManager;
}
7302
/**
 * Issue a delegation token for the current remote user.
 *
 * @param renewer Renewer information
 * @return delegation token, or null if the secret manager is not running
 * @throws IOException on error, including when the connection is not
 *         authenticated via kerberos or web authentication
 */
Token<DelegationTokenIdentifier> getDelegationToken(Text renewer)
    throws IOException {
  Token<DelegationTokenIdentifier> token;
  checkOperation(OperationCategory.WRITE);
  writeLock();
  try {
    checkOperation(OperationCategory.WRITE);
    checkNameNodeSafeMode("Cannot issue delegation token");
    if (!isAllowedDelegationTokenOp()) {
      throw new IOException(
          "Delegation Token can be issued only with kerberos or web authentication");
    }
    if (dtSecretManager == null || !dtSecretManager.isRunning()) {
      LOG.warn("trying to get DT with no secret manager running");
      return null;
    }

    UserGroupInformation ugi = getRemoteUser();
    String user = ugi.getUserName();
    Text owner = new Text(user);
    Text realUser = null;
    // record the real user when the caller is a proxy user
    if (ugi.getRealUser() != null) {
      realUser = new Text(ugi.getRealUser().getUserName());
    }
    DelegationTokenIdentifier dtId = new DelegationTokenIdentifier(owner,
        renewer, realUser);
    token = new Token<DelegationTokenIdentifier>(
        dtId, dtSecretManager);
    long expiryTime = dtSecretManager.getTokenExpiryTime(dtId);
    getEditLog().logGetDelegationToken(dtId, expiryTime);
  } finally {
    writeUnlock();
  }
  // sync the edit log outside the lock before handing the token out
  getEditLog().logSync();
  return token;
}
7344
/**
 * Renew a delegation token on behalf of the current remote user.
 *
 * @param token token to renew
 * @return new expiryTime of the token
 * @throws InvalidToken if {@code token} is invalid
 * @throws IOException on other errors, including when the connection is not
 *         authenticated via kerberos or web authentication
 */
long renewDelegationToken(Token<DelegationTokenIdentifier> token)
    throws InvalidToken, IOException {
  long expiryTime;
  checkOperation(OperationCategory.WRITE);
  writeLock();
  try {
    checkOperation(OperationCategory.WRITE);

    checkNameNodeSafeMode("Cannot renew delegation token");
    if (!isAllowedDelegationTokenOp()) {
      throw new IOException(
          "Delegation Token can be renewed only with kerberos or web authentication");
    }
    String renewer = getRemoteUser().getShortUserName();
    expiryTime = dtSecretManager.renewToken(token, renewer);
    // decode the token identifier so the renewal can be logged in the edits;
    // the in-memory streams need no explicit close
    DelegationTokenIdentifier id = new DelegationTokenIdentifier();
    ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier());
    DataInputStream in = new DataInputStream(buf);
    id.readFields(in);
    getEditLog().logRenewDelegationToken(id, expiryTime);
  } finally {
    writeUnlock();
  }
  // sync the edit log outside the lock
  getEditLog().logSync();
  return expiryTime;
}
7378
/**
 * Cancel a delegation token on behalf of the current remote user.
 *
 * @param token token to cancel
 * @throws IOException on error
 */
void cancelDelegationToken(Token<DelegationTokenIdentifier> token)
    throws IOException {
  checkOperation(OperationCategory.WRITE);
  writeLock();
  try {
    checkOperation(OperationCategory.WRITE);

    checkNameNodeSafeMode("Cannot cancel delegation token");
    String canceller = getRemoteUser().getUserName();
    DelegationTokenIdentifier id = dtSecretManager
        .cancelToken(token, canceller);
    getEditLog().logCancelDelegationToken(id);
  } finally {
    writeUnlock();
  }
  // sync the edit log outside the lock
  getEditLog().logSync();
}
7401
/**
 * Save the secret manager state in the legacy (pre-protobuf) format.
 *
 * @param out save state of the secret manager
 * @param sdPath String storage directory path
 */
void saveSecretManagerStateCompat(DataOutputStream out, String sdPath)
    throws IOException {
  dtSecretManager.saveSecretManagerStateCompat(out, sdPath);
}
7410
/** @return a snapshot of the delegation token secret manager's state. */
SecretManagerState saveSecretManagerState() {
  return dtSecretManager.saveSecretManagerState();
}
7414
/**
 * Load the secret manager state from the legacy (pre-protobuf) format.
 *
 * @param in load the state of secret manager from input stream
 */
void loadSecretManagerStateCompat(DataInput in) throws IOException {
  dtSecretManager.loadSecretManagerStateCompat(in);
}
7421
/**
 * Load the secret manager state from its fsimage section, delegation keys
 * and persisted tokens.
 */
void loadSecretManagerState(SecretManagerSection s,
    List<SecretManagerSection.DelegationKey> keys,
    List<SecretManagerSection.PersistToken> tokens) throws IOException {
  dtSecretManager.loadSecretManagerState(new SecretManagerState(s, keys, tokens));
}
7427
/**
 * Log the updateMasterKey operation to edit logs.
 *
 * @param key new delegation key.
 */
public void logUpdateMasterKey(DelegationKey key) {

  assert !isInSafeMode() :
    "this should never be called while in safemode, since we stop " +
    "the DT manager before entering safemode!";
  // No need to hold FSN lock since we don't access any internal
  // structures, and this is stopped before the FSN shuts itself
  // down, etc.
  getEditLog().logUpdateMasterKey(key);
  getEditLog().logSync();
}
7444
/**
 * Log the cancellation of expired tokens to edit logs.
 *
 * @param id token identifier to cancel
 */
public void logExpireDelegationToken(DelegationTokenIdentifier id) {
  assert !isInSafeMode() :
    "this should never be called while in safemode, since we stop " +
    "the DT manager before entering safemode!";
  // No need to hold FSN lock since we don't access any internal
  // structures, and this is stopped before the FSN shuts itself
  // down, etc.
  getEditLog().logCancelDelegationToken(id);
}
7459
/**
 * Log a lease reassignment to the edit log. Caller must hold the
 * FSNamesystem write lock.
 */
private void logReassignLease(String leaseHolder, String src,
    String newHolder) {
  assert hasWriteLock();
  getEditLog().logReassignLease(leaseHolder, src, newHolder);
}
7465
7466 /**
7467 *
7468 * @return true if delegation token operation is allowed
7469 */
7470 private boolean isAllowedDelegationTokenOp() throws IOException {
7471 AuthenticationMethod authMethod = getConnectionAuthenticationMethod();
7472 if (UserGroupInformation.isSecurityEnabled()
7473 && (authMethod != AuthenticationMethod.KERBEROS)
7474 && (authMethod != AuthenticationMethod.KERBEROS_SSL)
7475 && (authMethod != AuthenticationMethod.CERTIFICATE)) {
7476 return false;
7477 }
7478 return true;
7479 }
7480
/**
 * Returns authentication method used to establish the connection.
 * @return AuthenticationMethod used to establish connection; for proxy
 *         users, the real user's authentication method
 * @throws IOException
 */
private AuthenticationMethod getConnectionAuthenticationMethod()
    throws IOException {
  UserGroupInformation ugi = getRemoteUser();
  AuthenticationMethod authMethod = ugi.getAuthenticationMethod();
  if (authMethod == AuthenticationMethod.PROXY) {
    authMethod = ugi.getRealUser().getAuthenticationMethod();
  }
  return authMethod;
}
7495
/**
 * Client invoked methods are invoked over RPC and will be in
 * RPC call context even if the client exits.
 *
 * @return true when the current call arrived via RPC or WebHDFS
 */
private boolean isExternalInvocation() {
  return Server.isRpcInvocation() || NamenodeWebHdfsMethods.isWebHdfsInvocation();
}
7503
7504 private static InetAddress getRemoteIp() {
7505 InetAddress ip = Server.getRemoteIp();
7506 if (ip != null) {
7507 return ip;
7508 }
7509 return NamenodeWebHdfsMethods.getRemoteIp();
7510 }
7511
// optimize ugi lookup for RPC operations to avoid a trip through
// UGI.getCurrentUser which is synch'ed
private static UserGroupInformation getRemoteUser() throws IOException {
  return NameNode.getRemoteUser();
}
7517
/**
 * Log fsck event in the audit log, if auditing is enabled.
 */
void logFsckEvent(String src, InetAddress remoteAddress) throws IOException {
  if (isAuditEnabled()) {
    logAuditEvent(true, getRemoteUser(),
                  remoteAddress,
                  "fsck", src, null, null);
  }
}
/**
 * Register NameNodeMXBean so this namesystem is exposed over JMX.
 */
private void registerMXBean() {
  mxbeanName = MBeans.register("NameNode", "NameNodeInfo", this);
}
7534
/**
 * NameNodeMXBean implementations follow; they expose namenode information
 * over JMX interfaces.
 *
 * @return the software version string with its revision appended
 */
@Override // NameNodeMXBean
public String getVersion() {
  return VersionInfo.getVersion() + ", r" + VersionInfo.getRevision();
}
7542
/** @return used capacity; delegates to {@link #getCapacityUsed()}. */
@Override // NameNodeMXBean
public long getUsed() {
  return this.getCapacityUsed();
}
7547
/** @return free capacity; delegates to {@link #getCapacityRemaining()}. */
@Override // NameNodeMXBean
public long getFree() {
  return this.getCapacityRemaining();
}
7552
/** @return total capacity; delegates to {@link #getCapacityTotal()}. */
@Override // NameNodeMXBean
public long getTotal() {
  return this.getCapacityTotal();
}
7557
7558 @Override // NameNodeMXBean
7559 public String getSafemode() {
7560 if (!this.isInSafeMode())
7561 return "";
7562 return "Safe mode is ON. " + this.getSafeModeTip();
7563 }
7564
/** @return whether the FSImage reports the upgrade as finalized. */
@Override // NameNodeMXBean
public boolean isUpgradeFinalized() {
  return this.getFSImage().isUpgradeFinalized();
}
7569
/** @return non-DFS used capacity, from the datanode statistics. */
@Override // NameNodeMXBean
public long getNonDfsUsedSpace() {
  return datanodeStatistics.getCapacityUsedNonDFS();
}
7574
/** @return percent of capacity used, from the datanode statistics. */
@Override // NameNodeMXBean
public float getPercentUsed() {
  return datanodeStatistics.getCapacityUsedPercent();
}
7579
/** @return block pool used space, from the datanode statistics. */
@Override // NameNodeMXBean
public long getBlockPoolUsedSpace() {
  return datanodeStatistics.getBlockPoolUsed();
}
7584
/** @return percent of block pool used, from the datanode statistics. */
@Override // NameNodeMXBean
public float getPercentBlockPoolUsed() {
  return datanodeStatistics.getPercentBlockPoolUsed();
}
7589
/** @return percent of capacity remaining, from the datanode statistics. */
@Override // NameNodeMXBean
public float getPercentRemaining() {
  return datanodeStatistics.getCapacityRemainingPercent();
}
7594
/** @return cache capacity, from the datanode statistics. */
@Override // NameNodeMXBean
public long getCacheCapacity() {
  return datanodeStatistics.getCacheCapacity();
}
7599
/** @return cache used, from the datanode statistics. */
@Override // NameNodeMXBean
public long getCacheUsed() {
  return datanodeStatistics.getCacheUsed();
}
7604
/** @return total block count; delegates to {@link #getBlocksTotal()}. */
@Override // NameNodeMXBean
public long getTotalBlocks() {
  return getBlocksTotal();
}
7609
/** @return total file count; delegates to {@link #getFilesTotal()}. */
@Override // NameNodeMXBean
@Metric
public long getTotalFiles() {
  return getFilesTotal();
}
7615
/** @return missing block count; delegates to {@link #getMissingBlocksCount()}. */
@Override // NameNodeMXBean
public long getNumberOfMissingBlocks() {
  return getMissingBlocksCount();
}
7620
/** @return live JVM thread count, via the thread MXBean. */
@Override // NameNodeMXBean
public int getThreads() {
  return ManagementFactory.getThreadMXBean().getThreadCount();
}
7625
/**
 * Returned information is a JSON representation of map with host name as the
 * key and value is a map of live node attribute keys to its values.
 */
@Override // NameNodeMXBean
public String getLiveNodes() {
  final Map<String, Map<String,Object>> info =
      new HashMap<String, Map<String,Object>>();
  final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
  // fetch only live datanodes (dead list not requested)
  blockManager.getDatanodeManager().fetchDatanodes(live, null, true);
  for (DatanodeDescriptor node : live) {
    // Attribute keys below form the JMX/JSON contract; do not rename.
    Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder()
        .put("infoAddr", node.getInfoAddr())
        .put("infoSecureAddr", node.getInfoSecureAddr())
        .put("xferaddr", node.getXferAddr())
        .put("lastContact", getLastContact(node))
        .put("usedSpace", getDfsUsed(node))
        .put("adminState", node.getAdminState().toString())
        .put("nonDfsUsedSpace", node.getNonDfsUsed())
        .put("capacity", node.getCapacity())
        .put("numBlocks", node.numBlocks())
        .put("version", node.getSoftwareVersion())
        .put("used", node.getDfsUsed())
        .put("remaining", node.getRemaining())
        .put("blockScheduled", node.getBlocksScheduled())
        .put("blockPoolUsed", node.getBlockPoolUsed())
        .put("blockPoolUsedPercent", node.getBlockPoolUsedPercent())
        .put("volfails", node.getVolumeFailures())
        .build();

    info.put(node.getHostName(), innerinfo);
  }
  return JSON.toString(info);
}
7660
7661 /**
7662 * Returned information is a JSON representation of map with host name as the
7663 * key and value is a map of dead node attribute keys to its values
7664 */
7665 @Override // NameNodeMXBean
7666 public String getDeadNodes() {
7667 final Map<String, Map<String, Object>> info =
7668 new HashMap<String, Map<String, Object>>();
7669 final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
7670 blockManager.getDatanodeManager().fetchDatanodes(null, dead, true);
7671 for (DatanodeDescriptor node : dead) {
7672 Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder()
7673 .put("lastContact", getLastContact(node))
7674 .put("decommissioned", node.isDecommissioned())
7675 .put("xferaddr", node.getXferAddr())
7676 .build();
7677 info.put(node.getHostName(), innerinfo);
7678 }
7679 return JSON.toString(info);
7680 }
7681
/**
 * Returned information is a JSON representation of map with host name as the
 * key and value is a map of decommissioning node attribute keys to its
 * values.
 */
@Override // NameNodeMXBean
public String getDecomNodes() {
  final Map<String, Map<String, Object>> info =
      new HashMap<String, Map<String, Object>>();
  final List<DatanodeDescriptor> decomNodeList = blockManager.getDatanodeManager(
      ).getDecommissioningNodes();
  for (DatanodeDescriptor node : decomNodeList) {
    // Attribute keys below form the JMX/JSON contract; do not rename.
    Map<String, Object> innerinfo = ImmutableMap
        .<String, Object> builder()
        .put("xferaddr", node.getXferAddr())
        .put("underReplicatedBlocks",
            node.decommissioningStatus.getUnderReplicatedBlocks())
        .put("decommissionOnlyReplicas",
            node.decommissioningStatus.getDecommissionOnlyReplicas())
        .put("underReplicateInOpenFiles",
            node.decommissioningStatus.getUnderReplicatedInOpenFiles())
        .build();
    info.put(node.getHostName(), innerinfo);
  }
  return JSON.toString(info);
}
7708
/** @return seconds elapsed since the node's last update time. */
private long getLastContact(DatanodeDescriptor alivenode) {
  return (Time.now() - alivenode.getLastUpdate())/1000;
}
7712
/** @return the node's DFS used space. */
private long getDfsUsed(DatanodeDescriptor alivenode) {
  return alivenode.getDfsUsed();
}
7716
/** @return the cluster ID recorded in the FSImage storage. */
@Override // NameNodeMXBean
public String getClusterId() {
  return getFSImage().getStorage().getClusterID();
}
7721
/** @return this namesystem's block pool ID. */
@Override // NameNodeMXBean
public String getBlockPoolId() {
  return blockPoolId;
}
7726
7727 @Override // NameNodeMXBean
7728 public String getNameDirStatuses() {
7729 Map<String, Map<File, StorageDirType>> statusMap =
7730 new HashMap<String, Map<File, StorageDirType>>();
7731
7732 Map<File, StorageDirType> activeDirs = new HashMap<File, StorageDirType>();
7733 for (Iterator<StorageDirectory> it
7734 = getFSImage().getStorage().dirIterator(); it.hasNext();) {
7735 StorageDirectory st = it.next();
7736 activeDirs.put(st.getRoot(), st.getStorageDirType());
7737 }
7738 statusMap.put("active", activeDirs);
7739
7740 List<Storage.StorageDirectory> removedStorageDirs
7741 = getFSImage().getStorage().getRemovedStorageDirs();
7742 Map<File, StorageDirType> failedDirs = new HashMap<File, StorageDirType>();
7743 for (StorageDirectory st : removedStorageDirs) {
7744 failedDirs.put(st.getRoot(), st.getStorageDirType());
7745 }
7746 statusMap.put("failed", failedDirs);
7747
7748 return JSON.toString(statusMap);
7749 }
7750
/**
 * @return a JSON map under key "nodeUsage" with min/median/max/stdDev of
 * per-datanode DFS-used percentages across live nodes, formatted as
 * percentage strings. All values are 0 when there are no live nodes.
 */
@Override // NameNodeMXBean
public String getNodeUsage() {
  float median = 0;
  float max = 0;
  float min = 0;
  float dev = 0;

  final Map<String, Map<String,Object>> info =
      new HashMap<String, Map<String,Object>>();
  final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
  blockManager.getDatanodeManager().fetchDatanodes(live, null, true);

  if (live.size() > 0) {
    // totalDfsUsed becomes the mean usage percentage after the division
    float totalDfsUsed = 0;
    float[] usages = new float[live.size()];
    int i = 0;
    for (DatanodeDescriptor dn : live) {
      usages[i++] = dn.getDfsUsedPercent();
      totalDfsUsed += dn.getDfsUsedPercent();
    }
    totalDfsUsed /= live.size();
    Arrays.sort(usages);
    median = usages[usages.length / 2];
    max = usages[usages.length - 1];
    min = usages[0];

    // population standard deviation around the mean
    for (i = 0; i < usages.length; i++) {
      dev += (usages[i] - totalDfsUsed) * (usages[i] - totalDfsUsed);
    }
    dev = (float) Math.sqrt(dev / usages.length);
  }

  final Map<String, Object> innerInfo = new HashMap<String, Object>();
  innerInfo.put("min", StringUtils.format("%.2f%%", min));
  innerInfo.put("median", StringUtils.format("%.2f%%", median));
  innerInfo.put("max", StringUtils.format("%.2f%%", max));
  innerInfo.put("stdDev", StringUtils.format("%.2f%%", dev));
  info.put("nodeUsage", innerInfo);

  return JSON.toString(info);
}
7792
/**
 * @return a JSON list describing each journal (required/disabled/manager
 * flags plus the current stream state) of the edit log, or an empty list
 * when no edit log is available.
 */
@Override // NameNodeMXBean
public String getNameJournalStatus() {
  List<Map<String, String>> jasList = new ArrayList<Map<String, String>>();
  FSEditLog log = getFSImage().getEditLog();
  if (log != null) {
    boolean openForWrite = log.isOpenForWrite();
    for (JournalAndStream jas : log.getJournals()) {
      final Map<String, String> jasMap = new HashMap<String, String>();
      String manager = jas.getManager().toString();

      jasMap.put("required", String.valueOf(jas.isRequired()));
      jasMap.put("disabled", String.valueOf(jas.isDisabled()));
      jasMap.put("manager", manager);

      // stream status: failed, a write-side report, idle, or read-only
      if (jas.isDisabled()) {
        jasMap.put("stream", "Failed");
      } else if (openForWrite) {
        EditLogOutputStream elos = jas.getCurrentStream();
        if (elos != null) {
          jasMap.put("stream", elos.generateReport());
        } else {
          jasMap.put("stream", "not currently writing");
        }
      } else {
        jasMap.put("stream", "open for read");
      }
      jasList.add(jasMap);
    }
  }
  return JSON.toString(jasList);
}
7824
/**
 * @return a JSON map with the last applied-or-written transaction id and
 * the most recent checkpoint transaction id.
 */
@Override // NameNodeMXBean
public String getJournalTransactionInfo() {
  Map<String, String> txnIdMap = new HashMap<String, String>();
  txnIdMap.put("LastAppliedOrWrittenTxId",
      Long.toString(this.getFSImage().getLastAppliedOrWrittenTxId()));
  txnIdMap.put("MostRecentCheckpointTxId",
      Long.toString(this.getFSImage().getMostRecentCheckpointTxId()));
  return JSON.toString(txnIdMap);
}
7834
/** @return the namenode start time as a string. */
@Override // NameNodeMXBean
public String getNNStarted() {
  return getStartTime().toString();
}
7839
/** @return build date, user and branch from the version info. */
@Override // NameNodeMXBean
public String getCompileInfo() {
  return VersionInfo.getDate() + " by " + VersionInfo.getUser() +
      " from " + VersionInfo.getBranch();
}
7845
/** @return the block manager. */
public BlockManager getBlockManager() {
  return blockManager;
}
/** @return the FSDirectory. */
public FSDirectory getFSDirectory() {
  return dir;
}
/** Set the FSDirectory. For test use only. */
@VisibleForTesting
public void setFSDirectory(FSDirectory dir) {
  this.dir = dir;
}
/** @return the cache manager. */
public CacheManager getCacheManager() {
  return cacheManager;
}
7863
7864 @Override // NameNodeMXBean
7865 public String getCorruptFiles() {
7866 List<String> list = new ArrayList<String>();
7867 Collection<FSNamesystem.CorruptFileBlockInfo> corruptFileBlocks;
7868 try {
7869 corruptFileBlocks = listCorruptFileBlocks("/", null);
7870 int corruptFileCount = corruptFileBlocks.size();
7871 if (corruptFileCount != 0) {
7872 for (FSNamesystem.CorruptFileBlockInfo c : corruptFileBlocks) {
7873 list.add(c.toString());
7874 }
7875 }
7876 } catch (IOException e) {
7877 LOG.warn("Get corrupt file blocks returned error: " + e.getMessage());
7878 }
7879 return JSON.toString(list);
7880 }
7881
/** @return number of distinct datanode software versions in the cluster. */
@Override //NameNodeMXBean
public int getDistinctVersionCount() {
  return blockManager.getDatanodeManager().getDatanodesSoftwareVersions()
    .size();
}
7887
/** @return map of datanode software version to node count. */
@Override //NameNodeMXBean
public Map<String, Integer> getDistinctVersions() {
  return blockManager.getDatanodeManager().getDatanodesSoftwareVersions();
}
7892
/** @return this namenode's software version string. */
@Override //NameNodeMXBean
public String getSoftwareVersion() {
  return VersionInfo.getVersion();
}
7897
/**
 * Verifies that the given identifier and password are valid and match.
 * @param identifier Token identifier.
 * @param password Password in the token.
 * @throws InvalidToken when the token does not verify
 * @throws RetriableException instead, when verification fails while this
 *         namenode is still transitioning to active (so the client retries)
 */
public synchronized void verifyToken(DelegationTokenIdentifier identifier,
    byte[] password) throws InvalidToken, RetriableException {
  try {
    getDelegationTokenSecretManager().verifyToken(identifier, password);
  } catch (InvalidToken it) {
    if (inTransitionToActive()) {
      // if the namesystem is currently in the middle of transition to
      // active, let the client retry rather than fail outright
      throw new RetriableException(it);
    }
    throw it;
  }
}
7914
7915 @Override
7916 public boolean isGenStampInFuture(Block block) {
7917 if (isLegacyBlock(block)) {
7918 return block.getGenerationStamp() > getGenerationStampV1();
7919 } else {
7920 return block.getGenerationStamp() > getGenerationStampV2();
7921 }
7922 }
7923
/** @return the edit log tailer. For test use only. */
@VisibleForTesting
public EditLogTailer getEditLogTailer() {
  return editLogTailer;
}
7928
/** Test-only setter to inject a mock/stub edit log tailer. */
@VisibleForTesting
public void setEditLogTailerForTests(EditLogTailer tailer) {
  this.editLogTailer = tailer;
}
7933
/** Test-only setter replacing the coarse-grained namesystem lock. */
@VisibleForTesting
void setFsLockForTests(ReentrantReadWriteLock lock) {
  this.fsLock.coarseLock = lock;
}
7938
/** Test-only accessor for the coarse-grained namesystem lock. */
@VisibleForTesting
public ReentrantReadWriteLock getFsLockForTests() {
  return fsLock.coarseLock;
}
7943
/** Test-only accessor for the long-read lock. */
@VisibleForTesting
public ReentrantLock getLongReadLockForTests() {
  return fsLock.longReadLock;
}
7948
/** Test-only accessor for the current safe mode state (may be null). */
@VisibleForTesting
public SafeModeInfo getSafeModeInfoForTests() {
  return safeMode;
}
7953
/** Test-only setter to inject a NameNode resource checker. */
@VisibleForTesting
public void setNNResourceChecker(NameNodeResourceChecker nnResourceChecker) {
  this.nnResourceChecker = nnResourceChecker;
}
7958
/** Delegates to the DatanodeManager's stale-node-avoidance policy for writes. */
@Override
public boolean isAvoidingStaleDataNodesForWrite() {
  return this.blockManager.getDatanodeManager()
      .shouldAvoidStaleDataNodesForWrite();
}
7964
/** Number of datanodes currently in service, per datanode statistics. */
@Override // FSClusterStats
public int getNumDatanodesInService() {
  return datanodeStatistics.getNumDatanodesInService();
}
7969
7970 @Override // for block placement strategy
7971 public double getInServiceXceiverAverage() {
7972 double avgLoad = 0;
7973 final int nodes = getNumDatanodesInService();
7974 if (nodes != 0) {
7975 final int xceivers = datanodeStatistics.getInServiceXceiverCount();
7976 avgLoad = (double)xceivers/nodes;
7977 }
7978 return avgLoad;
7979 }
7980
/** Accessor for the namesystem's snapshot manager. */
public SnapshotManager getSnapshotManager() {
  return snapshotManager;
}
7984
/**
 * Allow snapshot on a directory. Superuser-only; marks the directory
 * snapshottable and records the operation in the edit log.
 */
void allowSnapshot(String path) throws SafeModeException, IOException {
  // Pre-check outside the lock; repeated under the lock since the
  // operation category / HA state can change in between.
  checkOperation(OperationCategory.WRITE);
  writeLock();
  try {
    checkOperation(OperationCategory.WRITE);
    checkNameNodeSafeMode("Cannot allow snapshot for " + path);
    checkSuperuserPrivilege();

    // The directory tree has its own lock, taken inside the FSN write lock.
    dir.writeLock();
    try {
      snapshotManager.setSnapshottable(path, true);
    } finally {
      dir.writeUnlock();
    }
    getEditLog().logAllowSnapshot(path);
  } finally {
    writeUnlock();
  }
  // Edit log sync happens after releasing the namesystem write lock.
  getEditLog().logSync();

  if (auditLog.isInfoEnabled() && isExternalInvocation()) {
    logAuditEvent(true, "allowSnapshot", path, null, null);
  }
}
8010
/**
 * Disallow snapshot on a directory. Superuser-only; clears the
 * snapshottable flag and records the operation in the edit log.
 */
void disallowSnapshot(String path) throws SafeModeException, IOException {
  // Pre-check outside the lock; repeated under the lock below.
  checkOperation(OperationCategory.WRITE);
  writeLock();
  try {
    checkOperation(OperationCategory.WRITE);
    checkNameNodeSafeMode("Cannot disallow snapshot for " + path);
    checkSuperuserPrivilege();

    dir.writeLock();
    try {
      snapshotManager.resetSnapshottable(path);
    } finally {
      dir.writeUnlock();
    }
    getEditLog().logDisallowSnapshot(path);
  } finally {
    writeUnlock();
  }
  // Edit log sync happens after releasing the namesystem write lock.
  getEditLog().logSync();

  if (auditLog.isInfoEnabled() && isExternalInvocation()) {
    logAuditEvent(true, "disallowSnapshot", path, null, null);
  }
}
8036
8037 /**
8038 * Create a snapshot
8039 * @param snapshotRoot The directory path where the snapshot is taken
8040 * @param snapshotName The name of the snapshot
8041 */
8042 String createSnapshot(String snapshotRoot, String snapshotName)
8043 throws SafeModeException, IOException {
8044 checkOperation(OperationCategory.WRITE);
8045 final FSPermissionChecker pc = getPermissionChecker();
8046 CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
8047 null);
8048 if (cacheEntry != null && cacheEntry.isSuccess()) {
8049 return (String) cacheEntry.getPayload();
8050 }
8051 String snapshotPath = null;
8052 writeLock();
8053 try {
8054 checkOperation(OperationCategory.WRITE);
8055 checkNameNodeSafeMode("Cannot create snapshot for " + snapshotRoot);
8056 if (isPermissionEnabled) {
8057 checkOwner(pc, snapshotRoot);
8058 }
8059
8060 if (snapshotName == null || snapshotName.isEmpty()) {
8061 snapshotName = Snapshot.generateDefaultSnapshotName();
8062 }
8063 if(snapshotName != null){
8064 if (!DFSUtil.isValidNameForComponent(snapshotName)) {
8065 throw new InvalidPathException("Invalid snapshot name: "
8066 + snapshotName);
8067 }
8068 }
8069 dir.verifySnapshotName(snapshotName, snapshotRoot);
8070 dir.writeLock();
8071 try {
8072 snapshotPath = snapshotManager.createSnapshot(snapshotRoot, snapshotName);
8073 } finally {
8074 dir.writeUnlock();
8075 }
8076 getEditLog().logCreateSnapshot(snapshotRoot, snapshotName,
8077 cacheEntry != null);
8078 } finally {
8079 writeUnlock();
8080 RetryCache.setState(cacheEntry, snapshotPath != null, snapshotPath);
8081 }
8082 getEditLog().logSync();
8083
8084 if (auditLog.isInfoEnabled() && isExternalInvocation()) {
8085 logAuditEvent(true, "createSnapshot", snapshotRoot, snapshotPath, null);
8086 }
8087 return snapshotPath;
8088 }
8089
8090 /**
8091 * Rename a snapshot
8092 * @param path The directory path where the snapshot was taken
8093 * @param snapshotOldName Old snapshot name
8094 * @param snapshotNewName New snapshot name
8095 * @throws SafeModeException
8096 * @throws IOException
8097 */
8098 void renameSnapshot(String path, String snapshotOldName,
8099 String snapshotNewName) throws SafeModeException, IOException {
8100 checkOperation(OperationCategory.WRITE);
8101 final FSPermissionChecker pc = getPermissionChecker();
8102 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
8103 if (cacheEntry != null && cacheEntry.isSuccess()) {
8104 return; // Return previous response
8105 }
8106 writeLock();
8107 boolean success = false;
8108 try {
8109 checkOperation(OperationCategory.WRITE);
8110 checkNameNodeSafeMode("Cannot rename snapshot for " + path);
8111 if (isPermissionEnabled) {
8112 checkOwner(pc, path);
8113 }
8114 dir.verifySnapshotName(snapshotNewName, path);
8115
8116 snapshotManager.renameSnapshot(path, snapshotOldName, snapshotNewName);
8117 getEditLog().logRenameSnapshot(path, snapshotOldName, snapshotNewName,
8118 cacheEntry != null);
8119 success = true;
8120 } finally {
8121 writeUnlock();
8122 RetryCache.setState(cacheEntry, success);
8123 }
8124 getEditLog().logSync();
8125
8126 if (auditLog.isInfoEnabled() && isExternalInvocation()) {
8127 String oldSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotOldName);
8128 String newSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotNewName);
8129 logAuditEvent(true, "renameSnapshot", oldSnapshotRoot, newSnapshotRoot, null);
8130 }
8131 }
8132
8133 /**
8134 * Get the list of snapshottable directories that are owned
8135 * by the current user. Return all the snapshottable directories if the
8136 * current user is a super user.
8137 * @return The list of all the current snapshottable directories
8138 * @throws IOException
8139 */
8140 public SnapshottableDirectoryStatus[] getSnapshottableDirListing()
8141 throws IOException {
8142 SnapshottableDirectoryStatus[] status = null;
8143 checkOperation(OperationCategory.READ);
8144 final FSPermissionChecker checker = getPermissionChecker();
8145 readLock();
8146 try {
8147 checkOperation(OperationCategory.READ);
8148 final String user = checker.isSuperUser()? null : checker.getUser();
8149 status = snapshotManager.getSnapshottableDirListing(user);
8150 } finally {
8151 readUnlock();
8152 }
8153 if (auditLog.isInfoEnabled() && isExternalInvocation()) {
8154 logAuditEvent(true, "listSnapshottableDirectory", null, null, null);
8155 }
8156 return status;
8157 }
8158
8159 /**
8160 * Get the difference between two snapshots (or between a snapshot and the
8161 * current status) of a snapshottable directory.
8162 *
8163 * @param path The full path of the snapshottable directory.
8164 * @param fromSnapshot Name of the snapshot to calculate the diff from. Null
8165 * or empty string indicates the current tree.
8166 * @param toSnapshot Name of the snapshot to calculated the diff to. Null or
8167 * empty string indicates the current tree.
8168 * @return A report about the difference between {@code fromSnapshot} and
8169 * {@code toSnapshot}. Modified/deleted/created/renamed files and
8170 * directories belonging to the snapshottable directories are listed
8171 * and labeled as M/-/+/R respectively.
8172 * @throws IOException
8173 */
8174 SnapshotDiffReport getSnapshotDiffReport(String path,
8175 String fromSnapshot, String toSnapshot) throws IOException {
8176 SnapshotDiffReport diffs;
8177 checkOperation(OperationCategory.READ);
8178 final FSPermissionChecker pc = getPermissionChecker();
8179 readLock();
8180 try {
8181 checkOperation(OperationCategory.READ);
8182 if (isPermissionEnabled) {
8183 checkSubtreeReadPermission(pc, path, fromSnapshot);
8184 checkSubtreeReadPermission(pc, path, toSnapshot);
8185 }
8186 diffs = snapshotManager.diff(path, fromSnapshot, toSnapshot);
8187 } finally {
8188 readUnlock();
8189 }
8190
8191 if (auditLog.isInfoEnabled() && isExternalInvocation()) {
8192 logAuditEvent(true, "computeSnapshotDiff", null, null, null);
8193 }
8194 return diffs;
8195 }
8196
8197 private void checkSubtreeReadPermission(final FSPermissionChecker pc,
8198 final String snapshottablePath, final String snapshot)
8199 throws AccessControlException, UnresolvedLinkException {
8200 final String fromPath = snapshot == null?
8201 snapshottablePath: Snapshot.getSnapshotPath(snapshottablePath, snapshot);
8202 checkPermission(pc, fromPath, false, null, null, FsAction.READ, FsAction.READ);
8203 }
8204
8205 /**
8206 * Delete a snapshot of a snapshottable directory
8207 * @param snapshotRoot The snapshottable directory
8208 * @param snapshotName The name of the to-be-deleted snapshot
8209 * @throws SafeModeException
8210 * @throws IOException
8211 */
8212 void deleteSnapshot(String snapshotRoot, String snapshotName)
8213 throws SafeModeException, IOException {
8214 checkOperation(OperationCategory.WRITE);
8215 final FSPermissionChecker pc = getPermissionChecker();
8216
8217 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
8218 if (cacheEntry != null && cacheEntry.isSuccess()) {
8219 return; // Return previous response
8220 }
8221 boolean success = false;
8222 BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
8223 writeLock();
8224 try {
8225 checkOperation(OperationCategory.WRITE);
8226 checkNameNodeSafeMode("Cannot delete snapshot for " + snapshotRoot);
8227 if (isPermissionEnabled) {
8228 checkOwner(pc, snapshotRoot);
8229 }
8230
8231 List<INode> removedINodes = new ChunkedArrayList<INode>();
8232 dir.writeLock();
8233 try {
8234 snapshotManager.deleteSnapshot(snapshotRoot, snapshotName,
8235 collectedBlocks, removedINodes);
8236 dir.removeFromInodeMap(removedINodes);
8237 } finally {
8238 dir.writeUnlock();
8239 }
8240 removedINodes.clear();
8241 getEditLog().logDeleteSnapshot(snapshotRoot, snapshotName,
8242 cacheEntry != null);
8243 success = true;
8244 } finally {
8245 writeUnlock();
8246 RetryCache.setState(cacheEntry, success);
8247 }
8248 getEditLog().logSync();
8249
8250 removeBlocks(collectedBlocks);
8251 collectedBlocks.clear();
8252
8253 if (auditLog.isInfoEnabled() && isExternalInvocation()) {
8254 String rootPath = Snapshot.getSnapshotPath(snapshotRoot, snapshotName);
8255 logAuditEvent(true, "deleteSnapshot", rootPath, null, null);
8256 }
8257 }
8258
8259 /**
8260 * Remove a list of INodeDirectorySnapshottable from the SnapshotManager
8261 * @param toRemove the list of INodeDirectorySnapshottable to be removed
8262 */
8263 void removeSnapshottableDirs(List<INodeDirectory> toRemove) {
8264 if (snapshotManager != null) {
8265 snapshotManager.removeSnapshottable(toRemove);
8266 }
8267 }
8268
/**
 * Returns the current rolling upgrade info (null if none in progress),
 * refreshing its rollback-image flag from the FSImage first.
 * Superuser-only.
 */
RollingUpgradeInfo queryRollingUpgrade() throws IOException {
  checkSuperuserPrivilege();
  checkOperation(OperationCategory.READ);
  readLock();
  try {
    if (rollingUpgradeInfo != null) {
      boolean hasRollbackImage = this.getFSImage().hasRollbackFSImage();
      rollingUpgradeInfo.setCreatedRollbackImages(hasRollbackImage);
    }
    return rollingUpgradeInfo;
  } finally {
    readUnlock();
  }
}
8283
/**
 * Starts a rolling upgrade. Idempotent: if one is already in progress the
 * existing info is returned. Superuser-only. The non-HA path requires safe
 * mode (to save a rollback image); the HA path requires NOT being in safe
 * mode.
 */
RollingUpgradeInfo startRollingUpgrade() throws IOException {
  checkSuperuserPrivilege();
  checkOperation(OperationCategory.WRITE);
  writeLock();
  try {
    checkOperation(OperationCategory.WRITE);
    if (isRollingUpgrade()) {
      return rollingUpgradeInfo;
    }
    long startTime = now();
    if (!haEnabled) { // for non-HA, we require NN to be in safemode
      startRollingUpgradeInternalForNonHA(startTime);
    } else { // for HA, NN cannot be in safemode
      checkNameNodeSafeMode("Failed to start rolling upgrade");
      startRollingUpgradeInternal(startTime);
    }

    getEditLog().logStartRollingUpgrade(rollingUpgradeInfo.getStartTime());
    if (haEnabled) {
      // roll the edit log to make sure the standby NameNode can tail
      getFSImage().rollEditLog();
    }
  } finally {
    writeUnlock();
  }

  getEditLog().logSync();
  if (auditLog.isInfoEnabled() && isExternalInvocation()) {
    logAuditEvent(true, "startRollingUpgrade", null, null, null);
  }
  return rollingUpgradeInfo;
}
8316
8317 /**
8318 * Update internal state to indicate that a rolling upgrade is in progress.
8319 * @param startTime rolling upgrade start time
8320 */
8321 void startRollingUpgradeInternal(long startTime)
8322 throws IOException {
8323 checkRollingUpgrade("start rolling upgrade");
8324 getFSImage().checkUpgrade(this);
8325 setRollingUpgradeInfo(false, startTime);
8326 }
8327
8328 /**
8329 * Update internal state to indicate that a rolling upgrade is in progress for
8330 * non-HA setup. This requires the namesystem is in SafeMode and after doing a
8331 * checkpoint for rollback the namesystem will quit the safemode automatically
8332 */
8333 private void startRollingUpgradeInternalForNonHA(long startTime)
8334 throws IOException {
8335 Preconditions.checkState(!haEnabled);
8336 if (!isInSafeMode()) {
8337 throw new IOException("Safe mode should be turned ON "
8338 + "in order to create namespace image.");
8339 }
8340 checkRollingUpgrade("start rolling upgrade");
8341 getFSImage().checkUpgrade(this);
8342 // in non-HA setup, we do an extra checkpoint to generate a rollback image
8343 getFSImage().saveNamespace(this, NameNodeFile.IMAGE_ROLLBACK, null);
8344 LOG.info("Successfully saved namespace for preparing rolling upgrade.");
8345
8346 // leave SafeMode automatically
8347 setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
8348 setRollingUpgradeInfo(true, startTime);
8349 }
8350
/** Installs a fresh rolling upgrade info record (finalize time 0 = pending). */
void setRollingUpgradeInfo(boolean createdRollbackImages, long startTime) {
  rollingUpgradeInfo = new RollingUpgradeInfo(blockPoolId,
      createdRollbackImages, startTime, 0L);
}
8355
/** Updates the rollback-image flag; no-op if no rolling upgrade is active. */
public void setCreatedRollbackImages(boolean created) {
  if (rollingUpgradeInfo != null) {
    rollingUpgradeInfo.setCreatedRollbackImages(created);
  }
}
8361
/** Current rolling upgrade info, or null if none is in progress. */
public RollingUpgradeInfo getRollingUpgradeInfo() {
  return rollingUpgradeInfo;
}
8365
/** Whether a rollback fsimage still needs to be created. */
public boolean isNeedRollbackFsImage() {
  return needRollbackFsImage;
}
8369
/** Sets whether a rollback fsimage still needs to be created. */
public void setNeedRollbackFsImage(boolean needRollbackFsImage) {
  this.needRollbackFsImage = needRollbackFsImage;
}
8373
8374 @Override // NameNodeMXBean
8375 public RollingUpgradeInfo.Bean getRollingUpgradeStatus() {
8376 readLock();
8377 try {
8378 RollingUpgradeInfo upgradeInfo = getRollingUpgradeInfo();
8379 if (upgradeInfo != null) {
8380 return new RollingUpgradeInfo.Bean(upgradeInfo);
8381 }
8382 return null;
8383 } finally {
8384 readUnlock();
8385 }
8386 }
8387
/** Is rolling upgrade in progress? True iff upgrade info is present. */
public boolean isRollingUpgrade() {
  return rollingUpgradeInfo != null;
}
8392
8393 void checkRollingUpgrade(String action) throws RollingUpgradeException {
8394 if (isRollingUpgrade()) {
8395 throw new RollingUpgradeException("Failed to " + action
8396 + " since a rolling upgrade is already in progress."
8397 + " Existing rolling upgrade info:\n" + rollingUpgradeInfo);
8398 }
8399 }
8400
8401 void finalizeRollingUpgrade() throws IOException {
8402 checkSuperuserPrivilege();
8403 checkOperation(OperationCategory.WRITE);
8404 writeLock();
8405 final RollingUpgradeInfo returnInfo;
8406 try {
8407 checkOperation(OperationCategory.WRITE);
8408 if (!isRollingUpgrade()) {
8409 return;
8410 }
8411 checkNameNodeSafeMode("Failed to finalize rolling upgrade");
8412
8413 returnInfo = finalizeRollingUpgradeInternal(now());
8414 getEditLog().logFinalizeRollingUpgrade(returnInfo.getFinalizeTime());
8415 if (haEnabled) {
8416 // roll the edit log to make sure the standby NameNode can tail
8417 getFSImage().rollEditLog();
8418 }
8419 getFSImage().updateStorageVersion();
8420 getFSImage().renameCheckpoint(NameNodeFile.IMAGE_ROLLBACK,
8421 NameNodeFile.IMAGE);
8422 } finally {
8423 writeUnlock();
8424 }
8425
8426 if (!haEnabled) {
8427 // Sync not needed for ha since the edit was rolled after logging.
8428 getEditLog().logSync();
8429 }
8430
8431 if (auditLog.isInfoEnabled() && isExternalInvocation()) {
8432 logAuditEvent(true, "finalizeRollingUpgrade", null, null, null);
8433 }
8434 return;
8435 }
8436
8437 RollingUpgradeInfo finalizeRollingUpgradeInternal(long finalizeTime)
8438 throws RollingUpgradeException {
8439 final long startTime = rollingUpgradeInfo.getStartTime();
8440 rollingUpgradeInfo = null;
8441 return new RollingUpgradeInfo(blockPoolId, false, startTime, finalizeTime);
8442 }
8443
/**
 * Add a new cache directive. Idempotent via the retry cache (a retried RPC
 * replays the previously assigned ID).
 *
 * @param directive the directive to add; must not already carry an ID
 * @param flags when FORCE is absent, waits for a pending cache rescan first
 * @return the ID assigned to the new directive
 * @throws IOException if in safe mode, if the directive carries an ID, or if
 *           the CacheManager rejects it
 */
long addCacheDirective(CacheDirectiveInfo directive, EnumSet<CacheFlag> flags)
    throws IOException {
  checkOperation(OperationCategory.WRITE);
  final FSPermissionChecker pc = isPermissionEnabled ?
      getPermissionChecker() : null;
  CacheEntryWithPayload cacheEntry =
      RetryCache.waitForCompletion(retryCache, null);
  if (cacheEntry != null && cacheEntry.isSuccess()) {
    return (Long) cacheEntry.getPayload();
  }
  boolean success = false;
  if (!flags.contains(CacheFlag.FORCE)) {
    cacheManager.waitForRescanIfNeeded();
  }
  writeLock();
  String effectiveDirectiveStr = null;
  Long result = null;
  try {
    checkOperation(OperationCategory.WRITE);  // re-check under the lock
    if (isInSafeMode()) {
      throw new SafeModeException(
          "Cannot add cache directive", safeMode);
    }
    if (directive.getId() != null) {
      throw new IOException("addDirective: you cannot specify an ID " +
          "for this operation.");
    }
    CacheDirectiveInfo effectiveDirective =
        cacheManager.addDirective(directive, pc, flags);
    getEditLog().logAddCacheDirectiveInfo(effectiveDirective,
        cacheEntry != null);
    result = effectiveDirective.getId();
    effectiveDirectiveStr = effectiveDirective.toString();
    success = true;
  } finally {
    writeUnlock();
    // Sync, audit and retry-cache update all happen after unlocking.
    if (success) {
      getEditLog().logSync();
    }
    if (isAuditEnabled() && isExternalInvocation()) {
      logAuditEvent(success, "addCacheDirective", effectiveDirectiveStr, null, null);
    }
    RetryCache.setState(cacheEntry, success, result);
  }
  return result;
}
8490
8491 void modifyCacheDirective(CacheDirectiveInfo directive,
8492 EnumSet<CacheFlag> flags) throws IOException {
8493 checkOperation(OperationCategory.WRITE);
8494 final FSPermissionChecker pc = isPermissionEnabled ?
8495 getPermissionChecker() : null;
8496 boolean success = false;
8497 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
8498 if (cacheEntry != null && cacheEntry.isSuccess()) {
8499 return;
8500 }
8501 if (!flags.contains(CacheFlag.FORCE)) {
8502 cacheManager.waitForRescanIfNeeded();
8503 }
8504 writeLock();
8505 try {
8506 checkOperation(OperationCategory.WRITE);
8507 if (isInSafeMode()) {
8508 throw new SafeModeException(
8509 "Cannot add cache directive", safeMode);
8510 }
8511 cacheManager.modifyDirective(directive, pc, flags);
8512 getEditLog().logModifyCacheDirectiveInfo(directive,
8513 cacheEntry != null);
8514 success = true;
8515 } finally {
8516 writeUnlock();
8517 if (success) {
8518 getEditLog().logSync();
8519 }
8520 if (isAuditEnabled() && isExternalInvocation()) {
8521 String idStr = "{id: " + directive.getId().toString() + "}";
8522 logAuditEvent(success, "modifyCacheDirective", idStr, directive.toString(), null);
8523 }
8524 RetryCache.setState(cacheEntry, success);
8525 }
8526 }
8527
/**
 * Remove the cache directive with the given ID. Idempotent via the retry
 * cache.
 */
void removeCacheDirective(Long id) throws IOException {
  checkOperation(OperationCategory.WRITE);
  final FSPermissionChecker pc = isPermissionEnabled ?
      getPermissionChecker() : null;
  CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
  if (cacheEntry != null && cacheEntry.isSuccess()) {
    return;
  }
  boolean success = false;
  writeLock();
  try {
    checkOperation(OperationCategory.WRITE);  // re-check under the lock
    if (isInSafeMode()) {
      throw new SafeModeException(
          "Cannot remove cache directives", safeMode);
    }
    cacheManager.removeDirective(id, pc);
    getEditLog().logRemoveCacheDirectiveInfo(id, cacheEntry != null);
    success = true;
  } finally {
    writeUnlock();
    if (isAuditEnabled() && isExternalInvocation()) {
      String idStr = "{id: " + id.toString() + "}";
      logAuditEvent(success, "removeCacheDirective", idStr, null,
          null);
    }
    RetryCache.setState(cacheEntry, success);
  }
  // Only reached on success; exceptions propagate before this sync.
  getEditLog().logSync();
}
8558
/**
 * List cache directives starting after {@code startId}, filtered by
 * {@code filter}. Waits for a pending cache rescan before reading.
 */
BatchedListEntries<CacheDirectiveEntry> listCacheDirectives(
    long startId, CacheDirectiveInfo filter) throws IOException {
  checkOperation(OperationCategory.READ);
  final FSPermissionChecker pc = isPermissionEnabled ?
      getPermissionChecker() : null;
  BatchedListEntries<CacheDirectiveEntry> results;
  cacheManager.waitForRescanIfNeeded();
  readLock();
  boolean success = false;
  try {
    checkOperation(OperationCategory.READ);
    results =
        cacheManager.listCacheDirectives(startId, filter, pc);
    success = true;
  } finally {
    readUnlock();
    if (isAuditEnabled() && isExternalInvocation()) {
      // NOTE(review): filter.toString() would NPE if filter were null —
      // callers appear to always pass a non-null filter; confirm at call sites.
      logAuditEvent(success, "listCacheDirectives", filter.toString(), null,
          null);
    }
  }
  return results;
}
8582
/**
 * Add a new cache pool. Superuser-only (when permissions are enabled);
 * idempotent via the retry cache.
 */
public void addCachePool(CachePoolInfo req) throws IOException {
  checkOperation(OperationCategory.WRITE);
  final FSPermissionChecker pc = isPermissionEnabled ?
      getPermissionChecker() : null;
  CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
  if (cacheEntry != null && cacheEntry.isSuccess()) {
    return; // Return previous response
  }
  writeLock();
  boolean success = false;
  String poolInfoStr = null;
  try {
    checkOperation(OperationCategory.WRITE);  // re-check under the lock
    if (isInSafeMode()) {
      throw new SafeModeException(
          "Cannot add cache pool " + req.getPoolName(), safeMode);
    }
    if (pc != null) {
      pc.checkSuperuserPrivilege();
    }
    CachePoolInfo info = cacheManager.addCachePool(req);
    poolInfoStr = info.toString();
    getEditLog().logAddCachePool(info, cacheEntry != null);
    success = true;
  } finally {
    writeUnlock();
    if (isAuditEnabled() && isExternalInvocation()) {
      logAuditEvent(success, "addCachePool", poolInfoStr, null, null);
    }
    RetryCache.setState(cacheEntry, success);
  }

  getEditLog().logSync();
}
8617
/**
 * Modify an existing cache pool. Superuser-only (when permissions are
 * enabled); idempotent via the retry cache.
 */
public void modifyCachePool(CachePoolInfo req) throws IOException {
  checkOperation(OperationCategory.WRITE);
  final FSPermissionChecker pc =
      isPermissionEnabled ? getPermissionChecker() : null;
  CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
  if (cacheEntry != null && cacheEntry.isSuccess()) {
    return; // Return previous response
  }
  writeLock();
  boolean success = false;
  try {
    checkOperation(OperationCategory.WRITE);  // re-check under the lock
    if (isInSafeMode()) {
      throw new SafeModeException(
          "Cannot modify cache pool " + req.getPoolName(), safeMode);
    }
    if (pc != null) {
      pc.checkSuperuserPrivilege();
    }
    cacheManager.modifyCachePool(req);
    getEditLog().logModifyCachePool(req, cacheEntry != null);
    success = true;
  } finally {
    writeUnlock();
    if (isAuditEnabled() && isExternalInvocation()) {
      String poolNameStr = "{poolName: " + req.getPoolName() + "}";
      logAuditEvent(success, "modifyCachePool", poolNameStr, req.toString(), null);
    }
    RetryCache.setState(cacheEntry, success);
  }

  getEditLog().logSync();
}
8651
/**
 * Remove a cache pool by name. Superuser-only (when permissions are
 * enabled); idempotent via the retry cache.
 */
public void removeCachePool(String cachePoolName) throws IOException {
  checkOperation(OperationCategory.WRITE);
  final FSPermissionChecker pc =
      isPermissionEnabled ? getPermissionChecker() : null;
  CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
  if (cacheEntry != null && cacheEntry.isSuccess()) {
    return; // Return previous response
  }
  writeLock();
  boolean success = false;
  try {
    checkOperation(OperationCategory.WRITE);  // re-check under the lock
    if (isInSafeMode()) {
      throw new SafeModeException(
          "Cannot remove cache pool " + cachePoolName, safeMode);
    }
    if (pc != null) {
      pc.checkSuperuserPrivilege();
    }
    cacheManager.removeCachePool(cachePoolName);
    getEditLog().logRemoveCachePool(cachePoolName, cacheEntry != null);
    success = true;
  } finally {
    writeUnlock();
    if (isAuditEnabled() && isExternalInvocation()) {
      String poolNameStr = "{poolName: " + cachePoolName + "}";
      logAuditEvent(success, "removeCachePool", poolNameStr, null, null);
    }
    RetryCache.setState(cacheEntry, success);
  }

  getEditLog().logSync();
}
8685
/**
 * List cache pools in batches, continuing after {@code prevKey}. Waits for
 * a pending cache rescan before reading.
 */
public BatchedListEntries<CachePoolEntry> listCachePools(String prevKey)
    throws IOException {
  final FSPermissionChecker pc =
      isPermissionEnabled ? getPermissionChecker() : null;
  BatchedListEntries<CachePoolEntry> results;
  checkOperation(OperationCategory.READ);
  boolean success = false;
  cacheManager.waitForRescanIfNeeded();
  readLock();
  try {
    checkOperation(OperationCategory.READ);
    results = cacheManager.listCachePools(pc, prevKey);
    success = true;
  } finally {
    readUnlock();
    if (isAuditEnabled() && isExternalInvocation()) {
      logAuditEvent(success, "listCachePools", null, null, null);
    }
  }
  return results;
}
8707
/**
 * Merge the entries in {@code aclSpec} into the ACL on {@code srcArg}.
 * Requires ownership of the path; the resulting full ACL (not the delta)
 * is written to the edit log.
 */
void modifyAclEntries(final String srcArg, List<AclEntry> aclSpec)
    throws IOException {
  String src = srcArg;
  nnConf.checkAclsConfigFlag();  // fail fast when ACL support is disabled
  HdfsFileStatus resultingStat = null;
  FSPermissionChecker pc = getPermissionChecker();
  checkOperation(OperationCategory.WRITE);
  byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
  writeLock();
  try {
    checkOperation(OperationCategory.WRITE);  // re-check under the lock
    checkNameNodeSafeMode("Cannot modify ACL entries on " + src);
    src = resolvePath(src, pathComponents);
    checkOwner(pc, src);
    List<AclEntry> newAcl = dir.modifyAclEntries(src, aclSpec);
    getEditLog().logSetAcl(src, newAcl);
    resultingStat = getAuditFileInfo(src, false);
  } catch (AccessControlException e) {
    // Audit the failure using the caller-supplied (unresolved) path.
    logAuditEvent(false, "modifyAclEntries", srcArg);
    throw e;
  } finally {
    writeUnlock();
  }
  getEditLog().logSync();
  logAuditEvent(true, "modifyAclEntries", srcArg, null, resultingStat);
}
8734
/**
 * Remove the entries in {@code aclSpec} from the ACL on {@code srcArg}.
 * Requires ownership of the path; the resulting full ACL is written to the
 * edit log.
 */
void removeAclEntries(final String srcArg, List<AclEntry> aclSpec)
    throws IOException {
  String src = srcArg;
  nnConf.checkAclsConfigFlag();  // fail fast when ACL support is disabled
  HdfsFileStatus resultingStat = null;
  FSPermissionChecker pc = getPermissionChecker();
  checkOperation(OperationCategory.WRITE);
  byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
  writeLock();
  try {
    checkOperation(OperationCategory.WRITE);  // re-check under the lock
    checkNameNodeSafeMode("Cannot remove ACL entries on " + src);
    src = resolvePath(src, pathComponents);
    checkOwner(pc, src);
    List<AclEntry> newAcl = dir.removeAclEntries(src, aclSpec);
    getEditLog().logSetAcl(src, newAcl);
    resultingStat = getAuditFileInfo(src, false);
  } catch (AccessControlException e) {
    logAuditEvent(false, "removeAclEntries", srcArg);
    throw e;
  } finally {
    writeUnlock();
  }
  getEditLog().logSync();
  logAuditEvent(true, "removeAclEntries", srcArg, null, resultingStat);
}
8761
/**
 * Remove only the default ACL entries from {@code srcArg}. Requires
 * ownership; the resulting full ACL is written to the edit log.
 */
void removeDefaultAcl(final String srcArg) throws IOException {
  String src = srcArg;
  nnConf.checkAclsConfigFlag();  // fail fast when ACL support is disabled
  HdfsFileStatus resultingStat = null;
  FSPermissionChecker pc = getPermissionChecker();
  checkOperation(OperationCategory.WRITE);
  byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
  writeLock();
  try {
    checkOperation(OperationCategory.WRITE);  // re-check under the lock
    checkNameNodeSafeMode("Cannot remove default ACL entries on " + src);
    src = resolvePath(src, pathComponents);
    checkOwner(pc, src);
    List<AclEntry> newAcl = dir.removeDefaultAcl(src);
    getEditLog().logSetAcl(src, newAcl);
    resultingStat = getAuditFileInfo(src, false);
  } catch (AccessControlException e) {
    logAuditEvent(false, "removeDefaultAcl", srcArg);
    throw e;
  } finally {
    writeUnlock();
  }
  getEditLog().logSync();
  logAuditEvent(true, "removeDefaultAcl", srcArg, null, resultingStat);
}
8787
/**
 * Remove the entire ACL from {@code srcArg}. Requires ownership; an empty
 * entry list is written to the edit log to record the removal.
 */
void removeAcl(final String srcArg) throws IOException {
  String src = srcArg;
  nnConf.checkAclsConfigFlag();  // fail fast when ACL support is disabled
  HdfsFileStatus resultingStat = null;
  FSPermissionChecker pc = getPermissionChecker();
  checkOperation(OperationCategory.WRITE);
  byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
  writeLock();
  try {
    checkOperation(OperationCategory.WRITE);  // re-check under the lock
    checkNameNodeSafeMode("Cannot remove ACL on " + src);
    src = resolvePath(src, pathComponents);
    checkOwner(pc, src);
    dir.removeAcl(src);
    getEditLog().logSetAcl(src, AclFeature.EMPTY_ENTRY_LIST);
    resultingStat = getAuditFileInfo(src, false);
  } catch (AccessControlException e) {
    logAuditEvent(false, "removeAcl", srcArg);
    throw e;
  } finally {
    writeUnlock();
  }
  getEditLog().logSync();
  logAuditEvent(true, "removeAcl", srcArg, null, resultingStat);
}
8813
/**
 * Replace the ACL on {@code srcArg} with {@code aclSpec}. Requires
 * ownership; the resulting full ACL is written to the edit log.
 */
void setAcl(final String srcArg, List<AclEntry> aclSpec) throws IOException {
  String src = srcArg;
  nnConf.checkAclsConfigFlag();  // fail fast when ACL support is disabled
  HdfsFileStatus resultingStat = null;
  FSPermissionChecker pc = getPermissionChecker();
  checkOperation(OperationCategory.WRITE);
  byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
  writeLock();
  try {
    checkOperation(OperationCategory.WRITE);  // re-check under the lock
    checkNameNodeSafeMode("Cannot set ACL on " + src);
    src = resolvePath(src, pathComponents);
    checkOwner(pc, src);
    List<AclEntry> newAcl = dir.setAcl(src, aclSpec);
    getEditLog().logSetAcl(src, newAcl);
    resultingStat = getAuditFileInfo(src, false);
  } catch (AccessControlException e) {
    logAuditEvent(false, "setAcl", srcArg);
    throw e;
  } finally {
    writeUnlock();
  }
  getEditLog().logSync();
  logAuditEvent(true, "setAcl", srcArg, null, resultingStat);
}
8839
/**
 * Get the ACL status of {@code src}. Read-only; permission check applies
 * when permissions are enabled. Audited on both success and failure.
 */
AclStatus getAclStatus(String src) throws IOException {
  nnConf.checkAclsConfigFlag();  // fail fast when ACL support is disabled
  FSPermissionChecker pc = getPermissionChecker();
  checkOperation(OperationCategory.READ);
  byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
  boolean success = false;
  readLock();
  try {
    checkOperation(OperationCategory.READ);  // re-check under the lock
    src = resolvePath(src, pathComponents);
    if (isPermissionEnabled) {
      checkPermission(pc, src, false, null, null, null, null);
    }
    final AclStatus ret = dir.getAclStatus(src);
    success = true;
    return ret;
  } finally {
    readUnlock();
    logAuditEvent(success, "getAclStatus", src);
  }
}
8861
  /**
   * Create an encryption zone on directory src using the specified key.
   * Goes through the retry cache so a retried RPC returns the prior result
   * instead of being applied twice.
   *
   * @param src the path of a directory which will be the root of the
   * encryption zone. The directory must be empty.
   * @param keyName name of a key which must be present in the configured
   * KeyProvider.
   * @throws AccessControlException if the caller is not the superuser.
   * @throws UnresolvedLinkException if the path can't be resolved.
   * @throws SafeModeException if the Namenode is in safe mode.
   */
  void createEncryptionZone(final String src, final String keyName)
    throws IOException, UnresolvedLinkException,
      SafeModeException, AccessControlException {
    final CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }

    boolean success = false;
    try {
      // Validate the prerequisites before touching the namespace: a key
      // provider must be configured and a key name must be supplied.
      if (provider == null) {
        throw new IOException(
          "Can't create an encryption zone for " + src +
          " since no key provider is available.");
      }
      if (keyName == null || keyName.isEmpty()) {
        throw new IOException("Must specify a key name when creating an " +
            "encryption zone");
      }
      // The key must already exist in the provider.
      KeyProvider.Metadata metadata = provider.getMetadata(keyName);
      if (metadata == null) {
        /*
         * It would be nice if we threw something more specific than
         * IOException when the key is not found, but the KeyProvider API
         * doesn't provide for that. If that API is ever changed to throw
         * something more specific (e.g. UnknownKeyException) then we can
         * update this to match it, or better yet, just rethrow the
         * KeyProvider's exception.
         */
        throw new IOException("Key " + keyName + " doesn't exist.");
      }
      createEncryptionZoneInt(src, metadata.getCipher(),
          keyName, cacheEntry != null);
      success = true;
    } catch (AccessControlException e) {
      // Only access denials are audited here; the internal method audits
      // the success case.
      logAuditEvent(false, "createEncryptionZone", src);
      throw e;
    } finally {
      // Record the outcome so a retried RPC sees the same result.
      RetryCache.setState(cacheEntry, success);
    }
  }
8914
  /**
   * Does the work of creating an encryption zone: stamps the directory at
   * src with the encryption-zone xattr under the write lock, records the
   * xattr in the edit log, and emits a success audit event. Superuser only.
   *
   * @param srcArg root directory of the new zone
   * @param cipher cipher name recorded for the zone
   * @param keyName name of the key backing the zone
   * @param logRetryCache whether the edit log entry should carry the RPC
   *          retry-cache state
   * @throws IOException on safe mode, permission, or validation failures
   */
  private void createEncryptionZoneInt(final String srcArg, String cipher,
      String keyName, final boolean logRetryCache) throws IOException {
    String src = srcArg;
    HdfsFileStatus resultingStat = null;
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.WRITE);
    final byte[][] pathComponents =
      FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      // Re-check both conditions now that the write lock is held.
      checkSuperuserPrivilege();
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot create encryption zone on " + src);
      src = resolvePath(src, pathComponents);

      final CipherSuite suite = CipherSuite.convert(cipher);
      // For now this is hardcoded, as we only support one method.
      final CryptoProtocolVersion version =
          CryptoProtocolVersion.ENCRYPTION_ZONES;
      final XAttr ezXAttr = dir.createEncryptionZone(src, suite,
          version, keyName);
      List<XAttr> xAttrs = Lists.newArrayListWithCapacity(1);
      xAttrs.add(ezXAttr);
      // The zone is persisted as an xattr set on the directory.
      getEditLog().logSetXAttrs(src, xAttrs, logRetryCache);
      resultingStat = getAuditFileInfo(src, false);
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the write lock.
    getEditLog().logSync();
    logAuditEvent(true, "createEncryptionZone", srcArg, null, resultingStat);
  }
8946
8947 /**
8948 * Get the encryption zone for the specified path.
8949 *
8950 * @param srcArg the path of a file or directory to get the EZ for.
8951 * @return the EZ of the of the path or null if none.
8952 * @throws AccessControlException if the caller is not the superuser.
8953 * @throws UnresolvedLinkException if the path can't be resolved.
8954 */
8955 EncryptionZone getEZForPath(final String srcArg)
8956 throws AccessControlException, UnresolvedLinkException, IOException {
8957 String src = srcArg;
8958 HdfsFileStatus resultingStat = null;
8959 final byte[][] pathComponents =
8960 FSDirectory.getPathComponentsForReservedPath(src);
8961 boolean success = false;
8962 final FSPermissionChecker pc = getPermissionChecker();
8963 checkOperation(OperationCategory.READ);
8964 readLock();
8965 try {
8966 if (isPermissionEnabled) {
8967 checkPathAccess(pc, src, FsAction.READ);
8968 }
8969 checkOperation(OperationCategory.READ);
8970 src = resolvePath(src, pathComponents);
8971 final INodesInPath iip = dir.getINodesInPath(src, true);
8972 final EncryptionZone ret = dir.getEZForPath(iip);
8973 resultingStat = getAuditFileInfo(src, false);
8974 success = true;
8975 return ret;
8976 } finally {
8977 readUnlock();
8978 logAuditEvent(success, "getEZForPath", srcArg, null, resultingStat);
8979 }
8980 }
8981
8982 BatchedListEntries<EncryptionZone> listEncryptionZones(long prevId)
8983 throws IOException {
8984 boolean success = false;
8985 checkSuperuserPrivilege();
8986 checkOperation(OperationCategory.READ);
8987 readLock();
8988 try {
8989 checkSuperuserPrivilege();
8990 checkOperation(OperationCategory.READ);
8991 final BatchedListEntries<EncryptionZone> ret =
8992 dir.listEncryptionZones(prevId);
8993 success = true;
8994 return ret;
8995 } finally {
8996 readUnlock();
8997 logAuditEvent(success, "listEncryptionZones", null);
8998 }
8999 }
9000
9001 /**
9002 * Set xattr for a file or directory.
9003 *
9004 * @param src
9005 * - path on which it sets the xattr
9006 * @param xAttr
9007 * - xAttr details to set
9008 * @param flag
9009 * - xAttrs flags
9010 * @throws AccessControlException
9011 * @throws SafeModeException
9012 * @throws UnresolvedLinkException
9013 * @throws IOException
9014 */
9015 void setXAttr(String src, XAttr xAttr, EnumSet<XAttrSetFlag> flag)
9016 throws AccessControlException, SafeModeException,
9017 UnresolvedLinkException, IOException {
9018 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
9019 if (cacheEntry != null && cacheEntry.isSuccess()) {
9020 return; // Return previous response
9021 }
9022 boolean success = false;
9023 try {
9024 setXAttrInt(src, xAttr, flag, cacheEntry != null);
9025 success = true;
9026 } catch (AccessControlException e) {
9027 logAuditEvent(false, "setXAttr", src);
9028 throw e;
9029 } finally {
9030 RetryCache.setState(cacheEntry, success);
9031 }
9032 }
9033
  /**
   * Does the actual work of setting one extended attribute: validates size
   * and namespace permissions, applies the change under the write lock,
   * records it in the edit log, and emits a success audit event.
   *
   * @param srcArg path on which the xattr is set
   * @param xAttr the xattr to set
   * @param flag CREATE/REPLACE flags for the operation
   * @param logRetryCache whether the edit log entry should carry the RPC
   *          retry-cache state
   * @throws IOException if xattrs are disabled, the attr exceeds the size
   *           limit, the namenode is in safe mode, or access is denied
   */
  private void setXAttrInt(final String srcArg, XAttr xAttr,
      EnumSet<XAttrSetFlag> flag, boolean logRetryCache) throws IOException {
    String src = srcArg;
    nnConf.checkXAttrsConfigFlag();
    // Enforce the configured name+value size limit before taking the lock.
    checkXAttrSize(xAttr);
    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    // Namespace-level screening of the xattr for this caller/path.
    XAttrPermissionFilter.checkPermissionForApi(pc, xAttr,
        FSDirectory.isReservedRawName(src));
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      // Re-check now that the write lock is held.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set XAttr on " + src);
      src = resolvePath(src, pathComponents);
      checkXAttrChangeAccess(src, xAttr, pc);
      List<XAttr> xAttrs = Lists.newArrayListWithCapacity(1);
      xAttrs.add(xAttr);
      dir.setXAttrs(src, xAttrs, flag);
      getEditLog().logSetXAttrs(src, xAttrs, logRetryCache);
      resultingStat = getAuditFileInfo(src, false);
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the write lock.
    getEditLog().logSync();
    logAuditEvent(true, "setXAttr", srcArg, null, resultingStat);
  }
9062
9063 /**
9064 * Verifies that the combined size of the name and value of an xattr is within
9065 * the configured limit. Setting a limit of zero disables this check.
9066 */
9067 private void checkXAttrSize(XAttr xAttr) {
9068 if (nnConf.xattrMaxSize == 0) {
9069 return;
9070 }
9071 int size = xAttr.getName().getBytes(Charsets.UTF_8).length;
9072 if (xAttr.getValue() != null) {
9073 size += xAttr.getValue().length;
9074 }
9075 if (size > nnConf.xattrMaxSize) {
9076 throw new HadoopIllegalArgumentException(
9077 "The XAttr is too big. The maximum combined size of the"
9078 + " name and value is " + nnConf.xattrMaxSize
9079 + ", but the total size is " + size);
9080 }
9081 }
9082
9083 List<XAttr> getXAttrs(final String srcArg, List<XAttr> xAttrs)
9084 throws IOException {
9085 String src = srcArg;
9086 nnConf.checkXAttrsConfigFlag();
9087 FSPermissionChecker pc = getPermissionChecker();
9088 final boolean isRawPath = FSDirectory.isReservedRawName(src);
9089 boolean getAll = xAttrs == null || xAttrs.isEmpty();
9090 if (!getAll) {
9091 try {
9092 XAttrPermissionFilter.checkPermissionForApi(pc, xAttrs, isRawPath);
9093 } catch (AccessControlException e) {
9094 logAuditEvent(false, "getXAttrs", srcArg);
9095 throw e;
9096 }
9097 }
9098 checkOperation(OperationCategory.READ);
9099 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
9100 readLock();
9101 try {
9102 src = resolvePath(src, pathComponents);
9103 checkOperation(OperationCategory.READ);
9104 if (isPermissionEnabled) {
9105 checkPathAccess(pc, src, FsAction.READ);
9106 }
9107 List<XAttr> all = dir.getXAttrs(src);
9108 List<XAttr> filteredAll = XAttrPermissionFilter.
9109 filterXAttrsForApi(pc, all, isRawPath);
9110 if (getAll) {
9111 return filteredAll;
9112 } else {
9113 if (filteredAll == null || filteredAll.isEmpty()) {
9114 return null;
9115 }
9116 List<XAttr> toGet = Lists.newArrayListWithCapacity(xAttrs.size());
9117 for (XAttr xAttr : xAttrs) {
9118 boolean foundIt = false;
9119 for (XAttr a : filteredAll) {
9120 if (xAttr.getNameSpace() == a.getNameSpace()
9121 && xAttr.getName().equals(a.getName())) {
9122 toGet.add(a);
9123 foundIt = true;
9124 break;
9125 }
9126 }
9127 if (!foundIt) {
9128 throw new IOException(
9129 "At least one of the attributes provided was not found.");
9130 }
9131 }
9132 return toGet;
9133 }
9134 } catch (AccessControlException e) {
9135 logAuditEvent(false, "getXAttrs", srcArg);
9136 throw e;
9137 } finally {
9138 readUnlock();
9139 }
9140 }
9141
9142 List<XAttr> listXAttrs(String src) throws IOException {
9143 nnConf.checkXAttrsConfigFlag();
9144 final FSPermissionChecker pc = getPermissionChecker();
9145 final boolean isRawPath = FSDirectory.isReservedRawName(src);
9146 checkOperation(OperationCategory.READ);
9147 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
9148 readLock();
9149 try {
9150 src = resolvePath(src, pathComponents);
9151 checkOperation(OperationCategory.READ);
9152 if (isPermissionEnabled) {
9153 /* To access xattr names, you need EXECUTE in the owning directory. */
9154 checkParentAccess(pc, src, FsAction.EXECUTE);
9155 }
9156 final List<XAttr> all = dir.getXAttrs(src);
9157 final List<XAttr> filteredAll = XAttrPermissionFilter.
9158 filterXAttrsForApi(pc, all, isRawPath);
9159 return filteredAll;
9160 } catch (AccessControlException e) {
9161 logAuditEvent(false, "listXAttrs", src);
9162 throw e;
9163 } finally {
9164 readUnlock();
9165 }
9166 }
9167
9168 /**
9169 * Remove an xattr for a file or directory.
9170 *
9171 * @param src
9172 * - path to remove the xattr from
9173 * @param xAttr
9174 * - xAttr to remove
9175 * @throws AccessControlException
9176 * @throws SafeModeException
9177 * @throws UnresolvedLinkException
9178 * @throws IOException
9179 */
9180 void removeXAttr(String src, XAttr xAttr) throws IOException {
9181 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
9182 if (cacheEntry != null && cacheEntry.isSuccess()) {
9183 return; // Return previous response
9184 }
9185 boolean success = false;
9186 try {
9187 removeXAttrInt(src, xAttr, cacheEntry != null);
9188 success = true;
9189 } catch (AccessControlException e) {
9190 logAuditEvent(false, "removeXAttr", src);
9191 throw e;
9192 } finally {
9193 RetryCache.setState(cacheEntry, success);
9194 }
9195 }
9196
  /**
   * Does the actual work of removing one extended attribute under the write
   * lock, recording the removal in the edit log and emitting a success
   * audit event.
   *
   * @param srcArg path to remove the xattr from
   * @param xAttr the xattr to remove
   * @param logRetryCache whether the edit log entry should carry the RPC
   *          retry-cache state
   * @throws IOException if no matching attribute exists, xattrs are
   *           disabled, the namenode is in safe mode, or access is denied
   */
  void removeXAttrInt(final String srcArg, XAttr xAttr, boolean logRetryCache)
      throws IOException {
    String src = srcArg;
    nnConf.checkXAttrsConfigFlag();
    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    // Namespace-level screening of the xattr for this caller/path.
    XAttrPermissionFilter.checkPermissionForApi(pc, xAttr,
        FSDirectory.isReservedRawName(src));
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      // Re-check now that the write lock is held.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot remove XAttr entry on " + src);
      src = resolvePath(src, pathComponents);
      checkXAttrChangeAccess(src, xAttr, pc);

      List<XAttr> xAttrs = Lists.newArrayListWithCapacity(1);
      xAttrs.add(xAttr);
      List<XAttr> removedXAttrs = dir.removeXAttrs(src, xAttrs);
      if (removedXAttrs != null && !removedXAttrs.isEmpty()) {
        getEditLog().logRemoveXAttrs(src, removedXAttrs, logRetryCache);
      } else {
        // Nothing was removed: surface the failure instead of logging a
        // no-op edit.
        throw new IOException(
            "No matching attributes found for remove operation");
      }
      resultingStat = getAuditFileInfo(src, false);
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the write lock.
    getEditLog().logSync();
    logAuditEvent(true, "removeXAttr", srcArg, null, resultingStat);
  }
9230
9231 private void checkXAttrChangeAccess(String src, XAttr xAttr,
9232 FSPermissionChecker pc) throws UnresolvedLinkException,
9233 AccessControlException {
9234 if (isPermissionEnabled && xAttr.getNameSpace() == XAttr.NameSpace.USER) {
9235 final INode inode = dir.getINode(src);
9236 if (inode != null &&
9237 inode.isDirectory() &&
9238 inode.getFsPermission().getStickyBit()) {
9239 if (!pc.isSuperUser()) {
9240 checkOwner(pc, src);
9241 }
9242 } else {
9243 checkPathAccess(pc, src, FsAction.WRITE);
9244 }
9245 }
9246 }
9247
9248 void checkAccess(String src, FsAction mode) throws AccessControlException,
9249 FileNotFoundException, UnresolvedLinkException, IOException {
9250 checkOperation(OperationCategory.READ);
9251 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
9252 readLock();
9253 try {
9254 checkOperation(OperationCategory.READ);
9255 src = FSDirectory.resolvePath(src, pathComponents, dir);
9256 if (dir.getINode(src) == null) {
9257 throw new FileNotFoundException("Path not found");
9258 }
9259 if (isPermissionEnabled) {
9260 FSPermissionChecker pc = getPermissionChecker();
9261 checkPathAccess(pc, src, mode);
9262 }
9263 } catch (AccessControlException e) {
9264 logAuditEvent(false, "checkAccess", src);
9265 throw e;
9266 } finally {
9267 readUnlock();
9268 }
9269 }
9270
  /**
   * Default AuditLogger implementation; used when no access logger is
   * defined in the config file. It can also be explicitly listed in the
   * config file. Emits one tab-delimited line per audited operation.
   */
  private static class DefaultAuditLogger extends HdfsAuditLogger {

    // Whether to append the delegation token tracking id to each entry.
    private boolean logTokenTrackingId;

    @Override
    public void initialize(Configuration conf) {
      logTokenTrackingId = conf.getBoolean(
          DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY,
          DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT);
    }

    /**
     * Formats and emits a single audit entry. The field order and the
     * tab-delimited key=value layout below are the wire format consumed by
     * downstream log parsers — do not reorder or rename fields.
     */
    @Override
    public void logAuditEvent(boolean succeeded, String userName,
        InetAddress addr, String cmd, String src, String dst,
        FileStatus status, UserGroupInformation ugi,
        DelegationTokenSecretManager dtSecretManager) {
      if (auditLog.isInfoEnabled()) {
        // Reused buffer (reset before each event to avoid reallocation);
        // presumably thread-confined via auditBuffer — confirm at its
        // declaration.
        final StringBuilder sb = auditBuffer.get();
        sb.setLength(0);
        sb.append("allowed=").append(succeeded).append("\t");
        sb.append("ugi=").append(userName).append("\t");
        sb.append("ip=").append(addr).append("\t");
        sb.append("cmd=").append(cmd).append("\t");
        sb.append("src=").append(src).append("\t");
        sb.append("dst=").append(dst).append("\t");
        if (null == status) {
          sb.append("perm=null");
        } else {
          sb.append("perm=");
          sb.append(status.getOwner()).append(":");
          sb.append(status.getGroup()).append(":");
          sb.append(status.getPermission());
        }
        if (logTokenTrackingId) {
          sb.append("\t").append("trackingId=");
          String trackingId = null;
          // Only token-authenticated callers have a tracking id; use the
          // first delegation token identifier found.
          if (ugi != null && dtSecretManager != null
              && ugi.getAuthenticationMethod() == AuthenticationMethod.TOKEN) {
            for (TokenIdentifier tid: ugi.getTokenIdentifiers()) {
              if (tid instanceof DelegationTokenIdentifier) {
                DelegationTokenIdentifier dtid =
                    (DelegationTokenIdentifier)tid;
                trackingId = dtSecretManager.getTokenTrackingId(dtid);
                break;
              }
            }
          }
          sb.append(trackingId);
        }
        sb.append("\t").append("proto=");
        sb.append(NamenodeWebHdfsMethods.isWebHdfsInvocation() ? "webhdfs" : "rpc");
        logAuditMessage(sb.toString());
      }
    }

    /** Writes one formatted audit line at INFO level. */
    public void logAuditMessage(String message) {
      auditLog.info(message);
    }
  }
9335
9336 private static void enableAsyncAuditLog() {
9337 if (!(auditLog instanceof Log4JLogger)) {
9338 LOG.warn("Log4j is required to enable async auditlog");
9339 return;
9340 }
9341 Logger logger = ((Log4JLogger)auditLog).getLogger();
9342 @SuppressWarnings("unchecked")
9343 List<Appender> appenders = Collections.list(logger.getAllAppenders());
9344 // failsafe against trying to async it more than once
9345 if (!appenders.isEmpty() && !(appenders.get(0) instanceof AsyncAppender)) {
9346 AsyncAppender asyncAppender = new AsyncAppender();
9347 // change logger to have an async appender containing all the
9348 // previously configured appenders
9349 for (Appender appender : appenders) {
9350 logger.removeAppender(appender);
9351 asyncAppender.addAppender(appender);
9352 }
9353 logger.addAppender(asyncAppender);
9354 }
9355 }
9356 }
9357