/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import static org.apache.hadoop.util.Time.now;

import java.io.File;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumMap;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.hdfs.protocol.LayoutVersion;
import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.namenode.EditLogFileInputStream.LogHeaderCorruptException;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCloseOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AllocateBlockIdOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AllowSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.BlockListUpdatingOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.CancelDelegationTokenOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ClearNSQuotaOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ConcatDeleteOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.CreateSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DisallowSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.GetDelegationTokenOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.MkdirOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ReassignLeaseOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOldOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenewDelegationTokenOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetGenstampV1Op;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetGenstampV2Op;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetNSQuotaOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetOwnerOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetPermissionsOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetQuotaOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetReplicationOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SymlinkOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.TimesOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateBlocksOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateMasterKeyOp;
import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
import org.apache.hadoop.hdfs.util.Holder;

import com.google.common.base.Joiner;

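/**
 * Loads edit log records from an {@link EditLogInputStream} and applies
 * them to the in-memory namespace managed by an {@link FSNamesystem}.
 */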
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class FSEditLogLoader {
  static final Log LOG = LogFactory.getLog(FSEditLogLoader.class.getName());
  static long REPLAY_TRANSACTION_LOG_INTERVAL = 1000; // 1sec
  private final FSNamesystem fsNamesys;
  private long lastAppliedTxId;

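  /**
   * @param fsNamesys the namespace that loaded edits will be applied to
   * @param lastAppliedTxId transaction ID of the last edit already applied
   */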
  public FSEditLogLoader(FSNamesystem fsNamesys, long lastAppliedTxId) {
    this.fsNamesys = fsNamesys;
    this.lastAppliedTxId = lastAppliedTxId;
  }

  /**
   * Load an edit log, and apply the changes to the in-memory structure.
   * This is where we apply edits that we've been writing to disk all
   * along.
   */
  long loadFSEdits(EditLogInputStream edits, long expectedStartingTxId,
      MetaRecoveryContext recovery) throws IOException {
    StartupProgress prog = NameNode.getStartupProgress();
    Step step = createStartupProgressStep(edits);
    prog.beginStep(Phase.LOADING_EDITS, step);
    fsNamesys.writeLock();
    try {
      long startTime = now();
      FSImage.LOG.info("Start loading edits file " + edits.getName());
      long numEdits = loadEditRecords(edits, false,
          expectedStartingTxId, recovery);
      FSImage.LOG.info("Edits file " + edits.getName()
          + " of size " + edits.length() + " edits # " + numEdits
          + " loaded in " + (now() - startTime) / 1000 + " seconds");
      return numEdits;
    } finally {
      edits.close();
      fsNamesys.writeUnlock();
      prog.endStep(Phase.LOADING_EDITS, step);
    }
  }

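  /**
   * Read ops from the stream until it is exhausted, applying each one to
   * the namespace. In recovery mode, bad ops can be skipped after
   * prompting; otherwise the first unreadable op aborts loading.
   *
   * @return the number of edits successfully applied
   */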
  long loadEditRecords(EditLogInputStream in, boolean closeOnExit,
      long expectedStartingTxId, MetaRecoveryContext recovery)
      throws IOException {
    FSDirectory fsDir = fsNamesys.dir;

    EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts =
        new EnumMap<FSEditLogOpCodes, Holder<Integer>>(FSEditLogOpCodes.class);

    if (LOG.isTraceEnabled()) {
      LOG.trace("Acquiring write lock to replay edit log");
    }

    fsNamesys.writeLock();
    fsDir.writeLock();

    long recentOpcodeOffsets[] = new long[4];
    Arrays.fill(recentOpcodeOffsets, -1);

    long expectedTxId = expectedStartingTxId;
    long numEdits = 0;
    long lastTxId = in.getLastTxId();
    long numTxns = (lastTxId - expectedStartingTxId) + 1;
    StartupProgress prog = NameNode.getStartupProgress();
    Step step = createStartupProgressStep(in);
    prog.setTotal(Phase.LOADING_EDITS, step, numTxns);
    Counter counter = prog.getCounter(Phase.LOADING_EDITS, step);
    long lastLogTime = now();
    long lastInodeId = fsNamesys.getLastInodeId();

    try {
      while (true) {
        try {
          FSEditLogOp op;
          try {
            op = in.readOp();
            if (op == null) {
              break;
            }
          } catch (Throwable e) {
            // Handle a problem with our input
            check203UpgradeFailure(in.getVersion(), e);
            String errorMessage =
                formatEditLogReplayError(in, recentOpcodeOffsets, expectedTxId);
            FSImage.LOG.error(errorMessage, e);
            if (recovery == null) {
              // We will only try to skip over problematic opcodes when in
              // recovery mode.
              throw new EditLogInputException(errorMessage, e, numEdits);
            }
            MetaRecoveryContext.editLogLoaderPrompt(
                "We failed to read txId " + expectedTxId,
                recovery, "skipping the bad section in the log");
            in.resync();
            continue;
          }
          recentOpcodeOffsets[(int) (numEdits % recentOpcodeOffsets.length)] =
              in.getPosition();
          if (op.hasTransactionId()) {
            if (op.getTransactionId() > expectedTxId) {
              MetaRecoveryContext.editLogLoaderPrompt("There appears " +
                  "to be a gap in the edit log. We expected txid " +
                  expectedTxId + ", but got txid " +
                  op.getTransactionId() + ".", recovery,
                  "ignoring missing transaction IDs");
            } else if (op.getTransactionId() < expectedTxId) {
              MetaRecoveryContext.editLogLoaderPrompt("There appears " +
                  "to be an out-of-order edit in the edit log. We " +
                  "expected txid " + expectedTxId + ", but got txid " +
                  op.getTransactionId() + ".", recovery,
                  "skipping the out-of-order edit");
              continue;
            }
          }
          try {
            long inodeId = applyEditLogOp(op, fsDir, in.getVersion(), lastInodeId);
            if (lastInodeId < inodeId) {
              lastInodeId = inodeId;
            }
          } catch (Throwable e) {
            LOG.error("Encountered exception on operation " + op, e);
            MetaRecoveryContext.editLogLoaderPrompt("Failed to " +
                "apply edit log operation " + op + ": error " +
                e.getMessage(), recovery, "applying edits");
          }
          // Now that the operation has been successfully decoded and
          // applied, update our bookkeeping.
          incrOpCount(op.opCode, opCounts, step, counter);
          if (op.hasTransactionId()) {
            lastAppliedTxId = op.getTransactionId();
            expectedTxId = lastAppliedTxId + 1;
          } else {
            expectedTxId = lastAppliedTxId = expectedStartingTxId;
          }
          // log progress
          if (op.hasTransactionId()) {
            long now = now();
            if (now - lastLogTime > REPLAY_TRANSACTION_LOG_INTERVAL) {
              long deltaTxId = lastAppliedTxId - expectedStartingTxId + 1;
              int percent = Math.round((float) deltaTxId / numTxns * 100);
              LOG.info("replaying edit log: " + deltaTxId + "/" + numTxns
                  + " transactions completed. (" + percent + "%)");
              lastLogTime = now;
            }
          }
          numEdits++;
        } catch (MetaRecoveryContext.RequestStopException e) {
          MetaRecoveryContext.LOG.warn("Stopped reading edit log at " +
              in.getPosition() + "/" + in.length());
          break;
        }
      }
    } finally {
      fsNamesys.resetLastInodeId(lastInodeId);
      if (closeOnExit) {
        in.close();
      }
      fsDir.writeUnlock();
      fsNamesys.writeUnlock();

      if (LOG.isTraceEnabled()) {
        LOG.trace("replaying edit log finished");
      }

      if (FSImage.LOG.isDebugEnabled()) {
        dumpOpCounts(opCounts);
      }
    }
    return numEdits;
  }

  /** Allocate an inode id if needed, and update the last allocated id. */
  private long getAndUpdateLastInodeId(long inodeIdFromOp, int logVersion,
      long lastInodeId) throws IOException {
    long inodeId = inodeIdFromOp;

    if (inodeId == INodeId.GRANDFATHER_INODE_ID) {
      if (LayoutVersion.supports(Feature.ADD_INODE_ID, logVersion)) {
        throw new IOException("The layout version " + logVersion
            + " supports inodeId but gave bogus inodeId");
      }
      inodeId = fsNamesys.allocateNewInodeId();
    } else {
      // Need to reset lastInodeId. FSNamesystem initially gets lastInodeId
      // from the fsimage, but the edit log captures more recent inodeId
      // allocations.
      if (inodeId > lastInodeId) {
        fsNamesys.resetLastInodeId(inodeId);
      }
    }
    return inodeId;
  }

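  /**
   * Apply a single edit log operation to the namespace.
   *
   * @return the inode id touched by this op if it allocated or carried one,
   *         otherwise {@link INodeId#GRANDFATHER_INODE_ID}
   */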
  @SuppressWarnings("deprecation")
  private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir,
      int logVersion, long lastInodeId) throws IOException {
    long inodeId = INodeId.GRANDFATHER_INODE_ID;
    if (LOG.isTraceEnabled()) {
      LOG.trace("replaying edit log: " + op);
    }
    final boolean toAddRetryCache = fsNamesys.hasRetryCache() && op.hasRpcIds();

    switch (op.opCode) {
    case OP_ADD: {
      AddCloseOp addCloseOp = (AddCloseOp) op;
      if (FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug(op.opCode + ": " + addCloseOp.path +
            " numblocks : " + addCloseOp.blocks.length +
            " clientHolder " + addCloseOp.clientName +
            " clientMachine " + addCloseOp.clientMachine);
      }
      // There are three cases here:
      // 1. OP_ADD to create a new file
      // 2. OP_ADD to update file blocks
      // 3. OP_ADD to open file for append

      // See if the file already exists (persistBlocks call)
      final INodesInPath iip = fsDir.getLastINodeInPath(addCloseOp.path);
      final INodeFile oldFile = INodeFile.valueOf(
          iip.getINode(0), addCloseOp.path, true);
      INodeFile newFile = oldFile;
      if (oldFile == null) { // this is OP_ADD on a new file (case 1)
        // versions > 0 support per file replication
        // get name and replication
        final short replication = fsNamesys.getBlockManager()
            .adjustReplication(addCloseOp.replication);
        assert addCloseOp.blocks.length == 0;

        // add to the file tree
        inodeId = getAndUpdateLastInodeId(addCloseOp.inodeId, logVersion,
            lastInodeId);
        newFile = fsDir.unprotectedAddFile(inodeId,
            addCloseOp.path, addCloseOp.permissions, replication,
            addCloseOp.mtime, addCloseOp.atime, addCloseOp.blockSize, true,
            addCloseOp.clientName, addCloseOp.clientMachine);
        fsNamesys.leaseManager.addLease(addCloseOp.clientName, addCloseOp.path);

        // add the op into retry cache if necessary
        if (toAddRetryCache) {
          HdfsFileStatus stat = fsNamesys.dir.createFileStatus(
              HdfsFileStatus.EMPTY_NAME, newFile, null);
          fsNamesys.addCacheEntryWithPayload(addCloseOp.rpcClientId,
              addCloseOp.rpcCallId, stat);
        }
      } else { // This is OP_ADD on an existing file
        if (!oldFile.isUnderConstruction()) {
          // This is case 3: a call to append() on an already-closed file.
          if (FSNamesystem.LOG.isDebugEnabled()) {
            FSNamesystem.LOG.debug("Reopening an already-closed file " +
                "for append");
          }
          LocatedBlock lb = fsNamesys.prepareFileForWrite(addCloseOp.path,
              oldFile, addCloseOp.clientName, addCloseOp.clientMachine, null,
              false, iip.getLatestSnapshot(), false);
          newFile = INodeFile.valueOf(fsDir.getINode(addCloseOp.path),
              addCloseOp.path, true);

          // add the op into retry cache if necessary
          if (toAddRetryCache) {
            fsNamesys.addCacheEntryWithPayload(addCloseOp.rpcClientId,
                addCloseOp.rpcCallId, lb);
          }
        }
      }
      // Fall-through for case 2.
      // Regardless of whether it's a new file or an updated file,
      // update the block list.

      // Update the salient file attributes.
      newFile.setAccessTime(addCloseOp.atime, null, fsDir.getINodeMap());
      newFile.setModificationTime(addCloseOp.mtime, null, fsDir.getINodeMap());
      updateBlocks(fsDir, addCloseOp, newFile);
      break;
    }
    case OP_CLOSE: {
      AddCloseOp addCloseOp = (AddCloseOp) op;

      if (FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug(op.opCode + ": " + addCloseOp.path +
            " numblocks : " + addCloseOp.blocks.length +
            " clientHolder " + addCloseOp.clientName +
            " clientMachine " + addCloseOp.clientMachine);
      }

      final INodesInPath iip = fsDir.getLastINodeInPath(addCloseOp.path);
      final INodeFile oldFile = INodeFile.valueOf(iip.getINode(0), addCloseOp.path);

      // Update the salient file attributes.
      oldFile.setAccessTime(addCloseOp.atime, null, fsDir.getINodeMap());
      oldFile.setModificationTime(addCloseOp.mtime, null, fsDir.getINodeMap());
      updateBlocks(fsDir, addCloseOp, oldFile);

      // Now close the file
      if (!oldFile.isUnderConstruction() &&
          logVersion <= LayoutVersion.BUGFIX_HDFS_2991_VERSION) {
        // There was a bug (HDFS-2991) in hadoop < 0.23.1 where OP_CLOSE
        // could show up twice in a row. But after that version, this
        // should be fixed, so we should treat it as an error.
        throw new IOException(
            "File is not under construction: " + addCloseOp.path);
      }
      // One might expect that you could use removeLease(holder, path) here,
      // but OP_CLOSE doesn't serialize the holder. So, remove by path.
      if (oldFile.isUnderConstruction()) {
        INodeFileUnderConstruction ucFile = (INodeFileUnderConstruction) oldFile;
        fsNamesys.leaseManager.removeLeaseWithPrefixPath(addCloseOp.path);
        INodeFile newFile = ucFile.toINodeFile(ucFile.getModificationTime());
        fsDir.unprotectedReplaceINodeFile(addCloseOp.path, ucFile, newFile);
      }
      break;
    }
    case OP_UPDATE_BLOCKS: {
      UpdateBlocksOp updateOp = (UpdateBlocksOp) op;
      if (FSNamesystem.LOG.isDebugEnabled()) {
        FSNamesystem.LOG.debug(op.opCode + ": " + updateOp.path +
            " numblocks : " + updateOp.blocks.length);
      }
      INodeFile oldFile = INodeFile.valueOf(fsDir.getINode(updateOp.path),
          updateOp.path);
      // Update in-memory data structures
      updateBlocks(fsDir, updateOp, oldFile);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(updateOp.rpcClientId, updateOp.rpcCallId);
      }
      break;
    }

    case OP_SET_REPLICATION: {
      SetReplicationOp setReplicationOp = (SetReplicationOp) op;
      short replication = fsNamesys.getBlockManager().adjustReplication(
          setReplicationOp.replication);
      fsDir.unprotectedSetReplication(setReplicationOp.path,
          replication, null);
      break;
    }
    case OP_CONCAT_DELETE: {
      ConcatDeleteOp concatDeleteOp = (ConcatDeleteOp) op;
      fsDir.unprotectedConcat(concatDeleteOp.trg, concatDeleteOp.srcs,
          concatDeleteOp.timestamp);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(concatDeleteOp.rpcClientId,
            concatDeleteOp.rpcCallId);
      }
      break;
    }
    case OP_RENAME_OLD: {
      RenameOldOp renameOp = (RenameOldOp) op;
      fsDir.unprotectedRenameTo(renameOp.src, renameOp.dst,
          renameOp.timestamp);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(renameOp.rpcClientId, renameOp.rpcCallId);
      }
      break;
    }
    case OP_DELETE: {
      DeleteOp deleteOp = (DeleteOp) op;
      fsDir.unprotectedDelete(deleteOp.path, deleteOp.timestamp);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(deleteOp.rpcClientId, deleteOp.rpcCallId);
      }
      break;
    }
    case OP_MKDIR: {
      MkdirOp mkdirOp = (MkdirOp) op;
      inodeId = getAndUpdateLastInodeId(mkdirOp.inodeId, logVersion,
          lastInodeId);
      fsDir.unprotectedMkdir(inodeId, mkdirOp.path, mkdirOp.permissions,
          mkdirOp.timestamp);
      break;
    }
    case OP_SET_GENSTAMP_V1: {
      SetGenstampV1Op setGenstampV1Op = (SetGenstampV1Op) op;
      fsNamesys.setGenerationStampV1(setGenstampV1Op.genStampV1);
      break;
    }
    case OP_SET_PERMISSIONS: {
      SetPermissionsOp setPermissionsOp = (SetPermissionsOp) op;
      fsDir.unprotectedSetPermission(setPermissionsOp.src,
          setPermissionsOp.permissions);
      break;
    }
    case OP_SET_OWNER: {
      SetOwnerOp setOwnerOp = (SetOwnerOp) op;
      fsDir.unprotectedSetOwner(setOwnerOp.src, setOwnerOp.username,
          setOwnerOp.groupname);
      break;
    }
    case OP_SET_NS_QUOTA: {
      SetNSQuotaOp setNSQuotaOp = (SetNSQuotaOp) op;
      fsDir.unprotectedSetQuota(setNSQuotaOp.src,
          setNSQuotaOp.nsQuota,
          HdfsConstants.QUOTA_DONT_SET);
      break;
    }
    case OP_CLEAR_NS_QUOTA: {
      ClearNSQuotaOp clearNSQuotaOp = (ClearNSQuotaOp) op;
      fsDir.unprotectedSetQuota(clearNSQuotaOp.src,
          HdfsConstants.QUOTA_RESET,
          HdfsConstants.QUOTA_DONT_SET);
      break;
    }

    case OP_SET_QUOTA:
      SetQuotaOp setQuotaOp = (SetQuotaOp) op;
      fsDir.unprotectedSetQuota(setQuotaOp.src,
          setQuotaOp.nsQuota,
          setQuotaOp.dsQuota);
      break;

    case OP_TIMES: {
      TimesOp timesOp = (TimesOp) op;

      fsDir.unprotectedSetTimes(timesOp.path,
          timesOp.mtime,
          timesOp.atime, true);
      break;
    }
    case OP_SYMLINK: {
      if (!FileSystem.isSymlinksEnabled()) {
        throw new IOException("Symlinks not supported - please remove symlink" +
            " before upgrading to this version of HDFS");
      }
      SymlinkOp symlinkOp = (SymlinkOp) op;
      inodeId = getAndUpdateLastInodeId(symlinkOp.inodeId, logVersion,
          lastInodeId);
      fsDir.unprotectedAddSymlink(inodeId, symlinkOp.path,
          symlinkOp.value, symlinkOp.mtime,
          symlinkOp.atime, symlinkOp.permissionStatus);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(symlinkOp.rpcClientId, symlinkOp.rpcCallId);
      }
      break;
    }
    case OP_RENAME: {
      RenameOp renameOp = (RenameOp) op;
      fsDir.unprotectedRenameTo(renameOp.src, renameOp.dst,
          renameOp.timestamp, renameOp.options);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(renameOp.rpcClientId, renameOp.rpcCallId);
      }
      break;
    }
    case OP_GET_DELEGATION_TOKEN: {
      GetDelegationTokenOp getDelegationTokenOp
          = (GetDelegationTokenOp) op;

      fsNamesys.getDelegationTokenSecretManager()
          .addPersistedDelegationToken(getDelegationTokenOp.token,
              getDelegationTokenOp.expiryTime);
      break;
    }
    case OP_RENEW_DELEGATION_TOKEN: {
      RenewDelegationTokenOp renewDelegationTokenOp
          = (RenewDelegationTokenOp) op;
      fsNamesys.getDelegationTokenSecretManager()
          .updatePersistedTokenRenewal(renewDelegationTokenOp.token,
              renewDelegationTokenOp.expiryTime);
      break;
    }
    case OP_CANCEL_DELEGATION_TOKEN: {
      CancelDelegationTokenOp cancelDelegationTokenOp
          = (CancelDelegationTokenOp) op;
      fsNamesys.getDelegationTokenSecretManager()
          .updatePersistedTokenCancellation(
              cancelDelegationTokenOp.token);
      break;
    }
    case OP_UPDATE_MASTER_KEY: {
      UpdateMasterKeyOp updateMasterKeyOp = (UpdateMasterKeyOp) op;
      fsNamesys.getDelegationTokenSecretManager()
          .updatePersistedMasterKey(updateMasterKeyOp.key);
      break;
    }
    case OP_REASSIGN_LEASE: {
      ReassignLeaseOp reassignLeaseOp = (ReassignLeaseOp) op;

      Lease lease = fsNamesys.leaseManager.getLease(
          reassignLeaseOp.leaseHolder);
      INodeFileUnderConstruction pendingFile =
          INodeFileUnderConstruction.valueOf(
              fsDir.getINode(reassignLeaseOp.path), reassignLeaseOp.path);
      fsNamesys.reassignLeaseInternal(lease,
          reassignLeaseOp.path, reassignLeaseOp.newHolder, pendingFile);
      break;
    }
    case OP_START_LOG_SEGMENT:
    case OP_END_LOG_SEGMENT: {
      // no data in here currently.
      break;
    }
    case OP_CREATE_SNAPSHOT: {
      CreateSnapshotOp createSnapshotOp = (CreateSnapshotOp) op;
      String path = fsNamesys.getSnapshotManager().createSnapshot(
          createSnapshotOp.snapshotRoot, createSnapshotOp.snapshotName);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntryWithPayload(createSnapshotOp.rpcClientId,
            createSnapshotOp.rpcCallId, path);
      }
      break;
    }
    case OP_DELETE_SNAPSHOT: {
      DeleteSnapshotOp deleteSnapshotOp = (DeleteSnapshotOp) op;
      BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
      List<INode> removedINodes = new ArrayList<INode>();
      fsNamesys.getSnapshotManager().deleteSnapshot(
          deleteSnapshotOp.snapshotRoot, deleteSnapshotOp.snapshotName,
          collectedBlocks, removedINodes);
      fsNamesys.removeBlocks(collectedBlocks);
      collectedBlocks.clear();
      fsNamesys.dir.removeFromInodeMap(removedINodes);
      removedINodes.clear();

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(deleteSnapshotOp.rpcClientId,
            deleteSnapshotOp.rpcCallId);
      }
      break;
    }
    case OP_RENAME_SNAPSHOT: {
      RenameSnapshotOp renameSnapshotOp = (RenameSnapshotOp) op;
      fsNamesys.getSnapshotManager().renameSnapshot(
          renameSnapshotOp.snapshotRoot, renameSnapshotOp.snapshotOldName,
          renameSnapshotOp.snapshotNewName);

      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(renameSnapshotOp.rpcClientId,
            renameSnapshotOp.rpcCallId);
      }
      break;
    }
    case OP_ALLOW_SNAPSHOT: {
      AllowSnapshotOp allowSnapshotOp = (AllowSnapshotOp) op;
      fsNamesys.getSnapshotManager().setSnapshottable(
          allowSnapshotOp.snapshotRoot, false);
      break;
    }
    case OP_DISALLOW_SNAPSHOT: {
      DisallowSnapshotOp disallowSnapshotOp = (DisallowSnapshotOp) op;
      fsNamesys.getSnapshotManager().resetSnapshottable(
          disallowSnapshotOp.snapshotRoot);
      break;
    }
    case OP_SET_GENSTAMP_V2: {
      SetGenstampV2Op setGenstampV2Op = (SetGenstampV2Op) op;
      fsNamesys.setGenerationStampV2(setGenstampV2Op.genStampV2);
      break;
    }
    case OP_ALLOCATE_BLOCK_ID: {
      AllocateBlockIdOp allocateBlockIdOp = (AllocateBlockIdOp) op;
      fsNamesys.setLastAllocatedBlockId(allocateBlockIdOp.blockId);
      break;
    }
    default:
      throw new IOException("Invalid operation read " + op.opCode);
    }
    return inodeId;
  }

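  /**
   * Build a diagnostic message for a replay failure, including the current
   * stream offset and the offsets of recently applied opcodes.
   */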
  private static String formatEditLogReplayError(EditLogInputStream in,
      long recentOpcodeOffsets[], long txid) {
    StringBuilder sb = new StringBuilder();
    sb.append("Error replaying edit log at offset " + in.getPosition());
    sb.append(". Expected transaction ID was ").append(txid);
    if (recentOpcodeOffsets[0] != -1) {
      Arrays.sort(recentOpcodeOffsets);
      sb.append("\nRecent opcode offsets:");
      for (long offset : recentOpcodeOffsets) {
        if (offset != -1) {
          sb.append(' ').append(offset);
        }
      }
    }
    return sb.toString();
  }

  /**
   * Update in-memory data structures with new block information.
   * @throws IOException
   */
  private void updateBlocks(FSDirectory fsDir, BlockListUpdatingOp op,
      INodeFile file) throws IOException {
    // Update its block list
    BlockInfo[] oldBlocks = file.getBlocks();
    Block[] newBlocks = op.getBlocks();
    String path = op.getPath();

    // Are we only updating the last block's gen stamp?
    boolean isGenStampUpdate = oldBlocks.length == newBlocks.length;

    // First, update blocks in common
    for (int i = 0; i < oldBlocks.length && i < newBlocks.length; i++) {
      BlockInfo oldBlock = oldBlocks[i];
      Block newBlock = newBlocks[i];

      boolean isLastBlock = i == newBlocks.length - 1;
      if (oldBlock.getBlockId() != newBlock.getBlockId() ||
          (oldBlock.getGenerationStamp() != newBlock.getGenerationStamp() &&
              !(isGenStampUpdate && isLastBlock))) {
        throw new IOException("Mismatched block IDs or generation stamps, " +
            "attempting to replace block " + oldBlock + " with " + newBlock +
            " as block # " + i + "/" + newBlocks.length + " of " +
            path);
      }

      oldBlock.setNumBytes(newBlock.getNumBytes());
      boolean changeMade =
          oldBlock.getGenerationStamp() != newBlock.getGenerationStamp();
      oldBlock.setGenerationStamp(newBlock.getGenerationStamp());

      if (oldBlock instanceof BlockInfoUnderConstruction &&
          (!isLastBlock || op.shouldCompleteLastBlock())) {
        changeMade = true;
        fsNamesys.getBlockManager().forceCompleteBlock(
            (INodeFileUnderConstruction) file,
            (BlockInfoUnderConstruction) oldBlock);
      }
      if (changeMade) {
        // The state or gen-stamp of the block has changed. So, we may be
        // able to process some messages from datanodes that we previously
        // were unable to process.
        fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock);
      }
    }

    if (newBlocks.length < oldBlocks.length) {
      // We're removing a block from the file, e.g. abandonBlock(...)
      if (!file.isUnderConstruction()) {
        throw new IOException("Trying to remove a block from file " +
            path + " which is not under construction.");
      }
      if (newBlocks.length != oldBlocks.length - 1) {
        throw new IOException("Trying to remove more than one block from file "
            + path);
      }
      Block oldBlock = oldBlocks[oldBlocks.length - 1];
      boolean removed = fsDir.unprotectedRemoveBlock(path,
          (INodeFileUnderConstruction) file, oldBlock);
      if (!removed && !(op instanceof UpdateBlocksOp)) {
        throw new IOException("Trying to delete non-existent block " + oldBlock);
      }
    } else if (newBlocks.length > oldBlocks.length) {
      // We're adding blocks
      for (int i = oldBlocks.length; i < newBlocks.length; i++) {
        Block newBlock = newBlocks[i];
        BlockInfo newBI;
        if (!op.shouldCompleteLastBlock()) {
          // TODO: shouldn't this only be true for the last block?
          // what about an old-version fsync() where fsync isn't called
          // until several blocks in?
          newBI = new BlockInfoUnderConstruction(
              newBlock, file.getBlockReplication());
        } else {
          // OP_CLOSE should add finalized blocks. This code path
          // is only executed when loading edits written by prior
          // versions of Hadoop. Current versions always log
          // OP_ADD operations as each block is allocated.
          newBI = new BlockInfo(newBlock, file.getBlockReplication());
        }
        fsNamesys.getBlockManager().addBlockCollection(newBI, file);
        file.addBlock(newBI);
        fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock);
      }
    }
  }

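  /** Log a per-opcode summary of the operations applied from the edit log. */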
  private static void dumpOpCounts(
      EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts) {
    StringBuilder sb = new StringBuilder();
    sb.append("Summary of operations loaded from edit log:\n ");
    Joiner.on("\n ").withKeyValueSeparator("=").appendTo(sb, opCounts);
    FSImage.LOG.debug(sb.toString());
  }

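  /**
   * Bump the in-memory count for this opcode and the startup progress
   * counter for the current step.
   */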
  private void incrOpCount(FSEditLogOpCodes opCode,
      EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts, Step step,
      Counter counter) {
    Holder<Integer> holder = opCounts.get(opCode);
    if (holder == null) {
      holder = new Holder<Integer>(1);
      opCounts.put(opCode, holder);
    } else {
      holder.held++;
    }
    counter.increment();
  }

  /**
   * Throw appropriate exception during upgrade from 203, when editlog loading
   * could fail due to opcode conflicts.
   */
  private void check203UpgradeFailure(int logVersion, Throwable e)
      throws IOException {
    // The 0.20.203 release has conflicting opcodes with later releases.
    // The editlog must be emptied by restarting the namenode, before
    // proceeding with the upgrade.
    if (Storage.is203LayoutVersion(logVersion)
        && logVersion != HdfsConstants.LAYOUT_VERSION) {
      String msg = "During upgrade failed to load the editlog version "
          + logVersion + " from release 0.20.203. Please go back to the old"
          + " release and restart the namenode. This empties the editlog"
          + " and saves the namespace. Resume the upgrade after this step.";
      throw new IOException(msg, e);
    }
  }

  /**
   * Find the last valid transaction ID in the stream.
   * If there are invalid or corrupt transactions in the middle of the stream,
   * validateEditLog will skip over them.
   * This reads through the stream but does not close it. Read errors are
   * handled internally by resyncing, so no exception is thrown.
   */
  static EditLogValidation validateEditLog(EditLogInputStream in) {
    long lastPos = 0;
    long lastTxId = HdfsConstants.INVALID_TXID;
    long numValid = 0;
    FSEditLogOp op = null;
    while (true) {
      lastPos = in.getPosition();
      try {
        if ((op = in.readOp()) == null) {
          break;
        }
      } catch (Throwable t) {
        FSImage.LOG.warn("Caught exception after reading " + numValid +
            " ops from " + in + " while determining its valid length. " +
            "Position was " + lastPos, t);
        in.resync();
        FSImage.LOG.warn("After resync, position is " + in.getPosition());
        continue;
      }
      if (lastTxId == HdfsConstants.INVALID_TXID
          || op.getTransactionId() > lastTxId) {
        lastTxId = op.getTransactionId();
      }
      numValid++;
    }
    return new EditLogValidation(lastPos, lastTxId, false);
  }

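  /** Result of scanning an edit log stream: its valid length and last txid. */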
  static class EditLogValidation {
    private final long validLength;
    private final long endTxId;
    private final boolean hasCorruptHeader;

    EditLogValidation(long validLength, long endTxId,
        boolean hasCorruptHeader) {
      this.validLength = validLength;
      this.endTxId = endTxId;
      this.hasCorruptHeader = hasCorruptHeader;
    }

    long getValidLength() { return validLength; }

    long getEndTxId() { return endTxId; }

    boolean hasCorruptHeader() { return hasCorruptHeader; }
  }

  /**
   * Stream wrapper that keeps track of the current stream position.
   *
   * This stream also allows us to set a limit on how many bytes we can read
   * without getting an exception.
   */
  public static class PositionTrackingInputStream extends FilterInputStream
      implements StreamLimiter {
    private long curPos = 0;
    private long markPos = -1;
    private long limitPos = Long.MAX_VALUE;

    public PositionTrackingInputStream(InputStream is) {
      super(is);
    }

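    /** Throw if reading {@code amt} more bytes would run past the limit. */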
    private void checkLimit(long amt) throws IOException {
      long extra = (curPos + amt) - limitPos;
      if (extra > 0) {
        throw new IOException("Tried to read " + amt + " byte(s) past " +
            "the limit at offset " + limitPos);
      }
    }

    @Override
    public int read() throws IOException {
      checkLimit(1);
      int ret = super.read();
      if (ret != -1) curPos++;
      return ret;
    }

    @Override
    public int read(byte[] data) throws IOException {
      checkLimit(data.length);
      int ret = super.read(data);
      if (ret > 0) curPos += ret;
      return ret;
    }

    @Override
    public int read(byte[] data, int offset, int length) throws IOException {
      checkLimit(length);
      int ret = super.read(data, offset, length);
      if (ret > 0) curPos += ret;
      return ret;
    }

    @Override
    public void setLimit(long limit) {
      limitPos = curPos + limit;
    }

    @Override
    public void clearLimit() {
      limitPos = Long.MAX_VALUE;
    }

    @Override
    public void mark(int limit) {
      super.mark(limit);
      markPos = curPos;
    }

    @Override
    public void reset() throws IOException {
      if (markPos == -1) {
        throw new IOException("Not marked!");
      }
      super.reset();
      curPos = markPos;
      markPos = -1;
    }

    public long getPos() {
      return curPos;
    }

    @Override
    public long skip(long amt) throws IOException {
      long extra = (curPos + amt) - limitPos;
      if (extra > 0) {
        throw new IOException("Tried to skip " + extra + " bytes past " +
            "the limit at offset " + limitPos);
      }
      long ret = super.skip(amt);
      curPos += ret;
      return ret;
    }
  }

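  /** @return the transaction ID of the last edit this loader applied. */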
  public long getLastAppliedTxId() {
    return lastAppliedTxId;
  }

  /**
   * Creates a Step used for updating startup progress, populated with
   * information from the given edits. The step always includes the log's name.
   * If the log has a known length, then the length is included in the step too.
   *
   * @param edits EditLogInputStream to use for populating step
   * @return Step populated with information from edits
   * @throws IOException thrown if there is an I/O error
   */
  private static Step createStartupProgressStep(EditLogInputStream edits)
      throws IOException {
    long length = edits.length();
    String name = edits.getCurrentStreamName();
    return length != -1 ? new Step(name, length) : new Step(name);
  }
}