001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.hdfs.server.namenode;
019
020 import static org.apache.hadoop.util.Time.now;
021
022 import java.io.DataInput;
023 import java.io.DataInputStream;
024 import java.io.File;
025 import java.io.FileInputStream;
026 import java.io.FileNotFoundException;
027 import java.io.IOException;
028 import java.security.DigestInputStream;
029 import java.security.MessageDigest;
030 import java.util.Arrays;
031 import java.util.Collection;
032 import java.util.Map;
033 import java.util.TreeMap;
034
035 import org.apache.commons.logging.Log;
036 import org.apache.hadoop.classification.InterfaceAudience;
037 import org.apache.hadoop.classification.InterfaceStability;
038 import org.apache.hadoop.conf.Configuration;
039 import org.apache.hadoop.fs.FileSystem;
040 import org.apache.hadoop.fs.Path;
041 import org.apache.hadoop.fs.PathIsNotDirectoryException;
042 import org.apache.hadoop.fs.UnresolvedLinkException;
043 import org.apache.hadoop.fs.permission.PermissionStatus;
044 import org.apache.hadoop.hdfs.DFSUtil;
045 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
046 import org.apache.hadoop.hdfs.protocol.LayoutFlags;
047 import org.apache.hadoop.hdfs.protocol.LayoutVersion;
048 import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
049 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
050 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
051 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
052 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
053 import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
054 import org.apache.hadoop.hdfs.server.namenode.snapshot.FileDiffList;
055 import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable;
056 import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
057 import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat;
058 import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat.ReferenceMap;
059 import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
060 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
061 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
062 import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
063 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;
064 import org.apache.hadoop.io.IOUtils;
065 import org.apache.hadoop.io.MD5Hash;
066 import org.apache.hadoop.io.Text;
067 import org.apache.hadoop.util.StringUtils;
068
069 import com.google.common.base.Preconditions;
070 import com.google.common.annotations.VisibleForTesting;
071
072 /**
073 * This class loads and stores the FSImage of the NameNode. The file
074 * src/main/proto/fsimage.proto describes the on-disk layout of the FSImage.
075 */
076 @InterfaceAudience.Private
077 @InterfaceStability.Evolving
078 public class FSImageFormat {
079 private static final Log LOG = FSImage.LOG;
080
081 // Static-only class
082 private FSImageFormat() {}
083
084 interface AbstractLoader {
085 MD5Hash getLoadedImageMd5();
086 long getLoadedImageTxId();
087 }
088
089 static class LoaderDelegator implements AbstractLoader {
090 private AbstractLoader impl;
091 private final Configuration conf;
092 private final FSNamesystem fsn;
093
094 LoaderDelegator(Configuration conf, FSNamesystem fsn) {
095 this.conf = conf;
096 this.fsn = fsn;
097 }
098
099 @Override
100 public MD5Hash getLoadedImageMd5() {
101 return impl.getLoadedImageMd5();
102 }
103
104 @Override
105 public long getLoadedImageTxId() {
106 return impl.getLoadedImageTxId();
107 }
108
109 public void load(File file) throws IOException {
110 Preconditions.checkState(impl == null, "Image already loaded!");
111
112 FileInputStream is = null;
113 try {
114 is = new FileInputStream(file);
115 byte[] magic = new byte[FSImageUtil.MAGIC_HEADER.length];
116 IOUtils.readFully(is, magic, 0, magic.length);
117 if (Arrays.equals(magic, FSImageUtil.MAGIC_HEADER)) {
118 FSImageFormatProtobuf.Loader loader = new FSImageFormatProtobuf.Loader(
119 conf, fsn);
120 impl = loader;
121 loader.load(file);
122 } else {
123 Loader loader = new Loader(conf, fsn);
124 impl = loader;
125 loader.load(file);
126 }
127
128 } finally {
129 IOUtils.cleanup(LOG, is);
130 }
131 }
132 }
133
134 /**
135 * Construct a loader class to load the image. It chooses the loader based on
136 * the layout version.
137 */
138 public static LoaderDelegator newLoader(Configuration conf, FSNamesystem fsn) {
139 return new LoaderDelegator(conf, fsn);
140 }
141
142 /**
143 * A one-shot class responsible for loading an image. The load() function
144 * should be called once, after which the getter methods may be used to retrieve
145 * information about the image that was loaded, if loading was successful.
146 */
147 public static class Loader implements AbstractLoader {
/** The namesystem that this loader populates. */
private final FSNamesystem namesystem;
/** Configuration handed in at construction time. */
private final Configuration conf;

/** Bookkeeping for reference inodes encountered while loading. */
private final ReferenceMap referenceMap = new ReferenceMap();
/**
 * Snapshots read from the image, looked up by integer key; remains null
 * unless the image layout supports snapshots.
 */
private Map<Integer, Snapshot> snapshotMap;

/** Transaction ID of the last edit represented by the loaded file. */
private long imgTxId;
/** MD5 sum of the loaded file. */
private MD5Hash imgDigest;
/** Becomes true once a file has been loaded with this loader. */
private boolean loaded;

Loader(Configuration conf, FSNamesystem namesystem) {
  this.namesystem = namesystem;
  this.conf = conf;
}
167
168 /**
169 * Return the MD5 checksum of the image that has been loaded.
170 * @throws IllegalStateException if load() has not yet been called.
171 */
172 @Override
173 public MD5Hash getLoadedImageMd5() {
174 checkLoaded();
175 return imgDigest;
176 }
177
178 @Override
179 public long getLoadedImageTxId() {
180 checkLoaded();
181 return imgTxId;
182 }
183
184 /**
185 * Throw IllegalStateException if load() has not yet been called.
186 */
187 private void checkLoaded() {
188 if (!loaded) {
189 throw new IllegalStateException("Image not yet loaded!");
190 }
191 }
192
193 /**
194 * Throw IllegalStateException if load() has already been called.
195 */
196 private void checkNotLoaded() {
197 if (loaded) {
198 throw new IllegalStateException("Image already loaded!");
199 }
200 }
201
202 public void load(File curFile) throws IOException {
203 checkNotLoaded();
204 assert curFile != null : "curFile is null";
205
206 StartupProgress prog = NameNode.getStartupProgress();
207 Step step = new Step(StepType.INODES);
208 prog.beginStep(Phase.LOADING_FSIMAGE, step);
209 long startTime = now();
210
211 //
212 // Load in bits
213 //
214 MessageDigest digester = MD5Hash.getDigester();
215 DigestInputStream fin = new DigestInputStream(
216 new FileInputStream(curFile), digester);
217
218 DataInputStream in = new DataInputStream(fin);
219 try {
220 // read image version: first appeared in version -1
221 int imgVersion = in.readInt();
222 if (getLayoutVersion() != imgVersion) {
223 throw new InconsistentFSStateException(curFile,
224 "imgVersion " + imgVersion +
225 " expected to be " + getLayoutVersion());
226 }
227 boolean supportSnapshot = NameNodeLayoutVersion.supports(
228 LayoutVersion.Feature.SNAPSHOT, imgVersion);
229 if (NameNodeLayoutVersion.supports(
230 LayoutVersion.Feature.ADD_LAYOUT_FLAGS, imgVersion)) {
231 LayoutFlags.read(in);
232 }
233
234 // read namespaceID: first appeared in version -2
235 in.readInt();
236
237 long numFiles = in.readLong();
238
239 // read in the last generation stamp for legacy blocks.
240 long genstamp = in.readLong();
241 namesystem.setGenerationStampV1(genstamp);
242
243 if (NameNodeLayoutVersion.supports(
244 LayoutVersion.Feature.SEQUENTIAL_BLOCK_ID, imgVersion)) {
245 // read the starting generation stamp for sequential block IDs
246 genstamp = in.readLong();
247 namesystem.setGenerationStampV2(genstamp);
248
249 // read the last generation stamp for blocks created after
250 // the switch to sequential block IDs.
251 long stampAtIdSwitch = in.readLong();
252 namesystem.setGenerationStampV1Limit(stampAtIdSwitch);
253
254 // read the max sequential block ID.
255 long maxSequentialBlockId = in.readLong();
256 namesystem.setLastAllocatedBlockId(maxSequentialBlockId);
257 } else {
258 long startingGenStamp = namesystem.upgradeGenerationStampToV2();
259 // This is an upgrade.
260 LOG.info("Upgrading to sequential block IDs. Generation stamp " +
261 "for new blocks set to " + startingGenStamp);
262 }
263
264 // read the transaction ID of the last edit represented by
265 // this image
266 if (NameNodeLayoutVersion.supports(
267 LayoutVersion.Feature.STORED_TXIDS, imgVersion)) {
268 imgTxId = in.readLong();
269 } else {
270 imgTxId = 0;
271 }
272
273 // read the last allocated inode id in the fsimage
274 if (NameNodeLayoutVersion.supports(
275 LayoutVersion.Feature.ADD_INODE_ID, imgVersion)) {
276 long lastInodeId = in.readLong();
277 namesystem.resetLastInodeId(lastInodeId);
278 if (LOG.isDebugEnabled()) {
279 LOG.debug("load last allocated InodeId from fsimage:" + lastInodeId);
280 }
281 } else {
282 if (LOG.isDebugEnabled()) {
283 LOG.debug("Old layout version doesn't have inode id."
284 + " Will assign new id for each inode.");
285 }
286 }
287
288 if (supportSnapshot) {
289 snapshotMap = namesystem.getSnapshotManager().read(in, this);
290 }
291
292 // read compression related info
293 FSImageCompression compression;
294 if (NameNodeLayoutVersion.supports(
295 LayoutVersion.Feature.FSIMAGE_COMPRESSION, imgVersion)) {
296 compression = FSImageCompression.readCompressionHeader(conf, in);
297 } else {
298 compression = FSImageCompression.createNoopCompression();
299 }
300 in = compression.unwrapInputStream(fin);
301
302 LOG.info("Loading image file " + curFile + " using " + compression);
303
304 // load all inodes
305 LOG.info("Number of files = " + numFiles);
306 prog.setTotal(Phase.LOADING_FSIMAGE, step, numFiles);
307 Counter counter = prog.getCounter(Phase.LOADING_FSIMAGE, step);
308 if (NameNodeLayoutVersion.supports(
309 LayoutVersion.Feature.FSIMAGE_NAME_OPTIMIZATION, imgVersion)) {
310 if (supportSnapshot) {
311 loadLocalNameINodesWithSnapshot(numFiles, in, counter);
312 } else {
313 loadLocalNameINodes(numFiles, in, counter);
314 }
315 } else {
316 loadFullNameINodes(numFiles, in, counter);
317 }
318
319 loadFilesUnderConstruction(in, supportSnapshot, counter);
320 prog.endStep(Phase.LOADING_FSIMAGE, step);
321 // Now that the step is finished, set counter equal to total to adjust
322 // for possible under-counting due to reference inodes.
323 prog.setCount(Phase.LOADING_FSIMAGE, step, numFiles);
324
325 loadSecretManagerState(in);
326
327 loadCacheManagerState(in);
328
329 // make sure to read to the end of file
330 boolean eof = (in.read() == -1);
331 assert eof : "Should have reached the end of image file " + curFile;
332 } finally {
333 in.close();
334 }
335
336 imgDigest = new MD5Hash(digester.digest());
337 loaded = true;
338
339 LOG.info("Image file " + curFile + " of size " + curFile.length() +
340 " bytes loaded in " + (now() - startTime)/1000 + " seconds.");
341 }
342
343 /** Update the root node's attributes */
344 private void updateRootAttr(INodeWithAdditionalFields root) {
345 final Quota.Counts q = root.getQuotaCounts();
346 final long nsQuota = q.get(Quota.NAMESPACE);
347 final long dsQuota = q.get(Quota.DISKSPACE);
348 FSDirectory fsDir = namesystem.dir;
349 if (nsQuota != -1 || dsQuota != -1) {
350 fsDir.rootDir.getDirectoryWithQuotaFeature().setQuota(nsQuota, dsQuota);
351 }
352 fsDir.rootDir.cloneModificationTime(root);
353 fsDir.rootDir.clonePermissionStatus(root);
354 }
355
356 /**
357 * Load fsimage files when 1) only local names are stored,
358 * and 2) snapshot is supported.
359 *
360 * @param numFiles number of files expected to be read
361 * @param in Image input stream
362 * @param counter Counter to increment for namenode startup progress
363 */
364 private void loadLocalNameINodesWithSnapshot(long numFiles, DataInput in,
365 Counter counter) throws IOException {
366 assert NameNodeLayoutVersion.supports(
367 LayoutVersion.Feature.FSIMAGE_NAME_OPTIMIZATION, getLayoutVersion());
368 assert NameNodeLayoutVersion.supports(
369 LayoutVersion.Feature.SNAPSHOT, getLayoutVersion());
370
371 // load root
372 loadRoot(in, counter);
373 // load rest of the nodes recursively
374 loadDirectoryWithSnapshot(in, counter);
375 }
376
377 /**
378 * load fsimage files assuming only local names are stored. Used when
379 * snapshots are not supported by the layout version.
380 *
381 * @param numFiles number of files expected to be read
382 * @param in image input stream
383 * @param counter Counter to increment for namenode startup progress
384 * @throws IOException
385 */
386 private void loadLocalNameINodes(long numFiles, DataInput in, Counter counter)
387 throws IOException {
388 assert NameNodeLayoutVersion.supports(
389 LayoutVersion.Feature.FSIMAGE_NAME_OPTIMIZATION, getLayoutVersion());
390 assert numFiles > 0;
391
392 // load root
393 loadRoot(in, counter);
394 // have loaded the first file (the root)
395 numFiles--;
396
397 // load rest of the nodes directory by directory
398 while (numFiles > 0) {
399 numFiles -= loadDirectory(in, counter);
400 }
401 if (numFiles != 0) {
402 throw new IOException("Read unexpect number of files: " + -numFiles);
403 }
404 }
405
406 /**
407 * Load information about root, and use the information to update the root
408 * directory of NameSystem.
409 * @param in The {@link DataInput} instance to read.
410 * @param counter Counter to increment for namenode startup progress
411 */
412 private void loadRoot(DataInput in, Counter counter)
413 throws IOException {
414 // load root
415 if (in.readShort() != 0) {
416 throw new IOException("First node is not root");
417 }
418 final INodeDirectory root = loadINode(null, false, in, counter)
419 .asDirectory();
420 // update the root's attributes
421 updateRootAttr(root);
422 }
423
424 /** Load children nodes for the parent directory. */
425 private int loadChildren(INodeDirectory parent, DataInput in,
426 Counter counter) throws IOException {
427 int numChildren = in.readInt();
428 for (int i = 0; i < numChildren; i++) {
429 // load single inode
430 INode newNode = loadINodeWithLocalName(false, in, true, counter);
431 addToParent(parent, newNode);
432 }
433 return numChildren;
434 }
435
436 /**
437 * Load a directory when snapshot is supported.
438 * @param in The {@link DataInput} instance to read.
439 * @param counter Counter to increment for namenode startup progress
440 */
441 private void loadDirectoryWithSnapshot(DataInput in, Counter counter)
442 throws IOException {
443 // Step 1. Identify the parent INode
444 long inodeId = in.readLong();
445 final INodeDirectory parent = this.namesystem.dir.getInode(inodeId)
446 .asDirectory();
447
448 // Check if the whole subtree has been saved (for reference nodes)
449 boolean toLoadSubtree = referenceMap.toProcessSubtree(parent.getId());
450 if (!toLoadSubtree) {
451 return;
452 }
453
454 // Step 2. Load snapshots if parent is snapshottable
455 int numSnapshots = in.readInt();
456 if (numSnapshots >= 0) {
457 final INodeDirectorySnapshottable snapshottableParent
458 = INodeDirectorySnapshottable.valueOf(parent, parent.getLocalName());
459 // load snapshots and snapshotQuota
460 SnapshotFSImageFormat.loadSnapshotList(snapshottableParent,
461 numSnapshots, in, this);
462 if (snapshottableParent.getSnapshotQuota() > 0) {
463 // add the directory to the snapshottable directory list in
464 // SnapshotManager. Note that we only add root when its snapshot quota
465 // is positive.
466 this.namesystem.getSnapshotManager().addSnapshottable(
467 snapshottableParent);
468 }
469 }
470
471 // Step 3. Load children nodes under parent
472 loadChildren(parent, in, counter);
473
474 // Step 4. load Directory Diff List
475 SnapshotFSImageFormat.loadDirectoryDiffList(parent, in, this);
476
477 // Recursively load sub-directories, including snapshot copies of deleted
478 // directories
479 int numSubTree = in.readInt();
480 for (int i = 0; i < numSubTree; i++) {
481 loadDirectoryWithSnapshot(in, counter);
482 }
483 }
484
485 /**
486 * Load all children of a directory
487 *
488 * @param in
489 * @param counter Counter to increment for namenode startup progress
490 * @return number of child inodes read
491 * @throws IOException
492 */
493 private int loadDirectory(DataInput in, Counter counter) throws IOException {
494 String parentPath = FSImageSerialization.readString(in);
495 // Rename .snapshot paths if we're doing an upgrade
496 parentPath = renameReservedPathsOnUpgrade(parentPath, getLayoutVersion());
497 final INodeDirectory parent = INodeDirectory.valueOf(
498 namesystem.dir.rootDir.getNode(parentPath, true), parentPath);
499 return loadChildren(parent, in, counter);
500 }
501
502 /**
503 * load fsimage files assuming full path names are stored
504 *
505 * @param numFiles total number of files to load
506 * @param in data input stream
507 * @param counter Counter to increment for namenode startup progress
508 * @throws IOException if any error occurs
509 */
510 private void loadFullNameINodes(long numFiles, DataInput in, Counter counter)
511 throws IOException {
512 byte[][] pathComponents;
513 byte[][] parentPath = {{}};
514 FSDirectory fsDir = namesystem.dir;
515 INodeDirectory parentINode = fsDir.rootDir;
516 for (long i = 0; i < numFiles; i++) {
517 pathComponents = FSImageSerialization.readPathComponents(in);
518 final INode newNode = loadINode(
519 pathComponents[pathComponents.length-1], false, in, counter);
520
521 if (isRoot(pathComponents)) { // it is the root
522 // update the root's attributes
523 updateRootAttr(newNode.asDirectory());
524 continue;
525 }
526
527 namesystem.dir.addToInodeMap(newNode);
528 // check if the new inode belongs to the same parent
529 if(!isParent(pathComponents, parentPath)) {
530 parentINode = getParentINodeDirectory(pathComponents);
531 parentPath = getParent(pathComponents);
532 }
533
534 // add new inode
535 addToParent(parentINode, newNode);
536 }
537 }
538
539 private INodeDirectory getParentINodeDirectory(byte[][] pathComponents
540 ) throws FileNotFoundException, PathIsNotDirectoryException,
541 UnresolvedLinkException {
542 if (pathComponents.length < 2) { // root
543 return null;
544 }
545 // Gets the parent INode
546 final INodesInPath inodes = namesystem.dir.getExistingPathINodes(
547 pathComponents);
548 return INodeDirectory.valueOf(inodes.getINode(-2), pathComponents);
549 }
550
551 /**
552 * Add the child node to parent and, if child is a file, update block map.
553 * This method is only used for image loading so that synchronization,
554 * modification time update and space count update are not needed.
555 */
556 private void addToParent(INodeDirectory parent, INode child) {
557 FSDirectory fsDir = namesystem.dir;
558 if (parent == fsDir.rootDir) {
559 child.setLocalName(renameReservedRootComponentOnUpgrade(
560 child.getLocalNameBytes(), getLayoutVersion()));
561 }
562 // NOTE: This does not update space counts for parents
563 if (!parent.addChild(child)) {
564 return;
565 }
566 namesystem.dir.cacheName(child);
567
568 if (child.isFile()) {
569 updateBlocksMap(child.asFile());
570 }
571 }
572
573 public void updateBlocksMap(INodeFile file) {
574 // Add file->block mapping
575 final BlockInfo[] blocks = file.getBlocks();
576 if (blocks != null) {
577 final BlockManager bm = namesystem.getBlockManager();
578 for (int i = 0; i < blocks.length; i++) {
579 file.setBlock(i, bm.addBlockCollection(blocks[i], file));
580 }
581 }
582 }
583
584 public INode loadINodeWithLocalName(boolean isSnapshotINode, DataInput in,
585 boolean updateINodeMap) throws IOException {
586 return loadINodeWithLocalName(isSnapshotINode, in, updateINodeMap, null);
587 }
588
589 public INode loadINodeWithLocalName(boolean isSnapshotINode,
590 DataInput in, boolean updateINodeMap, Counter counter)
591 throws IOException {
592 byte[] localName = FSImageSerialization.readLocalName(in);
593 localName =
594 renameReservedComponentOnUpgrade(localName, getLayoutVersion());
595 INode inode = loadINode(localName, isSnapshotINode, in, counter);
596 if (updateINodeMap) {
597 namesystem.dir.addToInodeMap(inode);
598 }
599 return inode;
600 }
601
602 /**
603 * load an inode from fsimage except for its name
604 *
605 * @param in data input stream from which image is read
606 * @param counter Counter to increment for namenode startup progress
607 * @return an inode
608 */
609 @SuppressWarnings("deprecation")
610 INode loadINode(final byte[] localName, boolean isSnapshotINode,
611 DataInput in, Counter counter) throws IOException {
612 final int imgVersion = getLayoutVersion();
613 if (NameNodeLayoutVersion.supports(
614 LayoutVersion.Feature.SNAPSHOT, imgVersion)) {
615 namesystem.getFSDirectory().verifyINodeName(localName);
616 }
617
618 long inodeId = NameNodeLayoutVersion.supports(
619 LayoutVersion.Feature.ADD_INODE_ID, imgVersion) ? in.readLong()
620 : namesystem.allocateNewInodeId();
621
622 final short replication = namesystem.getBlockManager().adjustReplication(
623 in.readShort());
624 final long modificationTime = in.readLong();
625 long atime = 0;
626 if (NameNodeLayoutVersion.supports(
627 LayoutVersion.Feature.FILE_ACCESS_TIME, imgVersion)) {
628 atime = in.readLong();
629 }
630 final long blockSize = in.readLong();
631 final int numBlocks = in.readInt();
632
633 if (numBlocks >= 0) {
634 // file
635
636 // read blocks
637 BlockInfo[] blocks = new BlockInfo[numBlocks];
638 for (int j = 0; j < numBlocks; j++) {
639 blocks[j] = new BlockInfo(replication);
640 blocks[j].readFields(in);
641 }
642
643 String clientName = "";
644 String clientMachine = "";
645 boolean underConstruction = false;
646 FileDiffList fileDiffs = null;
647 if (NameNodeLayoutVersion.supports(
648 LayoutVersion.Feature.SNAPSHOT, imgVersion)) {
649 // read diffs
650 fileDiffs = SnapshotFSImageFormat.loadFileDiffList(in, this);
651
652 if (isSnapshotINode) {
653 underConstruction = in.readBoolean();
654 if (underConstruction) {
655 clientName = FSImageSerialization.readString(in);
656 clientMachine = FSImageSerialization.readString(in);
657 // convert the last block to BlockUC
658 if (blocks != null && blocks.length > 0) {
659 BlockInfo lastBlk = blocks[blocks.length - 1];
660 blocks[blocks.length - 1] = new BlockInfoUnderConstruction(
661 lastBlk, replication);
662 }
663 }
664 }
665 }
666
667 final PermissionStatus permissions = PermissionStatus.read(in);
668
669 // return
670 if (counter != null) {
671 counter.increment();
672 }
673 final INodeFile file = new INodeFile(inodeId, localName, permissions,
674 modificationTime, atime, blocks, replication, blockSize);
675 if (underConstruction) {
676 file.toUnderConstruction(clientName, clientMachine, null);
677 }
678 return fileDiffs == null ? file : new INodeFile(file, fileDiffs);
679 } else if (numBlocks == -1) {
680 //directory
681
682 //read quotas
683 final long nsQuota = in.readLong();
684 long dsQuota = -1L;
685 if (NameNodeLayoutVersion.supports(
686 LayoutVersion.Feature.DISKSPACE_QUOTA, imgVersion)) {
687 dsQuota = in.readLong();
688 }
689
690 //read snapshot info
691 boolean snapshottable = false;
692 boolean withSnapshot = false;
693 if (NameNodeLayoutVersion.supports(
694 LayoutVersion.Feature.SNAPSHOT, imgVersion)) {
695 snapshottable = in.readBoolean();
696 if (!snapshottable) {
697 withSnapshot = in.readBoolean();
698 }
699 }
700
701 final PermissionStatus permissions = PermissionStatus.read(in);
702
703 //return
704 if (counter != null) {
705 counter.increment();
706 }
707 final INodeDirectory dir = new INodeDirectory(inodeId, localName,
708 permissions, modificationTime);
709 if (nsQuota >= 0 || dsQuota >= 0) {
710 dir.addDirectoryWithQuotaFeature(nsQuota, dsQuota);
711 }
712 if (withSnapshot) {
713 dir.addSnapshotFeature(null);
714 }
715 return snapshottable ? new INodeDirectorySnapshottable(dir) : dir;
716 } else if (numBlocks == -2) {
717 //symlink
718 if (!FileSystem.areSymlinksEnabled()) {
719 throw new IOException("Symlinks not supported - please remove symlink before upgrading to this version of HDFS");
720 }
721
722 final String symlink = Text.readString(in);
723 final PermissionStatus permissions = PermissionStatus.read(in);
724 if (counter != null) {
725 counter.increment();
726 }
727 return new INodeSymlink(inodeId, localName, permissions,
728 modificationTime, atime, symlink);
729 } else if (numBlocks == -3) {
730 //reference
731 // Intentionally do not increment counter, because it is too difficult at
732 // this point to assess whether or not this is a reference that counts
733 // toward quota.
734
735 final boolean isWithName = in.readBoolean();
736 // lastSnapshotId for WithName node, dstSnapshotId for DstReference node
737 int snapshotId = in.readInt();
738
739 final INodeReference.WithCount withCount
740 = referenceMap.loadINodeReferenceWithCount(isSnapshotINode, in, this);
741
742 if (isWithName) {
743 return new INodeReference.WithName(null, withCount, localName,
744 snapshotId);
745 } else {
746 final INodeReference ref = new INodeReference.DstReference(null,
747 withCount, snapshotId);
748 return ref;
749 }
750 }
751
752 throw new IOException("Unknown inode type: numBlocks=" + numBlocks);
753 }
754
755 /** Load {@link INodeFileAttributes}. */
756 public INodeFileAttributes loadINodeFileAttributes(DataInput in)
757 throws IOException {
758 final int layoutVersion = getLayoutVersion();
759
760 if (!NameNodeLayoutVersion.supports(
761 LayoutVersion.Feature.OPTIMIZE_SNAPSHOT_INODES, layoutVersion)) {
762 return loadINodeWithLocalName(true, in, false).asFile();
763 }
764
765 final byte[] name = FSImageSerialization.readLocalName(in);
766 final PermissionStatus permissions = PermissionStatus.read(in);
767 final long modificationTime = in.readLong();
768 final long accessTime = in.readLong();
769
770 final short replication = namesystem.getBlockManager().adjustReplication(
771 in.readShort());
772 final long preferredBlockSize = in.readLong();
773
774 return new INodeFileAttributes.SnapshotCopy(name, permissions, null, modificationTime,
775 accessTime, replication, preferredBlockSize);
776 }
777
778 public INodeDirectoryAttributes loadINodeDirectoryAttributes(DataInput in)
779 throws IOException {
780 final int layoutVersion = getLayoutVersion();
781
782 if (!NameNodeLayoutVersion.supports(
783 LayoutVersion.Feature.OPTIMIZE_SNAPSHOT_INODES, layoutVersion)) {
784 return loadINodeWithLocalName(true, in, false).asDirectory();
785 }
786
787 final byte[] name = FSImageSerialization.readLocalName(in);
788 final PermissionStatus permissions = PermissionStatus.read(in);
789 final long modificationTime = in.readLong();
790
791 //read quotas
792 final long nsQuota = in.readLong();
793 final long dsQuota = in.readLong();
794
795 return nsQuota == -1L && dsQuota == -1L?
796 new INodeDirectoryAttributes.SnapshotCopy(name, permissions, null, modificationTime)
797 : new INodeDirectoryAttributes.CopyWithQuota(name, permissions,
798 null, modificationTime, nsQuota, dsQuota);
799 }
800
801 private void loadFilesUnderConstruction(DataInput in,
802 boolean supportSnapshot, Counter counter) throws IOException {
803 FSDirectory fsDir = namesystem.dir;
804 int size = in.readInt();
805
806 LOG.info("Number of files under construction = " + size);
807
808 for (int i = 0; i < size; i++) {
809 INodeFile cons = FSImageSerialization.readINodeUnderConstruction(in,
810 namesystem, getLayoutVersion());
811 counter.increment();
812
813 // verify that file exists in namespace
814 String path = cons.getLocalName();
815 INodeFile oldnode = null;
816 boolean inSnapshot = false;
817 if (path != null && FSDirectory.isReservedName(path) &&
818 NameNodeLayoutVersion.supports(
819 LayoutVersion.Feature.ADD_INODE_ID, getLayoutVersion())) {
820 // TODO: for HDFS-5428, we use reserved path for those INodeFileUC in
821 // snapshot. If we support INode ID in the layout version, we can use
822 // the inode id to find the oldnode.
823 oldnode = namesystem.dir.getInode(cons.getId()).asFile();
824 inSnapshot = true;
825 } else {
826 final INodesInPath iip = fsDir.getLastINodeInPath(path);
827 oldnode = INodeFile.valueOf(iip.getINode(0), path);
828 }
829
830 FileUnderConstructionFeature uc = cons.getFileUnderConstructionFeature();
831 oldnode.toUnderConstruction(uc.getClientName(), uc.getClientMachine(),
832 uc.getClientNode());
833 if (oldnode.numBlocks() > 0) {
834 BlockInfo ucBlock = cons.getLastBlock();
835 // we do not replace the inode, just replace the last block of oldnode
836 BlockInfo info = namesystem.getBlockManager().addBlockCollection(
837 ucBlock, oldnode);
838 oldnode.setBlock(oldnode.numBlocks() - 1, info);
839 }
840
841 if (!inSnapshot) {
842 namesystem.leaseManager.addLease(cons
843 .getFileUnderConstructionFeature().getClientName(), path);
844 }
845 }
846 }
847
848 private void loadSecretManagerState(DataInput in)
849 throws IOException {
850 int imgVersion = getLayoutVersion();
851
852 if (!NameNodeLayoutVersion.supports(
853 LayoutVersion.Feature.DELEGATION_TOKEN, imgVersion)) {
854 //SecretManagerState is not available.
855 //This must not happen if security is turned on.
856 return;
857 }
858 namesystem.loadSecretManagerStateCompat(in);
859 }
860
861 private void loadCacheManagerState(DataInput in) throws IOException {
862 int imgVersion = getLayoutVersion();
863 if (!NameNodeLayoutVersion.supports(
864 LayoutVersion.Feature.CACHING, imgVersion)) {
865 return;
866 }
867 namesystem.getCacheManager().loadStateCompat(in);
868 }
869
870 private int getLayoutVersion() {
871 return namesystem.getFSImage().getStorage().getLayoutVersion();
872 }
873
874 private boolean isRoot(byte[][] path) {
875 return path.length == 1 &&
876 path[0] == null;
877 }
878
879 private boolean isParent(byte[][] path, byte[][] parent) {
880 if (path == null || parent == null)
881 return false;
882 if (parent.length == 0 || path.length != parent.length + 1)
883 return false;
884 boolean isParent = true;
885 for (int i = 0; i < parent.length; i++) {
886 isParent = isParent && Arrays.equals(path[i], parent[i]);
887 }
888 return isParent;
889 }
890
891 /**
892 * Return string representing the parent of the given path.
893 */
894 String getParent(String path) {
895 return path.substring(0, path.lastIndexOf(Path.SEPARATOR));
896 }
897
898 byte[][] getParent(byte[][] path) {
899 byte[][] result = new byte[path.length - 1][];
900 for (int i = 0; i < result.length; i++) {
901 result[i] = new byte[path[i].length];
902 System.arraycopy(path[i], 0, result[i], 0, path[i].length);
903 }
904 return result;
905 }
906
907 public Snapshot getSnapshot(DataInput in) throws IOException {
908 return snapshotMap.get(in.readInt());
909 }
910 }
911
912 @VisibleForTesting
913 public static final TreeMap<String, String> renameReservedMap =
914 new TreeMap<String, String>();
915
916 /**
917 * Use the default key-value pairs that will be used to determine how to
918 * rename reserved paths on upgrade.
919 */
920 @VisibleForTesting
921 public static void useDefaultRenameReservedPairs() {
922 renameReservedMap.clear();
923 for (String key: HdfsConstants.RESERVED_PATH_COMPONENTS) {
924 renameReservedMap.put(
925 key,
926 key + "." + HdfsConstants.NAMENODE_LAYOUT_VERSION + "."
927 + "UPGRADE_RENAMED");
928 }
929 }
930
931 /**
932 * Set the key-value pairs that will be used to determine how to rename
933 * reserved paths on upgrade.
934 */
935 @VisibleForTesting
936 public static void setRenameReservedPairs(String renameReserved) {
937 // Clear and set the default values
938 useDefaultRenameReservedPairs();
939 // Overwrite with provided values
940 setRenameReservedMapInternal(renameReserved);
941 }
942
943 private static void setRenameReservedMapInternal(String renameReserved) {
944 Collection<String> pairs =
945 StringUtils.getTrimmedStringCollection(renameReserved);
946 for (String p : pairs) {
947 String[] pair = StringUtils.split(p, '/', '=');
948 Preconditions.checkArgument(pair.length == 2,
949 "Could not parse key-value pair " + p);
950 String key = pair[0];
951 String value = pair[1];
952 Preconditions.checkArgument(DFSUtil.isReservedPathComponent(key),
953 "Unknown reserved path " + key);
954 Preconditions.checkArgument(DFSUtil.isValidNameForComponent(value),
955 "Invalid rename path for " + key + ": " + value);
956 LOG.info("Will rename reserved path " + key + " to " + value);
957 renameReservedMap.put(key, value);
958 }
959 }
960
961 /**
962 * When upgrading from an old version, the filesystem could contain paths
963 * that are now reserved in the new version (e.g. .snapshot). This renames
964 * these new reserved paths to a user-specified value to avoid collisions
965 * with the reserved name.
966 *
967 * @param path Old path potentially containing a reserved path
968 * @return New path with reserved path components renamed to user value
969 */
970 static String renameReservedPathsOnUpgrade(String path,
971 final int layoutVersion) {
972 final String oldPath = path;
973 // If any known LVs aren't supported, we're doing an upgrade
974 if (!NameNodeLayoutVersion.supports(Feature.ADD_INODE_ID, layoutVersion)) {
975 String[] components = INode.getPathNames(path);
976 // Only need to worry about the root directory
977 if (components.length > 1) {
978 components[1] = DFSUtil.bytes2String(
979 renameReservedRootComponentOnUpgrade(
980 DFSUtil.string2Bytes(components[1]),
981 layoutVersion));
982 path = DFSUtil.strings2PathString(components);
983 }
984 }
985 if (!NameNodeLayoutVersion.supports(Feature.SNAPSHOT, layoutVersion)) {
986 String[] components = INode.getPathNames(path);
987 // Special case the root path
988 if (components.length == 0) {
989 return path;
990 }
991 for (int i=0; i<components.length; i++) {
992 components[i] = DFSUtil.bytes2String(
993 renameReservedComponentOnUpgrade(
994 DFSUtil.string2Bytes(components[i]),
995 layoutVersion));
996 }
997 path = DFSUtil.strings2PathString(components);
998 }
999
1000 if (!path.equals(oldPath)) {
1001 LOG.info("Upgrade process renamed reserved path " + oldPath + " to "
1002 + path);
1003 }
1004 return path;
1005 }
1006
1007 private final static String RESERVED_ERROR_MSG =
1008 FSDirectory.DOT_RESERVED_PATH_PREFIX + " is a reserved path and "
1009 + HdfsConstants.DOT_SNAPSHOT_DIR + " is a reserved path component in"
1010 + " this version of HDFS. Please rollback and delete or rename"
1011 + " this path, or upgrade with the "
1012 + StartupOption.RENAMERESERVED.getName()
1013 + " [key-value pairs]"
1014 + " option to automatically rename these paths during upgrade.";
1015
1016 /**
1017 * Same as {@link #renameReservedPathsOnUpgrade}, but for a single
1018 * byte array path component.
1019 */
1020 private static byte[] renameReservedComponentOnUpgrade(byte[] component,
1021 final int layoutVersion) {
1022 // If the LV doesn't support snapshots, we're doing an upgrade
1023 if (!NameNodeLayoutVersion.supports(Feature.SNAPSHOT, layoutVersion)) {
1024 if (Arrays.equals(component, HdfsConstants.DOT_SNAPSHOT_DIR_BYTES)) {
1025 Preconditions.checkArgument(
1026 renameReservedMap != null &&
1027 renameReservedMap.containsKey(HdfsConstants.DOT_SNAPSHOT_DIR),
1028 RESERVED_ERROR_MSG);
1029 component =
1030 DFSUtil.string2Bytes(renameReservedMap
1031 .get(HdfsConstants.DOT_SNAPSHOT_DIR));
1032 }
1033 }
1034 return component;
1035 }
1036
1037 /**
1038 * Same as {@link #renameReservedPathsOnUpgrade}, but for a single
1039 * byte array path component.
1040 */
1041 private static byte[] renameReservedRootComponentOnUpgrade(byte[] component,
1042 final int layoutVersion) {
1043 // If the LV doesn't support inode IDs, we're doing an upgrade
1044 if (!NameNodeLayoutVersion.supports(Feature.ADD_INODE_ID, layoutVersion)) {
1045 if (Arrays.equals(component, FSDirectory.DOT_RESERVED)) {
1046 Preconditions.checkArgument(
1047 renameReservedMap != null &&
1048 renameReservedMap.containsKey(FSDirectory.DOT_RESERVED_STRING),
1049 RESERVED_ERROR_MSG);
1050 final String renameString = renameReservedMap
1051 .get(FSDirectory.DOT_RESERVED_STRING);
1052 component =
1053 DFSUtil.string2Bytes(renameString);
1054 LOG.info("Renamed root path " + FSDirectory.DOT_RESERVED_STRING
1055 + " to " + renameString);
1056 }
1057 }
1058 return component;
1059 }
1060 }