001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 package org.apache.hadoop.hdfs.server.namenode;
020
021 import java.io.BufferedInputStream;
022 import java.io.BufferedOutputStream;
023 import java.io.File;
024 import java.io.FileInputStream;
025 import java.io.FileOutputStream;
026 import java.io.IOException;
027 import java.io.InputStream;
028 import java.io.OutputStream;
029 import java.io.RandomAccessFile;
030 import java.nio.ByteBuffer;
031 import java.nio.channels.FileChannel;
032 import java.security.DigestOutputStream;
033 import java.security.MessageDigest;
034 import java.util.ArrayList;
035 import java.util.Collections;
036 import java.util.Comparator;
037 import java.util.Map;
038 import java.util.Map.Entry;
039 import java.util.Set;
040
041 import org.apache.commons.logging.Log;
042 import org.apache.commons.logging.LogFactory;
043 import org.apache.hadoop.classification.InterfaceAudience;
044 import org.apache.hadoop.conf.Configuration;
045 import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto;
046 import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto;
047 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
048 import org.apache.hadoop.hdfs.server.namenode.FsImageProto.CacheManagerSection;
049 import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
050 import org.apache.hadoop.hdfs.server.namenode.FsImageProto.NameSystemSection;
051 import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection;
052 import org.apache.hadoop.hdfs.server.namenode.FsImageProto.StringTableSection;
053 import org.apache.hadoop.hdfs.server.namenode.snapshot.FSImageFormatPBSnapshot;
054 import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
055 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
056 import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
057 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;
058 import org.apache.hadoop.hdfs.util.MD5FileUtils;
059 import org.apache.hadoop.io.MD5Hash;
060 import org.apache.hadoop.io.compress.CompressionCodec;
061 import org.apache.hadoop.io.compress.CompressorStream;
062
063 import com.google.common.collect.Lists;
064 import com.google.common.collect.Maps;
065 import com.google.common.io.LimitInputStream;
066 import com.google.protobuf.CodedOutputStream;
067
068 /**
069 * Utility class to read / write fsimage in protobuf format.
070 */
071 @InterfaceAudience.Private
072 public final class FSImageFormatProtobuf {
073 private static final Log LOG = LogFactory.getLog(FSImageFormatProtobuf.class);
074
  /**
   * State shared among the per-section loaders while a single fsimage file
   * is being read.
   */
  public static final class LoaderContext {
    // Maps string-table ids to their strings. Populated by
    // Loader#loadStringTableSection; slot 0 is presumably reserved for the
    // null/absent string (DeduplicationMap assigns ids starting at 1) --
    // confirm against the saver side.
    private String[] stringTable;
    // INode references collected while loading the INODE_REFERENCE section,
    // indexed by their position in that section.
    private final ArrayList<INodeReference> refList = Lists.newArrayList();

    /** @return the id-to-string table loaded from the STRING_TABLE section */
    public String[] getStringTable() {
      return stringTable;
    }

    /** @return the mutable list of loaded INode references */
    public ArrayList<INodeReference> getRefList() {
      return refList;
    }
  }
087
088 public static final class SaverContext {
089 public static class DeduplicationMap<E> {
090 private final Map<E, Integer> map = Maps.newHashMap();
091 private DeduplicationMap() {}
092
093 static <T> DeduplicationMap<T> newMap() {
094 return new DeduplicationMap<T>();
095 }
096
097 int getId(E value) {
098 if (value == null) {
099 return 0;
100 }
101 Integer v = map.get(value);
102 if (v == null) {
103 int nv = map.size() + 1;
104 map.put(value, nv);
105 return nv;
106 }
107 return v;
108 }
109
110 int size() {
111 return map.size();
112 }
113
114 Set<Entry<E, Integer>> entrySet() {
115 return map.entrySet();
116 }
117 }
118 private final ArrayList<INodeReference> refList = Lists.newArrayList();
119
120 private final DeduplicationMap<String> stringMap = DeduplicationMap
121 .newMap();
122
123 public DeduplicationMap<String> getStringMap() {
124 return stringMap;
125 }
126
127 public ArrayList<INodeReference> getRefList() {
128 return refList;
129 }
130 }
131
132 public static final class Loader implements FSImageFormat.AbstractLoader {
133 static final int MINIMUM_FILE_LENGTH = 8;
134 private final Configuration conf;
135 private final FSNamesystem fsn;
136 private final LoaderContext ctx;
137 /** The MD5 sum of the loaded file */
138 private MD5Hash imgDigest;
139 /** The transaction ID of the last edit represented by the loaded file */
140 private long imgTxId;
141
142 Loader(Configuration conf, FSNamesystem fsn) {
143 this.conf = conf;
144 this.fsn = fsn;
145 this.ctx = new LoaderContext();
146 }
147
148 @Override
149 public MD5Hash getLoadedImageMd5() {
150 return imgDigest;
151 }
152
153 @Override
154 public long getLoadedImageTxId() {
155 return imgTxId;
156 }
157
158 public LoaderContext getLoaderContext() {
159 return ctx;
160 }
161
162 void load(File file) throws IOException {
163 long start = System.currentTimeMillis();
164 imgDigest = MD5FileUtils.computeMd5ForFile(file);
165 RandomAccessFile raFile = new RandomAccessFile(file, "r");
166 FileInputStream fin = new FileInputStream(file);
167 try {
168 loadInternal(raFile, fin);
169 long end = System.currentTimeMillis();
170 LOG.info("Loaded FSImage in " + (end - start) / 1000 + " seconds.");
171 } finally {
172 fin.close();
173 raFile.close();
174 }
175 }
176
177 private void loadInternal(RandomAccessFile raFile, FileInputStream fin)
178 throws IOException {
179 if (!FSImageUtil.checkFileFormat(raFile)) {
180 throw new IOException("Unrecognized file format");
181 }
182 FileSummary summary = FSImageUtil.loadSummary(raFile);
183
184 FileChannel channel = fin.getChannel();
185
186 FSImageFormatPBINode.Loader inodeLoader = new FSImageFormatPBINode.Loader(
187 fsn, this);
188 FSImageFormatPBSnapshot.Loader snapshotLoader = new FSImageFormatPBSnapshot.Loader(
189 fsn, this);
190
191 ArrayList<FileSummary.Section> sections = Lists.newArrayList(summary
192 .getSectionsList());
193 Collections.sort(sections, new Comparator<FileSummary.Section>() {
194 @Override
195 public int compare(FileSummary.Section s1, FileSummary.Section s2) {
196 SectionName n1 = SectionName.fromString(s1.getName());
197 SectionName n2 = SectionName.fromString(s2.getName());
198 if (n1 == null) {
199 return n2 == null ? 0 : -1;
200 } else if (n2 == null) {
201 return -1;
202 } else {
203 return n1.ordinal() - n2.ordinal();
204 }
205 }
206 });
207
208 StartupProgress prog = NameNode.getStartupProgress();
209 /**
210 * beginStep() and the endStep() calls do not match the boundary of the
211 * sections. This is because that the current implementation only allows
212 * a particular step to be started for once.
213 */
214 Step currentStep = null;
215
216 for (FileSummary.Section s : sections) {
217 channel.position(s.getOffset());
218 InputStream in = new BufferedInputStream(new LimitInputStream(fin,
219 s.getLength()));
220
221 in = FSImageUtil.wrapInputStreamForCompression(conf,
222 summary.getCodec(), in);
223
224 String n = s.getName();
225
226 switch (SectionName.fromString(n)) {
227 case NS_INFO:
228 loadNameSystemSection(in);
229 break;
230 case STRING_TABLE:
231 loadStringTableSection(in);
232 break;
233 case INODE: {
234 currentStep = new Step(StepType.INODES);
235 prog.beginStep(Phase.LOADING_FSIMAGE, currentStep);
236 inodeLoader.loadINodeSection(in);
237 }
238 break;
239 case INODE_REFERENCE:
240 snapshotLoader.loadINodeReferenceSection(in);
241 break;
242 case INODE_DIR:
243 inodeLoader.loadINodeDirectorySection(in);
244 break;
245 case FILES_UNDERCONSTRUCTION:
246 inodeLoader.loadFilesUnderConstructionSection(in);
247 break;
248 case SNAPSHOT:
249 snapshotLoader.loadSnapshotSection(in);
250 break;
251 case SNAPSHOT_DIFF:
252 snapshotLoader.loadSnapshotDiffSection(in);
253 break;
254 case SECRET_MANAGER: {
255 prog.endStep(Phase.LOADING_FSIMAGE, currentStep);
256 Step step = new Step(StepType.DELEGATION_TOKENS);
257 prog.beginStep(Phase.LOADING_FSIMAGE, step);
258 loadSecretManagerSection(in);
259 prog.endStep(Phase.LOADING_FSIMAGE, step);
260 }
261 break;
262 case CACHE_MANAGER: {
263 Step step = new Step(StepType.CACHE_POOLS);
264 prog.beginStep(Phase.LOADING_FSIMAGE, step);
265 loadCacheManagerSection(in);
266 prog.endStep(Phase.LOADING_FSIMAGE, step);
267 }
268 break;
269 default:
270 LOG.warn("Unrecognized section " + n);
271 break;
272 }
273 }
274 }
275
276 private void loadNameSystemSection(InputStream in) throws IOException {
277 NameSystemSection s = NameSystemSection.parseDelimitedFrom(in);
278 fsn.setGenerationStampV1(s.getGenstampV1());
279 fsn.setGenerationStampV2(s.getGenstampV2());
280 fsn.setGenerationStampV1Limit(s.getGenstampV1Limit());
281 fsn.setLastAllocatedBlockId(s.getLastAllocatedBlockId());
282 imgTxId = s.getTransactionId();
283 if (s.hasRollingUpgradeStartTime()
284 && fsn.getFSImage().hasRollbackFSImage()) {
285 // we set the rollingUpgradeInfo only when we make sure we have the
286 // rollback image
287 fsn.setRollingUpgradeInfo(true, s.getRollingUpgradeStartTime());
288 }
289 }
290
291 private void loadStringTableSection(InputStream in) throws IOException {
292 StringTableSection s = StringTableSection.parseDelimitedFrom(in);
293 ctx.stringTable = new String[s.getNumEntry() + 1];
294 for (int i = 0; i < s.getNumEntry(); ++i) {
295 StringTableSection.Entry e = StringTableSection.Entry
296 .parseDelimitedFrom(in);
297 ctx.stringTable[e.getId()] = e.getStr();
298 }
299 }
300
301 private void loadSecretManagerSection(InputStream in) throws IOException {
302 SecretManagerSection s = SecretManagerSection.parseDelimitedFrom(in);
303 int numKeys = s.getNumKeys(), numTokens = s.getNumTokens();
304 ArrayList<SecretManagerSection.DelegationKey> keys = Lists
305 .newArrayListWithCapacity(numKeys);
306 ArrayList<SecretManagerSection.PersistToken> tokens = Lists
307 .newArrayListWithCapacity(numTokens);
308
309 for (int i = 0; i < numKeys; ++i)
310 keys.add(SecretManagerSection.DelegationKey.parseDelimitedFrom(in));
311
312 for (int i = 0; i < numTokens; ++i)
313 tokens.add(SecretManagerSection.PersistToken.parseDelimitedFrom(in));
314
315 fsn.loadSecretManagerState(s, keys, tokens);
316 }
317
318 private void loadCacheManagerSection(InputStream in) throws IOException {
319 CacheManagerSection s = CacheManagerSection.parseDelimitedFrom(in);
320 ArrayList<CachePoolInfoProto> pools = Lists.newArrayListWithCapacity(s
321 .getNumPools());
322 ArrayList<CacheDirectiveInfoProto> directives = Lists
323 .newArrayListWithCapacity(s.getNumDirectives());
324 for (int i = 0; i < s.getNumPools(); ++i)
325 pools.add(CachePoolInfoProto.parseDelimitedFrom(in));
326 for (int i = 0; i < s.getNumDirectives(); ++i)
327 directives.add(CacheDirectiveInfoProto.parseDelimitedFrom(in));
328 fsn.getCacheManager().loadState(
329 new CacheManager.PersistState(s, pools, directives));
330 }
331
332 }
333
334 public static final class Saver {
335 public static final int CHECK_CANCEL_INTERVAL = 4096;
336
337 private final SaveNamespaceContext context;
338 private final SaverContext saverContext;
339 private long currentOffset = FSImageUtil.MAGIC_HEADER.length;
340 private MD5Hash savedDigest;
341
342 private FileChannel fileChannel;
343 // OutputStream for the section data
344 private OutputStream sectionOutputStream;
345 private CompressionCodec codec;
346 private OutputStream underlyingOutputStream;
347
348 Saver(SaveNamespaceContext context) {
349 this.context = context;
350 this.saverContext = new SaverContext();
351 }
352
353 public MD5Hash getSavedDigest() {
354 return savedDigest;
355 }
356
357 public SaveNamespaceContext getContext() {
358 return context;
359 }
360
361 public SaverContext getSaverContext() {
362 return saverContext;
363 }
364
365 public void commitSection(FileSummary.Builder summary, SectionName name)
366 throws IOException {
367 long oldOffset = currentOffset;
368 flushSectionOutputStream();
369
370 if (codec != null) {
371 sectionOutputStream = codec.createOutputStream(underlyingOutputStream);
372 } else {
373 sectionOutputStream = underlyingOutputStream;
374 }
375 long length = fileChannel.position() - oldOffset;
376 summary.addSections(FileSummary.Section.newBuilder().setName(name.name)
377 .setLength(length).setOffset(currentOffset));
378 currentOffset += length;
379 }
380
381 private void flushSectionOutputStream() throws IOException {
382 if (codec != null) {
383 ((CompressorStream) sectionOutputStream).finish();
384 }
385 sectionOutputStream.flush();
386 }
387
388 void save(File file, FSImageCompression compression) throws IOException {
389 FileOutputStream fout = new FileOutputStream(file);
390 fileChannel = fout.getChannel();
391 try {
392 saveInternal(fout, compression, file.getAbsolutePath().toString());
393 } finally {
394 fout.close();
395 }
396 }
397
398 private static void saveFileSummary(OutputStream out, FileSummary summary)
399 throws IOException {
400 summary.writeDelimitedTo(out);
401 int length = getOndiskTrunkSize(summary);
402 byte[] lengthBytes = new byte[4];
403 ByteBuffer.wrap(lengthBytes).asIntBuffer().put(length);
404 out.write(lengthBytes);
405 }
406
407 private void saveInodes(FileSummary.Builder summary) throws IOException {
408 FSImageFormatPBINode.Saver saver = new FSImageFormatPBINode.Saver(this,
409 summary);
410
411 saver.serializeINodeSection(sectionOutputStream);
412 saver.serializeINodeDirectorySection(sectionOutputStream);
413 saver.serializeFilesUCSection(sectionOutputStream);
414 }
415
416 private void saveSnapshots(FileSummary.Builder summary) throws IOException {
417 FSImageFormatPBSnapshot.Saver snapshotSaver = new FSImageFormatPBSnapshot.Saver(
418 this, summary, context, context.getSourceNamesystem());
419
420 snapshotSaver.serializeSnapshotSection(sectionOutputStream);
421 snapshotSaver.serializeSnapshotDiffSection(sectionOutputStream);
422 snapshotSaver.serializeINodeReferenceSection(sectionOutputStream);
423 }
424
425 private void saveInternal(FileOutputStream fout,
426 FSImageCompression compression, String filePath) throws IOException {
427 StartupProgress prog = NameNode.getStartupProgress();
428 MessageDigest digester = MD5Hash.getDigester();
429
430 underlyingOutputStream = new DigestOutputStream(new BufferedOutputStream(
431 fout), digester);
432 underlyingOutputStream.write(FSImageUtil.MAGIC_HEADER);
433
434 fileChannel = fout.getChannel();
435
436 FileSummary.Builder b = FileSummary.newBuilder()
437 .setOndiskVersion(FSImageUtil.FILE_VERSION)
438 .setLayoutVersion(NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION);
439
440 codec = compression.getImageCodec();
441 if (codec != null) {
442 b.setCodec(codec.getClass().getCanonicalName());
443 sectionOutputStream = codec.createOutputStream(underlyingOutputStream);
444 } else {
445 sectionOutputStream = underlyingOutputStream;
446 }
447
448 saveNameSystemSection(b);
449 // Check for cancellation right after serializing the name system section.
450 // Some unit tests, such as TestSaveNamespace#testCancelSaveNameSpace
451 // depends on this behavior.
452 context.checkCancelled();
453
454 Step step = new Step(StepType.INODES, filePath);
455 prog.beginStep(Phase.SAVING_CHECKPOINT, step);
456 saveInodes(b);
457 saveSnapshots(b);
458 prog.endStep(Phase.SAVING_CHECKPOINT, step);
459
460 step = new Step(StepType.DELEGATION_TOKENS, filePath);
461 prog.beginStep(Phase.SAVING_CHECKPOINT, step);
462 saveSecretManagerSection(b);
463 prog.endStep(Phase.SAVING_CHECKPOINT, step);
464
465 step = new Step(StepType.CACHE_POOLS, filePath);
466 prog.beginStep(Phase.SAVING_CHECKPOINT, step);
467 saveCacheManagerSection(b);
468 prog.endStep(Phase.SAVING_CHECKPOINT, step);
469
470 saveStringTableSection(b);
471
472 // We use the underlyingOutputStream to write the header. Therefore flush
473 // the buffered stream (which is potentially compressed) first.
474 flushSectionOutputStream();
475
476 FileSummary summary = b.build();
477 saveFileSummary(underlyingOutputStream, summary);
478 underlyingOutputStream.close();
479 savedDigest = new MD5Hash(digester.digest());
480 }
481
482 private void saveSecretManagerSection(FileSummary.Builder summary)
483 throws IOException {
484 final FSNamesystem fsn = context.getSourceNamesystem();
485 DelegationTokenSecretManager.SecretManagerState state = fsn
486 .saveSecretManagerState();
487 state.section.writeDelimitedTo(sectionOutputStream);
488 for (SecretManagerSection.DelegationKey k : state.keys)
489 k.writeDelimitedTo(sectionOutputStream);
490
491 for (SecretManagerSection.PersistToken t : state.tokens)
492 t.writeDelimitedTo(sectionOutputStream);
493
494 commitSection(summary, SectionName.SECRET_MANAGER);
495 }
496
497 private void saveCacheManagerSection(FileSummary.Builder summary)
498 throws IOException {
499 final FSNamesystem fsn = context.getSourceNamesystem();
500 CacheManager.PersistState state = fsn.getCacheManager().saveState();
501 state.section.writeDelimitedTo(sectionOutputStream);
502
503 for (CachePoolInfoProto p : state.pools)
504 p.writeDelimitedTo(sectionOutputStream);
505
506 for (CacheDirectiveInfoProto p : state.directives)
507 p.writeDelimitedTo(sectionOutputStream);
508
509 commitSection(summary, SectionName.CACHE_MANAGER);
510 }
511
512 private void saveNameSystemSection(FileSummary.Builder summary)
513 throws IOException {
514 final FSNamesystem fsn = context.getSourceNamesystem();
515 OutputStream out = sectionOutputStream;
516 NameSystemSection.Builder b = NameSystemSection.newBuilder()
517 .setGenstampV1(fsn.getGenerationStampV1())
518 .setGenstampV1Limit(fsn.getGenerationStampV1Limit())
519 .setGenstampV2(fsn.getGenerationStampV2())
520 .setLastAllocatedBlockId(fsn.getLastAllocatedBlockId())
521 .setTransactionId(context.getTxId());
522
523 // We use the non-locked version of getNamespaceInfo here since
524 // the coordinating thread of saveNamespace already has read-locked
525 // the namespace for us. If we attempt to take another readlock
526 // from the actual saver thread, there's a potential of a
527 // fairness-related deadlock. See the comments on HDFS-2223.
528 b.setNamespaceId(fsn.unprotectedGetNamespaceInfo().getNamespaceID());
529 if (fsn.isRollingUpgrade()) {
530 b.setRollingUpgradeStartTime(fsn.getRollingUpgradeInfo().getStartTime());
531 }
532 NameSystemSection s = b.build();
533 s.writeDelimitedTo(out);
534
535 commitSection(summary, SectionName.NS_INFO);
536 }
537
538 private void saveStringTableSection(FileSummary.Builder summary)
539 throws IOException {
540 OutputStream out = sectionOutputStream;
541 StringTableSection.Builder b = StringTableSection.newBuilder()
542 .setNumEntry(saverContext.stringMap.size());
543 b.build().writeDelimitedTo(out);
544 for (Entry<String, Integer> e : saverContext.stringMap.entrySet()) {
545 StringTableSection.Entry.Builder eb = StringTableSection.Entry
546 .newBuilder().setId(e.getValue()).setStr(e.getKey());
547 eb.build().writeDelimitedTo(out);
548 }
549 commitSection(summary, SectionName.STRING_TABLE);
550 }
551 }
552
553 /**
554 * Supported section name. The order of the enum determines the order of
555 * loading.
556 */
557 public enum SectionName {
558 NS_INFO("NS_INFO"),
559 STRING_TABLE("STRING_TABLE"),
560 EXTENDED_ACL("EXTENDED_ACL"),
561 INODE("INODE"),
562 INODE_REFERENCE("INODE_REFERENCE"),
563 SNAPSHOT("SNAPSHOT"),
564 INODE_DIR("INODE_DIR"),
565 FILES_UNDERCONSTRUCTION("FILES_UNDERCONSTRUCTION"),
566 SNAPSHOT_DIFF("SNAPSHOT_DIFF"),
567 SECRET_MANAGER("SECRET_MANAGER"),
568 CACHE_MANAGER("CACHE_MANAGER");
569
570 private static final SectionName[] values = SectionName.values();
571
572 public static SectionName fromString(String name) {
573 for (SectionName n : values) {
574 if (n.name.equals(name))
575 return n;
576 }
577 return null;
578 }
579
580 private final String name;
581
582 private SectionName(String name) {
583 this.name = name;
584 }
585 }
586
587 private static int getOndiskTrunkSize(com.google.protobuf.GeneratedMessage s) {
588 return CodedOutputStream.computeRawVarint32Size(s.getSerializedSize())
589 + s.getSerializedSize();
590 }
591
  // Utility class: private constructor prevents instantiation.
  private FSImageFormatProtobuf() {
  }
594 }