/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.TreeSet;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFile;
import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile;
import org.apache.hadoop.hdfs.util.MD5FileUtils;

import com.google.common.base.Preconditions;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

/**
 * The NNStorageRetentionManager is responsible for inspecting the storage
 * directories of the NN and enforcing a retention policy on checkpoints
 * and edit logs.
 *
 * It delegates the actual removal of files to a StoragePurger
 * implementation, which might delete the files or instead copy them to
 * a filer or HDFS for later analysis.
 */
public class NNStorageRetentionManager {

  private final int numCheckpointsToRetain;
  private final long numExtraEditsToRetain;
  private final int maxExtraEditsSegmentsToRetain;
  private static final Log LOG = LogFactory.getLog(
      NNStorageRetentionManager.class);
  private final NNStorage storage;
  private final StoragePurger purger;
  private final LogsPurgeable purgeableLogs;

  public NNStorageRetentionManager(
      Configuration conf,
      NNStorage storage,
      LogsPurgeable purgeableLogs,
      StoragePurger purger) {
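    // Three knobs govern retention: how many checkpoint images to keep,
    // how many transactions of edits to keep beyond the oldest retained
    // image, and a cap on the number of extra edit segments kept.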
    this.numCheckpointsToRetain = conf.getInt(
        DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY,
        DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_DEFAULT);
    this.numExtraEditsToRetain = conf.getLong(
        DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY,
        DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_DEFAULT);
    this.maxExtraEditsSegmentsToRetain = conf.getInt(
        DFSConfigKeys.DFS_NAMENODE_MAX_EXTRA_EDITS_SEGMENTS_RETAINED_KEY,
        DFSConfigKeys.DFS_NAMENODE_MAX_EXTRA_EDITS_SEGMENTS_RETAINED_DEFAULT);
    Preconditions.checkArgument(numCheckpointsToRetain > 0,
        "Must retain at least one checkpoint");
    Preconditions.checkArgument(numExtraEditsToRetain >= 0,
        DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY +
        " must not be negative");

    this.storage = storage;
    this.purgeableLogs = purgeableLogs;
    this.purger = purger;
  }

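  /**
   * Convenience constructor that purges by deletion, using the default
   * {@link DeletionStoragePurger}.
   */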
  public NNStorageRetentionManager(Configuration conf, NNStorage storage,
      LogsPurgeable purgeableLogs) {
    this(conf, storage, purgeableLogs, new DeletionStoragePurger());
  }

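  /**
   * Inspect the storage directories and purge any checkpoint images and
   * edit log segments that fall outside the configured retention policy.
   */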
  public void purgeOldStorage() throws IOException {
    FSImageTransactionalStorageInspector inspector =
        new FSImageTransactionalStorageInspector();
    storage.inspectStorageDirs(inspector);

    long minImageTxId = getImageTxIdToRetain(inspector);
    purgeCheckpointsOlderThan(inspector, minImageTxId);
    // If fsimage_N is the image we want to keep, then we need to keep
    // all txns > N. We can remove anything < N+1, since fsimage_N
    // reflects the state up to and including N. However, we also
    // provide a "cushion" of older txns that we keep, which is
    // handy for HA, where a remote node may not have as many
    // new images.
    //
    // First, determine the target number of extra transactions to retain based
    // on the configured amount.
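    //
    // For example, if minImageTxId = 10000 and numExtraEditsToRetain = 1000,
    // then minimumRequiredTxId = 10001 and purgeLogsFrom = 9001, leaving a
    // cushion of 1000 transactions below the oldest required txid.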
    long minimumRequiredTxId = minImageTxId + 1;
    long purgeLogsFrom = Math.max(0, minimumRequiredTxId - numExtraEditsToRetain);

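    // Gather all edit log segments starting at or after purgeLogsFrom. The
    // boolean flags request only finalized segments (no in-progress logs);
    // their exact semantics depend on the LogsPurgeable version in use.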
    ArrayList<EditLogInputStream> editLogs = new ArrayList<EditLogInputStream>();
    purgeableLogs.selectInputStreams(editLogs, purgeLogsFrom, false, false);
    Collections.sort(editLogs, new Comparator<EditLogInputStream>() {
      @Override
      public int compare(EditLogInputStream a, EditLogInputStream b) {
        return ComparisonChain.start()
            .compare(a.getFirstTxId(), b.getFirstTxId())
            .compare(a.getLastTxId(), b.getLastTxId())
            .result();
      }
    });
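    // At this point editLogs is sorted oldest-first by (firstTxId, lastTxId),
    // so required segments cluster at the tail and surplus ones at the head.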

    // Remove from consideration any edit logs that are in fact required.
    while (editLogs.size() > 0 &&
        editLogs.get(editLogs.size() - 1).getFirstTxId() >= minimumRequiredTxId) {
      editLogs.remove(editLogs.size() - 1);
    }

    // Next, adjust the number of transactions to retain if doing so would mean
    // keeping too many segments around.
    while (editLogs.size() > maxExtraEditsSegmentsToRetain) {
      purgeLogsFrom = editLogs.get(0).getLastTxId() + 1;
      editLogs.remove(0);
    }
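    // Example: with maxExtraEditsSegmentsToRetain = 2 and surplus segments
    // [1-100], [101-200], [201-300], one iteration drops [1-100] and advances
    // purgeLogsFrom to 101, leaving two segments within the cap.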

    // Finally, ensure that we're not trying to purge any transactions that we
    // actually need.
    if (purgeLogsFrom > minimumRequiredTxId) {
      throw new AssertionError("Should not purge more edits than required to "
          + "restore: " + purgeLogsFrom + " should be <= "
          + minimumRequiredTxId);
    }

    purgeableLogs.purgeLogsOlderThan(purgeLogsFrom);
  }

  private void purgeCheckpointsOlderThan(
      FSImageTransactionalStorageInspector inspector,
      long minTxId) {
    for (FSImageFile image : inspector.getFoundImages()) {
      if (image.getCheckpointTxId() < minTxId) {
        purger.purgeImage(image);
      }
    }
  }

  /**
   * @param inspector inspector that has already inspected all storage dirs
   * @return the transaction ID corresponding to the oldest checkpoint
   * that should be retained.
   */
  private long getImageTxIdToRetain(FSImageTransactionalStorageInspector inspector) {

    List<FSImageFile> images = inspector.getFoundImages();
    TreeSet<Long> imageTxIds = Sets.newTreeSet();
    for (FSImageFile image : images) {
      imageTxIds.add(image.getCheckpointTxId());
    }

    List<Long> imageTxIdsList = Lists.newArrayList(imageTxIds);
    if (imageTxIdsList.isEmpty()) {
      return 0;
    }

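    // The TreeSet iterates in ascending txid order; reverse the list so the
    // newest checkpoints come first, then take the toRetain-th newest txid as
    // the retention floor. For example, txids {10, 20, 30} with
    // numCheckpointsToRetain = 2 yield minTxId = 20, retaining 20 and 30.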
    Collections.reverse(imageTxIdsList);
    int toRetain = Math.min(numCheckpointsToRetain, imageTxIdsList.size());
    long minTxId = imageTxIdsList.get(toRetain - 1);
    LOG.info("Going to retain " + toRetain + " images with txid >= " +
        minTxId);
    return minTxId;
  }

  /**
   * Interface responsible for disposing of old checkpoints and edit logs.
   */
  static interface StoragePurger {
    void purgeLog(EditLogFile log);
    void purgeImage(FSImageFile image);
  }

  static class DeletionStoragePurger implements StoragePurger {
    @Override
    public void purgeLog(EditLogFile log) {
      LOG.info("Purging old edit log " + log);
      deleteOrWarn(log.getFile());
    }

    @Override
    public void purgeImage(FSImageFile image) {
      LOG.info("Purging old image " + image);
      deleteOrWarn(image.getFile());
      deleteOrWarn(MD5FileUtils.getDigestFileForFile(image.getFile()));
    }

    private static void deleteOrWarn(File file) {
      if (!file.delete()) {
        // It's OK if we fail to delete something -- we'll catch it
        // next time we swing through this directory.
        LOG.warn("Could not delete " + file);
      }
    }
  }
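
  /**
   * Illustrative sketch only (not part of the original class): the class
   * javadoc notes that a StoragePurger might copy files elsewhere rather
   * than delete them. A minimal archiving purger could look like this; the
   * archive directory is a hypothetical parameter, not an existing config.
   */
  static class ArchivalStoragePurger implements StoragePurger {
    private final File archiveDir; // hypothetical destination directory

    ArchivalStoragePurger(File archiveDir) {
      this.archiveDir = archiveDir;
    }

    @Override
    public void purgeLog(EditLogFile log) {
      moveOrWarn(log.getFile());
    }

    @Override
    public void purgeImage(FSImageFile image) {
      moveOrWarn(image.getFile());
      moveOrWarn(MD5FileUtils.getDigestFileForFile(image.getFile()));
    }

    private void moveOrWarn(File file) {
      // Best-effort move; like deleteOrWarn above, failures are only logged
      // and would be retried on the next purge pass.
      File dest = new File(archiveDir, file.getName());
      if (!file.renameTo(dest)) {
        LOG.warn("Could not archive " + file + " to " + dest);
      }
    }
  }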
}