001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.hdfs.server.namenode;
019
020 import java.io.File;
021 import java.io.FilenameFilter;
022 import java.io.IOException;
023 import java.util.ArrayList;
024 import java.util.Collections;
025 import java.util.Comparator;
026 import java.util.EnumSet;
027 import java.util.Iterator;
028 import java.util.List;
029 import java.util.TreeSet;
030
031 import org.apache.commons.logging.Log;
032 import org.apache.commons.logging.LogFactory;
033 import org.apache.hadoop.conf.Configuration;
034 import org.apache.hadoop.hdfs.DFSConfigKeys;
035 import org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFile;
036 import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile;
037 import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
038 import org.apache.hadoop.hdfs.util.MD5FileUtils;
039
040 import com.google.common.base.Preconditions;
041 import com.google.common.collect.ComparisonChain;
042 import com.google.common.collect.Lists;
043 import com.google.common.collect.Sets;
044
045 /**
046 * The NNStorageRetentionManager is responsible for inspecting the storage
047 * directories of the NN and enforcing a retention policy on checkpoints
048 * and edit logs.
049 *
050 * It delegates the actual removal of files to a StoragePurger
051 * implementation, which might delete the files or instead copy them to
052 * a filer or HDFS for later analysis.
053 */
054 public class NNStorageRetentionManager {
055
056 private final int numCheckpointsToRetain;
057 private final long numExtraEditsToRetain;
058 private final int maxExtraEditsSegmentsToRetain;
059 private static final Log LOG = LogFactory.getLog(
060 NNStorageRetentionManager.class);
061 private final NNStorage storage;
062 private final StoragePurger purger;
063 private final LogsPurgeable purgeableLogs;
064
065 public NNStorageRetentionManager(
066 Configuration conf,
067 NNStorage storage,
068 LogsPurgeable purgeableLogs,
069 StoragePurger purger) {
070 this.numCheckpointsToRetain = conf.getInt(
071 DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY,
072 DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_DEFAULT);
073 this.numExtraEditsToRetain = conf.getLong(
074 DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY,
075 DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_DEFAULT);
076 this.maxExtraEditsSegmentsToRetain = conf.getInt(
077 DFSConfigKeys.DFS_NAMENODE_MAX_EXTRA_EDITS_SEGMENTS_RETAINED_KEY,
078 DFSConfigKeys.DFS_NAMENODE_MAX_EXTRA_EDITS_SEGMENTS_RETAINED_DEFAULT);
079 Preconditions.checkArgument(numCheckpointsToRetain > 0,
080 "Must retain at least one checkpoint");
081 Preconditions.checkArgument(numExtraEditsToRetain >= 0,
082 DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY +
083 " must not be negative");
084
085 this.storage = storage;
086 this.purgeableLogs = purgeableLogs;
087 this.purger = purger;
088 }
089
090 public NNStorageRetentionManager(Configuration conf, NNStorage storage,
091 LogsPurgeable purgeableLogs) {
092 this(conf, storage, purgeableLogs, new DeletionStoragePurger());
093 }
094
095 void purgeCheckpoints(NameNodeFile nnf) throws IOException {
096 purgeCheckpoinsAfter(nnf, -1);
097 }
098
099 void purgeCheckpoinsAfter(NameNodeFile nnf, long fromTxId)
100 throws IOException {
101 FSImageTransactionalStorageInspector inspector =
102 new FSImageTransactionalStorageInspector(EnumSet.of(nnf));
103 storage.inspectStorageDirs(inspector);
104 for (FSImageFile image : inspector.getFoundImages()) {
105 if (image.getCheckpointTxId() > fromTxId) {
106 purger.purgeImage(image);
107 }
108 }
109 }
110
111 void purgeOldStorage(NameNodeFile nnf) throws IOException {
112 FSImageTransactionalStorageInspector inspector =
113 new FSImageTransactionalStorageInspector(EnumSet.of(nnf));
114 storage.inspectStorageDirs(inspector);
115
116 long minImageTxId = getImageTxIdToRetain(inspector);
117 purgeCheckpointsOlderThan(inspector, minImageTxId);
118
119 if (nnf == NameNodeFile.IMAGE_ROLLBACK) {
120 // do not purge edits for IMAGE_ROLLBACK.
121 return;
122 }
123
124 // If fsimage_N is the image we want to keep, then we need to keep
125 // all txns > N. We can remove anything < N+1, since fsimage_N
126 // reflects the state up to and including N. However, we also
127 // provide a "cushion" of older txns that we keep, which is
128 // handy for HA, where a remote node may not have as many
129 // new images.
130 //
131 // First, determine the target number of extra transactions to retain based
132 // on the configured amount.
133 long minimumRequiredTxId = minImageTxId + 1;
134 long purgeLogsFrom = Math.max(0, minimumRequiredTxId - numExtraEditsToRetain);
135
136 ArrayList<EditLogInputStream> editLogs = new ArrayList<EditLogInputStream>();
137 purgeableLogs.selectInputStreams(editLogs, purgeLogsFrom, false);
138 Collections.sort(editLogs, new Comparator<EditLogInputStream>() {
139 @Override
140 public int compare(EditLogInputStream a, EditLogInputStream b) {
141 return ComparisonChain.start()
142 .compare(a.getFirstTxId(), b.getFirstTxId())
143 .compare(a.getLastTxId(), b.getLastTxId())
144 .result();
145 }
146 });
147
148 // Remove from consideration any edit logs that are in fact required.
149 while (editLogs.size() > 0 &&
150 editLogs.get(editLogs.size() - 1).getFirstTxId() >= minimumRequiredTxId) {
151 editLogs.remove(editLogs.size() - 1);
152 }
153
154 // Next, adjust the number of transactions to retain if doing so would mean
155 // keeping too many segments around.
156 while (editLogs.size() > maxExtraEditsSegmentsToRetain) {
157 purgeLogsFrom = editLogs.get(0).getLastTxId() + 1;
158 editLogs.remove(0);
159 }
160
161 // Finally, ensure that we're not trying to purge any transactions that we
162 // actually need.
163 if (purgeLogsFrom > minimumRequiredTxId) {
164 throw new AssertionError("Should not purge more edits than required to "
165 + "restore: " + purgeLogsFrom + " should be <= "
166 + minimumRequiredTxId);
167 }
168
169 purgeableLogs.purgeLogsOlderThan(purgeLogsFrom);
170 }
171
172 private void purgeCheckpointsOlderThan(
173 FSImageTransactionalStorageInspector inspector,
174 long minTxId) {
175 for (FSImageFile image : inspector.getFoundImages()) {
176 if (image.getCheckpointTxId() < minTxId) {
177 purger.purgeImage(image);
178 }
179 }
180 }
181
182 /**
183 * @param inspector inspector that has already inspected all storage dirs
184 * @return the transaction ID corresponding to the oldest checkpoint
185 * that should be retained.
186 */
187 private long getImageTxIdToRetain(FSImageTransactionalStorageInspector inspector) {
188
189 List<FSImageFile> images = inspector.getFoundImages();
190 TreeSet<Long> imageTxIds = Sets.newTreeSet();
191 for (FSImageFile image : images) {
192 imageTxIds.add(image.getCheckpointTxId());
193 }
194
195 List<Long> imageTxIdsList = Lists.newArrayList(imageTxIds);
196 if (imageTxIdsList.isEmpty()) {
197 return 0;
198 }
199
200 Collections.reverse(imageTxIdsList);
201 int toRetain = Math.min(numCheckpointsToRetain, imageTxIdsList.size());
202 long minTxId = imageTxIdsList.get(toRetain - 1);
203 LOG.info("Going to retain " + toRetain + " images with txid >= " +
204 minTxId);
205 return minTxId;
206 }
207
208 /**
209 * Interface responsible for disposing of old checkpoints and edit logs.
210 */
211 static interface StoragePurger {
212 void purgeLog(EditLogFile log);
213 void purgeImage(FSImageFile image);
214 }
215
216 static class DeletionStoragePurger implements StoragePurger {
217 @Override
218 public void purgeLog(EditLogFile log) {
219 LOG.info("Purging old edit log " + log);
220 deleteOrWarn(log.getFile());
221 }
222
223 @Override
224 public void purgeImage(FSImageFile image) {
225 LOG.info("Purging old image " + image);
226 deleteOrWarn(image.getFile());
227 deleteOrWarn(MD5FileUtils.getDigestFileForFile(image.getFile()));
228 }
229
230 private static void deleteOrWarn(File file) {
231 if (!file.delete()) {
232 // It's OK if we fail to delete something -- we'll catch it
233 // next time we swing through this directory.
234 LOG.warn("Could not delete " + file);
235 }
236 }
237 }
238
239 /**
240 * Delete old OIV fsimages. Since the target dir is not a full blown
241 * storage directory, we simply list and keep the latest ones. For the
242 * same reason, no storage inspector is used.
243 */
244 void purgeOldLegacyOIVImages(String dir, long txid) {
245 File oivImageDir = new File(dir);
246 final String oivImagePrefix = NameNodeFile.IMAGE_LEGACY_OIV.getName();
247 String filesInStorage[];
248
249 // Get the listing
250 filesInStorage = oivImageDir.list(new FilenameFilter() {
251 @Override
252 public boolean accept(File dir, String name) {
253 return name.matches(oivImagePrefix + "_(\\d+)");
254 }
255 });
256
257 // Check whether there is any work to do.
258 if (filesInStorage.length <= numCheckpointsToRetain) {
259 return;
260 }
261
262 // Create a sorted list of txids from the file names.
263 TreeSet<Long> sortedTxIds = new TreeSet<Long>();
264 for (String fName : filesInStorage) {
265 // Extract the transaction id from the file name.
266 long fTxId;
267 try {
268 fTxId = Long.parseLong(fName.substring(oivImagePrefix.length() + 1));
269 } catch (NumberFormatException nfe) {
270 // This should not happen since we have already filtered it.
271 // Log and continue.
272 LOG.warn("Invalid file name. Skipping " + fName);
273 continue;
274 }
275 sortedTxIds.add(Long.valueOf(fTxId));
276 }
277
278 int numFilesToDelete = sortedTxIds.size() - numCheckpointsToRetain;
279 Iterator<Long> iter = sortedTxIds.iterator();
280 while (numFilesToDelete > 0 && iter.hasNext()) {
281 long txIdVal = iter.next().longValue();
282 String fileName = NNStorage.getLegacyOIVImageFileName(txIdVal);
283 LOG.info("Deleting " + fileName);
284 File fileToDelete = new File(oivImageDir, fileName);
285 if (!fileToDelete.delete()) {
286 // deletion failed.
287 LOG.warn("Failed to delete image file: " + fileToDelete);
288 }
289 numFilesToDelete--;
290 }
291 }
292 }