001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.hdfs.server.namenode;
019    
020    import java.io.File;
021    import java.io.FilenameFilter;
022    import java.io.IOException;
023    import java.util.ArrayList;
024    import java.util.Collections;
025    import java.util.Comparator;
026    import java.util.EnumSet;
027    import java.util.Iterator;
028    import java.util.List;
029    import java.util.TreeSet;
030    
031    import org.apache.commons.logging.Log;
032    import org.apache.commons.logging.LogFactory;
033    import org.apache.hadoop.conf.Configuration;
034    import org.apache.hadoop.hdfs.DFSConfigKeys;
035    import org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFile;
036    import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile;
037    import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
038    import org.apache.hadoop.hdfs.util.MD5FileUtils;
039    
040    import com.google.common.base.Preconditions;
041    import com.google.common.collect.ComparisonChain;
042    import com.google.common.collect.Lists;
043    import com.google.common.collect.Sets;
044    
045    /**
046     * The NNStorageRetentionManager is responsible for inspecting the storage
047     * directories of the NN and enforcing a retention policy on checkpoints
048     * and edit logs.
049     * 
050     * It delegates the actual removal of files to a StoragePurger
051     * implementation, which might delete the files or instead copy them to
052     * a filer or HDFS for later analysis.
053     */
054    public class NNStorageRetentionManager {
055      
056      private final int numCheckpointsToRetain;
057      private final long numExtraEditsToRetain;
058      private final int maxExtraEditsSegmentsToRetain;
059      private static final Log LOG = LogFactory.getLog(
060          NNStorageRetentionManager.class);
061      private final NNStorage storage;
062      private final StoragePurger purger;
063      private final LogsPurgeable purgeableLogs;
064      
065      public NNStorageRetentionManager(
066          Configuration conf,
067          NNStorage storage,
068          LogsPurgeable purgeableLogs,
069          StoragePurger purger) {
070        this.numCheckpointsToRetain = conf.getInt(
071            DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY,
072            DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_DEFAULT);
073        this.numExtraEditsToRetain = conf.getLong(
074            DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY,
075            DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_DEFAULT);
076        this.maxExtraEditsSegmentsToRetain = conf.getInt(
077            DFSConfigKeys.DFS_NAMENODE_MAX_EXTRA_EDITS_SEGMENTS_RETAINED_KEY,
078            DFSConfigKeys.DFS_NAMENODE_MAX_EXTRA_EDITS_SEGMENTS_RETAINED_DEFAULT);
079        Preconditions.checkArgument(numCheckpointsToRetain > 0,
080            "Must retain at least one checkpoint");
081        Preconditions.checkArgument(numExtraEditsToRetain >= 0,
082            DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY +
083            " must not be negative");
084        
085        this.storage = storage;
086        this.purgeableLogs = purgeableLogs;
087        this.purger = purger;
088      }
089      
090      public NNStorageRetentionManager(Configuration conf, NNStorage storage,
091          LogsPurgeable purgeableLogs) {
092        this(conf, storage, purgeableLogs, new DeletionStoragePurger());
093      }
094    
095      void purgeCheckpoints(NameNodeFile nnf) throws IOException {
096        purgeCheckpoinsAfter(nnf, -1);
097      }
098    
099      void purgeCheckpoinsAfter(NameNodeFile nnf, long fromTxId)
100          throws IOException {
101        FSImageTransactionalStorageInspector inspector =
102            new FSImageTransactionalStorageInspector(EnumSet.of(nnf));
103        storage.inspectStorageDirs(inspector);
104        for (FSImageFile image : inspector.getFoundImages()) {
105          if (image.getCheckpointTxId() > fromTxId) {
106            purger.purgeImage(image);
107          }
108        }
109      }
110    
111      void purgeOldStorage(NameNodeFile nnf) throws IOException {
112        FSImageTransactionalStorageInspector inspector =
113            new FSImageTransactionalStorageInspector(EnumSet.of(nnf));
114        storage.inspectStorageDirs(inspector);
115    
116        long minImageTxId = getImageTxIdToRetain(inspector);
117        purgeCheckpointsOlderThan(inspector, minImageTxId);
118        
119        if (nnf == NameNodeFile.IMAGE_ROLLBACK) {
120          // do not purge edits for IMAGE_ROLLBACK.
121          return;
122        }
123    
124        // If fsimage_N is the image we want to keep, then we need to keep
125        // all txns > N. We can remove anything < N+1, since fsimage_N
126        // reflects the state up to and including N. However, we also
127        // provide a "cushion" of older txns that we keep, which is
128        // handy for HA, where a remote node may not have as many
129        // new images.
130        //
131        // First, determine the target number of extra transactions to retain based
132        // on the configured amount.
133        long minimumRequiredTxId = minImageTxId + 1;
134        long purgeLogsFrom = Math.max(0, minimumRequiredTxId - numExtraEditsToRetain);
135        
136        ArrayList<EditLogInputStream> editLogs = new ArrayList<EditLogInputStream>();
137        purgeableLogs.selectInputStreams(editLogs, purgeLogsFrom, false);
138        Collections.sort(editLogs, new Comparator<EditLogInputStream>() {
139          @Override
140          public int compare(EditLogInputStream a, EditLogInputStream b) {
141            return ComparisonChain.start()
142                .compare(a.getFirstTxId(), b.getFirstTxId())
143                .compare(a.getLastTxId(), b.getLastTxId())
144                .result();
145          }
146        });
147    
148        // Remove from consideration any edit logs that are in fact required.
149        while (editLogs.size() > 0 &&
150            editLogs.get(editLogs.size() - 1).getFirstTxId() >= minimumRequiredTxId) {
151          editLogs.remove(editLogs.size() - 1);
152        }
153        
154        // Next, adjust the number of transactions to retain if doing so would mean
155        // keeping too many segments around.
156        while (editLogs.size() > maxExtraEditsSegmentsToRetain) {
157          purgeLogsFrom = editLogs.get(0).getLastTxId() + 1;
158          editLogs.remove(0);
159        }
160        
161        // Finally, ensure that we're not trying to purge any transactions that we
162        // actually need.
163        if (purgeLogsFrom > minimumRequiredTxId) {
164          throw new AssertionError("Should not purge more edits than required to "
165              + "restore: " + purgeLogsFrom + " should be <= "
166              + minimumRequiredTxId);
167        }
168        
169        purgeableLogs.purgeLogsOlderThan(purgeLogsFrom);
170      }
171      
172      private void purgeCheckpointsOlderThan(
173          FSImageTransactionalStorageInspector inspector,
174          long minTxId) {
175        for (FSImageFile image : inspector.getFoundImages()) {
176          if (image.getCheckpointTxId() < minTxId) {
177            purger.purgeImage(image);
178          }
179        }
180      }
181    
182      /**
183       * @param inspector inspector that has already inspected all storage dirs
184       * @return the transaction ID corresponding to the oldest checkpoint
185       * that should be retained. 
186       */
187      private long getImageTxIdToRetain(FSImageTransactionalStorageInspector inspector) {
188          
189        List<FSImageFile> images = inspector.getFoundImages();
190        TreeSet<Long> imageTxIds = Sets.newTreeSet();
191        for (FSImageFile image : images) {
192          imageTxIds.add(image.getCheckpointTxId());
193        }
194        
195        List<Long> imageTxIdsList = Lists.newArrayList(imageTxIds);
196        if (imageTxIdsList.isEmpty()) {
197          return 0;
198        }
199        
200        Collections.reverse(imageTxIdsList);
201        int toRetain = Math.min(numCheckpointsToRetain, imageTxIdsList.size());    
202        long minTxId = imageTxIdsList.get(toRetain - 1);
203        LOG.info("Going to retain " + toRetain + " images with txid >= " +
204            minTxId);
205        return minTxId;
206      }
207      
208      /**
209       * Interface responsible for disposing of old checkpoints and edit logs.
210       */
211      static interface StoragePurger {
212        void purgeLog(EditLogFile log);
213        void purgeImage(FSImageFile image);
214      }
215      
216      static class DeletionStoragePurger implements StoragePurger {
217        @Override
218        public void purgeLog(EditLogFile log) {
219          LOG.info("Purging old edit log " + log);
220          deleteOrWarn(log.getFile());
221        }
222    
223        @Override
224        public void purgeImage(FSImageFile image) {
225          LOG.info("Purging old image " + image);
226          deleteOrWarn(image.getFile());
227          deleteOrWarn(MD5FileUtils.getDigestFileForFile(image.getFile()));
228        }
229    
230        private static void deleteOrWarn(File file) {
231          if (!file.delete()) {
232            // It's OK if we fail to delete something -- we'll catch it
233            // next time we swing through this directory.
234            LOG.warn("Could not delete " + file);
235          }      
236        }
237      }
238    
239      /**
240       * Delete old OIV fsimages. Since the target dir is not a full blown
241       * storage directory, we simply list and keep the latest ones. For the
242       * same reason, no storage inspector is used.
243       */
244      void purgeOldLegacyOIVImages(String dir, long txid) {
245        File oivImageDir = new File(dir);
246        final String oivImagePrefix = NameNodeFile.IMAGE_LEGACY_OIV.getName();
247        String filesInStorage[];
248    
249        // Get the listing
250        filesInStorage = oivImageDir.list(new FilenameFilter() {
251          @Override
252          public boolean accept(File dir, String name) {
253            return name.matches(oivImagePrefix + "_(\\d+)");
254          }
255        });
256    
257        // Check whether there is any work to do.
258        if (filesInStorage.length <= numCheckpointsToRetain) {
259          return;
260        }
261    
262        // Create a sorted list of txids from the file names.
263        TreeSet<Long> sortedTxIds = new TreeSet<Long>();
264        for (String fName : filesInStorage) {
265          // Extract the transaction id from the file name.
266          long fTxId;
267          try {
268            fTxId = Long.parseLong(fName.substring(oivImagePrefix.length() + 1));
269          } catch (NumberFormatException nfe) {
270            // This should not happen since we have already filtered it.
271            // Log and continue.
272            LOG.warn("Invalid file name. Skipping " + fName);
273            continue;
274          }
275          sortedTxIds.add(Long.valueOf(fTxId));
276        }
277    
278        int numFilesToDelete = sortedTxIds.size() - numCheckpointsToRetain;
279        Iterator<Long> iter = sortedTxIds.iterator();
280        while (numFilesToDelete > 0 && iter.hasNext()) {
281          long txIdVal = iter.next().longValue();
282          String fileName = NNStorage.getLegacyOIVImageFileName(txIdVal);
283          LOG.info("Deleting " + fileName);
284          File fileToDelete = new File(oivImageDir, fileName);
285          if (!fileToDelete.delete()) {
286            // deletion failed.
287            LOG.warn("Failed to delete image file: " + fileToDelete);
288          }
289          numFilesToDelete--;
290        }
291      }
292    }