001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.hdfs.server.namenode;
019    
020    import java.io.File;
021    import java.io.IOException;
022    import java.util.ArrayList;
023    import java.util.Collections;
024    import java.util.Comparator;
025    import java.util.EnumSet;
026    import java.util.List;
027    import java.util.TreeSet;
028    
029    import org.apache.commons.logging.Log;
030    import org.apache.commons.logging.LogFactory;
031    import org.apache.hadoop.conf.Configuration;
032    import org.apache.hadoop.hdfs.DFSConfigKeys;
033    import org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFile;
034    import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile;
035    import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
036    import org.apache.hadoop.hdfs.util.MD5FileUtils;
037    
038    import com.google.common.base.Preconditions;
039    import com.google.common.collect.ComparisonChain;
040    import com.google.common.collect.Lists;
041    import com.google.common.collect.Sets;
042    
043    /**
044     * The NNStorageRetentionManager is responsible for inspecting the storage
045     * directories of the NN and enforcing a retention policy on checkpoints
046     * and edit logs.
047     * 
048     * It delegates the actual removal of files to a StoragePurger
049     * implementation, which might delete the files or instead copy them to
050     * a filer or HDFS for later analysis.
051     */
052    public class NNStorageRetentionManager {
053      
054      private final int numCheckpointsToRetain;
055      private final long numExtraEditsToRetain;
056      private final int maxExtraEditsSegmentsToRetain;
057      private static final Log LOG = LogFactory.getLog(
058          NNStorageRetentionManager.class);
059      private final NNStorage storage;
060      private final StoragePurger purger;
061      private final LogsPurgeable purgeableLogs;
062      
063      public NNStorageRetentionManager(
064          Configuration conf,
065          NNStorage storage,
066          LogsPurgeable purgeableLogs,
067          StoragePurger purger) {
068        this.numCheckpointsToRetain = conf.getInt(
069            DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY,
070            DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_DEFAULT);
071        this.numExtraEditsToRetain = conf.getLong(
072            DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY,
073            DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_DEFAULT);
074        this.maxExtraEditsSegmentsToRetain = conf.getInt(
075            DFSConfigKeys.DFS_NAMENODE_MAX_EXTRA_EDITS_SEGMENTS_RETAINED_KEY,
076            DFSConfigKeys.DFS_NAMENODE_MAX_EXTRA_EDITS_SEGMENTS_RETAINED_DEFAULT);
077        Preconditions.checkArgument(numCheckpointsToRetain > 0,
078            "Must retain at least one checkpoint");
079        Preconditions.checkArgument(numExtraEditsToRetain >= 0,
080            DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY +
081            " must not be negative");
082        
083        this.storage = storage;
084        this.purgeableLogs = purgeableLogs;
085        this.purger = purger;
086      }
087      
088      public NNStorageRetentionManager(Configuration conf, NNStorage storage,
089          LogsPurgeable purgeableLogs) {
090        this(conf, storage, purgeableLogs, new DeletionStoragePurger());
091      }
092    
093      void purgeCheckpoints(NameNodeFile nnf) throws IOException {
094        purgeCheckpoinsAfter(nnf, -1);
095      }
096    
097      void purgeCheckpoinsAfter(NameNodeFile nnf, long fromTxId)
098          throws IOException {
099        FSImageTransactionalStorageInspector inspector =
100            new FSImageTransactionalStorageInspector(EnumSet.of(nnf));
101        storage.inspectStorageDirs(inspector);
102        for (FSImageFile image : inspector.getFoundImages()) {
103          if (image.getCheckpointTxId() > fromTxId) {
104            purger.purgeImage(image);
105          }
106        }
107      }
108    
109      void purgeOldStorage(NameNodeFile nnf) throws IOException {
110        FSImageTransactionalStorageInspector inspector =
111            new FSImageTransactionalStorageInspector(EnumSet.of(nnf));
112        storage.inspectStorageDirs(inspector);
113    
114        long minImageTxId = getImageTxIdToRetain(inspector);
115        purgeCheckpointsOlderThan(inspector, minImageTxId);
116        
117        if (nnf == NameNodeFile.IMAGE_ROLLBACK) {
118          // do not purge edits for IMAGE_ROLLBACK.
119          return;
120        }
121    
122        // If fsimage_N is the image we want to keep, then we need to keep
123        // all txns > N. We can remove anything < N+1, since fsimage_N
124        // reflects the state up to and including N. However, we also
125        // provide a "cushion" of older txns that we keep, which is
126        // handy for HA, where a remote node may not have as many
127        // new images.
128        //
129        // First, determine the target number of extra transactions to retain based
130        // on the configured amount.
131        long minimumRequiredTxId = minImageTxId + 1;
132        long purgeLogsFrom = Math.max(0, minimumRequiredTxId - numExtraEditsToRetain);
133        
134        ArrayList<EditLogInputStream> editLogs = new ArrayList<EditLogInputStream>();
135        purgeableLogs.selectInputStreams(editLogs, purgeLogsFrom, false);
136        Collections.sort(editLogs, new Comparator<EditLogInputStream>() {
137          @Override
138          public int compare(EditLogInputStream a, EditLogInputStream b) {
139            return ComparisonChain.start()
140                .compare(a.getFirstTxId(), b.getFirstTxId())
141                .compare(a.getLastTxId(), b.getLastTxId())
142                .result();
143          }
144        });
145    
146        // Remove from consideration any edit logs that are in fact required.
147        while (editLogs.size() > 0 &&
148            editLogs.get(editLogs.size() - 1).getFirstTxId() >= minimumRequiredTxId) {
149          editLogs.remove(editLogs.size() - 1);
150        }
151        
152        // Next, adjust the number of transactions to retain if doing so would mean
153        // keeping too many segments around.
154        while (editLogs.size() > maxExtraEditsSegmentsToRetain) {
155          purgeLogsFrom = editLogs.get(0).getLastTxId() + 1;
156          editLogs.remove(0);
157        }
158        
159        // Finally, ensure that we're not trying to purge any transactions that we
160        // actually need.
161        if (purgeLogsFrom > minimumRequiredTxId) {
162          throw new AssertionError("Should not purge more edits than required to "
163              + "restore: " + purgeLogsFrom + " should be <= "
164              + minimumRequiredTxId);
165        }
166        
167        purgeableLogs.purgeLogsOlderThan(purgeLogsFrom);
168      }
169      
170      private void purgeCheckpointsOlderThan(
171          FSImageTransactionalStorageInspector inspector,
172          long minTxId) {
173        for (FSImageFile image : inspector.getFoundImages()) {
174          if (image.getCheckpointTxId() < minTxId) {
175            purger.purgeImage(image);
176          }
177        }
178      }
179    
180      /**
181       * @param inspector inspector that has already inspected all storage dirs
182       * @return the transaction ID corresponding to the oldest checkpoint
183       * that should be retained. 
184       */
185      private long getImageTxIdToRetain(FSImageTransactionalStorageInspector inspector) {
186          
187        List<FSImageFile> images = inspector.getFoundImages();
188        TreeSet<Long> imageTxIds = Sets.newTreeSet();
189        for (FSImageFile image : images) {
190          imageTxIds.add(image.getCheckpointTxId());
191        }
192        
193        List<Long> imageTxIdsList = Lists.newArrayList(imageTxIds);
194        if (imageTxIdsList.isEmpty()) {
195          return 0;
196        }
197        
198        Collections.reverse(imageTxIdsList);
199        int toRetain = Math.min(numCheckpointsToRetain, imageTxIdsList.size());    
200        long minTxId = imageTxIdsList.get(toRetain - 1);
201        LOG.info("Going to retain " + toRetain + " images with txid >= " +
202            minTxId);
203        return minTxId;
204      }
205      
206      /**
207       * Interface responsible for disposing of old checkpoints and edit logs.
208       */
209      static interface StoragePurger {
210        void purgeLog(EditLogFile log);
211        void purgeImage(FSImageFile image);
212      }
213      
214      static class DeletionStoragePurger implements StoragePurger {
215        @Override
216        public void purgeLog(EditLogFile log) {
217          LOG.info("Purging old edit log " + log);
218          deleteOrWarn(log.getFile());
219        }
220    
221        @Override
222        public void purgeImage(FSImageFile image) {
223          LOG.info("Purging old image " + image);
224          deleteOrWarn(image.getFile());
225          deleteOrWarn(MD5FileUtils.getDigestFileForFile(image.getFile()));
226        }
227    
228        private static void deleteOrWarn(File file) {
229          if (!file.delete()) {
230            // It's OK if we fail to delete something -- we'll catch it
231            // next time we swing through this directory.
232            LOG.warn("Could not delete " + file);
233          }      
234        }
235      }
236    }