001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.hdfs.server.datanode.fsdataset;
019    
020    
021    import java.io.File;
022    import java.io.IOException;
023    import java.io.InputStream;
024    import java.util.List;
025    import java.util.Map;
026    
027    import org.apache.hadoop.classification.InterfaceAudience;
028    import org.apache.hadoop.conf.Configuration;
029    import org.apache.hadoop.hdfs.DFSConfigKeys;
030    import org.apache.hadoop.hdfs.protocol.Block;
031    import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
032    import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo;
033    import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
034    import org.apache.hadoop.hdfs.protocol.HdfsBlocksMetadata;
035    import org.apache.hadoop.hdfs.server.datanode.DataNode;
036    import org.apache.hadoop.hdfs.server.datanode.DataStorage;
037    import org.apache.hadoop.hdfs.server.datanode.FinalizedReplica;
038    import org.apache.hadoop.hdfs.server.datanode.Replica;
039    import org.apache.hadoop.hdfs.server.datanode.ReplicaInPipelineInterface;
040    import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetFactory;
041    import org.apache.hadoop.hdfs.server.datanode.metrics.FSDatasetMBean;
042    import org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand.RecoveringBlock;
043    import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
044    import org.apache.hadoop.hdfs.server.protocol.ReplicaRecoveryInfo;
045    import org.apache.hadoop.hdfs.server.protocol.StorageReport;
046    import org.apache.hadoop.util.DiskChecker.DiskErrorException;
047    import org.apache.hadoop.util.ReflectionUtils;
048    
049    /**
050     * This is a service provider interface for the underlying storage that
051     * stores replicas for a data node.
052     * The default implementation stores replicas on local drives. 
053     */
054    @InterfaceAudience.Private
055    public interface FsDatasetSpi<V extends FsVolumeSpi> extends FSDatasetMBean {
056      /**
057       * A factory for creating {@link FsDatasetSpi} objects.
058       */
059      public static abstract class Factory<D extends FsDatasetSpi<?>> {
060        /** @return the configured factory. */
061        public static Factory<?> getFactory(Configuration conf) {
062          @SuppressWarnings("rawtypes")
063          final Class<? extends Factory> clazz = conf.getClass(
064              DFSConfigKeys.DFS_DATANODE_FSDATASET_FACTORY_KEY,
065              FsDatasetFactory.class,
066              Factory.class);
067          return ReflectionUtils.newInstance(clazz, conf);
068        }
069    
070        /** Create a new object. */
071        public abstract D newInstance(DataNode datanode, DataStorage storage,
072            Configuration conf) throws IOException;
073    
074        /** Does the factory create simulated objects? */
075        public boolean isSimulated() {
076          return false;
077        }
078      }
079    
080      /**
081       * Create rolling logs.
082       *
083       * @param prefix the prefix of the log names.
084       * @return rolling logs
085       */
086      public RollingLogs createRollingLogs(String bpid, String prefix
087          ) throws IOException;
088    
089      /** @return a list of volumes. */
090      public List<V> getVolumes();
091    
092      /** @return a storage with the given storage ID */
093      public DatanodeStorage getStorage(final String storageUuid);
094    
095      /** @return one or more storage reports for attached volumes. */
096      public StorageReport[] getStorageReports(String bpid)
097          throws IOException;
098    
099      /** @return the volume that contains a replica of the block. */
100      public V getVolume(ExtendedBlock b);
101    
102      /** @return a volume information map (name => info). */
103      public Map<String, Object> getVolumeInfoMap();
104    
105      /** @return a list of finalized blocks for the given block pool. */
106      public List<FinalizedReplica> getFinalizedBlocks(String bpid);
107    
108      /**
109       * Check whether the in-memory block record matches the block on the disk,
110       * and, in case that they are not matched, update the record or mark it
111       * as corrupted.
112       */
113      public void checkAndUpdate(String bpid, long blockId, File diskFile,
114          File diskMetaFile, FsVolumeSpi vol);
115    
116      /**
117       * @param b - the block
118       * @return a stream if the meta-data of the block exists;
119       *         otherwise, return null.
120       * @throws IOException
121       */
122      public LengthInputStream getMetaDataInputStream(ExtendedBlock b
123          ) throws IOException;
124    
125      /**
126       * Returns the specified block's on-disk length (excluding metadata)
127       * @param b
128       * @return   the specified block's on-disk length (excluding metadta)
129       * @throws IOException
130       */
131      public long getLength(ExtendedBlock b) throws IOException;
132    
133      /**
134       * Get reference to the replica meta info in the replicasMap. 
135       * To be called from methods that are synchronized on {@link FSDataset}
136       * @param blockId
137       * @return replica from the replicas map
138       */
139      @Deprecated
140      public Replica getReplica(String bpid, long blockId);
141    
142      /**
143       * @return replica meta information
144       */
145      public String getReplicaString(String bpid, long blockId);
146    
147      /**
148       * @return the generation stamp stored with the block.
149       */
150      public Block getStoredBlock(String bpid, long blkid) throws IOException;
151      
152      /**
153       * Returns an input stream at specified offset of the specified block
154       * @param b
155       * @param seekOffset
156       * @return an input stream to read the contents of the specified block,
157       *  starting at the offset
158       * @throws IOException
159       */
160      public InputStream getBlockInputStream(ExtendedBlock b, long seekOffset)
161                throws IOException;
162    
163      /**
164       * Returns an input stream at specified offset of the specified block
165       * The block is still in the tmp directory and is not finalized
166       * @param b
167       * @param blkoff
168       * @param ckoff
169       * @return an input stream to read the contents of the specified block,
170       *  starting at the offset
171       * @throws IOException
172       */
173      public ReplicaInputStreams getTmpInputStreams(ExtendedBlock b, long blkoff,
174          long ckoff) throws IOException;
175    
176      /**
177       * Creates a temporary replica and returns the meta information of the replica
178       * 
179       * @param b block
180       * @return the meta info of the replica which is being written to
181       * @throws IOException if an error occurs
182       */
183      public ReplicaInPipelineInterface createTemporary(ExtendedBlock b
184          ) throws IOException;
185    
186      /**
187       * Creates a RBW replica and returns the meta info of the replica
188       * 
189       * @param b block
190       * @return the meta info of the replica which is being written to
191       * @throws IOException if an error occurs
192       */
193      public ReplicaInPipelineInterface createRbw(ExtendedBlock b
194          ) throws IOException;
195    
196      /**
197       * Recovers a RBW replica and returns the meta info of the replica
198       * 
199       * @param b block
200       * @param newGS the new generation stamp for the replica
201       * @param minBytesRcvd the minimum number of bytes that the replica could have
202       * @param maxBytesRcvd the maximum number of bytes that the replica could have
203       * @return the meta info of the replica which is being written to
204       * @throws IOException if an error occurs
205       */
206      public ReplicaInPipelineInterface recoverRbw(ExtendedBlock b, 
207          long newGS, long minBytesRcvd, long maxBytesRcvd) throws IOException;
208    
209      /**
210       * Covert a temporary replica to a RBW.
211       * @param temporary the temporary replica being converted
212       * @return the result RBW
213       */
214      public ReplicaInPipelineInterface convertTemporaryToRbw(
215          ExtendedBlock temporary) throws IOException;
216    
217      /**
218       * Append to a finalized replica and returns the meta info of the replica
219       * 
220       * @param b block
221       * @param newGS the new generation stamp for the replica
222       * @param expectedBlockLen the number of bytes the replica is expected to have
223       * @return the meata info of the replica which is being written to
224       * @throws IOException
225       */
226      public ReplicaInPipelineInterface append(ExtendedBlock b, long newGS,
227          long expectedBlockLen) throws IOException;
228    
229      /**
230       * Recover a failed append to a finalized replica
231       * and returns the meta info of the replica
232       * 
233       * @param b block
234       * @param newGS the new generation stamp for the replica
235       * @param expectedBlockLen the number of bytes the replica is expected to have
236       * @return the meta info of the replica which is being written to
237       * @throws IOException
238       */
239      public ReplicaInPipelineInterface recoverAppend(ExtendedBlock b, long newGS,
240          long expectedBlockLen) throws IOException;
241      
242      /**
243       * Recover a failed pipeline close
244       * It bumps the replica's generation stamp and finalize it if RBW replica
245       * 
246       * @param b block
247       * @param newGS the new generation stamp for the replica
248       * @param expectedBlockLen the number of bytes the replica is expected to have
249       * @return the storage uuid of the replica.
250       * @throws IOException
251       */
252      public String recoverClose(ExtendedBlock b, long newGS, long expectedBlockLen
253          ) throws IOException;
254      
255      /**
256       * Finalizes the block previously opened for writing using writeToBlock.
257       * The block size is what is in the parameter b and it must match the amount
258       *  of data written
259       * @param b
260       * @throws IOException
261       */
262      public void finalizeBlock(ExtendedBlock b) throws IOException;
263    
264      /**
265       * Unfinalizes the block previously opened for writing using writeToBlock.
266       * The temporary file associated with this block is deleted.
267       * @param b
268       * @throws IOException
269       */
270      public void unfinalizeBlock(ExtendedBlock b) throws IOException;
271    
272      /**
273       * Returns one block report per volume.
274       * @param bpid Block Pool Id
275       * @return - a map of DatanodeStorage to block report for the volume.
276       */
277      public Map<DatanodeStorage, BlockListAsLongs> getBlockReports(String bpid);
278    
279      /**
280       * Returns the cache report - the full list of cached block IDs of a
281       * block pool.
282       * @param   bpid Block Pool Id
283       * @return  the cache report - the full list of cached block IDs.
284       */
285      public List<Long> getCacheReport(String bpid);
286    
287      /** Does the dataset contain the block? */
288      public boolean contains(ExtendedBlock block);
289    
290      /**
291       * Is the block valid?
292       * @param b
293       * @return - true if the specified block is valid
294       */
295      public boolean isValidBlock(ExtendedBlock b);
296    
297      /**
298       * Is the block a valid RBW?
299       * @param b
300       * @return - true if the specified block is a valid RBW
301       */
302      public boolean isValidRbw(ExtendedBlock b);
303    
304      /**
305       * Invalidates the specified blocks
306       * @param bpid Block pool Id
307       * @param invalidBlks - the blocks to be invalidated
308       * @throws IOException
309       */
310      public void invalidate(String bpid, Block invalidBlks[]) throws IOException;
311    
312      /**
313       * Caches the specified blocks
314       * @param bpid Block pool id
315       * @param blockIds - block ids to cache
316       */
317      public void cache(String bpid, long[] blockIds);
318    
319      /**
320       * Uncaches the specified blocks
321       * @param bpid Block pool id
322       * @param blockIds - blocks ids to uncache
323       */
324      public void uncache(String bpid, long[] blockIds);
325    
326      /**
327       * Determine if the specified block is cached.
328       * @param bpid Block pool id
329       * @param blockIds - block id
330       * @returns true if the block is cached
331       */
332      public boolean isCached(String bpid, long blockId);
333    
334        /**
335         * Check if all the data directories are healthy
336         * @throws DiskErrorException
337         */
338      public void checkDataDir() throws DiskErrorException;
339    
340      /**
341       * Shutdown the FSDataset
342       */
343      public void shutdown();
344    
345      /**
346       * Sets the file pointer of the checksum stream so that the last checksum
347       * will be overwritten
348       * @param b block
349       * @param outs The streams for the data file and checksum file
350       * @param checksumSize number of bytes each checksum has
351       * @throws IOException
352       */
353      public void adjustCrcChannelPosition(ExtendedBlock b,
354          ReplicaOutputStreams outs, int checksumSize) throws IOException;
355    
356      /**
357       * Checks how many valid storage volumes there are in the DataNode.
358       * @return true if more than the minimum number of valid volumes are left 
359       * in the FSDataSet.
360       */
361      public boolean hasEnoughResource();
362    
363      /**
364       * Get visible length of the specified replica.
365       */
366      long getReplicaVisibleLength(final ExtendedBlock block) throws IOException;
367    
368      /**
369       * Initialize a replica recovery.
370       * @return actual state of the replica on this data-node or 
371       * null if data-node does not have the replica.
372       */
373      public ReplicaRecoveryInfo initReplicaRecovery(RecoveringBlock rBlock
374          ) throws IOException;
375    
376      /**
377       * Update replica's generation stamp and length and finalize it.
378       * @return the ID of storage that stores the block
379       */
380      public String updateReplicaUnderRecovery(ExtendedBlock oldBlock,
381          long recoveryId, long newLength) throws IOException;
382    
383      /**
384       * add new block pool ID
385       * @param bpid Block pool Id
386       * @param conf Configuration
387       */
388      public void addBlockPool(String bpid, Configuration conf) throws IOException;
389      
390      /**
391       * Shutdown and remove the block pool from underlying storage.
392       * @param bpid Block pool Id to be removed
393       */
394      public void shutdownBlockPool(String bpid) ;
395      
396      /**
397       * Deletes the block pool directories. If force is false, directories are 
398       * deleted only if no block files exist for the block pool. If force 
399       * is true entire directory for the blockpool is deleted along with its
400       * contents.
401       * @param bpid BlockPool Id to be deleted.
402       * @param force If force is false, directories are deleted only if no
403       *        block files exist for the block pool, otherwise entire 
404       *        directory for the blockpool is deleted along with its contents.
405       * @throws IOException
406       */
407      public void deleteBlockPool(String bpid, boolean force) throws IOException;
408      
409      /**
410       * Get {@link BlockLocalPathInfo} for the given block.
411       */
412      public BlockLocalPathInfo getBlockLocalPathInfo(ExtendedBlock b
413          ) throws IOException;
414    
415      /**
416       * Get a {@link HdfsBlocksMetadata} corresponding to the list of blocks in 
417       * <code>blocks</code>.
418       * 
419       * @param bpid pool to query
420       * @param blockIds List of block ids for which to return metadata
421       * @return metadata Metadata for the list of blocks
422       * @throws IOException
423       */
424      public HdfsBlocksMetadata getHdfsBlocksMetadata(String bpid,
425          long[] blockIds) throws IOException;
426    
427      /**
428       * Enable 'trash' for the given dataset. When trash is enabled, files are
429       * moved to a separate trash directory instead of being deleted immediately.
430       * This can be useful for example during rolling upgrades.
431       */
432      public void enableTrash(String bpid);
433    
434      /**
435       * Restore trash
436       */
437      public void restoreTrash(String bpid);
438    
439      /**
440       * @return true when trash is enabled
441       */
442      public boolean trashEnabled(String bpid);
443    }
444