001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.hdfs.server.datanode.fsdataset;
019
020
021 import java.io.File;
022 import java.io.FileDescriptor;
023 import java.io.IOException;
024 import java.io.InputStream;
025 import java.util.Collection;
026 import java.util.List;
027 import java.util.Map;
028
029 import org.apache.hadoop.classification.InterfaceAudience;
030 import org.apache.hadoop.conf.Configuration;
031 import org.apache.hadoop.hdfs.DFSConfigKeys;
032 import org.apache.hadoop.hdfs.StorageType;
033 import org.apache.hadoop.hdfs.protocol.Block;
034 import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
035 import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo;
036 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
037 import org.apache.hadoop.hdfs.protocol.HdfsBlocksMetadata;
038 import org.apache.hadoop.hdfs.server.datanode.DataNode;
039 import org.apache.hadoop.hdfs.server.datanode.DataStorage;
040 import org.apache.hadoop.hdfs.server.datanode.FinalizedReplica;
041 import org.apache.hadoop.hdfs.server.datanode.Replica;
042 import org.apache.hadoop.hdfs.server.datanode.ReplicaInPipelineInterface;
043 import org.apache.hadoop.hdfs.server.datanode.StorageLocation;
044 import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetFactory;
045 import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsVolumeImpl;
046 import org.apache.hadoop.hdfs.server.datanode.metrics.FSDatasetMBean;
047 import org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand.RecoveringBlock;
048 import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
049 import org.apache.hadoop.hdfs.server.protocol.ReplicaRecoveryInfo;
050 import org.apache.hadoop.hdfs.server.protocol.StorageReport;
051 import org.apache.hadoop.util.DiskChecker.DiskErrorException;
052 import org.apache.hadoop.util.ReflectionUtils;
053
054 /**
055 * This is a service provider interface for the underlying storage that
056 * stores replicas for a data node.
057 * The default implementation stores replicas on local drives.
058 */
059 @InterfaceAudience.Private
060 public interface FsDatasetSpi<V extends FsVolumeSpi> extends FSDatasetMBean {
061 /**
062 * A factory for creating {@link FsDatasetSpi} objects.
063 */
064 public static abstract class Factory<D extends FsDatasetSpi<?>> {
065 /** @return the configured factory. */
066 public static Factory<?> getFactory(Configuration conf) {
067 @SuppressWarnings("rawtypes")
068 final Class<? extends Factory> clazz = conf.getClass(
069 DFSConfigKeys.DFS_DATANODE_FSDATASET_FACTORY_KEY,
070 FsDatasetFactory.class,
071 Factory.class);
072 return ReflectionUtils.newInstance(clazz, conf);
073 }
074
075 /** Create a new dataset object of type {@code D} for the given datanode, storage and configuration. */
076 public abstract D newInstance(DataNode datanode, DataStorage storage,
077 Configuration conf) throws IOException;
078
079 /** Does the factory create simulated objects? */
080 public boolean isSimulated() {
081 return false;
082 }
083 }
084
085 /**
086 * Create rolling logs.
087 * @param bpid Block Pool Id
088 * @param prefix the prefix of the log names.
089 * @return rolling logs
090 */
091 public RollingLogs createRollingLogs(String bpid, String prefix
092 ) throws IOException;
093
094 /** @return a list of volumes. */
095 public List<V> getVolumes();
096
097 /** Add a list of StorageLocation to FsDataset. */
098 public List<StorageLocation> addVolumes(List<StorageLocation> volumes,
099 final Collection<String> bpids);
100
101 /** Removes a collection of volumes from FsDataset. */
102 public void removeVolumes(Collection<StorageLocation> volumes);
103
104 /** @return a storage with the given storage ID */
105 public DatanodeStorage getStorage(final String storageUuid);
106
107 /** @return one or more storage reports for attached volumes. */
108 public StorageReport[] getStorageReports(String bpid)
109 throws IOException;
110
111 /** @return the volume that contains a replica of the block. */
112 public V getVolume(ExtendedBlock b);
113
114 /** @return a volume information map (name => info). */
115 public Map<String, Object> getVolumeInfoMap();
116
117 /** @return a list of finalized blocks for the given block pool. */
118 public List<FinalizedReplica> getFinalizedBlocks(String bpid);
119
120 /** @return a list of finalized blocks for the given block pool; judging by the method name, only those on persistent storage (TODO confirm). */
121 public List<FinalizedReplica> getFinalizedBlocksOnPersistentStorage(String bpid);
122
123 /**
124 * Check whether the in-memory block record matches the block on the disk,
125 * and, in case that they are not matched, update the record or mark it
126 * as corrupted.
127 */
128 public void checkAndUpdate(String bpid, long blockId, File diskFile,
129 File diskMetaFile, FsVolumeSpi vol) throws IOException;
130
131 /**
132 * @param b - the block
133 * @return a stream if the meta-data of the block exists;
134 * otherwise, return null.
135 * @throws IOException
136 */
137 public LengthInputStream getMetaDataInputStream(ExtendedBlock b
138 ) throws IOException;
139
140 /**
141 * Returns the specified block's on-disk length (excluding metadata).
142 * @return the specified block's on-disk length (excluding metadata)
143 * @throws IOException on error
144 */
145 public long getLength(ExtendedBlock b) throws IOException;
146
147 /**
148 * Get reference to the replica meta info in the replicasMap.
149 * To be called from methods that are synchronized on {@code FSDataset}.
150 * @return replica from the replicas map
151 */
152 @Deprecated
153 public Replica getReplica(String bpid, long blockId);
154
155 /**
156 * @return replica meta information as a string
157 */
158 public String getReplicaString(String bpid, long blockId);
159
160 /**
161 * @return the stored block, carrying the generation stamp stored with the block.
162 */
163 public Block getStoredBlock(String bpid, long blkid) throws IOException;
164
165 /**
166 * Returns an input stream at specified offset of the specified block.
167 * @param b block
168 * @param seekOffset offset within the block to seek to
169 * @return an input stream to read the contents of the specified block,
170 * starting at the offset
171 * @throws IOException
172 */
173 public InputStream getBlockInputStream(ExtendedBlock b, long seekOffset)
174 throws IOException;
175
176 /**
177 * Returns input streams at the specified offsets of the specified block.
178 * The block is still in the tmp directory and is not finalized.
179 * @return input streams to read the contents of the specified block,
180 * starting at the given offsets
181 * @throws IOException
182 */
183 public ReplicaInputStreams getTmpInputStreams(ExtendedBlock b, long blkoff,
184 long ckoff) throws IOException;
185
186 /**
187 * Creates a temporary replica and returns the meta information of the replica.
188 *
189 * @param b block
190 * @return the meta info of the replica which is being written to
191 * @throws IOException if an error occurs
192 */
193 public ReplicaInPipelineInterface createTemporary(StorageType storageType,
194 ExtendedBlock b) throws IOException;
195
196 /**
197 * Creates a RBW replica and returns the meta info of the replica.
198 *
199 * @param b block
200 * @return the meta info of the replica which is being written to
201 * @throws IOException if an error occurs
202 */
203 public ReplicaInPipelineInterface createRbw(StorageType storageType,
204 ExtendedBlock b, boolean allowLazyPersist) throws IOException;
205
206 /**
207 * Recovers a RBW replica and returns the meta info of the replica.
208 *
209 * @param b block
210 * @param newGS the new generation stamp for the replica
211 * @param minBytesRcvd the minimum number of bytes that the replica could have
212 * @param maxBytesRcvd the maximum number of bytes that the replica could have
213 * @return the meta info of the replica which is being written to
214 * @throws IOException if an error occurs
215 */
216 public ReplicaInPipelineInterface recoverRbw(ExtendedBlock b,
217 long newGS, long minBytesRcvd, long maxBytesRcvd) throws IOException;
218
219 /**
220 * Convert a temporary replica to a RBW.
221 * @param temporary the temporary replica being converted
222 * @return the result RBW
223 */
224 public ReplicaInPipelineInterface convertTemporaryToRbw(
225 ExtendedBlock temporary) throws IOException;
226
227 /**
228 * Appends to a finalized replica and returns the meta info of the replica.
229 *
230 * @param b block
231 * @param newGS the new generation stamp for the replica
232 * @param expectedBlockLen the number of bytes the replica is expected to have
233 * @return the meta info of the replica which is being written to
234 * @throws IOException
235 */
236 public ReplicaInPipelineInterface append(ExtendedBlock b, long newGS,
237 long expectedBlockLen) throws IOException;
238
239 /**
240 * Recovers a failed append to a finalized replica
241 * and returns the meta info of the replica.
242 *
243 * @param b block
244 * @param newGS the new generation stamp for the replica
245 * @param expectedBlockLen the number of bytes the replica is expected to have
246 * @return the meta info of the replica which is being written to
247 * @throws IOException
248 */
249 public ReplicaInPipelineInterface recoverAppend(ExtendedBlock b, long newGS,
250 long expectedBlockLen) throws IOException;
251
252 /**
253 * Recovers a failed pipeline close.
254 * It bumps the replica's generation stamp and finalizes it if it is a RBW replica.
255 *
256 * @param b block
257 * @param newGS the new generation stamp for the replica
258 * @param expectedBlockLen the number of bytes the replica is expected to have
259 * @return the storage uuid of the replica.
260 * @throws IOException
261 */
262 public String recoverClose(ExtendedBlock b, long newGS, long expectedBlockLen
263 ) throws IOException;
264
265 /**
266 * Finalizes the block previously opened for writing using writeToBlock.
267 * The block size is what is in the parameter b and it must match the amount
268 * of data written.
269 * @throws IOException
270 */
271 public void finalizeBlock(ExtendedBlock b) throws IOException;
272
273 /**
274 * Unfinalizes the block previously opened for writing using writeToBlock.
275 * The temporary file associated with this block is deleted.
276 * @throws IOException
277 */
278 public void unfinalizeBlock(ExtendedBlock b) throws IOException;
279
280 /**
281 * Returns one block report per volume.
282 * @param bpid Block Pool Id
283 * @return - a map of DatanodeStorage to block report for the volume.
284 */
285 public Map<DatanodeStorage, BlockListAsLongs> getBlockReports(String bpid);
286
287 /**
288 * Returns the cache report - the full list of cached block IDs of a
289 * block pool.
290 * @param bpid Block Pool Id
291 * @return the cache report - the full list of cached block IDs.
292 */
293 public List<Long> getCacheReport(String bpid);
294
295 /** Does the dataset contain the block? */
296 public boolean contains(ExtendedBlock block);
297
298 /**
299 * Is the block valid?
300 * @return - true if the specified block is valid
301 */
302 public boolean isValidBlock(ExtendedBlock b);
303
304 /**
305 * Is the block a valid RBW?
306 * @return - true if the specified block is a valid RBW
307 */
308 public boolean isValidRbw(ExtendedBlock b);
309
310 /**
311 * Invalidates the specified blocks.
312 * @param bpid Block pool Id
313 * @param invalidBlks - the blocks to be invalidated
314 * @throws IOException
315 */
316 public void invalidate(String bpid, Block invalidBlks[]) throws IOException;
317
318 /**
319 * Caches the specified blocks.
320 * @param bpid Block pool id
321 * @param blockIds - block ids to cache
322 */
323 public void cache(String bpid, long[] blockIds);
324
325 /**
326 * Uncaches the specified blocks.
327 * @param bpid Block pool id
328 * @param blockIds - blocks ids to uncache
329 */
330 public void uncache(String bpid, long[] blockIds);
331
332 /**
333 * Determine if the specified block is cached.
334 * @param bpid Block pool id
335 * @param blockId - block id
336 * @return true if the block is cached
337 */
338 public boolean isCached(String bpid, long blockId);
339
340 /**
341 * Check if all the data directories are healthy.
342 * @throws DiskErrorException
343 */
344 public void checkDataDir() throws DiskErrorException;
345
346 /**
347 * Shutdown the FSDataset.
348 */
349 public void shutdown();
350
351 /**
352 * Sets the file pointer of the checksum stream so that the last checksum
353 * will be overwritten.
354 * @param b block
355 * @param outs The streams for the data file and checksum file
356 * @param checksumSize number of bytes each checksum has
357 * @throws IOException
358 */
359 public void adjustCrcChannelPosition(ExtendedBlock b,
360 ReplicaOutputStreams outs, int checksumSize) throws IOException;
361
362 /**
363 * Checks how many valid storage volumes there are in the DataNode.
364 * @return true if more than the minimum number of valid volumes are left
365 * in the FSDataSet.
366 */
367 public boolean hasEnoughResource();
368
369 /**
370 * Get visible length of the specified replica.
371 */
372 long getReplicaVisibleLength(final ExtendedBlock block) throws IOException;
373
374 /**
375 * Initialize a replica recovery.
376 * @return actual state of the replica on this data-node or
377 * null if data-node does not have the replica.
378 */
379 public ReplicaRecoveryInfo initReplicaRecovery(RecoveringBlock rBlock
380 ) throws IOException;
381
382 /**
383 * Update replica's generation stamp and length and finalize it.
384 * @return the ID of storage that stores the block
385 */
386 public String updateReplicaUnderRecovery(ExtendedBlock oldBlock,
387 long recoveryId, long newLength) throws IOException;
388
389 /**
390 * Add a new block pool ID.
391 * @param bpid Block pool Id
392 * @param conf Configuration
393 */
394 public void addBlockPool(String bpid, Configuration conf) throws IOException;
395
396 /**
397 * Shutdown and remove the block pool from underlying storage.
398 * @param bpid Block pool Id to be removed
399 */
400 public void shutdownBlockPool(String bpid) ;
401
402 /**
403 * Deletes the block pool directories. If force is false, directories are
404 * deleted only if no block files exist for the block pool. If force
405 * is true entire directory for the blockpool is deleted along with its
406 * contents.
407 * @param bpid BlockPool Id to be deleted.
408 * @param force If force is false, directories are deleted only if no
409 * block files exist for the block pool, otherwise entire
410 * directory for the blockpool is deleted along with its contents.
411 * @throws IOException
412 */
413 public void deleteBlockPool(String bpid, boolean force) throws IOException;
414
415 /**
416 * Get {@link BlockLocalPathInfo} for the given block.
417 */
418 public BlockLocalPathInfo getBlockLocalPathInfo(ExtendedBlock b
419 ) throws IOException;
420
421 /**
422 * Get a {@link HdfsBlocksMetadata} corresponding to the list of blocks in
423 * <code>blockIds</code>.
424 *
425 * @param bpid pool to query
426 * @param blockIds Array of block ids for which to return metadata
427 * @return metadata for the given block ids
428 * @throws IOException
429 */
430 public HdfsBlocksMetadata getHdfsBlocksMetadata(String bpid,
431 long[] blockIds) throws IOException;
432
433 /**
434 * Enable 'trash' for the given dataset. When trash is enabled, files are
435 * moved to a separate trash directory instead of being deleted immediately.
436 * This can be useful for example during rolling upgrades.
437 */
438 public void enableTrash(String bpid);
439
440 /**
441 * Restore trash.
442 */
443 public void restoreTrash(String bpid);
444
445 /**
446 * @return true when trash is enabled
447 */
448 public boolean trashEnabled(String bpid);
449
450 /**
451 * Create a marker file indicating that a rolling upgrade is in progress.
452 */
453 public void setRollingUpgradeMarker(String bpid) throws IOException;
454
455 /**
456 * Delete the rolling upgrade marker file if it exists.
457 * @param bpid Block pool Id
458 */
459 public void clearRollingUpgradeMarker(String bpid) throws IOException;
460
461 /**
462 * Submit a sync_file_range request to AsyncDiskService.
463 */
464 public void submitBackgroundSyncFileRangeRequest(final ExtendedBlock block,
465 final FileDescriptor fd, final long offset, final long nbytes,
466 final int flags);
467
468 /**
469 * Callback from RamDiskAsyncLazyPersistService upon async lazy persist task end.
470 */
471 public void onCompleteLazyPersist(String bpId, long blockId,
472 long creationTime, File[] savedFiles, FsVolumeImpl targetVolume);
473
474 /**
475 * Callback from RamDiskAsyncLazyPersistService upon async lazy persist task failure.
476 */
477 public void onFailLazyPersist(String bpId, long blockId);
478 }