001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.hdfs.server.blockmanagement;
019
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Queue;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.server.namenode.CachedBlock;
import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
import org.apache.hadoop.hdfs.util.LightWeightHashSet;
import org.apache.hadoop.util.IntrusiveCollection;
import org.apache.hadoop.util.Time;

import com.google.common.annotations.VisibleForTesting;
046
047 /**
048 * This class extends the DatanodeInfo class with ephemeral information (eg
049 * health, capacity, what blocks are associated with the Datanode) that is
050 * private to the Namenode, ie this class is not exposed to clients.
051 */
052 @InterfaceAudience.Private
053 @InterfaceStability.Evolving
054 public class DatanodeDescriptor extends DatanodeInfo {
  public static final Log LOG = LogFactory.getLog(DatanodeDescriptor.class);
  /** Shared empty array to avoid per-call allocations. */
  public static final DatanodeDescriptor[] EMPTY_ARRAY = {};

  // Stores status of decommissioning.
  // If node is not decommissioning, do not use this object for anything.
  public final DecommissioningStatus decommissioningStatus = new DecommissioningStatus();
061
062 /** Block and targets pair */
063 @InterfaceAudience.Private
064 @InterfaceStability.Evolving
065 public static class BlockTargetPair {
066 public final Block block;
067 public final DatanodeStorageInfo[] targets;
068
069 BlockTargetPair(Block block, DatanodeStorageInfo[] targets) {
070 this.block = block;
071 this.targets = targets;
072 }
073 }
074
075 /** A BlockTargetPair queue. */
076 private static class BlockQueue<E> {
077 private final Queue<E> blockq = new LinkedList<E>();
078
079 /** Size of the queue */
080 synchronized int size() {return blockq.size();}
081
082 /** Enqueue */
083 synchronized boolean offer(E e) {
084 return blockq.offer(e);
085 }
086
087 /** Dequeue */
088 synchronized List<E> poll(int numBlocks) {
089 if (numBlocks <= 0 || blockq.isEmpty()) {
090 return null;
091 }
092
093 List<E> results = new ArrayList<E>();
094 for(; !blockq.isEmpty() && numBlocks > 0; numBlocks--) {
095 results.add(blockq.poll());
096 }
097 return results;
098 }
099
100 /**
101 * Returns <tt>true</tt> if the queue contains the specified element.
102 */
103 boolean contains(E e) {
104 return blockq.contains(e);
105 }
106
107 synchronized void clear() {
108 blockq.clear();
109 }
110 }
111
  /** Maps storage ID -> storage info; guarded by synchronizing on itself. */
  private final Map<String, DatanodeStorageInfo> storageMap =
      new HashMap<String, DatanodeStorageInfo>();
114
115 /**
116 * A list of CachedBlock objects on this datanode.
117 */
118 public static class CachedBlocksList extends IntrusiveCollection<CachedBlock> {
119 public enum Type {
120 PENDING_CACHED,
121 CACHED,
122 PENDING_UNCACHED
123 }
124
125 private final DatanodeDescriptor datanode;
126
127 private final Type type;
128
129 CachedBlocksList(DatanodeDescriptor datanode, Type type) {
130 this.datanode = datanode;
131 this.type = type;
132 }
133
134 public DatanodeDescriptor getDatanode() {
135 return datanode;
136 }
137
138 public Type getType() {
139 return type;
140 }
141 }
142
  /**
   * The blocks which we want to cache on this DataNode.
   */
  private final CachedBlocksList pendingCached =
      new CachedBlocksList(this, CachedBlocksList.Type.PENDING_CACHED);

  /**
   * The blocks which we know are cached on this datanode.
   * This list is updated by periodic cache reports.
   */
  private final CachedBlocksList cached =
      new CachedBlocksList(this, CachedBlocksList.Type.CACHED);

  /**
   * The blocks which we want to uncache on this DataNode.
   */
  private final CachedBlocksList pendingUncached =
      new CachedBlocksList(this, CachedBlocksList.Type.PENDING_UNCACHED);

  /** @return the list of blocks scheduled to be cached on this datanode. */
  public CachedBlocksList getPendingCached() {
    return pendingCached;
  }

  /** @return the list of blocks known to be cached on this datanode. */
  public CachedBlocksList getCached() {
    return cached;
  }

  /** @return the list of blocks scheduled to be uncached on this datanode. */
  public CachedBlocksList getPendingUncached() {
    return pendingUncached;
  }

  /**
   * The time when the last batch of caching directives was sent, in
   * monotonic milliseconds.
   */
  private long lastCachingDirectiveSentTimeMs;
179
  // isAlive == heartbeats.contains(this)
  // This is an optimization, because contains takes O(n) time on Arraylist
  public boolean isAlive = false;
  public boolean needKeyUpdate = false;


  // A system administrator can tune the balancer bandwidth parameter
  // (dfs.balance.bandwidthPerSec) dynamically by calling
  // "dfsadmin -setBalanacerBandwidth <newbandwidth>", at which point the
  // following 'bandwidth' variable gets updated with the new value for each
  // node. Once the heartbeat command is issued to update the value on the
  // specified datanode, this value will be set back to 0.
  private long bandwidth;

  /** A queue of blocks to be replicated by this datanode */
  private final BlockQueue<BlockTargetPair> replicateBlocks = new BlockQueue<BlockTargetPair>();
  /** A queue of blocks to be recovered by this datanode */
  private final BlockQueue<BlockInfoUnderConstruction> recoverBlocks =
      new BlockQueue<BlockInfoUnderConstruction>();
  /** A set of blocks to be invalidated by this datanode.
   * Guarded by synchronizing on itself. */
  private final LightWeightHashSet<Block> invalidateBlocks = new LightWeightHashSet<Block>();

  /* Variables for maintaining number of blocks scheduled to be written to
   * this storage. This count is approximate and might be slightly bigger
   * in case of errors (e.g. datanode does not report if an error occurs
   * while writing the block).
   */
  private int currApproxBlocksScheduled = 0;  // scheduled in current roll interval
  private int prevApproxBlocksScheduled = 0;  // carried over from previous interval
  private long lastBlocksScheduledRollTime = 0;
  private static final int BLOCKS_SCHEDULED_ROLL_INTERVAL = 600*1000; //10min
  // Number of failed volumes, as reported by the most recent heartbeat.
  private int volumeFailures = 0;

  /**
   * When set to true, the node is not in include list and is not allowed
   * to communicate with the namenode
   */
  private boolean disallowed = false;
218
  /**
   * DatanodeDescriptor constructor
   * @param nodeID id of the data node
   */
  public DatanodeDescriptor(DatanodeID nodeID) {
    super(nodeID);
    // Initialize all stat totals to zero; real values arrive with the
    // first heartbeat.
    updateHeartbeat(StorageReport.EMPTY_ARRAY, 0L, 0L, 0, 0);
  }

  /**
   * DatanodeDescriptor constructor
   * @param nodeID id of the data node
   * @param networkLocation location of the data node in network
   */
  public DatanodeDescriptor(DatanodeID nodeID,
                            String networkLocation) {
    super(nodeID, networkLocation);
    updateHeartbeat(StorageReport.EMPTY_ARRAY, 0L, 0L, 0, 0);
  }
238
239 /**
240 * Add data-node to the block. Add block to the head of the list of blocks
241 * belonging to the data-node.
242 */
243 public boolean addBlock(String storageID, BlockInfo b) {
244 DatanodeStorageInfo s = getStorageInfo(storageID);
245 if (s != null) {
246 return s.addBlock(b);
247 }
248 return false;
249 }
250
  /** @return the storage with the given ID, or null if not known to this node. */
  @VisibleForTesting
  public DatanodeStorageInfo getStorageInfo(String storageID) {
    synchronized (storageMap) {
      return storageMap.get(storageID);
    }
  }
257 DatanodeStorageInfo[] getStorageInfos() {
258 synchronized (storageMap) {
259 final Collection<DatanodeStorageInfo> storages = storageMap.values();
260 return storages.toArray(new DatanodeStorageInfo[storages.size()]);
261 }
262 }
263
264 boolean hasStaleStorages() {
265 synchronized (storageMap) {
266 for (DatanodeStorageInfo storage : storageMap.values()) {
267 if (storage.areBlockContentsStale()) {
268 return true;
269 }
270 }
271 return false;
272 }
273 }
274
275 /**
276 * Remove block from the list of blocks belonging to the data-node. Remove
277 * data-node from the block.
278 */
279 boolean removeBlock(BlockInfo b) {
280 int index = b.findStorageInfo(this);
281 // if block exists on this datanode
282 if (index >= 0) {
283 DatanodeStorageInfo s = b.getStorageInfo(index);
284 if (s != null) {
285 return s.removeBlock(b);
286 }
287 }
288 return false;
289 }
290
291 /**
292 * Remove block from the list of blocks belonging to the data-node. Remove
293 * data-node from the block.
294 */
295 boolean removeBlock(String storageID, BlockInfo b) {
296 DatanodeStorageInfo s = getStorageInfo(storageID);
297 if (s != null) {
298 return s.removeBlock(b);
299 }
300 return false;
301 }
302
  /**
   * Replace specified old block with a new one in the DataNodeDescriptor.
   *
   * NOTE(review): assumes oldBlock is currently stored on this datanode
   * (findStorageInfo returns a non-negative index) — confirm callers
   * guarantee this, since the index is not checked here.
   * @param oldBlock - block to be replaced
   * @param newBlock - a replacement block
   * @return the new block
   */
  public BlockInfo replaceBlock(BlockInfo oldBlock, BlockInfo newBlock) {
    int index = oldBlock.findStorageInfo(this);
    DatanodeStorageInfo s = oldBlock.getStorageInfo(index);
    boolean done = s.removeBlock(oldBlock);
    assert done : "Old block should belong to the data-node when replacing";

    done = s.addBlock(newBlock);
    assert done : "New block should not belong to the data-node when replacing";
    return newBlock;
  }
320
321 public void resetBlocks() {
322 setCapacity(0);
323 setRemaining(0);
324 setBlockPoolUsed(0);
325 setDfsUsed(0);
326 setXceiverCount(0);
327 this.invalidateBlocks.clear();
328 this.volumeFailures = 0;
329 // pendingCached, cached, and pendingUncached are protected by the
330 // FSN lock.
331 this.pendingCached.clear();
332 this.cached.clear();
333 this.pendingUncached.clear();
334 }
335
  /**
   * Drop all pending replication, recovery, invalidation, and caching work
   * queued for this datanode.
   */
  public void clearBlockQueues() {
    synchronized (invalidateBlocks) {
      this.invalidateBlocks.clear();
      this.recoverBlocks.clear();
      this.replicateBlocks.clear();
    }
    // pendingCached, cached, and pendingUncached are protected by the
    // FSN lock.
    this.pendingCached.clear();
    this.cached.clear();
    this.pendingUncached.clear();
  }
348
349 public int numBlocks() {
350 int blocks = 0;
351 for (DatanodeStorageInfo entry : getStorageInfos()) {
352 blocks += entry.numBlocks();
353 }
354 return blocks;
355 }
356
  /**
   * Updates stats from datanode heartbeat.
   * @param reports per-storage utilization reports from the datanode
   * @param cacheCapacity total cache capacity reported by the datanode
   * @param cacheUsed cache space currently in use on the datanode
   * @param xceiverCount number of active transceivers on the datanode
   * @param volFailures number of failed volumes reported by the datanode
   */
  public void updateHeartbeat(StorageReport[] reports, long cacheCapacity,
      long cacheUsed, int xceiverCount, int volFailures) {
    long totalCapacity = 0;
    long totalRemaining = 0;
    long totalBlockPoolUsed = 0;
    long totalDfsUsed = 0;

    setCacheCapacity(cacheCapacity);
    setCacheUsed(cacheUsed);
    setXceiverCount(xceiverCount);
    setLastUpdate(Time.now());
    this.volumeFailures = volFailures;
    // Aggregate the per-storage reports into node-wide totals, creating
    // storage records for storage IDs not seen before (see updateStorage).
    for (StorageReport report : reports) {
      DatanodeStorageInfo storage = updateStorage(report.getStorage());
      storage.receivedHeartbeat(report);
      totalCapacity += report.getCapacity();
      totalRemaining += report.getRemaining();
      totalBlockPoolUsed += report.getBlockPoolUsed();
      totalDfsUsed += report.getDfsUsed();
    }
    // Age the blocks-scheduled counters if the roll interval has elapsed.
    rollBlocksScheduled(getLastUpdate());

    // Update total metrics for the node.
    setCapacity(totalCapacity);
    setRemaining(totalRemaining);
    setBlockPoolUsed(totalBlockPoolUsed);
    setDfsUsed(totalDfsUsed);
  }
388
389 private static class BlockIterator implements Iterator<BlockInfo> {
390 private int index = 0;
391 private final List<Iterator<BlockInfo>> iterators;
392
393 private BlockIterator(final DatanodeStorageInfo... storages) {
394 List<Iterator<BlockInfo>> iterators = new ArrayList<Iterator<BlockInfo>>();
395 for (DatanodeStorageInfo e : storages) {
396 iterators.add(e.getBlockIterator());
397 }
398 this.iterators = Collections.unmodifiableList(iterators);
399 }
400
401 @Override
402 public boolean hasNext() {
403 update();
404 return !iterators.isEmpty() && iterators.get(index).hasNext();
405 }
406
407 @Override
408 public BlockInfo next() {
409 update();
410 return iterators.get(index).next();
411 }
412
413 @Override
414 public void remove() {
415 throw new UnsupportedOperationException("Remove unsupported.");
416 }
417
418 private void update() {
419 while(index < iterators.size() - 1 && !iterators.get(index).hasNext()) {
420 index++;
421 }
422 }
423 }
424
  /** @return an iterator over every block on every storage of this node. */
  Iterator<BlockInfo> getBlockIterator() {
    return new BlockIterator(getStorageInfos());
  }

  /** @return an iterator over the blocks of the given storage only. */
  Iterator<BlockInfo> getBlockIterator(final String storageID) {
    return new BlockIterator(getStorageInfo(storageID));
  }
431
  /**
   * Store block replication work.
   * @param block the block to be replicated; must be non-null
   * @param targets destination storages; must be non-empty
   */
  void addBlockToBeReplicated(Block block, DatanodeStorageInfo[] targets) {
    assert(block != null && targets != null && targets.length > 0);
    replicateBlocks.offer(new BlockTargetPair(block, targets));
  }
439
440 /**
441 * Store block recovery work.
442 */
443 void addBlockToBeRecovered(BlockInfoUnderConstruction block) {
444 if(recoverBlocks.contains(block)) {
445 // this prevents adding the same block twice to the recovery queue
446 BlockManager.LOG.info(block + " is already in the recovery queue");
447 return;
448 }
449 recoverBlocks.offer(block);
450 }
451
  /**
   * Store block invalidation work.
   * @param blocklist blocks to be invalidated; must be non-empty
   */
  void addBlocksToBeInvalidated(List<Block> blocklist) {
    assert(blocklist != null && blocklist.size() > 0);
    synchronized (invalidateBlocks) {
      for(Block blk : blocklist) {
        invalidateBlocks.add(blk);
      }
    }
  }
463
  /**
   * The number of work items that are pending to be replicated
   */
  int getNumberOfBlocksToBeReplicated() {
    return replicateBlocks.size();
  }

  /**
   * The number of block invalidation items that are pending to
   * be sent to the datanode
   */
  int getNumberOfBlocksToBeInvalidated() {
    synchronized (invalidateBlocks) {
      return invalidateBlocks.size();
    }
  }
480
  /**
   * Dequeue up to maxTransfers pending replication work items.
   * @return the dequeued work, or null if none is pending
   */
  public List<BlockTargetPair> getReplicationCommand(int maxTransfers) {
    return replicateBlocks.poll(maxTransfers);
  }
484
485 public BlockInfoUnderConstruction[] getLeaseRecoveryCommand(int maxTransfers) {
486 List<BlockInfoUnderConstruction> blocks = recoverBlocks.poll(maxTransfers);
487 if(blocks == null)
488 return null;
489 return blocks.toArray(new BlockInfoUnderConstruction[blocks.size()]);
490 }
491
  /**
   * Remove the specified number of blocks to be invalidated
   * @param maxblocks upper bound on the number of blocks removed
   * @return the removed blocks, or null if none were pending
   */
  public Block[] getInvalidateBlocks(int maxblocks) {
    synchronized (invalidateBlocks) {
      Block[] deleteList = invalidateBlocks.pollToArray(new Block[Math.min(
          invalidateBlocks.size(), maxblocks)]);
      return deleteList.length == 0 ? null : deleteList;
    }
  }
502
  /**
   * @return Approximate number of blocks currently scheduled to be written
   * to this datanode.
   */
  public int getBlocksScheduled() {
    return currApproxBlocksScheduled + prevApproxBlocksScheduled;
  }

  /** Increment the number of blocks scheduled. */
  void incrementBlocksScheduled() {
    currApproxBlocksScheduled++;
  }

  /** Decrement the number of blocks scheduled. */
  void decrementBlocksScheduled() {
    // Drain the older bucket first so stale counts age out before
    // newly scheduled ones.
    if (prevApproxBlocksScheduled > 0) {
      prevApproxBlocksScheduled--;
    } else if (currApproxBlocksScheduled > 0) {
      currApproxBlocksScheduled--;
    }
    // its ok if both counters are zero.
  }

  /** Adjusts curr and prev number of blocks scheduled every few minutes. */
  private void rollBlocksScheduled(long now) {
    if (now - lastBlocksScheduledRollTime > BLOCKS_SCHEDULED_ROLL_INTERVAL) {
      // Age curr into prev; whatever was still in prev is dropped, which
      // bounds how long an unreported write error can inflate the estimate.
      prevApproxBlocksScheduled = currApproxBlocksScheduled;
      currApproxBlocksScheduled = 0;
      lastBlocksScheduledRollTime = now;
    }
  }
534
  @Override
  public int hashCode() {
    // Super implementation is sufficient: consistent with equals() below,
    // which also delegates to the superclass.
    return super.hashCode();
  }

  @Override
  public boolean equals(Object obj) {
    // Sufficient to use super equality as datanodes are uniquely identified
    // by DatanodeID
    return (this == obj) || super.equals(obj);
  }
547
548 /** Decommissioning status */
549 public class DecommissioningStatus {
550 private int underReplicatedBlocks;
551 private int decommissionOnlyReplicas;
552 private int underReplicatedInOpenFiles;
553 private long startTime;
554
555 synchronized void set(int underRep,
556 int onlyRep, int underConstruction) {
557 if (isDecommissionInProgress() == false) {
558 return;
559 }
560 underReplicatedBlocks = underRep;
561 decommissionOnlyReplicas = onlyRep;
562 underReplicatedInOpenFiles = underConstruction;
563 }
564
565 /** @return the number of under-replicated blocks */
566 public synchronized int getUnderReplicatedBlocks() {
567 if (isDecommissionInProgress() == false) {
568 return 0;
569 }
570 return underReplicatedBlocks;
571 }
572 /** @return the number of decommission-only replicas */
573 public synchronized int getDecommissionOnlyReplicas() {
574 if (isDecommissionInProgress() == false) {
575 return 0;
576 }
577 return decommissionOnlyReplicas;
578 }
579 /** @return the number of under-replicated blocks in open files */
580 public synchronized int getUnderReplicatedInOpenFiles() {
581 if (isDecommissionInProgress() == false) {
582 return 0;
583 }
584 return underReplicatedInOpenFiles;
585 }
586 /** Set start time */
587 public synchronized void setStartTime(long time) {
588 startTime = time;
589 }
590 /** @return start time */
591 public synchronized long getStartTime() {
592 if (isDecommissionInProgress() == false) {
593 return 0;
594 }
595 return startTime;
596 }
597 } // End of class DecommissioningStatus
598
  /**
   * Set the flag to indicate if this datanode is disallowed from communicating
   * with the namenode.
   */
  public void setDisallowed(boolean flag) {
    disallowed = flag;
  }

  /** Is the datanode disallowed from communicating with the namenode? */
  public boolean isDisallowed() {
    return disallowed;
  }

  /**
   * @return number of failed volumes in the datanode, as reported by the
   * most recent heartbeat.
   */
  public int getVolumeFailures() {
    return volumeFailures;
  }
617
  /**
   * Update registration info and reset the block-report counters of every
   * storage so block reports are fully re-processed after re-registration.
   * @param nodeReg DatanodeID to update registration for.
   */
  @Override
  public void updateRegInfo(DatanodeID nodeReg) {
    super.updateRegInfo(nodeReg);

    // must re-process IBR after re-registration
    for(DatanodeStorageInfo storage : getStorageInfos()) {
      storage.setBlockReportCount(0);
    }
  }
630
  /**
   * @return balancer bandwidth in bytes per second for this datanode;
   * reset to 0 once the heartbeat command has been issued (see the
   * 'bandwidth' field comment)
   */
  public long getBalancerBandwidth() {
    return this.bandwidth;
  }

  /**
   * @param bandwidth balancer bandwidth in bytes per second for this datanode
   */
  public void setBalancerBandwidth(long bandwidth) {
    this.bandwidth = bandwidth;
  }
644
645 @Override
646 public String dumpDatanode() {
647 StringBuilder sb = new StringBuilder(super.dumpDatanode());
648 int repl = replicateBlocks.size();
649 if (repl > 0) {
650 sb.append(" ").append(repl).append(" blocks to be replicated;");
651 }
652 int inval = invalidateBlocks.size();
653 if (inval > 0) {
654 sb.append(" ").append(inval).append(" blocks to be invalidated;");
655 }
656 int recover = recoverBlocks.size();
657 if (recover > 0) {
658 sb.append(" ").append(recover).append(" blocks to be recovered;");
659 }
660 return sb.toString();
661 }
662
  /**
   * Look up the DatanodeStorageInfo for the reported storage, creating a
   * record the first time a storage ID is seen and refreshing its type and
   * state if the report differs from what is recorded.
   * @param s storage as described in the datanode's report
   * @return the (possibly newly created) storage record
   */
  DatanodeStorageInfo updateStorage(DatanodeStorage s) {
    synchronized (storageMap) {
      DatanodeStorageInfo storage = storageMap.get(s.getStorageID());
      if (storage == null) {
        LOG.info("Adding new storage ID " + s.getStorageID() +
                 " for DN " + getXferAddr());
        storage = new DatanodeStorageInfo(this, s);
        storageMap.put(s.getStorageID(), storage);
      } else if (storage.getState() != s.getState() ||
                 storage.getStorageType() != s.getStorageType()) {
        // For backwards compatibility, make sure that the type and
        // state are updated. Some reports from older datanodes do
        // not include these fields so we may have assumed defaults.
        // This check can be removed in the next major release after
        // 2.4.
        storage.updateFromStorage(s);
        storageMap.put(storage.getStorageID(), storage);
      }
      return storage;
    }
  }
684
  /**
   * @return The time at which we last sent caching directives to this
   * DataNode, in monotonic milliseconds.
   */
  public long getLastCachingDirectiveSentTimeMs() {
    return this.lastCachingDirectiveSentTimeMs;
  }

  /**
   * @param time The time at which we last sent caching directives to this
   * DataNode, in monotonic milliseconds.
   */
  public void setLastCachingDirectiveSentTimeMs(long time) {
    this.lastCachingDirectiveSentTimeMs = time;
  }
700 }
701