/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.blockmanagement;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState;
import org.apache.hadoop.hdfs.server.namenode.NameNode;

/**
 * Represents a block that is currently being constructed.<br>
 * This is usually the last block of a file opened for write or append.
 */
public class BlockInfoContiguousUnderConstruction extends BlockInfoContiguous {
  /** Block state. See {@link BlockUCState} */
  private BlockUCState blockUCState;

  /**
   * Block replicas as assigned when the block was allocated.
   * This defines the pipeline order.
   * May be {@code null} if no expected locations have been set yet.
   */
  private List<ReplicaUnderConstruction> replicas;

  /**
   * Index of the primary data node doing the recovery. Useful for log
   * messages. {@code -1} when no primary has been selected.
   */
  private int primaryNodeIndex = -1;

  /**
   * The new generation stamp, which this block will have
   * after the recovery succeeds. Also used as a recovery id to identify
   * the right recovery if any of the abandoned recoveries re-appear.
   */
  private long blockRecoveryId = 0;

  /**
   * The block source to use in the event of copy-on-write truncate.
   */
  private BlockInfoContiguous truncateBlock;

  /**
   * ReplicaUnderConstruction contains information about replicas while
   * they are under construction.
   * The GS, the length and the state of the replica is as reported by
   * the data-node.
   * It is not guaranteed, but expected, that data-nodes actually have
   * corresponding replicas.
   */
  static class ReplicaUnderConstruction extends Block {
    private final DatanodeStorageInfo expectedLocation;
    private ReplicaState state;
    private boolean chosenAsPrimary;

    /**
     * Create a replica record for the given block on the given storage.
     * The replica starts out as not chosen for recovery.
     *
     * @param block  block whose identity is copied into this replica
     * @param target storage expected to hold the replica
     * @param state  initial replica state
     */
    ReplicaUnderConstruction(Block block,
                             DatanodeStorageInfo target,
                             ReplicaState state) {
      super(block);
      this.expectedLocation = target;
      this.state = state;
      this.chosenAsPrimary = false;
    }

    /**
     * Expected block replica location as assigned when the block was allocated.
     * This defines the pipeline order.
     * It is not guaranteed, but expected, that the data-node actually has
     * the replica.
     */
    public DatanodeStorageInfo getExpectedStorageLocation() {
      return expectedLocation;
    }

    /**
     * Get replica state as reported by the data-node.
     */
    ReplicaState getState() {
      return state;
    }

    /**
     * Whether the replica was chosen for recovery.
     */
    boolean getChosenAsPrimary() {
      return chosenAsPrimary;
    }

    /**
     * Set replica state.
     */
    void setState(ReplicaState s) {
      state = s;
    }

    /**
     * Set whether this replica was chosen for recovery.
     */
    void setChosenAsPrimary(boolean chosenAsPrimary) {
      this.chosenAsPrimary = chosenAsPrimary;
    }

    /**
     * Is data-node the replica belongs to alive.
     */
    boolean isAlive() {
      return expectedLocation.getDatanodeDescriptor().isAlive;
    }

    @Override // Block
    public int hashCode() {
      return super.hashCode();
    }

    @Override // Block
    public boolean equals(Object obj) {
      // Sufficient to rely on super's implementation
      return (this == obj) || super.equals(obj);
    }

    @Override
    public String toString() {
      final StringBuilder b = new StringBuilder(50);
      appendStringTo(b);
      return b.toString();
    }

    /**
     * Append {@code ReplicaUC[<expected location>|<state>]} to the builder.
     */
    @Override
    public void appendStringTo(StringBuilder sb) {
      sb.append("ReplicaUC[")
        .append(expectedLocation)
        .append("|")
        .append(state)
        .append("]");
    }
  }

  /**
   * Create block and set its state to
   * {@link BlockUCState#UNDER_CONSTRUCTION}.
   * No expected locations are recorded.
   */
  public BlockInfoContiguousUnderConstruction(Block blk, short replication) {
    this(blk, replication, BlockUCState.UNDER_CONSTRUCTION, null);
  }

  /**
   * Create a block that is currently being constructed.
   *
   * @param blk         block to copy identity from
   * @param replication replication passed to the super constructor
   * @param state       initial under-construction state; must not be
   *                    {@link BlockUCState#COMPLETE}
   * @param targets     expected storages, may be {@code null} or contain
   *                    {@code null} entries (which are skipped)
   */
  public BlockInfoContiguousUnderConstruction(Block blk, short replication,
      BlockUCState state, DatanodeStorageInfo[] targets) {
    super(blk, replication);
    // Validate the requested state itself: the blockUCState field has not
    // been assigned yet at this point, so checking it could never fail.
    assert state != BlockUCState.COMPLETE :
        "BlockInfoUnderConstruction cannot be in COMPLETE state";
    this.blockUCState = state;
    setExpectedLocations(targets);
  }

  /**
   * Convert an under construction block to a complete block.
   * <p>
   * This method itself only asserts that the block is not already COMPLETE
   * and copies it; it does not verify that the block has been committed or
   * sufficiently replicated. The {@code throws} clause is kept for callers.
   *
   * @return BlockInfo - a complete block.
   * @throws IOException declared for interface compatibility; not thrown by
   * the code in this method.
   */
  BlockInfoContiguous convertToCompleteBlock() throws IOException {
    assert getBlockUCState() != BlockUCState.COMPLETE :
        "Trying to convert a COMPLETE block";
    return new BlockInfoContiguous(this);
  }

  /**
   * Set expected locations. Replaces the current replica list with one
   * {@link ReplicaState#RBW} replica per non-null target, in array order.
   * A {@code null} array leaves the current replica list untouched.
   */
  public void setExpectedLocations(DatanodeStorageInfo[] targets) {
    if (targets == null) {
      return;
    }
    // Count first so the list can be sized exactly.
    int numLocations = 0;
    for (DatanodeStorageInfo target : targets) {
      if (target != null) {
        numLocations++;
      }
    }

    this.replicas = new ArrayList<ReplicaUnderConstruction>(numLocations);
    for (DatanodeStorageInfo target : targets) {
      // Only store non-null DatanodeStorageInfo.
      if (target != null) {
        replicas.add(
            new ReplicaUnderConstruction(this, target, ReplicaState.RBW));
      }
    }
  }

  /**
   * Create array of expected replica locations
   * (as has been assigned by chooseTargets()).
   *
   * @return a new array, empty if no replicas are recorded
   */
  public DatanodeStorageInfo[] getExpectedStorageLocations() {
    int numLocations = replicas == null ? 0 : replicas.size();
    DatanodeStorageInfo[] storages = new DatanodeStorageInfo[numLocations];
    for (int i = 0; i < numLocations; i++) {
      storages[i] = replicas.get(i).getExpectedStorageLocation();
    }
    return storages;
  }

  /** Get the number of expected locations */
  public int getNumExpectedLocations() {
    return replicas == null ? 0 : replicas.size();
  }

  /**
   * Return the state of the block under construction.
   * @see BlockUCState
   */
  @Override // BlockInfo
  public BlockUCState getBlockUCState() {
    return blockUCState;
  }

  void setBlockUCState(BlockUCState s) {
    blockUCState = s;
  }

  /** Get block recovery ID */
  public long getBlockRecoveryId() {
    return blockRecoveryId;
  }

  /** Get the block used as the source for copy-on-write truncate. */
  public BlockInfoContiguous getTruncateBlock() {
    return truncateBlock;
  }

  /** Set the block used as the source for copy-on-write truncate. */
  public void setTruncateBlock(BlockInfoContiguous recoveryBlock) {
    this.truncateBlock = recoveryBlock;
  }

  /**
   * Process the recorded replicas. When about to commit or finish the
   * pipeline recovery sort out bad replicas.
   * @param genStamp  The final generation stamp for the block.
   * @return the replicas whose generation stamp differs from
   *         {@code genStamp}, or {@code null} if no replica list exists.
   */
  public List<ReplicaUnderConstruction> setGenerationStampAndVerifyReplicas(
      long genStamp) {
    // Set the generation stamp for the block.
    setGenerationStamp(genStamp);
    if (replicas == null) {
      return null;
    }

    List<ReplicaUnderConstruction> staleReplicas = new ArrayList<>();
    // Collect replicas with wrong gen stamp. The replica list is unchanged.
    for (ReplicaUnderConstruction r : replicas) {
      if (genStamp != r.getGenerationStamp()) {
        staleReplicas.add(r);
      }
    }
    return staleReplicas;
  }

  /**
   * Commit block's length and generation stamp as reported by the client.
   * Set block state to {@link BlockUCState#COMMITTED}.
   * @param block - contains client reported block length and generation
   * @return staleReplica's List (see
   *         {@link #setGenerationStampAndVerifyReplicas(long)}).
   * @throws IOException if block ids are inconsistent.
   */
  List<ReplicaUnderConstruction> commitBlock(Block block) throws IOException {
    if (getBlockId() != block.getBlockId()) {
      throw new IOException("Trying to commit inconsistent block: id = "
          + block.getBlockId() + ", expected id = " + getBlockId());
    }
    blockUCState = BlockUCState.COMMITTED;
    this.setNumBytes(block.getNumBytes());
    // Sort out invalid replicas.
    return setGenerationStampAndVerifyReplicas(block.getGenerationStamp());
  }

  /**
   * Initialize lease recovery for this block.
   * Among the alive replicas that have not yet been tried as primary, pick
   * the one whose data-node has the most recent last-update time and make it
   * primary. If every alive replica has already been tried, the "tried"
   * flags are cleared first so that a new round starts.
   * @param recoveryId Recovery ID (new gen stamp)
   * @param startRecovery Issue recovery command to datanode if true.
   */
  public void initializeBlockRecovery(long recoveryId, boolean startRecovery) {
    setBlockUCState(BlockUCState.UNDER_RECOVERY);
    blockRecoveryId = recoveryId;
    if (!startRecovery) {
      return;
    }
    // The replica list is null when the block was created without targets;
    // treat that exactly like an empty list instead of dereferencing it.
    if (replicas == null || replicas.isEmpty()) {
      NameNode.blockStateChangeLog.warn("BLOCK*"
          + " BlockInfoUnderConstruction.initLeaseRecovery:"
          + " No blocks found, lease removed.");
      primaryNodeIndex = -1;
      return;
    }

    // Check if all alive replicas have been tried as primary or not.
    boolean allLiveReplicasTriedAsPrimary = true;
    for (ReplicaUnderConstruction replica : replicas) {
      if (replica.isAlive() && !replica.getChosenAsPrimary()) {
        allLiveReplicasTriedAsPrimary = false;
        break;
      }
    }
    if (allLiveReplicasTriedAsPrimary) {
      // Start a new round: clear the chosen flag on all the replicas,
      // whether they are alive or not.
      for (ReplicaUnderConstruction replica : replicas) {
        replica.setChosenAsPrimary(false);
      }
    }

    long mostRecentLastUpdate = 0;
    ReplicaUnderConstruction primary = null;
    primaryNodeIndex = -1;
    for (int i = 0; i < replicas.size(); i++) {
      final ReplicaUnderConstruction ruc = replicas.get(i);
      // Only alive replicas not yet chosen for recovery are candidates.
      if (!ruc.isAlive() || ruc.getChosenAsPrimary()) {
        continue;
      }
      final long lastUpdate = ruc.getExpectedStorageLocation()
          .getDatanodeDescriptor().getLastUpdateMonotonic();
      if (lastUpdate > mostRecentLastUpdate) {
        primaryNodeIndex = i;
        primary = ruc;
        mostRecentLastUpdate = lastUpdate;
      }
    }
    if (primary != null) {
      primary.getExpectedStorageLocation().getDatanodeDescriptor()
          .addBlockToBeRecovered(this);
      primary.setChosenAsPrimary(true);
      NameNode.blockStateChangeLog.info(
          "BLOCK* {} recovery started, primary={}", this, primary);
    }
  }

  /**
   * Record a replica reported on the given storage.
   * <ul>
   * <li>If a replica is already recorded for exactly this storage, only its
   * generation stamp is updated from {@code block}.</li>
   * <li>If a replica is recorded for a different storage of the same
   * data-node, that entry is removed and a new one is appended.</li>
   * <li>Otherwise a new replica is appended.</li>
   * </ul>
   *
   * @param storage storage that reported the replica
   * @param block   reported block (id, length, generation stamp)
   * @param rState  reported replica state
   */
  void addReplicaIfNotPresent(DatanodeStorageInfo storage,
                              Block block,
                              ReplicaState rState) {
    if (replicas == null) {
      replicas = new ArrayList<ReplicaUnderConstruction>(1);
      replicas.add(new ReplicaUnderConstruction(block, storage, rState));
      return;
    }

    Iterator<ReplicaUnderConstruction> it = replicas.iterator();
    while (it.hasNext()) {
      ReplicaUnderConstruction r = it.next();
      DatanodeStorageInfo expectedLocation = r.getExpectedStorageLocation();
      if (expectedLocation == storage) {
        // Record the gen stamp from the report
        r.setGenerationStamp(block.getGenerationStamp());
        return;
      } else if (expectedLocation != null
          && expectedLocation.getDatanodeDescriptor()
              == storage.getDatanodeDescriptor()) {
        // The Datanode reported that the block is on a different storage
        // than the one chosen by BlockPlacementPolicy. This can occur as
        // we allow Datanodes to choose the target storage. Update our
        // state by removing the stale entry and adding a new one.
        it.remove();
        break;
      }
    }
    replicas.add(new ReplicaUnderConstruction(block, storage, rState));
  }

  @Override // BlockInfo
  // BlockInfoUnderConstruction participates in maps the same way as BlockInfo
  public int hashCode() {
    return super.hashCode();
  }

  @Override // BlockInfo
  public boolean equals(Object obj) {
    // Sufficient to rely on super's implementation
    return (this == obj) || super.equals(obj);
  }

  @Override
  public String toString() {
    final StringBuilder b = new StringBuilder(100);
    appendStringTo(b);
    return b.toString();
  }

  @Override
  public void appendStringTo(StringBuilder sb) {
    super.appendStringTo(sb);
    appendUCParts(sb);
  }

  /**
   * Append the under-construction specific fields (state, truncate block,
   * primary node index and the comma-separated replica list) to the builder.
   */
  private void appendUCParts(StringBuilder sb) {
    sb.append("{UCState=").append(blockUCState)
      .append(", truncateBlock=").append(truncateBlock)
      .append(", primaryNodeIndex=").append(primaryNodeIndex)
      .append(", replicas=[");
    if (replicas != null) {
      Iterator<ReplicaUnderConstruction> iter = replicas.iterator();
      if (iter.hasNext()) {
        iter.next().appendStringTo(sb);
        while (iter.hasNext()) {
          sb.append(", ");
          iter.next().appendStringTo(sb);
        }
      }
    }
    sb.append("]}");
  }
}