001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.server.blockmanagement;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.Iterator;
023import java.util.List;
024
025import org.apache.hadoop.hdfs.protocol.Block;
026import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
027import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState;
028import org.apache.hadoop.hdfs.server.namenode.NameNode;
029
030/**
031 * Represents a block that is currently being constructed.<br>
032 * This is usually the last block of a file opened for write or append.
033 */
034public class BlockInfoContiguousUnderConstruction extends BlockInfoContiguous {
035  /** Block state. See {@link BlockUCState} */
036  private BlockUCState blockUCState;
037
038  /**
039   * Block replicas as assigned when the block was allocated.
040   * This defines the pipeline order.
041   */
042  private List<ReplicaUnderConstruction> replicas;
043
044  /**
045   * Index of the primary data node doing the recovery. Useful for log
046   * messages.
047   */
048  private int primaryNodeIndex = -1;
049
050  /**
051   * The new generation stamp, which this block will have
052   * after the recovery succeeds. Also used as a recovery id to identify
053   * the right recovery if any of the abandoned recoveries re-appear.
054   */
055  private long blockRecoveryId = 0;
056
057  /**
058   * The block source to use in the event of copy-on-write truncate.
059   */
060  private BlockInfoContiguous truncateBlock;
061
062  /**
063   * ReplicaUnderConstruction contains information about replicas while
064   * they are under construction.
065   * The GS, the length and the state of the replica is as reported by 
066   * the data-node.
067   * It is not guaranteed, but expected, that data-nodes actually have
068   * corresponding replicas.
069   */
070  static class ReplicaUnderConstruction extends Block {
071    private final DatanodeStorageInfo expectedLocation;
072    private ReplicaState state;
073    private boolean chosenAsPrimary;
074
075    ReplicaUnderConstruction(Block block,
076                             DatanodeStorageInfo target,
077                             ReplicaState state) {
078      super(block);
079      this.expectedLocation = target;
080      this.state = state;
081      this.chosenAsPrimary = false;
082    }
083
084    /**
085     * Expected block replica location as assigned when the block was allocated.
086     * This defines the pipeline order.
087     * It is not guaranteed, but expected, that the data-node actually has
088     * the replica.
089     */
090    public DatanodeStorageInfo getExpectedStorageLocation() {
091      return expectedLocation;
092    }
093
094    /**
095     * Get replica state as reported by the data-node.
096     */
097    ReplicaState getState() {
098      return state;
099    }
100
101    /**
102     * Whether the replica was chosen for recovery.
103     */
104    boolean getChosenAsPrimary() {
105      return chosenAsPrimary;
106    }
107
108    /**
109     * Set replica state.
110     */
111    void setState(ReplicaState s) {
112      state = s;
113    }
114
115    /**
116     * Set whether this replica was chosen for recovery.
117     */
118    void setChosenAsPrimary(boolean chosenAsPrimary) {
119      this.chosenAsPrimary = chosenAsPrimary;
120    }
121
122    /**
123     * Is data-node the replica belongs to alive.
124     */
125    boolean isAlive() {
126      return expectedLocation.getDatanodeDescriptor().isAlive;
127    }
128
129    @Override // Block
130    public int hashCode() {
131      return super.hashCode();
132    }
133
134    @Override // Block
135    public boolean equals(Object obj) {
136      // Sufficient to rely on super's implementation
137      return (this == obj) || super.equals(obj);
138    }
139
140    @Override
141    public String toString() {
142      final StringBuilder b = new StringBuilder(50);
143      appendStringTo(b);
144      return b.toString();
145    }
146    
147    @Override
148    public void appendStringTo(StringBuilder sb) {
149      sb.append("ReplicaUC[")
150        .append(expectedLocation)
151        .append("|")
152        .append(state)
153        .append("]");
154    }
155  }
156
157  /**
158   * Create block and set its state to
159   * {@link BlockUCState#UNDER_CONSTRUCTION}.
160   */
161  public BlockInfoContiguousUnderConstruction(Block blk, short replication) {
162    this(blk, replication, BlockUCState.UNDER_CONSTRUCTION, null);
163  }
164
165  /**
166   * Create a block that is currently being constructed.
167   */
168  public BlockInfoContiguousUnderConstruction(Block blk, short replication, BlockUCState state, DatanodeStorageInfo[] targets) {
169    super(blk, replication);
170    assert getBlockUCState() != BlockUCState.COMPLETE :
171      "BlockInfoUnderConstruction cannot be in COMPLETE state";
172    this.blockUCState = state;
173    setExpectedLocations(targets);
174  }
175
176  /**
177   * Convert an under construction block to a complete block.
178   * 
179   * @return BlockInfo - a complete block.
180   * @throws IOException if the state of the block 
181   * (the generation stamp and the length) has not been committed by 
182   * the client or it does not have at least a minimal number of replicas 
183   * reported from data-nodes. 
184   */
185  BlockInfoContiguous convertToCompleteBlock() throws IOException {
186    assert getBlockUCState() != BlockUCState.COMPLETE :
187      "Trying to convert a COMPLETE block";
188    return new BlockInfoContiguous(this);
189  }
190
191  /** Set expected locations */
192  public void setExpectedLocations(DatanodeStorageInfo[] targets) {
193    if (targets == null) {
194      return;
195    }
196    int numLocations = 0;
197    for (DatanodeStorageInfo target : targets) {
198      if (target != null) {
199        numLocations++;
200      }
201    }
202
203    this.replicas = new ArrayList<ReplicaUnderConstruction>(numLocations);
204    for(int i = 0; i < targets.length; i++) {
205      // Only store non-null DatanodeStorageInfo.
206      if (targets[i] != null) {
207        replicas.add(new ReplicaUnderConstruction(this, targets[i], ReplicaState.RBW));
208      }
209    }
210  }
211
212  /**
213   * Create array of expected replica locations
214   * (as has been assigned by chooseTargets()).
215   */
216  public DatanodeStorageInfo[] getExpectedStorageLocations() {
217    int numLocations = replicas == null ? 0 : replicas.size();
218    DatanodeStorageInfo[] storages = new DatanodeStorageInfo[numLocations];
219    for(int i = 0; i < numLocations; i++)
220      storages[i] = replicas.get(i).getExpectedStorageLocation();
221    return storages;
222  }
223
224  /** Get the number of expected locations */
225  public int getNumExpectedLocations() {
226    return replicas == null ? 0 : replicas.size();
227  }
228
229  /**
230   * Return the state of the block under construction.
231   * @see BlockUCState
232   */
233  @Override // BlockInfo
234  public BlockUCState getBlockUCState() {
235    return blockUCState;
236  }
237
238  void setBlockUCState(BlockUCState s) {
239    blockUCState = s;
240  }
241
242  /** Get block recovery ID */
243  public long getBlockRecoveryId() {
244    return blockRecoveryId;
245  }
246
247  /** Get recover block */
248  public BlockInfoContiguous getTruncateBlock() {
249    return truncateBlock;
250  }
251
252  public void setTruncateBlock(BlockInfoContiguous recoveryBlock) {
253    this.truncateBlock = recoveryBlock;
254  }
255
256  /**
257   * Process the recorded replicas. When about to commit or finish the
258   * pipeline recovery sort out bad replicas.
259   * @param genStamp  The final generation stamp for the block.
260   * @return staleReplica's List.
261   */
262  public List<ReplicaUnderConstruction> setGenerationStampAndVerifyReplicas(
263      long genStamp) {
264    // Set the generation stamp for the block.
265    setGenerationStamp(genStamp);
266    if (replicas == null)
267      return null;
268
269    List<ReplicaUnderConstruction> staleReplicas = new ArrayList<>();
270    // Remove replicas with wrong gen stamp. The replica list is unchanged.
271    for (ReplicaUnderConstruction r : replicas) {
272      if (genStamp != r.getGenerationStamp()) {
273        staleReplicas.add(r);
274      }
275    }
276    return staleReplicas;
277  }
278
279  /**
280   * Commit block's length and generation stamp as reported by the client.
281   * Set block state to {@link BlockUCState#COMMITTED}.
282   * @param block - contains client reported block length and generation 
283   * @return staleReplica's List.
284   * @throws IOException if block ids are inconsistent.
285   */
286  List<ReplicaUnderConstruction> commitBlock(Block block) throws IOException {
287    if(getBlockId() != block.getBlockId())
288      throw new IOException("Trying to commit inconsistent block: id = "
289          + block.getBlockId() + ", expected id = " + getBlockId());
290    blockUCState = BlockUCState.COMMITTED;
291    this.setNumBytes(block.getNumBytes());
292    // Sort out invalid replicas.
293    return setGenerationStampAndVerifyReplicas(block.getGenerationStamp());
294  }
295
296  /**
297   * Initialize lease recovery for this block.
298   * Find the first alive data-node starting from the previous primary and
299   * make it primary.
300   * @param recoveryId Recovery ID (new gen stamp)
301   * @param startRecovery Issue recovery command to datanode if true.
302   */
303  public void initializeBlockRecovery(long recoveryId, boolean startRecovery) {
304    setBlockUCState(BlockUCState.UNDER_RECOVERY);
305    blockRecoveryId = recoveryId;
306    if (!startRecovery) {
307      return;
308    }
309    if (replicas.size() == 0) {
310      NameNode.blockStateChangeLog.warn("BLOCK*"
311        + " BlockInfoUnderConstruction.initLeaseRecovery:"
312        + " No blocks found, lease removed.");
313    }
314    boolean allLiveReplicasTriedAsPrimary = true;
315    for (int i = 0; i < replicas.size(); i++) {
316      // Check if all replicas have been tried or not.
317      if (replicas.get(i).isAlive()) {
318        allLiveReplicasTriedAsPrimary =
319            (allLiveReplicasTriedAsPrimary && replicas.get(i).getChosenAsPrimary());
320      }
321    }
322    if (allLiveReplicasTriedAsPrimary) {
323      // Just set all the replicas to be chosen whether they are alive or not.
324      for (int i = 0; i < replicas.size(); i++) {
325        replicas.get(i).setChosenAsPrimary(false);
326      }
327    }
328    long mostRecentLastUpdate = 0;
329    ReplicaUnderConstruction primary = null;
330    primaryNodeIndex = -1;
331    for(int i = 0; i < replicas.size(); i++) {
332      // Skip alive replicas which have been chosen for recovery.
333      if (!(replicas.get(i).isAlive() && !replicas.get(i).getChosenAsPrimary())) {
334        continue;
335      }
336      final ReplicaUnderConstruction ruc = replicas.get(i);
337      final long lastUpdate = ruc.getExpectedStorageLocation()
338          .getDatanodeDescriptor().getLastUpdateMonotonic();
339      if (lastUpdate > mostRecentLastUpdate) {
340        primaryNodeIndex = i;
341        primary = ruc;
342        mostRecentLastUpdate = lastUpdate;
343      }
344    }
345    if (primary != null) {
346      primary.getExpectedStorageLocation().getDatanodeDescriptor().addBlockToBeRecovered(this);
347      primary.setChosenAsPrimary(true);
348      NameNode.blockStateChangeLog.info(
349          "BLOCK* {} recovery started, primary={}", this, primary);
350    }
351  }
352
353  void addReplicaIfNotPresent(DatanodeStorageInfo storage,
354                     Block block,
355                     ReplicaState rState) {
356    if (replicas == null) {
357      replicas = new ArrayList<ReplicaUnderConstruction>(1);
358      replicas.add(new ReplicaUnderConstruction(block, storage,
359          rState));
360    } else {
361      Iterator<ReplicaUnderConstruction> it = replicas.iterator();
362      while (it.hasNext()) {
363        ReplicaUnderConstruction r = it.next();
364        DatanodeStorageInfo expectedLocation = r.getExpectedStorageLocation();
365        if (expectedLocation == storage) {
366          // Record the gen stamp from the report
367          r.setGenerationStamp(block.getGenerationStamp());
368          return;
369        } else if (expectedLocation != null
370            && expectedLocation.getDatanodeDescriptor() ==
371            storage.getDatanodeDescriptor()) {
372
373          // The Datanode reported that the block is on a different storage
374          // than the one chosen by BlockPlacementPolicy. This can occur as
375          // we allow Datanodes to choose the target storage. Update our
376          // state by removing the stale entry and adding a new one.
377          it.remove();
378          break;
379        }
380      }
381      replicas.add(new ReplicaUnderConstruction(block, storage, rState));
382    }
383  }
384
385  @Override // BlockInfo
386  // BlockInfoUnderConstruction participates in maps the same way as BlockInfo
387  public int hashCode() {
388    return super.hashCode();
389  }
390
391  @Override // BlockInfo
392  public boolean equals(Object obj) {
393    // Sufficient to rely on super's implementation
394    return (this == obj) || super.equals(obj);
395  }
396
397  @Override
398  public String toString() {
399    final StringBuilder b = new StringBuilder(100);
400    appendStringTo(b);
401    return b.toString();
402  }
403
404  @Override
405  public void appendStringTo(StringBuilder sb) {
406    super.appendStringTo(sb);
407    appendUCParts(sb);
408  }
409
410  private void appendUCParts(StringBuilder sb) {
411    sb.append("{UCState=").append(blockUCState)
412      .append(", truncateBlock=" + truncateBlock)
413      .append(", primaryNodeIndex=").append(primaryNodeIndex)
414      .append(", replicas=[");
415    if (replicas != null) {
416      Iterator<ReplicaUnderConstruction> iter = replicas.iterator();
417      if (iter.hasNext()) {
418        iter.next().appendStringTo(sb);
419        while (iter.hasNext()) {
420          sb.append(", ");
421          iter.next().appendStringTo(sb);
422        }
423      }
424    }
425    sb.append("]}");
426  }
427}