001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.hdfs;
019    
020    import java.io.BufferedOutputStream;
021    import java.io.DataInputStream;
022    import java.io.DataOutputStream;
023    import java.io.FileInputStream;
024    import java.io.IOException;
025    import java.net.InetSocketAddress;
026    
027    import org.apache.commons.lang.mutable.MutableBoolean;
028    import org.apache.commons.logging.Log;
029    import org.apache.commons.logging.LogFactory;
030    import org.apache.hadoop.classification.InterfaceAudience;
031    import org.apache.hadoop.conf.Configuration;
032    import org.apache.hadoop.hdfs.net.DomainPeer;
033    import org.apache.hadoop.hdfs.net.Peer;
034    import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
035    import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
036    import org.apache.hadoop.hdfs.protocol.datatransfer.InvalidEncryptionKeyException;
037    import org.apache.hadoop.hdfs.protocol.datatransfer.Sender;
038    import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto;
039    import org.apache.hadoop.hdfs.protocolPB.PBHelper;
040    import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
041    import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException;
042    import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
043    import org.apache.hadoop.hdfs.shortcircuit.DomainSocketFactory;
044    import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache;
045    import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.ShortCircuitReplicaCreator;
046    import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitReplica;
047    import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitReplicaInfo;
048    import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm.Slot;
049    import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm.SlotId;
050    import org.apache.hadoop.io.IOUtils;
051    import org.apache.hadoop.ipc.RemoteException;
052    import org.apache.hadoop.net.unix.DomainSocket;
053    import org.apache.hadoop.security.AccessControlException;
054    import org.apache.hadoop.security.UserGroupInformation;
055    import org.apache.hadoop.security.token.SecretManager.InvalidToken;
056    import org.apache.hadoop.security.token.Token;
057    import org.apache.hadoop.util.PerformanceAdvisory;
058    import org.apache.hadoop.util.Time;
059    
060    import com.google.common.annotations.VisibleForTesting;
061    import com.google.common.base.Preconditions;
062    
063    
064    /** 
065     * Utility class to create BlockReader implementations.
066     */
067    @InterfaceAudience.Private
068    public class BlockReaderFactory implements ShortCircuitReplicaCreator {
069      static final Log LOG = LogFactory.getLog(BlockReaderFactory.class);
070    
071      @VisibleForTesting
072      static ShortCircuitReplicaCreator
073          createShortCircuitReplicaInfoCallback = null;
074    
075      private final DFSClient.Conf conf;
076    
077      /**
078       * The file name, for logging and debugging purposes.
079       */
080      private String fileName;
081    
082      /**
083       * The block ID and block pool ID to use.
084       */
085      private ExtendedBlock block;
086    
087      /**
088       * The block token to use for security purposes.
089       */
090      private Token<BlockTokenIdentifier> token;
091    
092      /**
093       * The offset within the block to start reading at.
094       */
095      private long startOffset;
096    
097      /**
098       * If false, we won't try to verify the block checksum.
099       */
100      private boolean verifyChecksum;
101    
102      /**
103       * The name of this client.
104       */
105      private String clientName; 
106    
107      /**
108       * The DataNode we're talking to.
109       */
110      private DatanodeInfo datanode;
111    
112      /**
113       * StorageType of replica on DataNode.
114       */
115      private StorageType storageType;
116    
117      /**
118       * If false, we won't try short-circuit local reads.
119       */
120      private boolean allowShortCircuitLocalReads;
121    
122      /**
123       * The ClientContext to use for things like the PeerCache.
124       */
125      private ClientContext clientContext;
126    
127      /**
128       * Number of bytes to read.  -1 indicates no limit.
129       */
130      private long length = -1;
131    
132      /**
133       * Caching strategy to use when reading the block.
134       */
135      private CachingStrategy cachingStrategy;
136    
137      /**
138       * Socket address to use to connect to peer.
139       */
140      private InetSocketAddress inetSocketAddress;
141    
142      /**
143       * Remote peer factory to use to create a peer, if needed.
144       */
145      private RemotePeerFactory remotePeerFactory;
146    
147      /**
148       * UserGroupInformation  to use for legacy block reader local objects, if needed.
149       */
150      private UserGroupInformation userGroupInformation;
151    
152      /**
153       * Configuration to use for legacy block reader local objects, if needed.
154       */
155      private Configuration configuration;
156    
157      /**
158       * Information about the domain socket path we should use to connect to the
159       * local peer-- or null if we haven't examined the local domain socket.
160       */
161      private DomainSocketFactory.PathInfo pathInfo;
162    
163      /**
164       * The remaining number of times that we'll try to pull a socket out of the
165       * cache.
166       */
167      private int remainingCacheTries;
168    
169      public BlockReaderFactory(DFSClient.Conf conf) {
170        this.conf = conf;
171        this.remainingCacheTries = conf.nCachedConnRetry;
172      }
173    
174      public BlockReaderFactory setFileName(String fileName) {
175        this.fileName = fileName;
176        return this;
177      }
178    
179      public BlockReaderFactory setBlock(ExtendedBlock block) {
180        this.block = block;
181        return this;
182      }
183    
184      public BlockReaderFactory setBlockToken(Token<BlockTokenIdentifier> token) {
185        this.token = token;
186        return this;
187      }
188    
189      public BlockReaderFactory setStartOffset(long startOffset) {
190        this.startOffset = startOffset;
191        return this;
192      }
193    
194      public BlockReaderFactory setVerifyChecksum(boolean verifyChecksum) {
195        this.verifyChecksum = verifyChecksum;
196        return this;
197      }
198    
199      public BlockReaderFactory setClientName(String clientName) {
200        this.clientName = clientName;
201        return this;
202      }
203    
204      public BlockReaderFactory setDatanodeInfo(DatanodeInfo datanode) {
205        this.datanode = datanode;
206        return this;
207      }
208    
209      public BlockReaderFactory setStorageType(StorageType storageType) {
210        this.storageType = storageType;
211        return this;
212      }
213    
214      public BlockReaderFactory setAllowShortCircuitLocalReads(
215          boolean allowShortCircuitLocalReads) {
216        this.allowShortCircuitLocalReads = allowShortCircuitLocalReads;
217        return this;
218      }
219    
220      public BlockReaderFactory setClientCacheContext(
221          ClientContext clientContext) {
222        this.clientContext = clientContext;
223        return this;
224      }
225    
226      public BlockReaderFactory setLength(long length) {
227        this.length = length;
228        return this;
229      }
230    
231      public BlockReaderFactory setCachingStrategy(
232          CachingStrategy cachingStrategy) {
233        this.cachingStrategy = cachingStrategy;
234        return this;
235      }
236    
237      public BlockReaderFactory setInetSocketAddress (
238          InetSocketAddress inetSocketAddress) {
239        this.inetSocketAddress = inetSocketAddress;
240        return this;
241      }
242    
243      public BlockReaderFactory setUserGroupInformation(
244          UserGroupInformation userGroupInformation) {
245        this.userGroupInformation = userGroupInformation;
246        return this;
247      }
248    
249      public BlockReaderFactory setRemotePeerFactory(
250          RemotePeerFactory remotePeerFactory) {
251        this.remotePeerFactory = remotePeerFactory;
252        return this;
253      }
254    
255      public BlockReaderFactory setConfiguration(
256          Configuration configuration) {
257        this.configuration = configuration;
258        return this;
259      }
260    
261      /**
262       * Build a BlockReader with the given options.
263       *
264       * This function will do the best it can to create a block reader that meets
265       * all of our requirements.  We prefer short-circuit block readers
266       * (BlockReaderLocal and BlockReaderLocalLegacy) over remote ones, since the
267       * former avoid the overhead of socket communication.  If short-circuit is
268       * unavailable, our next fallback is data transfer over UNIX domain sockets,
269       * if dfs.client.domain.socket.data.traffic has been enabled.  If that doesn't
270       * work, we will try to create a remote block reader that operates over TCP
271       * sockets.
272       *
273       * There are a few caches that are important here.
274       *
275       * The ShortCircuitCache stores file descriptor objects which have been passed
276       * from the DataNode. 
277       *
278       * The DomainSocketFactory stores information about UNIX domain socket paths
279       * that we not been able to use in the past, so that we don't waste time
280       * retrying them over and over.  (Like all the caches, it does have a timeout,
281       * though.)
282       *
283       * The PeerCache stores peers that we have used in the past.  If we can reuse
284       * one of these peers, we avoid the overhead of re-opening a socket.  However,
285       * if the socket has been timed out on the remote end, our attempt to reuse
286       * the socket may end with an IOException.  For that reason, we limit our
287       * attempts at socket reuse to dfs.client.cached.conn.retry times.  After
288       * that, we create new sockets.  This avoids the problem where a thread tries
289       * to talk to a peer that it hasn't talked to in a while, and has to clean out
290       * every entry in a socket cache full of stale entries.
291       *
292       * @return The new BlockReader.  We will not return null.
293       *
294       * @throws InvalidToken
295       *             If the block token was invalid.
296       *         InvalidEncryptionKeyException
297       *             If the encryption key was invalid.
298       *         Other IOException
299       *             If there was another problem.
300       */
301      public BlockReader build() throws IOException {
302        BlockReader reader = null;
303    
304        Preconditions.checkNotNull(configuration);
305        if (conf.shortCircuitLocalReads && allowShortCircuitLocalReads) {
306          if (clientContext.getUseLegacyBlockReaderLocal()) {
307            reader = getLegacyBlockReaderLocal();
308            if (reader != null) {
309              if (LOG.isTraceEnabled()) {
310                LOG.trace(this + ": returning new legacy block reader local.");
311              }
312              return reader;
313            }
314          } else {
315            reader = getBlockReaderLocal();
316            if (reader != null) {
317              if (LOG.isTraceEnabled()) {
318                LOG.trace(this + ": returning new block reader local.");
319              }
320              return reader;
321            }
322          }
323        }
324        if (conf.domainSocketDataTraffic) {
325          reader = getRemoteBlockReaderFromDomain();
326          if (reader != null) {
327            if (LOG.isTraceEnabled()) {
328              LOG.trace(this + ": returning new remote block reader using " +
329                  "UNIX domain socket on " + pathInfo.getPath());
330            }
331            return reader;
332          }
333        }
334        Preconditions.checkState(!DFSInputStream.tcpReadsDisabledForTesting,
335            "TCP reads were disabled for testing, but we failed to " +
336            "do a non-TCP read.");
337        return getRemoteBlockReaderFromTcp();
338      }
339    
340      /**
341       * Get {@link BlockReaderLocalLegacy} for short circuited local reads.
342       * This block reader implements the path-based style of local reads
343       * first introduced in HDFS-2246.
344       */
345      private BlockReader getLegacyBlockReaderLocal() throws IOException {
346        if (LOG.isTraceEnabled()) {
347          LOG.trace(this + ": trying to construct BlockReaderLocalLegacy");
348        }
349        if (!DFSClient.isLocalAddress(inetSocketAddress)) {
350          if (LOG.isTraceEnabled()) {
351            LOG.trace(this + ": can't construct BlockReaderLocalLegacy because " +
352                "the address " + inetSocketAddress + " is not local");
353          }
354          return null;
355        }
356        if (clientContext.getDisableLegacyBlockReaderLocal()) {
357          PerformanceAdvisory.LOG.debug(this + ": can't construct " +
358              "BlockReaderLocalLegacy because " +
359              "disableLegacyBlockReaderLocal is set.");
360          return null;
361        }
362        IOException ioe = null;
363        try {
364          return BlockReaderLocalLegacy.newBlockReader(conf,
365              userGroupInformation, configuration, fileName, block, token,
366              datanode, startOffset, length, storageType);
367        } catch (RemoteException remoteException) {
368          ioe = remoteException.unwrapRemoteException(
369                    InvalidToken.class, AccessControlException.class);
370        } catch (IOException e) {
371          ioe = e;
372        }
373        if ((!(ioe instanceof AccessControlException)) &&
374            isSecurityException(ioe)) {
375          // Handle security exceptions.
376          // We do not handle AccessControlException here, since
377          // BlockReaderLocalLegacy#newBlockReader uses that exception to indicate
378          // that the user is not in dfs.block.local-path-access.user, a condition
379          // which requires us to disable legacy SCR.
380          throw ioe;
381        }
382        LOG.warn(this + ": error creating legacy BlockReaderLocal.  " +
383            "Disabling legacy local reads.", ioe);
384        clientContext.setDisableLegacyBlockReaderLocal();
385        return null;
386      }
387    
388      private BlockReader getBlockReaderLocal() throws InvalidToken {
389        if (LOG.isTraceEnabled()) {
390          LOG.trace(this + ": trying to construct a BlockReaderLocal " +
391              "for short-circuit reads.");
392        }
393        if (pathInfo == null) {
394          pathInfo = clientContext.getDomainSocketFactory().
395                          getPathInfo(inetSocketAddress, conf);
396        }
397        if (!pathInfo.getPathState().getUsableForShortCircuit()) {
398          PerformanceAdvisory.LOG.debug(this + ": " + pathInfo + " is not " +
399              "usable for short circuit; giving up on BlockReaderLocal.");
400          return null;
401        }
402        ShortCircuitCache cache = clientContext.getShortCircuitCache();
403        ExtendedBlockId key = new ExtendedBlockId(block.getBlockId(), block.getBlockPoolId());
404        ShortCircuitReplicaInfo info = cache.fetchOrCreate(key, this);
405        InvalidToken exc = info.getInvalidTokenException();
406        if (exc != null) {
407          if (LOG.isTraceEnabled()) {
408            LOG.trace(this + ": got InvalidToken exception while trying to " +
409                "construct BlockReaderLocal via " + pathInfo.getPath());
410          }
411          throw exc;
412        }
413        if (info.getReplica() == null) {
414          if (LOG.isTraceEnabled()) {
415            PerformanceAdvisory.LOG.debug(this + ": failed to get " +
416                "ShortCircuitReplica. Cannot construct " +
417                "BlockReaderLocal via " + pathInfo.getPath());
418          }
419          return null;
420        }
421        return new BlockReaderLocal.Builder(conf).
422            setFilename(fileName).
423            setBlock(block).
424            setStartOffset(startOffset).
425            setShortCircuitReplica(info.getReplica()).
426            setVerifyChecksum(verifyChecksum).
427            setCachingStrategy(cachingStrategy).
428            setStorageType(storageType).
429            build();
430      }
431    
432      /**
433       * Fetch a pair of short-circuit block descriptors from a local DataNode.
434       *
435       * @return    Null if we could not communicate with the datanode,
436       *            a new ShortCircuitReplicaInfo object otherwise.
437       *            ShortCircuitReplicaInfo objects may contain either an InvalidToken
438       *            exception, or a ShortCircuitReplica object ready to use.
439       */
440      @Override
441      public ShortCircuitReplicaInfo createShortCircuitReplicaInfo() {
442        if (createShortCircuitReplicaInfoCallback != null) {
443          ShortCircuitReplicaInfo info =
444            createShortCircuitReplicaInfoCallback.createShortCircuitReplicaInfo();
445          if (info != null) return info;
446        }
447        if (LOG.isTraceEnabled()) {
448          LOG.trace(this + ": trying to create ShortCircuitReplicaInfo.");
449        }
450        BlockReaderPeer curPeer;
451        while (true) {
452          curPeer = nextDomainPeer();
453          if (curPeer == null) break;
454          if (curPeer.fromCache) remainingCacheTries--;
455          DomainPeer peer = (DomainPeer)curPeer.peer;
456          Slot slot = null;
457          ShortCircuitCache cache = clientContext.getShortCircuitCache();
458          try {
459            MutableBoolean usedPeer = new MutableBoolean(false);
460            slot = cache.allocShmSlot(datanode, peer, usedPeer,
461                new ExtendedBlockId(block.getBlockId(), block.getBlockPoolId()),
462                clientName);
463            if (usedPeer.booleanValue()) {
464              if (LOG.isTraceEnabled()) {
465                LOG.trace(this + ": allocShmSlot used up our previous socket " +
466                  peer.getDomainSocket() + ".  Allocating a new one...");
467              }
468              curPeer = nextDomainPeer();
469              if (curPeer == null) break;
470              peer = (DomainPeer)curPeer.peer;
471            }
472            ShortCircuitReplicaInfo info = requestFileDescriptors(peer, slot);
473            clientContext.getPeerCache().put(datanode, peer);
474            return info;
475          } catch (IOException e) {
476            if (slot != null) {
477              cache.freeSlot(slot);
478            }
479            if (curPeer.fromCache) {
480              // Handle an I/O error we got when using a cached socket.
481              // These are considered less serious, because the socket may be stale.
482              if (LOG.isDebugEnabled()) {
483                LOG.debug(this + ": closing stale domain peer " + peer, e);
484              }
485              IOUtils.cleanup(LOG, peer);
486            } else {
487              // Handle an I/O error we got when using a newly created socket.
488              // We temporarily disable the domain socket path for a few minutes in
489              // this case, to prevent wasting more time on it.
490              LOG.warn(this + ": I/O error requesting file descriptors.  " + 
491                  "Disabling domain socket " + peer.getDomainSocket(), e);
492              IOUtils.cleanup(LOG, peer);
493              clientContext.getDomainSocketFactory()
494                  .disableDomainSocketPath(pathInfo.getPath());
495              return null;
496            }
497          }
498        }
499        return null;
500      }
501    
502      /**
503       * Request file descriptors from a DomainPeer.
504       *
505       * @param peer   The peer to use for communication.
506       * @param slot   If non-null, the shared memory slot to associate with the 
507       *               new ShortCircuitReplica.
508       * 
509       * @return  A ShortCircuitReplica object if we could communicate with the
510       *          datanode; null, otherwise. 
511       * @throws  IOException If we encountered an I/O exception while communicating
512       *          with the datanode.
513       */
514      private ShortCircuitReplicaInfo requestFileDescriptors(DomainPeer peer,
515              Slot slot) throws IOException {
516        ShortCircuitCache cache = clientContext.getShortCircuitCache();
517        final DataOutputStream out =
518            new DataOutputStream(new BufferedOutputStream(peer.getOutputStream()));
519        SlotId slotId = slot == null ? null : slot.getSlotId();
520        new Sender(out).requestShortCircuitFds(block, token, slotId, 1);
521        DataInputStream in = new DataInputStream(peer.getInputStream());
522        BlockOpResponseProto resp = BlockOpResponseProto.parseFrom(
523            PBHelper.vintPrefixed(in));
524        DomainSocket sock = peer.getDomainSocket();
525        switch (resp.getStatus()) {
526        case SUCCESS:
527          byte buf[] = new byte[1];
528          FileInputStream fis[] = new FileInputStream[2];
529          sock.recvFileInputStreams(fis, buf, 0, buf.length);
530          ShortCircuitReplica replica = null;
531          try {
532            ExtendedBlockId key =
533                new ExtendedBlockId(block.getBlockId(), block.getBlockPoolId());
534            replica = new ShortCircuitReplica(key, fis[0], fis[1], cache,
535                Time.monotonicNow(), slot);
536          } catch (IOException e) {
537            // This indicates an error reading from disk, or a format error.  Since
538            // it's not a socket communication problem, we return null rather than
539            // throwing an exception.
540            LOG.warn(this + ": error creating ShortCircuitReplica.", e);
541            return null;
542          } finally {
543            if (replica == null) {
544              IOUtils.cleanup(DFSClient.LOG, fis[0], fis[1]);
545            }
546          }
547          return new ShortCircuitReplicaInfo(replica);
548        case ERROR_UNSUPPORTED:
549          if (!resp.hasShortCircuitAccessVersion()) {
550            LOG.warn("short-circuit read access is disabled for " +
551                "DataNode " + datanode + ".  reason: " + resp.getMessage());
552            clientContext.getDomainSocketFactory()
553                .disableShortCircuitForPath(pathInfo.getPath());
554          } else {
555            LOG.warn("short-circuit read access for the file " +
556                fileName + " is disabled for DataNode " + datanode +
557                ".  reason: " + resp.getMessage());
558          }
559          return null;
560        case ERROR_ACCESS_TOKEN:
561          String msg = "access control error while " +
562              "attempting to set up short-circuit access to " +
563              fileName + resp.getMessage();
564          if (LOG.isDebugEnabled()) {
565            LOG.debug(this + ":" + msg);
566          }
567          return new ShortCircuitReplicaInfo(new InvalidToken(msg));
568        default:
569          LOG.warn(this + ": unknown response code " + resp.getStatus() +
570              " while attempting to set up short-circuit access. " +
571              resp.getMessage());
572          clientContext.getDomainSocketFactory()
573              .disableShortCircuitForPath(pathInfo.getPath());
574          return null;
575        }
576      }
577    
578      /**
579       * Get a RemoteBlockReader that communicates over a UNIX domain socket.
580       *
581       * @return The new BlockReader, or null if we failed to create the block
582       * reader.
583       *
584       * @throws InvalidToken    If the block token was invalid.
585       * Potentially other security-related execptions.
586       */
587      private BlockReader getRemoteBlockReaderFromDomain() throws IOException {
588        if (pathInfo == null) {
589          pathInfo = clientContext.getDomainSocketFactory().
590                          getPathInfo(inetSocketAddress, conf);
591        }
592        if (!pathInfo.getPathState().getUsableForDataTransfer()) {
593          PerformanceAdvisory.LOG.debug(this + ": not trying to create a " +
594              "remote block reader because the UNIX domain socket at " +
595              pathInfo + " is not usable.");
596          return null;
597        }
598        if (LOG.isTraceEnabled()) {
599          LOG.trace(this + ": trying to create a remote block reader from the " +
600              "UNIX domain socket at " + pathInfo.getPath());
601        }
602    
603        while (true) {
604          BlockReaderPeer curPeer = nextDomainPeer();
605          if (curPeer == null) break;
606          if (curPeer.fromCache) remainingCacheTries--;
607          DomainPeer peer = (DomainPeer)curPeer.peer;
608          BlockReader blockReader = null;
609          try {
610            blockReader = getRemoteBlockReader(peer);
611            return blockReader;
612          } catch (IOException ioe) {
613            IOUtils.cleanup(LOG, peer);
614            if (isSecurityException(ioe)) {
615              if (LOG.isTraceEnabled()) {
616                LOG.trace(this + ": got security exception while constructing " +
617                    "a remote block reader from the unix domain socket at " +
618                    pathInfo.getPath(), ioe);
619              }
620              throw ioe;
621            }
622            if (curPeer.fromCache) {
623              // Handle an I/O error we got when using a cached peer.  These are
624              // considered less serious, because the underlying socket may be stale.
625              if (LOG.isDebugEnabled()) {
626                LOG.debug("Closed potentially stale domain peer " + peer, ioe);
627              }
628            } else {
629              // Handle an I/O error we got when using a newly created domain peer.
630              // We temporarily disable the domain socket path for a few minutes in
631              // this case, to prevent wasting more time on it.
632              LOG.warn("I/O error constructing remote block reader.  Disabling " +
633                  "domain socket " + peer.getDomainSocket(), ioe);
634              clientContext.getDomainSocketFactory()
635                  .disableDomainSocketPath(pathInfo.getPath());
636              return null;
637            }
638          } finally {
639            if (blockReader == null) {
640              IOUtils.cleanup(LOG, peer);
641            }
642          }
643        }
644        return null;
645      }
646    
647      /**
648       * Get a RemoteBlockReader that communicates over a TCP socket.
649       *
650       * @return The new BlockReader.  We will not return null, but instead throw
651       *         an exception if this fails.
652       *
653       * @throws InvalidToken
654       *             If the block token was invalid.
655       *         InvalidEncryptionKeyException
656       *             If the encryption key was invalid.
657       *         Other IOException
658       *             If there was another problem.
659       */
660      private BlockReader getRemoteBlockReaderFromTcp() throws IOException {
661        if (LOG.isTraceEnabled()) {
662          LOG.trace(this + ": trying to create a remote block reader from a " +
663              "TCP socket");
664        }
665        BlockReader blockReader = null;
666        while (true) {
667          BlockReaderPeer curPeer = null;
668          Peer peer = null;
669          try {
670            curPeer = nextTcpPeer();
671            if (curPeer == null) break;
672            if (curPeer.fromCache) remainingCacheTries--;
673            peer = curPeer.peer;
674            blockReader = getRemoteBlockReader(peer);
675            return blockReader;
676          } catch (IOException ioe) {
677            if (isSecurityException(ioe)) {
678              if (LOG.isTraceEnabled()) {
679                LOG.trace(this + ": got security exception while constructing " +
680                    "a remote block reader from " + peer, ioe);
681              }
682              throw ioe;
683            }
684            if ((curPeer != null) && curPeer.fromCache) {
685              // Handle an I/O error we got when using a cached peer.  These are
686              // considered less serious, because the underlying socket may be
687              // stale.
688              if (LOG.isDebugEnabled()) {
689                LOG.debug("Closed potentially stale remote peer " + peer, ioe);
690              }
691            } else {
692              // Handle an I/O error we got when using a newly created peer.
693              LOG.warn("I/O error constructing remote block reader.", ioe);
694              throw ioe;
695            }
696          } finally {
697            if (blockReader == null) {
698              IOUtils.cleanup(LOG, peer);
699            }
700          }
701        }
702        return null;
703      }
704    
705      public static class BlockReaderPeer {
706        final Peer peer;
707        final boolean fromCache;
708        
709        BlockReaderPeer(Peer peer, boolean fromCache) {
710          this.peer = peer;
711          this.fromCache = fromCache;
712        }
713      }
714    
715      /**
716       * Get the next DomainPeer-- either from the cache or by creating it.
717       *
718       * @return the next DomainPeer, or null if we could not construct one.
719       */
720      private BlockReaderPeer nextDomainPeer() {
721        if (remainingCacheTries > 0) {
722          Peer peer = clientContext.getPeerCache().get(datanode, true);
723          if (peer != null) {
724            if (LOG.isTraceEnabled()) {
725              LOG.trace("nextDomainPeer: reusing existing peer " + peer);
726            }
727            return new BlockReaderPeer(peer, true);
728          }
729        }
730        DomainSocket sock = clientContext.getDomainSocketFactory().
731            createSocket(pathInfo, conf.socketTimeout);
732        if (sock == null) return null;
733        return new BlockReaderPeer(new DomainPeer(sock), false);
734      }
735    
736      /**
737       * Get the next TCP-based peer-- either from the cache or by creating it.
738       *
739       * @return the next Peer, or null if we could not construct one.
740       *
741       * @throws IOException  If there was an error while constructing the peer
742       *                      (such as an InvalidEncryptionKeyException)
743       */
744      private BlockReaderPeer nextTcpPeer() throws IOException {
745        if (remainingCacheTries > 0) {
746          Peer peer = clientContext.getPeerCache().get(datanode, false);
747          if (peer != null) {
748            if (LOG.isTraceEnabled()) {
749              LOG.trace("nextTcpPeer: reusing existing peer " + peer);
750            }
751            return new BlockReaderPeer(peer, true);
752          }
753        }
754        try {
755          Peer peer = remotePeerFactory.newConnectedPeer(inetSocketAddress, token,
756            datanode);
757          if (LOG.isTraceEnabled()) {
758            LOG.trace("nextTcpPeer: created newConnectedPeer " + peer);
759          }
760          return new BlockReaderPeer(peer, false);
761        } catch (IOException e) {
762          if (LOG.isTraceEnabled()) {
763            LOG.trace("nextTcpPeer: failed to create newConnectedPeer " +
764                      "connected to " + datanode);
765          }
766          throw e;
767        }
768      }
769    
770      /**
771       * Determine if an exception is security-related.
772       *
773       * We need to handle these exceptions differently than other IOExceptions.
774       * They don't indicate a communication problem.  Instead, they mean that there
775       * is some action the client needs to take, such as refetching block tokens,
776       * renewing encryption keys, etc.
777       *
778       * @param ioe    The exception
779       * @return       True only if the exception is security-related.
780       */
781      private static boolean isSecurityException(IOException ioe) {
782        return (ioe instanceof InvalidToken) ||
783                (ioe instanceof InvalidEncryptionKeyException) ||
784                (ioe instanceof InvalidBlockTokenException) ||
785                (ioe instanceof AccessControlException);
786      }
787    
788      @SuppressWarnings("deprecation")
789      private BlockReader getRemoteBlockReader(Peer peer) throws IOException {
790        if (conf.useLegacyBlockReader) {
791          return RemoteBlockReader.newBlockReader(fileName,
792              block, token, startOffset, length, conf.ioBufferSize,
793              verifyChecksum, clientName, peer, datanode,
794              clientContext.getPeerCache(), cachingStrategy);
795        } else {
796          return RemoteBlockReader2.newBlockReader(
797              fileName, block, token, startOffset, length,
798              verifyChecksum, clientName, peer, datanode,
799              clientContext.getPeerCache(), cachingStrategy);
800        }
801      }
802    
803      @Override
804      public String toString() {
805        return "BlockReaderFactory(fileName=" + fileName + ", block=" + block + ")";
806      }
807    
808      /**
809       * File name to print when accessing a block directly (from servlets)
810       * @param s Address of the block location
811       * @param poolId Block pool ID of the block
812       * @param blockId Block ID of the block
813       * @return string that has a file name for debug purposes
814       */
815      public static String getFileName(final InetSocketAddress s,
816          final String poolId, final long blockId) {
817        return s.toString() + ":" + poolId + ":" + blockId;
818      }
819    }