001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.hdfs;
019    
020    import java.io.BufferedOutputStream;
021    import java.io.DataInputStream;
022    import java.io.DataOutputStream;
023    import java.io.FileInputStream;
024    import java.io.IOException;
025    import java.net.InetSocketAddress;
026    
027    import org.apache.commons.lang.mutable.MutableBoolean;
028    import org.apache.commons.logging.LogFactory;
029    import org.apache.commons.logging.Log;
030    import org.apache.hadoop.classification.InterfaceAudience;
031    import org.apache.hadoop.conf.Configuration;
032    import org.apache.hadoop.hdfs.client.ShortCircuitCache;
033    import org.apache.hadoop.hdfs.client.ShortCircuitCache.ShortCircuitReplicaCreator;
034    import org.apache.hadoop.hdfs.client.ShortCircuitReplica;
035    import org.apache.hadoop.hdfs.client.ShortCircuitReplicaInfo;
036    import org.apache.hadoop.hdfs.ShortCircuitShm.Slot;
037    import org.apache.hadoop.hdfs.ShortCircuitShm.SlotId;
038    import org.apache.hadoop.hdfs.net.DomainPeer;
039    import org.apache.hadoop.hdfs.net.Peer;
040    import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
041    import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
042    import org.apache.hadoop.hdfs.protocol.datatransfer.InvalidEncryptionKeyException;
043    import org.apache.hadoop.hdfs.protocol.datatransfer.Sender;
044    import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto;
045    import org.apache.hadoop.hdfs.protocolPB.PBHelper;
046    import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
047    import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException;
048    import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
049    import org.apache.hadoop.io.IOUtils;
050    import org.apache.hadoop.ipc.RemoteException;
051    import org.apache.hadoop.net.unix.DomainSocket;
052    import org.apache.hadoop.security.AccessControlException;
053    import org.apache.hadoop.security.UserGroupInformation;
054    import org.apache.hadoop.security.token.SecretManager.InvalidToken;
055    import org.apache.hadoop.security.token.Token;
056    import org.apache.hadoop.util.Time;
057    
058    import com.google.common.annotations.VisibleForTesting;
059    import com.google.common.base.Preconditions;
060    
061    
062    /** 
063     * Utility class to create BlockReader implementations.
064     */
065    @InterfaceAudience.Private
066    public class BlockReaderFactory implements ShortCircuitReplicaCreator {
067      static final Log LOG = LogFactory.getLog(BlockReaderFactory.class);
068    
069      @VisibleForTesting
070      static ShortCircuitReplicaCreator
071          createShortCircuitReplicaInfoCallback = null;
072    
073      private final DFSClient.Conf conf;
074    
075      /**
076       * The file name, for logging and debugging purposes.
077       */
078      private String fileName;
079    
080      /**
081       * The block ID and block pool ID to use.
082       */
083      private ExtendedBlock block;
084    
085      /**
086       * The block token to use for security purposes.
087       */
088      private Token<BlockTokenIdentifier> token;
089    
090      /**
091       * The offset within the block to start reading at.
092       */
093      private long startOffset;
094    
095      /**
096       * If false, we won't try to verify the block checksum.
097       */
098      private boolean verifyChecksum;
099    
100      /**
101       * The name of this client.
102       */
103      private String clientName; 
104    
105      /**
106       * The DataNode we're talking to.
107       */
108      private DatanodeInfo datanode;
109    
110      /**
111       * If false, we won't try short-circuit local reads.
112       */
113      private boolean allowShortCircuitLocalReads;
114    
115      /**
116       * The ClientContext to use for things like the PeerCache.
117       */
118      private ClientContext clientContext;
119    
120      /**
121       * Number of bytes to read.  -1 indicates no limit.
122       */
123      private long length = -1;
124    
125      /**
126       * Caching strategy to use when reading the block.
127       */
128      private CachingStrategy cachingStrategy;
129    
130      /**
131       * Socket address to use to connect to peer.
132       */
133      private InetSocketAddress inetSocketAddress;
134    
135      /**
136       * Remote peer factory to use to create a peer, if needed.
137       */
138      private RemotePeerFactory remotePeerFactory;
139    
140      /**
141       * UserGroupInformation to use for legacy block reader local objects, if needed.
142       */
143      private UserGroupInformation userGroupInformation;
144    
145      /**
146       * Configuration to use for legacy block reader local objects, if needed.
147       */
148      private Configuration configuration;
149    
150      /**
151       * Information about the domain socket path we should use to connect to the
152       * local peer-- or null if we haven't examined the local domain socket.
153       */
154      private DomainSocketFactory.PathInfo pathInfo;
155    
156      /**
157       * The remaining number of times that we'll try to pull a socket out of the
158       * cache.
159       */
160      private int remainingCacheTries;
161    
162      public BlockReaderFactory(DFSClient.Conf conf) {
163        this.conf = conf;
164        this.remainingCacheTries = conf.nCachedConnRetry;
165      }
166    
167      public BlockReaderFactory setFileName(String fileName) {
168        this.fileName = fileName;
169        return this;
170      }
171    
172      public BlockReaderFactory setBlock(ExtendedBlock block) {
173        this.block = block;
174        return this;
175      }
176    
177      public BlockReaderFactory setBlockToken(Token<BlockTokenIdentifier> token) {
178        this.token = token;
179        return this;
180      }
181    
182      public BlockReaderFactory setStartOffset(long startOffset) {
183        this.startOffset = startOffset;
184        return this;
185      }
186    
187      public BlockReaderFactory setVerifyChecksum(boolean verifyChecksum) {
188        this.verifyChecksum = verifyChecksum;
189        return this;
190      }
191    
192      public BlockReaderFactory setClientName(String clientName) {
193        this.clientName = clientName;
194        return this;
195      }
196    
197      public BlockReaderFactory setDatanodeInfo(DatanodeInfo datanode) {
198        this.datanode = datanode;
199        return this;
200      }
201    
202      public BlockReaderFactory setAllowShortCircuitLocalReads(
203          boolean allowShortCircuitLocalReads) {
204        this.allowShortCircuitLocalReads = allowShortCircuitLocalReads;
205        return this;
206      }
207    
208      public BlockReaderFactory setClientCacheContext(
209          ClientContext clientContext) {
210        this.clientContext = clientContext;
211        return this;
212      }
213    
214      public BlockReaderFactory setLength(long length) {
215        this.length = length;
216        return this;
217      }
218    
219      public BlockReaderFactory setCachingStrategy(
220          CachingStrategy cachingStrategy) {
221        this.cachingStrategy = cachingStrategy;
222        return this;
223      }
224    
225      public BlockReaderFactory setInetSocketAddress (
226          InetSocketAddress inetSocketAddress) {
227        this.inetSocketAddress = inetSocketAddress;
228        return this;
229      }
230    
231      public BlockReaderFactory setUserGroupInformation(
232          UserGroupInformation userGroupInformation) {
233        this.userGroupInformation = userGroupInformation;
234        return this;
235      }
236    
237      public BlockReaderFactory setRemotePeerFactory(
238          RemotePeerFactory remotePeerFactory) {
239        this.remotePeerFactory = remotePeerFactory;
240        return this;
241      }
242    
243      public BlockReaderFactory setConfiguration(
244          Configuration configuration) {
245        this.configuration = configuration;
246        return this;
247      }
248    
249      /**
250       * Build a BlockReader with the given options.
251       *
252       * This function will do the best it can to create a block reader that meets
253       * all of our requirements.  We prefer short-circuit block readers
254       * (BlockReaderLocal and BlockReaderLocalLegacy) over remote ones, since the
255       * former avoid the overhead of socket communication.  If short-circuit is
256       * unavailable, our next fallback is data transfer over UNIX domain sockets,
257       * if dfs.client.domain.socket.data.traffic has been enabled.  If that doesn't
258       * work, we will try to create a remote block reader that operates over TCP
259       * sockets.
260       *
261       * There are a few caches that are important here.
262       *
263       * The ShortCircuitCache stores file descriptor objects which have been passed
264       * from the DataNode. 
265       *
266       * The DomainSocketFactory stores information about UNIX domain socket paths
267       * that we not been able to use in the past, so that we don't waste time
268       * retrying them over and over.  (Like all the caches, it does have a timeout,
269       * though.)
270       *
271       * The PeerCache stores peers that we have used in the past.  If we can reuse
272       * one of these peers, we avoid the overhead of re-opening a socket.  However,
273       * if the socket has been timed out on the remote end, our attempt to reuse
274       * the socket may end with an IOException.  For that reason, we limit our
275       * attempts at socket reuse to dfs.client.cached.conn.retry times.  After
276       * that, we create new sockets.  This avoids the problem where a thread tries
277       * to talk to a peer that it hasn't talked to in a while, and has to clean out
278       * every entry in a socket cache full of stale entries.
279       *
280       * @return The new BlockReader.  We will not return null.
281       *
282       * @throws InvalidToken
283       *             If the block token was invalid.
284       *         InvalidEncryptionKeyException
285       *             If the encryption key was invalid.
286       *         Other IOException
287       *             If there was another problem.
288       */
289      public BlockReader build() throws IOException {
290        BlockReader reader = null;
291    
292        Preconditions.checkNotNull(configuration);
293        if (conf.shortCircuitLocalReads && allowShortCircuitLocalReads) {
294          if (clientContext.getUseLegacyBlockReaderLocal()) {
295            reader = getLegacyBlockReaderLocal();
296            if (reader != null) {
297              if (LOG.isTraceEnabled()) {
298                LOG.trace(this + ": returning new legacy block reader local.");
299              }
300              return reader;
301            }
302          } else {
303            reader = getBlockReaderLocal();
304            if (reader != null) {
305              if (LOG.isTraceEnabled()) {
306                LOG.trace(this + ": returning new block reader local.");
307              }
308              return reader;
309            }
310          }
311        }
312        if (conf.domainSocketDataTraffic) {
313          reader = getRemoteBlockReaderFromDomain();
314          if (reader != null) {
315            if (LOG.isTraceEnabled()) {
316              LOG.trace(this + ": returning new remote block reader using " +
317                  "UNIX domain socket on " + pathInfo.getPath());
318            }
319            return reader;
320          }
321        }
322        Preconditions.checkState(!DFSInputStream.tcpReadsDisabledForTesting,
323            "TCP reads were disabled for testing, but we failed to " +
324            "do a non-TCP read.");
325        return getRemoteBlockReaderFromTcp();
326      }
327    
328      /**
329       * Get {@link BlockReaderLocalLegacy} for short circuited local reads.
330       * This block reader implements the path-based style of local reads
331       * first introduced in HDFS-2246.
332       */
333      private BlockReader getLegacyBlockReaderLocal() throws IOException {
334        if (LOG.isTraceEnabled()) {
335          LOG.trace(this + ": trying to construct BlockReaderLocalLegacy");
336        }
337        if (!DFSClient.isLocalAddress(inetSocketAddress)) {
338          if (LOG.isTraceEnabled()) {
339            LOG.trace(this + ": can't construct BlockReaderLocalLegacy because " +
340                "the address " + inetSocketAddress + " is not local");
341          }
342          return null;
343        }
344        if (clientContext.getDisableLegacyBlockReaderLocal()) {
345          if (LOG.isTraceEnabled()) {
346            LOG.trace(this + ": can't construct BlockReaderLocalLegacy because " +
347                "disableLegacyBlockReaderLocal is set.");
348          }
349          return null;
350        }
351        IOException ioe = null;
352        try {
353          return BlockReaderLocalLegacy.newBlockReader(conf,
354              userGroupInformation, configuration, fileName, block, token,
355              datanode, startOffset, length);
356        } catch (RemoteException remoteException) {
357          ioe = remoteException.unwrapRemoteException(
358                    InvalidToken.class, AccessControlException.class);
359        } catch (IOException e) {
360          ioe = e;
361        }
362        if ((!(ioe instanceof AccessControlException)) &&
363            isSecurityException(ioe)) {
364          // Handle security exceptions.
365          // We do not handle AccessControlException here, since
366          // BlockReaderLocalLegacy#newBlockReader uses that exception to indicate
367          // that the user is not in dfs.block.local-path-access.user, a condition
368          // which requires us to disable legacy SCR.
369          throw ioe;
370        }
371        LOG.warn(this + ": error creating legacy BlockReaderLocal.  " +
372            "Disabling legacy local reads.", ioe);
373        clientContext.setDisableLegacyBlockReaderLocal();
374        return null;
375      }
376    
377      private BlockReader getBlockReaderLocal() throws InvalidToken {
378        if (LOG.isTraceEnabled()) {
379          LOG.trace(this + ": trying to construct a BlockReaderLocal " +
380              "for short-circuit reads.");
381        }
382        if (pathInfo == null) {
383          pathInfo = clientContext.getDomainSocketFactory().
384                          getPathInfo(inetSocketAddress, conf);
385        }
386        if (!pathInfo.getPathState().getUsableForShortCircuit()) {
387          if (LOG.isTraceEnabled()) {
388            LOG.trace(this + ": " + pathInfo + " is not " +
389                "usable for short circuit; giving up on BlockReaderLocal.");
390          }
391          return null;
392        }
393        ShortCircuitCache cache = clientContext.getShortCircuitCache();
394        ExtendedBlockId key = new ExtendedBlockId(block.getBlockId(), block.getBlockPoolId());
395        ShortCircuitReplicaInfo info = cache.fetchOrCreate(key, this);
396        InvalidToken exc = info.getInvalidTokenException();
397        if (exc != null) {
398          if (LOG.isTraceEnabled()) {
399            LOG.trace(this + ": got InvalidToken exception while trying to " +
400                "construct BlockReaderLocal via " + pathInfo.getPath());
401          }
402          throw exc;
403        }
404        if (info.getReplica() == null) {
405          if (LOG.isTraceEnabled()) {
406            LOG.trace(this + ": failed to get ShortCircuitReplica.  " +
407                "Cannot construct BlockReaderLocal via " + pathInfo.getPath());
408          }
409          return null;
410        }
411        return new BlockReaderLocal.Builder(conf).
412            setFilename(fileName).
413            setBlock(block).
414            setStartOffset(startOffset).
415            setShortCircuitReplica(info.getReplica()).
416            setVerifyChecksum(verifyChecksum).
417            setCachingStrategy(cachingStrategy).
418            build();
419      }
420    
421      /**
422       * Fetch a pair of short-circuit block descriptors from a local DataNode.
423       *
424       * @return    Null if we could not communicate with the datanode,
425       *            a new ShortCircuitReplicaInfo object otherwise.
426       *            ShortCircuitReplicaInfo objects may contain either an InvalidToken
427       *            exception, or a ShortCircuitReplica object ready to use.
428       */
429      @Override
430      public ShortCircuitReplicaInfo createShortCircuitReplicaInfo() {
431        if (createShortCircuitReplicaInfoCallback != null) {
432          ShortCircuitReplicaInfo info =
433            createShortCircuitReplicaInfoCallback.createShortCircuitReplicaInfo();
434          if (info != null) return info;
435        }
436        if (LOG.isTraceEnabled()) {
437          LOG.trace(this + ": trying to create ShortCircuitReplicaInfo.");
438        }
439        BlockReaderPeer curPeer;
440        while (true) {
441          curPeer = nextDomainPeer();
442          if (curPeer == null) break;
443          if (curPeer.fromCache) remainingCacheTries--;
444          DomainPeer peer = (DomainPeer)curPeer.peer;
445          Slot slot = null;
446          ShortCircuitCache cache = clientContext.getShortCircuitCache();
447          try {
448            MutableBoolean usedPeer = new MutableBoolean(false);
449            slot = cache.allocShmSlot(datanode, peer, usedPeer,
450                new ExtendedBlockId(block.getBlockId(), block.getBlockPoolId()),
451                clientName);
452            if (usedPeer.booleanValue()) {
453              if (LOG.isTraceEnabled()) {
454                LOG.trace(this + ": allocShmSlot used up our previous socket " +
455                  peer.getDomainSocket() + ".  Allocating a new one...");
456              }
457              curPeer = nextDomainPeer();
458              if (curPeer == null) break;
459              peer = (DomainPeer)curPeer.peer;
460            }
461            ShortCircuitReplicaInfo info = requestFileDescriptors(peer, slot);
462            clientContext.getPeerCache().put(datanode, peer);
463            return info;
464          } catch (IOException e) {
465            if (slot != null) {
466              cache.freeSlot(slot);
467            }
468            if (curPeer.fromCache) {
469              // Handle an I/O error we got when using a cached socket.
470              // These are considered less serious, because the socket may be stale.
471              if (LOG.isDebugEnabled()) {
472                LOG.debug(this + ": closing stale domain peer " + peer, e);
473              }
474              IOUtils.cleanup(LOG, peer);
475            } else {
476              // Handle an I/O error we got when using a newly created socket.
477              // We temporarily disable the domain socket path for a few minutes in
478              // this case, to prevent wasting more time on it.
479              LOG.warn(this + ": I/O error requesting file descriptors.  " + 
480                  "Disabling domain socket " + peer.getDomainSocket(), e);
481              IOUtils.cleanup(LOG, peer);
482              clientContext.getDomainSocketFactory()
483                  .disableDomainSocketPath(pathInfo.getPath());
484              return null;
485            }
486          }
487        }
488        return null;
489      }
490    
491      /**
492       * Request file descriptors from a DomainPeer.
493       *
494       * @param peer   The peer to use for communication.
495       * @param slot   If non-null, the shared memory slot to associate with the 
496       *               new ShortCircuitReplica.
497       * 
498       * @return  A ShortCircuitReplica object if we could communicate with the
499       *          datanode; null, otherwise. 
500       * @throws  IOException If we encountered an I/O exception while communicating
501       *          with the datanode.
502       */
503      private ShortCircuitReplicaInfo requestFileDescriptors(DomainPeer peer,
504              Slot slot) throws IOException {
505        ShortCircuitCache cache = clientContext.getShortCircuitCache();
506        final DataOutputStream out =
507            new DataOutputStream(new BufferedOutputStream(peer.getOutputStream()));
508        SlotId slotId = slot == null ? null : slot.getSlotId();
509        new Sender(out).requestShortCircuitFds(block, token, slotId, 1);
510        DataInputStream in = new DataInputStream(peer.getInputStream());
511        BlockOpResponseProto resp = BlockOpResponseProto.parseFrom(
512            PBHelper.vintPrefixed(in));
513        DomainSocket sock = peer.getDomainSocket();
514        switch (resp.getStatus()) {
515        case SUCCESS:
516          byte buf[] = new byte[1];
517          FileInputStream fis[] = new FileInputStream[2];
518          sock.recvFileInputStreams(fis, buf, 0, buf.length);
519          ShortCircuitReplica replica = null;
520          try {
521            ExtendedBlockId key =
522                new ExtendedBlockId(block.getBlockId(), block.getBlockPoolId());
523            replica = new ShortCircuitReplica(key, fis[0], fis[1], cache,
524                Time.monotonicNow(), slot);
525          } catch (IOException e) {
526            // This indicates an error reading from disk, or a format error.  Since
527            // it's not a socket communication problem, we return null rather than
528            // throwing an exception.
529            LOG.warn(this + ": error creating ShortCircuitReplica.", e);
530            return null;
531          } finally {
532            if (replica == null) {
533              IOUtils.cleanup(DFSClient.LOG, fis[0], fis[1]);
534            }
535          }
536          return new ShortCircuitReplicaInfo(replica);
537        case ERROR_UNSUPPORTED:
538          if (!resp.hasShortCircuitAccessVersion()) {
539            LOG.warn("short-circuit read access is disabled for " +
540                "DataNode " + datanode + ".  reason: " + resp.getMessage());
541            clientContext.getDomainSocketFactory()
542                .disableShortCircuitForPath(pathInfo.getPath());
543          } else {
544            LOG.warn("short-circuit read access for the file " +
545                fileName + " is disabled for DataNode " + datanode +
546                ".  reason: " + resp.getMessage());
547          }
548          return null;
549        case ERROR_ACCESS_TOKEN:
550          String msg = "access control error while " +
551              "attempting to set up short-circuit access to " +
552              fileName + resp.getMessage();
553          if (LOG.isDebugEnabled()) {
554            LOG.debug(this + ":" + msg);
555          }
556          return new ShortCircuitReplicaInfo(new InvalidToken(msg));
557        default:
558          LOG.warn(this + ": unknown response code " + resp.getStatus() +
559              " while attempting to set up short-circuit access. " +
560              resp.getMessage());
561          clientContext.getDomainSocketFactory()
562              .disableShortCircuitForPath(pathInfo.getPath());
563          return null;
564        }
565      }
566    
567      /**
568       * Get a RemoteBlockReader that communicates over a UNIX domain socket.
569       *
570       * @return The new BlockReader, or null if we failed to create the block
571       * reader.
572       *
573       * @throws InvalidToken    If the block token was invalid.
574       * Potentially other security-related execptions.
575       */
576      private BlockReader getRemoteBlockReaderFromDomain() throws IOException {
577        if (pathInfo == null) {
578          pathInfo = clientContext.getDomainSocketFactory().
579                          getPathInfo(inetSocketAddress, conf);
580        }
581        if (!pathInfo.getPathState().getUsableForDataTransfer()) {
582          if (LOG.isTraceEnabled()) {
583            LOG.trace(this + ": not trying to create a remote block reader " +
584                "because the UNIX domain socket at " + pathInfo +
585                " is not usable.");
586          }
587          return null;
588        }
589        if (LOG.isTraceEnabled()) {
590          LOG.trace(this + ": trying to create a remote block reader from the " +
591              "UNIX domain socket at " + pathInfo.getPath());
592        }
593    
594        while (true) {
595          BlockReaderPeer curPeer = nextDomainPeer();
596          if (curPeer == null) break;
597          if (curPeer.fromCache) remainingCacheTries--;
598          DomainPeer peer = (DomainPeer)curPeer.peer;
599          BlockReader blockReader = null;
600          try {
601            blockReader = getRemoteBlockReader(peer);
602            return blockReader;
603          } catch (IOException ioe) {
604            IOUtils.cleanup(LOG, peer);
605            if (isSecurityException(ioe)) {
606              if (LOG.isTraceEnabled()) {
607                LOG.trace(this + ": got security exception while constructing " +
608                    "a remote block reader from the unix domain socket at " +
609                    pathInfo.getPath(), ioe);
610              }
611              throw ioe;
612            }
613            if (curPeer.fromCache) {
614              // Handle an I/O error we got when using a cached peer.  These are
615              // considered less serious, because the underlying socket may be stale.
616              if (LOG.isDebugEnabled()) {
617                LOG.debug("Closed potentially stale domain peer " + peer, ioe);
618              }
619            } else {
620              // Handle an I/O error we got when using a newly created domain peer.
621              // We temporarily disable the domain socket path for a few minutes in
622              // this case, to prevent wasting more time on it.
623              LOG.warn("I/O error constructing remote block reader.  Disabling " +
624                  "domain socket " + peer.getDomainSocket(), ioe);
625              clientContext.getDomainSocketFactory()
626                  .disableDomainSocketPath(pathInfo.getPath());
627              return null;
628            }
629          } finally {
630            if (blockReader == null) {
631              IOUtils.cleanup(LOG, peer);
632            }
633          }
634        }
635        return null;
636      }
637    
638      /**
639       * Get a RemoteBlockReader that communicates over a TCP socket.
640       *
641       * @return The new BlockReader.  We will not return null, but instead throw
642       *         an exception if this fails.
643       *
644       * @throws InvalidToken
645       *             If the block token was invalid.
646       *         InvalidEncryptionKeyException
647       *             If the encryption key was invalid.
648       *         Other IOException
649       *             If there was another problem.
650       */
651      private BlockReader getRemoteBlockReaderFromTcp() throws IOException {
652        if (LOG.isTraceEnabled()) {
653          LOG.trace(this + ": trying to create a remote block reader from a " +
654              "TCP socket");
655        }
656        BlockReader blockReader = null;
657        while (true) {
658          BlockReaderPeer curPeer = null;
659          Peer peer = null;
660          try {
661            curPeer = nextTcpPeer();
662            if (curPeer == null) break;
663            if (curPeer.fromCache) remainingCacheTries--;
664            peer = curPeer.peer;
665            blockReader = getRemoteBlockReader(peer);
666            return blockReader;
667          } catch (IOException ioe) {
668            if (isSecurityException(ioe)) {
669              if (LOG.isTraceEnabled()) {
670                LOG.trace(this + ": got security exception while constructing " +
671                    "a remote block reader from " + peer, ioe);
672              }
673              throw ioe;
674            }
675            if ((curPeer != null) && curPeer.fromCache) {
676              // Handle an I/O error we got when using a cached peer.  These are
677              // considered less serious, because the underlying socket may be
678              // stale.
679              if (LOG.isDebugEnabled()) {
680                LOG.debug("Closed potentially stale remote peer " + peer, ioe);
681              }
682            } else {
683              // Handle an I/O error we got when using a newly created peer.
684              LOG.warn("I/O error constructing remote block reader.", ioe);
685              throw ioe;
686            }
687          } finally {
688            if (blockReader == null) {
689              IOUtils.cleanup(LOG, peer);
690            }
691          }
692        }
693        return null;
694      }
695    
696      public static class BlockReaderPeer {
697        final Peer peer;
698        final boolean fromCache;
699        
700        BlockReaderPeer(Peer peer, boolean fromCache) {
701          this.peer = peer;
702          this.fromCache = fromCache;
703        }
704      }
705    
706      /**
707       * Get the next DomainPeer-- either from the cache or by creating it.
708       *
709       * @return the next DomainPeer, or null if we could not construct one.
710       */
711      private BlockReaderPeer nextDomainPeer() {
712        if (remainingCacheTries > 0) {
713          Peer peer = clientContext.getPeerCache().get(datanode, true);
714          if (peer != null) {
715            if (LOG.isTraceEnabled()) {
716              LOG.trace("nextDomainPeer: reusing existing peer " + peer);
717            }
718            return new BlockReaderPeer(peer, true);
719          }
720        }
721        DomainSocket sock = clientContext.getDomainSocketFactory().
722            createSocket(pathInfo, conf.socketTimeout);
723        if (sock == null) return null;
724        return new BlockReaderPeer(new DomainPeer(sock), false);
725      }
726    
727      /**
728       * Get the next TCP-based peer-- either from the cache or by creating it.
729       *
730       * @return the next Peer, or null if we could not construct one.
731       *
732       * @throws IOException  If there was an error while constructing the peer
733       *                      (such as an InvalidEncryptionKeyException)
734       */
735      private BlockReaderPeer nextTcpPeer() throws IOException {
736        if (remainingCacheTries > 0) {
737          Peer peer = clientContext.getPeerCache().get(datanode, false);
738          if (peer != null) {
739            if (LOG.isTraceEnabled()) {
740              LOG.trace("nextTcpPeer: reusing existing peer " + peer);
741            }
742            return new BlockReaderPeer(peer, true);
743          }
744        }
745        try {
746          Peer peer = remotePeerFactory.newConnectedPeer(inetSocketAddress);
747          if (LOG.isTraceEnabled()) {
748            LOG.trace("nextTcpPeer: created newConnectedPeer " + peer);
749          }
750          return new BlockReaderPeer(peer, false);
751        } catch (IOException e) {
752          if (LOG.isTraceEnabled()) {
753            LOG.trace("nextTcpPeer: failed to create newConnectedPeer " +
754                      "connected to " + datanode);
755          }
756          throw e;
757        }
758      }
759    
760      /**
761       * Determine if an exception is security-related.
762       *
763       * We need to handle these exceptions differently than other IOExceptions.
764       * They don't indicate a communication problem.  Instead, they mean that there
765       * is some action the client needs to take, such as refetching block tokens,
766       * renewing encryption keys, etc.
767       *
768       * @param ioe    The exception
769       * @return       True only if the exception is security-related.
770       */
771      private static boolean isSecurityException(IOException ioe) {
772        return (ioe instanceof InvalidToken) ||
773                (ioe instanceof InvalidEncryptionKeyException) ||
774                (ioe instanceof InvalidBlockTokenException) ||
775                (ioe instanceof AccessControlException);
776      }
777    
778      @SuppressWarnings("deprecation")
779      private BlockReader getRemoteBlockReader(Peer peer) throws IOException {
780        if (conf.useLegacyBlockReader) {
781          return RemoteBlockReader.newBlockReader(fileName,
782              block, token, startOffset, length, conf.ioBufferSize,
783              verifyChecksum, clientName, peer, datanode,
784              clientContext.getPeerCache(), cachingStrategy);
785        } else {
786          return RemoteBlockReader2.newBlockReader(
787              fileName, block, token, startOffset, length,
788              verifyChecksum, clientName, peer, datanode,
789              clientContext.getPeerCache(), cachingStrategy);
790        }
791      }
792    
793      @Override
794      public String toString() {
795        return "BlockReaderFactory(fileName=" + fileName + ", block=" + block + ")";
796      }
797    
798      /**
799       * File name to print when accessing a block directly (from servlets)
800       * @param s Address of the block location
801       * @param poolId Block pool ID of the block
802       * @param blockId Block ID of the block
803       * @return string that has a file name for debug purposes
804       */
805      public static String getFileName(final InetSocketAddress s,
806          final String poolId, final long blockId) {
807        return s.toString() + ":" + poolId + ":" + blockId;
808      }
809    }