001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.hdfs.server.namenode;
019
020 import java.io.IOException;
021 import java.net.InetAddress;
022 import java.net.UnknownHostException;
023 import java.util.HashMap;
024 import java.util.HashSet;
025 import java.util.Iterator;
026 import java.util.Map;
027 import java.util.TreeMap;
028
029 import org.apache.commons.logging.Log;
030 import org.apache.commons.logging.LogFactory;
031 import org.apache.hadoop.hdfs.protocol.DatanodeID;
032 import org.apache.hadoop.util.HostsFileReader;
033
034 /**
035 * This class manages the include and exclude files for HDFS.
036 *
037 * These files control which DataNodes the NameNode expects to see in the
038 * cluster. Loosely speaking, the include file, if it exists and is not
039 * empty, is a list of everything we expect to see. The exclude file is
040 * a list of everything we want to ignore if we do see it.
041 *
042 * Entries may or may not specify a port. If they don't, we consider
043 * them to apply to every DataNode on that host. For example, putting
044 * 192.168.0.100 in the excludes file blacklists both 192.168.0.100:5000 and
045 * 192.168.0.100:6000. This case comes up in unit tests.
046 *
047 * When reading the hosts files, we try to find the IP address for each
048 * entry. This is important because it allows us to de-duplicate entries.
049 * If the user specifies a node as foo.bar.com in the include file, but
050 * 192.168.0.100 in the exclude file, we need to realize that these are
051 * the same node. Resolving the IP address also allows us to give more
052 * information back to getDatanodeListForReport, which makes the web UI
053 * look nicer (among other things.) See HDFS-3934 for more details.
054 *
055 * DNS resolution can be slow. For this reason, we ONLY do it when (re)reading
056 * the hosts files. In all other cases, we rely on the cached values either
057 * in the DatanodeID objects, or in HostFileManager#Entry.
058 * We also don't want to be holding locks when doing this.
059 * See HDFS-3990 for more discussion of DNS overheads.
060 *
061 * Not all entries in the hosts files will have an associated IP address.
062 * Some entries may be "registration names." The "registration name" of
063 * a DataNode is either the actual hostname, or an arbitrary string configured
064 * by dfs.datanode.hostname. It's possible to add registration names to the
065 * include or exclude files. If we can't find an IP address associated with
066 * a host file entry, we assume it's a registered hostname and act accordingly.
067 * The "registration name" feature is a little odd and it may be removed in the
068 * future (I hope?)
069 */
070 public class HostFileManager {
071 private static final Log LOG = LogFactory.getLog(HostFileManager.class);
072
073 public static class Entry {
074 /**
075 * This what the user put on the line before the colon, or the whole line
076 * if there is no colon.
077 */
078 private final String prefix;
079
080 /**
081 * This is the port which was specified after the colon. It is 0 if no
082 * port was given.
083 */
084 private final int port;
085
086 /**
087 * If we can resolve the IP address, this is it. Otherwise, it is the
088 * empty string.
089 */
090 private final String ipAddress;
091
092 /**
093 * Parse a hosts file Entry.
094 */
095 static Entry parse(String fileName, String entry) throws IOException {
096 final String prefix;
097 final int port;
098 String ipAddress = "";
099
100 int idx = entry.indexOf(':');
101 if (-1 == idx) {
102 prefix = entry;
103 port = 0;
104 } else {
105 prefix = entry.substring(0, idx);
106 String portStr = entry.substring(idx + 1);
107 try {
108 port = Integer.valueOf(portStr);
109 } catch (NumberFormatException e) {
110 throw new IOException("unable to parse port number for " +
111 "'" + entry + "'", e);
112 }
113 }
114 try {
115 // Let's see if we can resolve this prefix to an IP address.
116 // This may fail; one example is with a registered hostname
117 // which is not actually a real DNS name.
118 InetAddress addr = InetAddress.getByName(prefix);
119 ipAddress = addr.getHostAddress();
120 } catch (UnknownHostException e) {
121 LOG.info("When reading " + fileName + ", could not look up " +
122 "IP address for " + prefix + ". We will assume this is a " +
123 "registration name.", e);
124 }
125 return new Entry(prefix, port, ipAddress);
126 }
127
128 public String getIdentifier() {
129 return ipAddress.isEmpty() ? prefix : ipAddress;
130 }
131
132 public Entry(String prefix, int port, String ipAddress) {
133 this.prefix = prefix;
134 this.port = port;
135 this.ipAddress = ipAddress;
136 }
137
138 public String getPrefix() {
139 return prefix;
140 }
141
142 public int getPort() {
143 return port;
144 }
145
146 public String getIpAddress() {
147 return ipAddress;
148 }
149
150 public String toString() {
151 StringBuilder bld = new StringBuilder();
152 bld.append("Entry{").append(prefix).append(", port=").
153 append(port).append(", ipAddress=").append(ipAddress).append("}");
154 return bld.toString();
155 }
156 }
157
158 public static class EntrySet implements Iterable<Entry> {
159 /**
160 * The index. Each Entry appears in here exactly once.
161 *
162 * It may be indexed by one of:
163 * ipAddress:port
164 * ipAddress
165 * registeredHostname:port
166 * registeredHostname
167 *
168 * The different indexing strategies reflect the fact that we may or may
169 * not have a port or IP address for each entry.
170 */
171 TreeMap<String, Entry> index = new TreeMap<String, Entry>();
172
173 public boolean isEmpty() {
174 return index.isEmpty();
175 }
176
177 public Entry find(DatanodeID datanodeID) {
178 Entry entry;
179 int xferPort = datanodeID.getXferPort();
180 assert(xferPort > 0);
181 String datanodeIpAddr = datanodeID.getIpAddr();
182 if (datanodeIpAddr != null) {
183 entry = index.get(datanodeIpAddr + ":" + xferPort);
184 if (entry != null) {
185 return entry;
186 }
187 entry = index.get(datanodeIpAddr);
188 if (entry != null) {
189 return entry;
190 }
191 }
192 String registeredHostName = datanodeID.getHostName();
193 if (registeredHostName != null) {
194 entry = index.get(registeredHostName + ":" + xferPort);
195 if (entry != null) {
196 return entry;
197 }
198 entry = index.get(registeredHostName);
199 if (entry != null) {
200 return entry;
201 }
202 }
203 return null;
204 }
205
206 public Entry find(Entry toFind) {
207 int port = toFind.getPort();
208 if (port != 0) {
209 return index.get(toFind.getIdentifier() + ":" + port);
210 } else {
211 // An Entry with no port matches any entry with the same identifer.
212 // In other words, we treat 0 as "any port."
213 Map.Entry<String, Entry> ceil =
214 index.ceilingEntry(toFind.getIdentifier());
215 if ((ceil != null) &&
216 (ceil.getValue().getIdentifier().equals(
217 toFind.getIdentifier()))) {
218 return ceil.getValue();
219 }
220 return null;
221 }
222 }
223
224 public String toString() {
225 StringBuilder bld = new StringBuilder();
226
227 bld.append("HostSet(");
228 for (Map.Entry<String, Entry> entry : index.entrySet()) {
229 bld.append("\n\t");
230 bld.append(entry.getKey()).append("->").
231 append(entry.getValue().toString());
232 }
233 bld.append("\n)");
234 return bld.toString();
235 }
236
237 @Override
238 public Iterator<Entry> iterator() {
239 return index.values().iterator();
240 }
241 }
242
243 public static class MutableEntrySet extends EntrySet {
244 public void add(DatanodeID datanodeID) {
245 Entry entry = new Entry(datanodeID.getHostName(),
246 datanodeID.getXferPort(), datanodeID.getIpAddr());
247 index.put(datanodeID.getIpAddr() + ":" + datanodeID.getXferPort(),
248 entry);
249 }
250
251 public void add(Entry entry) {
252 int port = entry.getPort();
253 if (port != 0) {
254 index.put(entry.getIdentifier() + ":" + port, entry);
255 } else {
256 index.put(entry.getIdentifier(), entry);
257 }
258 }
259
260 void readFile(String type, String filename) throws IOException {
261 if (filename.isEmpty()) {
262 return;
263 }
264 HashSet<String> entrySet = new HashSet<String>();
265 HostsFileReader.readFileToSet(type, filename, entrySet);
266 for (String str : entrySet) {
267 Entry entry = Entry.parse(filename, str);
268 add(entry);
269 }
270 }
271 }
272
273 private EntrySet includes = new EntrySet();
274 private EntrySet excludes = new EntrySet();
275
276 public HostFileManager() {
277 }
278
279 public void refresh(String includeFile, String excludeFile)
280 throws IOException {
281 MutableEntrySet newIncludes = new MutableEntrySet();
282 IOException includeException = null;
283 try {
284 newIncludes.readFile("included", includeFile);
285 } catch (IOException e) {
286 includeException = e;
287 }
288 MutableEntrySet newExcludes = new MutableEntrySet();
289 IOException excludeException = null;
290 try {
291 newExcludes.readFile("excluded", excludeFile);
292 } catch (IOException e) {
293 excludeException = e;
294 }
295 synchronized(this) {
296 if (includeException == null) {
297 includes = newIncludes;
298 }
299 if (excludeException == null) {
300 excludes = newExcludes;
301 }
302 }
303 if (includeException == null) {
304 LOG.info("read includes:\n" + newIncludes);
305 } else {
306 LOG.error("failed to read include file '" + includeFile + "'. " +
307 "Continuing to use previous include list.",
308 includeException);
309 }
310 if (excludeException == null) {
311 LOG.info("read excludes:\n" + newExcludes);
312 } else {
313 LOG.error("failed to read exclude file '" + excludeFile + "'." +
314 "Continuing to use previous exclude list.",
315 excludeException);
316 }
317 if (includeException != null) {
318 throw new IOException("error reading hosts file " + includeFile,
319 includeException);
320 }
321 if (excludeException != null) {
322 throw new IOException("error reading exclude file " + excludeFile,
323 excludeException);
324 }
325 }
326
327 public synchronized boolean isIncluded(DatanodeID dn) {
328 if (includes.isEmpty()) {
329 // If the includes list is empty, act as if everything is in the
330 // includes list.
331 return true;
332 } else {
333 return includes.find(dn) != null;
334 }
335 }
336
337 public synchronized boolean isExcluded(DatanodeID dn) {
338 return excludes.find(dn) != null;
339 }
340
341 public synchronized boolean hasIncludes() {
342 return !includes.isEmpty();
343 }
344
345 /**
346 * @return the includes as an immutable set.
347 */
348 public synchronized EntrySet getIncludes() {
349 return includes;
350 }
351
352 /**
353 * @return the excludes as an immutable set.
354 */
355 public synchronized EntrySet getExcludes() {
356 return excludes;
357 }
358 }