001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.hdfs.server.datanode.fsdataset;
019
020 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_DEFAULT;
021 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_KEY;
022 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT;
023 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY;
024
025 import java.io.IOException;
026 import java.util.ArrayList;
027 import java.util.List;
028 import java.util.Random;
029
030 import org.apache.commons.logging.Log;
031 import org.apache.commons.logging.LogFactory;
032 import org.apache.hadoop.conf.Configurable;
033 import org.apache.hadoop.conf.Configuration;
034 import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException;
035
036 /**
037 * A DN volume choosing policy which takes into account the amount of free
038 * space on each of the available volumes when considering where to assign a
039 * new replica allocation. By default this policy prefers assigning replicas to
040 * those volumes with more available free space, so as to over time balance the
041 * available space of all the volumes within a DN.
042 */
043 public class AvailableSpaceVolumeChoosingPolicy<V extends FsVolumeSpi>
044 implements VolumeChoosingPolicy<V>, Configurable {
045
046 private static final Log LOG = LogFactory.getLog(AvailableSpaceVolumeChoosingPolicy.class);
047
048 private static final Random RAND = new Random();
049
050 private long balancedSpaceThreshold = DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_DEFAULT;
051 private float balancedPreferencePercent = DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT;
052
053 @Override
054 public synchronized void setConf(Configuration conf) {
055 balancedSpaceThreshold = conf.getLong(
056 DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_KEY,
057 DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_DEFAULT);
058 balancedPreferencePercent = conf.getFloat(
059 DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY,
060 DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT);
061
062 LOG.info("Available space volume choosing policy initialized: " +
063 DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_KEY +
064 " = " + balancedSpaceThreshold + ", " +
065 DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY +
066 " = " + balancedPreferencePercent);
067
068 if (balancedPreferencePercent > 1.0) {
069 LOG.warn("The value of " + DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY +
070 " is greater than 1.0 but should be in the range 0.0 - 1.0");
071 }
072
073 if (balancedPreferencePercent < 0.5) {
074 LOG.warn("The value of " + DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY +
075 " is less than 0.5 so volumes with less available disk space will receive more block allocations");
076 }
077 }
078
079 @Override
080 public synchronized Configuration getConf() {
081 // Nothing to do. Only added to fulfill the Configurable contract.
082 return null;
083 }
084
085 private final VolumeChoosingPolicy<V> roundRobinPolicyBalanced =
086 new RoundRobinVolumeChoosingPolicy<V>();
087 private final VolumeChoosingPolicy<V> roundRobinPolicyHighAvailable =
088 new RoundRobinVolumeChoosingPolicy<V>();
089 private final VolumeChoosingPolicy<V> roundRobinPolicyLowAvailable =
090 new RoundRobinVolumeChoosingPolicy<V>();
091
092 @Override
093 public synchronized V chooseVolume(List<V> volumes,
094 final long replicaSize) throws IOException {
095 if (volumes.size() < 1) {
096 throw new DiskOutOfSpaceException("No more available volumes");
097 }
098
099 AvailableSpaceVolumeList volumesWithSpaces =
100 new AvailableSpaceVolumeList(volumes);
101
102 if (volumesWithSpaces.areAllVolumesWithinFreeSpaceThreshold()) {
103 // If they're actually not too far out of whack, fall back on pure round
104 // robin.
105 V volume = roundRobinPolicyBalanced.chooseVolume(volumes, replicaSize);
106 if (LOG.isDebugEnabled()) {
107 LOG.debug("All volumes are within the configured free space balance " +
108 "threshold. Selecting " + volume + " for write of block size " +
109 replicaSize);
110 }
111 return volume;
112 } else {
113 V volume = null;
114 // If none of the volumes with low free space have enough space for the
115 // replica, always try to choose a volume with a lot of free space.
116 long mostAvailableAmongLowVolumes = volumesWithSpaces
117 .getMostAvailableSpaceAmongVolumesWithLowAvailableSpace();
118
119 List<V> highAvailableVolumes = extractVolumesFromPairs(
120 volumesWithSpaces.getVolumesWithHighAvailableSpace());
121 List<V> lowAvailableVolumes = extractVolumesFromPairs(
122 volumesWithSpaces.getVolumesWithLowAvailableSpace());
123
124 float preferencePercentScaler =
125 (highAvailableVolumes.size() * balancedPreferencePercent) +
126 (lowAvailableVolumes.size() * (1 - balancedPreferencePercent));
127 float scaledPreferencePercent =
128 (highAvailableVolumes.size() * balancedPreferencePercent) /
129 preferencePercentScaler;
130 if (mostAvailableAmongLowVolumes < replicaSize ||
131 RAND.nextFloat() < scaledPreferencePercent) {
132 volume = roundRobinPolicyHighAvailable.chooseVolume(
133 highAvailableVolumes,
134 replicaSize);
135 if (LOG.isDebugEnabled()) {
136 LOG.debug("Volumes are imbalanced. Selecting " + volume +
137 " from high available space volumes for write of block size "
138 + replicaSize);
139 }
140 } else {
141 volume = roundRobinPolicyLowAvailable.chooseVolume(
142 lowAvailableVolumes,
143 replicaSize);
144 if (LOG.isDebugEnabled()) {
145 LOG.debug("Volumes are imbalanced. Selecting " + volume +
146 " from low available space volumes for write of block size "
147 + replicaSize);
148 }
149 }
150 return volume;
151 }
152 }
153
154 /**
155 * Used to keep track of the list of volumes we're choosing from.
156 */
157 private class AvailableSpaceVolumeList {
158 private final List<AvailableSpaceVolumePair> volumes;
159
160 public AvailableSpaceVolumeList(List<V> volumes) throws IOException {
161 this.volumes = new ArrayList<AvailableSpaceVolumePair>();
162 for (V volume : volumes) {
163 this.volumes.add(new AvailableSpaceVolumePair(volume));
164 }
165 }
166
167 /**
168 * Check if the available space on all the volumes is roughly equal.
169 *
170 * @param volumes the volumes to check
171 * @return true if all volumes' free space is within the configured threshold,
172 * false otherwise.
173 * @throws IOException
174 * in the event of error checking amount of available space
175 */
176 public boolean areAllVolumesWithinFreeSpaceThreshold() {
177 long leastAvailable = Long.MAX_VALUE;
178 long mostAvailable = 0;
179 for (AvailableSpaceVolumePair volume : volumes) {
180 leastAvailable = Math.min(leastAvailable, volume.getAvailable());
181 mostAvailable = Math.max(mostAvailable, volume.getAvailable());
182 }
183 return (mostAvailable - leastAvailable) < balancedSpaceThreshold;
184 }
185
186 /**
187 * @return the minimum amount of space available on a single volume,
188 * across all volumes.
189 */
190 private long getLeastAvailableSpace() {
191 long leastAvailable = Long.MAX_VALUE;
192 for (AvailableSpaceVolumePair volume : volumes) {
193 leastAvailable = Math.min(leastAvailable, volume.getAvailable());
194 }
195 return leastAvailable;
196 }
197
198 /**
199 * @return the maximum amount of space available across volumes with low space.
200 */
201 public long getMostAvailableSpaceAmongVolumesWithLowAvailableSpace() {
202 long mostAvailable = Long.MIN_VALUE;
203 for (AvailableSpaceVolumePair volume : getVolumesWithLowAvailableSpace()) {
204 mostAvailable = Math.max(mostAvailable, volume.getAvailable());
205 }
206 return mostAvailable;
207 }
208
209 /**
210 * @return the list of volumes with relatively low available space.
211 */
212 public List<AvailableSpaceVolumePair> getVolumesWithLowAvailableSpace() {
213 long leastAvailable = getLeastAvailableSpace();
214 List<AvailableSpaceVolumePair> ret = new ArrayList<AvailableSpaceVolumePair>();
215 for (AvailableSpaceVolumePair volume : volumes) {
216 if (volume.getAvailable() <= leastAvailable + balancedSpaceThreshold) {
217 ret.add(volume);
218 }
219 }
220 return ret;
221 }
222
223 /**
224 * @return the list of volumes with a lot of available space.
225 */
226 public List<AvailableSpaceVolumePair> getVolumesWithHighAvailableSpace() {
227 long leastAvailable = getLeastAvailableSpace();
228 List<AvailableSpaceVolumePair> ret = new ArrayList<AvailableSpaceVolumePair>();
229 for (AvailableSpaceVolumePair volume : volumes) {
230 if (volume.getAvailable() > leastAvailable + balancedSpaceThreshold) {
231 ret.add(volume);
232 }
233 }
234 return ret;
235 }
236
237 }
238
239 /**
240 * Used so that we only check the available space on a given volume once, at
241 * the beginning of {@link AvailableSpaceVolumeChoosingPolicy#chooseVolume(List, long)}.
242 */
243 private class AvailableSpaceVolumePair {
244 private final V volume;
245 private final long availableSpace;
246
247 public AvailableSpaceVolumePair(V volume) throws IOException {
248 this.volume = volume;
249 this.availableSpace = volume.getAvailable();
250 }
251
252 public long getAvailable() {
253 return availableSpace;
254 }
255
256 public V getVolume() {
257 return volume;
258 }
259 }
260
261 private List<V> extractVolumesFromPairs(List<AvailableSpaceVolumePair> volumes) {
262 List<V> ret = new ArrayList<V>();
263 for (AvailableSpaceVolumePair volume : volumes) {
264 ret.add(volume.getVolume());
265 }
266 return ret;
267 }
268
269 }