001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.camel.component.hdfs;
018
019 import java.io.ByteArrayOutputStream;
020 import java.io.Closeable;
021 import java.io.File;
022 import java.io.FileInputStream;
023 import java.io.IOException;
024 import java.io.InputStream;
025 import java.io.OutputStream;
026 import java.io.PrintStream;
027 import java.nio.ByteBuffer;
028 import java.util.HashMap;
029 import java.util.Map;
030
031 import javax.xml.ws.Holder;
032
033 import org.apache.camel.RuntimeCamelException;
034 import org.apache.camel.TypeConverter;
035 import org.apache.camel.util.IOHelper;
036 import org.apache.hadoop.conf.Configuration;
037 import org.apache.hadoop.fs.FSDataOutputStream;
038 import org.apache.hadoop.fs.FileSystem;
039 import org.apache.hadoop.fs.FileUtil;
040 import org.apache.hadoop.fs.Path;
041 import org.apache.hadoop.io.ArrayFile;
042 import org.apache.hadoop.io.BloomMapFile;
043 import org.apache.hadoop.io.BooleanWritable;
044 import org.apache.hadoop.io.ByteWritable;
045 import org.apache.hadoop.io.BytesWritable;
046 import org.apache.hadoop.io.DoubleWritable;
047 import org.apache.hadoop.io.FloatWritable;
048 import org.apache.hadoop.io.IntWritable;
049 import org.apache.hadoop.io.LongWritable;
050 import org.apache.hadoop.io.MapFile;
051 import org.apache.hadoop.io.NullWritable;
052 import org.apache.hadoop.io.SequenceFile;
053 import org.apache.hadoop.io.SequenceFile.Writer;
054 import org.apache.hadoop.io.Text;
055 import org.apache.hadoop.io.Writable;
056 import org.apache.hadoop.io.WritableComparable;
057 import org.apache.hadoop.util.Progressable;
058 import org.apache.hadoop.util.ReflectionUtils;
059
060 public enum HdfsFileType {
061
NORMAL_FILE {

    @Override
    public long append(HdfsOutputStream hdfsostr, Object key, Object value, TypeConverter typeConverter) {
        // Streams the message body into the HDFS output stream; the key is ignored
        // for plain files. The stream is NOT closed here (copyBytes close=false) so
        // multiple appends can target the same open file; only the converted input
        // stream is closed.
        InputStream is = null;
        try {
            is = typeConverter.convertTo(InputStream.class, value);
            return copyBytes(is, (FSDataOutputStream) hdfsostr.getOut(), HdfsConstants.DEFAULT_BUFFERSIZE, false);
        } catch (IOException ex) {
            throw new RuntimeCamelException(ex);
        } finally {
            IOHelper.close(is);
        }
    }

    @Override
    public long next(HdfsInputStream hdfsistr, Holder<Object> key, Holder<Object> value) {
        try {
            ByteArrayOutputStream bos = new ByteArrayOutputStream(hdfsistr.getChunkSize());
            // FIX: honor the configured chunk size when reading. Previously a fixed
            // DEFAULT_BUFFERSIZE buffer was used, so chunkSize had no effect on how
            // much data each chunk delivered (even though bos above is sized by it).
            // Fall back to the default when chunkSize is not positive.
            int chunkSize = hdfsistr.getChunkSize() > 0 ? hdfsistr.getChunkSize() : HdfsConstants.DEFAULT_BUFFERSIZE;
            byte[] buf = new byte[chunkSize];
            int bytesRead = ((InputStream) hdfsistr.getIn()).read(buf);
            if (bytesRead >= 0) {
                bos.write(buf, 0, bytesRead);
                key.value = null;
                value.value = bos;
                return bytesRead;
            } else {
                // EOF: clear both holders and signal "nothing read".
                key.value = null;
                value.value = null;
                return 0;
            }
        } catch (IOException ex) {
            throw new RuntimeCamelException(ex);
        }
    }

    @Override
    public Closeable createOutputStream(String hdfsPath, HdfsConfiguration configuration) {
        try {
            Closeable rout;
            HdfsInfo hdfsInfo = new HdfsInfo(hdfsPath);
            if (!configuration.isAppend()) {
                rout = hdfsInfo.getFileSystem().create(hdfsInfo.getPath(), configuration.isOverwrite(), configuration.getBufferSize(),
                        configuration.getReplication(), configuration.getBlockSize(), new Progressable() {
                            @Override
                            public void progress() {
                                // progress reporting not used
                            }
                        });
            } else {
                rout = hdfsInfo.getFileSystem().append(hdfsInfo.getPath(), configuration.getBufferSize(), new Progressable() {
                    @Override
                    public void progress() {
                        // progress reporting not used
                    }
                });
            }
            return rout;
        } catch (IOException ex) {
            throw new RuntimeCamelException(ex);
        }
    }

    @Override
    public Closeable createInputStream(String hdfsPath, HdfsConfiguration configuration) {
        try {
            Closeable rin;
            if (configuration.getFileSystemType().equals(HdfsFileSystemType.LOCAL)) {
                HdfsInfo hdfsInfo = new HdfsInfo(hdfsPath);
                rin = hdfsInfo.getFileSystem().open(hdfsInfo.getPath());
            } else {
                // Non-local file systems: copy the remote file to a local temp file
                // first, then stream from that local copy.
                rin = new FileInputStream(getHdfsFileToTmpFile(hdfsPath, configuration));
            }
            return rin;
        } catch (IOException ex) {
            throw new RuntimeCamelException(ex);
        }
    }

    // Renamed from getHfdsFileToTmpFile (typo); private helper, no external callers.
    // Copies the HDFS path to a local temp location and returns the local file.
    private File getHdfsFileToTmpFile(String hdfsPath, HdfsConfiguration configuration) {
        try {
            // NOTE(review): substring(lastIndexOf('/')) keeps the leading '/' in the
            // name; preserved as-is because fname also forms the returned path below —
            // confirm downstream expectations before changing to lastIndexOf('/') + 1.
            String fname = hdfsPath.substring(hdfsPath.lastIndexOf('/'));

            File outputDest = File.createTempFile(fname, ".hdfs");
            if (outputDest.exists()) {
                // createTempFile creates the file; remove it so the HDFS copy below
                // can create it fresh.
                outputDest.delete();
            }

            HdfsInfo hdfsInfo = new HdfsInfo(hdfsPath);
            FileSystem fileSystem = hdfsInfo.getFileSystem();
            FileUtil.copy(fileSystem, new Path(hdfsPath), outputDest, false, fileSystem.getConf());
            // NOTE(review): copyMerge after copy looks redundant for plain files —
            // it merges a source directory into one local file. On failure the
            // already-copied outputDest is returned instead; verify intent.
            try {
                FileUtil.copyMerge(
                        fileSystem, // src
                        new Path(hdfsPath),
                        FileSystem.getLocal(new Configuration()), // dest
                        new Path(outputDest.toURI()),
                        false, fileSystem.getConf(), null);
            } catch (IOException e) {
                return outputDest;
            }

            return new File(outputDest, fname);
        } catch (IOException ex) {
            throw new RuntimeCamelException(ex);
        }
    }
},
167
SEQUENCE_FILE {

    @Override
    public long append(HdfsOutputStream hdfsostr, Object key, Object value, TypeConverter typeConverter) {
        try {
            // Serialize key and value to Hadoop Writables, collecting serialized sizes.
            Holder<Integer> keyBytes = new Holder<Integer>();
            Holder<Integer> valueBytes = new Holder<Integer>();
            Writable wKey = getWritable(key, typeConverter, keyBytes);
            Writable wValue = getWritable(value, typeConverter, valueBytes);
            Writer writer = (SequenceFile.Writer) hdfsostr.getOut();
            writer.append(wKey, wValue);
            // Emit a sync marker so the file stays splittable/recoverable.
            writer.sync();
            return keyBytes.value + valueBytes.value;
        } catch (Exception ex) {
            throw new RuntimeCamelException(ex);
        }
    }

    @Override
    public long next(HdfsInputStream hdfsistr, Holder<Object> key, Holder<Object> value) {
        try {
            SequenceFile.Reader reader = (SequenceFile.Reader) hdfsistr.getIn();
            // Instantiate empty writables of the file's declared key/value classes.
            Writable wKey = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), new Configuration());
            Writable wValue = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), new Configuration());
            if (!reader.next(wKey, wValue)) {
                // Nothing left to read.
                return 0;
            }
            Holder<Integer> keyBytes = new Holder<Integer>();
            Holder<Integer> valueBytes = new Holder<Integer>();
            key.value = getObject(wKey, keyBytes);
            value.value = getObject(wValue, valueBytes);
            return keyBytes.value + valueBytes.value;
        } catch (Exception ex) {
            throw new RuntimeCamelException(ex);
        }
    }

    @Override
    public Closeable createOutputStream(String hdfsPath, HdfsConfiguration configuration) {
        try {
            HdfsInfo hdfsInfo = new HdfsInfo(hdfsPath);
            Class keyWritableClass = configuration.getKeyType().getWritableClass();
            Class valueWritableClass = configuration.getValueType().getWritableClass();
            // Writer configured entirely from the endpoint configuration.
            return SequenceFile.createWriter(hdfsInfo.getFileSystem(), hdfsInfo.getConf(), hdfsInfo.getPath(), keyWritableClass,
                    valueWritableClass, configuration.getBufferSize(), configuration.getReplication(), configuration.getBlockSize(),
                    configuration.getCompressionType(), configuration.getCompressionCodec().getCodec(), new Progressable() {
                        @Override
                        public void progress() {
                            // progress reporting not used
                        }
                    }, new SequenceFile.Metadata());
        } catch (IOException ex) {
            throw new RuntimeCamelException(ex);
        }
    }

    @Override
    public Closeable createInputStream(String hdfsPath, HdfsConfiguration configuration) {
        try {
            HdfsInfo hdfsInfo = new HdfsInfo(hdfsPath);
            return new SequenceFile.Reader(hdfsInfo.getFileSystem(), hdfsInfo.getPath(), hdfsInfo.getConf());
        } catch (IOException ex) {
            throw new RuntimeCamelException(ex);
        }
    }
},
237
MAP_FILE {

    @Override
    public long append(HdfsOutputStream hdfsostr, Object key, Object value, TypeConverter typeConverter) {
        try {
            // Convert both halves of the entry; MapFile keys must be comparable.
            Holder<Integer> keyBytes = new Holder<Integer>();
            Holder<Integer> valueBytes = new Holder<Integer>();
            Writable wKey = getWritable(key, typeConverter, keyBytes);
            Writable wValue = getWritable(value, typeConverter, valueBytes);
            MapFile.Writer writer = (MapFile.Writer) hdfsostr.getOut();
            writer.append((WritableComparable) wKey, wValue);
            return keyBytes.value + valueBytes.value;
        } catch (Exception ex) {
            throw new RuntimeCamelException(ex);
        }
    }

    @Override
    public long next(HdfsInputStream hdfsistr, Holder<Object> key, Holder<Object> value) {
        try {
            MapFile.Reader reader = (MapFile.Reader) hdfsistr.getIn();
            // Instantiate empty writables of the file's declared key/value classes.
            WritableComparable wKey = (WritableComparable) ReflectionUtils.newInstance(reader.getKeyClass(), new Configuration());
            Writable wValue = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), new Configuration());
            if (!reader.next(wKey, wValue)) {
                // Nothing left to read.
                return 0;
            }
            Holder<Integer> keyBytes = new Holder<Integer>();
            Holder<Integer> valueBytes = new Holder<Integer>();
            key.value = getObject(wKey, keyBytes);
            value.value = getObject(wValue, valueBytes);
            return keyBytes.value + valueBytes.value;
        } catch (Exception ex) {
            throw new RuntimeCamelException(ex);
        }
    }

    @Override
    @SuppressWarnings("unchecked")
    public Closeable createOutputStream(String hdfsPath, HdfsConfiguration configuration) {
        try {
            HdfsInfo hdfsInfo = new HdfsInfo(hdfsPath);
            Class keyWritableClass = configuration.getKeyType().getWritableClass();
            Class valueWritableClass = configuration.getValueType().getWritableClass();
            return new MapFile.Writer(hdfsInfo.getConf(), hdfsInfo.getFileSystem(), hdfsPath, keyWritableClass, valueWritableClass,
                    configuration.getCompressionType(), configuration.getCompressionCodec().getCodec(), new Progressable() {
                        @Override
                        public void progress() {
                            // progress reporting not used
                        }
                    });
        } catch (IOException ex) {
            throw new RuntimeCamelException(ex);
        }
    }

    @Override
    public Closeable createInputStream(String hdfsPath, HdfsConfiguration configuration) {
        try {
            HdfsInfo hdfsInfo = new HdfsInfo(hdfsPath);
            return new MapFile.Reader(hdfsInfo.getFileSystem(), hdfsPath, hdfsInfo.getConf());
        } catch (IOException ex) {
            throw new RuntimeCamelException(ex);
        }
    }
},
305
BLOOMMAP_FILE {

    @Override
    public long append(HdfsOutputStream hdfsostr, Object key, Object value, TypeConverter typeConverter) {
        try {
            // Convert both halves of the entry; BloomMapFile keys must be comparable.
            Holder<Integer> keyBytes = new Holder<Integer>();
            Holder<Integer> valueBytes = new Holder<Integer>();
            Writable wKey = getWritable(key, typeConverter, keyBytes);
            Writable wValue = getWritable(value, typeConverter, valueBytes);
            BloomMapFile.Writer writer = (BloomMapFile.Writer) hdfsostr.getOut();
            writer.append((WritableComparable) wKey, wValue);
            return keyBytes.value + valueBytes.value;
        } catch (Exception ex) {
            throw new RuntimeCamelException(ex);
        }
    }

    @Override
    public long next(HdfsInputStream hdfsistr, Holder<Object> key, Holder<Object> value) {
        try {
            BloomMapFile.Reader reader = (BloomMapFile.Reader) hdfsistr.getIn();
            // Instantiate empty writables of the file's declared key/value classes.
            WritableComparable wKey = (WritableComparable) ReflectionUtils.newInstance(reader.getKeyClass(), new Configuration());
            Writable wValue = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), new Configuration());
            if (!reader.next(wKey, wValue)) {
                // Nothing left to read.
                return 0;
            }
            Holder<Integer> keyBytes = new Holder<Integer>();
            Holder<Integer> valueBytes = new Holder<Integer>();
            key.value = getObject(wKey, keyBytes);
            value.value = getObject(wValue, valueBytes);
            return keyBytes.value + valueBytes.value;
        } catch (Exception ex) {
            throw new RuntimeCamelException(ex);
        }
    }

    @Override
    @SuppressWarnings("unchecked")
    public Closeable createOutputStream(String hdfsPath, HdfsConfiguration configuration) {
        try {
            HdfsInfo hdfsInfo = new HdfsInfo(hdfsPath);
            Class keyWritableClass = configuration.getKeyType().getWritableClass();
            Class valueWritableClass = configuration.getValueType().getWritableClass();
            return new BloomMapFile.Writer(hdfsInfo.getConf(), hdfsInfo.getFileSystem(), hdfsPath, keyWritableClass, valueWritableClass,
                    configuration.getCompressionType(), configuration.getCompressionCodec().getCodec(), new Progressable() {
                        @Override
                        public void progress() {
                            // progress reporting not used
                        }
                    });
        } catch (IOException ex) {
            throw new RuntimeCamelException(ex);
        }
    }

    @Override
    public Closeable createInputStream(String hdfsPath, HdfsConfiguration configuration) {
        try {
            HdfsInfo hdfsInfo = new HdfsInfo(hdfsPath);
            return new BloomMapFile.Reader(hdfsInfo.getFileSystem(), hdfsPath, hdfsInfo.getConf());
        } catch (IOException ex) {
            throw new RuntimeCamelException(ex);
        }
    }
},
373
ARRAY_FILE {

    @Override
    public long append(HdfsOutputStream hdfsostr, Object key, Object value, TypeConverter typeConverter) {
        try {
            // ArrayFile entries have no key; only the value is serialized.
            Holder<Integer> valueBytes = new Holder<Integer>();
            Writable wValue = getWritable(value, typeConverter, valueBytes);
            ArrayFile.Writer writer = (ArrayFile.Writer) hdfsostr.getOut();
            writer.append(wValue);
            return valueBytes.value;
        } catch (Exception ex) {
            throw new RuntimeCamelException(ex);
        }
    }

    @Override
    public long next(HdfsInputStream hdfsistr, Holder<Object> key, Holder<Object> value) {
        try {
            ArrayFile.Reader reader = (ArrayFile.Reader) hdfsistr.getIn();
            Writable wValue = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), new Configuration());
            // ArrayFile.Reader.next returns null once the file is exhausted.
            if (reader.next(wValue) == null) {
                return 0;
            }
            Holder<Integer> valueBytes = new Holder<Integer>();
            value.value = getObject(wValue, valueBytes);
            return valueBytes.value;
        } catch (Exception ex) {
            throw new RuntimeCamelException(ex);
        }
    }

    @Override
    @SuppressWarnings("unchecked")
    public Closeable createOutputStream(String hdfsPath, HdfsConfiguration configuration) {
        try {
            HdfsInfo hdfsInfo = new HdfsInfo(hdfsPath);
            Class valueWritableClass = configuration.getValueType().getWritableClass();
            return new ArrayFile.Writer(hdfsInfo.getConf(), hdfsInfo.getFileSystem(), hdfsPath, valueWritableClass,
                    configuration.getCompressionType(), new Progressable() {
                        @Override
                        public void progress() {
                            // progress reporting not used
                        }
                    });
        } catch (IOException ex) {
            throw new RuntimeCamelException(ex);
        }
    }

    @Override
    public Closeable createInputStream(String hdfsPath, HdfsConfiguration configuration) {
        try {
            HdfsInfo hdfsInfo = new HdfsInfo(hdfsPath);
            return new ArrayFile.Reader(hdfsInfo.getFileSystem(), hdfsPath, hdfsInfo.getConf());
        } catch (IOException ex) {
            throw new RuntimeCamelException(ex);
        }
    }
};
435
436 private static final class WritableCache {
437
438 private static Map<Class, HdfsWritableFactories.HdfsWritableFactory> writables = new HashMap<Class, HdfsWritableFactories.HdfsWritableFactory>();
439 private static Map<Class, HdfsWritableFactories.HdfsWritableFactory> readables = new HashMap<Class, HdfsWritableFactories.HdfsWritableFactory>();
440
441 private WritableCache() {
442 }
443
444 static {
445 writables.put(Boolean.class, new HdfsWritableFactories.HdfsBooleanWritableFactory());
446 writables.put(Byte.class, new HdfsWritableFactories.HdfsByteWritableFactory());
447 writables.put(ByteBuffer.class, new HdfsWritableFactories.HdfsBytesWritableFactory());
448 writables.put(Double.class, new HdfsWritableFactories.HdfsDoubleWritableFactory());
449 writables.put(Float.class, new HdfsWritableFactories.HdfsFloatWritableFactory());
450 writables.put(Integer.class, new HdfsWritableFactories.HdfsIntWritableFactory());
451 writables.put(Long.class, new HdfsWritableFactories.HdfsLongWritableFactory());
452 writables.put(String.class, new HdfsWritableFactories.HdfsTextWritableFactory());
453 writables.put(null, new HdfsWritableFactories.HdfsNullWritableFactory());
454 }
455
456 static {
457 readables.put(BooleanWritable.class, new HdfsWritableFactories.HdfsBooleanWritableFactory());
458 readables.put(ByteWritable.class, new HdfsWritableFactories.HdfsByteWritableFactory());
459 readables.put(BytesWritable.class, new HdfsWritableFactories.HdfsBytesWritableFactory());
460 readables.put(DoubleWritable.class, new HdfsWritableFactories.HdfsDoubleWritableFactory());
461 readables.put(FloatWritable.class, new HdfsWritableFactories.HdfsFloatWritableFactory());
462 readables.put(IntWritable.class, new HdfsWritableFactories.HdfsIntWritableFactory());
463 readables.put(LongWritable.class, new HdfsWritableFactories.HdfsLongWritableFactory());
464 readables.put(Text.class, new HdfsWritableFactories.HdfsTextWritableFactory());
465 readables.put(NullWritable.class, new HdfsWritableFactories.HdfsNullWritableFactory());
466 }
467 }
468
469 private static Writable getWritable(Object obj, TypeConverter typeConverter, Holder<Integer> size) {
470 Class objCls = obj == null ? null : obj.getClass();
471 HdfsWritableFactories.HdfsWritableFactory objWritableFactory = WritableCache.writables.get(objCls);
472 if (objWritableFactory == null) {
473 objWritableFactory = new HdfsWritableFactories.HdfsObjectWritableFactory();
474 }
475 return objWritableFactory.create(obj, typeConverter, size);
476 }
477
478 private static Object getObject(Writable writable, Holder<Integer> size) {
479 Class writableClass = NullWritable.class;
480 if (writable != null) {
481 writableClass = writable.getClass();
482 }
483 HdfsWritableFactories.HdfsWritableFactory writableObjectFactory = WritableCache.readables.get(writableClass);
484 return writableObjectFactory.read(writable, size);
485 }
486
/**
 * Appends the given key/value pair to the open output stream.
 *
 * @return the number of bytes written for this entry
 */
public abstract long append(HdfsOutputStream hdfsostr, Object key, Object value, TypeConverter typeConverter);

/**
 * Reads the next entry from the open input stream into the key/value holders.
 *
 * @return the number of bytes read, or 0 when the stream is exhausted
 */
public abstract long next(HdfsInputStream hdfsistr, Holder<Object> key, Holder<Object> value);

/**
 * Creates the file-type-specific writer for the given HDFS path, configured from
 * the endpoint configuration.
 */
public abstract Closeable createOutputStream(String hdfsPath, HdfsConfiguration configuration);

/**
 * Creates the file-type-specific reader for the given HDFS path, configured from
 * the endpoint configuration.
 */
public abstract Closeable createInputStream(String hdfsPath, HdfsConfiguration configuration);
494
495 public static long copyBytes(InputStream in, OutputStream out, int buffSize, boolean close) throws IOException {
496 long numBytes = 0;
497 PrintStream ps = out instanceof PrintStream ? (PrintStream) out : null;
498 byte buf[] = new byte[buffSize];
499 try {
500 int bytesRead = in.read(buf);
501 while (bytesRead >= 0) {
502 out.write(buf, 0, bytesRead);
503 numBytes += bytesRead;
504 if ((ps != null) && ps.checkError()) {
505 throw new IOException("Unable to write to output stream.");
506 }
507 bytesRead = in.read(buf);
508 }
509 } finally {
510 if (close) {
511 IOHelper.close(out);
512 IOHelper.close(in);
513 }
514 }
515 return numBytes;
516 }
517 }