001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.camel.component.hdfs;
018    
019    import java.io.ByteArrayOutputStream;
020    import java.io.Closeable;
021    import java.io.File;
022    import java.io.FileInputStream;
023    import java.io.IOException;
024    import java.io.InputStream;
025    import java.io.OutputStream;
026    import java.io.PrintStream;
027    import java.nio.ByteBuffer;
028    import java.util.HashMap;
029    import java.util.Map;
030    
031    import javax.xml.ws.Holder;
032    
033    import org.apache.camel.RuntimeCamelException;
034    import org.apache.camel.TypeConverter;
035    import org.apache.camel.util.IOHelper;
036    import org.apache.hadoop.conf.Configuration;
037    import org.apache.hadoop.fs.FSDataOutputStream;
038    import org.apache.hadoop.fs.FileSystem;
039    import org.apache.hadoop.fs.FileUtil;
040    import org.apache.hadoop.fs.Path;
041    import org.apache.hadoop.io.ArrayFile;
042    import org.apache.hadoop.io.BloomMapFile;
043    import org.apache.hadoop.io.BooleanWritable;
044    import org.apache.hadoop.io.ByteWritable;
045    import org.apache.hadoop.io.BytesWritable;
046    import org.apache.hadoop.io.DoubleWritable;
047    import org.apache.hadoop.io.FloatWritable;
048    import org.apache.hadoop.io.IntWritable;
049    import org.apache.hadoop.io.LongWritable;
050    import org.apache.hadoop.io.MapFile;
051    import org.apache.hadoop.io.NullWritable;
052    import org.apache.hadoop.io.SequenceFile;
053    import org.apache.hadoop.io.SequenceFile.Writer;
054    import org.apache.hadoop.io.Text;
055    import org.apache.hadoop.io.Writable;
056    import org.apache.hadoop.io.WritableComparable;
057    import org.apache.hadoop.util.Progressable;
058    import org.apache.hadoop.util.ReflectionUtils;
059    
060    public enum HdfsFileType {
061    
    // Plain HDFS files: bodies are transferred as raw bytes, with no key/value framing.
    NORMAL_FILE {
        @Override
        public long append(HdfsOutputStream hdfsostr, Object key, Object value, TypeConverter typeConverter) {
            // The key is ignored for plain files; the value is streamed verbatim.
            InputStream is = null;
            try {
                is = typeConverter.convertTo(InputStream.class, value);
                // close == false: the FSDataOutputStream must stay open for subsequent appends
                return copyBytes(is, (FSDataOutputStream) hdfsostr.getOut(), HdfsConstants.DEFAULT_BUFFERSIZE, false);
            } catch (IOException ex) {
                throw new RuntimeCamelException(ex);
            } finally {
                IOHelper.close(is);
            }
        }

        @Override
        public long next(HdfsInputStream hdfsistr, Holder<Object> key, Holder<Object> value) {
            // Reads at most one buffer (DEFAULT_BUFFERSIZE bytes) per call; returns 0 at EOF.
            try {
                ByteArrayOutputStream bos = new ByteArrayOutputStream(hdfsistr.getChunkSize());
                byte buf[] = new byte[HdfsConstants.DEFAULT_BUFFERSIZE];
                int bytesRead = ((InputStream) hdfsistr.getIn()).read(buf);
                if (bytesRead >= 0) {
                    bos.write(buf, 0, bytesRead);
                    key.value = null; // plain files carry no key
                    value.value = bos; // value is the ByteArrayOutputStream holding this chunk
                    return bytesRead;
                } else {
                    // EOF: clear both holders and report zero bytes consumed
                    key.value = null;
                    value.value = null;
                    return 0;
                }
            } catch (IOException ex) {
                throw new RuntimeCamelException(ex);
            }
        }

        @Override
        public Closeable createOutputStream(String hdfsPath, HdfsConfiguration configuration) {
            try {
                Closeable rout;
                HdfsInfo hdfsInfo = new HdfsInfo(hdfsPath);
                if (!configuration.isAppend()) {
                    // Fresh file: honor overwrite/buffer/replication/block-size settings.
                    rout = hdfsInfo.getFileSystem().create(hdfsInfo.getPath(), configuration.isOverwrite(), configuration.getBufferSize(),
                            configuration.getReplication(), configuration.getBlockSize(), new Progressable() {
                                @Override
                                public void progress() {
                                }
                            });
                } else {
                    // Append mode: reopen the existing file for appending.
                    rout = hdfsInfo.getFileSystem().append(hdfsInfo.getPath(), configuration.getBufferSize(), new Progressable() {
                        @Override
                        public void progress() {
                        }
                    });
                }
                return rout;
            } catch (IOException ex) {
                throw new RuntimeCamelException(ex);
            }
        }

        @Override
        public Closeable createInputStream(String hdfsPath, HdfsConfiguration configuration) {
            try {
                Closeable rin;
                if (configuration.getFileSystemType().equals(HdfsFileSystemType.LOCAL)) {
                    // Local filesystem: open the path directly.
                    HdfsInfo hdfsInfo = new HdfsInfo(hdfsPath);
                    rin = hdfsInfo.getFileSystem().open(hdfsInfo.getPath());
                } else {
                    // Remote HDFS: download to a local temp file first, then read from it.
                    rin = new FileInputStream(getHfdsFileToTmpFile(hdfsPath, configuration));
                }
                return rin;
            } catch (IOException ex) {
                throw new RuntimeCamelException(ex);
            }
        }

        // Copies the remote HDFS path to a local temp location and returns the local file.
        private File getHfdsFileToTmpFile(String hdfsPath, HdfsConfiguration configuration) {
            try {
                // NOTE(review): fname keeps the leading '/' returned by lastIndexOf; the
                // temp-file prefix therefore starts with '/' — confirm this is intended
                // and portable across platforms.
                String fname = hdfsPath.substring(hdfsPath.lastIndexOf('/'));

                File outputDest = File.createTempFile(fname, ".hdfs");
                // Remove the placeholder so the copy below can create the destination itself.
                if (outputDest.exists()) {
                    outputDest.delete();
                }

                HdfsInfo hdfsInfo = new HdfsInfo(hdfsPath);
                FileSystem fileSystem = hdfsInfo.getFileSystem();
                // First copy the path as a single file, then attempt a directory merge on
                // top of it; if the merge throws, the single-file copy is returned as-is.
                // TODO(review): confirm the intent of running both copy and copyMerge.
                FileUtil.copy(fileSystem, new Path(hdfsPath), outputDest, false, fileSystem.getConf());
                try {
                    FileUtil.copyMerge(
                            fileSystem, // src
                            new Path(hdfsPath),
                            FileSystem.getLocal(new Configuration()), // dest
                            new Path(outputDest.toURI()),
                            false, fileSystem.getConf(), null);
                } catch (IOException e) {
                    return outputDest;
                }

                // Merge succeeded: the merged content is expected under outputDest/fname.
                return new File(outputDest, fname);
            } catch (IOException ex) {
                throw new RuntimeCamelException(ex);
            }
        }
    },
167    
    // Hadoop SequenceFile: a flat file of serialized key/value Writable pairs.
    SEQUENCE_FILE {
        @Override
        public long append(HdfsOutputStream hdfsostr, Object key, Object value, TypeConverter typeConverter) {
            try {
                // Convert key and value to Writables, capturing their serialized sizes.
                Holder<Integer> keySize = new Holder<Integer>();
                Writable keyWritable = getWritable(key, typeConverter, keySize);
                Holder<Integer> valueSize = new Holder<Integer>();
                Writable valueWritable = getWritable(value, typeConverter, valueSize);
                Writer writer = (SequenceFile.Writer) hdfsostr.getOut();
                writer.append(keyWritable, valueWritable);
                // Emit a sync marker after each record (SequenceFile.Writer#sync).
                writer.sync();
                // Reported length is the combined serialized size of key and value.
                return keySize.value + valueSize.value;
            } catch (Exception ex) {
                throw new RuntimeCamelException(ex);
            }
        }

        @Override
        public long next(HdfsInputStream hdfsistr, Holder<Object> key, Holder<Object> value) {
            try {
                SequenceFile.Reader reader = (SequenceFile.Reader) hdfsistr.getIn();
                Holder<Integer> keySize = new Holder<Integer>();
                // Instantiate the Writable types declared in the file header via reflection.
                // NOTE(review): a fresh Configuration per call is relatively costly.
                Writable keyWritable = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), new Configuration());
                Holder<Integer> valueSize = new Holder<Integer>();
                Writable valueWritable = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), new Configuration());
                if (reader.next(keyWritable, valueWritable)) {
                    // Unwrap the Writables back into plain Java objects for the holders.
                    key.value = getObject(keyWritable, keySize);
                    value.value = getObject(valueWritable, valueSize);
                    return keySize.value + valueSize.value;
                } else {
                    // End of file: holders untouched, zero bytes read.
                    return 0;
                }
            } catch (Exception ex) {
                throw new RuntimeCamelException(ex);
            }
        }

        @Override
        public Closeable createOutputStream(String hdfsPath, HdfsConfiguration configuration) {
            try {
                Closeable rout;
                HdfsInfo hdfsInfo = new HdfsInfo(hdfsPath);
                // Key/value Writable classes come from the endpoint configuration.
                Class keyWritableClass = configuration.getKeyType().getWritableClass();
                Class valueWritableClass = configuration.getValueType().getWritableClass();
                rout = SequenceFile.createWriter(hdfsInfo.getFileSystem(), hdfsInfo.getConf(), hdfsInfo.getPath(), keyWritableClass,
                        valueWritableClass, configuration.getBufferSize(), configuration.getReplication(), configuration.getBlockSize(),
                        configuration.getCompressionType(), configuration.getCompressionCodec().getCodec(), new Progressable() {
                            @Override
                            public void progress() {
                            }
                        }, new SequenceFile.Metadata());
                return rout;
            } catch (IOException ex) {
                throw new RuntimeCamelException(ex);
            }
        }

        @Override
        public Closeable createInputStream(String hdfsPath, HdfsConfiguration configuration) {
            try {
                Closeable rin;
                HdfsInfo hdfsInfo = new HdfsInfo(hdfsPath);
                rin = new SequenceFile.Reader(hdfsInfo.getFileSystem(), hdfsInfo.getPath(), hdfsInfo.getConf());
                return rin;
            } catch (IOException ex) {
                throw new RuntimeCamelException(ex);
            }
        }
    },
237    
    // Hadoop MapFile: a sorted SequenceFile with an index; keys must be WritableComparable.
    MAP_FILE {
        @Override
        public long append(HdfsOutputStream hdfsostr, Object key, Object value, TypeConverter typeConverter) {
            try {
                Holder<Integer> keySize = new Holder<Integer>();
                Writable keyWritable = getWritable(key, typeConverter, keySize);
                Holder<Integer> valueSize = new Holder<Integer>();
                Writable valueWritable = getWritable(value, typeConverter, valueSize);
                // MapFile keys must be WritableComparable; NOTE(review): Hadoop MapFile
                // writers expect keys appended in sorted order — callers must ensure this.
                ((MapFile.Writer) hdfsostr.getOut()).append((WritableComparable) keyWritable, valueWritable);
                return keySize.value + valueSize.value;
            } catch (Exception ex) {
                throw new RuntimeCamelException(ex);
            }
        }

        @Override
        public long next(HdfsInputStream hdfsistr, Holder<Object> key, Holder<Object> value) {
            try {
                MapFile.Reader reader = (MapFile.Reader) hdfsistr.getIn();
                Holder<Integer> keySize = new Holder<Integer>();
                // Instantiate the declared key/value Writable types via reflection.
                WritableComparable keyWritable = (WritableComparable) ReflectionUtils.newInstance(reader.getKeyClass(), new Configuration());
                Holder<Integer> valueSize = new Holder<Integer>();
                Writable valueWritable = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), new Configuration());
                if (reader.next(keyWritable, valueWritable)) {
                    key.value = getObject(keyWritable, keySize);
                    value.value = getObject(valueWritable, valueSize);
                    return keySize.value + valueSize.value;
                } else {
                    // End of file: zero bytes read, holders untouched.
                    return 0;
                }
            } catch (Exception ex) {
                throw new RuntimeCamelException(ex);
            }
        }

        @Override
        @SuppressWarnings("unchecked")
        public Closeable createOutputStream(String hdfsPath, HdfsConfiguration configuration) {
            try {
                Closeable rout;
                HdfsInfo hdfsInfo = new HdfsInfo(hdfsPath);
                Class keyWritableClass = configuration.getKeyType().getWritableClass();
                Class valueWritableClass = configuration.getValueType().getWritableClass();
                rout = new MapFile.Writer(hdfsInfo.getConf(), hdfsInfo.getFileSystem(), hdfsPath, keyWritableClass, valueWritableClass,
                        configuration.getCompressionType(), configuration.getCompressionCodec().getCodec(), new Progressable() {
                            @Override
                            public void progress() {
                            }
                        });
                return rout;
            } catch (IOException ex) {
                throw new RuntimeCamelException(ex);
            }
        }

        @Override
        public Closeable createInputStream(String hdfsPath, HdfsConfiguration configuration) {
            try {
                Closeable rin;
                HdfsInfo hdfsInfo = new HdfsInfo(hdfsPath);
                rin = new MapFile.Reader(hdfsInfo.getFileSystem(), hdfsPath, hdfsInfo.getConf());
                return rin;
            } catch (IOException ex) {
                throw new RuntimeCamelException(ex);
            }
        }
    },
305    
    // Hadoop BloomMapFile: a MapFile with a dynamic bloom filter for fast key probes.
    BLOOMMAP_FILE {
        @Override
        public long append(HdfsOutputStream hdfsostr, Object key, Object value, TypeConverter typeConverter) {
            try {
                Holder<Integer> keySize = new Holder<Integer>();
                Writable keyWritable = getWritable(key, typeConverter, keySize);
                Holder<Integer> valueSize = new Holder<Integer>();
                Writable valueWritable = getWritable(value, typeConverter, valueSize);
                // Keys must be WritableComparable; NOTE(review): MapFile-family writers
                // expect keys appended in sorted order — callers must ensure this.
                ((BloomMapFile.Writer) hdfsostr.getOut()).append((WritableComparable) keyWritable, valueWritable);
                return keySize.value + valueSize.value;
            } catch (Exception ex) {
                throw new RuntimeCamelException(ex);
            }
        }

        @Override
        public long next(HdfsInputStream hdfsistr, Holder<Object> key, Holder<Object> value) {
            try {
                // BloomMapFile.Reader extends MapFile.Reader; iteration is identical.
                MapFile.Reader reader = (BloomMapFile.Reader) hdfsistr.getIn();
                Holder<Integer> keySize = new Holder<Integer>();
                WritableComparable keyWritable = (WritableComparable) ReflectionUtils.newInstance(reader.getKeyClass(), new Configuration());
                Holder<Integer> valueSize = new Holder<Integer>();
                Writable valueWritable = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), new Configuration());
                if (reader.next(keyWritable, valueWritable)) {
                    key.value = getObject(keyWritable, keySize);
                    value.value = getObject(valueWritable, valueSize);
                    return keySize.value + valueSize.value;
                } else {
                    // End of file: zero bytes read, holders untouched.
                    return 0;
                }
            } catch (Exception ex) {
                throw new RuntimeCamelException(ex);
            }
        }

        @Override
        @SuppressWarnings("unchecked")
        public Closeable createOutputStream(String hdfsPath, HdfsConfiguration configuration) {
            try {
                Closeable rout;
                HdfsInfo hdfsInfo = new HdfsInfo(hdfsPath);
                Class keyWritableClass = configuration.getKeyType().getWritableClass();
                Class valueWritableClass = configuration.getValueType().getWritableClass();
                rout = new BloomMapFile.Writer(hdfsInfo.getConf(), hdfsInfo.getFileSystem(), hdfsPath, keyWritableClass, valueWritableClass,
                        configuration.getCompressionType(), configuration.getCompressionCodec().getCodec(), new Progressable() {
                            @Override
                            public void progress() {
                            }
                        });
                return rout;
            } catch (IOException ex) {
                throw new RuntimeCamelException(ex);
            }
        }

        @Override
        public Closeable createInputStream(String hdfsPath, HdfsConfiguration configuration) {
            try {
                Closeable rin;
                HdfsInfo hdfsInfo = new HdfsInfo(hdfsPath);
                rin = new BloomMapFile.Reader(hdfsInfo.getFileSystem(), hdfsPath, hdfsInfo.getConf());
                return rin;
            } catch (IOException ex) {
                throw new RuntimeCamelException(ex);
            }
        }
    },
373    
    // Hadoop ArrayFile: a MapFile variant keyed by record index — values only, no user key.
    ARRAY_FILE {
        @Override
        public long append(HdfsOutputStream hdfsostr, Object key, Object value, TypeConverter typeConverter) {
            try {
                // The key parameter is ignored: ArrayFile records are indexed by position.
                Holder<Integer> valueSize = new Holder<Integer>();
                Writable valueWritable = getWritable(value, typeConverter, valueSize);
                ((ArrayFile.Writer) hdfsostr.getOut()).append(valueWritable);
                return valueSize.value;
            } catch (Exception ex) {
                throw new RuntimeCamelException(ex);
            }
        }

        @Override
        public long next(HdfsInputStream hdfsistr, Holder<Object> key, Holder<Object> value) {
            try {
                ArrayFile.Reader reader = (ArrayFile.Reader) hdfsistr.getIn();
                Holder<Integer> valueSize = new Holder<Integer>();
                Writable valueWritable = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), new Configuration());
                // ArrayFile.Reader#next returns the populated Writable, or null at EOF.
                if (reader.next(valueWritable) != null) {
                    // NOTE(review): the key holder is never set here (array files are
                    // keyless); other file types null it explicitly — confirm callers cope.
                    value.value = getObject(valueWritable, valueSize);
                    return valueSize.value;
                } else {
                    return 0;
                }
            } catch (Exception ex) {
                throw new RuntimeCamelException(ex);
            }
        }

        @Override
        @SuppressWarnings("unchecked")
        public Closeable createOutputStream(String hdfsPath, HdfsConfiguration configuration) {
            try {
                Closeable rout;
                HdfsInfo hdfsInfo = new HdfsInfo(hdfsPath);
                // Only a value class is configured; ArrayFile supplies the index itself.
                Class valueWritableClass = configuration.getValueType().getWritableClass();
                rout = new ArrayFile.Writer(hdfsInfo.getConf(), hdfsInfo.getFileSystem(), hdfsPath, valueWritableClass,
                        configuration.getCompressionType(), new Progressable() {
                            @Override
                            public void progress() {
                            }
                        });
                return rout;
            } catch (IOException ex) {
                throw new RuntimeCamelException(ex);
            }
        }

        @Override
        public Closeable createInputStream(String hdfsPath, HdfsConfiguration configuration) {
            try {
                Closeable rin;
                HdfsInfo hdfsInfo = new HdfsInfo(hdfsPath);
                rin = new ArrayFile.Reader(hdfsInfo.getFileSystem(), hdfsPath, hdfsInfo.getConf());
                return rin;
            } catch (IOException ex) {
                throw new RuntimeCamelException(ex);
            }
        }
    };
435    
436        private static final class WritableCache {
437    
438            private static Map<Class, HdfsWritableFactories.HdfsWritableFactory> writables = new HashMap<Class, HdfsWritableFactories.HdfsWritableFactory>();
439            private static Map<Class, HdfsWritableFactories.HdfsWritableFactory> readables = new HashMap<Class, HdfsWritableFactories.HdfsWritableFactory>();
440    
441            private WritableCache() {
442            }
443    
444            static {
445                writables.put(Boolean.class, new HdfsWritableFactories.HdfsBooleanWritableFactory());
446                writables.put(Byte.class, new HdfsWritableFactories.HdfsByteWritableFactory());
447                writables.put(ByteBuffer.class, new HdfsWritableFactories.HdfsBytesWritableFactory());
448                writables.put(Double.class, new HdfsWritableFactories.HdfsDoubleWritableFactory());
449                writables.put(Float.class, new HdfsWritableFactories.HdfsFloatWritableFactory());
450                writables.put(Integer.class, new HdfsWritableFactories.HdfsIntWritableFactory());
451                writables.put(Long.class, new HdfsWritableFactories.HdfsLongWritableFactory());
452                writables.put(String.class, new HdfsWritableFactories.HdfsTextWritableFactory());
453                writables.put(null, new HdfsWritableFactories.HdfsNullWritableFactory());
454            }
455    
456            static {
457                readables.put(BooleanWritable.class, new HdfsWritableFactories.HdfsBooleanWritableFactory());
458                readables.put(ByteWritable.class, new HdfsWritableFactories.HdfsByteWritableFactory());
459                readables.put(BytesWritable.class, new HdfsWritableFactories.HdfsBytesWritableFactory());
460                readables.put(DoubleWritable.class, new HdfsWritableFactories.HdfsDoubleWritableFactory());
461                readables.put(FloatWritable.class, new HdfsWritableFactories.HdfsFloatWritableFactory());
462                readables.put(IntWritable.class, new HdfsWritableFactories.HdfsIntWritableFactory());
463                readables.put(LongWritable.class, new HdfsWritableFactories.HdfsLongWritableFactory());
464                readables.put(Text.class, new HdfsWritableFactories.HdfsTextWritableFactory());
465                readables.put(NullWritable.class, new HdfsWritableFactories.HdfsNullWritableFactory());
466            }
467        }
468    
469        private static Writable getWritable(Object obj, TypeConverter typeConverter, Holder<Integer> size) {
470            Class objCls = obj == null ? null : obj.getClass();
471            HdfsWritableFactories.HdfsWritableFactory objWritableFactory = WritableCache.writables.get(objCls);
472            if (objWritableFactory == null) {
473                objWritableFactory = new HdfsWritableFactories.HdfsObjectWritableFactory();
474            }
475            return objWritableFactory.create(obj, typeConverter, size);
476        }
477    
478        private static Object getObject(Writable writable, Holder<Integer> size) {
479            Class writableClass = NullWritable.class;
480            if (writable != null) {
481                writableClass = writable.getClass();
482            }
483            HdfsWritableFactories.HdfsWritableFactory writableObjectFactory = WritableCache.readables.get(writableClass);
484            return writableObjectFactory.read(writable, size);
485        }
486    
    /**
     * Appends one record (key/value pair, or raw body for byte-oriented types)
     * to the given output stream and returns the number of bytes written.
     */
    public abstract long append(HdfsOutputStream hdfsostr, Object key, Object value, TypeConverter typeConverter);

    /**
     * Reads the next record into the key/value holders and returns the number
     * of bytes consumed, or 0 when the stream is exhausted.
     */
    public abstract long next(HdfsInputStream hdfsistr, Holder<Object> key, Holder<Object> value);

    /**
     * Creates the file-type specific writer for the given HDFS path; the caller
     * owns (and must close) the returned {@link Closeable}.
     */
    public abstract Closeable createOutputStream(String hdfsPath, HdfsConfiguration configuration);

    /**
     * Creates the file-type specific reader for the given HDFS path; the caller
     * owns (and must close) the returned {@link Closeable}.
     */
    public abstract Closeable createInputStream(String hdfsPath, HdfsConfiguration configuration);
494    
495        public static long copyBytes(InputStream in, OutputStream out, int buffSize, boolean close) throws IOException {
496            long numBytes = 0;
497            PrintStream ps = out instanceof PrintStream ? (PrintStream) out : null;
498            byte buf[] = new byte[buffSize];
499            try {
500                int bytesRead = in.read(buf);
501                while (bytesRead >= 0) {
502                    out.write(buf, 0, bytesRead);
503                    numBytes += bytesRead;
504                    if ((ps != null) && ps.checkError()) {
505                        throw new IOException("Unable to write to output stream.");
506                    }
507                    bytesRead = in.read(buf);
508                }
509            } finally {
510                if (close) {
511                    IOHelper.close(out);
512                    IOHelper.close(in);
513                }
514            }
515            return numBytes;
516        }
517    }