001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.camel.component.hdfs;
018
019 import java.io.IOException;
020 import java.util.concurrent.atomic.AtomicBoolean;
021 import java.util.concurrent.locks.ReadWriteLock;
022 import java.util.concurrent.locks.ReentrantReadWriteLock;
023
024 import javax.xml.ws.Holder;
025
026 import org.apache.camel.Exchange;
027 import org.apache.camel.Message;
028 import org.apache.camel.Processor;
029 import org.apache.camel.impl.DefaultEndpoint;
030 import org.apache.camel.impl.DefaultMessage;
031 import org.apache.camel.impl.ScheduledPollConsumer;
032 import org.apache.commons.lang.StringUtils;
033 import org.apache.hadoop.fs.FileStatus;
034 import org.apache.hadoop.fs.Path;
035 import org.apache.hadoop.fs.PathFilter;
036
037 public final class HdfsConsumer extends ScheduledPollConsumer {
038
039 private final HdfsConfiguration config;
040 private final StringBuilder hdfsPath;
041 private final Processor processor;
042 private AtomicBoolean idle = new AtomicBoolean(false);
043 private final ReadWriteLock rwlock = new ReentrantReadWriteLock();
044 private HdfsInputStream istream;
045
046 public HdfsConsumer(DefaultEndpoint endpoint, Processor processor, HdfsConfiguration config) {
047 super(endpoint, processor);
048 this.config = config;
049 this.hdfsPath = config.getFileSystemType().getHdfsPath(config);
050 this.processor = processor;
051 }
052
053 @Override
054 protected void doStart() throws Exception {
055 super.setInitialDelay(config.getInitialDelay());
056 super.setDelay(config.getDelay());
057 super.setUseFixedDelay(false);
058 super.doStart();
059 }
060
061 @Override
062 protected int poll() throws Exception {
063 class ExcludePathFilter implements PathFilter {
064 public boolean accept(Path path) {
065 return !(path.toString().endsWith(config.getOpenedSuffix()) || path.toString().endsWith(config.getReadSuffix()));
066 }
067 }
068
069 int numMessages = 0;
070
071 HdfsInfo info = new HdfsInfo(this.hdfsPath.toString());
072 FileStatus fileStatuses[];
073 if (info.getFileSystem().isFile(info.getPath())) {
074 fileStatuses = info.getFileSystem().globStatus(info.getPath());
075 } else {
076 Path pattern = info.getPath().suffix("/" + this.config.getPattern());
077 fileStatuses = info.getFileSystem().globStatus(pattern, new ExcludePathFilter());
078 }
079
080 if (fileStatuses.length > 0) {
081 this.idle.set(false);
082 }
083
084 for (int i = 0; i < fileStatuses.length; ++i) {
085 FileStatus status = fileStatuses[i];
086 if (normalFileIsDirectoryNoSuccessFile(status, info)) {
087 continue;
088 }
089 try {
090 this.rwlock.writeLock().lock();
091 this.istream = HdfsInputStream.createInputStream(fileStatuses[i].getPath().toString(), this.config);
092 } finally {
093 this.rwlock.writeLock().unlock();
094 }
095
096 Holder<Object> key = new Holder<Object>();
097 Holder<Object> value = new Holder<Object>();
098 while (this.istream.next(key, value) != 0) {
099 Exchange exchange = this.getEndpoint().createExchange();
100 Message message = new DefaultMessage();
101 message.setHeader(Exchange.FILE_NAME, StringUtils
102 .substringAfterLast(status.getPath().toString(), "/"));
103 if (key.value != null) {
104 message.setHeader(HdfsHeader.KEY.name(), key.value);
105 }
106 message.setBody(value.value);
107 exchange.setIn(message);
108 this.processor.process(exchange);
109 numMessages++;
110 }
111 this.istream.close();
112 }
113 this.idle.set(true);
114 return numMessages;
115 }
116
117 private boolean normalFileIsDirectoryNoSuccessFile(FileStatus status, HdfsInfo info) throws IOException {
118 if (config.getFileType().equals(HdfsFileType.NORMAL_FILE) && status.isDir()) {
119 Path successPath = new Path(status.getPath().toString() + "/_SUCCESS");
120 if (!info.getFileSystem().exists(successPath)) {
121 return true;
122 }
123 }
124 return false;
125 }
126
127 public HdfsInputStream getIstream() {
128 try {
129 rwlock.readLock().lock();
130 return istream;
131 } finally {
132 rwlock.readLock().unlock();
133 }
134 }
135
136 public AtomicBoolean isIdle() {
137 return idle;
138 }
139
140 }