001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.camel.component.hdfs;
018    
019    import java.io.IOException;
020    import java.util.concurrent.atomic.AtomicBoolean;
021    import java.util.concurrent.locks.ReadWriteLock;
022    import java.util.concurrent.locks.ReentrantReadWriteLock;
023    
024    import javax.xml.ws.Holder;
025    
026    import org.apache.camel.Exchange;
027    import org.apache.camel.Message;
028    import org.apache.camel.Processor;
029    import org.apache.camel.impl.DefaultEndpoint;
030    import org.apache.camel.impl.DefaultMessage;
031    import org.apache.camel.impl.ScheduledPollConsumer;
032    import org.apache.commons.lang.StringUtils;
033    import org.apache.hadoop.fs.FileStatus;
034    import org.apache.hadoop.fs.Path;
035    import org.apache.hadoop.fs.PathFilter;
036    
037    public final class HdfsConsumer extends ScheduledPollConsumer {
038    
039        private final HdfsConfiguration config;
040        private final StringBuilder hdfsPath;
041        private final Processor processor;
042        private AtomicBoolean idle = new AtomicBoolean(false);
043        private final ReadWriteLock rwlock = new ReentrantReadWriteLock();
044        private HdfsInputStream istream;
045    
046        public HdfsConsumer(DefaultEndpoint endpoint, Processor processor, HdfsConfiguration config) {
047            super(endpoint, processor);
048            this.config = config;
049            this.hdfsPath = config.getFileSystemType().getHdfsPath(config);
050            this.processor = processor;
051        }
052    
053        @Override
054        protected void doStart() throws Exception {
055            super.setInitialDelay(config.getInitialDelay());
056            super.setDelay(config.getDelay());
057            super.setUseFixedDelay(false);
058            super.doStart();
059        }
060    
061        @Override
062        protected int poll() throws Exception {
063            class ExcludePathFilter implements PathFilter {
064                public boolean accept(Path path) {
065                    return !(path.toString().endsWith(config.getOpenedSuffix()) || path.toString().endsWith(config.getReadSuffix()));
066                }
067            }
068    
069            int numMessages = 0;
070    
071            HdfsInfo info = new HdfsInfo(this.hdfsPath.toString());
072            FileStatus fileStatuses[];
073            if (info.getFileSystem().isFile(info.getPath())) {
074                fileStatuses = info.getFileSystem().globStatus(info.getPath());
075            } else {
076                Path pattern = info.getPath().suffix("/" + this.config.getPattern());
077                fileStatuses = info.getFileSystem().globStatus(pattern, new ExcludePathFilter());
078            }
079    
080            if (fileStatuses.length > 0) {
081                this.idle.set(false);
082            }
083    
084            for (int i = 0; i < fileStatuses.length; ++i) {
085                FileStatus status = fileStatuses[i];
086                if (normalFileIsDirectoryNoSuccessFile(status, info)) {
087                    continue;
088                }
089                try {
090                    this.rwlock.writeLock().lock();
091                    this.istream = HdfsInputStream.createInputStream(fileStatuses[i].getPath().toString(), this.config);
092                } finally {
093                    this.rwlock.writeLock().unlock();
094                }
095    
096                Holder<Object> key = new Holder<Object>();
097                Holder<Object> value = new Holder<Object>();
098                while (this.istream.next(key, value) != 0) {
099                    Exchange exchange = this.getEndpoint().createExchange();
100                    Message message = new DefaultMessage();
101                    message.setHeader(Exchange.FILE_NAME, StringUtils
102                            .substringAfterLast(status.getPath().toString(), "/"));
103                    if (key.value != null) {
104                        message.setHeader(HdfsHeader.KEY.name(), key.value);
105                    }
106                    message.setBody(value.value);
107                    exchange.setIn(message);
108                    this.processor.process(exchange);
109                    numMessages++;
110                }
111                this.istream.close();
112            }
113            this.idle.set(true);
114            return numMessages;
115        }
116    
117        private boolean normalFileIsDirectoryNoSuccessFile(FileStatus status, HdfsInfo info) throws IOException {
118            if (config.getFileType().equals(HdfsFileType.NORMAL_FILE) && status.isDir()) {
119                Path successPath = new Path(status.getPath().toString() + "/_SUCCESS");
120                if (!info.getFileSystem().exists(successPath)) {
121                    return true;
122                }
123            }
124            return false;
125        }
126    
127        public HdfsInputStream getIstream() {
128            try {
129                rwlock.readLock().lock();
130                return istream;
131            } finally {
132                rwlock.readLock().unlock();
133            }
134        }
135    
136        public AtomicBoolean isIdle() {
137            return idle;
138        }
139    
140    }