/*
 * Decompiled with CFR 0.152.
 */
package info.debatty.java.datasets.reuters;

import info.debatty.java.datasets.reuters.News;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileFilter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Dataset of news items read from the {@code .sgm} files found directly inside
 * a directory.
 *
 * <p>Each item is extracted with {@link #EXTRACTION_PATTERN}, which captures the
 * content of the {@code <DATE>}, {@code <TITLE>} and {@code <BODY>} elements of
 * an article. Articles are delimited by a line containing {@code </REUTERS}.
 */
public class Dataset
extends info.debatty.java.datasets.Dataset<News> {
    private static final Logger LOGGER = Logger.getLogger(Dataset.class.getName());

    private final String directory;

    /**
     * Captures, in this order, the date (group 1), the title (group 2) and the
     * body (group 3) of an article. The pattern is compiled without DOTALL, so
     * it only matches text in which the three elements are not separated by
     * line terminators (the iterator joins the lines before parsing).
     */
    static Pattern EXTRACTION_PATTERN = Pattern.compile("<DATE>(.*?)</DATE>.*?<TITLE>(.*?)</TITLE>.*?<BODY>(.*?)</BODY>");

    /**
     * Creates a dataset backed by the given directory. The directory is not
     * accessed until {@link #iterator()} is called.
     *
     * @param reutersDir path of the directory that contains the {@code .sgm} files
     */
    public Dataset(String reutersDir) {
        this.directory = reutersDir;
    }

    /**
     * Extracts every news item that {@link #EXTRACTION_PATTERN} finds in the
     * given text. In the title and in the body, every {@code &lt;} escape is
     * replaced by {@code <}; the date is kept verbatim.
     *
     * @param reuters_string text to parse, must not be {@code null}
     * @return the extracted items, in order of appearance; empty if nothing matches
     */
    public static ArrayList<News> parseString(String reuters_string) {
        ArrayList<News> reuters_feed = new ArrayList<News>();
        Matcher matcher = EXTRACTION_PATTERN.matcher(reuters_string);
        while (matcher.find()) {
            News reuters = new News();
            reuters.date = matcher.group(1);
            // Literal replacement: same result as the regex form, without the regex engine.
            reuters.title = matcher.group(2).replace("&lt;", "<");
            reuters.body = matcher.group(3).replace("&lt;", "<");
            reuters_feed.add(reuters);
        }
        return reuters_feed;
    }

    /**
     * Returns a new iterator over the news items of the directory. Every call
     * lists and reads the files again.
     *
     * @return an iterator that does not support {@code remove()}
     */
    @Override
    public Iterator<News> iterator() {
        return new ReutersIterator(this.directory);
    }

    /**
     * Lazily reads the {@code .sgm} files of a directory, one article at a time.
     * Files are visited in the natural order of their path names. Files that
     * cannot be opened or read are logged and skipped.
     */
    private static class ReutersIterator
    implements Iterator<News> {
        /** Items already parsed but not yet returned by {@link #next()}. */
        private final LinkedList<News> available = new LinkedList<News>();
        /** Files that have not been opened yet. */
        private final LinkedList<File> files = new LinkedList<File>();
        /** Reader of the file being consumed, or {@code null} when no file is open. */
        BufferedReader file_reader;

        /**
         * Lists the {@code .sgm} files of the directory and pre-loads the first
         * available item, so that {@link #hasNext()} is accurate immediately.
         *
         * @param dir_name path of the directory to read
         */
        public ReutersIterator(String dir_name) {
            File directory = new File(dir_name);
            File[] sgm_files = directory.listFiles(new FileFilter(){

                @Override
                public boolean accept(File file) {
                    return file.getName().endsWith(".sgm");
                }
            });
            if (sgm_files == null) {
                // listFiles returns null when the path is not a directory or
                // cannot be read: iterate over nothing instead of failing.
                LOGGER.log(Level.WARNING, "Cannot list the files of directory {0}", dir_name);
            } else {
                // The order returned by listFiles is unspecified; sort for reproducibility.
                Arrays.sort(sgm_files);
                this.files.addAll(Arrays.asList(sgm_files));
            }
            this.readNextElements();
        }

        @Override
        public boolean hasNext() {
            return !this.available.isEmpty();
        }

        /**
         * Returns the next item and, when the look-ahead queue becomes empty,
         * reads further articles so that {@link #hasNext()} stays accurate.
         *
         * @return the next news item
         * @throws java.util.NoSuchElementException if there is no item left
         */
        @Override
        public News next() {
            News current = this.available.removeFirst();
            if (this.available.isEmpty()) {
                this.readNextElements();
            }
            return current;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException("Not supported!");
        }

        /**
         * Reads lines until at least one item has been added to
         * {@link #available} or until every file has been consumed.
         *
         * <p>Lines are accumulated (without separator) until a line containing
         * {@code </REUTERS} is met; the accumulated text is then parsed.
         */
        private void readNextElements() {
            StringBuilder buffer = new StringBuilder(1024);
            while (true) {
                if (this.file_reader == null && !this.openNextFile()) {
                    // Nothing left to read.
                    return;
                }

                String line;
                try {
                    line = this.file_reader.readLine();
                }
                catch (IOException ex) {
                    // Give up on this file only; retrying the same reader could loop forever.
                    LOGGER.log(Level.SEVERE, null, ex);
                    line = null;
                }

                if (line == null) {
                    // End of the file (or read failure): an article never spans two files.
                    this.closeCurrentFile();
                    buffer.setLength(0);
                    continue;
                }

                if (line.indexOf("</REUTERS") == -1) {
                    // NOTE(review): lines are joined without any separator, so the
                    // last word of a line is glued to the first word of the next one.
                    buffer.append(line);
                    continue;
                }

                this.available.addAll(Dataset.parseString(buffer.toString()));
                // Start the next article from scratch, otherwise an article without
                // a match would leak its DATE/TITLE into the following one.
                buffer.setLength(0);
                if (!this.available.isEmpty()) {
                    return;
                }
            }
        }

        /**
         * Opens the next file that can be opened, skipping (and logging) the
         * files that cannot.
         *
         * @return {@code true} if {@link #file_reader} now points to a new file,
         *         {@code false} if no file is left
         */
        private boolean openNextFile() {
            while (!this.files.isEmpty()) {
                File next_file = this.files.removeFirst();
                try {
                    this.file_reader = new BufferedReader(new FileReader(next_file));
                    return true;
                }
                catch (FileNotFoundException ex) {
                    LOGGER.log(Level.WARNING, "Skipping file that cannot be opened: " + next_file, ex);
                }
            }
            return false;
        }

        /** Closes the current reader, if any, and forgets it. */
        private void closeCurrentFile() {
            if (this.file_reader == null) {
                return;
            }
            try {
                this.file_reader.close();
            }
            catch (IOException ex) {
                LOGGER.log(Level.WARNING, null, ex);
            }
            finally {
                this.file_reader = null;
            }
        }
    }
}

