package com.github.axet.wget;

import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;

/**
 * Snapshot of an HTML directory listing, keyed by each entry's {@code href}.
 *
 * <p>
 * The listing is fetched once in the constructor. Every {@code <a>} element
 * whose following sibling node contains text matching {@link #DATE_SIZE}
 * (a {@code dd-MMM-yyyy HH:mm} timestamp followed by a decimal size) becomes
 * one {@link Index} entry. Anchors without such trailing text are skipped.
 */
public class ApacheIndex extends HashMap<String, ApacheIndex.Index> {
    /**
     * Format of the timestamp captured by group 1 of {@link #DATE_SIZE}.
     *
     * <p>
     * Pinned to {@link Locale#ENGLISH} so that month abbreviations such as
     * "Jan" parse regardless of the JVM default locale. {@link SimpleDateFormat}
     * is not thread-safe; the constructor parses with a private clone rather
     * than with this shared instance.
     */
    public static SimpleDateFormat APACHE_DATE = new SimpleDateFormat("d-MMM-yyyy HH:mm", Locale.ENGLISH);

    /**
     * Matches the text that follows an entry's link: group 1 is the whole
     * timestamp, group 6 is the size in decimal digits.
     */
    public static Pattern DATE_SIZE = Pattern.compile("(([0-9][0-9])-([a-zA-Z][a-zA-Z][a-zA-Z])-([0-9][0-9][0-9][0-9]) ([0-9][0-9]:[0-9][0-9]))[ ]+([0-9]+)");

    /**
     * Value of {@link System#currentTimeMillis()} taken right after the
     * listing was parsed.
     */
    public long now;

    /** One entry of the listing. */
    public static class Index {
        /** Visible text of the entry's link. */
        public String name;
        /** Parsed timestamp, or {@code null} when it could not be parsed. */
        public Date date;
        /** Parsed size, or {@code 0} when it could not be parsed. */
        public long size;

        /**
         * @param n link text
         * @param d timestamp, may be {@code null}
         * @param s size
         */
        public Index(String n, Date d, long s) {
            name = n;
            date = d;
            size = s;
        }
    }

    /**
     * Downloads the page at {@code url} and fills this map from its links.
     *
     * @param url address of the directory listing page
     * @throws IOException if the page cannot be fetched
     */
    public ApacheIndex(String url) throws IOException {
        Document doc = Jsoup.connect(url).get();

        // Work on a private copy: the shared formatter keeps mutable parse
        // state and must not be used by several threads at once.
        SimpleDateFormat dateFormat = (SimpleDateFormat) APACHE_DATE.clone();

        Elements links = doc.select("a");
        for (Element link : links) {
            Node trailing = link.nextSibling();
            if (trailing == null) {
                // The anchor is the last child of its parent, so there is no
                // date/size text to read for it.
                continue;
            }

            Matcher m = DATE_SIZE.matcher(trailing.toString());
            if (!m.find()) {
                continue;
            }

            Date date = null;
            try {
                date = dateFormat.parse(m.group(1));
            } catch (ParseException ignored) {
                // Best effort: the text had the right shape but is not a
                // valid date (e.g. unknown month abbreviation); keep null.
            }

            long size = 0;
            try {
                size = Long.parseLong(m.group(6));
            } catch (NumberFormatException ignored) {
                // Best effort: the digit run does not fit into a long; keep 0.
            }

            put(link.attr("href"), new Index(link.text(), date, size));
        }

        now = System.currentTimeMillis();
    }
}
