package com.github.axet.wget;

import java.io.IOException;
import java.net.URLDecoder;
import java.nio.charset.Charset;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;

/**
 * Snapshot of an HTML directory listing, keyed by the decoded {@code href} of
 * each listed entry.
 *
 * <p>
 * The page is fetched once in the constructor. Every {@code <a>} element whose
 * following sibling node contains a "date time size" run matching
 * {@link #DATE_SIZE} becomes one {@link Index} entry; all other anchors are
 * ignored.
 */
public class ApacheIndex extends HashMap<String, ApacheIndex.Index> {
    /**
     * Parser for the date/time column, e.g. {@code 05-May-2020 10:00}.
     *
     * <p>
     * Pinned to {@link Locale#ENGLISH} so that month abbreviations parse
     * regardless of the JVM default locale. {@link SimpleDateFormat} is not
     * thread-safe and this shared instance is used without synchronization.
     */
    public static SimpleDateFormat APACHE_DATE = new SimpleDateFormat("d-MMM-yyyy HH:mm", Locale.ENGLISH);

    /**
     * Matches the text that follows an entry's link. Group 1 is the full
     * "dd-MMM-yyyy HH:mm" timestamp and group 6 is the size as plain digits.
     */
    public static Pattern DATE_SIZE = Pattern.compile("(([0-9][0-9])-([a-zA-Z][a-zA-Z][a-zA-Z])-([0-9][0-9][0-9][0-9]) ([0-9][0-9]:[0-9][0-9]))[ ]+([0-9]+)");

    /** Charset used to percent-decode hrefs; fixed so keys do not vary by platform. */
    private static final Charset HREF_CHARSET = Charset.forName("UTF-8");

    /** Value of {@link System#currentTimeMillis()} taken right after the listing was parsed. */
    public long now;

    /** One row of the listing: link text, modification date and size. */
    public static class Index {
        public String name;
        public Date date;
        public long size;

        /**
         * @param n
         *            entry name (the link text)
         * @param d
         *            timestamp text in {@link ApacheIndex#APACHE_DATE} format
         * @param s
         *            size as a decimal number
         * @throws RuntimeException
         *             wrapping the {@link ParseException} if {@code d} cannot be
         *             parsed
         * @throws NumberFormatException
         *             if {@code s} is not a valid {@code long}
         */
        public Index(String n, String d, String s) {
            try {
                name = n;
                date = APACHE_DATE.parse(d);
                size = Long.parseLong(s);
            } catch (ParseException e) {
                throw new RuntimeException("Unparseable index date: " + d, e);
            }
        }
    }

    /**
     * Downloads the listing at {@code url} and fills this map.
     *
     * @param url
     *            address of the directory listing page
     * @throws IOException
     *             if the page cannot be fetched or an href cannot be decoded
     */
    public ApacheIndex(String url) throws IOException {
        Document doc = Jsoup.connect(url).get();
        Elements anchors = doc.select("a");
        for (Element a : anchors) {
            Node next = a.nextSibling();
            if (next == null) {
                // Anchor is the last node of its parent: there is no trailing
                // date/size text, so it cannot be a listing row.
                continue;
            }
            Matcher m = DATE_SIZE.matcher(next.toString());
            if (!m.find()) {
                continue;
            }
            Index entry = new Index(a.text(), m.group(1), m.group(6));
            // NOTE: URLDecoder implements form decoding, so a literal '+' in
            // the href is turned into a space.
            String key = URLDecoder.decode(a.attr("href"), HREF_CHARSET.name());
            put(key, entry);
        }
        now = System.currentTimeMillis();
    }
}
