/*
 * StructModel.java
 *
 * Created on October 18, 2005, 2:36 PM
 *
 * To change this template, choose Tools | Options and locate the template under
 * the Source Creation and Management node. Right-click the template and choose
 * Open. You can then make changes to the template in the Source Editor.
 */

package findstruct;
import java.util.*;
import java.util.regex.*;
import org.jdom.*;

/**Model of a document structure.  This represents the information drawn from
 *an XML file, from which it is constructed.
 *
 * @author Administrator
 */
public class StructModel {
    SM topPart = null;
    Pattern itemPat, itemEndPat, itemEnd2Pat;
    StructOptions rootStructOptions = null;
    
    /** Creates a new instance of StructModel */
    public StructModel(org.jdom.Document doc) {
        Element root = doc.getRootElement();
        rootStructOptions = new StructOptions(root);
        itemPat = rootStructOptions.createListPattern();
        itemEndPat = Pattern.compile("$", Pattern.MULTILINE);
        itemEnd2Pat = Pattern.compile("$^$", Pattern.MULTILINE);
        topPart = new SM((Element)root.getChildren().get(0), this, rootStructOptions);
        //System.out.println("Finished creating StructModel");
    }
    
    /**
     * Runs the model over the whole input string.
     *
     * @param s the text to structure
     * @return the element produced by the top-level part, or {@code null} when
     *         this model has no top-level part
     */
    public Element process(String s) {
        if (topPart == null) {
            return null;
        }
        return topPart.process(s, 0, s.length());
    }
    
    /**
     * Finds every list-item marker in {@code s} using {@code itemPat}.
     *
     * <p>For each match, group 1 is kept as the text before the number, group 2
     * is parsed as the item number and group 3 is kept as the text after it.
     * Matches whose group 2 cannot be parsed as an integer are skipped.
     *
     * @param s the text to scan
     * @return the markers found, in the order they occur in {@code s}
     */
    ArrayList<LFR> findItemMarkers(String s) {
        ArrayList<LFR> r = new ArrayList<LFR>();
        Matcher m = itemPat.matcher(s);
        while (m.find()) {
            Integer num;
            try {
                num = Integer.valueOf(m.group(2));
            } catch (NumberFormatException ignored) {
                // The number pattern is configurable and may match text that
                // is not a valid int; such a match is not a usable marker.
                continue;
            }
            r.add(new LFR(m.start(), m.end(), num, m.group(1), m.group(3)));
        }
        return r;
    }
    
    /**
     * Renders the root options of this model as a single XML-like tag.
     *
     * @return a string showing the root prefix, suffix and pattern flags
     */
    @Override
    public String toString() {
        return "<StructModel"
                + " prefix=\"" + rootStructOptions.prefix + "\""
                + " suffix=\"" + rootStructOptions.suffix + "\""
                + " flags=" + rootStructOptions.patternFlags
                + ">";
    }
    
    class LFR implements Comparable {
        Integer num;
        int start;
        int end;
        String pre;
        String post;
        
        /** Creates a new instance of LFR */
        LFR(int start, int end, Integer num, String pre, String post) {
            this.num=num;
            this.start=start;
            this.end=end;
            this.pre=pre;
            this.post=post;
        }
        
        public int compareTo(Object o) {
            if (!(o instanceof LFR)) return -1;
            LFR other = (LFR)o;
            if (start<other.start) return -1;
            if (start>other.start) return 1;
            if (end <other.end) return -1;
            if (end >other.end) return 1;
            return 0;
        }
        
        public String toString() {
            StringBuffer sb = new StringBuffer("<LFR ");
            sb.append(num);
            sb.append(": ");
            sb.append(start);
            sb.append("-");
            sb.append(end);
            sb.append("[");
            sb.append(pre);
            sb.append("][");
            sb.append(post);
            sb.append("]>");
            return sb.toString();
        }
    }
    
    class SM {
        ArrayList<Pattern> beg = new ArrayList<Pattern>();
        String name = null;
        //Pattern end = null;
        Pattern pattern = null;
        String [] bindings = null;
        StructOptions opts;
        //boolean repeat = false;
        
        ArrayList<SM> parts = null;
        StructModel context = null;
        
        /**
         * Builds the model node for element {@code e} and, recursively, for
         * all of its child elements.
         *
         * <p>The element name, plus every comma-separated entry of the
         * optional {@code alt} attribute, becomes a start pattern. An optional
         * {@code pattern} attribute is compiled with this node's flags, and an
         * optional {@code attrs} attribute supplies the binding names.
         *
         * @param e the XML element describing this node
         * @param m the model this node belongs to
         * @param defaultOpts options inherited from the parent, which the
         *        attributes of {@code e} may override
         */
        SM(org.jdom.Element e, StructModel m, StructOptions defaultOpts) {
            context = m;
            name = e.getName();
            opts = new StructOptions(e, defaultOpts);
            beg.add(opts.createSearchPattern(name));

            String regex = e.getAttributeValue("pattern");
            if (regex != null) {
                pattern = Pattern.compile(regex, opts.patternFlags);
            }

            String bindingNames = e.getAttributeValue("attrs");
            if (bindingNames != null) {
                bindings = U.split(bindingNames, ",");
            }

            String alternatives = e.getAttributeValue("alt");
            if (alternatives != null) {
                for (String altName : U.split(alternatives, ",")) {
                    beg.add(opts.createSearchPattern(altName));
                }
            }

            // parts stays null when the element has no children.
            List<Element> children = (List<Element>) e.getChildren();
            if (!children.isEmpty()) {
                parts = new ArrayList<SM>(children.size());
                for (Object child : children) {
                    parts.add(new SM((Element) child, m, opts));
                }
            }
        }
        
        /**
         * Tells whether this node was given an explicit {@code pattern}.
         *
         * @return {@code true} when a pattern was compiled for this node
         */
        boolean isPattern() {
            return !(pattern == null);
        }
        
        /**
         * Renders this node as a single XML-like tag listing its name, its
         * explicit pattern (if any), every start pattern and, when it has
         * sub-parts, their count.
         */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder("<SM ").append(name);
            if (isPattern()) {
                text.append(" pattern=\"").append(pattern).append("\"");
            }
            for (Pattern startPat : beg) {
                text.append(" beg=\"").append(startPat).append("\"");
            }
            if (parts != null) {
                text.append(" #").append(parts.size());
            }
            return text.append(">").toString();
        }
        
        /**
         * Matches the substructure of this SM to the input s between start and
         * end.
         *
         * <p>Every sub-part is tried against the region and the resulting
         * matches are visited in sorted order. Text between matches is added
         * through {@code addText}. A match of a pattern part becomes an element
         * holding its bound groups. A match of a start-element part opens a
         * section that runs up to the start of the next match (or to
         * {@code end}) and is processed recursively.
         *
         * <p>Overlapping matches are reported on {@code System.err} and still
         * emitted, but the consumed position never moves backwards and a
         * section is never processed with an inverted range.
         *
         * @param s String Input
         * @param start int first position
         * @param end int beyond last position
         * @return Element representing the corresponding content
         */
        Element process(String s, int start, int end) {
            Element result = new Element(name);
            TreeSet<Match> matches = new TreeSet<Match>();
            if (parts!=null) {
                for (SM ssm: parts) {
                    matches.addAll(ssm.tryMatch(s, start, end));
                }
            }
            if (matches.isEmpty()) {
                // No substructure matched. It's all text.
                addText(result,s,start,end);
                return result;
            }
            // Here we've matched all subparts, sorted in matches
            Iterator<Match> it = matches.iterator();
            Match next = it.next();
            int used = start; // input up to here has been consumed
            while (next!=null) {
                Match curr = next;
                next = it.hasNext()? it.next(): null;
                if (used<curr.start) {
                    addText(result,s,used,curr.start);
                    used = curr.start;
                } else if (used>curr.start) {
                    System.err.println("Overlapped matches among parts of "
                            +name+": "+used+" vs. "+curr.start);
                }
                SM mod = curr.matched;
                // For an overlapped match curr.end may lie before text that was
                // already consumed; never step backwards.
                int afterMatch = Math.max(used, curr.end);
                if (mod.isPattern()) {
                    Element patElt = new Element(mod.name);
                    int nb = curr.numBindings();
                    if (nb<0) patElt.addContent(curr.vals.get(0));
                    else for (int i=0; i<nb; i++) {
                        patElt.addContent(curr.bindingToXML(i));
                    }
                    result.addContent(patElt);
                    used = afterMatch;
                } else { // Start element, not pattern
                    // The section ends where the next match starts; when that
                    // match overlaps this one the section is empty rather than
                    // inverted.
                    int thisEnd = (next==null)? end: Math.max(afterMatch, next.start);
                    result.addContent(mod.process(s, afterMatch, thisEnd));
                    used = thisEnd;
                }
            }
            if (used<end) {
                addText(result,s,used,end);
            }
            return result;
        }
        
        /**
         * Looks for occurrences of this node inside {@code s[start, end)}.
         *
         * <p>A pattern node yields one match per occurrence of its pattern.
         * When binding names are configured, groups are bound to them in
         * order; otherwise group 1, if the pattern has one, is bound under a
         * {@code null} key. A start-element node yields at most one match per
         * start pattern, namely that pattern's first occurrence in the region.
         *
         * @param s the input text
         * @param start first position of the region to search
         * @param end position just past the region to search
         * @return the matches found, possibly empty
         */
        ArrayList<Match> tryMatch(String s, int start, int end) {
            ArrayList<Match> found = new ArrayList<Match>();

            if (!isPattern()) {
                for (Pattern startPat : beg) {
                    Matcher finder = startPat.matcher(s).region(start, end);
                    // Find it only once: PE: ... Other PE: problem!
                    if (finder.find()) {
                        found.add(new Match(finder.start(), finder.end(), this));
                    }
                }
                return found;
            }

            Matcher finder = pattern.matcher(s).region(start, end);
            while (finder.find()) {
                Match hit = new Match(finder.start(), finder.end(), this);
                int groups = finder.groupCount();
                if (bindings == null) {
                    // no explicit bindings; save group 1
                    if (groups > 0) {
                        hit.bind(null, finder.group(1));
                    }
                } else {
                    int bound = Math.min(groups, bindings.length);
                    for (int g = 0; g < bound; g++) {
                        hit.bind(bindings[g], finder.group(g + 1));
                    }
                }
                found.add(hit);
            }
            return found;
        }
        
        /**
         * Adds the text {@code s[start, end)} to {@code elt}.
         *
         * <p>When list detection is off for this node the text is added
         * trimmed, as a single piece. Otherwise the text is scanned for
         * numbered item markers and split into plain text and
         * {@code <list>} elements containing {@code <item number="n">}
         * children. A marker numbered 1 starts a new list. A marker whose
         * number is neither 1 nor the next expected number is reported on
         * {@code System.err} and ignored.
         *
         * @param elt element receiving the content
         * @param s the input text
         * @param start first position of the text to add
         * @param end position just past the text to add
         */
        void addText(Element elt, String s, int start, int end) {
            String content = s.substring(start,end);
            if (!opts.findLists) elt.addContent(content.trim());
            else {
                // Marker offsets are relative to content, not to s.
                ArrayList<LFR> r = findItemMarkers(content);
            /*
            if (r.size()==0) {
                elt.addContent(content.trim());
                return;
            } */
                //for (LFR l: r) System.out.println(l);
                // last is the marker whose item is emitted in this iteration;
                // curr is the marker (or null at the end) that bounds that item.
                LFR last = null, curr = null;
                Iterator<LFR> it = r.iterator();
                int nextItemNum = 1;
                int used = 0; // text to here has been added
                Element currList = null;
                // One extra pass runs after the iterator is exhausted (curr
                // becomes null) so that the final item gets emitted.
                while (curr!=null || it.hasNext()) {
                    last = curr;
                    curr = (it.hasNext())? it.next(): null;
                    //System.out.println("Iterating on last={"+last+"}, curr={"+curr+"}");
                    if (curr!=null && curr.num!=nextItemNum && curr.num!=1) {
                        // this is an error; just skip
                        System.err.println("Mistaken number " + curr.num + " should be " + nextItemNum);
                        // Restore curr so the next pass sees the same last again.
                        curr = last;
                        continue;
                    }
                    if (last!=null) {
                        // We add an <item> to the open currList
                        Element item = new Element("item");
                        item.setAttribute("number", last.num.toString());
                        currList.addContent(item);
                    /* Figure out the end of the last item. If curr starts a new
                     *list at 1 or is at the end, we seek a blank line break to
                     *separate the last item from top-level text that goes into
                     *elt.
                     */
                        int itemEnd = content.length();
                        if (curr!=null) itemEnd = curr.start;
                        int itemBreak = itemEnd;
                        if (curr==null || curr.num==1) {
                            // The current list is finished after this item.
                            currList = null;
                            Matcher mLineBreak = itemEnd2Pat.matcher(content);
                            if (mLineBreak.find(last.end)) {
                                //System.out.println("Found blank line at "+mLineBreak.start()+", itemEnd was "+itemEnd);
                                itemBreak = Math.min(itemEnd, mLineBreak.start());
                            }
                        }
                        //System.out.println("Last!=null, adding item "+last.num
                        //        +" ["+content.substring(last.end,itemBreak)+"] last="+itemBreak);
                        // Item text runs from just after its marker to the break.
                        item.addContent(content.substring(last.end, itemBreak));
                        used = itemBreak;
                        if (itemBreak<itemEnd) {
                            // Text after the break belongs to elt, not to the item.
                            //System.out.println("Extra text added at top: ["+content.substring(itemBreak,itemEnd)+"] used="+itemEnd);
                            elt.addContent(content.substring(itemBreak,itemEnd));
                            used = itemEnd;
                        }
                    } // last != null
                    else {
                        // last==null; add text to here
                        elt.addContent(content.substring(used, curr.start));
                        //System.out.println("last==null, text added ["+content.substring(used, curr.start)+"] used="+curr.start);
                        used = curr.start;
                    }
                    if (curr!=null && curr.num==1) {
                        // start of a new list
                        currList = new Element("list");
                        elt.addContent(currList);
                        nextItemNum = 2;
                    } else nextItemNum++;
                }
                // Whatever follows the last emitted piece is plain text.
                if (used<content.length()) {
                    elt.addContent(content.substring(used));
                    //System.out.println("Text added at end ["+content.substring(used)+"]");
                }
            }
        }
    }
    
    /**
     * One occurrence of a model part in the input: the character range that
     * matched, the part that matched it, and any values bound from capture
     * groups. Keys and values are kept in two parallel lists.
     */
    class Match implements Comparable {
        int start;
        int end;
        SM matched;
        ArrayList<String> keys = null;
        ArrayList<String> vals = new ArrayList<String>();

        /**
         * @param s first position of the match
         * @param e position just past the match
         * @param modelMatched the model part that produced this match
         */
        Match(int s, int e, SM modelMatched) {
            this.start = s;
            this.end = e;
            this.matched = modelMatched;
        }

        /**
         * Records a captured value under the given key. The first call
         * creates fresh key and value lists.
         */
        void bind(String key, String val) {
            if (keys == null) {
                keys = new ArrayList<String>();
                vals = new ArrayList<String>();
            }
            keys.add(key);
            vals.add(val);
        }

        /**
         * @return 0 when nothing was bound, -1 when the first binding has a
         *         {@code null} key, otherwise the number of bindings
         */
        int numBindings() {
            if (keys == null) {
                return 0;
            }
            return (keys.get(0) == null) ? -1 : keys.size();
        }

        /**
         * Builds an element named after the i-th key and containing the i-th
         * value.
         */
        Element bindingToXML(int i) {
            Element bound = new Element(keys.get(i));
            bound.addContent(vals.get(i));
            return bound;
        }

        /**
         * Orders matches by start position, then by end position. Anything
         * that is not a Match makes this method return -1.
         */
        public int compareTo(Object o) {
            if (!(o instanceof Match)) {
                return -1; // Matches sort before anything else
            }
            Match that = (Match) o;
            if (this.start != that.start) {
                return (this.start < that.start) ? -1 : 1;
            }
            if (this.end != that.end) {
                return (this.end < that.end) ? -1 : 1;
            }
            return 0;
        }
    }
    
    /**
     * Matching options for a model element. Options are inherited from the
     * parent element's options and then overridden by whichever attributes the
     * element itself carries.
     */
    class StructOptions {
        String prefix = "";
        String suffix = "";
        boolean findLists = false;
        String listPrefix = "\\s|^";
        String listPat = "\\d+";
        String listSuffix = "[):\\.]\\s";
        int patternFlags = 0;

        private StructOptions() {
        }

        /** Reads options from {@code e} on top of the built-in defaults. */
        private StructOptions(Element e) {
            this(e, null);
        }

        /**
         * Reads options from {@code e} on top of those in {@code so}.
         *
         * @param e element whose attributes override the inherited options
         * @param so options to inherit from, or {@code null} for the defaults
         */
        private StructOptions(Element e, StructOptions so) {
            this();
            if (so != null) {
                copyFrom(so);
            }
            prefix = attributeOr(e, "prefix", prefix);
            suffix = attributeOr(e, "suffix", suffix);

            applyFlag(e, "case_insensitive", Pattern.CASE_INSENSITIVE);
            applyFlag(e, "canon_eq", Pattern.CANON_EQ);
            applyFlag(e, "comments", Pattern.COMMENTS);
            applyFlag(e, "dotall", Pattern.DOTALL);
            applyFlag(e, "multiline", Pattern.MULTILINE);
            applyFlag(e, "unix_lines", Pattern.UNIX_LINES);

            String lists = e.getAttributeValue("lists");
            if (lists != null) {
                findLists = lists.equalsIgnoreCase("yes");
            }
            listPat = attributeOr(e, "listPat", listPat);
            listPrefix = attributeOr(e, "listPrefix", listPrefix);
            listSuffix = attributeOr(e, "listSuffix", listSuffix);
        }

        /** Returns the named attribute of {@code e}, or {@code fallback} when it is absent. */
        private String attributeOr(Element e, String attr, String fallback) {
            String v = e.getAttributeValue(attr);
            return (v != null) ? v : fallback;
        }

        /**
         * Sets {@code flag} in patternFlags when the named attribute is "yes"
         * (ignoring case), clears it for any other value, and leaves it
         * untouched when the attribute is absent.
         */
        private void applyFlag(Element e, String attr, int flag) {
            String v = e.getAttributeValue(attr);
            if (v == null) {
                return;
            }
            if (v.equalsIgnoreCase("yes")) {
                patternFlags |= flag;
            } else {
                patternFlags &= ~flag;
            }
        }

        /** Copies every option from {@code so} into this object. */
        void copyFrom(StructOptions so) {
            prefix = so.prefix;
            suffix = so.suffix;
            findLists = so.findLists;
            listPrefix = so.listPrefix;
            listPat = so.listPat;
            listSuffix = so.listSuffix;
            patternFlags = so.patternFlags;
        }

        /**
         * Builds the pattern that finds an element's start text: each
         * underscore in {@code name} matches a run of whitespace, and the
         * result is wrapped in the configured prefix and suffix.
         */
        private Pattern createSearchPattern(String name) {
            String body = name.replaceAll("_", "\\\\s+");
            String regex = prefix + body + suffix;
            return Pattern.compile(regex, patternFlags);
        }

        /**
         * Builds the list-marker pattern with three groups: list prefix, item
         * number and list suffix. It is always compiled with MULTILINE only.
         */
        private Pattern createListPattern() {
            String regex = "(" + listPrefix + ")(" + listPat + ")(" + listSuffix + ")";
            return Pattern.compile(regex, Pattern.MULTILINE);
        }
    }
    
}
