/*
 * Decompiled with CFR 0.152.
 */
package edu.umd.cloud9.collection.aquaint2;

import edu.umd.cloud9.collection.Indexable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.regex.Pattern;
import org.apache.hadoop.io.WritableUtils;

/**
 * A single document stored as its raw XML-like markup (a {@code <DOC ...>}
 * element), with lazily extracted and cached docid, headline and content.
 *
 * <p>The only state that is serialized is the raw markup; the derived fields
 * are recomputed on demand after every {@link #readDocument} call.
 */
public class Aquaint2Document
extends Indexable {
    /** Matches any markup tag, i.e. {@code <} followed by non-{@code >} characters and {@code >}. */
    private static final Pattern TAGS_PATTERN = Pattern.compile("<[^>]+>");
    /** Matches a single tab or newline character. */
    private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\t|\n");
    public static final String XML_START_TAG = "<DOC ";
    public static final String XML_END_TAG = "</DOC>";
    private static final String HEADLINE_START_TAG = "<HEADLINE>";
    private static final String HEADLINE_END_TAG = "</HEADLINE>";
    /** Offset of the docid value: the length of the prefix {@code <DOC id="} (9 characters). */
    private static final int DOCID_OFFSET = (XML_START_TAG + "id=\"").length();

    private String raw;
    private String docid;
    private String headline;
    private String text;

    /**
     * Serializes the raw markup as a variable-length int byte count followed
     * by the bytes themselves.
     *
     * <p>The text is always encoded as UTF-8 so that the result does not depend
     * on the platform default charset of the writing JVM.
     *
     * @param out the sink to write to
     * @throws IOException if writing to {@code out} fails
     */
    public void write(DataOutput out) throws IOException {
        byte[] bytes = this.raw.getBytes(StandardCharsets.UTF_8);
        WritableUtils.writeVInt(out, bytes.length);
        out.write(bytes, 0, bytes.length);
    }

    /**
     * Reads a document previously produced by {@link #write(DataOutput)} and
     * resets all cached derived fields.
     *
     * <p>The bytes are decoded as UTF-8, mirroring {@link #write(DataOutput)}.
     *
     * @param in the source to read from
     * @throws IOException if reading from {@code in} fails
     */
    public void readFields(DataInput in) throws IOException {
        int length = WritableUtils.readVInt(in);
        byte[] bytes = new byte[length];
        in.readFully(bytes, 0, length);
        Aquaint2Document.readDocument(this, new String(bytes, StandardCharsets.UTF_8));
    }

    /**
     * Returns the document id, extracting and caching it on first use.
     *
     * <p>The id is taken from a fixed offset: the raw markup is assumed to
     * begin with {@code <DOC id="}, and the id runs up to the next double
     * quote, with surrounding whitespace trimmed.
     *
     * @return the trimmed document id
     * @throws IndexOutOfBoundsException if no double quote is found at or
     *         after the fixed offset
     */
    @Override
    public String getDocid() {
        if (this.docid == null) {
            int end = this.raw.indexOf('"', DOCID_OFFSET);
            this.docid = this.raw.substring(DOCID_OFFSET, end).trim();
        }
        return this.docid;
    }

    /**
     * Returns the headline, extracting and caching it on first use.
     *
     * <p>The headline is the text between the first {@code <HEADLINE>} and the
     * next {@code </HEADLINE>} after it, trimmed, with nested tags removed and
     * each tab or newline replaced by a space.
     *
     * @return the cleaned headline, or the empty string when the document has
     *         no {@code <HEADLINE>} element or the element is never closed
     */
    public String getHeadline() {
        if (this.headline == null) {
            int open = this.raw.indexOf(HEADLINE_START_TAG);
            // Look for the closing tag only after the opening one so that a
            // stray earlier "</HEADLINE>" cannot produce an invalid range.
            int close = open == -1
                    ? -1
                    : this.raw.indexOf(HEADLINE_END_TAG, open + HEADLINE_START_TAG.length());
            if (close == -1) {
                this.headline = "";
            } else {
                String value = this.raw.substring(open + HEADLINE_START_TAG.length(), close).trim();
                value = TAGS_PATTERN.matcher(value).replaceAll("");
                this.headline = WHITESPACE_PATTERN.matcher(value).replaceAll(" ");
            }
        }
        return this.headline;
    }

    /**
     * Returns the document content, extracting and caching it on first use.
     *
     * <p>The content is everything after the first {@code >} (the end of the
     * opening tag) up to the last {@code </DOC>}, trimmed and with all
     * remaining tags removed. If no {@code </DOC>} follows the opening tag,
     * the content extends to the end of the raw markup.
     *
     * @return the tag-free content, or the empty string when the raw markup
     *         contains no {@code >} at all
     */
    @Override
    public String getContent() {
        if (this.text == null) {
            int open = this.raw.indexOf('>');
            if (open == -1) {
                this.text = "";
            } else {
                int bodyStart = open + 1;
                // Locate the closing tag explicitly instead of assuming the
                // markup ends with exactly "</DOC>"; trailing whitespace or a
                // missing end tag must not corrupt the range.
                int bodyEnd = this.raw.lastIndexOf(XML_END_TAG);
                if (bodyEnd < bodyStart) {
                    bodyEnd = this.raw.length();
                }
                String body = this.raw.substring(bodyStart, bodyEnd).trim();
                this.text = TAGS_PATTERN.matcher(body).replaceAll("");
            }
        }
        return this.text;
    }

    /**
     * Loads the given raw markup into {@code doc} and clears every cached
     * derived field so that it is re-extracted from the new markup.
     *
     * @param doc the document object to populate
     * @param s the raw markup of one document; must not be {@code null}
     * @throws RuntimeException if {@code s} is {@code null}
     */
    public static void readDocument(Aquaint2Document doc, String s) {
        if (s == null) {
            throw new RuntimeException("Error, can't read null string!");
        }
        doc.raw = s;
        doc.docid = null;
        doc.headline = null;
        doc.text = null;
    }
}

