/*
 * Decompiled with CFR 0.152.
 */
package org.textmining.extraction.word;

import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.List;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.LittleEndian;
import org.textmining.extraction.TextExtractor;
import org.textmining.extraction.word.PasswordProtectedException;
import org.textmining.extraction.word.WordTextScrubber;
import org.textmining.extraction.word.model.CHPX;
import org.textmining.extraction.word.model.PieceDescriptor;
import org.textmining.extraction.word.model.TextPiece;

/**
 * Base class for extractors that pull text out of a Word document stored in
 * an OLE2 (POIFS) container.
 *
 * <p>The class loads the raw bytes of the {@code WordDocument} stream into
 * {@link #_header}, reads two flag bits from it, and offers a helper that
 * decodes text by intersecting text pieces with character runs.
 */
public abstract class WordTextExtractor implements TextExtractor {
    /** Raw bytes of the {@code WordDocument} stream, filled by {@link #initWordHeader(InputStream)}. */
    protected byte[] _header;
    /** {@code true} when bit {@code 0x0004} of the flags word at byte offset 10 is set. */
    protected boolean _fastSave;
    /** The POIFS file system opened by {@link #initWordHeader(InputStream)}. */
    protected POIFSFileSystem _fsys;

    /**
     * Decodes the text covered by both a text piece and a character run and
     * hands each decoded fragment to the scrubber.
     *
     * <p>For every piece, its byte range inside {@link #_header} is computed
     * from the piece descriptor's file position and the piece length (two
     * bytes per character when the piece is Unicode and
     * {@link #supportsUnicode()} returns {@code true}, otherwise one). Every
     * run is shifted by {@code fcMin}, clipped to the piece's byte range and,
     * unless {@link #isDeleted(byte[])} reports it as deleted, decoded as
     * {@code UTF-16LE} or {@code Cp1252}.
     *
     * @param stringWriter destination passed through to the scrubber
     * @param fcMin        offset added to each run's start and end
     * @param textPieces   list of {@link TextPiece} elements
     * @param textRuns     list of {@link CHPX} elements
     * @param scrubber     receives every decoded fragment
     * @throws UnsupportedEncodingException if the required charset is unavailable
     * @throws IOException                  declared for the scrubber's output path
     */
    protected void doFastSaveExtraction(Writer stringWriter, int fcMin, List textPieces, List textRuns, WordTextScrubber scrubber) throws UnsupportedEncodingException, IOException {
        for (int pieceIndex = 0; pieceIndex < textPieces.size(); ++pieceIndex) {
            TextPiece piece = (TextPiece)textPieces.get(pieceIndex);
            PieceDescriptor descriptor = piece.getPieceDescriptor();

            // The encoding decision is constant for the whole piece, so make it once.
            boolean unicode = descriptor.isUnicode() && this.supportsUnicode();
            int bytesPerChar = unicode ? 2 : 1;
            String charsetName = unicode ? "UTF-16LE" : "Cp1252";

            int pieceStart = descriptor.getFilePosition();
            int pieceEnd = pieceStart + (piece.getEnd() - piece.getStart()) * bytesPerChar;

            for (int runIndex = 0; runIndex < textRuns.size(); ++runIndex) {
                CHPX run = (CHPX)textRuns.get(runIndex);
                int runStart = run.getStart() + fcMin;
                int runEnd = run.getEnd() + fcMin;

                boolean endsBeforePiece = runStart < pieceStart && runEnd <= pieceStart;
                boolean startsAfterPiece = runStart >= pieceEnd && runEnd > pieceEnd;
                if (endsBeforePiece || startsAfterPiece || this.isDeleted(run.getGrpprl())) {
                    continue;
                }

                int textStart = Math.max(runStart, pieceStart);
                int textEnd = Math.min(runEnd, pieceEnd);
                if (textEnd < textStart) {
                    // An inverted run or piece range would make the String
                    // constructor throw on a negative length; skip it instead.
                    continue;
                }

                String fragment = new String(this._header, textStart, textEnd - textStart, charsetName);
                scrubber.append(stringWriter, fragment);
            }
        }
    }

    /**
     * Tells whether a character run should be left out of the extracted text.
     *
     * @param var1 the value returned by {@code CHPX.getGrpprl()} for the run
     * @return {@code true} to skip the run
     */
    protected abstract boolean isDeleted(byte[] var1);

    /**
     * Tells whether Unicode pieces are decoded as two-byte {@code UTF-16LE}.
     * This base implementation returns {@code false}, so every piece is
     * decoded as single-byte {@code Cp1252} unless a subclass overrides it.
     *
     * @return {@code false} in this base class
     */
    protected boolean supportsUnicode() {
        return false;
    }

    /**
     * Opens the POIFS container, loads the complete {@code WordDocument}
     * stream into {@link #_header} and then calls {@link #initOptions()}.
     *
     * @param in stream positioned at the start of the OLE2 document
     * @throws IOException                if the container cannot be read or the
     *                                    {@code WordDocument} stream ends before
     *                                    its declared size
     * @throws PasswordProtectedException if {@link #initOptions()} detects the
     *                                    password-protection flag
     */
    protected void initWordHeader(InputStream in) throws IOException, PasswordProtectedException {
        this._fsys = new POIFSFileSystem(in);
        DocumentEntry headerProps = (DocumentEntry)this._fsys.getRoot().getEntry("WordDocument");
        DocumentInputStream din = this._fsys.createDocumentInputStream("WordDocument");
        try {
            this._header = new byte[headerProps.getSize()];
            // A single read() may legally return fewer bytes than requested,
            // so keep reading until the buffer is full.
            int offset = 0;
            while (offset < this._header.length) {
                int count = din.read(this._header, offset, this._header.length - offset);
                if (count <= 0) {
                    throw new IOException("Unexpected end of WordDocument stream: read " + offset + " of " + this._header.length + " bytes");
                }
                offset += count;
            }
        } finally {
            // Release the stream even when reading fails.
            din.close();
        }
        this.initOptions();
    }

    /**
     * Reads the little-endian 16-bit flags word at byte offset 10 of
     * {@link #_header}. Bit {@code 0x0004} is stored in {@link #_fastSave};
     * bit {@code 0x0100} causes a {@link PasswordProtectedException}.
     *
     * <p>NOTE(review): these presumably correspond to the fComplex and
     * fEncrypted bits of the Word FIB - confirm against the format spec.
     *
     * @throws PasswordProtectedException if bit {@code 0x0100} is set
     */
    protected void initOptions() throws PasswordProtectedException {
        short info = LittleEndian.getShort((byte[])this._header, (int)10);
        this._fastSave = (info & 4) != 0;
        if ((info & 0x100) != 0) {
            throw new PasswordProtectedException("This document is password protected");
        }
    }
}

