/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tika.parser.rtf;

import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.util.Calendar;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Locale;
import java.util.Map;
import java.util.TimeZone;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.OfficeOpenXMLCore;
import org.apache.tika.metadata.OfficeOpenXMLExtended;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.rtf.GroupState;
import org.apache.tika.parser.rtf.ListDescriptor;
import org.apache.tika.parser.rtf.RTFEmbObjHandler;
import org.apache.tika.sax.XHTMLContentHandler;
import org.apache.tika.utils.CharsetUtils;
import org.xml.sax.SAXException;

final class TextExtractor {
    // Charset constants, resolved once during class initialization. ASCII must stay first:
    // static initializers run in textual order and getCharset(String) returns ASCII as its
    // fallback whenever a lookup throws.
    private static final Charset ASCII = Charset.forName("US-ASCII");
    // Initial value of globalCharset; also selected by the "ansi" header control word.
    private static final Charset WINDOWS_1252 = TextExtractor.getCharset("WINDOWS-1252");
    // Selected by the "mac" header control word.
    private static final Charset MAC_ROMAN = TextExtractor.getCharset("MacRoman");
    private static final Charset SHIFT_JIS = TextExtractor.getCharset("Shift_JIS");
    private static final Charset WINDOWS_57011 = TextExtractor.getCharset("windows-57011");
    private static final Charset WINDOWS_57010 = TextExtractor.getCharset("windows-57010");
    private static final Charset WINDOWS_57009 = TextExtractor.getCharset("windows-57009");
    private static final Charset WINDOWS_57008 = TextExtractor.getCharset("windows-57008");
    private static final Charset WINDOWS_57007 = TextExtractor.getCharset("windows-57007");
    private static final Charset WINDOWS_57006 = TextExtractor.getCharset("windows-57006");
    private static final Charset WINDOWS_57005 = TextExtractor.getCharset("windows-57005");
    private static final Charset WINDOWS_57004 = TextExtractor.getCharset("windows-57004");
    private static final Charset WINDOWS_57003 = TextExtractor.getCharset("windows-57003");
    private static final Charset X_ISCII91 = TextExtractor.getCharset("x-ISCII91");
    private static final Charset X_MAC_CENTRAL_EUROPE = TextExtractor.getCharset("x-MacCentralEurope");
    private static final Charset MAC_CYRILLIC = TextExtractor.getCharset("MacCyrillic");
    private static final Charset X_JOHAB = TextExtractor.getCharset("x-Johab");
    // NOTE(review): several names below differ from a later constant only by a trailing "2"
    // (e.g. CP12582 vs CP1258) and request the same code page with different letter case.
    // This looks like decompiler disambiguation of duplicate constants - confirm before merging them.
    private static final Charset CP12582 = TextExtractor.getCharset("CP1258");
    private static final Charset CP12572 = TextExtractor.getCharset("CP1257");
    private static final Charset CP12562 = TextExtractor.getCharset("CP1256");
    private static final Charset CP12552 = TextExtractor.getCharset("CP1255");
    private static final Charset CP12542 = TextExtractor.getCharset("CP1254");
    private static final Charset CP12532 = TextExtractor.getCharset("CP1253");
    private static final Charset CP1252 = TextExtractor.getCharset("CP1252");
    private static final Charset CP12512 = TextExtractor.getCharset("CP1251");
    private static final Charset CP12502 = TextExtractor.getCharset("CP1250");
    private static final Charset CP950 = TextExtractor.getCharset("CP950");
    private static final Charset CP949 = TextExtractor.getCharset("CP949");
    private static final Charset MS9362 = TextExtractor.getCharset("MS936");
    private static final Charset MS8742 = TextExtractor.getCharset("MS874");
    private static final Charset CP866 = TextExtractor.getCharset("CP866");
    private static final Charset CP865 = TextExtractor.getCharset("CP865");
    private static final Charset CP864 = TextExtractor.getCharset("CP864");
    private static final Charset CP863 = TextExtractor.getCharset("CP863");
    private static final Charset CP862 = TextExtractor.getCharset("CP862");
    private static final Charset CP860 = TextExtractor.getCharset("CP860");
    private static final Charset CP852 = TextExtractor.getCharset("CP852");
    private static final Charset CP8502 = TextExtractor.getCharset("CP850");
    private static final Charset CP819 = TextExtractor.getCharset("CP819");
    private static final Charset WINDOWS_720 = TextExtractor.getCharset("windows-720");
    private static final Charset WINDOWS_711 = TextExtractor.getCharset("windows-711");
    private static final Charset WINDOWS_710 = TextExtractor.getCharset("windows-710");
    private static final Charset WINDOWS_709 = TextExtractor.getCharset("windows-709");
    private static final Charset ISO_8859_6 = TextExtractor.getCharset("ISO-8859-6");
    private static final Charset CP4372 = TextExtractor.getCharset("CP437");
    // Selected by the "pca" header control word.
    private static final Charset CP850 = TextExtractor.getCharset("cp850");
    // Selected by the "pc" header control word.
    private static final Charset CP437 = TextExtractor.getCharset("cp437");
    private static final Charset MS874 = TextExtractor.getCharset("ms874");
    private static final Charset CP1257 = TextExtractor.getCharset("cp1257");
    private static final Charset CP1256 = TextExtractor.getCharset("cp1256");
    private static final Charset CP1255 = TextExtractor.getCharset("cp1255");
    private static final Charset CP1258 = TextExtractor.getCharset("cp1258");
    private static final Charset CP1254 = TextExtractor.getCharset("cp1254");
    private static final Charset CP1253 = TextExtractor.getCharset("cp1253");
    private static final Charset MS950 = TextExtractor.getCharset("ms950");
    private static final Charset MS936 = TextExtractor.getCharset("ms936");
    private static final Charset MS1361 = TextExtractor.getCharset("ms1361");
    private static final Charset MS932 = TextExtractor.getCharset("MS932");
    private static final Charset CP1251 = TextExtractor.getCharset("cp1251");
    private static final Charset CP1250 = TextExtractor.getCharset("cp1250");
    private static final Charset MAC_THAI = TextExtractor.getCharset("MacThai");
    private static final Charset MAC_TURKISH = TextExtractor.getCharset("MacTurkish");
    private static final Charset MAC_GREEK = TextExtractor.getCharset("MacGreek");
    private static final Charset MAC_ARABIC = TextExtractor.getCharset("MacArabic");
    private static final Charset MAC_HEBREW = TextExtractor.getCharset("MacHebrew");
    private static final Charset JOHAB = TextExtractor.getCharset("johab");
    private static final Charset BIG5 = TextExtractor.getCharset("Big5");
    private static final Charset GB2312 = TextExtractor.getCharset("GB2312");
    private static final Charset MS949 = TextExtractor.getCharset("ms949");
    // NOTE(review): not referenced in the visible part of this class - confirm whether still needed.
    private int written = 0;
    // Raw (not yet decoded) text bytes collected by addOutputByte and drained by pushBytes.
    private byte[] pendingBytes = new byte[16];
    private int pendingByteCount;
    // ByteBuffer view over pendingBytes; re-wrapped whenever the array is reallocated.
    private ByteBuffer pendingByteBuffer = ByteBuffer.wrap(this.pendingBytes);
    // Already-decoded characters collected by addOutputChar and drained by pushChars.
    private char[] pendingChars = new char[10];
    private int pendingCharCount;
    // Letters of the control word currently being parsed (filled by addControl, compared by equals(String)).
    private byte[] pendingControl = new byte[10];
    private int pendingControlCount;
    // Scratch space that pushBytes decodes into before forwarding the characters.
    private final char[] outputArray = new char[128];
    private final CharBuffer outputBuffer = CharBuffer.wrap(this.outputArray);
    // Decoder cached by getDecoder together with the charset it was created for.
    private CharsetDecoder decoder;
    private Charset lastCharset;
    // Document-wide charset; changed by the "ansi", "pca", "pc", "mac" and "ansicpg" header words.
    private Charset globalCharset = WINDOWS_1252;
    // Parameter of the "deff" header word; -1 while none has been seen.
    private int globalDefaultFont = -1;
    // Font id most recently announced by "f" while inside the font table.
    private int curFontID = -1;
    // Font id -> charset, populated from "fcharset" entries of the font table.
    private final Map<Integer, Charset> fontToCharset = new HashMap<Integer, Charset>();
    // extract() stops once this is empty after a group end; presumably the stack of saved
    // enclosing group states - the push/pop code is outside the visible part of the file.
    private final LinkedList<GroupState> groupStates = new LinkedList();
    // State of the group currently being parsed.
    private GroupState groupState = new GroupState();
    // True until a body-level control word (par, pard, sect, ...) is seen outside an ignored group.
    private boolean inHeader = true;
    // 0 = "fonttbl" not seen yet, 1 = inside the font table, 2 = font table finished.
    private int fontTableState;
    // Group depth recorded when "fonttbl" was seen; used to detect leaving the font table.
    private int fontTableDepth;
    // Metadata property selected by an info control word such as "author" or "title";
    // while non-null, text in ignored groups is still collected.
    private Property nextMetaData;
    // Whether a paragraph element (p or li) is currently open on the output handler.
    private boolean inParagraph;
    // The value 1 routes text into pendingBuffer instead of the output; other values are
    // handled outside the visible part of the file.
    private int fieldState;
    // Id of a list whose closing tag has been deferred; 0 when none is pending.
    private int pendingListEnd;
    // Lists declared under "listtable" / "listoverridetable", keyed by list id.
    private Map<Integer, ListDescriptor> listTable = new HashMap<Integer, ListDescriptor>();
    private Map<Integer, ListDescriptor> listOverrideTable = new HashMap<Integer, ListDescriptor>();
    // Whichever of the two tables above is currently being filled; null before either is seen.
    private Map<Integer, ListDescriptor> currentListTable;
    // Descriptor of the list entry currently being parsed.
    private ListDescriptor currentList;
    // Index of the current "listlevel" within currentList; -1 before the first level.
    private int listTableLevel = -1;
    // When true, no list elements (ul/ol/li) are emitted.
    private boolean ignoreLists;
    // NOTE(review): not referenced in the visible part of this class.
    private String pendingURL;
    // Collects text while in the header or while fieldState == 1.
    private final StringBuilder pendingBuffer = new StringBuilder();
    // Info control words are only honoured while this is -1; other values are set outside
    // the visible part of the file.
    private int uprState = -1;
    private final XHTMLContentHandler out;
    private final Metadata metadata;
    private final RTFEmbObjHandler embObjHandler;
    // Date components taken from the "yr", "mo", "dy", "hr" and "min" header words.
    private int year;
    private int month;
    private int day;
    private int hour;
    private int minute;
    // Number of upcoming plain bytes / hex escapes still to be skipped after a "u" control word.
    int ansiSkip = 0;
    // Lookup tables from "fcharset" / "ansicpg" parameters to charsets; ANSICPG_MAP is
    // assigned outside the visible part of the file.
    private static final Map<Integer, Charset> FCHARSET_MAP = new HashMap<Integer, Charset>();
    private static final Map<Integer, Charset> ANSICPG_MAP;

    /**
     * Looks up a charset by name via {@code CharsetUtils.forName}.
     *
     * @param name charset name to resolve
     * @return the resolved charset, or {@code ASCII} if the lookup throws any exception
     */
    private static Charset getCharset(String name) {
        Charset resolved;
        try {
            resolved = CharsetUtils.forName(name);
        }
        catch (Exception lookupFailure) {
            // Charset is not available here: degrade to plain ASCII.
            resolved = ASCII;
        }
        return resolved;
    }

    /**
     * Creates an extractor bound to the given sinks.
     *
     * @param out           handler that receives the generated XHTML events
     * @param metadata      metadata object that header values are added to
     * @param embObjHandler handler that receives embedded object and picture data
     */
    public TextExtractor(XHTMLContentHandler out, Metadata metadata, RTFEmbObjHandler embObjHandler) {
        this.out = out;
        this.embObjHandler = embObjHandler;
        this.metadata = metadata;
    }

    /**
     * @return {@code true} if list elements are currently suppressed in the output
     */
    public boolean isIgnoringLists() {
        return ignoreLists;
    }

    /**
     * Turns emission of list elements on or off.
     *
     * @param ignore {@code true} to suppress list elements
     */
    public void setIgnoreLists(boolean ignore) {
        ignoreLists = ignore;
    }

    /**
     * Tests whether a code point is an ASCII hexadecimal digit.
     *
     * @param ch value to test (may be -1 for end of stream)
     * @return {@code true} for 0-9, a-f and A-F
     */
    protected static boolean isHexChar(int ch) {
        if ('0' <= ch && ch <= '9') {
            return true;
        }
        if ('a' <= ch && ch <= 'f') {
            return true;
        }
        return 'A' <= ch && ch <= 'F';
    }

    /**
     * Tests whether a code point is an ASCII letter.
     *
     * @param ch value to test (may be -1 for end of stream)
     * @return {@code true} for a-z and A-Z
     */
    private static boolean isAlpha(int ch) {
        if ('a' <= ch && ch <= 'z') {
            return true;
        }
        return 'A' <= ch && ch <= 'Z';
    }

    /**
     * Tests whether a code point is an ASCII decimal digit.
     *
     * @param ch value to test (may be -1 for end of stream)
     * @return {@code true} for 0-9
     */
    private static boolean isDigit(int ch) {
        return '0' <= ch && ch <= '9';
    }

    /**
     * Converts a hexadecimal digit character to its numeric value.
     *
     * <p>The method is deliberately lenient: any lower-case letter a-z is mapped to
     * 10..35, and everything that is neither a digit nor a lower-case letter is treated
     * as an upper-case letter (checked only by an assertion). Callers are expected to
     * validate the input with {@code isHexChar} first.
     *
     * @param ch character code of the digit
     * @return 0-9 for decimal digits, 10 upwards for letters
     */
    protected static int hexValue(int ch) {
        final int zeroPoint;
        if ('0' <= ch && ch <= '9') {
            zeroPoint = '0';
        } else if ('a' <= ch && ch <= 'z') {
            zeroPoint = 'a' - 10;
        } else {
            assert ('A' <= ch && ch <= 'Z');
            zeroPoint = 'A' - 10;
        }
        return ch - zeroPoint;
    }

    /**
     * Flushes whatever text is pending: the raw byte buffer if it holds anything,
     * otherwise the character buffer.
     */
    private void pushText() throws IOException, SAXException, TikaException {
        if (this.pendingByteCount == 0) {
            this.pushChars();
            return;
        }
        // Bytes and chars are never pending at the same time.
        assert (this.pendingCharCount == 0);
        this.pushBytes();
    }

    /**
     * Queues one raw text byte for later charset decoding.
     *
     * <p>Pending characters are flushed first so output order is preserved. While inside
     * a picture group the byte is handed to the embedded-object handler as a metadata
     * character instead of being queued.
     *
     * @param b byte value in the range 0..255
     */
    private void addOutputByte(int b) throws IOException, SAXException, TikaException {
        assert (b >= 0 && b < 256) : "byte value out of range: " + b;
        if (this.pendingCharCount != 0) {
            this.pushChars();
        }
        if (this.groupState.pictDepth > 0) {
            this.embObjHandler.writeMetadataChar((char)b);
            return;
        }
        final int capacity = this.pendingBytes.length;
        if (this.pendingByteCount == capacity) {
            // Buffer is full: grow by 25% and re-wrap the ByteBuffer view.
            byte[] grown = new byte[(int)((double)capacity * 1.25)];
            System.arraycopy(this.pendingBytes, 0, grown, 0, capacity);
            this.pendingBytes = grown;
            this.pendingByteBuffer = ByteBuffer.wrap(this.pendingBytes);
        }
        this.pendingBytes[this.pendingByteCount++] = (byte)b;
    }

    /**
     * Appends one letter to the control word currently being collected.
     *
     * @param b ASCII letter of the control word
     */
    private void addControl(int b) {
        assert (TextExtractor.isAlpha(b));
        final int capacity = this.pendingControl.length;
        if (this.pendingControlCount == capacity) {
            // Buffer is full: grow by 25%.
            byte[] grown = new byte[(int)((double)capacity * 1.25)];
            System.arraycopy(this.pendingControl, 0, grown, 0, capacity);
            this.pendingControl = grown;
        }
        this.pendingControl[this.pendingControlCount++] = (byte)b;
    }

    /**
     * Queues one already-decoded character.
     *
     * <p>Pending raw bytes are flushed first so output order is preserved. The character
     * then goes to one of three places: the pending string buffer (while in the header or
     * when {@code fieldState == 1}), the embedded-object handler (inside an sn/sv group),
     * or the pending character buffer.
     *
     * @param ch character to queue
     */
    private void addOutputChar(char ch) throws IOException, SAXException, TikaException {
        if (this.pendingByteCount != 0) {
            this.pushBytes();
        }
        if (this.inHeader || this.fieldState == 1) {
            this.pendingBuffer.append(ch);
            return;
        }
        if (this.groupState.sn || this.groupState.sv) {
            this.embObjHandler.writeMetadataChar(ch);
            return;
        }
        final int capacity = this.pendingChars.length;
        if (this.pendingCharCount == capacity) {
            // Buffer is full: grow by 25%.
            char[] grown = new char[(int)((double)capacity * 1.25)];
            System.arraycopy(this.pendingChars, 0, grown, 0, capacity);
            this.pendingChars = grown;
        }
        this.pendingChars[this.pendingCharCount++] = ch;
    }

    /**
     * Parses an RTF stream and emits its content to the handlers given at construction.
     *
     * @param in stream to read; it is wrapped in a pushback stream with a 2-byte buffer
     */
    public void extract(InputStream in) throws IOException, SAXException, TikaException {
        PushbackInputStream pushback = new PushbackInputStream(in, 2);
        this.extract(pushback);
    }

    /**
     * Main parse loop: reads the stream byte by byte and dispatches on backslash, opening
     * brace and closing brace; everything else is treated as data.
     *
     * <p>Parsing stops at end of stream, or when a closing brace leaves no saved group
     * state. The open paragraph (if any) is closed before the document is ended.
     *
     * @param in pushback stream positioned at the start of the document
     */
    private void extract(PushbackInputStream in) throws IOException, SAXException, TikaException {
        this.out.startDocument();
        for (int b = in.read(); b != -1; b = in.read()) {
            if (b == '\\') {
                this.parseControlToken(in);
            } else if (b == '{') {
                this.pushText();
                this.processGroupStart(in);
            } else if (b == '}') {
                this.pushText();
                this.processGroupEnd();
                if (this.groupStates.isEmpty()) {
                    // Outermost group closed: nothing after it is parsed.
                    break;
                }
            } else if (this.groupState.objdata || this.groupState.pictDepth == 1) {
                // Inside object data or a picture: the byte is part of a hex dump.
                this.embObjHandler.writeHexChar(b);
            } else {
                boolean lineBreak = b == '\r' || b == '\n';
                boolean suppressed = this.groupState.ignore && this.nextMetaData == null && !this.groupState.sn && !this.groupState.sv;
                if (lineBreak || suppressed) {
                    continue;
                }
                if (this.ansiSkip != 0) {
                    // Still skipping the fallback bytes that follow a unicode escape.
                    --this.ansiSkip;
                } else {
                    this.addOutputByte(b);
                }
            }
        }
        this.endParagraph(false);
        this.out.endDocument();
    }

    /**
     * Handles whatever follows a backslash: a hex escape, a control word, an escaped
     * literal byte, or a control symbol. End of stream is silently ignored.
     *
     * @param in stream positioned just after the backslash
     */
    private void parseControlToken(PushbackInputStream in) throws IOException, SAXException, TikaException {
        final int b = in.read();
        if (b == -1) {
            return;
        }
        switch (b) {
            case '\'': {
                this.parseHexChar(in);
                return;
            }
            case '{':
            case '}':
            case '\\':
            case '\r':
            case '\n': {
                // Escaped literal: passed through as a plain data byte.
                this.addOutputByte(b);
                return;
            }
            default: {
                break;
            }
        }
        if (TextExtractor.isAlpha(b)) {
            this.parseControlWord((char)b, in);
        } else {
            this.processControlSymbol((char)b);
        }
    }

    /**
     * Parses the two hex digits of a hex escape and queues the resulting byte.
     *
     * <p>A malformed escape is dropped: the first non-hex byte is pushed back so the main
     * loop sees it again, and any hex digit already consumed is discarded. If the escape
     * falls within the bytes to be skipped after a unicode escape, it only decrements the
     * skip counter.
     *
     * @param in stream positioned just after the apostrophe
     */
    private void parseHexChar(PushbackInputStream in) throws IOException, SAXException, TikaException {
        int hex1 = in.read();
        if (!TextExtractor.isHexChar(hex1)) {
            // Never push back the end-of-stream marker: unread(-1) would store the byte
            // 0xFF, which the main loop would then read as real data.
            if (hex1 != -1) {
                in.unread(hex1);
            }
            return;
        }
        int hex2 = in.read();
        if (!TextExtractor.isHexChar(hex2)) {
            if (hex2 != -1) {
                in.unread(hex2);
            }
            return;
        }
        if (this.ansiSkip != 0) {
            --this.ansiSkip;
        } else {
            this.addOutputByte(16 * TextExtractor.hexValue(hex1) + TextExtractor.hexValue(hex2));
        }
    }

    /**
     * Reads the rest of a control word (letters, optional sign, optional decimal
     * parameter, optional terminating space) and dispatches it.
     *
     * <p>A single space after the word is consumed as its delimiter; any other byte is
     * pushed back. The collected control-word buffer is cleared after dispatch.
     *
     * @param firstChar first letter of the control word, already consumed by the caller
     * @param in        stream positioned just after {@code firstChar}
     */
    private void parseControlWord(int firstChar, PushbackInputStream in) throws IOException, SAXException, TikaException {
        this.addControl(firstChar);
        int b = in.read();
        while (TextExtractor.isAlpha(b)) {
            this.addControl(b);
            b = in.read();
        }
        boolean hasParam = false;
        boolean negParam = false;
        if (b == '-') {
            negParam = true;
            hasParam = true;
            b = in.read();
        }
        int param = 0;
        while (TextExtractor.isDigit(b)) {
            param *= 10;
            param += b - '0';
            hasParam = true;
            b = in.read();
        }
        // Push the delimiter back unless it is the consumed space. The end-of-stream
        // marker must not be pushed back either: unread(-1) would store the byte 0xFF,
        // which the main loop would then read as real data.
        if (b != ' ' && b != -1) {
            in.unread(b);
        }
        if (hasParam) {
            if (negParam) {
                param = -param;
            }
            this.processControlWord(param, in);
        } else {
            this.processControlWord();
        }
        this.pendingControlCount = 0;
    }

    /**
     * Opens a paragraph element on demand, just before the first text of a paragraph.
     *
     * <p>Does nothing if a paragraph is already open. Otherwise any open bold/italic
     * elements are closed, a deferred list end is emitted if the list has changed, a new
     * list is started if required, the {@code li} or {@code p} element is opened, and
     * bold/italic are reopened inside it.
     */
    private void lazyStartParagraph() throws IOException, SAXException, TikaException {
        if (this.inParagraph) {
            return;
        }
        // Close inline styles so they can be reopened inside the new paragraph.
        if (this.groupState.italic) {
            this.end("i");
        }
        if (this.groupState.bold) {
            this.end("b");
        }
        if (this.pendingListEnd != 0 && this.groupState.list != this.pendingListEnd) {
            this.endList(this.pendingListEnd);
            this.pendingListEnd = 0;
        }
        final boolean listed = this.inList();
        if (listed && this.pendingListEnd != this.groupState.list) {
            this.startList(this.groupState.list);
        }
        this.out.startElement(listed ? "li" : "p");
        if (this.groupState.bold) {
            this.start("b");
        }
        if (this.groupState.italic) {
            this.start("i");
        }
        this.inParagraph = true;
    }

    /**
     * Flushes pending text and closes the open paragraph, if any.
     *
     * <p>Open italic/bold elements are closed and their flags are set to
     * {@code preserveStyles}. When styles are preserved and at least one is active, a new
     * {@code p} element is opened immediately with those styles reopened. When styles are
     * not preserved, a deferred list end is emitted as well.
     *
     * @param preserveStyles whether active bold/italic should carry over to the next paragraph
     */
    private void endParagraph(boolean preserveStyles) throws IOException, SAXException, TikaException {
        this.pushText();
        if (this.inParagraph) {
            if (this.groupState.italic) {
                this.end("i");
                this.groupState.italic = preserveStyles;
            }
            if (this.groupState.bold) {
                this.end("b");
                this.groupState.bold = preserveStyles;
            }
            this.out.endElement(this.inList() ? "li" : "p");
            final boolean reopen = preserveStyles && (this.groupState.bold || this.groupState.italic);
            if (reopen) {
                this.start("p");
                if (this.groupState.bold) {
                    this.start("b");
                }
                if (this.groupState.italic) {
                    this.start("i");
                }
            }
            this.inParagraph = reopen;
        }
        if (preserveStyles || this.pendingListEnd == 0) {
            return;
        }
        this.endList(this.pendingListEnd);
        this.pendingListEnd = 0;
    }

    /**
     * Sends the pending characters to the output handler, opening a paragraph first if
     * necessary, and empties the character buffer. No-op when nothing is pending.
     */
    private void pushChars() throws IOException, SAXException, TikaException {
        if (this.pendingCharCount == 0) {
            return;
        }
        this.lazyStartParagraph();
        this.out.characters(this.pendingChars, 0, this.pendingCharCount);
        this.pendingCharCount = 0;
    }

    /**
     * Decodes the pending raw bytes with the current charset and forwards the resulting
     * characters, then empties the byte buffer.
     *
     * <p>Bytes are discarded without decoding when the current group is ignored and no
     * metadata property is waiting for text. Decoded characters go to the pending string
     * buffer while in the header or when {@code fieldState == 1}; otherwise they are sent
     * to the output handler inside a (lazily opened) paragraph.
     */
    private void pushBytes() throws IOException, SAXException, TikaException {
        if (this.pendingByteCount > 0 && !(this.groupState.ignore && this.nextMetaData == null)) {
            final CharsetDecoder dec = this.getDecoder();
            this.pendingByteBuffer.limit(this.pendingByteCount);
            assert (this.pendingByteBuffer.position() == 0);
            assert (this.outputBuffer.position() == 0);
            CoderResult status;
            // Decode in output-buffer-sized steps until all input has been consumed.
            do {
                status = dec.decode(this.pendingByteBuffer, this.outputBuffer, true);
                final int produced = this.outputBuffer.position();
                if (produced > 0) {
                    if (this.inHeader || this.fieldState == 1) {
                        this.pendingBuffer.append(this.outputArray, 0, produced);
                    } else {
                        this.lazyStartParagraph();
                        this.out.characters(this.outputArray, 0, produced);
                    }
                    this.outputBuffer.position(0);
                }
            } while (status != CoderResult.UNDERFLOW);
            // Drain whatever the decoder still holds internally.
            do {
                status = dec.flush(this.outputBuffer);
                final int produced = this.outputBuffer.position();
                if (produced > 0) {
                    if (this.inHeader || this.fieldState == 1) {
                        this.pendingBuffer.append(this.outputArray, 0, produced);
                    } else {
                        this.lazyStartParagraph();
                        this.out.characters(this.outputArray, 0, produced);
                    }
                    this.outputBuffer.position(0);
                }
            } while (status != CoderResult.UNDERFLOW);
            dec.reset();
            this.pendingByteBuffer.position(0);
        }
        this.pendingByteCount = 0;
    }

    /**
     * Compares the control word currently being collected with a literal name.
     *
     * @param s control word name consisting of ASCII letters only
     * @return {@code true} if the collected control word is exactly {@code s}
     */
    private boolean equals(String s) {
        final int length = s.length();
        if (length != this.pendingControlCount) {
            return false;
        }
        int pos = 0;
        while (pos < length) {
            final char expected = s.charAt(pos);
            assert (TextExtractor.isAlpha(expected));
            if ((byte)expected != this.pendingControl[pos]) {
                return false;
            }
            ++pos;
        }
        return true;
    }

    /**
     * Handles a control symbol (a backslash followed by a single non-letter).
     *
     * <p>Tilde, hyphen and underscore are translated to a non-breaking space, a soft
     * hyphen and a non-breaking hyphen respectively. Every other symbol, including the
     * asterisk, produces no output.
     *
     * @param ch the symbol following the backslash
     */
    private void processControlSymbol(char ch) throws IOException, SAXException, TikaException {
        final char replacement;
        switch (ch) {
            case '~': {
                replacement = '\u00a0';
                break;
            }
            case '-': {
                replacement = '\u00ad';
                break;
            }
            case '_': {
                replacement = '\u2011';
                break;
            }
            default: {
                return;
            }
        }
        this.addOutputChar(replacement);
    }

    /**
     * Returns a decoder for the charset currently in effect, reusing the cached decoder
     * as long as the charset has not changed. New decoders replace malformed and
     * unmappable input instead of reporting an error.
     *
     * @return decoder for the current charset
     * @throws TikaException if no charset can be determined
     */
    private CharsetDecoder getDecoder() throws TikaException {
        final Charset wanted = this.getCharset();
        final boolean reusable = this.lastCharset != null && wanted.equals(this.lastCharset);
        if (!reusable) {
            this.decoder = wanted.newDecoder()
                    .onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE);
            this.lastCharset = wanted;
        }
        return this.decoder;
    }

    /**
     * Determines the charset for decoding text at the current position.
     *
     * <p>Order of precedence: the charset of the current group's font; then, outside the
     * header, the charset of the document's default font (if one is known); finally the
     * document-wide charset.
     *
     * @return charset to decode with
     * @throws TikaException if none of the above yields a charset
     */
    private Charset getCharset() throws TikaException {
        final Charset fromFont = this.groupState.fontCharset;
        if (fromFont != null) {
            return fromFont;
        }
        if (this.globalDefaultFont != -1 && !this.inHeader) {
            final Charset fromDefaultFont = this.fontToCharset.get(this.globalDefaultFont);
            if (fromDefaultFont != null) {
                return fromDefaultFont;
            }
        }
        if (this.globalCharset != null) {
            return this.globalCharset;
        }
        throw new TikaException("unable to determine charset");
    }

    /**
     * Handles a control word that carries a numeric parameter. The word itself is the one
     * currently held in the control-word buffer.
     *
     * <p>While in the header this records document settings (code page, default font),
     * statistics and date components, font-table entries and list-table entries. In the
     * body it handles bold/italic with a parameter, font switches and list selection.
     * Independently of header/body it handles unicode escapes ("u"), the unicode skip
     * count ("uc") and binary payloads ("bin").
     *
     * @param param numeric parameter of the control word (already negated if it was signed)
     * @param in    stream positioned after the control word; read only for "bin"
     * @throws TikaException if the stream ends inside a "bin" payload that is being skipped
     */
    private void processControlWord(int param, PushbackInputStream in) throws IOException, SAXException, TikaException {
        if (this.inHeader) {
            Charset cs;
            if (this.equals("ansicpg")) {
                // Unknown code pages leave the current global charset untouched.
                cs = ANSICPG_MAP.get(param);
                if (cs != null) {
                    this.globalCharset = cs;
                }
            } else if (this.equals("deff")) {
                this.globalDefaultFont = param;
            } else if (this.equals("nofpages")) {
                this.metadata.add(Office.PAGE_COUNT, Integer.toString(param));
            } else if (this.equals("nofwords")) {
                this.metadata.add(Office.WORD_COUNT, Integer.toString(param));
            } else if (this.equals("nofchars")) {
                this.metadata.add(Office.CHARACTER_COUNT, Integer.toString(param));
            } else if (this.equals("yr")) {
                this.year = param;
            } else if (this.equals("mo")) {
                this.month = param;
            } else if (this.equals("dy")) {
                this.day = param;
            } else if (this.equals("hr")) {
                this.hour = param;
            } else if (this.equals("min")) {
                this.minute = param;
            }
            if (this.fontTableState == 1) {
                if (this.groupState.depth < this.fontTableDepth) {
                    // Left the font table group.
                    this.fontTableState = 2;
                } else if (this.equals("f")) {
                    this.curFontID = param;
                } else if (this.equals("fcharset") && (cs = FCHARSET_MAP.get(param)) != null) {
                    this.fontToCharset.put(this.curFontID, cs);
                }
            }
            if (this.currentList != null) {
                if (this.equals("listid")) {
                    this.currentList.id = param;
                    this.currentListTable.put(this.currentList.id, this.currentList);
                } else if (this.equals("listtemplateid")) {
                    this.currentList.templateID = param;
                } else if ((this.equals("levelnfc") || this.equals("levelnfcn")) && this.listTableLevel > -1 && this.listTableLevel < this.currentList.numberType.length) {
                    this.currentList.numberType[this.listTableLevel] = param;
                }
            }
        } else if (this.equals("b")) {
            // Bold with a parameter is treated as "bold off"; only 0 is expected.
            assert (param == 0);
            if (this.groupState.bold) {
                this.pushText();
                // Italic is nested inside bold, so it has to be closed and reopened around it.
                if (this.groupState.italic) {
                    this.end("i");
                }
                this.end("b");
                if (this.groupState.italic) {
                    this.start("i");
                }
                this.groupState.bold = false;
            }
        } else if (this.equals("i")) {
            // Italic with a parameter is treated as "italic off"; only 0 is expected.
            assert (param == 0);
            if (this.groupState.italic) {
                this.pushText();
                this.end("i");
                this.groupState.italic = false;
            }
        } else if (this.equals("f")) {
            // Font switch: flush text decoded with the old charset before changing it.
            // A font without a known charset resets the group charset to null.
            Charset fontCharset = this.fontToCharset.get(param);
            this.pushText();
            this.groupState.fontCharset = fontCharset;
        } else if (this.equals("ls")) {
            this.groupState.list = param;
        } else if (this.equals("lslvl")) {
            this.groupState.listLevel = param;
        }
        if (this.equals("u")) {
            // Unicode escape: the parameter is a (possibly negative) UTF-16 code unit.
            if (!this.groupState.ignore || this.groupState.sv || this.groupState.sn) {
                char utf16CodeUnit = (char)(param & 0xFFFF);
                this.addOutputChar(utf16CodeUnit);
            }
            // Skip the fallback bytes that follow the escape.
            this.ansiSkip = this.groupState.ucSkip;
        } else if (this.equals("uc")) {
            this.groupState.ucSkip = param;
        } else if (this.equals("bin") && param >= 0) {
            if (this.groupState.pictDepth == 1) {
                try {
                    this.embObjHandler.writeBytes(in, param);
                }
                catch (IOException e) {
                    // Best effort: abandon the embedded object rather than the whole parse.
                    this.embObjHandler.reset();
                }
            } else {
                // Not inside a picture: read and discard the binary payload.
                int bytesToRead = param;
                byte[] tmpArray = new byte[Math.min(1024, bytesToRead)];
                while (bytesToRead > 0) {
                    int r = in.read(tmpArray, 0, Math.min(bytesToRead, tmpArray.length));
                    if (r < 0) {
                        throw new TikaException("unexpected end of file: need " + param + " bytes of binary data, found " + (param - bytesToRead));
                    }
                    bytesToRead -= r;
                }
            }
        }
    }

    /**
     * @return {@code true} if lists are not being ignored and the current group has a
     *         non-zero list id
     */
    private boolean inList() {
        if (this.ignoreLists) {
            return false;
        }
        return this.groupState.list != 0;
    }

    /**
     * Defers closing of the current list: remembers its id as pending and clears the list
     * id of the current group.
     */
    private void pendingListEnd() {
        final int closingList = this.groupState.list;
        this.groupState.list = 0;
        this.pendingListEnd = closingList;
    }

    /**
     * Closes the {@code ul} or {@code ol} element of a list, unless lists are ignored.
     *
     * @param listID id of the list being closed; decides between {@code ul} and {@code ol}
     */
    private void endList(int listID) throws IOException, SAXException, TikaException {
        if (this.ignoreLists) {
            return;
        }
        final String tag = this.isUnorderedList(listID) ? "ul" : "ol";
        this.out.endElement(tag);
    }

    /**
     * Opens the {@code ul} or {@code ol} element of a list, unless lists are ignored.
     *
     * @param listID id of the list being opened; decides between {@code ul} and {@code ol}
     */
    private void startList(int listID) throws IOException, SAXException, TikaException {
        if (this.ignoreLists) {
            return;
        }
        final String tag = this.isUnorderedList(listID) ? "ul" : "ol";
        this.out.startElement(tag);
    }

    /**
     * Decides whether a list is rendered as unordered.
     *
     * @param listID id to look up in the list table
     * @return the descriptor's answer for the current group's list level, or {@code true}
     *         when the list table has no entry for {@code listID}
     */
    private boolean isUnorderedList(int listID) {
        final ListDescriptor descriptor = this.listTable.get(listID);
        return descriptor == null || descriptor.isUnordered(this.groupState.listLevel);
    }

    /**
     * Closes an element on the output handler.
     *
     * @param tag element name
     */
    private void end(String tag) throws IOException, SAXException, TikaException {
        out.endElement(tag);
    }

    /**
     * Opens an element on the output handler.
     *
     * @param tag element name
     */
    private void start(String tag) throws IOException, SAXException, TikaException {
        out.startElement(tag);
    }

    private void processControlWord() throws IOException, SAXException, TikaException {
        if (this.inHeader) {
            if (this.equals("ansi")) {
                this.globalCharset = WINDOWS_1252;
            } else if (this.equals("pca")) {
                this.globalCharset = CP850;
            } else if (this.equals("pc")) {
                this.globalCharset = CP437;
            } else if (this.equals("mac")) {
                this.globalCharset = MAC_ROMAN;
            }
            if (this.equals("colortbl") || this.equals("stylesheet") || this.equals("fonttbl")) {
                this.groupState.ignore = true;
            } else if (this.equals("listtable")) {
                this.currentListTable = this.listTable;
            } else if (this.equals("listoverridetable")) {
                this.currentListTable = this.listOverrideTable;
            }
            if (this.uprState == -1) {
                if (this.equals("author")) {
                    this.nextMetaData = TikaCoreProperties.CREATOR;
                } else if (this.equals("title")) {
                    this.nextMetaData = TikaCoreProperties.TITLE;
                } else if (this.equals("subject")) {
                    this.nextMetaData = TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT;
                } else if (this.equals("keywords")) {
                    this.nextMetaData = TikaCoreProperties.TRANSITION_KEYWORDS_TO_DC_SUBJECT;
                } else if (this.equals("category")) {
                    this.nextMetaData = OfficeOpenXMLCore.CATEGORY;
                } else if (this.equals("comment")) {
                    this.nextMetaData = TikaCoreProperties.COMMENTS;
                } else if (this.equals("company")) {
                    this.nextMetaData = OfficeOpenXMLExtended.COMPANY;
                } else if (this.equals("manager")) {
                    this.nextMetaData = OfficeOpenXMLExtended.MANAGER;
                } else if (this.equals("template")) {
                    this.nextMetaData = OfficeOpenXMLExtended.TEMPLATE;
                } else if (this.equals("creatim")) {
                    this.nextMetaData = TikaCoreProperties.CREATED;
                }
            }
            if (this.fontTableState == 0) {
                if (this.equals("fonttbl")) {
                    this.fontTableState = 1;
                    this.fontTableDepth = this.groupState.depth;
                }
            } else if (this.fontTableState == 1 && this.groupState.depth < this.fontTableDepth) {
                this.fontTableState = 2;
            }
            if (this.currentListTable != null) {
                if (this.equals("list") || this.equals("listoverride")) {
                    this.currentList = new ListDescriptor();
                    this.listTableLevel = -1;
                } else if (this.currentList != null) {
                    if (this.equals("liststylename")) {
                        this.currentList.isStyle = true;
                    } else if (this.equals("listlevel")) {
                        ++this.listTableLevel;
                    }
                }
            }
            if (!this.groupState.ignore && (this.equals("par") || this.equals("pard") || this.equals("sect") || this.equals("sectd") || this.equals("plain") || this.equals("ltrch") || this.equals("rtlch"))) {
                this.inHeader = false;
            }
        } else if (this.equals("b")) {
            if (!this.groupState.bold) {
                this.pushText();
                this.lazyStartParagraph();
                if (this.groupState.italic) {
                    this.end("i");
                }
                this.groupState.bold = true;
                this.start("b");
                if (this.groupState.italic) {
                    this.start("i");
                }
            }
        } else if (this.equals("i") && !this.groupState.italic) {
            this.pushText();
            this.lazyStartParagraph();
            this.groupState.italic = true;
            this.start("i");
        }
        boolean ignored = this.groupState.ignore;
        if (this.equals("pard")) {
            this.pushText();
            if (this.groupState.italic) {
                this.end("i");
                this.groupState.italic = false;
            }
            if (this.groupState.bold) {
                this.end("b");
                this.groupState.bold = false;
            }
            if (this.inList()) {
                this.pendingListEnd();
            }
        } else if (this.equals("par")) {
            if (!ignored) {
                this.endParagraph(true);
            }
        } else if (this.equals("shptxt")) {
            this.pushText();
            this.groupState.ignore = false;
        } else if (this.equals("atnid")) {
            this.pushText();
            this.groupState.ignore = false;
        } else if (this.equals("atnauthor")) {
            this.pushText();
            this.groupState.ignore = false;
        } else if (this.equals("annotation")) {
            this.pushText();
            this.groupState.ignore = false;
        } else if (this.equals("listtext")) {
            this.groupState.ignore = true;
        } else if (this.equals("cell")) {
            this.endParagraph(true);
        } else if (this.equals("sp")) {
            this.groupState.sp = true;
        } else if (this.equals("sn")) {
            this.embObjHandler.startSN();
            this.groupState.sn = true;
        } else if (this.equals("sv")) {
            this.embObjHandler.startSV();
            this.groupState.sv = true;
        } else if (this.equals("object")) {
            this.pushText();
            this.embObjHandler.setInObject(true);
            this.groupState.object = true;
        } else if (this.equals("objdata")) {
            this.groupState.objdata = true;
            this.embObjHandler.startObjData();
        } else if (this.equals("pict")) {
            this.pushText();
            this.groupState.pictDepth = 1;
            this.embObjHandler.startPict();
        } else if (this.equals("line")) {
            if (!ignored) {
                this.addOutputChar('\n');
            }
        } else if (this.equals("column")) {
            if (!ignored) {
                this.addOutputChar(' ');
            }
        } else if (this.equals("page")) {
            if (!ignored) {
                this.addOutputChar('\n');
            }
        } else if (this.equals("softline")) {
            if (!ignored) {
                this.addOutputChar('\n');
            }
        } else if (this.equals("softcolumn")) {
            if (!ignored) {
                this.addOutputChar(' ');
            }
        } else if (this.equals("softpage")) {
            if (!ignored) {
                this.addOutputChar('\n');
            }
        } else if (this.equals("tab")) {
            if (!ignored) {
                this.addOutputChar('\t');
            }
        } else if (this.equals("upr")) {
            this.uprState = 0;
        } else if (this.equals("ud") && this.uprState == 1) {
            this.uprState = -1;
            this.groupState.ignore = false;
        } else if (this.equals("bullet")) {
            if (!ignored) {
                this.addOutputChar('\u2022');
            }
        } else if (this.equals("endash")) {
            if (!ignored) {
                this.addOutputChar('\u2013');
            }
        } else if (this.equals("emdash")) {
            if (!ignored) {
                this.addOutputChar('\u2014');
            }
        } else if (this.equals("enspace")) {
            if (!ignored) {
                this.addOutputChar('\u2002');
            }
        } else if (this.equals("qmspace")) {
            if (!ignored) {
                this.addOutputChar('\u2005');
            }
        } else if (this.equals("emspace")) {
            if (!ignored) {
                this.addOutputChar('\u2003');
            }
        } else if (this.equals("lquote")) {
            if (!ignored) {
                this.addOutputChar('\u2018');
            }
        } else if (this.equals("rquote")) {
            if (!ignored) {
                this.addOutputChar('\u2019');
            }
        } else if (this.equals("ldblquote")) {
            if (!ignored) {
                this.addOutputChar('\u201c');
            }
        } else if (this.equals("rdblquote")) {
            if (!ignored) {
                this.addOutputChar('\u201d');
            }
        } else if (this.equals("fldinst")) {
            this.fieldState = 1;
            this.groupState.ignore = false;
        } else if (this.equals("fldrslt") && this.fieldState == 2) {
            assert (this.pendingURL != null);
            this.lazyStartParagraph();
            this.out.startElement("a", "href", this.pendingURL);
            this.pendingURL = null;
            this.fieldState = 3;
            this.groupState.ignore = false;
        }
    }

    /**
     * Handles the start of a group (an opening brace in the RTF stream).
     *
     * <p>The current {@code groupState} is saved on {@code groupStates} and a
     * new {@link GroupState} built from it becomes current. If a {@code \\upr}
     * control word was just seen ({@code uprState == 0}), the new group is
     * marked as ignored and {@code uprState} advances to 1.
     *
     * <p>The method then peeks at up to two bytes: when the group opens with
     * {@code \\*} it is marked as ignored. Every byte that was actually read
     * is pushed back so the caller sees the stream unchanged; this needs a
     * pushback capacity of at least two bytes.
     *
     * @param in the stream positioned just after the opening brace
     * @throws IOException if reading from or pushing back to the stream fails
     */
    private void processGroupStart(PushbackInputStream in) throws IOException {
        this.ansiSkip = 0;
        this.groupStates.add(this.groupState);
        this.groupState = new GroupState(this.groupState);
        assert (this.groupStates.size() == this.groupState.depth) : "size=" + this.groupStates.size() + " depth=" + this.groupState.depth;
        if (this.uprState == 0) {
            this.uprState = 1;
            this.groupState.ignore = true;
        }
        int b2 = in.read();
        if (b2 == -1) {
            // End of stream right after the brace. unread(-1) would store the
            // byte 0xFF and the next read would return 255 instead of EOF.
            return;
        }
        if (b2 == '\\') {
            int b3 = in.read();
            if (b3 == '*') {
                this.groupState.ignore = true;
            }
            // Same EOF guard as above: only push back a real byte.
            if (b3 != -1) {
                in.unread(b3);
            }
        }
        // Pushed back last so that it is the first byte read again.
        in.unread(b2);
    }

    /**
     * Handles the end of a group (a closing brace in the RTF stream).
     *
     * <p>In order, this method:
     * <ol>
     *   <li>while still in the header, stores the pending metadata value (if
     *       any property is awaiting one) and clears the pending buffer;</li>
     *   <li>notifies the embedded-object handler when the closing group
     *       carried object data, a picture, or a shape property;</li>
     *   <li>restores the enclosing group's state, closing and reopening
     *       {@code b}/{@code i} elements so that the emitted markup matches
     *       the restored bold/italic flags;</li>
     *   <li>advances the field state machine: a collected {@code HYPERLINK}
     *       instruction becomes the pending URL, and a finished field result
     *       closes the open {@code a} element.</li>
     * </ol>
     *
     * @throws IOException   propagated from the invoked handlers
     * @throws SAXException  propagated from the content handler
     * @throws TikaException propagated from the invoked handlers
     */
    private void processGroupEnd() throws IOException, SAXException, TikaException {
        if (this.inHeader) {
            if (this.nextMetaData != null) {
                if (this.nextMetaData == TikaCoreProperties.CREATED) {
                    // Creation time is assembled from the separately parsed date fields.
                    Calendar created = Calendar.getInstance(TimeZone.getDefault(), Locale.ROOT);
                    created.set(this.year, this.month - 1, this.day, this.hour, this.minute, 0);
                    this.metadata.set(this.nextMetaData, created.getTime());
                } else {
                    boolean multiValued = this.nextMetaData.isMultiValuePermitted();
                    String value = this.pendingBuffer.toString();
                    if (multiValued) {
                        this.metadata.add(this.nextMetaData, value);
                    } else {
                        this.metadata.set(this.nextMetaData, value);
                    }
                }
                this.nextMetaData = null;
            }
            this.pendingBuffer.setLength(0);
        }

        assert (this.groupState.depth > 0);
        this.ansiSkip = 0;

        // Embedded object / picture bookkeeping for the group being closed.
        final GroupState closing = this.groupState;
        if (closing.objdata) {
            this.embObjHandler.handleCompletedObject();
            closing.objdata = false;
        } else if (closing.pictDepth > 0) {
            if (closing.sn) {
                this.embObjHandler.endSN();
            } else if (closing.sv) {
                this.embObjHandler.endSV();
            } else if (closing.sp) {
                this.embObjHandler.endSP();
            } else if (closing.pictDepth == 1) {
                this.embObjHandler.handleCompletedObject();
            }
        }
        if (closing.object) {
            this.embObjHandler.setInObject(false);
        }

        // Pop back to the enclosing group, reconciling the open b/i elements.
        if (!this.groupStates.isEmpty()) {
            final GroupState parent = this.groupStates.removeLast();
            final boolean boldDiffers = closing.bold != parent.bold;
            // The i element is closed first whenever the b element around it must change.
            if (closing.italic && (boldDiffers || !parent.italic)) {
                this.end("i");
                closing.italic = false;
            }
            if (boldDiffers) {
                if (closing.bold) {
                    this.end("b");
                } else {
                    this.start("b");
                }
            }
            if (parent.italic && !closing.italic) {
                this.start("i");
            }
            this.groupState = parent;
        }
        assert (this.groupStates.size() == this.groupState.depth);

        // Field (hyperlink) state machine.
        if (this.fieldState == 1) {
            String instruction = this.pendingBuffer.toString().trim();
            this.pendingBuffer.setLength(0);
            if (!instruction.startsWith("HYPERLINK")) {
                this.fieldState = 0;
            } else {
                String target = instruction.substring("HYPERLINK".length()).trim();
                boolean localLink = target.contains("\\l ");
                // Prefer the text between the first pair of double quotes, if there is one.
                int openQuote = target.indexOf('"');
                if (openQuote != -1) {
                    int closeQuote = target.indexOf('"', openQuote + 1);
                    if (closeQuote != -1) {
                        target = target.substring(openQuote + 1, closeQuote);
                    }
                }
                String prefix = localLink ? "#" : "";
                this.pendingURL = prefix + target;
                this.fieldState = 2;
            }
        } else if (this.fieldState == 3) {
            this.out.endElement("a");
            this.fieldState = 0;
        }
    }

    /*
     * Populates the two lookup tables used to pick a Charset:
     * FCHARSET_MAP is keyed by the numeric argument of the \fcharset control
     * word, ANSICPG_MAP by the code page number given to \ansicpg.
     */
    static {
        FCHARSET_MAP.put(0, WINDOWS_1252);
        FCHARSET_MAP.put(77, MAC_ROMAN);
        FCHARSET_MAP.put(78, SHIFT_JIS);
        FCHARSET_MAP.put(79, MS949);
        FCHARSET_MAP.put(80, GB2312);
        FCHARSET_MAP.put(81, BIG5);
        FCHARSET_MAP.put(82, JOHAB);
        FCHARSET_MAP.put(83, MAC_HEBREW);
        FCHARSET_MAP.put(84, MAC_ARABIC);
        FCHARSET_MAP.put(85, MAC_GREEK);
        FCHARSET_MAP.put(86, MAC_TURKISH);
        FCHARSET_MAP.put(87, MAC_THAI);
        FCHARSET_MAP.put(88, CP1250);
        FCHARSET_MAP.put(89, CP1251);
        FCHARSET_MAP.put(128, MS932);
        FCHARSET_MAP.put(129, MS949);
        FCHARSET_MAP.put(130, MS1361);
        FCHARSET_MAP.put(134, MS936);
        FCHARSET_MAP.put(136, MS950);
        FCHARSET_MAP.put(161, CP1253);
        FCHARSET_MAP.put(162, CP1254);
        FCHARSET_MAP.put(163, CP1258);
        FCHARSET_MAP.put(177, CP1255);
        FCHARSET_MAP.put(178, CP1256);
        FCHARSET_MAP.put(186, CP1257);
        FCHARSET_MAP.put(204, CP1251);
        FCHARSET_MAP.put(222, MS874);
        FCHARSET_MAP.put(238, CP1250);
        FCHARSET_MAP.put(254, CP437);
        FCHARSET_MAP.put(255, CP850);
        ANSICPG_MAP = new HashMap<Integer, Charset>();
        ANSICPG_MAP.put(437, CP4372);
        ANSICPG_MAP.put(708, ISO_8859_6);
        ANSICPG_MAP.put(709, WINDOWS_709);
        // Each code page gets its own key. All three used to be stored under
        // 710, so the last put won and 711 / 720 were never registered.
        ANSICPG_MAP.put(710, WINDOWS_710);
        ANSICPG_MAP.put(711, WINDOWS_711);
        ANSICPG_MAP.put(720, WINDOWS_720);
        ANSICPG_MAP.put(819, CP819);
        ANSICPG_MAP.put(850, CP8502);
        ANSICPG_MAP.put(852, CP852);
        ANSICPG_MAP.put(860, CP860);
        ANSICPG_MAP.put(862, CP862);
        ANSICPG_MAP.put(863, CP863);
        ANSICPG_MAP.put(864, CP864);
        ANSICPG_MAP.put(865, CP865);
        ANSICPG_MAP.put(866, CP866);
        ANSICPG_MAP.put(874, MS8742);
        ANSICPG_MAP.put(932, MS932);
        ANSICPG_MAP.put(936, MS9362);
        ANSICPG_MAP.put(949, CP949);
        ANSICPG_MAP.put(950, CP950);
        ANSICPG_MAP.put(1250, CP12502);
        ANSICPG_MAP.put(1251, CP12512);
        ANSICPG_MAP.put(1252, CP1252);
        ANSICPG_MAP.put(1253, CP12532);
        ANSICPG_MAP.put(1254, CP12542);
        ANSICPG_MAP.put(1255, CP12552);
        ANSICPG_MAP.put(1256, CP12562);
        ANSICPG_MAP.put(1257, CP12572);
        ANSICPG_MAP.put(1258, CP12582);
        ANSICPG_MAP.put(1361, X_JOHAB);
        ANSICPG_MAP.put(10000, MAC_ROMAN);
        ANSICPG_MAP.put(10001, SHIFT_JIS);
        ANSICPG_MAP.put(10004, MAC_ARABIC);
        ANSICPG_MAP.put(10005, MAC_HEBREW);
        ANSICPG_MAP.put(10006, MAC_GREEK);
        ANSICPG_MAP.put(10007, MAC_CYRILLIC);
        ANSICPG_MAP.put(10029, X_MAC_CENTRAL_EUROPE);
        ANSICPG_MAP.put(10081, MAC_TURKISH);
        ANSICPG_MAP.put(57002, X_ISCII91);
        ANSICPG_MAP.put(57003, WINDOWS_57003);
        ANSICPG_MAP.put(57004, WINDOWS_57004);
        ANSICPG_MAP.put(57005, WINDOWS_57005);
        ANSICPG_MAP.put(57006, WINDOWS_57006);
        ANSICPG_MAP.put(57007, WINDOWS_57007);
        ANSICPG_MAP.put(57008, WINDOWS_57008);
        ANSICPG_MAP.put(57009, WINDOWS_57009);
        ANSICPG_MAP.put(57010, WINDOWS_57010);
        ANSICPG_MAP.put(57011, WINDOWS_57011);
    }
}

