001/*
002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package org.tribuo.util.tokens.impl;
018
019import com.oracle.labs.mlrg.olcut.provenance.ConfiguredObjectProvenance;
020import com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl;
021import org.tribuo.util.tokens.Token;
022import org.tribuo.util.tokens.Tokenizer;
023
024/**
025 * A convenience class for when you are required to provide a tokenizer but you
026 * don't actually want to split up the text into tokens.  This tokenizer will
027 * serve up a single "token" corresponding to the input text.
028 */
029public class NonTokenizer implements Tokenizer {
030
031    private CharSequence cs;
032
033    private boolean done = false;
034
035    /**
036     * Constructs a NonTokenizer.
037     */
038    public NonTokenizer() { }
039
040    @Override
041    public ConfiguredObjectProvenance getProvenance() {
042        return new ConfiguredObjectProvenanceImpl(this, "Tokenizer");
043    }
044
045    @Override
046    public void reset(CharSequence cs) {
047        this.cs = cs;
048        this.done = false;
049    }
050
051    @Override
052    public boolean advance() {
053        if (cs == null) {
054            throw new IllegalStateException("NonTokenizer has not been reset.");
055        }
056        if (!done) {
057            done = true;
058            return true;
059        }
060        return false;
061    }
062
063    @Override
064    public String getText() {
065        if (done) {
066            return cs.toString();
067        } else {
068            throw new IllegalStateException("NonTokenizer isn't ready.");
069        }
070    }
071
072    @Override
073    public int getStart() {
074        if (done) {
075            return 0;
076        } else {
077            throw new IllegalStateException("NonTokenizer isn't ready.");
078        }
079    }
080
081    @Override
082    public int getEnd() {
083        if (done) {
084            return cs.length();
085        } else {
086            throw new IllegalStateException("NonTokenizer isn't ready.");
087        }
088    }
089
090    @Override
091    public Token.TokenType getType() {
092        if (done) {
093            return Token.TokenType.WORD;
094        } else {
095            throw new IllegalStateException("NonTokenizer isn't ready.");
096        }
097    }
098
099    @Override
100    public NonTokenizer clone() {
101        try {
102            NonTokenizer copy = (NonTokenizer) super.clone();
103            copy.done = false;
104            copy.cs = null;
105            return copy;
106        } catch (CloneNotSupportedException e) {
107            throw new Error("Assertion error, NonTokenizer is Cloneable.");
108        }
109    }
110
111}