001/* 002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package org.tribuo.util.tokens.impl; 018 019import com.oracle.labs.mlrg.olcut.provenance.ConfiguredObjectProvenance; 020import com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl; 021import org.tribuo.util.tokens.Token; 022import org.tribuo.util.tokens.Tokenizer; 023 024/** 025 * A convenience class for when you are required to provide a tokenizer but you 026 * don't actually want to split up the text into tokens. This tokenizer will 027 * serve up a single "token" corresponding to the input text. 028 */ 029public class NonTokenizer implements Tokenizer { 030 031 private CharSequence cs; 032 033 private boolean done = false; 034 035 /** 036 * Constructs a NonTokenizer. 037 */ 038 public NonTokenizer() { } 039 040 @Override 041 public ConfiguredObjectProvenance getProvenance() { 042 return new ConfiguredObjectProvenanceImpl(this, "Tokenizer"); 043 } 044 045 @Override 046 public void reset(CharSequence cs) { 047 this.cs = cs; 048 this.done = false; 049 } 050 051 @Override 052 public boolean advance() { 053 if (cs == null) { 054 throw new IllegalStateException("NonTokenizer has not been reset."); 055 } 056 if (!done) { 057 done = true; 058 return true; 059 } 060 return false; 061 } 062 063 @Override 064 public String getText() { 065 if (done) { 066 return cs.toString(); 067 } else { 068 throw new IllegalStateException("NonTokenizer isn't ready."); 069 } 070 } 071 072 @Override 073 public int getStart() { 074 if (done) { 075 return 0; 076 } else { 077 throw new IllegalStateException("NonTokenizer isn't ready."); 078 } 079 } 080 081 @Override 082 public int getEnd() { 083 if (done) { 084 return cs.length(); 085 } else { 086 throw new IllegalStateException("NonTokenizer isn't ready."); 087 } 088 } 089 090 @Override 091 public Token.TokenType getType() { 092 if (done) { 093 return Token.TokenType.WORD; 094 } else { 095 throw new IllegalStateException("NonTokenizer isn't ready."); 096 } 097 } 098 099 @Override 100 public NonTokenizer clone() { 101 try { 102 NonTokenizer copy = (NonTokenizer) super.clone(); 103 copy.done = false; 104 copy.cs = null; 105 return copy; 106 } catch (CloneNotSupportedException e) { 107 throw new Error("Assertion error, NonTokenizer is Cloneable."); 108 } 109 } 110 111}