001/*
002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package org.tribuo.util.tokens.universal;
018
019import org.tribuo.util.tokens.Token;
020
021/**
022 * A range currently being segmented.
023 */
024public final class Range implements CharSequence {
025    /**
026     * The character buffer.
027     */
028    public char[] buff = new char[16];
029    /**
030     * The token length.
031     */
032    public int len;
033    /**
034     * The start index.
035     */
036    public int start;
037    /**
038     * The end index.
039     */
040    public int end;
041    /**
042     * The value to increment by.
043     */
044    public int incr;
045    /**
046     * The current token type.
047     */
048    public Token.TokenType type;
049
050    Range() {}
051
052    /**
053     * Sets the first two characters in the range, and the type to NGRAM.
054     * @param c1 The first character.
055     * @param c2 The second character.
056     * @param start The start value.
057     */
058    public void set(char c1, char c2, int start) {
059        buff[0] = c1;
060        buff[1] = c2;
061        this.start = start;
062        this.end = start + 2;
063        this.len = 2;
064        this.incr = 0;
065        this.type = Token.TokenType.NGRAM;
066    }
067
068    /**
069     * Sets the first character in the range.
070     * @param c The first character.
071     * @param start The start value.
072     */
073    public void set(char c, int start) {
074        buff[0] = c;
075        this.start = start;
076        this.end = start + 1;
077        this.len = 1;
078        this.incr = 1;
079        this.type = Token.TokenType.WORD;
080    }
081
082    /**
083     * Sets the character range.
084     * @param buff The characters.
085     * @param len The length of the character buffer.
086     * @param start The start index.
087     */
088    public void set(char[] buff, int len, int start) {
089        if (this.buff.length < buff.length) {
090            this.buff = new char[buff.length + 1];
091        }
092        System.arraycopy(buff, 0, this.buff, 0, len);
093        this.len = len;
094        this.start = start;
095        this.end = start + len;
096        this.incr = 1;
097        this.type = Token.TokenType.WORD;
098    }
099
100    /**
101     * Sets this range to represent a punctuation character.
102     * @param p The punctuation character.
103     * @param start The start index.
104     */
105    public void punct(char p, int start) {
106        buff[0] = p;
107        this.len = 1;
108        this.start = Math.max(start, 0);
109        this.end = this.start + 1;
110        this.incr = 0;
111        this.type = Token.TokenType.PUNCTUATION;
112    }
113
114    /**
115     * Sets the token type.
116     * @param type The token type.
117     */
118    public void setType(Token.TokenType type) {
119        this.type = type;
120    }
121
122    @Override
123    public int length() {
124        return len;
125    }
126
127    @Override
128    public char charAt(int index) {
129        if (index < len) {
130            return buff[index];
131        }
132        throw new IndexOutOfBoundsException(String.format("index %d exceeds length %d", index, len));
133    }
134
135    @Override
136    public CharSequence subSequence(int start, int end) {
137        Range r = new Range();
138        System.arraycopy(buff, start, r.buff, 0, end - start);
139        r.start = 0;
140        r.len = end - start;
141        r.end = r.len;
142        return r;
143    }
144
145    @Override
146    public String toString() {
147        return new String(buff, 0, len) + " " + type + " " + start + " " + end;
148    }
149}