Class CodepointHelper


  • @Immutable
    public final class CodepointHelper
    extends Object
    General utilities for dealing with Unicode characters
    Author:
    Apache Abdera
    • Method Detail

      • inRange

        public static boolean inRange​(@Nonnull
                                      char[] aChars,
                                      char cLow,
                                      char cHigh)
        Parameters:
        aChars - char array
        cLow - lower bound of the range (inclusive)
        cHigh - upper bound of the range (inclusive)
        Returns:
        true if all the characters in aChars are within the range [cLow, cHigh]
      • inRange

        public static boolean inRange​(char[] aChars,
                                      int nLow,
                                      int nHigh)
        Parameters:
        aChars - char array
        nLow - lower bound of the range (inclusive)
        nHigh - upper bound of the range (inclusive)
        Returns:
        true if all the characters in aChars are within the range [nLow, nHigh]
      • inRange

        public static boolean inRange​(int nCodepoint,
                                      int nLow,
                                      int nHigh)
        Parameters:
        nCodepoint - codepoint
        nLow - lower bound of the range (inclusive)
        nHigh - upper bound of the range (inclusive)
        Returns:
        true if the codepoint is within the range [nLow, nHigh]
      • getHighSurrogate

        public static char getHighSurrogate​(int nCodepoint)
        Parameters:
        nCodepoint - Codepoint
        Returns:
        the high surrogate for the given Unicode codepoint
      • getLowSurrogate

        public static char getLowSurrogate​(int nCodepoint)
        Parameters:
        nCodepoint - Codepoint
        Returns:
        the low surrogate for the given Unicode codepoint
      • codepointAt

        @Nonnull
        public static Codepoint codepointAt​(@Nonnull
                                            CharSequence aSeq,
                                            int nIndex)
        Parameters:
        aSeq - source sequence
        nIndex - index
        Returns:
        the codepoint at the given location, automatically dealing with surrogate pairs
      • insert

        public static void insert​(CharSequence aSeq,
                                  int nIndex,
                                  @Nonnull
                                  Codepoint aCodepoint)
        Insert a codepoint into the buffer, automatically dealing with surrogate pairs
        Parameters:
        aSeq - source sequence
        nIndex - index
        aCodepoint - codepoint to be inserted
      • insert

        public static void insert​(@Nonnull
                                  CharSequence aSeq,
                                  int nIndex,
                                  int nCodepoint)
        Insert a codepoint into the buffer, automatically dealing with surrogate pairs
        Parameters:
        aSeq - source sequence
        nIndex - index
        nCodepoint - codepoint to be inserted
      • setChar

        public static void setChar​(@Nonnull
                                   CharSequence aSeq,
                                   int nIndex,
                                   @Nonnull
                                   Codepoint aCodepoint)
        Set the character at a given location, automatically dealing with surrogate pairs
        Parameters:
        aSeq - source sequence
        nIndex - index
        aCodepoint - codepoint to be set
      • setChar

        public static void setChar​(@Nonnull
                                   CharSequence aSeq,
                                   int nIndex,
                                   int nCodepoint)
        Set the character at a given location, automatically dealing with surrogate pairs
        Parameters:
        aSeq - source sequence
        nIndex - index
        nCodepoint - codepoint to be set
      • length

        @Nonnegative
        public static int length​(@Nonnull
                                 CharSequence aSeq)
        Parameters:
        aSeq - source sequence
        Returns:
        the total number of codepoints in the buffer. Each surrogate pair counts as a single codepoint
      • length

        @Nonnegative
        public static int length​(@Nonnull
                                 char[] aArray)
        Parameters:
        aArray - source array
        Returns:
        the total number of codepoints in the buffer. Each surrogate pair counts as a single codepoint
      • getAsString

        @Nonnull
        @Nonempty
        public static String getAsString​(int nCodepoint)
        Parameters:
        nCodepoint - codepoint
        Returns:
        the String representation of the codepoint, automatically dealing with surrogate pairs
      • stripBidi

        @Nullable
        public static String stripBidi​(@Nullable
                                       String sStr)
        Removes leading and trailing bidi controls from the string
        Parameters:
        sStr - Source string
        Returns:
        the modified string
      • stripBidiInternal

        @Nonnull
        public static String stripBidiInternal​(@Nonnull
                                               String sStr)
        Removes bidi controls from within a string
        Parameters:
        sStr - Source string
        Returns:
        the modified string
      • wrapBidi

        @Nullable
        public static String wrapBidi​(@Nullable
                                      String sStr,
                                      char cChar)
        Wrap the string with the specified bidi control
        Parameters:
        sStr - source string
        cChar - the bidi control character to wrap the string with
        Returns:
        The wrapped string
      • isHex

        public static boolean isHex​(int nCodepoint)
      • isBidi

        public static boolean isBidi​(int nCodepoint)
        Parameters:
        nCodepoint - codepoint
        Returns:
        true if the codepoint is a bidi control character
      • inverseSetContains

        public static boolean inverseSetContains​(@Nonnull
                                                 int[] aCodepointSet,
                                                 int value)
        Treats the specified int array as an Inversion Set and returns true if the value is located within the set. This will only work correctly if the values in the int array are monotonically increasing.
        Parameters:
        aCodepointSet - Source set
        value - Value to check
        Returns:
        true if the value is located within the set
      • isPctEnc

        public static boolean isPctEnc​(int codepoint)
      • isMark

        public static boolean isMark​(int codepoint)
      • isUnreserved

        public static boolean isUnreserved​(int codepoint)
      • isReserved

        public static boolean isReserved​(int codepoint)
      • isGenDelim

        public static boolean isGenDelim​(int codepoint)
      • isSubDelim

        public static boolean isSubDelim​(int codepoint)
      • isPchar

        public static boolean isPchar​(int codepoint)
      • isPath

        public static boolean isPath​(int codepoint)
      • isPathNoDelims

        public static boolean isPathNoDelims​(int codepoint)
      • isScheme

        public static boolean isScheme​(int codepoint)
      • isUserInfo

        public static boolean isUserInfo​(int codepoint)
      • isQuery

        public static boolean isQuery​(int codepoint)
      • isFragment

        public static boolean isFragment​(int codepoint)
      • is_ucschar

        public static boolean is_ucschar​(int codepoint)
      • is_iprivate

        public static boolean is_iprivate​(int codepoint)
      • is_iunreserved

        public static boolean is_iunreserved​(int codepoint)
      • is_ipchar

        public static boolean is_ipchar​(int codepoint)
      • is_ipath

        public static boolean is_ipath​(int codepoint)
      • is_ipathnodelims

        public static boolean is_ipathnodelims​(int codepoint)
      • is_iquery

        public static boolean is_iquery​(int codepoint)
      • is_ifragment

        public static boolean is_ifragment​(int codepoint)
      • is_iregname

        public static boolean is_iregname​(int codepoint)
      • is_ipliteral

        public static boolean is_ipliteral​(int codepoint)
      • is_ihost

        public static boolean is_ihost​(int codepoint)
      • is_regname

        public static boolean is_regname​(int codepoint)
      • is_iuserinfo

        public static boolean is_iuserinfo​(int codepoint)
      • is_iserver

        public static boolean is_iserver​(int codepoint)
      • verify

        public static void verify​(AbstractCodepointIterator aIter,
                                  IntPredicate aFilter)
        Verifies a sequence of codepoints using the specified filter
        Parameters:
        aIter - Codepoint iterator
        aFilter - filter
      • verify

        public static void verify​(@Nullable
                                  char[] aArray,
                                  @Nonnull
                                  ECodepointProfile eProfile)
        Verifies a sequence of codepoints using the specified profile
        Parameters:
        aArray - char array
        eProfile - profile to use
      • verify

        public static void verify​(@Nullable
                                  String sStr,
                                  @Nonnull
                                  ECodepointProfile eProfile)
        Verifies a sequence of codepoints using the specified profile
        Parameters:
        sStr - String
        eProfile - profile to use
      • verifyNot

        public static void verifyNot​(ICodepointIterator aIter,
                                     IntPredicate aFilter)
        Verifies a sequence of codepoints using the specified filter
        Parameters:
        aIter - Codepoint iterator
        aFilter - Filter to use
      • verifyNot

        public static void verifyNot​(ICodepointIterator aIter,
                                     @Nonnull
                                     ECodepointProfile eProfile)
        Verifies a sequence of codepoints using the specified profile
        Parameters:
        aIter - Codepoint iterator
        eProfile - profile to use
      • verifyNot

        public static void verifyNot​(char[] aArray,
                                     @Nonnull
                                     ECodepointProfile eProfile)
        Verifies a sequence of codepoints using the specified profile
        Parameters:
        aArray - char array
        eProfile - profile to use