Unicode Character Functions

Enumerations

enum  BIDIFLAGS {
  BIDIFLAG_L = (1 << BIDITAG_L), BIDIFLAG_LRE = (1 << BIDITAG_LRE), BIDIFLAG_LRO = (1 << BIDITAG_LRO), BIDIFLAG_R = (1 << BIDITAG_R),
  BIDIFLAG_AL = (1 << BIDITAG_AL), BIDIFLAG_RLE = (1 << BIDITAG_RLE), BIDIFLAG_RLO = (1 << BIDITAG_RLO), BIDIFLAG_PDF = (1 << BIDITAG_PDF),
  BIDIFLAG_EN = (1 << BIDITAG_EN), BIDIFLAG_ES = (1 << BIDITAG_ES), BIDIFLAG_ET = (1 << BIDITAG_ET), BIDIFLAG_AN = (1 << BIDITAG_AN),
  BIDIFLAG_CS = (1 << BIDITAG_CS), BIDIFLAG_NSM = (1 << BIDITAG_NSM), BIDIFLAG_BN = (1 << BIDITAG_BN), BIDIFLAG_B = (1 << BIDITAG_B),
  BIDIFLAG_WS = (1 << BIDITAG_WS), BIDIFLAG_ON = (1 << BIDITAG_ON)
}
enum  BIDITAG {
  BIDITAG_L = 0, BIDITAG_LRE = 1, BIDITAG_LRO = 2, BIDITAG_R = 3,
  BIDITAG_AL = 4, BIDITAG_RLE = 5, BIDITAG_RLO = 6, BIDITAG_PDF = 7,
  BIDITAG_EN = 8, BIDITAG_ES = 9, BIDITAG_ET = 10, BIDITAG_AN = 11,
  BIDITAG_CS = 12, BIDITAG_NSM = 13, BIDITAG_BN = 14, BIDITAG_B = 15,
  BIDITAG_WS = 16, BIDITAG_ON = 17
}
enum  UCCAT {
  UCCAT_Cn = 0, UCCAT_Lu = 1, UCCAT_Ll = 2, UCCAT_Lt = 3,
  UCCAT_Lm = 4, UCCAT_Lo = 5, UCCAT_Mn = 6, UCCAT_Mc = 7,
  UCCAT_Me = 8, UCCAT_Nd = 9, UCCAT_Nl = 10, UCCAT_No = 11,
  UCCAT_Zs = 12, UCCAT_Zl = 13, UCCAT_Zp = 14, UCCAT_Cc = 15,
  UCCAT_Cf = 16, UCCAT_Cs = 17, UCCAT_Co = 18, UCCAT_Pc = 20,
  UCCAT_Pd = 21, UCCAT_Ps = 22, UCCAT_Pe = 23, UCCAT_Pi = 24,
  UCCAT_Pf = 25, UCCAT_Po = 26, UCCAT_Sm = 27, UCCAT_Sc = 28,
  UCCAT_Sk = 29, UCCAT_So = 30
}
enum  UCCATFLAGS {
  UCCATFLAG_Cn = (1 << UCCAT_Cn), UCCATFLAG_Lu = (1 << UCCAT_Lu), UCCATFLAG_Ll = (1 << UCCAT_Ll), UCCATFLAG_Lt = (1 << UCCAT_Lt),
  UCCATFLAG_Lm = (1 << UCCAT_Lm), UCCATFLAG_Lo = (1 << UCCAT_Lo), UCCATFLAG_Mn = (1 << UCCAT_Mn), UCCATFLAG_Mc = (1 << UCCAT_Mc),
  UCCATFLAG_Me = (1 << UCCAT_Me), UCCATFLAG_Nd = (1 << UCCAT_Nd), UCCATFLAG_Nl = (1 << UCCAT_Nl), UCCATFLAG_No = (1 << UCCAT_No),
  UCCATFLAG_Zs = (1 << UCCAT_Zs), UCCATFLAG_Zl = (1 << UCCAT_Zl), UCCATFLAG_Zp = (1 << UCCAT_Zp), UCCATFLAG_Cc = (1 << UCCAT_Cc),
  UCCATFLAG_Cf = (1 << UCCAT_Cf), UCCATFLAG_Cs = (1 << UCCAT_Cs), UCCATFLAG_Co = (1 << UCCAT_Co), UCCATFLAG_Pc = (1 << UCCAT_Pc),
  UCCATFLAG_Pd = (1 << UCCAT_Pd), UCCATFLAG_Ps = (1 << UCCAT_Ps), UCCATFLAG_Pe = (1 << UCCAT_Pe), UCCATFLAG_Pi = (1 << UCCAT_Pi),
  UCCATFLAG_Pf = (1 << UCCAT_Pf), UCCATFLAG_Po = (1 << UCCAT_Po), UCCATFLAG_Sm = (1 << UCCAT_Sm), UCCATFLAG_Sc = (1 << UCCAT_Sc),
  UCCATFLAG_Sk = (1 << UCCAT_Sk), UCCATFLAG_So = (1 << UCCAT_So), UCCATFLAG_Mi = (1 << 31)
}

Functions

bool ucisalnum (MIUNICODE c)
bool ucisalpha (MIUNICODE c)
LIBEXPORT bool ucisbidiprop (MIUNICODE c, UINT32 bidiflags)
bool ucisblank (MIUNICODE c)
bool ucisclosepunct (MIUNICODE c)
bool uciscntrl (MIUNICODE c)
bool ucisconnect (MIUNICODE c)
bool uciscurrency (MIUNICODE c)
bool ucisdash (MIUNICODE c)
LIBEXPORT bool ucisdecompbidiprop (MIUNICODE c, UINT32 decompflags)
bool ucisdigit0to9 (MIUNICODE c)
bool ucisdigit2 (MIUNICODE c)
bool ucisenclosing (MIUNICODE c)
bool ucisfinalpunct (MIUNICODE c)
bool ucisfmtcntrl (MIUNICODE c)
bool ucishan (MIUNICODE c)
bool ucishangul (MIUNICODE c)
bool ucisinitialpunct (MIUNICODE c)
bool ucisisocntrl (MIUNICODE c)
bool ucislower (MIUNICODE c)
bool ucislsep (MIUNICODE c)
bool ucisltr (MIUNICODE c)
bool ucismark (MIUNICODE c)
bool ucismath (MIUNICODE c)
bool ucismodif (MIUNICODE c)
bool ucismodifsymbol (MIUNICODE c)
bool ucisneutral (MIUNICODE c)
bool ucisnonspacing (MIUNICODE c)
bool ucisnsmark (MIUNICODE c)
bool ucisnumber (MIUNICODE c)
bool ucisopenpunct (MIUNICODE c)
LIBEXPORT bool ucisprop (MIUNICODE c, UINT32 uccatflags)
bool ucispsep (MIUNICODE c)
bool ucispunct (MIUNICODE c)
bool ucisrtl (MIUNICODE c)
bool ucisspace (MIUNICODE c)
bool ucisspmark (MIUNICODE c)
bool ucisstrong (MIUNICODE c)
bool ucissymbol (MIUNICODE c)
bool ucistitle (MIUNICODE c)
bool ucisupper (MIUNICODE c)
bool ucisweak (MIUNICODE c)
LIBEXPORT MIUNICODE uctolower (MIUNICODE c)
LIBEXPORT MIUNICODE uctoupper (MIUNICODE c)

Enumeration Type Documentation

enum BIDIFLAGS

Bit flags corresponding to the BIDITAG values (each is 1 << the matching BIDITAG); they can be ORed together.

Enumerator:
BIDIFLAG_L 

Left-to-right.

BIDIFLAG_LRE 

Left-to-right Embedding.

BIDIFLAG_LRO 

Left-to-right Override.

BIDIFLAG_R 

Right-to-left.

BIDIFLAG_AL 

Right-to-left arabic.

BIDIFLAG_RLE 

Right-to-left embedding.

BIDIFLAG_RLO 

Right-to-left override.

BIDIFLAG_PDF 

Pop Directional Format.

BIDIFLAG_EN 

European Number.

BIDIFLAG_ES 

European Number separator.

BIDIFLAG_ET 

European Number Terminator.

BIDIFLAG_AN 

Arabic Number.

BIDIFLAG_CS 

Common Number Separator.

BIDIFLAG_NSM 

Non Spacing Mark.

BIDIFLAG_BN 

Boundary Neutral.

BIDIFLAG_B 

Paragraph Separator.

BIDIFLAG_WS 

White Space.

BIDIFLAG_ON 

Other Neutrals.

enum BIDITAG

Unicode Bidirectional category from column 4 of UnicodeData.txt. See the details on the UCCAT enum.

These are summarized in chapter 3 of the Unicode Standard.

Enumerator:
BIDITAG_L 

Left-to-right.

BIDITAG_LRE 

Left-to-right Embedding.

BIDITAG_LRO 

Left-to-right Override.

BIDITAG_R 

Right-to-left.

BIDITAG_AL 

Right-to-left arabic.

BIDITAG_RLE 

Right-to-left embedding.

BIDITAG_RLO 

Right-to-left override.

BIDITAG_PDF 

Pop Directional Format.

BIDITAG_EN 

European Number.

BIDITAG_ES 

European Number separator.

BIDITAG_ET 

European Number Terminator.

BIDITAG_AN 

Arabic Number.

BIDITAG_CS 

Common Number Separator.

BIDITAG_NSM 

Non Spacing Mark.

BIDITAG_BN 

Boundary Neutral.

BIDITAG_B 

Paragraph Separator.

BIDITAG_WS 

White Space.

BIDITAG_ON 

Other Neutrals.

enum UCCAT

These are the general category codes from column 2 of the UnicodeData.txt.

The numbers are arbitrarily assigned by me. UnicodeData.txt has the 2-letter codes. You may be asking why I defined the values for all the enums when the compiler would do that for me... See, this way it would be a pain to insert something into the middle and renumber them, wouldn't it? Good! Don't do that. ucdata.ref has the numbers, and if you renumber them, you'll break things.

Enumerator:
UCCAT_Cn 

Other, Not assigned (norm).

UCCAT_Lu 

Letter, Uppercase (norm).

UCCAT_Ll 

Letter, Lowercase (norm).

UCCAT_Lt 

Letter, Titlecase (norm).

UCCAT_Lm 

Letter, Modifier.

UCCAT_Lo 

Letter, Other.

UCCAT_Mn 

Mark, non-spacing (norm).

UCCAT_Mc 

Mark, spacing combining (norm).

UCCAT_Me 

Mark, enclosing (norm).

UCCAT_Nd 

Number Decimal digit (norm).

UCCAT_Nl 

Number Letter (norm).

UCCAT_No 

Number Other (norm).

UCCAT_Zs 

Separator, space (norm).

UCCAT_Zl 

Separator, line (norm).

UCCAT_Zp 

Separator, paragraph (norm).

UCCAT_Cc 

Other, control (norm).

UCCAT_Cf 

Other, format (norm).

UCCAT_Cs 

Other, surrogate (norm).

UCCAT_Co 

Other, Private use (norm).

UCCAT_Pc 

Punctuation, connector.

UCCAT_Pd 

Punctuation, Dash.

UCCAT_Ps 

Punctuation, Open.

UCCAT_Pe 

Punctuation, Close.

UCCAT_Pi 

Punctuation, Initial quote.

UCCAT_Pf 

Punctuation, final quote.

UCCAT_Po 

Punctuation, other.

UCCAT_Sm 

Symbol, math.

UCCAT_Sc 

Symbol, currency.

UCCAT_Sk 

Symbol, modifier.

UCCAT_So 

Symbol, other.

enum UCCATFLAGS

Bit flags corresponding to the UCCAT values (each is 1 << the matching UCCAT, plus the extra UCCATFLAG_Mi bit); they can be ORed together.

Enumerator:
UCCATFLAG_Cn 

Other, Not assigned (norm).

UCCATFLAG_Lu 

Letter, Uppercase (norm).

UCCATFLAG_Ll 

Letter, Lowercase (norm).

UCCATFLAG_Lt 

Letter, Titlecase (norm).

UCCATFLAG_Lm 

Letter, Modifier.

UCCATFLAG_Lo 

Letter, Other.

UCCATFLAG_Mn 

Mark, non-spacing (norm).

UCCATFLAG_Mc 

Mark, spacing combining (norm).

UCCATFLAG_Me 

Mark, enclosing (norm).

UCCATFLAG_Nd 

Number Decimal digit (norm).

UCCATFLAG_Nl 

Number Letter (norm).

UCCATFLAG_No 

Number Other (norm).

UCCATFLAG_Zs 

Separator, space (norm).

UCCATFLAG_Zl 

Separator, line (norm).

UCCATFLAG_Zp 

Separator, paragraph (norm).

UCCATFLAG_Cc 

Other, control (norm).

UCCATFLAG_Cf 

Other, format (norm).

UCCATFLAG_Cs 

Other, surrogate (norm).

UCCATFLAG_Co 

Other, Private use (norm).

UCCATFLAG_Pc 

Punctuation, connector.

UCCATFLAG_Pd 

Punctuation, Dash.

UCCATFLAG_Ps 

Punctuation, Open.

UCCATFLAG_Pe 

Punctuation, Close.

UCCATFLAG_Pi 

Punctuation, Initial quote.

UCCATFLAG_Pf 

Punctuation, final quote.

UCCATFLAG_Po 

Punctuation, other.

UCCATFLAG_Sm 

Symbol, math.

UCCATFLAG_Sc 

Symbol, currency.

UCCATFLAG_Sk 

Symbol, modifier.

UCCATFLAG_So 

Symbol, other.

UCCATFLAG_Mi 

other


Function Documentation

bool ucisalnum ( MIUNICODE  c  )  [inline]

Alpha or Digit.

bool ucisalpha ( MIUNICODE  c  )  [inline]

Alpha.

LIBEXPORT bool ucisbidiprop ( MIUNICODE  c,
UINT32  bidiflags 
)
bool ucisblank ( MIUNICODE  c  )  [inline]

Space.

bool ucisclosepunct ( MIUNICODE  c  )  [inline]

Punctuation, Close.

bool uciscntrl ( MIUNICODE  c  )  [inline]

Control.

bool ucisconnect ( MIUNICODE  c  )  [inline]

Punctuation, Connecting.

bool uciscurrency ( MIUNICODE  c  )  [inline]

Symbol, Currency.

bool ucisdash ( MIUNICODE  c  )  [inline]

Punctuation, Dash.

LIBEXPORT bool ucisdecompbidiprop ( MIUNICODE  c,
UINT32  decompflags 
)
bool ucisdigit0to9 ( MIUNICODE  c  )  [inline]

Determine if digit 0 to 9.

bool ucisdigit2 ( MIUNICODE  c  )  [inline]

Digit (any digit, not just 0-9).

bool ucisenclosing ( MIUNICODE  c  )  [inline]

Mark, enclosing.

bool ucisfinalpunct ( MIUNICODE  c  )  [inline]

Punctuation, Final.

bool ucisfmtcntrl ( MIUNICODE  c  )  [inline]
bool ucishan ( MIUNICODE  c  )  [inline]

Han glyph.

bool ucishangul ( MIUNICODE  c  )  [inline]

Hangul glyph.

bool ucisinitialpunct ( MIUNICODE  c  )  [inline]

Punctuation, Initial.

bool ucisisocntrl ( MIUNICODE  c  )  [inline]
bool ucislower ( MIUNICODE  c  )  [inline]
bool ucislsep ( MIUNICODE  c  )  [inline]

Separator, Line.

bool ucisltr ( MIUNICODE  c  )  [inline]

Strong Left-to-Right Directionality.

bool ucismark ( MIUNICODE  c  )  [inline]

Mark (any).

bool ucismath ( MIUNICODE  c  )  [inline]

Symbol, math.

bool ucismodif ( MIUNICODE  c  )  [inline]

Letter, modifier.

bool ucismodifsymbol ( MIUNICODE  c  )  [inline]

Symbol, modifier.

bool ucisneutral ( MIUNICODE  c  )  [inline]

Neutral Directionality.

bool ucisnonspacing ( MIUNICODE  c  )  [inline]

Mark, Non spacing.

bool ucisnsmark ( MIUNICODE  c  )  [inline]

Mark, non-spacing.

bool ucisnumber ( MIUNICODE  c  )  [inline]

Number (digit, letter form, other).

bool ucisopenpunct ( MIUNICODE  c  )  [inline]

Punctuation, Open.

LIBEXPORT bool ucisprop ( MIUNICODE  c,
UINT32  uccatflags 
)

Determine if a Unicode character has a given property.

The property flags are listed in ucstring.h (the name is derived from isprop() from ctype.h). There are two sets of flags because there are so many possibilities with Unicode. You won't usually use this function directly.

bool ucispsep ( MIUNICODE  c  )  [inline]

Separator, Paragraph.

bool ucispunct ( MIUNICODE  c  )  [inline]
bool ucisrtl ( MIUNICODE  c  )  [inline]

Strong Right-to-Left Directionality.

bool ucisspace ( MIUNICODE  c  )  [inline]

Space.

bool ucisspmark ( MIUNICODE  c  )  [inline]

Mark, spacing.

bool ucisstrong ( MIUNICODE  c  )  [inline]

Strong R-to-L or L-to-R Directionality.

bool ucissymbol ( MIUNICODE  c  )  [inline]
bool ucistitle ( MIUNICODE  c  )  [inline]
bool ucisupper ( MIUNICODE  c  )  [inline]
bool ucisweak ( MIUNICODE  c  )  [inline]

Weak Directionality.

LIBEXPORT MIUNICODE uctolower ( MIUNICODE  c  ) 
LIBEXPORT MIUNICODE uctoupper ( MIUNICODE  c  ) 

Generated on Sun Oct 7 21:33:04 2012 for TNTsdk 2012 by  doxygen 1.6.1