mi32/mucdefns.h File Reference

Definitions for Muc functions. More...

#include <mi32/charencoding.h>
Include dependency graph for mucdefns.h:

Go to the source code of this file.

Defines

#define CHARALG_EUC   0x00020000
#define CHARALG_GL2GR   0x00040000
#define CHARALG_GR2GL   0x00080000
#define CHARALG_SJIS   0x00010000
#define CHARSET_ArabicUC   0x00000033
#define CHARSET_ASCII   CHARSET_ISO_Latin_1
#define CHARSET_BigFive   0x00000007
#define CHARSET_CNS_11543_01   0x00000008
#define CHARSET_CNS_11543_02   0x00000009
#define CHARSET_CNS_11543_14   0x0000000A
#define CHARSET_CP932   0x00000051
#define CHARSET_DOSCP_437   0x00000022
#define CHARSET_DOSCP_850   0x00000023
#define CHARSET_DOSCP_852   0x00000024
#define CHARSET_DOSCP_857   0x00000025
#define CHARSET_DOSCP_861   0x00000026
#define CHARSET_DOSCP_863   0x00000027
#define CHARSET_DOSCP_865   0x00000028
#define CHARSET_GB_12345   0x00000002
#define CHARSET_GB_2312   0x00000001
#define CHARSET_GB_7589   0x00000003
#define CHARSET_GB_7590   0x00000004
#define CHARSET_GB_8565   0x00000006
#define CHARSET_GB_Han   0x00000005
#define CHARSET_GB_ROMAN   0x00000019
#define CHARSET_HW_KATAKANA   0x0000001B
#define CHARSET_IBM_1046   0x00000044
#define CHARSET_IBMCP_1040   0x0000002C
#define CHARSET_IBMCP_1041   0x0000002D
#define CHARSET_IBMCP_1043   0x0000002E
#define CHARSET_IBMCP_855   0x0000002A
#define CHARSET_IBMCP_864   0x0000002B
#define CHARSET_IBMCP_869   0x00000029
#define CHARSET_ISCII_Bengali   0x00000048
#define CHARSET_ISCII_Devanagari   0x00000047
#define CHARSET_ISCII_Gujarati   0x0000004A
#define CHARSET_ISCII_Gurmukhi   0x00000049
#define CHARSET_ISCII_Kannada   0x0000004E
#define CHARSET_ISCII_Malayalam   0x0000004F
#define CHARSET_ISCII_Oriya   0x0000004B
#define CHARSET_ISCII_Tamil   0x0000004C
#define CHARSET_ISCII_Telugu   0x0000004D
#define CHARSET_ISO_8859_1   0x0000000F
#define CHARSET_ISO_8859_2   0x00000010
#define CHARSET_ISO_8859_3   0x00000011
#define CHARSET_ISO_8859_4   0x00000012
#define CHARSET_ISO_8859_5   0x00000013
#define CHARSET_ISO_8859_6   0x00000014
#define CHARSET_ISO_8859_7   0x00000015
#define CHARSET_ISO_8859_8   0x00000016
#define CHARSET_ISO_8859_9   0x00000017
#define CHARSET_ISO_Latin_1   CHARSET_ISO_8859_1
#define CHARSET_JIS_0208   CHARSET_JIS_X_0208_1990
#define CHARSET_JIS_0212   CHARSET_JIS_X_0212_1990
#define CHARSET_JIS_C_6226   0x0000001A
#define CHARSET_JIS_ROMAN   0x00000018
#define CHARSET_JIS_X_0208_1990   0x0000000B
#define CHARSET_JIS_X_0212_1990   0x0000000C
#define CHARSET_KOI8   0x00000050
#define CHARSET_KS_C_5601_1987   0x0000000D
#define CHARSET_KS_C_5601_1992   0x00000046
#define CHARSET_KS_C_5601_Unif   0x00000045
#define CHARSET_KS_C_5657_1991   0x0000000E
#define CHARSET_MacArabic   0x00000034
#define CHARSET_MacCentEurope   0x00000038
#define CHARSET_MacCroatian   0x00000035
#define CHARSET_MacCyrillic   0x00000036
#define CHARSET_MacDingbats   0x00000037
#define CHARSET_MacGreek   0x00000039
#define CHARSET_MacHebrew   0x0000003A
#define CHARSET_MacIcelandic   0x0000003B
#define CHARSET_MacJapanese   0x0000003C
#define CHARSET_MacRoman   0x0000003E
#define CHARSET_MacRomanian   0x0000003D
#define CHARSET_MacSymbol   0x0000003F
#define CHARSET_MacThai   0x00000040
#define CHARSET_MacTurkish   0x00000041
#define CHARSET_MacUkrainian   0x00000042
#define CHARSET_MAX   0x00000051
#define CHARSET_MI_Thai   0x00000031
#define CHARSET_SHIFT_JIS   (CHARALG_SJIS | CHARSET_JIS_X_0208_1990)
#define CHARSET_TIS620_2529   0x0000002F
#define CHARSET_Unicode   0x00000000
#define CHARSET_WinCP_ANSI   0x00000032
#define CHARSET_WinCP_Arab   0x00000020
#define CHARSET_WinCP_Baltic   0x00000043
#define CHARSET_WinCP_Cyrl   0x0000001E
#define CHARSET_WinCP_EE   0x0000001D
#define CHARSET_WinCP_Greek   0x0000001F
#define CHARSET_WinCP_Hebr   0x00000021
#define CHARSET_WinCP_Thai   0x00000030
#define CHARSET_WinCP_Turk   0x0000001C
#define GLYPHSUB_AllowCircleForms   0x00000200
#define GLYPHSUB_AllowCompatForms   0x00000004
#define GLYPHSUB_AllowFontForms   0x00000400
#define GLYPHSUB_AllowFractionForms   0x00000010
#define GLYPHSUB_AllowNarrowForms   0x00000002
#define GLYPHSUB_AllowNoBreakForms   0x00000800
#define GLYPHSUB_AllowPositionalForms   0x00000008
#define GLYPHSUB_AllowSmallForms   0x00000080
#define GLYPHSUB_AllowSquareForms   0x00000100
#define GLYPHSUB_AllowSubscriptForms   0x00000020
#define GLYPHSUB_AllowSuperscriptForms   0x00000040
#define GLYPHSUB_AllowVerticalForms   0x00001000
#define GLYPHSUB_AllowWideForms   0x00000001
#define LIBEXPORT   MI_DLLIMPORT
#define MASK_ALG   0x7FFF0000
#define MASK_CHARSET   0x0000FFFF
#define MUC_THAI_OF   1
#define MUC_THAI_TTF   2
#define MUCEVENT_ComposeOff   0x02
#define MUCEVENT_ComposeOn   0x01
#define MUCEVENT_ComposeToggle   (MUCEVENT_ComposeOn|MUCEVENT_ComposeOff)
#define MucGLYPHDIRECTION_LtoR   0
#define MucGLYPHDIRECTION_RtoL   1
#define MucGLYPHDIRECTION_Weak   2
#define MucGLYPHFLAG_Mark   0x40
#define MucGLYPHFLAG_NonJoining   0x80
#define MucGLYPHFLAG_PartOfRtoLWord   0x01
#define MUCVERSION   2
#define UCCONV_NoByteOrderMark   0x00000001

Enumerations

enum  SCRIPTTAG {
  SCRIPTTAG_arab = 0x61726162, SCRIPTTAG_armn = 0x61726D6E, SCRIPTTAG_beng = 0x62656E67, SCRIPTTAG_bpmf = 0x62706D66,
  SCRIPTTAG_cyrl = 0x6379726C, SCRIPTTAG_deva = 0x64657661, SCRIPTTAG_grek = 0x6772656B, SCRIPTTAG_grgn = 0x6772676E,
  SCRIPTTAG_gujr = 0x67756A72, SCRIPTTAG_hang = 0x68616E67, SCRIPTTAG_hani = 0x68616E69, SCRIPTTAG_hebr = 0x68656272,
  SCRIPTTAG_kana = 0x6B616E61, SCRIPTTAG_knbn = 0x6B6E626E, SCRIPTTAG_knda = 0x6B6E6461, SCRIPTTAG_laoS = 0x6C616F20,
  SCRIPTTAG_latn = 0x6C61746E, SCRIPTTAG_mlym = 0x6D6C796D, SCRIPTTAG_orya = 0x6F727961, SCRIPTTAG_punj = 0x70756E6A,
  SCRIPTTAG_taml = 0x74616D6C, SCRIPTTAG_telu = 0x74656C75, SCRIPTTAG_thai = 0x74686169, SCRIPTTAG_tibt = 0x74696174,
  SCRIPTTAG_neut = 0x4E455554, SCRIPTTAG_puse = 0x50555345, SCRIPTTAG_spcl = 0x5350434C, SCRIPTTAG_surr = 0x53555252,
  SCRIPTTAG_Default = 0x00000000
}

Functions

LIBEXPORT int MucCharsetToUnicode (int val, int charset)
LIBEXPORT int MucCharsetToUnicodeBuf (MIUNICODE *buf, int charset)
LIBEXPORT int MucConvertFromISO2022 (void *vcd, const void *vinbuf, char **outbuf)
LIBEXPORT int MucConvertFromUnicode (void *vcd, const MIUNICODE *ucbuf, void **outbufp)
LIBEXPORT int MucConvertISO2022aToUnicode (const void *, MIUNICODE **)
LIBEXPORT int MucConvertISO2022ToUnicode (const void *, MIUNICODE **)
LIBEXPORT int MucConvertToISO2022 (void *vcd, const void *vinbuf, char **outbuf)
LIBEXPORT int MucConvertToUnicode (void *vcd, const void *vinbuf, MIUNICODE **outbuf)
LIBEXPORT int MucConvertUnicodeGen (const MIUNICODE *inbuf, int(*NewCharsetFunc)(int NewCharset, int OldCharset, void *UserData), int(*AddCharsFunc)(UINT8 *chars, int len, void *UserData), void *UserData, UINT32 flags)
LIBEXPORT int MucConvertUnicodeToISO2022 (const MIUNICODE *inbuf, char **outbuf)
LIBEXPORT int MucConvertUnicodeToUTF8 (const MIUNICODE *instr, UINT8 **outstr)
LIBEXPORT int MucConvertUTF8ToUnicode (const UINT8 *instr, MIUNICODE **outstr)
LIBEXPORT MIUNICODE * MucDecompose (const MIUNICODE *string)
LIBEXPORT MIUNICODE * MucDoGlyphSubstitutions (const MIUNICODE *instr, bool(*CB_AllowSub)(const UCDATA *sub, void *cbdata), void *cbdata, UINT32 flags=0)
LIBEXPORT MIUNICODE * MucFlipVisualAndLogicalOrder (const MIUNICODE *)
LIBEXPORT int MucGetDefaultCharsets (int *charset1, int *charset2, int *charset3)
LIBEXPORT CHAR_ENCODING MucGetEncodingFromName (const char *name)
LIBEXPORT CHAR_ENCODING MucGetEncodingFromNum (int num, ENCODELIST_FLAGS flags)
LIBEXPORT int MucGetEncodingNum (CHAR_ENCODING encoding, ENCODELIST_FLAGS flags)
LIBEXPORT int MucGetGlyphDirection (const MIUNICODE *uc, int curdir, UINT8 *glyphflags)
LIBEXPORT const char * MucGetName (CHAR_ENCODING encoding)
LIBEXPORT int MucGetNumEncodings (ENCODELIST_FLAGS flags)
LIBEXPORT int MucGuessJapaneseEncoding (const UINT8 *in, char *encoding)
LIBEXPORT int MucHasRightToLeftTextISO2022 (const char *in)
LIBEXPORT int MucHasRightToLeftTextUC (const MIUNICODE *in)
LIBEXPORT ERRVALUE MucIndicGlyphSubstitutions (const MIUNICODE *instr, MIUNICODE **outstr, SCRIPTTAG script=SCRIPTTAG_Default, INT32 len=-1, bool bApplyRephHack=false)
LIBEXPORT int MucInitConversion (void **handle, const char *encoding, UINT32 flags)
LIBEXPORT bool MucIsThaiLowerVowel (MIUNICODE ch)
LIBEXPORT bool MucIsThaiTone (MIUNICODE ch)
LIBEXPORT bool MucIsThaiUpperVowel (MIUNICODE ch)
LIBEXPORT bool MucIsThaiVowel (MIUNICODE ch)
LIBEXPORT int MucNameToCharset (const char *name, ENCODELIST_FLAGS flags)
LIBEXPORT char * MucNameToDesc (const char *name)
LIBEXPORT MIUNICODE * MucNameToDescUC (const char *name)
LIBEXPORT int MucNameToNum (const char *name, ENCODELIST_FLAGS flags)
LIBEXPORT char * MucNumToDesc (int num, ENCODELIST_FLAGS flags)
LIBEXPORT MIUNICODE * MucNumToDescUC (int num, ENCODELIST_FLAGS flags)
LIBEXPORT const char * MucNumToName (int num, ENCODELIST_FLAGS flags)
LIBEXPORT int MucQuickConvToISO2022 (int encodetype, const void *in, char **outp)
LIBEXPORT int MucQuickConvToUnicode (int encodetype, const void *in, MIUNICODE **ucp)
LIBEXPORT MIUNICODE * MucRecompose (const MIUNICODE *string)
LIBEXPORT void MucReset (void *handle)
LIBEXPORT void MucStopConversion (void *handle)
LIBEXPORT int MucStrLenISO2022 (const void *vinbuf)
LIBEXPORT MIUNICODE * MucThaiShiftTonesAndVowels (const MIUNICODE *str, UINT32 fontencoding)
LIBEXPORT int MucUnicodeToCharset (int val, int charset)

Detailed Description

Definitions for Muc functions.


Define Documentation

#define CHARALG_EUC   0x00020000
#define CHARALG_GL2GR   0x00040000

Set high bits.

#define CHARALG_GR2GL   0x00080000

Strip high bits.

#define CHARALG_SJIS   0x00010000
#define CHARSET_ArabicUC   0x00000033

The Arabic range of Unicode.

#define CHARSET_ASCII   CHARSET_ISO_Latin_1
#define CHARSET_BigFive   0x00000007
#define CHARSET_CNS_11543_01   0x00000008
#define CHARSET_CNS_11543_02   0x00000009
#define CHARSET_CNS_11543_14   0x0000000A
#define CHARSET_CP932   0x00000051

Shift-JIS with MS Extensions.

#define CHARSET_DOSCP_437   0x00000022

DOS Codepage.

#define CHARSET_DOSCP_850   0x00000023

DOS Codepage.

#define CHARSET_DOSCP_852   0x00000024

DOS Codepage.

#define CHARSET_DOSCP_857   0x00000025

DOS Codepage.

#define CHARSET_DOSCP_861   0x00000026

DOS Codepage.

#define CHARSET_DOSCP_863   0x00000027

DOS Codepage.

#define CHARSET_DOSCP_865   0x00000028

DOS Codepage.

#define CHARSET_GB_12345   0x00000002
#define CHARSET_GB_2312   0x00000001
#define CHARSET_GB_7589   0x00000003
#define CHARSET_GB_7590   0x00000004
#define CHARSET_GB_8565   0x00000006
#define CHARSET_GB_Han   0x00000005
#define CHARSET_GB_ROMAN   0x00000019

From ISO 2022.

#define CHARSET_HW_KATAKANA   0x0000001B

From ISO 2022.

#define CHARSET_IBM_1046   0x00000044

IBM Arabic encoding.

#define CHARSET_IBMCP_1040   0x0000002C

IBM Codepage.

#define CHARSET_IBMCP_1041   0x0000002D

IBM Codepage.

#define CHARSET_IBMCP_1043   0x0000002E

IBM Codepage.

#define CHARSET_IBMCP_855   0x0000002A

IBM Codepage.

#define CHARSET_IBMCP_864   0x0000002B

IBM Codepage.

#define CHARSET_IBMCP_869   0x00000029

IBM Codepage.

#define CHARSET_ISCII_Bengali   0x00000048

Indic.

#define CHARSET_ISCII_Devanagari   0x00000047

Indic.

#define CHARSET_ISCII_Gujarati   0x0000004A

Indic.

#define CHARSET_ISCII_Gurmukhi   0x00000049

Indic.

#define CHARSET_ISCII_Kannada   0x0000004E

Indic.

#define CHARSET_ISCII_Malayalam   0x0000004F

Indic.

#define CHARSET_ISCII_Oriya   0x0000004B

Indic.

#define CHARSET_ISCII_Tamil   0x0000004C

Indic.

#define CHARSET_ISCII_Telugu   0x0000004D

Indic.

#define CHARSET_ISO_8859_1   0x0000000F

ASCII.

#define CHARSET_ISO_8859_2   0x00000010
#define CHARSET_ISO_8859_3   0x00000011
#define CHARSET_ISO_8859_4   0x00000012
#define CHARSET_ISO_8859_5   0x00000013
#define CHARSET_ISO_8859_6   0x00000014
#define CHARSET_ISO_8859_7   0x00000015
#define CHARSET_ISO_8859_8   0x00000016
#define CHARSET_ISO_8859_9   0x00000017
#define CHARSET_ISO_Latin_1   CHARSET_ISO_8859_1
#define CHARSET_JIS_0208   CHARSET_JIS_X_0208_1990
#define CHARSET_JIS_0212   CHARSET_JIS_X_0212_1990
#define CHARSET_JIS_C_6226   0x0000001A

From ISO 2022.

#define CHARSET_JIS_ROMAN   0x00000018
#define CHARSET_JIS_X_0208_1990   0x0000000B
#define CHARSET_JIS_X_0212_1990   0x0000000C
#define CHARSET_KOI8   0x00000050

Russian/Ukrainian/etc.

#define CHARSET_KS_C_5601_1987   0x0000000D

Wansung.

#define CHARSET_KS_C_5601_1992   0x00000046

Johab.

#define CHARSET_KS_C_5601_Unif   0x00000045

Unified Hangul (UHang).

#define CHARSET_KS_C_5657_1991   0x0000000E
#define CHARSET_MacArabic   0x00000034
#define CHARSET_MacCentEurope   0x00000038
#define CHARSET_MacCroatian   0x00000035
#define CHARSET_MacCyrillic   0x00000036
#define CHARSET_MacDingbats   0x00000037
#define CHARSET_MacGreek   0x00000039
#define CHARSET_MacHebrew   0x0000003A
#define CHARSET_MacIcelandic   0x0000003B
#define CHARSET_MacJapanese   0x0000003C
#define CHARSET_MacRoman   0x0000003E
#define CHARSET_MacRomanian   0x0000003D
#define CHARSET_MacSymbol   0x0000003F
#define CHARSET_MacThai   0x00000040
#define CHARSET_MacTurkish   0x00000041
#define CHARSET_MacUkrainian   0x00000042
#define CHARSET_MAX   0x00000051
#define CHARSET_MI_Thai   0x00000031

Thai Windows ordering.

#define CHARSET_SHIFT_JIS   (CHARALG_SJIS | CHARSET_JIS_X_0208_1990)
#define CHARSET_TIS620_2529   0x0000002F

Thai (BDF font order).

#define CHARSET_Unicode   0x00000000
#define CHARSET_WinCP_ANSI   0x00000032

Windows ANSI Code Page.

#define CHARSET_WinCP_Arab   0x00000020

Windows Codepage (cp1256).

#define CHARSET_WinCP_Baltic   0x00000043
#define CHARSET_WinCP_Cyrl   0x0000001E

Windows Codepage (cp1251).

#define CHARSET_WinCP_EE   0x0000001D

Windows Codepage (cp1250).

#define CHARSET_WinCP_Greek   0x0000001F

Windows Codepage (cp1253).

#define CHARSET_WinCP_Hebr   0x00000021

Windows Codepage (cp1255).

#define CHARSET_WinCP_Thai   0x00000030

Thai Windows ordering.

#define CHARSET_WinCP_Turk   0x0000001C

Windows Codepage (cp1254).

#define GLYPHSUB_AllowCircleForms   0x00000200
#define GLYPHSUB_AllowCompatForms   0x00000004

Compatibility (equivalent) forms.

#define GLYPHSUB_AllowFontForms   0x00000400
#define GLYPHSUB_AllowFractionForms   0x00000010

"1/2" -> single glyph

#define GLYPHSUB_AllowNarrowForms   0x00000002
#define GLYPHSUB_AllowNoBreakForms   0x00000800
#define GLYPHSUB_AllowPositionalForms   0x00000008

Arabic/Hebrew.

#define GLYPHSUB_AllowSmallForms   0x00000080
#define GLYPHSUB_AllowSquareForms   0x00000100
#define GLYPHSUB_AllowSubscriptForms   0x00000020
#define GLYPHSUB_AllowSuperscriptForms   0x00000040
#define GLYPHSUB_AllowVerticalForms   0x00001000
#define GLYPHSUB_AllowWideForms   0x00000001
#define LIBEXPORT   MI_DLLIMPORT
#define MASK_ALG   0x7FFF0000
#define MASK_CHARSET   0x0000FFFF
#define MUC_THAI_OF   1
#define MUC_THAI_TTF   2
#define MUCEVENT_ComposeOff   0x02
#define MUCEVENT_ComposeOn   0x01
#define MUCEVENT_ComposeToggle   (MUCEVENT_ComposeOn|MUCEVENT_ComposeOff)
#define MucGLYPHDIRECTION_LtoR   0
#define MucGLYPHDIRECTION_RtoL   1
#define MucGLYPHDIRECTION_Weak   2
#define MucGLYPHFLAG_Mark   0x40
#define MucGLYPHFLAG_NonJoining   0x80
#define MucGLYPHFLAG_PartOfRtoLWord   0x01

Some special glyph flags.

The method we use to generate GLYPHCONTEXTBITs requires that PartOfRtoLWord be 1 and that we don't use bits 0x02 and 0x04 here. The PartOfRtoLWord flag will be shifted and OR'd with the bits of the glyphs on either side of it to create GLYPHCONTEXT bits. Yes, they're bytes, not longs. Don't muck with 'em!

#define MUCVERSION   2
#define UCCONV_NoByteOrderMark   0x00000001

Enumeration Type Documentation

enum SCRIPTTAG

Script tags. These are used mostly when multiple "scripts" (as in writing systems) use the same character set but have different rules.

A good example of this is the Indic languages which mostly share the ISCII character set but Devanagari and Bengali have different formatting rules. The numbers below are not random gibberish as it may seem. If you take each pair of hex digits, they form an ASCII value so that together they make up a 4 letter abbreviation. This is how these tags are encoded in TrueType fonts.

Enumerator:
SCRIPTTAG_arab 

'arab' *** Arabic

SCRIPTTAG_armn 

'armn' *** Armenian: no MS definition ***

SCRIPTTAG_beng 

'beng' *** Bengali (Indic)

SCRIPTTAG_bpmf 

'bpmf' *** Bopomofo: no MS definition ***

SCRIPTTAG_cyrl 

'cyrl' *** Cyrillic

SCRIPTTAG_deva 

'deva' *** Devanagari (Indic)

SCRIPTTAG_grek 

'grek' *** Greek

SCRIPTTAG_grgn 

'grgn' *** Georgian: no MS definition ***

SCRIPTTAG_gujr 

'gujr' *** Gujarati (Indic)

SCRIPTTAG_hang 

'hang' ***

SCRIPTTAG_hani 

'hani' ***

SCRIPTTAG_hebr 

'hebr' *** Hebrew

SCRIPTTAG_kana 

'kana' ***

SCRIPTTAG_knbn 

'knbn' *** Kanbun: no MS definition ***

SCRIPTTAG_knda 

'knda' *** Kannada (Indic)

SCRIPTTAG_laoS 

'lao ' *** Lao: no MS definition ***

SCRIPTTAG_latn 

'latn' *** Latin

SCRIPTTAG_mlym 

'mlym' *** Malayalam (Indic)

SCRIPTTAG_orya 

'orya' *** Oriya (Indic)

SCRIPTTAG_punj 

'punj' *** punjabi == gurmukhi

SCRIPTTAG_taml 

'taml' *** Tamil (Indic)

SCRIPTTAG_telu 

'telu' *** Telugu (Indic)

SCRIPTTAG_thai 

'thai' *** Thai

SCRIPTTAG_tibt 

'tibt' *** Tibetan

SCRIPTTAG_neut 

'NEUT'

SCRIPTTAG_puse 

'PUSE'

SCRIPTTAG_spcl 

'SPCL'

SCRIPTTAG_surr 

'SURR'

SCRIPTTAG_Default 

''


Function Documentation

LIBEXPORT int MucCharsetToUnicode ( int  val,
int  charset 
)

Convert a single character from a given character set to MIUNICODE.

LIBEXPORT int MucCharsetToUnicodeBuf ( MIUNICODE *  buf,
int  charset 
)

Convert a string of characters from a given character set to MIUNICODE.

Operates in-place.

LIBEXPORT int MucConvertFromISO2022 ( void *  vcd,
const void *  vinbuf,
char **  outbuf 
)

Convert string from ISO-2022 to anything.

LIBEXPORT int MucConvertFromUnicode ( void *  vcd,
const MIUNICODE *  ucbuf,
void **  outbufp 
)

Convert string from MIUNICODE to anything.

LIBEXPORT int MucConvertISO2022aToUnicode ( const void *  ,
MIUNICODE **   
)

Convert an ISO-2022a string to MIUNICODE.

ISO-2022a is a MicroImages invention -- a slightly modified version of ISO-2022 in which all backslashes are doubled. This is done because X resource files try to interpret backslashes as something special even if the backslash is the 2nd byte of a 2-byte character.

The caller should free the buffer returned

LIBEXPORT int MucConvertISO2022ToUnicode ( const void *  ,
MIUNICODE **   
)

Convert an ISO-2022 string to MIUNICODE.

The caller should free the buffer returned

LIBEXPORT int MucConvertToISO2022 ( void *  vcd,
const void *  vinbuf,
char **  outbuf 
)

Convert string from anything to ISO-2022.

The "vcd" must have been initialized by calling MucInitConversion(). Assumes vinbuf is whatever vcd was initialized to. Caller must free outbuf when done

LIBEXPORT int MucConvertToUnicode ( void *  vcd,
const void *  vinbuf,
MIUNICODE **  outbuf 
)

Convert string from anything to MIUNICODE.

The "vcd" must have been initialized by calling MucInitConversion(). Assumes vinbuf is whatever vcd was initialized to. Caller must free outbuf when done

LIBEXPORT int MucConvertUnicodeGen ( const MIUNICODE *  inbuf,
int(*)(int NewCharset, int OldCharset, void *UserData)  NewCharsetFunc,
int(*)(UINT8 *chars, int len, void *UserData)  AddCharsFunc,
void *  UserData,
UINT32  flags 
)

Convert string from MIUNICODE by calling callbacks.

This function can be used to convert a string from MIUNICODE to just about anything. It assumes ISO-8859-1 as an initial character set.

NewCharsetFunc() will be called when the function detects a character not available in the current character set. It tells you what character set it was in and what character set it wants to switch to. At this point, you can do whatever you need to do to switch fonts to one that supports the requested character set. You should return 0 or an error code < 0.

AddCharsFunc() is called when an internal buffer fills up or just before calling NewCharsetFunc() to let you add the characters to your output buffer or display them or whatever it is you're doing. The "characters" passed to you may be multi-byte. If this is the case, "len" is in bytes.

LIBEXPORT int MucConvertUnicodeToISO2022 ( const MIUNICODE *  inbuf,
char **  outbuf 
)

Convert a MIUNICODE string to ISO-2022.

The caller should free the buffer returned

LIBEXPORT int MucConvertUnicodeToUTF8 ( const MIUNICODE *  instr,
UINT8 **  outstr 
)

Convert MIUNICODE (UTF-16) to UTF8.

Warning! This function assumes that the pointer passed in for outstr is either NULL or can be realloc'd as needed. You must therefore initialize it to something.

LIBEXPORT int MucConvertUTF8ToUnicode ( const UINT8 *  instr,
MIUNICODE **  outstr 
)

Convert UTF8 to MIUNICODE (UTF-16).

Warning! This function assumes that the pointer passed in for outstr is either NULL or can be realloc'd as needed. You must therefore initialize it to something.

LIBEXPORT MIUNICODE* MucDecompose ( const MIUNICODE *  string  ) 

Decompose a Unicode string into Canonical form.

In a canonical Unicode string, all decomposable characters are decomposed. For example, 0x00E9 (LATIN SMALL LETTER E WITH ACUTE) is represented as 0x0065 (LATIN SMALL LETTER E) + 0x0301 (COMBINING ACUTE ACCENT).

The caller should free the returned string

LIBEXPORT MIUNICODE* MucDoGlyphSubstitutions ( const MIUNICODE *  instr,
bool(*)(const UCDATA *sub, void *cbdata)  CB_AllowSub,
void *  cbdata,
UINT32  flags = 0 
)

Perform glyph substitutions on a MIUNICODE string.

This function returns a copy of the given string after performing glyph substitutions based on the decomposition data in UnicodeData.txt (which we've cooked into a compact form, ucdata.ref)

Note: This function assumes the input string is in logical order and does not flip it in any way.

Parameters:
instr String to convert
CB_AllowSub Allows you to intercept and deny substitutions. A good use for this feature would be to verify that the selected font has a given glyph available before allowing the substitution. The callback should return true to allow the substitution, false to prevent it. If no callback is provided, all substitutions are allowed.
cbdata Callback data. Will be passed to CB_AllowSub as the 2nd parameter
flags 
  • GLYPHSUB_AllowWideForms Wide (or zenkaku) compatibility characters
  • GLYPHSUB_AllowNarrowForms Narrow (or hankaku) compatibility characters
  • GLYPHSUB_AllowCompatForms Compatibility (equivalent) forms
  • GLYPHSUB_AllowPositionalForms Arabic/Hebrew positional forms
  • GLYPHSUB_AllowFractionForms Converts the three characters "1/2" to a single fraction glyph
  • GLYPHSUB_AllowSubscriptForms
  • GLYPHSUB_AllowSuperscriptForms
  • GLYPHSUB_AllowSmallForms Small variant forms (CNS compatibility)
  • GLYPHSUB_AllowSquareForms CJK squared font variant
  • GLYPHSUB_AllowCircleForms Encircled forms
  • GLYPHSUB_AllowFontForms Font variants (e.g. a blackletter form)
  • GLYPHSUB_AllowNoBreakForms Non-breaking spaces and hyphens
  • GLYPHSUB_AllowVerticalForms Vertical presentation forms
LIBEXPORT MIUNICODE* MucFlipVisualAndLogicalOrder ( const MIUNICODE *    ) 

Flip Right-To-Left text to "visual" order.

Returns a copy of the input string which caller must free. This function is symmetric. That is, flipping the same string twice gives the original string.

LIBEXPORT int MucGetDefaultCharsets ( int *  charset1,
int *  charset2,
int *  charset3 
)

Returns the default charset for keyboard input.

Returns 3 charset codes which can be used as hints when trying to convert MIUNICODE to ISO-2022. This is used by the input of the XeText widget.

Parameters:
charset1 Charset to use for single-byte things < 128
charset2 Charset to use for single-byte things > 128
charset3 Charset to use for multi-byte things.
LIBEXPORT CHAR_ENCODING MucGetEncodingFromName ( const char *  name  ) 

Given an encoding name, return the CHAR_ENCODING value.

LIBEXPORT CHAR_ENCODING MucGetEncodingFromNum ( int  num,
ENCODELIST_FLAGS  flags 
)

Given an encoding number, return the CHAR_ENCODING value.

This will return the Nth encoding which matches the criteria set forth by the flags parameter. Encoding numbers (as passed to this function) are not fixed and can change at any time. The only thing this function should be used for is retrieving a list of encodings to present the user with a choice.

Parameters:
num The encoding number.
flags 
  • ENCODELIST_FLAG_Any Allow any encoding
  • ENCODELIST_FLAG_Importable Allow importable encodings
  • ENCODELIST_FLAG_Exportable Allow exportable encodings
  • ENCODELIST_FLAG_Both Allow importable or exportable encodings
  • ENCODELIST_FLAG_NoUnicode Don't include MIUNICODE
LIBEXPORT int MucGetEncodingNum ( CHAR_ENCODING  encoding,
ENCODELIST_FLAGS  flags 
)

Return the index into array of encodings for a given encoding.

If you use MucGetNumEncodings() and MucNumToName() to iterate through all the encodings of a given type for the purposes of generating a list for the user to select from, then this function is useful for determining which item in that list a given encoding will be so that you can make it the default.

Parameters:
encoding The encoding to find
flags 
  • ENCODELIST_FLAG_Any Allow any encoding
  • ENCODELIST_FLAG_Importable Allow importable encodings
  • ENCODELIST_FLAG_Exportable Allow exportable encodings
  • ENCODELIST_FLAG_Both Allow importable or exportable encodings
  • ENCODELIST_FLAG_NoUnicode Don't include MIUNICODE
Returns:
Index into array or -1 if encoding not found.
LIBEXPORT int MucGetGlyphDirection ( const MIUNICODE *  uc,
int  curdir,
UINT8 *  glyphflags 
)

Determine the direction of a glyph.

Given a MIUNICODE string and a current direction, determine the direction of the next character in the string.

The glyphflags array is filled in by this function.

Directions (both curdir and the return value)

  • MucGLYPHDIRECTION_LtoR
  • MucGLYPHDIRECTION_RtoL

glyphflags

  • MucGLYPHFLAG_PartOfRtoLWord
  • MucGLYPHFLAG_NonJoining
  • MucGLYPHFLAG_Mark
LIBEXPORT const char* MucGetName ( CHAR_ENCODING  encoding  ) 

Return an encoding name.

Once an encoding is named, the name is set in stone and can be written to files without fear of the name changing. The actual name presented to the user is looked up in messages.txt by calling MucNameToDescUC()

Do NOT free the return value from this function

LIBEXPORT int MucGetNumEncodings ( ENCODELIST_FLAGS  flags  ) 

Get the number of supported encodings of a given type.

Parameters:
flags 
  • ENCODELIST_FLAG_Any Allow any encoding
  • ENCODELIST_FLAG_Importable Allow importable encodings
  • ENCODELIST_FLAG_Exportable Allow exportable encodings
  • ENCODELIST_FLAG_Both Allow importable or exportable encodings
  • ENCODELIST_FLAG_NoUnicode Don't include MIUNICODE
LIBEXPORT int MucGuessJapaneseEncoding ( const UINT8 *  in,
char *  encoding 
)

Given a string which may be in either JIS, Shift-JIS, or EUC encoding, try to determine which it is.

The "encoding" string passed in must be big enough to hold the resulting encoding name. "Shift-JIS" is the longest string it can return.

LIBEXPORT int MucHasRightToLeftTextISO2022 ( const char *  in  ) 

Determine if an ISO-2022 string has right-to-left text in it.

LIBEXPORT int MucHasRightToLeftTextUC ( const MIUNICODE *  in  ) 

Determine if a MIUNICODE string has right-to-left text in it.

LIBEXPORT ERRVALUE MucIndicGlyphSubstitutions ( const MIUNICODE *  instr,
MIUNICODE **  outstr,
SCRIPTTAG  script = SCRIPTTAG_Default,
INT32  len = -1,
bool  bApplyRephHack = false 
)

Does Indic glyph substitution based on various rules.

script should be one of the following: SCRIPTTAG_Default (see below), SCRIPTTAG_beng (Bengali), SCRIPTTAG_deva (Devanagari), SCRIPTTAG_gujr (Gujarati), SCRIPTTAG_knda (Kannada), SCRIPTTAG_mlym (Malayalam), SCRIPTTAG_orya (Oriya), SCRIPTTAG_punj (Punjabi), SCRIPTTAG_taml (Tamil), SCRIPTTAG_telu (Telugu).

If script is SCRIPTTAG_Default, currently it will fall back on Devanagari, but eventually it will try to find the current system script and use that.

The len parameter is used to avoid an extra ucstrlen() call. If the caller already knows the length of instr, pass it. If not, just omit the parameter.

The caller is responsible for freeing outstr

The flag bApplyRephHack is just that, a hack. In order to correctly process the GSUB tables, we need to only apply the 'rphf' table if we have reordered a reph+halant+consonant combination to consonant+reph+halant. In order to do that, we need to somehow distinguish between text where we moved the reph and text where it was already there. To do this, if this flag is set, we change the reph to a fake Unicode character, one that we chose by subtracting 0x30 from the reph glyph. In all the Indic encodings, this falls on an unused code point (for example, 0x0930 becomes 0x0900). The caller knows to look for this and converts it back to the real reph glyph, but sets a flag telling us to process the 'rphf' table on it.

Parameters:
len Length of instr if known (just to avoid extra ucstrlen())
LIBEXPORT int MucInitConversion ( void **  handle,
const char *  encoding,
UINT32  flags 
)

Initialize a MIUNICODE conversion handle.

LIBEXPORT bool MucIsThaiLowerVowel ( MIUNICODE  ch  ) 

Returns true if ch is a Thai vowel character that sits below the consonant.

LIBEXPORT bool MucIsThaiTone ( MIUNICODE  ch  ) 

Returns true if ch is a Thai tone character.

LIBEXPORT bool MucIsThaiUpperVowel ( MIUNICODE  ch  ) 

Returns true if ch is a Thai vowel character that sits above the consonant.

LIBEXPORT bool MucIsThaiVowel ( MIUNICODE  ch  ) 

Returns true if ch is a Thai vowel character.

LIBEXPORT int MucNameToCharset ( const char *  name,
ENCODELIST_FLAGS  flags 
)

Determine the primary character set for a given encoding.

LIBEXPORT char* MucNameToDesc ( const char *  name  ) 

Return the description of an encoding given the name (char*).

This looks up the encoding description from messages.txt. You should free the return value when you're done with it. Don't call this. Call MucNameToDescUC() instead.

See also:
MucNameToDescUC()
LIBEXPORT MIUNICODE* MucNameToDescUC ( const char *  name  ) 

Return the description of an encoding given the name (MIUNICODE*).

This looks up the encoding description from messages.txt. You should free the return value when you're done with it.

LIBEXPORT int MucNameToNum ( const char *  name,
ENCODELIST_FLAGS  flags 
)

The opposite of MucNumToName().

LIBEXPORT char* MucNumToDesc ( int  num,
ENCODELIST_FLAGS  flags 
)

Return the description of an encoding given the number (char*).

This looks up the encoding description from messages.txt. You should free the return value when you're done with it. Don't call this. Call MucNumToDescUC() instead.

See also:
MucNumToDescUC()
LIBEXPORT MIUNICODE* MucNumToDescUC ( int  num,
ENCODELIST_FLAGS  flags 
)

Return the description of an encoding given the number (MIUNICODE*).

This looks up the encoding description from messages.txt. You should free the return value when you're done with it.

LIBEXPORT const char* MucNumToName ( int  num,
ENCODELIST_FLAGS  flags 
)

Convert an encoding number to its name.

Combines the following two functions

See also:
MucGetEncodingFromNum()
MucGetName()

Do not free the output from this function

LIBEXPORT int MucQuickConvToISO2022 ( int  encodetype,
const void *  in,
char **  outp 
)

Don't use this function.

LIBEXPORT int MucQuickConvToUnicode ( int  encodetype,
const void *  in,
MIUNICODE **  ucp 
)

Don't use this function.

LIBEXPORT MIUNICODE* MucRecompose ( const MIUNICODE *  string  ) 

Recompose a Canonical form Unicode string into the combined forms.

In a canonical Unicode string, all decomposable characters are decomposed. For example, 0x00E9 (LATIN SMALL LETTER E WITH ACUTE) is represented as 0x0065 (LATIN SMALL LETTER E) + 0x0301 (COMBINING ACUTE ACCENT). This function puts them back together.

The caller should free the returned string

LIBEXPORT void MucReset ( void *  handle  ) 

Reset an Muc handle.

LIBEXPORT void MucStopConversion ( void *  handle  ) 

Stop a MIUNICODE conversion handle.

LIBEXPORT int MucStrLenISO2022 ( const void *  vinbuf  ) 

Determine the number of characters in an ISO-2022 string.

Since ISO-2022 can consist of single-byte and two-byte characters and escape codes, a simple strlen() won't give you an accurate length. This function gives you the same result you'd get if you converted the string to MIUNICODE and called ucstrlen() on the result, but without going to the actual work.

LIBEXPORT MIUNICODE* MucThaiShiftTonesAndVowels ( const MIUNICODE *  str,
UINT32  fontencoding 
)
Parameters:
fontencoding MUC_THAI_OF or MUC_THAI_TTF
LIBEXPORT int MucUnicodeToCharset ( int  val,
int  charset 
)

Convert a single MIUNICODE character to a given character set.

Returns 0 if the character is not available in the given charset


Generated on Sun Oct 7 21:27:33 2012 for TNTsdk 2012 by  doxygen 1.6.1