Definitions for Muc functions. More...
#include <mi32/charencoding.h>
Go to the source code of this file.
Defines | |
| #define | CHARALG_EUC 0x00020000 |
| #define | CHARALG_GL2GR 0x00040000 |
| #define | CHARALG_GR2GL 0x00080000 |
| #define | CHARALG_SJIS 0x00010000 |
| #define | CHARSET_ArabicUC 0x00000033 |
| #define | CHARSET_ASCII CHARSET_ISO_Latin_1 |
| #define | CHARSET_BigFive 0x00000007 |
| #define | CHARSET_CNS_11543_01 0x00000008 |
| #define | CHARSET_CNS_11543_02 0x00000009 |
| #define | CHARSET_CNS_11543_14 0x0000000A |
| #define | CHARSET_CP932 0x00000051 |
| #define | CHARSET_DOSCP_437 0x00000022 |
| #define | CHARSET_DOSCP_850 0x00000023 |
| #define | CHARSET_DOSCP_852 0x00000024 |
| #define | CHARSET_DOSCP_857 0x00000025 |
| #define | CHARSET_DOSCP_861 0x00000026 |
| #define | CHARSET_DOSCP_863 0x00000027 |
| #define | CHARSET_DOSCP_865 0x00000028 |
| #define | CHARSET_GB_12345 0x00000002 |
| #define | CHARSET_GB_2312 0x00000001 |
| #define | CHARSET_GB_7589 0x00000003 |
| #define | CHARSET_GB_7590 0x00000004 |
| #define | CHARSET_GB_8565 0x00000006 |
| #define | CHARSET_GB_Han 0x00000005 |
| #define | CHARSET_GB_ROMAN 0x00000019 |
| #define | CHARSET_HW_KATAKANA 0x0000001B |
| #define | CHARSET_IBM_1046 0x00000044 |
| #define | CHARSET_IBMCP_1040 0x0000002C |
| #define | CHARSET_IBMCP_1041 0x0000002D |
| #define | CHARSET_IBMCP_1043 0x0000002E |
| #define | CHARSET_IBMCP_855 0x0000002A |
| #define | CHARSET_IBMCP_864 0x0000002B |
| #define | CHARSET_IBMCP_869 0x00000029 |
| #define | CHARSET_ISCII_Bengali 0x00000048 |
| #define | CHARSET_ISCII_Devanagari 0x00000047 |
| #define | CHARSET_ISCII_Gujarati 0x0000004A |
| #define | CHARSET_ISCII_Gurmukhi 0x00000049 |
| #define | CHARSET_ISCII_Kannada 0x0000004E |
| #define | CHARSET_ISCII_Malayalam 0x0000004F |
| #define | CHARSET_ISCII_Oriya 0x0000004B |
| #define | CHARSET_ISCII_Tamil 0x0000004C |
| #define | CHARSET_ISCII_Telugu 0x0000004D |
| #define | CHARSET_ISO_8859_1 0x0000000F |
| #define | CHARSET_ISO_8859_2 0x00000010 |
| #define | CHARSET_ISO_8859_3 0x00000011 |
| #define | CHARSET_ISO_8859_4 0x00000012 |
| #define | CHARSET_ISO_8859_5 0x00000013 |
| #define | CHARSET_ISO_8859_6 0x00000014 |
| #define | CHARSET_ISO_8859_7 0x00000015 |
| #define | CHARSET_ISO_8859_8 0x00000016 |
| #define | CHARSET_ISO_8859_9 0x00000017 |
| #define | CHARSET_ISO_Latin_1 CHARSET_ISO_8859_1 |
| #define | CHARSET_JIS_0208 CHARSET_JIS_X_0208_1990 |
| #define | CHARSET_JIS_0212 CHARSET_JIS_X_0212_1990 |
| #define | CHARSET_JIS_C_6226 0x0000001A |
| #define | CHARSET_JIS_ROMAN 0x00000018 |
| #define | CHARSET_JIS_X_0208_1990 0x0000000B |
| #define | CHARSET_JIS_X_0212_1990 0x0000000C |
| #define | CHARSET_KOI8 0x00000050 |
| #define | CHARSET_KS_C_5601_1987 0x0000000D |
| #define | CHARSET_KS_C_5601_1992 0x00000046 |
| #define | CHARSET_KS_C_5601_Unif 0x00000045 |
| #define | CHARSET_KS_C_5657_1991 0x0000000E |
| #define | CHARSET_MacArabic 0x00000034 |
| #define | CHARSET_MacCentEurope 0x00000038 |
| #define | CHARSET_MacCroatian 0x00000035 |
| #define | CHARSET_MacCyrillic 0x00000036 |
| #define | CHARSET_MacDingbats 0x00000037 |
| #define | CHARSET_MacGreek 0x00000039 |
| #define | CHARSET_MacHebrew 0x0000003A |
| #define | CHARSET_MacIcelandic 0x0000003B |
| #define | CHARSET_MacJapanese 0x0000003C |
| #define | CHARSET_MacRoman 0x0000003E |
| #define | CHARSET_MacRomanian 0x0000003D |
| #define | CHARSET_MacSymbol 0x0000003F |
| #define | CHARSET_MacThai 0x00000040 |
| #define | CHARSET_MacTurkish 0x00000041 |
| #define | CHARSET_MacUkrainian 0x00000042 |
| #define | CHARSET_MAX 0x00000051 |
| #define | CHARSET_MI_Thai 0x00000031 |
| #define | CHARSET_SHIFT_JIS (CHARALG_SJIS | CHARSET_JIS_X_0208_1990) |
| #define | CHARSET_TIS620_2529 0x0000002F |
| #define | CHARSET_Unicode 0x00000000 |
| #define | CHARSET_WinCP_ANSI 0x00000032 |
| #define | CHARSET_WinCP_Arab 0x00000020 |
| #define | CHARSET_WinCP_Baltic 0x00000043 |
| #define | CHARSET_WinCP_Cyrl 0x0000001E |
| #define | CHARSET_WinCP_EE 0x0000001D |
| #define | CHARSET_WinCP_Greek 0x0000001F |
| #define | CHARSET_WinCP_Hebr 0x00000021 |
| #define | CHARSET_WinCP_Thai 0x00000030 |
| #define | CHARSET_WinCP_Turk 0x0000001C |
| #define | GLYPHSUB_AllowCircleForms 0x00000200 |
| #define | GLYPHSUB_AllowCompatForms 0x00000004 |
| #define | GLYPHSUB_AllowFontForms 0x00000400 |
| #define | GLYPHSUB_AllowFractionForms 0x00000010 |
| #define | GLYPHSUB_AllowNarrowForms 0x00000002 |
| #define | GLYPHSUB_AllowNoBreakForms 0x00000800 |
| #define | GLYPHSUB_AllowPositionalForms 0x00000008 |
| #define | GLYPHSUB_AllowSmallForms 0x00000080 |
| #define | GLYPHSUB_AllowSquareForms 0x00000100 |
| #define | GLYPHSUB_AllowSubscriptForms 0x00000020 |
| #define | GLYPHSUB_AllowSuperscriptForms 0x00000040 |
| #define | GLYPHSUB_AllowVerticalForms 0x00001000 |
| #define | GLYPHSUB_AllowWideForms 0x00000001 |
| #define | LIBEXPORT MI_DLLIMPORT |
| #define | MASK_ALG 0x7FFF0000 |
| #define | MASK_CHARSET 0x0000FFFF |
| #define | MUC_THAI_OF 1 |
| #define | MUC_THAI_TTF 2 |
| #define | MUCEVENT_ComposeOff 0x02 |
| #define | MUCEVENT_ComposeOn 0x01 |
| #define | MUCEVENT_ComposeToggle (MUCEVENT_ComposeOn|MUCEVENT_ComposeOff) |
| #define | MucGLYPHDIRECTION_LtoR 0 |
| #define | MucGLYPHDIRECTION_RtoL 1 |
| #define | MucGLYPHDIRECTION_Weak 2 |
| #define | MucGLYPHFLAG_Mark 0x40 |
| #define | MucGLYPHFLAG_NonJoining 0x80 |
| #define | MucGLYPHFLAG_PartOfRtoLWord 0x01 |
| #define | MUCVERSION 2 |
| #define | UCCONV_NoByteOrderMark 0x00000001 |
Enumerations | |
| enum | SCRIPTTAG { SCRIPTTAG_arab = 0x61726162, SCRIPTTAG_armn = 0x61726D6E, SCRIPTTAG_beng = 0x62656E67, SCRIPTTAG_bpmf = 0x62706D66, SCRIPTTAG_cyrl = 0x6379726C, SCRIPTTAG_deva = 0x64657661, SCRIPTTAG_grek = 0x6772656B, SCRIPTTAG_grgn = 0x6772676E, SCRIPTTAG_gujr = 0x67756A72, SCRIPTTAG_hang = 0x68616E67, SCRIPTTAG_hani = 0x68616E69, SCRIPTTAG_hebr = 0x68656272, SCRIPTTAG_kana = 0x6B616E61, SCRIPTTAG_knbn = 0x6B6E626E, SCRIPTTAG_knda = 0x6B6E6461, SCRIPTTAG_laoS = 0x6C616F20, SCRIPTTAG_latn = 0x6C61746E, SCRIPTTAG_mlym = 0x6D6C796D, SCRIPTTAG_orya = 0x6F727961, SCRIPTTAG_punj = 0x70756E6A, SCRIPTTAG_taml = 0x74616D6C, SCRIPTTAG_telu = 0x74656C75, SCRIPTTAG_thai = 0x74686169, SCRIPTTAG_tibt = 0x74696174, SCRIPTTAG_neut = 0x4E455554, SCRIPTTAG_puse = 0x50555345, SCRIPTTAG_spcl = 0x5350434C, SCRIPTTAG_surr = 0x53555252, SCRIPTTAG_Default = 0x00000000 } |
Functions | |
| LIBEXPORT int | MucCharsetToUnicode (int val, int charset) |
| LIBEXPORT int | MucCharsetToUnicodeBuf (MIUNICODE *buf, int charset) |
| LIBEXPORT int | MucConvertFromISO2022 (void *vcd, const void *vinbuf, char **outbuf) |
| LIBEXPORT int | MucConvertFromUnicode (void *vcd, const MIUNICODE *ucbuf, void **outbufp) |
| LIBEXPORT int | MucConvertISO2022aToUnicode (const void *, MIUNICODE **) |
| LIBEXPORT int | MucConvertISO2022ToUnicode (const void *, MIUNICODE **) |
| LIBEXPORT int | MucConvertToISO2022 (void *vcd, const void *vinbuf, char **outbuf) |
| LIBEXPORT int | MucConvertToUnicode (void *vcd, const void *vinbuf, MIUNICODE **outbuf) |
| LIBEXPORT int | MucConvertUnicodeGen (const MIUNICODE *inbuf, int(*NewCharsetFunc)(int NewCharset, int OldCharset, void *UserData), int(*AddCharsFunc)(UINT8 *chars, int len, void *UserData), void *UserData, UINT32 flags) |
| LIBEXPORT int | MucConvertUnicodeToISO2022 (const MIUNICODE *inbuf, char **outbuf) |
| LIBEXPORT int | MucConvertUnicodeToUTF8 (const MIUNICODE *instr, UINT8 **outstr) |
| LIBEXPORT int | MucConvertUTF8ToUnicode (const UINT8 *instr, MIUNICODE **outstr) |
| LIBEXPORT MIUNICODE * | MucDecompose (const MIUNICODE *string) |
| LIBEXPORT MIUNICODE * | MucDoGlyphSubstitutions (const MIUNICODE *instr, bool(*CB_AllowSub)(const UCDATA *sub, void *cbdata), void *cbdata, UINT32 flags=0) |
| LIBEXPORT MIUNICODE * | MucFlipVisualAndLogicalOrder (const MIUNICODE *) |
| LIBEXPORT int | MucGetDefaultCharsets (int *charset1, int *charset2, int *charset3) |
| LIBEXPORT CHAR_ENCODING | MucGetEncodingFromName (const char *name) |
| LIBEXPORT CHAR_ENCODING | MucGetEncodingFromNum (int num, ENCODELIST_FLAGS flags) |
| LIBEXPORT int | MucGetEncodingNum (CHAR_ENCODING encoding, ENCODELIST_FLAGS flags) |
| LIBEXPORT int | MucGetGlyphDirection (const MIUNICODE *uc, int curdir, UINT8 *glyphflags) |
| LIBEXPORT const char * | MucGetName (CHAR_ENCODING encoding) |
| LIBEXPORT int | MucGetNumEncodings (ENCODELIST_FLAGS flags) |
| LIBEXPORT int | MucGuessJapaneseEncoding (const UINT8 *in, char *encoding) |
| LIBEXPORT int | MucHasRightToLeftTextISO2022 (const char *in) |
| LIBEXPORT int | MucHasRightToLeftTextUC (const MIUNICODE *in) |
| LIBEXPORT ERRVALUE | MucIndicGlyphSubstitutions (const MIUNICODE *instr, MIUNICODE **outstr, SCRIPTTAG script=SCRIPTTAG_Default, INT32 len=-1, bool bApplyRephHack=false) |
| LIBEXPORT int | MucInitConversion (void **handle, const char *encoding, UINT32 flags) |
| LIBEXPORT bool | MucIsThaiLowerVowel (MIUNICODE ch) |
| LIBEXPORT bool | MucIsThaiTone (MIUNICODE ch) |
| LIBEXPORT bool | MucIsThaiUpperVowel (MIUNICODE ch) |
| LIBEXPORT bool | MucIsThaiVowel (MIUNICODE ch) |
| LIBEXPORT int | MucNameToCharset (const char *name, ENCODELIST_FLAGS flags) |
| LIBEXPORT char * | MucNameToDesc (const char *name) |
| LIBEXPORT MIUNICODE * | MucNameToDescUC (const char *name) |
| LIBEXPORT int | MucNameToNum (const char *name, ENCODELIST_FLAGS flags) |
| LIBEXPORT char * | MucNumToDesc (int num, ENCODELIST_FLAGS flags) |
| LIBEXPORT MIUNICODE * | MucNumToDescUC (int num, ENCODELIST_FLAGS flags) |
| LIBEXPORT const char * | MucNumToName (int num, ENCODELIST_FLAGS flags) |
| LIBEXPORT int | MucQuickConvToISO2022 (int encodetype, const void *in, char **outp) |
| LIBEXPORT int | MucQuickConvToUnicode (int encodetype, const void *in, MIUNICODE **ucp) |
| LIBEXPORT MIUNICODE * | MucRecompose (const MIUNICODE *string) |
| LIBEXPORT void | MucReset (void *handle) |
| LIBEXPORT void | MucStopConversion (void *handle) |
| LIBEXPORT int | MucStrLenISO2022 (const void *vinbuf) |
| LIBEXPORT MIUNICODE * | MucThaiShiftTonesAndVowels (const MIUNICODE *str, UINT32 fontencoding) |
| LIBEXPORT int | MucUnicodeToCharset (int val, int charset) |
Definitions for Muc functions.
| #define CHARALG_EUC 0x00020000 |
| #define CHARALG_GL2GR 0x00040000 |
Set high bits.
| #define CHARALG_GR2GL 0x00080000 |
Strip high bits.
| #define CHARALG_SJIS 0x00010000 |
| #define CHARSET_ArabicUC 0x00000033 |
The Arabic range of Unicode.
| #define CHARSET_ASCII CHARSET_ISO_Latin_1 |
| #define CHARSET_BigFive 0x00000007 |
| #define CHARSET_CNS_11543_01 0x00000008 |
| #define CHARSET_CNS_11543_02 0x00000009 |
| #define CHARSET_CNS_11543_14 0x0000000A |
| #define CHARSET_CP932 0x00000051 |
Shift-JIS with MS Extensions.
| #define CHARSET_DOSCP_437 0x00000022 |
DOS Codepage.
| #define CHARSET_DOSCP_850 0x00000023 |
DOS Codepage.
| #define CHARSET_DOSCP_852 0x00000024 |
DOS Codepage.
| #define CHARSET_DOSCP_857 0x00000025 |
DOS Codepage.
| #define CHARSET_DOSCP_861 0x00000026 |
DOS Codepage.
| #define CHARSET_DOSCP_863 0x00000027 |
DOS Codepage.
| #define CHARSET_DOSCP_865 0x00000028 |
DOS Codepage.
| #define CHARSET_GB_12345 0x00000002 |
| #define CHARSET_GB_2312 0x00000001 |
| #define CHARSET_GB_7589 0x00000003 |
| #define CHARSET_GB_7590 0x00000004 |
| #define CHARSET_GB_8565 0x00000006 |
| #define CHARSET_GB_Han 0x00000005 |
| #define CHARSET_GB_ROMAN 0x00000019 |
From ISO 2022.
| #define CHARSET_HW_KATAKANA 0x0000001B |
From ISO 2022.
| #define CHARSET_IBM_1046 0x00000044 |
IBM Arabic encoding.
| #define CHARSET_IBMCP_1040 0x0000002C |
IBM Codepage.
| #define CHARSET_IBMCP_1041 0x0000002D |
IBM Codepage.
| #define CHARSET_IBMCP_1043 0x0000002E |
IBM Codepage.
| #define CHARSET_IBMCP_855 0x0000002A |
IBM Codepage.
| #define CHARSET_IBMCP_864 0x0000002B |
IBM Codepage.
| #define CHARSET_IBMCP_869 0x00000029 |
IBM Codepage.
| #define CHARSET_ISCII_Bengali 0x00000048 |
Indic.
| #define CHARSET_ISCII_Devanagari 0x00000047 |
Indic.
| #define CHARSET_ISCII_Gujarati 0x0000004A |
Indic.
| #define CHARSET_ISCII_Gurmukhi 0x00000049 |
Indic.
| #define CHARSET_ISCII_Kannada 0x0000004E |
Indic.
| #define CHARSET_ISCII_Malayalam 0x0000004F |
Indic.
| #define CHARSET_ISCII_Oriya 0x0000004B |
Indic.
| #define CHARSET_ISCII_Tamil 0x0000004C |
Indic.
| #define CHARSET_ISCII_Telugu 0x0000004D |
Indic.
| #define CHARSET_ISO_8859_1 0x0000000F |
ASCII.
| #define CHARSET_ISO_8859_2 0x00000010 |
| #define CHARSET_ISO_8859_3 0x00000011 |
| #define CHARSET_ISO_8859_4 0x00000012 |
| #define CHARSET_ISO_8859_5 0x00000013 |
| #define CHARSET_ISO_8859_6 0x00000014 |
| #define CHARSET_ISO_8859_7 0x00000015 |
| #define CHARSET_ISO_8859_8 0x00000016 |
| #define CHARSET_ISO_8859_9 0x00000017 |
| #define CHARSET_ISO_Latin_1 CHARSET_ISO_8859_1 |
| #define CHARSET_JIS_0208 CHARSET_JIS_X_0208_1990 |
| #define CHARSET_JIS_0212 CHARSET_JIS_X_0212_1990 |
| #define CHARSET_JIS_C_6226 0x0000001A |
From ISO 2022.
| #define CHARSET_JIS_ROMAN 0x00000018 |
| #define CHARSET_JIS_X_0208_1990 0x0000000B |
| #define CHARSET_JIS_X_0212_1990 0x0000000C |
| #define CHARSET_KOI8 0x00000050 |
Russian/Ukrainian/etc.
| #define CHARSET_KS_C_5601_1987 0x0000000D |
Wansung.
| #define CHARSET_KS_C_5601_1992 0x00000046 |
Johab.
| #define CHARSET_KS_C_5601_Unif 0x00000045 |
Unified Hangul (UHang).
| #define CHARSET_KS_C_5657_1991 0x0000000E |
| #define CHARSET_MacArabic 0x00000034 |
| #define CHARSET_MacCentEurope 0x00000038 |
| #define CHARSET_MacCroatian 0x00000035 |
| #define CHARSET_MacCyrillic 0x00000036 |
| #define CHARSET_MacDingbats 0x00000037 |
| #define CHARSET_MacGreek 0x00000039 |
| #define CHARSET_MacHebrew 0x0000003A |
| #define CHARSET_MacIcelandic 0x0000003B |
| #define CHARSET_MacJapanese 0x0000003C |
| #define CHARSET_MacRoman 0x0000003E |
| #define CHARSET_MacRomanian 0x0000003D |
| #define CHARSET_MacSymbol 0x0000003F |
| #define CHARSET_MacThai 0x00000040 |
| #define CHARSET_MacTurkish 0x00000041 |
| #define CHARSET_MacUkrainian 0x00000042 |
| #define CHARSET_MAX 0x00000051 |
| #define CHARSET_MI_Thai 0x00000031 |
Thai Windows ordering.
| #define CHARSET_SHIFT_JIS (CHARALG_SJIS | CHARSET_JIS_X_0208_1990) |
| #define CHARSET_TIS620_2529 0x0000002F |
Thai (BDF font order).
| #define CHARSET_Unicode 0x00000000 |
| #define CHARSET_WinCP_ANSI 0x00000032 |
Windows ANSI Code Page.
| #define CHARSET_WinCP_Arab 0x00000020 |
Windows Codepage (cp1256).
| #define CHARSET_WinCP_Baltic 0x00000043 |
| #define CHARSET_WinCP_Cyrl 0x0000001E |
Windows Codepage (cp1251).
| #define CHARSET_WinCP_EE 0x0000001D |
Windows Codepage (cp1250).
| #define CHARSET_WinCP_Greek 0x0000001F |
Windows Codepage (cp1253).
| #define CHARSET_WinCP_Hebr 0x00000021 |
Windows Codepage (cp1255).
| #define CHARSET_WinCP_Thai 0x00000030 |
Thai Windows ordering.
| #define CHARSET_WinCP_Turk 0x0000001C |
Windows Codepage (cp1254).
| #define GLYPHSUB_AllowCircleForms 0x00000200 |
| #define GLYPHSUB_AllowCompatForms 0x00000004 |
Compatibility (equivalent) forms.
| #define GLYPHSUB_AllowFontForms 0x00000400 |
| #define GLYPHSUB_AllowFractionForms 0x00000010 |
"1/2" -> single glyph
| #define GLYPHSUB_AllowNarrowForms 0x00000002 |
| #define GLYPHSUB_AllowNoBreakForms 0x00000800 |
| #define GLYPHSUB_AllowPositionalForms 0x00000008 |
Arabic/Hebrew.
| #define GLYPHSUB_AllowSmallForms 0x00000080 |
| #define GLYPHSUB_AllowSquareForms 0x00000100 |
| #define GLYPHSUB_AllowSubscriptForms 0x00000020 |
| #define GLYPHSUB_AllowSuperscriptForms 0x00000040 |
| #define GLYPHSUB_AllowVerticalForms 0x00001000 |
| #define GLYPHSUB_AllowWideForms 0x00000001 |
| #define LIBEXPORT MI_DLLIMPORT |
| #define MASK_ALG 0x7FFF0000 |
| #define MASK_CHARSET 0x0000FFFF |
| #define MUC_THAI_OF 1 |
| #define MUC_THAI_TTF 2 |
| #define MUCEVENT_ComposeOff 0x02 |
| #define MUCEVENT_ComposeOn 0x01 |
| #define MUCEVENT_ComposeToggle (MUCEVENT_ComposeOn|MUCEVENT_ComposeOff) |
| #define MucGLYPHDIRECTION_LtoR 0 |
| #define MucGLYPHDIRECTION_RtoL 1 |
| #define MucGLYPHDIRECTION_Weak 2 |
| #define MucGLYPHFLAG_Mark 0x40 |
| #define MucGLYPHFLAG_NonJoining 0x80 |
| #define MucGLYPHFLAG_PartOfRtoLWord 0x01 |
Some special glyph flags.
The method we use to generate GLYPHCONTEXTBITs requires that PartOfRtoLWord be 1 and that we don't use bits 0x02 and 0x04 here. The PartOfRtoLWord flag will be shifted and OR'd with the bits of the glyphs on either side of it to create GLYPHCONTEXT bits. Yes, they're bytes, not longs. Don't muck with `em!
| #define MUCVERSION 2 |
| #define UCCONV_NoByteOrderMark 0x00000001 |
| enum SCRIPTTAG |
Script tags. These are used mostly when multiple "scripts" (as in writing systems) use the same character set but have different rules.
A good example of this is the Indic languages which mostly share the ISCII character set but Devanagari and Bengali have different formatting rules. The numbers below are not random gibberish as it may seem. If you take each pair of hex digits, they form an ASCII value so that together they make up a 4 letter abbreviation. This is how these tags are encoded in TrueType fonts.
| LIBEXPORT int MucCharsetToUnicode | ( | int | val, | |
| int | charset | |||
| ) |
Convert a single character from a given character set to MIUNICODE.
| LIBEXPORT int MucCharsetToUnicodeBuf | ( | MIUNICODE * | buf, | |
| int | charset | |||
| ) |
Convert a string of characters from a given character set to MIUNICODE.
Operates in-place.
| LIBEXPORT int MucConvertFromISO2022 | ( | void * | vcd, | |
| const void * | vinbuf, | |||
| char ** | outbuf | |||
| ) |
Convert string from ISO-2022 to anything.
| LIBEXPORT int MucConvertFromUnicode | ( | void * | vcd, | |
| const MIUNICODE * | ucbuf, | |||
| void ** | outbufp | |||
| ) |
Convert string from MIUNICODE to anything.
| LIBEXPORT int MucConvertISO2022aToUnicode | ( | const void * | , | |
| MIUNICODE ** | ||||
| ) |
Convert an ISO-2022a string to MIUNICODE.
ISO-2022a is a MicroImages invention -- a slightly modified version of ISO-2022 in which all backslashes are doubled. This is done because X resource files try to interpret backslashes as something special even if the backslash is the 2nd byte of a 2-byte character.
The caller should free the buffer returned
| LIBEXPORT int MucConvertISO2022ToUnicode | ( | const void * | , | |
| MIUNICODE ** | ||||
| ) |
Convert an ISO-2022 string to MIUNICODE.
The caller should free the buffer returned
| LIBEXPORT int MucConvertToISO2022 | ( | void * | vcd, | |
| const void * | vinbuf, | |||
| char ** | outbuf | |||
| ) |
Convert string from anything to ISO-2022.
The "vcd" must have been initialized by calling MucInitConversion(). Assumes vinbuf is whatever vcd was initialized to. Caller must free outbuf when done
| LIBEXPORT int MucConvertToUnicode | ( | void * | vcd, | |
| const void * | vinbuf, | |||
| MIUNICODE ** | outbuf | |||
| ) |
Convert string from anything to MIUNICODE.
The "vcd" must have been initialized by calling MucInitConversion(). Assumes vinbuf is whatever vcd was initialized to. Caller must free outbuf when done
| LIBEXPORT int MucConvertUnicodeGen | ( | const MIUNICODE * | inbuf, | |
| int(*)(int NewCharset, int OldCharset, void *UserData) | NewCharsetFunc, | |||
| int(*)(UINT8 *chars, int len, void *UserData) | AddCharsFunc, | |||
| void * | UserData, | |||
| UINT32 | flags | |||
| ) |
Convert string from MIUNICODE by calling callbacks.
This function can be used to convert a string from MIUNICODE to just about anything. It assumes ISO-8859-1 as an initial character set.
NewCharsetFunc() will be called when the function detects a character not available in the current character set. It tells you what character set it was in and what character set it wants to switch to. At this point, you can do whatever you need to do to switch fonts to one that supports the requested character set. You should return 0 or an error code < 0.
AddCharsFunc() is called when an internal buffer fills up or just before calling NewCharsetFunc() to let you add the characters to your output buffer or display them or whatever it is you're doing. The "characters" passed to you may be multi-byte. If this is the case, "len" is in bytes.
| LIBEXPORT int MucConvertUnicodeToISO2022 | ( | const MIUNICODE * | inbuf, | |
| char ** | outbuf | |||
| ) |
Convert a MIUNICODE string to ISO-2022.
The caller should free the buffer returned
Convert MIUNICODE (UTF-16) to UTF8.
Warning! This function assumes that the pointer passed in for outstr is either NULL or can be realloc'd as needed. You must therefore initialize it to something.
Convert UTF8 to MIUNICODE (UTF-16).
Warning! This function assumes that the pointer passed in for outstr is either NULL or can be realloc'd as needed. You must therefore initialize it to something.
Decompose a Unicode string into Canonical form.
In a canonical Unicode string, all decomposable characters are decomposed. For example, 0x00E9 (LATIN SMALL LETTER E WITH ACUTE) is represented as 0x0065 (LATIN SMALL LETTER E) + 0x0301 (COMBINING ACUTE ACCENT).
The caller should free the returned string
| LIBEXPORT MIUNICODE* MucDoGlyphSubstitutions | ( | const MIUNICODE * | instr, | |
| bool(*)(const UCDATA *sub, void *cbdata) | CB_AllowSub, | |||
| void * | cbdata, | |||
| UINT32 | flags = 0 | |||
| ) |
Perform glyph substitutions on a MIUNICODE string.
This function returns a copy of the given string after performing glyph substitutions based on the decomposition data in UnicodeData.txt (which we've cooked into a compact form, ucdata.ref)
Note: This function assumes the input string is in logical order and does not flip it in any way.
| instr | String to convert | |
| CB_AllowSub | Allows you to intercept and deny substitutions. A good use for this feature would be to verify that the selected font has a given glyph available before allowing the substitution. The callback should return true to allow the substitution, false to prevent it. If no callback is provided, all substitutions are allowed. | |
| cbdata | Callback data. Will be passed to CB_AllowSub as the 2nd parameter | |
| flags |
|
Flip Right-To-Left text to "visual" order.
Returns a copy of the input string which caller must free. This function is symmetric. That is, flipping the same string twice gives the original string.
| LIBEXPORT int MucGetDefaultCharsets | ( | int * | charset1, | |
| int * | charset2, | |||
| int * | charset3 | |||
| ) |
Returns the default charset for keyboard input.
Returns 3 charset codes which can be used as hints when trying to convert MIUNICODE to ISO-2022. This is used by the input of the XeText widget.
| charset1 | Charset to use for single-byte things < 128 | |
| charset2 | Charset to use for single-byte things > 128 | |
| charset3 | Charset to use for multi-byte things. |
| LIBEXPORT CHAR_ENCODING MucGetEncodingFromName | ( | const char * | name | ) |
Given an encoding name, return the CHAR_ENCODING value.
| LIBEXPORT CHAR_ENCODING MucGetEncodingFromNum | ( | int | num, | |
| ENCODELIST_FLAGS | flags | |||
| ) |
Given an encoding number, return the CHAR_ENCODING value.
This will return the Nth encoding which matches the criteria set forth by the flags parameter. Encoding numbers (as passed to this function) are not fixed and can change at any time. The only thing this function should be used for is retrieving a list of encodings to present the user with a choice.
| num | The encoding number. | |
| flags |
|
| LIBEXPORT int MucGetEncodingNum | ( | CHAR_ENCODING | encoding, | |
| ENCODELIST_FLAGS | flags | |||
| ) |
Return the index into array of encodings for a given encoding.
If you use MucGetNumEncodings() and MucNumToName() to iterate through all the encodings of a given type for the purposes of generating a list for the user to select from, then this function is useful for determining which item in that list a given encoding will be so that you can make it the default.
| encoding | The encoding to find | |
| flags |
|
Determine the direction of a glyph.
Given a MIUNICODE string and a current direction, determine the direction of the next character in the string.
The glyphflags array is filled in by this function.
Directions (both curdir and the return value)
glyphflags
| LIBEXPORT const char* MucGetName | ( | CHAR_ENCODING | encoding | ) |
Return an encoding name.
Once an encoding is named, the name is set in stone and can be written to files without fear of the name changing. The actual name presented to the user is looked up in messages.txt by calling MucNameToDescUC()
Do NOT free the return value from this function
| LIBEXPORT int MucGetNumEncodings | ( | ENCODELIST_FLAGS | flags | ) |
Get the number of supported encodings of a given type.
| flags |
|
| LIBEXPORT int MucGuessJapaneseEncoding | ( | const UINT8 * | in, | |
| char * | encoding | |||
| ) |
Given a string which may be in either JIS, Shift-JIS, or EUC encoding, try to determine which it is.
The "encoding" string passed in must be big enough to hold the resulting encoding name. "Shift-JIS" is the longest string it can return.
| LIBEXPORT int MucHasRightToLeftTextISO2022 | ( | const char * | in | ) |
Determine if an ISO-2022 string has right-to-left text in it.
| LIBEXPORT int MucHasRightToLeftTextUC | ( | const MIUNICODE * | in | ) |
Determine if a MIUNICODE string has right-to-left text in it.
| LIBEXPORT ERRVALUE MucIndicGlyphSubstitutions | ( | const MIUNICODE * | instr, | |
| MIUNICODE ** | outstr, | |||
| SCRIPTTAG | script = SCRIPTTAG_Default, |
|||
| INT32 | len = -1, |
|||
| bool | bApplyRephHack = false | |||
| ) |
Does Indic glyph substitution based on various rules.
script should be one of the following: SCRIPTTAG_Default (See below) SCRIPTTAG_beng (Bengali) SCRIPTTAG_deva (Devanagari) SCRIPTTAG_gujr (Gujarati) SCRIPTTAG_knda (Kannada) SCRIPTTAG_mlym (Malayalam) SCRIPTTAG_orya (Oriya) SCRIPTTAG_punj (Punjabi) SCRIPTTAG_taml (Tamil) SCRIPTTAG_telu (Telugu)
If script is SCRIPTTAG_Default, currently it will fall back on Devanagari, but eventually it will try to find the current system script and use that.
The len parameter is used to avoid an extra ucstrlen() call. If the caller already knows the length of instr, pass it. If not, just omit the parameter.
The caller is responsible for freeing outstr
The flag bApplyRephHack is just that, a hack. In order to correctly process the GSUB tables, we need to only apply the 'rphf' table if we have reordered a reph+halant+consonant combination to consonant+reph+halant. In order to do that, we need to somehow distinguish between text where we moved the reph and text where it was already there. To do this, if this flag is set, we change the reph to a fake Unicode character, one that we chose by subtracting 0x30 from the reph glyph. In all the Indic encodings, this falls on an unused code point (for example, 0x0930 becomes 0x0900). The caller knows to look for this and converts it back to the real reph glyph, but sets a flag telling us to process the 'rphf' table on it.
| len | Length of instr if known (just to avoid extra ucstrlen()) |
| LIBEXPORT int MucInitConversion | ( | void ** | handle, | |
| const char * | encoding, | |||
| UINT32 | flags | |||
| ) |
Initialize a MIUNICODE conversion handle.
| LIBEXPORT bool MucIsThaiLowerVowel | ( | MIUNICODE | ch | ) |
Returns true if ch is a Thai vowel character that sits below the consonant.
| LIBEXPORT bool MucIsThaiTone | ( | MIUNICODE | ch | ) |
Returns true if ch is a Thai tone character.
| LIBEXPORT bool MucIsThaiUpperVowel | ( | MIUNICODE | ch | ) |
Returns true if ch is a Thai vowel character that sits above the consonant.
| LIBEXPORT bool MucIsThaiVowel | ( | MIUNICODE | ch | ) |
Returns true if ch is a Thai vowel character.
| LIBEXPORT int MucNameToCharset | ( | const char * | name, | |
| ENCODELIST_FLAGS | flags | |||
| ) |
Determine the primary character set for a given encoding.
| LIBEXPORT char* MucNameToDesc | ( | const char * | name | ) |
Return the description of an encoding given the name (char*).
This looks up the encoding description from messages.txt. You should free the return value when you're done with it. Don't call this. Call MucNameToDescUC() instead.
| LIBEXPORT MIUNICODE* MucNameToDescUC | ( | const char * | name | ) |
Return the description of an encoding given the name (MIUNICODE*).
This looks up the encoding description from messages.txt. You should free the return value when you're done with it.
| LIBEXPORT int MucNameToNum | ( | const char * | name, | |
| ENCODELIST_FLAGS | flags | |||
| ) |
The opposite of MucNumToName().
| LIBEXPORT char* MucNumToDesc | ( | int | num, | |
| ENCODELIST_FLAGS | flags | |||
| ) |
Return the description of an encoding given the number (char*).
This looks up the encoding description from messages.txt. You should free the return value when you're done with it. Don't call this. Call MucNumToDescUC() instead.
| LIBEXPORT MIUNICODE* MucNumToDescUC | ( | int | num, | |
| ENCODELIST_FLAGS | flags | |||
| ) |
Return the description of an encoding given the number (MIUNICODE*).
This looks up the encoding description from messages.txt. You should free the return value when you're done with it.
| LIBEXPORT const char* MucNumToName | ( | int | num, | |
| ENCODELIST_FLAGS | flags | |||
| ) |
Convert an encoding number to its name.
Combines the following two functions
Do not free the output from this function
| LIBEXPORT int MucQuickConvToISO2022 | ( | int | encodetype, | |
| const void * | in, | |||
| char ** | outp | |||
| ) |
Don't use this function.
| LIBEXPORT int MucQuickConvToUnicode | ( | int | encodetype, | |
| const void * | in, | |||
| MIUNICODE ** | ucp | |||
| ) |
Don't use this function.
Recompose a Canonical form Unicode string into the combined forms.
In a canonical Unicode string, all decomposable characters are decomposed. For example, 0x00E9 (LATIN SMALL LETTER E WITH ACUTE) is represented as 0x0065 (LATIN SMALL LETTER E) + 0x0301 (COMBINING ACUTE ACCENT). This function puts them back together.
The caller should free the returned string
| LIBEXPORT void MucReset | ( | void * | handle | ) |
Reset a Muc handle.
| LIBEXPORT void MucStopConversion | ( | void * | handle | ) |
Stop a MIUNICODE conversion handle.
| LIBEXPORT int MucStrLenISO2022 | ( | const void * | vinbuf | ) |
Determine the number of characters in an ISO-2022 string.
Since ISO-2022 can consist of single-byte and two-byte characters and escape codes, a simple strlen() won't give you an accurate length. This function gives you the same result you'd get if you converted the string to MIUNICODE and called ucstrlen() on the result, but without going to the actual work.
| fontencoding | MUC_THAI_OF or MUC_THAI_TTF |
| LIBEXPORT int MucUnicodeToCharset | ( | int | val, | |
| int | charset | |||
| ) |
Convert a single MIUNICODE character to a given character set.
Returns 0 if the character is not available in the given charset
1.6.1