diff -ru abi-0.7.11-orig~/abi-0.7.11/CREDITS.TXT abi-0.7.11-orig-orig/abi-0.7.11/CREDITS.TXT --- abi-0.7.11-orig~/abi-0.7.11/CREDITS.TXT Thu Nov 9 19:38:46 2000 +++ abi-0.7.11-orig-orig/abi-0.7.11/CREDITS.TXT Fri Nov 10 15:05:41 2000 @@ -44,7 +44,7 @@ Vlad Harchev Support for non latin-1 Languages Stephen Hack options dialog Martin Willemoes Hansen -hj XIM, focus +hj XIM, focus, principal author of CJK support patch Roman Hodek m68k Endian patch Ming-I Hsieh FreeBSD Perry Ismangil AbiHello @@ -106,6 +106,7 @@ Robert G. Werner VI keybindings John Wood NetBSD Alan Young Alpha/NT +Belcon Zhao Testing/fixing CJK support translators ----------- diff -ru abi-0.7.11-orig~/abi-0.7.11/src/af/util/xp/ut_mbtowc.cpp abi-0.7.11-orig-orig/abi-0.7.11/src/af/util/xp/ut_mbtowc.cpp --- abi-0.7.11-orig~/abi-0.7.11/src/af/util/xp/ut_mbtowc.cpp Thu Nov 9 19:38:46 2000 +++ abi-0.7.11-orig-orig/abi-0.7.11/src/af/util/xp/ut_mbtowc.cpp Thu Nov 9 22:09:50 2000 @@ -240,7 +240,9 @@ UT_Mbtowc::~UT_Mbtowc() { - iconv_close(cd); + /*libiconv is stupid - we'll get segfault if we don't check - VH */ + if (cd!=(iconv_t)-1) + iconv_close(cd); }; int UT_Mbtowc::mbtowc(wchar_t &wc,char mb) diff -ru abi-0.7.11-orig~/abi-0.7.11/src/af/util/xp/ut_wctomb.cpp abi-0.7.11-orig-orig/abi-0.7.11/src/af/util/xp/ut_wctomb.cpp --- abi-0.7.11-orig~/abi-0.7.11/src/af/util/xp/ut_wctomb.cpp Thu Nov 9 19:38:47 2000 +++ abi-0.7.11-orig-orig/abi-0.7.11/src/af/util/xp/ut_wctomb.cpp Thu Nov 9 22:09:30 2000 @@ -185,7 +185,9 @@ UT_Wctomb::~UT_Wctomb() { - iconv_close(cd); + /*libiconv is stupid - we'll get segfault if we don't check - VH */ + if (cd!=(iconv_t)-1) + iconv_close(cd); }; int UT_Wctomb::wctomb(char * pC,int &length,wchar_t wc) diff -ru abi-0.7.11-orig~/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.cpp abi-0.7.11-orig-orig/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.cpp --- abi-0.7.11-orig~/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.cpp Thu Nov 9 19:38:47 2000 +++ abi-0.7.11-orig-orig/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.cpp Fri Nov 10 15:08:48 2000 @@ -395,6 +395,15 @@ static const char* wincharsetcode_th[]= /* thai charset*/ { "th", NULL }; +/*I'm not sure that charset code is the same for Big5 and GB2312. + Tested with GB2312 only. +*/ +static const char* wincharsetcode_zh_GB2312[]= /* chinese*/ +{ "zh_CN.GB2312", "zh_TW.GB2312", NULL }; + +static const char* wincharsetcode_zh_BIG5[]= /* chinese*/ +{ "zh_CN.BIG5", "zh_TW.BIG5", NULL }; + static const _rmap langcode_to_wincharsetcode[]= { {"0"}, /* default value - ansi charset*/ @@ -403,6 +412,8 @@ {"162",wincharsetcode_tr}, {"163",wincharsetcode_vi}, {"222",wincharsetcode_th}, + {"134",wincharsetcode_zh_GB2312}, + {"136",wincharsetcode_zh_BIG5}, {NULL} }; @@ -449,12 +460,16 @@ {NULL} }; +/* + This table is useful since iconv implementations don't know some cpNNNN + charsets but under some different name. +*/ static const _map MSCodepagename_to_charset_name_map[]= { /*key, value*/ {NULL,NULL}, - {"CP936","BIG5"}, /* most probably it's correct - VH*/ - {"CP950","GB2312"}, /* 100% correct */ + {"CP936","GB2312"}, + {"CP950","BIG5"}, {NULL,NULL} }; @@ -463,7 +478,10 @@ { /*key, value*/ {NULL}, -/* {"0x404","zh_CN"},*/ /*I guess - VH*/ + {"zh_CN.BIG5", "0x404"}, + {"zh_CN.GB2312", "0x804"}, + {"zh_TW.BIG5", "0x404"}, + {"zh_TW.GB2312", "0x804"}, {NULL} }; @@ -728,7 +746,7 @@ len += sprintf(buf+len,"\\usepackage[%s]{inputenc}\n",NativeTexEncodingName); if (NativeBabelArgument) len += sprintf(buf+len,"\\usepackage[%s]{babel}\n",NativeBabelArgument); - TexPrologue = len ? UT_strdup(buf) : ""; + TexPrologue = len ? UT_strdup(buf) : " "; }; } if (cjk_locale()) { @@ -815,7 +833,9 @@ const char* XAP_EncodingManager::charsetFromCodepage(int lid) const { - char* cpname = wvLIDToCodePageConverter(lid); + static char buf[100]; + sprintf(buf,"CP%d",lid); + char* cpname = buf; UT_Bool is_default; const char* ret = search_map(MSCodepagename_to_charset_name_map,cpname,&is_default); return is_default ? cpname : ret; @@ -823,7 +843,10 @@ const char* XAP_EncodingManager::WindowsCharsetName() const { - return charsetFromCodepage( getWinLanguageCode() ); + char* cpname = wvLIDToCodePageConverter(getWinLanguageCode()); + UT_Bool is_default; + const char* ret = search_map(MSCodepagename_to_charset_name_map,cpname,&is_default); + return is_default ? cpname : ret; }; UT_uint32 XAP_EncodingManager::getWinLanguageCode() const diff -ru abi-0.7.11-orig~/abi-0.7.11/src/wp/impexp/xp/ie_exp_RTF_listenerWriteDoc.cpp abi-0.7.11-orig-orig/abi-0.7.11/src/wp/impexp/xp/ie_exp_RTF_listenerWriteDoc.cpp --- abi-0.7.11-orig~/abi-0.7.11/src/wp/impexp/xp/ie_exp_RTF_listenerWriteDoc.cpp Thu Nov 9 19:38:49 2000 +++ abi-0.7.11-orig-orig/abi-0.7.11/src/wp/impexp/xp/ie_exp_RTF_listenerWriteDoc.cpp Thu Nov 9 20:24:06 2000 @@ -197,13 +197,11 @@ /*FIXME: can it happen that wctomb will fail under CJK locales? */ m_wctomb.wctomb_or_fallback(mbbuf,mblen,*pData++); for(int i=0;i 0x007f) - m_pie->_rtf_nonascii_hex2(c); - else - *pBuf++ = c; - - }; + FlushBuffer(); + m_pie->_rtf_nonascii_hex2(c); + } } else if (!m_pie->m_atticFormat) { if (*pData > 0x00ff) // emit unicode character diff -ru abi-0.7.11-orig~/abi-0.7.11/src/wp/impexp/xp/ie_imp_RTF.cpp abi-0.7.11-orig-orig/abi-0.7.11/src/wp/impexp/xp/ie_imp_RTF.cpp --- abi-0.7.11-orig~/abi-0.7.11/src/wp/impexp/xp/ie_imp_RTF.cpp Thu Nov 9 19:38:49 2000 +++ abi-0.7.11-orig-orig/abi-0.7.11/src/wp/impexp/xp/ie_imp_RTF.cpp Fri Nov 10 15:11:25 2000 @@ -1447,6 +1447,10 @@ //is seen // Now comes the font name, terminated by either a close brace or a slash or a semi-colon int count = 0; + /* + FIXME: CJK font names come in form \'aa\'cd\'ef - so we have to + parse \'HH correctly (currently we ignore them!) - VH + */ while ( ch != '}' && ch != '\\' && ch != ';' && ch!= '{') { keyword[count++] = ch; @@ -1472,6 +1476,8 @@ { if (!ReadCharFromFile(&ch)) return UT_FALSE; + if (ch=='{') + ++nesting; } if (nesting>0 && i!=nesting) //we need to skip '}' we've just seen. if (!ReadCharFromFile(&ch))