--- abi/src/wp/impexp/xp/ie_imp_RTF.cpp	2005-06-03 04:19:40.645871064 +0100
+++ abi/src/wp/impexp/xp/ie_imp_RTF.cpp-NEWEST	2005-06-03 04:08:56.000000000 +0100
@@ -919,8 +919,10 @@
 
 
 // Font table items
-RTFFontTableItem::RTFFontTableItem(FontFamilyEnum fontFamily, int charSet, int codepage, FontPitch pitch,
-									unsigned char* panose, char* pFontName, char* pAlternativeFontName)
+RTFFontTableItem::RTFFontTableItem(FontFamilyEnum fontFamily, int charSet, 
+                                   int codepage, FontPitch pitch,
+                                   unsigned char* panose, char*
+                                   pFontName, char* pAlternativeFontName)
 {
 	m_family = fontFamily;
 	m_charSet = charSet;
@@ -7919,8 +7921,9 @@
 	int codepage = 0;
 	unsigned char panose[10];
 	memset(panose, 0, sizeof(unsigned char));
-	char* pFontName = NULL;
-	char* pAlternativeFontName = NULL;
+	UT_String sFontName;
+	UT_String sAlternativeFontName;
+	bool bFontNameHasHex, bAltFontNameHasHex;
 	RTFTokenType tokenType;
 
 	//TODO - this should be intialized once for the whole RTF reader.
@@ -7974,8 +7977,12 @@
 			fontFamily = RTFFontTableItem::ffNone;
 		}
 	}
-	// Now (possibly) comes some optional keyword before the fontname
-	while (tokenType != RTF_TOKEN_DATA || nesting > 0)
+
+	
+	// Loop through the rest of the font definition, calling out to
+	// ReadFontName() to read the font names.
+	nesting=1; // We should have had a "{" at the start of the font definition.
+	while (nesting > 0)
 	{
     	tokenType = NextToken(keyword,&parameter,&paramUsed,MAX_KEYWORD_LEN,true);
 		switch (tokenType)
@@ -7986,7 +7993,12 @@
 		case RTF_TOKEN_CLOSE_BRACE:
 			nesting --;
 			break;
+		// Data indicates the start of the font name.
 		case RTF_TOKEN_DATA:
+			SkipBackChar(keyword[0]);  // Data can only be one byte, right?
+			if (!ReadFontName(&sFontName, &sAlternativeFontName,
+			                  &bFontNameHasHex, &bAltFontNameHasHex)) 
+				return false;
 			break;
 		case RTF_TOKEN_KEYWORD:
 			pValue = const_cast<int*>(static_cast<const int*>(keywordMap.pick(reinterpret_cast<char*>(&keyword[0]))));
@@ -8038,81 +8050,77 @@
 			{
 				charSet = parameter;
 			}
+			// Escaped hex is really data, so this should be the start of the
+			// font name.
+			if (strcmp(reinterpret_cast<char*>(&keyword[0]),"'") == 0) {
+				SkipBackChar('\'');
+				SkipBackChar('\\');
+				if (!ReadFontName(&sFontName, &sAlternativeFontName,
+			                      &bFontNameHasHex, &bAltFontNameHasHex)) 
+					return false;
+			}
 			break;
 		default:
 			//TODO: handle errors
 			break;
 		}
 	}
-	if (nesting == -1)
-	{
-		UT_DEBUGMSG(("RTF: Font name not found in font definition %d",fontIndex));
-	}
-	// Now comes the font name, terminated by either a close brace or a slash or a semi-colon
-	ch = keyword[0];
-	int count = 0;
-	/*
-	    FIXME: CJK font names come in form \'aa\'cd\'ef - so we have to
-	    parse \'HH correctly (currently we ignore them!) - VH
-	*/
-	while ( ch != '}'  &&  ch != '\\'  &&  ch != ';' && ch!= '{')
-	{
-		keyword[count++] = ch;
-		if (!ReadCharFromFile(&ch))
-		{
-			return false;
-		}
-	}
-	if (ch=='{')
-	{
-		++nesting;
-	}
 
-	keyword[count] = 0;
 #ifndef XP_TARGET_COCOA
 	/*work around "helvetica" font name -replace it with "Helvetic"*/
-	if (!UT_stricmp(reinterpret_cast<char*>(&keyword[0]),"helvetica"))
+	if (!UT_stricmp(sFontName.c_str(),"helvetica"))  // case-insensitive, as before this patch
 	{
-		strcpy(reinterpret_cast<char*>(&keyword[0]),"Helvetic");
+		sFontName = "Helvetic";  // assignment; "==" here was a no-op comparison
 	}
 #endif /* ! XP_TARGET_COCOA */
 
-	if (!UT_cloneString(pFontName, reinterpret_cast<char*>(&keyword[0])))
-	{
-		// TODO outofmem
+	/* TODO:
+	 *
+	 * In an ideal world, this function could just set the FontName and the
+	 * Alternative fontname and not do any of the messing around below. 
+	 * Unfortunately, at the moment the rest of abiword doesn't support
+	 * non-ASCII font names and doesn't appear to do anything much with the
+	 * alternative fontname. Therefore, we have to do our best to provide a 
+	 * sensible font name here even if the file only specifies a non-ASCII
+	 * name.
+	 *
+	 * Just ignoring non-ASCII characters doesn't help, because then font names
+	 * like XXXXX_GB2312 get called "_GB2312". Therefore, if the font name
+	 * contains non-ASCII characters and there's an ASCII alternative name
+	 * we use the alternative. If the alternative is missing, or itself
+	 * uses non-ASCII characters we set the font name to
+	 * "UnknownUnicodeFontName". This at least gives the user something 
+	 * understandable to see in the font list. Also, if they alias
+	 * UnknownUnicodeFontName to the most likely font for their region (e.g.
+	 * SongTi in China) then most of their documents should display correctly.
+	 */
+	if (sFontName.length() == 0 || bFontNameHasHex) {
+		if (sAlternativeFontName.length() > 0 && !bAltFontNameHasHex) 
+			sFontName = sAlternativeFontName;
+		else
+			sFontName = "UnknownUnicodeFontName";
 	}
-	for (int i=0; i <= nesting; ++i)
-	{
-		// Munch the remaining control words down to the close brace
-		while (ch != '}')
-		{
-			if (ch == ';' && !bNested && (i == nesting))
-			{
-				break; // Cocoa RTF: {\fonttbl\f0\fnil\fcharset78 HiraKakuPro-W3;\f1\fnil\fcharset102 STXihei;}
-			}
-			if (!ReadCharFromFile(&ch))
-			{
-				return false;
-			}
-			if (ch=='{')
-			{
-				++nesting;
-			}
+
+	// Clone the font name, or set it to NULL if it was an empty string.
+	char *fn=NULL;
+	if (sFontName.length()) {
+		if (!UT_cloneString(fn, sFontName.c_str())) {
+			UT_DEBUGMSG(("RTF: Out of memory parsing font table.\n"));
+			return false;
 		}
-		if (nesting>0 && i!=nesting) //we need to skip '}' we've just seen.
-		{
-			if (!ReadCharFromFile(&ch))
-			{
-				return false;
-			}
+	}
+	// Do the same for the alternative font name.
+	char *afn=NULL;
+	if (sAlternativeFontName.length()) {
+		if (!UT_cloneString(afn, sAlternativeFontName.c_str())) {
+			UT_DEBUGMSG(("RTF: Out of memory parsing font table.\n"));
+			return false;
 		}
 	}
-
+	
 	// Create the font entry and put it into the font table
 	RTFFontTableItem* pNewFont = new RTFFontTableItem(fontFamily, charSet,
-													  codepage, pitch,
-													  panose, pFontName,
-													  pAlternativeFontName);
+													  codepage, pitch, panose, fn, afn);
 	if (pNewFont == NULL)
 	{
 		return false;
@@ -8134,7 +8142,7 @@
 	}
 	else
 	{
-		UT_DEBUGMSG (("RTF: font %d (named %s) already defined. Ignoring\n", fontIndex, pFontName));
+		UT_DEBUGMSG (("RTF: font %d (named %s) already defined. Ignoring\n", fontIndex, sFontName.c_str()));
 		DELETEP (pNewFont);
 	}
 
@@ -8142,6 +8150,172 @@
 }
 
 
+/*
+ * Read the font name of the current font from the input stream.
+ * If present, also read the alternative font name. This should
+ * deal correctly with any commands embedded in the font name.
+
+ * Eg:
+ *     {\f18\fnil\fcharset134\fprq2{\*\panose 02010600030101010101}
+ *     \'cb\'ce\'cc\'e5{\*\falt SimSun};}
+ *
+ * or even:
+ *	   {\f20\froman Times New {\*\unknowncommand Fibble!}Roman;}
+ * 
+ *
+ * Currently escaped hex data (\'XX) is discarded since the rest of the
+ * program cannot cope with non-ASCII fontnames.
+ */
+
+/* The state used while reading in the font name.
+ * It holds a pointer to the font name that we are currently writing.
+ * Initially this points to FontName. We switch it to point to AltFontName
+ * when we see a \falt command. When the group containing the \falt ends
+ * we pop the state off the stack and so this pointer reverts back to
+ * FontName.
+ */
+struct SFontNameState {
+	UT_String *pFontName;    // Name that incoming data is currently appended to.
+	bool *pbFontNameHasHex;  // Flag set to true when a \'XX escape is seen for that name.
+	bool bSkipping;          // While true, data and keywords in the group are ignored.
+};
+
+bool IE_Imp_RTF::ReadFontName(UT_String *sFontName,
+                              UT_String *sAltFontName,
+                              bool *bFontNameHasHex,
+                              bool *bAltFontNameHasHex)
+{
+	// Reads up to the ';' ending a font name, filling *sFontName and (inside
+	// a \falt group) *sAltFontName; each *HasHex flag records a dropped \'XX.
+	// Returns false on unbalanced braces, a read error or end of input.
+	unsigned char keyword[MAX_KEYWORD_LEN];
+	RTFTokenType tokenType;
+	UT_sint32 parameter = 0;
+	unsigned char ch;
+	bool paramUsed = false;
+	bool bSeenStar = false;     // Was the previous keyword "\*"?
+	bool bFollowsStar = false;  // Does the current keyword follow "\*"?
+	bool bDone = false;         // Set once the outcome is known.
+	bool bResult = false;       // Value handed back to the caller.
+	UT_Stack stateStack;
+	// The base state lives on this function's stack and must never be
+	// deleted; every nested group gets a heap-allocated copy instead.
+	SFontNameState baseState;
+	struct SFontNameState *currentState = &baseState;
+	struct SFontNameState *oldState;
+
+	// Initialise the current state.
+	currentState->pFontName = sFontName;
+	currentState->pbFontNameHasHex = bFontNameHasHex;
+	currentState->bSkipping = false;
+	*bFontNameHasHex = false;
+	*bAltFontNameHasHex = false;
+	while (!bDone)
+	{
+		// NB: This doesn't ignore whitespace.
+		tokenType = NextToken(keyword,&parameter,&paramUsed,MAX_KEYWORD_LEN,false);
+		switch (tokenType)
+		{
+		case RTF_TOKEN_OPEN_BRACE:
+			// Push the current state and continue with a heap copy of it.
+			oldState = currentState;
+			stateStack.push(reinterpret_cast<void*>(currentState));
+			currentState = new SFontNameState;
+			if (!currentState) {
+				UT_DEBUGMSG(("RTF: Out of memory.\n"));
+				bDone = true;
+				break;
+			}
+			*currentState = *oldState;
+			break;
+		case RTF_TOKEN_CLOSE_BRACE:
+			// At depth 0 the current state is baseState, which is not heap
+			// allocated and must not be deleted: the brace is unbalanced.
+			if (stateStack.getDepth() == 0)
+			{
+				UT_DEBUGMSG(("RTF: Too many closing parenthesises in font table.\n"));
+				bDone = true;
+				break;
+			}
+			// Throw away the current state and pop the enclosing one.
+			delete currentState;
+			currentState = NULL;
+			if (!stateStack.pop(reinterpret_cast<void**>(&currentState)))
+				bDone = true;
+			break;
+		case RTF_TOKEN_DATA:
+			if (currentState->bSkipping)  // Inside an ignored group.
+				break;
+			if (keyword[0] == ';')
+			{
+				// The terminator is only valid outside any nested group.
+				bResult = (stateStack.getDepth() == 0);
+				if (!bResult) {
+					UT_DEBUGMSG(("RTF: Too many opening parenthesises in font table.\n"));
+				}
+				bDone = true;
+				break;
+			}
+			// Other data belongs to the font name currently being written.
+			*(currentState->pFontName) += keyword[0];
+			break;
+		case RTF_TOKEN_KEYWORD:
+			if (currentState->bSkipping)  // Inside an ignored group.
+				break;
+			// Handle the "*" keyword.
+			if (strcmp(reinterpret_cast<char*>(&keyword[0]),"*") == 0)
+			{
+				bSeenStar = true;
+				break;
+			}
+			// "\*" only qualifies the keyword directly after it: use it up.
+			bFollowsStar = bSeenStar;
+			bSeenStar = false;
+			// Handle hex escaped data.
+			if (strcmp(reinterpret_cast<char*>(&keyword[0]),"'") == 0)
+			{
+				// Sadly the Abi backend lacks support for multibyte fontnames,
+				// so skip the two hex digits and hope for an alternate name.
+				// TODO: This also drops hex escaped ASCII, which is legal.
+				if ( !ReadCharFromFile(&ch)  ||  !ReadCharFromFile(&ch) )
+				{
+					bDone = true;
+					break;
+				}
+				*(currentState->pbFontNameHasHex) = true;
+				break;
+			}
+			// Handle "\falt" keyword.
+			if (strcmp(reinterpret_cast<char*>(&keyword[0]),"falt") == 0)
+			{
+				// From here on data in this group goes to the alternative name.
+				currentState->pFontName = sAltFontName;
+				currentState->pbFontNameHasHex = bAltFontNameHasHex;
+				break;
+			}
+			// An unknown keyword introduced by "\*" marks an ignorable group.
+			if (bFollowsStar)
+				currentState->bSkipping = true;
+			break;
+		case RTF_TOKEN_NONE:
+			UT_DEBUGMSG(("Premature end of file reading font table.\n"));
+			bDone = true;
+			break;
+		default:
+			break;
+		} // switch
+	} // while
+	// Free every group state still on the heap. The bottom stack entry is
+	// &baseState, which ends the loop without being deleted.
+	while (currentState != &baseState)
+	{
+		delete currentState;
+		currentState = NULL;
+		if (!stateStack.pop(reinterpret_cast<void**>(&currentState)))
+			break;
+	}
+	return bResult;
+}
 
 
 //////////////////////////////////////////////////////////////////////////////
--- abi/src/wp/impexp/xp/ie_imp_RTF.h	2005-06-03 04:10:01.619896392 +0100
+++ abi/src/wp/impexp/xp/ie_imp_RTF.h-NEWEST	2005-06-03 04:09:06.000000000 +0100
@@ -612,6 +612,7 @@
 	bool ReadColourTable();
 	bool ReadFontTable();
 	bool ReadOneFontFromTable(bool bNested);
+	bool ReadFontName(UT_String *sFontName, UT_String *sAltFontName, bool *bFontNameHasHex, bool *bAltFontNameHasHex);
 	bool ReadRevisionTable();
 	void setEncoding();  
 public: