diff -uNr abi-old/src/af/gr/xp/gr_Graphics.cpp abi/src/af/gr/xp/gr_Graphics.cpp
--- abi-old/src/af/gr/xp/gr_Graphics.cpp	2005-03-21 05:50:42.000000000 +0000
+++ abi/src/af/gr/xp/gr_Graphics.cpp	2005-03-21 09:04:36.000000000 +0000
@@ -1096,20 +1096,40 @@
 
 /*!
     return true if linebreak at character c is permissible
-    the built-in class is too simple to differentiate between breaks before and after character
 */
-bool GR_Graphics::canBreak(GR_RenderInfo & ri, UT_sint32 &iNext, bool /* bAfter */)
+bool GR_Graphics::canBreak(GR_RenderInfo & ri, UT_sint32 &iNext, bool bAfter)
 {
+	UT_UCS4Char c[2];
+
 	iNext = -1; // we do not bother with this
 	UT_return_val_if_fail(ri.m_pText && ri.m_pText->getStatus() == UTIter_OK, false);
 	
 	*(ri.m_pText) += ri.m_iOffset;
 	UT_return_val_if_fail(ri.m_pText->getStatus() == UTIter_OK, false);
 	
-	UT_UCS4Char c = ri.m_pText->getChar();
-
 	UT_return_val_if_fail(getApp(), false);
-	return getApp()->getEncodingManager()->can_break_at(c);
+
+
+	if (bAfter) 
+	{
+		// Look up this character and the next.
+		c[0] = ri.m_pText->getChar();
+		++(*ri.m_pText);
+		c[1] = ri.m_pText->getChar();
+
+		return getApp()->getEncodingManager()->canBreakBetween(c);
+	}
+	else
+	{
+		// Look up this character and the one before.
+		c[1] = ri.m_pText->getChar();
+		--(*ri.m_pText);
+		c[0] = ri.m_pText->getChar();
+
+		return getApp()->getEncodingManager()->canBreakBetween(c);
+	}
+	// Control should never reach here.
+	UT_ASSERT(UT_SHOULD_NOT_HAPPEN);
 }
 
 /*!
diff -uNr abi-old/src/af/xap/xp/xap_EncodingManager.cpp abi/src/af/xap/xp/xap_EncodingManager.cpp
--- abi-old/src/af/xap/xp/xap_EncodingManager.cpp	2005-03-21 05:50:59.000000000 +0000
+++ abi/src/af/xap/xp/xap_EncodingManager.cpp	2005-03-21 10:22:00.417841864 +0000
@@ -646,16 +647,6 @@
 };
 
 
-/*
- TODO I'm pretty sure you can't break Korean at any character.
- TODO And what about Japanese Katakana and Hiragana?
-*/
-static const _rmap can_break_words_data[]=
-{
-	{"0"}, /* default value - can't break words at any character. */    
-	{"1",cjk_languages},
-	{NULL}
-};
 
 /*
  This table is useful since some iconv implementations don't know some cpNNNN 
@@ -959,6 +950,155 @@
     {{   NULL,                "",       "",       "",                                  "" 	   }}
 };
 
+/* 
+ * Line Breaking tables 
+ */
+
+
+enum EUniCat {
+	NONATOMIC=0, 
+	ATOMIC=1, 
+	PUNCNOEND=2, 
+	PUNCNOSTART=3, 
+	PUNCFORCE=4,
+	UNKNOWN=5
+};
+// Prototype.
+static enum EUniCat categoriseUniChar(UT_UCS4Char c);
+
+struct SCatRange {
+	UT_UCS4Char start;
+	UT_UCS4Char end;
+	enum EUniCat cat;
+};
+
+/* 
+ * This table catagorises all known Unicode characters.
+ * The entries are inclusive ranges which must be in numerical order.
+ *
+ * Defaults should be provided by access functions for unknown characters.
+ */
+struct SCatRange UniCharCats[] = {
+	{0x20, 0x20, PUNCFORCE},   // Space
+	{0x21, 0x21, PUNCNOSTART}, // !
+	{0x22, 0x27, NONATOMIC},   // "#$%&'
+	{0x28, 0x28, PUNCNOEND},   // ( 
+	{0x29, 0x29, PUNCNOSTART},  // ) 
+	{0x2a, 0x2b, NONATOMIC},   // *+
+	{0x2c, 0x2e, PUNCNOSTART},  // ,-.
+	{0x2f, 0x2f, NONATOMIC},   // /
+	{0x30, 0x39, NONATOMIC},   // Western numerals.
+	{0x3a, 0x3b, PUNCNOSTART}, // :;
+	{0x3c, 0x3c, PUNCNOEND},   // <
+	{0x3d, 0x3d, NONATOMIC},   // =
+	{0x3e, 0x3f, PUNCNOSTART}, // >?
+	{0x40, 0x40, NONATOMIC},   // @
+	{0x41, 0x5a, NONATOMIC},   // Western A-Z.
+	{0x5b, 0x5b, PUNCNOEND},   // [ 
+	{0x5c, 0x5c, NONATOMIC},   // "\"
+	{0x5d, 0x5d, PUNCNOSTART}, // ]
+	{0x5e, 0x60, NONATOMIC},   // ^_`
+	{0x61, 0x7a, NONATOMIC},   // Western a-z.
+	{0x7b, 0x7b, PUNCNOEND},   // {
+	{0x7c, 0x7c, NONATOMIC},   // "\" 
+	{0x7d, 0x7d, PUNCNOSTART}, // }
+	{0x7e, 0x7e, NONATOMIC},   // ~ 
+
+	/* General punctuation */
+	{0x2002, 0x2003, PUNCFORCE},     // en-space, em-space
+	{0x2010, 0x2010, PUNCNOSTART},   // Hyphen
+	{0x2013, 0x2014, PUNCNOSTART},   // en-dash, em-dash
+	{0x2018, 0x2018, PUNCNOEND},     // Open single quote
+	{0x2019, 0x2019, PUNCNOSTART},   // Close single quote
+	{0x201c, 0x201c, PUNCNOEND},     // Open double quote
+	{0x201d, 0x201d, PUNCNOSTART},   // Close double quote
+	{0x2020, 0x2021, PUNCNOSTART},   // Long cross, double dagger
+	{0x2026, 0x2026, PUNCNOSTART},   // ...
+	{0x2030, 0x2031, PUNCNOSTART},   // Per mille/10000 signs
+	{0x2032, 0x2034, PUNCNOSTART},   // Single, double, triple primes
+
+	/* CJK Blocks */
+
+	{0x3008, 0x3008, PUNCNOEND},     //  CJK <
+	{0x3009, 0x3009, PUNCNOSTART},   //  CJK >
+	{0x300a, 0x300a, PUNCNOEND},     //  CJK <<
+	{0x300b, 0x300b, PUNCNOSTART},   //  CJK >>
+	{0x300c, 0x300c, PUNCNOEND},     //  CJK quote open
+	{0x300d, 0x300d, PUNCNOSTART},   //  CJK quote close
+	{0x300e, 0x300e, PUNCNOEND},     //  CJK thick quote open
+	{0x300f, 0x300f, PUNCNOSTART},   //  CJK thick quote close
+	{0x3010, 0x3010, PUNCNOEND},     //  CJK |(
+	{0x3011, 0x3011, PUNCNOSTART},   //  CJK )|
+	{0x3014, 0x3014, PUNCNOEND},     //  CJK [
+	{0x3015, 0x3015, PUNCNOSTART},   //  CJK ]
+	{0x3016, 0x3016, PUNCNOEND},     //  CJK [(
+	{0x3015, 0x3017, PUNCNOSTART},   //  CJK )]
+	{0x3018, 0x3018, PUNCNOEND},     //  CJK [[
+	{0x3019, 0x3019, PUNCNOSTART},   //  CJK ]]
+	{0x301a, 0x301a, PUNCNOEND},     //  CJK ]|
+	{0x301d, 0x301d, PUNCNOEND},     //  CJK ``
+	{0x301e, 0x301e, PUNCNOSTART},   //  CJK ''
+	{0x301b, 0x301b, PUNCNOSTART},   //  CJK |[
+	{0x3200, 0x32ff, ATOMIC},        // Enclosed CJK Letters and Months
+	{0x3300, 0x33ff, ATOMIC},        // CJK Compatibility
+	{0x3400, 0x34ff, ATOMIC},        // CJK Unified Ideographs Ext. A
+	{0x4e00, 0x9faf, ATOMIC},        // CJK Unified Ideographs 
+
+	/* Halfwidth and Fullwidth Forms. */
+	{0xff01, 0xff01, PUNCNOSTART},   // !
+	{0xff02, 0xff02, ATOMIC},        // "
+	{0xff05, 0xff05, PUNCNOSTART},   // %
+	{0xff06, 0xff07, ATOMIC},        // &'
+	{0xff08, 0xff08, PUNCNOEND},     // (
+	{0xff09, 0xff09, PUNCNOSTART},   // )
+	{0xff0a, 0xff0b, ATOMIC},        // *+
+	{0xff0c, 0xff0c, PUNCNOSTART},   // ,
+	{0xff0a, 0xff0b, ATOMIC},        // -
+	{0xff0e, 0xff0e, PUNCNOSTART},   // .
+	{0xff0f, 0xff0f, ATOMIC},        // /
+	{0xff10, 0xff29, ATOMIC},        // Numerals
+	{0xff1a, 0xff1b, PUNCNOSTART},   // :;
+	{0xff1c, 0xff1c, PUNCNOEND},     // <
+	{0xff1d, 0xff1d, ATOMIC},        // =
+	{0xff1e, 0xff1e, PUNCNOSTART},   // >
+	{0xff1f, 0xff1f, PUNCNOSTART},   // ?
+	{0xff20, 0xff20, ATOMIC},        // @
+	{0xff21, 0xff3a, ATOMIC},        // A-Z
+	{0xff3b, 0xff3b, PUNCNOEND},     // [
+	{0xff3c, 0xff3c, ATOMIC},        // "\" 
+	{0xff3d, 0xff3d, PUNCNOSTART},   // ]
+	{0xff3e, 0xff5a, ATOMIC},        // ^_`a-z
+	{0xff5b, 0xff4b, PUNCNOEND},     // {
+	{0xff5c, 0xff5c, ATOMIC},        // |
+	{0xff5d, 0xff5d, PUNCNOSTART},   // }
+	{0xff5e, 0xff5e, ATOMIC},        // ~
+	{0xff61, 0xff61, PUNCNOSTART},   // Ideographic full stop
+	{0xff62, 0xff62, PUNCNOEND},     // Halfwidth left corner bracket
+	{0xff63, 0xff63, PUNCNOSTART},   // Halfwidth right corner bracket
+	{0xff64, 0xff64, PUNCNOSTART},   // Halfwidth ideographic comma
+	{0xffe0, 0xffe0, PUNCNOEND},     // Fullwidth Cent sign
+	{0xffe1, 0xffe1, PUNCNOEND},     // Fullwidth Pound sign
+	{0xffe5, 0xffe5, PUNCNOEND},     // Fullwidth Yen sign
+
+    /* More CJK blocks. */
+	
+	{0xf900,  0xfaff, ATOMIC},       // CJK Compatibility Ideographs
+	{0x20000, 0x2a6df, ATOMIC},      // CJK Unified Ideographs Ext. B
+	{0x2f800, 0x2fa1f, ATOMIC},      // CJK Compatibility Ideographs Sup.
+	{0,0,ATOMIC}
+};
+
+/*
+ * Boolean rules for whether a line break is allowed between all possible
+ * combinations of two categories.
+ */
+static bool blineBreakRules[] = {false, true,  false, false, true,
+                                 true,  true,  true,  false, true,
+                                 false, false, false, false, true,
+                                 true,  true,  true,  false, true,
+                                 true,  true,  true,  true,  true};
+
+
 /* ************************* here end tables *************************/
 
 const XAP_LangInfo* XAP_EncodingManager::findLangInfo(const char* key,XAP_LangInfo::fieldidx idx)
@@ -1111,8 +1251,6 @@
 	{	
 	    const char* str = search_rmap_with_opt_suffix(langcode_to_cjk,SEARCH_PARAMS);
 	    is_cjk_ = *str == '1';
-	    str = search_rmap_with_opt_suffix(can_break_words_data,SEARCH_PARAMS);
-	    can_break_words_ = *str == '1';
 	}
 	{
 	    if (cjk_locale()) {
@@ -1160,11 +1298,6 @@
 
 int XAP_EncodingManager__swap_stou,XAP_EncodingManager__swap_utos;
 
-bool XAP_EncodingManager::can_break_words() const
-{
-    return can_break_words_;
-}
-
 /*
     I'm not sure whether any non-cjk language doesn't make distinction
     between upper and lower case of the letter, but let's be prepared.
@@ -1191,19 +1324,18 @@
 }
 
 /*
-    This one correlates with can_break_words() very tightly.
-        Under CJK locales it returns 1 for cjk letters. 
-    Under non-CJK locales returns 0.
-*/
-bool XAP_EncodingManager::can_break_at(const UT_UCSChar c) const
+ * Returns true or false depending on whether a break between c[0] and c[1]
+ * is permissible.
+ */
+bool XAP_EncodingManager::canBreakBetween(const UT_UCS4Char c[]) const
 {
-    if (c == UCS_SPACE 
-	 || c == UCS_MINUS 
-	 || c == UCS_HYPHEN
-	 || c == UCS_EN_DASH
-	 || c == UCS_EM_DASH)
-	return 1;
-    return is_cjk_letter(c);
+	UT_uint8 rule;
+
+	// Find rule number based on character categories.
+	rule = categoriseUniChar(c[0]) * 5 + categoriseUniChar(c[1]);
+
+	// Return corresponding answer.
+	return blineBreakRules[rule];
 }
 
 
@@ -1267,7 +1399,7 @@
 		"--->8--------------\n"
 		
 		"	WinLanguageCode is 0x%04x, WinCharsetCode is %d\n"
-		"	cjk_locale %d, can_break_words %d, swap_utos %d, swap_stou %d\n",
+		"	cjk_locale %d, swap_utos %d, swap_stou %d\n",
 		getLanguageISOName(), getLanguageISOTerritory() ? getLanguageISOTerritory() : "NULL",
 		getNativeEncodingName(),getNativeSystemEncodingName(),
 		getNative8BitEncodingName(),getNativeNonUnicodeEncodingName(),
@@ -1275,7 +1407,7 @@
 		fallbackChar(1072),
 		getTexPrologue(),
 		getWinLanguageCode(), getWinCharsetCode(),
-		int(cjk_locale()), int(can_break_words()),int(swap_utos),int(swap_stou)
+		int(cjk_locale()), int(swap_utos),int(swap_stou)
 		));
 	UT_ASSERT( UT_iconv_isValid(iconv_handle_N2U) && UT_iconv_isValid(iconv_handle_U2N) );
 }
@@ -1351,3 +1483,45 @@
 }
 
 }
+
+
+/*
+ * Return the line breaking catagory that "c" belongs to.
+ *
+ * Note: For performance reasons this function assumes that the entries
+ *       in UniCharCats are in NUMERICAL ORDER starting with the smallest 
+ *       value.
+ */ 
+static enum EUniCat categoriseUniChar(UT_UCS4Char c) {
+	UT_uint32 i=0;
+	enum EUniCat cat=UNKNOWN;
+
+	/*
+	 * Loop for each range of characters.
+	 */
+	while ( c>= UniCharCats[i].start && UniCharCats[i].start != 0) 
+	{
+		if ((c >= UniCharCats[i].start) && (c <= UniCharCats[i].end))
+		{
+			cat = UniCharCats[i].cat;
+			break;
+		}
+		i++;
+	}
+
+	/*
+	 * Crude defaults:
+	 *
+	 * If the character is not listed then assume it's nonatomic (like western
+	 * letters) for all code blocks below "Armenian". If it belongs to the
+	 * "Armenian" block or above we assume CJK like atomic letters.
+	 *
+	 * This is not sensible, but it should at least mean that Greek, Cyrillic,
+	 * maybe Korean, Chinese and maybe Japanese get handled OK.
+	 */
+	if (c <0x0530) 
+		cat = NONATOMIC;
+	else
+		cat = ATOMIC;
+}
+
diff -uNr abi-old/src/af/xap/xp/xap_EncodingManager.h abi/src/af/xap/xp/xap_EncodingManager.h
--- abi-old/src/af/xap/xp/xap_EncodingManager.h	2005-03-21 05:50:59.000000000 +0000
+++ abi/src/af/xap/xp/xap_EncodingManager.h	2005-03-21 08:27:14.000000000 +0000
@@ -166,11 +166,6 @@
 	*/
 	inline virtual bool cjk_locale() const { return is_cjk_; }
 
-	/*  whether words can be broken at any character of the word (wide 
-	    character, not byte). True for japanese.
-	*/
-	virtual bool can_break_words() const;
-
 	/*
 	    returns true if there is no distinction between upper and lower
 	    letters.
@@ -185,11 +180,9 @@
 	virtual bool noncjk_letters(const UT_UCSChar* str,int len) const;
 
 	/*
-	    This one correlates with can_break_words() very tightly.
-		Under CJK locales it returns 1 for cjk letters. 
-	    Under non-CJK locales returns 0.
-	*/
-	virtual bool can_break_at(const UT_UCSChar c) const;
+	 * Returns true if a break between c[0] and c[1] is permissible.
+	 */
+	virtual bool canBreakBetween(const UT_UCS4Char c[]) const;
 
 	/*
 	    This should be as precise as possible.
@@ -285,7 +278,7 @@
 
 	const char* TexPrologue;
 	UT_uint32 WinLanguageCode,WinCharsetCode;
-	bool is_cjk_,can_break_words_,m_bIsUnicodeLocale;
+	bool is_cjk_,m_bIsUnicodeLocale;
 };
 
 /*
diff -uNr abi-old/src/text/fmt/xp/fp_TextRun.cpp abi/src/text/fmt/xp/fp_TextRun.cpp
--- abi-old/src/text/fmt/xp/fp_TextRun.cpp	2005-03-21 05:51:19.000000000 +0000
+++ abi/src/text/fmt/xp/fp_TextRun.cpp	2005-03-21 08:56:26.000000000 +0000
@@ -355,7 +355,8 @@
 		PD_StruxIterator text(getBlock()->getStruxDocHandle(),
 							  getBlockOffset() + fl_BLOCK_STRUX_OFFSET);
 		UT_return_val_if_fail(text.getStatus() == UTIter_OK, false);
-		text.setUpperLimit(text.getPosition() + getLength() - 1);
+		// Removed to allow new line break handling to work.
+		//text.setUpperLimit(text.getPosition() + getLength() - 1);
 		
 		UT_return_val_if_fail(m_pRenderInfo, false);
 		m_pRenderInfo->m_pText = &text;
@@ -389,7 +390,8 @@
 							  getBlockOffset() + fl_BLOCK_STRUX_OFFSET );
 
 		UT_return_val_if_fail(text.getStatus() == UTIter_OK, false);
-		text.setUpperLimit(text.getPosition() + getLength() - 1);
+		// Removed to enable new line breaking to work.
+		//text.setUpperLimit(text.getPosition() + getLength() - 1);
 		
 		UT_return_val_if_fail(m_pRenderInfo, false);
 		m_pRenderInfo->m_pText = &text;
@@ -532,7 +534,8 @@
 						  offset + fl_BLOCK_STRUX_OFFSET);
 
 	m_pRenderInfo->m_pText = &text;
-	text.setUpperLimit(text.getPosition() + getLength() - 1);
+	// Removed to enable new line breaking to work.
+	//text.setUpperLimit(text.getPosition() + getLength() - 1);
 	UT_uint32 iPosStart = text.getPosition();
 	
 	//bool bReverse = (getVisDirection() == UT_BIDI_RTL);