1 661f8c0b920f5da Mon Sep 17 00:00:00 2001
2 From: Kristian Rietveld <kris@lanedo.com>
3 Date: Tue, 19 Mar 2013 11:23:49 +0100
4 Subject: [PATCH 2/2] Detect and handle characters encoded in two UTF16 code
7 Another important change: gi->index should point at the current
8 character, not the current index into the string. Before this change,
9 the current character equaled the current index into the string.
11 modules/basic/basic-coretext.c | 55 ++++++++++++++++++++++++++++-----------
12 1 files changed, 39 insertions(+), 16 deletions(-)
14 diff --git a/modules/basic/basic-coretext.c b/modules/basic/basic-coretext.c
15 index 33ce479..06b648e 100644
16 --- a/modules/basic/basic-coretext.c
17 +++ b/modules/basic/basic-coretext.c
18 @@ -166,7 +166,42 @@ run_iterator_run_is_non_monotonic (struct RunIterator *iter)
20 run_iterator_get_character (struct RunIterator *iter)
22 - return CFStringGetCharacterAtIndex (iter->cstr, iter->current_indices[iter->ct_i]);
25 + lower = iter->current_indices[iter->ct_i];
26 + if (iter->ct_i + 1 < CTRunGetGlyphCount (iter->current_run))
27 + upper = iter->current_indices[iter->ct_i + 1];
30 + CFRange range = CTRunGetStringRange (iter->current_run);
31 + upper = range.location + range.length;
34 + if (upper - lower == 1)
35 + return CFStringGetCharacterAtIndex (iter->cstr, lower);
36 + if (upper - lower == 2)
38 + /* Character is encoded in two UTF-16 code units (a surrogate pair). */
43 + orig[0] = CFStringGetCharacterAtIndex (iter->cstr, lower);
44 + orig[1] = CFStringGetCharacterAtIndex (iter->cstr, lower + 1);
46 + ch = g_utf16_to_ucs4 (orig, 2, NULL, NULL, NULL);
53 + /* This should not be reached, because other cases cannot occur. Instead
54 + * of crashing, return the first character which will likely be displayed
58 + return CFStringGetCharacterAtIndex (iter->cstr, lower);
62 @@ -175,12 +210,6 @@ run_iterator_get_cgglyph (struct RunIterator *iter)
63 return iter->current_cgglyphs[iter->ct_i];
67 -run_iterator_get_index (struct RunIterator *iter)
69 - return iter->current_indices[iter->ct_i];
73 run_iterator_create (struct RunIterator *iter,
75 @@ -336,7 +365,7 @@ create_core_text_glyph_list (const char *text,
78 gi = g_slice_new (struct GlyphInfo);
79 - gi->index = run_iterator_get_index (&riter);
80 + gi->index = riter.total_ct_i;
81 gi->cgglyph = run_iterator_get_cgglyph (&riter);
82 gi->wc = run_iterator_get_character (&riter);
84 @@ -376,9 +405,8 @@ basic_engine_shape (PangoEngineShape *engine,
85 * glyph sequence generated by the CoreText typesetter:
86 * # E.g. zero-width spaces do not end up in the CoreText glyph sequence. We have
87 * to manually account for the gap in the character indices.
88 - * # Sometimes, CoreText generates two glyph for the same character index. We
89 - * currently handle this "properly" as in we do not crash or corrupt memory,
90 - * but that's about it.
91 + * # Sometimes, CoreText generates two glyphs for the same character index. These
92 + * are properly composed into a single 32-bit gunichar.
93 * # Due to mismatches in size, the CoreText glyph sequence can either be longer or
94 * shorter than the PangoGlyphString. Note that the size of the PangoGlyphString
95 * should match the number of characters in "text".
96 @@ -390,11 +418,6 @@ basic_engine_shape (PangoEngineShape *engine,
97 * increasing/decreasing.
99 * FIXME items for future fixing:
100 - * # CoreText strings are UTF16, and the indices *often* refer to characters,
101 - * but not *always*. Notable exception is when a character is encoded using
102 - * two UTF16 code points. This are two characters in a CFString. At this point
103 - * advancing a single character in the CFString and advancing a single character
104 - * using g_utf8_next_char in the const char string goes out of sync.
105 * # We currently don't bother about LTR, Pango core appears to fix this up for us.
106 * (Even when we cared warnings were generated that strings were in the wrong
107 * order, this should be investigated).