bockbuild/MacSDK/patches/pango-coretext-astral-plane-2.patch

   1 661f8c0b920f5da Mon Sep 17 00:00:00 2001
   2 From: Kristian Rietveld <kris@lanedo.com>
   3 Date: Tue, 19 Mar 2013 11:23:49 +0100
   4 Subject: [PATCH 2/2] Detect and handle characters encoded in two UTF16 code
   5  points
   6
   7 Another important change: gi->index should point at the current
   8 character, not the current index into the string. Before this change,
   9 the current character equaled the current index into the string.
  10 ---
  11  modules/basic/basic-coretext.c |   55 ++++++++++++++++++++++++++++-----------
  12  1 files changed, 39 insertions(+), 16 deletions(-)
  13
  14 diff --git a/modules/basic/basic-coretext.c b/modules/basic/basic-coretext.c
  15 index 33ce479..06b648e 100644
  16 --- a/modules/basic/basic-coretext.c
  17 +++ b/modules/basic/basic-coretext.c
  18 @@ -166,7 +166,42 @@ run_iterator_run_is_non_monotonic (struct RunIterator *iter)
  19  static gunichar
  20  run_iterator_get_character (struct RunIterator *iter)
  21  {
  22 -  return CFStringGetCharacterAtIndex (iter->cstr, iter->current_indices[iter->ct_i]);
  23 +  int lower, upper;
  24 +
  25 +  lower = iter->current_indices[iter->ct_i];
  26 +  if (iter->ct_i + 1 < CTRunGetGlyphCount (iter->current_run))
  27 +    upper = iter->current_indices[iter->ct_i + 1];
  28 +  else
  29 +    {
  30 +      CFRange range = CTRunGetStringRange (iter->current_run);
  31 +      upper = range.location + range.length;
  32 +    }
  33 +
  34 +  if (upper - lower == 1)
  35 +    return CFStringGetCharacterAtIndex (iter->cstr, lower);
  36 +  if (upper - lower == 2)
  37 +    {
  38 +      /* Character is encoded in two UTF-16 code units. */
  39 +      gunichar *ch;
  40 +      gunichar retval;
  41 +      gunichar2 orig[2];
  42 +
  43 +      orig[0] = CFStringGetCharacterAtIndex (iter->cstr, lower);
  44 +      orig[1] = CFStringGetCharacterAtIndex (iter->cstr, lower + 1);
  45 +
  46 +      ch = g_utf16_to_ucs4 (orig, 2, NULL, NULL, NULL);
  47 +      retval = *ch;
  48 +      g_free (ch);
  49 +
  50 +      return retval;
  51 +    }
  52 +
  53 +  /* This should not be reached, because other cases cannot occur. Instead
  54 +   * of crashing, return the first character, which will likely be displayed
  55 +   * as an unknown glyph.
  56 +   */
  57 +
  58 +  return CFStringGetCharacterAtIndex (iter->cstr, lower);
  59  }
  60
  61  static CGGlyph
  62 @@ -175,12 +210,6 @@ run_iterator_get_cgglyph (struct RunIterator *iter)
  63    return iter->current_cgglyphs[iter->ct_i];
  64  }
  65
  66 -static CFIndex
  67 -run_iterator_get_index (struct RunIterator *iter)
  68 -{
  69 -  return iter->current_indices[iter->ct_i];
  70 -}
  71 -
  72  static gboolean
  73  run_iterator_create (struct RunIterator *iter,
  74                       const char         *text,
  75 @@ -336,7 +365,7 @@ create_core_text_glyph_list (const char *text,
  76        struct GlyphInfo *gi;
  77
  78        gi = g_slice_new (struct GlyphInfo);
  79 -      gi->index = run_iterator_get_index (&riter);
  80 +      gi->index = riter.total_ct_i;
  81        gi->cgglyph = run_iterator_get_cgglyph (&riter);
  82        gi->wc = run_iterator_get_character (&riter);
  83
  84 @@ -376,9 +405,8 @@ basic_engine_shape (PangoEngineShape    *engine,
  85     * glyph sequence generated by the CoreText typesetter:
  86     *   # E.g. zero-width spaces do not end up in the CoreText glyph sequence. We have
  87     *     to manually account for the gap in the character indices.
  88 -   *   # Sometimes, CoreText generates two glyph for the same character index. We
  89 -   *     currently handle this "properly" as in we do not crash or corrupt memory,
  90 -   *     but that's about it.
  91 +   *   # Sometimes, CoreText generates two glyphs for the same character index. These
  92 +   *     are properly composed into a single 32-bit gunichar.
  93     *   # Due to mismatches in size, the CoreText glyph sequence can either be longer or
  94     *     shorter than the PangoGlyphString. Note that the size of the PangoGlyphString
  95     *     should match the number of characters in "text".
  96 @@ -390,11 +418,6 @@ basic_engine_shape (PangoEngineShape    *engine,
  97     * increasing/decreasing.
  98     *
  99     * FIXME items for future fixing:
 100 -   *   # CoreText strings are UTF16, and the indices *often* refer to characters,
 101 -   *     but not *always*. Notable exception is when a character is encoded using
 102 -   *     two UTF16 code points. This are two characters in a CFString. At this point
 103 -   *     advancing a single character in the CFString and advancing a single character
 104 -   *     using g_utf8_next_char in the const char string goes out of sync.
 105     *   # We currently don't bother about LTR, Pango core appears to fix this up for us.
 106     *     (Even when we cared warnings were generated that strings were in the wrong
 107     *     order, this should be investigated).
 108 --
 109 1.7.4.4