From 21eea58b8c34d30c08cb9d28ee7e3841bb762c97 Mon Sep 17 00:00:00 2001 From: Jason Crain Date: Mon, 12 Aug 2013 22:26:58 -0500 Subject: [PATCH 1/2] Add Unicode mappings to gen-unicode-tables.py Add mappings for the OE ligature and some Greek letters to gen-unicode-tables.py. This should help with searches that include these letters. --- poppler/gen-unicode-tables.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/poppler/gen-unicode-tables.py b/poppler/gen-unicode-tables.py index 282f6e5..c268d4b 100644 --- a/poppler/gen-unicode-tables.py +++ b/poppler/gen-unicode-tables.py @@ -1,3 +1,4 @@ +# coding=utf8 UNICODE_LAST_CHAR_PART1 = 0x2FAFF HANGUL_S_BASE = 0xAC00 HANGUL_S_COUNT = 19 * 21 * 28 @@ -16,10 +17,20 @@ decomp_table = [] max_index = 0 decomp_expansion_index = {} decomp_expansion = [] +decomp_extra = {0x0152: "OE", # Œ -> OE + 0x0153: "oe", # œ -> oe + 0x2206: u"\u0394", # ∆ -> Δ + 0x220A: u"\u03B5", # ∊ -> ε + 0x220F: u"\u03A0", # ∏ -> Π + 0x2211: u"\u03A3", # ∑ -> Σ + } for u in xrange(0, UNICODE_LAST_CHAR_PART1): if (u >= HANGUL_S_BASE and u < HANGUL_S_BASE + HANGUL_S_COUNT): continue - norm = tuple(map(ord, unicodedata.normalize("NFKD", unichr(u)))) + if u in decomp_extra: + norm = tuple(map(ord, decomp_extra[u])) + else: + norm = tuple(map(ord, unicodedata.normalize("NFKD", unichr(u)))) if norm != (u,): try: i = decomp_expansion_index[norm] -- 1.7.10.4