From b973d71e82e9ec042d604226ccf0ba641773f38f Mon Sep 17 00:00:00 2001 From: Ran Benita Date: Fri, 21 Mar 2014 23:00:17 +0200 Subject: [PATCH libxkbcommon 2/3] state: add xkb_state_key_get_{utf8,utf32}() API functions These functions generally have the same effect as xkb_state_key_get_syms() + xkb_keysym_to_utf{8,32}(). So why add them? - They provide a slightly nicer interface, especially if the string is the only interest. - It makes the handling of multiple-keysyms-to-utf8 transparent. For the designated use-case of multiple-keysyms (unicode combining characters), this is a must. We also validate the UTF-8, which the user might not otherwise do. - We will need to apply some transformation on the resulting string which depend on the xkb_state. This is not possible with the xkb_keysym_* functions. With these functions, the existing xkb_keysym_to_utf{8,32}() are not expected to be used by a typical user; they are "raw" functions. Signed-off-by: Ran Benita --- src/state.c | 65 +++++++++++++++++++++++++++++++++++++++++++ test/common.c | 14 ++-------- test/state.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++ xkbcommon/xkbcommon.h | 48 ++++++++++++++++++++++++++++++++ 4 files changed, 191 insertions(+), 12 deletions(-) diff --git a/src/state.c b/src/state.c index f409a3e..ebd0ca6 100644 --- a/src/state.c +++ b/src/state.c @@ -61,6 +61,7 @@ #include "keymap.h" #include "keysym.h" +#include "utf8.h" struct xkb_filter { union xkb_action action; @@ -870,6 +871,70 @@ xkb_state_key_get_one_sym(struct xkb_state *state, xkb_keycode_t kc) return sym; } +XKB_EXPORT int +xkb_state_key_get_utf8(struct xkb_state *state, xkb_keycode_t kc, + char *buffer, size_t size) +{ + xkb_keysym_t sym; + const xkb_keysym_t *syms; + int nsyms; + int offset; + char tmp[7]; + + /* Make sure the keysym transformations are applied. 
*/ + sym = xkb_state_key_get_one_sym(state, kc); + if (sym != XKB_KEY_NoSymbol) { + nsyms = 1; syms = &sym; + } + else { + nsyms = xkb_state_key_get_syms(state, kc, &syms); + } + + /* Make sure not to truncate in the middle of a UTF-8 sequence. */ + offset = 0; + for (int i = 0; i < nsyms; i++) { + int ret = xkb_keysym_to_utf8(syms[i], tmp, sizeof(tmp)); + if (ret <= 0) + goto err_bad; + + ret--; + if ((size_t) (offset + ret) <= size) + memcpy(buffer + offset, tmp, ret); + offset += ret; + } + + if ((size_t) offset >= size) + goto err_trunc; + buffer[offset] = '\0'; + + if (!is_valid_utf8(buffer, offset)) + goto err_bad; + + return offset; + +err_trunc: + if (size > 0) + buffer[size - 1] = '\0'; + return offset; + +err_bad: + if (size > 0) + buffer[0] = '\0'; + return 0; +} + +XKB_EXPORT uint32_t +xkb_state_key_get_utf32(struct xkb_state *state, xkb_keycode_t kc) +{ + xkb_keysym_t sym; + uint32_t cp; + + sym = xkb_state_key_get_one_sym(state, kc); + cp = xkb_keysym_to_utf32(sym); + + return cp; +} + /** * Serialises the requested modifier state into an xkb_mod_mask_t, with all * the same disclaimers as in xkb_state_update_mask. diff --git a/test/common.c b/test/common.c index fd013ca..8b3f954 100644 --- a/test/common.c +++ b/test/common.c @@ -371,18 +371,8 @@ test_print_keycode_state(struct xkb_state *state, xkb_keycode_t keycode) printf("] "); } - /* - * Only do this if wchar_t is UCS-4, so we can be lazy and print - * with %lc. - */ -#ifdef __STDC_ISO_10646__ - printf("unicode [ "); - for (int i = 0; i < nsyms; i++) { - uint32_t unicode = xkb_keysym_to_utf32(syms[i]); - printf("%lc ", (int) (unicode > 32 ? 
unicode : L' ')); - } - printf("] "); -#endif + xkb_state_key_get_utf8(state, keycode, s, sizeof(s)); + printf("unicode [ %s ] ", s); layout = xkb_state_key_get_layout(state, keycode); printf("layout [ %s (%d) ] ", diff --git a/test/state.c b/test/state.c index 950b423..95852b2 100644 --- a/test/state.c +++ b/test/state.c @@ -428,6 +428,81 @@ test_caps_keysym_transformation(struct xkb_keymap *keymap) xkb_state_unref(state); } +static void +test_get_utf8_utf32(struct xkb_keymap *keymap) +{ + char buf[256]; + struct xkb_state *state = xkb_state_new(keymap); + assert(state); + +#define TEST_KEY(key, expected_utf8, expected_utf32) do { \ + assert(xkb_state_key_get_utf8(state, key + 8, NULL, 0) == strlen(expected_utf8)); \ + assert(xkb_state_key_get_utf8(state, key + 8, buf, sizeof(buf)) == strlen(expected_utf8)); \ + assert(memcmp(buf, expected_utf8, sizeof(expected_utf8)) == 0); \ + assert(xkb_state_key_get_utf32(state, key + 8) == expected_utf32); \ +} while (0) + + /* Simple ASCII. */ + TEST_KEY(KEY_A, "a", 0x61); + TEST_KEY(KEY_ESC, "\x1B", 0x1B); + TEST_KEY(KEY_1, "1", 0x31); + + /* Invalid. */ + TEST_KEY(XKB_KEYCODE_INVALID - 8, "", 0); + TEST_KEY(300, "", 0); + + /* No string. */ + TEST_KEY(KEY_LEFTCTRL, "", 0); + TEST_KEY(KEY_NUMLOCK, "", 0); + + /* Multiple keysyms. */ + TEST_KEY(KEY_6, "HELLO", 0); + TEST_KEY(KEY_7, "YES THIS IS DOG", 0); + + /* Check truncation. 
*/ + memset(buf, 'X', sizeof(buf)); + assert(xkb_state_key_get_utf8(state, KEY_6 + 8, buf, 0) == strlen("HELLO")); + assert(memcmp(buf, "X", 1) == 0); + assert(xkb_state_key_get_utf8(state, KEY_6 + 8, buf, 1) == strlen("HELLO")); + assert(memcmp(buf, "", 1) == 0); + assert(xkb_state_key_get_utf8(state, KEY_6 + 8, buf, 2) == strlen("HELLO")); + assert(memcmp(buf, "H", 2) == 0); + assert(xkb_state_key_get_utf8(state, KEY_6 + 8, buf, 3) == strlen("HELLO")); + assert(memcmp(buf, "HE", 3) == 0); + assert(xkb_state_key_get_utf8(state, KEY_6 + 8, buf, 5) == strlen("HELLO")); + assert(memcmp(buf, "HELL", 5) == 0); + assert(xkb_state_key_get_utf8(state, KEY_6 + 8, buf, 6) == strlen("HELLO")); + assert(memcmp(buf, "HELLO", 6) == 0); + assert(xkb_state_key_get_utf8(state, KEY_6 + 8, buf, 7) == strlen("HELLO")); + assert(memcmp(buf, "HELLO\0X", 7) == 0); + + /* Switch to ru layout */ + xkb_state_update_key(state, KEY_COMPOSE + EVDEV_OFFSET, XKB_KEY_DOWN); + xkb_state_update_key(state, KEY_COMPOSE + EVDEV_OFFSET, XKB_KEY_UP); + assert(xkb_state_key_get_layout(state, KEY_A + 8) == 1); + + /* Non ASCII. 
*/ + TEST_KEY(KEY_ESC, "\x1B", 0x1B); + TEST_KEY(KEY_A, "ф", 0x0444); + TEST_KEY(KEY_Z, "я", 0x044F); + + /* Switch back to us layout */ + xkb_state_update_key(state, KEY_COMPOSE + EVDEV_OFFSET, XKB_KEY_DOWN); + xkb_state_update_key(state, KEY_COMPOSE + EVDEV_OFFSET, XKB_KEY_UP); + assert(xkb_state_key_get_layout(state, KEY_A + 8) == 0); + + xkb_state_update_key(state, KEY_LEFTSHIFT + EVDEV_OFFSET, XKB_KEY_DOWN); + TEST_KEY(KEY_A, "A", 0x41); + TEST_KEY(KEY_ESC, "\x1B", 0x1B); + TEST_KEY(KEY_1, "!", 0x21); + xkb_state_update_key(state, KEY_LEFTSHIFT + EVDEV_OFFSET, XKB_KEY_UP); + + TEST_KEY(KEY_6, "HELLO", 0); + TEST_KEY(KEY_7, "YES THIS IS DOG", 0); + + xkb_state_unref(state); +} + int main(void) { @@ -449,6 +524,7 @@ main(void) test_repeat(keymap); test_consume(keymap); test_range(keymap); + test_get_utf8_utf32(keymap); xkb_keymap_unref(keymap); keymap = test_compile_rules(context, "evdev", NULL, "ch", "fr", NULL); diff --git a/xkbcommon/xkbcommon.h b/xkbcommon/xkbcommon.h index ab3be9f..36251db 100644 --- a/xkbcommon/xkbcommon.h +++ b/xkbcommon/xkbcommon.h @@ -433,6 +433,11 @@ xkb_keysym_from_name(const char *name, enum xkb_keysym_flags flags); * @returns The number of bytes written to the buffer (including the * terminating byte). If the keysym does not have a Unicode * representation, returns 0. If the buffer is too small, returns -1. + * + * Prefer not to use this function on keysyms obtained from an + * xkb_state. In this case, use xkb_state_key_get_utf8() instead. + * + * @sa xkb_state_key_get_utf8() */ int xkb_keysym_to_utf8(xkb_keysym_t keysym, char *buffer, size_t size); @@ -443,6 +448,11 @@ xkb_keysym_to_utf8(xkb_keysym_t keysym, char *buffer, size_t size); * @returns The Unicode/UTF-32 representation of keysym, which is also * compatible with UCS-4. If the keysym does not have a Unicode * representation, returns 0. + * + * Prefer not to use this function on keysyms obtained from an + * xkb_state. In this case, use xkb_state_key_get_utf32() instead. 
+ * + * @sa xkb_state_key_get_utf32() */ uint32_t xkb_keysym_to_utf32(xkb_keysym_t keysym); @@ -1269,6 +1279,44 @@ xkb_state_key_get_syms(struct xkb_state *state, xkb_keycode_t key, const xkb_keysym_t **syms_out); /** + * Get the Unicode/UTF-8 string obtained from pressing a particular key + * in a given keyboard state. + * + * @param[in] state The keyboard state object. + * @param[in] key The keycode of the key. + * @param[out] buffer A buffer to write the string into. + * @param[in] size Size of the buffer. + * + * @warning If the buffer passed is too small, the string is truncated + * (though still NUL-terminated). + * + * @returns The number of bytes required for the string, excluding the + * NUL byte. If there is nothing to write, returns 0. + * + * You may check if truncation has occurred by comparing the return value + * with the size of @p buffer, similarly to the snprintf(3) function. + * You may safely pass NULL and 0 to @p buffer and @p size to find the + * required size (without the NUL-byte). + * + * @memberof xkb_state + */ +int +xkb_state_key_get_utf8(struct xkb_state *state, xkb_keycode_t key, + char *buffer, size_t size); + +/** + * Get the Unicode/UTF-32 codepoint obtained from pressing a particular + * key in a given keyboard state. + * + * @returns The UTF-32 representation for the key, if it consists of only + * a single codepoint. Otherwise, returns 0. + * + * @memberof xkb_state + */ +uint32_t +xkb_state_key_get_utf32(struct xkb_state *state, xkb_keycode_t key); + +/** * Get the single keysym obtained from pressing a particular key in a * given keyboard state. * -- 1.9.1