unicode: cache the normalization tables in struct unicode_map

Instead of repeatedly looking up the version, add pointers to the
NFD and NFD+CF tables to struct unicode_map, and pass a
unicode_map plus a normalization index to the functions using the
normalization tables.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
This commit is contained in:
Christoph Hellwig
2021-09-15 09:00:04 +02:00
committed by Gabriel Krisman Bertazi
parent fbc59d6505
commit 6ca99ce756
5 changed files with 99 additions and 96 deletions

View File

@@ -18,9 +18,7 @@ unsigned int failed_tests;
unsigned int total_tests;
/* Tests will be based on this version. */
#define latest_maj 12
#define latest_min 1
#define latest_rev 0
#define UTF8_LATEST UNICODE_AGE(12, 1, 0)
#define _test(cond, func, line, fmt, ...) do { \
total_tests++; \
@@ -160,29 +158,22 @@ static const struct {
}
};
static ssize_t utf8len(const struct utf8data *data, const char *s)
{
return utf8nlen(data, s, (size_t)-1);
}
static int utf8cursor(struct utf8cursor *u8c, const struct utf8data *data,
static ssize_t utf8len(const struct unicode_map *um, enum utf8_normalization n,
const char *s)
{
return utf8ncursor(u8c, data, s, (unsigned int)-1);
return utf8nlen(um, n, s, (size_t)-1);
}
static void check_utf8_nfdi(void)
static int utf8cursor(struct utf8cursor *u8c, const struct unicode_map *um,
enum utf8_normalization n, const char *s)
{
return utf8ncursor(u8c, um, n, s, (unsigned int)-1);
}
static void check_utf8_nfdi(struct unicode_map *um)
{
int i;
struct utf8cursor u8c;
const struct utf8data *data;
data = utf8nfdi(UNICODE_AGE(latest_maj, latest_min, latest_rev));
if (!data) {
pr_err("%s: Unable to load utf8-%d.%d.%d. Skipping.\n",
__func__, latest_maj, latest_min, latest_rev);
return;
}
for (i = 0; i < ARRAY_SIZE(nfdi_test_data); i++) {
int len = strlen(nfdi_test_data[i].str);
@@ -190,10 +181,11 @@ static void check_utf8_nfdi(void)
int j = 0;
unsigned char c;
test((utf8len(data, nfdi_test_data[i].str) == nlen));
test((utf8nlen(data, nfdi_test_data[i].str, len) == nlen));
test((utf8len(um, UTF8_NFDI, nfdi_test_data[i].str) == nlen));
test((utf8nlen(um, UTF8_NFDI, nfdi_test_data[i].str, len) ==
nlen));
if (utf8cursor(&u8c, data, nfdi_test_data[i].str) < 0)
if (utf8cursor(&u8c, um, UTF8_NFDI, nfdi_test_data[i].str) < 0)
pr_err("can't create cursor\n");
while ((c = utf8byte(&u8c)) > 0) {
@@ -207,18 +199,10 @@ static void check_utf8_nfdi(void)
}
}
static void check_utf8_nfdicf(void)
static void check_utf8_nfdicf(struct unicode_map *um)
{
int i;
struct utf8cursor u8c;
const struct utf8data *data;
data = utf8nfdicf(UNICODE_AGE(latest_maj, latest_min, latest_rev));
if (!data) {
pr_err("%s: Unable to load utf8-%d.%d.%d. Skipping.\n",
__func__, latest_maj, latest_min, latest_rev);
return;
}
for (i = 0; i < ARRAY_SIZE(nfdicf_test_data); i++) {
int len = strlen(nfdicf_test_data[i].str);
@@ -226,10 +210,13 @@ static void check_utf8_nfdicf(void)
int j = 0;
unsigned char c;
test((utf8len(data, nfdicf_test_data[i].str) == nlen));
test((utf8nlen(data, nfdicf_test_data[i].str, len) == nlen));
test((utf8len(um, UTF8_NFDICF, nfdicf_test_data[i].str) ==
nlen));
test((utf8nlen(um, UTF8_NFDICF, nfdicf_test_data[i].str, len) ==
nlen));
if (utf8cursor(&u8c, data, nfdicf_test_data[i].str) < 0)
if (utf8cursor(&u8c, um, UTF8_NFDICF,
nfdicf_test_data[i].str) < 0)
pr_err("can't create cursor\n");
while ((c = utf8byte(&u8c)) > 0) {
@@ -243,16 +230,9 @@ static void check_utf8_nfdicf(void)
}
}
static void check_utf8_comparisons(void)
static void check_utf8_comparisons(struct unicode_map *table)
{
int i;
struct unicode_map *table = utf8_load(UNICODE_AGE(12, 1, 0));
if (IS_ERR(table)) {
pr_err("%s: Unable to load utf8 %d.%d.%d. Skipping.\n",
__func__, latest_maj, latest_min, latest_rev);
return;
}
for (i = 0; i < ARRAY_SIZE(nfdi_test_data); i++) {
const struct qstr s1 = {.name = nfdi_test_data[i].str,
@@ -273,8 +253,6 @@ static void check_utf8_comparisons(void)
test_f(!utf8_strncasecmp(table, &s1, &s2),
"%s %s comparison mismatch\n", s1.name, s2.name);
}
utf8_unload(table);
}
static void check_supported_versions(void)
@@ -286,8 +264,7 @@ static void check_supported_versions(void)
test(utf8version_is_supported(UNICODE_AGE(9, 0, 0)));
/* Unicode 1x.0.0 (the latest version) should be supported. */
test(utf8version_is_supported(
UNICODE_AGE(latest_maj, latest_min, latest_rev)));
test(utf8version_is_supported(UTF8_LATEST));
/* Next versions don't exist. */
test(!utf8version_is_supported(UNICODE_AGE(13, 0, 0)));
@@ -297,19 +274,28 @@ static void check_supported_versions(void)
static int __init init_test_ucd(void)
{
struct unicode_map *um;
failed_tests = 0;
total_tests = 0;
um = utf8_load(UTF8_LATEST);
if (IS_ERR(um)) {
pr_err("%s: Unable to load utf8 table.\n", __func__);
return PTR_ERR(um);
}
check_supported_versions();
check_utf8_nfdi();
check_utf8_nfdicf();
check_utf8_comparisons();
check_utf8_nfdi(um);
check_utf8_nfdicf(um);
check_utf8_comparisons(um);
if (!failed_tests)
pr_info("All %u tests passed\n", total_tests);
else
pr_err("%u out of %u tests failed\n", failed_tests,
total_tests);
utf8_unload(um);
return 0;
}