unicode: cache the normalization tables in struct unicode_map

Instead of repeatedly looking up the version add pointers to the NFD and NFD+CF tables to struct unicode_map, and pass a unicode_map plus index to the functions using the normalization tables. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
2025-07-23 20:51:03 +02:00 · 2021-09-15 09:00:04 +02:00
parent fbc59d6505
commit 6ca99ce756
5 changed files with 99 additions and 96 deletions
--- a/fs/unicode/utf8-selftest.c
+++ b/fs/unicode/utf8-selftest.c
@@ -18,9 +18,7 @@ unsigned int failed_tests;
 unsigned int total_tests;

 /* Tests will be based on this version. */
-#define latest_maj 12
-#define latest_min 1
-#define latest_rev 0
+#define UTF8_LATEST	UNICODE_AGE(12, 1, 0)

 #define _test(cond, func, line, fmt, ...) do {				\
 		total_tests++;						\
@@ -160,29 +158,22 @@ static const struct {
 	}
 };

-static ssize_t utf8len(const struct utf8data *data, const char *s)
-{
-	return utf8nlen(data, s, (size_t)-1);
-}
-
-static int utf8cursor(struct utf8cursor *u8c, const struct utf8data *data,
+static ssize_t utf8len(const struct unicode_map *um, enum utf8_normalization n,
 		const char *s)
 {
-	return utf8ncursor(u8c, data, s, (unsigned int)-1);
+	return utf8nlen(um, n, s, (size_t)-1);
 }

-static void check_utf8_nfdi(void)
+static int utf8cursor(struct utf8cursor *u8c, const struct unicode_map *um,
+		enum utf8_normalization n, const char *s)
+{
+	return utf8ncursor(u8c, um, n, s, (unsigned int)-1);
+}
+
+static void check_utf8_nfdi(struct unicode_map *um)
 {
 	int i;
 	struct utf8cursor u8c;
-	const struct utf8data *data;
-
-	data = utf8nfdi(UNICODE_AGE(latest_maj, latest_min, latest_rev));
-	if (!data) {
-		pr_err("%s: Unable to load utf8-%d.%d.%d. Skipping.\n",
-		       __func__, latest_maj, latest_min, latest_rev);
-		return;
-	}

 	for (i = 0; i < ARRAY_SIZE(nfdi_test_data); i++) {
 		int len = strlen(nfdi_test_data[i].str);
@@ -190,10 +181,11 @@ static void check_utf8_nfdi(void)
 		int j = 0;
 		unsigned char c;

-		test((utf8len(data, nfdi_test_data[i].str) == nlen));
-		test((utf8nlen(data, nfdi_test_data[i].str, len) == nlen));
+		test((utf8len(um, UTF8_NFDI, nfdi_test_data[i].str) == nlen));
+		test((utf8nlen(um, UTF8_NFDI, nfdi_test_data[i].str, len) ==
+			nlen));

-		if (utf8cursor(&u8c, data, nfdi_test_data[i].str) < 0)
+		if (utf8cursor(&u8c, um, UTF8_NFDI, nfdi_test_data[i].str) < 0)
 			pr_err("can't create cursor\n");

 		while ((c = utf8byte(&u8c)) > 0) {
@@ -207,18 +199,10 @@ static void check_utf8_nfdi(void)
 	}
 }

-static void check_utf8_nfdicf(void)
+static void check_utf8_nfdicf(struct unicode_map *um)
 {
 	int i;
 	struct utf8cursor u8c;
-	const struct utf8data *data;
-
-	data = utf8nfdicf(UNICODE_AGE(latest_maj, latest_min, latest_rev));
-	if (!data) {
-		pr_err("%s: Unable to load utf8-%d.%d.%d. Skipping.\n",
-		       __func__, latest_maj, latest_min, latest_rev);
-		return;
-	}

 	for (i = 0; i < ARRAY_SIZE(nfdicf_test_data); i++) {
 		int len = strlen(nfdicf_test_data[i].str);
@@ -226,10 +210,13 @@ static void check_utf8_nfdicf(void)
 		int j = 0;
 		unsigned char c;

-		test((utf8len(data, nfdicf_test_data[i].str) == nlen));
-		test((utf8nlen(data, nfdicf_test_data[i].str, len) == nlen));
+		test((utf8len(um, UTF8_NFDICF, nfdicf_test_data[i].str) ==
+				nlen));
+		test((utf8nlen(um, UTF8_NFDICF, nfdicf_test_data[i].str, len) ==
+				nlen));

-		if (utf8cursor(&u8c, data, nfdicf_test_data[i].str) < 0)
+		if (utf8cursor(&u8c, um, UTF8_NFDICF,
+				nfdicf_test_data[i].str) < 0)
 			pr_err("can't create cursor\n");

 		while ((c = utf8byte(&u8c)) > 0) {
@@ -243,16 +230,9 @@ static void check_utf8_nfdicf(void)
 	}
 }

-static void check_utf8_comparisons(void)
+static void check_utf8_comparisons(struct unicode_map *table)
 {
 	int i;
-	struct unicode_map *table = utf8_load(UNICODE_AGE(12, 1, 0));
-
-	if (IS_ERR(table)) {
-		pr_err("%s: Unable to load utf8 %d.%d.%d. Skipping.\n",
-		       __func__, latest_maj, latest_min, latest_rev);
-		return;
-	}

 	for (i = 0; i < ARRAY_SIZE(nfdi_test_data); i++) {
 		const struct qstr s1 = {.name = nfdi_test_data[i].str,
@@ -273,8 +253,6 @@ static void check_utf8_comparisons(void)
 		test_f(!utf8_strncasecmp(table, &s1, &s2),
 		       "%s %s comparison mismatch\n", s1.name, s2.name);
 	}
-
-	utf8_unload(table);
 }

 static void check_supported_versions(void)
@@ -286,8 +264,7 @@ static void check_supported_versions(void)
 	test(utf8version_is_supported(UNICODE_AGE(9, 0, 0)));

 	/* Unicode 1x.0.0 (the latest version) should be supported. */
-	test(utf8version_is_supported(
-		UNICODE_AGE(latest_maj, latest_min, latest_rev)));
+	test(utf8version_is_supported(UTF8_LATEST));

 	/* Next versions don't exist. */
 	test(!utf8version_is_supported(UNICODE_AGE(13, 0, 0)));
@@ -297,19 +274,28 @@ static void check_supported_versions(void)

 static int __init init_test_ucd(void)
 {
+	struct unicode_map *um;
+
 	failed_tests = 0;
 	total_tests = 0;

+	um = utf8_load(UTF8_LATEST);
+	if (IS_ERR(um)) {
+		pr_err("%s: Unable to load utf8 table.\n", __func__);
+		return PTR_ERR(um);
+	}
+
 	check_supported_versions();
-	check_utf8_nfdi();
-	check_utf8_nfdicf();
-	check_utf8_comparisons();
+	check_utf8_nfdi(um);
+	check_utf8_nfdicf(um);
+	check_utf8_comparisons(um);

 	if (!failed_tests)
 		pr_info("All %u tests passed\n", total_tests);
 	else
 		pr_err("%u out of %u tests failed\n", failed_tests,
 		       total_tests);
+	utf8_unload(um);
 	return 0;
 }