2023-10-16 18:20:08 +07:00

281 lines
6.9 KiB
C

/*
* SPDX-License-Identifier: ISC
*
* Copyright (C) 2018-2021 Michael Drake <tlsa@netsurf-browser.org>
*/
#include <stdbool.h>
#include <assert.h>
#include <stdio.h>
#include <cyaml/cyaml.h>
#include "../../src/utf8.h"
#include "ttest.h"
#include "test.h"
/**
 * Helper macro to squash unused variable warnings.
 *
 * Expands to a cast of its argument to void.
 */
#define UNUSED(_x) ((void)(_x))
/**
 * Helper macro to get the length of string literals.
 *
 * Expands to `CYAML_ARRAY_LEN(_s) - 1`.  Presumably the subtraction excludes
 * the literal's terminating NUL from the element count (CYAML_ARRAY_LEN is
 * defined outside this file -- confirm), in which case the argument must be
 * an array or string literal and not a pointer.
 */
#define SLEN(_s) (CYAML_ARRAY_LEN(_s) - 1)
/**
 * Test utf-8 decoding.
 *
 * Each table entry passes a byte string and a length to
 * cyaml_utf8_get_codepoint() and checks the codepoint it returns.  One
 * sub-test is reported per table entry, named after this function with the
 * entry index appended.
 *
 * \param[in]  report  The test report context.
 * \return true if test passes, false otherwise.
 */
static bool test_utf8_get_codepoint(
		ttest_report_ctx_t *report)
{
	static const struct tests {
		unsigned c;    /**< Codepoint the decoder is expected to return. */
		const char *s; /**< Input bytes handed to the decoder. */
		unsigned l;    /**< Length value handed to the decoder. */
	} t[] = {
		/* The replacement character itself. */
		{ 0xfffd, "\ufffd", SLEN("\ufffd") },
		/* Overlong two-byte encoding of a backslash (0x5c). */
		{ 0xfffd, "\xC1\x9C", SLEN("\xC1\x9C") },
		/* A four-byte character with its real length. */
		{ 0x1f638, u8"😸", SLEN(u8"😸") },
		/* The same character with lengths of 0 and 5. */
		{ 0xfffd, u8"😸", 0 },
		{ 0xfffd, u8"😸", 5 },
	};
	bool pass = true;

	for (unsigned i = 0; i < CYAML_ARRAY_LEN(t); i++) {
		unsigned l;
		unsigned c;
		ttest_ctx_t tc;
		/* Room for the function name, '_', and the decimal index. */
		char name[sizeof(__func__) + 32];

		/* Bounded write: can never run past the end of `name`. */
		snprintf(name, sizeof(name), "%s_%u", __func__, i);

		if (!ttest_start(report, name, NULL, NULL, &tc)) {
			continue;
		}

		/* The length is passed by pointer; whatever the decoder
		 * writes back is not inspected by this test. */
		l = t[i].l;
		c = cyaml_utf8_get_codepoint((uint8_t *)t[i].s, &l);
		if (c != t[i].c) {
			pass &= ttest_fail(&tc, "Incorrect codepoint for %s "
					"(expecting %4.4x, got %4.4x)",
					t[i].s, t[i].c, c);
			continue;
		}

		pass &= ttest_pass(&tc);
	}

	return pass;
}
/**
 * Test comparing the same strings.
 *
 * Every table entry is compared against itself with cyaml_utf8_casecmp(),
 * which must return 0.  The table mixes plain ASCII, multi-byte text and
 * byte sequences that are not valid UTF-8.
 *
 * \param[in]  report  The test report context.
 * \return true if test passes, false otherwise.
 */
static bool test_utf8_strcmp_same(
		ttest_report_ctx_t *report)
{
	/* Read-only table; static const like the tables in the sibling tests. */
	static const char * const strings[] = {
		"Simple",
		"test",
		"This is a LONGER string, if you see what I mean.",
		"29087 lsdkfj </,.{}'#\"|@>",
		u8"ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞß",
		u8"àáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ",
		u8"¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿",
		"\xc3\0",
		u8"αβγδε",
		u8"¯\\_(ツ)_/¯",
		"\xfa",
		u8"😸",
	};
	bool pass = true;

	for (unsigned i = 0; i < CYAML_ARRAY_LEN(strings); i++) {
		ttest_ctx_t tc;
		/* Room for the function name, '_', and the decimal index. */
		char name[sizeof(__func__) + 32];

		/* Bounded write: can never run past the end of `name`. */
		snprintf(name, sizeof(name), "%s_%u", __func__, i);

		if (!ttest_start(report, name, NULL, NULL, &tc)) {
			continue;
		}

		if (cyaml_utf8_casecmp(strings[i], strings[i]) != 0) {
			pass &= ttest_fail(&tc, "Failed to match: %s",
					strings[i]);
			continue;
		}

		pass &= ttest_pass(&tc);
	}

	return pass;
}
/**
 * Test comparing strings that match.
 *
 * Each pair is passed to cyaml_utf8_casecmp(), which must return 0.  The
 * pairs cover identical strings, strings that differ only in letter case,
 * and strings containing bytes that are not valid UTF-8 (including an
 * invalid sequence paired with U+FFFD).
 *
 * \param[in]  report  The test report context.
 * \return true if test passes, false otherwise.
 */
static bool test_utf8_strcmp_matches(
		ttest_report_ctx_t *report)
{
	static const struct string_pairs {
		const char *a; /**< First string of the pair. */
		const char *b; /**< Second string; must compare equal to a. */
	} pairs[] = {
		{ "", "" },
		{ "This is a TEST", "this is A test" },
		{ u8"ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ", u8"àáâãäåæçèéêëìíîïðñòóôõö" },
		{ u8"ĀĂĄĆĈĊČĎĐĒĔĖĘĚĜĞ", u8"āăąćĉċčďđēĕėęěĝğ" },
		{ u8"ijĵķĸĺļľ", u8"IJĴĶĸĹĻĽ" },
		{ u8"ŊŌŎŐŒŔŖŘŚŜŞŠŢŤŦŨŪŬŮŰŲŴŶ", u8"ŋōŏőœŕŗřśŝşšţťŧũūŭůűųŵŷ" },
		{ u8"űųŵŷŸźżž", u8"űųŵŷÿŹŻŽ" },
		{ u8"ƂƄ ơƣƥ", u8"ƃƅ ƠƢƤ" },
		{ u8"ǞǠǢǤǦǨǪǬǮ", u8"ǟǡǣǥǧǩǫǭǯ" },
		{ u8"ǸǺǼǾȀȂȄȆȈȊȌȎȐȒȔȖȘȚȜȞ", u8"ǹǻǽǿȁȃȅȇȉȋȍȏȑȓȕȗșțȝȟ" },
		{ u8"ȢȤȦȨȪȬȮȰȲ", u8"ȣȥȧȩȫȭȯȱȳ" },
		{ u8"ɇɉɋɍɏ", u8"ɆɈɊɌɎ" },
		{ u8"ƯźżžƳƵ", u8"ưŹŻŽƴƶ" },
		{ u8"ǍǏǑǓǕǗǙǛ", u8"ǎǐǒǔǖǘǚǜ" },
		/* Individual codepoint pairs. */
		{ u8"\u0178", u8"\u00ff" },
		{ u8"\u0187", u8"\u0188" },
		{ u8"\u018b", u8"\u018c" },
		{ u8"\u018e", u8"\u01dd" },
		{ u8"\u0191", u8"\u0192" },
		{ u8"\u0198", u8"\u0199" },
		{ u8"\u01a7", u8"\u01a8" },
		{ u8"\u01ac", u8"\u01ad" },
		{ u8"\u01af", u8"\u01b0" },
		{ u8"\u01b7", u8"\u0292" },
		{ u8"\u01b8", u8"\u01b9" },
		{ u8"\u01bc", u8"\u01bd" },
		{ u8"\u01c4", u8"\u01c6" },
		{ u8"\u01c5", u8"\u01c6" },
		{ u8"\u01c7", u8"\u01c9" },
		{ u8"\u01c8", u8"\u01c9" },
		{ u8"\u01ca", u8"\u01cc" },
		{ u8"\u01cb", u8"\u01cc" },
		{ u8"\u01f1", u8"\u01f3" },
		{ u8"\u01f2", u8"\u01f3" },
		{ u8"\u01f4", u8"\u01f5" },
		{ u8"\u01f7", u8"\u01bf" },
		{ u8"\u0220", u8"\u019e" },
		{ u8"\u023b", u8"\u023c" },
		{ u8"\u023d", u8"\u019a" },
		{ u8"\u0241", u8"\u0242" },
		{ u8"\u0243", u8"\u0180" },
		/* A four-byte sequence, then the same with a NUL in place of
		 * each continuation byte in turn. */
		{ "\xF0\x9F\x98\xB8", "\xF0\x9F\x98\xB8" },
		{ "\xF0\x00\x98\xB8", "\xF0\x00\x98\xB8" },
		{ "\xF0\x9F\x00\xB8", "\xF0\x9F\x00\xB8" },
		{ "\xF0\x9F\x98\x00", "\xF0\x9F\x98\x00" },
		/* Likewise for a three-byte sequence. */
		{ "\xE2\x9F\x9A", "\xE2\x9F\x9A" },
		{ "\xE2\x00\x9A", "\xE2\x00\x9A" },
		{ "\xE2\x9F\x00", "\xE2\x9F\x00" },
		/* A lead byte with no continuation byte is expected to compare
		 * equal to U+FFFD. */
		{ "A\xc2""C", "A\xc2""C" },
		{ "A\xc2""C", u8"A\ufffdC" },
		{ u8"A\ufffdC", "A\xc2""C" },
	};
	bool pass = true;

	for (unsigned i = 0; i < CYAML_ARRAY_LEN(pairs); i++) {
		ttest_ctx_t tc;
		/* Room for the function name, '_', and the decimal index. */
		char name[sizeof(__func__) + 32];

		/* Bounded write: can never run past the end of `name`. */
		snprintf(name, sizeof(name), "%s_%u", __func__, i);

		if (!ttest_start(report, name, NULL, NULL, &tc)) {
			continue;
		}

		if (cyaml_utf8_casecmp(pairs[i].a, pairs[i].b) != 0) {
			pass &= ttest_fail(&tc, "Failed to match strings: "
					"%s and %s", pairs[i].a, pairs[i].b);
			continue;
		}

		pass &= ttest_pass(&tc);
	}

	return pass;
}
/**
 * Test comparing strings that do not match.
 *
 * Each pair is passed to cyaml_utf8_casecmp(), which must return a non-zero
 * value.  A return of 0 for any pair is reported as a failure.
 *
 * \param[in]  report  The test report context.
 * \return true if test passes, false otherwise.
 */
static bool test_utf8_strcmp_mismatches(
		ttest_report_ctx_t *report)
{
	static const struct string_pairs {
		const char *a; /**< First string of the pair. */
		const char *b; /**< Second string; must differ from a. */
	} pairs[] = {
		{ "Invalid", "\xfa" },
		{ "Cat", u8"😸" },
		{ "cat", u8"😸" },
		{ "1 cat", u8"😸" },
		{ "[cat]", u8"😸" },
		{ "Ü cat", u8"😸" },
		{ "Ü cat", u8"😸" },
		/* An overlong encoding of a backslash must not equal a real one. */
		{ "\\", "\xC1\x9C" },
	};
	bool pass = true;

	for (unsigned i = 0; i < CYAML_ARRAY_LEN(pairs); i++) {
		ttest_ctx_t tc;
		/* Room for the function name, '_', and the decimal index. */
		char name[sizeof(__func__) + 32];

		/* Bounded write: can never run past the end of `name`. */
		snprintf(name, sizeof(name), "%s_%u", __func__, i);

		if (!ttest_start(report, name, NULL, NULL, &tc)) {
			continue;
		}

		if (cyaml_utf8_casecmp(pairs[i].a, pairs[i].b) == 0) {
			pass &= ttest_fail(&tc, "Failed to detect mismatch: "
					"%s and %s", pairs[i].a, pairs[i].b);
			continue;
		}

		pass &= ttest_pass(&tc);
	}

	return pass;
}
/**
 * Run the CYAML UTF-8 unit tests.
 *
 * Emits a heading for the codepoint test and another for the string
 * comparison tests.  Every test function is called regardless of whether an
 * earlier one failed.
 *
 * \param[in]  rc         The ttest report context.
 * \param[in]  log_level  CYAML log level.  Not used by these tests.
 * \param[in]  log_fn     CYAML logging function, or NULL.  Not used by
 *                        these tests.
 * \return true iff all unit tests pass, otherwise false.
 */
bool utf8_tests(
		ttest_report_ctx_t *rc,
		cyaml_log_t log_level,
		cyaml_log_fn_t log_fn)
{
	bool all_ok = true;

	/* The logging arguments are accepted but not needed here. */
	(void)log_level;
	(void)log_fn;

	ttest_heading(rc, "UTF-8 tests: Codepoint composition");

	if (!test_utf8_get_codepoint(rc)) {
		all_ok = false;
	}

	ttest_heading(rc, "UTF-8 tests: String comparison");

	if (!test_utf8_strcmp_same(rc)) {
		all_ok = false;
	}
	if (!test_utf8_strcmp_matches(rc)) {
		all_ok = false;
	}
	if (!test_utf8_strcmp_mismatches(rc)) {
		all_ok = false;
	}

	return all_ok;
}