Merge pull request #74760 from lassade/unicode-err-uses-replacement-char

Don't append non-Unicode scalar values to the string; use the Unicode replacement character (U+FFFD) instead
This commit is contained in:
Rémi Verschelde
2023-06-22 18:30:04 +02:00
3 changed files with 58 additions and 38 deletions

View File

@@ -170,10 +170,10 @@ TEST_CASE("[String] Invalid UTF8 (non-standard)") {
ERR_PRINT_OFF
static const uint8_t u8str[] = { 0x45, 0xE3, 0x81, 0x8A, 0xE3, 0x82, 0x88, 0xE3, 0x81, 0x86, 0xF0, 0x9F, 0x8E, 0xA4, 0xF0, 0x82, 0x82, 0xAC, 0xED, 0xA0, 0x81, 0 };
// + +2 +2 +2 +3 overlong +3 unpaired +2
static const char32_t u32str[] = { 0x45, 0x304A, 0x3088, 0x3046, 0x1F3A4, 0x20AC, 0xD801, 0 };
static const char32_t u32str[] = { 0x45, 0x304A, 0x3088, 0x3046, 0x1F3A4, 0x20AC, 0xFFFD, 0 };
String s;
Error err = s.parse_utf8((const char *)u8str);
CHECK(err == ERR_PARSE_ERROR);
CHECK(err == ERR_INVALID_DATA);
CHECK(s == u32str);
CharString cs = (const char *)u8str;
@@ -185,7 +185,7 @@ TEST_CASE("[String] Invalid UTF8 (unrecoverable)") {
ERR_PRINT_OFF
static const uint8_t u8str[] = { 0x45, 0xE3, 0x81, 0x8A, 0x8F, 0xE3, 0xE3, 0x98, 0x8F, 0xE3, 0x82, 0x88, 0xE3, 0x81, 0x86, 0xC0, 0x80, 0xF0, 0x9F, 0x8E, 0xA4, 0xF0, 0x82, 0x82, 0xAC, 0xED, 0xA0, 0x81, 0 };
// + +2 inv +2 inv inv inv +2 +2 ovl NUL +1 +3 overlong +3 unpaired +2
static const char32_t u32str[] = { 0x45, 0x304A, 0x20, 0x20, 0x20, 0x20, 0x3088, 0x3046, 0x20, 0x1F3A4, 0x20AC, 0xD801, 0 };
static const char32_t u32str[] = { 0x45, 0x304A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x3088, 0x3046, 0xFFFD, 0x1F3A4, 0x20AC, 0xFFFD, 0 };
String s;
Error err = s.parse_utf8((const char *)u8str);
CHECK(err == ERR_INVALID_DATA);
@@ -301,8 +301,8 @@ TEST_CASE("[String] Test chr") {
CHECK(String::chr('H') == "H");
CHECK(String::chr(0x3012)[0] == 0x3012);
ERR_PRINT_OFF
CHECK(String::chr(0xd812)[0] == 0xd812); // Unpaired UTF-16 surrogate
CHECK(String::chr(0x20d812)[0] == 0x20d812); // Outside UTF-32 range
CHECK(String::chr(0xd812)[0] == 0xfffd); // Unpaired UTF-16 surrogate
CHECK(String::chr(0x20d812)[0] == 0xfffd); // Outside UTF-32 range
ERR_PRINT_ON
}