utf16_literal_to_utf8: Eliminate Duff's Device

This fixes -Wimplicit-fallthrough warnings with GCC7.
2023-08-10 21:13:26 +03:00 · 2017-03-02 13:46:31 +01:00
parent ad5abf4c5b
commit 9d07917feb
1 changed files with 19 additions and 30 deletions
--- a/cJSON.c
+++ b/cJSON.c
@@ -452,21 +452,13 @@ static unsigned parse_hex4(const unsigned char * const input)
 * A literal can be one or two sequences of the form \uXXXX */
 static unsigned char utf16_literal_to_utf8(const unsigned char * const input_pointer, const unsigned char * const input_end, unsigned char **output_pointer, const unsigned char **error_pointer)
 {
-    /* first bytes of UTF8 encoding for a given length in bytes */
-    static const unsigned char firstByteMark[5] =
-    {
-        0x00, /* should never happen */
-        0x00, /* 0xxxxxxx */
-        0xC0, /* 110xxxxx */
-        0xE0, /* 1110xxxx */
-        0xF0 /* 11110xxx */
-    };
-
    long unsigned int codepoint = 0;
    unsigned int first_code = 0;
    const unsigned char *first_sequence = input_pointer;
    unsigned char utf8_length = 0;
+    unsigned char utf8_position = 0;
    unsigned char sequence_length = 0;
+    unsigned char first_byte_mark = 0;

    /* get the first utf16 sequence */
    first_code = parse_hex4(first_sequence + 2);
@@ -537,16 +529,19 @@ static unsigned char utf16_literal_to_utf8(const unsigned char * const input_poi
    {
        /* two bytes, encoding 110xxxxx 10xxxxxx */
        utf8_length = 2;
+        first_byte_mark = 0xC0; /* 11000000 */
    }
    else if (codepoint < 0x10000)
    {
        /* three bytes, encoding 1110xxxx 10xxxxxx 10xxxxxx */
        utf8_length = 3;
+        first_byte_mark = 0xE0; /* 11100000 */
    }
    else if (codepoint <= 0x10FFFF)
    {
        /* four bytes, encoding 1110xxxx 10xxxxxx 10xxxxxx 10xxxxxx */
        utf8_length = 4;
+        first_byte_mark = 0xF0; /* 11110000 */
    }
    else
    {
@@ -556,28 +551,22 @@ static unsigned char utf16_literal_to_utf8(const unsigned char * const input_poi
    }

    /* encode as utf8 */
-    switch (utf8_length)
+    for (utf8_position = (unsigned char)(utf8_length - 1); utf8_position > 0; utf8_position--)
    {
-        case 4:
-            /* 10xxxxxx */
-            (*output_pointer)[3] = (unsigned char)((codepoint | 0x80) & 0xBF);
-            codepoint >>= 6;
-        case 3:
-            /* 10xxxxxx */
-            (*output_pointer)[2] = (unsigned char)((codepoint | 0x80) & 0xBF);
-            codepoint >>= 6;
-        case 2:
-            (*output_pointer)[1] = (unsigned char)((codepoint | 0x80) & 0xBF);
-            codepoint >>= 6;
-        case 1:
-            /* depending on the length in bytes this determines the
-               encoding of the first UTF8 byte */
-            (*output_pointer)[0] = (unsigned char)((codepoint | firstByteMark[utf8_length]) & 0xFF);
-            break;
-        default:
-            *error_pointer = first_sequence;
-            goto fail;
+        /* 10xxxxxx */
+        (*output_pointer)[utf8_position] = (unsigned char)((codepoint | 0x80) & 0xBF);
+        codepoint >>= 6;
    }
+    /* encode first byte */
+    if (utf8_length > 1)
+    {
+        (*output_pointer)[0] = (unsigned char)((codepoint | first_byte_mark) & 0xFF);
+    }
+    else
+    {
+        (*output_pointer)[0] = (unsigned char)(codepoint & 0x7F);
+    }
+
    *output_pointer += utf8_length;

    return sequence_length;