mirror of
https://github.com/DaveGamble/cJSON.git
synced 2023-08-10 21:13:26 +03:00
Optimise string parsing when there are no escape characters
This resulted in an approximately 40% reduction in the number of instructions executed in parse_string() when parsing a 4 MB JSON file, as measured with callgrind.
This commit is contained in:
parent
bb27ffa152
commit
e64b984ddc
124
cJSON.c
124
cJSON.c
@ -776,6 +776,8 @@ static cJSON_bool parse_string(cJSON * const item, parse_buffer * const input_bu
|
|||||||
const unsigned char *input_end = buffer_at_offset(input_buffer) + 1;
|
const unsigned char *input_end = buffer_at_offset(input_buffer) + 1;
|
||||||
unsigned char *output_pointer = NULL;
|
unsigned char *output_pointer = NULL;
|
||||||
unsigned char *output = NULL;
|
unsigned char *output = NULL;
|
||||||
|
size_t skipped_bytes = 0;
|
||||||
|
size_t allocation_length = 0;
|
||||||
|
|
||||||
/* not a string */
|
/* not a string */
|
||||||
if (buffer_at_offset(input_buffer)[0] != '\"')
|
if (buffer_at_offset(input_buffer)[0] != '\"')
|
||||||
@ -785,8 +787,6 @@ static cJSON_bool parse_string(cJSON * const item, parse_buffer * const input_bu
|
|||||||
|
|
||||||
{
|
{
|
||||||
/* calculate approximate size of the output (overestimate) */
|
/* calculate approximate size of the output (overestimate) */
|
||||||
size_t allocation_length = 0;
|
|
||||||
size_t skipped_bytes = 0;
|
|
||||||
while (((size_t)(input_end - input_buffer->content) < input_buffer->length) && (*input_end != '\"'))
|
while (((size_t)(input_end - input_buffer->content) < input_buffer->length) && (*input_end != '\"'))
|
||||||
{
|
{
|
||||||
/* is escape sequence */
|
/* is escape sequence */
|
||||||
@ -816,65 +816,73 @@ static cJSON_bool parse_string(cJSON * const item, parse_buffer * const input_bu
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
output_pointer = output;
|
/* If there are no escaped characters, we can use memcpy */
|
||||||
/* loop through the string literal */
|
if(skipped_bytes == 0)
|
||||||
while (input_pointer < input_end)
|
|
||||||
{
|
{
|
||||||
if (*input_pointer != '\\')
|
memcpy(output, input_pointer, allocation_length);
|
||||||
{
|
output[allocation_length-1] = '\0';
|
||||||
*output_pointer++ = *input_pointer++;
|
|
||||||
}
|
|
||||||
/* escape sequence */
|
|
||||||
else
|
|
||||||
{
|
|
||||||
unsigned char sequence_length = 2;
|
|
||||||
if ((input_end - input_pointer) < 1)
|
|
||||||
{
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (input_pointer[1])
|
|
||||||
{
|
|
||||||
case 'b':
|
|
||||||
*output_pointer++ = '\b';
|
|
||||||
break;
|
|
||||||
case 'f':
|
|
||||||
*output_pointer++ = '\f';
|
|
||||||
break;
|
|
||||||
case 'n':
|
|
||||||
*output_pointer++ = '\n';
|
|
||||||
break;
|
|
||||||
case 'r':
|
|
||||||
*output_pointer++ = '\r';
|
|
||||||
break;
|
|
||||||
case 't':
|
|
||||||
*output_pointer++ = '\t';
|
|
||||||
break;
|
|
||||||
case '\"':
|
|
||||||
case '\\':
|
|
||||||
case '/':
|
|
||||||
*output_pointer++ = input_pointer[1];
|
|
||||||
break;
|
|
||||||
|
|
||||||
/* UTF-16 literal */
|
|
||||||
case 'u':
|
|
||||||
sequence_length = utf16_literal_to_utf8(input_pointer, input_end, &output_pointer);
|
|
||||||
if (sequence_length == 0)
|
|
||||||
{
|
|
||||||
/* failed to convert UTF16-literal to UTF-8 */
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
input_pointer += sequence_length;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* else loop through the string literal */
|
||||||
|
output_pointer = output;
|
||||||
|
while (input_pointer < input_end)
|
||||||
|
{
|
||||||
|
if (*input_pointer != '\\')
|
||||||
|
{
|
||||||
|
*output_pointer++ = *input_pointer++;
|
||||||
|
}
|
||||||
|
/* escape sequence */
|
||||||
|
else
|
||||||
|
{
|
||||||
|
unsigned char sequence_length = 2;
|
||||||
|
if ((input_end - input_pointer) < 1)
|
||||||
|
{
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
/* zero terminate the output */
|
switch (input_pointer[1])
|
||||||
*output_pointer = '\0';
|
{
|
||||||
|
case 'b':
|
||||||
|
*output_pointer++ = '\b';
|
||||||
|
break;
|
||||||
|
case 'f':
|
||||||
|
*output_pointer++ = '\f';
|
||||||
|
break;
|
||||||
|
case 'n':
|
||||||
|
*output_pointer++ = '\n';
|
||||||
|
break;
|
||||||
|
case 'r':
|
||||||
|
*output_pointer++ = '\r';
|
||||||
|
break;
|
||||||
|
case 't':
|
||||||
|
*output_pointer++ = '\t';
|
||||||
|
break;
|
||||||
|
case '\"':
|
||||||
|
case '\\':
|
||||||
|
case '/':
|
||||||
|
*output_pointer++ = input_pointer[1];
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* UTF-16 literal */
|
||||||
|
case 'u':
|
||||||
|
sequence_length = utf16_literal_to_utf8(input_pointer, input_end, &output_pointer);
|
||||||
|
if (sequence_length == 0)
|
||||||
|
{
|
||||||
|
/* failed to convert UTF16-literal to UTF-8 */
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
input_pointer += sequence_length;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* zero terminate the output */
|
||||||
|
*output_pointer = '\0';
|
||||||
|
}
|
||||||
|
|
||||||
item->type = cJSON_String;
|
item->type = cJSON_String;
|
||||||
item->valuestring = (char*)output;
|
item->valuestring = (char*)output;
|
||||||
|
@ -73,6 +73,7 @@ static void assert_not_parse_string(const char * const string)
|
|||||||
static void parse_string_should_parse_strings(void)
|
static void parse_string_should_parse_strings(void)
|
||||||
{
|
{
|
||||||
assert_parse_string("\"\"", "");
|
assert_parse_string("\"\"", "");
|
||||||
|
assert_parse_string("\"Simple string\"", "Simple string");
|
||||||
assert_parse_string(
|
assert_parse_string(
|
||||||
"\" !\\\"#$%&'()*+,-./\\/0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_'abcdefghijklmnopqrstuvwxyz{|}~\"",
|
"\" !\\\"#$%&'()*+,-./\\/0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_'abcdefghijklmnopqrstuvwxyz{|}~\"",
|
||||||
" !\"#$%&'()*+,-.//0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_'abcdefghijklmnopqrstuvwxyz{|}~");
|
" !\"#$%&'()*+,-.//0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_'abcdefghijklmnopqrstuvwxyz{|}~");
|
||||||
|
Loading…
Reference in New Issue
Block a user