Integrate patch for handling broken Unicode surrogate pairs, with

thanks to kolman and Irwan Djajadi on SourceForge.
This commit is contained in:
Dave Gamble 2016-03-19 11:19:24 +00:00
parent a6a75645e4
commit ee579ecbd6

18
cJSON.c
View File

@ -192,16 +192,18 @@ static unsigned parse_hex4(const char *str)
static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
static const char *parse_string(cJSON *item,const char *str) static const char *parse_string(cJSON *item,const char *str)
{ {
const char *ptr=str+1;char *ptr2;char *out;int len=0;unsigned uc,uc2; const char *ptr=str+1,*end_ptr=str+1;char *ptr2;char *out;int len=0;unsigned uc,uc2;
if (*str!='\"') {ep=str;return 0;} /* not a string! */ if (*str!='\"') {ep=str;return 0;} /* not a string! */
while (*ptr!='\"' && *ptr && ++len) if (*ptr++ == '\\') ptr++; /* Skip escaped quotes. */ while (*end_ptr!='\"' && *end_ptr && ++len) if (*end_ptr++ == '\\') end_ptr++; /* Skip escaped quotes. */
out=(char*)cJSON_malloc(len+1); /* This is how long we need for the string, roughly. */ out=(char*)cJSON_malloc(len+1); /* This is how long we need for the string, roughly. */
if (!out) return 0; if (!out) return 0;
item->valuestring=out; /* assign here so out will be deleted during cJSON_Delete() later */
item->type=cJSON_String;
ptr=str+1;ptr2=out; ptr=str+1;ptr2=out;
while (*ptr!='\"' && *ptr) while (ptr < end_ptr)
{ {
if (*ptr!='\\') *ptr2++=*ptr++; if (*ptr!='\\') *ptr2++=*ptr++;
else else
@ -216,14 +218,16 @@ static const char *parse_string(cJSON *item,const char *str)
case 't': *ptr2++='\t'; break; case 't': *ptr2++='\t'; break;
case 'u': /* transcode utf16 to utf8. */ case 'u': /* transcode utf16 to utf8. */
uc=parse_hex4(ptr+1);ptr+=4; /* get the unicode char. */ uc=parse_hex4(ptr+1);ptr+=4; /* get the unicode char. */
if (ptr >= end_ptr) {ep=str;return 0;} /* invalid */
if ((uc>=0xDC00 && uc<=0xDFFF) || uc==0) break; /* check for invalid. */ if ((uc>=0xDC00 && uc<=0xDFFF) || uc==0) {ep=str;return 0;} /* check for invalid. */
if (uc>=0xD800 && uc<=0xDBFF) /* UTF16 surrogate pairs. */ if (uc>=0xD800 && uc<=0xDBFF) /* UTF16 surrogate pairs. */
{ {
if (ptr[1]!='\\' || ptr[2]!='u') break; /* missing second-half of surrogate. */ if (ptr+6 > end_ptr) {ep=str;return 0;} /* invalid */
if (ptr[1]!='\\' || ptr[2]!='u') {ep=str;return 0;} /* missing second-half of surrogate. */
uc2=parse_hex4(ptr+3);ptr+=6; uc2=parse_hex4(ptr+3);ptr+=6;
if (uc2<0xDC00 || uc2>0xDFFF) break; /* invalid second-half of surrogate. */ if (uc2<0xDC00 || uc2>0xDFFF) {ep=str;return 0;} /* invalid second-half of surrogate. */
uc=0x10000 + (((uc&0x3FF)<<10) | (uc2&0x3FF)); uc=0x10000 + (((uc&0x3FF)<<10) | (uc2&0x3FF));
} }
@ -244,8 +248,6 @@ static const char *parse_string(cJSON *item,const char *str)
} }
*ptr2=0; *ptr2=0;
if (*ptr=='\"') ptr++; if (*ptr=='\"') ptr++;
item->valuestring=out;
item->type=cJSON_String;
return ptr; return ptr;
} }