mirror of
https://github.com/DaveGamble/cJSON.git
synced 2023-08-10 21:13:26 +03:00
Integrate patch for handling broken Unicode surrogate pairs, with
thanks to kolman and Irwan Djajadi on SourceForge.
This commit is contained in:
parent
a6a75645e4
commit
ee579ecbd6
18
cJSON.c
18
cJSON.c
@ -192,16 +192,18 @@ static unsigned parse_hex4(const char *str)
|
|||||||
static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
|
static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
|
||||||
static const char *parse_string(cJSON *item,const char *str)
|
static const char *parse_string(cJSON *item,const char *str)
|
||||||
{
|
{
|
||||||
const char *ptr=str+1;char *ptr2;char *out;int len=0;unsigned uc,uc2;
|
const char *ptr=str+1,*end_ptr=str+1;char *ptr2;char *out;int len=0;unsigned uc,uc2;
|
||||||
if (*str!='\"') {ep=str;return 0;} /* not a string! */
|
if (*str!='\"') {ep=str;return 0;} /* not a string! */
|
||||||
|
|
||||||
while (*ptr!='\"' && *ptr && ++len) if (*ptr++ == '\\') ptr++; /* Skip escaped quotes. */
|
while (*end_ptr!='\"' && *end_ptr && ++len) if (*end_ptr++ == '\\') end_ptr++; /* Skip escaped quotes. */
|
||||||
|
|
||||||
out=(char*)cJSON_malloc(len+1); /* This is how long we need for the string, roughly. */
|
out=(char*)cJSON_malloc(len+1); /* This is how long we need for the string, roughly. */
|
||||||
if (!out) return 0;
|
if (!out) return 0;
|
||||||
|
item->valuestring=out; /* assign here so out will be deleted during cJSON_Delete() later */
|
||||||
|
item->type=cJSON_String;
|
||||||
|
|
||||||
ptr=str+1;ptr2=out;
|
ptr=str+1;ptr2=out;
|
||||||
while (*ptr!='\"' && *ptr)
|
while (ptr < end_ptr)
|
||||||
{
|
{
|
||||||
if (*ptr!='\\') *ptr2++=*ptr++;
|
if (*ptr!='\\') *ptr2++=*ptr++;
|
||||||
else
|
else
|
||||||
@ -216,14 +218,16 @@ static const char *parse_string(cJSON *item,const char *str)
|
|||||||
case 't': *ptr2++='\t'; break;
|
case 't': *ptr2++='\t'; break;
|
||||||
case 'u': /* transcode utf16 to utf8. */
|
case 'u': /* transcode utf16 to utf8. */
|
||||||
uc=parse_hex4(ptr+1);ptr+=4; /* get the unicode char. */
|
uc=parse_hex4(ptr+1);ptr+=4; /* get the unicode char. */
|
||||||
|
if (ptr >= end_ptr) {ep=str;return 0;} /* invalid */
|
||||||
|
|
||||||
if ((uc>=0xDC00 && uc<=0xDFFF) || uc==0) break; /* check for invalid. */
|
if ((uc>=0xDC00 && uc<=0xDFFF) || uc==0) {ep=str;return 0;} /* check for invalid. */
|
||||||
|
|
||||||
if (uc>=0xD800 && uc<=0xDBFF) /* UTF16 surrogate pairs. */
|
if (uc>=0xD800 && uc<=0xDBFF) /* UTF16 surrogate pairs. */
|
||||||
{
|
{
|
||||||
if (ptr[1]!='\\' || ptr[2]!='u') break; /* missing second-half of surrogate. */
|
if (ptr+6 > end_ptr) {ep=str;return 0;} /* invalid */
|
||||||
|
if (ptr[1]!='\\' || ptr[2]!='u') {ep=str;return 0;} /* missing second-half of surrogate. */
|
||||||
uc2=parse_hex4(ptr+3);ptr+=6;
|
uc2=parse_hex4(ptr+3);ptr+=6;
|
||||||
if (uc2<0xDC00 || uc2>0xDFFF) break; /* invalid second-half of surrogate. */
|
if (uc2<0xDC00 || uc2>0xDFFF) {ep=str;return 0;} /* invalid second-half of surrogate. */
|
||||||
uc=0x10000 + (((uc&0x3FF)<<10) | (uc2&0x3FF));
|
uc=0x10000 + (((uc&0x3FF)<<10) | (uc2&0x3FF));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -244,8 +248,6 @@ static const char *parse_string(cJSON *item,const char *str)
|
|||||||
}
|
}
|
||||||
*ptr2=0;
|
*ptr2=0;
|
||||||
if (*ptr=='\"') ptr++;
|
if (*ptr=='\"') ptr++;
|
||||||
item->valuestring=out;
|
|
||||||
item->type=cJSON_String;
|
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user