mirror of
https://github.com/DaveGamble/cJSON.git
synced 2023-08-10 21:13:26 +03:00
Update to cJSON! We now support UTF-16 surrogate pairs :)
git-svn-id: http://svn.code.sf.net/p/cjson/code@41 e3330c51-1366-4df0-8b21-3ccf24e3d50e
This commit is contained in:
parent
9061b7a7e7
commit
0d268cfef7
22
cJSON.c
22
cJSON.c
@@ -142,7 +142,7 @@ static char *print_number(cJSON *item)
|
||||
/* UTF-8 lead-byte prefix bits, indexed by the length in bytes of the encoded sequence.
   parse_string ORs firstByteMark[len] into the first byte it emits for a \u escape;
   the code visible here only ever produces len 1..4. */
static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
|
||||
static const char *parse_string(cJSON *item,const char *str)
|
||||
{
|
||||
const char *ptr=str+1;char *ptr2;char *out;int len=0;unsigned uc;
|
||||
const char *ptr=str+1;char *ptr2;char *out;int len=0;unsigned uc,uc2;
|
||||
if (*str!='\"') {ep=str;return 0;} /* not a string! */
|
||||
|
||||
while (*ptr!='\"' && *ptr && ++len) if (*ptr++ == '\\') ptr++; /* Skip escaped quotes. */
|
||||
@@ -164,16 +164,28 @@ static const char *parse_string(cJSON *item,const char *str)
|
||||
case 'n': *ptr2++='\n'; break;
|
||||
case 'r': *ptr2++='\r'; break;
|
||||
case 't': *ptr2++='\t'; break;
|
||||
case 'u': /* transcode utf16 to utf8. DOES NOT SUPPORT SURROGATE PAIRS CORRECTLY. */
|
||||
sscanf(ptr+1,"%4x",&uc); /* get the unicode char. */
|
||||
len=3;if (uc<0x80) len=1;else if (uc<0x800) len=2;ptr2+=len;
|
||||
case 'u': /* transcode utf16 to utf8. */
|
||||
sscanf(ptr+1,"%4x",&uc);ptr+=4; /* get the unicode char. */
|
||||
|
||||
if ((uc>=0xDC00 && uc<=0xDFFF) || uc==0) break; // check for invalid.
|
||||
|
||||
if (uc>=0xD800 && uc<=0xDBFF) // UTF16 surrogate pairs.
|
||||
{
|
||||
if (ptr[1]!='\\' || ptr[2]!='u') break; // missing second-half of surrogate.
|
||||
sscanf(ptr+3,"%4x",&uc2);ptr+=6;
|
||||
if (uc2<0xDC00 || uc2>0xDFFF) break; // invalid second-half of surrogate.
|
||||
uc=0x10000 | ((uc&0x3FF)<<10) | (uc2&0x3FF);
|
||||
}
|
||||
|
||||
len=4;if (uc<0x80) len=1;else if (uc<0x800) len=2;else if (uc<0x10000) len=3; ptr2+=len;
|
||||
|
||||
switch (len) {
|
||||
case 4: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6;
|
||||
case 3: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6;
|
||||
case 2: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6;
|
||||
case 1: *--ptr2 =(uc | firstByteMark[len]);
|
||||
}
|
||||
ptr2+=len;ptr+=4;
|
||||
ptr2+=len;
|
||||
break;
|
||||
default: *ptr2++=*ptr; break;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user