Update to cJSON! We now support UTF-16 surrogate pairs :)

git-svn-id: http://svn.code.sf.net/p/cjson/code@41 e3330c51-1366-4df0-8b21-3ccf24e3d50e
This commit is contained in:
Dave Gamble 2011-10-10 15:22:34 +00:00
parent 9061b7a7e7
commit 0d268cfef7

22
cJSON.c
View File

@ -142,7 +142,7 @@ static char *print_number(cJSON *item)
static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
static const char *parse_string(cJSON *item,const char *str) static const char *parse_string(cJSON *item,const char *str)
{ {
const char *ptr=str+1;char *ptr2;char *out;int len=0;unsigned uc; const char *ptr=str+1;char *ptr2;char *out;int len=0;unsigned uc,uc2;
if (*str!='\"') {ep=str;return 0;} /* not a string! */ if (*str!='\"') {ep=str;return 0;} /* not a string! */
while (*ptr!='\"' && *ptr && ++len) if (*ptr++ == '\\') ptr++; /* Skip escaped quotes. */ while (*ptr!='\"' && *ptr && ++len) if (*ptr++ == '\\') ptr++; /* Skip escaped quotes. */
@ -164,16 +164,28 @@ static const char *parse_string(cJSON *item,const char *str)
case 'n': *ptr2++='\n'; break; case 'n': *ptr2++='\n'; break;
case 'r': *ptr2++='\r'; break; case 'r': *ptr2++='\r'; break;
case 't': *ptr2++='\t'; break; case 't': *ptr2++='\t'; break;
case 'u': /* transcode utf16 to utf8. DOES NOT SUPPORT SURROGATE PAIRS CORRECTLY. */ case 'u': /* transcode utf16 to utf8. */
sscanf(ptr+1,"%4x",&uc); /* get the unicode char. */ sscanf(ptr+1,"%4x",&uc);ptr+=4; /* get the unicode char. */
len=3;if (uc<0x80) len=1;else if (uc<0x800) len=2;ptr2+=len;
if ((uc>=0xDC00 && uc<=0xDFFF) || uc==0) break; // check for invalid.
if (uc>=0xD800 && uc<=0xDBFF) // UTF16 surrogate pairs.
{
if (ptr[1]!='\\' || ptr[2]!='u') break; // missing second-half of surrogate.
sscanf(ptr+3,"%4x",&uc2);ptr+=6;
if (uc2<0xDC00 || uc2>0xDFFF) break; // invalid second-half of surrogate.
uc=0x10000 | ((uc&0x3FF)<<10) | (uc2&0x3FF);
}
len=4;if (uc<0x80) len=1;else if (uc<0x800) len=2;else if (uc<0x10000) len=3; ptr2+=len;
switch (len) { switch (len) {
case 4: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6;
case 3: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6; case 3: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6;
case 2: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6; case 2: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6;
case 1: *--ptr2 =(uc | firstByteMark[len]); case 1: *--ptr2 =(uc | firstByteMark[len]);
} }
ptr2+=len;ptr+=4; ptr2+=len;
break; break;
default: *ptr2++=*ptr; break; default: *ptr2++=*ptr; break;
} }