Add a built-in hex parser for \uXXXX Unicode escapes (replacing sscanf), which ought to be a lot faster.

git-svn-id: http://svn.code.sf.net/p/cjson/code@57 e3330c51-1366-4df0-8b21-3ccf24e3d50e
This commit is contained in:
Dave Gamble 2013-08-14 13:20:42 +00:00
parent 22e51c92f0
commit c537515c17

18
cJSON.c
View File

@ -97,7 +97,6 @@ static const char *parse_number(cJSON *item,const char *num)
{
double n=0,sign=1,scale=0;int subscale=0,signsubscale=1;
/* Could use sscanf for this? */
if (*num=='-') sign=-1,num++; /* Has sign? */
if (*num=='0') num++; /* is zero */
if (*num>='1' && *num<='9') do n=(n*10.0)+(*num++ -'0'); while (*num>='0' && *num<='9'); /* Number? */
@ -138,6 +137,19 @@ static char *print_number(cJSON *item)
return str;
}
/* Parse exactly four hexadecimal digits starting at str and return their
   value (0x0000..0xFFFF). Upper- and lower-case digits are both accepted.
   Returns 0 if any of the four characters is not a hex digit; this is
   indistinguishable from a valid "0000". Scanning stops at the first
   invalid character (including the terminating NUL), so no byte after it
   is read. */
static unsigned parse_hex4(const char *str)
{
	unsigned h=0;
	int i;
	for (i=0;i<4;i++)
	{
		const char c=str[i];
		unsigned digit;
		if (c>='0' && c<='9') digit=(unsigned)(c-'0');
		else if (c>='A' && c<='F') digit=10u+(unsigned)(c-'A');
		else if (c>='a' && c<='f') digit=10u+(unsigned)(c-'a');
		else return 0;
		/* Shift the whole accumulator: masking it to the low nibble first
		   would throw away every digit but the previous one. */
		h=(h<<4)|digit;
	}
	return h;
}
/* Parse the input text into an unescaped cstring, and populate item. */
static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
static const char *parse_string(cJSON *item,const char *str)
@ -165,14 +177,14 @@ static const char *parse_string(cJSON *item,const char *str)
case 'r': *ptr2++='\r'; break;
case 't': *ptr2++='\t'; break;
case 'u': /* transcode utf16 to utf8. */
sscanf(ptr+1,"%4x",&uc);ptr+=4; /* get the unicode char. */
uc=parse_hex4(ptr+1);ptr+=4; /* get the unicode char. */
if ((uc>=0xDC00 && uc<=0xDFFF) || uc==0) break; /* check for invalid. */
if (uc>=0xD800 && uc<=0xDBFF) /* UTF16 surrogate pairs. */
{
if (ptr[1]!='\\' || ptr[2]!='u') break; /* missing second-half of surrogate. */
sscanf(ptr+3,"%4x",&uc2);ptr+=6;
uc2=parse_hex4(ptr+3);ptr+=6;
if (uc2<0xDC00 || uc2>0xDFFF) break; /* invalid second-half of surrogate. */
uc=0x10000 + (((uc&0x3FF)<<10) | (uc2&0x3FF));
}