Integrate patch for handling broken Unicode surrogate pairs, with

thanks to kolman and Irwan Djajadi on SourceForge.
2023-08-10 21:13:26 +03:00 · 2016-03-19 11:19:24 +00:00 · 2016-03-19 11:19:24 +00:00 · ee579ecbd6
commit ee579ecbd6
parent a6a75645e4
1 changed files with 12 additions and 10 deletions
--- a/cJSON.c
+++ b/cJSON.c
@@ -192,16 +192,18 @@ static unsigned parse_hex4(const char *str)
 static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
 static const char *parse_string(cJSON *item,const char *str)
 {
-	const char *ptr=str+1;char *ptr2;char *out;int len=0;unsigned uc,uc2;
+	const char *ptr=str+1,*end_ptr=str+1;char *ptr2;char *out;int len=0;unsigned uc,uc2;
 	if (*str!='\"') {ep=str;return 0;}	/* not a string! */
-	while (*ptr!='\"' && *ptr && ++len) if (*ptr++ == '\\') ptr++;	/* Skip escaped quotes. */
+	while (*end_ptr!='\"' && *end_ptr && ++len) if (*end_ptr++ == '\\') end_ptr++;	/* Skip escaped quotes. */
 	out=(char*)cJSON_malloc(len+1);	/* This is how long we need for the string, roughly. */
 	if (!out) return 0;
 	item->valuestring=out; /* assign here so out will be deleted during cJSON_Delete() later */
 	item->type=cJSON_String;
 	ptr=str+1;ptr2=out;
-	while (*ptr!='\"' && *ptr)
+	while (ptr < end_ptr)
 	{
 		if (*ptr!='\\') *ptr2++=*ptr++;
 		else
@@ -216,14 +218,16 @@ static const char *parse_string(cJSON *item,const char *str)
 				case 't': *ptr2++='\t';	break;
 				case 'u':	 /* transcode utf16 to utf8. */
 					uc=parse_hex4(ptr+1);ptr+=4;	/* get the unicode char. */
 					if (ptr >= end_ptr) {ep=str;return 0;}	/* invalid */
-					if ((uc>=0xDC00 && uc<=0xDFFF) || uc==0)	break;	/* check for invalid.	*/
+					if ((uc>=0xDC00 && uc<=0xDFFF) || uc==0)    {ep=str;return 0;}	/* check for invalid.   */
 					if (uc>=0xD800 && uc<=0xDBFF)	/* UTF16 surrogate pairs.	*/
 					{
-						if (ptr[1]!='\\' || ptr[2]!='u')	break;	/* missing second-half of surrogate.	*/
+						if (ptr+6 > end_ptr)    {ep=str;return 0;}	/* invalid */
 						if (ptr[1]!='\\' || ptr[2]!='u')    {ep=str;return 0;}	/* missing second-half of surrogate.    */
 						uc2=parse_hex4(ptr+3);ptr+=6;
-						if (uc2<0xDC00 || uc2>0xDFFF)		break;	/* invalid second-half of surrogate.	*/
+						if (uc2<0xDC00 || uc2>0xDFFF)       {ep=str;return 0;}	/* invalid second-half of surrogate.    */
 						uc=0x10000 + (((uc&0x3FF)<<10) | (uc2&0x3FF));
 					}
@@ -244,8 +248,6 @@ static const char *parse_string(cJSON *item,const char *str)
 	}
 	*ptr2=0;
 	if (*ptr=='\"') ptr++;
 	item->valuestring=out;
 	item->type=cJSON_String;
 	return ptr;
 }