Workshop o mikrokontrolérech na SKSP 2024.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

453 lines
10 KiB

3 months ago
/*
* UCW JSON Library -- Parser
*
* (c) 2015 Martin Mares <mj@ucw.cz>
*
* This software may be freely distributed and used according to the terms
* of the GNU Lesser General Public License.
*/
#include <ucw/lib.h>
#include <ucw/fastbuf.h>
#include <ucw/ff-unicode.h>
#include <ucw/trans.h>
#include <ucw/unicode.h>
#include <ucw-json/json.h>
#include <errno.h>
#include <stdlib.h>
/*
 * Attach the fastbuf `in` to the parser context `js` and reset the reader
 * state: position counters, the pushed-back character, the peeked token
 * and the end-of-input flag.
 */
void json_set_input(struct json_context *js, struct fastbuf *in)
{
  // No look-ahead of either kind is pending
  js->next_token = NULL;
  js->next_char = -1;

  // Position starts at line 1, column 0; input is not exhausted yet
  js->in_eof = 0;
  js->in_column = 0;
  js->in_line = 1;

  js->in_fb = in;
}
/*
 * Report a parse error.
 *
 * Calls trans_throw() with the identifier "ucw.json.parse", the context `js`
 * and a message built from `msg` followed by the current input line and
 * column. The function is marked NONRET; callers in this file rely on it
 * never returning (e.g. they fall through switch cases after calling it).
 */
static void NONRET json_parse_error(struct json_context *js, const char *msg)
{
trans_throw("ucw.json.parse", js, "%s at line %u:%u", msg, js->in_line, js->in_column);
}
/*
 * Read the next character from the input fastbuf.
 *
 * -2 is passed to bget_utf8_32_repl() as the replacement value, so a result
 * of -2 is reported as a malformed UTF-8 sequence. Any other negative result
 * sets js->in_eof and is returned unchanged. For a regular character the
 * column counter is advanced and the character is returned.
 */
static int json_get_char(struct json_context *js)
{
  int ch = bget_utf8_32_repl(js->in_fb, -2);

  if (unlikely(ch == -2))
    json_parse_error(js, "Malformed UTF-8 character");

  if (unlikely(ch < 0))
    {
      js->in_eof = 1;
      return ch;
    }

  js->in_column++;
  return ch;
}
/*
 * Push back a single character: `c` is stored in js->next_char, which
 * json_read_token() consumes before reading any further input.
 * Only one character can be pending at a time.
 */
static void json_unget_char(struct json_context *js, int c)
{
js->next_char = c;
}
static struct json_node *json_triv_token(struct json_context *js, enum json_node_type type)
{
js->trivial_token->type = type;
return js->trivial_token;
}
/*
 * Append a (possibly empty) run of ASCII digits to the growing mempool
 * string `p`. On entry `*c` is the current character; on return it holds
 * the first character that is not a digit. Returns the new end of the string.
 */
static char *json_number_digits(struct json_context *js, char *p, int *c)
{
  int ch = *c;

  for (; ch >= '0' && ch <= '9'; ch = json_get_char(js))
    p = mp_append_char(js->pool, p, ch);

  *c = ch;
  return p;
}

/*
 * Parse a number whose first character `c` (a digit or '-') has already
 * been read.
 *
 * The textual form is collected in the context's mempool between mp_push()
 * and mp_pop(), validated piece by piece (sign, integer part, fraction,
 * exponent) and converted by strtod(). The first character following the
 * number is pushed back. Returns a new number node; syntax errors and
 * an ERANGE result from strtod() are reported via json_parse_error().
 */
static struct json_node *json_parse_number(struct json_context *js, int c)
{
  mp_push(js->pool);
  char *buf = mp_start_noalign(js->pool, 0);

  // Optional minus
  if (c == '-')
    {
      buf = mp_append_char(js->pool, buf, c);
      c = json_get_char(js);
      if (c < '0' || c > '9')
        json_parse_error(js, "Malformed number: just minus");
    }

  // Integer part
  if (c != '0')
    buf = json_number_digits(js, buf, &c);
  else
    {
      // Leading zeroes are forbidden by RFC 7159
      buf = mp_append_char(js->pool, buf, c);
      c = json_get_char(js);
      if (c >= '0' && c <= '9')
        json_parse_error(js, "Malformed number: leading zero");
    }

  // Fractional part
  if (c == '.')
    {
      buf = mp_append_char(js->pool, buf, c);
      c = json_get_char(js);
      if (c < '0' || c > '9')
        json_parse_error(js, "Malformed number: no digits after decimal point");
      buf = json_number_digits(js, buf, &c);
    }

  // Exponent
  if (c == 'e' || c == 'E')
    {
      buf = mp_append_char(js->pool, buf, c);
      c = json_get_char(js);
      if (c == '+' || c == '-')
        {
          buf = mp_append_char(js->pool, buf, c);
          c = json_get_char(js);
        }
      if (c < '0' || c > '9')
        json_parse_error(js, "Malformed number: empty exponent");
      buf = json_number_digits(js, buf, &c);
    }

  // The character that ended the number belongs to the next token
  json_unget_char(js, c);

  buf = mp_end_string(js->pool, buf);
  errno = 0;
  double val = strtod(buf, NULL);
  if (errno == ERANGE)
    json_parse_error(js, "Number out of range");

  // The temporary text is no longer needed once converted
  mp_pop(js->pool);
  return json_new_number(js, val);
}
/*
 * Parse a literal name (null, false, true) whose first character `c`
 * has already been read.
 *
 * Lowercase ASCII letters are collected into a fixed-size buffer; letters
 * that do not fit are read but dropped, and a full buffer is reported as
 * an invalid name. The first non-letter character is pushed back.
 * Returns the matching null/boolean node.
 */
static struct json_node *json_parse_name(struct json_context *js, int c)
{
  char word[16];
  uint len = 0;

  for (; c >= 'a' && c <= 'z'; c = json_get_char(js))
    if (len < sizeof(word) - 1)
      word[len++] = c;

  // A completely filled buffer means the name was too long to be valid
  if (len >= sizeof(word) - 1)
    json_parse_error(js, "Invalid literal name");
  word[len] = 0;
  json_unget_char(js, c);

  if (!strcmp(word, "null"))
    return json_new_null(js);
  if (!strcmp(word, "false"))
    return json_new_bool(js, 0);
  if (!strcmp(word, "true"))
    return json_new_bool(js, 1);

  json_parse_error(js, "Invalid literal name");
}
/*
 * Read exactly four hexadecimal digits (either case) from the input and
 * return their value. Any other character is reported as an invalid
 * Unicode escape sequence.
 */
static uint json_parse_hex4(struct json_context *js)
{
  uint value = 0;

  for (int left = 4; left > 0; left--)
    {
      int c = json_get_char(js);
      uint digit = 0;

      if (c >= '0' && c <= '9')
        digit = c - '0';
      else if (c >= 'a' && c <= 'f')
        digit = c - 'a' + 10;
      else if (c >= 'A' && c <= 'F')
        digit = c - 'A' + 10;
      else
        json_parse_error(js, "Invalid Unicode escape sequence");

      // Low four bits are free after the shift, so OR equals addition here
      value = (value << 4) | digit;
    }

  return value;
}
/*
 * Parse a string token. The caller has already consumed the opening quote;
 * the value of `c` on entry is ignored and overwritten by the first read.
 *
 * Characters are read until the closing quote and appended, UTF-8 encoded,
 * to a string growing in the context's mempool. Rejected in raw form:
 *   - end of input, CR or LF ("Unterminated string"),
 *   - other characters below 0x20,
 *   - surrogates (0xd800-0xdfff),
 *   - private-use characters (0xe000-0xf8ff and 0xf0000-0x10ffff),
 *   - anything above 0x10ffff.
 *
 * Backslash escapes are decoded. For \uXXXX the same surrogate/private-use
 * rules apply to the decoded code point, a zero code point is refused, and
 * a high surrogate must be followed by an escaped low surrogate; the pair
 * is combined into a single code point.
 *
 * Returns a string node referring to the finished mempool string.
 */
static struct json_node *json_parse_string(struct json_context *js, int c)
{
  char *p = mp_start_noalign(js->pool, 0);

  c = json_get_char(js);
  while (c != '"')
    {
      if (unlikely(c < 0x20))
        {
          if (c < 0 || c == 0x0d || c == 0x0a)
            json_parse_error(js, "Unterminated string");
          else
            json_parse_error(js, "Invalid control character in string");
        }
      if (unlikely(c >= 0xd800 && c < 0xf900))
        {
          if (c < 0xe000)
            json_parse_error(js, "Invalid surrogate character in string");
          else
            json_parse_error(js, "Invalid private-use character in string");
        }
      if (unlikely(c >= 0xf0000))
        {
          if (c > 0x10ffff)
            json_parse_error(js, "Invalid non-Unicode character in string");
          else
            json_parse_error(js, "Invalid private-use character in string");
        }

      if (c == '\\')
        {
          c = json_get_char(js);
          switch (c)
            {
            case '"':
            case '\\':
            case '/':
              // These escapes stand for themselves
              break;
            case 'b':
              c = 0x08;
              break;
            case 'f':
              c = 0x0c;
              break;
            case 'n':
              c = 0x0a;
              break;
            case 'r':
              c = 0x0d;
              break;
            case 't':
              c = 0x09;
              break;
            case 'u':
              {
                uint x = json_parse_hex4(js);
                if (!x)
                  json_parse_error(js, "Zero bytes in strings are not supported");
                if (x >= 0xd800 && x < 0xf900)
                  {
                    if (x < 0xdc00)
                      {
                        // High surrogate: low surrogate must follow
                        uint y = 0;
                        if (json_get_char(js) == '\\' && json_get_char(js) == 'u')
                          y = json_parse_hex4(js);
                        if (!(y >= 0xdc00 && y < 0xe000))
                          json_parse_error(js, "Escaped high surrogate codepoint must be followed by a low surrogate codepoint");
                        // Combine the two 10-bit halves; parenthesized so the
                        // result does not depend on `+` binding tighter than `|`
                        c = 0x10000 + (((x & 0x03ff) << 10) | (y & 0x03ff));
                        // Private-use planes start at 0xf0000 inclusive, same
                        // boundary as the check on raw characters above
                        if (c >= 0xf0000)
                          json_parse_error(js, "Invalid escaped private-use character");
                      }
                    else if (x < 0xe000)
                      {
                        // Low surrogate
                        json_parse_error(js, "Invalid escaped surrogate codepoint");
                      }
                    else
                      json_parse_error(js, "Invalid escaped private-use character");
                  }
                else
                  c = x;
                break;
              }
            default:
              json_parse_error(js, "Invalid backslash sequence in string");
            }
        }

      p = mp_append_utf8_32(js->pool, p, c);
      c = json_get_char(js);
    }

  p = mp_end_string(js->pool, p);
  return json_new_string_ref(js, p);
}
static struct json_node *json_read_token(struct json_context *js)
{
if (unlikely(js->in_eof))
return json_triv_token(js, JSON_EOF);
int c = js->next_char;
if (c >= 0)
js->next_char = -1;
else
c = json_get_char(js);
while (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d)
{
if (c == 0x0a)
{
js->in_line++;
js->in_column = 0;
}
c = json_get_char(js);
}
if (c < 0)
return json_triv_token(js, JSON_EOF);
if (c >= '0' && c <= '9' || c == '-')
return json_parse_number(js, c);
if (c >= 'a' && c <= 'z')
return json_parse_name(js, c);
if (c == '"')
return json_parse_string(js, c);
switch (c)
{
case '[':
return json_triv_token(js, JSON_BEGIN_ARRAY);
case ']':
return json_triv_token(js, JSON_END_ARRAY);
case '{':
return json_triv_token(js, JSON_BEGIN_OBJECT);
case '}':
return json_triv_token(js, JSON_END_OBJECT);
case ':':
return json_triv_token(js, JSON_NAME_SEP);
case ',':
return json_triv_token(js, JSON_VALUE_SEP);
case '.':
json_parse_error(js, "Numbers must start with a digit");
case 0xfeff:
json_parse_error(js, "Misplaced byte-order mark, complain in Redmond");
default:
json_parse_error(js, "Invalid character");
}
}
/*
 * Return the next token without consuming it. The token is read on first
 * use and kept in js->next_token until json_next_token() takes it.
 */
struct json_node *json_peek_token(struct json_context *js)
{
  struct json_node *tok = js->next_token;

  if (tok)
    return tok;

  tok = json_read_token(js);
  js->next_token = tok;
  return tok;
}
/*
 * Consume and return the next token: the previously peeked one if it
 * exists (clearing js->next_token), otherwise a freshly read token.
 */
struct json_node *json_next_token(struct json_context *js)
{
  struct json_node *pending = js->next_token;

  if (!pending)
    return json_read_token(js);

  js->next_token = NULL;
  return pending;
}
/*
 * Parse the next complete value from the input.
 *
 * Returns NULL at end of input. Elementary values (null, boolean, number,
 * string) are returned as read. Arrays and objects are built by calling
 * this function recursively for their members. Structural tokens appearing
 * where a value is expected are reported via json_parse_error().
 */
struct json_node *json_next_value(struct json_context *js)
{
  struct json_node *tok = json_next_token(js);

  switch (tok->type)
    {
    case JSON_EOF:
      return NULL;

    // Elementary values
    case JSON_NULL:
    case JSON_BOOLEAN:
    case JSON_NUMBER:
    case JSON_STRING:
      return tok;

    // Array
    case JSON_BEGIN_ARRAY:
      {
        struct json_node *arr = json_new_array(js);

        // Empty array: just swallow the closing bracket
        if (json_peek_token(js)->type == JSON_END_ARRAY)
          {
            json_next_token(js);
            return arr;
          }

        do
          {
            struct json_node *elem = json_next_value(js);
            if (!elem)
              json_parse_error(js, "Unterminated array");
            json_array_append(arr, elem);

            tok = json_next_token(js);
            if (tok->type != JSON_END_ARRAY && tok->type != JSON_VALUE_SEP)
              json_parse_error(js, "Comma or right bracket expected");
          }
        while (tok->type != JSON_END_ARRAY);

        return arr;
      }

    // Object
    case JSON_BEGIN_OBJECT:
      {
        struct json_node *obj = json_new_object(js);

        // Empty object: just swallow the closing brace
        if (json_peek_token(js)->type == JSON_END_OBJECT)
          {
            json_next_token(js);
            return obj;
          }

        do
          {
            struct json_node *key = json_next_value(js);
            if (!key)
              json_parse_error(js, "Unterminated object");
            if (key->type != JSON_STRING)
              json_parse_error(js, "Object key must be a string");

            tok = json_next_token(js);
            if (tok->type != JSON_NAME_SEP)
              json_parse_error(js, "Colon expected");

            struct json_node *val = json_next_value(js);
            if (!val)
              json_parse_error(js, "Unterminated object");

            // Duplicate keys are rejected.  FIXME: Optimize
            if (json_object_get(obj, key->string))
              json_parse_error(js, "Key already set");
            json_object_set(obj, key->string, val);

            tok = json_next_token(js);
            if (tok->type != JSON_END_OBJECT && tok->type != JSON_VALUE_SEP)
              json_parse_error(js, "Comma expected");
          }
        while (tok->type != JSON_END_OBJECT);

        return obj;
      }

    // Misplaced characters
    case JSON_END_ARRAY:
      json_parse_error(js, "Misplaced end of array");
    case JSON_END_OBJECT:
      json_parse_error(js, "Misplaced end of object");
    case JSON_NAME_SEP:
      json_parse_error(js, "Misplaced colon");
    case JSON_VALUE_SEP:
      json_parse_error(js, "Misplaced comma");
    default:
      ASSERT(0);
    }
}
struct json_node *json_parse(struct json_context *js, struct fastbuf *fb)
{
json_set_input(js, fb);
struct json_node *n = json_next_value(js);
if (!n)
json_parse_error(js, "Empty input");
struct json_node *t = json_next_token(js);
if (t->type != JSON_EOF)
json_parse_error(js, "Only one top-level value allowed");
return n;
}