summaryrefslogtreecommitdiffstats
path: root/jv_parse.c
diff options
context:
space:
mode:
author: Stephen Dolan <mu@netsoc.tcd.ie>, 2012-12-02 23:53:55 +0000
committer: Stephen Dolan <mu@netsoc.tcd.ie>, 2012-12-02 23:53:55 +0000
commit: 67f8ad943538e00826966c069d917b5bc99a4e47 (patch)
tree: 2db175b49ab9269c389f37d0d275a73b51d14287 /jv_parse.c
parent: 5b45184a1a79372630df76c0e840f4fffcce9494 (diff)
Ignore a UTF-8 BOM if one appears at the start of a JSON document.
Closes #45.
Diffstat (limited to 'jv_parse.c')
-rw-r--r-- jv_parse.c 20
1 files changed, 20 insertions, 0 deletions
diff --git a/jv_parse.c b/jv_parse.c
index 63cdf935..738beb94 100644
--- a/jv_parse.c
+++ b/jv_parse.c
@@ -24,6 +24,7 @@ void jv_parser_init(struct jv_parser* p) {
p->st = JV_PARSER_NORMAL;
p->curr_buf = 0;
p->curr_buf_length = p->curr_buf_pos = p->curr_buf_is_partial = 0;
+ p->bom_strip_position = 0;
jvp_dtoa_context_init(&p->dtoa);
}
@@ -332,9 +333,27 @@ static pfunc scan(struct jv_parser* p, char ch, jv* out) {
return answer;
}
+static unsigned char UTF8_BOM[] = {0xEF,0xBB,0xBF};
+
void jv_parser_set_buf(struct jv_parser* p, const char* buf, int length, int is_partial) {
assert((p->curr_buf == 0 || p->curr_buf_pos == p->curr_buf_length)
&& "previous buffer not exhausted");
+ while (length > 0 && p->bom_strip_position < sizeof(UTF8_BOM)) { // never read past the end of buf; a BOM split across buffers resumes on the next call
+ if ((unsigned char)*buf == UTF8_BOM[p->bom_strip_position]) {
+ // matched a BOM character
+ buf++;
+ length--;
+ p->bom_strip_position++;
+ } else {
+ if (p->bom_strip_position == 0) {
+ // no BOM in this document
+ p->bom_strip_position = sizeof(UTF8_BOM);
+ } else {
+ // malformed BOM (prefix present, rest missing)
+ p->bom_strip_position = 0xff;
+ }
+ }
+ }
p->curr_buf = buf;
p->curr_buf_length = length;
p->curr_buf_pos = 0;
@@ -343,6 +362,7 @@ void jv_parser_set_buf(struct jv_parser* p, const char* buf, int length, int is_
jv jv_parser_next(struct jv_parser* p) {
assert(p->curr_buf && "a buffer must be provided");
+ if (p->bom_strip_position == 0xff) return jv_invalid_with_msg(jv_string("Malformed BOM"));
jv value;
presult msg = 0;
while (!msg && p->curr_buf_pos < p->curr_buf_length) {