1 | // __ _____ _____ _____ |
2 | // __| | __| | | | JSON for Modern C++ |
3 | // | | |__ | | | | | | version 3.11.3 |
4 | // |_____|_____|_____|_|___| https://github.com/nlohmann/json |
5 | // |
6 | // SPDX-FileCopyrightText: 2013-2023 Niels Lohmann <https://nlohmann.me> |
7 | // SPDX-License-Identifier: MIT |
8 | |
9 | #pragma once |
10 | |
11 | #include <cmath> // isfinite |
12 | #include <cstdint> // uint8_t |
13 | #include <functional> // function |
14 | #include <string> // string |
15 | #include <utility> // move |
16 | #include <vector> // vector |
17 | |
18 | #include <nlohmann/detail/exceptions.hpp> |
19 | #include <nlohmann/detail/input/input_adapters.hpp> |
20 | #include <nlohmann/detail/input/json_sax.hpp> |
21 | #include <nlohmann/detail/input/lexer.hpp> |
22 | #include <nlohmann/detail/macro_scope.hpp> |
23 | #include <nlohmann/detail/meta/is_sax.hpp> |
24 | #include <nlohmann/detail/string_concat.hpp> |
25 | #include <nlohmann/detail/value_t.hpp> |
26 | |
27 | NLOHMANN_JSON_NAMESPACE_BEGIN |
28 | namespace detail |
29 | { |
30 | //////////// |
31 | // parser // |
32 | //////////// |
33 | |
/// Events reported to a parser callback while a JSON text is being read.
/// Stored in a single byte (underlying type std::uint8_t).
enum class parse_event_t : std::uint8_t
{
    /// `{` was read: processing of a JSON object begins
    object_start,
    /// `}` was read: processing of a JSON object is complete
    object_end,
    /// `[` was read: processing of a JSON array begins
    array_start,
    /// `]` was read: processing of a JSON array is complete
    array_end,
    /// the key belonging to a value inside an object was read
    key,
    /// a JSON value was read completely
    value
};
49 | |
50 | template<typename BasicJsonType> |
51 | using parser_callback_t = |
52 | std::function<bool(int /*depth*/, parse_event_t /*event*/, BasicJsonType& /*parsed*/)>; |
53 | |
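/*!
@brief syntax analysis

This class implements the JSON parser. Nested arrays and objects are tracked
with an explicit stack inside a loop rather than with recursive function
calls, and every parsed element is reported to a SAX-style handler.
*/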
59 | template<typename BasicJsonType, typename InputAdapterType> |
60 | class parser |
61 | { |
62 | using number_integer_t = typename BasicJsonType::number_integer_t; |
63 | using number_unsigned_t = typename BasicJsonType::number_unsigned_t; |
64 | using number_float_t = typename BasicJsonType::number_float_t; |
65 | using string_t = typename BasicJsonType::string_t; |
66 | using lexer_t = lexer<BasicJsonType, InputAdapterType>; |
67 | using token_type = typename lexer_t::token_type; |
68 | |
69 | public: |
70 | /// a parser reading from an input adapter |
71 | explicit parser(InputAdapterType&& adapter, |
72 | const parser_callback_t<BasicJsonType> cb = nullptr, |
73 | const bool allow_exceptions_ = true, |
74 | const bool = false) |
75 | : callback(cb) |
76 | , m_lexer(std::move(adapter), skip_comments) |
77 | , allow_exceptions(allow_exceptions_) |
78 | { |
79 | // read first token |
80 | get_token(); |
81 | } |
82 | |
83 | /*! |
84 | @brief public parser interface |
85 | |
86 | @param[in] strict whether to expect the last token to be EOF |
87 | @param[in,out] result parsed JSON value |
88 | |
89 | @throw parse_error.101 in case of an unexpected token |
90 | @throw parse_error.102 if to_unicode fails or surrogate error |
91 | @throw parse_error.103 if to_unicode fails |
92 | */ |
93 | void parse(const bool strict, BasicJsonType& result) |
94 | { |
95 | if (callback) |
96 | { |
97 | json_sax_dom_callback_parser<BasicJsonType> sdp(result, callback, allow_exceptions); |
98 | sax_parse_internal(&sdp); |
99 | |
100 | // in strict mode, input must be completely read |
101 | if (strict && (get_token() != token_type::end_of_input)) |
102 | { |
103 | sdp.parse_error(m_lexer.get_position(), |
104 | m_lexer.get_token_string(), |
105 | parse_error::create(101, m_lexer.get_position(), |
106 | exception_message(expected: token_type::end_of_input, context: "value" ), nullptr)); |
107 | } |
108 | |
109 | // in case of an error, return discarded value |
110 | if (sdp.is_errored()) |
111 | { |
112 | result = value_t::discarded; |
113 | return; |
114 | } |
115 | |
116 | // set top-level value to null if it was discarded by the callback |
117 | // function |
118 | if (result.is_discarded()) |
119 | { |
120 | result = nullptr; |
121 | } |
122 | } |
123 | else |
124 | { |
125 | json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions); |
126 | sax_parse_internal(&sdp); |
127 | |
128 | // in strict mode, input must be completely read |
129 | if (strict && (get_token() != token_type::end_of_input)) |
130 | { |
131 | sdp.parse_error(m_lexer.get_position(), |
132 | m_lexer.get_token_string(), |
133 | parse_error::create(101, m_lexer.get_position(), exception_message(expected: token_type::end_of_input, context: "value" ), nullptr)); |
134 | } |
135 | |
136 | // in case of an error, return discarded value |
137 | if (sdp.is_errored()) |
138 | { |
139 | result = value_t::discarded; |
140 | return; |
141 | } |
142 | } |
143 | |
144 | result.assert_invariant(); |
145 | } |
146 | |
147 | /*! |
148 | @brief public accept interface |
149 | |
150 | @param[in] strict whether to expect the last token to be EOF |
151 | @return whether the input is a proper JSON text |
152 | */ |
153 | bool accept(const bool strict = true) |
154 | { |
155 | json_sax_acceptor<BasicJsonType> sax_acceptor; |
156 | return sax_parse(&sax_acceptor, strict); |
157 | } |
158 | |
159 | template<typename SAX> |
160 | JSON_HEDLEY_NON_NULL(2) |
161 | bool sax_parse(SAX* sax, const bool strict = true) |
162 | { |
163 | (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {}; |
164 | const bool result = sax_parse_internal(sax); |
165 | |
166 | // strict mode: next byte must be EOF |
167 | if (result && strict && (get_token() != token_type::end_of_input)) |
168 | { |
169 | return sax->parse_error(m_lexer.get_position(), |
170 | m_lexer.get_token_string(), |
171 | parse_error::create(101, m_lexer.get_position(), exception_message(expected: token_type::end_of_input, context: "value" ), nullptr)); |
172 | } |
173 | |
174 | return result; |
175 | } |
176 | |
177 | private: |
178 | template<typename SAX> |
179 | JSON_HEDLEY_NON_NULL(2) |
180 | bool sax_parse_internal(SAX* sax) |
181 | { |
182 | // stack to remember the hierarchy of structured values we are parsing |
183 | // true = array; false = object |
184 | std::vector<bool> states; |
185 | // value to avoid a goto (see comment where set to true) |
186 | bool skip_to_state_evaluation = false; |
187 | |
188 | while (true) |
189 | { |
190 | if (!skip_to_state_evaluation) |
191 | { |
192 | // invariant: get_token() was called before each iteration |
193 | switch (last_token) |
194 | { |
195 | case token_type::begin_object: |
196 | { |
197 | if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast<std::size_t>(-1)))) |
198 | { |
199 | return false; |
200 | } |
201 | |
202 | // closing } -> we are done |
203 | if (get_token() == token_type::end_object) |
204 | { |
205 | if (JSON_HEDLEY_UNLIKELY(!sax->end_object())) |
206 | { |
207 | return false; |
208 | } |
209 | break; |
210 | } |
211 | |
212 | // parse key |
213 | if (JSON_HEDLEY_UNLIKELY(last_token != token_type::value_string)) |
214 | { |
215 | return sax->parse_error(m_lexer.get_position(), |
216 | m_lexer.get_token_string(), |
217 | parse_error::create(101, m_lexer.get_position(), exception_message(expected: token_type::value_string, context: "object key" ), nullptr)); |
218 | } |
219 | if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string()))) |
220 | { |
221 | return false; |
222 | } |
223 | |
224 | // parse separator (:) |
225 | if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator)) |
226 | { |
227 | return sax->parse_error(m_lexer.get_position(), |
228 | m_lexer.get_token_string(), |
229 | parse_error::create(101, m_lexer.get_position(), exception_message(expected: token_type::name_separator, context: "object separator" ), nullptr)); |
230 | } |
231 | |
232 | // remember we are now inside an object |
233 | states.push_back(x: false); |
234 | |
235 | // parse values |
236 | get_token(); |
237 | continue; |
238 | } |
239 | |
240 | case token_type::begin_array: |
241 | { |
242 | if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast<std::size_t>(-1)))) |
243 | { |
244 | return false; |
245 | } |
246 | |
247 | // closing ] -> we are done |
248 | if (get_token() == token_type::end_array) |
249 | { |
250 | if (JSON_HEDLEY_UNLIKELY(!sax->end_array())) |
251 | { |
252 | return false; |
253 | } |
254 | break; |
255 | } |
256 | |
257 | // remember we are now inside an array |
258 | states.push_back(x: true); |
259 | |
260 | // parse values (no need to call get_token) |
261 | continue; |
262 | } |
263 | |
264 | case token_type::value_float: |
265 | { |
266 | const auto res = m_lexer.get_number_float(); |
267 | |
268 | if (JSON_HEDLEY_UNLIKELY(!std::isfinite(res))) |
269 | { |
270 | return sax->parse_error(m_lexer.get_position(), |
271 | m_lexer.get_token_string(), |
272 | out_of_range::create(406, concat("number overflow parsing '" , m_lexer.get_token_string(), '\''), nullptr)); |
273 | } |
274 | |
275 | if (JSON_HEDLEY_UNLIKELY(!sax->number_float(res, m_lexer.get_string()))) |
276 | { |
277 | return false; |
278 | } |
279 | |
280 | break; |
281 | } |
282 | |
283 | case token_type::literal_false: |
284 | { |
285 | if (JSON_HEDLEY_UNLIKELY(!sax->boolean(false))) |
286 | { |
287 | return false; |
288 | } |
289 | break; |
290 | } |
291 | |
292 | case token_type::literal_null: |
293 | { |
294 | if (JSON_HEDLEY_UNLIKELY(!sax->null())) |
295 | { |
296 | return false; |
297 | } |
298 | break; |
299 | } |
300 | |
301 | case token_type::literal_true: |
302 | { |
303 | if (JSON_HEDLEY_UNLIKELY(!sax->boolean(true))) |
304 | { |
305 | return false; |
306 | } |
307 | break; |
308 | } |
309 | |
310 | case token_type::value_integer: |
311 | { |
312 | if (JSON_HEDLEY_UNLIKELY(!sax->number_integer(m_lexer.get_number_integer()))) |
313 | { |
314 | return false; |
315 | } |
316 | break; |
317 | } |
318 | |
319 | case token_type::value_string: |
320 | { |
321 | if (JSON_HEDLEY_UNLIKELY(!sax->string(m_lexer.get_string()))) |
322 | { |
323 | return false; |
324 | } |
325 | break; |
326 | } |
327 | |
328 | case token_type::value_unsigned: |
329 | { |
330 | if (JSON_HEDLEY_UNLIKELY(!sax->number_unsigned(m_lexer.get_number_unsigned()))) |
331 | { |
332 | return false; |
333 | } |
334 | break; |
335 | } |
336 | |
337 | case token_type::parse_error: |
338 | { |
339 | // using "uninitialized" to avoid "expected" message |
340 | return sax->parse_error(m_lexer.get_position(), |
341 | m_lexer.get_token_string(), |
342 | parse_error::create(101, m_lexer.get_position(), exception_message(expected: token_type::uninitialized, context: "value" ), nullptr)); |
343 | } |
344 | case token_type::end_of_input: |
345 | { |
346 | if (JSON_HEDLEY_UNLIKELY(m_lexer.get_position().chars_read_total == 1)) |
347 | { |
348 | return sax->parse_error(m_lexer.get_position(), |
349 | m_lexer.get_token_string(), |
350 | parse_error::create(101, m_lexer.get_position(), |
351 | "attempting to parse an empty input; check that your input string or stream contains the expected JSON" , nullptr)); |
352 | } |
353 | |
354 | return sax->parse_error(m_lexer.get_position(), |
355 | m_lexer.get_token_string(), |
356 | parse_error::create(101, m_lexer.get_position(), exception_message(expected: token_type::literal_or_value, context: "value" ), nullptr)); |
357 | } |
358 | case token_type::uninitialized: |
359 | case token_type::end_array: |
360 | case token_type::end_object: |
361 | case token_type::name_separator: |
362 | case token_type::value_separator: |
363 | case token_type::literal_or_value: |
364 | default: // the last token was unexpected |
365 | { |
366 | return sax->parse_error(m_lexer.get_position(), |
367 | m_lexer.get_token_string(), |
368 | parse_error::create(101, m_lexer.get_position(), exception_message(expected: token_type::literal_or_value, context: "value" ), nullptr)); |
369 | } |
370 | } |
371 | } |
372 | else |
373 | { |
374 | skip_to_state_evaluation = false; |
375 | } |
376 | |
377 | // we reached this line after we successfully parsed a value |
378 | if (states.empty()) |
379 | { |
380 | // empty stack: we reached the end of the hierarchy: done |
381 | return true; |
382 | } |
383 | |
384 | if (states.back()) // array |
385 | { |
386 | // comma -> next value |
387 | if (get_token() == token_type::value_separator) |
388 | { |
389 | // parse a new value |
390 | get_token(); |
391 | continue; |
392 | } |
393 | |
394 | // closing ] |
395 | if (JSON_HEDLEY_LIKELY(last_token == token_type::end_array)) |
396 | { |
397 | if (JSON_HEDLEY_UNLIKELY(!sax->end_array())) |
398 | { |
399 | return false; |
400 | } |
401 | |
402 | // We are done with this array. Before we can parse a |
403 | // new value, we need to evaluate the new state first. |
404 | // By setting skip_to_state_evaluation to false, we |
405 | // are effectively jumping to the beginning of this if. |
406 | JSON_ASSERT(!states.empty()); |
407 | states.pop_back(); |
408 | skip_to_state_evaluation = true; |
409 | continue; |
410 | } |
411 | |
412 | return sax->parse_error(m_lexer.get_position(), |
413 | m_lexer.get_token_string(), |
414 | parse_error::create(101, m_lexer.get_position(), exception_message(expected: token_type::end_array, context: "array" ), nullptr)); |
415 | } |
416 | |
417 | // states.back() is false -> object |
418 | |
419 | // comma -> next value |
420 | if (get_token() == token_type::value_separator) |
421 | { |
422 | // parse key |
423 | if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::value_string)) |
424 | { |
425 | return sax->parse_error(m_lexer.get_position(), |
426 | m_lexer.get_token_string(), |
427 | parse_error::create(101, m_lexer.get_position(), exception_message(expected: token_type::value_string, context: "object key" ), nullptr)); |
428 | } |
429 | |
430 | if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string()))) |
431 | { |
432 | return false; |
433 | } |
434 | |
435 | // parse separator (:) |
436 | if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator)) |
437 | { |
438 | return sax->parse_error(m_lexer.get_position(), |
439 | m_lexer.get_token_string(), |
440 | parse_error::create(101, m_lexer.get_position(), exception_message(expected: token_type::name_separator, context: "object separator" ), nullptr)); |
441 | } |
442 | |
443 | // parse values |
444 | get_token(); |
445 | continue; |
446 | } |
447 | |
448 | // closing } |
449 | if (JSON_HEDLEY_LIKELY(last_token == token_type::end_object)) |
450 | { |
451 | if (JSON_HEDLEY_UNLIKELY(!sax->end_object())) |
452 | { |
453 | return false; |
454 | } |
455 | |
456 | // We are done with this object. Before we can parse a |
457 | // new value, we need to evaluate the new state first. |
458 | // By setting skip_to_state_evaluation to false, we |
459 | // are effectively jumping to the beginning of this if. |
460 | JSON_ASSERT(!states.empty()); |
461 | states.pop_back(); |
462 | skip_to_state_evaluation = true; |
463 | continue; |
464 | } |
465 | |
466 | return sax->parse_error(m_lexer.get_position(), |
467 | m_lexer.get_token_string(), |
468 | parse_error::create(101, m_lexer.get_position(), exception_message(expected: token_type::end_object, context: "object" ), nullptr)); |
469 | } |
470 | } |
471 | |
472 | /// get next token from lexer |
473 | token_type get_token() |
474 | { |
475 | return last_token = m_lexer.scan(); |
476 | } |
477 | |
478 | std::string exception_message(const token_type expected, const std::string& context) |
479 | { |
480 | std::string error_msg = "syntax error " ; |
481 | |
482 | if (!context.empty()) |
483 | { |
484 | error_msg += concat(args: "while parsing " , args: context, args: ' '); |
485 | } |
486 | |
487 | error_msg += "- " ; |
488 | |
489 | if (last_token == token_type::parse_error) |
490 | { |
491 | error_msg += concat(m_lexer.get_error_message(), "; last read: '" , |
492 | m_lexer.get_token_string(), '\''); |
493 | } |
494 | else |
495 | { |
496 | error_msg += concat("unexpected " , lexer_t::token_type_name(last_token)); |
497 | } |
498 | |
499 | if (expected != token_type::uninitialized) |
500 | { |
501 | error_msg += concat("; expected " , lexer_t::token_type_name(expected)); |
502 | } |
503 | |
504 | return error_msg; |
505 | } |
506 | |
507 | private: |
508 | /// callback function |
509 | const parser_callback_t<BasicJsonType> callback = nullptr; |
510 | /// the type of the last read token |
511 | token_type last_token = token_type::uninitialized; |
512 | /// the lexer |
513 | lexer_t m_lexer; |
514 | /// whether to throw exceptions in case of errors |
515 | const bool allow_exceptions = true; |
516 | }; |
517 | |
518 | } // namespace detail |
519 | NLOHMANN_JSON_NAMESPACE_END |
520 | |