1// __ _____ _____ _____
2// __| | __| | | | JSON for Modern C++
3// | | |__ | | | | | | version 3.11.3
4// |_____|_____|_____|_|___| https://github.com/nlohmann/json
5//
6// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann <https://nlohmann.me>
7// SPDX-License-Identifier: MIT
8
9#pragma once
10
11#include <algorithm> // generate_n
12#include <array> // array
13#include <cmath> // ldexp
14#include <cstddef> // size_t
15#include <cstdint> // uint8_t, uint16_t, uint32_t, uint64_t
16#include <cstdio> // snprintf
17#include <cstring> // memcpy
18#include <iterator> // back_inserter
19#include <limits> // numeric_limits
20#include <string> // char_traits, string
21#include <utility> // make_pair, move
22#include <vector> // vector
23
24#include <nlohmann/detail/exceptions.hpp>
25#include <nlohmann/detail/input/input_adapters.hpp>
26#include <nlohmann/detail/input/json_sax.hpp>
27#include <nlohmann/detail/input/lexer.hpp>
28#include <nlohmann/detail/macro_scope.hpp>
29#include <nlohmann/detail/meta/is_sax.hpp>
30#include <nlohmann/detail/meta/type_traits.hpp>
31#include <nlohmann/detail/string_concat.hpp>
32#include <nlohmann/detail/value_t.hpp>
33
34NLOHMANN_JSON_NAMESPACE_BEGIN
35namespace detail
36{
37
38/// how to treat CBOR tags
enum class cbor_tag_handler_t
{
    /// report a parse_error as soon as a tagged item is encountered
    error,
    /// skip the tag and parse the tagged item as if it were untagged
    ignore,
    /// keep the tag number by storing it as the subtype of a binary value
    store
};
45
46/*!
47@brief determine system byte order
48
49@return true if and only if system's byte order is little endian
50
51@note from https://stackoverflow.com/a/1001328/266378
52*/
static inline bool little_endianness(int num = 1) noexcept
{
    // Copy out the byte stored at the lowest address of num. For the default
    // argument 1, that byte equals 1 exactly when the least significant byte
    // is stored first.
    unsigned char first_byte = 0;
    std::memcpy(&first_byte, &num, sizeof(first_byte));
    return first_byte == 1;
}
57
58///////////////////
59// binary reader //
60///////////////////
61
62/*!
63@brief deserialization of CBOR, MessagePack, and UBJSON values
64*/
65template<typename BasicJsonType, typename InputAdapterType, typename SAX = json_sax_dom_parser<BasicJsonType>>
66class binary_reader
67{
68 using number_integer_t = typename BasicJsonType::number_integer_t;
69 using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
70 using number_float_t = typename BasicJsonType::number_float_t;
71 using string_t = typename BasicJsonType::string_t;
72 using binary_t = typename BasicJsonType::binary_t;
73 using json_sax_t = SAX;
74 using char_type = typename InputAdapterType::char_type;
75 using char_int_type = typename char_traits<char_type>::int_type;
76
77 public:
78 /*!
79 @brief create a binary reader
80
81 @param[in] adapter input adapter to read from
82 */
83 explicit binary_reader(InputAdapterType&& adapter, const input_format_t format = input_format_t::json) noexcept : ia(std::move(adapter)), input_format(format)
84 {
85 (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {};
86 }
87
88 // make class move-only
89 binary_reader(const binary_reader&) = delete;
90 binary_reader(binary_reader&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor)
91 binary_reader& operator=(const binary_reader&) = delete;
92 binary_reader& operator=(binary_reader&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor)
93 ~binary_reader() = default;
94
95 /*!
96 @param[in] format the binary format to parse
97 @param[in] sax_ a SAX event processor
    @param[in] strict  whether to expect the input to be consumed completely
99 @param[in] tag_handler how to treat CBOR tags
100
101 @return whether parsing was successful
102 */
103 JSON_HEDLEY_NON_NULL(3)
104 bool sax_parse(const input_format_t format,
105 json_sax_t* sax_,
106 const bool strict = true,
107 const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error)
108 {
109 sax = sax_;
110 bool result = false;
111
112 switch (format)
113 {
114 case input_format_t::bson:
115 result = parse_bson_internal();
116 break;
117
118 case input_format_t::cbor:
119 result = parse_cbor_internal(get_char: true, tag_handler);
120 break;
121
122 case input_format_t::msgpack:
123 result = parse_msgpack_internal();
124 break;
125
126 case input_format_t::ubjson:
127 case input_format_t::bjdata:
128 result = parse_ubjson_internal();
129 break;
130
131 case input_format_t::json: // LCOV_EXCL_LINE
132 default: // LCOV_EXCL_LINE
133 JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE
134 }
135
136 // strict mode: next byte must be EOF
137 if (result && strict)
138 {
139 if (input_format == input_format_t::ubjson || input_format == input_format_t::bjdata)
140 {
141 get_ignore_noop();
142 }
143 else
144 {
145 get();
146 }
147
148 if (JSON_HEDLEY_UNLIKELY(current != char_traits<char_type>::eof()))
149 {
150 return sax->parse_error(chars_read, get_token_string(), parse_error::create(110, chars_read,
151 exception_message(format: input_format, detail: concat("expected end of input; last byte: 0x", get_token_string()), context: "value"), nullptr));
152 }
153 }
154
155 return result;
156 }
157
158 private:
159 //////////
160 // BSON //
161 //////////
162
163 /*!
164 @brief Reads in a BSON-object and passes it to the SAX-parser.
165 @return whether a valid BSON-value was passed to the SAX parser
166 */
167 bool parse_bson_internal()
168 {
169 std::int32_t document_size{};
170 get_number<std::int32_t, true>(input_format_t::bson, document_size);
171
172 if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast<std::size_t>(-1))))
173 {
174 return false;
175 }
176
177 if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_list(/*is_array*/false)))
178 {
179 return false;
180 }
181
182 return sax->end_object();
183 }
184
185 /*!
186 @brief Parses a C-style string from the BSON input.
187 @param[in,out] result A reference to the string variable where the read
188 string is to be stored.
    @return `true` if the \x00-byte indicating the end of the string was
            encountered before the EOF; `false` indicates an unexpected EOF.
191 */
192 bool get_bson_cstr(string_t& result)
193 {
194 auto out = std::back_inserter(result);
195 while (true)
196 {
197 get();
198 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bson, "cstring")))
199 {
200 return false;
201 }
202 if (current == 0x00)
203 {
204 return true;
205 }
206 *out++ = static_cast<typename string_t::value_type>(current);
207 }
208 }
209
210 /*!
211 @brief Parses a zero-terminated string of length @a len from the BSON
212 input.
213 @param[in] len The length (including the zero-byte at the end) of the
214 string to be read.
215 @param[in,out] result A reference to the string variable where the read
216 string is to be stored.
217 @tparam NumberType The type of the length @a len
218 @pre len >= 1
219 @return `true` if the string was successfully parsed
220 */
221 template<typename NumberType>
222 bool get_bson_string(const NumberType len, string_t& result)
223 {
224 if (JSON_HEDLEY_UNLIKELY(len < 1))
225 {
226 auto last_token = get_token_string();
227 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
228 exception_message(format: input_format_t::bson, detail: concat("string length must be at least 1, is ", std::to_string(len)), context: "string"), nullptr));
229 }
230
231 return get_string(input_format_t::bson, len - static_cast<NumberType>(1), result) && get() != char_traits<char_type>::eof();
232 }
233
234 /*!
235 @brief Parses a byte array input of length @a len from the BSON input.
236 @param[in] len The length of the byte array to be read.
237 @param[in,out] result A reference to the binary variable where the read
238 array is to be stored.
239 @tparam NumberType The type of the length @a len
240 @pre len >= 0
241 @return `true` if the byte array was successfully parsed
242 */
243 template<typename NumberType>
244 bool get_bson_binary(const NumberType len, binary_t& result)
245 {
246 if (JSON_HEDLEY_UNLIKELY(len < 0))
247 {
248 auto last_token = get_token_string();
249 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
250 exception_message(format: input_format_t::bson, detail: concat("byte array length cannot be negative, is ", std::to_string(len)), context: "binary"), nullptr));
251 }
252
253 // All BSON binary values have a subtype
254 std::uint8_t subtype{};
255 get_number<std::uint8_t>(input_format_t::bson, subtype);
256 result.set_subtype(subtype);
257
258 return get_binary(input_format_t::bson, len, result);
259 }
260
261 /*!
262 @brief Read a BSON document element of the given @a element_type.
263 @param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html
264 @param[in] element_type_parse_position The position in the input stream,
265 where the `element_type` was read.
266 @warning Not all BSON element types are supported yet. An unsupported
267 @a element_type will give rise to a parse_error.114:
268 Unsupported BSON record type 0x...
269 @return whether a valid BSON-object/array was passed to the SAX parser
270 */
271 bool parse_bson_element_internal(const char_int_type element_type,
272 const std::size_t element_type_parse_position)
273 {
274 switch (element_type)
275 {
276 case 0x01: // double
277 {
278 double number{};
279 return get_number<double, true>(input_format_t::bson, number) && sax->number_float(static_cast<number_float_t>(number), "");
280 }
281
282 case 0x02: // string
283 {
284 std::int32_t len{};
285 string_t value;
286 return get_number<std::int32_t, true>(input_format_t::bson, len) && get_bson_string(len, value) && sax->string(value);
287 }
288
289 case 0x03: // object
290 {
291 return parse_bson_internal();
292 }
293
294 case 0x04: // array
295 {
296 return parse_bson_array();
297 }
298
299 case 0x05: // binary
300 {
301 std::int32_t len{};
302 binary_t value;
303 return get_number<std::int32_t, true>(input_format_t::bson, len) && get_bson_binary(len, value) && sax->binary(value);
304 }
305
306 case 0x08: // boolean
307 {
308 return sax->boolean(get() != 0);
309 }
310
311 case 0x0A: // null
312 {
313 return sax->null();
314 }
315
316 case 0x10: // int32
317 {
318 std::int32_t value{};
319 return get_number<std::int32_t, true>(input_format_t::bson, value) && sax->number_integer(value);
320 }
321
322 case 0x12: // int64
323 {
324 std::int64_t value{};
325 return get_number<std::int64_t, true>(input_format_t::bson, value) && sax->number_integer(value);
326 }
327
328 default: // anything else not supported (yet)
329 {
330 std::array<char, 3> cr{._M_elems: {}};
331 static_cast<void>((std::snprintf)(s: cr.data(), maxlen: cr.size(), format: "%.2hhX", static_cast<unsigned char>(element_type))); // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg)
332 const std::string cr_str{cr.data()};
333 return sax->parse_error(element_type_parse_position, cr_str,
334 parse_error::create(id_: 114, byte_: element_type_parse_position, what_arg: concat(args: "Unsupported BSON record type 0x", args: cr_str), context: nullptr));
335 }
336 }
337 }
338
339 /*!
340 @brief Read a BSON element list (as specified in the BSON-spec)
341
342 The same binary layout is used for objects and arrays, hence it must be
343 indicated with the argument @a is_array which one is expected
344 (true --> array, false --> object).
345
346 @param[in] is_array Determines if the element list being read is to be
347 treated as an object (@a is_array == false), or as an
348 array (@a is_array == true).
349 @return whether a valid BSON-object/array was passed to the SAX parser
350 */
351 bool parse_bson_element_list(const bool is_array)
352 {
353 string_t key;
354
355 while (auto element_type = get())
356 {
357 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bson, "element list")))
358 {
359 return false;
360 }
361
362 const std::size_t element_type_parse_position = chars_read;
363 if (JSON_HEDLEY_UNLIKELY(!get_bson_cstr(key)))
364 {
365 return false;
366 }
367
368 if (!is_array && !sax->key(key))
369 {
370 return false;
371 }
372
373 if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_internal(element_type, element_type_parse_position)))
374 {
375 return false;
376 }
377
378 // get_bson_cstr only appends
379 key.clear();
380 }
381
382 return true;
383 }
384
385 /*!
386 @brief Reads an array from the BSON input and passes it to the SAX-parser.
387 @return whether a valid BSON-array was passed to the SAX parser
388 */
389 bool parse_bson_array()
390 {
391 std::int32_t document_size{};
392 get_number<std::int32_t, true>(input_format_t::bson, document_size);
393
394 if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast<std::size_t>(-1))))
395 {
396 return false;
397 }
398
399 if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_list(/*is_array*/true)))
400 {
401 return false;
402 }
403
404 return sax->end_array();
405 }
406
407 //////////
408 // CBOR //
409 //////////
410
411 /*!
412 @param[in] get_char whether a new character should be retrieved from the
413 input (true) or whether the last read character should
414 be considered instead (false)
415 @param[in] tag_handler how CBOR tags should be treated
416
417 @return whether a valid CBOR value was passed to the SAX parser
418 */
419 bool parse_cbor_internal(const bool get_char,
420 const cbor_tag_handler_t tag_handler)
421 {
422 switch (get_char ? get() : current)
423 {
424 // EOF
425 case char_traits<char_type>::eof():
426 return unexpect_eof(format: input_format_t::cbor, context: "value");
427
428 // Integer 0x00..0x17 (0..23)
429 case 0x00:
430 case 0x01:
431 case 0x02:
432 case 0x03:
433 case 0x04:
434 case 0x05:
435 case 0x06:
436 case 0x07:
437 case 0x08:
438 case 0x09:
439 case 0x0A:
440 case 0x0B:
441 case 0x0C:
442 case 0x0D:
443 case 0x0E:
444 case 0x0F:
445 case 0x10:
446 case 0x11:
447 case 0x12:
448 case 0x13:
449 case 0x14:
450 case 0x15:
451 case 0x16:
452 case 0x17:
453 return sax->number_unsigned(static_cast<number_unsigned_t>(current));
454
455 case 0x18: // Unsigned integer (one-byte uint8_t follows)
456 {
457 std::uint8_t number{};
458 return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
459 }
460
461 case 0x19: // Unsigned integer (two-byte uint16_t follows)
462 {
463 std::uint16_t number{};
464 return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
465 }
466
467 case 0x1A: // Unsigned integer (four-byte uint32_t follows)
468 {
469 std::uint32_t number{};
470 return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
471 }
472
473 case 0x1B: // Unsigned integer (eight-byte uint64_t follows)
474 {
475 std::uint64_t number{};
476 return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
477 }
478
479 // Negative integer -1-0x00..-1-0x17 (-1..-24)
480 case 0x20:
481 case 0x21:
482 case 0x22:
483 case 0x23:
484 case 0x24:
485 case 0x25:
486 case 0x26:
487 case 0x27:
488 case 0x28:
489 case 0x29:
490 case 0x2A:
491 case 0x2B:
492 case 0x2C:
493 case 0x2D:
494 case 0x2E:
495 case 0x2F:
496 case 0x30:
497 case 0x31:
498 case 0x32:
499 case 0x33:
500 case 0x34:
501 case 0x35:
502 case 0x36:
503 case 0x37:
504 return sax->number_integer(static_cast<std::int8_t>(0x20 - 1 - current));
505
506 case 0x38: // Negative integer (one-byte uint8_t follows)
507 {
508 std::uint8_t number{};
509 return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
510 }
511
512 case 0x39: // Negative integer -1-n (two-byte uint16_t follows)
513 {
514 std::uint16_t number{};
515 return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
516 }
517
518 case 0x3A: // Negative integer -1-n (four-byte uint32_t follows)
519 {
520 std::uint32_t number{};
521 return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
522 }
523
524 case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows)
525 {
526 std::uint64_t number{};
527 return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1)
528 - static_cast<number_integer_t>(number));
529 }
530
531 // Binary data (0x00..0x17 bytes follow)
532 case 0x40:
533 case 0x41:
534 case 0x42:
535 case 0x43:
536 case 0x44:
537 case 0x45:
538 case 0x46:
539 case 0x47:
540 case 0x48:
541 case 0x49:
542 case 0x4A:
543 case 0x4B:
544 case 0x4C:
545 case 0x4D:
546 case 0x4E:
547 case 0x4F:
548 case 0x50:
549 case 0x51:
550 case 0x52:
551 case 0x53:
552 case 0x54:
553 case 0x55:
554 case 0x56:
555 case 0x57:
556 case 0x58: // Binary data (one-byte uint8_t for n follows)
557 case 0x59: // Binary data (two-byte uint16_t for n follow)
558 case 0x5A: // Binary data (four-byte uint32_t for n follow)
559 case 0x5B: // Binary data (eight-byte uint64_t for n follow)
560 case 0x5F: // Binary data (indefinite length)
561 {
562 binary_t b;
563 return get_cbor_binary(result&: b) && sax->binary(b);
564 }
565
566 // UTF-8 string (0x00..0x17 bytes follow)
567 case 0x60:
568 case 0x61:
569 case 0x62:
570 case 0x63:
571 case 0x64:
572 case 0x65:
573 case 0x66:
574 case 0x67:
575 case 0x68:
576 case 0x69:
577 case 0x6A:
578 case 0x6B:
579 case 0x6C:
580 case 0x6D:
581 case 0x6E:
582 case 0x6F:
583 case 0x70:
584 case 0x71:
585 case 0x72:
586 case 0x73:
587 case 0x74:
588 case 0x75:
589 case 0x76:
590 case 0x77:
591 case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
592 case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
593 case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
594 case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
595 case 0x7F: // UTF-8 string (indefinite length)
596 {
597 string_t s;
598 return get_cbor_string(result&: s) && sax->string(s);
599 }
600
601 // array (0x00..0x17 data items follow)
602 case 0x80:
603 case 0x81:
604 case 0x82:
605 case 0x83:
606 case 0x84:
607 case 0x85:
608 case 0x86:
609 case 0x87:
610 case 0x88:
611 case 0x89:
612 case 0x8A:
613 case 0x8B:
614 case 0x8C:
615 case 0x8D:
616 case 0x8E:
617 case 0x8F:
618 case 0x90:
619 case 0x91:
620 case 0x92:
621 case 0x93:
622 case 0x94:
623 case 0x95:
624 case 0x96:
625 case 0x97:
626 return get_cbor_array(
627 len: conditional_static_cast<std::size_t>(value: static_cast<unsigned int>(current) & 0x1Fu), tag_handler);
628
629 case 0x98: // array (one-byte uint8_t for n follows)
630 {
631 std::uint8_t len{};
632 return get_number(input_format_t::cbor, len) && get_cbor_array(len: static_cast<std::size_t>(len), tag_handler);
633 }
634
635 case 0x99: // array (two-byte uint16_t for n follow)
636 {
637 std::uint16_t len{};
638 return get_number(input_format_t::cbor, len) && get_cbor_array(len: static_cast<std::size_t>(len), tag_handler);
639 }
640
641 case 0x9A: // array (four-byte uint32_t for n follow)
642 {
643 std::uint32_t len{};
644 return get_number(input_format_t::cbor, len) && get_cbor_array(len: conditional_static_cast<std::size_t>(value: len), tag_handler);
645 }
646
647 case 0x9B: // array (eight-byte uint64_t for n follow)
648 {
649 std::uint64_t len{};
650 return get_number(input_format_t::cbor, len) && get_cbor_array(len: conditional_static_cast<std::size_t>(value: len), tag_handler);
651 }
652
653 case 0x9F: // array (indefinite length)
654 return get_cbor_array(len: static_cast<std::size_t>(-1), tag_handler);
655
656 // map (0x00..0x17 pairs of data items follow)
657 case 0xA0:
658 case 0xA1:
659 case 0xA2:
660 case 0xA3:
661 case 0xA4:
662 case 0xA5:
663 case 0xA6:
664 case 0xA7:
665 case 0xA8:
666 case 0xA9:
667 case 0xAA:
668 case 0xAB:
669 case 0xAC:
670 case 0xAD:
671 case 0xAE:
672 case 0xAF:
673 case 0xB0:
674 case 0xB1:
675 case 0xB2:
676 case 0xB3:
677 case 0xB4:
678 case 0xB5:
679 case 0xB6:
680 case 0xB7:
681 return get_cbor_object(len: conditional_static_cast<std::size_t>(value: static_cast<unsigned int>(current) & 0x1Fu), tag_handler);
682
683 case 0xB8: // map (one-byte uint8_t for n follows)
684 {
685 std::uint8_t len{};
686 return get_number(input_format_t::cbor, len) && get_cbor_object(len: static_cast<std::size_t>(len), tag_handler);
687 }
688
689 case 0xB9: // map (two-byte uint16_t for n follow)
690 {
691 std::uint16_t len{};
692 return get_number(input_format_t::cbor, len) && get_cbor_object(len: static_cast<std::size_t>(len), tag_handler);
693 }
694
695 case 0xBA: // map (four-byte uint32_t for n follow)
696 {
697 std::uint32_t len{};
698 return get_number(input_format_t::cbor, len) && get_cbor_object(len: conditional_static_cast<std::size_t>(value: len), tag_handler);
699 }
700
701 case 0xBB: // map (eight-byte uint64_t for n follow)
702 {
703 std::uint64_t len{};
704 return get_number(input_format_t::cbor, len) && get_cbor_object(len: conditional_static_cast<std::size_t>(value: len), tag_handler);
705 }
706
707 case 0xBF: // map (indefinite length)
708 return get_cbor_object(len: static_cast<std::size_t>(-1), tag_handler);
709
710 case 0xC6: // tagged item
711 case 0xC7:
712 case 0xC8:
713 case 0xC9:
714 case 0xCA:
715 case 0xCB:
716 case 0xCC:
717 case 0xCD:
718 case 0xCE:
719 case 0xCF:
720 case 0xD0:
721 case 0xD1:
722 case 0xD2:
723 case 0xD3:
724 case 0xD4:
725 case 0xD8: // tagged item (1 bytes follow)
726 case 0xD9: // tagged item (2 bytes follow)
727 case 0xDA: // tagged item (4 bytes follow)
728 case 0xDB: // tagged item (8 bytes follow)
729 {
730 switch (tag_handler)
731 {
732 case cbor_tag_handler_t::error:
733 {
734 auto last_token = get_token_string();
735 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
736 exception_message(format: input_format_t::cbor, detail: concat("invalid byte: 0x", last_token), context: "value"), nullptr));
737 }
738
739 case cbor_tag_handler_t::ignore:
740 {
741 // ignore binary subtype
742 switch (current)
743 {
744 case 0xD8:
745 {
746 std::uint8_t subtype_to_ignore{};
747 get_number(input_format_t::cbor, subtype_to_ignore);
748 break;
749 }
750 case 0xD9:
751 {
752 std::uint16_t subtype_to_ignore{};
753 get_number(input_format_t::cbor, subtype_to_ignore);
754 break;
755 }
756 case 0xDA:
757 {
758 std::uint32_t subtype_to_ignore{};
759 get_number(input_format_t::cbor, subtype_to_ignore);
760 break;
761 }
762 case 0xDB:
763 {
764 std::uint64_t subtype_to_ignore{};
765 get_number(input_format_t::cbor, subtype_to_ignore);
766 break;
767 }
768 default:
769 break;
770 }
771 return parse_cbor_internal(get_char: true, tag_handler);
772 }
773
774 case cbor_tag_handler_t::store:
775 {
776 binary_t b;
777 // use binary subtype and store in binary container
778 switch (current)
779 {
780 case 0xD8:
781 {
782 std::uint8_t subtype{};
783 get_number(input_format_t::cbor, subtype);
784 b.set_subtype(detail::conditional_static_cast<typename binary_t::subtype_type>(subtype));
785 break;
786 }
787 case 0xD9:
788 {
789 std::uint16_t subtype{};
790 get_number(input_format_t::cbor, subtype);
791 b.set_subtype(detail::conditional_static_cast<typename binary_t::subtype_type>(subtype));
792 break;
793 }
794 case 0xDA:
795 {
796 std::uint32_t subtype{};
797 get_number(input_format_t::cbor, subtype);
798 b.set_subtype(detail::conditional_static_cast<typename binary_t::subtype_type>(subtype));
799 break;
800 }
801 case 0xDB:
802 {
803 std::uint64_t subtype{};
804 get_number(input_format_t::cbor, subtype);
805 b.set_subtype(detail::conditional_static_cast<typename binary_t::subtype_type>(subtype));
806 break;
807 }
808 default:
809 return parse_cbor_internal(get_char: true, tag_handler);
810 }
811 get();
812 return get_cbor_binary(result&: b) && sax->binary(b);
813 }
814
815 default: // LCOV_EXCL_LINE
816 JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE
817 return false; // LCOV_EXCL_LINE
818 }
819 }
820
821 case 0xF4: // false
822 return sax->boolean(false);
823
824 case 0xF5: // true
825 return sax->boolean(true);
826
827 case 0xF6: // null
828 return sax->null();
829
830 case 0xF9: // Half-Precision Float (two-byte IEEE 754)
831 {
832 const auto byte1_raw = get();
833 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "number")))
834 {
835 return false;
836 }
837 const auto byte2_raw = get();
838 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "number")))
839 {
840 return false;
841 }
842
843 const auto byte1 = static_cast<unsigned char>(byte1_raw);
844 const auto byte2 = static_cast<unsigned char>(byte2_raw);
845
846 // code from RFC 7049, Appendix D, Figure 3:
847 // As half-precision floating-point numbers were only added
848 // to IEEE 754 in 2008, today's programming platforms often
849 // still only have limited support for them. It is very
850 // easy to include at least decoding support for them even
851 // without such support. An example of a small decoder for
852 // half-precision floating-point numbers in the C language
853 // is shown in Fig. 3.
854 const auto half = static_cast<unsigned int>((byte1 << 8u) + byte2);
855 const double val = [&half]
856 {
857 const int exp = (half >> 10u) & 0x1Fu;
858 const unsigned int mant = half & 0x3FFu;
859 JSON_ASSERT(0 <= exp&& exp <= 32);
860 JSON_ASSERT(mant <= 1024);
861 switch (exp)
862 {
863 case 0:
864 return std::ldexp(x: mant, exp: -24);
865 case 31:
866 return (mant == 0)
867 ? std::numeric_limits<double>::infinity()
868 : std::numeric_limits<double>::quiet_NaN();
869 default:
870 return std::ldexp(x: mant + 1024, exp: exp - 25);
871 }
872 }();
873 return sax->number_float((half & 0x8000u) != 0
874 ? static_cast<number_float_t>(-val)
875 : static_cast<number_float_t>(val), "");
876 }
877
878 case 0xFA: // Single-Precision Float (four-byte IEEE 754)
879 {
880 float number{};
881 return get_number(input_format_t::cbor, number) && sax->number_float(static_cast<number_float_t>(number), "");
882 }
883
884 case 0xFB: // Double-Precision Float (eight-byte IEEE 754)
885 {
886 double number{};
887 return get_number(input_format_t::cbor, number) && sax->number_float(static_cast<number_float_t>(number), "");
888 }
889
890 default: // anything else (0xFF is handled inside the other types)
891 {
892 auto last_token = get_token_string();
893 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
894 exception_message(format: input_format_t::cbor, detail: concat("invalid byte: 0x", last_token), context: "value"), nullptr));
895 }
896 }
897 }
898
899 /*!
900 @brief reads a CBOR string
901
902 This function first reads starting bytes to determine the expected
903 string length and then copies this number of bytes into a string.
904 Additionally, CBOR's strings with indefinite lengths are supported.
905
906 @param[out] result created string
907
908 @return whether string creation completed
909 */
910 bool get_cbor_string(string_t& result)
911 {
912 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "string")))
913 {
914 return false;
915 }
916
917 switch (current)
918 {
919 // UTF-8 string (0x00..0x17 bytes follow)
920 case 0x60:
921 case 0x61:
922 case 0x62:
923 case 0x63:
924 case 0x64:
925 case 0x65:
926 case 0x66:
927 case 0x67:
928 case 0x68:
929 case 0x69:
930 case 0x6A:
931 case 0x6B:
932 case 0x6C:
933 case 0x6D:
934 case 0x6E:
935 case 0x6F:
936 case 0x70:
937 case 0x71:
938 case 0x72:
939 case 0x73:
940 case 0x74:
941 case 0x75:
942 case 0x76:
943 case 0x77:
944 {
945 return get_string(input_format_t::cbor, static_cast<unsigned int>(current) & 0x1Fu, result);
946 }
947
948 case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
949 {
950 std::uint8_t len{};
951 return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
952 }
953
954 case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
955 {
956 std::uint16_t len{};
957 return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
958 }
959
960 case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
961 {
962 std::uint32_t len{};
963 return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
964 }
965
966 case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
967 {
968 std::uint64_t len{};
969 return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
970 }
971
972 case 0x7F: // UTF-8 string (indefinite length)
973 {
974 while (get() != 0xFF)
975 {
976 string_t chunk;
977 if (!get_cbor_string(result&: chunk))
978 {
979 return false;
980 }
981 result.append(chunk);
982 }
983 return true;
984 }
985
986 default:
987 {
988 auto last_token = get_token_string();
989 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read,
990 exception_message(format: input_format_t::cbor, detail: concat("expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x", last_token), context: "string"), nullptr));
991 }
992 }
993 }
994
995 /*!
996 @brief reads a CBOR byte array
997
998 This function first reads starting bytes to determine the expected
999 byte array length and then copies this number of bytes into the byte array.
1000 Additionally, CBOR's byte arrays with indefinite lengths are supported.
1001
1002 @param[out] result created byte array
1003
1004 @return whether byte array creation completed
1005 */
1006 bool get_cbor_binary(binary_t& result)
1007 {
1008 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "binary")))
1009 {
1010 return false;
1011 }
1012
1013 switch (current)
1014 {
1015 // Binary data (0x00..0x17 bytes follow)
1016 case 0x40:
1017 case 0x41:
1018 case 0x42:
1019 case 0x43:
1020 case 0x44:
1021 case 0x45:
1022 case 0x46:
1023 case 0x47:
1024 case 0x48:
1025 case 0x49:
1026 case 0x4A:
1027 case 0x4B:
1028 case 0x4C:
1029 case 0x4D:
1030 case 0x4E:
1031 case 0x4F:
1032 case 0x50:
1033 case 0x51:
1034 case 0x52:
1035 case 0x53:
1036 case 0x54:
1037 case 0x55:
1038 case 0x56:
1039 case 0x57:
1040 {
1041 return get_binary(input_format_t::cbor, static_cast<unsigned int>(current) & 0x1Fu, result);
1042 }
1043
1044 case 0x58: // Binary data (one-byte uint8_t for n follows)
1045 {
1046 std::uint8_t len{};
1047 return get_number(input_format_t::cbor, len) &&
1048 get_binary(input_format_t::cbor, len, result);
1049 }
1050
1051 case 0x59: // Binary data (two-byte uint16_t for n follow)
1052 {
1053 std::uint16_t len{};
1054 return get_number(input_format_t::cbor, len) &&
1055 get_binary(input_format_t::cbor, len, result);
1056 }
1057
1058 case 0x5A: // Binary data (four-byte uint32_t for n follow)
1059 {
1060 std::uint32_t len{};
1061 return get_number(input_format_t::cbor, len) &&
1062 get_binary(input_format_t::cbor, len, result);
1063 }
1064
1065 case 0x5B: // Binary data (eight-byte uint64_t for n follow)
1066 {
1067 std::uint64_t len{};
1068 return get_number(input_format_t::cbor, len) &&
1069 get_binary(input_format_t::cbor, len, result);
1070 }
1071
1072 case 0x5F: // Binary data (indefinite length)
1073 {
1074 while (get() != 0xFF)
1075 {
1076 binary_t chunk;
1077 if (!get_cbor_binary(result&: chunk))
1078 {
1079 return false;
1080 }
1081 result.insert(result.end(), chunk.begin(), chunk.end());
1082 }
1083 return true;
1084 }
1085
1086 default:
1087 {
1088 auto last_token = get_token_string();
1089 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read,
1090 exception_message(format: input_format_t::cbor, detail: concat("expected length specification (0x40-0x5B) or indefinite binary array type (0x5F); last byte: 0x", last_token), context: "binary"), nullptr));
1091 }
1092 }
1093 }
1094
1095 /*!
1096 @param[in] len the length of the array or static_cast<std::size_t>(-1) for an
1097 array of indefinite size
1098 @param[in] tag_handler how CBOR tags should be treated
1099 @return whether array creation completed
1100 */
1101 bool get_cbor_array(const std::size_t len,
1102 const cbor_tag_handler_t tag_handler)
1103 {
1104 if (JSON_HEDLEY_UNLIKELY(!sax->start_array(len)))
1105 {
1106 return false;
1107 }
1108
1109 if (len != static_cast<std::size_t>(-1))
1110 {
1111 for (std::size_t i = 0; i < len; ++i)
1112 {
1113 if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler)))
1114 {
1115 return false;
1116 }
1117 }
1118 }
1119 else
1120 {
1121 while (get() != 0xFF)
1122 {
1123 if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(false, tag_handler)))
1124 {
1125 return false;
1126 }
1127 }
1128 }
1129
1130 return sax->end_array();
1131 }
1132
1133 /*!
1134 @param[in] len the length of the object or static_cast<std::size_t>(-1) for an
1135 object of indefinite size
1136 @param[in] tag_handler how CBOR tags should be treated
1137 @return whether object creation completed
1138 */
1139 bool get_cbor_object(const std::size_t len,
1140 const cbor_tag_handler_t tag_handler)
1141 {
1142 if (JSON_HEDLEY_UNLIKELY(!sax->start_object(len)))
1143 {
1144 return false;
1145 }
1146
1147 if (len != 0)
1148 {
1149 string_t key;
1150 if (len != static_cast<std::size_t>(-1))
1151 {
1152 for (std::size_t i = 0; i < len; ++i)
1153 {
1154 get();
1155 if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key)))
1156 {
1157 return false;
1158 }
1159
1160 if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler)))
1161 {
1162 return false;
1163 }
1164 key.clear();
1165 }
1166 }
1167 else
1168 {
1169 while (get() != 0xFF)
1170 {
1171 if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key)))
1172 {
1173 return false;
1174 }
1175
1176 if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler)))
1177 {
1178 return false;
1179 }
1180 key.clear();
1181 }
1182 }
1183 }
1184
1185 return sax->end_object();
1186 }
1187
1188 /////////////
1189 // MsgPack //
1190 /////////////
1191
1192 /*!
1193 @return whether a valid MessagePack value was passed to the SAX parser
1194 */
1195 bool parse_msgpack_internal()
1196 {
1197 switch (get())
1198 {
1199 // EOF
1200 case char_traits<char_type>::eof():
1201 return unexpect_eof(format: input_format_t::msgpack, context: "value");
1202
1203 // positive fixint
1204 case 0x00:
1205 case 0x01:
1206 case 0x02:
1207 case 0x03:
1208 case 0x04:
1209 case 0x05:
1210 case 0x06:
1211 case 0x07:
1212 case 0x08:
1213 case 0x09:
1214 case 0x0A:
1215 case 0x0B:
1216 case 0x0C:
1217 case 0x0D:
1218 case 0x0E:
1219 case 0x0F:
1220 case 0x10:
1221 case 0x11:
1222 case 0x12:
1223 case 0x13:
1224 case 0x14:
1225 case 0x15:
1226 case 0x16:
1227 case 0x17:
1228 case 0x18:
1229 case 0x19:
1230 case 0x1A:
1231 case 0x1B:
1232 case 0x1C:
1233 case 0x1D:
1234 case 0x1E:
1235 case 0x1F:
1236 case 0x20:
1237 case 0x21:
1238 case 0x22:
1239 case 0x23:
1240 case 0x24:
1241 case 0x25:
1242 case 0x26:
1243 case 0x27:
1244 case 0x28:
1245 case 0x29:
1246 case 0x2A:
1247 case 0x2B:
1248 case 0x2C:
1249 case 0x2D:
1250 case 0x2E:
1251 case 0x2F:
1252 case 0x30:
1253 case 0x31:
1254 case 0x32:
1255 case 0x33:
1256 case 0x34:
1257 case 0x35:
1258 case 0x36:
1259 case 0x37:
1260 case 0x38:
1261 case 0x39:
1262 case 0x3A:
1263 case 0x3B:
1264 case 0x3C:
1265 case 0x3D:
1266 case 0x3E:
1267 case 0x3F:
1268 case 0x40:
1269 case 0x41:
1270 case 0x42:
1271 case 0x43:
1272 case 0x44:
1273 case 0x45:
1274 case 0x46:
1275 case 0x47:
1276 case 0x48:
1277 case 0x49:
1278 case 0x4A:
1279 case 0x4B:
1280 case 0x4C:
1281 case 0x4D:
1282 case 0x4E:
1283 case 0x4F:
1284 case 0x50:
1285 case 0x51:
1286 case 0x52:
1287 case 0x53:
1288 case 0x54:
1289 case 0x55:
1290 case 0x56:
1291 case 0x57:
1292 case 0x58:
1293 case 0x59:
1294 case 0x5A:
1295 case 0x5B:
1296 case 0x5C:
1297 case 0x5D:
1298 case 0x5E:
1299 case 0x5F:
1300 case 0x60:
1301 case 0x61:
1302 case 0x62:
1303 case 0x63:
1304 case 0x64:
1305 case 0x65:
1306 case 0x66:
1307 case 0x67:
1308 case 0x68:
1309 case 0x69:
1310 case 0x6A:
1311 case 0x6B:
1312 case 0x6C:
1313 case 0x6D:
1314 case 0x6E:
1315 case 0x6F:
1316 case 0x70:
1317 case 0x71:
1318 case 0x72:
1319 case 0x73:
1320 case 0x74:
1321 case 0x75:
1322 case 0x76:
1323 case 0x77:
1324 case 0x78:
1325 case 0x79:
1326 case 0x7A:
1327 case 0x7B:
1328 case 0x7C:
1329 case 0x7D:
1330 case 0x7E:
1331 case 0x7F:
1332 return sax->number_unsigned(static_cast<number_unsigned_t>(current));
1333
1334 // fixmap
1335 case 0x80:
1336 case 0x81:
1337 case 0x82:
1338 case 0x83:
1339 case 0x84:
1340 case 0x85:
1341 case 0x86:
1342 case 0x87:
1343 case 0x88:
1344 case 0x89:
1345 case 0x8A:
1346 case 0x8B:
1347 case 0x8C:
1348 case 0x8D:
1349 case 0x8E:
1350 case 0x8F:
1351 return get_msgpack_object(len: conditional_static_cast<std::size_t>(value: static_cast<unsigned int>(current) & 0x0Fu));
1352
1353 // fixarray
1354 case 0x90:
1355 case 0x91:
1356 case 0x92:
1357 case 0x93:
1358 case 0x94:
1359 case 0x95:
1360 case 0x96:
1361 case 0x97:
1362 case 0x98:
1363 case 0x99:
1364 case 0x9A:
1365 case 0x9B:
1366 case 0x9C:
1367 case 0x9D:
1368 case 0x9E:
1369 case 0x9F:
1370 return get_msgpack_array(len: conditional_static_cast<std::size_t>(value: static_cast<unsigned int>(current) & 0x0Fu));
1371
1372 // fixstr
1373 case 0xA0:
1374 case 0xA1:
1375 case 0xA2:
1376 case 0xA3:
1377 case 0xA4:
1378 case 0xA5:
1379 case 0xA6:
1380 case 0xA7:
1381 case 0xA8:
1382 case 0xA9:
1383 case 0xAA:
1384 case 0xAB:
1385 case 0xAC:
1386 case 0xAD:
1387 case 0xAE:
1388 case 0xAF:
1389 case 0xB0:
1390 case 0xB1:
1391 case 0xB2:
1392 case 0xB3:
1393 case 0xB4:
1394 case 0xB5:
1395 case 0xB6:
1396 case 0xB7:
1397 case 0xB8:
1398 case 0xB9:
1399 case 0xBA:
1400 case 0xBB:
1401 case 0xBC:
1402 case 0xBD:
1403 case 0xBE:
1404 case 0xBF:
1405 case 0xD9: // str 8
1406 case 0xDA: // str 16
1407 case 0xDB: // str 32
1408 {
1409 string_t s;
1410 return get_msgpack_string(result&: s) && sax->string(s);
1411 }
1412
1413 case 0xC0: // nil
1414 return sax->null();
1415
1416 case 0xC2: // false
1417 return sax->boolean(false);
1418
1419 case 0xC3: // true
1420 return sax->boolean(true);
1421
1422 case 0xC4: // bin 8
1423 case 0xC5: // bin 16
1424 case 0xC6: // bin 32
1425 case 0xC7: // ext 8
1426 case 0xC8: // ext 16
1427 case 0xC9: // ext 32
1428 case 0xD4: // fixext 1
1429 case 0xD5: // fixext 2
1430 case 0xD6: // fixext 4
1431 case 0xD7: // fixext 8
1432 case 0xD8: // fixext 16
1433 {
1434 binary_t b;
1435 return get_msgpack_binary(result&: b) && sax->binary(b);
1436 }
1437
1438 case 0xCA: // float 32
1439 {
1440 float number{};
1441 return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast<number_float_t>(number), "");
1442 }
1443
1444 case 0xCB: // float 64
1445 {
1446 double number{};
1447 return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast<number_float_t>(number), "");
1448 }
1449
1450 case 0xCC: // uint 8
1451 {
1452 std::uint8_t number{};
1453 return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
1454 }
1455
1456 case 0xCD: // uint 16
1457 {
1458 std::uint16_t number{};
1459 return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
1460 }
1461
1462 case 0xCE: // uint 32
1463 {
1464 std::uint32_t number{};
1465 return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
1466 }
1467
1468 case 0xCF: // uint 64
1469 {
1470 std::uint64_t number{};
1471 return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
1472 }
1473
1474 case 0xD0: // int 8
1475 {
1476 std::int8_t number{};
1477 return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
1478 }
1479
1480 case 0xD1: // int 16
1481 {
1482 std::int16_t number{};
1483 return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
1484 }
1485
1486 case 0xD2: // int 32
1487 {
1488 std::int32_t number{};
1489 return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
1490 }
1491
1492 case 0xD3: // int 64
1493 {
1494 std::int64_t number{};
1495 return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
1496 }
1497
1498 case 0xDC: // array 16
1499 {
1500 std::uint16_t len{};
1501 return get_number(input_format_t::msgpack, len) && get_msgpack_array(len: static_cast<std::size_t>(len));
1502 }
1503
1504 case 0xDD: // array 32
1505 {
1506 std::uint32_t len{};
1507 return get_number(input_format_t::msgpack, len) && get_msgpack_array(len: conditional_static_cast<std::size_t>(value: len));
1508 }
1509
1510 case 0xDE: // map 16
1511 {
1512 std::uint16_t len{};
1513 return get_number(input_format_t::msgpack, len) && get_msgpack_object(len: static_cast<std::size_t>(len));
1514 }
1515
1516 case 0xDF: // map 32
1517 {
1518 std::uint32_t len{};
1519 return get_number(input_format_t::msgpack, len) && get_msgpack_object(len: conditional_static_cast<std::size_t>(value: len));
1520 }
1521
1522 // negative fixint
1523 case 0xE0:
1524 case 0xE1:
1525 case 0xE2:
1526 case 0xE3:
1527 case 0xE4:
1528 case 0xE5:
1529 case 0xE6:
1530 case 0xE7:
1531 case 0xE8:
1532 case 0xE9:
1533 case 0xEA:
1534 case 0xEB:
1535 case 0xEC:
1536 case 0xED:
1537 case 0xEE:
1538 case 0xEF:
1539 case 0xF0:
1540 case 0xF1:
1541 case 0xF2:
1542 case 0xF3:
1543 case 0xF4:
1544 case 0xF5:
1545 case 0xF6:
1546 case 0xF7:
1547 case 0xF8:
1548 case 0xF9:
1549 case 0xFA:
1550 case 0xFB:
1551 case 0xFC:
1552 case 0xFD:
1553 case 0xFE:
1554 case 0xFF:
1555 return sax->number_integer(static_cast<std::int8_t>(current));
1556
1557 default: // anything else
1558 {
1559 auto last_token = get_token_string();
1560 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
1561 exception_message(format: input_format_t::msgpack, detail: concat("invalid byte: 0x", last_token), context: "value"), nullptr));
1562 }
1563 }
1564 }
1565
1566 /*!
1567 @brief reads a MessagePack string
1568
1569 This function first reads starting bytes to determine the expected
1570 string length and then copies this number of bytes into a string.
1571
1572 @param[out] result created string
1573
1574 @return whether string creation completed
1575 */
1576 bool get_msgpack_string(string_t& result)
1577 {
1578 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::msgpack, "string")))
1579 {
1580 return false;
1581 }
1582
1583 switch (current)
1584 {
1585 // fixstr
1586 case 0xA0:
1587 case 0xA1:
1588 case 0xA2:
1589 case 0xA3:
1590 case 0xA4:
1591 case 0xA5:
1592 case 0xA6:
1593 case 0xA7:
1594 case 0xA8:
1595 case 0xA9:
1596 case 0xAA:
1597 case 0xAB:
1598 case 0xAC:
1599 case 0xAD:
1600 case 0xAE:
1601 case 0xAF:
1602 case 0xB0:
1603 case 0xB1:
1604 case 0xB2:
1605 case 0xB3:
1606 case 0xB4:
1607 case 0xB5:
1608 case 0xB6:
1609 case 0xB7:
1610 case 0xB8:
1611 case 0xB9:
1612 case 0xBA:
1613 case 0xBB:
1614 case 0xBC:
1615 case 0xBD:
1616 case 0xBE:
1617 case 0xBF:
1618 {
1619 return get_string(input_format_t::msgpack, static_cast<unsigned int>(current) & 0x1Fu, result);
1620 }
1621
1622 case 0xD9: // str 8
1623 {
1624 std::uint8_t len{};
1625 return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result);
1626 }
1627
1628 case 0xDA: // str 16
1629 {
1630 std::uint16_t len{};
1631 return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result);
1632 }
1633
1634 case 0xDB: // str 32
1635 {
1636 std::uint32_t len{};
1637 return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result);
1638 }
1639
1640 default:
1641 {
1642 auto last_token = get_token_string();
1643 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read,
1644 exception_message(format: input_format_t::msgpack, detail: concat("expected length specification (0xA0-0xBF, 0xD9-0xDB); last byte: 0x", last_token), context: "string"), nullptr));
1645 }
1646 }
1647 }
1648
1649 /*!
1650 @brief reads a MessagePack byte array
1651
1652 This function first reads starting bytes to determine the expected
1653 byte array length and then copies this number of bytes into a byte array.
1654
1655 @param[out] result created byte array
1656
1657 @return whether byte array creation completed
1658 */
1659 bool get_msgpack_binary(binary_t& result)
1660 {
1661 // helper function to set the subtype
1662 auto assign_and_return_true = [&result](std::int8_t subtype)
1663 {
1664 result.set_subtype(static_cast<std::uint8_t>(subtype));
1665 return true;
1666 };
1667
1668 switch (current)
1669 {
1670 case 0xC4: // bin 8
1671 {
1672 std::uint8_t len{};
1673 return get_number(input_format_t::msgpack, len) &&
1674 get_binary(input_format_t::msgpack, len, result);
1675 }
1676
1677 case 0xC5: // bin 16
1678 {
1679 std::uint16_t len{};
1680 return get_number(input_format_t::msgpack, len) &&
1681 get_binary(input_format_t::msgpack, len, result);
1682 }
1683
1684 case 0xC6: // bin 32
1685 {
1686 std::uint32_t len{};
1687 return get_number(input_format_t::msgpack, len) &&
1688 get_binary(input_format_t::msgpack, len, result);
1689 }
1690
1691 case 0xC7: // ext 8
1692 {
1693 std::uint8_t len{};
1694 std::int8_t subtype{};
1695 return get_number(input_format_t::msgpack, len) &&
1696 get_number(input_format_t::msgpack, subtype) &&
1697 get_binary(input_format_t::msgpack, len, result) &&
1698 assign_and_return_true(subtype);
1699 }
1700
1701 case 0xC8: // ext 16
1702 {
1703 std::uint16_t len{};
1704 std::int8_t subtype{};
1705 return get_number(input_format_t::msgpack, len) &&
1706 get_number(input_format_t::msgpack, subtype) &&
1707 get_binary(input_format_t::msgpack, len, result) &&
1708 assign_and_return_true(subtype);
1709 }
1710
1711 case 0xC9: // ext 32
1712 {
1713 std::uint32_t len{};
1714 std::int8_t subtype{};
1715 return get_number(input_format_t::msgpack, len) &&
1716 get_number(input_format_t::msgpack, subtype) &&
1717 get_binary(input_format_t::msgpack, len, result) &&
1718 assign_and_return_true(subtype);
1719 }
1720
1721 case 0xD4: // fixext 1
1722 {
1723 std::int8_t subtype{};
1724 return get_number(input_format_t::msgpack, subtype) &&
1725 get_binary(input_format_t::msgpack, 1, result) &&
1726 assign_and_return_true(subtype);
1727 }
1728
1729 case 0xD5: // fixext 2
1730 {
1731 std::int8_t subtype{};
1732 return get_number(input_format_t::msgpack, subtype) &&
1733 get_binary(input_format_t::msgpack, 2, result) &&
1734 assign_and_return_true(subtype);
1735 }
1736
1737 case 0xD6: // fixext 4
1738 {
1739 std::int8_t subtype{};
1740 return get_number(input_format_t::msgpack, subtype) &&
1741 get_binary(input_format_t::msgpack, 4, result) &&
1742 assign_and_return_true(subtype);
1743 }
1744
1745 case 0xD7: // fixext 8
1746 {
1747 std::int8_t subtype{};
1748 return get_number(input_format_t::msgpack, subtype) &&
1749 get_binary(input_format_t::msgpack, 8, result) &&
1750 assign_and_return_true(subtype);
1751 }
1752
1753 case 0xD8: // fixext 16
1754 {
1755 std::int8_t subtype{};
1756 return get_number(input_format_t::msgpack, subtype) &&
1757 get_binary(input_format_t::msgpack, 16, result) &&
1758 assign_and_return_true(subtype);
1759 }
1760
1761 default: // LCOV_EXCL_LINE
1762 return false; // LCOV_EXCL_LINE
1763 }
1764 }
1765
1766 /*!
1767 @param[in] len the length of the array
1768 @return whether array creation completed
1769 */
1770 bool get_msgpack_array(const std::size_t len)
1771 {
1772 if (JSON_HEDLEY_UNLIKELY(!sax->start_array(len)))
1773 {
1774 return false;
1775 }
1776
1777 for (std::size_t i = 0; i < len; ++i)
1778 {
1779 if (JSON_HEDLEY_UNLIKELY(!parse_msgpack_internal()))
1780 {
1781 return false;
1782 }
1783 }
1784
1785 return sax->end_array();
1786 }
1787
1788 /*!
1789 @param[in] len the length of the object
1790 @return whether object creation completed
1791 */
1792 bool get_msgpack_object(const std::size_t len)
1793 {
1794 if (JSON_HEDLEY_UNLIKELY(!sax->start_object(len)))
1795 {
1796 return false;
1797 }
1798
1799 string_t key;
1800 for (std::size_t i = 0; i < len; ++i)
1801 {
1802 get();
1803 if (JSON_HEDLEY_UNLIKELY(!get_msgpack_string(key) || !sax->key(key)))
1804 {
1805 return false;
1806 }
1807
1808 if (JSON_HEDLEY_UNLIKELY(!parse_msgpack_internal()))
1809 {
1810 return false;
1811 }
1812 key.clear();
1813 }
1814
1815 return sax->end_object();
1816 }
1817
1818 ////////////
1819 // UBJSON //
1820 ////////////
1821
1822 /*!
1823 @param[in] get_char whether a new character should be retrieved from the
1824 input (true, default) or whether the last read
1825 character should be considered instead
1826
1827 @return whether a valid UBJSON value was passed to the SAX parser
1828 */
1829 bool parse_ubjson_internal(const bool get_char = true)
1830 {
1831 return get_ubjson_value(prefix: get_char ? get_ignore_noop() : current);
1832 }
1833
1834 /*!
1835 @brief reads a UBJSON string
1836
1837 This function is either called after reading the 'S' byte explicitly
1838 indicating a string, or in case of an object key where the 'S' byte can be
1839 left out.
1840
1841 @param[out] result created string
1842 @param[in] get_char whether a new character should be retrieved from the
1843 input (true, default) or whether the last read
1844 character should be considered instead
1845
1846 @return whether string creation completed
1847 */
1848 bool get_ubjson_string(string_t& result, const bool get_char = true)
1849 {
1850 if (get_char)
1851 {
1852 get(); // TODO(niels): may we ignore N here?
1853 }
1854
1855 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "value")))
1856 {
1857 return false;
1858 }
1859
1860 switch (current)
1861 {
1862 case 'U':
1863 {
1864 std::uint8_t len{};
1865 return get_number(input_format, len) && get_string(input_format, len, result);
1866 }
1867
1868 case 'i':
1869 {
1870 std::int8_t len{};
1871 return get_number(input_format, len) && get_string(input_format, len, result);
1872 }
1873
1874 case 'I':
1875 {
1876 std::int16_t len{};
1877 return get_number(input_format, len) && get_string(input_format, len, result);
1878 }
1879
1880 case 'l':
1881 {
1882 std::int32_t len{};
1883 return get_number(input_format, len) && get_string(input_format, len, result);
1884 }
1885
1886 case 'L':
1887 {
1888 std::int64_t len{};
1889 return get_number(input_format, len) && get_string(input_format, len, result);
1890 }
1891
1892 case 'u':
1893 {
1894 if (input_format != input_format_t::bjdata)
1895 {
1896 break;
1897 }
1898 std::uint16_t len{};
1899 return get_number(input_format, len) && get_string(input_format, len, result);
1900 }
1901
1902 case 'm':
1903 {
1904 if (input_format != input_format_t::bjdata)
1905 {
1906 break;
1907 }
1908 std::uint32_t len{};
1909 return get_number(input_format, len) && get_string(input_format, len, result);
1910 }
1911
1912 case 'M':
1913 {
1914 if (input_format != input_format_t::bjdata)
1915 {
1916 break;
1917 }
1918 std::uint64_t len{};
1919 return get_number(input_format, len) && get_string(input_format, len, result);
1920 }
1921
1922 default:
1923 break;
1924 }
1925 auto last_token = get_token_string();
1926 std::string message;
1927
1928 if (input_format != input_format_t::bjdata)
1929 {
1930 message = "expected length type specification (U, i, I, l, L); last byte: 0x" + last_token;
1931 }
1932 else
1933 {
1934 message = "expected length type specification (U, i, u, I, m, l, M, L); last byte: 0x" + last_token;
1935 }
1936 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(format: input_format, detail: message, context: "string"), nullptr));
1937 }
1938
1939 /*!
1940 @param[out] dim an integer vector storing the ND array dimensions
1941 @return whether reading ND array size vector is successful
1942 */
1943 bool get_ubjson_ndarray_size(std::vector<size_t>& dim)
1944 {
1945 std::pair<std::size_t, char_int_type> size_and_type;
1946 size_t dimlen = 0;
1947 bool no_ndarray = true;
1948
1949 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type, no_ndarray)))
1950 {
1951 return false;
1952 }
1953
1954 if (size_and_type.first != npos)
1955 {
1956 if (size_and_type.second != 0)
1957 {
1958 if (size_and_type.second != 'N')
1959 {
1960 for (std::size_t i = 0; i < size_and_type.first; ++i)
1961 {
1962 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, no_ndarray, size_and_type.second)))
1963 {
1964 return false;
1965 }
1966 dim.push_back(x: dimlen);
1967 }
1968 }
1969 }
1970 else
1971 {
1972 for (std::size_t i = 0; i < size_and_type.first; ++i)
1973 {
1974 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, no_ndarray)))
1975 {
1976 return false;
1977 }
1978 dim.push_back(x: dimlen);
1979 }
1980 }
1981 }
1982 else
1983 {
1984 while (current != ']')
1985 {
1986 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, no_ndarray, current)))
1987 {
1988 return false;
1989 }
1990 dim.push_back(x: dimlen);
1991 get_ignore_noop();
1992 }
1993 }
1994 return true;
1995 }
1996
1997 /*!
1998 @param[out] result determined size
1999 @param[in,out] is_ndarray for input, `true` means already inside an ndarray vector
2000 or ndarray dimension is not allowed; `false` means ndarray
2001 is allowed; for output, `true` means an ndarray is found;
2002 is_ndarray can only return `true` when its initial value
2003 is `false`
2004 @param[in] prefix type marker if already read, otherwise set to 0
2005
2006 @return whether size determination completed
2007 */
2008 bool get_ubjson_size_value(std::size_t& result, bool& is_ndarray, char_int_type prefix = 0)
2009 {
2010 if (prefix == 0)
2011 {
2012 prefix = get_ignore_noop();
2013 }
2014
2015 switch (prefix)
2016 {
2017 case 'U':
2018 {
2019 std::uint8_t number{};
2020 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format, number)))
2021 {
2022 return false;
2023 }
2024 result = static_cast<std::size_t>(number);
2025 return true;
2026 }
2027
2028 case 'i':
2029 {
2030 std::int8_t number{};
2031 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format, number)))
2032 {
2033 return false;
2034 }
2035 if (number < 0)
2036 {
2037 return sax->parse_error(chars_read, get_token_string(), parse_error::create(113, chars_read,
2038 exception_message(format: input_format, detail: "count in an optimized container must be positive", context: "size"), nullptr));
2039 }
2040 result = static_cast<std::size_t>(number); // NOLINT(bugprone-signed-char-misuse,cert-str34-c): number is not a char
2041 return true;
2042 }
2043
2044 case 'I':
2045 {
2046 std::int16_t number{};
2047 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format, number)))
2048 {
2049 return false;
2050 }
2051 if (number < 0)
2052 {
2053 return sax->parse_error(chars_read, get_token_string(), parse_error::create(113, chars_read,
2054 exception_message(format: input_format, detail: "count in an optimized container must be positive", context: "size"), nullptr));
2055 }
2056 result = static_cast<std::size_t>(number);
2057 return true;
2058 }
2059
2060 case 'l':
2061 {
2062 std::int32_t number{};
2063 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format, number)))
2064 {
2065 return false;
2066 }
2067 if (number < 0)
2068 {
2069 return sax->parse_error(chars_read, get_token_string(), parse_error::create(113, chars_read,
2070 exception_message(format: input_format, detail: "count in an optimized container must be positive", context: "size"), nullptr));
2071 }
2072 result = static_cast<std::size_t>(number);
2073 return true;
2074 }
2075
2076 case 'L':
2077 {
2078 std::int64_t number{};
2079 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format, number)))
2080 {
2081 return false;
2082 }
2083 if (number < 0)
2084 {
2085 return sax->parse_error(chars_read, get_token_string(), parse_error::create(113, chars_read,
2086 exception_message(format: input_format, detail: "count in an optimized container must be positive", context: "size"), nullptr));
2087 }
2088 if (!value_in_range_of<std::size_t>(val: number))
2089 {
2090 return sax->parse_error(chars_read, get_token_string(), out_of_range::create(408,
2091 exception_message(format: input_format, detail: "integer value overflow", context: "size"), nullptr));
2092 }
2093 result = static_cast<std::size_t>(number);
2094 return true;
2095 }
2096
2097 case 'u':
2098 {
2099 if (input_format != input_format_t::bjdata)
2100 {
2101 break;
2102 }
2103 std::uint16_t number{};
2104 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format, number)))
2105 {
2106 return false;
2107 }
2108 result = static_cast<std::size_t>(number);
2109 return true;
2110 }
2111
2112 case 'm':
2113 {
2114 if (input_format != input_format_t::bjdata)
2115 {
2116 break;
2117 }
2118 std::uint32_t number{};
2119 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format, number)))
2120 {
2121 return false;
2122 }
2123 result = conditional_static_cast<std::size_t>(value: number);
2124 return true;
2125 }
2126
2127 case 'M':
2128 {
2129 if (input_format != input_format_t::bjdata)
2130 {
2131 break;
2132 }
2133 std::uint64_t number{};
2134 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format, number)))
2135 {
2136 return false;
2137 }
2138 if (!value_in_range_of<std::size_t>(val: number))
2139 {
2140 return sax->parse_error(chars_read, get_token_string(), out_of_range::create(408,
2141 exception_message(format: input_format, detail: "integer value overflow", context: "size"), nullptr));
2142 }
2143 result = detail::conditional_static_cast<std::size_t>(value: number);
2144 return true;
2145 }
2146
2147 case '[':
2148 {
2149 if (input_format != input_format_t::bjdata)
2150 {
2151 break;
2152 }
2153 if (is_ndarray) // ndarray dimensional vector can only contain integers, and can not embed another array
2154 {
2155 return sax->parse_error(chars_read, get_token_string(), parse_error::create(113, chars_read, exception_message(format: input_format, detail: "ndarray dimensional vector is not allowed", context: "size"), nullptr));
2156 }
2157 std::vector<size_t> dim;
2158 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_ndarray_size(dim)))
2159 {
2160 return false;
2161 }
2162 if (dim.size() == 1 || (dim.size() == 2 && dim.at(n: 0) == 1)) // return normal array size if 1D row vector
2163 {
2164 result = dim.at(n: dim.size() - 1);
2165 return true;
2166 }
2167 if (!dim.empty()) // if ndarray, convert to an object in JData annotated array format
2168 {
2169 for (auto i : dim) // test if any dimension in an ndarray is 0, if so, return a 1D empty container
2170 {
2171 if ( i == 0 )
2172 {
2173 result = 0;
2174 return true;
2175 }
2176 }
2177
2178 string_t key = "_ArraySize_";
2179 if (JSON_HEDLEY_UNLIKELY(!sax->start_object(3) || !sax->key(key) || !sax->start_array(dim.size())))
2180 {
2181 return false;
2182 }
2183 result = 1;
2184 for (auto i : dim)
2185 {
2186 result *= i;
2187 if (result == 0 || result == npos) // because dim elements shall not have zeros, result = 0 means overflow happened; it also can't be npos as it is used to initialize size in get_ubjson_size_type()
2188 {
2189 return sax->parse_error(chars_read, get_token_string(), out_of_range::create(408, exception_message(format: input_format, detail: "excessive ndarray size caused overflow", context: "size"), nullptr));
2190 }
2191 if (JSON_HEDLEY_UNLIKELY(!sax->number_unsigned(static_cast<number_unsigned_t>(i))))
2192 {
2193 return false;
2194 }
2195 }
2196 is_ndarray = true;
2197 return sax->end_array();
2198 }
2199 result = 0;
2200 return true;
2201 }
2202
2203 default:
2204 break;
2205 }
2206 auto last_token = get_token_string();
2207 std::string message;
2208
2209 if (input_format != input_format_t::bjdata)
2210 {
2211 message = "expected length type specification (U, i, I, l, L) after '#'; last byte: 0x" + last_token;
2212 }
2213 else
2214 {
2215 message = "expected length type specification (U, i, u, I, m, l, M, L) after '#'; last byte: 0x" + last_token;
2216 }
2217 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(format: input_format, detail: message, context: "size"), nullptr));
2218 }
2219
2220 /*!
2221 @brief determine the type and size for a container
2222
2223 In the optimized UBJSON format, a type and a size can be provided to allow
2224 for a more compact representation.
2225
2226 @param[out] result pair of the size and the type
2227 @param[in] inside_ndarray whether the parser is parsing an ND array dimensional vector
2228
2229 @return whether pair creation completed
2230 */
2231 bool get_ubjson_size_type(std::pair<std::size_t, char_int_type>& result, bool inside_ndarray = false)
2232 {
2233 result.first = npos; // size
2234 result.second = 0; // type
2235 bool is_ndarray = false;
2236
2237 get_ignore_noop();
2238
2239 if (current == '$')
2240 {
2241 result.second = get(); // must not ignore 'N', because 'N' maybe the type
2242 if (input_format == input_format_t::bjdata
2243 && JSON_HEDLEY_UNLIKELY(std::binary_search(bjd_optimized_type_markers.begin(), bjd_optimized_type_markers.end(), result.second)))
2244 {
2245 auto last_token = get_token_string();
2246 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
2247 exception_message(format: input_format, detail: concat("marker 0x", last_token, " is not a permitted optimized array type"), context: "type"), nullptr));
2248 }
2249
2250 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "type")))
2251 {
2252 return false;
2253 }
2254
2255 get_ignore_noop();
2256 if (JSON_HEDLEY_UNLIKELY(current != '#'))
2257 {
2258 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "value")))
2259 {
2260 return false;
2261 }
2262 auto last_token = get_token_string();
2263 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
2264 exception_message(format: input_format, detail: concat("expected '#' after type information; last byte: 0x", last_token), context: "size"), nullptr));
2265 }
2266
2267 const bool is_error = get_ubjson_size_value(result&: result.first, is_ndarray);
2268 if (input_format == input_format_t::bjdata && is_ndarray)
2269 {
2270 if (inside_ndarray)
2271 {
2272 return sax->parse_error(chars_read, get_token_string(), parse_error::create(112, chars_read,
2273 exception_message(format: input_format, detail: "ndarray can not be recursive", context: "size"), nullptr));
2274 }
2275 result.second |= (1 << 8); // use bit 8 to indicate ndarray, all UBJSON and BJData markers should be ASCII letters
2276 }
2277 return is_error;
2278 }
2279
2280 if (current == '#')
2281 {
2282 const bool is_error = get_ubjson_size_value(result&: result.first, is_ndarray);
2283 if (input_format == input_format_t::bjdata && is_ndarray)
2284 {
2285 return sax->parse_error(chars_read, get_token_string(), parse_error::create(112, chars_read,
2286 exception_message(format: input_format, detail: "ndarray requires both type and size", context: "size"), nullptr));
2287 }
2288 return is_error;
2289 }
2290
2291 return true;
2292 }
2293
2294 /*!
2295 @param prefix the previously read or set type prefix
2296 @return whether value creation completed
2297 */
2298 bool get_ubjson_value(const char_int_type prefix)
2299 {
2300 switch (prefix)
2301 {
2302 case char_traits<char_type>::eof(): // EOF
2303 return unexpect_eof(format: input_format, context: "value");
2304
2305 case 'T': // true
2306 return sax->boolean(true);
2307 case 'F': // false
2308 return sax->boolean(false);
2309
2310 case 'Z': // null
2311 return sax->null();
2312
2313 case 'U':
2314 {
2315 std::uint8_t number{};
2316 return get_number(input_format, number) && sax->number_unsigned(number);
2317 }
2318
2319 case 'i':
2320 {
2321 std::int8_t number{};
2322 return get_number(input_format, number) && sax->number_integer(number);
2323 }
2324
2325 case 'I':
2326 {
2327 std::int16_t number{};
2328 return get_number(input_format, number) && sax->number_integer(number);
2329 }
2330
2331 case 'l':
2332 {
2333 std::int32_t number{};
2334 return get_number(input_format, number) && sax->number_integer(number);
2335 }
2336
2337 case 'L':
2338 {
2339 std::int64_t number{};
2340 return get_number(input_format, number) && sax->number_integer(number);
2341 }
2342
2343 case 'u':
2344 {
2345 if (input_format != input_format_t::bjdata)
2346 {
2347 break;
2348 }
2349 std::uint16_t number{};
2350 return get_number(input_format, number) && sax->number_unsigned(number);
2351 }
2352
2353 case 'm':
2354 {
2355 if (input_format != input_format_t::bjdata)
2356 {
2357 break;
2358 }
2359 std::uint32_t number{};
2360 return get_number(input_format, number) && sax->number_unsigned(number);
2361 }
2362
2363 case 'M':
2364 {
2365 if (input_format != input_format_t::bjdata)
2366 {
2367 break;
2368 }
2369 std::uint64_t number{};
2370 return get_number(input_format, number) && sax->number_unsigned(number);
2371 }
2372
2373 case 'h':
2374 {
2375 if (input_format != input_format_t::bjdata)
2376 {
2377 break;
2378 }
2379 const auto byte1_raw = get();
2380 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "number")))
2381 {
2382 return false;
2383 }
2384 const auto byte2_raw = get();
2385 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "number")))
2386 {
2387 return false;
2388 }
2389
2390 const auto byte1 = static_cast<unsigned char>(byte1_raw);
2391 const auto byte2 = static_cast<unsigned char>(byte2_raw);
2392
2393 // code from RFC 7049, Appendix D, Figure 3:
2394 // As half-precision floating-point numbers were only added
2395 // to IEEE 754 in 2008, today's programming platforms often
2396 // still only have limited support for them. It is very
2397 // easy to include at least decoding support for them even
2398 // without such support. An example of a small decoder for
2399 // half-precision floating-point numbers in the C language
2400 // is shown in Fig. 3.
2401 const auto half = static_cast<unsigned int>((byte2 << 8u) + byte1);
2402 const double val = [&half]
2403 {
2404 const int exp = (half >> 10u) & 0x1Fu;
2405 const unsigned int mant = half & 0x3FFu;
2406 JSON_ASSERT(0 <= exp&& exp <= 32);
2407 JSON_ASSERT(mant <= 1024);
2408 switch (exp)
2409 {
2410 case 0:
2411 return std::ldexp(x: mant, exp: -24);
2412 case 31:
2413 return (mant == 0)
2414 ? std::numeric_limits<double>::infinity()
2415 : std::numeric_limits<double>::quiet_NaN();
2416 default:
2417 return std::ldexp(x: mant + 1024, exp: exp - 25);
2418 }
2419 }();
2420 return sax->number_float((half & 0x8000u) != 0
2421 ? static_cast<number_float_t>(-val)
2422 : static_cast<number_float_t>(val), "");
2423 }
2424
2425 case 'd':
2426 {
2427 float number{};
2428 return get_number(input_format, number) && sax->number_float(static_cast<number_float_t>(number), "");
2429 }
2430
2431 case 'D':
2432 {
2433 double number{};
2434 return get_number(input_format, number) && sax->number_float(static_cast<number_float_t>(number), "");
2435 }
2436
2437 case 'H':
2438 {
2439 return get_ubjson_high_precision_number();
2440 }
2441
2442 case 'C': // char
2443 {
2444 get();
2445 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "char")))
2446 {
2447 return false;
2448 }
2449 if (JSON_HEDLEY_UNLIKELY(current > 127))
2450 {
2451 auto last_token = get_token_string();
2452 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read,
2453 exception_message(format: input_format, detail: concat("byte after 'C' must be in range 0x00..0x7F; last byte: 0x", last_token), context: "char"), nullptr));
2454 }
2455 string_t s(1, static_cast<typename string_t::value_type>(current));
2456 return sax->string(s);
2457 }
2458
2459 case 'S': // string
2460 {
2461 string_t s;
2462 return get_ubjson_string(result&: s) && sax->string(s);
2463 }
2464
2465 case '[': // array
2466 return get_ubjson_array();
2467
2468 case '{': // object
2469 return get_ubjson_object();
2470
2471 default: // anything else
2472 break;
2473 }
2474 auto last_token = get_token_string();
2475 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(format: input_format, detail: "invalid byte: 0x" + last_token, context: "value"), nullptr));
2476 }
2477
2478 /*!
2479 @return whether array creation completed
2480 */
2481 bool get_ubjson_array()
2482 {
2483 std::pair<std::size_t, char_int_type> size_and_type;
2484 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
2485 {
2486 return false;
2487 }
2488
2489 // if bit-8 of size_and_type.second is set to 1, encode bjdata ndarray as an object in JData annotated array format (https://github.com/NeuroJSON/jdata):
2490 // {"_ArrayType_" : "typeid", "_ArraySize_" : [n1, n2, ...], "_ArrayData_" : [v1, v2, ...]}
2491
2492 if (input_format == input_format_t::bjdata && size_and_type.first != npos && (size_and_type.second & (1 << 8)) != 0)
2493 {
2494 size_and_type.second &= ~(static_cast<char_int_type>(1) << 8); // use bit 8 to indicate ndarray, here we remove the bit to restore the type marker
2495 auto it = std::lower_bound(bjd_types_map.begin(), bjd_types_map.end(), size_and_type.second, [](const bjd_type & p, char_int_type t)
2496 {
2497 return p.first < t;
2498 });
2499 string_t key = "_ArrayType_";
2500 if (JSON_HEDLEY_UNLIKELY(it == bjd_types_map.end() || it->first != size_and_type.second))
2501 {
2502 auto last_token = get_token_string();
2503 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
2504 exception_message(format: input_format, detail: "invalid byte: 0x" + last_token, context: "type"), nullptr));
2505 }
2506
2507 string_t type = it->second; // sax->string() takes a reference
2508 if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->string(type)))
2509 {
2510 return false;
2511 }
2512
2513 if (size_and_type.second == 'C')
2514 {
2515 size_and_type.second = 'U';
2516 }
2517
2518 key = "_ArrayData_";
2519 if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->start_array(size_and_type.first) ))
2520 {
2521 return false;
2522 }
2523
2524 for (std::size_t i = 0; i < size_and_type.first; ++i)
2525 {
2526 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second)))
2527 {
2528 return false;
2529 }
2530 }
2531
2532 return (sax->end_array() && sax->end_object());
2533 }
2534
2535 if (size_and_type.first != npos)
2536 {
2537 if (JSON_HEDLEY_UNLIKELY(!sax->start_array(size_and_type.first)))
2538 {
2539 return false;
2540 }
2541
2542 if (size_and_type.second != 0)
2543 {
2544 if (size_and_type.second != 'N')
2545 {
2546 for (std::size_t i = 0; i < size_and_type.first; ++i)
2547 {
2548 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second)))
2549 {
2550 return false;
2551 }
2552 }
2553 }
2554 }
2555 else
2556 {
2557 for (std::size_t i = 0; i < size_and_type.first; ++i)
2558 {
2559 if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal()))
2560 {
2561 return false;
2562 }
2563 }
2564 }
2565 }
2566 else
2567 {
2568 if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast<std::size_t>(-1))))
2569 {
2570 return false;
2571 }
2572
2573 while (current != ']')
2574 {
2575 if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal(false)))
2576 {
2577 return false;
2578 }
2579 get_ignore_noop();
2580 }
2581 }
2582
2583 return sax->end_array();
2584 }
2585
2586 /*!
2587 @return whether object creation completed
2588 */
2589 bool get_ubjson_object()
2590 {
2591 std::pair<std::size_t, char_int_type> size_and_type;
2592 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
2593 {
2594 return false;
2595 }
2596
2597 // do not accept ND-array size in objects in BJData
2598 if (input_format == input_format_t::bjdata && size_and_type.first != npos && (size_and_type.second & (1 << 8)) != 0)
2599 {
2600 auto last_token = get_token_string();
2601 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
2602 exception_message(format: input_format, detail: "BJData object does not support ND-array size in optimized format", context: "object"), nullptr));
2603 }
2604
2605 string_t key;
2606 if (size_and_type.first != npos)
2607 {
2608 if (JSON_HEDLEY_UNLIKELY(!sax->start_object(size_and_type.first)))
2609 {
2610 return false;
2611 }
2612
2613 if (size_and_type.second != 0)
2614 {
2615 for (std::size_t i = 0; i < size_and_type.first; ++i)
2616 {
2617 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key)))
2618 {
2619 return false;
2620 }
2621 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second)))
2622 {
2623 return false;
2624 }
2625 key.clear();
2626 }
2627 }
2628 else
2629 {
2630 for (std::size_t i = 0; i < size_and_type.first; ++i)
2631 {
2632 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key)))
2633 {
2634 return false;
2635 }
2636 if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal()))
2637 {
2638 return false;
2639 }
2640 key.clear();
2641 }
2642 }
2643 }
2644 else
2645 {
2646 if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast<std::size_t>(-1))))
2647 {
2648 return false;
2649 }
2650
2651 while (current != '}')
2652 {
2653 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key, false) || !sax->key(key)))
2654 {
2655 return false;
2656 }
2657 if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal()))
2658 {
2659 return false;
2660 }
2661 get_ignore_noop();
2662 key.clear();
2663 }
2664 }
2665
2666 return sax->end_object();
2667 }
2668
2669 // Note, no reader for UBJSON binary types is implemented because they do
2670 // not exist
2671
2672 bool get_ubjson_high_precision_number()
2673 {
2674 // get size of following number string
2675 std::size_t size{};
2676 bool no_ndarray = true;
2677 auto res = get_ubjson_size_value(result&: size, is_ndarray&: no_ndarray);
2678 if (JSON_HEDLEY_UNLIKELY(!res))
2679 {
2680 return res;
2681 }
2682
2683 // get number string
2684 std::vector<char> number_vector;
2685 for (std::size_t i = 0; i < size; ++i)
2686 {
2687 get();
2688 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "number")))
2689 {
2690 return false;
2691 }
2692 number_vector.push_back(x: static_cast<char>(current));
2693 }
2694
2695 // parse number string
2696 using ia_type = decltype(detail::input_adapter(container: number_vector));
2697 auto number_lexer = detail::lexer<BasicJsonType, ia_type>(detail::input_adapter(container: number_vector), false);
2698 const auto result_number = number_lexer.scan();
2699 const auto number_string = number_lexer.get_token_string();
2700 const auto result_remainder = number_lexer.scan();
2701
2702 using token_type = typename detail::lexer_base<BasicJsonType>::token_type;
2703
2704 if (JSON_HEDLEY_UNLIKELY(result_remainder != token_type::end_of_input))
2705 {
2706 return sax->parse_error(chars_read, number_string, parse_error::create(115, chars_read,
2707 exception_message(format: input_format, detail: concat("invalid number text: ", number_lexer.get_token_string()), context: "high-precision number"), nullptr));
2708 }
2709
2710 switch (result_number)
2711 {
2712 case token_type::value_integer:
2713 return sax->number_integer(number_lexer.get_number_integer());
2714 case token_type::value_unsigned:
2715 return sax->number_unsigned(number_lexer.get_number_unsigned());
2716 case token_type::value_float:
2717 return sax->number_float(number_lexer.get_number_float(), std::move(number_string));
2718 case token_type::uninitialized:
2719 case token_type::literal_true:
2720 case token_type::literal_false:
2721 case token_type::literal_null:
2722 case token_type::value_string:
2723 case token_type::begin_array:
2724 case token_type::begin_object:
2725 case token_type::end_array:
2726 case token_type::end_object:
2727 case token_type::name_separator:
2728 case token_type::value_separator:
2729 case token_type::parse_error:
2730 case token_type::end_of_input:
2731 case token_type::literal_or_value:
2732 default:
2733 return sax->parse_error(chars_read, number_string, parse_error::create(115, chars_read,
2734 exception_message(format: input_format, detail: concat("invalid number text: ", number_lexer.get_token_string()), context: "high-precision number"), nullptr));
2735 }
2736 }
2737
2738 ///////////////////////
2739 // Utility functions //
2740 ///////////////////////
2741
2742 /*!
2743 @brief get next character from the input
2744
This function provides the interface to the used input adapter. It does
not throw when the input has reached EOF; in that case it returns the
negative-valued `char_traits<char_type>::eof()`.
2748
2749 @return character read from the input
2750 */
2751 char_int_type get()
2752 {
2753 ++chars_read;
2754 return current = ia.get_character();
2755 }
2756
2757 /*!
2758 @return character read from the input after ignoring all 'N' entries
2759 */
2760 char_int_type get_ignore_noop()
2761 {
2762 do
2763 {
2764 get();
2765 }
2766 while (current == 'N');
2767
2768 return current;
2769 }
2770
/*!
2772 @brief read a number from the input
2773
2774 @tparam NumberType the type of the number
2775 @param[in] format the current format (for diagnostics)
2776 @param[out] result number of type @a NumberType
2777
2778 @return whether conversion completed
2779
2780 @note This function needs to respect the system's endianness, because
2781 bytes in CBOR, MessagePack, and UBJSON are stored in network order
2782 (big endian) and therefore need reordering on little endian systems.
2783 On the other hand, BSON and BJData use little endian and should reorder
2784 on big endian systems.
2785 */
2786 template<typename NumberType, bool InputIsLittleEndian = false>
2787 bool get_number(const input_format_t format, NumberType& result)
2788 {
2789 // step 1: read input into array with system's byte order
2790 std::array<std::uint8_t, sizeof(NumberType)> vec{};
2791 for (std::size_t i = 0; i < sizeof(NumberType); ++i)
2792 {
2793 get();
2794 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "number")))
2795 {
2796 return false;
2797 }
2798
2799 // reverse byte order prior to conversion if necessary
2800 if (is_little_endian != (InputIsLittleEndian || format == input_format_t::bjdata))
2801 {
2802 vec[sizeof(NumberType) - i - 1] = static_cast<std::uint8_t>(current);
2803 }
2804 else
2805 {
2806 vec[i] = static_cast<std::uint8_t>(current); // LCOV_EXCL_LINE
2807 }
2808 }
2809
2810 // step 2: convert array into number of type T and return
2811 std::memcpy(dest: &result, src: vec.data(), n: sizeof(NumberType));
2812 return true;
2813 }
2814
2815 /*!
2816 @brief create a string by reading characters from the input
2817
@tparam NumberType the type of the length @a len
2819 @param[in] format the current format (for diagnostics)
2820 @param[in] len number of characters to read
2821 @param[out] result string created by reading @a len bytes
2822
2823 @return whether string creation completed
2824
2825 @note We can not reserve @a len bytes for the result, because @a len
2826 may be too large. Usually, @ref unexpect_eof() detects the end of
2827 the input before we run out of string memory.
2828 */
2829 template<typename NumberType>
2830 bool get_string(const input_format_t format,
2831 const NumberType len,
2832 string_t& result)
2833 {
2834 bool success = true;
2835 for (NumberType i = 0; i < len; i++)
2836 {
2837 get();
2838 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "string")))
2839 {
2840 success = false;
2841 break;
2842 }
2843 result.push_back(static_cast<typename string_t::value_type>(current));
2844 }
2845 return success;
2846 }
2847
2848 /*!
2849 @brief create a byte array by reading bytes from the input
2850
@tparam NumberType the type of the length @a len
2852 @param[in] format the current format (for diagnostics)
2853 @param[in] len number of bytes to read
2854 @param[out] result byte array created by reading @a len bytes
2855
2856 @return whether byte array creation completed
2857
2858 @note We can not reserve @a len bytes for the result, because @a len
2859 may be too large. Usually, @ref unexpect_eof() detects the end of
2860 the input before we run out of memory.
2861 */
2862 template<typename NumberType>
2863 bool get_binary(const input_format_t format,
2864 const NumberType len,
2865 binary_t& result)
2866 {
2867 bool success = true;
2868 for (NumberType i = 0; i < len; i++)
2869 {
2870 get();
2871 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "binary")))
2872 {
2873 success = false;
2874 break;
2875 }
2876 result.push_back(static_cast<std::uint8_t>(current));
2877 }
2878 return success;
2879 }
2880
2881 /*!
2882 @param[in] format the current format (for diagnostics)
2883 @param[in] context further context information (for diagnostics)
2884 @return whether the last read character is not EOF
2885 */
2886 JSON_HEDLEY_NON_NULL(3)
2887 bool unexpect_eof(const input_format_t format, const char* context) const
2888 {
2889 if (JSON_HEDLEY_UNLIKELY(current == char_traits<char_type>::eof()))
2890 {
2891 return sax->parse_error(chars_read, "<end of file>",
2892 parse_error::create(110, chars_read, exception_message(format, detail: "unexpected end of input", context), nullptr));
2893 }
2894 return true;
2895 }
2896
2897 /*!
2898 @return a string representation of the last read byte
2899 */
2900 std::string get_token_string() const
2901 {
2902 std::array<char, 3> cr{._M_elems: {}};
2903 static_cast<void>((std::snprintf)(s: cr.data(), maxlen: cr.size(), format: "%.2hhX", static_cast<unsigned char>(current))); // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg)
2904 return std::string{cr.data()};
2905 }
2906
2907 /*!
2908 @param[in] format the current format
2909 @param[in] detail a detailed error message
2910 @param[in] context further context information
2911 @return a message string to use in the parse_error exceptions
2912 */
2913 std::string exception_message(const input_format_t format,
2914 const std::string& detail,
2915 const std::string& context) const
2916 {
2917 std::string error_msg = "syntax error while parsing ";
2918
2919 switch (format)
2920 {
2921 case input_format_t::cbor:
2922 error_msg += "CBOR";
2923 break;
2924
2925 case input_format_t::msgpack:
2926 error_msg += "MessagePack";
2927 break;
2928
2929 case input_format_t::ubjson:
2930 error_msg += "UBJSON";
2931 break;
2932
2933 case input_format_t::bson:
2934 error_msg += "BSON";
2935 break;
2936
2937 case input_format_t::bjdata:
2938 error_msg += "BJData";
2939 break;
2940
2941 case input_format_t::json: // LCOV_EXCL_LINE
2942 default: // LCOV_EXCL_LINE
2943 JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE
2944 }
2945
2946 return concat(args&: error_msg, args: ' ', args: context, args: ": ", args: detail);
2947 }
2948
2949 private:
2950 static JSON_INLINE_VARIABLE constexpr std::size_t npos = static_cast<std::size_t>(-1);
2951
2952 /// input adapter
2953 InputAdapterType ia;
2954
2955 /// the current character
2956 char_int_type current = char_traits<char_type>::eof();
2957
2958 /// the number of characters read
2959 std::size_t chars_read = 0;
2960
2961 /// whether we can assume little endianness
2962 const bool is_little_endian = little_endianness();
2963
2964 /// input format
2965 const input_format_t input_format = input_format_t::json;
2966
2967 /// the SAX parser
2968 json_sax_t* sax = nullptr;
2969
2970 // excluded markers in bjdata optimized type
2971#define JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_ \
2972 make_array<char_int_type>('F', 'H', 'N', 'S', 'T', 'Z', '[', '{')
2973
2974#define JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_ \
2975 make_array<bjd_type>( \
2976 bjd_type{'C', "char"}, \
2977 bjd_type{'D', "double"}, \
2978 bjd_type{'I', "int16"}, \
2979 bjd_type{'L', "int64"}, \
2980 bjd_type{'M', "uint64"}, \
2981 bjd_type{'U', "uint8"}, \
2982 bjd_type{'d', "single"}, \
2983 bjd_type{'i', "int8"}, \
2984 bjd_type{'l', "int32"}, \
2985 bjd_type{'m', "uint32"}, \
2986 bjd_type{'u', "uint16"})
2987
2988 JSON_PRIVATE_UNLESS_TESTED:
2989 // lookup tables
2990 // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
2991 const decltype(JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_) bjd_optimized_type_markers =
2992 JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_;
2993
2994 using bjd_type = std::pair<char_int_type, string_t>;
2995 // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
2996 const decltype(JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_) bjd_types_map =
2997 JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_;
2998
2999#undef JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_
3000#undef JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_
3001};
3002
3003#ifndef JSON_HAS_CPP_17
3004 template<typename BasicJsonType, typename InputAdapterType, typename SAX>
3005 constexpr std::size_t binary_reader<BasicJsonType, InputAdapterType, SAX>::npos;
3006#endif
3007
3008} // namespace detail
3009NLOHMANN_JSON_NAMESPACE_END
3010