parser.hpp source code [include/nlohmann/detail/input/parser.hpp]

1	//     __ _____ _____ _____
2	//  __|  |   __|     |   | |  JSON for Modern C++
3	// |  |  |__   |  |  | | | |  version 3.11.3
4	// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
5	//
6	// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann <https://nlohmann.me>
7	// SPDX-License-Identifier: MIT
8
9	#pragma once
10
11	#include <cmath> // isfinite
12	#include <cstdint> // uint8_t
13	#include <functional> // function
14	#include <string> // string
15	#include <utility> // move
16	#include <vector> // vector
17
18	#include <nlohmann/detail/exceptions.hpp>
19	#include <nlohmann/detail/input/input_adapters.hpp>
20	#include <nlohmann/detail/input/json_sax.hpp>
21	#include <nlohmann/detail/input/lexer.hpp>
22	#include <nlohmann/detail/macro_scope.hpp>
23	#include <nlohmann/detail/meta/is_sax.hpp>
24	#include <nlohmann/detail/string_concat.hpp>
25	#include <nlohmann/detail/value_t.hpp>
26
27	NLOHMANN_JSON_NAMESPACE_BEGIN
28	namespace detail
29	{
30	////////////
31	// parser //
32	////////////
33
/// Events handed to a parser callback; stored in a single byte.
enum class parse_event_t : std::uint8_t
{
    object_start, ///< a `{` was read and processing of a JSON object began
    object_end,   ///< a `}` was read and processing of a JSON object finished
    array_start,  ///< a `[` was read and processing of a JSON array began
    array_end,    ///< a `]` was read and processing of a JSON array finished
    key,          ///< the key of a value inside an object was read
    value         ///< a JSON value was read completely
};
49
50	template<typename BasicJsonType>
51	using parser_callback_t =
52	std::function<bool(int /depth/, parse_event_t /event/, BasicJsonType& /parsed/)>;
53
54	/!*
55	@brief syntax analysis
56
57	This class implements a recursive descent parser.
58	*/
59	template<typename BasicJsonType, typename InputAdapterType>
60	class parser
61	{
62	using number_integer_t = typename BasicJsonType::number_integer_t;
63	using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
64	using number_float_t = typename BasicJsonType::number_float_t;
65	using string_t = typename BasicJsonType::string_t;
66	using lexer_t = lexer<BasicJsonType, InputAdapterType>;
67	using token_type = typename lexer_t::token_type;
68
69	public:
70	/// a parser reading from an input adapter
71	explicit parser(InputAdapterType&& adapter,
72	const parser_callback_t<BasicJsonType> cb = nullptr,
73	const bool allow_exceptions_ = true,
74	const bool skip_comments = false)
75	: callback(cb)
76	, m_lexer(std::move(adapter), skip_comments)
77	, allow_exceptions(allow_exceptions_)
78	{
79	// read first token
80	get_token();
81	}
82
83	/!*
84	@brief public parser interface
85
86	@param[in] strict whether to expect the last token to be EOF
87	@param[in,out] result parsed JSON value
88
89	@throw parse_error.101 in case of an unexpected token
90	@throw parse_error.102 if to_unicode fails or surrogate error
91	@throw parse_error.103 if to_unicode fails
92	*/
93	void parse(const bool strict, BasicJsonType& result)
94	{
95	if (callback)
96	{
97	json_sax_dom_callback_parser<BasicJsonType> sdp(result, callback, allow_exceptions);
98	sax_parse_internal(&sdp);
99
100	// in strict mode, input must be completely read
101	if (strict && (get_token() != token_type::end_of_input))
102	{
103	sdp.parse_error(m_lexer.get_position(),
104	m_lexer.get_token_string(),
105	parse_error::create(`101`, m_lexer.get_position(),
106	exception_message(expected: token_type::end_of_input, context: "value"), nullptr));
107	}
108
109	// in case of an error, return discarded value
110	if (sdp.is_errored())
111	{
112	result = value_t::discarded;
113	return;
114	}
115
116	// set top-level value to null if it was discarded by the callback
117	// function
118	if (result.is_discarded())
119	{
120	result = nullptr;
121	}
122	}
123	else
124	{
125	json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions);
126	sax_parse_internal(&sdp);
127
128	// in strict mode, input must be completely read
129	if (strict && (get_token() != token_type::end_of_input))
130	{
131	sdp.parse_error(m_lexer.get_position(),
132	m_lexer.get_token_string(),
133	parse_error::create(`101`, m_lexer.get_position(), exception_message(expected: token_type::end_of_input, context: "value"), nullptr));
134	}
135
136	// in case of an error, return discarded value
137	if (sdp.is_errored())
138	{
139	result = value_t::discarded;
140	return;
141	}
142	}
143
144	result.assert_invariant();
145	}
146
147	/!*
148	@brief public accept interface
149
150	@param[in] strict whether to expect the last token to be EOF
151	@return whether the input is a proper JSON text
152	*/
153	bool accept(const bool strict = true)
154	{
155	json_sax_acceptor<BasicJsonType> sax_acceptor;
156	return sax_parse(&sax_acceptor, strict);
157	}
158
159	template<typename SAX>
160	JSON_HEDLEY_NON_NULL(`2`)
161	bool sax_parse(SAX* sax, const bool strict = true)
162	{
163	(void)detail::is_sax_static_asserts<SAX, BasicJsonType> {};
164	const bool result = sax_parse_internal(sax);
165
166	// strict mode: next byte must be EOF
167	if (result && strict && (get_token() != token_type::end_of_input))
168	{
169	return sax->parse_error(m_lexer.get_position(),
170	m_lexer.get_token_string(),
171	parse_error::create(`101`, m_lexer.get_position(), exception_message(expected: token_type::end_of_input, context: "value"), nullptr));
172	}
173
174	return result;
175	}
176
177	private:
178	template<typename SAX>
179	JSON_HEDLEY_NON_NULL(`2`)
180	bool sax_parse_internal(SAX* sax)
181	{
182	// stack to remember the hierarchy of structured values we are parsing
183	// true = array; false = object
184	std::vector<bool> states;
185	// value to avoid a goto (see comment where set to true)
186	bool skip_to_state_evaluation = false;
187
188	while (true)
189	{
190	if (!skip_to_state_evaluation)
191	{
192	// invariant: get_token() was called before each iteration
193	switch (last_token)
194	{
195	case token_type::begin_object:
196	{
197	if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast<std::size_t>(-`1`))))
198	{
199	return false;
200	}
201
202	// closing } -> we are done
203	if (get_token() == token_type::end_object)
204	{
205	if (JSON_HEDLEY_UNLIKELY(!sax->end_object()))
206	{
207	return false;
208	}
209	break;
210	}
211
212	// parse key
213	if (JSON_HEDLEY_UNLIKELY(last_token != token_type::value_string))
214	{
215	return sax->parse_error(m_lexer.get_position(),
216	m_lexer.get_token_string(),
217	parse_error::create(`101`, m_lexer.get_position(), exception_message(expected: token_type::value_string, context: "object key"), nullptr));
218	}
219	if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string())))
220	{
221	return false;
222	}
223
224	// parse separator (:)
225	if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
226	{
227	return sax->parse_error(m_lexer.get_position(),
228	m_lexer.get_token_string(),
229	parse_error::create(`101`, m_lexer.get_position(), exception_message(expected: token_type::name_separator, context: "object separator"), nullptr));
230	}
231
232	// remember we are now inside an object
233	states.push_back(x: false);
234
235	// parse values
236	get_token();
237	continue;
238	}
239
240	case token_type::begin_array:
241	{
242	if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast<std::size_t>(-`1`))))
243	{
244	return false;
245	}
246
247	// closing ] -> we are done
248	if (get_token() == token_type::end_array)
249	{
250	if (JSON_HEDLEY_UNLIKELY(!sax->end_array()))
251	{
252	return false;
253	}
254	break;
255	}
256
257	// remember we are now inside an array
258	states.push_back(x: true);
259
260	// parse values (no need to call get_token)
261	continue;
262	}
263
264	case token_type::value_float:
265	{
266	const auto res = m_lexer.get_number_float();
267
268	if (JSON_HEDLEY_UNLIKELY(!std::isfinite(res)))
269	{
270	return sax->parse_error(m_lexer.get_position(),
271	m_lexer.get_token_string(),
272	out_of_range::create(`406`, concat("number overflow parsing '", m_lexer.get_token_string(), `'\''`), nullptr));
273	}
274
275	if (JSON_HEDLEY_UNLIKELY(!sax->number_float(res, m_lexer.get_string())))
276	{
277	return false;
278	}
279
280	break;
281	}
282
283	case token_type::literal_false:
284	{
285	if (JSON_HEDLEY_UNLIKELY(!sax->boolean(false)))
286	{
287	return false;
288	}
289	break;
290	}
291
292	case token_type::literal_null:
293	{
294	if (JSON_HEDLEY_UNLIKELY(!sax->null()))
295	{
296	return false;
297	}
298	break;
299	}
300
301	case token_type::literal_true:
302	{
303	if (JSON_HEDLEY_UNLIKELY(!sax->boolean(true)))
304	{
305	return false;
306	}
307	break;
308	}
309
310	case token_type::value_integer:
311	{
312	if (JSON_HEDLEY_UNLIKELY(!sax->number_integer(m_lexer.get_number_integer())))
313	{
314	return false;
315	}
316	break;
317	}
318
319	case token_type::value_string:
320	{
321	if (JSON_HEDLEY_UNLIKELY(!sax->string(m_lexer.get_string())))
322	{
323	return false;
324	}
325	break;
326	}
327
328	case token_type::value_unsigned:
329	{
330	if (JSON_HEDLEY_UNLIKELY(!sax->number_unsigned(m_lexer.get_number_unsigned())))
331	{
332	return false;
333	}
334	break;
335	}
336
337	case token_type::parse_error:
338	{
339	// using "uninitialized" to avoid "expected" message
340	return sax->parse_error(m_lexer.get_position(),
341	m_lexer.get_token_string(),
342	parse_error::create(`101`, m_lexer.get_position(), exception_message(expected: token_type::uninitialized, context: "value"), nullptr));
343	}
344	case token_type::end_of_input:
345	{
346	if (JSON_HEDLEY_UNLIKELY(m_lexer.get_position().chars_read_total == `1`))
347	{
348	return sax->parse_error(m_lexer.get_position(),
349	m_lexer.get_token_string(),
350	parse_error::create(`101`, m_lexer.get_position(),
351	"attempting to parse an empty input; check that your input string or stream contains the expected JSON", nullptr));
352	}
353
354	return sax->parse_error(m_lexer.get_position(),
355	m_lexer.get_token_string(),
356	parse_error::create(`101`, m_lexer.get_position(), exception_message(expected: token_type::literal_or_value, context: "value"), nullptr));
357	}
358	case token_type::uninitialized:
359	case token_type::end_array:
360	case token_type::end_object:
361	case token_type::name_separator:
362	case token_type::value_separator:
363	case token_type::literal_or_value:
364	default: // the last token was unexpected
365	{
366	return sax->parse_error(m_lexer.get_position(),
367	m_lexer.get_token_string(),
368	parse_error::create(`101`, m_lexer.get_position(), exception_message(expected: token_type::literal_or_value, context: "value"), nullptr));
369	}
370	}
371	}
372	else
373	{
374	skip_to_state_evaluation = false;
375	}
376
377	// we reached this line after we successfully parsed a value
378	if (states.empty())
379	{
380	// empty stack: we reached the end of the hierarchy: done
381	return true;
382	}
383
384	if (states.back()) // array
385	{
386	// comma -> next value
387	if (get_token() == token_type::value_separator)
388	{
389	// parse a new value
390	get_token();
391	continue;
392	}
393
394	// closing ]
395	if (JSON_HEDLEY_LIKELY(last_token == token_type::end_array))
396	{
397	if (JSON_HEDLEY_UNLIKELY(!sax->end_array()))
398	{
399	return false;
400	}
401
402	// We are done with this array. Before we can parse a
403	// new value, we need to evaluate the new state first.
404	// By setting skip_to_state_evaluation to false, we
405	// are effectively jumping to the beginning of this if.
406	JSON_ASSERT(!states.empty());
407	states.pop_back();
408	skip_to_state_evaluation = true;
409	continue;
410	}
411
412	return sax->parse_error(m_lexer.get_position(),
413	m_lexer.get_token_string(),
414	parse_error::create(`101`, m_lexer.get_position(), exception_message(expected: token_type::end_array, context: "array"), nullptr));
415	}
416
417	// states.back() is false -> object
418
419	// comma -> next value
420	if (get_token() == token_type::value_separator)
421	{
422	// parse key
423	if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::value_string))
424	{
425	return sax->parse_error(m_lexer.get_position(),
426	m_lexer.get_token_string(),
427	parse_error::create(`101`, m_lexer.get_position(), exception_message(expected: token_type::value_string, context: "object key"), nullptr));
428	}
429
430	if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string())))
431	{
432	return false;
433	}
434
435	// parse separator (:)
436	if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
437	{
438	return sax->parse_error(m_lexer.get_position(),
439	m_lexer.get_token_string(),
440	parse_error::create(`101`, m_lexer.get_position(), exception_message(expected: token_type::name_separator, context: "object separator"), nullptr));
441	}
442
443	// parse values
444	get_token();
445	continue;
446	}
447
448	// closing }
449	if (JSON_HEDLEY_LIKELY(last_token == token_type::end_object))
450	{
451	if (JSON_HEDLEY_UNLIKELY(!sax->end_object()))
452	{
453	return false;
454	}
455
456	// We are done with this object. Before we can parse a
457	// new value, we need to evaluate the new state first.
458	// By setting skip_to_state_evaluation to false, we
459	// are effectively jumping to the beginning of this if.
460	JSON_ASSERT(!states.empty());
461	states.pop_back();
462	skip_to_state_evaluation = true;
463	continue;
464	}
465
466	return sax->parse_error(m_lexer.get_position(),
467	m_lexer.get_token_string(),
468	parse_error::create(`101`, m_lexer.get_position(), exception_message(expected: token_type::end_object, context: "object"), nullptr));
469	}
470	}
471
472	/// get next token from lexer
473	token_type get_token()
474	{
475	return last_token = m_lexer.scan();
476	}
477
478	std::string exception_message(const token_type expected, const std::string& context)
479	{
480	std::string error_msg = "syntax error ";
481
482	if (!context.empty())
483	{
484	error_msg += concat(args: "while parsing ", args: context, args: `' '`);
485	}
486
487	error_msg += "- ";
488
489	if (last_token == token_type::parse_error)
490	{
491	error_msg += concat(m_lexer.get_error_message(), "; last read: '",
492	m_lexer.get_token_string(), `'\''`);
493	}
494	else
495	{
496	error_msg += concat("unexpected ", lexer_t::token_type_name(last_token));
497	}
498
499	if (expected != token_type::uninitialized)
500	{
501	error_msg += concat("; expected ", lexer_t::token_type_name(expected));
502	}
503
504	return error_msg;
505	}
506
507	private:
508	/// callback function
509	const parser_callback_t<BasicJsonType> callback = nullptr;
510	/// the type of the last read token
511	token_type last_token = token_type::uninitialized;
512	/// the lexer
513	lexer_t m_lexer;
514	/// whether to throw exceptions in case of errors
515	const bool allow_exceptions = true;
516	};
517
518	} // namespace detail
519	NLOHMANN_JSON_NAMESPACE_END
520

Browse the source code of include/nlohmann/detail/input/parser.hpp