1// Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com>
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3#ifndef QSTRINGTOKENIZER_H
4#define QSTRINGTOKENIZER_H
5
6#include <QtCore/qnamespace.h>
7#include <QtCore/qcontainerfwd.h>
8#include <iterator>
9
10QT_BEGIN_NAMESPACE
11
// Forward declaration only, so that the ViewForImpl/PinForImpl partial
// specializations further down can name QStringBuilder<LHS, RHS>.
template <typename, typename> class QStringBuilder;

// When defined, QStringTokenizerBase declares a dedicated `sentinel` class
// as its end marker; otherwise `sentinel` is an alias for `iterator`.
#define Q_STRINGTOKENIZER_USE_SENTINEL
15
16class QStringTokenizerBaseBase
17{
18protected:
19 ~QStringTokenizerBaseBase() = default;
20 constexpr QStringTokenizerBaseBase(Qt::SplitBehavior sb, Qt::CaseSensitivity cs) noexcept
21 : m_sb{sb}, m_cs{cs} {}
22
23 struct tokenizer_state {
24 qsizetype start, end, extra;
25 friend constexpr bool operator==(tokenizer_state lhs, tokenizer_state rhs) noexcept
26 { return lhs.start == rhs.start && lhs.end == rhs.end && lhs.extra == rhs.extra; }
27 friend constexpr bool operator!=(tokenizer_state lhs, tokenizer_state rhs) noexcept
28 { return !operator==(lhs, rhs); }
29 };
30
31 Qt::SplitBehavior m_sb;
32 Qt::CaseSensitivity m_cs;
33};
34
35template <typename Haystack, typename Needle>
36class QStringTokenizerBase : protected QStringTokenizerBaseBase
37{
38 struct next_result {
39 Haystack value;
40 bool ok;
41 tokenizer_state state;
42 };
43 inline next_result next(tokenizer_state state) const noexcept;
44 inline next_result toFront() const noexcept { return next(state: {}); }
45public:
46 constexpr explicit QStringTokenizerBase(Haystack haystack, Needle needle, Qt::SplitBehavior sb, Qt::CaseSensitivity cs) noexcept
47 : QStringTokenizerBaseBase{sb, cs}, m_haystack{haystack}, m_needle{needle} {}
48
49 class iterator;
50 friend class iterator;
51#ifdef Q_STRINGTOKENIZER_USE_SENTINEL
52 class sentinel {
53 friend constexpr bool operator==(sentinel, sentinel) noexcept { return true; }
54 friend constexpr bool operator!=(sentinel, sentinel) noexcept { return false; }
55 };
56#else
57 using sentinel = iterator;
58#endif
59 class iterator {
60 const QStringTokenizerBase *tokenizer;
61 next_result current;
62 friend class QStringTokenizerBase;
63 explicit iterator(const QStringTokenizerBase &t) noexcept
64 : tokenizer{&t}, current{t.toFront()} {}
65 public:
66 using difference_type = qsizetype;
67 using value_type = Haystack;
68 using pointer = const value_type*;
69 using reference = const value_type&;
70 using iterator_category = std::forward_iterator_tag;
71
72 iterator() noexcept = default;
73
74 // violates std::forward_iterator (returns a reference into the iterator)
75 [[nodiscard]] constexpr const Haystack* operator->() const { return Q_ASSERT(current.ok), &current.value; }
76 [[nodiscard]] constexpr const Haystack& operator*() const { return *operator->(); }
77
78 iterator& operator++() { advance(); return *this; }
79 iterator operator++(int) { auto tmp = *this; advance(); return tmp; }
80
81 friend constexpr bool operator==(const iterator &lhs, const iterator &rhs) noexcept
82 { return lhs.current.ok == rhs.current.ok && (!lhs.current.ok || (Q_ASSERT(lhs.tokenizer == rhs.tokenizer), lhs.current.state == rhs.current.state)); }
83 friend constexpr bool operator!=(const iterator &lhs, const iterator &rhs) noexcept
84 { return !operator==(lhs, rhs); }
85#ifdef Q_STRINGTOKENIZER_USE_SENTINEL
86 friend constexpr bool operator==(const iterator &lhs, sentinel) noexcept
87 { return !lhs.current.ok; }
88 friend constexpr bool operator!=(const iterator &lhs, sentinel) noexcept
89 { return !operator==(lhs, sentinel{}); }
90 friend constexpr bool operator==(sentinel, const iterator &rhs) noexcept
91 { return !rhs.current.ok; }
92 friend constexpr bool operator!=(sentinel, const iterator &rhs) noexcept
93 { return !operator==(sentinel{}, rhs); }
94#endif
95 private:
96 void advance() {
97 Q_ASSERT(current.ok);
98 current = tokenizer->next(current.state);
99 }
100 };
101 using const_iterator = iterator;
102
103 using size_type = std::size_t;
104 using difference_type = typename iterator::difference_type;
105 using value_type = typename iterator::value_type;
106 using pointer = typename iterator::pointer;
107 using const_pointer = pointer;
108 using reference = typename iterator::reference;
109 using const_reference = reference;
110
111 [[nodiscard]] iterator begin() const noexcept { return iterator{*this}; }
112 [[nodiscard]] iterator cbegin() const noexcept { return begin(); }
113 template <bool = std::is_same<iterator, sentinel>::value> // ODR protection
114 [[nodiscard]] constexpr sentinel end() const noexcept { return {}; }
115 template <bool = std::is_same<iterator, sentinel>::value> // ODR protection
116 [[nodiscard]] constexpr sentinel cend() const noexcept { return {}; }
117
118private:
119 Haystack m_haystack;
120 Needle m_needle;
121};
122
123QT_BEGIN_INCLUDE_NAMESPACE
124#include <QtCore/qstringview.h>
125QT_END_INCLUDE_NAMESPACE
126
127namespace QtPrivate {
128namespace Tok {
129
130 constexpr qsizetype size(QChar) noexcept { return 1; }
131 template <typename String>
132 constexpr qsizetype size(const String &s) noexcept { return static_cast<qsizetype>(s.size()); }
133
134 template <typename String> struct ViewForImpl {};
135 template <> struct ViewForImpl<QStringView> { using type = QStringView; };
136 template <> struct ViewForImpl<QLatin1StringView> { using type = QLatin1StringView; };
137 template <> struct ViewForImpl<QChar> { using type = QChar; };
138 template <> struct ViewForImpl<QString> : ViewForImpl<QStringView> {};
139 template <> struct ViewForImpl<QLatin1Char> : ViewForImpl<QChar> {};
140 template <> struct ViewForImpl<char16_t> : ViewForImpl<QChar> {};
141 template <> struct ViewForImpl<char16_t*> : ViewForImpl<QStringView> {};
142 template <> struct ViewForImpl<const char16_t*> : ViewForImpl<QStringView> {};
143 template <typename LHS, typename RHS>
144 struct ViewForImpl<QStringBuilder<LHS, RHS>> : ViewForImpl<typename QStringBuilder<LHS,RHS>::ConvertTo> {};
145 template <typename Char, typename...Args>
146 struct ViewForImpl<std::basic_string<Char, Args...>> : ViewForImpl<Char*> {};
147#ifdef __cpp_lib_string_view
148 template <typename Char, typename...Args>
149 struct ViewForImpl<std::basic_string_view<Char, Args...>> : ViewForImpl<Char*> {};
150#endif
151
152 // This metafunction maps a StringLike to a View (currently, QChar,
153 // QStringView, QLatin1StringView). This is what QStringTokenizerBase
154 // operates on. QStringTokenizer adds pinning to keep rvalues alive
155 // for the duration of the algorithm.
156 template <typename String>
157 using ViewFor = typename ViewForImpl<typename std::decay<String>::type>::type;
158
159 // Pinning:
160 // rvalues of owning string types need to be moved into QStringTokenizer
161 // to keep them alive for the lifetime of the tokenizer. For lvalues, we
162 // assume the user takes care of that.
163
164 // default: don't pin anything (characters are pinned implicitly)
165 template <typename String>
166 struct PinForImpl { using type = ViewFor<String>; };
167
168 // rvalue QString -> QString
169 template <>
170 struct PinForImpl<QString> { using type = QString; };
171
172 // rvalue std::basic_string -> basic_string
173 template <typename Char, typename...Args>
174 struct PinForImpl<std::basic_string<Char, Args...>>
175 { using type = std::basic_string<Char, Args...>; };
176
177 // rvalue QStringBuilder -> pin as the nested ConvertTo type
178 template <typename LHS, typename RHS>
179 struct PinForImpl<QStringBuilder<LHS, RHS>>
180 : PinForImpl<typename QStringBuilder<LHS, RHS>::ConvertTo> {};
181
182 template <typename StringLike>
183 using PinFor = typename PinForImpl<typename std::remove_cv<StringLike>::type>::type;
184
185 template <typename T> struct is_owning_string_type : std::false_type {};
186 template <> struct is_owning_string_type<QString> : std::true_type {};
187 template <typename...Args> struct is_owning_string_type<std::basic_string<Args...>> : std::true_type {};
188
189 // unpinned
190 template <typename T, bool pinned = is_owning_string_type<T>::value>
191 struct Pinning
192 {
193 // this is the storage for non-pinned types - no storage
194 constexpr Pinning(const T&) noexcept {}
195 // Since we don't store something, the view() method needs to be
196 // given something it can return.
197 constexpr T view(T t) const noexcept { return t; }
198 };
199
200 // pinned
201 template <typename T>
202 struct Pinning<T, true>
203 {
204 T m_string;
205 // specialisation for owning string types (QString, std::u16string):
206 // stores the string:
207 constexpr Pinning(T &&s) noexcept : m_string{std::move(s)} {}
208 // ... and thus view() uses that instead of the argument passed in:
209 constexpr QStringView view(const T&) const noexcept { return m_string; }
210 };
211
212 // NeedlePinning and HaystackPinning are there to distinguish them as
213 // base classes of QStringTokenizer. We use inheritance to reap the
214 // empty base class optimization.
215 template <typename T>
216 struct NeedlePinning : Pinning<T>
217 {
218 using Pinning<T>::Pinning;
219 template <typename Arg>
220 constexpr auto needleView(Arg &&a) noexcept
221 -> decltype(this->view(std::forward<Arg>(a)))
222 { return this->view(std::forward<Arg>(a)); }
223 };
224
225 template <typename T>
226 struct HaystackPinning : Pinning<T>
227 {
228 using Pinning<T>::Pinning;
229 template <typename Arg>
230 constexpr auto haystackView(Arg &&a) noexcept
231 -> decltype(this->view(std::forward<Arg>(a)))
232 { return this->view(std::forward<Arg>(a)); }
233 };
234
235 // The Base of a QStringTokenizer is QStringTokenizerBase for the views
236 // corresponding to the Haystack and Needle template arguments
237 //
238 // ie. QStringTokenizer<QString, QString>
239 // : QStringTokenizerBase<QStringView, QStringView> (+ pinning)
240 template <typename Haystack, typename Needle>
241 using TokenizerBase = QStringTokenizerBase<ViewFor<Haystack>, ViewFor<Needle>>;
242} // namespace Tok
243} // namespace QtPrivate
244
245template <typename Haystack, typename Needle>
246class QStringTokenizer
247 : private QtPrivate::Tok::HaystackPinning<Haystack>,
248 private QtPrivate::Tok::NeedlePinning<Needle>,
249 public QtPrivate::Tok::TokenizerBase<Haystack, Needle>
250{
251 using HPin = QtPrivate::Tok::HaystackPinning<Haystack>;
252 using NPin = QtPrivate::Tok::NeedlePinning<Needle>;
253 using Base = QtPrivate::Tok::TokenizerBase<Haystack, Needle>;
254 template <typename Container, typename HPin>
255 struct if_haystack_not_pinned_impl : std::enable_if<std::is_empty<HPin>::value, bool> {};
256 template <typename Container>
257 using if_haystack_not_pinned = typename if_haystack_not_pinned_impl<Container, HPin>::type;
258 template <typename Container, typename Iterator = decltype(std::begin(std::declval<Container>()))>
259 using if_compatible_container = typename std::enable_if<
260 std::is_convertible<
261 typename Base::value_type,
262 typename std::iterator_traits<Iterator>::value_type
263 >::value,
264 bool
265 >::type;
266public:
267 using value_type = typename Base::value_type;
268 using difference_type = typename Base::difference_type;
269 using size_type = typename Base::size_type;
270 using reference = typename Base::reference;
271 using const_reference = typename Base::const_reference;
272 using pointer = typename Base::pointer;
273 using const_pointer = typename Base::const_pointer;
274 using iterator = typename Base::iterator;
275 using const_iterator = typename Base::const_iterator;
276 using sentinel = typename Base::sentinel;
277
278#ifdef Q_QDOC
279 [[nodiscard]] iterator begin() const noexcept { return Base::begin(); }
280 [[nodiscard]] iterator cbegin() const noexcept { return begin(); }
281 [[nodiscard]] constexpr sentinel end() const noexcept { return {}; }
282 [[nodiscard]] constexpr sentinel cend() const noexcept { return {}; }
283#endif
284
285 constexpr explicit QStringTokenizer(Haystack haystack, Needle needle,
286 Qt::CaseSensitivity cs,
287 Qt::SplitBehavior sb = Qt::KeepEmptyParts)
288 noexcept(std::is_nothrow_copy_constructible<QStringTokenizer>::value)
289 // here, we present the haystack to Pinning<>, for optional storing.
290 // If it did store, haystack is moved-from and mustn't be touched
291 // any longer, which is why view() for these Pinning<>s ignores the
292 // argument.
293 : HPin{std::forward<Haystack>(haystack)},
294 NPin{std::forward<Needle>(needle)},
295 // If Pinning<> didn't store, we pass the haystack (ditto needle)
296 // to view() again, so it can be copied from there.
297 Base{this->haystackView(haystack),
298 this->needleView(needle), sb, cs}
299 {}
300 constexpr explicit QStringTokenizer(Haystack haystack, Needle needle,
301 Qt::SplitBehavior sb = Qt::KeepEmptyParts,
302 Qt::CaseSensitivity cs = Qt::CaseSensitive)
303 noexcept(std::is_nothrow_copy_constructible<QStringTokenizer>::value)
304 : HPin{std::forward<Haystack>(haystack)},
305 NPin{std::forward<Needle>(needle)},
306 Base{this->haystackView(haystack),
307 this->needleView(needle), sb, cs}
308 {}
309
310#ifdef Q_QDOC
311 template<typename LContainer> LContainer toContainer(LContainer &&c = {}) const & {}
312 template<typename RContainer> RContainer toContainer(RContainer &&c = {}) const && {}
313#else
314 template<typename Container = QList<value_type>, if_compatible_container<Container> = true>
315 Container toContainer(Container &&c = {}) const &
316 {
317 for (auto e : *this)
318 c.emplace_back(e);
319 return std::forward<Container>(c);
320 }
321 template<typename Container = QList<value_type>, if_compatible_container<Container> = true,
322 if_haystack_not_pinned<Container> = true>
323 Container toContainer(Container &&c = {}) const &&
324 {
325 for (auto e : *this)
326 c.emplace_back(e);
327 return std::forward<Container>(c);
328 }
329#endif
330};
331
332namespace QtPrivate {
333namespace Tok {
334// This meta function just calculated the template arguments for the
335// QStringTokenizer (not -Base), based on the actual arguments passed
336// to qTokenize() (or the ctor, with CTAD). It basically detects rvalue
337// QString and std::basic_string and otherwise decays the arguments to
338// the respective view type.
339//
340// #define works around a C++ restriction: [temp.deduct.guide]/3 seems
341// to ask for the simple-template-id following the `->` of a deduction
342// guide to be identical to the class name for which we guide deduction.
343// In particular, Clang rejects a template alias there, while GCC accepts
344// it.
345#define Q_TOK_RESULT \
346 QStringTokenizer< \
347 QtPrivate::Tok::PinFor<Haystack>, \
348 QtPrivate::Tok::PinFor<Needle> \
349 > \
350 /*end*/
351template <typename Haystack, typename Needle>
352using TokenizerResult = Q_TOK_RESULT;
353template <typename Haystack, typename Needle>
354using is_nothrow_constructible_from = std::is_nothrow_copy_constructible<TokenizerResult<Haystack, Needle>>;
355}
356}
357
358#ifdef __cpp_deduction_guides
359// these tell the compiler how to determine the QStringTokenizer
360// template arguments based on the constructor arguments (CTAD):
361template <typename Haystack, typename Needle>
362QStringTokenizer(Haystack&&, Needle&&)
363 -> Q_TOK_RESULT;
364template <typename Haystack, typename Needle>
365QStringTokenizer(Haystack&&, Needle&&, Qt::SplitBehavior)
366 -> Q_TOK_RESULT;
367template <typename Haystack, typename Needle>
368QStringTokenizer(Haystack&&, Needle&&, Qt::SplitBehavior, Qt::CaseSensitivity)
369 -> Q_TOK_RESULT;
370template <typename Haystack, typename Needle>
371QStringTokenizer(Haystack&&, Needle&&, Qt::CaseSensitivity)
372 -> Q_TOK_RESULT;
373template <typename Haystack, typename Needle>
374QStringTokenizer(Haystack&&, Needle&&, Qt::CaseSensitivity, Qt::SplitBehavior)
375 -> Q_TOK_RESULT;
376#endif
377
378#undef Q_TOK_RESULT
379
380template <typename Haystack, typename Needle, typename...Flags>
381[[nodiscard]] constexpr auto
382qTokenize(Haystack &&h, Needle &&n, Flags...flags)
383 noexcept(QtPrivate::Tok::is_nothrow_constructible_from<Haystack, Needle>::value)
384 -> decltype(QtPrivate::Tok::TokenizerResult<Haystack, Needle>{std::forward<Haystack>(h),
385 std::forward<Needle>(n), flags...})
386{ return QtPrivate::Tok::TokenizerResult<Haystack, Needle>{std::forward<Haystack>(h),
387 std::forward<Needle>(n),
388 flags...}; }
389
390template <typename Haystack, typename Needle>
391auto QStringTokenizerBase<Haystack, Needle>::next(tokenizer_state state) const noexcept -> next_result
392{
393 while (true) {
394 if (state.end < 0) {
395 // already at end:
396 return {{}, false, state};
397 }
398 state.end = m_haystack.indexOf(m_needle, state.start + state.extra, m_cs);
399 Haystack result;
400 if (state.end >= 0) {
401 // token separator found => return intermediate element:
402 result = m_haystack.sliced(state.start, state.end - state.start);
403 const auto ns = QtPrivate::Tok::size(m_needle);
404 state.start = state.end + ns;
405 state.extra = (ns == 0 ? 1 : 0);
406 } else {
407 // token separator not found => return final element:
408 result = m_haystack.sliced(state.start);
409 }
410 if ((m_sb & Qt::SkipEmptyParts) && result.isEmpty())
411 continue;
412 return {result, true, state};
413 }
414}
415
416QT_END_NAMESPACE
417
418#endif /* QSTRINGTOKENIZER_H */
419