1 | // Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com> |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | #ifndef QSTRINGTOKENIZER_H |
4 | #define QSTRINGTOKENIZER_H |
5 | |
6 | #include <QtCore/qnamespace.h> |
7 | #include <QtCore/qcontainerfwd.h> |
8 | #include <iterator> |
9 | |
10 | QT_BEGIN_NAMESPACE |
11 | |
12 | template <typename, typename> class QStringBuilder; |
13 | |
14 | #define Q_STRINGTOKENIZER_USE_SENTINEL |
15 | |
16 | class QStringTokenizerBaseBase |
17 | { |
18 | protected: |
19 | ~QStringTokenizerBaseBase() = default; |
20 | constexpr QStringTokenizerBaseBase(Qt::SplitBehavior sb, Qt::CaseSensitivity cs) noexcept |
21 | : m_sb{sb}, m_cs{cs} {} |
22 | |
23 | struct tokenizer_state { |
24 | qsizetype start, end, ; |
25 | friend constexpr bool operator==(tokenizer_state lhs, tokenizer_state rhs) noexcept |
26 | { return lhs.start == rhs.start && lhs.end == rhs.end && lhs.extra == rhs.extra; } |
27 | friend constexpr bool operator!=(tokenizer_state lhs, tokenizer_state rhs) noexcept |
28 | { return !operator==(lhs, rhs); } |
29 | }; |
30 | |
31 | Qt::SplitBehavior m_sb; |
32 | Qt::CaseSensitivity m_cs; |
33 | }; |
34 | |
35 | template <typename Haystack, typename Needle> |
36 | class QStringTokenizerBase : protected QStringTokenizerBaseBase |
37 | { |
38 | struct next_result { |
39 | Haystack value; |
40 | bool ok; |
41 | tokenizer_state state; |
42 | }; |
43 | inline next_result next(tokenizer_state state) const noexcept; |
44 | inline next_result toFront() const noexcept { return next(state: {}); } |
45 | public: |
46 | constexpr explicit QStringTokenizerBase(Haystack haystack, Needle needle, Qt::SplitBehavior sb, Qt::CaseSensitivity cs) noexcept |
47 | : QStringTokenizerBaseBase{sb, cs}, m_haystack{haystack}, m_needle{needle} {} |
48 | |
49 | class iterator; |
50 | friend class iterator; |
51 | #ifdef Q_STRINGTOKENIZER_USE_SENTINEL |
52 | class sentinel { |
53 | friend constexpr bool operator==(sentinel, sentinel) noexcept { return true; } |
54 | friend constexpr bool operator!=(sentinel, sentinel) noexcept { return false; } |
55 | }; |
56 | #else |
57 | using sentinel = iterator; |
58 | #endif |
59 | class iterator { |
60 | const QStringTokenizerBase *tokenizer; |
61 | next_result current; |
62 | friend class QStringTokenizerBase; |
63 | explicit iterator(const QStringTokenizerBase &t) noexcept |
64 | : tokenizer{&t}, current{t.toFront()} {} |
65 | public: |
66 | using difference_type = qsizetype; |
67 | using value_type = Haystack; |
68 | using pointer = const value_type*; |
69 | using reference = const value_type&; |
70 | using iterator_category = std::forward_iterator_tag; |
71 | |
72 | iterator() noexcept = default; |
73 | |
74 | // violates std::forward_iterator (returns a reference into the iterator) |
75 | [[nodiscard]] constexpr const Haystack* operator->() const { return Q_ASSERT(current.ok), ¤t.value; } |
76 | [[nodiscard]] constexpr const Haystack& operator*() const { return *operator->(); } |
77 | |
78 | iterator& operator++() { advance(); return *this; } |
79 | iterator operator++(int) { auto tmp = *this; advance(); return tmp; } |
80 | |
81 | friend constexpr bool operator==(const iterator &lhs, const iterator &rhs) noexcept |
82 | { return lhs.current.ok == rhs.current.ok && (!lhs.current.ok || (Q_ASSERT(lhs.tokenizer == rhs.tokenizer), lhs.current.state == rhs.current.state)); } |
83 | friend constexpr bool operator!=(const iterator &lhs, const iterator &rhs) noexcept |
84 | { return !operator==(lhs, rhs); } |
85 | #ifdef Q_STRINGTOKENIZER_USE_SENTINEL |
86 | friend constexpr bool operator==(const iterator &lhs, sentinel) noexcept |
87 | { return !lhs.current.ok; } |
88 | friend constexpr bool operator!=(const iterator &lhs, sentinel) noexcept |
89 | { return !operator==(lhs, sentinel{}); } |
90 | friend constexpr bool operator==(sentinel, const iterator &rhs) noexcept |
91 | { return !rhs.current.ok; } |
92 | friend constexpr bool operator!=(sentinel, const iterator &rhs) noexcept |
93 | { return !operator==(sentinel{}, rhs); } |
94 | #endif |
95 | private: |
96 | void advance() { |
97 | Q_ASSERT(current.ok); |
98 | current = tokenizer->next(current.state); |
99 | } |
100 | }; |
101 | using const_iterator = iterator; |
102 | |
103 | using size_type = std::size_t; |
104 | using difference_type = typename iterator::difference_type; |
105 | using value_type = typename iterator::value_type; |
106 | using pointer = typename iterator::pointer; |
107 | using const_pointer = pointer; |
108 | using reference = typename iterator::reference; |
109 | using const_reference = reference; |
110 | |
111 | [[nodiscard]] iterator begin() const noexcept { return iterator{*this}; } |
112 | [[nodiscard]] iterator cbegin() const noexcept { return begin(); } |
113 | template <bool = std::is_same<iterator, sentinel>::value> // ODR protection |
114 | [[nodiscard]] constexpr sentinel end() const noexcept { return {}; } |
115 | template <bool = std::is_same<iterator, sentinel>::value> // ODR protection |
116 | [[nodiscard]] constexpr sentinel cend() const noexcept { return {}; } |
117 | |
118 | private: |
119 | Haystack m_haystack; |
120 | Needle m_needle; |
121 | }; |
122 | |
123 | QT_BEGIN_INCLUDE_NAMESPACE |
124 | #include <QtCore/qstringview.h> |
125 | QT_END_INCLUDE_NAMESPACE |
126 | |
127 | namespace QtPrivate { |
128 | namespace Tok { |
129 | |
130 | constexpr qsizetype size(QChar) noexcept { return 1; } |
131 | template <typename String> |
132 | constexpr qsizetype size(const String &s) noexcept { return static_cast<qsizetype>(s.size()); } |
133 | |
134 | template <typename String> struct ViewForImpl {}; |
135 | template <> struct ViewForImpl<QStringView> { using type = QStringView; }; |
136 | template <> struct ViewForImpl<QLatin1StringView> { using type = QLatin1StringView; }; |
137 | template <> struct ViewForImpl<QChar> { using type = QChar; }; |
138 | template <> struct ViewForImpl<QString> : ViewForImpl<QStringView> {}; |
139 | template <> struct ViewForImpl<QLatin1Char> : ViewForImpl<QChar> {}; |
140 | template <> struct ViewForImpl<char16_t> : ViewForImpl<QChar> {}; |
141 | template <> struct ViewForImpl<char16_t*> : ViewForImpl<QStringView> {}; |
142 | template <> struct ViewForImpl<const char16_t*> : ViewForImpl<QStringView> {}; |
143 | template <typename LHS, typename RHS> |
144 | struct ViewForImpl<QStringBuilder<LHS, RHS>> : ViewForImpl<typename QStringBuilder<LHS,RHS>::ConvertTo> {}; |
145 | template <typename Char, typename...Args> |
146 | struct ViewForImpl<std::basic_string<Char, Args...>> : ViewForImpl<Char*> {}; |
147 | #ifdef __cpp_lib_string_view |
148 | template <typename Char, typename...Args> |
149 | struct ViewForImpl<std::basic_string_view<Char, Args...>> : ViewForImpl<Char*> {}; |
150 | #endif |
151 | |
152 | // This metafunction maps a StringLike to a View (currently, QChar, |
153 | // QStringView, QLatin1StringView). This is what QStringTokenizerBase |
154 | // operates on. QStringTokenizer adds pinning to keep rvalues alive |
155 | // for the duration of the algorithm. |
156 | template <typename String> |
157 | using ViewFor = typename ViewForImpl<typename std::decay<String>::type>::type; |
158 | |
159 | // Pinning: |
160 | // rvalues of owning string types need to be moved into QStringTokenizer |
161 | // to keep them alive for the lifetime of the tokenizer. For lvalues, we |
162 | // assume the user takes care of that. |
163 | |
164 | // default: don't pin anything (characters are pinned implicitly) |
165 | template <typename String> |
166 | struct PinForImpl { using type = ViewFor<String>; }; |
167 | |
168 | // rvalue QString -> QString |
169 | template <> |
170 | struct PinForImpl<QString> { using type = QString; }; |
171 | |
172 | // rvalue std::basic_string -> basic_string |
173 | template <typename Char, typename...Args> |
174 | struct PinForImpl<std::basic_string<Char, Args...>> |
175 | { using type = std::basic_string<Char, Args...>; }; |
176 | |
177 | // rvalue QStringBuilder -> pin as the nested ConvertTo type |
178 | template <typename LHS, typename RHS> |
179 | struct PinForImpl<QStringBuilder<LHS, RHS>> |
180 | : PinForImpl<typename QStringBuilder<LHS, RHS>::ConvertTo> {}; |
181 | |
182 | template <typename StringLike> |
183 | using PinFor = typename PinForImpl<typename std::remove_cv<StringLike>::type>::type; |
184 | |
185 | template <typename T> struct is_owning_string_type : std::false_type {}; |
186 | template <> struct is_owning_string_type<QString> : std::true_type {}; |
187 | template <typename...Args> struct is_owning_string_type<std::basic_string<Args...>> : std::true_type {}; |
188 | |
189 | // unpinned |
190 | template <typename T, bool pinned = is_owning_string_type<T>::value> |
191 | struct Pinning |
192 | { |
193 | // this is the storage for non-pinned types - no storage |
194 | constexpr Pinning(const T&) noexcept {} |
195 | // Since we don't store something, the view() method needs to be |
196 | // given something it can return. |
197 | constexpr T view(T t) const noexcept { return t; } |
198 | }; |
199 | |
200 | // pinned |
201 | template <typename T> |
202 | struct Pinning<T, true> |
203 | { |
204 | T m_string; |
205 | // specialisation for owning string types (QString, std::u16string): |
206 | // stores the string: |
207 | constexpr Pinning(T &&s) noexcept : m_string{std::move(s)} {} |
208 | // ... and thus view() uses that instead of the argument passed in: |
209 | constexpr QStringView view(const T&) const noexcept { return m_string; } |
210 | }; |
211 | |
212 | // NeedlePinning and HaystackPinning are there to distinguish them as |
213 | // base classes of QStringTokenizer. We use inheritance to reap the |
214 | // empty base class optimization. |
215 | template <typename T> |
216 | struct NeedlePinning : Pinning<T> |
217 | { |
218 | using Pinning<T>::Pinning; |
219 | template <typename Arg> |
220 | constexpr auto needleView(Arg &&a) noexcept |
221 | -> decltype(this->view(std::forward<Arg>(a))) |
222 | { return this->view(std::forward<Arg>(a)); } |
223 | }; |
224 | |
225 | template <typename T> |
226 | struct HaystackPinning : Pinning<T> |
227 | { |
228 | using Pinning<T>::Pinning; |
229 | template <typename Arg> |
230 | constexpr auto haystackView(Arg &&a) noexcept |
231 | -> decltype(this->view(std::forward<Arg>(a))) |
232 | { return this->view(std::forward<Arg>(a)); } |
233 | }; |
234 | |
235 | // The Base of a QStringTokenizer is QStringTokenizerBase for the views |
236 | // corresponding to the Haystack and Needle template arguments |
237 | // |
238 | // ie. QStringTokenizer<QString, QString> |
239 | // : QStringTokenizerBase<QStringView, QStringView> (+ pinning) |
240 | template <typename Haystack, typename Needle> |
241 | using TokenizerBase = QStringTokenizerBase<ViewFor<Haystack>, ViewFor<Needle>>; |
242 | } // namespace Tok |
243 | } // namespace QtPrivate |
244 | |
245 | template <typename Haystack, typename Needle> |
246 | class QStringTokenizer |
247 | : private QtPrivate::Tok::HaystackPinning<Haystack>, |
248 | private QtPrivate::Tok::NeedlePinning<Needle>, |
249 | public QtPrivate::Tok::TokenizerBase<Haystack, Needle> |
250 | { |
251 | using HPin = QtPrivate::Tok::HaystackPinning<Haystack>; |
252 | using NPin = QtPrivate::Tok::NeedlePinning<Needle>; |
253 | using Base = QtPrivate::Tok::TokenizerBase<Haystack, Needle>; |
254 | template <typename Container, typename HPin> |
255 | struct if_haystack_not_pinned_impl : std::enable_if<std::is_empty<HPin>::value, bool> {}; |
256 | template <typename Container> |
257 | using if_haystack_not_pinned = typename if_haystack_not_pinned_impl<Container, HPin>::type; |
258 | template <typename Container, typename Iterator = decltype(std::begin(std::declval<Container>()))> |
259 | using if_compatible_container = typename std::enable_if< |
260 | std::is_convertible< |
261 | typename Base::value_type, |
262 | typename std::iterator_traits<Iterator>::value_type |
263 | >::value, |
264 | bool |
265 | >::type; |
266 | public: |
267 | using value_type = typename Base::value_type; |
268 | using difference_type = typename Base::difference_type; |
269 | using size_type = typename Base::size_type; |
270 | using reference = typename Base::reference; |
271 | using const_reference = typename Base::const_reference; |
272 | using pointer = typename Base::pointer; |
273 | using const_pointer = typename Base::const_pointer; |
274 | using iterator = typename Base::iterator; |
275 | using const_iterator = typename Base::const_iterator; |
276 | using sentinel = typename Base::sentinel; |
277 | |
278 | #ifdef Q_QDOC |
279 | [[nodiscard]] iterator begin() const noexcept { return Base::begin(); } |
280 | [[nodiscard]] iterator cbegin() const noexcept { return begin(); } |
281 | [[nodiscard]] constexpr sentinel end() const noexcept { return {}; } |
282 | [[nodiscard]] constexpr sentinel cend() const noexcept { return {}; } |
283 | #endif |
284 | |
285 | constexpr explicit QStringTokenizer(Haystack haystack, Needle needle, |
286 | Qt::CaseSensitivity cs, |
287 | Qt::SplitBehavior sb = Qt::KeepEmptyParts) |
288 | noexcept(std::is_nothrow_copy_constructible<QStringTokenizer>::value) |
289 | // here, we present the haystack to Pinning<>, for optional storing. |
290 | // If it did store, haystack is moved-from and mustn't be touched |
291 | // any longer, which is why view() for these Pinning<>s ignores the |
292 | // argument. |
293 | : HPin{std::forward<Haystack>(haystack)}, |
294 | NPin{std::forward<Needle>(needle)}, |
295 | // If Pinning<> didn't store, we pass the haystack (ditto needle) |
296 | // to view() again, so it can be copied from there. |
297 | Base{this->haystackView(haystack), |
298 | this->needleView(needle), sb, cs} |
299 | {} |
300 | constexpr explicit QStringTokenizer(Haystack haystack, Needle needle, |
301 | Qt::SplitBehavior sb = Qt::KeepEmptyParts, |
302 | Qt::CaseSensitivity cs = Qt::CaseSensitive) |
303 | noexcept(std::is_nothrow_copy_constructible<QStringTokenizer>::value) |
304 | : HPin{std::forward<Haystack>(haystack)}, |
305 | NPin{std::forward<Needle>(needle)}, |
306 | Base{this->haystackView(haystack), |
307 | this->needleView(needle), sb, cs} |
308 | {} |
309 | |
310 | #ifdef Q_QDOC |
311 | template<typename LContainer> LContainer toContainer(LContainer &&c = {}) const & {} |
312 | template<typename RContainer> RContainer toContainer(RContainer &&c = {}) const && {} |
313 | #else |
314 | template<typename Container = QList<value_type>, if_compatible_container<Container> = true> |
315 | Container toContainer(Container &&c = {}) const & |
316 | { |
317 | for (auto e : *this) |
318 | c.emplace_back(e); |
319 | return std::forward<Container>(c); |
320 | } |
321 | template<typename Container = QList<value_type>, if_compatible_container<Container> = true, |
322 | if_haystack_not_pinned<Container> = true> |
323 | Container toContainer(Container &&c = {}) const && |
324 | { |
325 | for (auto e : *this) |
326 | c.emplace_back(e); |
327 | return std::forward<Container>(c); |
328 | } |
329 | #endif |
330 | }; |
331 | |
332 | namespace QtPrivate { |
333 | namespace Tok { |
334 | // This meta function just calculated the template arguments for the |
335 | // QStringTokenizer (not -Base), based on the actual arguments passed |
336 | // to qTokenize() (or the ctor, with CTAD). It basically detects rvalue |
337 | // QString and std::basic_string and otherwise decays the arguments to |
338 | // the respective view type. |
339 | // |
340 | // #define works around a C++ restriction: [temp.deduct.guide]/3 seems |
341 | // to ask for the simple-template-id following the `->` of a deduction |
342 | // guide to be identical to the class name for which we guide deduction. |
343 | // In particular, Clang rejects a template alias there, while GCC accepts |
344 | // it. |
345 | #define Q_TOK_RESULT \ |
346 | QStringTokenizer< \ |
347 | QtPrivate::Tok::PinFor<Haystack>, \ |
348 | QtPrivate::Tok::PinFor<Needle> \ |
349 | > \ |
350 | /*end*/ |
351 | template <typename Haystack, typename Needle> |
352 | using TokenizerResult = Q_TOK_RESULT; |
353 | template <typename Haystack, typename Needle> |
354 | using is_nothrow_constructible_from = std::is_nothrow_copy_constructible<TokenizerResult<Haystack, Needle>>; |
355 | } |
356 | } |
357 | |
358 | #ifdef __cpp_deduction_guides |
359 | // these tell the compiler how to determine the QStringTokenizer |
360 | // template arguments based on the constructor arguments (CTAD): |
361 | template <typename Haystack, typename Needle> |
362 | QStringTokenizer(Haystack&&, Needle&&) |
363 | -> Q_TOK_RESULT; |
364 | template <typename Haystack, typename Needle> |
365 | QStringTokenizer(Haystack&&, Needle&&, Qt::SplitBehavior) |
366 | -> Q_TOK_RESULT; |
367 | template <typename Haystack, typename Needle> |
368 | QStringTokenizer(Haystack&&, Needle&&, Qt::SplitBehavior, Qt::CaseSensitivity) |
369 | -> Q_TOK_RESULT; |
370 | template <typename Haystack, typename Needle> |
371 | QStringTokenizer(Haystack&&, Needle&&, Qt::CaseSensitivity) |
372 | -> Q_TOK_RESULT; |
373 | template <typename Haystack, typename Needle> |
374 | QStringTokenizer(Haystack&&, Needle&&, Qt::CaseSensitivity, Qt::SplitBehavior) |
375 | -> Q_TOK_RESULT; |
376 | #endif |
377 | |
378 | #undef Q_TOK_RESULT |
379 | |
380 | template <typename Haystack, typename Needle, typename...Flags> |
381 | [[nodiscard]] constexpr auto |
382 | qTokenize(Haystack &&h, Needle &&n, Flags...flags) |
383 | noexcept(QtPrivate::Tok::is_nothrow_constructible_from<Haystack, Needle>::value) |
384 | -> decltype(QtPrivate::Tok::TokenizerResult<Haystack, Needle>{std::forward<Haystack>(h), |
385 | std::forward<Needle>(n), flags...}) |
386 | { return QtPrivate::Tok::TokenizerResult<Haystack, Needle>{std::forward<Haystack>(h), |
387 | std::forward<Needle>(n), |
388 | flags...}; } |
389 | |
390 | template <typename Haystack, typename Needle> |
391 | auto QStringTokenizerBase<Haystack, Needle>::next(tokenizer_state state) const noexcept -> next_result |
392 | { |
393 | while (true) { |
394 | if (state.end < 0) { |
395 | // already at end: |
396 | return {{}, false, state}; |
397 | } |
398 | state.end = m_haystack.indexOf(m_needle, state.start + state.extra, m_cs); |
399 | Haystack result; |
400 | if (state.end >= 0) { |
401 | // token separator found => return intermediate element: |
402 | result = m_haystack.sliced(state.start, state.end - state.start); |
403 | const auto ns = QtPrivate::Tok::size(m_needle); |
404 | state.start = state.end + ns; |
405 | state.extra = (ns == 0 ? 1 : 0); |
406 | } else { |
407 | // token separator not found => return final element: |
408 | result = m_haystack.sliced(state.start); |
409 | } |
410 | if ((m_sb & Qt::SkipEmptyParts) && result.isEmpty()) |
411 | continue; |
412 | return {result, true, state}; |
413 | } |
414 | } |
415 | |
416 | QT_END_NAMESPACE |
417 | |
418 | #endif /* QSTRINGTOKENIZER_H */ |
419 | |