Ada 3.4.4
Fast spec-compliant URL parser
Loading...
Searching...
No Matches
parse.cc
Go to the documentation of this file.
1#include <fuzzer/FuzzedDataProvider.h>
2
3#include <cassert>
4#include <cstdio>
5#include <memory>
6#include <string>
7
8#include "ada.cpp"
9#include "ada.h"
10
// Scalar UTF-8 validator.
//
// Returns true when the `len` bytes starting at `buf` form a well-formed
// UTF-8 sequence, and false otherwise. An empty input is considered valid.
// Rejected inputs include: stray continuation bytes, truncated multi-byte
// sequences, overlong encodings, UTF-16 surrogates (U+D800..U+DFFF) and code
// points above U+10FFFF.
bool is_valid_utf8_string(const char *buf, size_t len) {
  const uint8_t *bytes = reinterpret_cast<const uint8_t *>(buf);
  // A continuation byte has the bit pattern 0b10xxxxxx.
  const auto is_continuation = [](uint8_t b) { return (b & 0xC0) == 0x80; };

  uint64_t cursor = 0;
  while (cursor < len) {
    // Fast path: when 16 bytes are available, load them as two 64-bit words
    // and skip the whole window if no byte has its high bit set (all ASCII).
    if (cursor + 16 <= len) {
      uint64_t halves[2];
      std::memcpy(halves, bytes + cursor, sizeof(halves));
      if (((halves[0] | halves[1]) & 0x8080808080808080) == 0) {
        cursor += 16;
        continue;
      }
    }

    // Step over ASCII bytes one at a time. Running off the end of the input
    // while doing so means everything seen so far was valid.
    uint8_t lead = bytes[cursor];
    while (lead < 0x80) {
      ++cursor;
      if (cursor == len) {
        return true;
      }
      lead = bytes[cursor];
    }

    // Classify the lead byte: total sequence length plus the inclusive range
    // of code points that this length is allowed to encode.
    uint64_t seq_len = 0;
    uint32_t lowest = 0;
    uint32_t highest = 0;
    if ((lead & 0xE0) == 0xC0) {  // 0b110xxxxx
      seq_len = 2;
      lowest = 0x80;
      highest = 0x7ff;
    } else if ((lead & 0xF0) == 0xE0) {  // 0b1110xxxx
      seq_len = 3;
      lowest = 0x800;
      highest = 0xffff;
    } else if ((lead & 0xF8) == 0xF0) {  // 0b11110xxx
      seq_len = 4;
      lowest = 0x10000;
      highest = 0x10ffff;
    } else {
      // Stray continuation byte or an invalid lead byte.
      return false;
    }

    // The sequence must fit inside the buffer before any trail byte is read.
    if (cursor + seq_len > len) {
      return false;
    }

    // The lead byte contributes its low (7 - seq_len) bits; every trail byte
    // contributes six more.
    uint32_t code_point = lead & (0xFFu >> (seq_len + 1));
    for (uint64_t k = 1; k < seq_len; ++k) {
      const uint8_t trail = bytes[cursor + k];
      if (!is_continuation(trail)) {
        return false;
      }
      code_point = (code_point << 6) | (trail & 0x3Fu);
    }

    // Reject overlong encodings and out-of-range values.
    if (code_point < lowest || highest < code_point) {
      return false;
    }
    // Surrogates are only reachable through the three-byte form.
    if (seq_len == 3 && 0xd7ff < code_point && code_point < 0xe000) {
      return false;
    }

    cursor += seq_len;
  }
  return true;
}
94
95// Exercise all getters and boolean predicates on ada::url
96static void exercise_url_predicates(const ada::url &u) {
97 volatile size_t length = 0;
98 length += u.get_href().size();
99 length += u.get_origin().size();
100 length += u.get_protocol().size();
101 length += u.get_username().size();
102 length += u.get_password().size();
103 length += u.get_host().size();
104 length += u.get_hostname().size();
105 length += u.get_pathname().size();
106 length += u.get_search().size();
107 length += u.get_hash().size();
108 length += u.get_port().size();
109 length += u.to_string().size();
110 length += u.get_pathname_length();
111 (void)u.has_valid_domain();
112 (void)u.has_credentials();
113 (void)u.has_empty_hostname();
114 (void)u.has_hostname();
115 (void)u.has_port();
116 (void)u.has_hash();
117 (void)u.has_search();
118 (void)u.get_components();
119}
120
121// Exercise all getters and boolean predicates on ada::url_aggregator
123 volatile size_t length = 0;
124 length += u.get_href().size();
125 length += u.get_origin().size();
126 length += u.get_protocol().size();
127 length += u.get_username().size();
128 length += u.get_password().size();
129 length += u.get_host().size();
130 length += u.get_hostname().size();
131 length += u.get_pathname().size();
132 length += u.get_search().size();
133 length += u.get_hash().size();
134 length += u.get_port().size();
135 length += u.to_string().size();
136 length += u.get_pathname_length();
137 (void)u.has_valid_domain();
138 (void)u.has_credentials();
139 (void)u.has_empty_hostname();
140 (void)u.has_hostname();
141 (void)u.has_non_empty_username();
142 (void)u.has_non_empty_password();
143 (void)u.has_password();
144 (void)u.has_port();
145 (void)u.has_hash();
146 (void)u.has_search();
147 (void)u.get_components();
148 volatile bool is_valid = u.validate();
149 (void)is_valid;
150 (void)u.to_diagram();
151}
152
153extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
154 FuzzedDataProvider fdp(data, size);
155 std::string source = fdp.ConsumeRandomLengthString(256);
156 std::string base = fdp.ConsumeRandomLengthString(256);
157
158 // volatile forces the compiler to store the results without undue
159 // optimizations
160 volatile size_t length = 0;
161
162 auto parse_url = ada::parse<ada::url>(source);
163 auto parse_url_aggregator = ada::parse<ada::url_aggregator>(source);
164
165 if (is_valid_utf8_string(source.data(), source.length())) {
166 if (parse_url.has_value() ^ parse_url_aggregator.has_value()) {
167 printf("Source used to parse: %s", source.c_str());
168 abort();
169 }
170 }
171
172 if (parse_url) {
173 length += parse_url->get_href().size();
174 length += parse_url->get_origin().size();
175 }
176
177 if (parse_url_aggregator) {
178 length += parse_url_aggregator->get_href().size();
179 length += parse_url_aggregator->get_origin().size();
180
181 volatile bool is_parse_url_aggregator_output_valid = false;
182 is_parse_url_aggregator_output_valid = parse_url_aggregator->validate();
183
184 assert(parse_url->get_protocol() == parse_url_aggregator->get_protocol());
185 assert(parse_url->get_href() == parse_url_aggregator->get_href());
186 assert(std::string(parse_url->get_hostname()) ==
187 std::string(parse_url_aggregator->get_hostname()));
188 assert(std::string(parse_url->get_pathname()) ==
189 std::string(parse_url_aggregator->get_pathname()));
190 assert(std::string(parse_url->get_search()) ==
191 std::string(parse_url_aggregator->get_search()));
192 assert(std::string(parse_url->get_hash()) ==
193 std::string(parse_url_aggregator->get_hash()));
194 assert(std::string(parse_url->get_port()) ==
195 std::string(parse_url_aggregator->get_port()));
196 assert(parse_url->get_username() ==
197 std::string(parse_url_aggregator->get_username()));
198 assert(parse_url->get_password() ==
199 std::string(parse_url_aggregator->get_password()));
200 assert(std::string(parse_url->get_host()) ==
201 std::string(parse_url_aggregator->get_host()));
202
203 // Exercise all predicates on both types
204 exercise_url_predicates(*parse_url);
205 exercise_aggregator_predicates(*parse_url_aggregator);
206
207 // Test set_href consistency
208 parse_url->set_href(source);
209 parse_url_aggregator->set_href(source);
210 assert(parse_url->get_href() == parse_url_aggregator->get_href());
211 }
212
216 if (parse_url) {
217 // Copy constructor
218 ada::url copied_url = *parse_url;
219 assert(copied_url.get_href() == parse_url->get_href());
220
221 // Copy assignment
222 ada::url assigned_url;
223 assigned_url = *parse_url;
224 assert(assigned_url.get_href() == parse_url->get_href());
225
226 // Move constructor
227 ada::url moved_url = std::move(copied_url);
228 assert(moved_url.get_href() == parse_url->get_href());
229 }
230
231 if (parse_url_aggregator) {
232 // Copy constructor
233 ada::url_aggregator copied_agg = *parse_url_aggregator;
234 assert(std::string(copied_agg.get_href()) ==
235 std::string(parse_url_aggregator->get_href()));
236
237 // Copy assignment
238 ada::url_aggregator assigned_agg;
239 assigned_agg = *parse_url_aggregator;
240 assert(std::string(assigned_agg.get_href()) ==
241 std::string(parse_url_aggregator->get_href()));
242
243 // Move constructor
244 ada::url_aggregator moved_agg = std::move(copied_agg);
245 assert(std::string(moved_agg.get_href()) ==
246 std::string(parse_url_aggregator->get_href()));
247
248 // Move assignment
249 ada::url_aggregator move_assigned_agg;
250 move_assigned_agg = std::move(assigned_agg);
251 assert(std::string(move_assigned_agg.get_href()) ==
252 std::string(parse_url_aggregator->get_href()));
253 }
254
258 auto out_url = ada::parse<ada::url>("https://www.ada-url.com");
259
260 if (out_url) {
261 out_url->set_protocol(source);
262 out_url->set_username(source);
263 out_url->set_password(source);
264 out_url->set_hostname(source);
265 out_url->set_host(source);
266 out_url->set_pathname(source);
267 out_url->set_search(source);
268 out_url->set_hash(source);
269 out_url->set_port(source);
270
271 // getters
272 length += out_url->get_protocol().size();
273 length += out_url->get_username().size();
274 length += out_url->get_password().size();
275 length += out_url->get_hostname().size();
276 length += out_url->get_host().size();
277 length += out_url->get_pathname().size();
278 length += out_url->get_search().size();
279 length += out_url->get_hash().size();
280 length += out_url->get_origin().size();
281 length += out_url->get_port().size();
282 length += out_url->get_pathname_length();
283
284 length += out_url->to_string().size();
285
286 // boolean predicates after setters
287 (void)out_url->has_valid_domain();
288 (void)out_url->has_credentials();
289 (void)out_url->has_empty_hostname();
290 (void)out_url->has_hostname();
291 (void)out_url->has_port();
292 (void)out_url->has_hash();
293 (void)out_url->has_search();
294 (void)out_url->get_components();
295 }
296
300 auto out_aggregator =
301 ada::parse<ada::url_aggregator>("https://www.ada-url.com");
302
303 if (out_aggregator) {
304 out_aggregator->set_protocol(source);
305 out_aggregator->set_username(source);
306 out_aggregator->set_password(source);
307 out_aggregator->set_hostname(source);
308 out_aggregator->set_host(source);
309 out_aggregator->set_pathname(source);
310 out_aggregator->set_search(source);
311 out_aggregator->set_hash(source);
312 out_aggregator->set_port(source);
313
314 // getters
315 length += out_aggregator->get_protocol().size();
316 length += out_aggregator->get_username().size();
317 length += out_aggregator->get_password().size();
318 length += out_aggregator->get_hostname().size();
319 length += out_aggregator->get_host().size();
320 length += out_aggregator->get_pathname().size();
321 length += out_aggregator->get_search().size();
322 length += out_aggregator->get_hash().size();
323 length += out_aggregator->get_origin().size();
324 length += out_aggregator->get_port().size();
325 length += out_aggregator->get_pathname_length();
326
327 length += out_aggregator->to_string().size();
328
329 volatile bool is_output_valid = false;
330 is_output_valid = out_aggregator->validate();
331
332 (void)out_aggregator->to_diagram();
333
334 // boolean predicates after setters
335 (void)out_aggregator->has_valid_domain();
336 (void)out_aggregator->has_credentials();
337 (void)out_aggregator->has_empty_hostname();
338 (void)out_aggregator->has_hostname();
339 (void)out_aggregator->has_non_empty_username();
340 (void)out_aggregator->has_non_empty_password();
341 (void)out_aggregator->has_password();
342 (void)out_aggregator->has_port();
343 (void)out_aggregator->has_hash();
344 (void)out_aggregator->has_search();
345 (void)out_aggregator->get_components();
346
347 // clear methods
348 out_aggregator->clear_port();
349 out_aggregator->clear_search();
350 out_aggregator->clear_hash();
351 }
352
356 auto base_url = ada::parse<ada::url>(base);
357 auto base_agg = ada::parse<ada::url_aggregator>(base);
358
359 if (base_url) {
360 auto result = ada::parse<ada::url>(source, &*base_url);
361 if (result) {
362 length += result->get_href().size();
363 length += result->get_origin().size();
365 }
366 }
367
368 if (base_agg) {
369 auto result = ada::parse<ada::url_aggregator>(source, &*base_agg);
370 if (result) {
371 length += result->get_href().size();
372 length += result->get_origin().size();
374 }
375 }
376
377 // Cross-type consistency: relative URL parsing with a base should agree
378 // between url and url_aggregator representations for valid UTF-8 inputs.
379 if (is_valid_utf8_string(source.data(), source.length()) &&
380 is_valid_utf8_string(base.data(), base.length()) && base_url &&
381 base_agg) {
382 auto res_url = ada::parse<ada::url>(source, &*base_url);
383 auto res_agg = ada::parse<ada::url_aggregator>(source, &*base_agg);
384 if (res_url.has_value() ^ res_agg.has_value()) {
385 printf("Relative parse inconsistency for source=%s base=%s\n",
386 source.c_str(), base.c_str());
387 abort();
388 }
389 if (res_url && res_agg) {
390 if (res_url->get_href() != std::string(res_agg->get_href())) {
391 printf("Relative parse href mismatch for source=%s base=%s\n",
392 source.c_str(), base.c_str());
393 abort();
394 }
395 }
396 }
397
402 if (base_agg) {
403 auto level1 = ada::parse<ada::url_aggregator>(source, &*base_agg);
404 if (level1) {
405 std::string input2 = fdp.ConsumeRandomLengthString(128);
406 auto level2 = ada::parse<ada::url_aggregator>(input2, &*level1);
407 if (level2) {
408 length += level2->get_href().size();
409 volatile bool v = level2->validate();
410 (void)v;
411 }
412 }
413 }
414
420 {
421 auto known_base =
422 ada::parse<ada::url_aggregator>("https://example.com/a/b/c?query#hash");
423 if (known_base) {
424 auto result = ada::parse<ada::url_aggregator>(source, &*known_base);
425 if (result) {
426 length += result->get_href().size();
428 }
429 }
430 }
431
435 length += ada::href_from_file(source).size();
436
440 bool is_valid = ada::checkers::verify_dns_length(source);
441
442 (void)is_valid;
443
444 return 0;
445} // extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
Main header for the Ada URL parser library.
std::string href_from_file(std::string_view path)
ada_warn_unused ada::result< result_type > parse(std::string_view input, const result_type *base_url=nullptr)
bool is_valid_utf8_string(const char *buf, size_t len)
Definition parse.cc:11
static void exercise_url_predicates(const ada::url &u)
Definition parse.cc:96
static void exercise_aggregator_predicates(const ada::url_aggregator &u)
Definition parse.cc:122
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
Definition parse.cc:153
Memory-efficient URL representation using a single buffer.
ada_really_inline const url_components & get_components() const noexcept
constexpr bool has_non_empty_password() const noexcept
constexpr bool validate() const noexcept
std::string_view get_search() const ada_lifetime_bound
std::string_view get_hash() const ada_lifetime_bound
std::string to_string() const override
std::string_view get_password() const ada_lifetime_bound
std::string_view get_host() const ada_lifetime_bound
std::string_view get_username() const ada_lifetime_bound
std::string_view get_port() const ada_lifetime_bound
std::string to_diagram() const
constexpr bool has_hostname() const noexcept
constexpr bool has_search() const noexcept override
std::string get_origin() const override
constexpr std::string_view get_href() const noexcept ada_lifetime_bound
constexpr bool has_empty_hostname() const noexcept
constexpr bool has_password() const noexcept
ada_really_inline uint32_t get_pathname_length() const noexcept
bool has_valid_domain() const noexcept override
constexpr bool has_hash() const noexcept override
constexpr std::string_view get_pathname() const ada_lifetime_bound
std::string_view get_hostname() const ada_lifetime_bound
std::string_view get_protocol() const ada_lifetime_bound
constexpr bool has_port() const noexcept
ada_really_inline constexpr bool has_credentials() const noexcept
constexpr bool has_non_empty_username() const noexcept
Represents a parsed URL with individual string components.
Definition url.h:62
ada_really_inline ada::url_components get_components() const noexcept
Definition url-inl.h:50
bool has_empty_hostname() const noexcept
Definition url-inl.h:29
bool has_port() const noexcept
Definition url-inl.h:22
ada_really_inline bool has_credentials() const noexcept
Definition url-inl.h:19
ada_really_inline size_t get_pathname_length() const noexcept
Definition url-inl.h:42
std::string get_search() const
Definition url.cpp:659
std::string get_port() const
Definition url.cpp:674
std::string get_protocol() const
Definition url.cpp:633
bool has_hostname() const noexcept
Definition url-inl.h:35
std::string get_host() const
Definition url.cpp:641
constexpr std::string_view get_pathname() const noexcept
Definition url-inl.h:46
std::string get_hostname() const
Definition url.cpp:655
ada_really_inline std::string get_href() const
Definition url-inl.h:188
std::string get_origin() const override
Definition url.cpp:607
const std::string & get_password() const noexcept
Definition url.cpp:670
const std::string & get_username() const noexcept
Definition url.cpp:666
constexpr bool has_search() const noexcept override
Definition url-inl.h:164
std::string to_string() const override
Definition url.cpp:550
std::string get_hash() const
Definition url.cpp:678
constexpr bool has_hash() const noexcept override
Definition url-inl.h:160
bool has_valid_domain() const noexcept override
Definition url.cpp:600