Coverage for src/httpx/_urls.py: 84%
170 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-05-27 19:25 +0100
« prev ^ index » next coverage.py v7.6.12, created at 2025-05-27 19:25 +0100
1from __future__ import annotations
3import typing
5from ._urlparse import urlparse
6from ._urlencode import unquote, urldecode, urlencode
8__all__ = ["QueryParams", "URL"]
class URL:
    """
    A parsed URL, exposing each component through a read-only property.

    url = httpx.URL("HTTPS://jo%40email.com:a%20secret@müller.de:1234/pa%20th?search=ab#anchorlink")

    assert url.scheme == "https"
    assert url.username == "jo@email.com"
    assert url.password == "a secret"
    assert url.userinfo == b"jo%40email.com:a%20secret"
    assert url.port == 1234
    assert url.path == "/pa th"
    assert url.query == b"search=ab"
    assert url.target == "/pa%20th?search=ab"
    assert url.fragment == "anchorlink"

    The components of a URL are broken down like this:

      https://jo%40email.com:a%20secret@müller.de:1234/pa%20th?search=ab#anchorlink
    [scheme]   [  username  ] [password] [ host ][port][ path ] [ query ] [fragment]
               [       userinfo        ] [   netloc   ][     target     ]

    Note that:

    * `url.scheme` is normalized to always be lowercased.

    * `url.host` and `url.netloc` are strings, returned exactly as stored on the
      parsed URL reference.
      NOTE(review): whether internationalized domain names come back as unicode
      ("müller.de") or IDNA encoded ("xn--mller-kva.de") is decided by
      `urlparse`, which is not visible from this class - confirm there.

    * `url.port` is either None or an integer. URLs that include the default port for
      "http", "https", "ws", "wss", and "ftp" schemes have their port
      normalized to `None`.

      assert httpx.URL("http://example.com") == httpx.URL("http://example.com:80")
      assert httpx.URL("http://example.com").port is None
      assert httpx.URL("http://example.com:80").port is None

    * `url.userinfo` is raw bytes, with no URL decoding applied. Usually you'll want
      to work with `url.username` and `url.password` instead, which handle the
      URL decoding.

    * `url.target` is the path and query together, as a string with no URL decoding
      applied. This portion is used as the target when constructing HTTP requests.
      Usually you'll want to work with `url.path` instead.

    * `url.query` is raw bytes, with no URL decoding applied and without the
      leading "?". A URL query string portion can only be properly URL decoded
      when decoding the parameter names and values themselves.
    """

    def __init__(self, url: "URL" | str = "", **kwargs: typing.Any) -> None:
        """
        Build a URL from a string or another `URL`, optionally overriding components.

        `url` is either a string to parse or an existing `URL` to copy.
        `kwargs` may contain any of: scheme, username, password, userinfo, host,
        port, netloc, path, query, raw_path, fragment, params. A value of `None`
        is accepted for every component.

        Raises `TypeError` for an unknown keyword, for a keyword value of the
        wrong type, or when `url` is neither `str` nor `URL`.
        """
        # Normalized keyword arguments. Built separately so that the caller's
        # `kwargs` mapping is never rewritten while it is being iterated.
        components: dict[str, typing.Any] = {}

        if kwargs:
            allowed = {
                "scheme": str,
                "username": str,
                "password": str,
                "userinfo": bytes,
                "host": str,
                "port": int,
                "netloc": str,
                "path": str,
                "query": bytes,
                "raw_path": bytes,
                "fragment": str,
                "params": object,
            }

            # Perform type checking for all supported keyword arguments.
            for key, value in kwargs.items():
                if key not in allowed:
                    message = f"{key!r} is an invalid keyword argument for URL()"
                    raise TypeError(message)
                if value is not None and not isinstance(value, allowed[key]):
                    expected = allowed[key].__name__
                    seen = type(value).__name__
                    message = f"Argument {key!r} must be {expected} but got {seen}"
                    raise TypeError(message)
                # Bytes-valued components are handed to the parser as ASCII text.
                if isinstance(value, bytes):
                    value = value.decode("ascii")
                components[key] = value

            if "params" in components:
                # Replace any "params" keyword with the raw "query" instead.
                #
                # Ensure that empty params use `query = None` rather than
                # `query = ""`, so that generated URLs do not include an
                # empty trailing "?".
                params = components.pop("params")
                components["query"] = (
                    None if not params else str(QueryParams(params))
                )

        if isinstance(url, str):
            self._uri_reference = urlparse(url, **components)
        elif isinstance(url, URL):
            self._uri_reference = url._uri_reference.copy_with(**components)
        else:
            raise TypeError(
                "Invalid type for url. Expected str or httpx.URL,"
                f" got {type(url)}: {url!r}"
            )

    @property
    def scheme(self) -> str:
        """
        The URL scheme, such as "http", "https".
        Always normalised to lowercase.
        """
        return self._uri_reference.scheme

    @property
    def userinfo(self) -> bytes:
        """
        The URL userinfo as a raw bytestring.
        For example: b"jo%40email.com:a%20secret".
        """
        return self._uri_reference.userinfo.encode("ascii")

    @property
    def username(self) -> str:
        """
        The URL username as a string, with URL decoding applied.
        For example: "jo@email.com"
        """
        userinfo = self._uri_reference.userinfo
        # Everything before the first ":" of the userinfo is the username.
        return unquote(userinfo.partition(":")[0])

    @property
    def password(self) -> str:
        """
        The URL password as a string, with URL decoding applied.
        For example: "a secret"
        """
        userinfo = self._uri_reference.userinfo
        # Everything after the first ":" of the userinfo is the password.
        return unquote(userinfo.partition(":")[2])

    @property
    def host(self) -> str:
        """
        The URL host as a string, exactly as stored on the parsed URL reference.
        Always normalized to lowercase. Possibly IDNA encoded.

        Examples:

        url = httpx.URL("http://www.EXAMPLE.org")
        assert url.host == "www.example.org"

        url = httpx.URL("http://xn--fiqs8s.icom.museum")
        assert url.host == "xn--fiqs8s.icom.museum"

        url = httpx.URL("https://[::ffff:192.168.0.1]")
        assert url.host == "::ffff:192.168.0.1"

        NOTE(review): for a unicode input such as "http://中国.icom.museum" the
        result depends on whether `urlparse` IDNA-encodes the host; this
        property applies no encoding or decoding of its own - confirm.
        """
        return self._uri_reference.host

    @property
    def port(self) -> int | None:
        """
        The URL port as an integer, or `None`.

        Note that the URL class performs port normalization as per the WHATWG spec.
        Default ports for "http", "https", "ws", "wss", and "ftp" schemes are always
        treated as `None`.

        For example:

        assert httpx.URL("http://www.example.com") == httpx.URL("http://www.example.com:80")
        assert httpx.URL("http://www.example.com:80").port is None
        """
        return self._uri_reference.port

    @property
    def netloc(self) -> str:
        """
        Either `<host>` or `<host>:<port>`, as a string.

        Returned exactly as stored on the parsed URL reference.

        This property may be used for generating the value of a request
        "Host" header.
        """
        return self._uri_reference.netloc

    @property
    def path(self) -> str:
        """
        The URL path as a string. Excluding the query string, and URL decoded.
        An empty path is reported as "/".

        For example:

        url = httpx.URL("https://example.com/pa%20th")
        assert url.path == "/pa th"
        """
        path = self._uri_reference.path or "/"
        return unquote(path)

    @property
    def query(self) -> bytes:
        """
        The URL query string, as raw bytes, excluding the leading b"?".
        A URL without a query string yields b"".

        This is necessarily a bytewise interface, because we cannot
        perform URL decoding of this representation until we've parsed
        the keys and values into a QueryParams instance.

        For example:

        url = httpx.URL("https://example.com/?filter=some%20search%20terms")
        assert url.query == b"filter=some%20search%20terms"
        """
        query = self._uri_reference.query or ""
        return query.encode("ascii")

    @property
    def params(self) -> "QueryParams":
        """
        The URL query parameters, neatly parsed and packaged into an immutable
        multidict representation.
        """
        return QueryParams(self._uri_reference.query)

    @property
    def target(self) -> str:
        """
        The complete URL path and query string, as a string with no URL
        decoding applied. Used as the target when constructing HTTP requests.

        An empty path is reported as "/". The "?" separator is included
        whenever the query component is not `None`, even if it is empty.

        For example:

        GET /users?search=some%20text HTTP/1.1
        Host: www.example.org
        Connection: close
        """
        target = self._uri_reference.path or "/"
        if self._uri_reference.query is not None:
            target += "?" + self._uri_reference.query
        return target

    @property
    def fragment(self) -> str:
        """
        The URL fragments, as used in HTML anchors.
        As a string, URL decoded and without the leading '#'.
        """
        return unquote(self._uri_reference.fragment or "")

    @property
    def is_absolute_url(self) -> bool:
        """
        Return `True` for absolute URLs such as 'http://example.com/path',
        and `False` for relative URLs such as '/path'.
        """
        # We don't use `.is_absolute` from `rfc3986` because it treats
        # URLs with a fragment portion as not absolute.
        # What we actually care about is if the URL provides
        # a scheme and hostname to which connections should be made.
        return bool(self._uri_reference.scheme and self._uri_reference.host)

    @property
    def is_relative_url(self) -> bool:
        """
        Return `False` for absolute URLs such as 'http://example.com/path',
        and `True` for relative URLs such as '/path'.
        """
        return not self.is_absolute_url

    def copy_with(self, **kwargs: typing.Any) -> "URL":
        """
        Copy this URL, returning a new URL with some components altered.
        Accepts the same keyword arguments as the `URL` constructor.

        For example:

        url = httpx.URL("https://www.example.com").copy_with(
            username="jo@email.com", password="a secret"
        )
        assert url == "https://jo%40email.com:a%20secret@www.example.com"
        """
        return URL(self, **kwargs)

    def copy_set_param(self, key: str, value: typing.Any = None) -> "URL":
        """Return a copy of this URL with query parameter `key` set to `value`."""
        return self.copy_with(params=self.params.copy_set(key, value))

    def copy_append_param(self, key: str, value: typing.Any = None) -> "URL":
        """Return a copy of this URL with `value` appended to query parameter `key`."""
        return self.copy_with(params=self.params.copy_append(key, value))

    def copy_remove_param(self, key: str) -> "URL":
        """Return a copy of this URL with query parameter `key` removed."""
        return self.copy_with(params=self.params.copy_remove(key))

    def copy_merge_params(
        self,
        params: "QueryParams" | dict[str, str | list[str]] | list[tuple[str, str]] | None,
    ) -> "URL":
        """Return a copy of this URL with `params` merged over the existing query parameters."""
        return self.copy_with(params=self.params.copy_update(params))

    def join(self, url: "URL" | str) -> "URL":
        """
        Return an absolute URL, using this URL as the base.

        Eg.

        url = httpx.URL("https://www.example.com/test")
        url = url.join("/new/path")
        assert url == "https://www.example.com/new/path"
        """
        from urllib.parse import urljoin

        # `URL(url)` validates and normalizes the argument before joining.
        return URL(urljoin(str(self), str(URL(url))))

    def __hash__(self) -> int:
        return hash(str(self))

    def __eq__(self, other: typing.Any) -> bool:
        # Strings are parsed into a `URL` first, so comparison happens on the
        # normalized string form. Any other type compares unequal.
        return isinstance(other, (URL, str)) and str(self) == str(URL(other))

    def __str__(self) -> str:
        return str(self._uri_reference)

    def __repr__(self) -> str:
        return f"<URL {str(self)!r}>"
class QueryParams(typing.Mapping[str, str]):
    """
    URL query parameters, as a multi-dict.

    Each key maps to a list of values. The `Mapping` interface (`q[key]`,
    `.get()`, `.values()`, `.items()`) exposes only the first value for each
    key; use `.get_list()`, `.multi_items()` or `.multi_dict()` to see them all.
    Instances are treated as immutable: the `copy_*` methods return new
    instances rather than modifying the existing one.
    """

    def __init__(
        self,
        params: (
            "QueryParams" | dict[str, str | list[str]] | list[tuple[str, str]] | str | None
        ) = None,
    ) -> None:
        """
        Build the query parameters from one of the supported representations:

        * `None` - no parameters.
        * `str` - a query string, decoded with `urldecode`.
        * `QueryParams` - copied.
        * `dict` - values may be a single string or an iterable of strings.
        * any other iterable of `(key, value)` pairs.
        """
        d: dict[str, list[str]] = {}

        if params is None:
            pass
        elif isinstance(params, str):
            d = urldecode(params)
        elif isinstance(params, QueryParams):
            d = params.multi_dict()
        elif isinstance(params, dict):
            # Convert dict inputs like:
            #     {"a": "123", "b": ["456", "789"]}
            # To dict inputs where values are always lists, like:
            #     {"a": ["123"], "b": ["456", "789"]}
            d = {k: [v] if isinstance(v, str) else list(v) for k, v in params.items()}
        else:
            # Convert list inputs like:
            #     [("a", "123"), ("a", "456"), ("b", "789")]
            # To a dict representation, like:
            #     {"a": ["123", "456"], "b": ["789"]}
            for k, v in params:
                d.setdefault(k, []).append(v)

        self._dict = d

    def keys(self) -> typing.KeysView[str]:
        """
        Return all the keys in the query params.

        Usage:

        q = httpx.QueryParams("a=123&a=456&b=789")
        assert list(q.keys()) == ["a", "b"]
        """
        return self._dict.keys()

    def values(self) -> typing.ValuesView[str]:
        """
        Return all the values in the query params. If a key occurs more than once
        only the first item for that key is returned.

        Usage:

        q = httpx.QueryParams("a=123&a=456&b=789")
        assert list(q.values()) == ["123", "789"]
        """
        return {k: v[0] for k, v in self._dict.items()}.values()

    def items(self) -> typing.ItemsView[str, str]:
        """
        Return all items in the query params. If a key occurs more than once
        only the first item for that key is returned.

        Usage:

        q = httpx.QueryParams("a=123&a=456&b=789")
        assert list(q.items()) == [("a", "123"), ("b", "789")]
        """
        return {k: v[0] for k, v in self._dict.items()}.items()

    def multi_items(self) -> list[tuple[str, str]]:
        """
        Return all items in the query params. Allow duplicate keys to occur.

        Usage:

        q = httpx.QueryParams("a=123&a=456&b=789")
        assert list(q.multi_items()) == [("a", "123"), ("a", "456"), ("b", "789")]
        """
        return [(k, item) for k, v in self._dict.items() for item in v]

    def multi_dict(self) -> dict[str, list[str]]:
        """
        Return the query params as a new dict mapping each key to a new list
        of all its values.
        """
        return {k: list(v) for k, v in self._dict.items()}

    def get(self, key: str, default: typing.Any = None) -> typing.Any:
        """
        Get a value from the query param for a given key. If the key occurs
        more than once, then only the first value is returned.
        Returns `default` when the key is not present.

        Usage:

        q = httpx.QueryParams("a=123&a=456&b=789")
        assert q.get("a") == "123"
        """
        if key in self._dict:
            return self._dict[key][0]
        return default

    def get_list(self, key: str) -> list[str]:
        """
        Get all values from the query param for a given key, as a new list.
        Returns an empty list when the key is not present.

        Usage:

        q = httpx.QueryParams("a=123&a=456&b=789")
        assert q.get_list("a") == ["123", "456"]
        """
        return list(self._dict.get(key, []))

    def copy_set(self, key: str, value: str) -> "QueryParams":
        """
        Return a new QueryParams instance, setting the value of a key.
        Any existing values for that key are replaced.

        Usage:

        q = httpx.QueryParams("a=123")
        q = q.copy_set("a", "456")
        assert q == httpx.QueryParams("a=456")
        """
        q = QueryParams()
        q._dict = dict(self._dict)
        q._dict[key] = [value]
        return q

    def copy_append(self, key: str, value: str) -> "QueryParams":
        """
        Return a new QueryParams instance, setting or appending the value of a key.

        Usage:

        q = httpx.QueryParams("a=123")
        q = q.copy_append("a", "456")
        assert q == httpx.QueryParams("a=123&a=456")
        """
        q = QueryParams()
        q._dict = dict(self._dict)
        # `get_list` returns a fresh list, so the list held by `self` is
        # never mutated.
        q._dict[key] = q.get_list(key) + [value]
        return q

    def copy_remove(self, key: str) -> QueryParams:
        """
        Return a new QueryParams instance, removing the value of a key.
        Removing a key that is not present is not an error.

        Usage:

        q = httpx.QueryParams("a=123")
        q = q.copy_remove("a")
        assert q == httpx.QueryParams("")
        """
        q = QueryParams()
        q._dict = dict(self._dict)
        q._dict.pop(str(key), None)
        return q

    def copy_update(
        self,
        params: (
            "QueryParams" | dict[str, str | list[str]] | list[tuple[str, str]] | None
        ) = None,
    ) -> "QueryParams":
        """
        Return a new QueryParams instance, updated with the given params.
        Keys present in `params` replace all existing values for that key;
        other keys are kept unchanged.

        Usage:

        q = httpx.QueryParams("a=123")
        q = q.copy_update({"b": "456"})
        assert q == httpx.QueryParams("a=123&b=456")

        q = httpx.QueryParams("a=123")
        q = q.copy_update({"a": "456", "b": "789"})
        assert q == httpx.QueryParams("a=456&b=789")
        """
        q = QueryParams(params)
        q._dict = {**self._dict, **q._dict}
        return q

    def __getitem__(self, key: str) -> str:
        return self._dict[key][0]

    def __contains__(self, key: typing.Any) -> bool:
        return key in self._dict

    def __iter__(self) -> typing.Iterator[str]:
        return iter(self.keys())

    def __len__(self) -> int:
        return len(self._dict)

    def __bool__(self) -> bool:
        return bool(self._dict)

    def __hash__(self) -> int:
        # Must agree with `__eq__`, which ignores ordering: hash the same
        # sorted item sequence that equality compares, so that equal
        # instances always hash equally.
        return hash(tuple(sorted(self.multi_items())))

    def __eq__(self, other: typing.Any) -> bool:
        if not isinstance(other, self.__class__):
            return False
        # Equality is order-insensitive: only the set of (key, value) pairs,
        # with multiplicity, is compared.
        return sorted(self.multi_items()) == sorted(other.multi_items())

    def __str__(self) -> str:
        return urlencode(self.multi_dict())

    def __repr__(self) -> str:
        return f"<QueryParams {str(self)!r}>"