Coverage for src/httpx/_urls.py: 84%

170 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-05-27 19:25 +0100

1from __future__ import annotations 

2 

3import typing 

4 

5from ._urlparse import urlparse 

6from ._urlencode import unquote, urldecode, urlencode 

7 

8__all__ = ["QueryParams", "URL"] 

9 

10 

class URL:
    """
    A parsed URL, exposing its individual components as properties.

        url = httpx.URL("HTTPS://jo%40email.com:a%20secret@müller.de:1234/pa%20th?search=ab#anchorlink")

        assert url.scheme == "https"
        assert url.username == "jo@email.com"
        assert url.password == "a secret"
        assert url.userinfo == b"jo%40email.com:a%20secret"
        assert url.host == "müller.de"
        assert url.port == 1234
        assert url.netloc == "xn--mller-kva.de:1234"
        assert url.path == "/pa th"
        assert url.query == b"search=ab"
        assert url.target == "/pa%20th?search=ab"
        assert url.fragment == "anchorlink"

    The components of a URL are broken down like this:

          https://jo%40email.com:a%20secret@müller.de:1234/pa%20th?search=ab#anchorlink
        [scheme]   [  username  ] [password] [ host ][port][ path ] [ query ] [fragment]
                   [       userinfo        ] [   netloc   ][    target     ]

    Note that:

    * `url.scheme` is normalized to always be lowercased.

    * `url.host` is normalized to always be lowercased.

      NOTE(review): the value is returned unchanged from the parsed URL
      reference, so whether internationalized domain names come back as unicode
      ("müller.de") or IDNA encoded ("xn--mller-kva.de") is decided by
      `urlparse`, which is not part of this class. Earlier revisions of the
      docstrings in this file disagreed on this point; confirm before relying
      on either form.

    * `url.netloc` is a string, either `<host>` or `<host>:<port>`.

    * `url.port` is either None or an integer. URLs that include the default port for
      "http", "https", "ws", "wss", and "ftp" schemes have their port
      normalized to `None`.

        assert httpx.URL("http://example.com") == httpx.URL("http://example.com:80")
        assert httpx.URL("http://example.com").port is None
        assert httpx.URL("http://example.com:80").port is None

    * `url.userinfo` is raw bytes, with no URL decoding applied. Usually you'll
      want to work with `url.username` and `url.password` instead, which handle
      the URL decoding.

    * `url.target` is the path and query together as a string, with no URL
      decoding applied. This portion is used as the target when constructing
      HTTP requests. Usually you'll want to work with `url.path` instead.

    * `url.query` is raw bytes, with no URL decoding applied, and without the
      leading "?". A URL query string portion can only be properly URL decoded
      when decoding the parameter names and values themselves.
    """

    # Keyword arguments accepted by `URL()` and `copy_with()`, mapped to the
    # type each value must be an instance of. `None` is accepted for any key.
    # Built once on the class rather than on every construction.
    _ALLOWED_KWARGS = {
        "scheme": str,
        "username": str,
        "password": str,
        "userinfo": bytes,
        "host": str,
        "port": int,
        "netloc": str,
        "path": str,
        "query": bytes,
        "raw_path": bytes,
        "fragment": str,
        "params": object,
    }

    def __init__(self, url: "URL | str" = "", **kwargs: typing.Any) -> None:
        """
        Build a URL from a string or another `URL`, optionally overriding
        individual components with keyword arguments.

        Raises `TypeError` if `url` is neither `str` nor `URL`, if an unknown
        keyword argument is given, or if a keyword argument has the wrong type.
        """
        if kwargs:
            allowed = self._ALLOWED_KWARGS

            # Perform type checking for all supported keyword arguments.
            # Only existing keys are reassigned inside the loop, so the dict
            # never changes size while it is being iterated.
            for key, value in kwargs.items():
                if key not in allowed:
                    message = f"{key!r} is an invalid keyword argument for URL()"
                    raise TypeError(message)
                if value is not None and not isinstance(value, allowed[key]):
                    expected = allowed[key].__name__
                    seen = type(value).__name__
                    message = f"Argument {key!r} must be {expected} but got {seen}"
                    raise TypeError(message)
                if isinstance(value, bytes):
                    # Components are handled as text from here on.
                    kwargs[key] = value.decode("ascii")

            if "params" in kwargs:
                # Replace any "params" keyword with the raw "query" instead.
                #
                # Ensure that empty params use `kwargs["query"] = None` rather
                # than `kwargs["query"] = ""`, so that generated URLs do not
                # include an empty trailing "?".
                #
                # Note that "params" takes precedence over an explicit "query".
                params = kwargs.pop("params")
                kwargs["query"] = None if not params else str(QueryParams(params))

        if isinstance(url, str):
            self._uri_reference = urlparse(url, **kwargs)
        elif isinstance(url, URL):
            self._uri_reference = url._uri_reference.copy_with(**kwargs)
        else:
            raise TypeError(
                "Invalid type for url.  Expected str or httpx.URL,"
                f" got {type(url)}: {url!r}"
            )

    @property
    def scheme(self) -> str:
        """
        The URL scheme, such as "http", "https".
        Always normalised to lowercase.
        """
        return self._uri_reference.scheme

    @property
    def userinfo(self) -> bytes:
        """
        The URL userinfo as a raw bytestring.
        For example: b"jo%40email.com:a%20secret".
        """
        return self._uri_reference.userinfo.encode("ascii")

    @property
    def username(self) -> str:
        """
        The URL username as a string, with URL decoding applied.
        For example: "jo@email.com"
        """
        userinfo = self._uri_reference.userinfo
        # Everything before the first ":" of the userinfo.
        return unquote(userinfo.partition(":")[0])

    @property
    def password(self) -> str:
        """
        The URL password as a string, with URL decoding applied.
        For example: "a secret"
        """
        userinfo = self._uri_reference.userinfo
        # Everything after the first ":" of the userinfo ("" if there is none).
        return unquote(userinfo.partition(":")[2])

    @property
    def host(self) -> str:
        """
        The URL host as a string.
        Always normalized to lowercase.

        Examples:

            url = httpx.URL("http://www.EXAMPLE.org")
            assert url.host == "www.example.org"

            url = httpx.URL("https://[::ffff:192.168.0.1]")
            assert url.host == "::ffff:192.168.0.1"

        NOTE(review): the value is returned as-is from the parsed reference.
        Whether internationalized names such as "中国.icom.museum" come back
        as unicode or IDNA encoded depends on `urlparse`; confirm there.
        """
        return self._uri_reference.host

    @property
    def port(self) -> int | None:
        """
        The URL port as an integer, or `None`.

        Note that the URL class performs port normalization as per the WHATWG spec.
        Default ports for "http", "https", "ws", "wss", and "ftp" schemes are always
        treated as `None`.

        For example:

            assert httpx.URL("http://www.example.com") == httpx.URL("http://www.example.com:80")
            assert httpx.URL("http://www.example.com:80").port is None
        """
        return self._uri_reference.port

    @property
    def netloc(self) -> str:
        """
        Either `<host>` or `<host>:<port>` as a string.
        Always normalized to lowercase, and IDNA encoded.

        This property may be used for generating the value of a request
        "Host" header.
        """
        return self._uri_reference.netloc

    @property
    def path(self) -> str:
        """
        The URL path as a string. Excluding the query string, and URL decoded.
        An empty path is reported as "/".

        For example:

            url = httpx.URL("https://example.com/pa%20th")
            assert url.path == "/pa th"
        """
        path = self._uri_reference.path or "/"
        return unquote(path)

    @property
    def query(self) -> bytes:
        """
        The URL query string, as raw bytes, excluding the leading b"?".
        A URL without a query string gives b"".

        This is necessarily a bytewise interface, because we cannot
        perform URL decoding of this representation until we've parsed
        the keys and values into a QueryParams instance.

        For example:

            url = httpx.URL("https://example.com/?filter=some%20search%20terms")
            assert url.query == b"filter=some%20search%20terms"
        """
        query = self._uri_reference.query or ""
        return query.encode("ascii")

    @property
    def params(self) -> "QueryParams":
        """
        The URL query parameters, neatly parsed and packaged into an immutable
        multidict representation.
        """
        return QueryParams(self._uri_reference.query)

    @property
    def target(self) -> str:
        """
        The complete URL path and query string, as a string with no URL
        decoding applied. Used as the target when constructing HTTP requests.

        For example:

            GET /users?search=some%20text HTTP/1.1
            Host: www.example.org
            Connection: close
        """
        target = self._uri_reference.path or "/"
        # A query of `None` means "no query"; an empty string still yields a
        # trailing "?".
        if self._uri_reference.query is not None:
            target += "?" + self._uri_reference.query
        return target

    @property
    def fragment(self) -> str:
        """
        The URL fragments, as used in HTML anchors.
        As a string, without the leading '#'.
        """
        return unquote(self._uri_reference.fragment or "")

    @property
    def is_absolute_url(self) -> bool:
        """
        Return `True` for absolute URLs such as 'http://example.com/path',
        and `False` for relative URLs such as '/path'.
        """
        # We don't use `.is_absolute` from `rfc3986` because it treats
        # URLs with a fragment portion as not absolute.
        # What we actually care about is if the URL provides
        # a scheme and hostname to which connections should be made.
        return bool(self._uri_reference.scheme and self._uri_reference.host)

    @property
    def is_relative_url(self) -> bool:
        """
        Return `False` for absolute URLs such as 'http://example.com/path',
        and `True` for relative URLs such as '/path'.
        """
        return not self.is_absolute_url

    def copy_with(self, **kwargs: typing.Any) -> "URL":
        """
        Copy this URL, returning a new URL with some components altered.
        Accepts the same keyword arguments as `URL()` itself.

        For example:

            url = httpx.URL("https://www.example.com").copy_with(
                username="jo@email.com", password="a secret"
            )
            assert url == "https://jo%40email.com:a%20secret@www.example.com"
        """
        return URL(self, **kwargs)

    def copy_set_param(self, key: str, value: typing.Any = None) -> "URL":
        """
        Return a new URL whose query parameter `key` is set to `value`,
        replacing any existing values for that key.

        The value is passed through unchanged to `QueryParams.copy_set`.
        """
        return self.copy_with(params=self.params.copy_set(key, value))

    def copy_append_param(self, key: str, value: typing.Any = None) -> "URL":
        """
        Return a new URL with `value` appended to the values of query
        parameter `key`.

        The value is passed through unchanged to `QueryParams.copy_append`.
        """
        return self.copy_with(params=self.params.copy_append(key, value))

    def copy_remove_param(self, key: str) -> "URL":
        """
        Return a new URL with every value of query parameter `key` removed.
        """
        return self.copy_with(params=self.params.copy_remove(key))

    def copy_merge_params(
        self,
        params: "QueryParams | dict[str, str | list[str]] | list[tuple[str, str]] | None",
    ) -> "URL":
        """
        Return a new URL whose query parameters are updated with `params`.
        Keys present in `params` replace the existing values for those keys.
        """
        return self.copy_with(params=self.params.copy_update(params))

    def join(self, url: "URL | str") -> "URL":
        """
        Return an absolute URL, using this URL as the base.

        Eg.

            url = httpx.URL("https://www.example.com/test")
            url = url.join("/new/path")
            assert url == "https://www.example.com/new/path"
        """
        from urllib.parse import urljoin

        # `url` is passed through `URL()` first so that it is validated and
        # rendered in the same normalized form as `self`.
        return URL(urljoin(str(self), str(URL(url))))

    def __hash__(self) -> int:
        return hash(str(self))

    def __eq__(self, other: typing.Any) -> bool:
        # Equality is defined on the rendered string form. A plain string is
        # parsed through `URL()` first so that both sides are rendered the
        # same way before comparing.
        return isinstance(other, (URL, str)) and str(self) == str(URL(other))

    def __str__(self) -> str:
        return str(self._uri_reference)

    def __repr__(self) -> str:
        return f"<URL {str(self)!r}>"

343 

344 

class QueryParams(typing.Mapping[str, str]):
    """
    URL query parameters, as a multi-dict.

    Instances are treated as immutable: the `copy_*` methods return new
    instances rather than modifying the one they are called on. The plain
    `Mapping` interface (`[]`, `get`, `values`, `items`) exposes only the
    first value for each key; use `get_list`, `multi_items` or `multi_dict`
    to see every value.
    """

    def __init__(
        self,
        params: "QueryParams | dict[str, str | list[str]] | list[tuple[str, str]] | str | None" = None,
    ) -> None:
        """
        Build the query parameters from one of:

        * `None` - no parameters.
        * a `str` - decoded with `urldecode`.
        * another `QueryParams` - copied.
        * a `dict` mapping each key to a string or to a list of strings.
          Keys whose list of values is empty are dropped.
        * an iterable of `(key, value)` pairs, where keys may repeat.
        """
        d: dict[str, list[str]] = {}

        if params is None:
            pass
        elif isinstance(params, str):
            d = urldecode(params)
        elif isinstance(params, QueryParams):
            d = params.multi_dict()
        elif isinstance(params, dict):
            # Convert dict inputs like:
            #     {"a": "123", "b": ["456", "789"]}
            # To dict inputs where values are always lists, like:
            #     {"a": ["123"], "b": ["456", "789"]}
            for k, v in params.items():
                values = [v] if isinstance(v, str) else list(v)
                # A key without any value cannot be represented in a query
                # string, and would make every first-value accessor raise
                # IndexError, so it is left out entirely.
                if values:
                    d[k] = values
        else:
            # Convert list inputs like:
            #     [("a", "123"), ("a", "456"), ("b", "789")]
            # To a dict representation, like:
            #     {"a": ["123", "456"], "b": ["789"]}
            for k, v in params:
                d.setdefault(k, []).append(v)

        self._dict = d

    def keys(self) -> typing.KeysView[str]:
        """
        Return all the keys in the query params.

        Usage:

            q = httpx.QueryParams("a=123&a=456&b=789")
            assert list(q.keys()) == ["a", "b"]
        """
        return self._dict.keys()

    def values(self) -> typing.ValuesView[str]:
        """
        Return all the values in the query params. If a key occurs more than once
        only the first item for that key is returned.

        Usage:

            q = httpx.QueryParams("a=123&a=456&b=789")
            assert list(q.values()) == ["123", "789"]
        """
        return {k: v[0] for k, v in self._dict.items()}.values()

    def items(self) -> typing.ItemsView[str, str]:
        """
        Return all items in the query params. If a key occurs more than once
        only the first item for that key is returned.

        Usage:

            q = httpx.QueryParams("a=123&a=456&b=789")
            assert list(q.items()) == [("a", "123"), ("b", "789")]
        """
        return {k: v[0] for k, v in self._dict.items()}.items()

    def multi_items(self) -> list[tuple[str, str]]:
        """
        Return all items in the query params. Allow duplicate keys to occur.

        Usage:

            q = httpx.QueryParams("a=123&a=456&b=789")
            assert list(q.multi_items()) == [("a", "123"), ("a", "456"), ("b", "789")]
        """
        return [(k, item) for k, v in self._dict.items() for item in v]

    def multi_dict(self) -> dict[str, list[str]]:
        """
        Return a new dict mapping each key to a new list of all its values.

        Usage:

            q = httpx.QueryParams("a=123&a=456&b=789")
            assert q.multi_dict() == {"a": ["123", "456"], "b": ["789"]}
        """
        return {k: list(v) for k, v in self._dict.items()}

    def get(self, key: str, default: typing.Any = None) -> typing.Any:
        """
        Get a value from the query param for a given key. If the key occurs
        more than once, then only the first value is returned. If the key is
        not present, `default` is returned.

        Usage:

            q = httpx.QueryParams("a=123&a=456&b=789")
            assert q.get("a") == "123"
        """
        if key in self._dict:
            return self._dict[key][0]
        return default

    def get_list(self, key: str) -> list[str]:
        """
        Get all values from the query param for a given key, as a new list.
        A missing key gives an empty list.

        Usage:

            q = httpx.QueryParams("a=123&a=456&b=789")
            assert q.get_list("a") == ["123", "456"]
        """
        return list(self._dict.get(key, []))

    def copy_set(self, key: str, value: str) -> "QueryParams":
        """
        Return a new QueryParams instance, setting the value of a key.
        Any existing values for that key are replaced.

        Usage:

            q = httpx.QueryParams("a=123")
            q = q.copy_set("a", "456")
            assert q == httpx.QueryParams("a=456")
        """
        q = QueryParams()
        # Shallow copy: value lists are shared with `self`, but are only ever
        # replaced wholesale, never mutated in place.
        q._dict = dict(self._dict)
        q._dict[key] = [value]
        return q

    def copy_append(self, key: str, value: str) -> "QueryParams":
        """
        Return a new QueryParams instance, setting or appending the value of a key.

        Usage:

            q = httpx.QueryParams("a=123")
            q = q.copy_append("a", "456")
            assert q == httpx.QueryParams("a=123&a=456")
        """
        q = QueryParams()
        q._dict = dict(self._dict)
        # `get_list` returns a fresh list, so the list held by `self` is left
        # untouched.
        q._dict[key] = q.get_list(key) + [value]
        return q

    def copy_remove(self, key: str) -> "QueryParams":
        """
        Return a new QueryParams instance, removing the value of a key.
        Removing a key that is not present is not an error.

        Usage:

            q = httpx.QueryParams("a=123")
            q = q.copy_remove("a")
            assert q == httpx.QueryParams("")
        """
        q = QueryParams()
        q._dict = dict(self._dict)
        q._dict.pop(str(key), None)
        return q

    def copy_update(
        self,
        params: "QueryParams | dict[str, str | list[str]] | list[tuple[str, str]] | None" = None,
    ) -> "QueryParams":
        """
        Return a new QueryParams instance, updated with the given params.
        Keys present in `params` replace all existing values for those keys;
        other keys are kept as they are.

        Usage:

            q = httpx.QueryParams("a=123")
            q = q.copy_update({"b": "456"})
            assert q == httpx.QueryParams("a=123&b=456")

            q = httpx.QueryParams("a=123")
            q = q.copy_update({"a": "456", "b": "789"})
            assert q == httpx.QueryParams("a=456&b=789")
        """
        q = QueryParams(params)
        q._dict = {**self._dict, **q._dict}
        return q

    def __getitem__(self, key: str) -> str:
        # First value only; raises KeyError for a missing key.
        return self._dict[key][0]

    def __contains__(self, key: typing.Any) -> bool:
        return key in self._dict

    def __iter__(self) -> typing.Iterator[str]:
        return iter(self.keys())

    def __len__(self) -> int:
        # Number of distinct keys, not number of key/value pairs.
        return len(self._dict)

    def __bool__(self) -> bool:
        return bool(self._dict)

    def __hash__(self) -> int:
        # Must agree with `__eq__`, which ignores ordering: hash the same
        # sorted sequence of pairs that `__eq__` compares, so that equal
        # instances always hash equal.
        return hash(tuple(sorted(self.multi_items())))

    def __eq__(self, other: typing.Any) -> bool:
        # Two instances are equal when they hold the same key/value pairs,
        # regardless of the order in which the pairs were given.
        if not isinstance(other, self.__class__):
            return False
        return sorted(self.multi_items()) == sorted(other.multi_items())

    def __str__(self) -> str:
        return urlencode(self.multi_dict())

    def __repr__(self) -> str:
        return f"<QueryParams {str(self)!r}>"