Coverage for src/httpx/_parsers.py: 99%

195 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-06-11 16:56 +0100

1import enum 

2 

3__all__ = ['HTTPParser', 'ProtocolError'] 

4 

5 

6# TODO... 

7 

8# * Expect: 100 continue 

9# * Connection: keep-alive / close 

10# * Transfer-Encoding: chunked 

11# * Upgrade: ... / (CONNECT?) 

12# * Host: required (ordering?) 

13 

14# * Add 'Error' state transitions 

15# * Add tests to trickle data 

16# * Add type annotations 

17 

18# * Integer conversions should always be bounded and +ve. `boundedint(..., maxdigits, base)` 

19# * Optional... HTTP/1.0 support 

20# * Read trailing headers on Transfer-Encoding: chunked. Not just '\r\n'. 

21# * When writing Transfer-Encoding: chunked, split large writes into buffer size. 

22# * When reading Transfer-Encoding: chunked, handle incomplete reads from large chunk sizes. 

23# * .read() doesn't document whether it will always return the maximum available data.

24 

25# * validate method, target, protocol in request line 

26# * validate protocol, status_code, reason_phrase in response line 

27# * validate name, value on headers 

28 

29 

@enum.unique
class State(enum.Enum):
    """
    Per-direction connection state.

    HTTPParser keeps one value for the request side and one for the
    response side, and advances each of them independently.
    """

    # Nothing sent/received yet for the current request/response cycle.
    IDLE = 0
    # Initial line has been handled; headers come next.
    SEND_HEADERS = 1
    # Headers have been handled; body data comes next.
    SEND_BODY = 2
    # Body finished and the connection may be reused.
    DONE = 3
    # Body finished, but 'Connection: close' was seen.
    MUST_CLOSE = 4
    # Not assigned by any code in this module yet.
    CLOSED = 5
    # Not assigned by any code in this module yet.
    ERROR = 6

38 

39 

class ProtocolError(Exception):
    """
    Raised when the HTTP exchange cannot proceed.

    Used both for API misuse (calling a method in the wrong state) and for
    invalid or truncated data on the connection.
    """

42 

43 

class HTTPParser:
    """
    Client-side HTTP/1.1 state machine.

    Request bytes are written to 'writer' (anything with a '.write(bytes)'
    method) and response bytes are parsed from 'reader' (anything with a
    '.read(size)' method). The request side ('our_state') and the response
    side ('their_state') are tracked independently.

    Usage...

    client = HTTPParser(writer, reader)
    client.send_method_line()   ours:   IDLE -> SEND_HEADERS
    client.send_headers()       ours:   SEND_HEADERS -> SEND_BODY
    client.send_body()          ours:   SEND_BODY -> SEND_BODY or DONE or MUST_CLOSE
    client.recv_status_line()   theirs: IDLE -> SEND_HEADERS
    client.recv_headers()       theirs: SEND_HEADERS -> SEND_BODY
    client.recv_body()          theirs: SEND_BODY -> SEND_BODY or DONE or MUST_CLOSE
    client.start_next_cycle()   both:   DONE -> IDLE

    Calling a method in the wrong state, or receiving malformed data,
    raises `ProtocolError`.

    NOTE: there is no 'close()' method on this class yet, so nothing moves
    either side into State.CLOSED.
    """

    def __init__(self, writer, reader):
        self.writer = writer
        self.reader = reader
        self.parser = ReadAheadParser(reader)

        self.our_state = State.IDLE
        self.their_state = State.IDLE
        # A content length of 'None' means 'Transfer-Encoding: chunked'.
        self.our_content_length = 0
        self.their_content_length = 0
        self.our_seen_length = 0
        self.their_seen_length = 0
        self.our_keep_alive = True
        self.their_keep_alive = True

    @staticmethod
    def _parse_int(value, base, exc_text):
        """
        Convert a length field to a non-negative integer.

        Raises `ProtocolError` (rather than leaking `ValueError`) if 'value'
        is not a valid integer in the given base, or if it is negative.
        """
        try:
            number = int(value, base)
        except ValueError:
            raise ProtocolError(f"Invalid integer value {exc_text}") from None
        if number < 0:
            raise ProtocolError(f"Invalid integer value {exc_text}")
        return number

    def send_method_line(self, method: bytes, target: bytes, protocol: bytes) -> None:
        """
        Write the request line, eg. "GET / HTTP/1.1".

        Raises `ProtocolError` if not in the IDLE state, or if 'protocol'
        is anything other than b'HTTP/1.1'.
        """
        if self.our_state != State.IDLE:
            msg = f"Called 'send_method_line' in invalid state {self.description()}"
            raise ProtocolError(msg)

        # Send initial request line, eg. "GET / HTTP/1.1"
        if protocol != b'HTTP/1.1':
            raise ProtocolError("Sent unsupported protocol version")
        data = b" ".join([method, target, protocol]) + b"\r\n"
        self.writer.write(data)

        self.our_state = State.SEND_HEADERS

    def send_headers(self, headers) -> None:
        """
        Write the request headers, followed by the blank line ending them.

        'headers' is an iterable of (name, value) pairs of bytes.

        Raises `ProtocolError` if not in the SEND_HEADERS state, if no 'Host'
        header is included, or if 'Content-Length' is not a non-negative
        integer.
        """
        if self.our_state != State.SEND_HEADERS:
            msg = f"Called 'send_headers' in invalid state {self.description()}"
            raise ProtocolError(msg)

        # The headers are scanned and then written, so materialise them once
        # in case the caller passed a one-shot iterable.
        headers = list(headers)

        # Update header state
        seen_host = False
        for name, value in headers:
            lname = name.lower()
            if lname == b'host':
                seen_host = True
            elif lname == b'content-length':
                exc_text = "sending 'Content-Length' header"
                self.our_content_length = self._parse_int(value, 10, exc_text)
            # Connection options and transfer codings are case-insensitive.
            elif lname == b'connection' and value.lower() == b'close':
                self.our_keep_alive = False
            elif lname == b'transfer-encoding' and value.lower() == b'chunked':
                self.our_content_length = None
        if not seen_host:
            raise ProtocolError("Request missing 'Host' header")

        # Send request headers
        lines = [name + b": " + value + b"\r\n" for name, value in headers]
        data = b"".join(lines) + b"\r\n"
        self.writer.write(data)

        self.our_state = State.SEND_BODY

    def send_body(self, body: bytes) -> None:
        """
        Write a piece of the request body. Pass b'' to mark the end of the body.

        With 'Transfer-Encoding: chunked' each call is written as one chunk,
        and b'' writes the terminating zero-length chunk. Otherwise the data
        is written as-is and checked against the declared 'Content-Length'.

        Raises `ProtocolError` if not in the SEND_BODY state, or if the data
        sent does not match the declared 'Content-Length'.
        """
        if self.our_state != State.SEND_BODY:
            msg = f"Called 'send_body' in invalid state {self.description()}"
            raise ProtocolError(msg)

        if self.our_content_length is None:
            # Transfer-Encoding: chunked
            self.our_seen_length += len(body)
            self.writer.write(f'{len(body):x}\r\n'.encode('ascii'))
            self.writer.write(body + b'\r\n')

        else:
            # Content-Length: xxx
            self.our_seen_length += len(body)
            if self.our_seen_length > self.our_content_length:
                msg = 'Too much data sent for declared Content-Length'
                raise ProtocolError(msg)
            if self.our_seen_length < self.our_content_length and body == b'':
                msg = 'Not enough data sent for declared Content-Length'
                raise ProtocolError(msg)
            if body:
                self.writer.write(body)

        if body == b'':
            # Handle body close
            if self.our_keep_alive:
                self.our_state = State.DONE
            else:
                self.our_state = State.MUST_CLOSE

    def recv_status_line(self) -> tuple:
        """
        Read the response status line, eg. "HTTP/1.1 200 OK".

        Returns a (protocol, status_code, reason_phrase) tuple of bytes.
        The reason phrase is b'' if the server omitted it.

        Raises `ProtocolError` if not in the IDLE state, if the line is
        malformed, or if the protocol is anything other than b'HTTP/1.1'.
        """
        if self.their_state != State.IDLE:
            msg = f"Called 'recv_status_line' in invalid state {self.description()}"
            raise ProtocolError(msg)

        # Read initial response line, eg. "HTTP/1.1 200 OK"
        exc_text = "reading response status line"
        line = self.parser.read_until(b"\r\n", max_size=4096, exc_text=exc_text)
        parts = line.split(b" ", 2)
        if len(parts) == 2:
            # Some servers omit the reason phrase and its preceding space.
            parts.append(b"")
        if len(parts) != 3:
            raise ProtocolError("Malformed response status line")
        protocol, status_code, reason_phrase = parts
        if protocol != b'HTTP/1.1':
            raise ProtocolError("Received unsupported protocol version")

        self.their_state = State.SEND_HEADERS
        return protocol, status_code, reason_phrase

    def recv_headers(self) -> list:
        """
        Read the response headers, up to and including the blank line.

        Returns a list of (name, value) pairs of bytes, with surrounding
        spaces stripped from each value.

        Raises `ProtocolError` if not in the SEND_HEADERS state, if a header
        line has no ':' separator, or if 'Content-Length' is not a
        non-negative integer.
        """
        if self.their_state != State.SEND_HEADERS:
            msg = f"Called 'recv_headers' in invalid state {self.description()}"
            raise ProtocolError(msg)

        # Read response headers
        headers = []
        exc_text = "reading response headers"
        while line := self.parser.read_until(b"\r\n", max_size=4096, exc_text=exc_text):
            name, separator, value = line.partition(b":")
            if not separator:
                raise ProtocolError("Malformed response header line")
            headers.append((name, value.strip(b" ")))

        # Update header state
        for name, value in headers:
            lname = name.lower()
            if lname == b'content-length':
                exc_text = "reading 'Content-Length' header"
                self.their_content_length = self._parse_int(value, 10, exc_text)
            # Connection options and transfer codings are case-insensitive.
            elif lname == b'connection' and value.lower() == b'close':
                self.their_keep_alive = False
            elif lname == b'transfer-encoding' and value.lower() == b'chunked':
                self.their_content_length = None

        self.their_state = State.SEND_BODY
        return headers

    def recv_body(self) -> bytes:
        """
        Read a piece of the response body. Returns b'' once the body is complete.

        With 'Transfer-Encoding: chunked' each call returns the data of one
        chunk. Otherwise each call returns up to 4096 bytes, bounded by the
        declared 'Content-Length'.

        Raises `ProtocolError` if not in the SEND_BODY state, if the chunk
        framing is invalid, or if the stream ends before the declared
        'Content-Length' has been received.
        """
        if self.their_state != State.SEND_BODY:
            msg = f"Called 'recv_body' in invalid state {self.description()}"
            raise ProtocolError(msg)

        if self.their_content_length is None:
            # Transfer-Encoding: chunked
            exc_text = 'reading chunk size'
            line = self.parser.read_until(b"\r\n", max_size=4096, exc_text=exc_text)
            # Anything after ';' is a chunk extension, which is ignored.
            sizestr, _, _ = line.partition(b";")
            size = self._parse_int(sizestr, 16, exc_text)
            if size > 0:
                body = self.parser.read(size=size)
                exc_text = 'reading chunk data'
                self.their_seen_length += len(body)
            else:
                body = b''
                exc_text = 'reading chunk termination'
            # 'max_size=2' lets a CRLF that arrives split across two reads be
            # found, but also lets stray bytes through, so check for those.
            stray = self.parser.read_until(b"\r\n", max_size=2, exc_text=exc_text)
            if stray:
                raise ProtocolError(f"Expected CRLF {exc_text}")

        else:
            # Content-Length: xxx
            remaining = self.their_content_length - self.their_seen_length
            size = min(remaining, 4096)
            body = self.parser.read(size=size)
            self.their_seen_length += len(body)
            if self.their_seen_length < self.their_content_length and body == b'':
                msg = 'Not enough data received for declared Content-Length'
                raise ProtocolError(msg)

        if body == b'':
            # Handle body close
            if self.their_keep_alive:
                self.their_state = State.DONE
            else:
                self.their_state = State.MUST_CLOSE
        return body

    def start_next_cycle(self) -> None:
        """
        Reset both sides, ready for the next request/response on this connection.

        Raises `ProtocolError` unless both sides are in the DONE state.
        """
        if self.our_state != State.DONE or self.their_state != State.DONE:
            msg = f"Called 'start_next_cycle' in invalid state {self.description()}"
            raise ProtocolError(msg)

        self.our_state = State.IDLE
        self.their_state = State.IDLE
        self.our_content_length = 0
        self.their_content_length = 0
        self.our_seen_length = 0
        self.their_seen_length = 0
        self.our_keep_alive = True
        self.their_keep_alive = True

    def description(self) -> str:
        """Return the state of both sides, as used in error messages."""
        cl_state = self.our_state.name
        sr_state = self.their_state.name
        return f"client {cl_state}, server {sr_state}"

    def __repr__(self) -> str:
        desc = self.description()
        return f'<HTTPParser [{desc}]>'

244 

245 

class ReadAheadParser:
    """
    Wraps a stream that has a '.read(size)' method, keeping hold of any
    bytes that were read from it but not yet returned to the caller.
    """

    def __init__(self, stream):
        self._buffer = b''
        self._stream = stream
        self._chunk_size = 4096

    def _read_some(self):
        # Fall through to the underlying stream only when nothing is held back.
        if not self._buffer:
            return self._stream.read(self._chunk_size)
        pending = self._buffer
        self._buffer = b''
        return pending

    def _push_back(self, buffer):
        # '_read_some' always drains the held-back bytes in full, so there
        # should never be anything left over when more are pushed back.
        assert self._buffer == b''
        self._buffer = buffer

    def read(self, size):
        """
        Return up to 'size' bytes, reading more from the stream as needed.

        * Fewer than 'size' bytes are returned only if the stream runs out.
        * Returns b'' to indicate connection close.
        """
        collected = bytearray()
        while len(collected) < size and (piece := self._read_some()):
            collected.extend(piece)

        if len(collected) > size:
            # Keep the surplus for the next read.
            surplus = collected[size:]
            collected = collected[:size]
            self._push_back(bytes(surplus))
        return bytes(collected)

    def read_until(self, marker, max_size, exc_text):
        """
        Return the bytes that precede the next occurrence of 'marker'.

        * The marker is not included in the returned bytes.
        * The marker is consumed from the I/O stream.
        * Raises `ProtocolError` if the stream closes before a marker occurrence.
        * Raises `ProtocolError` if marker did not occur within 'max_size + len(marker)' bytes.
        """
        marker_length = len(marker)
        collected = bytearray()
        while len(collected) <= max_size:
            piece = self._read_some()
            if not piece:
                # stream closed before marker found.
                raise ProtocolError(f"Stream closed early {exc_text}")

            # Step back a little, so that a marker which straddles two
            # reads is still found.
            search_from = max(len(collected) - marker_length, 0)
            collected.extend(piece)
            found_at = collected.find(marker, search_from)
            if found_at < 0:
                continue

            if found_at > max_size:
                # marker was found, though 'max_size' exceeded.
                raise ProtocolError(f"Exceeded maximum size {exc_text}")
            self._push_back(bytes(collected[found_at + marker_length:]))
            return bytes(collected[:found_at])

        raise ProtocolError(f"Exceeded maximum size {exc_text}")