Coverage for src/httpx/_parsers.py: 99%
195 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-06-11 16:56 +0100
« prev ^ index » next coverage.py v7.6.12, created at 2025-06-11 16:56 +0100
1import enum
3__all__ = ['HTTPParser', 'ProtocolError']
# TODO...
#
# * Expect: 100-continue
# * Connection: keep-alive / close
# * Transfer-Encoding: chunked
# * Upgrade: ... / (CONNECT?)
# * Host: required (ordering?)
#
# * Add 'Error' state transitions
# * Add tests to trickle data
# * Add type annotations
#
# * Integer conversions should always be bounded and positive. `boundedint(..., maxdigits, base)`
# * Optional... HTTP/1.0 support
# * Read trailing headers on Transfer-Encoding: chunked. Not just '\r\n'.
# * When writing Transfer-Encoding: chunked, split large writes into buffer size.
# * When reading Transfer-Encoding: chunked, handle incomplete reads from large chunk sizes.
# * .read() doesn't document whether it will always return the maximum available.
#
# * validate method, target, protocol in request line
# * validate protocol, status_code, reason_phrase in response line
# * validate name, value on headers
@enum.unique
class State(enum.Enum):
    """
    Progress of one side (request or response) of an HTTP/1.1 exchange.

    `HTTPParser` keeps one `State` for the request it sends and one for the
    response it receives. `@enum.unique` guards against two members being
    given the same value by mistake, which would silently alias them.
    """
    IDLE = 0          # No message started yet.
    SEND_HEADERS = 1  # Start line handled; headers come next.
    SEND_BODY = 2     # Headers handled; body data comes next.
    DONE = 3          # Message complete; the connection may be reused.
    MUST_CLOSE = 4    # Message complete; 'Connection: close' was seen.
    CLOSED = 5        # NOTE(review): never assigned in the visible code.
    ERROR = 6         # NOTE(review): never assigned in the visible code.
class ProtocolError(Exception):
    """
    Raised when the HTTP/1.1 exchange cannot proceed.

    Covers both misuse of the parser (a method called in the wrong state,
    an unsupported protocol version, a body that disagrees with the declared
    Content-Length) and unusable data on the wire (stream closed early, a
    line exceeding its size limit).
    """
class HTTPParser:
    """
    Client-side HTTP/1.1 state machine over a writer/reader pair.

    Requests are written to `writer` (needs `.write(bytes)`), responses are
    parsed from `reader` (needs `.read(size) -> bytes`). The request side
    (`our_state`) and the response side (`their_state`) are tracked
    independently.

    Usage...

    client = HTTPParser(writer, reader)
    client.send_method_line()   IDLE -> SEND_HEADERS
    client.send_headers()       SEND_HEADERS -> SEND_BODY
    client.send_body()          SEND_BODY -> SEND_BODY or DONE or MUST_CLOSE
    client.recv_status_line()   IDLE -> SEND_HEADERS
    client.recv_headers()       SEND_HEADERS -> SEND_BODY
    client.recv_body()          SEND_BODY -> SEND_BODY or DONE or MUST_CLOSE
    client.start_next_cycle()   DONE -> IDLE
    client.close()              CLOSED  (not implemented by this class yet)

    Every method raises `ProtocolError` when called in the wrong state, and
    when the data being sent or received violates the protocol.
    """

    def __init__(self, writer, reader):
        self.writer = writer
        self.reader = reader
        self.parser = ReadAheadParser(reader)
        self._reset_cycle()

    def _reset_cycle(self):
        # Per-exchange bookkeeping, shared by __init__ and start_next_cycle.
        self.our_state = State.IDLE
        self.their_state = State.IDLE
        # A content length of None means 'Transfer-Encoding: chunked'.
        self.our_content_length = 0
        self.their_content_length = 0
        self.our_seen_length = 0
        self.their_seen_length = 0
        self.our_keep_alive = True
        self.their_keep_alive = True

    @staticmethod
    def _parse_int(value, base, max_digits, exc_text):
        # Strict, bounded, non-negative integer parse. A bare int() would
        # accept signs, underscores and '0x' prefixes, and would leak
        # ValueError to the caller instead of ProtocolError.
        value = value.strip(b" \t")
        allowed = b"0123456789abcdefABCDEF" if base == 16 else b"0123456789"
        if not value or len(value) > max_digits or value.translate(None, allowed):
            raise ProtocolError(f"Invalid integer {exc_text}")
        return int(value, base)

    @staticmethod
    def _has_token(value, token):
        # Header values such as Connection and Transfer-Encoding are
        # comma-separated lists of case-insensitive tokens.
        items = (item.strip(b" \t").lower() for item in value.split(b","))
        return token in items

    def _expect_crlf(self, exc_text):
        # The CRLF must follow immediately. 'max_size=2' still lets the two
        # bytes arrive in separate reads, but anything before them is junk.
        extra = self.parser.read_until(b"\r\n", max_size=2, exc_text=exc_text)
        if extra:
            raise ProtocolError(f"Expected CRLF {exc_text}")

    def send_method_line(self, method, target, protocol):
        """
        Write the request line, eg. "GET / HTTP/1.1".

        All three arguments are bytes. Only b'HTTP/1.1' is accepted as
        `protocol`. Moves our state from IDLE to SEND_HEADERS.
        """
        if self.our_state != State.IDLE:
            msg = f"Called 'send_method_line' in invalid state {self.description()}"
            raise ProtocolError(msg)

        # Send initial request line, eg. "GET / HTTP/1.1"
        if protocol != b'HTTP/1.1':
            raise ProtocolError("Sent unsupported protocol version")
        data = b" ".join([method, target, protocol]) + b"\r\n"
        self.writer.write(data)

        self.our_state = State.SEND_HEADERS

    def send_headers(self, headers):
        """
        Write the request headers and the blank line that ends them.

        `headers` is an iterable of (name, value) byte pairs. A 'Host' header
        is required. Content-Length, Connection and Transfer-Encoding are
        inspected to decide how the body will be framed and whether the
        connection can be reused. Moves our state to SEND_BODY.
        """
        if self.our_state != State.SEND_HEADERS:
            msg = f"Called 'send_headers' in invalid state {self.description()}"
            raise ProtocolError(msg)

        # The headers are walked twice (inspect, then serialise), so a
        # one-shot iterable has to be materialised first.
        headers = list(headers)

        # Work on locals so a rejected call leaves the parser untouched.
        seen_host = False
        content_length = self.our_content_length
        keep_alive = self.our_keep_alive
        for name, value in headers:
            lname = name.lower()
            if lname == b'host':
                seen_host = True
            elif lname == b'content-length':
                content_length = self._parse_int(
                    value, base=10, max_digits=20,
                    exc_text="in request 'Content-Length' header",
                )
            elif lname == b'connection' and self._has_token(value, b'close'):
                keep_alive = False
            elif lname == b'transfer-encoding' and self._has_token(value, b'chunked'):
                content_length = None
        if not seen_host:
            raise ProtocolError("Request missing 'Host' header")

        self.our_content_length = content_length
        self.our_keep_alive = keep_alive

        # Send request headers
        lines = [name + b": " + value + b"\r\n" for name, value in headers]
        data = b"".join(lines) + b"\r\n"
        self.writer.write(data)

        self.our_state = State.SEND_BODY

    def send_body(self, body):
        """
        Write a piece of the request body; b'' marks the end of the body.

        With chunked encoding each call is written as one chunk, and the
        empty call writes the terminating zero-length chunk. With a declared
        Content-Length the running total is checked against it. On the empty
        call our state becomes DONE, or MUST_CLOSE if 'Connection: close'
        was sent.
        """
        if self.our_state != State.SEND_BODY:
            msg = f"Called 'send_body' in invalid state {self.description()}"
            raise ProtocolError(msg)

        if self.our_content_length is None:
            # Transfer-Encoding: chunked
            self.our_seen_length += len(body)
            self.writer.write(f'{len(body):x}\r\n'.encode('ascii'))
            self.writer.write(body + b'\r\n')

        else:
            # Content-Length: xxx
            self.our_seen_length += len(body)
            if self.our_seen_length > self.our_content_length:
                msg = 'Too much data sent for declared Content-Length'
                raise ProtocolError(msg)
            if self.our_seen_length < self.our_content_length and body == b'':
                msg = 'Not enough data sent for declared Content-Length'
                raise ProtocolError(msg)
            if body:
                self.writer.write(body)

        if body == b'':
            # Handle body close
            if self.our_keep_alive:
                self.our_state = State.DONE
            else:
                self.our_state = State.MUST_CLOSE

    def recv_status_line(self):
        """
        Read the response status line, eg. "HTTP/1.1 200 OK".

        Returns the byte triple (protocol, status_code, reason_phrase). The
        reason phrase is b'' when the server omits it. Moves their state
        from IDLE to SEND_HEADERS.
        """
        if self.their_state != State.IDLE:
            msg = f"Called 'recv_status_line' in invalid state {self.description()}"
            raise ProtocolError(msg)

        # Read initial response line, eg. "HTTP/1.1 200 OK"
        exc_text = "reading response status line"
        line = self.parser.read_until(b"\r\n", max_size=4096, exc_text=exc_text)
        parts = line.split(b" ", 2)
        if len(parts) < 2:
            raise ProtocolError(f"Malformed status line {exc_text}")
        if len(parts) == 2:
            # "HTTP/1.1 200" with no trailing space and no reason phrase.
            parts.append(b'')
        protocol, status_code, reason_phrase = parts
        if protocol != b'HTTP/1.1':
            raise ProtocolError("Received unsupported protocol version")

        self.their_state = State.SEND_HEADERS
        return protocol, status_code, reason_phrase

    def recv_headers(self):
        """
        Read response headers up to and including the blank line.

        Returns a list of (name, value) byte pairs, with surrounding spaces
        stripped from each value. Content-Length, Connection and
        Transfer-Encoding are inspected to decide how the body is framed.
        Moves their state to SEND_BODY.
        """
        if self.their_state != State.SEND_HEADERS:
            msg = f"Called 'recv_headers' in invalid state {self.description()}"
            raise ProtocolError(msg)

        # Read response headers
        headers = []
        exc_text = "reading response headers"
        while line := self.parser.read_until(b"\r\n", max_size=4096, exc_text=exc_text):
            name, colon, value = line.partition(b":")
            if not colon:
                raise ProtocolError(f"Malformed header line {exc_text}")
            headers.append((name, value.strip(b" ")))

        # Update header state
        for name, value in headers:
            lname = name.lower()
            if lname == b'content-length':
                self.their_content_length = self._parse_int(
                    value, base=10, max_digits=20,
                    exc_text="in response 'Content-Length' header",
                )
            elif lname == b'connection' and self._has_token(value, b'close'):
                self.their_keep_alive = False
            elif lname == b'transfer-encoding' and self._has_token(value, b'chunked'):
                self.their_content_length = None

        self.their_state = State.SEND_BODY
        return headers

    def recv_body(self):
        """
        Read and return the next piece of the response body.

        Returns b'' once the body is complete, at which point their state
        becomes DONE, or MUST_CLOSE if 'Connection: close' was received.
        With chunked encoding each call returns one chunk; otherwise each
        call returns at most 4096 bytes of the declared Content-Length.
        """
        if self.their_state != State.SEND_BODY:
            msg = f"Called 'recv_body' in invalid state {self.description()}"
            raise ProtocolError(msg)

        if self.their_content_length is None:
            # Transfer-Encoding: chunked
            exc_text = 'reading chunk size'
            line = self.parser.read_until(b"\r\n", max_size=4096, exc_text=exc_text)
            # Anything after ';' is a chunk extension, which is ignored.
            sizestr, _, _ = line.partition(b";")
            size = self._parse_int(sizestr, base=16, max_digits=16, exc_text=exc_text)
            if size > 0:
                body = self.parser.read(size=size)
                self._expect_crlf('reading chunk data')
                self.their_seen_length += len(body)
            else:
                body = b''
                self._expect_crlf('reading chunk termination')

        else:
            # Content-Length: xxx
            remaining = self.their_content_length - self.their_seen_length
            size = min(remaining, 4096)
            body = self.parser.read(size=size)
            self.their_seen_length += len(body)
            if self.their_seen_length < self.their_content_length and body == b'':
                msg = 'Not enough data received for declared Content-Length'
                raise ProtocolError(msg)

        if body == b'':
            # Handle body close
            if self.their_keep_alive:
                self.their_state = State.DONE
            else:
                self.their_state = State.MUST_CLOSE
        return body

    def start_next_cycle(self):
        """
        Reset both sides to IDLE so the connection can carry another exchange.

        Only valid once both the request and the response are DONE.
        """
        if self.our_state != State.DONE or self.their_state != State.DONE:
            msg = f"Called 'start_next_cycle' in invalid state {self.description()}"
            raise ProtocolError(msg)

        self._reset_cycle()

    def description(self) -> str:
        """Return both state names, eg. "client DONE, server SEND_BODY"."""
        cl_state = self.our_state.name
        sr_state = self.their_state.name
        return f"client {cl_state}, server {sr_state}"

    def __repr__(self) -> str:
        desc = self.description()
        return f'<HTTPParser [{desc}]>'
class ReadAheadParser:
    """
    A buffered I/O stream, with methods for read-ahead parsing.

    Wraps any object with a `.read(size) -> bytes` method. Bytes read from
    the stream but not yet consumed are held in an internal buffer and are
    returned first by the next read.
    """

    def __init__(self, stream):
        self._buffer = b''
        self._stream = stream
        self._chunk_size = 4096

    def _read_some(self):
        # Hand back buffered bytes first; only touch the stream when empty.
        if self._buffer:
            ret, self._buffer = self._buffer, b''
            return ret
        return self._stream.read(self._chunk_size)

    def _push_back(self, buffer):
        # Callers always drain the buffer through _read_some() before pushing
        # back, so there is never existing data that could be overwritten.
        assert self._buffer == b''
        self._buffer = buffer

    def read(self, size):
        """
        Read and return up to 'size' bytes from the stream, with I/O buffering provided.

        * Keeps reading until 'size' bytes are gathered or the stream ends,
          so a short result means the stream has closed.
        * Returns b'' to indicate connection close.
        """
        buffer = bytearray()
        while len(buffer) < size:
            chunk = self._read_some()
            if not chunk:
                break
            buffer.extend(chunk)

        if len(buffer) > size:
            # Read past the requested amount: keep the surplus for next time.
            buffer, push_back = buffer[:size], buffer[size:]
            self._push_back(bytes(push_back))
        return bytes(buffer)

    def read_until(self, marker, max_size, exc_text):
        """
        Read and return bytes from the stream, delimited by marker.

        * The marker is not included in the return bytes.
        * The marker is consumed from the I/O stream.
        * Raises `ProtocolError` if the stream closes before a marker occurrence.
        * Raises `ProtocolError` if marker did not occur within 'max_size + len(marker)' bytes.

        'exc_text' is appended to the error message to say what was being read.
        """
        buffer = bytearray()
        while len(buffer) <= max_size:
            chunk = self._read_some()
            if not chunk:
                # stream closed before marker found.
                raise ProtocolError(f"Stream closed early {exc_text}")

            # Resume the search just before the new data, so a marker split
            # across two reads is found without rescanning the whole buffer.
            start_search = max(len(buffer) - len(marker), 0)
            buffer.extend(chunk)
            index = buffer.find(marker, start_search)

            if index > max_size:
                # marker was found, though 'max_size' exceeded.
                raise ProtocolError(f"Exceeded maximum size {exc_text}")
            elif index >= 0:
                # Bytes after the marker belong to whatever is read next.
                endindex = index + len(marker)
                self._push_back(bytes(buffer[endindex:]))
                return bytes(buffer[:index])

        raise ProtocolError(f"Exceeded maximum size {exc_text}")