Pangolin Proxy Integration

2025-12-06 21:11:34 -03:00
parent dc7c446254
commit 5291d8ccae
2008 changed files with 1062 additions and 477 deletions


@@ -255,13 +255,19 @@ class HTTPHeaderDict(typing.MutableMapping[str, str]):
self._container[key.lower()] = [key, val]
def __getitem__(self, key: str) -> str:
if isinstance(key, bytes):
key = key.decode("latin-1")
val = self._container[key.lower()]
return ", ".join(val[1:])
def __delitem__(self, key: str) -> None:
if isinstance(key, bytes):
key = key.decode("latin-1")
del self._container[key.lower()]
def __contains__(self, key: object) -> bool:
if isinstance(key, bytes):
key = key.decode("latin-1")
if isinstance(key, str):
return key.lower() in self._container
return False
@@ -376,6 +382,8 @@ class HTTPHeaderDict(typing.MutableMapping[str, str]):
) -> list[str] | _DT:
"""Returns a list of all the values for the named field. Returns an
empty list if the key doesn't exist."""
if isinstance(key, bytes):
key = key.decode("latin-1")
try:
vals = self._container[key.lower()]
except KeyError:
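
A minimal usage sketch of the behavior added above, assuming the patched class is urllib3's HTTPHeaderDict (the import path urllib3._collections is an assumption, since the diff omits file names):

    from urllib3._collections import HTTPHeaderDict

    headers = HTTPHeaderDict()
    headers["Content-Type"] = "text/html"

    # bytes keys are now decoded with latin-1 before the case-insensitive lookup
    assert headers[b"content-type"] == "text/html"
    assert b"CONTENT-TYPE" in headers
    assert headers.getlist(b"Content-Type") == ["text/html"]

    del headers[b"Content-Type"]
    assert "content-type" not in headers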


@@ -1,7 +1,14 @@
# file generated by setuptools-scm
# don't change, don't track in version control
__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
__all__ = [
"__version__",
"__version_tuple__",
"version",
"version_tuple",
"__commit_id__",
"commit_id",
]
TYPE_CHECKING = False
if TYPE_CHECKING:
@@ -9,13 +16,19 @@ if TYPE_CHECKING:
from typing import Union
VERSION_TUPLE = Tuple[Union[int, str], ...]
COMMIT_ID = Union[str, None]
else:
VERSION_TUPLE = object
COMMIT_ID = object
version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
commit_id: COMMIT_ID
__commit_id__: COMMIT_ID
__version__ = version = '2.5.0'
__version_tuple__ = version_tuple = (2, 5, 0)
__version__ = version = '2.6.0'
__version_tuple__ = version_tuple = (2, 6, 0)
__commit_id__ = commit_id = None


@@ -160,6 +160,12 @@ class HTTPConnection(_HTTPConnection):
self._tunnel_port: int | None = None
self._tunnel_scheme: str | None = None
def __str__(self) -> str:
return f"{type(self).__name__}(host={self.host!r}, port={self.port!r})"
def __repr__(self) -> str:
return f"<{self} at {id(self):#x}>"
@property
def host(self) -> str:
"""


@@ -98,8 +98,12 @@ class EmscriptenHTTPConnection:
) -> None:
self._closed = False
if url.startswith("/"):
if self.port is not None:
port = f":{self.port}"
else:
port = ""
# no scheme / host / port included, make a full url
url = f"{self.scheme}://{self.host}:{self.port}" + url
url = f"{self.scheme}://{self.host}{port}{url}"
request = EmscriptenRequest(
url=url,
method=method,
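
The same port handling fix, reduced to a standalone sketch (the function name and asserts are illustrative, not urllib3 API):

    def build_full_url(scheme: str, host: str, port: int | None, url: str) -> str:
        # Mirrors the patched logic: only append ":<port>" when a port is set,
        # so a missing port no longer produces URLs like "https://example.com:None/path".
        if url.startswith("/"):
            port_part = f":{port}" if port is not None else ""
            url = f"{scheme}://{host}{port_part}{url}"
        return url

    assert build_full_url("https", "example.com", None, "/path") == "https://example.com/path"
    assert build_full_url("https", "example.com", 8443, "/path") == "https://example.com:8443/path"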


@@ -5,19 +5,19 @@ let Status = {
ERROR_EXCEPTION: -4,
};
let connections = {};
let connections = new Map();
let nextConnectionID = 1;
const encoder = new TextEncoder();
self.addEventListener("message", async function (event) {
if (event.data.close) {
let connectionID = event.data.close;
delete connections[connectionID];
connections.delete(connectionID);
return;
} else if (event.data.getMore) {
let connectionID = event.data.getMore;
let { curOffset, value, reader, intBuffer, byteBuffer } =
connections[connectionID];
connections.get(connectionID);
// if we still have some in buffer, then just send it back straight away
if (!value || curOffset >= value.length) {
// read another buffer if required
@@ -26,7 +26,7 @@ self.addEventListener("message", async function (event) {
if (readResponse.done) {
// read everything - clear connection and return
delete connections[connectionID];
connections.delete(connectionID);
Atomics.store(intBuffer, 0, Status.SUCCESS_EOF);
Atomics.notify(intBuffer, 0);
// finished reading successfully
@@ -34,7 +34,7 @@ self.addEventListener("message", async function (event) {
return;
}
curOffset = 0;
connections[connectionID].value = readResponse.value;
connections.get(connectionID).value = readResponse.value;
value = readResponse.value;
} catch (error) {
console.log("Request exception:", error);
@@ -57,7 +57,7 @@ self.addEventListener("message", async function (event) {
Atomics.store(intBuffer, 0, curLen); // store current length in bytes
Atomics.notify(intBuffer, 0);
curOffset += curLen;
connections[connectionID].curOffset = curOffset;
connections.get(connectionID).curOffset = curOffset;
return;
} else {
@@ -84,13 +84,13 @@ self.addEventListener("message", async function (event) {
byteBuffer.set(headerBytes);
intBuffer[1] = written;
// make a connection
connections[connectionID] = {
connections.set(connectionID, {
reader: response.body.getReader(),
intBuffer: intBuffer,
byteBuffer: byteBuffer,
value: undefined,
curOffset: 0,
};
});
// set header ready
Atomics.store(intBuffer, 0, Status.SUCCESS_HEADER);
Atomics.notify(intBuffer, 0);


@@ -67,12 +67,6 @@ SUCCESS_EOF = -2
ERROR_TIMEOUT = -3
ERROR_EXCEPTION = -4
_STREAMING_WORKER_CODE = (
files(__package__)
.joinpath("emscripten_fetch_worker.js")
.read_text(encoding="utf-8")
)
class _RequestError(Exception):
def __init__(
@@ -207,9 +201,13 @@ class _StreamingFetcher:
def __init__(self) -> None:
# make web-worker and data buffer on startup
self.streaming_ready = False
streaming_worker_code = (
files(__package__)
.joinpath("emscripten_fetch_worker.js")
.read_text(encoding="utf-8")
)
js_data_blob = js.Blob.new(
to_js([_STREAMING_WORKER_CODE], create_pyproxies=False),
to_js([streaming_worker_code], create_pyproxies=False),
_obj_from_dict({"type": "application/javascript"}),
)


@@ -40,7 +40,7 @@ like this:
from __future__ import annotations
import OpenSSL.SSL # type: ignore[import-untyped]
import OpenSSL.SSL # type: ignore[import-not-found]
from cryptography import x509
try:
@@ -61,7 +61,7 @@ from socket import timeout
from .. import util
if typing.TYPE_CHECKING:
from OpenSSL.crypto import X509 # type: ignore[import-untyped]
from OpenSSL.crypto import X509 # type: ignore[import-not-found]
__all__ = ["inject_into_urllib3", "extract_from_urllib3"]


@@ -41,7 +41,7 @@ with the proxy:
from __future__ import annotations
try:
import socks # type: ignore[import-not-found]
import socks # type: ignore[import-untyped]
except ImportError:
import warnings


@@ -42,7 +42,7 @@ class PoolError(HTTPError):
class RequestError(PoolError):
"""Base exception for PoolErrors that have associated URLs."""
def __init__(self, pool: ConnectionPool, url: str, message: str) -> None:
def __init__(self, pool: ConnectionPool, url: str | None, message: str) -> None:
self.url = url
super().__init__(pool, message)
@@ -93,7 +93,7 @@ class MaxRetryError(RequestError):
"""
def __init__(
self, pool: ConnectionPool, url: str, reason: Exception | None = None
self, pool: ConnectionPool, url: str | None, reason: Exception | None = None
) -> None:
self.reason = reason


@@ -185,7 +185,7 @@ class HTTP2Connection(HTTPSConnection):
if not chunk:
break
if isinstance(chunk, str):
chunk = chunk.encode() # pragma: no cover
chunk = chunk.encode()
conn.send_data(self._h2_stream, chunk, end_stream=False)
if data_to_send := conn.data_to_send():
self.sock.sendall(data_to_send)


@@ -203,20 +203,18 @@ class PoolManager(RequestMethods):
**connection_pool_kw: typing.Any,
) -> None:
super().__init__(headers)
# PoolManager handles redirects itself in PoolManager.urlopen().
# It always passes redirect=False to the underlying connection pool to
# suppress per-pool redirect handling. If the user supplied a non-Retry
# value (int/bool/etc) for retries and we let the pool normalize it
# while redirect=False, the resulting Retry object would have redirect
# handling disabled, which can interfere with PoolManager's own
# redirect logic. Normalize here so redirects remain governed solely by
# PoolManager logic.
if "retries" in connection_pool_kw:
retries = connection_pool_kw["retries"]
if not isinstance(retries, Retry):
# When Retry is initialized, raise_on_redirect is based
# on a redirect boolean value.
# But requests made via a pool manager always set
# redirect to False, and raise_on_redirect always ends
# up being False consequently.
# Here we fix the issue by setting raise_on_redirect to
# a value needed by the pool manager without considering
# the redirect boolean.
raise_on_redirect = retries is not False
retries = Retry.from_int(retries, redirect=False)
retries.raise_on_redirect = raise_on_redirect
retries = Retry.from_int(retries)
connection_pool_kw = connection_pool_kw.copy()
connection_pool_kw["retries"] = retries
self.connection_pool_kw = connection_pool_kw
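
A hedged sketch of what the normalization above produces, using urllib3's Retry helper (Retry.from_int behavior as in upstream urllib3):

    from urllib3.util.retry import Retry

    # An int/bool retries value is converted to a Retry object once, here in
    # PoolManager.__init__, instead of later by each pool with redirect=False.
    normalized = Retry.from_int(3)
    print(normalized.total, normalized.raise_on_redirect)   # 3 True

    # An existing Retry instance passes through unchanged.
    configured = Retry(total=5, redirect=2)
    assert Retry.from_int(configured) is configured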


@@ -4,7 +4,6 @@ import collections
import io
import json as _json
import logging
import re
import socket
import sys
import typing
@@ -33,6 +32,7 @@ from .connection import BaseSSLError, HTTPConnection, HTTPException
from .exceptions import (
BodyNotHttplibCompatible,
DecodeError,
DependencyWarning,
HTTPError,
IncompleteRead,
InvalidChunkLength,
@@ -52,7 +52,11 @@ log = logging.getLogger(__name__)
class ContentDecoder:
def decompress(self, data: bytes) -> bytes:
def decompress(self, data: bytes, max_length: int = -1) -> bytes:
raise NotImplementedError()
@property
def has_unconsumed_tail(self) -> bool:
raise NotImplementedError()
def flush(self) -> bytes:
@@ -62,30 +66,57 @@ class ContentDecoder:
class DeflateDecoder(ContentDecoder):
def __init__(self) -> None:
self._first_try = True
self._data = b""
self._first_try_data = b""
self._unfed_data = b""
self._obj = zlib.decompressobj()
def decompress(self, data: bytes) -> bytes:
if not data:
def decompress(self, data: bytes, max_length: int = -1) -> bytes:
data = self._unfed_data + data
self._unfed_data = b""
if not data and not self._obj.unconsumed_tail:
return data
original_max_length = max_length
if original_max_length < 0:
max_length = 0
elif original_max_length == 0:
# We should not pass 0 to the zlib decompressor because 0 is
# the default value that will make zlib decompress without a
# length limit.
# Data should be stored for subsequent calls.
self._unfed_data = data
return b""
# Subsequent calls always reuse `self._obj`. zlib requires
# passing the unconsumed tail if decompression is to continue.
if not self._first_try:
return self._obj.decompress(data)
return self._obj.decompress(
self._obj.unconsumed_tail + data, max_length=max_length
)
self._data += data
# First call tries with RFC 1950 ZLIB format.
self._first_try_data += data
try:
decompressed = self._obj.decompress(data)
decompressed = self._obj.decompress(data, max_length=max_length)
if decompressed:
self._first_try = False
self._data = None # type: ignore[assignment]
self._first_try_data = b""
return decompressed
# On failure, it falls back to RFC 1951 DEFLATE format.
except zlib.error:
self._first_try = False
self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
try:
return self.decompress(self._data)
return self.decompress(
self._first_try_data, max_length=original_max_length
)
finally:
self._data = None # type: ignore[assignment]
self._first_try_data = b""
@property
def has_unconsumed_tail(self) -> bool:
return bool(self._unfed_data) or (
bool(self._obj.unconsumed_tail) and not self._first_try
)
def flush(self) -> bytes:
return self._obj.flush()
@@ -101,27 +132,61 @@ class GzipDecoder(ContentDecoder):
def __init__(self) -> None:
self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
self._state = GzipDecoderState.FIRST_MEMBER
self._unconsumed_tail = b""
def decompress(self, data: bytes) -> bytes:
def decompress(self, data: bytes, max_length: int = -1) -> bytes:
ret = bytearray()
if self._state == GzipDecoderState.SWALLOW_DATA or not data:
if self._state == GzipDecoderState.SWALLOW_DATA:
return bytes(ret)
if max_length == 0:
# We should not pass 0 to the zlib decompressor because 0 is
# the default value that will make zlib decompress without a
# length limit.
# Data should be stored for subsequent calls.
self._unconsumed_tail += data
return b""
# zlib requires passing the unconsumed tail to the subsequent
# call if decompression is to continue.
data = self._unconsumed_tail + data
if not data and self._obj.eof:
return bytes(ret)
while True:
try:
ret += self._obj.decompress(data)
ret += self._obj.decompress(
data, max_length=max(max_length - len(ret), 0)
)
except zlib.error:
previous_state = self._state
# Ignore data after the first error
self._state = GzipDecoderState.SWALLOW_DATA
self._unconsumed_tail = b""
if previous_state == GzipDecoderState.OTHER_MEMBERS:
# Allow trailing garbage acceptable in other gzip clients
return bytes(ret)
raise
data = self._obj.unused_data
self._unconsumed_tail = data = (
self._obj.unconsumed_tail or self._obj.unused_data
)
if max_length > 0 and len(ret) >= max_length:
break
if not data:
return bytes(ret)
self._state = GzipDecoderState.OTHER_MEMBERS
self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
# When the end of a gzip member is reached, a new decompressor
# must be created for unused (possibly future) data.
if self._obj.eof:
self._state = GzipDecoderState.OTHER_MEMBERS
self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
return bytes(ret)
@property
def has_unconsumed_tail(self) -> bool:
return bool(self._unconsumed_tail)
def flush(self) -> bytes:
return self._obj.flush()
@@ -136,9 +201,35 @@ if brotli is not None:
def __init__(self) -> None:
self._obj = brotli.Decompressor()
if hasattr(self._obj, "decompress"):
setattr(self, "decompress", self._obj.decompress)
setattr(self, "_decompress", self._obj.decompress)
else:
setattr(self, "decompress", self._obj.process)
setattr(self, "_decompress", self._obj.process)
# Requires Brotli >= 1.2.0 for `output_buffer_limit`.
def _decompress(self, data: bytes, output_buffer_limit: int = -1) -> bytes:
raise NotImplementedError()
def decompress(self, data: bytes, max_length: int = -1) -> bytes:
try:
if max_length > 0:
return self._decompress(data, output_buffer_limit=max_length)
else:
return self._decompress(data)
except TypeError:
# Fallback for Brotli/brotlicffi/brotlipy versions without
# the `output_buffer_limit` parameter.
warnings.warn(
"Brotli >= 1.2.0 is required to prevent decompression bombs.",
DependencyWarning,
)
return self._decompress(data)
@property
def has_unconsumed_tail(self) -> bool:
try:
return not self._obj.can_accept_more_data()
except AttributeError:
return False
def flush(self) -> bytes:
if hasattr(self._obj, "flush"):
@@ -147,69 +238,64 @@ if brotli is not None:
try:
# Python 3.14+
from compression import zstd # type: ignore[import-not-found] # noqa: F401
if sys.version_info >= (3, 14):
from compression import zstd
else:
from backports import zstd
except ImportError:
HAS_ZSTD = False
else:
HAS_ZSTD = True
class ZstdDecoder(ContentDecoder):
def __init__(self) -> None:
self._obj = zstd.ZstdDecompressor()
def decompress(self, data: bytes) -> bytes:
if not data:
def decompress(self, data: bytes, max_length: int = -1) -> bytes:
if not data and not self.has_unconsumed_tail:
return b""
data_parts = [self._obj.decompress(data)]
while self._obj.eof and self._obj.unused_data:
unused_data = self._obj.unused_data
if self._obj.eof:
data = self._obj.unused_data + data
self._obj = zstd.ZstdDecompressor()
data_parts.append(self._obj.decompress(unused_data))
part = self._obj.decompress(data, max_length=max_length)
length = len(part)
data_parts = [part]
# Every loop iteration is supposed to read data from a separate frame.
# The loop breaks when:
# - enough data is read;
# - no more unused data is available;
# - end of the last read frame has not been reached (i.e.,
# more data has to be fed).
while (
self._obj.eof
and self._obj.unused_data
and (max_length < 0 or length < max_length)
):
unused_data = self._obj.unused_data
if not self._obj.needs_input:
self._obj = zstd.ZstdDecompressor()
part = self._obj.decompress(
unused_data,
max_length=(max_length - length) if max_length > 0 else -1,
)
if part_length := len(part):
data_parts.append(part)
length += part_length
elif self._obj.needs_input:
break
return b"".join(data_parts)
@property
def has_unconsumed_tail(self) -> bool:
return not (self._obj.needs_input or self._obj.eof) or bool(
self._obj.unused_data
)
def flush(self) -> bytes:
if not self._obj.eof:
raise DecodeError("Zstandard data is incomplete")
return b""
except ImportError:
try:
# Python 3.13 and earlier require the 'zstandard' module.
import zstandard as zstd
# The package 'zstandard' added the 'eof' property starting
# in v0.18.0 which we require to ensure a complete and
# valid zstd stream was fed into the ZstdDecoder.
# See: https://github.com/urllib3/urllib3/pull/2624
_zstd_version = tuple(
map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups()) # type: ignore[union-attr]
)
if _zstd_version < (0, 18): # Defensive:
raise ImportError("zstandard module doesn't have eof")
except (AttributeError, ImportError, ValueError): # Defensive:
HAS_ZSTD = False
else:
HAS_ZSTD = True
class ZstdDecoder(ContentDecoder): # type: ignore[no-redef]
def __init__(self) -> None:
self._obj = zstd.ZstdDecompressor().decompressobj()
def decompress(self, data: bytes) -> bytes:
if not data:
return b""
data_parts = [self._obj.decompress(data)]
while self._obj.eof and self._obj.unused_data:
unused_data = self._obj.unused_data
self._obj = zstd.ZstdDecompressor().decompressobj()
data_parts.append(self._obj.decompress(unused_data))
return b"".join(data_parts)
def flush(self) -> bytes:
ret = self._obj.flush() # note: this is a no-op
if not self._obj.eof:
raise DecodeError("Zstandard data is incomplete")
return ret # type: ignore[no-any-return]
class MultiDecoder(ContentDecoder):
"""
@@ -220,16 +306,51 @@ class MultiDecoder(ContentDecoder):
they were applied.
"""
# Maximum allowed number of chained HTTP encodings in the
# Content-Encoding header.
max_decode_links = 5
def __init__(self, modes: str) -> None:
self._decoders = [_get_decoder(m.strip()) for m in modes.split(",")]
encodings = [m.strip() for m in modes.split(",")]
if len(encodings) > self.max_decode_links:
raise DecodeError(
"Too many content encodings in the chain: "
f"{len(encodings)} > {self.max_decode_links}"
)
self._decoders = [_get_decoder(e) for e in encodings]
def flush(self) -> bytes:
return self._decoders[0].flush()
def decompress(self, data: bytes) -> bytes:
for d in reversed(self._decoders):
data = d.decompress(data)
return data
def decompress(self, data: bytes, max_length: int = -1) -> bytes:
if max_length <= 0:
for d in reversed(self._decoders):
data = d.decompress(data)
return data
ret = bytearray()
# Every while loop iteration goes through all decoders once.
# It exits when enough data is read or no more data can be read.
# It is possible that the while loop iteration does not produce
# any data because we retrieve up to `max_length` from every
# decoder, and the amount of bytes may be insufficient for the
# next decoder to produce enough/any output.
while True:
any_data = False
for d in reversed(self._decoders):
data = d.decompress(data, max_length=max_length - len(ret))
if data:
any_data = True
# We should not break when no data is returned because
# next decoders may produce data even with empty input.
ret += data
if not any_data or len(ret) >= max_length:
return bytes(ret)
data = b""
@property
def has_unconsumed_tail(self) -> bool:
return any(d.has_unconsumed_tail for d in self._decoders)
def _get_decoder(mode: str) -> ContentDecoder:
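
A sketch of the new bounded-decompression API shown above, driving the gzip decoder directly (GzipDecoder lives in urllib3.response; the max_length parameter and has_unconsumed_tail property are taken from the diff, so treat this as illustrative):

    import gzip
    from urllib3.response import GzipDecoder

    compressed = gzip.compress(b"x" * 100_000)   # highly compressible payload

    decoder = GzipDecoder()
    out = decoder.decompress(compressed, max_length=8192)   # at most 8 KiB per call
    assert len(out) <= 8192
    while decoder.has_unconsumed_tail:                       # drain the rest in bounded steps
        out += decoder.decompress(b"", max_length=8192)
    assert out == b"x" * 100_000
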
@@ -262,13 +383,10 @@ class BytesQueueBuffer:
* self.buffer, which contains the full data
* the largest chunk that we will copy in get()
The worst case scenario is a single chunk, in which case we'll make a full copy of
the data inside get().
"""
def __init__(self) -> None:
self.buffer: typing.Deque[bytes] = collections.deque()
self.buffer: typing.Deque[bytes | memoryview[bytes]] = collections.deque()
self._size: int = 0
def __len__(self) -> int:
@@ -286,6 +404,10 @@ class BytesQueueBuffer:
elif n < 0:
raise ValueError("n should be > 0")
if len(self.buffer[0]) == n and isinstance(self.buffer[0], bytes):
self._size -= n
return self.buffer.popleft()
fetched = 0
ret = io.BytesIO()
while fetched < n:
@@ -293,6 +415,7 @@ class BytesQueueBuffer:
chunk = self.buffer.popleft()
chunk_length = len(chunk)
if remaining < chunk_length:
chunk = memoryview(chunk)
left_chunk, right_chunk = chunk[:remaining], chunk[remaining:]
ret.write(left_chunk)
self.buffer.appendleft(right_chunk)
@@ -315,6 +438,8 @@ class BytesQueueBuffer:
return b""
if len(buffer) == 1:
result = buffer.pop()
if isinstance(result, memoryview):
result = result.tobytes()
else:
ret = io.BytesIO()
ret.writelines(buffer.popleft() for _ in range(len(buffer)))
@@ -492,7 +617,11 @@ class BaseHTTPResponse(io.IOBase):
self._decoder = _get_decoder(content_encoding)
def _decode(
self, data: bytes, decode_content: bool | None, flush_decoder: bool
self,
data: bytes,
decode_content: bool | None,
flush_decoder: bool,
max_length: int | None = None,
) -> bytes:
"""
Decode the data passed in and potentially flush the decoder.
@@ -505,9 +634,12 @@ class BaseHTTPResponse(io.IOBase):
)
return data
if max_length is None or flush_decoder:
max_length = -1
try:
if self._decoder:
data = self._decoder.decompress(data)
data = self._decoder.decompress(data, max_length=max_length)
self._has_decoded_content = True
except self.DECODER_ERROR_CLASSES as e:
content_encoding = self.headers.get("content-encoding", "").lower()
@@ -539,25 +671,6 @@ class BaseHTTPResponse(io.IOBase):
b[: len(temp)] = temp
return len(temp)
# Compatibility methods for http.client.HTTPResponse
def getheaders(self) -> HTTPHeaderDict:
warnings.warn(
"HTTPResponse.getheaders() is deprecated and will be removed "
"in urllib3 v2.6.0. Instead access HTTPResponse.headers directly.",
category=DeprecationWarning,
stacklevel=2,
)
return self.headers
def getheader(self, name: str, default: str | None = None) -> str | None:
warnings.warn(
"HTTPResponse.getheader() is deprecated and will be removed "
"in urllib3 v2.6.0. Instead use HTTPResponse.headers.get(name, default).",
category=DeprecationWarning,
stacklevel=2,
)
return self.headers.get(name, default)
# Compatibility method for http.cookiejar
def info(self) -> HTTPHeaderDict:
return self.headers
@@ -974,6 +1087,14 @@ class HTTPResponse(BaseHTTPResponse):
elif amt is not None:
cache_content = False
if self._decoder and self._decoder.has_unconsumed_tail:
decoded_data = self._decode(
b"",
decode_content,
flush_decoder=False,
max_length=amt - len(self._decoded_buffer),
)
self._decoded_buffer.put(decoded_data)
if len(self._decoded_buffer) >= amt:
return self._decoded_buffer.get(amt)
@@ -981,7 +1102,11 @@ class HTTPResponse(BaseHTTPResponse):
flush_decoder = amt is None or (amt != 0 and not data)
if not data and len(self._decoded_buffer) == 0:
if (
not data
and len(self._decoded_buffer) == 0
and not (self._decoder and self._decoder.has_unconsumed_tail)
):
return data
if amt is None:
@@ -998,7 +1123,12 @@ class HTTPResponse(BaseHTTPResponse):
)
return data
decoded_data = self._decode(data, decode_content, flush_decoder)
decoded_data = self._decode(
data,
decode_content,
flush_decoder,
max_length=amt - len(self._decoded_buffer),
)
self._decoded_buffer.put(decoded_data)
while len(self._decoded_buffer) < amt and data:
@@ -1006,7 +1136,12 @@ class HTTPResponse(BaseHTTPResponse):
# For example, the GZ file header takes 10 bytes, we don't want to read
# it one byte at a time
data = self._raw_read(amt)
decoded_data = self._decode(data, decode_content, flush_decoder)
decoded_data = self._decode(
data,
decode_content,
flush_decoder,
max_length=amt - len(self._decoded_buffer),
)
self._decoded_buffer.put(decoded_data)
data = self._decoded_buffer.get(amt)
@@ -1041,6 +1176,20 @@ class HTTPResponse(BaseHTTPResponse):
"Calling read1(decode_content=False) is not supported after "
"read1(decode_content=True) was called."
)
if (
self._decoder
and self._decoder.has_unconsumed_tail
and (amt is None or len(self._decoded_buffer) < amt)
):
decoded_data = self._decode(
b"",
decode_content,
flush_decoder=False,
max_length=(
amt - len(self._decoded_buffer) if amt is not None else None
),
)
self._decoded_buffer.put(decoded_data)
if len(self._decoded_buffer) > 0:
if amt is None:
return self._decoded_buffer.get_all()
@@ -1056,7 +1205,9 @@ class HTTPResponse(BaseHTTPResponse):
self._init_decoder()
while True:
flush_decoder = not data
decoded_data = self._decode(data, decode_content, flush_decoder)
decoded_data = self._decode(
data, decode_content, flush_decoder, max_length=amt
)
self._decoded_buffer.put(decoded_data)
if decoded_data or flush_decoder:
break
@@ -1087,7 +1238,11 @@ class HTTPResponse(BaseHTTPResponse):
if self.chunked and self.supports_chunked_reads():
yield from self.read_chunked(amt, decode_content=decode_content)
else:
while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:
while (
not is_fp_closed(self._fp)
or len(self._decoded_buffer) > 0
or (self._decoder and self._decoder.has_unconsumed_tail)
):
data = self.read(amt=amt, decode_content=decode_content)
if data:
@@ -1250,7 +1405,10 @@ class HTTPResponse(BaseHTTPResponse):
break
chunk = self._handle_chunk(amt)
decoded = self._decode(
chunk, decode_content=decode_content, flush_decoder=False
chunk,
decode_content=decode_content,
flush_decoder=False,
max_length=amt,
)
if decoded:
yield decoded
@@ -1286,7 +1444,7 @@ class HTTPResponse(BaseHTTPResponse):
return self._request_url
@url.setter
def url(self, url: str) -> None:
def url(self, url: str | None) -> None:
self._request_url = url
def __iter__(self) -> typing.Iterator[bytes]:
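
End to end, the read() changes above can be exercised against a synthetic response; constructing HTTPResponse directly from a BytesIO body is a pattern used in urllib3's own tests, but consider this a sketch rather than documented API:

    import gzip
    import io
    from urllib3.response import HTTPResponse

    raw = gzip.compress(b"y" * 50_000)
    resp = HTTPResponse(
        body=io.BytesIO(raw),
        headers={"content-encoding": "gzip"},
        status=200,
        preload_content=False,
        decode_content=True,
    )

    chunks = []
    while chunk := resp.read(8192):   # each call now decodes at most ~8 KiB at a time
        chunks.append(chunk)
    assert b"".join(chunks) == b"y" * 50_000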


@@ -1,6 +1,7 @@
from __future__ import annotations
import io
import sys
import typing
from base64 import b64encode
from enum import Enum
@@ -30,18 +31,14 @@ else:
ACCEPT_ENCODING += ",br"
try:
from compression import ( # type: ignore[import-not-found] # noqa: F401
zstd as _unused_module_zstd,
)
ACCEPT_ENCODING += ",zstd"
if sys.version_info >= (3, 14):
from compression import zstd as _unused_module_zstd # noqa: F401
else:
from backports import zstd as _unused_module_zstd # noqa: F401
except ImportError:
try:
import zstandard as _unused_module_zstd # noqa: F401
ACCEPT_ENCODING += ",zstd"
except ImportError:
pass
pass
else:
ACCEPT_ENCODING += ",zstd"
class _TYPE_FAILEDTELL(Enum):
@@ -77,8 +74,9 @@ def make_headers(
:param accept_encoding:
Can be a boolean, list, or string.
``True`` translates to 'gzip,deflate'. If the dependencies for
Brotli (either the ``brotli`` or ``brotlicffi`` package) and/or Zstandard
(the ``zstandard`` package) algorithms are installed, then their encodings are
Brotli (either the ``brotli`` or ``brotlicffi`` package) and/or
Zstandard (the ``backports.zstd`` package for Python before 3.14)
algorithms are installed, then their encodings are
included in the string ('br' and 'zstd', respectively).
List will get joined by comma.
String will be used as provided.
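
A short sketch of the documented behavior (make_headers is public urllib3 API; the exact encodings advertised depend on which optional backends are installed):

    from urllib3.util import make_headers

    print(make_headers(accept_encoding=True))
    # {'accept-encoding': 'gzip,deflate,br,zstd'} when Brotli and a zstd backend are available

    print(make_headers(accept_encoding=["gzip", "br"]))
    # {'accept-encoding': 'gzip,br'}
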
@@ -230,7 +228,6 @@ def body_to_chunks(
elif hasattr(body, "read"):
def chunk_readable() -> typing.Iterable[bytes]:
nonlocal body, blocksize
encode = isinstance(body, io.TextIOBase)
while True:
datablock = body.read(blocksize)


@@ -136,14 +136,14 @@ try: # Do we have ssl at all?
from .ssltransport import SSLTransport # type: ignore[assignment]
except ImportError:
OP_NO_COMPRESSION = 0x20000 # type: ignore[assignment]
OP_NO_TICKET = 0x4000 # type: ignore[assignment]
OP_NO_SSLv2 = 0x1000000 # type: ignore[assignment]
OP_NO_SSLv3 = 0x2000000 # type: ignore[assignment]
PROTOCOL_SSLv23 = PROTOCOL_TLS = 2 # type: ignore[assignment]
PROTOCOL_TLS_CLIENT = 16 # type: ignore[assignment]
OP_NO_COMPRESSION = 0x20000 # type: ignore[assignment, misc]
OP_NO_TICKET = 0x4000 # type: ignore[assignment, misc]
OP_NO_SSLv2 = 0x1000000 # type: ignore[assignment, misc]
OP_NO_SSLv3 = 0x2000000 # type: ignore[assignment, misc]
PROTOCOL_SSLv23 = PROTOCOL_TLS = 2 # type: ignore[assignment, misc]
PROTOCOL_TLS_CLIENT = 16 # type: ignore[assignment, misc]
VERIFY_X509_PARTIAL_CHAIN = 0x80000
VERIFY_X509_STRICT = 0x20 # type: ignore[assignment]
VERIFY_X509_STRICT = 0x20 # type: ignore[assignment, misc]
_TYPE_PEER_CERT_RET = typing.Union["_TYPE_PEER_CERT_RET_DICT", bytes, None]
@@ -364,7 +364,10 @@ def create_urllib3_context(
except AttributeError: # Defensive: for CPython < 3.9.3; for PyPy < 7.3.8
pass
sslkeylogfile = os.environ.get("SSLKEYLOGFILE")
if "SSLKEYLOGFILE" in os.environ:
sslkeylogfile = os.path.expandvars(os.environ.get("SSLKEYLOGFILE"))
else:
sslkeylogfile = None
if sslkeylogfile:
context.keylog_filename = sslkeylogfile
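
Sketch of the new behavior (illustrative): environment variables inside SSLKEYLOGFILE are expanded before the path is handed to the SSL context.

    import os

    os.environ["SSLKEYLOGFILE"] = "${HOME}/sslkeys.log"
    print(os.path.expandvars(os.environ["SSLKEYLOGFILE"]))
    # e.g. /home/user/sslkeys.log, which then becomes context.keylog_filename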