Integração Pangolin Proxy

2025-12-06 21:11:34 -03:00
parent dc7c446254
commit 5291d8ccae
2008 changed files with 1062 additions and 477 deletions
--- a/agent/build_venv/lib/python3.11/site-packages/urllib3/response.py
+++ b/agent/build_venv/lib/python3.11/site-packages/urllib3/response.py
@@ -4,7 +4,6 @@ import collections
 import io
 import json as _json
 import logging
-import re
 import socket
 import sys
 import typing
@@ -33,6 +32,7 @@ from .connection import BaseSSLError, HTTPConnection, HTTPException
 from .exceptions import (
    BodyNotHttplibCompatible,
    DecodeError,
+    DependencyWarning,
    HTTPError,
    IncompleteRead,
    InvalidChunkLength,
@@ -52,7 +52,11 @@ log = logging.getLogger(__name__)


 class ContentDecoder:
-    def decompress(self, data: bytes) -> bytes:
+    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
+        raise NotImplementedError()
+
+    @property
+    def has_unconsumed_tail(self) -> bool:
        raise NotImplementedError()

    def flush(self) -> bytes:
@@ -62,30 +66,57 @@ class ContentDecoder:
 class DeflateDecoder(ContentDecoder):
    def __init__(self) -> None:
        self._first_try = True
-        self._data = b""
+        self._first_try_data = b""
+        self._unfed_data = b""
        self._obj = zlib.decompressobj()

-    def decompress(self, data: bytes) -> bytes:
-        if not data:
+    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
+        data = self._unfed_data + data
+        self._unfed_data = b""
+        if not data and not self._obj.unconsumed_tail:
            return data
+        original_max_length = max_length
+        if original_max_length < 0:
+            max_length = 0
+        elif original_max_length == 0:
+            # We should not pass 0 to the zlib decompressor because 0 is
+            # the default value that will make zlib decompress without a
+            # length limit.
+            # Data should be stored for subsequent calls.
+            self._unfed_data = data
+            return b""

+        # Subsequent calls always reuse `self._obj`. zlib requires
+        # passing the unconsumed tail if decompression is to continue.
        if not self._first_try:
-            return self._obj.decompress(data)
+            return self._obj.decompress(
+                self._obj.unconsumed_tail + data, max_length=max_length
+            )

-        self._data += data
+        # First call tries with RFC 1950 ZLIB format.
+        self._first_try_data += data
        try:
-            decompressed = self._obj.decompress(data)
+            decompressed = self._obj.decompress(data, max_length=max_length)
            if decompressed:
                self._first_try = False
-                self._data = None  # type: ignore[assignment]
+                self._first_try_data = b""
            return decompressed
+        # On failure, it falls back to RFC 1951 DEFLATE format.
        except zlib.error:
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
-                return self.decompress(self._data)
+                return self.decompress(
+                    self._first_try_data, max_length=original_max_length
+                )
            finally:
-                self._data = None  # type: ignore[assignment]
+                self._first_try_data = b""
+
+    @property
+    def has_unconsumed_tail(self) -> bool:
+        return bool(self._unfed_data) or (
+            bool(self._obj.unconsumed_tail) and not self._first_try
+        )

    def flush(self) -> bytes:
        return self._obj.flush()
@@ -101,27 +132,61 @@ class GzipDecoder(ContentDecoder):
    def __init__(self) -> None:
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER
+        self._unconsumed_tail = b""

-    def decompress(self, data: bytes) -> bytes:
+    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        ret = bytearray()
-        if self._state == GzipDecoderState.SWALLOW_DATA or not data:
+        if self._state == GzipDecoderState.SWALLOW_DATA:
            return bytes(ret)
+
+        if max_length == 0:
+            # We should not pass 0 to the zlib decompressor because 0 is
+            # the default value that will make zlib decompress without a
+            # length limit.
+            # Data should be stored for subsequent calls.
+            self._unconsumed_tail += data
+            return b""
+
+        # zlib requires passing the unconsumed tail to the subsequent
+        # call if decompression is to continue.
+        data = self._unconsumed_tail + data
+        if not data and self._obj.eof:
+            return bytes(ret)
+
        while True:
            try:
-                ret += self._obj.decompress(data)
+                ret += self._obj.decompress(
+                    data, max_length=max(max_length - len(ret), 0)
+                )
            except zlib.error:
                previous_state = self._state
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
+                self._unconsumed_tail = b""
                if previous_state == GzipDecoderState.OTHER_MEMBERS:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(ret)
                raise
-            data = self._obj.unused_data
+
+            self._unconsumed_tail = data = (
+                self._obj.unconsumed_tail or self._obj.unused_data
+            )
+            if max_length > 0 and len(ret) >= max_length:
+                break
+
            if not data:
                return bytes(ret)
-            self._state = GzipDecoderState.OTHER_MEMBERS
-            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
+            # When the end of a gzip member is reached, a new decompressor
+            # must be created for unused (possibly future) data.
+            if self._obj.eof:
+                self._state = GzipDecoderState.OTHER_MEMBERS
+                self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
+
+        return bytes(ret)
+
+    @property
+    def has_unconsumed_tail(self) -> bool:
+        return bool(self._unconsumed_tail)

    def flush(self) -> bytes:
        return self._obj.flush()
@@ -136,9 +201,35 @@ if brotli is not None:
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            if hasattr(self._obj, "decompress"):
-                setattr(self, "decompress", self._obj.decompress)
+                setattr(self, "_decompress", self._obj.decompress)
            else:
-                setattr(self, "decompress", self._obj.process)
+                setattr(self, "_decompress", self._obj.process)
+
+        # Requires Brotli >= 1.2.0 for `output_buffer_limit`.
+        def _decompress(self, data: bytes, output_buffer_limit: int = -1) -> bytes:
+            raise NotImplementedError()
+
+        def decompress(self, data: bytes, max_length: int = -1) -> bytes:
+            try:
+                if max_length > 0:
+                    return self._decompress(data, output_buffer_limit=max_length)
+                else:
+                    return self._decompress(data)
+            except TypeError:
+                # Fallback for Brotli/brotlicffi/brotlipy versions without
+                # the `output_buffer_limit` parameter.
+                warnings.warn(
+                    "Brotli >= 1.2.0 is required to prevent decompression bombs.",
+                    DependencyWarning,
+                )
+                return self._decompress(data)
+
+        @property
+        def has_unconsumed_tail(self) -> bool:
+            try:
+                return not self._obj.can_accept_more_data()
+            except AttributeError:
+                return False

        def flush(self) -> bytes:
            if hasattr(self._obj, "flush"):
@@ -147,69 +238,64 @@ if brotli is not None:


 try:
-    # Python 3.14+
-    from compression import zstd  # type: ignore[import-not-found] # noqa: F401
-
+    if sys.version_info >= (3, 14):
+        from compression import zstd
+    else:
+        from backports import zstd
+except ImportError:
+    HAS_ZSTD = False
+else:
    HAS_ZSTD = True

    class ZstdDecoder(ContentDecoder):
        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor()

-        def decompress(self, data: bytes) -> bytes:
-            if not data:
+        def decompress(self, data: bytes, max_length: int = -1) -> bytes:
+            if not data and not self.has_unconsumed_tail:
                return b""
-            data_parts = [self._obj.decompress(data)]
-            while self._obj.eof and self._obj.unused_data:
-                unused_data = self._obj.unused_data
+            if self._obj.eof:
+                data = self._obj.unused_data + data
                self._obj = zstd.ZstdDecompressor()
-                data_parts.append(self._obj.decompress(unused_data))
+            part = self._obj.decompress(data, max_length=max_length)
+            length = len(part)
+            data_parts = [part]
+            # Every loop iteration is supposed to read data from a separate frame.
+            # The loop breaks when:
+            #   - enough data is read;
+            #   - no more unused data is available;
+            #   - end of the last read frame has not been reached (i.e.,
+            #     more data has to be fed).
+            while (
+                self._obj.eof
+                and self._obj.unused_data
+                and (max_length < 0 or length < max_length)
+            ):
+                unused_data = self._obj.unused_data
+                if not self._obj.needs_input:
+                    self._obj = zstd.ZstdDecompressor()
+                part = self._obj.decompress(
+                    unused_data,
+                    max_length=(max_length - length) if max_length > 0 else -1,
+                )
+                if part_length := len(part):
+                    data_parts.append(part)
+                    length += part_length
+                elif self._obj.needs_input:
+                    break
            return b"".join(data_parts)

+        @property
+        def has_unconsumed_tail(self) -> bool:
+            return not (self._obj.needs_input or self._obj.eof) or bool(
+                self._obj.unused_data
+            )
+
        def flush(self) -> bytes:
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return b""

-except ImportError:
-    try:
-        # Python 3.13 and earlier require the 'zstandard' module.
-        import zstandard as zstd
-
-        # The package 'zstandard' added the 'eof' property starting
-        # in v0.18.0 which we require to ensure a complete and
-        # valid zstd stream was fed into the ZstdDecoder.
-        # See: https://github.com/urllib3/urllib3/pull/2624
-        _zstd_version = tuple(
-            map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups())  # type: ignore[union-attr]
-        )
-        if _zstd_version < (0, 18):  # Defensive:
-            raise ImportError("zstandard module doesn't have eof")
-    except (AttributeError, ImportError, ValueError):  # Defensive:
-        HAS_ZSTD = False
-    else:
-        HAS_ZSTD = True
-
-        class ZstdDecoder(ContentDecoder):  # type: ignore[no-redef]
-            def __init__(self) -> None:
-                self._obj = zstd.ZstdDecompressor().decompressobj()
-
-            def decompress(self, data: bytes) -> bytes:
-                if not data:
-                    return b""
-                data_parts = [self._obj.decompress(data)]
-                while self._obj.eof and self._obj.unused_data:
-                    unused_data = self._obj.unused_data
-                    self._obj = zstd.ZstdDecompressor().decompressobj()
-                    data_parts.append(self._obj.decompress(unused_data))
-                return b"".join(data_parts)
-
-            def flush(self) -> bytes:
-                ret = self._obj.flush()  # note: this is a no-op
-                if not self._obj.eof:
-                    raise DecodeError("Zstandard data is incomplete")
-                return ret  # type: ignore[no-any-return]
-

 class MultiDecoder(ContentDecoder):
    """
@@ -220,16 +306,51 @@ class MultiDecoder(ContentDecoder):
        they were applied.
    """

+    # Maximum allowed number of chained HTTP encodings in the
+    # Content-Encoding header.
+    max_decode_links = 5
+
    def __init__(self, modes: str) -> None:
-        self._decoders = [_get_decoder(m.strip()) for m in modes.split(",")]
+        encodings = [m.strip() for m in modes.split(",")]
+        if len(encodings) > self.max_decode_links:
+            raise DecodeError(
+                "Too many content encodings in the chain: "
+                f"{len(encodings)} > {self.max_decode_links}"
+            )
+        self._decoders = [_get_decoder(e) for e in encodings]

    def flush(self) -> bytes:
        return self._decoders[0].flush()

-    def decompress(self, data: bytes) -> bytes:
-        for d in reversed(self._decoders):
-            data = d.decompress(data)
-        return data
+    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
+        if max_length <= 0:
+            for d in reversed(self._decoders):
+                data = d.decompress(data)
+            return data
+
+        ret = bytearray()
+        # Every while loop iteration goes through all decoders once.
+        # It exits when enough data is read or no more data can be read.
+        # It is possible that the while loop iteration does not produce
+        # any data because we retrieve up to `max_length` from every
+        # decoder, and the amount of bytes may be insufficient for the
+        # next decoder to produce enough/any output.
+        while True:
+            any_data = False
+            for d in reversed(self._decoders):
+                data = d.decompress(data, max_length=max_length - len(ret))
+                if data:
+                    any_data = True
+                # We should not break when no data is returned because
+                # next decoders may produce data even with empty input.
+            ret += data
+            if not any_data or len(ret) >= max_length:
+                return bytes(ret)
+            data = b""
+
+    @property
+    def has_unconsumed_tail(self) -> bool:
+        return any(d.has_unconsumed_tail for d in self._decoders)


 def _get_decoder(mode: str) -> ContentDecoder:
@@ -262,13 +383,10 @@ class BytesQueueBuffer:

     * self.buffer, which contains the full data
     * the largest chunk that we will copy in get()
-
-    The worst case scenario is a single chunk, in which case we'll make a full copy of
-    the data inside get().
    """

    def __init__(self) -> None:
-        self.buffer: typing.Deque[bytes] = collections.deque()
+        self.buffer: typing.Deque[bytes | memoryview[bytes]] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
@@ -286,6 +404,10 @@ class BytesQueueBuffer:
        elif n < 0:
            raise ValueError("n should be > 0")

+        if len(self.buffer[0]) == n and isinstance(self.buffer[0], bytes):
+            self._size -= n
+            return self.buffer.popleft()
+
        fetched = 0
        ret = io.BytesIO()
        while fetched < n:
@@ -293,6 +415,7 @@ class BytesQueueBuffer:
            chunk = self.buffer.popleft()
            chunk_length = len(chunk)
            if remaining < chunk_length:
+                chunk = memoryview(chunk)
                left_chunk, right_chunk = chunk[:remaining], chunk[remaining:]
                ret.write(left_chunk)
                self.buffer.appendleft(right_chunk)
@@ -315,6 +438,8 @@ class BytesQueueBuffer:
            return b""
        if len(buffer) == 1:
            result = buffer.pop()
+            if isinstance(result, memoryview):
+                result = result.tobytes()
        else:
            ret = io.BytesIO()
            ret.writelines(buffer.popleft() for _ in range(len(buffer)))
@@ -492,7 +617,11 @@ class BaseHTTPResponse(io.IOBase):
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
-        self, data: bytes, decode_content: bool | None, flush_decoder: bool
+        self,
+        data: bytes,
+        decode_content: bool | None,
+        flush_decoder: bool,
+        max_length: int | None = None,
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
@@ -505,9 +634,12 @@ class BaseHTTPResponse(io.IOBase):
                )
            return data

+        if max_length is None or flush_decoder:
+            max_length = -1
+
        try:
            if self._decoder:
-                data = self._decoder.decompress(data)
+                data = self._decoder.decompress(data, max_length=max_length)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
@@ -539,25 +671,6 @@ class BaseHTTPResponse(io.IOBase):
            b[: len(temp)] = temp
            return len(temp)

-    # Compatibility methods for http.client.HTTPResponse
-    def getheaders(self) -> HTTPHeaderDict:
-        warnings.warn(
-            "HTTPResponse.getheaders() is deprecated and will be removed "
-            "in urllib3 v2.6.0. Instead access HTTPResponse.headers directly.",
-            category=DeprecationWarning,
-            stacklevel=2,
-        )
-        return self.headers
-
-    def getheader(self, name: str, default: str | None = None) -> str | None:
-        warnings.warn(
-            "HTTPResponse.getheader() is deprecated and will be removed "
-            "in urllib3 v2.6.0. Instead use HTTPResponse.headers.get(name, default).",
-            category=DeprecationWarning,
-            stacklevel=2,
-        )
-        return self.headers.get(name, default)
-
    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers
@@ -974,6 +1087,14 @@ class HTTPResponse(BaseHTTPResponse):
        elif amt is not None:
            cache_content = False

+            if self._decoder and self._decoder.has_unconsumed_tail:
+                decoded_data = self._decode(
+                    b"",
+                    decode_content,
+                    flush_decoder=False,
+                    max_length=amt - len(self._decoded_buffer),
+                )
+                self._decoded_buffer.put(decoded_data)
            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

@@ -981,7 +1102,11 @@ class HTTPResponse(BaseHTTPResponse):

        flush_decoder = amt is None or (amt != 0 and not data)

-        if not data and len(self._decoded_buffer) == 0:
+        if (
+            not data
+            and len(self._decoded_buffer) == 0
+            and not (self._decoder and self._decoder.has_unconsumed_tail)
+        ):
            return data

        if amt is None:
@@ -998,7 +1123,12 @@ class HTTPResponse(BaseHTTPResponse):
                    )
                return data

-            decoded_data = self._decode(data, decode_content, flush_decoder)
+            decoded_data = self._decode(
+                data,
+                decode_content,
+                flush_decoder,
+                max_length=amt - len(self._decoded_buffer),
+            )
            self._decoded_buffer.put(decoded_data)

            while len(self._decoded_buffer) < amt and data:
@@ -1006,7 +1136,12 @@ class HTTPResponse(BaseHTTPResponse):
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
-                decoded_data = self._decode(data, decode_content, flush_decoder)
+                decoded_data = self._decode(
+                    data,
+                    decode_content,
+                    flush_decoder,
+                    max_length=amt - len(self._decoded_buffer),
+                )
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

@@ -1041,6 +1176,20 @@ class HTTPResponse(BaseHTTPResponse):
                    "Calling read1(decode_content=False) is not supported after "
                    "read1(decode_content=True) was called."
                )
+            if (
+                self._decoder
+                and self._decoder.has_unconsumed_tail
+                and (amt is None or len(self._decoded_buffer) < amt)
+            ):
+                decoded_data = self._decode(
+                    b"",
+                    decode_content,
+                    flush_decoder=False,
+                    max_length=(
+                        amt - len(self._decoded_buffer) if amt is not None else None
+                    ),
+                )
+                self._decoded_buffer.put(decoded_data)
            if len(self._decoded_buffer) > 0:
                if amt is None:
                    return self._decoded_buffer.get_all()
@@ -1056,7 +1205,9 @@ class HTTPResponse(BaseHTTPResponse):
        self._init_decoder()
        while True:
            flush_decoder = not data
-            decoded_data = self._decode(data, decode_content, flush_decoder)
+            decoded_data = self._decode(
+                data, decode_content, flush_decoder, max_length=amt
+            )
            self._decoded_buffer.put(decoded_data)
            if decoded_data or flush_decoder:
                break
@@ -1087,7 +1238,11 @@ class HTTPResponse(BaseHTTPResponse):
        if self.chunked and self.supports_chunked_reads():
            yield from self.read_chunked(amt, decode_content=decode_content)
        else:
-            while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:
+            while (
+                not is_fp_closed(self._fp)
+                or len(self._decoded_buffer) > 0
+                or (self._decoder and self._decoder.has_unconsumed_tail)
+            ):
                data = self.read(amt=amt, decode_content=decode_content)

                if data:
@@ -1250,7 +1405,10 @@ class HTTPResponse(BaseHTTPResponse):
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
-                    chunk, decode_content=decode_content, flush_decoder=False
+                    chunk,
+                    decode_content=decode_content,
+                    flush_decoder=False,
+                    max_length=amt,
                )
                if decoded:
                    yield decoded
@@ -1286,7 +1444,7 @@ class HTTPResponse(BaseHTTPResponse):
        return self._request_url

    @url.setter
-    def url(self, url: str) -> None:
+    def url(self, url: str | None) -> None:
        self._request_url = url

    def __iter__(self) -> typing.Iterator[bytes]: