From e5d0464a7f2ed6e688cbaa25dbac97e4e96b6b81 Mon Sep 17 00:00:00 2001 From: icp Date: Thu, 9 Mar 2023 15:15:09 +0530 Subject: [PATCH] python3-httpx: do not depend on incompatible python3-rfc3986 --- .../0001-drop-rfc3986-requirement.patch | 925 ++++++++++++++++++ srcpkgs/python3-httpx/template | 6 +- 2 files changed, 928 insertions(+), 3 deletions(-) create mode 100644 srcpkgs/python3-httpx/patches/0001-drop-rfc3986-requirement.patch diff --git a/srcpkgs/python3-httpx/patches/0001-drop-rfc3986-requirement.patch b/srcpkgs/python3-httpx/patches/0001-drop-rfc3986-requirement.patch new file mode 100644 index 000000000000..b48d96378a67 --- /dev/null +++ b/srcpkgs/python3-httpx/patches/0001-drop-rfc3986-requirement.patch @@ -0,0 +1,925 @@ +From 57daabf673705954afa94686c0002801c93d31f3 Mon Sep 17 00:00:00 2001 +From: Tom Christie +Date: Tue, 10 Jan 2023 10:36:15 +0000 +Subject: [PATCH] Drop `rfc3986` requirement. (#2252) + +* Drop RawURL + +* First pass at adding urlparse + +* Update urlparse + +* Add urlparse + +* Add urlparse + +* Unicode non-printables can be valid in IDNA hostnames + +* Update _urlparse.py docstring + +* Linting + +* Trim away ununsed codepaths + +* Tweaks for path validation depending on scheme and authority presence + +* Minor cleanups + +* Minor cleanups + +* full_path -> raw_path, forr internal consistency + +* Linting fixes + +* Drop rfc3986 dependency + +* Add test for #1833 + +* Linting + +* Drop 'rfc3986' dependancy from README and docs homepage + +Co-authored-by: Thomas Grainger +--- + README.md | 3 +- + httpx/_urlparse.py | 435 +++++++++++++++++++++++++++++++++++++++ + httpx/_urls.py | 290 +++++++------------------- + pyproject.toml | 2 +- + 8 files changed, 762 insertions(+), 257 deletions(-) + create mode 100644 httpx/_urlparse.py + create mode 100644 tests/test_urlparse.py + +diff --git a/README.md b/README.md +index 520e85c36..4d25491a6 100644 +--- a/README.md ++++ b/README.md +@@ -128,8 +128,7 @@ The HTTPX project relies on these 
excellent libraries: + * `httpcore` - The underlying transport implementation for `httpx`. + * `h11` - HTTP/1.1 support. + * `certifi` - SSL certificates. +-* `rfc3986` - URL parsing & normalization. +- * `idna` - Internationalized domain name support. ++* `idna` - Internationalized domain name support. + * `sniffio` - Async library autodetection. + + As well as these optional installs: +diff --git a/httpx/_urlparse.py b/httpx/_urlparse.py +new file mode 100644 +index 000000000..e16e81239 +--- /dev/null ++++ b/httpx/_urlparse.py +@@ -0,0 +1,435 @@ ++""" ++An implementation of `urlparse` that provides URL validation and normalization ++as described by RFC3986. ++ ++We rely on this implementation rather than the one in Python's stdlib, because: ++ ++* It provides more complete URL validation. ++* It properly differentiates between an empty querystring and an absent querystring, ++ to distinguish URLs with a trailing '?'. ++* It handles scheme, hostname, port, and path normalization. ++* It supports IDNA hostnames, normalizing them to their encoded form. ++* The API supports passing individual components, as well as the complete URL string. ++ ++Previously we relied on the excellent `rfc3986` package to handle URL parsing and ++validation, but this module provides a simpler alternative, with less indirection ++required. ++""" ++import ipaddress ++import re ++import typing ++ ++import idna ++ ++from ._exceptions import InvalidURL ++ ++MAX_URL_LENGTH = 65536 ++ ++# https://datatracker.ietf.org/doc/html/rfc3986.html#section-2.3 ++UNRESERVED_CHARACTERS = ( ++ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~" ++) ++SUB_DELIMS = "!$&'()*+,;=" ++ ++PERCENT_ENCODED_REGEX = re.compile("%[A-Fa-f0-9]{2}") ++ ++ ++# {scheme}: (optional) ++# //{authority} (optional) ++# {path} ++# ?{query} (optional) ++# #{fragment} (optional) ++URL_REGEX = re.compile( ++ ( ++ r"(?:(?P<scheme>{scheme}):)?" ++ r"(?://(?P<authority>{authority}))?" ++ r"(?P<path>{path})" ++ r"(?:\?(?P<query>{query}))?" 
++ r"(?:#(?P<fragment>{fragment}))?" ++ ).format( ++ scheme="([a-zA-Z][a-zA-Z0-9+.-]*)?", ++ authority="[^/?#]*", ++ path="[^?#]*", ++ query="[^#]*", ++ fragment=".*", ++ ) ++) ++ ++# {userinfo}@ (optional) ++# {host} ++# :{port} (optional) ++AUTHORITY_REGEX = re.compile( ++ ( ++ r"(?:(?P<userinfo>{userinfo})@)?" r"(?P<host>{host})" r":?(?P<port>{port})?" ++ ).format( ++ userinfo="[^@]*", # Any character sequence not including '@'. ++ host="(\\[.*\\]|[^:]*)", # Either any character sequence not including ':', ++ # or an IPv6 address enclosed within square brackets. ++ port=".*", # Any character sequence. ++ ) ++) ++ ++ ++# If we call urlparse with an individual component, then we need to regex ++# validate that component individually. ++# Note that we're duplicating the same strings as above. Shock! Horror!! ++COMPONENT_REGEX = { ++ "scheme": re.compile("([a-zA-Z][a-zA-Z0-9+.-]*)?"), ++ "authority": re.compile("[^/?#]*"), ++ "path": re.compile("[^?#]*"), ++ "query": re.compile("[^#]*"), ++ "fragment": re.compile(".*"), ++ "userinfo": re.compile("[^@]*"), ++ "host": re.compile("(\\[.*\\]|[^:]*)"), ++ "port": re.compile(".*"), ++} ++ ++ ++# We use these simple regexs as a first pass before handing off to ++# the stdlib 'ipaddress' module for IP address validation. 
++IPv4_STYLE_HOSTNAME = re.compile(r"^[0-9]+.[0-9]+.[0-9]+.[0-9]+$") ++IPv6_STYLE_HOSTNAME = re.compile(r"^\[.*\]$") ++ ++ ++class ParseResult(typing.NamedTuple): ++ scheme: str ++ userinfo: str ++ host: str ++ port: typing.Optional[int] ++ path: str ++ query: typing.Optional[str] ++ fragment: typing.Optional[str] ++ ++ @property ++ def authority(self) -> str: ++ return "".join( ++ [ ++ f"{self.userinfo}@" if self.userinfo else "", ++ f"[{self.host}]" if ":" in self.host else self.host, ++ f":{self.port}" if self.port is not None else "", ++ ] ++ ) ++ ++ @property ++ def netloc(self) -> str: ++ return "".join( ++ [ ++ f"[{self.host}]" if ":" in self.host else self.host, ++ f":{self.port}" if self.port is not None else "", ++ ] ++ ) ++ ++ def copy_with(self, **kwargs: typing.Optional[str]) -> "ParseResult": ++ if not kwargs: ++ return self ++ ++ defaults = { ++ "scheme": self.scheme, ++ "authority": self.authority, ++ "path": self.path, ++ "query": self.query, ++ "fragment": self.fragment, ++ } ++ defaults.update(kwargs) ++ return urlparse("", **defaults) ++ ++ def __str__(self) -> str: ++ authority = self.authority ++ return "".join( ++ [ ++ f"{self.scheme}:" if self.scheme else "", ++ f"//{authority}" if authority else "", ++ self.path, ++ f"?{self.query}" if self.query is not None else "", ++ f"#{self.fragment}" if self.fragment is not None else "", ++ ] ++ ) ++ ++ ++def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult: ++ # Initial basic checks on allowable URLs. ++ # --------------------------------------- ++ ++ # Hard limit the maximum allowable URL length. ++ if len(url) > MAX_URL_LENGTH: ++ raise InvalidURL("URL too long") ++ ++ # If a URL includes any ASCII control characters including \t, \r, \n, ++ # then treat it as invalid. ++ if any(char.isascii() and not char.isprintable() for char in url): ++ raise InvalidURL("Invalid non-printable ASCII character in URL") ++ ++ # Some keyword arguments require special handling. 
++ # ------------------------------------------------ ++ ++ # Coerce "port" to a string, if it is provided as an integer. ++ if "port" in kwargs: ++ port = kwargs["port"] ++ kwargs["port"] = str(port) if isinstance(port, int) else port ++ ++ # Replace "netloc" with "host and "port". ++ if "netloc" in kwargs: ++ netloc = kwargs.pop("netloc") or "" ++ kwargs["host"], _, kwargs["port"] = netloc.partition(":") ++ ++ # Replace "username" and/or "password" with "userinfo". ++ if "username" in kwargs or "password" in kwargs: ++ username = quote(kwargs.pop("username", "") or "") ++ password = quote(kwargs.pop("password", "") or "") ++ kwargs["userinfo"] = f"{username}:{password}" if password else username ++ ++ # Replace "raw_path" with "path" and "query". ++ if "raw_path" in kwargs: ++ raw_path = kwargs.pop("raw_path") or "" ++ kwargs["path"], seperator, kwargs["query"] = raw_path.partition("?") ++ if not seperator: ++ kwargs["query"] = None ++ ++ # Ensure that IPv6 "host" addresses are always escaped with "[...]". ++ if "host" in kwargs: ++ host = kwargs.get("host") or "" ++ if ":" in host and not (host.startswith("[") and host.endswith("]")): ++ kwargs["host"] = f"[{host}]" ++ ++ # If any keyword arguments are provided, ensure they are valid. ++ # ------------------------------------------------------------- ++ ++ for key, value in kwargs.items(): ++ if key not in ( ++ "scheme", ++ "authority", ++ "path", ++ "query", ++ "fragment", ++ "userinfo", ++ "host", ++ "port", ++ ): ++ raise TypeError(f"'{key}' is an invalid keyword argument for urlparse()") ++ ++ if value is not None: ++ if len(value) > MAX_URL_LENGTH: ++ raise InvalidURL(f"URL component '{key}' too long") ++ ++ # If a component includes any ASCII control characters including \t, \r, \n, ++ # then treat it as invalid. 
++ if any(char.isascii() and not char.isprintable() for char in value): ++ raise InvalidURL( ++ f"Invalid non-printable ASCII character in URL component '{key}'" ++ ) ++ ++ # Ensure that keyword arguments match as a valid regex. ++ if not COMPONENT_REGEX[key].fullmatch(value): ++ raise InvalidURL(f"Invalid URL component '{key}'") ++ ++ # The URL_REGEX will always match, but may have empty components. ++ url_match = URL_REGEX.match(url) ++ assert url_match is not None ++ url_dict = url_match.groupdict() ++ ++ # * 'scheme', 'authority', and 'path' may be empty strings. ++ # * 'query' may be 'None', indicating no trailing "?" portion. ++ # Any string including the empty string, indicates a trailing "?". ++ # * 'fragment' may be 'None', indicating no trailing "#" portion. ++ # Any string including the empty string, indicates a trailing "#". ++ scheme = kwargs.get("scheme", url_dict["scheme"]) or "" ++ authority = kwargs.get("authority", url_dict["authority"]) or "" ++ path = kwargs.get("path", url_dict["path"]) or "" ++ query = kwargs.get("query", url_dict["query"]) ++ fragment = kwargs.get("fragment", url_dict["fragment"]) ++ ++ # The AUTHORITY_REGEX will always match, but may have empty components. ++ authority_match = AUTHORITY_REGEX.match(authority) ++ assert authority_match is not None ++ authority_dict = authority_match.groupdict() ++ ++ # * 'userinfo' and 'host' may be empty strings. ++ # * 'port' may be 'None'. ++ userinfo = kwargs.get("userinfo", authority_dict["userinfo"]) or "" ++ host = kwargs.get("host", authority_dict["host"]) or "" ++ port = kwargs.get("port", authority_dict["port"]) ++ ++ # Normalize and validate each component. ++ # We end up with a parsed representation of the URL, ++ # with components that are plain ASCII bytestrings. 
++ parsed_scheme: str = scheme.lower() ++ parsed_userinfo: str = quote(userinfo, safe=SUB_DELIMS + ":") ++ parsed_host: str = encode_host(host) ++ parsed_port: typing.Optional[int] = normalize_port(port, scheme) ++ ++ has_scheme = parsed_scheme != "" ++ has_authority = ( ++ parsed_userinfo != "" or parsed_host != "" or parsed_port is not None ++ ) ++ validate_path(path, has_scheme=has_scheme, has_authority=has_authority) ++ if has_authority: ++ path = normalize_path(path) ++ ++ parsed_path: str = quote(path, safe=SUB_DELIMS + ":@/") ++ parsed_query: typing.Optional[str] = ( ++ None if query is None else quote(query, safe=SUB_DELIMS + "/?") ++ ) ++ parsed_fragment: typing.Optional[str] = ( ++ None if fragment is None else quote(fragment, safe=SUB_DELIMS + "/?") ++ ) ++ ++ # The parsed ASCII bytestrings are our canonical form. ++ # All properties of the URL are derived from these. ++ return ParseResult( ++ parsed_scheme, ++ parsed_userinfo, ++ parsed_host, ++ parsed_port, ++ parsed_path, ++ parsed_query, ++ parsed_fragment, ++ ) ++ ++ ++def encode_host(host: str) -> str: ++ if not host: ++ return "" ++ ++ elif IPv4_STYLE_HOSTNAME.match(host): ++ # Validate IPv4 hostnames like #.#.#.# ++ # ++ # From https://datatracker.ietf.org/doc/html/rfc3986/#section-3.2.2 ++ # ++ # IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet ++ try: ++ ipaddress.IPv4Address(host) ++ except ipaddress.AddressValueError: ++ raise InvalidURL("Invalid IPv4 address") ++ return host ++ ++ elif IPv6_STYLE_HOSTNAME.match(host): ++ # Validate IPv6 hostnames like [...] ++ # ++ # From https://datatracker.ietf.org/doc/html/rfc3986/#section-3.2.2 ++ # ++ # "A host identified by an Internet Protocol literal address, version 6 ++ # [RFC3513] or later, is distinguished by enclosing the IP literal ++ # within square brackets ("[" and "]"). This is the only place where ++ # square bracket characters are allowed in the URI syntax." 
++ try: ++ ipaddress.IPv6Address(host[1:-1]) ++ except ipaddress.AddressValueError: ++ raise InvalidURL("Invalid IPv6 address") ++ return host[1:-1] ++ ++ elif host.isascii(): ++ # Regular ASCII hostnames ++ # ++ # From https://datatracker.ietf.org/doc/html/rfc3986/#section-3.2.2 ++ # ++ # reg-name = *( unreserved / pct-encoded / sub-delims ) ++ return quote(host.lower(), safe=SUB_DELIMS) ++ ++ # IDNA hostnames ++ try: ++ return idna.encode(host.lower()).decode("ascii") ++ except idna.IDNAError: ++ raise InvalidURL("Invalid IDNA hostname") ++ ++ ++def normalize_port( ++ port: typing.Optional[typing.Union[str, int]], scheme: str ++) -> typing.Optional[int]: ++ # From https://tools.ietf.org/html/rfc3986#section-3.2.3 ++ # ++ # "A scheme may define a default port. For example, the "http" scheme ++ # defines a default port of "80", corresponding to its reserved TCP ++ # port number. The type of port designated by the port number (e.g., ++ # TCP, UDP, SCTP) is defined by the URI scheme. URI producers and ++ # normalizers should omit the port component and its ":" delimiter if ++ # port is empty or if its value would be the same as that of the ++ # scheme's default." ++ if port is None or port == "": ++ return None ++ ++ try: ++ port_as_int = int(port) ++ except ValueError: ++ raise InvalidURL("Invalid port") ++ ++ # See https://url.spec.whatwg.org/#url-miscellaneous ++ default_port = {"ftp": 21, "http": 80, "https": 443, "ws": 80, "wss": 443}.get( ++ scheme ++ ) ++ if port_as_int == default_port: ++ return None ++ return port_as_int ++ ++ ++def validate_path(path: str, has_scheme: bool, has_authority: bool) -> None: ++ """ ++ Path validation rules that depend on if the URL contains a scheme or authority component. ++ ++ See https://datatracker.ietf.org/doc/html/rfc3986.html#section-3.3 ++ """ ++ if has_authority: ++ # > If a URI contains an authority component, then the path component ++ # > must either be empty or begin with a slash ("/") character." 
++ if path and not path.startswith("/"): ++ raise InvalidURL("For absolute URLs, path must be empty or begin with '/'") ++ else: ++ # > If a URI does not contain an authority component, then the path cannot begin ++ # > with two slash characters ("//"). ++ if path.startswith("//"): ++ raise InvalidURL( ++ "URLs with no authority component cannot have a path starting with '//'" ++ ) ++ # > In addition, a URI reference (Section 4.1) may be a relative-path reference, in which ++ # > case the first path segment cannot contain a colon (":") character. ++ if path.startswith(":") and not has_scheme: ++ raise InvalidURL( ++ "URLs with no scheme component cannot have a path starting with ':'" ++ ) ++ ++ ++def normalize_path(path: str) -> str: ++ """ ++ Drop "." and ".." segments from a URL path. ++ ++ For example: ++ ++ normalize_path("/path/./to/somewhere/..") == "/path/to" ++ """ ++ # https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4 ++ components = path.split("/") ++ output: typing.List[str] = [] ++ for component in components: ++ if component == ".": ++ pass ++ elif component == "..": ++ if output and output != [""]: ++ output.pop() ++ else: ++ output.append(component) ++ return "/".join(output) ++ ++ ++def percent_encode(char: str) -> str: ++ """ ++ Replace every character in a string with the percent-encoded representation. ++ ++ Characters outside the ASCII range are represented with their a percent-encoded ++ representation of their UTF-8 byte sequence. ++ ++ For example: ++ ++ percent_encode(" ") == "%20" ++ """ ++ return "".join([f"%{byte:02x}" for byte in char.encode("utf-8")]).upper() ++ ++ ++def quote(string: str, safe: str = "/") -> str: ++ NON_ESCAPED_CHARS = UNRESERVED_CHARACTERS + safe ++ if string.count("%") == len(PERCENT_ENCODED_REGEX.findall(string)): ++ # If all occurances of '%' are valid '%xx' escapes, then treat ++ # percent as a non-escaping character. 
++ NON_ESCAPED_CHARS += "%" ++ ++ return "".join( ++ [char if char in NON_ESCAPED_CHARS else percent_encode(char) for char in string] ++ ) +diff --git a/httpx/_urls.py b/httpx/_urls.py +index f26b2eb2d..1bcbc8b29 100644 +--- a/httpx/_urls.py ++++ b/httpx/_urls.py +@@ -1,12 +1,10 @@ + import typing +-from urllib.parse import parse_qs, quote, unquote, urlencode ++from urllib.parse import parse_qs, unquote, urlencode + + import idna +-import rfc3986 +-import rfc3986.exceptions + +-from ._exceptions import InvalidURL + from ._types import PrimitiveData, QueryParamTypes, RawURL, URLTypes ++from ._urlparse import urlparse + from ._utils import primitive_value_to_str + + +@@ -70,56 +68,63 @@ class URL: + be properly URL escaped when decoding the parameter names and values themselves. + """ + +- _uri_reference: rfc3986.URIReference +- + def __init__( + self, url: typing.Union["URL", str] = "", **kwargs: typing.Any + ) -> None: ++ if kwargs: ++ allowed = { ++ "scheme": str, ++ "username": str, ++ "password": str, ++ "userinfo": bytes, ++ "host": str, ++ "port": int, ++ "netloc": bytes, ++ "path": str, ++ "query": bytes, ++ "raw_path": bytes, ++ "fragment": str, ++ "params": object, ++ } ++ ++ # Perform type checking for all supported keyword arguments. ++ for key, value in kwargs.items(): ++ if key not in allowed: ++ message = f"{key!r} is an invalid keyword argument for URL()" ++ raise TypeError(message) ++ if value is not None and not isinstance(value, allowed[key]): ++ expected = allowed[key].__name__ ++ seen = type(value).__name__ ++ message = f"Argument {key!r} must be {expected} but got {seen}" ++ raise TypeError(message) ++ if isinstance(value, bytes): ++ kwargs[key] = value.decode("ascii") ++ ++ if "params" in kwargs: ++ # Replace any "params" keyword with the raw "query" instead. ++ # ++ # Ensure that empty params use `kwargs["query"] = None` rather ++ # than `kwargs["query"] = ""`, so that generated URLs do not ++ # include an empty trailing "?". 
++ params = kwargs.pop("params") ++ kwargs["query"] = None if not params else str(QueryParams(params)) ++ + if isinstance(url, str): +- try: +- self._uri_reference = rfc3986.iri_reference(url).encode() +- except rfc3986.exceptions.InvalidAuthority as exc: +- raise InvalidURL(message=str(exc)) from None +- +- if self.is_absolute_url: +- # We don't want to normalize relative URLs, since doing so +- # removes any leading `../` portion. +- self._uri_reference = self._uri_reference.normalize() ++ self._uri_reference = urlparse(url, **kwargs) + elif isinstance(url, URL): +- self._uri_reference = url._uri_reference ++ self._uri_reference = url._uri_reference.copy_with(**kwargs) + else: + raise TypeError( + f"Invalid type for url. Expected str or httpx.URL, got {type(url)}: {url!r}" + ) + +- # Perform port normalization, following the WHATWG spec for default ports. +- # +- # See: +- # * https://tools.ietf.org/html/rfc3986#section-3.2.3 +- # * https://url.spec.whatwg.org/#url-miscellaneous +- # * https://url.spec.whatwg.org/#scheme-state +- default_port = { +- "ftp": ":21", +- "http": ":80", +- "https": ":443", +- "ws": ":80", +- "wss": ":443", +- }.get(self._uri_reference.scheme, "") +- authority = self._uri_reference.authority or "" +- if default_port and authority.endswith(default_port): +- authority = authority[: -len(default_port)] +- self._uri_reference = self._uri_reference.copy_with(authority=authority) +- +- if kwargs: +- self._uri_reference = self.copy_with(**kwargs)._uri_reference +- + @property + def scheme(self) -> str: + """ + The URL scheme, such as "http", "https". + Always normalised to lowercase. + """ +- return self._uri_reference.scheme or "" ++ return self._uri_reference.scheme + + @property + def raw_scheme(self) -> bytes: +@@ -127,7 +132,7 @@ def raw_scheme(self) -> bytes: + The raw bytes representation of the URL scheme, such as b"http", b"https". + Always normalised to lowercase. 
+ """ +- return self.scheme.encode("ascii") ++ return self._uri_reference.scheme.encode("ascii") + + @property + def userinfo(self) -> bytes: +@@ -135,8 +140,7 @@ def userinfo(self) -> bytes: + The URL userinfo as a raw bytestring. + For example: b"jo%40email.com:a%20secret". + """ +- userinfo = self._uri_reference.userinfo or "" +- return userinfo.encode("ascii") ++ return self._uri_reference.userinfo.encode("ascii") + + @property + def username(self) -> str: +@@ -144,7 +148,7 @@ def username(self) -> str: + The URL username as a string, with URL decoding applied. + For example: "jo@email.com" + """ +- userinfo = self._uri_reference.userinfo or "" ++ userinfo = self._uri_reference.userinfo + return unquote(userinfo.partition(":")[0]) + + @property +@@ -153,7 +157,7 @@ def password(self) -> str: + The URL password as a string, with URL decoding applied. + For example: "a secret" + """ +- userinfo = self._uri_reference.userinfo or "" ++ userinfo = self._uri_reference.userinfo + return unquote(userinfo.partition(":")[2]) + + @property +@@ -176,11 +180,7 @@ def host(self) -> str: + url = httpx.URL("https://[::ffff:192.168.0.1]") + assert url.host == "::ffff:192.168.0.1" + """ +- host: str = self._uri_reference.host or "" +- +- if host and ":" in host and host[0] == "[": +- # it's an IPv6 address +- host = host.lstrip("[").rstrip("]") ++ host: str = self._uri_reference.host + + if host.startswith("xn--"): + host = idna.decode(host) +@@ -207,13 +207,7 @@ def raw_host(self) -> bytes: + url = httpx.URL("https://[::ffff:192.168.0.1]") + assert url.raw_host == b"::ffff:192.168.0.1" + """ +- host: str = self._uri_reference.host or "" +- +- if host and ":" in host and host[0] == "[": +- # it's an IPv6 address +- host = host.lstrip("[").rstrip("]") +- +- return host.encode("ascii") ++ return self._uri_reference.host.encode("ascii") + + @property + def port(self) -> typing.Optional[int]: +@@ -229,8 +223,7 @@ def port(self) -> typing.Optional[int]: + assert 
httpx.URL("http://www.example.com") == httpx.URL("http://www.example.com:80") + assert httpx.URL("http://www.example.com:80").port is None + """ +- port = self._uri_reference.port +- return int(port) if port else None ++ return self._uri_reference.port + + @property + def netloc(self) -> bytes: +@@ -241,12 +234,7 @@ def netloc(self) -> bytes: + This property may be used for generating the value of a request + "Host" header. + """ +- host = self._uri_reference.host or "" +- port = self._uri_reference.port +- netloc = host.encode("ascii") +- if port: +- netloc = netloc + b":" + port.encode("ascii") +- return netloc ++ return self._uri_reference.netloc.encode("ascii") + + @property + def path(self) -> str: +@@ -357,127 +345,7 @@ def copy_with(self, **kwargs: typing.Any) -> "URL": + url = httpx.URL("https://www.example.com").copy_with(username="jo@gmail.com", password="a secret") + assert url == "https://jo%40email.com:a%20secret@www.example.com" + """ +- allowed = { +- "scheme": str, +- "username": str, +- "password": str, +- "userinfo": bytes, +- "host": str, +- "port": int, +- "netloc": bytes, +- "path": str, +- "query": bytes, +- "raw_path": bytes, +- "fragment": str, +- "params": object, +- } +- +- # Step 1 +- # ====== +- # +- # Perform type checking for all supported keyword arguments. +- for key, value in kwargs.items(): +- if key not in allowed: +- message = f"{key!r} is an invalid keyword argument for copy_with()" +- raise TypeError(message) +- if value is not None and not isinstance(value, allowed[key]): +- expected = allowed[key].__name__ +- seen = type(value).__name__ +- message = f"Argument {key!r} must be {expected} but got {seen}" +- raise TypeError(message) +- +- # Step 2 +- # ====== +- # +- # Consolidate "username", "password", "userinfo", "host", "port" and "netloc" +- # into a single "authority" keyword, for `rfc3986`. +- if "username" in kwargs or "password" in kwargs: +- # Consolidate "username" and "password" into "userinfo". 
+- username = quote(kwargs.pop("username", self.username) or "") +- password = quote(kwargs.pop("password", self.password) or "") +- userinfo = f"{username}:{password}" if password else username +- kwargs["userinfo"] = userinfo.encode("ascii") +- +- if "host" in kwargs or "port" in kwargs: +- # Consolidate "host" and "port" into "netloc". +- host = kwargs.pop("host", self.host) or "" +- port = kwargs.pop("port", self.port) +- +- if host and ":" in host and host[0] != "[": +- # IPv6 addresses need to be escaped within square brackets. +- host = f"[{host}]" +- +- kwargs["netloc"] = ( +- f"{host}:{port}".encode("ascii") +- if port is not None +- else host.encode("ascii") +- ) +- +- if "userinfo" in kwargs or "netloc" in kwargs: +- # Consolidate "userinfo" and "netloc" into authority. +- userinfo = (kwargs.pop("userinfo", self.userinfo) or b"").decode("ascii") +- netloc = (kwargs.pop("netloc", self.netloc) or b"").decode("ascii") +- authority = f"{userinfo}@{netloc}" if userinfo else netloc +- kwargs["authority"] = authority +- +- # Step 3 +- # ====== +- # +- # Wrangle any "path", "query", "raw_path" and "params" keywords into +- # "query" and "path" keywords for `rfc3986`. +- if "raw_path" in kwargs: +- # If "raw_path" is included, then split it into "path" and "query" components. +- raw_path = kwargs.pop("raw_path") or b"" +- path, has_query, query = raw_path.decode("ascii").partition("?") +- kwargs["path"] = path +- kwargs["query"] = query if has_query else None +- +- else: +- if kwargs.get("path") is not None: +- # Ensure `kwargs["path"] = <url quoted str>` for `rfc3986`. +- kwargs["path"] = quote(kwargs["path"]) +- +- if kwargs.get("query") is not None: +- # Ensure `kwargs["query"] = <url quoted str>` for `rfc3986`. +- # +- # Note that `.copy_with(query=None)` and `.copy_with(query=b"")` +- # are subtly different. The `None` style will not include an empty +- # trailing "?" character. 
+- kwargs["query"] = kwargs["query"].decode("ascii") +- +- if "params" in kwargs: +- # Replace any "params" keyword with the raw "query" instead. +- # +- # Ensure that empty params use `kwargs["query"] = None` rather +- # than `kwargs["query"] = ""`, so that generated URLs do not +- # include an empty trailing "?". +- params = kwargs.pop("params") +- kwargs["query"] = None if not params else str(QueryParams(params)) +- +- # Step 4 +- # ====== +- # +- # Ensure any fragment component is quoted. +- if kwargs.get("fragment") is not None: +- kwargs["fragment"] = quote(kwargs["fragment"]) +- +- # Step 5 +- # ====== +- # +- # At this point kwargs may include keys for "scheme", "authority", "path", +- # "query" and "fragment". Together these constitute the entire URL. +- # +- # See https://tools.ietf.org/html/rfc3986#section-3 +- # +- # foo://example.com:8042/over/there?name=ferret#nose +- # \_/ \______________/\_________/ \_________/ \__/ +- # | | | | | +- # scheme authority path query fragment +- new_url = URL(self) +- new_url._uri_reference = self._uri_reference.copy_with(**kwargs) +- if new_url.is_absolute_url: +- new_url._uri_reference = new_url._uri_reference.normalize() +- return URL(new_url) ++ return URL(self, **kwargs) + + def copy_set_param(self, key: str, value: typing.Any = None) -> "URL": + return self.copy_with(params=self.params.set(key, value)) +@@ -501,21 +369,9 @@ def join(self, url: URLTypes) -> "URL": + url = url.join("/new/path") + assert url == "https://www.example.com/new/path" + """ +- if self.is_relative_url: +- # Workaround to handle relative URLs, which otherwise raise +- # rfc3986.exceptions.ResolutionError when used as an argument +- # in `.resolve_with`. +- return ( +- self.copy_with(scheme="http", host="example.com") +- .join(url) +- .copy_with(scheme=None, host=None) +- ) ++ from urllib.parse import urljoin + +- # We drop any fragment portion, because RFC 3986 strictly +- # treats URLs with a fragment portion as not being absolute URLs. 
+- base_uri = self._uri_reference.copy_with(fragment=None) +- relative_url = URL(url) +- return URL(relative_url._uri_reference.resolve_with(base_uri).unsplit()) ++ return URL(urljoin(str(self), str(URL(url)))) + + def __hash__(self) -> int: + return hash(str(self)) +@@ -524,21 +380,33 @@ def __eq__(self, other: typing.Any) -> bool: + return isinstance(other, (URL, str)) and str(self) == str(URL(other)) + + def __str__(self) -> str: +- return typing.cast(str, self._uri_reference.unsplit()) ++ return str(self._uri_reference) + + def __repr__(self) -> str: +- class_name = self.__class__.__name__ +- url_str = str(self) +- if self._uri_reference.userinfo: +- # Mask any password component in the URL representation, to lower the +- # risk of unintended leakage, such as in debug information and logging. +- username = quote(self.username) +- url_str = ( +- rfc3986.urlparse(url_str) +- .copy_with(userinfo=f"{username}:[secure]") +- .unsplit() +- ) +- return f"{class_name}({url_str!r})" ++ scheme, userinfo, host, port, path, query, fragment = self._uri_reference ++ ++ if ":" in userinfo: ++ # Mask any password component. 
++ userinfo = f'{userinfo.split(":")[0]}:[secure]' ++ ++ authority = "".join( ++ [ ++ f"{userinfo}@" if userinfo else "", ++ f"[{host}]" if ":" in host else host, ++ f":{port}" if port is not None else "", ++ ] ++ ) ++ url = "".join( ++ [ ++ f"{self.scheme}:" if scheme else "", ++ f"//{authority}" if authority else "", ++ path, ++ f"?{query}" if query is not None else "", ++ f"#{fragment}" if fragment is not None else "", ++ ] ++ ) ++ ++ return f"{self.__class__.__name__}({url!r})" + + + class QueryParams(typing.Mapping[str, str]): +diff --git a/pyproject.toml b/pyproject.toml +index 316772931..b11c02825 100644 +--- a/pyproject.toml ++++ b/pyproject.toml +@@ -30,7 +30,7 @@ classifiers = [ + dependencies = [ + "certifi", + "httpcore>=0.15.0,<0.17.0", +- "rfc3986[idna2008]>=1.3,<2", ++ "idna", + "sniffio", + ] + dynamic = ["readme", "version"] diff --git a/srcpkgs/python3-httpx/template b/srcpkgs/python3-httpx/template index ff0c1e98466e..7c9ba911b7db 100644 --- a/srcpkgs/python3-httpx/template +++ b/srcpkgs/python3-httpx/template @@ -1,10 +1,10 @@ # Template file for 'python3-httpx' pkgname=python3-httpx version=0.23.3 -revision=1 +revision=2 build_style=python3-pep517 -hostmakedepends="python3-poetry-core hatchling" -depends="python3-rfc3986 python3-certifi python3-charset-normalizer +hostmakedepends="hatchling" +depends="python3-idna python3-certifi python3-charset-normalizer python3-sniffio python3-httpcore python3-click python3-rich python3-Pygments python3-h2" short_desc="Next generation HTTP client for Python"