aboutsummaryrefslogtreecommitdiff
path: root/venv/lib/python3.8/site-packages/werkzeug/urls.py
diff options
context:
space:
mode:
author: sotech117 <michael_foiani@brown.edu> 2025-07-31 17:27:24 -0400
committer: sotech117 <michael_foiani@brown.edu> 2025-07-31 17:27:24 -0400
commit: 5bf22fc7e3c392c8bd44315ca2d06d7dca7d084e (patch)
tree: 8dacb0f195df1c0788d36dd0064f6bbaa3143ede /venv/lib/python3.8/site-packages/werkzeug/urls.py
parent: b832d364da8c2efe09e3f75828caf73c50d01ce3 (diff)
add code for analysis of data
Diffstat (limited to 'venv/lib/python3.8/site-packages/werkzeug/urls.py')
-rw-r--r--venv/lib/python3.8/site-packages/werkzeug/urls.py203
1 files changed, 203 insertions, 0 deletions
diff --git a/venv/lib/python3.8/site-packages/werkzeug/urls.py b/venv/lib/python3.8/site-packages/werkzeug/urls.py
new file mode 100644
index 0000000..5bffe39
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/werkzeug/urls.py
@@ -0,0 +1,203 @@
+from __future__ import annotations
+
+import codecs
+import re
+import typing as t
+import urllib.parse
+from urllib.parse import quote
+from urllib.parse import unquote
+from urllib.parse import urlencode
+from urllib.parse import urlsplit
+from urllib.parse import urlunsplit
+
+from .datastructures import iter_multi_items
+
+
def _codec_error_url_quote(e: UnicodeError) -> tuple[str, int]:
    """Codec error handler that percent-encodes the offending bytes.

    Used in :func:`uri_to_iri` after unquoting, so that bytes which do
    not decode are put back into the output in quoted form.

    :param e: The decoding error raised by the codec.
    :return: The replacement text and the position at which decoding
        should resume.
    """
    # UnicodeError is documented to carry object/start/end, but mypy
    # does not see those attributes, hence the ignores.
    bad_bytes = e.object[e.start : e.end]  # type: ignore
    resume_at = e.end  # type: ignore
    return quote(bad_bytes, safe=""), resume_at


# Make the handler available by name to ``decode``/``unquote`` calls.
codecs.register_error("werkzeug.url_quote", _codec_error_url_quote)
+
+
def _make_unquote_part(name: str, chars: str) -> t.Callable[[str], str]:
    """Build an unquoting function for one part of a URL.

    The returned function decodes every percent-encoded sequence except
    the ones that encode a character in ``chars``. Those stay quoted so
    that the meaning of that URL part does not change.

    :param name: Suffix used for the returned function's ``__name__``.
    :param chars: Characters whose percent-encoded form must be kept.
    """
    choices = "|".join([f"{ord(c):02X}" for c in sorted(chars)])
    # A single capture group, so ``split`` keeps the protected runs in
    # the result at the odd positions.
    protected = re.compile(f"((?:%(?:{choices}))+)", re.I)

    def _unquote_partial(value: str) -> str:
        pieces = protected.split(value)

        # Even positions hold free text, which is unquoted. Odd positions
        # hold protected runs, which are passed through untouched.
        for idx in range(0, len(pieces), 2):
            pieces[idx] = unquote(pieces[idx], "utf-8", "werkzeug.url_quote")

        return "".join(pieces)

    _unquote_partial.__name__ = f"_unquote_{name}"
    return _unquote_partial
+
+
# Characters that must stay percent-encoded in each part of a URL,
# based on https://url.spec.whatwg.org/#percent-encoded-bytes
# Controls (including DEL), space, and % are kept quoted everywhere.
_always_unsafe = "".join(map(chr, (*range(0x21), 0x25, 0x7F)))
# Each part additionally protects its own delimiter characters.
_unquote_fragment = _make_unquote_part("fragment", _always_unsafe)
_unquote_query = _make_unquote_part("query", _always_unsafe + "&=+#")
_unquote_path = _make_unquote_part("path", _always_unsafe + "/?#")
_unquote_user = _make_unquote_part("user", _always_unsafe + ":@/?#")
+
+
def uri_to_iri(uri: str) -> str:
    """Convert a URI to an IRI. All valid UTF-8 characters are unquoted,
    leaving all reserved and invalid characters quoted. If the URL has
    a domain, it is decoded from Punycode.

    >>> uri_to_iri("http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF")
    'http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF'

    An explicitly given port is always kept in the result, including
    port ``0``.

    :param uri: The URI to convert.
    :return: The IRI as a string.

    .. versionchanged:: 3.0
        Passing a tuple or bytes, and the ``charset`` and ``errors`` parameters,
        are removed.

    .. versionchanged:: 2.3
        Which characters remain quoted is specific to each part of the URL.

    .. versionchanged:: 0.15
        All reserved and invalid characters remain quoted. Previously,
        only some reserved characters were preserved, and invalid bytes
        were replaced instead of left quoted.

    .. versionadded:: 0.6
    """
    parts = urlsplit(uri)
    path = _unquote_path(parts.path)
    query = _unquote_query(parts.query)
    fragment = _unquote_fragment(parts.fragment)

    if parts.hostname:
        netloc = _decode_idna(parts.hostname)
    else:
        netloc = ""

    # ``hostname`` strips the brackets from IPv6 literals; put them back.
    if ":" in netloc:
        netloc = f"[{netloc}]"

    # Compare against None rather than truthiness: port 0 is falsy but
    # was explicitly present, and dropping it would change the URL.
    port = parts.port

    if port is not None:
        netloc = f"{netloc}:{port}"

    if parts.username:
        auth = _unquote_user(parts.username)

        if parts.password:
            password = _unquote_user(parts.password)
            auth = f"{auth}:{password}"

        netloc = f"{auth}@{netloc}"

    return urlunsplit((parts.scheme, netloc, path, query, fragment))
+
+
def iri_to_uri(iri: str) -> str:
    """Convert an IRI to a URI. All non-ASCII and unsafe characters are
    quoted. If the URL has a domain, it is encoded to Punycode.

    >>> iri_to_uri('http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF')
    'http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF'

    An explicitly given port is always kept in the result, including
    port ``0``.

    :param iri: The IRI to convert.
    :return: The URI as a string.

    .. versionchanged:: 3.0
        Passing a tuple or bytes, the ``charset`` and ``errors`` parameters,
        and the ``safe_conversion`` parameter, are removed.

    .. versionchanged:: 2.3
        Which characters remain unquoted is specific to each part of the URL.

    .. versionchanged:: 0.15
        All reserved characters remain unquoted. Previously, only some reserved
        characters were left unquoted.

    .. versionchanged:: 0.9.6
        The ``safe_conversion`` parameter was added.

    .. versionadded:: 0.6
    """
    parts = urlsplit(iri)
    # safe = https://url.spec.whatwg.org/#url-path-segment-string
    # as well as percent for things that are already quoted
    path = quote(parts.path, safe="%!$&'()*+,/:;=@")
    query = quote(parts.query, safe="%!$&'()*+,/:;=?@")
    fragment = quote(parts.fragment, safe="%!#$&'()*+,/:;=?@")

    if parts.hostname:
        netloc = parts.hostname.encode("idna").decode("ascii")
    else:
        netloc = ""

    # ``hostname`` strips the brackets from IPv6 literals; put them back.
    if ":" in netloc:
        netloc = f"[{netloc}]"

    # Compare against None rather than truthiness: port 0 is falsy but
    # was explicitly present, and dropping it would change the URL.
    port = parts.port

    if port is not None:
        netloc = f"{netloc}:{port}"

    if parts.username:
        auth = quote(parts.username, safe="%!$&'()*+,;=")

        if parts.password:
            password = quote(parts.password, safe="%!$&'()*+,;=")
            auth = f"{auth}:{password}"

        netloc = f"{auth}@{netloc}"

    return urlunsplit((parts.scheme, netloc, path, query, fragment))
+
+
# Python < 3.12
# Earlier iri_to_uri implementations worked around the itms-services
# scheme by hand. Registering it as a netloc scheme instead tells Python
# directly that the // must be preserved.
if urllib.parse.uses_netloc.count("itms-services") == 0:
    urllib.parse.uses_netloc.append("itms-services")
+
+
def _decode_idna(domain: str) -> str:
    """Decode a Punycode (IDNA) domain name to its Unicode form.

    A domain that is not pure ASCII is returned unchanged, since it is
    already decoded. If the domain cannot be decoded as a whole, each
    dot-separated label is decoded on its own and labels that fail to
    decode are left as they are.

    :param domain: The domain name to decode.
    :return: The domain with every decodable label in Unicode.
    """
    try:
        data = domain.encode("ascii")
    except UnicodeEncodeError:
        # If the domain is not ASCII, it's decoded already.
        return domain

    # Catch UnicodeError, not only UnicodeDecodeError: depending on the
    # Python version, the idna/punycode codecs report malformed labels
    # with the plain base class, which would otherwise escape and make
    # the per-label fallback below unreachable.
    try:
        # Try decoding in one shot.
        return data.decode("idna")
    except UnicodeError:
        pass

    # Decode each part separately, leaving invalid parts as punycode.
    parts = []

    for part in data.split(b"."):
        try:
            parts.append(part.decode("idna"))
        except UnicodeError:
            parts.append(part.decode("ascii"))

    return ".".join(parts)
+
+
def _urlencode(query: t.Mapping[str, str] | t.Iterable[tuple[str, str]]) -> str:
    """Encode ``query`` as a URL query string, skipping ``None`` values.

    :param query: A mapping or an iterable of ``(key, value)`` pairs; it
        is expanded with ``iter_multi_items`` before encoding.
    :return: The encoded query string.
    """
    # Items whose value is None are left out of the output entirely.
    present = filter(lambda item: item[1] is not None, iter_multi_items(query))
    # safe = https://url.spec.whatwg.org/#percent-encoded-bytes
    return urlencode(list(present), safe="!$'()*,/:;?@")