mirror of
https://github.com/kellyjonbrazil/jc.git
synced 2025-07-13 01:20:24 +02:00
clarify quoting rules
This commit is contained in:
@ -15,6 +15,8 @@ This parser will work with naked and wrapped URL strings:
|
|||||||
Normalized encoded and decoded versions of the original URL and URL parts
|
Normalized encoded and decoded versions of the original URL and URL parts
|
||||||
are included in the output.
|
are included in the output.
|
||||||
|
|
||||||
|
> Note: Do not use the encoded fields for a URL that is already encoded.
|
||||||
|
|
||||||
Usage (cli):
|
Usage (cli):
|
||||||
|
|
||||||
$ echo "http://example.com/test/path?q1=foo&q2=bar#frag" | jc --url
|
$ echo "http://example.com/test/path?q1=foo&q2=bar#frag" | jc --url
|
||||||
|
@ -10,6 +10,8 @@ This parser will work with naked and wrapped URL strings:
|
|||||||
Normalized encoded and decoded versions of the original URL and URL parts
|
Normalized encoded and decoded versions of the original URL and URL parts
|
||||||
are included in the output.
|
are included in the output.
|
||||||
|
|
||||||
|
> Note: Do not use the encoded fields for a URL that is already encoded.
|
||||||
|
|
||||||
Usage (cli):
|
Usage (cli):
|
||||||
|
|
||||||
$ echo "http://example.com/test/path?q1=foo&q2=bar#frag" | jc --url
|
$ echo "http://example.com/test/path?q1=foo&q2=bar#frag" | jc --url
|
||||||
@ -182,15 +184,36 @@ def parse(
|
|||||||
|
|
||||||
raw_output: Dict = {}
|
raw_output: Dict = {}
|
||||||
|
|
||||||
|
# Best-effort to find safe characters in each URL part. Python
|
||||||
|
# urllib.parse.quote will always treat the following as safe: `_.-~`
|
||||||
|
# https://docs.python.org/3/library/urllib.parse.html#urllib.parse.quote
|
||||||
|
#
|
||||||
|
# Below are additional safe chars per URL part:
|
||||||
|
|
||||||
|
# https://datatracker.ietf.org/doc/html/rfc3986#section-3.1 (scheme)
|
||||||
|
SCHEME_SAFE = '+'
|
||||||
|
|
||||||
|
# https://datatracker.ietf.org/doc/html/rfc3986#section-3.2 (netloc)
|
||||||
|
NETLOC_SAFE = '+@:[]'
|
||||||
|
|
||||||
|
# https://datatracker.ietf.org/doc/html/rfc3986#section-3.3 (path)
|
||||||
|
PATH_SAFE = '+/@:;,='
|
||||||
|
|
||||||
|
# https://datatracker.ietf.org/doc/html/rfc3986#section-3.4 (query)
|
||||||
|
QUERY_SAFE = '+/@:;,=&?'
|
||||||
|
|
||||||
|
# https://datatracker.ietf.org/doc/html/rfc3986#section-3.5 (fragment)
|
||||||
|
FRAGMENT_SAFE = '+/@:;,=&?'
|
||||||
|
|
||||||
if jc.utils.has_data(data):
|
if jc.utils.has_data(data):
|
||||||
parts = urlsplit(unwrap(data))
|
parts = urlsplit(unwrap(data))
|
||||||
normalized = urlsplit(urlunsplit(parts))
|
normalized = urlsplit(urlunsplit(parts))
|
||||||
|
|
||||||
quoted = normalized._replace(scheme=quote(normalized.scheme),
|
quoted = normalized._replace(scheme=quote(normalized.scheme, safe=SCHEME_SAFE),
|
||||||
netloc=quote(normalized.netloc, safe='/?#@:[]'),
|
netloc=quote(normalized.netloc, safe=NETLOC_SAFE),
|
||||||
path=quote(normalized.path),
|
path=quote(normalized.path, safe=PATH_SAFE),
|
||||||
query=quote_plus(normalized.query, safe='+'),
|
query=quote_plus(normalized.query, safe=QUERY_SAFE),
|
||||||
fragment=quote(normalized.fragment)).geturl()
|
fragment=quote(normalized.fragment, safe=FRAGMENT_SAFE)).geturl()
|
||||||
|
|
||||||
unquoted = normalized._replace(scheme=unquote(normalized.scheme),
|
unquoted = normalized._replace(scheme=unquote(normalized.scheme),
|
||||||
netloc=unquote(normalized.netloc),
|
netloc=unquote(normalized.netloc),
|
||||||
@ -225,16 +248,16 @@ def parse(
|
|||||||
query_obj = parse_qs(unquoted_parts.query)
|
query_obj = parse_qs(unquoted_parts.query)
|
||||||
|
|
||||||
if unquoted_parts.username:
|
if unquoted_parts.username:
|
||||||
encoded_username = quote(unquoted_parts.username, safe='/?#@:[]')
|
encoded_username = quote(unquoted_parts.username, safe=NETLOC_SAFE)
|
||||||
|
|
||||||
if unquoted_parts.password:
|
if unquoted_parts.password:
|
||||||
encoded_password = quote(unquoted_parts.password, safe='/?#@:[]')
|
encoded_password = quote(unquoted_parts.password, safe=NETLOC_SAFE)
|
||||||
|
|
||||||
if unquoted_parts.hostname:
|
if unquoted_parts.hostname:
|
||||||
encoded_hostname = quote(unquoted_parts.hostname, safe='/?#@:[]')
|
encoded_hostname = quote(unquoted_parts.hostname, safe=NETLOC_SAFE)
|
||||||
|
|
||||||
if unquoted_parts.port:
|
if unquoted_parts.port:
|
||||||
encoded_port = quote(str(unquoted_parts.port), safe='/?#@:[]')
|
encoded_port = quote(str(unquoted_parts.port), safe=NETLOC_SAFE)
|
||||||
|
|
||||||
raw_output = {
|
raw_output = {
|
||||||
'url': unquoted or None,
|
'url': unquoted or None,
|
||||||
|
Reference in New Issue
Block a user