From 55ae836a4311320359d7420906fede157bdf0522 Mon Sep 17 00:00:00 2001 From: Kelly Brazil Date: Wed, 20 Jul 2022 21:46:06 -0700 Subject: [PATCH] add encoded fields --- jc/parsers/url.py | 128 ++++++++++++++++++++++++---------------------- 1 file changed, 68 insertions(+), 60 deletions(-) diff --git a/jc/parsers/url.py b/jc/parsers/url.py index 64411551..f56ed0ca 100644 --- a/jc/parsers/url.py +++ b/jc/parsers/url.py @@ -24,52 +24,55 @@ Usage (module): Schema: { - "quoted": string, - 'unquoted": string, - "scheme": string, - "netloc": string, - "path": string or null, - "path_list": [ array or null - string + "url": string, + "url_encoded": string, + "scheme": string, + "scheme_encoded": string, + "netloc": string, + "netloc_encoded": string, + "path": string or null, + "path_encoded": string or null, + "path_list": [ array or null + string ], - "query": { object or null - <string>: [ array or null - string # [0] + "query": string or null, + "query_encoded": string or null, + "query_obj": { object or null + <string>: [ array or null + string # [0] ] }, - "query_list": [ array or null - [ - string, # [1] - string - ] - ], - "fragment": string or null, - "username": string or null, - "password": string or null, - "hostname": string or null, - "port": integer or null + "fragment": string or null, + "fragment_encoded": string or null, + "username": string or null, + "password": string or null, + "hostname": string or null, + "port": integer or null } [0] Duplicate query-keys will have their values consolidated into the array of query-values - [1] The first array value is the query-key and the second value is the - query-value Examples: % echo "http://example.com/test/path?q1=foo&q1=bar&q2=baz#frag" \\ | jc --url -p { - "quoted": "http://example.com/test/path?q1%3Dfoo%26q1%3Dbar%26q2%3Dbaz#frag", - "unquoted": "http://example.com/test/path?q1=foo&q1=bar&q2=baz#frag", + "url": "http://example.com/test/path?q1=foo&q1=bar&q2=baz#frag", + "url_encoded": 
"http://example.com/test/path?q1%3Dfoo%26q1%3Dbar%26q2%3Dbaz#frag", "scheme": "http", + "scheme_encoded": "http", "netloc": "example.com", + "netloc_encoded": "example.com", "path": "/test/path", + "path_encoded": "/test/path", "path_list": [ "test", "path" ], - "query": { + "query": "q1=foo&q1=bar&q2=baz", + "query_encoded": "q1%3Dfoo%26q1%3Dbar%26q2%3Dbaz", + "query_obj": { "q1": [ "foo", "bar" @@ -78,21 +81,8 @@ Examples: "baz" ] }, - "query_list": [ - [ - "q1", - "foo" - ], - [ - "q1", - "bar" - ], - [ - "q2", - "baz" - ] - ], "fragment": "frag", + "fragment_encoded": "frag", "username": null, "password": null, "hostname": "example.com", @@ -101,17 +91,22 @@ Examples: $ echo "ftp://localhost/filepath" | jc --url -p { - "quoted": "ftp://localhost/filepath", - "unquoted": "ftp://localhost/filepath", + "url": "ftp://localhost/filepath", + "url_encoded": "ftp://localhost/filepath", "scheme": "ftp", + "scheme_encoded": "ftp", "netloc": "localhost", + "netloc_encoded": "localhost", "path": "/filepath", + "path_encoded": "/filepath", "path_list": [ "filepath" ], "query": null, - "query_list": null, + "query_encoded": null, + "query_obj": null, "fragment": null, + "fragment_encoded": null, "username": null, "password": null, "hostname": "localhost", @@ -181,46 +176,59 @@ def parse( parts = urlsplit(unwrap(data)) normalized = urlsplit(urlunsplit(parts)) - quoted = normalized._replace(path=quote(normalized.path), - query=quote_plus(normalized.query, safe='+')).geturl() + quoted = normalized._replace(scheme=quote_plus(normalized.scheme), + netloc=quote_plus(normalized.netloc), + path=quote(normalized.path), + query=quote_plus(normalized.query, safe='+'), + fragment=quote_plus(normalized.fragment)).geturl() - unquoted = normalized._replace(path=unquote(normalized.path), - query=unquote_plus(normalized.query)).geturl() + unquoted = normalized._replace(scheme=unquote_plus(normalized.scheme), + netloc=unquote_plus(normalized.netloc), + path=unquote(normalized.path), + 
query=unquote_plus(normalized.query), + fragment=unquote_plus(normalized.fragment)).geturl() + quoted_parts = urlsplit(quoted) unquoted_parts = urlsplit(unquoted) my_path = None + encoded_path = None path_list = None query_obj = None - query_list = None if unquoted_parts.path: # normalize the path by removing any duplicate `/` chars my_path = re.sub(r'/+', '/', unquoted_parts.path) + encoded_path = re.sub(r'/+', '/', quoted_parts.path) # remove first '/' and split path_list = my_path.replace('/', '', 1).split('/') + if path_list == ['']: path_list = None if unquoted_parts.query: query_obj = parse_qs(unquoted_parts.query) - query_list = parse_qsl(unquoted_parts.query) raw_output = { - 'quoted': quoted or None, - 'unquoted': unquoted or None, - 'scheme': parts.scheme or None, - 'netloc': parts.netloc or None, + 'url': unquoted or None, + 'url_encoded': quoted or None, + 'scheme': unquoted_parts.scheme or None, + 'scheme_encoded': quoted_parts.scheme or None, + 'netloc': unquoted_parts.netloc or None, + 'netloc_encoded': quoted_parts.netloc or None, 'path': my_path or None, + 'path_encoded': encoded_path or None, 'path_list': path_list or None, - 'query': query_obj or None, - 'query_list': query_list or None, - 'fragment': parts.fragment or None, - 'username': parts.username, - 'password': parts.password, - 'hostname': parts.hostname, - 'port': parts.port + 'query': unquoted_parts.query or None, + 'query_encoded': quoted_parts.query or None, + 'query_obj': query_obj or None, + 'fragment': unquoted_parts.fragment or None, + 'fragment_encoded': quoted_parts.fragment or None, + 'username': unquoted_parts.username or None, + 'password': unquoted_parts.password or None, + 'hostname': unquoted_parts.hostname or None, + 'port': unquoted_parts.port or None, } return raw_output if raw else _process(raw_output)