From 3999276d173d4629d80dced08a7ba92cb197b81e Mon Sep 17 00:00:00 2001 From: Kelly Brazil Date: Fri, 22 Dec 2023 14:36:12 -0800 Subject: [PATCH] add parent, filename, stem, and extension to schema --- jc/parsers/url.py | 81 ++++++++++++++++++++++++++++++++++++++++++++++- tests/test_url.py | 33 ++++++++++++------- 2 files changed, 101 insertions(+), 13 deletions(-) diff --git a/jc/parsers/url.py b/jc/parsers/url.py index d211ee2d..f69cce12 100644 --- a/jc/parsers/url.py +++ b/jc/parsers/url.py @@ -9,6 +9,7 @@ are included in the output. Encoding and Decoding is best effort. This parser will work with naked and wrapped URL strings: +- `/path` - `scheme://host/path` - `URL:scheme://host/path` - `` @@ -30,6 +31,10 @@ Schema: "scheme": string or null, "netloc": string or null, "path": string or null, + "parent": string or null, + "filename": string or null, + "stem": string or null, + "extension": string or null, "path_list": [ array or null string ], @@ -49,6 +54,10 @@ Schema: "scheme": string or null, "netloc": string or null, "path": string or null, + "parent": string or null, + "filename": string or null, + "stem": string or null, + "extension": string or null, "path_list": [ array or null string ], @@ -64,6 +73,10 @@ Schema: "scheme": string or null, "netloc": string or null, "path": string or null, + "parent": string or null, + "filename": string or null, + "stem": string or null, + "extension": string or null, "path_list": [ array or null string ], @@ -91,6 +104,10 @@ Examples: "scheme": "http", "netloc": "example.com", "path": "/test/path", + "parent": "/test", + "filename": "path", + "stem": "path", + "extension": null, "path_list": [ "test", "path" @@ -115,6 +132,10 @@ Examples: "scheme": "http", "netloc": "example.com", "path": "/test/path", + "parent": "/test", + "filename": "path", + "stem": "path", + "extension": null, "path_list": [ "test", "path" @@ -131,6 +152,10 @@ Examples: "scheme": "http", "netloc": "example.com", "path": "/test/path", + "parent": 
"/test", + "filename": "path", + "stem": "path", + "extension": null, "path_list": [ "test", "path" @@ -150,6 +175,10 @@ Examples: "scheme": "ftp", "netloc": "localhost", "path": "/filepath", + "parent": "/", + "filename": "filepath", + "stem": "filepath", + "extension": null, "path_list": [ "filepath" ], @@ -165,6 +194,10 @@ Examples: "scheme": "ftp", "netloc": "localhost", "path": "/filepath", + "parent": "/", + "filename": "filepath", + "stem": "filepath", + "extension": null, "path_list": [ "filepath" ], @@ -180,6 +213,10 @@ Examples: "scheme": "ftp", "netloc": "localhost", "path": "/filepath", + "parent": "/", + "filename": "filepath", + "stem": "filepath", + "extension": null, "path_list": [ "filepath" ], @@ -192,6 +229,7 @@ Examples: } } """ +import pathlib import re from urllib.parse import ( urlsplit, unwrap, parse_qs, urlunsplit, quote, quote_plus, unquote, unquote_plus @@ -202,7 +240,7 @@ import jc.utils class info(): """Provides parser metadata (version, author, etc.)""" - version = '1.0' + version = '1.1' description = 'URL string parser' author = 'Kelly Brazil' author_email = 'kellyjonbrazil@gmail.com' @@ -294,6 +332,18 @@ def parse( my_path = None encoded_path = None decoded_path = None + parent = None + encoded_parent = None + decoded_parent = None + filename = None + encoded_filename = None + decoded_filename = None + stem = None + encoded_stem = None + decoded_stem = None + extension = None + encoded_extension = None + decoded_extension = None path_list = None encoded_path_list = None decoded_path_list = None @@ -314,6 +364,23 @@ def parse( encoded_path = re.sub(r'/+', '/', quoted_parts.path) decoded_path = re.sub(r'/+', '/', unquoted_parts.path) + # get parent, file, stem, and extension info from path + parent = str(pathlib.PurePath(my_path).parent) + encoded_parent = str(pathlib.PurePath(encoded_path).parent) + decoded_parent = str(pathlib.PurePath(decoded_path).parent) + + filename = str(pathlib.PurePath(my_path).name) + encoded_filename = 
str(pathlib.PurePath(encoded_path).name) + decoded_filename = str(pathlib.PurePath(decoded_path).name) + + stem = str(pathlib.PurePath(my_path).stem) + encoded_stem = str(pathlib.PurePath(encoded_path).stem) + decoded_stem = str(pathlib.PurePath(decoded_path).stem) + + extension = str(pathlib.PurePath(my_path).suffix)[1:] + encoded_extension = str(pathlib.PurePath(encoded_path).suffix)[1:] + decoded_extension = str(pathlib.PurePath(decoded_path).suffix)[1:] + # remove first '/' and split path_list = my_path.replace('/', '', 1).split('/') encoded_path_list = encoded_path.replace('/', '', 1).split('/') @@ -372,6 +439,10 @@ def parse( 'scheme': normalized.scheme or None, 'netloc': normalized.netloc or None, 'path': my_path or None, + 'parent': parent or None, + 'filename': filename or None, + 'stem': stem or None, + 'extension': extension or None, 'path_list': path_list or None, 'query': normalized.query or None, 'query_obj': query_obj or None, @@ -385,6 +456,10 @@ def parse( 'scheme': quoted_parts.scheme or None, 'netloc': quoted_parts.netloc or None, 'path': encoded_path or None, + 'parent': encoded_parent or None, + 'filename': encoded_filename or None, + 'stem': encoded_stem or None, + 'extension': encoded_extension or None, 'path_list': encoded_path_list or None, 'query': quoted_parts.query or None, 'fragment': quoted_parts.fragment or None, @@ -398,6 +473,10 @@ def parse( 'scheme': unquoted_parts.scheme or None, 'netloc': unquoted_parts.netloc or None, 'path': decoded_path or None, + 'parent': decoded_parent or None, + 'filename': decoded_filename or None, + 'stem': decoded_stem or None, + 'extension': decoded_extension or None, 'path_list': decoded_path_list or None, 'query': unquoted_parts.query or None, 'fragment': unquoted_parts.fragment or None, diff --git a/tests/test_url.py b/tests/test_url.py index 4af2fc6e..634a4846 100644 --- a/tests/test_url.py +++ b/tests/test_url.py @@ -16,7 +16,7 @@ class MyTests(unittest.TestCase): Test mailto URL """ data = r'' - 
expected = json.loads(r'''{"url":"mailto:fred@example.com","scheme":"mailto","netloc":null,"path":"fred@example.com","path_list":["fred@example.com"],"query":null,"query_obj":null,"fragment":null,"username":null,"password":null,"hostname":null,"port":null,"encoded":{"url":"mailto:fred@example.com","scheme":"mailto","netloc":null,"path":"fred@example.com","path_list":["fred@example.com"],"query":null,"fragment":null,"username":null,"password":null,"hostname":null,"port":null},"decoded":{"url":"mailto:fred@example.com","scheme":"mailto","netloc":null,"path":"fred@example.com","path_list":["fred@example.com"],"query":null,"fragment":null,"username":null,"password":null,"hostname":null,"port":null}}''') + expected = json.loads(r'''{"url":"mailto:fred@example.com","scheme":"mailto","netloc":null,"path":"fred@example.com","parent":".","filename":"fred@example.com","stem":"fred@example","extension":"com","path_list":["fred@example.com"],"query":null,"query_obj":null,"fragment":null,"username":null,"password":null,"hostname":null,"port":null,"encoded":{"url":"mailto:fred@example.com","scheme":"mailto","netloc":null,"path":"fred@example.com","parent":".","filename":"fred@example.com","stem":"fred@example","extension":"com","path_list":["fred@example.com"],"query":null,"fragment":null,"username":null,"password":null,"hostname":null,"port":null},"decoded":{"url":"mailto:fred@example.com","scheme":"mailto","netloc":null,"path":"fred@example.com","parent":".","filename":"fred@example.com","stem":"fred@example","extension":"com","path_list":["fred@example.com"],"query":null,"fragment":null,"username":null,"password":null,"hostname":null,"port":null}}''') self.assertEqual(jc.parsers.url.parse(data, quiet=True), expected) @@ -25,7 +25,7 @@ class MyTests(unittest.TestCase): Test ftp URL """ data = r'ftp://localhost/filepath/filename.txt' - expected = 
json.loads(r'''{"url":"ftp://localhost/filepath/filename.txt","scheme":"ftp","netloc":"localhost","path":"/filepath/filename.txt","path_list":["filepath","filename.txt"],"query":null,"query_obj":null,"fragment":null,"username":null,"password":null,"hostname":"localhost","port":null,"encoded":{"url":"ftp://localhost/filepath/filename.txt","scheme":"ftp","netloc":"localhost","path":"/filepath/filename.txt","path_list":["filepath","filename.txt"],"query":null,"fragment":null,"username":null,"password":null,"hostname":"localhost","port":null},"decoded":{"url":"ftp://localhost/filepath/filename.txt","scheme":"ftp","netloc":"localhost","path":"/filepath/filename.txt","path_list":["filepath","filename.txt"],"query":null,"fragment":null,"username":null,"password":null,"hostname":"localhost","port":null}}''') + expected = json.loads(r'''{"url":"ftp://localhost/filepath/filename.txt","scheme":"ftp","netloc":"localhost","path":"/filepath/filename.txt","parent":"/filepath","filename":"filename.txt","stem":"filename","extension":"txt","path_list":["filepath","filename.txt"],"query":null,"query_obj":null,"fragment":null,"username":null,"password":null,"hostname":"localhost","port":null,"encoded":{"url":"ftp://localhost/filepath/filename.txt","scheme":"ftp","netloc":"localhost","path":"/filepath/filename.txt","parent":"/filepath","filename":"filename.txt","stem":"filename","extension":"txt","path_list":["filepath","filename.txt"],"query":null,"fragment":null,"username":null,"password":null,"hostname":"localhost","port":null},"decoded":{"url":"ftp://localhost/filepath/filename.txt","scheme":"ftp","netloc":"localhost","path":"/filepath/filename.txt","parent":"/filepath","filename":"filename.txt","stem":"filename","extension":"txt","path_list":["filepath","filename.txt"],"query":null,"fragment":null,"username":null,"password":null,"hostname":"localhost","port":null}}''') self.assertEqual(jc.parsers.url.parse(data, quiet=True), expected) @@ -34,7 +34,7 @@ class 
MyTests(unittest.TestCase): Test HTTP URL with encodable characters (ipv4 host) """ data = r'' - expected = json.loads(r'''{"url":"http://user{one:pass{two@127.0.0.1:8000/a space/b/c/d?q1=foo with {space}&q2=bar&q2=baz#frag{frag","scheme":"http","netloc":"user{one:pass{two@127.0.0.1:8000","path":"/a space/b/c/d","path_list":["a space","b","c","d"],"query":"q1=foo with {space}&q2=bar&q2=baz","query_obj":{"q1":["foo with {space}"],"q2":["bar","baz"]},"fragment":"frag{frag","username":"user{one","password":"pass{two","hostname":"127.0.0.1","port":8000,"encoded":{"url":"http://user%7Bone:pass%7Btwo@127.0.0.1:8000/a%20space/b/c/d?q1=foo+with+%7Bspace%7D&q2=bar&q2=baz#frag%7Bfrag","scheme":"http","netloc":"user%7Bone:pass%7Btwo@127.0.0.1:8000","path":"/a%20space/b/c/d","path_list":["a%20space","b","c","d"],"query":"q1=foo+with+%7Bspace%7D&q2=bar&q2=baz","fragment":"frag%7Bfrag","username":"user%7Bone","password":"pass%7Btwo","hostname":"127.0.0.1","port":8000},"decoded":{"url":"http://user{one:pass{two@127.0.0.1:8000/a space/b/c/d?q1=foo with {space}&q2=bar&q2=baz#frag{frag","scheme":"http","netloc":"user{one:pass{two@127.0.0.1:8000","path":"/a space/b/c/d","path_list":["a space","b","c","d"],"query":"q1=foo with {space}&q2=bar&q2=baz","fragment":"frag{frag","username":"user{one","password":"pass{two","hostname":"127.0.0.1","port":8000}}''') + expected = json.loads(r'''{"url":"http://user{one:pass{two@127.0.0.1:8000/a space/b/c/d?q1=foo with {space}&q2=bar&q2=baz#frag{frag","scheme":"http","netloc":"user{one:pass{two@127.0.0.1:8000","path":"/a space/b/c/d","parent":"/a space/b/c","filename":"d","stem":"d","extension":null,"path_list":["a space","b","c","d"],"query":"q1=foo with {space}&q2=bar&q2=baz","query_obj":{"q1":["foo with 
{space}"],"q2":["bar","baz"]},"fragment":"frag{frag","username":"user{one","password":"pass{two","hostname":"127.0.0.1","port":8000,"encoded":{"url":"http://user%7Bone:pass%7Btwo@127.0.0.1:8000/a%20space/b/c/d?q1=foo+with+%7Bspace%7D&q2=bar&q2=baz#frag%7Bfrag","scheme":"http","netloc":"user%7Bone:pass%7Btwo@127.0.0.1:8000","path":"/a%20space/b/c/d","parent":"/a%20space/b/c","filename":"d","stem":"d","extension":null,"path_list":["a%20space","b","c","d"],"query":"q1=foo+with+%7Bspace%7D&q2=bar&q2=baz","fragment":"frag%7Bfrag","username":"user%7Bone","password":"pass%7Btwo","hostname":"127.0.0.1","port":8000},"decoded":{"url":"http://user{one:pass{two@127.0.0.1:8000/a space/b/c/d?q1=foo with {space}&q2=bar&q2=baz#frag{frag","scheme":"http","netloc":"user{one:pass{two@127.0.0.1:8000","path":"/a space/b/c/d","parent":"/a space/b/c","filename":"d","stem":"d","extension":null,"path_list":["a space","b","c","d"],"query":"q1=foo with {space}&q2=bar&q2=baz","fragment":"frag{frag","username":"user{one","password":"pass{two","hostname":"127.0.0.1","port":8000}}''') self.assertEqual(jc.parsers.url.parse(data, quiet=True), expected) @@ -43,7 +43,7 @@ class MyTests(unittest.TestCase): Test HTTP URL with encodable characters (ipv6 host) """ data = r'' - expected = json.loads(r'''{"url":"http://user{one:pass{two@[1:2::127]:8000/a space/b/c/d?q1=foo with {space}&q2=bar&q2=baz#frag{frag","scheme":"http","netloc":"user{one:pass{two@[1:2::127]:8000","path":"/a space/b/c/d","path_list":["a space","b","c","d"],"query":"q1=foo with {space}&q2=bar&q2=baz","query_obj":{"q1":["foo with 
{space}"],"q2":["bar","baz"]},"fragment":"frag{frag","username":"user{one","password":"pass{two","hostname":"1:2::127","port":8000,"encoded":{"url":"http://user%7Bone:pass%7Btwo@[1:2::127]:8000/a%20space/b/c/d?q1=foo+with+%7Bspace%7D&q2=bar&q2=baz#frag%7Bfrag","scheme":"http","netloc":"user%7Bone:pass%7Btwo@[1:2::127]:8000","path":"/a%20space/b/c/d","path_list":["a%20space","b","c","d"],"query":"q1=foo+with+%7Bspace%7D&q2=bar&q2=baz","fragment":"frag%7Bfrag","username":"user%7Bone","password":"pass%7Btwo","hostname":"1:2::127","port":8000},"decoded":{"url":"http://user{one:pass{two@[1:2::127]:8000/a space/b/c/d?q1=foo with {space}&q2=bar&q2=baz#frag{frag","scheme":"http","netloc":"user{one:pass{two@[1:2::127]:8000","path":"/a space/b/c/d","path_list":["a space","b","c","d"],"query":"q1=foo with {space}&q2=bar&q2=baz","fragment":"frag{frag","username":"user{one","password":"pass{two","hostname":"1:2::127","port":8000}}''') + expected = json.loads(r'''{"url":"http://user{one:pass{two@[1:2::127]:8000/a space/b/c/d?q1=foo with {space}&q2=bar&q2=baz#frag{frag","scheme":"http","netloc":"user{one:pass{two@[1:2::127]:8000","path":"/a space/b/c/d","parent":"/a space/b/c","filename":"d","stem":"d","extension":null,"path_list":["a space","b","c","d"],"query":"q1=foo with {space}&q2=bar&q2=baz","query_obj":{"q1":["foo with 
{space}"],"q2":["bar","baz"]},"fragment":"frag{frag","username":"user{one","password":"pass{two","hostname":"1:2::127","port":8000,"encoded":{"url":"http://user%7Bone:pass%7Btwo@[1:2::127]:8000/a%20space/b/c/d?q1=foo+with+%7Bspace%7D&q2=bar&q2=baz#frag%7Bfrag","scheme":"http","netloc":"user%7Bone:pass%7Btwo@[1:2::127]:8000","path":"/a%20space/b/c/d","parent":"/a%20space/b/c","filename":"d","stem":"d","extension":null,"path_list":["a%20space","b","c","d"],"query":"q1=foo+with+%7Bspace%7D&q2=bar&q2=baz","fragment":"frag%7Bfrag","username":"user%7Bone","password":"pass%7Btwo","hostname":"1:2::127","port":8000},"decoded":{"url":"http://user{one:pass{two@[1:2::127]:8000/a space/b/c/d?q1=foo with {space}&q2=bar&q2=baz#frag{frag","scheme":"http","netloc":"user{one:pass{two@[1:2::127]:8000","path":"/a space/b/c/d","parent":"/a space/b/c","filename":"d","stem":"d","extension":null,"path_list":["a space","b","c","d"],"query":"q1=foo with {space}&q2=bar&q2=baz","fragment":"frag{frag","username":"user{one","password":"pass{two","hostname":"1:2::127","port":8000}}''') self.assertEqual(jc.parsers.url.parse(data, quiet=True), expected) @@ -52,7 +52,7 @@ class MyTests(unittest.TestCase): Test HTTP URL with encodable characters (domain name host) """ data = r'' - expected = json.loads(r'''{"url":"http://user{one:pass{two@www.example.com:8000/a space/b/c/d?q1=foo with {space}&q2=bar&q2=baz#frag{frag","scheme":"http","netloc":"user{one:pass{two@www.example.com:8000","path":"/a space/b/c/d","path_list":["a space","b","c","d"],"query":"q1=foo with {space}&q2=bar&q2=baz","query_obj":{"q1":["foo with 
{space}"],"q2":["bar","baz"]},"fragment":"frag{frag","username":"user{one","password":"pass{two","hostname":"www.example.com","port":8000,"encoded":{"url":"http://user%7Bone:pass%7Btwo@www.example.com:8000/a%20space/b/c/d?q1=foo+with+%7Bspace%7D&q2=bar&q2=baz#frag%7Bfrag","scheme":"http","netloc":"user%7Bone:pass%7Btwo@www.example.com:8000","path":"/a%20space/b/c/d","path_list":["a%20space","b","c","d"],"query":"q1=foo+with+%7Bspace%7D&q2=bar&q2=baz","fragment":"frag%7Bfrag","username":"user%7Bone","password":"pass%7Btwo","hostname":"www.example.com","port":8000},"decoded":{"url":"http://user{one:pass{two@www.example.com:8000/a space/b/c/d?q1=foo with {space}&q2=bar&q2=baz#frag{frag","scheme":"http","netloc":"user{one:pass{two@www.example.com:8000","path":"/a space/b/c/d","path_list":["a space","b","c","d"],"query":"q1=foo with {space}&q2=bar&q2=baz","fragment":"frag{frag","username":"user{one","password":"pass{two","hostname":"www.example.com","port":8000}}''') + expected = json.loads(r'''{"url":"http://user{one:pass{two@www.example.com:8000/a space/b/c/d?q1=foo with {space}&q2=bar&q2=baz#frag{frag","scheme":"http","netloc":"user{one:pass{two@www.example.com:8000","path":"/a space/b/c/d","parent":"/a space/b/c","filename":"d","stem":"d","extension":null,"path_list":["a space","b","c","d"],"query":"q1=foo with {space}&q2=bar&q2=baz","query_obj":{"q1":["foo with 
{space}"],"q2":["bar","baz"]},"fragment":"frag{frag","username":"user{one","password":"pass{two","hostname":"www.example.com","port":8000,"encoded":{"url":"http://user%7Bone:pass%7Btwo@www.example.com:8000/a%20space/b/c/d?q1=foo+with+%7Bspace%7D&q2=bar&q2=baz#frag%7Bfrag","scheme":"http","netloc":"user%7Bone:pass%7Btwo@www.example.com:8000","path":"/a%20space/b/c/d","parent":"/a%20space/b/c","filename":"d","stem":"d","extension":null,"path_list":["a%20space","b","c","d"],"query":"q1=foo+with+%7Bspace%7D&q2=bar&q2=baz","fragment":"frag%7Bfrag","username":"user%7Bone","password":"pass%7Btwo","hostname":"www.example.com","port":8000},"decoded":{"url":"http://user{one:pass{two@www.example.com:8000/a space/b/c/d?q1=foo with {space}&q2=bar&q2=baz#frag{frag","scheme":"http","netloc":"user{one:pass{two@www.example.com:8000","path":"/a space/b/c/d","parent":"/a space/b/c","filename":"d","stem":"d","extension":null,"path_list":["a space","b","c","d"],"query":"q1=foo with {space}&q2=bar&q2=baz","fragment":"frag{frag","username":"user{one","password":"pass{two","hostname":"www.example.com","port":8000}}''') self.assertEqual(jc.parsers.url.parse(data, quiet=True), expected) @@ -61,7 +61,7 @@ class MyTests(unittest.TestCase): Test HTTP URL with encoded characters """ data = r'http://user%7Bone:pass%7Btwo@www.example.com:8000/a%20space/b/c/d?q1=foo+with+%7Bspace%7D&q2=bar&q2=baz#frag%7Bfrag' - expected = json.loads(r'''{"url":"http://user%7Bone:pass%7Btwo@www.example.com:8000/a%20space/b/c/d?q1=foo+with+%7Bspace%7D&q2=bar&q2=baz#frag%7Bfrag","scheme":"http","netloc":"user%7Bone:pass%7Btwo@www.example.com:8000","path":"/a%20space/b/c/d","path_list":["a%20space","b","c","d"],"query":"q1=foo+with+%7Bspace%7D&q2=bar&q2=baz","query_obj":{"q1":["foo with 
{space}"],"q2":["bar","baz"]},"fragment":"frag%7Bfrag","username":"user%7Bone","password":"pass%7Btwo","hostname":"www.example.com","port":8000,"encoded":{"url":"http://user%257Bone:pass%257Btwo@www.example.com:8000/a%2520space/b/c/d?q1=foo+with+%257Bspace%257D&q2=bar&q2=baz#frag%257Bfrag","scheme":"http","netloc":"user%257Bone:pass%257Btwo@www.example.com:8000","path":"/a%2520space/b/c/d","path_list":["a%2520space","b","c","d"],"query":"q1=foo+with+%257Bspace%257D&q2=bar&q2=baz","fragment":"frag%257Bfrag","username":"user%257Bone","password":"pass%257Btwo","hostname":"www.example.com","port":8000},"decoded":{"url":"http://user{one:pass{two@www.example.com:8000/a space/b/c/d?q1=foo with {space}&q2=bar&q2=baz#frag{frag","scheme":"http","netloc":"user{one:pass{two@www.example.com:8000","path":"/a space/b/c/d","path_list":["a space","b","c","d"],"query":"q1=foo with {space}&q2=bar&q2=baz","fragment":"frag{frag","username":"user{one","password":"pass{two","hostname":"www.example.com","port":8000}}''') + expected = json.loads(r'''{"url":"http://user%7Bone:pass%7Btwo@www.example.com:8000/a%20space/b/c/d?q1=foo+with+%7Bspace%7D&q2=bar&q2=baz#frag%7Bfrag","scheme":"http","netloc":"user%7Bone:pass%7Btwo@www.example.com:8000","path":"/a%20space/b/c/d","parent":"/a%20space/b/c","filename":"d","stem":"d","extension":null,"path_list":["a%20space","b","c","d"],"query":"q1=foo+with+%7Bspace%7D&q2=bar&q2=baz","query_obj":{"q1":["foo with 
{space}"],"q2":["bar","baz"]},"fragment":"frag%7Bfrag","username":"user%7Bone","password":"pass%7Btwo","hostname":"www.example.com","port":8000,"encoded":{"url":"http://user%257Bone:pass%257Btwo@www.example.com:8000/a%2520space/b/c/d?q1=foo+with+%257Bspace%257D&q2=bar&q2=baz#frag%257Bfrag","scheme":"http","netloc":"user%257Bone:pass%257Btwo@www.example.com:8000","path":"/a%2520space/b/c/d","parent":"/a%2520space/b/c","filename":"d","stem":"d","extension":null,"path_list":["a%2520space","b","c","d"],"query":"q1=foo+with+%257Bspace%257D&q2=bar&q2=baz","fragment":"frag%257Bfrag","username":"user%257Bone","password":"pass%257Btwo","hostname":"www.example.com","port":8000},"decoded":{"url":"http://user{one:pass{two@www.example.com:8000/a space/b/c/d?q1=foo with {space}&q2=bar&q2=baz#frag{frag","scheme":"http","netloc":"user{one:pass{two@www.example.com:8000","path":"/a space/b/c/d","parent":"/a space/b/c","filename":"d","stem":"d","extension":null,"path_list":["a space","b","c","d"],"query":"q1=foo with {space}&q2=bar&q2=baz","fragment":"frag{frag","username":"user{one","password":"pass{two","hostname":"www.example.com","port":8000}}''') self.assertEqual(jc.parsers.url.parse(data, quiet=True), expected) @@ -70,7 +70,7 @@ class MyTests(unittest.TestCase): Test HTTP URL with encodable characters in the hostname and port """ data = r'http://хост.домен:8080' - expected = 
json.loads(r'''{"url":"http://хост.домен:8080","scheme":"http","netloc":"хост.домен:8080","path":null,"path_list":null,"query":null,"query_obj":null,"fragment":null,"username":null,"password":null,"hostname":"хост.домен","port":8080,"encoded":{"url":"http://%D1%85%D0%BE%D1%81%D1%82.%D0%B4%D0%BE%D0%BC%D0%B5%D0%BD:8080","scheme":"http","netloc":"%D1%85%D0%BE%D1%81%D1%82.%D0%B4%D0%BE%D0%BC%D0%B5%D0%BD:8080","path":null,"path_list":null,"query":null,"fragment":null,"username":null,"password":null,"hostname":"%D1%85%D0%BE%D1%81%D1%82.%D0%B4%D0%BE%D0%BC%D0%B5%D0%BD","port":8080},"decoded":{"url":"http://хост.домен:8080","scheme":"http","netloc":"хост.домен:8080","path":null,"path_list":null,"query":null,"fragment":null,"username":null,"password":null,"hostname":"хост.домен","port":8080}}''') + expected = json.loads(r'''{"url":"http://хост.домен:8080","scheme":"http","netloc":"хост.домен:8080","path":null,"parent":null,"filename":null,"stem":null,"extension":null,"path_list":null,"query":null,"query_obj":null,"fragment":null,"username":null,"password":null,"hostname":"хост.домен","port":8080,"encoded":{"url":"http://%D1%85%D0%BE%D1%81%D1%82.%D0%B4%D0%BE%D0%BC%D0%B5%D0%BD:8080","scheme":"http","netloc":"%D1%85%D0%BE%D1%81%D1%82.%D0%B4%D0%BE%D0%BC%D0%B5%D0%BD:8080","path":null,"parent":null,"filename":null,"stem":null,"extension":null,"path_list":null,"query":null,"fragment":null,"username":null,"password":null,"hostname":"%D1%85%D0%BE%D1%81%D1%82.%D0%B4%D0%BE%D0%BC%D0%B5%D0%BD","port":8080},"decoded":{"url":"http://хост.домен:8080","scheme":"http","netloc":"хост.домен:8080","path":null,"parent":null,"filename":null,"stem":null,"extension":null,"path_list":null,"query":null,"fragment":null,"username":null,"password":null,"hostname":"хост.домен","port":8080}}''') self.assertEqual(jc.parsers.url.parse(data, quiet=True), expected) @@ -79,7 +79,7 @@ class MyTests(unittest.TestCase): Test HTTP URL with encoded characters in the hostname and port """ data = 
r'http://%D1%85%D0%BE%D1%81%D1%82.%D0%B4%D0%BE%D0%BC%D0%B5%D0%BD:%38%38' - expected = json.loads(r'''{"url":"http://%D1%85%D0%BE%D1%81%D1%82.%D0%B4%D0%BE%D0%BC%D0%B5%D0%BD:%38%38","scheme":"http","netloc":"%D1%85%D0%BE%D1%81%D1%82.%D0%B4%D0%BE%D0%BC%D0%B5%D0%BD:%38%38","path":null,"path_list":null,"query":null,"query_obj":null,"fragment":null,"username":null,"password":null,"hostname":"%D1%85%D0%BE%D1%81%D1%82.%D0%B4%D0%BE%D0%BC%D0%B5%D0%BD","port":88,"encoded":{"url":"http://%25D1%2585%25D0%25BE%25D1%2581%25D1%2582.%25D0%25B4%25D0%25BE%25D0%25BC%25D0%25B5%25D0%25BD:%2538%2538","scheme":"http","netloc":"%25D1%2585%25D0%25BE%25D1%2581%25D1%2582.%25D0%25B4%25D0%25BE%25D0%25BC%25D0%25B5%25D0%25BD:%2538%2538","path":null,"path_list":null,"query":null,"fragment":null,"username":null,"password":null,"hostname":"%25D1%2585%25D0%25BE%25D1%2581%25D1%2582.%25D0%25B4%25D0%25BE%25D0%25BC%25D0%25B5%25D0%25BD","port":88},"decoded":{"url":"http://хост.домен:88","scheme":"http","netloc":"хост.домен:88","path":null,"path_list":null,"query":null,"fragment":null,"username":null,"password":null,"hostname":"хост.домен","port":88}}''') + expected = 
json.loads(r'''{"url":"http://%D1%85%D0%BE%D1%81%D1%82.%D0%B4%D0%BE%D0%BC%D0%B5%D0%BD:%38%38","scheme":"http","netloc":"%D1%85%D0%BE%D1%81%D1%82.%D0%B4%D0%BE%D0%BC%D0%B5%D0%BD:%38%38","path":null,"parent":null,"filename":null,"stem":null,"extension":null,"path_list":null,"query":null,"query_obj":null,"fragment":null,"username":null,"password":null,"hostname":"%D1%85%D0%BE%D1%81%D1%82.%D0%B4%D0%BE%D0%BC%D0%B5%D0%BD","port":88,"encoded":{"url":"http://%25D1%2585%25D0%25BE%25D1%2581%25D1%2582.%25D0%25B4%25D0%25BE%25D0%25BC%25D0%25B5%25D0%25BD:%2538%2538","scheme":"http","netloc":"%25D1%2585%25D0%25BE%25D1%2581%25D1%2582.%25D0%25B4%25D0%25BE%25D0%25BC%25D0%25B5%25D0%25BD:%2538%2538","path":null,"parent":null,"filename":null,"stem":null,"extension":null,"path_list":null,"query":null,"fragment":null,"username":null,"password":null,"hostname":"%25D1%2585%25D0%25BE%25D1%2581%25D1%2582.%25D0%25B4%25D0%25BE%25D0%25BC%25D0%25B5%25D0%25BD","port":88},"decoded":{"url":"http://хост.домен:88","scheme":"http","netloc":"хост.домен:88","path":null,"parent":null,"filename":null,"stem":null,"extension":null,"path_list":null,"query":null,"fragment":null,"username":null,"password":null,"hostname":"хост.домен","port":88}}''') self.assertEqual(jc.parsers.url.parse(data, quiet=True), expected) @@ -88,7 +88,7 @@ class MyTests(unittest.TestCase): Test HTTP URL with encoded characters in the hostname and an invalid encoded port """ data = r'http://хост.домен:%38{%38#frag' - expected = 
json.loads(r'''{"url":"http://хост.домен:%38{%38#frag","scheme":"http","netloc":"хост.домен:%38{%38","path":null,"path_list":null,"query":null,"query_obj":null,"fragment":"frag","username":null,"password":null,"hostname":"хост.домен","port":null,"encoded":{"url":"http://%D1%85%D0%BE%D1%81%D1%82.%D0%B4%D0%BE%D0%BC%D0%B5%D0%BD:%2538%7B%2538#frag","scheme":"http","netloc":"%D1%85%D0%BE%D1%81%D1%82.%D0%B4%D0%BE%D0%BC%D0%B5%D0%BD:%2538%7B%2538","path":null,"path_list":null,"query":null,"fragment":"frag","username":null,"password":null,"hostname":"%D1%85%D0%BE%D1%81%D1%82.%D0%B4%D0%BE%D0%BC%D0%B5%D0%BD","port":null},"decoded":{"url":"http://хост.домен:8{8#frag","scheme":"http","netloc":"хост.домен:8{8","path":null,"path_list":null,"query":null,"fragment":"frag","username":null,"password":null,"hostname":"хост.домен","port":null}}''') + expected = json.loads(r'''{"url":"http://хост.домен:%38{%38#frag","scheme":"http","netloc":"хост.домен:%38{%38","path":null,"parent":null,"filename":null,"stem":null,"extension":null,"path_list":null,"query":null,"query_obj":null,"fragment":"frag","username":null,"password":null,"hostname":"хост.домен","port":null,"encoded":{"url":"http://%D1%85%D0%BE%D1%81%D1%82.%D0%B4%D0%BE%D0%BC%D0%B5%D0%BD:%2538%7B%2538#frag","scheme":"http","netloc":"%D1%85%D0%BE%D1%81%D1%82.%D0%B4%D0%BE%D0%BC%D0%B5%D0%BD:%2538%7B%2538","path":null,"parent":null,"filename":null,"stem":null,"extension":null,"path_list":null,"query":null,"fragment":"frag","username":null,"password":null,"hostname":"%D1%85%D0%BE%D1%81%D1%82.%D0%B4%D0%BE%D0%BC%D0%B5%D0%BD","port":null},"decoded":{"url":"http://хост.домен:8{8#frag","scheme":"http","netloc":"хост.домен:8{8","path":null,"parent":null,"filename":null,"stem":null,"extension":null,"path_list":null,"query":null,"fragment":"frag","username":null,"password":null,"hostname":"хост.домен","port":null}}''') self.assertEqual(jc.parsers.url.parse(data, quiet=True), expected) @@ -97,7 +97,7 @@ class MyTests(unittest.TestCase): Test HTTP 
URL with a forward slash as the last part of the path """ data = r'https://pypi.org/project/jc/1.20.3/' - expected = json.loads(r'''{"url":"https://pypi.org/project/jc/1.20.3/","scheme":"https","netloc":"pypi.org","path":"/project/jc/1.20.3/","path_list":["project","jc","1.20.3"],"query":null,"query_obj":null,"fragment":null,"username":null,"password":null,"hostname":"pypi.org","port":null,"encoded":{"url":"https://pypi.org/project/jc/1.20.3/","scheme":"https","netloc":"pypi.org","path":"/project/jc/1.20.3/","path_list":["project","jc","1.20.3"],"query":null,"fragment":null,"username":null,"password":null,"hostname":"pypi.org","port":null},"decoded":{"url":"https://pypi.org/project/jc/1.20.3/","scheme":"https","netloc":"pypi.org","path":"/project/jc/1.20.3/","path_list":["project","jc","1.20.3"],"query":null,"fragment":null,"username":null,"password":null,"hostname":"pypi.org","port":null}}''') + expected = json.loads(r'''{"url":"https://pypi.org/project/jc/1.20.3/","scheme":"https","netloc":"pypi.org","path":"/project/jc/1.20.3/","parent":"/project/jc","filename":"1.20.3","stem":"1.20","extension":"3","path_list":["project","jc","1.20.3"],"query":null,"query_obj":null,"fragment":null,"username":null,"password":null,"hostname":"pypi.org","port":null,"encoded":{"url":"https://pypi.org/project/jc/1.20.3/","scheme":"https","netloc":"pypi.org","path":"/project/jc/1.20.3/","parent":"/project/jc","filename":"1.20.3","stem":"1.20","extension":"3","path_list":["project","jc","1.20.3"],"query":null,"fragment":null,"username":null,"password":null,"hostname":"pypi.org","port":null},"decoded":{"url":"https://pypi.org/project/jc/1.20.3/","scheme":"https","netloc":"pypi.org","path":"/project/jc/1.20.3/","parent":"/project/jc","filename":"1.20.3","stem":"1.20","extension":"3","path_list":["project","jc","1.20.3"],"query":null,"fragment":null,"username":null,"password":null,"hostname":"pypi.org","port":null}}''') self.assertEqual(jc.parsers.url.parse(data, quiet=True), expected) 
@@ -106,7 +106,7 @@ class MyTests(unittest.TestCase): Test HTTP URL with a forward slash as the last only part of the path """ data = r'https://pypi.org/' - expected = json.loads(r'''{"url":"https://pypi.org/","scheme":"https","netloc":"pypi.org","path":"/","path_list":null,"query":null,"query_obj":null,"fragment":null,"username":null,"password":null,"hostname":"pypi.org","port":null,"encoded":{"url":"https://pypi.org/","scheme":"https","netloc":"pypi.org","path":"/","path_list":null,"query":null,"fragment":null,"username":null,"password":null,"hostname":"pypi.org","port":null},"decoded":{"url":"https://pypi.org/","scheme":"https","netloc":"pypi.org","path":"/","path_list":null,"query":null,"fragment":null,"username":null,"password":null,"hostname":"pypi.org","port":null}}''') + expected = json.loads(r'''{"url":"https://pypi.org/","scheme":"https","netloc":"pypi.org","path":"/","parent":"/","filename":null,"stem":null,"extension":null,"path_list":null,"query":null,"query_obj":null,"fragment":null,"username":null,"password":null,"hostname":"pypi.org","port":null,"encoded":{"url":"https://pypi.org/","scheme":"https","netloc":"pypi.org","path":"/","parent":"/","filename":null,"stem":null,"extension":null,"path_list":null,"query":null,"fragment":null,"username":null,"password":null,"hostname":"pypi.org","port":null},"decoded":{"url":"https://pypi.org/","scheme":"https","netloc":"pypi.org","path":"/","parent":"/","filename":null,"stem":null,"extension":null,"path_list":null,"query":null,"fragment":null,"username":null,"password":null,"hostname":"pypi.org","port":null}}''') self.assertEqual(jc.parsers.url.parse(data, quiet=True), expected) @@ -115,7 +115,16 @@ class MyTests(unittest.TestCase): Test HTTP URL with no forward slash at the end """ data = r'https://pypi.org' - expected = 
json.loads(r'''{"url":"https://pypi.org","scheme":"https","netloc":"pypi.org","path":null,"path_list":null,"query":null,"query_obj":null,"fragment":null,"username":null,"password":null,"hostname":"pypi.org","port":null,"encoded":{"url":"https://pypi.org","scheme":"https","netloc":"pypi.org","path":null,"path_list":null,"query":null,"fragment":null,"username":null,"password":null,"hostname":"pypi.org","port":null},"decoded":{"url":"https://pypi.org","scheme":"https","netloc":"pypi.org","path":null,"path_list":null,"query":null,"fragment":null,"username":null,"password":null,"hostname":"pypi.org","port":null}}''') + expected = json.loads(r'''{"url":"https://pypi.org","scheme":"https","netloc":"pypi.org","path":null,"parent":null,"filename":null,"stem":null,"extension":null,"path_list":null,"query":null,"query_obj":null,"fragment":null,"username":null,"password":null,"hostname":"pypi.org","port":null,"encoded":{"url":"https://pypi.org","scheme":"https","netloc":"pypi.org","path":null,"parent":null,"filename":null,"stem":null,"extension":null,"path_list":null,"query":null,"fragment":null,"username":null,"password":null,"hostname":"pypi.org","port":null},"decoded":{"url":"https://pypi.org","scheme":"https","netloc":"pypi.org","path":null,"parent":null,"filename":null,"stem":null,"extension":null,"path_list":null,"query":null,"fragment":null,"username":null,"password":null,"hostname":"pypi.org","port":null}}''') + self.assertEqual(jc.parsers.url.parse(data, quiet=True), expected) + + + def test_url_file_path(self): + """ + Test a normal file path URL + """ + data = r'/this/is my/path/to the file/myfile.tar.gz' + expected = json.loads(r'''{"url":"/this/is my/path/to the file/myfile.tar.gz","scheme":null,"netloc":null,"path":"/this/is my/path/to the file/myfile.tar.gz","parent":"/this/is my/path/to the file","filename":"myfile.tar.gz","stem":"myfile.tar","extension":"gz","path_list":["this","is my","path","to the 
file","myfile.tar.gz"],"query":null,"query_obj":null,"fragment":null,"username":null,"password":null,"hostname":null,"port":null,"encoded":{"url":"/this/is%20my/path/to%20the%20file/myfile.tar.gz","scheme":null,"netloc":null,"path":"/this/is%20my/path/to%20the%20file/myfile.tar.gz","parent":"/this/is%20my/path/to%20the%20file","filename":"myfile.tar.gz","stem":"myfile.tar","extension":"gz","path_list":["this","is%20my","path","to%20the%20file","myfile.tar.gz"],"query":null,"fragment":null,"username":null,"password":null,"hostname":null,"port":null},"decoded":{"url":"/this/is my/path/to the file/myfile.tar.gz","scheme":null,"netloc":null,"path":"/this/is my/path/to the file/myfile.tar.gz","parent":"/this/is my/path/to the file","filename":"myfile.tar.gz","stem":"myfile.tar","extension":"gz","path_list":["this","is my","path","to the file","myfile.tar.gz"],"query":null,"fragment":null,"username":null,"password":null,"hostname":null,"port":null}}''') self.assertEqual(jc.parsers.url.parse(data, quiet=True), expected)