From fa7721c31d47e6876728c19773c54cf5ab28da82 Mon Sep 17 00:00:00 2001
From: Kelly Brazil
Date: Tue, 19 Jul 2022 07:16:28 -0700
Subject: [PATCH] add initial URL parser

---
Review notes: the jc/parsers/url.py payload was revised during review
(module docstring now documents the real output keys; input is stripped
before parsing). The abbreviated blob id on its "index" line was not
recomputed and refers to the pre-review content.

 CHANGELOG         |   3 +-
 jc/lib.py         |   1 +
 jc/parsers/url.py | 114 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 117 insertions(+), 1 deletion(-)
 create mode 100644 jc/parsers/url.py

diff --git a/CHANGELOG b/CHANGELOG
index 08a39992..a0369407 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,8 +1,9 @@
 jc changelog
 
 20220705 v1.20.3
-- Add pager functionality to help (parser documentation only)
+- Add URL string parser
 - Add m3u/m3u8 file parser
+- Add pager functionality to help (parser documentation only)
 - Minor parser performance optimizations
 
 20220705 v1.20.2
diff --git a/jc/lib.py b/jc/lib.py
index f554a824..dd8767cc 100644
--- a/jc/lib.py
+++ b/jc/lib.py
@@ -105,6 +105,7 @@ parsers = [
     'update-alt-q',
     'upower',
     'uptime',
+    'url',
     'vmstat',
     'vmstat-s',
     'w',
diff --git a/jc/parsers/url.py b/jc/parsers/url.py
new file mode 100644
index 00000000..3e3c1181
--- /dev/null
+++ b/jc/parsers/url.py
@@ -0,0 +1,114 @@
+"""jc - JSON Convert URL parser
+
+Usage (cli):
+
+    $ echo "http://example.com/test/path?q1=foo&q2=bar#frag" | jc --url
+
+Usage (module):
+
+    import jc
+    result = jc.parse('url', url_string)
+
+Schema:
+
+    {
+      "scheme":         string,
+      "netloc":         string,
+      "path":           string,
+      "params":         string,
+      "query":          string,
+      "fragment":       string
+    }
+
+Examples:
+
+    $ echo "http://example.com/test/path?q1=foo&q2=bar#frag" | jc --url -p
+    {
+      "scheme": "http",
+      "netloc": "example.com",
+      "path": "/test/path",
+      "params": "",
+      "query": "q1=foo&q2=bar",
+      "fragment": "frag"
+    }
+
+    $ echo "ftp://localhost/filepath" | jc --url -p
+    {
+      "scheme": "ftp",
+      "netloc": "localhost",
+      "path": "/filepath",
+      "params": "",
+      "query": "",
+      "fragment": ""
+    }
+"""
+from urllib.parse import urlparse
+from typing import List, Dict
+import jc.utils
+
+
+class info():
+    """Provides parser metadata (version, author, etc.)"""
+    version = '1.0'
+    description = 'URL parser'
+    author = 'Kelly Brazil'
+    author_email = 'kellyjonbrazil@gmail.com'
+    compatible = ['linux', 'darwin', 'cygwin', 'win32', 'aix', 'freebsd']
+
+
+__version__ = info.version
+
+
+def _process(proc_data: Dict) -> Dict:
+    """
+    Final processing to conform to the schema.
+
+    Parameters:
+
+        proc_data:   (Dictionary) raw structured data to process
+
+    Returns:
+
+        Dictionary. Structured to conform to the schema.
+    """
+    return proc_data
+
+
+def parse(
+    data: str,
+    raw: bool = False,
+    quiet: bool = False
+) -> Dict:
+    """
+    Main text parsing function
+
+    Parameters:
+
+        data:        (string)  text data to parse
+        raw:         (boolean) unprocessed output if True
+        quiet:       (boolean) suppress warning messages if True
+
+    Returns:
+
+        Dictionary. Raw or processed structured data.
+    """
+    jc.utils.compatibility(__name__, info.compatible, quiet)
+    jc.utils.input_type_check(data)
+
+    raw_output: Dict = {}
+
+    if jc.utils.has_data(data):
+        # Piped input ends with a newline; older urllib versions keep
+        # it in the last URL component, so trim whitespace first.
+        parts = urlparse(data.strip())
+
+        raw_output = {
+            'scheme': parts.scheme,
+            'netloc': parts.netloc,
+            'path': parts.path,
+            'params': parts.params,
+            'query': parts.query,
+            'fragment': parts.fragment
+        }
+
+    return raw_output if raw else _process(raw_output)