1
0
mirror of https://github.com/kellyjonbrazil/jc.git synced 2025-06-17 00:07:37 +02:00
Files
jc/docs/parsers/url.md

155 lines
4.2 KiB
Markdown
Raw Normal View History

2022-07-19 13:02:09 -07:00
[Home](https://kellyjonbrazil.github.io/jc/)
<a id="jc.parsers.url"></a>
# jc.parsers.url
jc - JSON Convert URL string parser
2022-07-20 17:09:09 -07:00
This parser will work with naked and wrapped URL strings:
- `scheme://host/path`
- `URL:scheme://host/path`
- `<scheme://host/path>`
- `<URL:scheme://host/path>`
2022-07-20 22:31:04 -07:00
Normalized encoded and decoded versions of the original URL and URL parts
2022-07-20 22:28:12 -07:00
are included in the output.
2022-07-20 17:09:09 -07:00
2022-07-19 13:02:09 -07:00
Usage (cli):
$ echo "http://example.com/test/path?q1=foo&q2=bar#frag" | jc --url
Usage (module):
import jc
result = jc.parse('url', url_string)
Schema:
{
2022-07-20 22:28:12 -07:00
"url": string,
"url_encoded": string,
"scheme": string,
"scheme_encoded": string,
"netloc": string,
"netloc_encoded": string,
"path": string or null,
"path_encoded": string or null,
"path_list": [ array or null
string
2022-07-20 17:09:09 -07:00
],
2022-07-20 22:28:12 -07:00
"query": string or null,
"query_encoded": string or null,
"query_obj": { object or null
<query-key>: [ array or null
<query-value> string # [0]
2022-07-20 17:09:09 -07:00
]
2022-07-19 13:02:09 -07:00
},
2022-07-20 22:28:12 -07:00
"fragment": string or null,
"fragment_encoded": string or null,
"username": string or null,
"username_encoded": string or null,
"password": string or null,
"password_encoded": string or null,
"hostname": string or null,
"hostname_encoded": string or null,
"port": integer or null,
"port_encoded": string or null
2022-07-19 13:02:09 -07:00
}
2022-07-20 17:09:09 -07:00
[0] Duplicate query-keys will have their values consolidated into the
array of query-values
2022-07-19 13:02:09 -07:00
Examples:
2022-07-20 17:09:09 -07:00
$ echo "http://example.com/test/path?q1=foo&q1=bar&q2=baz#frag" \
| jc --url -p
2022-07-19 13:02:09 -07:00
{
2022-07-20 22:28:12 -07:00
"url": "http://example.com/test/path?q1=foo&q1=bar&q2=baz#frag",
"url_encoded": "http://example.com/test/path?q1%3Dfoo%26q1%3Dbar%26q2%3Dbaz#frag",
2022-07-19 13:02:09 -07:00
"scheme": "http",
2022-07-20 22:28:12 -07:00
"scheme_encoded": "http",
2022-07-19 13:02:09 -07:00
"netloc": "example.com",
2022-07-20 22:28:12 -07:00
"netloc_encoded": "example.com",
2022-07-19 13:02:09 -07:00
"path": "/test/path",
2022-07-20 22:28:12 -07:00
"path_encoded": "/test/path",
2022-07-20 17:09:09 -07:00
"path_list": [
"test",
"path"
],
2022-07-20 22:28:12 -07:00
"query": "q1=foo&q1=bar&q2=baz",
"query_encoded": "q1%3Dfoo%26q1%3Dbar%26q2%3Dbaz",
"query_obj": {
2022-07-20 17:09:09 -07:00
"q1": [
"foo",
"bar"
],
"q2": [
"baz"
]
2022-07-19 13:02:09 -07:00
},
"fragment": "frag",
2022-07-20 22:28:12 -07:00
"fragment_encoded": "frag",
2022-07-19 13:02:09 -07:00
"username": null,
2022-07-20 22:28:12 -07:00
"username_encoded": null,
2022-07-19 13:02:09 -07:00
"password": null,
2022-07-20 22:28:12 -07:00
"password_encoded": null,
2022-07-19 13:02:09 -07:00
"hostname": "example.com",
2022-07-20 22:28:12 -07:00
"hostname_encoded": "example.com",
"port": null,
"port_encoded": null
2022-07-19 13:02:09 -07:00
}
$ echo "ftp://localhost/filepath" | jc --url -p
{
2022-07-20 22:28:12 -07:00
"url": "ftp://localhost/filepath",
"url_encoded": "ftp://localhost/filepath",
2022-07-19 13:02:09 -07:00
"scheme": "ftp",
2022-07-20 22:28:12 -07:00
"scheme_encoded": "ftp",
2022-07-19 13:02:09 -07:00
"netloc": "localhost",
2022-07-20 22:28:12 -07:00
"netloc_encoded": "localhost",
2022-07-19 13:02:09 -07:00
"path": "/filepath",
2022-07-20 22:28:12 -07:00
"path_encoded": "/filepath",
2022-07-20 17:09:09 -07:00
"path_list": [
"filepath"
],
2022-07-19 13:02:09 -07:00
"query": null,
2022-07-20 22:28:12 -07:00
"query_encoded": null,
"query_obj": null,
2022-07-19 13:02:09 -07:00
"fragment": null,
2022-07-20 22:28:12 -07:00
"fragment_encoded": null,
2022-07-19 13:02:09 -07:00
"username": null,
2022-07-20 22:28:12 -07:00
"username_encoded": null,
2022-07-19 13:02:09 -07:00
"password": null,
2022-07-20 22:28:12 -07:00
"password_encoded": null,
2022-07-19 13:02:09 -07:00
"hostname": "localhost",
2022-07-20 22:28:12 -07:00
"hostname_encoded": "localhost",
"port": null,
"port_encoded": null
2022-07-19 13:02:09 -07:00
}
<a id="jc.parsers.url.parse"></a>
### parse
```python
def parse(data: str, raw: bool = False, quiet: bool = False) -> Dict
```
Main text parsing function
Parameters:
data: (string) text data to parse
raw: (boolean) unprocessed output if True
quiet: (boolean) suppress warning messages if True
Returns:
Dictionary. Raw or processed structured data.
### Parser Information
Compatibility: linux, darwin, cygwin, win32, aix, freebsd
Version 1.0 by Kelly Brazil (kellyjonbrazil@gmail.com)