2022-07-19 13:02:09 -07:00
|
|
|
[Home](https://kellyjonbrazil.github.io/jc/)
|
|
|
|
<a id="jc.parsers.url"></a>
|
|
|
|
|
|
|
|
# jc.parsers.url
|
|
|
|
|
|
|
|
jc - JSON Convert URL string parser
|
|
|
|
|
2022-07-22 11:37:00 -07:00
|
|
|
Normalized, Encoded, and Decoded versions of the original URL and URL parts
|
|
|
|
are included in the output. Encoding and Decoding are best effort.
|
|
|
|
|
|
|
|
> Note: Do not use the Encoded fields for a URL that has already been
|
|
|
|
> Encoded. Similarly, do not use the Decoded fields for a URL that has
|
|
|
|
> already been Decoded.
|
|
|
|
|
2022-07-20 17:09:09 -07:00
|
|
|
This parser will work with naked and wrapped URL strings:
|
|
|
|
|
2023-12-22 14:38:01 -08:00
|
|
|
- `/path`
|
2022-07-20 17:09:09 -07:00
|
|
|
- `scheme://host/path`
|
|
|
|
- `URL:scheme://host/path`
|
|
|
|
- `<scheme://host/path>`
|
|
|
|
- `<URL:scheme://host/path>`
|
|
|
|
|
2022-07-19 13:02:09 -07:00
|
|
|
Usage (cli):
|
|
|
|
|
|
|
|
$ echo "http://example.com/test/path?q1=foo&q2=bar#frag" | jc --url
|
|
|
|
|
|
|
|
Usage (module):
|
|
|
|
|
|
|
|
import jc
|
|
|
|
result = jc.parse('url', url_string)
|
|
|
|
|
|
|
|
Schema:
|
|
|
|
|
|
|
|
{
|
2022-07-20 22:28:12 -07:00
|
|
|
"url": string,
|
2022-07-23 10:45:11 -07:00
|
|
|
"scheme": string or null,
|
2022-07-22 11:46:52 -07:00
|
|
|
"netloc": string or null,
|
2022-07-20 22:28:12 -07:00
|
|
|
"path": string or null,
|
2023-12-22 14:38:01 -08:00
|
|
|
"parent": string or null,
|
|
|
|
"filename": string or null,
|
|
|
|
"stem": string or null,
|
|
|
|
"extension": string or null,
|
2022-07-20 22:28:12 -07:00
|
|
|
"path_list": [ array or null
|
|
|
|
string
|
2022-07-20 17:09:09 -07:00
|
|
|
],
|
2022-07-22 09:28:18 -07:00
|
|
|
"query": string or null,
|
2022-07-20 22:28:12 -07:00
|
|
|
"query_obj": { object or null
|
|
|
|
<query-key>: [ array or null
|
2022-07-22 11:42:00 -07:00
|
|
|
<query-value> string # [0]
|
2022-07-20 17:09:09 -07:00
|
|
|
]
|
2022-07-19 13:02:09 -07:00
|
|
|
},
|
2022-07-20 22:28:12 -07:00
|
|
|
"fragment": string or null,
|
|
|
|
"username": string or null,
|
|
|
|
"password": string or null,
|
|
|
|
"hostname": string or null,
|
2022-07-22 11:37:00 -07:00
|
|
|
"port": integer or null, # [1]
|
2022-07-22 09:28:18 -07:00
|
|
|
"encoded": {
|
|
|
|
"url": string,
|
2022-07-23 10:45:11 -07:00
|
|
|
"scheme": string or null,
|
2022-07-22 11:46:52 -07:00
|
|
|
"netloc": string or null,
|
2022-07-22 09:28:18 -07:00
|
|
|
"path": string or null,
|
2023-12-22 14:38:01 -08:00
|
|
|
"parent": string or null,
|
|
|
|
"filename": string or null,
|
|
|
|
"stem": string or null,
|
|
|
|
"extension": string or null,
|
2022-07-22 09:28:18 -07:00
|
|
|
"path_list": [ array or null
|
|
|
|
string
|
|
|
|
],
|
|
|
|
"query": string or null,
|
|
|
|
"fragment": string or null,
|
|
|
|
"username": string or null,
|
|
|
|
"password": string or null,
|
|
|
|
"hostname": string or null,
|
2022-07-22 11:37:00 -07:00
|
|
|
"port": integer or null, # [1]
|
2022-07-22 09:28:18 -07:00
|
|
|
},
|
|
|
|
"decoded": {
|
|
|
|
"url": string,
|
2022-07-23 10:45:11 -07:00
|
|
|
"scheme": string or null,
|
2022-07-22 11:46:52 -07:00
|
|
|
"netloc": string or null,
|
2022-07-22 09:28:18 -07:00
|
|
|
"path": string or null,
|
2023-12-22 14:38:01 -08:00
|
|
|
"parent": string or null,
|
|
|
|
"filename": string or null,
|
|
|
|
"stem": string or null,
|
|
|
|
"extension": string or null,
|
2022-07-22 09:28:18 -07:00
|
|
|
"path_list": [ array or null
|
|
|
|
string
|
|
|
|
],
|
|
|
|
"query": string or null,
|
|
|
|
"fragment": string or null,
|
|
|
|
"username": string or null,
|
|
|
|
"password": string or null,
|
|
|
|
"hostname": string or null,
|
2022-07-22 11:37:00 -07:00
|
|
|
"port": integer or null, # [1]
|
2022-07-22 09:28:18 -07:00
|
|
|
}
|
2022-07-19 13:02:09 -07:00
|
|
|
}
|
|
|
|
|
2022-07-20 17:09:09 -07:00
|
|
|
[0] Duplicate query-keys will have their values consolidated into the
|
|
|
|
array of query-values
|
|
|
|
|
2022-07-22 11:37:00 -07:00
|
|
|
[1] Invalid port values will be converted to null/None and a warning
|
|
|
|
message will be printed to `STDERR` if quiet=False
|
|
|
|
|
2022-07-19 13:02:09 -07:00
|
|
|
Examples:
|
|
|
|
|
2022-07-23 10:57:46 -07:00
|
|
|
$ echo "http://example.com/test/path?q1=foo&q1=bar&q2=baz#frag" \
|
2022-07-20 17:09:09 -07:00
|
|
|
| jc --url -p
|
2022-07-19 13:02:09 -07:00
|
|
|
{
|
2022-07-20 22:28:12 -07:00
|
|
|
"url": "http://example.com/test/path?q1=foo&q1=bar&q2=baz#frag",
|
2022-07-19 13:02:09 -07:00
|
|
|
"scheme": "http",
|
|
|
|
"netloc": "example.com",
|
|
|
|
"path": "/test/path",
|
2023-12-22 14:38:01 -08:00
|
|
|
"parent": "/test",
|
|
|
|
"filename": "path",
|
|
|
|
"stem": "path",
|
|
|
|
"extension": null,
|
2022-07-20 17:09:09 -07:00
|
|
|
"path_list": [
|
|
|
|
"test",
|
|
|
|
"path"
|
|
|
|
],
|
2022-07-20 22:28:12 -07:00
|
|
|
"query": "q1=foo&q1=bar&q2=baz",
|
|
|
|
"query_obj": {
|
2022-07-20 17:09:09 -07:00
|
|
|
"q1": [
|
|
|
|
"foo",
|
|
|
|
"bar"
|
|
|
|
],
|
|
|
|
"q2": [
|
|
|
|
"baz"
|
|
|
|
]
|
2022-07-19 13:02:09 -07:00
|
|
|
},
|
|
|
|
"fragment": "frag",
|
|
|
|
"username": null,
|
|
|
|
"password": null,
|
|
|
|
"hostname": "example.com",
|
2022-07-20 22:28:12 -07:00
|
|
|
"port": null,
|
2022-07-22 09:28:18 -07:00
|
|
|
"encoded": {
|
|
|
|
"url": "http://example.com/test/path?q1=foo&q1=bar&q2=baz#frag",
|
|
|
|
"scheme": "http",
|
|
|
|
"netloc": "example.com",
|
|
|
|
"path": "/test/path",
|
2023-12-22 14:38:01 -08:00
|
|
|
"parent": "/test",
|
|
|
|
"filename": "path",
|
|
|
|
"stem": "path",
|
|
|
|
"extension": null,
|
2022-07-22 09:28:18 -07:00
|
|
|
"path_list": [
|
|
|
|
"test",
|
|
|
|
"path"
|
|
|
|
],
|
|
|
|
"query": "q1=foo&q1=bar&q2=baz",
|
|
|
|
"fragment": "frag",
|
|
|
|
"username": null,
|
|
|
|
"password": null,
|
|
|
|
"hostname": "example.com",
|
|
|
|
"port": null
|
|
|
|
},
|
|
|
|
"decoded": {
|
|
|
|
"url": "http://example.com/test/path?q1=foo&q1=bar&q2=baz#frag",
|
|
|
|
"scheme": "http",
|
|
|
|
"netloc": "example.com",
|
|
|
|
"path": "/test/path",
|
2023-12-22 14:38:01 -08:00
|
|
|
"parent": "/test",
|
|
|
|
"filename": "path",
|
|
|
|
"stem": "path",
|
|
|
|
"extension": null,
|
2022-07-22 09:28:18 -07:00
|
|
|
"path_list": [
|
|
|
|
"test",
|
|
|
|
"path"
|
|
|
|
],
|
|
|
|
"query": "q1=foo&q1=bar&q2=baz",
|
|
|
|
"fragment": "frag",
|
|
|
|
"username": null,
|
|
|
|
"password": null,
|
|
|
|
"hostname": "example.com",
|
|
|
|
"port": null
|
|
|
|
}
|
2022-07-19 13:02:09 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
$ echo "ftp://localhost/filepath" | jc --url -p
|
|
|
|
{
|
2022-07-20 22:28:12 -07:00
|
|
|
"url": "ftp://localhost/filepath",
|
2022-07-19 13:02:09 -07:00
|
|
|
"scheme": "ftp",
|
|
|
|
"netloc": "localhost",
|
|
|
|
"path": "/filepath",
|
2023-12-22 14:38:01 -08:00
|
|
|
"parent": "/",
|
|
|
|
"filename": "filepath",
|
|
|
|
"stem": "filepath",
|
|
|
|
"extension": null,
|
2022-07-20 17:09:09 -07:00
|
|
|
"path_list": [
|
|
|
|
"filepath"
|
|
|
|
],
|
2022-07-19 13:02:09 -07:00
|
|
|
"query": null,
|
2022-07-20 22:28:12 -07:00
|
|
|
"query_obj": null,
|
2022-07-19 13:02:09 -07:00
|
|
|
"fragment": null,
|
|
|
|
"username": null,
|
|
|
|
"password": null,
|
|
|
|
"hostname": "localhost",
|
2022-07-20 22:28:12 -07:00
|
|
|
"port": null,
|
2022-07-22 09:28:18 -07:00
|
|
|
"encoded": {
|
|
|
|
"url": "ftp://localhost/filepath",
|
|
|
|
"scheme": "ftp",
|
|
|
|
"netloc": "localhost",
|
|
|
|
"path": "/filepath",
|
2023-12-22 14:38:01 -08:00
|
|
|
"parent": "/",
|
|
|
|
"filename": "filepath",
|
|
|
|
"stem": "filepath",
|
|
|
|
"extension": null,
|
2022-07-22 09:28:18 -07:00
|
|
|
"path_list": [
|
|
|
|
"filepath"
|
|
|
|
],
|
|
|
|
"query": null,
|
|
|
|
"fragment": null,
|
|
|
|
"username": null,
|
|
|
|
"password": null,
|
|
|
|
"hostname": "localhost",
|
|
|
|
"port": null
|
|
|
|
},
|
|
|
|
"decoded": {
|
|
|
|
"url": "ftp://localhost/filepath",
|
|
|
|
"scheme": "ftp",
|
|
|
|
"netloc": "localhost",
|
|
|
|
"path": "/filepath",
|
2023-12-22 14:38:01 -08:00
|
|
|
"parent": "/",
|
|
|
|
"filename": "filepath",
|
|
|
|
"stem": "filepath",
|
|
|
|
"extension": null,
|
2022-07-22 09:28:18 -07:00
|
|
|
"path_list": [
|
|
|
|
"filepath"
|
|
|
|
],
|
|
|
|
"query": null,
|
|
|
|
"fragment": null,
|
|
|
|
"username": null,
|
|
|
|
"password": null,
|
|
|
|
"hostname": "localhost",
|
|
|
|
"port": null
|
|
|
|
}
|
2022-07-19 13:02:09 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
<a id="jc.parsers.url.parse"></a>
|
|
|
|
|
|
|
|
### parse
|
|
|
|
|
|
|
|
```python
|
|
|
|
def parse(data: str, raw: bool = False, quiet: bool = False) -> Dict
|
|
|
|
```
|
|
|
|
|
|
|
|
Main text parsing function
|
|
|
|
|
|
|
|
Parameters:
|
|
|
|
|
|
|
|
data: (string) text data to parse
|
|
|
|
raw: (boolean) unprocessed output if True
|
|
|
|
quiet: (boolean) suppress warning messages if True
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
|
|
Dictionary. Raw or processed structured data.
|
|
|
|
|
|
|
|
### Parser Information
|
|
|
|
Compatibility: linux, darwin, cygwin, win32, aix, freebsd
|
|
|
|
|
2023-12-21 14:55:21 -08:00
|
|
|
Source: [`jc/parsers/url.py`](https://github.com/kellyjonbrazil/jc/blob/master/jc/parsers/url.py)
|
|
|
|
|
2024-01-03 15:57:08 -08:00
|
|
|
This parser can be used with the `--slurp` command-line option.
|
|
|
|
|
2024-02-05 17:31:53 -08:00
|
|
|
Version 1.1 by Kelly Brazil (kellyjonbrazil@gmail.com)
|