mirror of
https://github.com/kellyjonbrazil/jc.git
synced 2025-06-19 00:17:51 +02:00
add clf-s streaming parser
This commit is contained in:
@ -166,6 +166,7 @@ option.
|
|||||||
| ` --chage` | `chage --list` command parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/chage) |
|
| ` --chage` | `chage --list` command parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/chage) |
|
||||||
| ` --cksum` | `cksum` and `sum` command parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/cksum) |
|
| ` --cksum` | `cksum` and `sum` command parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/cksum) |
|
||||||
| ` --clf` | Common and Combined Log Format file parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/clf) |
|
| ` --clf` | Common and Combined Log Format file parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/clf) |
|
||||||
|
| ` --clf-s` | Common and Combined Log Format file streaming parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/clf_s) |
|
||||||
| ` --crontab` | `crontab` command and file parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/crontab) |
|
| ` --crontab` | `crontab` command and file parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/crontab) |
|
||||||
| ` --crontab-u` | `crontab` file parser with user support | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/crontab_u) |
|
| ` --crontab-u` | `crontab` file parser with user support | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/crontab_u) |
|
||||||
| ` --csv` | CSV file parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/csv) |
|
| ` --csv` | CSV file parser | [details](https://kellyjonbrazil.github.io/jc/docs/parsers/csv) |
|
||||||
|
@ -4,7 +4,7 @@ _jc()
|
|||||||
jc_about_options jc_about_mod_options jc_help_options jc_special_options
|
jc_about_options jc_about_mod_options jc_help_options jc_special_options
|
||||||
|
|
||||||
jc_commands=(acpi airport arp blkid chage cksum crontab date df dig dmidecode dpkg du env file findmnt finger free git gpg hciconfig id ifconfig iostat iptables iw jobs last lastb ls lsblk lsmod lsof lspci lsusb md5 md5sum mdadm mount mpstat netstat nmcli ntpq os-prober pidstat ping ping6 pip pip3 postconf printenv ps route rpm rsync sfdisk sha1sum sha224sum sha256sum sha384sum sha512sum shasum ss sshd stat sum sysctl systemctl systeminfo timedatectl top tracepath tracepath6 traceroute traceroute6 udevadm ufw uname update-alternatives upower uptime vdir vmstat w wc who xrandr zipinfo)
|
jc_commands=(acpi airport arp blkid chage cksum crontab date df dig dmidecode dpkg du env file findmnt finger free git gpg hciconfig id ifconfig iostat iptables iw jobs last lastb ls lsblk lsmod lsof lspci lsusb md5 md5sum mdadm mount mpstat netstat nmcli ntpq os-prober pidstat ping ping6 pip pip3 postconf printenv ps route rpm rsync sfdisk sha1sum sha224sum sha256sum sha384sum sha512sum shasum ss sshd stat sum sysctl systemctl systeminfo timedatectl top tracepath tracepath6 traceroute traceroute6 udevadm ufw uname update-alternatives upower uptime vdir vmstat w wc who xrandr zipinfo)
|
||||||
jc_parsers=(--acpi --airport --airport-s --arp --asciitable --asciitable-m --blkid --cef --cef-s --chage --cksum --clf --crontab --crontab-u --csv --csv-s --date --datetime-iso --df --dig --dir --dmidecode --dpkg-l --du --email-address --env --file --findmnt --finger --free --fstab --git-log --git-log-s --git-ls-remote --gpg --group --gshadow --hash --hashsum --hciconfig --history --hosts --id --ifconfig --ini --iostat --iostat-s --ip-address --iptables --iw-scan --jar-manifest --jobs --jwt --kv --last --ls --ls-s --lsblk --lsmod --lsof --lspci --lsusb --m3u --mdadm --mount --mpstat --mpstat-s --netstat --nmcli --ntpq --os-prober --passwd --pci-ids --pidstat --pidstat-s --ping --ping-s --pip-list --pip-show --plist --postconf --proc --proc-buddyinfo --proc-consoles --proc-cpuinfo --proc-crypto --proc-devices --proc-diskstats --proc-filesystems --proc-interrupts --proc-iomem --proc-ioports --proc-loadavg --proc-locks --proc-meminfo --proc-modules --proc-mtrr --proc-pagetypeinfo --proc-partitions --proc-slabinfo --proc-softirqs --proc-stat --proc-swaps --proc-uptime --proc-version --proc-vmallocinfo --proc-vmstat --proc-zoneinfo --proc-driver-rtc --proc-net-arp --proc-net-dev --proc-net-dev-mcast --proc-net-if-inet6 --proc-net-igmp --proc-net-igmp6 --proc-net-ipv6-route --proc-net-netlink --proc-net-netstat --proc-net-packet --proc-net-protocols --proc-net-route --proc-net-unix --proc-pid-fdinfo --proc-pid-io --proc-pid-maps --proc-pid-mountinfo --proc-pid-numa-maps --proc-pid-smaps --proc-pid-stat --proc-pid-statm --proc-pid-status --ps --route --rpm-qi --rsync --rsync-s --semver --sfdisk --shadow --ss --sshd-conf --stat --stat-s --sysctl --syslog --syslog-s --syslog-bsd --syslog-bsd-s --systemctl --systemctl-lj --systemctl-ls --systemctl-luf --systeminfo --time --timedatectl --timestamp --top --top-s --tracepath --traceroute --udevadm --ufw --ufw-appinfo --uname --update-alt-gs --update-alt-q --upower --uptime --url --vmstat --vmstat-s --w --wc --who --x509-cert 
--xml --xrandr --yaml --zipinfo)
|
jc_parsers=(--acpi --airport --airport-s --arp --asciitable --asciitable-m --blkid --cef --cef-s --chage --cksum --clf --clf-s --crontab --crontab-u --csv --csv-s --date --datetime-iso --df --dig --dir --dmidecode --dpkg-l --du --email-address --env --file --findmnt --finger --free --fstab --git-log --git-log-s --git-ls-remote --gpg --group --gshadow --hash --hashsum --hciconfig --history --hosts --id --ifconfig --ini --iostat --iostat-s --ip-address --iptables --iw-scan --jar-manifest --jobs --jwt --kv --last --ls --ls-s --lsblk --lsmod --lsof --lspci --lsusb --m3u --mdadm --mount --mpstat --mpstat-s --netstat --nmcli --ntpq --os-prober --passwd --pci-ids --pidstat --pidstat-s --ping --ping-s --pip-list --pip-show --plist --postconf --proc --proc-buddyinfo --proc-consoles --proc-cpuinfo --proc-crypto --proc-devices --proc-diskstats --proc-filesystems --proc-interrupts --proc-iomem --proc-ioports --proc-loadavg --proc-locks --proc-meminfo --proc-modules --proc-mtrr --proc-pagetypeinfo --proc-partitions --proc-slabinfo --proc-softirqs --proc-stat --proc-swaps --proc-uptime --proc-version --proc-vmallocinfo --proc-vmstat --proc-zoneinfo --proc-driver-rtc --proc-net-arp --proc-net-dev --proc-net-dev-mcast --proc-net-if-inet6 --proc-net-igmp --proc-net-igmp6 --proc-net-ipv6-route --proc-net-netlink --proc-net-netstat --proc-net-packet --proc-net-protocols --proc-net-route --proc-net-unix --proc-pid-fdinfo --proc-pid-io --proc-pid-maps --proc-pid-mountinfo --proc-pid-numa-maps --proc-pid-smaps --proc-pid-stat --proc-pid-statm --proc-pid-status --ps --route --rpm-qi --rsync --rsync-s --semver --sfdisk --shadow --ss --sshd-conf --stat --stat-s --sysctl --syslog --syslog-s --syslog-bsd --syslog-bsd-s --systemctl --systemctl-lj --systemctl-ls --systemctl-luf --systeminfo --time --timedatectl --timestamp --top --top-s --tracepath --traceroute --udevadm --ufw --ufw-appinfo --uname --update-alt-gs --update-alt-q --upower --uptime --url --vmstat --vmstat-s --w --wc --who 
--x509-cert --xml --xrandr --yaml --zipinfo)
|
||||||
jc_options=(--force-color -C --debug -d --monochrome -m --meta-out -M --pretty -p --quiet -q --raw -r --unbuffer -u --yaml-out -y)
|
jc_options=(--force-color -C --debug -d --monochrome -m --meta-out -M --pretty -p --quiet -q --raw -r --unbuffer -u --yaml-out -y)
|
||||||
jc_about_options=(--about -a)
|
jc_about_options=(--about -a)
|
||||||
jc_about_mod_options=(--pretty -p --yaml-out -y --monochrome -m --force-color -C)
|
jc_about_mod_options=(--pretty -p --yaml-out -y --monochrome -m --force-color -C)
|
||||||
|
@ -100,7 +100,7 @@ _jc() {
|
|||||||
'xrandr:run "xrandr" command with magic syntax.'
|
'xrandr:run "xrandr" command with magic syntax.'
|
||||||
'zipinfo:run "zipinfo" command with magic syntax.'
|
'zipinfo:run "zipinfo" command with magic syntax.'
|
||||||
)
|
)
|
||||||
jc_parsers=(--acpi --airport --airport-s --arp --asciitable --asciitable-m --blkid --cef --cef-s --chage --cksum --clf --crontab --crontab-u --csv --csv-s --date --datetime-iso --df --dig --dir --dmidecode --dpkg-l --du --email-address --env --file --findmnt --finger --free --fstab --git-log --git-log-s --git-ls-remote --gpg --group --gshadow --hash --hashsum --hciconfig --history --hosts --id --ifconfig --ini --iostat --iostat-s --ip-address --iptables --iw-scan --jar-manifest --jobs --jwt --kv --last --ls --ls-s --lsblk --lsmod --lsof --lspci --lsusb --m3u --mdadm --mount --mpstat --mpstat-s --netstat --nmcli --ntpq --os-prober --passwd --pci-ids --pidstat --pidstat-s --ping --ping-s --pip-list --pip-show --plist --postconf --proc --proc-buddyinfo --proc-consoles --proc-cpuinfo --proc-crypto --proc-devices --proc-diskstats --proc-filesystems --proc-interrupts --proc-iomem --proc-ioports --proc-loadavg --proc-locks --proc-meminfo --proc-modules --proc-mtrr --proc-pagetypeinfo --proc-partitions --proc-slabinfo --proc-softirqs --proc-stat --proc-swaps --proc-uptime --proc-version --proc-vmallocinfo --proc-vmstat --proc-zoneinfo --proc-driver-rtc --proc-net-arp --proc-net-dev --proc-net-dev-mcast --proc-net-if-inet6 --proc-net-igmp --proc-net-igmp6 --proc-net-ipv6-route --proc-net-netlink --proc-net-netstat --proc-net-packet --proc-net-protocols --proc-net-route --proc-net-unix --proc-pid-fdinfo --proc-pid-io --proc-pid-maps --proc-pid-mountinfo --proc-pid-numa-maps --proc-pid-smaps --proc-pid-stat --proc-pid-statm --proc-pid-status --ps --route --rpm-qi --rsync --rsync-s --semver --sfdisk --shadow --ss --sshd-conf --stat --stat-s --sysctl --syslog --syslog-s --syslog-bsd --syslog-bsd-s --systemctl --systemctl-lj --systemctl-ls --systemctl-luf --systeminfo --time --timedatectl --timestamp --top --top-s --tracepath --traceroute --udevadm --ufw --ufw-appinfo --uname --update-alt-gs --update-alt-q --upower --uptime --url --vmstat --vmstat-s --w --wc --who --x509-cert 
--xml --xrandr --yaml --zipinfo)
|
jc_parsers=(--acpi --airport --airport-s --arp --asciitable --asciitable-m --blkid --cef --cef-s --chage --cksum --clf --clf-s --crontab --crontab-u --csv --csv-s --date --datetime-iso --df --dig --dir --dmidecode --dpkg-l --du --email-address --env --file --findmnt --finger --free --fstab --git-log --git-log-s --git-ls-remote --gpg --group --gshadow --hash --hashsum --hciconfig --history --hosts --id --ifconfig --ini --iostat --iostat-s --ip-address --iptables --iw-scan --jar-manifest --jobs --jwt --kv --last --ls --ls-s --lsblk --lsmod --lsof --lspci --lsusb --m3u --mdadm --mount --mpstat --mpstat-s --netstat --nmcli --ntpq --os-prober --passwd --pci-ids --pidstat --pidstat-s --ping --ping-s --pip-list --pip-show --plist --postconf --proc --proc-buddyinfo --proc-consoles --proc-cpuinfo --proc-crypto --proc-devices --proc-diskstats --proc-filesystems --proc-interrupts --proc-iomem --proc-ioports --proc-loadavg --proc-locks --proc-meminfo --proc-modules --proc-mtrr --proc-pagetypeinfo --proc-partitions --proc-slabinfo --proc-softirqs --proc-stat --proc-swaps --proc-uptime --proc-version --proc-vmallocinfo --proc-vmstat --proc-zoneinfo --proc-driver-rtc --proc-net-arp --proc-net-dev --proc-net-dev-mcast --proc-net-if-inet6 --proc-net-igmp --proc-net-igmp6 --proc-net-ipv6-route --proc-net-netlink --proc-net-netstat --proc-net-packet --proc-net-protocols --proc-net-route --proc-net-unix --proc-pid-fdinfo --proc-pid-io --proc-pid-maps --proc-pid-mountinfo --proc-pid-numa-maps --proc-pid-smaps --proc-pid-stat --proc-pid-statm --proc-pid-status --ps --route --rpm-qi --rsync --rsync-s --semver --sfdisk --shadow --ss --sshd-conf --stat --stat-s --sysctl --syslog --syslog-s --syslog-bsd --syslog-bsd-s --systemctl --systemctl-lj --systemctl-ls --systemctl-luf --systeminfo --time --timedatectl --timestamp --top --top-s --tracepath --traceroute --udevadm --ufw --ufw-appinfo --uname --update-alt-gs --update-alt-q --upower --uptime --url --vmstat --vmstat-s --w --wc --who 
--x509-cert --xml --xrandr --yaml --zipinfo)
|
||||||
jc_parsers_describe=(
|
jc_parsers_describe=(
|
||||||
'--acpi:`acpi` command parser'
|
'--acpi:`acpi` command parser'
|
||||||
'--airport:`airport -I` command parser'
|
'--airport:`airport -I` command parser'
|
||||||
@ -114,6 +114,7 @@ _jc() {
|
|||||||
'--chage:`chage --list` command parser'
|
'--chage:`chage --list` command parser'
|
||||||
'--cksum:`cksum` and `sum` command parser'
|
'--cksum:`cksum` and `sum` command parser'
|
||||||
'--clf:Common and Combined Log Format file parser'
|
'--clf:Common and Combined Log Format file parser'
|
||||||
|
'--clf-s:Common and Combined Log Format file streaming parser'
|
||||||
'--crontab:`crontab` command and file parser'
|
'--crontab:`crontab` command and file parser'
|
||||||
'--crontab-u:`crontab` file parser with user support'
|
'--crontab-u:`crontab` file parser with user support'
|
||||||
'--csv:CSV file parser'
|
'--csv:CSV file parser'
|
||||||
|
@ -70,10 +70,106 @@ Empty strings and `-` values are converted to `null`/`None`.
|
|||||||
Examples:
|
Examples:
|
||||||
|
|
||||||
$ cat file.log | jc --clf -p
|
$ cat file.log | jc --clf -p
|
||||||
[]
|
[
|
||||||
|
{
|
||||||
|
"host": "127.0.0.1",
|
||||||
|
"ident": "user-identifier",
|
||||||
|
"authuser": "frank",
|
||||||
|
"date": "10/Oct/2000:13:55:36 -0700",
|
||||||
|
"day": 10,
|
||||||
|
"month": "Oct",
|
||||||
|
"year": 2000,
|
||||||
|
"hour": 13,
|
||||||
|
"minute": 55,
|
||||||
|
"second": 36,
|
||||||
|
"tz": "-0700",
|
||||||
|
"request": "GET /apache_pb.gif HTTPS/1.0",
|
||||||
|
"status": 200,
|
||||||
|
"bytes": 2326,
|
||||||
|
"referer": null,
|
||||||
|
"user_agent": null,
|
||||||
|
"extra": null,
|
||||||
|
"request_method": "GET",
|
||||||
|
"request_url": "/apache_pb.gif",
|
||||||
|
"request_version": "HTTPS/1.0",
|
||||||
|
"epoch": 971211336,
|
||||||
|
"epoch_utc": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"host": "1.1.1.2",
|
||||||
|
"ident": null,
|
||||||
|
"authuser": null,
|
||||||
|
"date": "11/Nov/2016:03:04:55 +0100",
|
||||||
|
"day": 11,
|
||||||
|
"month": "Nov",
|
||||||
|
"year": 2016,
|
||||||
|
"hour": 3,
|
||||||
|
"minute": 4,
|
||||||
|
"second": 55,
|
||||||
|
"tz": "+0100",
|
||||||
|
"request": "GET /",
|
||||||
|
"status": 200,
|
||||||
|
"bytes": 83,
|
||||||
|
"referer": null,
|
||||||
|
"user_agent": null,
|
||||||
|
"extra": "- 9221 1.1.1.1",
|
||||||
|
"request_method": "GET",
|
||||||
|
"request_url": "/",
|
||||||
|
"request_version": null,
|
||||||
|
"epoch": 1478862295,
|
||||||
|
"epoch_utc": null
|
||||||
|
},
|
||||||
|
...
|
||||||
|
]
|
||||||
|
|
||||||
$ cat file.log | jc --clf -p -r
|
$ cat file.log | jc --clf -p -r
|
||||||
[]
|
[
|
||||||
|
{
|
||||||
|
"host": "127.0.0.1",
|
||||||
|
"ident": "user-identifier",
|
||||||
|
"authuser": "frank",
|
||||||
|
"date": "10/Oct/2000:13:55:36 -0700",
|
||||||
|
"day": "10",
|
||||||
|
"month": "Oct",
|
||||||
|
"year": "2000",
|
||||||
|
"hour": "13",
|
||||||
|
"minute": "55",
|
||||||
|
"second": "36",
|
||||||
|
"tz": "-0700",
|
||||||
|
"request": "GET /apache_pb.gif HTTPS/1.0",
|
||||||
|
"status": "200",
|
||||||
|
"bytes": "2326",
|
||||||
|
"referer": null,
|
||||||
|
"user_agent": null,
|
||||||
|
"extra": "",
|
||||||
|
"request_method": "GET",
|
||||||
|
"request_url": "/apache_pb.gif",
|
||||||
|
"request_version": "HTTPS/1.0"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"host": "1.1.1.2",
|
||||||
|
"ident": "-",
|
||||||
|
"authuser": "-",
|
||||||
|
"date": "11/Nov/2016:03:04:55 +0100",
|
||||||
|
"day": "11",
|
||||||
|
"month": "Nov",
|
||||||
|
"year": "2016",
|
||||||
|
"hour": "03",
|
||||||
|
"minute": "04",
|
||||||
|
"second": "55",
|
||||||
|
"tz": "+0100",
|
||||||
|
"request": "GET /",
|
||||||
|
"status": "200",
|
||||||
|
"bytes": "83",
|
||||||
|
"referer": "-",
|
||||||
|
"user_agent": "-",
|
||||||
|
"extra": "- 9221 1.1.1.1",
|
||||||
|
"request_method": "GET",
|
||||||
|
"request_url": "/",
|
||||||
|
"request_version": null
|
||||||
|
},
|
||||||
|
...
|
||||||
|
]
|
||||||
|
|
||||||
<a id="jc.parsers.clf.parse"></a>
|
<a id="jc.parsers.clf.parse"></a>
|
||||||
|
|
||||||
|
@ -24,6 +24,7 @@ parsers: List[str] = [
|
|||||||
'chage',
|
'chage',
|
||||||
'cksum',
|
'cksum',
|
||||||
'clf',
|
'clf',
|
||||||
|
'clf-s',
|
||||||
'crontab',
|
'crontab',
|
||||||
'crontab-u',
|
'crontab-u',
|
||||||
'csv',
|
'csv',
|
||||||
|
@ -65,10 +65,106 @@ Empty strings and `-` values are converted to `null`/`None`.
|
|||||||
Examples:
|
Examples:
|
||||||
|
|
||||||
$ cat file.log | jc --clf -p
|
$ cat file.log | jc --clf -p
|
||||||
[]
|
[
|
||||||
|
{
|
||||||
|
"host": "127.0.0.1",
|
||||||
|
"ident": "user-identifier",
|
||||||
|
"authuser": "frank",
|
||||||
|
"date": "10/Oct/2000:13:55:36 -0700",
|
||||||
|
"day": 10,
|
||||||
|
"month": "Oct",
|
||||||
|
"year": 2000,
|
||||||
|
"hour": 13,
|
||||||
|
"minute": 55,
|
||||||
|
"second": 36,
|
||||||
|
"tz": "-0700",
|
||||||
|
"request": "GET /apache_pb.gif HTTPS/1.0",
|
||||||
|
"status": 200,
|
||||||
|
"bytes": 2326,
|
||||||
|
"referer": null,
|
||||||
|
"user_agent": null,
|
||||||
|
"extra": null,
|
||||||
|
"request_method": "GET",
|
||||||
|
"request_url": "/apache_pb.gif",
|
||||||
|
"request_version": "HTTPS/1.0",
|
||||||
|
"epoch": 971211336,
|
||||||
|
"epoch_utc": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"host": "1.1.1.2",
|
||||||
|
"ident": null,
|
||||||
|
"authuser": null,
|
||||||
|
"date": "11/Nov/2016:03:04:55 +0100",
|
||||||
|
"day": 11,
|
||||||
|
"month": "Nov",
|
||||||
|
"year": 2016,
|
||||||
|
"hour": 3,
|
||||||
|
"minute": 4,
|
||||||
|
"second": 55,
|
||||||
|
"tz": "+0100",
|
||||||
|
"request": "GET /",
|
||||||
|
"status": 200,
|
||||||
|
"bytes": 83,
|
||||||
|
"referer": null,
|
||||||
|
"user_agent": null,
|
||||||
|
"extra": "- 9221 1.1.1.1",
|
||||||
|
"request_method": "GET",
|
||||||
|
"request_url": "/",
|
||||||
|
"request_version": null,
|
||||||
|
"epoch": 1478862295,
|
||||||
|
"epoch_utc": null
|
||||||
|
},
|
||||||
|
...
|
||||||
|
]
|
||||||
|
|
||||||
$ cat file.log | jc --clf -p -r
|
$ cat file.log | jc --clf -p -r
|
||||||
[]
|
[
|
||||||
|
{
|
||||||
|
"host": "127.0.0.1",
|
||||||
|
"ident": "user-identifier",
|
||||||
|
"authuser": "frank",
|
||||||
|
"date": "10/Oct/2000:13:55:36 -0700",
|
||||||
|
"day": "10",
|
||||||
|
"month": "Oct",
|
||||||
|
"year": "2000",
|
||||||
|
"hour": "13",
|
||||||
|
"minute": "55",
|
||||||
|
"second": "36",
|
||||||
|
"tz": "-0700",
|
||||||
|
"request": "GET /apache_pb.gif HTTPS/1.0",
|
||||||
|
"status": "200",
|
||||||
|
"bytes": "2326",
|
||||||
|
"referer": null,
|
||||||
|
"user_agent": null,
|
||||||
|
"extra": "",
|
||||||
|
"request_method": "GET",
|
||||||
|
"request_url": "/apache_pb.gif",
|
||||||
|
"request_version": "HTTPS/1.0"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"host": "1.1.1.2",
|
||||||
|
"ident": "-",
|
||||||
|
"authuser": "-",
|
||||||
|
"date": "11/Nov/2016:03:04:55 +0100",
|
||||||
|
"day": "11",
|
||||||
|
"month": "Nov",
|
||||||
|
"year": "2016",
|
||||||
|
"hour": "03",
|
||||||
|
"minute": "04",
|
||||||
|
"second": "55",
|
||||||
|
"tz": "+0100",
|
||||||
|
"request": "GET /",
|
||||||
|
"status": "200",
|
||||||
|
"bytes": "83",
|
||||||
|
"referer": "-",
|
||||||
|
"user_agent": "-",
|
||||||
|
"extra": "- 9221 1.1.1.1",
|
||||||
|
"request_method": "GET",
|
||||||
|
"request_url": "/",
|
||||||
|
"request_version": null
|
||||||
|
},
|
||||||
|
...
|
||||||
|
]
|
||||||
"""
|
"""
|
||||||
import re
|
import re
|
||||||
from typing import List, Dict
|
from typing import List, Dict
|
||||||
|
223
jc/parsers/clf_s.py
Normal file
223
jc/parsers/clf_s.py
Normal file
@ -0,0 +1,223 @@
|
|||||||
|
"""jc - JSON Convert Common Log Format file streaming parser
|
||||||
|
|
||||||
|
> This streaming parser outputs JSON Lines (cli) or returns an Iterable of
|
||||||
|
> Dictionaries (module)
|
||||||
|
|
||||||
|
This parser will handle the Common Log Format standard as specified at
|
||||||
|
https://www.w3.org/Daemon/User/Config/Logging.html#common-logfile-format.
|
||||||
|
|
||||||
|
Combined Log Format is also supported. (Referer and User Agent fields added)
|
||||||
|
|
||||||
|
Extra fields may be present and will be enclosed in the `extra` field as
|
||||||
|
a single string.
|
||||||
|
|
||||||
|
If a log line cannot be parsed, an object with an `unparsable` field will
|
||||||
|
be present with a value of the original line.
|
||||||
|
|
||||||
|
The `epoch` calculated timestamp field is naive. (i.e. based on the
|
||||||
|
local time of the system the parser is run on)
|
||||||
|
|
||||||
|
The `epoch_utc` calculated timestamp field is timezone-aware and is
|
||||||
|
only available if the timezone field is UTC.
|
||||||
|
|
||||||
|
Usage (cli):
|
||||||
|
|
||||||
|
$ cat file.log | jc --clf-s
|
||||||
|
|
||||||
|
Usage (module):
|
||||||
|
|
||||||
|
import jc
|
||||||
|
|
||||||
|
result = jc.parse('clf_s', common_log_file_output.splitlines())
|
||||||
|
for item in result:
|
||||||
|
# do something
|
||||||
|
|
||||||
|
Schema:
|
||||||
|
|
||||||
|
Empty strings and `-` values are converted to `null`/`None`.
|
||||||
|
|
||||||
|
{
|
||||||
|
"host": string,
|
||||||
|
"ident": string,
|
||||||
|
"authuser": string,
|
||||||
|
"date": string,
|
||||||
|
"day": integer,
|
||||||
|
"month": string,
|
||||||
|
"year": integer,
|
||||||
|
"hour": integer,
|
||||||
|
"minute": integer,
|
||||||
|
"second": integer,
|
||||||
|
"tz": string,
|
||||||
|
"request": string,
|
||||||
|
"request_method": string,
|
||||||
|
"request_url": string,
|
||||||
|
"request_version": string,
|
||||||
|
"status": integer,
|
||||||
|
"bytes": integer,
|
||||||
|
"referer": string,
|
||||||
|
"user_agent": string,
|
||||||
|
"extra": string,
|
||||||
|
"epoch": integer, # [0]
|
||||||
|
"epoch_utc": integer, # [1]
|
||||||
|
"unparsable": string # [2]
|
||||||
|
}
|
||||||
|
|
||||||
|
[0] naive timestamp
|
||||||
|
[1] timezone-aware timestamp. Only available if timezone field is UTC
|
||||||
|
[2] present only if the line could not be parsed
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
|
||||||
|
$ cat file.log | jc --clf-s
|
||||||
|
{"host":"127.0.0.1","ident":"user-identifier","authuser":"frank","...}
|
||||||
|
{"host":"1.1.1.2","ident":null,"authuser":null,"date":"11/Nov/2016...}
|
||||||
|
...
|
||||||
|
|
||||||
|
$ cat file.log | jc --clf-s -r
|
||||||
|
{"host":"127.0.0.1","ident":"user-identifier","authuser":"frank","...}
|
||||||
|
{"host":"1.1.1.2","ident":"-","authuser":"-","date":"11/Nov/2016:0...}
|
||||||
|
...
|
||||||
|
"""
|
||||||
|
import re
|
||||||
|
from typing import Dict, Iterable
|
||||||
|
import jc.utils
|
||||||
|
from jc.streaming import (
|
||||||
|
add_jc_meta, streaming_input_type_check, streaming_line_input_type_check, raise_or_yield
|
||||||
|
)
|
||||||
|
from jc.jc_types import JSONDictType, StreamingOutputType
|
||||||
|
from jc.exceptions import ParseError
|
||||||
|
|
||||||
|
|
||||||
|
class info():
    """Provides parser metadata (version, author, etc.)"""
    # parser version; also exported as the module-level `__version__`
    version = '1.0'
    # one-line summary of what this parser handles
    description = 'Common and Combined Log Format file streaming parser'
    author = 'Kelly Brazil'
    author_email = 'kellyjonbrazil@gmail.com'
    # platform identifiers handed to `jc.utils.compatibility()` by `parse()`
    compatible = ['linux', 'darwin', 'cygwin', 'win32', 'aix', 'freebsd']
    # NOTE(review): presumably how jc tells streaming parsers apart from
    # standard ones -- confirm against the parser registry
    streaming = True
|
||||||
|
|
||||||
|
|
||||||
|
__version__ = info.version
|
||||||
|
|
||||||
|
|
||||||
|
def _process(proc_data: JSONDictType) -> JSONDictType:
    """
    Final processing to conform to the schema.

    The dictionary is modified in place and the same object is returned.

    Parameters:

        proc_data:   (Dictionary) raw structured data to process

    Returns:

        Dictionary. Structured data to conform to the schema.
    """
    int_list = {'day', 'year', 'hour', 'minute', 'second', 'status', 'bytes'}

    # Only values of existing keys are reassigned inside this loop (no keys
    # are added or removed), so mutating while iterating `items()` is safe.
    for key, val in proc_data.items():

        # integer conversions
        if key in int_list:
            proc_data[key] = jc.utils.convert_to_int(val)

        # convert `-` and blank values to None
        # (`val` is still the original, pre-conversion value here, so this
        # also applies to the integer fields handled above)
        if val == '-' or val == '':
            proc_data[key] = None

    # add unix timestamps (entries without a `date` key, such as
    # `unparsable` records, are left without `epoch`/`epoch_utc`)
    if 'date' in proc_data:
        ts = jc.utils.timestamp(proc_data['date'], format_hint=(1800,))  # type: ignore
        proc_data['epoch'] = ts.naive
        proc_data['epoch_utc'] = ts.utc

    return proc_data
|
||||||
|
|
||||||
|
|
||||||
|
@add_jc_meta
def parse(
    data: Iterable[str],
    raw: bool = False,
    quiet: bool = False,
    ignore_exceptions: bool = False
) -> StreamingOutputType:
    """
    Main text parsing generator function. Returns an iterable object.

    Each non-blank input line produces one dictionary. Lines that match the
    Common/Combined Log Format pattern are split into their named fields;
    lines that do not match produce `{"unparsable": <stripped line>}`.
    Blank and whitespace-only lines are skipped.

    Parameters:

        data:              (iterable)  line-based text data to parse
                                       (e.g. sys.stdin or str.splitlines())

        raw:               (boolean)   unprocessed output if True
        quiet:             (boolean)   suppress warning messages if True
        ignore_exceptions: (boolean)   ignore parsing exceptions if True


    Returns:

        Iterable of Dictionaries
    """
    jc.utils.compatibility(__name__, info.compatible, quiet)
    streaming_input_type_check(data)

    # Both patterns are compiled once, outside the per-line loop.
    clf_pattern = re.compile(r'''
        ^(?P<host>-|\S+)\s
        (?P<ident>-|\S+)\s
        (?P<authuser>-|\S+)\s
        \[
        (?P<date>
            (?P<day>\d+)/
            (?P<month>\S\S\S)/
            (?P<year>\d\d\d\d):
            (?P<hour>\d\d):
            (?P<minute>\d\d):
            (?P<second>\d\d)\s
            (?P<tz>\S+)
        )
        \]\s
        \"(?P<request>.*?)\"\s
        (?P<status>-|\d\d\d)\s
        (?P<bytes>-|\d+)\s?
        (?:\"(?P<referer>.*?)\"\s?)?
        (?:\"(?P<user_agent>.*?)\"\s?)?
        (?P<extra>.*)
        ''', re.VERBOSE
    )

    request_pattern = re.compile(r'''
        (?P<request_method>\S+)\s
        (?P<request_url>.*?(?=\sHTTPS?/|$))\s?   # positive lookahead for HTTP(S)/ or end of string
        (?P<request_version>HTTPS?/[\d\.]+)?
        ''', re.VERBOSE
    )

    for line in data:
        try:
            streaming_line_input_type_check(line)

            # Skip blank lines. Using strip() also covers whitespace-only
            # lines and a bare '\r\n', which would otherwise be emitted as
            # an `unparsable` record with an empty value.
            if not line.strip():
                continue

            clf_match = clf_pattern.match(line)

            if clf_match:
                output_line: Dict = clf_match.groupdict()

                # Break the request string into method / url / version when
                # it is non-empty and has the expected shape; otherwise the
                # request_* keys are simply not added.
                request_string = output_line.get('request')
                if request_string:
                    request_match = request_pattern.match(request_string)
                    if request_match:
                        output_line.update(request_match.groupdict())

            else:
                output_line = {"unparsable": line.strip()}

            # `output_line` is always a non-empty dict at this point, so no
            # "not clf data" error path is needed.
            yield output_line if raw else _process(output_line)

        except Exception as e:
            # Hand the exception and offending line to `raise_or_yield`
            # along with `ignore_exceptions`; its result is yielded.
            yield raise_or_yield(ignore_exceptions, e, line)
|
Reference in New Issue
Block a user