1
0
mirror of https://github.com/kellyjonbrazil/jc.git synced 2025-08-10 22:41:51 +02:00

add multi-archive support

This commit is contained in:
Kelly Brazil
2021-12-21 11:11:44 -08:00
parent ff78a46c48
commit 51d2f316f3

View File

@@ -19,7 +19,7 @@ Usage (module):
result = jc.parsers.zipinfo.parse(zipinfo_command_output) result = jc.parsers.zipinfo.parse(zipinfo_command_output)
Schema: Schema:
[
{ {
"archive": string, "archive": string,
"size": integer, "size": integer,
@@ -43,6 +43,7 @@ Schema:
} }
] ]
} }
]
Examples: Examples:
@@ -103,17 +104,20 @@ def _process(proc_data):
""" """
for entry in proc_data: for entry in proc_data:
int_list = ['bytes_compressed', 'bytes_uncompressed', 'number_entries', 'number_files', 'size'] int_list = ['bytes_compressed', 'bytes_uncompressed', 'number_entries',
'number_files', 'size', 'filesize']
float_list = ['percent_compressed']
for key in entry: for key in entry:
if key in int_list: if key in int_list:
entry[key] = jc.utils.convert_to_int(entry[key]) entry[key] = jc.utils.convert_to_int(entry[key])
if key in float_list:
entry[key] = jc.utils.convert_to_float(entry[key])
if key in "files": if 'files' in key:
for d in entry[key]: for item in entry['files']:
for key in d: for key in item:
if key in "filesize": if key in int_list:
d[key] = jc.utils.convert_to_int(d[key]) item[key] = jc.utils.convert_to_int(item[key])
return proc_data return proc_data
@@ -134,38 +138,58 @@ def parse(data, raw=False, quiet=False):
jc.utils.compatibility(__name__, info.compatible, quiet) jc.utils.compatibility(__name__, info.compatible, quiet)
jc.utils.input_type_check(data) jc.utils.input_type_check(data)
raw_output = {} raw_output = []
archives = []
datalines = data.splitlines()
datalist = list(filter(None, datalines))
if jc.utils.has_data(data): if jc.utils.has_data(data):
datalines = data.splitlines()
archive_info = [] # remove last line of multi-archive output since it is not needed
if datalines[-1].endswith('archives were successfully processed.'):
datalines.pop(-1)
# extract each archive into its own list of lines.
# archives are separated by a blank line
this_archive = []
for row in datalines:
if row == '':
archives.append(this_archive)
this_archive = []
continue
this_archive.append(row)
if this_archive:
archives.append(this_archive)
# iterate through list of archives and parse
for archive_item in archives:
archive_info = {}
# 1st line # 1st line
# Archive: log4j-core-2.16.0.jar # Archive: log4j-core-2.16.0.jar
line = datalist.pop(0) line = archive_item.pop(0)
_, archive = line.split() _, archive = line.split()
# 2nd line # 2nd line
# Zip file size: 1789565 bytes, number of entries: 1218 # Zip file size: 1789565 bytes, number of entries: 1218
line = datalist.pop(0) line = archive_item.pop(0)
_, _, _, size, size_unit, _, _, _, number_entries = line.split() _, _, _, size, size_unit, *_, number_entries = line.split()
size_unit = size_unit.rstrip(',') size_unit = size_unit.rstrip(',')
# last line # last line
# 1218 files, 3974141 bytes uncompressed, 1515455 bytes compressed: 61.9% # 1218 files, 3974141 bytes uncompressed, 1515455 bytes compressed: 61.9%
line = datalist.pop(-1) line = archive_item.pop(-1)
number_files, _, bytes_uncompressed, _, _, bytes_compressed, _, _, percent_compressed = line.split() number_files, _, bytes_uncompressed, _, _, bytes_compressed, *_, percent_compressed = line.split()
percent_compressed = float(percent_compressed.rstrip("%")) percent_compressed = percent_compressed.rstrip("%")
# Add header row for parsing # Add header row for parsing
datalist[:0] = ['flags zipversion zipunder filesize type method date time filename'] archive_item[:0] = ['flags zipversion zipunder filesize type method date time filename']
file_list = jc.parsers.universal.simple_table_parse(datalist) file_list = jc.parsers.universal.simple_table_parse(archive_item)
archive_info.append({'archive': archive, archive_info = {
'archive': archive,
'size': size, 'size': size,
'size_unit': size_unit, 'size_unit': size_unit,
'number_entries': number_entries, 'number_entries': number_entries,
@@ -173,8 +197,9 @@ def parse(data, raw=False, quiet=False):
'bytes_uncompressed': bytes_uncompressed, 'bytes_uncompressed': bytes_uncompressed,
'bytes_compressed': bytes_compressed, 'bytes_compressed': bytes_compressed,
'percent_compressed': percent_compressed, 'percent_compressed': percent_compressed,
'files': file_list}) 'files': file_list
}
raw_output = archive_info raw_output.append(archive_info)
return raw_output if raw else _process(raw_output) return raw_output if raw else _process(raw_output)