1
0
mirror of https://github.com/kellyjonbrazil/jc.git synced 2025-06-19 00:17:51 +02:00
Files
jc/jc/parsers/m3u.py
2022-07-16 20:51:18 -07:00

184 lines
5.1 KiB
Python

"""jc - JSON Convert M3U and M3U8 file parser
This parser will make a best-effort to parse extended field information. If
the extended fields cannot be successfully parsed, then an `unparsed_info`
field will be added to the object. If not using `--quiet`, then a warning
message also will be printed to `STDERR`.
Usage (cli):
$ cat playlist.m3u | jc --m3u
Usage (module):
import jc
result = jc.parse('m3u', m3u_file_output)
Schema:
[
{
"runtime": integer,
"display": string,
"path": string,
<extended fields>: string, # [0]
"unparsed_info": string, # [1]
}
]
[0] Field names are pulled directly from the #EXTINF: line
[1] Only added if the extended information cannot be parsed
Examples:
$ cat playlist.m3u | jc --m3u -p
[
{
"runtime": 105,
"display": "Example artist - Example title",
"path": "C:\\Files\\My Music\\Example.mp3"
},
{
"runtime": 321,
"display": "Example Artist2 - Example title2",
"path": "C:\\Files\\My Music\\Favorites\\Example2.ogg"
}
]
$ cat playlist.m3u | jc --m3u -p -r
[
{
"runtime": "105",
"display": "Example artist - Example title",
"path": "C:\\Files\\My Music\\Example.mp3"
},
{
"runtime": "321",
"display": "Example Artist2 - Example title2",
"path": "C:\\Files\\My Music\\Favorites\\Example2.ogg"
}
]
"""
import shlex
from typing import List, Dict
import jc.utils
class info:
    """Static metadata describing this parser: version, author, platforms."""
    # parser release number; also exported below as the module `__version__`
    version = '1.0'
    description = 'M3U and M3U8 file parser'
    author = 'Kelly Brazil'
    author_email = 'kellyjonbrazil@gmail.com'
    # platform identifiers handed to `jc.utils.compatibility()` by `parse()`
    compatible = [
        'linux',
        'darwin',
        'cygwin',
        'win32',
        'aix',
        'freebsd',
    ]


__version__ = info.version
def _process(proc_data: List[Dict]) -> List[Dict]:
    """
    Convert raw string fields to their schema types.

    The entries are modified in place and the same list object is returned.

    Parameters:

        proc_data:   (List of Dictionaries) raw structured data to process

    Returns:

        List of Dictionaries. Structured to conform to the schema.
    """
    # fields that are run through jc.utils.convert_to_int()
    integer_fields = ('runtime',)

    for record in proc_data:
        for field in integer_fields:
            if field in record:
                record[field] = jc.utils.convert_to_int(record[field])

    return proc_data
def parse(
    data: str,
    raw: bool = False,
    quiet: bool = False
) -> List[Dict]:
    """
    Main text parsing function

    Each non-comment line is treated as a path and completes one output
    entry. A preceding `#EXTINF:` line contributes `runtime`, `display`
    and any `key=value` extended fields to that entry. If the `#EXTINF:`
    line cannot be tokenized, the entry gets an `unparsed_info` field
    holding the original line instead, and a warning is emitted unless
    `quiet` is set.

    Parameters:

        data:        (string)  text data to parse
        raw:         (boolean) unprocessed output if True
        quiet:       (boolean) suppress warning messages if True

    Returns:

        List of Dictionaries. Raw or processed structured data.
    """
    jc.utils.compatibility(__name__, info.compatible, quiet)
    jc.utils.input_type_check(data)

    # In posix mode shlex treats an apostrophe as an opening quote, so a
    # title such as "Don't Stop" would fail with "No closing quotation".
    # Apostrophes are therefore swapped for an invisible placeholder
    # (U+2063 INVISIBLE SEPARATOR) before tokenizing and swapped back
    # afterwards. The placeholder is written as an escape so that it cannot
    # be silently lost when the file is copied or re-encoded.
    quote_placeholder = '\u2063'

    raw_output: List = []
    output_line: Dict = {}

    if jc.utils.has_data(data):

        for line in filter(None, data.splitlines()):
            # ignore any lines with only whitespace
            if not jc.utils.has_data(line):
                continue

            stripped = line.strip()

            # extended info fields
            if stripped.startswith('#EXTINF:'):
                # everything after the first colon is the extended info
                ext_info = stripped.split(':', maxsplit=1)[1]
                ext_info = ext_info.replace("'", quote_placeholder)

                # best-effort to parse additional extended fields
                # if a parsing error occurs, a warning message will be
                # printed to STDERR and `unparsed_info` added
                try:
                    lexer = shlex.shlex(ext_info, posix=True)
                    lexer.whitespace_split = True
                    lexer.whitespace = ', '  # add comma to whitespace detection

                    # restore the apostrophes in every token so that both
                    # the display text and key/value fields keep them
                    tokens = [
                        token.replace(quote_placeholder, "'")
                        for token in lexer
                    ]

                    # first token is the runtime; an empty token list
                    # raises IndexError, which is handled below
                    runtime = tokens.pop(0)
                    display_words = []

                    for token in tokens:
                        if '=' in token:
                            key, value = token.split('=', maxsplit=1)
                            output_line[key] = value
                        else:
                            display_words.append(token)

                    output_line.update({
                        'runtime': runtime,
                        'display': ' '.join(display_words)
                    })

                except Exception:
                    # deliberate catch-all: extended info is non-standard,
                    # so any failure falls back to the unparsed line
                    if not quiet:
                        jc.utils.warning_message([
                            'Not able to parse non-standard extensions in the following line:',
                            line
                        ])

                    # discard anything partially parsed from this line
                    output_line = {'unparsed_info': line}

                continue

            # ignore all other extension info (obsolete)
            if stripped.startswith('#'):
                continue

            # any lines left over are paths
            output_line['path'] = stripped
            raw_output.append(output_line)
            output_line = {}

    return raw_output if raw else _process(raw_output)