mirror of
https://github.com/ryanoasis/nerd-fonts.git
synced 2024-12-19 20:12:52 +02:00
01569cad8e
[why] Some PS weights have a dash in the weight, like 'Extra-Light' in Iosevka. The parser can not parse it because it expects 'ExtraLight'. [how] Filter out all '-' and ' ' from the PS weight string before actually parsing the string. Signed-off-by: Fini Jastrow <ulf.fini.jastrow@desy.de>
422 lines
17 KiB
Python
422 lines
17 KiB
Python
#!/usr/bin/env python
|
|
# coding=utf8
|
|
|
|
import re
|
|
import sys
|
|
|
|
class FontnameTools:
    """Deconstruct a fontname to get standardized name parts

    All members are static helpers or class-level lookup tables; the class
    is used as a namespace and is never instantiated by the code shown here.
    """

    @staticmethod
    def front_upper(word):
        """Capitalize a string (but keep case of subsequent chars)"""
        return word[:1].upper() + word[1:]

    @staticmethod
    def camel_casify(word):
        """Remove blanks and use CamelCase for the new word"""
        return ''.join(FontnameTools.front_upper(part) for part in word.split(' '))

    @staticmethod
    def camel_explode(word):
        """Explode CamelCase -> Camel Case

        Some well known name prefixes (see `excludes`) are kept in one piece
        when they are found at the very start of the word.
        """
        # But do not explode "JetBrains" etc at string start...
        excludes = [
            'JetBrains',
            'DejaVu',
            'OpenDyslexicAlta',
            'OpenDyslexicMono',
            'OpenDyslexic',
            'DaddyTimeMono',
            'InconsolataGo',
            'ProFontWindows',
            'ProFont',
            'ProggyClean',
        ]
        m = re.match('(' + '|'.join(excludes) + ')(.*)', word)
        (prefix, word) = m.group(1, 2) if m is not None else ('', word)
        if len(word) == 0:
            # Nothing left after the protected prefix (or the word was empty)
            return prefix
        # Split at every lower-case/digit -> upper-case boundary
        parts = re.split('(?<=[a-z0-9])(?=[A-Z])', word)
        if len(prefix):
            parts.insert(0, prefix)
        return ' '.join(parts)

    @staticmethod
    def drop_empty(l):
        """Remove empty strings from list of strings"""
        return [x for x in l if len(x) > 0]

    @staticmethod
    def concat(*all_things):
        """Flatten list of (strings or lists of strings) to a blank-separated string"""
        flattened = []
        for thing in all_things:
            if isinstance(thing, list):
                flattened += thing
            else:
                flattened.append(thing)
        return ' '.join(FontnameTools.drop_empty(flattened))

    @staticmethod
    def unify_style_names(style_name):
        """Substitute some known token with standard wording

        Lookup is case insensitive; unknown names are returned unchanged.
        """
        known_names = {
            # Source of the table is the current sourcefonts
            # Left side needs to be lower case
            'book': '',
            'ce': 'CE',
            'normal': 'Regular',
        }
        return known_names.get(style_name.lower(), style_name)

    @staticmethod
    def find_in_dicts(key, dicts):
        """Find an entry in a list of dicts, return entry and in which list it was

        Returns (None, 0) if the key is in none of the dicts.
        """
        for i, d in enumerate(dicts):
            if key in d:
                return (d[key], i)
        return (None, 0)

    @staticmethod
    def get_shorten_form_idx(aggressive, prefix, form_if_prefixed):
        """Get the tuple index of known_* data tables"""
        if aggressive:
            return 0
        if len(prefix):
            return form_if_prefixed
        return 1

    @staticmethod
    def shorten_style_name(name, aggressive):
        """Substitute some known styles to short form

        Names that are not found in the known_* tables are returned unchanged.
        """
        # If aggressive is False create the mild short form
        # aggressive == True: Always use first form of everything
        # aggressive == False:
        #               - has no modifier: use the second form
        #               - has modifier: use second form of mod plus first form of weights2
        #               - has modifier: use second form of mod plus second form of widths
        name_rest = name
        name_pre = ''
        form = FontnameTools.get_shorten_form_idx(aggressive, '', 0)
        for mod in FontnameTools.known_modifiers:
            if name.startswith(mod) and len(name) > len(mod):  # Second condition specifically for 'Demi'
                name_pre = FontnameTools.known_modifiers[mod][form]
                name_rest = name[len(mod):]
                break
        subst, i = FontnameTools.find_in_dicts(name_rest, [FontnameTools.known_weights2, FontnameTools.known_widths])
        form = FontnameTools.get_shorten_form_idx(aggressive, name_pre, i)
        # An empty tuple (the ' ' placeholder in known_weights2) carries no
        # abbreviation; indexing it would raise IndexError, so treat it as unknown
        if isinstance(subst, tuple) and subst:
            return name_pre + subst[form]
        if not len(name_pre):
            # The following sets do not allow modifiers
            subst, _ = FontnameTools.find_in_dicts(name_rest, [FontnameTools.known_weights1, FontnameTools.known_slopes])
            if isinstance(subst, tuple):
                return subst[form]
        return name

    @staticmethod
    def short_styles(lists, aggressive):
        """Shorten all style names in a list or a list of lists"""
        if not len(lists) or not isinstance(lists[0], list):
            return [FontnameTools.shorten_style_name(x, aggressive) for x in lists]
        return [[FontnameTools.shorten_style_name(x, aggressive) for x in styles] for styles in lists]

    @staticmethod
    def make_oblique_style(weights, styles):
        """Move "Oblique" from weights to styles for font naming purposes

        The input lists are not modified; copies are returned if a change is needed.
        """
        if 'Oblique' in weights:
            weights = list(weights)
            weights.remove('Oblique')
            styles = list(styles)
            styles.append('Oblique')
        return (weights, styles)

    @staticmethod
    def get_name_token(name, tokens):
        """Try to find any case insensitive token from tokens in the name, return tuple with found token-list and rest

        Returns ( unmatched_rest, found_tokens ). Exits the program if a
        token contains a capturing group.
        """
        # Every entry of tokens is used as (part of) a regex. If the matched text
        # (lower cased, dashes removed) equals a lower cased tokens entry, that entry
        # is given back with its original case (i.e. [ 'Bold' ] will match "bold" and
        # return it as [ 'Bold' ]). Otherwise the matched text itself is used.
        #
        # Token are always used in a regex and may not capture, use non capturing
        # grouping if needed (?: ... )
        lower_tokens = [t.lower() for t in tokens]
        not_matched = ""
        all_tokens = []
        token_regex = '|'.join(tokens)
        # Allow a dash between CamelCase token word parts, i.e. Camel-Case
        # This allows for styles like Extra-Bold
        token_regex = re.sub(r'(?<=[a-z])(?=[A-Z])', '-?', token_regex)
        regex = re.compile('(.*?)(' + token_regex + ')(.*)', re.IGNORECASE)
        while True:
            match = regex.match(name)
            if not match:
                break
            if len(match.groups()) != 3:
                sys.exit('Malformed regex in FontnameTools.get_name_token()')
            prefix, found, rest = match.groups()
            if not prefix and not found:
                # Empty match at the string start (e.g. empty token list): the
                # rest would equal name and the loop would never terminate
                break
            not_matched += ' ' + prefix  # Blank prevents unwanted concatenation of unmatched substrings
            tok = found.lower()
            tok = tok.replace('-', '')  # Remove dashes between CamelCase token words
            if tok in lower_tokens:
                tok = tokens[lower_tokens.index(tok)]
            tok = FontnameTools.unify_style_names(tok)
            if len(tok):
                all_tokens.append(tok)
            name = rest  # Continue with the rest
        not_matched += ' ' + name
        return (not_matched.strip(), all_tokens)

    @staticmethod
    def postscript_char_filter(name):
        """Filter out characters that are not allowed in Postscript names"""
        # The name string must be restricted to the printable ASCII subset, codes 33 to 126,
        # except for the 10 characters '[', ']', '(', ')', '{', '}', '<', '>', '/', '%'
        forbidden = '[](){}<>/%'
        return ''.join(c for c in name if c not in forbidden and 33 <= ord(c) <= 126)

    # List of ( regex pattern, replacement ) pairs for font names.
    # NOTE(review): the consumer of this table is not part of this class; presumably
    # the pairs are applied case insensitively to rename fonts - confirm with the caller.
    SIL_TABLE = [
        ( '(a)nka/(c)oder',         r'\1na\2onder' ),
        ( '(a)nonymous',            r'\1nonymice' ),
        ( '(b)itstream( ?)(v)era( ?sans ?mono)?', r'\1itstrom\2Wera' ),
        ( '(c)ascadia( ?)(c)ode',   r'\1askaydia\2\3ove' ),
        ( '(c)ascadia( ?)(m)ono',   r'\1askaydia\2\3ono' ),
        ( 'Gohufont',               r'GohuFont'), # Correct to CamelCase
        ( '(h)ermit',               r'\1urmit' ),
        ( '(h)asklig',              r'\1asklug' ),
        ( 'iA([- ]?)writer',        r'iM\1Writing' ),
        ( 'IBM[- ]?plex',           r'Blex' ), # We do not keep the case here
        ( '(i)ntel( ?)(o)ne',       r'\1ntone' ),
        ( '(l)iberation',           r'\1iteration' ),
        ( '(m)( ?)plus',            r'\1+'), # Added this, because they use a plus symbol :->
        ( '(s)hare',                r'\1hure' ),
        ( '(s)ource',               r'\1auce' ),
        ( '(t)erminus',             r'\1erminess' ),
        # Noone cares that font names starting with a digit are forbidden:
        ( 'IBM 3270',               r'3270'), # for historical reasons and 'IBM' is a TM or something
        # Some name parts that are too long for us
        ( '(.*sans ?m)ono',         r'\1'), # Various SomenameSansMono fonts
        ( '(.*code ?lat)in Expanded', r'\1X'), # for 'M PLUS Code Latin Expanded'
        ( '(.*code ?lat)in',        r'\1'), # for 'M PLUS Code Latin'
        ( '(b)ig( ?)(b)lue( ?)(t)erminal', r'\1ig\3lue\5erm'), # Shorten BigBlueTerminal
        ( '(.*)437TT',              r'\g<1>437'), # Shorten BigBlueTerminal 437 TT even further
        ( '(.*dyslexic ?alt)a',     r'\1'), # Open Dyslexic Alta -> Open Dyslexic Alt
        ( '(.*dyslexic ?m)ono',     r'\1'), # Open Dyslexic Mono -> Open Dyslexic M
        ( '(overpass ?m)ono',       r'\1'), # Overpass Mono -> Overpass M
        ( '(proggyclean) ?tt',      r'\1'), # Remove TT from ProggyClean
        ( r'(terminess) ?\(ttf\)',  r'\1'), # Remove TTF from Terminus (after renamed to Terminess)
        ( '(im ?writing ?q)uattro', r'\1uat'), # Rename iM Writing Quattro to Quat
        ( '(im ?writing ?(mono|duo|quat)) ?s', r'\1'), # Remove S from all iM Writing styles
    ]

    # From https://adobe-type-tools.github.io/font-tech-notes/pdfs/5088.FontNames.pdf
    # The first short variant is from the linked table.
    # The second (longer) short variant is from diverse fonts like Noto.
    # We can
    # - use the long form
    # - use the very short form (first)
    # - use mild short form:
    #   - has no modifier: use the second form
    #   - has modifier: use second form of mod plus first form of weights2
    #   - has modifier: use second form of mod plus second form of widths
    # This is encoded in get_shorten_form_idx()
    known_weights1 = { # can not take modifiers
        'Medium': ('Md', 'Med'),
        'Nord': ('Nd', 'Nord'),
        'Book': ('Bk', 'Book'),
        'Text': ('Txt', 'Text'),
        'Poster': ('Po', 'Poster'),
        'Demi': ('Dm', 'Demi'), # Demi is sometimes used as a weight, sometimes as a modifier
        'Regular': ('Rg', 'Reg'),
        'Display': ('DS', 'Disp'),
        'Super': ('Su', 'Sup'),
        'Retina': ('Rt', 'Ret'),
    }
    known_weights2 = { # can take modifiers
        'Black': ('Blk', 'Black'),
        'Bold': ('Bd', 'Bold'),
        'Heavy': ('Hv', 'Heavy'),
        'Thin': ('Th', 'Thin'),
        'Light': ('Lt', 'Light'),
        ' ': (), # Just for CodeClimate :-/
    }
    known_styles = [ # Keywords that end up as style (i.e. a RIBBI set)
        'Bold', 'Italic', 'Regular', 'Normal'
    ]
    known_widths = { # can take modifiers
        'Compressed': ('Cm', 'Comp'),
        'Extended': ('Ex', 'Extd'),
        'Condensed': ('Cn', 'Cond'),
        'Narrow': ('Nr', 'Narrow'),
        'Compact': ('Ct', 'Compact'),
    }
    known_slopes = { # can not take modifiers
        'Inclined': ('Ic', 'Incl'),
        'Oblique': ('Obl', 'Obl'),
        'Italic': ('It', 'Italic'),
        'Upright': ('Up', 'Uprght'),
        'Kursiv': ('Ks', 'Kurs'),
        'Sloped': ('Sl', 'Slop'),
    }
    known_modifiers = {
        'Demi': ('Dm', 'Dem'),
        'Ultra': ('Ult', 'Ult'),
        'Semi': ('Sm', 'Sem'),
        'Extra': ('X', 'Ext'),
    }
    # Numeric weight -> lower case names (without dashes or blanks) that map to it
    equivalent_weights = {
        100: ('thin', 'hairline'),
        200: ('extralight', 'ultralight'),
        300: ('light', ),
        350: ('semilight', ),
        400: ('regular', 'normal', 'book', 'text', 'nord', 'retina'),
        500: ('medium', ),
        600: ('semibold', 'demibold', 'demi'),
        700: ('bold', ),
        800: ('extrabold', 'ultrabold'),
        900: ('black', 'heavy', 'poster', 'extrablack', 'ultrablack'),
    }

    @staticmethod
    def weight_string_to_number(w):
        """ Convert a common string approximation to a PS/2 weight value

        Returns 400 for an empty or non-string input and None for an
        unknown weight name. Case, dashes and blanks are ignored.
        """
        if not isinstance(w, str) or len(w) < 1:
            return 400
        # 'Extra-Light', 'Extra Light' and 'ExtraLight' shall all be found
        w = w.lower().replace('-', '').replace(' ', '')
        for num, strs in FontnameTools.equivalent_weights.items():
            if w in strs:
                return num
        return None

    @staticmethod
    def weight_to_string(w):
        """ Convert a PS/2 weight value to the common string approximation """
        # ( exclusive upper bound, name ), checked in ascending order
        upper_bounds = (
            (150, 'Thin'),
            (250, 'Extra-Light'),
            (350, 'Light'),
            (450, 'Regular'),
            (550, 'Medium'),
            (650, 'Semi-Bold'),
            (750, 'Bold'),
            (850, 'Extra-Bold'),
        )
        for bound, weight_name in upper_bounds:
            if w < bound:
                return weight_name
        return 'Black'

    @staticmethod
    def is_keep_regular(basename):
        """This has been decided by the font designers, we need to mimic that (for comparison purposes)"""
        KEEP_REGULAR = [
            'Agave',
            'Arimo',
            'Aurulent',
            'Cascadia',
            'Cousine',
            'Fantasque',
            'Fira',

            'Overpass',
            'Lilex',
            'Inconsolata$', # not InconsolataGo
            'IAWriter',
            'Meslo',
            'Monoid',
            'Mononoki',
            'Hack',
            'JetBrains Mono',
            'Noto Sans',
            'Noto Serif',
            'Victor',
        ]
        # The appended '$' marks the end of the name, so that entries ending
        # in '$' match only the complete name; all others match as prefix
        probe = basename.rstrip() + '$'
        return probe.startswith(tuple(KEEP_REGULAR))

    @staticmethod
    def _parse_simple_font_name(name):
        """Parse a fontname that does not follow the 'FontFamilyName-FontStyle' pattern"""
        # This is the usual case, because the font-patcher usually uses the fullname and
        # not the PS name
        if ' ' in name:
            return FontnameTools.parse_font_name(name.replace(' ', '-'))
        # Do we have a number-name boundary?
        p = re.split(r'(?<=[0-9])(?=[a-zA-Z])', name)
        if len(p) > 1:
            return FontnameTools.parse_font_name('-'.join(p))
        # Or do we have CamelCase?
        n = FontnameTools.camel_explode(name)
        if n != name:
            return FontnameTools.parse_font_name(n.replace(' ', '-'))
        # Nothing to split: the whole name is the family name
        return (False, FontnameTools.camel_casify(name), [], [], [], '')

    @staticmethod
    def parse_font_name(name):
        """Expects a fontname following the 'FontFamilyName-FontStyle' pattern and returns ... parts

        Returns the tuple
        ( pattern_found, familyname, weight_token, style_token, other_token, rest )
        where pattern_found is False if the name could not be split into
        family and style and the token lists are lists of strings.
        """
        # This could parse filenames in the beginning but that was never used in production; code removed with this commit
        for special in [
                ('ExtLt', 'ExtraLight'), # IBM-Plex
                ('Medm', 'Medium'), # IBM-Plex
                ('Semi-Condensed', 'SemiCondensed'), # 3270
                ('SmBld', 'SemiBold'), # IBM-Plex
            ]:
            name = re.sub(r'\b' + special[0] + r'\b', special[1], name, count=1, flags=re.IGNORECASE)
        name = re.sub(r'[_\s]+', ' ', name)
        matches = re.match(r'([^-]+)(?:-(.*))?', name)
        if matches is None:
            # Empty name or name starting with a dash: there is no family part
            return FontnameTools._parse_simple_font_name(name)
        familyname = FontnameTools.camel_casify(matches.group(1))
        style = matches.group(2)

        if not style:
            return FontnameTools._parse_simple_font_name(name)

        # These are the FontStyle keywords we know, in three categories
        # Weights end up as Typographic Family parts ('after the dash')
        # Styles end up as Family parts (for classic grouping of four)
        # Others also end up in Typographic Family ('before the dash')
        weights = [ m + s
                for s in list(FontnameTools.known_weights2) + list(FontnameTools.known_widths)
                for m in list(FontnameTools.known_modifiers) + [''] if m != s
            ] + list(FontnameTools.known_weights1) + list(FontnameTools.known_slopes)
        weights = [ w for w in weights if w not in FontnameTools.known_styles ]
        # Some font specialities:
        other = [
            '-', 'Book', 'For', 'Powerline',
            'IIx',              # Profont IIx
            'LGC',              # Inconsolata LGC
            r'\bCE\b',          # ProggycleanTT CE
            r'[12][cmp]n?',     # MPlus
            r'(?:uni-)?1[14]',  # GohuFont uni
        ]

        ( style, weight_token ) = FontnameTools.get_name_token(style, weights)
        ( style, style_token ) = FontnameTools.get_name_token(style, FontnameTools.known_styles)
        ( style, other_token ) = FontnameTools.get_name_token(style, other)
        while 'Regular' in style_token and len(style_token) > 1:
            # Correct situation where "Regular" and something else is given
            style_token.remove('Regular')

        # Recurse to see if unmatched stuff between dashes can belong to familyname
        matches2 = re.match(r'(\w+)-(.*)', style)
        if matches2:
            return FontnameTools.parse_font_name(familyname + matches2.group(1) + '-' + matches2.group(2))

        style = re.sub(r'(^|\s)\d+(\.\d+)+(\s|$)', r'\1\3', style) # Remove (free standing) version numbers
        style_parts = FontnameTools.drop_empty(style.split(' '))
        style = ' '.join(map(FontnameTools.front_upper, style_parts))
        familyname = FontnameTools.camel_explode(familyname)
        return (True, familyname, weight_token, style_token, other_token, style)