2021-12-02 23:29:54 +02:00
|
|
|
#!/usr/bin/env python
|
|
|
|
# coding=utf8
|
|
|
|
|
|
|
|
import re
|
|
|
|
import sys
|
|
|
|
|
|
|
|
class FontnameTools:
    """Deconstruct a fontname to get standardized name parts"""

    @staticmethod
    def front_upper(word):
        """Capitalize a string (but keep case of subsequent chars)"""
        return word[:1].upper() + word[1:]

    @staticmethod
    def camel_casify(word):
        """Remove blanks and use CamelCase for the new word"""
        return ''.join(map(FontnameTools.front_upper, word.split(' ')))

    @staticmethod
    def camel_explode(word):
        """Explode CamelCase -> Camel Case

        A known prefix from the excludes list (matched at the start of the
        string only) is kept in one piece; only the remainder is split at
        lowercase/digit -> uppercase boundaries.
        """
        # But do not explode "JetBrains" etc at string start...
        excludes = [
            'JetBrains',
            'DejaVu',
            'OpenDyslexicAlta',
            'OpenDyslexicMono',
            'OpenDyslexic',
            'DaddyTimeMono',
            'InconsolataGo',
            'ProFontWindows',
            'ProFont',
            'ProggyClean',
        ]
        m = re.match('(' + '|'.join(excludes) + ')(.*)', word)
        (prefix, word) = m.group(1, 2) if m is not None else ('', word)
        if not word:
            return prefix
        parts = re.split('(?<=[a-z0-9])(?=[A-Z])', word)
        if prefix:
            parts.insert(0, prefix)
        return ' '.join(parts)

    @staticmethod
    def drop_empty(l):
        """Remove empty strings from list of strings"""
        return [x for x in l if len(x) > 0]

    @staticmethod
    def concat(*all_things):
        """Flatten list of (strings or lists of strings) to a blank-separated string"""
        flat = []
        for thing in all_things:
            if isinstance(thing, list):
                flat += thing
            else:
                flat.append(thing)
        return ' '.join(FontnameTools.drop_empty(flat))

    @staticmethod
    def unify_style_names(style_name):
        """Substitute some known token with standard wording

        The lookup is case insensitive; unknown names are returned unchanged.
        """
        known_names = {
            # Source of the table is the current sourcefonts
            # Left side needs to be lower case
            'book': '',
            'ce': 'CE',
            'normal': 'Regular',
        }
        return known_names.get(style_name.lower(), style_name)

    @staticmethod
    def find_in_dicts(key, dicts):
        """Find an entry in a list of dicts, return entry and in which list it was

        Returns (None, 0) if the key is in none of the dicts.
        """
        for i, d in enumerate(dicts):
            if key in d:
                return (d[key], i)
        return (None, 0)

    @staticmethod
    def get_shorten_form_idx(aggressive, prefix, form_if_prefixed):
        """Get the tuple index of known_* data tables"""
        if aggressive:
            return 0
        if len(prefix):
            return form_if_prefixed
        return 1

    @staticmethod
    def shorten_style_name(name, aggressive):
        """Substitute some known styles to short form

        Names that are not found in the known_* tables are returned unchanged.
        """
        # If aggressive is False create the mild short form
        # aggressive == True: Always use first form of everything
        # aggressive == False:
        #               - has no modifier: use the second form
        #               - has modifier: use second form of mod plus first form of weights2
        #               - has modifier: use second form of mod plus second form of widths
        name_rest = name
        name_pre = ''
        form = FontnameTools.get_shorten_form_idx(aggressive, '', 0)
        for mod in FontnameTools.known_modifiers:
            if name.startswith(mod) and len(name) > len(mod):  # Second condition specifically for 'Demi'
                name_pre = FontnameTools.known_modifiers[mod][form]
                name_rest = name[len(mod):]
                break
        subst, i = FontnameTools.find_in_dicts(name_rest, [FontnameTools.known_weights2, FontnameTools.known_widths])
        form = FontnameTools.get_shorten_form_idx(aggressive, name_pre, i)
        # The 'and subst' skips empty placeholder tuples (the ' ' entry of
        # known_weights2), which can not be indexed
        if isinstance(subst, tuple) and subst:
            return name_pre + subst[form]
        if not name_pre:
            # The following sets do not allow modifiers
            subst, _ = FontnameTools.find_in_dicts(name_rest, [FontnameTools.known_weights1, FontnameTools.known_slopes])
            if isinstance(subst, tuple) and subst:
                return subst[form]
        return name

    @staticmethod
    def short_styles(lists, aggressive):
        """Shorten all style names in a list or a list of lists"""
        def shorten(style):
            return FontnameTools.shorten_style_name(style, aggressive)

        if not lists or not isinstance(lists[0], list):
            return [shorten(style) for style in lists]
        return [[shorten(style) for style in styles] for styles in lists]

    @staticmethod
    def make_oblique_style(weights, styles):
        """Move "Oblique" from weights to styles for font naming purposes"""
        if 'Oblique' in weights:
            # Work on copies, the lists of the caller are not modified
            weights = list(weights)
            weights.remove('Oblique')
            styles = list(styles)
            styles.append('Oblique')
        return (weights, styles)

    @staticmethod
    def get_name_token(name, tokens):
        """Try to find any case insensitive token from tokens in the name, return tuple with found token-list and rest

        Returns ( rest, found_tokens ): 'rest' is the blank separated, stripped
        concatenation of all parts of name that did not match any token.
        """
        # All tokens are combined into one case insensitive regex.
        # The matched text is lower-cased and dashes are removed from it. If the
        # result equals a (lower-cased) item of the tokens list that item is given
        # back with unchanged case (i.e. [ 'Bold' ] will match "bold" and return
        # it as [ 'Bold' ]). Otherwise (token is a real regex) the lower-cased
        # matched text is given back.
        # Found tokens are run through unify_style_names(); tokens that end up
        # empty are dropped.
        #
        # Token are always used in a regex and may not capture, use non capturing
        # grouping if needed (?: ... )
        lower_tokens = [t.lower() for t in tokens]
        not_matched = ""
        all_tokens = []
        token_regex = '|'.join(tokens)
        # Allow a dash between CamelCase token word parts, i.e. Camel-Case
        # This allows for styles like Extra-Bold
        token_regex = re.sub(r'(?<=[a-z])(?=[A-Z])', '-?', token_regex)
        regex = re.compile('(.*?)(' + token_regex + ')(.*)', re.IGNORECASE)
        while True:
            j = regex.match(name)
            if not j:
                break
            if len(j.groups()) != 3:
                sys.exit('Malformed regex in FontnameTools.get_name_token()')
            if j.end(2) == 0:
                # Nothing consumed (empty token list or a token that matches the
                # empty string): the rest would never change, stop here instead
                # of looping forever
                break
            not_matched += ' ' + j.group(1)  # Blank prevents unwanted concatenation of unmatched substrings
            tok = j.group(2).lower()
            tok = tok.replace('-', '')  # Remove dashes between CamelCase token words
            if tok in lower_tokens:
                tok = tokens[lower_tokens.index(tok)]
            tok = FontnameTools.unify_style_names(tok)
            if len(tok):
                all_tokens.append(tok)
            name = j.group(3)  # Continue with the rest
        not_matched += ' ' + name
        return (not_matched.strip(), all_tokens)

    @staticmethod
    def postscript_char_filter(name):
        """Filter out characters that are not allowed in Postscript names"""
        # The name string must be restricted to the printable ASCII subset, codes 33 to 126,
        # except for the 10 characters '[', ']', '(', ')', '{', '}', '<', '>', '/', '%'
        return ''.join(c for c in name
                       if c not in '[](){}<>/%' and 33 <= ord(c) <= 126)

    # Pairs of ( regex, replacement ) for font name parts
    SIL_TABLE = [
        ( '(a)nka/(c)oder', r'\1na\2onder' ),
        ( '(a)nonymous', r'\1nonymice' ),
        ( '(b)itstream( ?)(v)era( ?sans ?mono)?', r'\1itstrom\2Wera' ),
        ( '(c)ascadia( ?)(c)ode', r'\1askaydia\2\3ove' ),
        ( '(c)ascadia( ?)(m)ono', r'\1askaydia\2\3ono' ),
        ( 'Gohufont', r'GohuFont'), # Correct to CamelCase
        ( '(h)ermit', r'\1urmit' ),
        ( '(h)asklig', r'\1asklug' ),
        ( 'iA([- ]?)writer', r'iM\1Writing' ),
        ( 'IBM[- ]?plex', r'Blex' ), # We do not keep the case here
        ( '(i)ntel( ?)(o)ne', r'\1ntone' ),
        ( '(l)iberation', r'\1iteration' ),
        ( '(m)( ?)plus', r'\1+'), # Added this, because they use a plus symbol :->
        ( '(s)hare', r'\1hure' ),
        ( '(s)ource', r'\1auce' ),
        ( '(t)erminus', r'\1erminess' ),
        # Noone cares that font names starting with a digit are forbidden:
        ( 'IBM 3270', r'3270'), # for historical reasons and 'IBM' is a TM or something
        # Some name parts that are too long for us
        ( '(.*sans ?m)ono', r'\1'), # Various SomenameSansMono fonts
        ( '(.*code ?lat)in Expanded', r'\1X'), # for 'M PLUS Code Latin Expanded'
        ( '(.*code ?lat)in', r'\1'), # for 'M PLUS Code Latin'
        ( '(b)ig( ?)(b)lue( ?)(t)erminal', r'\1ig\3lue\5erm'), # Shorten BigBlueTerminal
        ( '(.*)437TT', r'\g<1>437'), # Shorten BigBlueTerminal 437 TT even further
        ( '(.*dyslexic ?alt)a', r'\1'), # Open Dyslexic Alta -> Open Dyslexic Alt
        ( '(.*dyslexic ?m)ono', r'\1'), # Open Dyslexic Mono -> Open Dyslexic M
        ( '(overpass ?m)ono', r'\1'), # Overpass Mono -> Overpass M
        ( '(proggyclean) ?tt', r'\1'), # Remove TT from ProggyClean
        ( r'(terminess) ?\(ttf\)', r'\1'), # Remove TTF from Terminus (after renamed to Terminess)
        ( '(im ?writing ?q)uattro', r'\1uat'), # Rename iM Writing Quattro to Quat
        ( '(im ?writing ?(mono|duo|quat)) ?s', r'\1'), # Remove S from all iM Writing styles
    ]

    # From https://adobe-type-tools.github.io/font-tech-notes/pdfs/5088.FontNames.pdf
    # The first short variant is from the linked table.
    # The second (longer) short variant is from diverse fonts like Noto.
    # We can
    # - use the long form
    # - use the very short form (first)
    # - use mild short form:
    #   - has no modifier: use the second form
    #   - has modifier: use second form of mod plus first form of weights2
    #   - has modifier: use second form of mod plus second form of widths
    # This is encoded in get_shorten_form_idx()
    known_weights1 = { # can not take modifiers
        'Medium': ('Md', 'Med'),
        'Nord': ('Nd', 'Nord'),
        'Book': ('Bk', 'Book'),
        'Text': ('Txt', 'Text'),
        'Poster': ('Po', 'Poster'),
        'Demi': ('Dm', 'Demi'), # Demi is sometimes used as a weight, sometimes as a modifier
        'Regular': ('Rg', 'Reg'),
        'Display': ('DS', 'Disp'),
        'Super': ('Su', 'Sup'),
        'Retina': ('Rt', 'Ret'),
    }
    known_weights2 = { # can take modifiers
        'Black': ('Blk', 'Black'),
        'Bold': ('Bd', 'Bold'),
        'Heavy': ('Hv', 'Heavy'),
        'Thin': ('Th', 'Thin'),
        'Light': ('Lt', 'Light'),
        ' ': (), # Just for CodeClimate :-/
    }
    known_styles = [ # Keywords that end up as style (i.e. a RIBBI set)
        'Bold', 'Italic', 'Regular', 'Normal'
    ]
    known_widths = { # can take modifiers
        'Compressed': ('Cm', 'Comp'),
        'Extended': ('Ex', 'Extd'),
        'Condensed': ('Cn', 'Cond'),
        'Narrow': ('Nr', 'Narrow'),
        'Compact': ('Ct', 'Compact'),
    }
    known_slopes = { # can not take modifiers
        'Inclined': ('Ic', 'Incl'),
        'Oblique': ('Obl', 'Obl'),
        'Italic': ('It', 'Italic'),
        'Upright': ('Up', 'Uprght'),
        'Kursiv': ('Ks', 'Kurs'),
        'Sloped': ('Sl', 'Slop'),
    }
    known_modifiers = {
        'Demi': ('Dm', 'Dem'),
        'Ultra': ('Ult', 'Ult'),
        'Semi': ('Sm', 'Sem'),
        'Extra': ('X', 'Ext'),
    }
    equivalent_weights = {
        100: ('thin', 'hairline'),
        200: ('extralight', 'ultralight'),
        300: ('light', ),
        350: ('semilight', ),
        400: ('regular', 'normal', 'book', 'text', 'nord', 'retina'),
        500: ('medium', ),
        600: ('semibold', 'demibold', 'demi'),
        700: ('bold', ),
        800: ('extrabold', 'ultrabold'),
        900: ('black', 'heavy', 'poster', 'extrablack', 'ultrablack'),
    }

    @staticmethod
    def weight_string_to_number(w):
        """ Convert a common string approximation to a PS/2 weight value

        Returns 400 for anything that is not a non-empty string and None
        for strings that are not in equivalent_weights.
        """
        if not isinstance(w, str) or len(w) < 1:
            return 400
        lower_w = w.lower()
        for num, strs in FontnameTools.equivalent_weights.items():
            if lower_w in strs:
                return num
        return None

    @staticmethod
    def weight_to_string(w):
        """ Convert a PS/2 weight value to the common string approximation """
        # Pairs of ( exclusive upper limit, name ), in ascending order
        limits = (
            (150, 'Thin'),
            (250, 'Extra-Light'),
            (350, 'Light'),
            (450, 'Regular'),
            (550, 'Medium'),
            (650, 'Semi-Bold'),
            (750, 'Bold'),
            (850, 'Extra-Bold'),
        )
        for limit, weight_name in limits:
            if w < limit:
                return weight_name
        return 'Black'

    @staticmethod
    def is_keep_regular(basename):
        """This has been decided by the font designers, we need to mimic that (for comparison purposes)"""
        KEEP_REGULAR = [
            'Agave',
            'Arimo',
            'Aurulent',
            'Cascadia',
            'Cousine',
            'Fantasque',
            'Fira',

            'Overpass',
            'Lilex',
            'Inconsolata$', # not InconsolataGo
            'IAWriter',
            'Meslo',
            'Monoid',
            'Mononoki',
            'Hack',
            'JetBrains Mono',
            'Noto Sans',
            'Noto Serif',
            'Victor',
        ]
        # The appended '$' marks the end of the name, so that entries ending
        # in '$' only match the complete basename
        candidate = basename.rstrip() + '$'
        return any(candidate.startswith(kr) for kr in KEEP_REGULAR)

    @staticmethod
    def _parse_simple_font_name(name):
        """Parse a fontname that does not follow the 'FontFamilyName-FontStyle' pattern

        Returns the same kind of tuple as parse_font_name().
        """
        # This is the usual case, because the font-patcher usually uses the fullname and
        # not the PS name
        if ' ' in name:
            return FontnameTools.parse_font_name(name.replace(' ', '-'))
        # Do we have a number-name boundary?
        p = re.split('(?<=[0-9])(?=[a-zA-Z])', name)
        if len(p) > 1:
            return FontnameTools.parse_font_name('-'.join(p))
        # Or do we have CamelCase?
        n = FontnameTools.camel_explode(name)
        if n != name:
            return FontnameTools.parse_font_name(n.replace(' ', '-'))
        return (False, FontnameTools.camel_casify(name), [], [], [], '')

    @staticmethod
    def parse_font_name(name):
        """Expects a fontname following the 'FontFamilyName-FontStyle' pattern and returns ... parts

        Returns the tuple
        ( parse_ok, familyname, weight_token, style_token, other_token, rest )
        where parse_ok is False if no style part could be split off, the
        *_token are lists of found keywords and rest is the unmatched
        remainder of the style part.
        """
        # This could parse filenames in the beginning but that was never used in production; code removed with this commit
        for special in [
                ('ExtLt', 'ExtraLight'), # IBM-Plex
                ('Medm', 'Medium'), # IBM-Plex
                ('Semi-Condensed', 'SemiCondensed'), # 3270
                ('SmBld', 'SemiBold'), # IBM-Plex
                ]:
            name = re.sub(r'\b' + special[0] + r'\b', special[1], name, count=1, flags=re.IGNORECASE)
        name = re.sub(r'[_\s]+', ' ', name)
        matches = re.match(r'([^-]+)(?:-(.*))?', name)
        if matches is None:
            # Empty name or name starting with a dash: there is no family name part
            return FontnameTools._parse_simple_font_name(name)
        familyname = FontnameTools.camel_casify(matches.group(1))
        style = matches.group(2)

        if not style:
            return FontnameTools._parse_simple_font_name(name)

        # These are the FontStyle keywords we know, in three categories
        # Weights end up as Typographic Family parts ('after the dash')
        # Styles end up as Family parts (for classic grouping of four)
        # Others also end up in Typographic Family ('before the dash')
        weights = [ m + s
                for s in list(FontnameTools.known_weights2) + list(FontnameTools.known_widths)
                for m in list(FontnameTools.known_modifiers) + [''] if m != s
            ] + list(FontnameTools.known_weights1) + list(FontnameTools.known_slopes)
        weights = [ w for w in weights if w not in FontnameTools.known_styles ]
        # Some font specialities:
        other = [
            '-', 'Book', 'For', 'Powerline',
            'IIx',             # Profont IIx
            'LGC',             # Inconsolata LGC
            r'\bCE\b',         # ProggycleanTT CE
            r'[12][cmp]n?',    # MPlus
            r'(?:uni-)?1[14]', # GohuFont uni
        ]

        ( style, weight_token ) = FontnameTools.get_name_token(style, weights)
        ( style, style_token ) = FontnameTools.get_name_token(style, FontnameTools.known_styles)
        ( style, other_token ) = FontnameTools.get_name_token(style, other)
        while 'Regular' in style_token and len(style_token) > 1:
            # Correct situation where "Regular" and something else is given
            style_token.remove('Regular')

        # Recurse to see if unmatched stuff between dashes can belong to familyname
        matches2 = re.match(r'(\w+)-(.*)', style)
        if matches2:
            return FontnameTools.parse_font_name(familyname + matches2.group(1) + '-' + matches2.group(2))

        style = re.sub(r'(^|\s)\d+(\.\d+)+(\s|$)', r'\1\3', style) # Remove (free standing) version numbers
        style_parts = FontnameTools.drop_empty(style.split(' '))
        style = ' '.join(map(FontnameTools.front_upper, style_parts))
        familyname = FontnameTools.camel_explode(familyname)
        return (True, familyname, weight_token, style_token, other_token, style)
|