nerd-fonts/bin/scripts/name_parser/FontnameTools.py

#!/usr/bin/env python
# coding=utf8

import re
import sys

class FontnameTools:
    """Deconstruct a font filename to get standardized name parts"""

    @staticmethod
    def front_upper(word):
        """Capitalize a string (but keep case of subsequent chars)"""
        return word[:1].upper() + word[1:]

    @staticmethod
    def camel_casify(word):
        """Remove blanks and use CamelCase for the new word"""
        return ''.join(map(FontnameTools.front_upper, word.split(' ')))

    @staticmethod
    def camel_explode(word):
        """Explode CamelCase -> Camel Case"""
        # But do not explode "JetBrains" etc at string start...
        excludes = [
                'JetBrains',
                'DejaVu',
                'OpenDyslexicAlta',
                'OpenDyslexicMono',
                'OpenDyslexic',
                'DaddyTimeMono',
                'InconsolataGo',
                'ProFontWindows',
                'ProFont',
                'ProggyClean',
                ]
        m = re.match('(' + '|'.join(excludes) + ')(.*)', word)
        (prefix, word) = m.group(1,2) if m != None else ('', word)
        if len(word) == 0:
            return prefix
        parts = re.split('(?<=[a-z0-9])(?=[A-Z])', word)
        if len(prefix):
            parts.insert(0, prefix)
        return ' '.join(parts)

    @staticmethod
    def drop_empty(l):
        """Remove empty strings from list of strings"""
        return [x for x in l if len(x) > 0]

    @staticmethod
    def concat(*all_things):
        """Flatten list of (strings or lists of strings) to a blank-separated string"""
        all = []
        for thing in all_things:
            if type(thing) is not list:
                all.append(thing)
            else:
                all += thing
        return ' '.join(FontnameTools.drop_empty(all))

    @staticmethod
    def unify_style_names(style_name):
        """Substitude some known token with standard wording"""
        known_names = {
            # Source of the table is the current sourcefonts
            # Left side needs to be lower case
            '-':            '',
            'book':         '',
            'text':         '',
            'ce':           'CE',
            '(ttf)':        '(TTF)',
            #'semibold':     'Demi',
            'ob':           'Oblique',
            'it':           'Italic',
            'i':            'Italic',
            'b':            'Bold',
            'normal':       'Regular',
            'c':            'Condensed',
            'r':            'Regular',
            'm':            'Medium',
            'l':            'Light',
        }
        if style_name in known_names:
            return known_names[style_name.lower()]
        return style_name

    @staticmethod
    def shorten_style_name(name):
        """Substitude some known styles to short form"""
        # From https://adobe-type-tools.github.io/font-tech-notes/pdfs/5088.FontNames.pdf
        known_names = {
            # Weights
            'Black': 'Blk',
            'Medium': 'Md',
            'Bold': 'Bd',
            'Nord': 'Nd',
            'Book': 'Bk',
            'Poster': 'Po',
            'Demi': 'Dm', # Demi is sometimes used as a weight, sometimes as a modifier
            'Regular': 'Rg',
            'Display': 'DS',
            'Super': 'Su',
            'Heavy': 'Hv',
            'Thin': 'Th',
            'Light': 'Lt',
            # Widths
            'Compressed': 'Cm',
            'Extended': 'Ex',
            'Condensed': 'Cn',
            'Narrow': 'Nr',
            'Compact': 'Ct',
            # Slope
            'Inclined': 'Ic',
            'Oblique': 'Obl',
            'Italic': 'It',
            'Upright': 'Up',
            'Kursiv': 'Ks',
            'Sloped': 'Sl',
        }
        modifiers = {
            'Demi': 'Dm',
            'Ultra': 'Ult',
            'Semi': 'Sm',
            'Extra': 'X',
        }
        name_rest = name
        name_pre = ''
        for mod in modifiers:
            if not name.startswith(mod) or len(name) <= len(mod):
                continue
            name_pre = modifiers[mod]
            name_rest = name[len(mod):]
            break
        if name_rest in known_names:
            return name_pre + known_names[name_rest]
        return name

    @staticmethod
    def short_styles(styles):
        """Shorten all style names in a list"""
        return list(map(FontnameTools.shorten_style_name, styles))
    @staticmethod
    def make_oblique_style(weights, styles):
        """Move "Oblique" from weights to styles for font naming purposes"""
        if 'Oblique' in weights:
            weights = list(weights)
            weights.remove('Oblique')
            styles = list(styles)
            styles.append('Oblique')
        return (weights, styles)

    @staticmethod
    def get_name_token(name, tokens, allow_regex_token = False):
        """Try to find any case insensitive token from tokens in the name, return tuple with found token-list and rest"""
        # The default mode (allow_regex_token = False) will try to find any verbatim string in the
        # tokens list (case insensitive matching) and give that tokens list item back with
        # unchanged case (i.e. [ 'Bold' ] will match "bold" and return it as [ 'Bold', ]
        # In the regex mode (allow_regex_token = True) it will use the tokens elements as
        # regexes and return the original (i.e. from name) case.
        #
        # Token are always used in a regex and may not capture, use non capturing
        # grouping if needed (?: ... )
        lower_tokens = [ t.lower() for t in tokens ]
        not_matched = ""
        all_tokens = []
        j = 1
        regex = re.compile('(.*?)(' + '|'.join(tokens) + ')(.*)', re.IGNORECASE)
        while j:
            j = regex.match(name)
            if not j:
                break
            if len(j.groups()) != 3:
                sys.exit('Malformed regex in FontnameTools.get_name_token()')
            not_matched += ' ' + j.groups()[0] # Blanc prevents unwanted concatenation of unmatched substrings
            tok = j.groups()[1].lower()
            if tok in lower_tokens:
                tok = tokens[lower_tokens.index(tok)]
            tok = FontnameTools.unify_style_names(tok)
            if len(tok):
                all_tokens.append(tok)
            name = j.groups()[2] # Recurse rest
        not_matched += ' ' + name
        return ( not_matched.strip(), all_tokens )

    @staticmethod
    def postscript_char_filter(name):
        """Filter out characters that are not allowed in Postscript names"""
        # The name string must be restricted to the printable ASCII subset, codes 33 to 126,
        # except for the 10 characters '[', ']', '(', ')', '{', '}', '<', '>', '/', '%'
        out = ""
        for c in name:
            if c in '[](){}<>/%' or ord(c) < 33 or ord(c) > 126:
                continue
            out += c
        return out

    # Renaming rules as ( pattern, replacement ) pairs. The patterns are regexes;
    # their parenthesized groups are referenced by the replacement (\1, \2, ...)
    # so that the matched letters (and optional blanks) are taken over as found.
    # NOTE(review): presumably this replaces reserved font names (SIL OFL) and is
    # applied via a case insensitive re.sub() by the user of this table -- that
    # code is not part of this class, please confirm there.
    SIL_TABLE = [
        ( '(s)ource',                   r'\1auce' ),
        ( '(h)ermit',                   r'\1urmit' ),
        ( '(h)asklig',                  r'\1asklug' ),
        ( '(s)hare',                    r'\1hure' ),
        ( 'IBM[- ]?plex',               r'Blex' ), # We do not keep the case here
        ( '(t)erminus',                 r'\1erminess' ),
        ( '(l)iberation',               r'\1iteration' ),
        ( 'iA([- ]?)writer',            r'iM\1Writing' ),
        ( '(a)nka/(c)oder',             r'\1na\2onder' ),
        ( '(c)ascadia( ?)(c)ode',       r'\1askaydia\2\3ove' ),
        ( '(c)ascadia( ?)(m)ono',       r'\1askaydia\2\3ono' ),
        ( '(m)plus',                    r'\1+'), # Added this, because they use a plus symbol :->
        ( 'Gohufont',                   r'GohuFont'), # Correct to CamelCase
        # No one cares that font names starting with a digit are forbidden:
        ( 'IBM 3270',                   r'3270'), # for historical reasons and 'IBM' is a TM or something
    ]

    @staticmethod
    def is_keep_regular(basename):
        """This has been decided by the font designers, we need to mimic that (for comparison purposes)"""
        KEEP_REGULAR = [
            'Agave',
            'Arimo',
            'Aurulent',
            'Cascadia',
            'Cousine',
            'Fantasque',
            'Fira',

            'Overpass',
            'Lilex',
            'Inconsolata$', # not InconsolataGo
            'IAWriter',
            'Meslo',
            'Monoid',
            'Mononoki',
            'Hack',
            'JetBrains Mono',
            'Noto Sans',
            'Noto Serif',
            'Victor',
        ]
        for kr in KEEP_REGULAR:
            if (basename.rstrip() + '$').startswith(kr): return True
        return False

    @staticmethod
    def _parse_simple_font_name(name):
        """Parse a filename that does not follow the 'FontFamilyName-FontStyle' pattern"""
        # No dash in name, maybe we have blanc separated filename?
        if ' ' in name:
            return FontnameTools.parse_font_name(name.replace(' ', '-'))
        # Do we have a number-name boundary?
        p = re.split('(?<=[0-9])(?=[a-zA-Z])', name)
        if len(p) > 1:
            return FontnameTools.parse_font_name('-'.join(p))
        # Or do we have CamelCase?
        n = FontnameTools.camel_explode(name)
        if n != name:
            return FontnameTools.parse_font_name(n.replace(' ', '-'))
        return (False, FontnameTools.camel_casify(name), [], [], [], '')

    @staticmethod
    def parse_font_name(name):
        """Expects a filename following the 'FontFamilyName-FontStyle' pattern and returns ... parts"""
        name = re.sub(r'\bsemi-condensed\b', 'SemiCondensed', name, 1, re.IGNORECASE) # Just for "3270 Semi-Condensed" :-/
        name = re.sub('[_\s]+', ' ', name)
        matches = re.match(r'([^-]+)(?:-(.*))?', name)
        familyname = FontnameTools.camel_casify(matches.group(1))
        style = matches.group(2)

        if not style:
            return FontnameTools._parse_simple_font_name(name)

        # These are the FontStyle keywords we know, in three categories
        # Weights end up as Typographic Family parts ('after the dash')
        # Styles end up as Family parts (for classic grouping of four)
        # Others also end up in Typographic Family ('before the dash')
        weights = [ 'Thin', 'Light', 'ExtraLight', 'SemiBold', 'Demi',
                    'SemiLight', 'Medium', 'Black', 'ExtraBold', 'Heavy',
                    'Oblique', 'Condensed', 'SemiCondensed', 'ExtraCondensed',
                    'Narrow', 'SemiNarrow', 'Retina', 'Extended']
        styles = [ 'Bold', 'Italic', 'Regular', 'Normal', ]
        weights = [ w for w in weights if w not in styles ]
        # Some font specialities:
        other = [
            '-', 'Book', 'For', 'Powerline',
            'Text',             # Plex
            'IIx',              # Profont IIx
            'LGC',              # Inconsolata LGC
            r'\(TTF\)',         # Terminus (TTF)
            r'\bCE\b',          # ProggycleanTT CE
            r'[12][cmp]n?',     # MPlus
            r'(?:uni-)?1[14]',  # GohuFont uni
        ]

        # Sometimes used abbreviations
        weight_abbrevs = [ 'ob', 'c', 'm', 'l', ]
        style_abbrevs = [ 'it', 'r', 'b', 'i', ]

        ( style, weight_token ) = FontnameTools.get_name_token(style, weights)
        ( style, style_token ) = FontnameTools.get_name_token(style, styles)
        ( style, other_token ) = FontnameTools.get_name_token(style, other, True)
        if len(style) < 4:
            ( style, weight_token_abbrevs ) = FontnameTools.get_name_token(style, weight_abbrevs)
            ( style, style_token_abbrevs ) = FontnameTools.get_name_token(style, style_abbrevs)
            weight_token += weight_token_abbrevs
            style_token += style_token_abbrevs
        while 'Regular' in style_token and len(style_token) > 1:
            # Correct situation where "Regular" and something else is given
            style_token.remove('Regular')

        # Recurse to see if unmatched stuff between dashes can belong to familyname
        matches2 = re.match(r'(\w+)-(.*)', style)
        if matches2:
            return FontnameTools.parse_font_name(familyname + matches2.group(1) + '-' + matches2.group(2))

        style = re.sub(r'(^|\s)\d+(\.\d+)+(\s|$)', r'\1\3', style) # Remove (free standing) version numbers
        style_parts = FontnameTools.drop_empty(style.split(' '))
        style = ' '.join(map(FontnameTools.front_upper, style_parts))
        familyname = FontnameTools.camel_explode(familyname)
        return (True, familyname, weight_token, style_token, other_token, style)
Draft: Introduce a file name parser DO NOT MERGE [why] A lot of the fonts have incorrect naming after patching. A completely different approach can help to come up with a consistent naming scheme. [how] See bin/scripts/name-parser/README.md Signed-off-by: Fini Jastrow <ulf.fini.jastrow@desy.de> 2021-12-02 23:29:54 +02:00			`#!/usr/bin/env python`
			`# coding=utf8`

			`import re`
			`import sys`

			`class FontnameTools:`
			`"""Deconstruct a font filename to get standardized name parts"""`

			`@staticmethod`
			`def front_upper(word):`
			`"""Capitalize a string (but keep case of subsequent chars)"""`
			`return word[:1].upper() + word[1:]`

			`@staticmethod`
			`def camel_casify(word):`
			`"""Remove blanks and use CamelCase for the new word"""`
			`return ''.join(map(FontnameTools.front_upper, word.split(' ')))`

			`@staticmethod`
			`def camel_explode(word):`
			`"""Explode CamelCase -> Camel Case"""`
			`# But do not explode "JetBrains" etc at string start...`
			`excludes = [`
			`'JetBrains',`
			`'DejaVu',`
			`'OpenDyslexicAlta',`
			`'OpenDyslexicMono',`
			`'OpenDyslexic',`
			`'DaddyTimeMono',`
			`'InconsolataGo',`
			`'ProFontWindows',`
			`'ProFont',`
			`'ProggyClean',`
			`]`
			`m = re.match('(' + '|'.join(excludes) + ')(.*)', word)`
			`(prefix, word) = m.group(1,2) if m != None else ('', word)`
			`if len(word) == 0:`
			`return prefix`
			`parts = re.split('(?<=[a-z0-9])(?=[A-Z])', word)`
			`if len(prefix):`
			`parts.insert(0, prefix)`
			`return ' '.join(parts)`

			`@staticmethod`
			`def drop_empty(l):`
			`"""Remove empty strings from list of strings"""`
			`return [x for x in l if len(x) > 0]`

			`@staticmethod`
			`def concat(*all_things):`
			`"""Flatten list of (strings or lists of strings) to a blank-separated string"""`
			`all = []`
			`for thing in all_things:`
name-parser: Fix Python2 compatibility [why] The naming has bizarre blanks strewn in sometimes, or is all caps. For example `C a s k a y d i a C o v e` or `CASKAYDIACOVE-Regular` [how] When run under Python2 all strings are unicode strings because `unicode_literals` is imported by `font-patcher`. Unfortunately the code checks for type str; but that will all become type unicode with the import. One check is suboptimal anyhow and can be dropped, while the other is turned around. Signed-off-by: Fini Jastrow <ulf.fini.jastrow@desy.de> 2022-02-06 21:58:01 +02:00			`if type(thing) is not list:`
Draft: Introduce a file name parser DO NOT MERGE [why] A lot of the fonts have incorrect naming after patching. A completely different approach can help to come up with a consistent naming scheme. [how] See bin/scripts/name-parser/README.md Signed-off-by: Fini Jastrow <ulf.fini.jastrow@desy.de> 2021-12-02 23:29:54 +02:00			`all.append(thing)`
			`else:`
			`all += thing`
			`return ' '.join(FontnameTools.drop_empty(all))`

			`@staticmethod`
			`def unify_style_names(style_name):`
			`"""Substitude some known token with standard wording"""`
			`known_names = {`
			`# Source of the table is the current sourcefonts`
			`# Left side needs to be lower case`
			`'-': '',`
			`'book': '',`
			`'text': '',`
			`'ce': 'CE',`
			`'(ttf)': '(TTF)',`
			`#'semibold': 'Demi',`
			`'ob': 'Oblique',`
			`'it': 'Italic',`
			`'i': 'Italic',`
			`'b': 'Bold',`
			`'normal': 'Regular',`
			`'c': 'Condensed',`
			`'r': 'Regular',`
			`'m': 'Medium',`
			`'l': 'Light',`
			`}`
			`if style_name in known_names:`
			`return known_names[style_name.lower()]`
			`return style_name`

			`@staticmethod`
			`def shorten_style_name(name):`
			`"""Substitude some known styles to short form"""`
name-parser: Generalize and shorten style-shortening [why] We really struggle to keep the font names below the length limits. To achieve this some styles are abbreviated. The abbreviations have been taken from Noto and were initially used to mimic Noto's naming scheme. But a bit shorter names would help in some instances to produce short enough name entries. Also some styles that are used by fonts other than Noto are not abbreviated at all. [how] In document [1] Adobe gives examples of very short style abbreviations. We just implement all these. Example: 'ExtraCondensed' now becomes 'XCn' instead of the more readable but longer 'ExtCond' that Noto uses. [1] https://adobe-type-tools.github.io/font-tech-notes/pdfs/5088.FontNames.pdf Signed-off-by: Fini Jastrow <ulf.fini.jastrow@desy.de> 2023-04-07 09:26:23 +02:00			`# From https://adobe-type-tools.github.io/font-tech-notes/pdfs/5088.FontNames.pdf`
Draft: Introduce a file name parser DO NOT MERGE [why] A lot of the fonts have incorrect naming after patching. A completely different approach can help to come up with a consistent naming scheme. [how] See bin/scripts/name-parser/README.md Signed-off-by: Fini Jastrow <ulf.fini.jastrow@desy.de> 2021-12-02 23:29:54 +02:00			`known_names = {`
name-parser: Generalize and shorten style-shortening [why] We really struggle to keep the font names below the length limits. To achieve this some styles are abbreviated. The abbreviations have been taken from Noto and were initially used to mimic Noto's naming scheme. But a bit shorter names would help in some instances to produce short enough name entries. Also some styles that are used by fonts other than Noto are not abbreviated at all. [how] In document [1] Adobe gives examples of very short style abbreviations. We just implement all these. Example: 'ExtraCondensed' now becomes 'XCn' instead of the more readable but longer 'ExtCond' that Noto uses. [1] https://adobe-type-tools.github.io/font-tech-notes/pdfs/5088.FontNames.pdf Signed-off-by: Fini Jastrow <ulf.fini.jastrow@desy.de> 2023-04-07 09:26:23 +02:00			`# Weights`
			`'Black': 'Blk',`
			`'Medium': 'Md',`
			`'Bold': 'Bd',`
			`'Nord': 'Nd',`
			`'Book': 'Bk',`
			`'Poster': 'Po',`
			`'Demi': 'Dm', # Demi is sometimes used as a weight, sometimes as a modifier`
			`'Regular': 'Rg',`
			`'Display': 'DS',`
			`'Super': 'Su',`
			`'Heavy': 'Hv',`
			`'Thin': 'Th',`
			`'Light': 'Lt',`
			`# Widths`
			`'Compressed': 'Cm',`
			`'Extended': 'Ex',`
			`'Condensed': 'Cn',`
			`'Narrow': 'Nr',`
			`'Compact': 'Ct',`
			`# Slope`
			`'Inclined': 'Ic',`
			`'Oblique': 'Obl',`
			`'Italic': 'It',`
			`'Upright': 'Up',`
			`'Kursiv': 'Ks',`
			`'Sloped': 'Sl',`
Draft: Introduce a file name parser DO NOT MERGE [why] A lot of the fonts have incorrect naming after patching. A completely different approach can help to come up with a consistent naming scheme. [how] See bin/scripts/name-parser/README.md Signed-off-by: Fini Jastrow <ulf.fini.jastrow@desy.de> 2021-12-02 23:29:54 +02:00			`}`
name-parser: Generalize and shorten style-shortening [why] We really struggle to keep the font names below the length limits. To achieve this some styles are abbreviated. The abbreviations have been taken from Noto and were initially used to mimic Noto's naming scheme. But a bit shorter names would help in some instances to produce short enough name entries. Also some styles that are used by fonts other than Noto are not abbreviated at all. [how] In document [1] Adobe gives examples of very short style abbreviations. We just implement all these. Example: 'ExtraCondensed' now becomes 'XCn' instead of the more readable but longer 'ExtCond' that Noto uses. [1] https://adobe-type-tools.github.io/font-tech-notes/pdfs/5088.FontNames.pdf Signed-off-by: Fini Jastrow <ulf.fini.jastrow@desy.de> 2023-04-07 09:26:23 +02:00			`modifiers = {`
			`'Demi': 'Dm',`
			`'Ultra': 'Ult',`
			`'Semi': 'Sm',`
			`'Extra': 'X',`
			`}`
			`name_rest = name`
			`name_pre = ''`
			`for mod in modifiers:`
			`if not name.startswith(mod) or len(name) <= len(mod):`
			`continue`
			`name_pre = modifiers[mod]`
			`name_rest = name[len(mod):]`
			`break`
			`if name_rest in known_names:`
			`return name_pre + known_names[name_rest]`
Draft: Introduce a file name parser DO NOT MERGE [why] A lot of the fonts have incorrect naming after patching. A completely different approach can help to come up with a consistent naming scheme. [how] See bin/scripts/name-parser/README.md Signed-off-by: Fini Jastrow <ulf.fini.jastrow@desy.de> 2021-12-02 23:29:54 +02:00			`return name`

			`@staticmethod`
			`def short_styles(styles):`
			`"""Shorten all style names in a list"""`
			`return list(map(FontnameTools.shorten_style_name, styles))`
			`@staticmethod`
			`def make_oblique_style(weights, styles):`
			`"""Move "Oblique" from weights to styles for font naming purposes"""`
			`if 'Oblique' in weights:`
			`weights = list(weights)`
			`weights.remove('Oblique')`
			`styles = list(styles)`
			`styles.append('Oblique')`
			`return (weights, styles)`

			`@staticmethod`
			`def get_name_token(name, tokens, allow_regex_token = False):`
			`"""Try to find any case insensitive token from tokens in the name, return tuple with found token-list and rest"""`
			`# The default mode (allow_regex_token = False) will try to find any verbatim string in the`
			`# tokens list (case insensitive matching) and give that tokens list item back with`
			`# unchanged case (i.e. [ 'Bold' ] will match "bold" and return it as [ 'Bold', ]`
			`# In the regex mode (allow_regex_token = True) it will use the tokens elements as`
			`# regexes and return the original (i.e. from name) case.`
			`#`
			`# Token are always used in a regex and may not capture, use non capturing`
			`# grouping if needed (?: ... )`
			`lower_tokens = [ t.lower() for t in tokens ]`
			`not_matched = ""`
			`all_tokens = []`
			`j = 1`
			`regex = re.compile('(.*?)(' + '|'.join(tokens) + ')(.*)', re.IGNORECASE)`
			`while j:`
			`j = regex.match(name)`
			`if not j:`
			`break`
			`if len(j.groups()) != 3:`
			`sys.exit('Malformed regex in FontnameTools.get_name_token()')`
			`not_matched += ' ' + j.groups()[0] # Blanc prevents unwanted concatenation of unmatched substrings`
			`tok = j.groups()[1].lower()`
			`if tok in lower_tokens:`
			`tok = tokens[lower_tokens.index(tok)]`
			`tok = FontnameTools.unify_style_names(tok)`
			`if len(tok):`
			`all_tokens.append(tok)`
			`name = j.groups()[2] # Recurse rest`
			`not_matched += ' ' + name`
			`return ( not_matched.strip(), all_tokens )`

			`@staticmethod`
			`def postscript_char_filter(name):`
			`"""Filter out characters that are not allowed in Postscript names"""`
			`# The name string must be restricted to the printable ASCII subset, codes 33 to 126,`
			`# except for the 10 characters '[', ']', '(', ')', '{', '}', '<', '>', '/', '%'`
			`out = ""`
			`for c in name:`
			`if c in '[](){}<>/%' or ord(c) < 33 or ord(c) > 126:`
			`continue`
			`out += c`
			`return out`

			`SIL_TABLE = [`
			`( '(s)ource', r'\1auce' ),`
			`( '(h)ermit', r'\1urmit' ),`
			`( '(h)asklig', r'\1asklug' ),`
			`( '(s)hare', r'\1hure' ),`
			`( 'IBM[- ]?plex', r'Blex' ), # We do not keep the case here`
			`( '(t)erminus', r'\1erminess' ),`
			`( '(l)iberation', r'\1iteration' ),`
			`( 'iA([- ]?)writer', r'iM\1Writing' ),`
			`( '(a)nka/(c)oder', r'\1na\2onder' ),`
			`( '(c)ascadia( ?)(c)ode', r'\1askaydia\2\3ove' ),`
			`( '(c)ascadia( ?)(m)ono', r'\1askaydia\2\3ono' ),`
			`( '(m)plus', r'\1+'), # Added this, because they use a plus symbol :->`
			`( 'Gohufont', r'GohuFont'), # Correct to CamelCase`
			`# Noone cares that font names starting with a digit are forbidden:`
Drop 'IBM' from 3270's names [why] The font was always called 3270. Having a big company's name in the fontname is scary :grimacing: See https://github.com/rbanffy/3270font/issues/60 [how] Add renaming rule. Fixes: #1012 Signed-off-by: Fini Jastrow <ulf.fini.jastrow@desy.de> 2023-01-17 16:40:52 +02:00			`( 'IBM 3270', r'3270'), # for historical reasons and 'IBM' is a TM or something`
Draft: Introduce a file name parser DO NOT MERGE [why] A lot of the fonts have incorrect naming after patching. A completely different approach can help to come up with a consistent naming scheme. [how] See bin/scripts/name-parser/README.md Signed-off-by: Fini Jastrow <ulf.fini.jastrow@desy.de> 2021-12-02 23:29:54 +02:00			`]`

			`@staticmethod`
			`def is_keep_regular(basename):`
			`"""This has been decided by the font designers, we need to mimic that (for comparison purposes)"""`
			`KEEP_REGULAR = [`
			`'Agave',`
			`'Arimo',`
			`'Aurulent',`
			`'Cascadia',`
			`'Cousine',`
			`'Fantasque',`
			`'Fira',`

			`'Overpass',`
			`'Lilex',`
			`'Inconsolata$', # not InconsolataGo`
			`'IAWriter',`
			`'Meslo',`
			`'Monoid',`
			`'Mononoki',`
			`'Hack',`
			`'JetBrains Mono',`
			`'Noto Sans',`
			`'Noto Serif',`
			`'Victor',`
			`]`
			`for kr in KEEP_REGULAR:`
			`if (basename.rstrip() + '$').startswith(kr): return True`
			`return False`

			`@staticmethod`
			`def _parse_simple_font_name(name):`
			`"""Parse a filename that does not follow the 'FontFamilyName-FontStyle' pattern"""`
			`# No dash in name, maybe we have blanc separated filename?`
			`if ' ' in name:`
			`return FontnameTools.parse_font_name(name.replace(' ', '-'))`
			`# Do we have a number-name boundary?`
			`p = re.split('(?<=[0-9])(?=[a-zA-Z])', name)`
			`if len(p) > 1:`
			`return FontnameTools.parse_font_name('-'.join(p))`
			`# Or do we have CamelCase?`
			`n = FontnameTools.camel_explode(name)`
			`if n != name:`
			`return FontnameTools.parse_font_name(n.replace(' ', '-'))`
			`return (False, FontnameTools.camel_casify(name), [], [], [], '')`

			`@staticmethod`
			`def parse_font_name(name):`
			`"""Expects a filename following the 'FontFamilyName-FontStyle' pattern and returns ... parts"""`
Fix 3270 naming [why] Somehow the `IBM 3270 SemiCondensed` font turn out as `IBM3270Semi Nerd Font Condensed`. The 3270 font always had the quirk to have a non-standard style with a dash. We have specific code to circumvent that. [how] After updating 3270 the 'Narrow' had been renamed to 'Condensed' and so our specific patch did not work anymore. Adapt the regex to find the new style that needs correction. [note] https://github.com/ryanoasis/nerd-fonts/issues/1012#issuecomment-1385497230 Signed-off-by: Fini Jastrow <ulf.fini.jastrow@desy.de> 2023-01-17 16:28:03 +02:00			`name = re.sub(r'\bsemi-condensed\b', 'SemiCondensed', name, 1, re.IGNORECASE) # Just for "3270 Semi-Condensed" :-/`
Draft: Introduce a file name parser DO NOT MERGE [why] A lot of the fonts have incorrect naming after patching. A completely different approach can help to come up with a consistent naming scheme. [how] See bin/scripts/name-parser/README.md Signed-off-by: Fini Jastrow <ulf.fini.jastrow@desy.de> 2021-12-02 23:29:54 +02:00			`name = re.sub('[_\s]+', ' ', name)`
			`matches = re.match(r'([^-]+)(?:-(.*))?', name)`
			`familyname = FontnameTools.camel_casify(matches.group(1))`
			`style = matches.group(2)`

			`if not style:`
			`return FontnameTools._parse_simple_font_name(name)`

			`# These are the FontStyle keywords we know, in three categories`
			`# Weights end up as Typographic Family parts ('after the dash')`
			`# Styles end up as Family parts (for classic grouping of four)`
			`# Others also end up in Typographic Family ('before the dash')`
			`weights = [ 'Thin', 'Light', 'ExtraLight', 'SemiBold', 'Demi',`
			`'SemiLight', 'Medium', 'Black', 'ExtraBold', 'Heavy',`
			`'Oblique', 'Condensed', 'SemiCondensed', 'ExtraCondensed',`
name-parser: Add Extended to known styles [why] This is missing, for example for iosevka-extendedextralightoblique Signed-off-by: Fini Jastrow <ulf.fini.jastrow@desy.de> 2023-03-27 17:10:16 +02:00			`'Narrow', 'SemiNarrow', 'Retina', 'Extended']`
Draft: Introduce a file name parser DO NOT MERGE [why] A lot of the fonts have incorrect naming after patching. A completely different approach can help to come up with a consistent naming scheme. [how] See bin/scripts/name-parser/README.md Signed-off-by: Fini Jastrow <ulf.fini.jastrow@desy.de> 2021-12-02 23:29:54 +02:00			`styles = [ 'Bold', 'Italic', 'Regular', 'Normal', ]`
name-parser: Generalize and shorten style-shortening [why] We really struggle to keep the font names below the length limits. To achieve this some styles are abbreviated. The abbreviations have been taken from Noto and were initially used to mimic Noto's naming scheme. But a bit shorter names would help in some instances to produce short enough name entries. Also some styles that are used by fonts other than Noto are not abbreviated at all. [how] In document [1] Adobe gives examples of very short style abbreviations. We just implement all these. Example: 'ExtraCondensed' now becomes 'XCn' instead of the more readable but longer 'ExtCond' that Noto uses. [1] https://adobe-type-tools.github.io/font-tech-notes/pdfs/5088.FontNames.pdf Signed-off-by: Fini Jastrow <ulf.fini.jastrow@desy.de> 2023-04-07 09:26:23 +02:00			`weights = [ w for w in weights if w not in styles ]`
Draft: Introduce a file name parser DO NOT MERGE [why] A lot of the fonts have incorrect naming after patching. A completely different approach can help to come up with a consistent naming scheme. [how] See bin/scripts/name-parser/README.md Signed-off-by: Fini Jastrow <ulf.fini.jastrow@desy.de> 2021-12-02 23:29:54 +02:00			`# Some font specialities:`
			`other = [`
			`'-', 'Book', 'For', 'Powerline',`
			`'Text', # Plex`
			`'IIx', # Profont IIx`
			`'LGC', # Inconsolata LGC`
			`r'\(TTF\)', # Terminus (TTF)`
			`r'\bCE\b', # ProggycleanTT CE`
			`r'[12][cmp]n?', # MPlus`
			`r'(?:uni-)?1[14]', # GohuFont uni`
			`]`

			`# Sometimes used abbreviations`
			`weight_abbrevs = [ 'ob', 'c', 'm', 'l', ]`
			`style_abbrevs = [ 'it', 'r', 'b', 'i', ]`

			`( style, weight_token ) = FontnameTools.get_name_token(style, weights)`
			`( style, style_token ) = FontnameTools.get_name_token(style, styles)`
			`( style, other_token ) = FontnameTools.get_name_token(style, other, True)`
			`if len(style) < 4:`
			`( style, weight_token_abbrevs ) = FontnameTools.get_name_token(style, weight_abbrevs)`
			`( style, style_token_abbrevs ) = FontnameTools.get_name_token(style, style_abbrevs)`
			`weight_token += weight_token_abbrevs`
			`style_token += style_token_abbrevs`
			`while 'Regular' in style_token and len(style_token) > 1:`
			`# Correct situation where "Regular" and something else is given`
			`style_token.remove('Regular')`

			`# Recurse to see if unmatched stuff between dashes can belong to familyname`
			`matches2 = re.match(r'(\w+)-(.*)', style)`
			`if matches2:`
			`return FontnameTools.parse_font_name(familyname + matches2.group(1) + '-' + matches2.group(2))`

			`style = re.sub(r'(^|\s)\d+(\.\d+)+(\s|$)', r'\1\3', style) # Remove (free standing) version numbers`
			`style_parts = FontnameTools.drop_empty(style.split(' '))`
			`style = ' '.join(map(FontnameTools.front_upper, style_parts))`
			`familyname = FontnameTools.camel_explode(familyname)`
			`return (True, familyname, weight_token, style_token, other_token, style)`