You've already forked Irene-Voice-Assistant
mirror of
https://github.com/janvarev/Irene-Voice-Assistant.git
synced 2025-11-26 22:50:58 +02:00
v5.1 - сделана функция core.all_num_to_text(text), позволяющая конвертировать все числа в тексте для произношения. Очень нужна для работы TTS silero. Опирается на написанную utils/all_num_to_text.
plugin_tts_silero_v3.py - обработка текста - конвертация чисел в строку. Параметры расстановки акцента и "ё" вынесены в опции. Прикручена библиотека mycroftAI/lingua-franca для конвертации чисел в строку. core.py - инициализация библиотеки lingua-franca.
This commit is contained in:
5
LICENSE
5
LICENSE
@@ -2,7 +2,7 @@ Irene - russian offline voice assistant
|
|||||||
|
|
||||||
MIT License
|
MIT License
|
||||||
|
|
||||||
Copyright (c) 2021 Vladislav Janvarev
|
Copyright (c) 2021-2022 Vladislav Janvarev
|
||||||
Copyright (c) 2020 EnjiRouz
|
Copyright (c) 2020 EnjiRouz
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
@@ -54,4 +54,7 @@ timer.wav:
|
|||||||
- licensed under Creative Commons 0 License
|
- licensed under Creative Commons 0 License
|
||||||
- URL: https://freesound.org/people/AlphaDarkWolf/sounds/591109/
|
- URL: https://freesound.org/people/AlphaDarkWolf/sounds/591109/
|
||||||
|
|
||||||
|
MycroftAI/lingua-franca:
|
||||||
|
- licensed under Apache License 2.0
|
||||||
|
- URL: https://github.com/MycroftAI/lingua-franca
|
||||||
|
|
||||||
|
|||||||
6
lingua_franca/__init__.py
Normal file
6
lingua_franca/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
from .internal import get_default_lang, set_default_lang, get_default_loc, \
|
||||||
|
get_active_langs, _set_active_langs, get_primary_lang_code, \
|
||||||
|
get_full_lang_code, resolve_resource_file, load_language, \
|
||||||
|
load_languages, unload_language, unload_languages, get_supported_langs
|
||||||
|
|
||||||
|
from lingua_franca import config
|
||||||
175
lingua_franca/bracket_expansion.py
Normal file
175
lingua_franca/bracket_expansion.py
Normal file
@@ -0,0 +1,175 @@
|
|||||||
|
# Copyright 2017 Mycroft AI, Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
|
||||||
|
class Fragment(object):
    """Base node of a sentence tree; by itself it stands for an empty sentence."""

    def __init__(self, tree):
        """
        Wrap the raw data this node represents.

        Args:
            tree (?): Payload of the node. What it holds depends on the
                      concrete subclass, see the subclasses for details
        """
        self._tree = tree

    def tree(self):
        """Hand back the wrapped payload exactly as it was stored."""
        return self._tree

    def expand(self):
        """
        Expand the node into sentences. The base class produces a single
        sentence without any tokens.

        Returns:
            List<List<str>>: A list holding one empty token list
        """
        return [[]]

    def __str__(self):
        # The node prints exactly like the payload it wraps
        return str(self._tree)

    def __repr__(self):
        return repr(self._tree)
|
||||||
|
|
||||||
|
|
||||||
|
class Word(Fragment):
    """
    Leaf of the sentence tree that holds exactly one word.
    The constructor argument is that word, given as a string.
    """

    def expand(self):
        """
        Build the only sentence this leaf can produce: the word on its own.

        Returns:
            List<List<str>>: One sentence whose single token is the stored
                             word
        """
        only_sentence = [self._tree]
        return [only_sentence]
|
||||||
|
|
||||||
|
|
||||||
|
class Sentence(Fragment):
    """
    Sequence of fragments that are concatenated one after another.
    The constructor argument is a List<Fragment>.
    """

    def expand(self):
        """
        Combine the expansions of all child fragments.

        Returns:
            List<List<str>>: Every sentence obtainable by picking one
                             expansion per child and concatenating them in
                             child order
        """
        combined = [[]]
        for fragment in self._tree:
            alternatives = fragment.expand()
            # The sentences built so far are walked back to front; this
            # fixes the order in which the resulting sentences are returned.
            combined = [prefix + continuation
                        for prefix in reversed(combined)
                        for continuation in alternatives]
        return combined
|
||||||
|
|
||||||
|
|
||||||
|
class Options(Fragment):
    """
    Set of alternative sub-sentences, of which exactly one is chosen.
    The constructor argument is a List<Fragment>.
    """

    def expand(self):
        """
        Collect the expansions of every alternative as separate sentences.

        Returns:
            List<List<str>>: The sentences of all alternatives, listed in the
                             order of the alternatives
        """
        return [sentence
                for alternative in self._tree
                for sentence in alternative.expand()]
|
||||||
|
|
||||||
|
|
||||||
|
class SentenceTreeParser(object):
    """
    Turn a flat list of tokens into every sentence it can stand for
    ['1', '(', '2', '|', '3', ')'] -> [['1', '2'], ['1', '3']]
    """

    def __init__(self, tokens):
        self.tokens = tokens

    def _parse(self):
        """
        Build the sentence tree for the whole token list
        ['1', '(', '2', '|', '3', ')'] -> ['1', ['2', '3']]
        """
        # Parsing always restarts from the first token
        self._current_position = 0
        return self._parse_expr()

    def _parse_expr(self):
        """
        Build the sentence tree from the current position up to the next
        closing parenthesis or the end of the token list
        ['1', '(', '2', '|', '3', ')'] -> ['1', [['2'], ['3']]]
        ['2', '|', '3'] -> [['2'], ['3']]
        """
        # Tokens of the alternative that is currently being filled
        active = []
        # Every alternative seen so far at this nesting level
        branches = [Sentence(active)]

        while self._current_position < len(self.tokens):
            token = self.tokens[self._current_position]
            self._current_position += 1

            if token == ')':
                # The enclosing sub-expression ends here
                break

            if token == '|':
                # A new alternative starts
                active = []
                branches.append(Sentence(active))
                continue

            # TODO anything special about {sth}?
            if token != '(':
                active.append(Word(token))
                continue

            nested = self._parse_expr()
            if len(nested.tree()) == 1:
                # Only one branch inside: these were ordinary brackets, so
                # keep the "(" and ")" around the content
                active.extend((Word('('), nested, Word(')')))
            else:
                active.append(nested)

        return Options(branches)

    def _expand_tree(self, tree):
        """
        Expand a sentence tree into the full list of sentences
        ['1', ['2', '3']] -> [['1', '2'], ['1', '3']]
        """
        return tree.expand()

    def expand_parentheses(self):
        """Parse the stored tokens and return all expanded sentences."""
        parsed = self._parse()
        return self._expand_tree(parsed)
|
||||||
2
lingua_franca/config.py
Normal file
2
lingua_franca/config.py
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# NOTE(review): presumably lets languages be loaded lazily on first use when
# True - confirm against the code in lingua_franca.internal that reads it.
load_langs_on_demand = False
# NOTE(review): presumably controls whether timezone information is injected
# into datetime arguments - confirm against lingua_franca.internal.
inject_timezones = True
|
||||||
568
lingua_franca/format.py
Normal file
568
lingua_franca/format.py
Normal file
@@ -0,0 +1,568 @@
|
|||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from collections import namedtuple
|
||||||
|
from warnings import warn
|
||||||
|
from os.path import join
|
||||||
|
|
||||||
|
|
||||||
|
from lingua_franca.bracket_expansion import SentenceTreeParser
|
||||||
|
from lingua_franca.internal import localized_function, \
|
||||||
|
populate_localized_function_dict, get_active_langs, \
|
||||||
|
get_full_lang_code, get_default_lang, get_default_loc, \
|
||||||
|
is_supported_full_lang, _raise_unsupported_language, \
|
||||||
|
UnsupportedLanguageError, NoneLangWarning, InvalidLangWarning, \
|
||||||
|
FunctionNotLocalizedError
|
||||||
|
|
||||||
|
|
||||||
|
# Names of the functions in this module that are wrapped with
# @localized_function.
_REGISTERED_FUNCTIONS = ("nice_number",
                         "nice_time",
                         "pronounce_number",
                         "nice_response",
                         "nice_duration")

# Runs at import time for the currently active languages.
# NOTE(review): presumably fills the lookup table of per-language "format"
# implementations - confirm in lingua_franca.internal.
populate_localized_function_dict("format", langs=get_active_langs())
|
||||||
|
|
||||||
|
|
||||||
|
def _translate_word(name, lang=''):
    """ Helper to get word translations

    Looks for the resource file "text/<lang code>/<name>.word" and returns
    its first line that is not a comment (comment lines start with "#").

    Args:
        name (str): Word name. Returned as the default value if not translated
        lang (str, optional): an optional BCP-47 language code, if omitted
                              the default language will be used.

    Returns:
        str: translated version of resource name
    """
    from lingua_franca.internal import resolve_resource_file
    if not lang:
        if lang is None:
            warn(NoneLangWarning)
        lang = get_default_loc()

    lang_code = lang if is_supported_full_lang(lang) else \
        get_full_lang_code(lang)

    filename = resolve_resource_file(join("text", lang_code, name + ".word"))
    if filename:
        # Best effort: an unreadable or undecodable resource falls back to
        # the resource name. Only I/O and decoding failures are swallowed so
        # that genuine programming errors are not hidden.
        try:
            with open(filename, 'r', encoding='utf8') as f:
                for line in f:
                    word = line.strip()
                    if word.startswith("#"):
                        continue  # skip comment lines
                    return word
        except (OSError, UnicodeError):
            pass
    return name  # use resource name as the word
|
||||||
|
|
||||||
|
|
||||||
|
# Spoken forms of the individual parts of a number (a year), as produced by
# DateTimeFormat._number_strings. Taking 1984 as an example: x -> 4,
# xx -> 84, x0 -> 80, x_in_x0 -> 8, xxx -> 984, x00 -> 900, x_in_x00 -> 9,
# xx00 -> 1900, xx_in_xx00 -> 19, x000 -> 1000, x_in_x000 -> 1,
# x0_in_x000 -> 10, x_in_0x00 -> 9.
NUMBER_TUPLE = namedtuple(
    'number',
    ('x, xx, x0, x_in_x0, xxx, x00, x_in_x00, xx00, xx_in_xx00, x000, ' +
     'x_in_x000, x0_in_x000, x_in_0x00'))


class DateTimeFormat:
    """
    Render dates and years from per-language ``date_time.json`` data.

    The data of a language is read by cache() and kept in ``lang_config``,
    keyed by the language code. All other methods expect cache() to have
    been called for the language they are given.
    """

    def __init__(self, config_path):
        """
        Args:
            config_path (str): directory that holds one sub-directory per
                               language code, each with a date_time.json
        """
        self.lang_config = {}
        self.config_path = config_path

    def cache(self, lang):
        """
        Load the formatting data of ``lang`` unless it is already loaded.

        When the language has no date_time.json the 'en-us' data is stored
        under ``lang`` instead. The 'match' pattern of every numbered rule
        in the decade/hundreds/thousand/year sections is compiled and
        stored next to it under the key 're'.

        Args:
            lang (str): language code, used as the sub-directory name

        Raises:
            FileNotFoundError: if the 'en-us' fallback file is missing too
        """
        if lang in self.lang_config:
            return

        try:
            # Attempt to load the language-specific formatting data
            with open(self.config_path + '/' + lang + '/date_time.json',
                      'r', encoding='utf8') as lang_config_file:
                self.lang_config[lang] = json.load(lang_config_file)
        except FileNotFoundError:
            # Fallback to English formatting. The encoding is given
            # explicitly so the result does not depend on the platform
            # default encoding.
            with open(self.config_path + '/en-us/date_time.json',
                      'r', encoding='utf8') as lang_config_file:
                self.lang_config[lang] = json.load(lang_config_file)

        for section_name in ('decade_format', 'hundreds_format',
                             'thousand_format', 'year_format'):
            section = self.lang_config[lang][section_name]
            # Rules are numbered "1", "2", ... without gaps
            i = 1
            while section.get(str(i)):
                rule = section[str(i)]
                rule['re'] = re.compile(rule['match'])
                i = i + 1

    def _number_strings(self, number, lang):
        """
        Split ``number`` into its parts and look up the word for each part.

        A part without an entry in the language's 'number' table is given
        as its digits.

        Args:
            number (int): the number to split, e.g. a year
            lang (str): language code previously passed to cache()

        Returns:
            NUMBER_TUPLE: the looked-up strings, see NUMBER_TUPLE
        """
        numbers = self.lang_config[lang]['number']

        def spoken(value):
            # Word from the language table, or the digits if there is none
            key = str(value)
            return numbers.get(key) or key

        x = spoken(number % 10)
        xx = spoken(number % 100)
        x_in_x0 = spoken(int(number % 100 / 10))
        x0 = spoken(int(number % 100 / 10) * 10)
        xxx = spoken(number % 1000)
        x00 = spoken(int(number % 1000 / 100) * 100)
        x_in_x00 = spoken(int(number % 1000 / 100))
        xx00 = spoken(int(number % 10000 / 100) * 100)
        xx_in_xx00 = spoken(int(number % 10000 / 100))
        x000 = spoken(int(number % 10000 / 1000) * 1000)
        x_in_x000 = spoken(int(number % 10000 / 1000))
        x0_in_x000 = spoken(int(number % 10000 / 1000) * 10)
        # Same fallback as every other field: digits when there is no word
        x_in_0x00 = spoken(int(number % 1000 / 100))

        return NUMBER_TUPLE(
            x, xx, x0, x_in_x0, xxx, x00, x_in_x00, xx00, xx_in_xx00, x000,
            x_in_x000, x0_in_x000, x_in_0x00)

    def _format_string(self, number, format_section, lang):
        """
        Pick the template of ``format_section`` that applies to ``number``.

        The numbered rules are tried in order and the 'format' of the first
        rule whose compiled pattern matches str(number) wins; otherwise the
        section's 'default' template is returned.
        """
        section = self.lang_config[lang][format_section]
        s = section['default']
        i = 1
        while section.get(str(i)):
            e = section[str(i)]
            if e['re'].match(str(number)):
                return e['format']
            i = i + 1
        return s

    def _decade_format(self, number, number_tuple, lang):
        """Format the last two digits of ``number`` with 'decade_format'."""
        s = self._format_string(number % 100, 'decade_format', lang)
        return s.format(x=number_tuple.x, xx=number_tuple.xx,
                        x0=number_tuple.x0, x_in_x0=number_tuple.x_in_x0,
                        number=str(number % 100))

    def _number_format_hundreds(self, number, number_tuple, lang,
                                formatted_decade):
        """Format the last three digits of ``number`` with 'hundreds_format'."""
        s = self._format_string(number % 1000, 'hundreds_format', lang)
        return s.format(xxx=number_tuple.xxx, x00=number_tuple.x00,
                        x_in_x00=number_tuple.x_in_x00,
                        formatted_decade=formatted_decade,
                        number=str(number % 1000))

    def _number_format_thousand(self, number, number_tuple, lang,
                                formatted_decade, formatted_hundreds):
        """Format the last four digits of ``number`` with 'thousand_format'."""
        s = self._format_string(number % 10000, 'thousand_format', lang)
        return s.format(x_in_x00=number_tuple.x_in_x00,
                        xx00=number_tuple.xx00,
                        xx_in_xx00=number_tuple.xx_in_xx00,
                        x000=number_tuple.x000,
                        x_in_x000=number_tuple.x_in_x000,
                        x0_in_x000=number_tuple.x0_in_x000,
                        x_in_0x00=number_tuple.x_in_0x00,
                        formatted_decade=formatted_decade,
                        formatted_hundreds=formatted_hundreds,
                        number=str(number % 10000))

    def date_format(self, dt, lang, now):
        """
        Format the date part of ``dt``.

        Without ``now`` the 'date_full' template is used. With ``now`` the
        year is left out for a date in the same year, the month as well for
        a later day of the same month, and tomorrow / today / yesterday get
        their own templates.

        Args:
            dt (datetime): the date to format
            lang (str): language code previously passed to cache()
            now (datetime): reference date, or a falsy value for none

        Returns:
            (str): The formatted date string
        """
        format_str = 'date_full'
        if now:
            if dt.year == now.year:
                format_str = 'date_full_no_year'
                if dt.month == now.month and dt.day > now.day:
                    format_str = 'date_full_no_year_month'

            tomorrow = now + datetime.timedelta(days=1)
            yesterday = now - datetime.timedelta(days=1)
            if tomorrow.date() == dt.date():
                format_str = 'tomorrow'
            elif now.date() == dt.date():
                format_str = 'today'
            elif yesterday.date() == dt.date():
                format_str = 'yesterday'

        return self.lang_config[lang]['date_format'][format_str].format(
            weekday=self.lang_config[lang]['weekday'][str(dt.weekday())],
            month=self.lang_config[lang]['month'][str(dt.month)],
            day=self.lang_config[lang]['date'][str(dt.day)],
            formatted_year=self.year_format(dt, lang, False))

    def date_time_format(self, dt, lang, now, use_24hour, use_ampm):
        """
        Format date and time of ``dt`` with the 'date_time' template.

        The date comes from date_format() and the time from nice_time().
        """
        date_str = self.date_format(dt, lang, now)
        time_str = nice_time(dt, lang, use_24hour=use_24hour,
                             use_ampm=use_ampm)
        return self.lang_config[lang]['date_time_format']['date_time'].format(
            formatted_date=date_str, formatted_time=time_str)

    def year_format(self, dt, lang, bc):
        """
        Format the year of ``dt``.

        Args:
            dt (datetime): date whose year is formatted
            lang (str): language code previously passed to cache()
            bc (bool): when true the language's 'bc' text is handed to the
                       template, otherwise an empty string

        Returns:
            (str): The formatted year, with runs of spaces collapsed and
                   surrounding whitespace removed
        """
        number_tuple = self._number_strings(dt.year, lang)
        formatted_bc = (
            self.lang_config[lang]['year_format']['bc'] if bc else '')
        formatted_decade = self._decade_format(
            dt.year, number_tuple, lang)
        formatted_hundreds = self._number_format_hundreds(
            dt.year, number_tuple, lang, formatted_decade)
        formatted_thousand = self._number_format_thousand(
            dt.year, number_tuple, lang, formatted_decade, formatted_hundreds)

        s = self._format_string(dt.year, 'year_format', lang)

        return re.sub(' +', ' ',
                      s.format(
                          year=str(dt.year),
                          century=str(int(dt.year / 100)),
                          decade=str(dt.year % 100),
                          formatted_hundreds=formatted_hundreds,
                          formatted_decade=formatted_decade,
                          formatted_thousand=formatted_thousand,
                          bc=formatted_bc)).strip()
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level formatter used by nice_date, nice_date_time and nice_year;
# it reads its data from the 'res/text' directory next to this file.
date_time_format = DateTimeFormat(os.path.join(os.path.dirname(__file__),
                                               'res/text'))
|
||||||
|
|
||||||
|
|
||||||
|
@localized_function(run_own_code_on=[UnsupportedLanguageError])
def nice_number(number, lang='', speech=True, denominators=None):
    """Format a float to human readable fractions

    This function formats a float to human understandable fractions. Like
    4.5 becomes 4 and a half for speech and 4 1/2 for text

    Args:
        number (int or float): the float to format
        lang (str, optional): an optional BCP-47 language code, if omitted
                              the default language will be used.
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    # Plain string conversion of the number.
    # NOTE(review): presumably this body only runs when the decorator hits
    # an UnsupportedLanguageError - confirm in lingua_franca.internal.
    return str(number)
|
||||||
|
|
||||||
|
|
||||||
|
@localized_function()
def nice_time(dt, lang='', speech=True, use_24hour=False,
              use_ampm=False, variant=None):
    """
    Format a time to a comfortable human format

    For example, generate 'five thirty' for speech or '5:30' for
    text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        lang (str, optional): an optional BCP-47 language code, if omitted
                              the default language will be used.
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
        variant (string): alternative time system to be used, string must
                          match language specific mappings
    Returns:
        (str): The formatted time string
    """
    # No body on purpose.
    # NOTE(review): presumably @localized_function dispatches the call to a
    # language-specific implementation - confirm in lingua_franca.internal.
|
||||||
|
|
||||||
|
|
||||||
|
@localized_function()
def pronounce_number(number, lang='', places=2, short_scale=True,
                     scientific=False, ordinals=False):
    """
    Convert a number to its spoken equivalent

    For example, '5' would be 'five'

    Args:
        number: the number to pronounce
        lang (str, optional): an optional BCP-47 language code, if omitted
                              the default language will be used.
        places (int): number of decimal places to express, default 2
        short_scale (bool) : use short (True) or long scale (False)
            https://en.wikipedia.org/wiki/Names_of_large_numbers
        scientific (bool) : convert and pronounce in scientific notation
        ordinals (bool): pronounce in ordinal form "first" instead of "one"
    Returns:
        (str): The pronounced number
    """
    # No body on purpose.
    # NOTE(review): presumably @localized_function dispatches the call to a
    # language-specific implementation - confirm in lingua_franca.internal.
|
||||||
|
|
||||||
|
|
||||||
|
def nice_date(dt, lang='', now=None):
    """
    Turn a datetime into a date that can be read out loud

    Example result: 'tuesday, june the fifth, 2018'

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        lang (str, optional): an optional BCP-47 language code, if omitted
                              the default language will be used.
        now (datetime): Current date. When given, the spoken date is
            shortened relative to it: the year is dropped if dt lies in the
            same year, the month is dropped if dt lies in the same month,
            and 'today' is returned if dt is the same day.

    Returns:
        (str): The formatted date string
    """
    locale_code = get_full_lang_code(lang)
    formatter = date_time_format
    # Make sure the formatting data of this language is loaded
    formatter.cache(locale_code)
    return formatter.date_format(dt, locale_code, now)
|
||||||
|
|
||||||
|
|
||||||
|
def nice_date_time(dt, lang='', now=None, use_24hour=False,
                   use_ampm=False):
    """
    Turn a datetime into a date and time that can be read out loud

    Example result: 'tuesday, june the fifth, 2018 at five thirty'

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        lang (str, optional): an optional BCP-47 language code, if omitted
                              the default language will be used.
        now (datetime): Current date. When given, the spoken date is
            shortened relative to it: the year is dropped if dt lies in the
            same year, the month is dropped if dt lies in the same month,
            and 'today' is returned if dt is the same day.
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted date time string
    """
    locale_code = get_full_lang_code(lang)
    formatter = date_time_format
    # Make sure the formatting data of this language is loaded
    formatter.cache(locale_code)
    return formatter.date_time_format(
        dt, locale_code, now, use_24hour, use_ampm)
|
||||||
|
|
||||||
|
|
||||||
|
def nice_year(dt, lang='', bc=False):
    """
    Turn the year of a datetime into a form that can be read out loud

    Example result: 'nineteen-hundred and eighty-four' for the year 1984

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        lang (str, optional): an optional BCP-47 language code, if omitted
                              the default language will be used.
        bc (bool): put B.C. after the year (python does not support dates
                   B.C. in datetime)
    Returns:
        (str): The formatted year string
    """
    locale_code = get_full_lang_code(lang)
    formatter = date_time_format
    # Make sure the formatting data of this language is loaded
    formatter.cache(locale_code)
    return formatter.year_format(dt, locale_code, bc)
|
||||||
|
|
||||||
|
|
||||||
|
@localized_function(run_own_code_on=[FunctionNotLocalizedError])
def nice_duration(duration, lang='', speech=True):
    """ Convert duration in seconds to a nice spoken timespan

    Examples:
       duration = 60  ->  "1:00" or "one minute"
       duration = 163  ->  "2:43" or "two minutes forty three seconds"

    Args:
        duration: time, in seconds, or a datetime.timedelta
        lang (str, optional): an optional BCP-47 language code, if omitted
                              the default language will be used.
        speech (bool): format for speech (True) or display (False)

    Returns:
        str: timespan as a string. The spoken form lists the non-zero units
             separated by single spaces and is empty for a zero duration.
    """
    if not lang:
        if lang is None:
            warn(NoneLangWarning)
        lang = get_default_loc()
    if not is_supported_full_lang(lang):
        # TODO deprecated; delete when 'lang=None' and 'lang=invalid' are
        # removed
        try:
            lang = get_full_lang_code(lang)
        except UnsupportedLanguageError:
            warn(InvalidLangWarning)
            lang = get_default_loc()

    if isinstance(duration, datetime.timedelta):
        duration = duration.total_seconds()

    # Do traditional rounding: 2.5->3, 3.5->4, plus this
    # helps in a few cases of where calculations generate
    # times like 2:59:59.9 instead of 3:00.
    duration += 0.5

    days = int(duration // 86400)
    hours = int(duration // 3600 % 24)
    minutes = int(duration // 60 % 60)
    seconds = int(duration % 60)

    if speech:
        units = ((days, "day", "days"),
                 (hours, "hour", "hours"),
                 (minutes, "minute", "minutes"),
                 (seconds, "second", "seconds"))
        # One "<number> <unit>" phrase per non-zero unit. Joining the
        # phrases guarantees exactly one space between them and none at
        # the end, also when days are present.
        phrases = [
            pronounce_number(amount, lang) + " " +
            _translate_word(singular if amount == 1 else plural, lang)
            for amount, singular, plural in units
            if amount > 0
        ]
        return " ".join(phrases)

    # M:SS, MM:SS, H:MM:SS, Dd H:MM:SS format
    out = ""
    if days > 0:
        out = str(days) + "d "
    if hours > 0 or days > 0:
        out += str(hours) + ":"
    if minutes < 10 and (hours > 0 or days > 0):
        # Minutes are zero-padded only when a larger unit precedes them
        out += "0"
    out += str(minutes) + ":"
    if seconds < 10:
        out += "0"
    out += str(seconds)

    return out
|
||||||
|
|
||||||
|
|
||||||
|
def join_list(items, connector, sep=None, lang=''):
    """ Join a list into a phrase using the given connector word

    Examples:
        join_list([1,2,3], "and") ->  "1, 2 and 3"
        join_list([1,2,3], "and", ";") ->  "1; 2 and 3"

    Args:
        items (array): items to be joined; each one is converted with str()
        connector (str): connecting word (resource name), like "and" or "or"
        sep (str, optional): separator character, default = ","
        lang (str, optional): an optional BCP-47 language code, if omitted
                              the default language will be used.
    Returns:
        str: the connected list phrase, "" for an empty list
    """
    if not items:
        return ""
    if len(items) == 1:
        return str(items[0])

    # The separator is always followed by a single space
    sep = sep + " " if sep else ", "

    # The last item goes through str() like all the others, so lists of
    # non-string items (e.g. numbers) can be joined as well.
    return (sep.join(str(item) for item in items[:-1]) +
            " " + _translate_word(connector, lang) +
            " " + str(items[-1]))
|
||||||
|
|
||||||
|
|
||||||
|
def expand_parentheses(sent):
    """
    Expand the bracketed alternatives of a tokenized sentence
        ['1', '(', '2', '|', '3', ')'] -> [['1', '2'], ['1', '3']]
    For example:
        Will it (rain|pour) (today|tomorrow|)?
            ---->
        Will it rain today?
        Will it rain tomorrow?
        Will it rain?
        Will it pour today?
        Will it pour tomorrow?
        Will it pour?

    Args:
        sent (list<str>): List of tokens in sentence

    Returns:
        list<list<str>>: Multiple possible sentences from original
    """
    parser = SentenceTreeParser(sent)
    return parser.expand_parentheses()
|
||||||
|
|
||||||
|
|
||||||
|
def expand_options(parentheses_line: str) -> list:
    """
    Expand a line with bracketed alternatives into plain sentences,
    e.g. 'test (a|b)' -> ['test a', 'test b']

    Args:
        parentheses_line: Input line to expand

    Returns:
        List of expanded possibilities
    """
    # Split on "(", "|" and ")" while keeping them as tokens:
    # 'a(this|that)b' -> ['a', '(', 'this', '|', 'that', ')', 'b']
    tokens = re.split(r'([(|)])', parentheses_line)

    expanded = []
    for sentence in expand_parentheses(tokens):
        joined = ' '.join(sentence)
        # Collapse whitespace runs left behind by empty or padded tokens
        expanded.append(re.sub(r'\s+', ' ', joined).strip())
    return expanded
|
||||||
|
|
||||||
|
|
||||||
|
@localized_function()
def nice_response(text, lang=''):
    """
    In some languages, sanitizes certain numeric input for TTS

    Most of the time, this function will be called by any formatters
    which might need it. It's exposed here just in case you've got a clever
    use.

    As of July 2020, this function sanitizes some dates and "x ^ y"-formatted
    exponents in the following primary language codes:
        da de nl sv

    Args:
        text (str): input text to sanitize
        lang (str, optional): an optional BCP-47 language code, if omitted
                              the default language will be used.

    Example:
        assertEqual(nice_response_de("dies ist der 31. mai"),
                    "dies ist der einunddreißigste mai")

        assertEqual(nice_response_de("10 ^ 2"),
                    "10 hoch 2")
    """
    # No body on purpose.
    # NOTE(review): presumably @localized_function dispatches the call to a
    # language-specific implementation - confirm in lingua_franca.internal.
|
||||||
774
lingua_franca/internal.py
Normal file
774
lingua_franca/internal.py
Normal file
@@ -0,0 +1,774 @@
|
|||||||
|
import os.path
|
||||||
|
from functools import wraps
|
||||||
|
from importlib import import_module
|
||||||
|
from inspect import signature
|
||||||
|
|
||||||
|
from warnings import warn
|
||||||
|
from datetime import datetime
|
||||||
|
from lingua_franca import config
|
||||||
|
from lingua_franca.time import to_local
|
||||||
|
|
||||||
|
|
||||||
|
# Primary language codes that can be loaded.
_SUPPORTED_LANGUAGES = (
    "ca", "cs", "da", "de", "en", "es", "fr", "hu",
    "it", "nl", "pl", "pt", "ru", "sl", "sv", "fa",
)

# Full "language-region" codes that are accepted as-is.
# NOTE(review): "tr-tr" is listed here (and 'tr' in the mapping below) while
# "tr" is absent from _SUPPORTED_LANGUAGES -- confirm whether intentional.
_SUPPORTED_FULL_LOCALIZATIONS = (
    "ca-es", "cs-cz", "da-dk", "de-de",
    "en-au", "en-us", "es-es", "fr-fr",
    "hu-hu", "it-it", "nl-nl", "pl-pl",
    "fa-ir", "pt-pt", "ru-ru", "sl-si",
    "sv-se", "tr-tr",
)

# Full code chosen when only a primary code is given.
_DEFAULT_FULL_LANG_CODES = {
    'ca': 'ca-es',
    'cs': 'cs-cz',
    'da': 'da-dk',
    'de': 'de-de',
    'en': 'en-us',
    'es': 'es-es',
    'fa': 'fa-ir',
    'fr': 'fr-fr',
    'hu': 'hu-hu',
    'it': 'it-it',
    'nl': 'nl-nl',
    'pl': 'pl-pl',
    'pt': 'pt-pt',
    'ru': 'ru-ru',
    'sl': 'sl-si',
    'sv': 'sv-se',
    'tr': 'tr-tr',
}

# Module state, mutated by set_default_lang() / _set_active_langs().
__default_lang = None        # primary code of the default language
__active_lang_code = None    # full code of the default language
__loaded_langs = []          # codes of the currently loaded languages

# Cache filled by populate_localized_function_dict():
# {module name: {primary lang code: {function name: signature or error}}}
_localized_functions = {}
|
||||||
|
|
||||||
|
# TODO the deprecation of 'lang=None' and 'lang=<invalid>' can refer to
|
||||||
|
# commit 35efd0661a178e82f6745ad17e10e607c0d83472 for the "proper" state
|
||||||
|
# of affairs, raising the errors below instead of deprecation warnings
|
||||||
|
|
||||||
|
# Once the deprecation is complete, functions which have had their default
|
||||||
|
# parameter changed from lang=None to lang='' should be switched back
|
||||||
|
|
||||||
|
|
||||||
|
class UnsupportedLanguageError(NotImplementedError):
    """Raised when a language code is not supported by Lingua Franca."""
|
||||||
|
|
||||||
|
|
||||||
|
class FunctionNotLocalizedError(NotImplementedError):
    """Raised when a function has no implementation in the requested language."""
|
||||||
|
|
||||||
|
|
||||||
|
# Shared warning instance, emitted whenever a caller passes lang=None.
NoneLangWarning = DeprecationWarning(
    "Lingua Franca is dropping support"
    " for 'lang=None' as an explicit"
    " argument.")

# Shared warning instance, emitted when an unknown language code is replaced
# by the default language.
InvalidLangWarning = DeprecationWarning(
    "Invalid language code detected. Falling back on "
    "default.\nThis behavior is deprecated. The 'lang' "
    "parameter is optional, and only accepts supported "
    "language codes, beginning with Lingua Franca 0.3.0")
|
||||||
|
|
||||||
|
|
||||||
|
def _raise_unsupported_language(language):
    """Raise ``UnsupportedLanguageError`` for the given language code.

    Arguments:
        language (str): the language code that was supplied.

    Raises:
        UnsupportedLanguageError: always. The message names ``language`` and
            lists every code in ``_SUPPORTED_LANGUAGES``.
    """
    template = ("\nLanguage '{language}' is not yet "
                "supported by Lingua Franca. "
                "Supported language codes "
                "include the following:\n{supported}")
    known_codes = ' '.join(_SUPPORTED_LANGUAGES)
    raise UnsupportedLanguageError(
        template.format(language=language, supported=known_codes))
|
||||||
|
|
||||||
|
|
||||||
|
def get_supported_langs():
    """Return the primary language codes that Lingua Franca supports.

    Returns:
        tuple(str): the module-level ``_SUPPORTED_LANGUAGES`` tuple itself
    """
    supported = _SUPPORTED_LANGUAGES
    return supported
|
||||||
|
|
||||||
|
|
||||||
|
def get_active_langs():
    """Return the codes of the currently loaded languages.

    Returns:
        list(str): the module's own list object (not a copy), so changes
            made to it by the caller affect the loaded-language state.
    """
    loaded = __loaded_langs
    return loaded
|
||||||
|
|
||||||
|
|
||||||
|
def _set_active_langs(langs=None, override_default=True):
    """Replace the list of loaded languages.

    Duplicates are dropped (first occurrence wins). When a default language
    is currently set it is re-evaluated: the first entry of the new list
    becomes the default, or the default is cleared when the list is empty.
    Finally the localized-function cache is refreshed.

    Arguments:
        langs: {list(str) or str} -- language code(s) to keep loaded

    Keyword Arguments:
        override_default (bool) -- when False, the default is only changed
                                   if its primary code is missing from
                                   ``langs`` (default: True)

    Raises:
        TypeError: if ``langs`` is neither a str nor a list
    """
    global __loaded_langs, __default_lang

    if isinstance(langs, str):
        langs = [langs]
    elif not isinstance(langs, list):
        raise TypeError("lingua_franca.internal._set_active_langs expects"
                        " 'str' or 'list'")

    # dict.fromkeys() removes duplicates while keeping first-seen order.
    __loaded_langs = list(dict.fromkeys(langs))

    if __default_lang:
        # NOTE(review): because of the 'or', override_default=True resets the
        # default to the first entry even when the current default is still
        # in the list -- confirm this is the intended behaviour.
        reset_default = override_default or (
            get_primary_lang_code(__default_lang) not in __loaded_langs)
        if reset_default:
            if __loaded_langs:
                set_default_lang(get_full_lang_code(__loaded_langs[0]))
            else:
                __default_lang = None
    _refresh_function_dict()
|
||||||
|
|
||||||
|
|
||||||
|
def _refresh_function_dict():
    """Rebuild the localized-function cache of every registered module.

    Each key of ``_localized_functions`` is re-populated for the languages
    that are currently loaded.
    """
    for module_name in _localized_functions:
        populate_localized_function_dict(module_name, langs=__loaded_langs)
|
||||||
|
|
||||||
|
|
||||||
|
def is_supported_lang(lang):
    """Tell whether ``lang`` is a supported primary language code.

    Arguments:
        lang (str): a primary language code, such as "en" (case insensitive)

    Returns:
        bool: True when the lower-cased code is in ``_SUPPORTED_LANGUAGES``;
            False otherwise, including for values without ``.lower()``.
    """
    try:
        normalized = lang.lower()
    except AttributeError:
        # Not string-like (e.g. None): it cannot be a language code.
        return False
    return normalized in _SUPPORTED_LANGUAGES
|
||||||
|
|
||||||
|
|
||||||
|
def is_supported_full_lang(lang):
    """Tell whether ``lang`` is a supported full language code.

    Arguments:
        lang (str): a full language code, such as "en-US" (case insensitive)

    Returns:
        bool: True when the lower-cased code is in
            ``_SUPPORTED_FULL_LOCALIZATIONS``; False otherwise, including
            for values without ``.lower()``.
    """
    try:
        normalized = lang.lower()
    except AttributeError:
        # Not string-like (e.g. None): it cannot be a language code.
        return False
    return normalized in _SUPPORTED_FULL_LOCALIZATIONS
|
||||||
|
|
||||||
|
|
||||||
|
def load_language(lang):
    """Load `lang` and its functions into memory.

    Only functions belonging to an already-imported module are loaded. In
    other words, if you have lingua_franca.parse loaded, but *not*
    lingua_franca.format, running `load_language('es')` will only import
    the Spanish-language parsers, and not the formatters.

    The reverse is also true: importing a module, such as
    `import lingua_franca.parse`, will only import those functions
    which belong to currently-loaded languages.

    The first language that is loaded also becomes the default language.

    Arguments:
        lang (str): the language code to load (any supported lang code,
                    whether 'primary' or 'full')
                    Case-insensitive.

    Raises:
        TypeError: if `lang` is not a str
    """
    if not isinstance(lang, str):
        # type(...).__name__ is needed here: concatenating the type object
        # itself would fail before this message could be produced.
        raise TypeError("lingua_franca.load_language expects 'str' "
                        "(got " + type(lang).__name__ + ")")
    # Normalise the case so that 'EN' / 'en-US' match the lower-case tables.
    lang = lang.lower()
    if lang not in _SUPPORTED_LANGUAGES:
        if lang in _SUPPORTED_FULL_LOCALIZATIONS:
            lang = get_primary_lang_code(lang)
    if lang not in __loaded_langs:
        __loaded_langs.append(lang)
        if not __default_lang:
            set_default_lang(lang)
        _set_active_langs(__loaded_langs)
|
||||||
|
|
||||||
|
|
||||||
|
def load_languages(langs):
    """Load several languages, one after the other.

    Every entry is passed to ``load_language()`` in iteration order.

    Args:
        langs (list[str]): language codes to load
    """
    for code in langs:
        load_language(code)
|
||||||
|
|
||||||
|
|
||||||
|
def unload_language(lang):
    """Opposite of load_language().

    Does nothing when `lang` is not loaded. Otherwise the code is removed
    and the remaining list is re-applied through `_set_active_langs()`,
    which takes the new default from the remaining entries, or clears it
    when none remain.

    Will not stop you from unloading the last language, as this may be
    desirable for some applications.

    Args:
        lang (str): language code to unload
    """
    if lang not in __loaded_langs:
        return
    __loaded_langs.remove(lang)
    _set_active_langs(__loaded_langs)
|
||||||
|
|
||||||
|
|
||||||
|
def unload_languages(langs):
    """Opposite of load_languages().

    Every entry is passed to `unload_language()`, so codes that are not
    currently loaded are skipped instead of raising ValueError.

    Args:
        langs (list[str]): language codes to unload
    """
    # Iterate over a snapshot: `langs` may be the live list returned by
    # get_active_langs(), which is modified while languages are unloaded.
    for lang in list(langs):
        unload_language(lang)
|
||||||
|
|
||||||
|
|
||||||
|
def get_default_lang():
    """Return the current default language.

    The value is the primary BCP-47 code, such as 'en' or 'es', stored by
    `set_default_lang()`. For the full language code / localization, such
    as 'en-us' or 'es-es', call `get_default_loc()` instead.

    See:
        https://en.wikipedia.org/wiki/IETF_language_tag

    Returns:
        str: A primary language code, e.g. ("en", or "pt"), or None when no
            default language has been set
    """
    current_default = __default_lang
    return current_default
|
||||||
|
|
||||||
|
|
||||||
|
def get_default_loc():
    """Return the full (localized) code of the default language.

    This is a BCP-47 code such as 'en-us' or 'es-es', stored by
    `set_default_lang()`. For the default language *family* - which is
    passed to most parsers and formatters - call `get_default_lang()`.

    The 'localized' portion conforms to ISO 3166-1 alpha-2
    https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2

    Returns:
        str: the full language code, or None when no default language has
            been set
    """
    current_localization = __active_lang_code
    return current_localization
|
||||||
|
|
||||||
|
|
||||||
|
def set_default_lang(lang_code):
    """Set the default BCP-47 language code used in formatting/parsing.

    A primary language family gets its default localization
    (ex: `set_default_lang("en")` selects "en-us"); a supported full code
    is stored as given, in lower case.

    The default language is also moved to the front of the loaded languages
    (it is added when missing) and the localized-function cache is
    refreshed.

    For more information about valid lang codes, see get_default_lang()
    and get_default_loc()

    Args:
        lang_code (str): BCP-47 language code, e.g. "en-us" or "es-mx"

    Raises:
        UnsupportedLanguageError: if the primary code is not supported
    """
    global __default_lang, __active_lang_code

    lang_code = lang_code.lower()
    primary_lang_code = get_primary_lang_code(lang_code)
    if primary_lang_code not in _SUPPORTED_LANGUAGES:
        _raise_unsupported_language(lang_code)  # always raises
    __default_lang = primary_lang_code

    # Keep the default language loaded and at the front of the list. The
    # position has no functional meaning; it only helps while debugging.
    if __default_lang in __loaded_langs:
        __loaded_langs.remove(__default_lang)
    __loaded_langs.insert(0, __default_lang)
    _refresh_function_dict()

    # Respect an explicit localization, otherwise use the family's default.
    __active_lang_code = (lang_code if is_supported_full_lang(lang_code)
                          else get_full_lang_code(__default_lang))
|
||||||
|
|
||||||
|
# TODO remove this when invalid lang codes are removed (currently deprecated)
|
||||||
|
|
||||||
|
|
||||||
|
def get_primary_lang_code(lang=''):
    """Return the primary language code for `lang`, tolerating deprecated input.

    An empty or None `lang` is replaced by the default localization first
    (None additionally emits `NoneLangWarning`). If the strict lookup raises
    `UnsupportedLanguageError`, `InvalidLangWarning` is emitted and the
    value of `get_default_loc()` is returned.

    Args:
        lang (str, optional): a BCP-47 language code

    Returns:
        str: the result of the strict lookup, or the default localization
            after an `UnsupportedLanguageError`
    """
    if lang is None:
        warn(NoneLangWarning)
    if not lang:
        lang = get_default_loc()
    try:
        return __get_primary_lang_code_deprecation_warning(lang)
    except UnsupportedLanguageError:
        warn(InvalidLangWarning)
        return get_default_loc()
|
||||||
|
|
||||||
|
|
||||||
|
def __get_primary_lang_code_deprecation_warning(lang=''):
    """ Get the primary language code

    Args:
        lang(str, optional): A BCP-47 language code
                              (If omitted, equivalent to
                              `lingua_franca.get_default_lang()`)

    Returns:
        str: A primary language family, such as "en", "de" or "pt"

    Raises:
        TypeError: if `lang` is truthy but not a str
        ValueError: if `lang` is unknown and is not shaped like "xx-yy"
    """
    # split on the hyphen and only return the primary-language code
    # NOTE: This is typically a two character code.  The standard allows
    #       1, 2, 3 and 4 character codes.  In the future we can consider
    #       mapping from the 3 to 2 character codes, for example.  But for
    #       now we can just be careful in use.
    if not lang:
        return get_default_lang()
    if not isinstance(lang, str):
        # type(...).__name__ is needed here: concatenating the type object
        # itself would fail before this message could be produced.
        raise TypeError("lingua_franca.get_primary_lang_code() expects"
                        " an (optional)argument of type 'str', but got " +
                        type(lang).__name__)

    lang_code = lang.lower()
    if lang_code not in _SUPPORTED_FULL_LOCALIZATIONS and lang_code not in \
            _SUPPORTED_LANGUAGES:
        # We don't know this language code. Check if the input is
        # formatted like a language code ("xx-...": hyphen in 3rd position).
        if lang == "-".join([lang[:2], lang[3:]]):
            warn("Unrecognized language code: '" + lang + "', but it appears "
                 "to be a valid language code. Returning the first two chars.")
            return lang_code.split("-")[0]
        raise ValueError("Invalid input: " + lang)
    return lang_code.split("-")[0]
|
||||||
|
|
||||||
|
# TODO remove this when invalid lang codes are removed (currently deprecated)
|
||||||
|
|
||||||
|
|
||||||
|
def get_full_lang_code(lang=''):
    """Return the full language code for `lang`, tolerating deprecated input.

    An empty or None `lang` is replaced by the default localization first
    (None additionally emits `NoneLangWarning`). A supported full code is
    returned unchanged. Anything else goes through the strict lookup; if
    that raises `UnsupportedLanguageError`, `InvalidLangWarning` is emitted
    and the default localization is returned.

    Args:
        lang (str, optional): a BCP-47 language code

    Returns:
        str: a full language code, such as "en-us" or "de-de"
    """
    if lang is None:
        warn(NoneLangWarning)
    if not lang:
        lang = get_default_loc()
    if is_supported_full_lang(lang):
        return lang
    try:
        return __get_full_lang_code_deprecation_warning(lang)
    except UnsupportedLanguageError:
        warn(InvalidLangWarning)
        return get_default_loc()
|
||||||
|
|
||||||
|
|
||||||
|
def __get_full_lang_code_deprecation_warning(lang=''):
    """ Get the full language code

    Args:
        lang(str, optional): A BCP-47 language code. None selects the
                              current default localization, lower-cased.

    Returns:
        str: A full language code, such as "en-us" or "de-de". A supported
            full code is returned exactly as it was passed in.

    Raises:
        TypeError: if `lang` is neither None nor a str
        UnsupportedLanguageError: if `lang` is neither a supported full code
            nor a key of `_DEFAULT_FULL_LANG_CODES` (this includes '')
    """
    if lang is None:
        return __active_lang_code.lower()
    if not isinstance(lang, str):
        raise TypeError("get_full_lang_code expects str, "
                        "got {}".format(type(lang)))

    if lang.lower() in _SUPPORTED_FULL_LOCALIZATIONS:
        return lang
    if lang in _DEFAULT_FULL_LANG_CODES:
        return _DEFAULT_FULL_LANG_CODES[lang]
    raise UnsupportedLanguageError(lang)
|
||||||
|
|
||||||
|
|
||||||
|
def localized_function(run_own_code_on=[type(None)]):
    """
    Decorator that dispatches a top-level function to its localized twin.

    The decorated functions are the signatures defined in the top-level
    modules (see lingua_franca.format or .parse). By default the wrapped
    function is never executed: its arguments are handed to the localized
    implementation selected by the 'lang' parameter.

    For instance, this decorator wraps parse.extract_number(), which has no
    logic of its own. A call to

        extract_number('uno', lang='es')

    will locate and call

        lingua_franca.lang.parse_es.extract_number_es('uno')

    The wrapped function's own body can be used as a fallback when specific
    errors are raised. This is how format.nice_number is declared:

        @localized_function(run_own_code_on=[UnsupportedLanguageError])
        def nice_number(number, lang='', speech=True, denominators=None):

    Here nice_number() itself runs if the localizer raises an
    UnsupportedLanguageError.

    Arguments:
        run_own_code_on(list(type), optional)
            A list of Error types (ValueError, NotImplementedError, etc)
            which, if they are raised, will trigger the wrapped function's
            own code.

            If this argument is omitted, the function itself will never
            be run. Calls to the wrapped function will be passed to the
            appropriate, localized function.

    Raises:
        ValueError: if run_own_code_on is not an Error type or a list of
            Error types
    """
    # Built eagerly so that the message shows the value exactly as received.
    BadTypeError = \
        ValueError("@localized_function(run_own_code_on=<>) expected an "
                   "Error type, or a list of Error types. Instead, it "
                   "received this value:\n" + str(run_own_code_on))
    # TODO deprecate these kwarg values 6-12 months after v0.3.0 releases

    # Make sure everything in run_own_code_on is an Error or None
    if run_own_code_on != [None]:
        def is_error_type(_type):
            if not callable(_type):
                return False
            # type(None)() yields None, which is accepted as "no error type"
            _instance = _type()
            if _instance:
                return isinstance(_instance, BaseException)
            return True

        if not isinstance(run_own_code_on, list):
            try:
                run_own_code_on = list(run_own_code_on)
            except TypeError:
                raise BadTypeError
        if not all(is_error_type(candidate) for candidate in run_own_code_on):
            raise BadTypeError

    # Begin wrapper
    def localized_function_decorator(func):
        # Wrapper's logic
        def _call_localized_function(func, *args, **kwargs):
            lang_code = None
            load_langs_on_demand = config.load_langs_on_demand
            unload_language_afterward = False
            lang_param_index = list(signature(func).parameters).index('lang')
            full_lang_code = None

            # Check if we need to add timezone awareness to any datetime object
            if config.inject_timezones:
                for key, value in kwargs.items():
                    if isinstance(value, datetime) and value.tzinfo is None:
                        kwargs[key] = to_local(value)
                args = tuple(
                    to_local(value)
                    if isinstance(value, datetime) and value.tzinfo is None
                    else value
                    for value in args)

            if 'lang' in kwargs:
                # 'lang' was passed as a keyword argument
                lang_param = kwargs['lang']
                if lang_param is None:
                    warn(NoneLangWarning)
                    lang_code = get_default_lang()
                else:
                    lang_code = lang_param
            elif lang_param_index < len(args):
                # 'lang' was passed as a positional argument
                lang_param = args[lang_param_index]
                if lang_param is None:
                    warn(NoneLangWarning)
                    lang_code = get_default_lang()
                elif lang_param in _SUPPORTED_LANGUAGES or \
                        lang_param in _SUPPORTED_FULL_LOCALIZATIONS:
                    lang_code = lang_param
                # The positional 'lang' is dropped whatever its value was.
                args = args[:lang_param_index] + args[lang_param_index + 1:]

            # Turns out, we aren't passing a lang code at all
            lang_code = lang_code or get_default_lang()
            if not lang_code:
                if load_langs_on_demand:
                    raise ModuleNotFoundError("No language module loaded "
                                              "and none specified.")
                raise ModuleNotFoundError("No language module loaded.")

            if lang_code not in _SUPPORTED_LANGUAGES:
                requested_code = lang_code
                keep_requested_code = True
                try:
                    lang_code = get_primary_lang_code(lang_code)
                except ValueError:
                    unsupported = \
                        UnsupportedLanguageError("\nLanguage '{language}' is not yet "
                                                 "supported by Lingua Franca. "
                                                 "Supported language codes "
                                                 "include the following:\n{supported}"
                                                 .format(
                                                     language=lang_code,
                                                     supported=_SUPPORTED_FULL_LOCALIZATIONS))
                    if UnsupportedLanguageError in run_own_code_on:
                        raise unsupported
                    warn(DeprecationWarning("The following warning will "
                                            "become an exception in a future "
                                            "version of Lingua Franca." +
                                            str(unsupported)))
                    lang_code = get_default_lang()
                    full_lang_code = get_full_lang_code()
                    keep_requested_code = False
                if lang_code not in _SUPPORTED_LANGUAGES:
                    _raise_unsupported_language(lang_code)
                if keep_requested_code:
                    full_lang_code = requested_code
                else:
                    full_lang_code = get_full_lang_code(lang_code)

            # Get from lingua_franca.parse to lingua_franca.lang.parse_xx
            _module_name = func.__module__.split('.')[-1]
            _module = import_module(".lang." + _module_name +
                                    "_" + lang_code, "lingua_franca")
            if _module_name not in _localized_functions:
                raise ModuleNotFoundError("Module lingua_franca." +
                                          _module_name + " not recognized")
            if lang_code not in _localized_functions[_module_name]:
                if not load_langs_on_demand:
                    raise ModuleNotFoundError(_module_name +
                                              " module of language '" +
                                              lang_code +
                                              "' is not currently loaded.")
                load_language(lang_code)
                unload_language_afterward = True

            func_name = func.__name__.split('.')[-1]
            # When the module and the language were imported/loaded, the
            # *signature* of each localized function was cached. This is
            # what allows functions to be imported on the fly. Where no
            # localized function matched the wrapped one, an error instance
            # was cached in place of the signature.
            loc_signature = _localized_functions[_module_name][lang_code][func_name]
            if isinstance(loc_signature, NotImplementedError):
                raise loc_signature

            # Fetch the localized function, such as
            # lingua_franca.parse.extract_datetime_en
            try:
                localized_func = getattr(
                    _module, func_name + "_" + lang_code)
            except AttributeError:
                raise FunctionNotLocalizedError(func_name, lang_code)

            # Get 'lang' out of its parameters.
            if 'lang' in kwargs:
                del kwargs['lang']
            args = tuple(arg for arg in args if
                         arg not in (lang_code, full_lang_code))

            # Call it, ignoring any kwargs from the wrapped function that
            # aren't in the localized function.
            accepted_kwargs = {name: val for name, val in kwargs.items()
                               if name in loc_signature.parameters}
            r_val = localized_func(*args, **accepted_kwargs)

            # Unload all the stuff we just assembled and imported
            del localized_func
            del _module
            if unload_language_afterward:
                unload_language(lang_code)
            return r_val

        # Actual wrapper
        @wraps(func)
        def call_localized_function(*args, **kwargs):
            if run_own_code_on == [type(None)]:
                # don't intercept any exceptions
                return _call_localized_function(func, *args, **kwargs)
            try:
                return _call_localized_function(func, *args, **kwargs)
            except Exception as e:  # Intercept, check for run_own_code_on
                if any(isinstance(e, error) for error in run_own_code_on):
                    return func(*args, **kwargs)
                raise

        return call_localized_function

    return localized_function_decorator
|
||||||
|
|
||||||
|
|
||||||
|
def populate_localized_function_dict(lf_module, langs=None):
    """Build and cache the localized-function signatures of one module.

    Used by the top-level modules to locate, cache, and call localized funcs.

    For every language, each name in the module's `_REGISTERED_FUNCTIONS` is
    looked up as `<name>_<lang>` in `lingua_franca.lang.<lf_module>_<lang>`.
    The function's signature is stored, or a `FunctionNotLocalizedError`
    instance when the language does not implement it.

    Arguments:
        lf_module (str) -- the name of the top-level module
        langs (list(str), optional) -- language codes to populate. The
            currently loaded languages are used when omitted.

    Returns:
        Dict -- {language_code: {function_name(str): signature or error}}

    Note:
        The dictionary returned can be used directly, but it's normally
        discarded. Rather, this function stores the dictionary as a member
        of `lingua_franca.internal._localized_functions`, whose members are
        consumed by the `@localized_function` decorator.

        A language whose `<lf_module>_<lang>` module cannot be imported
        triggers a warning and is left with an empty dictionary.
    """
    # Resolve the default at call time: a default evaluated in the signature
    # is bound once at import and goes stale when the loaded list is replaced.
    if langs is None:
        langs = get_active_langs()

    bad_lang_code = "Language code '{}' is registered with" \
                    " Lingua Franca, but its " + lf_module + " module" \
                    " could not be found."
    return_dict = {}
    for lang_code in langs:
        primary_lang_code = get_primary_lang_code(lang_code)
        return_dict[primary_lang_code] = {}

        # Prefer the language's own "not implemented" text. Falling back on
        # a generic one for any failure is a deliberate best effort.
        try:
            lang_common_data = import_module(".lang.common_data_" + primary_lang_code,
                                             "lingua_franca")
            _FUNCTION_NOT_FOUND = getattr(lang_common_data,
                                          "_FUNCTION_NOT_IMPLEMENTED_WARNING")
            del lang_common_data
        except Exception:
            _FUNCTION_NOT_FOUND = "This function has not been implemented" \
                                  " in the specified language."
        _FUNCTION_NOT_FOUND = FunctionNotLocalizedError(_FUNCTION_NOT_FOUND)

        try:
            mod = import_module(".lang." + lf_module + "_" + primary_lang_code,
                                "lingua_franca")
        except ModuleNotFoundError:
            warn(Warning(bad_lang_code.format(primary_lang_code)))
            continue

        function_names = getattr(import_module("." + lf_module, "lingua_franca"),
                                 "_REGISTERED_FUNCTIONS")
        for function_name in function_names:
            try:
                function = getattr(mod, function_name
                                   + "_" + primary_lang_code)
                function_signature = signature(function)
                del function
            except AttributeError:
                function_signature = _FUNCTION_NOT_FOUND
                # TODO log these occurrences: "function 'function_name' not
                # implemented in language 'primary_lang_code'"
                #
                # Perhaps provide this info to autodocs, to help volunteers
                # identify the functions in need of localization
            return_dict[primary_lang_code][function_name] = function_signature

        del mod
    _localized_functions[lf_module] = return_dict
    return _localized_functions[lf_module]
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_resource_file(res_name, data_dir=None):
    """Convert a resource into an absolute filename.

    Resource names are in the form: 'filename.ext'
    or 'path/filename.ext'

    The following locations are tried in order, and the first one that is
    an existing file is returned:

        1. res_name itself (e.g. a fully qualified user setting)
        2. ~/.mycroft/res_name
        3. data_dir/res_name, where data_dir defaults to /opt/mycroft/res/
        4. the 'res' folder next to this source file

    Example:
        With mycroft running as the user 'bob', if you called
            resolve_resource_file('snd/beep.wav')
        it would return either '/home/bob/.mycroft/snd/beep.wav' or
        '/opt/mycroft/res/snd/beep.wav' or '.../res/snd/beep.wav',
        where the '...' is replaced by the path where the package has
        been installed.

    Args:
        res_name (str): a resource path/name
        data_dir (str, optional): folder used instead of /opt/mycroft/res/
    Returns:
        str: path to resource or None if no resource found
    """
    def _candidates():
        # Lazy on purpose: a location is only computed when every earlier
        # one has already been rejected.
        yield res_name
        yield os.path.expanduser("~/.mycroft/" + res_name)
        base_dir = data_dir or os.path.expanduser("/opt/mycroft/res/")
        yield os.path.expanduser(os.path.join(base_dir, res_name))
        packaged = os.path.join(os.path.dirname(__file__), 'res', res_name)
        yield os.path.abspath(os.path.normpath(packaged))

    for candidate in _candidates():
        if os.path.isfile(candidate):
            return candidate
    return None  # Resource cannot be resolved
|
||||||
|
|
||||||
|
|
||||||
|
def lookup_variant(mappings, key="variant"):
    """function decorator
    maps strings to Enums expected by language specific functions
    mappings can be used to translate values read from configuration files

    Only a keyword argument named `key` whose value is a str is translated.
    Positional arguments and non-string values are passed through untouched.

    Example usage:

        @lookup_variant({
            "default": TimeVariant.DEFAULT,
            "traditional": TimeVariant.TRADITIONAL
        })
        def nice_time_XX(dt, speech=True, use_24hour=False, use_ampm=False,
                         variant=None):
            variant = variant or TimeVariant.DEFAULT
            (...)

    Args:
        mappings (dict): maps each accepted string to the value handed to
            the decorated function
        key (str): name of the keyword argument to translate

    Raises:
        ValueError: at decoration time if `mappings` is not a dict; at call
            time if the string passed as `key` has no entry in `mappings`
    """
    if not isinstance(mappings, dict):
        raise ValueError("lookup_variant expects 'mappings' to be a dict, "
                         "got " + type(mappings).__name__)

    # Begin wrapper
    def lang_variant_function_decorator(func):

        @wraps(func)
        def call_function(*args, **kwargs):
            requested = kwargs.get(key)
            if isinstance(requested, str):
                if requested not in mappings:
                    # Report the offending value, not the keyword's name.
                    raise ValueError("Unknown variant, mapping does not "
                                     "exist for {v}".format(v=requested))
                kwargs[key] = mappings[requested]
            return func(*args, **kwargs)

        return call_function

    return lang_variant_function_decorator
|
||||||
72
lingua_franca/lang/__init__.py
Normal file
72
lingua_franca/lang/__init__.py
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
from warnings import warn
|
||||||
|
from lingua_franca.internal import get_default_lang, \
|
||||||
|
set_default_lang, get_primary_lang_code as gplc, get_full_lang_code as gflc
|
||||||
|
|
||||||
|
|
||||||
|
def get_active_lang():
    """ Get the active language code (deprecated entry point)

    Emits a DeprecationWarning and returns the result of
    `lingua_franca.internal.get_default_lang()`.

    Returns:
        str: the default language code as reported by `get_default_lang()`
    """
    # The parentheses make the three fragments a single string; as separate
    # statements only the first fragment ended up in the warning.
    _getlang = ("Direct imports from lingua_franca.lang"
                " have been deprecated. Use"
                " lingua_franca.get_default_lang()")
    warn(_getlang, DeprecationWarning)
    return get_default_lang()
|
||||||
|
|
||||||
|
|
||||||
|
def set_active_lang(lang_code):
    """Set the active BCP-47 language code to be used in formatting/parsing.

    Deprecated shim kept for callers that still import from
    ``lingua_franca.lang``: it emits a ``DeprecationWarning`` and then
    forwards to ``set_default_lang()`` from ``lingua_franca.internal``.

    Args:
        lang_code (str): BCP-47 language code, e.g. "en-us" or "es-mx"
    """
    # The fragments must live inside one parenthesised expression; written
    # as three separate statements only the first fragment was assigned and
    # the warning text was cut off after "lingua_franca.lang".
    _setlang = ("Direct imports from lingua_franca.lang"
                " have been deprecated. Use"
                " lingua_franca.set_default_lang()")
    warn(_setlang, DeprecationWarning)
    set_default_lang(lang_code=lang_code)
|
||||||
|
|
||||||
|
|
||||||
|
def get_primary_lang_code(lang=None):
    """Return the primary language code for ``lang``.

    Deprecated shim: raises a ``DeprecationWarning`` and delegates to
    ``get_primary_lang_code`` from ``lingua_franca.internal`` (imported
    in this module as ``gplc``).

    Args:
        lang (str, optional): A BCP-47 language code, or None for default

    Returns:
        str: A primary language family, such as "en", "de" or "pt"
    """
    deprecation_notice = (
        "Direct imports from lingua_franca.lang have been deprecated. Use"
        " lingua_franca.get_primary_lang_code()"
    )
    warn(deprecation_notice, DeprecationWarning)
    primary_code = gplc(lang=lang)
    return primary_code
|
||||||
|
|
||||||
|
|
||||||
|
def get_full_lang_code(lang=None):
    """Return the full language code for ``lang``.

    Deprecated shim: raises a ``DeprecationWarning`` and delegates to
    ``get_full_lang_code`` from ``lingua_franca.internal`` (imported in
    this module as ``gflc``).

    Args:
        lang (str, optional): A BCP-47 language code, or None for default

    Returns:
        str: A full language code, such as "en-us" or "de-de"
    """
    deprecation_notice = (
        "Direct imports from lingua_franca.lang have been deprecated. Use"
        " lingua_franca.get_full_lang_code()"
    )
    warn(deprecation_notice, DeprecationWarning)
    full_code = gflc(lang=lang)
    return full_code
|
||||||
197
lingua_franca/lang/common_data_ca.py
Normal file
197
lingua_franca/lang/common_data_ca.py
Normal file
@@ -0,0 +1,197 @@
|
|||||||
|
_FUNCTION_NOT_IMPLEMENTED_WARNING = "aquesta funció encara no s'ha implementat en 'ca'"
|
||||||
|
|
||||||
|
# Indefinite articles ["un", "una", "uns", "unes"] cannot be suppressed,
|
||||||
|
# in CA, "un cavall" means "a horse" or "one horse".
|
||||||
|
|
||||||
|
_ARTICLES_CA = ["el", "la", "l", "lo", "els", "les", "los"]
|
||||||
|
|
||||||
|
# word rules for gender
|
||||||
|
_FEMALE_ENDINGS_CA = ["a", "esa", "essa", "esses", "eses", "ena", "enes",
|
||||||
|
"ques", "asi", "esi", "isi", "osi", "ut", "at",
|
||||||
|
"eta", "etes", "tja", "tges", "ica", "iques",
|
||||||
|
"ada", "ades"]
|
||||||
|
_MALE_ENDINGS_CA = ["o", "os", "ll", "lls", "ig", "igs", "itjos", "rs",
|
||||||
|
"et", "ets", "ès", "ns", "ic", "ics", "at", "ats"]
|
||||||
|
|
||||||
|
# special cases, word lookup for words not covered by above rule
|
||||||
|
_GENDERS_CA = {
|
||||||
|
"dones": "f",
|
||||||
|
"home": "m",
|
||||||
|
"pell": "f",
|
||||||
|
"pells": "f"
|
||||||
|
}
|
||||||
|
|
||||||
|
# context rules for gender
|
||||||
|
_MALE_DETERMINANTS_CA = ["el", "els", "l", "lo", "es", "aquest", "aquests",
|
||||||
|
"aquell", "aquells", "aqueix", "aqueixos",
|
||||||
|
"algun", "alguns", "este", "estos", "altre",
|
||||||
|
"mon", "mos", "mons", "meus", "meus"]
|
||||||
|
_FEMALE_DETERMINANTS_CA = ["la", "les", "sa", "ses", "aquesta", "aquestes",
|
||||||
|
"aquella", "aquelles", "aqueixa", "aqueixes",
|
||||||
|
"alguna", "algunes", "esta", "estes", "altra",
|
||||||
|
"ma", "mes", "meva", "meua", "meves"]
|
||||||
|
|
||||||
|
_NUMBERS_CA = {
|
||||||
|
"zero": 0,
|
||||||
|
"u": 1,
|
||||||
|
"un": 1,
|
||||||
|
"una": 1,
|
||||||
|
"uns": 1,
|
||||||
|
"unes": 1,
|
||||||
|
"primer": 1,
|
||||||
|
"primera": 1,
|
||||||
|
"segon": 2,
|
||||||
|
"segona": 2,
|
||||||
|
"tercer": 3,
|
||||||
|
"tercera": 3,
|
||||||
|
"dos": 2,
|
||||||
|
"dues": 2,
|
||||||
|
"tres": 3,
|
||||||
|
"quatre": 4,
|
||||||
|
"cinc": 5,
|
||||||
|
"sis": 6,
|
||||||
|
"set": 7,
|
||||||
|
"vuit": 8,
|
||||||
|
"huit": 8,
|
||||||
|
"nou": 9,
|
||||||
|
"deu": 10,
|
||||||
|
"onze": 11,
|
||||||
|
"dotze": 12,
|
||||||
|
"tretze": 13,
|
||||||
|
"catorze": 14,
|
||||||
|
"quinze": 15,
|
||||||
|
"setze": 16,
|
||||||
|
"disset": 17,
|
||||||
|
"divuit": 18,
|
||||||
|
"dinou": 19,
|
||||||
|
"vint": 20,
|
||||||
|
"trenta": 30,
|
||||||
|
"quaranta": 40,
|
||||||
|
"cinquanta": 50,
|
||||||
|
"seixanta": 60,
|
||||||
|
"setanta": 70,
|
||||||
|
"vuitanta": 80,
|
||||||
|
"noranta": 90,
|
||||||
|
"cent": 100,
|
||||||
|
"cents": 100,
|
||||||
|
"dos-cents": 200,
|
||||||
|
"dues-centes": 200,
|
||||||
|
"tres-cents": 300,
|
||||||
|
"tres-centes": 300,
|
||||||
|
"quatre-cents": 400,
|
||||||
|
"quatre-centes": 400,
|
||||||
|
"cinc-cents": 500,
|
||||||
|
"cinc-centes": 500,
|
||||||
|
"sis-cents": 600,
|
||||||
|
"sis-centes": 600,
|
||||||
|
"set--cents": 700,
|
||||||
|
"set-centes": 700,
|
||||||
|
"vuit-cents": 800,
|
||||||
|
"vuit-centes": 800,
|
||||||
|
"nou-cents": 900,
|
||||||
|
"nou-centes": 900,
|
||||||
|
"mil": 1000,
|
||||||
|
"milió": 1000000
|
||||||
|
}
|
||||||
|
|
||||||
|
_FRACTION_STRING_CA = {
|
||||||
|
2: 'mig',
|
||||||
|
3: 'terç',
|
||||||
|
4: 'quart',
|
||||||
|
5: 'cinquè',
|
||||||
|
6: 'sisè',
|
||||||
|
7: 'setè',
|
||||||
|
8: 'vuitè',
|
||||||
|
9: 'novè',
|
||||||
|
10: 'desè',
|
||||||
|
11: 'onzè',
|
||||||
|
12: 'dotzè',
|
||||||
|
13: 'tretzè',
|
||||||
|
14: 'catorzè',
|
||||||
|
15: 'quinzè',
|
||||||
|
16: 'setzè',
|
||||||
|
17: 'dissetè',
|
||||||
|
18: 'divuitè',
|
||||||
|
19: 'dinovè',
|
||||||
|
20: 'vintè',
|
||||||
|
30: 'trentè',
|
||||||
|
100: 'centè',
|
||||||
|
1000: 'milè'
|
||||||
|
}
|
||||||
|
|
||||||
|
_NUM_STRING_CA = {
|
||||||
|
0: 'zero',
|
||||||
|
1: 'un',
|
||||||
|
2: 'dos',
|
||||||
|
3: 'tres',
|
||||||
|
4: 'quatre',
|
||||||
|
5: 'cinc',
|
||||||
|
6: 'sis',
|
||||||
|
7: 'set',
|
||||||
|
8: 'vuit',
|
||||||
|
9: 'nou',
|
||||||
|
10: 'deu',
|
||||||
|
11: 'onze',
|
||||||
|
12: 'dotze',
|
||||||
|
13: 'tretze',
|
||||||
|
14: 'catorze',
|
||||||
|
15: 'quinze',
|
||||||
|
16: 'setze',
|
||||||
|
17: 'disset',
|
||||||
|
18: 'divuit',
|
||||||
|
19: 'dinou',
|
||||||
|
20: 'vint',
|
||||||
|
30: 'trenta',
|
||||||
|
40: 'quaranta',
|
||||||
|
50: 'cinquanta',
|
||||||
|
60: 'seixanta',
|
||||||
|
70: 'setanta',
|
||||||
|
80: 'vuitanta',
|
||||||
|
90: 'noranta'
|
||||||
|
}
|
||||||
|
|
||||||
|
_TENS_CA = {
|
||||||
|
"vint": 20,
|
||||||
|
"trenta": 30,
|
||||||
|
"quaranta": 40,
|
||||||
|
"cinquanta": 50,
|
||||||
|
"seixanta": 60,
|
||||||
|
"setanta": 70,
|
||||||
|
"vuitanta": 80,
|
||||||
|
"huitanta": 80,
|
||||||
|
"noranta": 90
|
||||||
|
}
|
||||||
|
|
||||||
|
_AFTER_TENS_CA = {
|
||||||
|
"u": 1,
|
||||||
|
"un": 1,
|
||||||
|
"dos": 2,
|
||||||
|
"dues": 2,
|
||||||
|
"tres": 3,
|
||||||
|
"quatre": 4,
|
||||||
|
"cinc": 5,
|
||||||
|
"sis": 6,
|
||||||
|
"set": 7,
|
||||||
|
"vuit": 8,
|
||||||
|
"huit": 8,
|
||||||
|
"nou": 9
|
||||||
|
}
|
||||||
|
|
||||||
|
_BEFORE_HUNDREDS_CA = {
|
||||||
|
"dos": 2,
|
||||||
|
"dues": 2,
|
||||||
|
"tres": 3,
|
||||||
|
"quatre": 4,
|
||||||
|
"cinc": 5,
|
||||||
|
"sis": 6,
|
||||||
|
"set": 7,
|
||||||
|
"vuit": 8,
|
||||||
|
"huit": 8,
|
||||||
|
"nou": 9,
|
||||||
|
}
|
||||||
|
|
||||||
|
_HUNDREDS_CA = {
|
||||||
|
"cent": 100,
|
||||||
|
"cents": 100,
|
||||||
|
"centes": 100
|
||||||
|
}
|
||||||
305
lingua_franca/lang/common_data_cs.py
Normal file
305
lingua_franca/lang/common_data_cs.py
Normal file
@@ -0,0 +1,305 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
|
|
||||||
|
#_ARTICLES_CS = {}
|
||||||
|
|
||||||
|
|
||||||
|
_NUM_STRING_CS = {
|
||||||
|
0: 'nula',
|
||||||
|
1: 'jedna',
|
||||||
|
2: 'dva',
|
||||||
|
3: 'tři',
|
||||||
|
4: 'čtyři',
|
||||||
|
5: 'pět',
|
||||||
|
6: 'šest',
|
||||||
|
7: 'sedm',
|
||||||
|
8: 'osm',
|
||||||
|
9: 'devět',
|
||||||
|
10: 'deset',
|
||||||
|
11: 'jedenáct',
|
||||||
|
12: 'dvanáct',
|
||||||
|
13: 'třináct',
|
||||||
|
14: 'čtrnáct',
|
||||||
|
15: 'patnáct',
|
||||||
|
16: 'šestnáct',
|
||||||
|
17: 'sedmnáct',
|
||||||
|
18: 'osmnáct',
|
||||||
|
19: 'devatenáct',
|
||||||
|
20: 'dvacet',
|
||||||
|
30: 'třicet',
|
||||||
|
40: 'čtyřicet',
|
||||||
|
50: 'padesát',
|
||||||
|
60: 'šedesát',
|
||||||
|
70: 'sedmdesát',
|
||||||
|
80: 'osmdesát',
|
||||||
|
90: 'devadesát'
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
_FRACTION_STRING_CS = {
|
||||||
|
2: 'polovina',
|
||||||
|
3: 'třetina',
|
||||||
|
4: 'čtvrtina',
|
||||||
|
5: 'pětina',
|
||||||
|
6: 'šestina',
|
||||||
|
7: 'sedmina',
|
||||||
|
8: 'osmina',
|
||||||
|
9: 'devítina',
|
||||||
|
10: 'desetina',
|
||||||
|
11: 'jedenáctina',
|
||||||
|
12: 'dvanáctina',
|
||||||
|
13: 'třináctina',
|
||||||
|
14: 'čtrnáctina',
|
||||||
|
15: 'patnáctina',
|
||||||
|
16: 'šestnáctina',
|
||||||
|
17: 'sedmnáctina',
|
||||||
|
18: 'osmnáctina',
|
||||||
|
19: 'devatenáctina',
|
||||||
|
20: 'dvacetina',
|
||||||
|
30: 'třicetina',
|
||||||
|
40: 'čtyřicetina',
|
||||||
|
50: 'padesátina',
|
||||||
|
60: 'šedesátina',
|
||||||
|
70: 'sedmdesátina',
|
||||||
|
80: 'osmdesátina',
|
||||||
|
90: 'devadesátina',
|
||||||
|
1e2: 'setina',
|
||||||
|
1e3: 'tisícina'
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
_LONG_SCALE_CS = OrderedDict([
|
||||||
|
(100, 'sto'),
|
||||||
|
(1000, 'tisíc'),
|
||||||
|
(1000000, 'milion'),
|
||||||
|
(1e9, "miliarda"),
|
||||||
|
(1e12, "bilion"),
|
||||||
|
(1e15, "biliarda"),
|
||||||
|
(1e18, "trilion"),
|
||||||
|
(1e21, "triliarda"),
|
||||||
|
(1e24, "kvadrilion"),
|
||||||
|
(1e27, "kvadriliarda"),
|
||||||
|
(1e30, "kvintilion"),
|
||||||
|
(1e33, "kvintiliarda"),
|
||||||
|
(1e36, "sextilion"),
|
||||||
|
(1e39, "sextiliarda"),
|
||||||
|
(1e42, "septilion"),
|
||||||
|
(1e45, "septiliarda"),
|
||||||
|
(1e48, "oktilion"),
|
||||||
|
(1e51, "oktiliarda"),
|
||||||
|
(1e54, "nonilion"),
|
||||||
|
(1e57, "noniliarda"),
|
||||||
|
(1e60, "decilion"),
|
||||||
|
(1e63, "deciliarda"),
|
||||||
|
(1e120, "vigintilion"),
|
||||||
|
(1e180, "trigintilion"),
|
||||||
|
(1e303, "kvinkvagintiliarda"),
|
||||||
|
(1e600, "centilion"),
|
||||||
|
(1e603, "centiliarda")
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
|
_SHORT_SCALE_CS = OrderedDict([
|
||||||
|
(100, 'sto'),
|
||||||
|
(1000, 'tisíc'),
|
||||||
|
(1000000, 'million'),
|
||||||
|
(1e9, "billion"),
|
||||||
|
(1e12, 'trillion'),
|
||||||
|
(1e15, "quadrillion"),
|
||||||
|
(1e18, "quintillion"),
|
||||||
|
(1e21, "sextillion"),
|
||||||
|
(1e24, "septillion"),
|
||||||
|
(1e27, "octillion"),
|
||||||
|
(1e30, "nonillion"),
|
||||||
|
(1e33, "decillion"),
|
||||||
|
(1e36, "undecillion"),
|
||||||
|
(1e39, "duodecillion"),
|
||||||
|
(1e42, "tredecillion"),
|
||||||
|
(1e45, "quadrdecillion"),
|
||||||
|
(1e48, "quindecillion"),
|
||||||
|
(1e51, "sexdecillion"),
|
||||||
|
(1e54, "septendecillion"),
|
||||||
|
(1e57, "octodecillion"),
|
||||||
|
(1e60, "novemdecillion"),
|
||||||
|
(1e63, "vigintillion"),
|
||||||
|
(1e66, "unvigintillion"),
|
||||||
|
(1e69, "uuovigintillion"),
|
||||||
|
(1e72, "tresvigintillion"),
|
||||||
|
(1e75, "quattuorvigintillion"),
|
||||||
|
(1e78, "quinquavigintillion"),
|
||||||
|
(1e81, "qesvigintillion"),
|
||||||
|
(1e84, "septemvigintillion"),
|
||||||
|
(1e87, "octovigintillion"),
|
||||||
|
(1e90, "novemvigintillion"),
|
||||||
|
(1e93, "trigintillion"),
|
||||||
|
(1e96, "untrigintillion"),
|
||||||
|
(1e99, "duotrigintillion"),
|
||||||
|
(1e102, "trestrigintillion"),
|
||||||
|
(1e105, "quattuortrigintillion"),
|
||||||
|
(1e108, "quinquatrigintillion"),
|
||||||
|
(1e111, "sestrigintillion"),
|
||||||
|
(1e114, "septentrigintillion"),
|
||||||
|
(1e117, "octotrigintillion"),
|
||||||
|
(1e120, "noventrigintillion"),
|
||||||
|
(1e123, "quadragintillion"),
|
||||||
|
(1e153, "quinquagintillion"),
|
||||||
|
(1e183, "sexagintillion"),
|
||||||
|
(1e213, "septuagintillion"),
|
||||||
|
(1e243, "octogintillion"),
|
||||||
|
(1e273, "nonagintillion"),
|
||||||
|
(1e303, "centillion"),
|
||||||
|
(1e306, "uncentillion"),
|
||||||
|
(1e309, "duocentillion"),
|
||||||
|
(1e312, "trescentillion"),
|
||||||
|
(1e333, "decicentillion"),
|
||||||
|
(1e336, "undecicentillion"),
|
||||||
|
(1e363, "viginticentillion"),
|
||||||
|
(1e366, "unviginticentillion"),
|
||||||
|
(1e393, "trigintacentillion"),
|
||||||
|
(1e423, "quadragintacentillion"),
|
||||||
|
(1e453, "quinquagintacentillion"),
|
||||||
|
(1e483, "sexagintacentillion"),
|
||||||
|
(1e513, "septuagintacentillion"),
|
||||||
|
(1e543, "ctogintacentillion"),
|
||||||
|
(1e573, "nonagintacentillion"),
|
||||||
|
(1e603, "ducentillion"),
|
||||||
|
(1e903, "trecentillion"),
|
||||||
|
(1e1203, "quadringentillion"),
|
||||||
|
(1e1503, "quingentillion"),
|
||||||
|
(1e1803, "sescentillion"),
|
||||||
|
(1e2103, "septingentillion"),
|
||||||
|
(1e2403, "octingentillion"),
|
||||||
|
(1e2703, "nongentillion"),
|
||||||
|
(1e3003, "millinillion")
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
|
_ORDINAL_BASE_CS = {
|
||||||
|
1: 'první',
|
||||||
|
2: 'druhý',
|
||||||
|
3: 'třetí',
|
||||||
|
4: 'čtvrtý',
|
||||||
|
5: 'pátý',
|
||||||
|
6: 'šestý',
|
||||||
|
7: 'sedmý',
|
||||||
|
8: 'osmý',
|
||||||
|
9: 'devátý',
|
||||||
|
10: 'desátý',
|
||||||
|
11: 'jedenáctý',
|
||||||
|
12: 'dvanáctý',
|
||||||
|
13: 'třináctý',
|
||||||
|
14: 'čtrnáctý',
|
||||||
|
15: 'patnáctý',
|
||||||
|
16: 'šestnáctý',
|
||||||
|
17: 'sedmnáctý',
|
||||||
|
18: 'osmnáctý',
|
||||||
|
19: 'devatenáctý',
|
||||||
|
20: 'dvacátý',
|
||||||
|
30: 'třicátý',
|
||||||
|
40: "čtyřicátý",
|
||||||
|
50: "padesátý",
|
||||||
|
60: "šedesátý",
|
||||||
|
70: "sedmdesátý",
|
||||||
|
80: "osmdesátý",
|
||||||
|
90: "devadesátý",
|
||||||
|
1e2: "stý",
|
||||||
|
1e3: "tisící"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
_SHORT_ORDINAL_CS = {
|
||||||
|
1e6: "miliontý",
|
||||||
|
1e9: "billiontý",
|
||||||
|
1e12: "trilliontý",
|
||||||
|
1e15: "quadrilliontý",
|
||||||
|
1e18: "quintilliontý",
|
||||||
|
1e21: "sextilliontý",
|
||||||
|
1e24: "septilliontý",
|
||||||
|
1e27: "oktiliontý",
|
||||||
|
1e30: "nonilliontý",
|
||||||
|
1e33: "decilliontý"
|
||||||
|
# TODO > 1e33
|
||||||
|
}
|
||||||
|
_SHORT_ORDINAL_CS.update(_ORDINAL_BASE_CS)
|
||||||
|
|
||||||
|
|
||||||
|
_LONG_ORDINAL_CS = {
|
||||||
|
1e6: "miliontý",
|
||||||
|
1e9: "miliardtý",
|
||||||
|
1e12: "biliontý",
|
||||||
|
1e15: "biliardtý",
|
||||||
|
1e18: "triliontý",
|
||||||
|
1e21: "triliardtý",
|
||||||
|
1e24: "kvadriliontý",
|
||||||
|
1e27: "kvadriliardtý",
|
||||||
|
1e30: "kvintiliontý",
|
||||||
|
1e33: "kvintiliardtý",
|
||||||
|
1e36: "sextiliontý",
|
||||||
|
1e39: "sextiliardtý",
|
||||||
|
1e42: "septiliontý",
|
||||||
|
1e45: "septiliardtý",
|
||||||
|
1e48: "oktilion",
|
||||||
|
1e51: "oktiliardtý",
|
||||||
|
1e54: "noniliontý",
|
||||||
|
1e57: "noniliardtý",
|
||||||
|
1e60: "deciliontý"
|
||||||
|
# TODO > 1e60
|
||||||
|
}
|
||||||
|
_LONG_ORDINAL_CS.update(_ORDINAL_BASE_CS)
|
||||||
|
|
||||||
|
# Months
|
||||||
|
|
||||||
|
_MONTHS_CONVERSION = {
|
||||||
|
0: "january",
|
||||||
|
1: "february",
|
||||||
|
2: "march",
|
||||||
|
3: "april",
|
||||||
|
4: "may",
|
||||||
|
5: "june",
|
||||||
|
6: "july",
|
||||||
|
7: "august",
|
||||||
|
8: "september",
|
||||||
|
9: "october",
|
||||||
|
10: "november",
|
||||||
|
11: "december"
|
||||||
|
}
|
||||||
|
|
||||||
|
_MONTHS_CZECH = ['leden', 'únor', 'březen', 'duben', 'květen', 'červen',
|
||||||
|
'červenec', 'srpen', 'září', 'říjen', 'listopad',
|
||||||
|
'prosinec']
|
||||||
|
|
||||||
|
# Time
|
||||||
|
_TIME_UNITS_CONVERSION = {
|
||||||
|
'mikrosekund': 'microseconds',
|
||||||
|
'milisekund': 'milliseconds',
|
||||||
|
'sekundu': 'seconds',
|
||||||
|
'sekundy': 'seconds',
|
||||||
|
'sekund': 'seconds',
|
||||||
|
'minutu': 'minutes',
|
||||||
|
'minuty': 'minutes',
|
||||||
|
'minut': 'minutes',
|
||||||
|
'hodin': 'hours',
|
||||||
|
'den': 'days', # 1 day
|
||||||
|
'dny': 'days', # 2-4 days
|
||||||
|
'dnů': 'days', # 5+ days
|
||||||
|
'dní': 'days', # 5+ days - different inflection
|
||||||
|
'dne': 'days', # a half day
|
||||||
|
'týden': 'weeks',
|
||||||
|
'týdny': 'weeks',
|
||||||
|
'týdnů': 'weeks'
|
||||||
|
}
|
||||||
133
lingua_franca/lang/common_data_da.py
Normal file
133
lingua_franca/lang/common_data_da.py
Normal file
@@ -0,0 +1,133 @@
|
|||||||
|
_FUNCTION_NOT_IMPLEMENTED_WARNING = "Denne funktion er ikke implementeret i 'dk'."
|
||||||
|
|
||||||
|
_DA_NUMBERS = {
|
||||||
|
'nul': 0,
|
||||||
|
'en': 1,
|
||||||
|
'et': 1,
|
||||||
|
'to': 2,
|
||||||
|
'tre': 3,
|
||||||
|
'fire': 4,
|
||||||
|
'fem': 5,
|
||||||
|
'seks': 6,
|
||||||
|
'syv': 7,
|
||||||
|
'otte': 8,
|
||||||
|
'ni': 9,
|
||||||
|
'ti': 10,
|
||||||
|
'elve': 11,
|
||||||
|
'tolv': 12,
|
||||||
|
'tretten': 13,
|
||||||
|
'fjorten': 14,
|
||||||
|
'femten': 15,
|
||||||
|
'seksten': 16,
|
||||||
|
'sytten': 17,
|
||||||
|
'atten': 18,
|
||||||
|
'nitten': 19,
|
||||||
|
'tyve': 20,
|
||||||
|
'enogtyve': 21,
|
||||||
|
'toogtyve': 22,
|
||||||
|
'treogtyve': 23,
|
||||||
|
'fireogtyve': 24,
|
||||||
|
'femogtyve': 25,
|
||||||
|
'seksogtyve': 26,
|
||||||
|
'syvogtyve': 27,
|
||||||
|
'otteogtyve': 28,
|
||||||
|
'niogtyve': 29,
|
||||||
|
'tredive': 30,
|
||||||
|
'enogtredive': 31,
|
||||||
|
'fyrrre': 40,
|
||||||
|
'halvtres': 50,
|
||||||
|
'tres': 60,
|
||||||
|
'halvfjers': 70,
|
||||||
|
'firs': 80,
|
||||||
|
'halvfems': 90,
|
||||||
|
'hunderede': 100,
|
||||||
|
'tohundrede': 200,
|
||||||
|
'trehundrede': 300,
|
||||||
|
'firehundrede': 400,
|
||||||
|
'femhundrede': 500,
|
||||||
|
'sekshundrede': 600,
|
||||||
|
'syvhundrede': 700,
|
||||||
|
'ottehundrede': 800,
|
||||||
|
'nihundrede': 900,
|
||||||
|
'tusinde': 1000,
|
||||||
|
'million': 1000000
|
||||||
|
}
|
||||||
|
|
||||||
|
_MONTHS_DA = ['januar', 'februar', 'märz', 'april', 'mai', 'juni',
|
||||||
|
'juli', 'august', 'september', 'oktober', 'november',
|
||||||
|
'dezember']
|
||||||
|
|
||||||
|
_NUM_STRING_DA = {
|
||||||
|
0: 'nul',
|
||||||
|
1: 'en',
|
||||||
|
2: 'to',
|
||||||
|
3: 'tre',
|
||||||
|
4: 'fire',
|
||||||
|
5: 'fem',
|
||||||
|
6: 'seks',
|
||||||
|
7: 'syv',
|
||||||
|
8: 'otte',
|
||||||
|
9: 'ni',
|
||||||
|
10: 'ti',
|
||||||
|
11: 'elve',
|
||||||
|
12: 'tolv',
|
||||||
|
13: 'tretten',
|
||||||
|
14: 'fjorten',
|
||||||
|
15: 'femten',
|
||||||
|
16: 'seksten',
|
||||||
|
17: 'sytten',
|
||||||
|
18: 'atten',
|
||||||
|
19: 'nitten',
|
||||||
|
20: 'tyve',
|
||||||
|
30: 'tredive',
|
||||||
|
40: 'fyrre',
|
||||||
|
50: 'halvtres',
|
||||||
|
60: 'tres',
|
||||||
|
70: 'halvfjers',
|
||||||
|
80: 'firs',
|
||||||
|
90: 'halvfems',
|
||||||
|
100: 'hundrede'
|
||||||
|
}
|
||||||
|
|
||||||
|
_NUM_POWERS_OF_TEN = [
|
||||||
|
'hundred',
|
||||||
|
'tusind',
|
||||||
|
'million',
|
||||||
|
'milliard',
|
||||||
|
'billion',
|
||||||
|
'billiard',
|
||||||
|
'trillion',
|
||||||
|
'trilliard'
|
||||||
|
]
|
||||||
|
|
||||||
|
_FRACTION_STRING_DA = {
|
||||||
|
2: 'halv',
|
||||||
|
3: 'trediedel',
|
||||||
|
4: 'fjerdedel',
|
||||||
|
5: 'femtedel',
|
||||||
|
6: 'sjettedel',
|
||||||
|
7: 'syvendedel',
|
||||||
|
8: 'ottendedel',
|
||||||
|
9: 'niendedel',
|
||||||
|
10: 'tiendedel',
|
||||||
|
11: 'elftedel',
|
||||||
|
12: 'tolvtedel',
|
||||||
|
13: 'trettendedel',
|
||||||
|
14: 'fjortendedel',
|
||||||
|
15: 'femtendedel',
|
||||||
|
16: 'sejstendedel',
|
||||||
|
17: 'syttendedel',
|
||||||
|
18: 'attendedel',
|
||||||
|
19: 'nittendedel',
|
||||||
|
20: 'tyvendedel'
|
||||||
|
}
|
||||||
|
|
||||||
|
# Numbers below 1 million are written in one word in Danish, yielding very
|
||||||
|
# long words
|
||||||
|
# In some circumstances it may be better to separate individual words
|
||||||
|
# Set _EXTRA_SPACE_DA=" " for separating numbers below 1 million (
|
||||||
|
# orthographically incorrect)
|
||||||
|
# Set _EXTRA_SPACE_DA="" for correct spelling, this is standard
|
||||||
|
|
||||||
|
# _EXTRA_SPACE_DA = " "
|
||||||
|
_EXTRA_SPACE_DA = ""
|
||||||
135
lingua_franca/lang/common_data_de.py
Normal file
135
lingua_franca/lang/common_data_de.py
Normal file
@@ -0,0 +1,135 @@
|
|||||||
|
_DE_NUMBERS = {
|
||||||
|
'null': 0,
|
||||||
|
'ein': 1,
|
||||||
|
'eins': 1,
|
||||||
|
'eine': 1,
|
||||||
|
'einer': 1,
|
||||||
|
'einem': 1,
|
||||||
|
'einen': 1,
|
||||||
|
'eines': 1,
|
||||||
|
'zwei': 2,
|
||||||
|
'drei': 3,
|
||||||
|
'vier': 4,
|
||||||
|
'fünf': 5,
|
||||||
|
'sechs': 6,
|
||||||
|
'sieben': 7,
|
||||||
|
'acht': 8,
|
||||||
|
'neun': 9,
|
||||||
|
'zehn': 10,
|
||||||
|
'elf': 11,
|
||||||
|
'zwölf': 12,
|
||||||
|
'dreizehn': 13,
|
||||||
|
'vierzehn': 14,
|
||||||
|
'fünfzehn': 15,
|
||||||
|
'sechzehn': 16,
|
||||||
|
'siebzehn': 17,
|
||||||
|
'achtzehn': 18,
|
||||||
|
'neunzehn': 19,
|
||||||
|
'zwanzig': 20,
|
||||||
|
'einundzwanzig': 21,
|
||||||
|
'zweiundzwanzig': 22,
|
||||||
|
'dreiundzwanzig': 23,
|
||||||
|
'vierundzwanzig': 24,
|
||||||
|
'fünfundzwanzig': 25,
|
||||||
|
'sechsundzwanzig': 26,
|
||||||
|
'siebenundzwanzig': 27,
|
||||||
|
'achtundzwanzig': 28,
|
||||||
|
'neunundzwanzig': 29,
|
||||||
|
'dreißig': 30,
|
||||||
|
'einunddreißig': 31,
|
||||||
|
'vierzig': 40,
|
||||||
|
'fünfzig': 50,
|
||||||
|
'sechzig': 60,
|
||||||
|
'siebzig': 70,
|
||||||
|
'achtzig': 80,
|
||||||
|
'neunzig': 90,
|
||||||
|
'hundert': 100,
|
||||||
|
'zweihundert': 200,
|
||||||
|
'dreihundert': 300,
|
||||||
|
'vierhundert': 400,
|
||||||
|
'fünfhundert': 500,
|
||||||
|
'sechshundert': 600,
|
||||||
|
'siebenhundert': 700,
|
||||||
|
'achthundert': 800,
|
||||||
|
'neunhundert': 900,
|
||||||
|
'tausend': 1000,
|
||||||
|
'million': 1000000
|
||||||
|
}
|
||||||
|
|
||||||
|
_MONTHS_DE = ['januar', 'februar', 'märz', 'april', 'mai', 'juni',
|
||||||
|
'juli', 'august', 'september', 'oktober', 'november',
|
||||||
|
'dezember']
|
||||||
|
|
||||||
|
_NUM_STRING_DE = {
|
||||||
|
0: 'null',
|
||||||
|
1: 'ein', # ein Viertel etc., nicht eins Viertel
|
||||||
|
2: 'zwei',
|
||||||
|
3: 'drei',
|
||||||
|
4: 'vier',
|
||||||
|
5: 'fünf',
|
||||||
|
6: 'sechs',
|
||||||
|
7: 'sieben',
|
||||||
|
8: 'acht',
|
||||||
|
9: 'neun',
|
||||||
|
10: 'zehn',
|
||||||
|
11: 'elf',
|
||||||
|
12: 'zwölf',
|
||||||
|
13: 'dreizehn',
|
||||||
|
14: 'vierzehn',
|
||||||
|
15: 'fünfzehn',
|
||||||
|
16: 'sechzehn',
|
||||||
|
17: 'siebzehn',
|
||||||
|
18: 'achtzehn',
|
||||||
|
19: 'neunzehn',
|
||||||
|
20: 'zwanzig',
|
||||||
|
30: 'dreißig',
|
||||||
|
40: 'vierzig',
|
||||||
|
50: 'fünfzig',
|
||||||
|
60: 'sechzig',
|
||||||
|
70: 'siebzig',
|
||||||
|
80: 'achtzig',
|
||||||
|
90: 'neunzig',
|
||||||
|
100: 'hundert'
|
||||||
|
}
|
||||||
|
|
||||||
|
# German uses "long scale" https://en.wikipedia.org/wiki/Long_and_short_scales
|
||||||
|
# Currently, numbers are limited to 1000000000000000000000000,
|
||||||
|
# but _NUM_POWERS_OF_TEN can be extended to include additional number words
|
||||||
|
|
||||||
|
|
||||||
|
_NUM_POWERS_OF_TEN_DE = [
|
||||||
|
'', 'tausend', 'Million', 'Milliarde', 'Billion', 'Billiarde', 'Trillion',
|
||||||
|
'Trilliarde'
|
||||||
|
]
|
||||||
|
|
||||||
|
_FRACTION_STRING_DE = {
|
||||||
|
2: 'halb',
|
||||||
|
3: 'drittel',
|
||||||
|
4: 'viertel',
|
||||||
|
5: 'fünftel',
|
||||||
|
6: 'sechstel',
|
||||||
|
7: 'siebtel',
|
||||||
|
8: 'achtel',
|
||||||
|
9: 'neuntel',
|
||||||
|
10: 'zehntel',
|
||||||
|
11: 'elftel',
|
||||||
|
12: 'zwölftel',
|
||||||
|
13: 'dreizehntel',
|
||||||
|
14: 'vierzehntel',
|
||||||
|
15: 'fünfzehntel',
|
||||||
|
16: 'sechzehntel',
|
||||||
|
17: 'siebzehntel',
|
||||||
|
18: 'achtzehntel',
|
||||||
|
19: 'neunzehntel',
|
||||||
|
20: 'zwanzigstel'
|
||||||
|
}
|
||||||
|
|
||||||
|
# Numbers below 1 million are written in one word in German, yielding very
# long words
# In some circumstances it may be better to separate individual words
# Set _EXTRA_SPACE_DE=" " for separating numbers below 1 million (
# orthographically incorrect)
# Set _EXTRA_SPACE_DE="" for correct spelling, this is standard

# _EXTRA_SPACE_DE = " "
|
||||||
|
_EXTRA_SPACE_DE = ""
|
||||||
297
lingua_franca/lang/common_data_en.py
Normal file
297
lingua_franca/lang/common_data_en.py
Normal file
@@ -0,0 +1,297 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
from collections import OrderedDict
|
||||||
|
from .parse_common import invert_dict
|
||||||
|
|
||||||
|
_FUNCTION_NOT_IMPLEMENTED_WARNING = "The requested function is not implemented in English."
|
||||||
|
|
||||||
|
_ARTICLES_EN = {'a', 'an', 'the'}
|
||||||
|
|
||||||
|
|
||||||
|
_NUM_STRING_EN = {
|
||||||
|
0: 'zero',
|
||||||
|
1: 'one',
|
||||||
|
2: 'two',
|
||||||
|
3: 'three',
|
||||||
|
4: 'four',
|
||||||
|
5: 'five',
|
||||||
|
6: 'six',
|
||||||
|
7: 'seven',
|
||||||
|
8: 'eight',
|
||||||
|
9: 'nine',
|
||||||
|
10: 'ten',
|
||||||
|
11: 'eleven',
|
||||||
|
12: 'twelve',
|
||||||
|
13: 'thirteen',
|
||||||
|
14: 'fourteen',
|
||||||
|
15: 'fifteen',
|
||||||
|
16: 'sixteen',
|
||||||
|
17: 'seventeen',
|
||||||
|
18: 'eighteen',
|
||||||
|
19: 'nineteen',
|
||||||
|
20: 'twenty',
|
||||||
|
30: 'thirty',
|
||||||
|
40: 'forty',
|
||||||
|
50: 'fifty',
|
||||||
|
60: 'sixty',
|
||||||
|
70: 'seventy',
|
||||||
|
80: 'eighty',
|
||||||
|
90: 'ninety'
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
_FRACTION_STRING_EN = {
|
||||||
|
2: 'half',
|
||||||
|
3: 'third',
|
||||||
|
4: 'forth',
|
||||||
|
5: 'fifth',
|
||||||
|
6: 'sixth',
|
||||||
|
7: 'seventh',
|
||||||
|
8: 'eigth',
|
||||||
|
9: 'ninth',
|
||||||
|
10: 'tenth',
|
||||||
|
11: 'eleventh',
|
||||||
|
12: 'twelveth',
|
||||||
|
13: 'thirteenth',
|
||||||
|
14: 'fourteenth',
|
||||||
|
15: 'fifteenth',
|
||||||
|
16: 'sixteenth',
|
||||||
|
17: 'seventeenth',
|
||||||
|
18: 'eighteenth',
|
||||||
|
19: 'nineteenth',
|
||||||
|
20: 'twentyith'
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
_LONG_SCALE_EN = OrderedDict([
|
||||||
|
(100, 'hundred'),
|
||||||
|
(1000, 'thousand'),
|
||||||
|
(1000000, 'million'),
|
||||||
|
(1e12, "billion"),
|
||||||
|
(1e18, 'trillion'),
|
||||||
|
(1e24, "quadrillion"),
|
||||||
|
(1e30, "quintillion"),
|
||||||
|
(1e36, "sextillion"),
|
||||||
|
(1e42, "septillion"),
|
||||||
|
(1e48, "octillion"),
|
||||||
|
(1e54, "nonillion"),
|
||||||
|
(1e60, "decillion"),
|
||||||
|
(1e66, "undecillion"),
|
||||||
|
(1e72, "duodecillion"),
|
||||||
|
(1e78, "tredecillion"),
|
||||||
|
(1e84, "quattuordecillion"),
|
||||||
|
(1e90, "quinquadecillion"),
|
||||||
|
(1e96, "sedecillion"),
|
||||||
|
(1e102, "septendecillion"),
|
||||||
|
(1e108, "octodecillion"),
|
||||||
|
(1e114, "novendecillion"),
|
||||||
|
(1e120, "vigintillion"),
|
||||||
|
(1e306, "unquinquagintillion"),
|
||||||
|
(1e312, "duoquinquagintillion"),
|
||||||
|
(1e336, "sesquinquagintillion"),
|
||||||
|
(1e366, "unsexagintillion")
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
|
_SHORT_SCALE_EN = OrderedDict([
|
||||||
|
(100, 'hundred'),
|
||||||
|
(1000, 'thousand'),
|
||||||
|
(1000000, 'million'),
|
||||||
|
(1e9, "billion"),
|
||||||
|
(1e12, 'trillion'),
|
||||||
|
(1e15, "quadrillion"),
|
||||||
|
(1e18, "quintillion"),
|
||||||
|
(1e21, "sextillion"),
|
||||||
|
(1e24, "septillion"),
|
||||||
|
(1e27, "octillion"),
|
||||||
|
(1e30, "nonillion"),
|
||||||
|
(1e33, "decillion"),
|
||||||
|
(1e36, "undecillion"),
|
||||||
|
(1e39, "duodecillion"),
|
||||||
|
(1e42, "tredecillion"),
|
||||||
|
(1e45, "quattuordecillion"),
|
||||||
|
(1e48, "quinquadecillion"),
|
||||||
|
(1e51, "sedecillion"),
|
||||||
|
(1e54, "septendecillion"),
|
||||||
|
(1e57, "octodecillion"),
|
||||||
|
(1e60, "novendecillion"),
|
||||||
|
(1e63, "vigintillion"),
|
||||||
|
(1e66, "unvigintillion"),
|
||||||
|
(1e69, "uuovigintillion"),
|
||||||
|
(1e72, "tresvigintillion"),
|
||||||
|
(1e75, "quattuorvigintillion"),
|
||||||
|
(1e78, "quinquavigintillion"),
|
||||||
|
(1e81, "qesvigintillion"),
|
||||||
|
(1e84, "septemvigintillion"),
|
||||||
|
(1e87, "octovigintillion"),
|
||||||
|
(1e90, "novemvigintillion"),
|
||||||
|
(1e93, "trigintillion"),
|
||||||
|
(1e96, "untrigintillion"),
|
||||||
|
(1e99, "duotrigintillion"),
|
||||||
|
(1e102, "trestrigintillion"),
|
||||||
|
(1e105, "quattuortrigintillion"),
|
||||||
|
(1e108, "quinquatrigintillion"),
|
||||||
|
(1e111, "sestrigintillion"),
|
||||||
|
(1e114, "septentrigintillion"),
|
||||||
|
(1e117, "octotrigintillion"),
|
||||||
|
(1e120, "noventrigintillion"),
|
||||||
|
(1e123, "quadragintillion"),
|
||||||
|
(1e153, "quinquagintillion"),
|
||||||
|
(1e183, "sexagintillion"),
|
||||||
|
(1e213, "septuagintillion"),
|
||||||
|
(1e243, "octogintillion"),
|
||||||
|
(1e273, "nonagintillion"),
|
||||||
|
(1e303, "centillion"),
|
||||||
|
(1e306, "uncentillion"),
|
||||||
|
(1e309, "duocentillion"),
|
||||||
|
(1e312, "trescentillion"),
|
||||||
|
(1e333, "decicentillion"),
|
||||||
|
(1e336, "undecicentillion"),
|
||||||
|
(1e363, "viginticentillion"),
|
||||||
|
(1e366, "unviginticentillion"),
|
||||||
|
(1e393, "trigintacentillion"),
|
||||||
|
(1e423, "quadragintacentillion"),
|
||||||
|
(1e453, "quinquagintacentillion"),
|
||||||
|
(1e483, "sexagintacentillion"),
|
||||||
|
(1e513, "septuagintacentillion"),
|
||||||
|
(1e543, "ctogintacentillion"),
|
||||||
|
(1e573, "nonagintacentillion"),
|
||||||
|
(1e603, "ducentillion"),
|
||||||
|
(1e903, "trecentillion"),
|
||||||
|
(1e1203, "quadringentillion"),
|
||||||
|
(1e1503, "quingentillion"),
|
||||||
|
(1e1803, "sescentillion"),
|
||||||
|
(1e2103, "septingentillion"),
|
||||||
|
(1e2403, "octingentillion"),
|
||||||
|
(1e2703, "nongentillion"),
|
||||||
|
(1e3003, "millinillion")
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
|
_ORDINAL_BASE_EN = {
|
||||||
|
1: 'first',
|
||||||
|
2: 'second',
|
||||||
|
3: 'third',
|
||||||
|
4: 'fourth',
|
||||||
|
5: 'fifth',
|
||||||
|
6: 'sixth',
|
||||||
|
7: 'seventh',
|
||||||
|
8: 'eighth',
|
||||||
|
9: 'ninth',
|
||||||
|
10: 'tenth',
|
||||||
|
11: 'eleventh',
|
||||||
|
12: 'twelfth',
|
||||||
|
13: 'thirteenth',
|
||||||
|
14: 'fourteenth',
|
||||||
|
15: 'fifteenth',
|
||||||
|
16: 'sixteenth',
|
||||||
|
17: 'seventeenth',
|
||||||
|
18: 'eighteenth',
|
||||||
|
19: 'nineteenth',
|
||||||
|
20: 'twentieth',
|
||||||
|
30: 'thirtieth',
|
||||||
|
40: "fortieth",
|
||||||
|
50: "fiftieth",
|
||||||
|
60: "sixtieth",
|
||||||
|
70: "seventieth",
|
||||||
|
80: "eightieth",
|
||||||
|
90: "ninetieth",
|
||||||
|
1e2: "hundredth",
|
||||||
|
1e3: "thousandth"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
_SHORT_ORDINAL_EN = {
|
||||||
|
1e6: "millionth",
|
||||||
|
1e9: "billionth",
|
||||||
|
1e12: "trillionth",
|
||||||
|
1e15: "quadrillionth",
|
||||||
|
1e18: "quintillionth",
|
||||||
|
1e21: "sextillionth",
|
||||||
|
1e24: "septillionth",
|
||||||
|
1e27: "octillionth",
|
||||||
|
1e30: "nonillionth",
|
||||||
|
1e33: "decillionth"
|
||||||
|
# TODO > 1e33
|
||||||
|
}
|
||||||
|
_SHORT_ORDINAL_EN.update(_ORDINAL_BASE_EN)
|
||||||
|
|
||||||
|
|
||||||
|
_LONG_ORDINAL_EN = {
|
||||||
|
1e6: "millionth",
|
||||||
|
1e12: "billionth",
|
||||||
|
1e18: "trillionth",
|
||||||
|
1e24: "quadrillionth",
|
||||||
|
1e30: "quintillionth",
|
||||||
|
1e36: "sextillionth",
|
||||||
|
1e42: "septillionth",
|
||||||
|
1e48: "octillionth",
|
||||||
|
1e54: "nonillionth",
|
||||||
|
1e60: "decillionth"
|
||||||
|
# TODO > 1e60
|
||||||
|
}
|
||||||
|
_LONG_ORDINAL_EN.update(_ORDINAL_BASE_EN)
|
||||||
|
|
||||||
|
|
||||||
|
# negate next number (-2 = 0 - 2)
|
||||||
|
_NEGATIVES_EN = {"negative", "minus"}
|
||||||
|
|
||||||
|
# sum the next number (twenty two = 20 + 2)
|
||||||
|
_SUMS_EN = {'twenty', '20', 'thirty', '30', 'forty', '40', 'fifty', '50',
|
||||||
|
'sixty', '60', 'seventy', '70', 'eighty', '80', 'ninety', '90'}
|
||||||
|
|
||||||
|
|
||||||
|
def _generate_plurals_en(originals):
|
||||||
|
"""
|
||||||
|
Return a new set or dict containing the plural form of the original values,
|
||||||
|
|
||||||
|
In English this means all with 's' appended to them.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
originals set(str) or dict(str, any): values to pluralize
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
set(str) or dict(str, any)
|
||||||
|
|
||||||
|
"""
|
||||||
|
# TODO migrate to https://github.com/MycroftAI/lingua-franca/pull/36
|
||||||
|
if isinstance(originals, dict):
|
||||||
|
return {key + 's': value for key, value in originals.items()}
|
||||||
|
return {value + "s" for value in originals}
|
||||||
|
|
||||||
|
|
||||||
|
_MULTIPLIES_LONG_SCALE_EN = set(_LONG_SCALE_EN.values()) | \
|
||||||
|
_generate_plurals_en(_LONG_SCALE_EN.values())
|
||||||
|
|
||||||
|
_MULTIPLIES_SHORT_SCALE_EN = set(_SHORT_SCALE_EN.values()) | \
|
||||||
|
_generate_plurals_en(_SHORT_SCALE_EN.values())
|
||||||
|
|
||||||
|
# split sentence parse separately and sum ( 2 and a half = 2 + 0.5 )
|
||||||
|
_FRACTION_MARKER_EN = {"and"}
|
||||||
|
|
||||||
|
# decimal marker ( 1 point 5 = 1 + 0.5)
|
||||||
|
_DECIMAL_MARKER_EN = {"point", "dot"}
|
||||||
|
|
||||||
|
_STRING_NUM_EN = invert_dict(_NUM_STRING_EN)
|
||||||
|
_STRING_NUM_EN.update(_generate_plurals_en(_STRING_NUM_EN))
|
||||||
|
|
||||||
|
_SPOKEN_EXTRA_NUM_EN = {
|
||||||
|
"half": 0.5,
|
||||||
|
"halves": 0.5,
|
||||||
|
"couple": 2
|
||||||
|
}
|
||||||
|
_STRING_SHORT_ORDINAL_EN = invert_dict(_SHORT_ORDINAL_EN)
|
||||||
|
_STRING_LONG_ORDINAL_EN = invert_dict(_LONG_ORDINAL_EN)
|
||||||
313
lingua_franca/lang/common_data_es.py
Normal file
313
lingua_franca/lang/common_data_es.py
Normal file
@@ -0,0 +1,313 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
# NOTE: This file has no use yet. It needs to be called from other functions
|
||||||
|
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
|
|
||||||
|
_ARTICLES_ES = {'el', 'la', 'los', 'las'}
|
||||||
|
|
||||||
|
_NUM_STRING_ES = {
|
||||||
|
0: 'cero',
|
||||||
|
1: 'uno',
|
||||||
|
2: 'dos',
|
||||||
|
3: 'tres',
|
||||||
|
4: 'cuatro',
|
||||||
|
5: 'cinco',
|
||||||
|
6: 'seis',
|
||||||
|
7: 'siete',
|
||||||
|
8: 'ocho',
|
||||||
|
9: 'nueve',
|
||||||
|
10: 'diez',
|
||||||
|
11: 'once',
|
||||||
|
12: 'doce',
|
||||||
|
13: 'trece',
|
||||||
|
14: 'catorce',
|
||||||
|
15: 'quince',
|
||||||
|
16: 'dieciséis',
|
||||||
|
17: 'diecisete',
|
||||||
|
18: 'dieciocho',
|
||||||
|
19: 'diecinueve',
|
||||||
|
20: 'veinte',
|
||||||
|
30: 'treinta',
|
||||||
|
40: 'cuarenta',
|
||||||
|
50: 'cincuenta',
|
||||||
|
60: 'sesenta',
|
||||||
|
70: 'setenta',
|
||||||
|
80: 'ochenta',
|
||||||
|
90: 'noventa'
|
||||||
|
}
|
||||||
|
|
||||||
|
_STRING_NUM_ES = {
|
||||||
|
"cero": 0,
|
||||||
|
"un": 1,
|
||||||
|
"uno": 1,
|
||||||
|
"una": 1,
|
||||||
|
"dos": 2,
|
||||||
|
"tres": 3,
|
||||||
|
"trés": 3,
|
||||||
|
"cuatro": 4,
|
||||||
|
"cinco": 5,
|
||||||
|
"seis": 6,
|
||||||
|
"siete": 7,
|
||||||
|
"ocho": 8,
|
||||||
|
"nueve": 9,
|
||||||
|
"diez": 10,
|
||||||
|
"once": 11,
|
||||||
|
"doce": 12,
|
||||||
|
"trece": 13,
|
||||||
|
"catorce": 14,
|
||||||
|
"quince": 15,
|
||||||
|
"dieciseis": 16,
|
||||||
|
"dieciséis": 16,
|
||||||
|
"diecisiete": 17,
|
||||||
|
"dieciocho": 18,
|
||||||
|
"diecinueve": 19,
|
||||||
|
"veinte": 20,
|
||||||
|
"veintiuno": 21,
|
||||||
|
"veintid�s": 22,
|
||||||
|
"veintitr�s": 23,
|
||||||
|
"veintidos": 22,
|
||||||
|
"veintitres": 23,
|
||||||
|
"veintitrés": 23,
|
||||||
|
"veinticuatro": 24,
|
||||||
|
"veinticinco": 25,
|
||||||
|
"veintiséis": 26,
|
||||||
|
"veintiseis": 26,
|
||||||
|
"veintisiete": 27,
|
||||||
|
"veintiocho": 28,
|
||||||
|
"veintinueve": 29,
|
||||||
|
"treinta": 30,
|
||||||
|
"cuarenta": 40,
|
||||||
|
"cincuenta": 50,
|
||||||
|
"sesenta": 60,
|
||||||
|
"setenta": 70,
|
||||||
|
"ochenta": 80,
|
||||||
|
"noventa": 90,
|
||||||
|
"cien": 100,
|
||||||
|
"ciento": 100,
|
||||||
|
"doscientos": 200,
|
||||||
|
"doscientas": 200,
|
||||||
|
"trescientos": 300,
|
||||||
|
"trescientas": 300,
|
||||||
|
"cuatrocientos": 400,
|
||||||
|
"cuatrocientas": 400,
|
||||||
|
"quinientos": 500,
|
||||||
|
"quinientas": 500,
|
||||||
|
"seiscientos": 600,
|
||||||
|
"seiscientas": 600,
|
||||||
|
"setecientos": 700,
|
||||||
|
"setecientas": 700,
|
||||||
|
"ochocientos": 800,
|
||||||
|
"ochocientas": 800,
|
||||||
|
"novecientos": 900,
|
||||||
|
"novecientas": 900,
|
||||||
|
"mil": 1000}
|
||||||
|
|
||||||
|
|
||||||
|
_FRACTION_STRING_ES = {
|
||||||
|
2: 'medio',
|
||||||
|
3: 'tercio',
|
||||||
|
4: 'cuarto',
|
||||||
|
5: 'quinto',
|
||||||
|
6: 'sexto',
|
||||||
|
7: 'séptimo',
|
||||||
|
8: 'octavo',
|
||||||
|
9: 'noveno',
|
||||||
|
10: 'décimo',
|
||||||
|
11: 'onceavo',
|
||||||
|
12: 'doceavo',
|
||||||
|
13: 'treceavo',
|
||||||
|
14: 'catorceavo',
|
||||||
|
15: 'quinceavo',
|
||||||
|
16: 'dieciseisavo',
|
||||||
|
17: 'diecisieteavo',
|
||||||
|
18: 'dieciochoavo',
|
||||||
|
19: 'diecinueveavo',
|
||||||
|
20: 'veinteavo'
|
||||||
|
}
|
||||||
|
|
||||||
|
# https://www.grobauer.at/es_eur/zahlnamen.php
|
||||||
|
_LONG_SCALE_ES = OrderedDict([
|
||||||
|
(100, 'centena'),
|
||||||
|
(1000, 'millar'),
|
||||||
|
(1000000, 'millón'),
|
||||||
|
(1e9, "millardo"),
|
||||||
|
(1e12, "billón"),
|
||||||
|
(1e18, 'trillón'),
|
||||||
|
(1e24, "cuatrillón"),
|
||||||
|
(1e30, "quintillón"),
|
||||||
|
(1e36, "sextillón"),
|
||||||
|
(1e42, "septillón"),
|
||||||
|
(1e48, "octillón"),
|
||||||
|
(1e54, "nonillón"),
|
||||||
|
(1e60, "decillón"),
|
||||||
|
(1e66, "undecillón"),
|
||||||
|
(1e72, "duodecillón"),
|
||||||
|
(1e78, "tredecillón"),
|
||||||
|
(1e84, "cuatrodecillón"),
|
||||||
|
(1e90, "quindecillón"),
|
||||||
|
(1e96, "sexdecillón"),
|
||||||
|
(1e102, "septendecillón"),
|
||||||
|
(1e108, "octodecillón"),
|
||||||
|
(1e114, "novendecillón"),
|
||||||
|
(1e120, "vigintillón"),
|
||||||
|
(1e306, "unquinquagintillón"),
|
||||||
|
(1e312, "duoquinquagintillón"),
|
||||||
|
(1e336, "sexquinquagintillón"),
|
||||||
|
(1e366, "unsexagintillón")
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
|
_SHORT_SCALE_ES = OrderedDict([
|
||||||
|
(100, 'centena'),
|
||||||
|
(1000, 'millar'),
|
||||||
|
(1000000, 'millón'),
|
||||||
|
(1e9, "billón"),
|
||||||
|
(1e12, 'trillón'),
|
||||||
|
(1e15, "cuatrillón"),
|
||||||
|
(1e18, "quintillón"),
|
||||||
|
(1e21, "sextillón"),
|
||||||
|
(1e24, "septillón"),
|
||||||
|
(1e27, "octillón"),
|
||||||
|
(1e30, "nonillón"),
|
||||||
|
(1e33, "decillón"),
|
||||||
|
(1e36, "undecillón"),
|
||||||
|
(1e39, "duodecillón"),
|
||||||
|
(1e42, "tredecillón"),
|
||||||
|
(1e45, "cuatrodecillón"),
|
||||||
|
(1e48, "quindecillón"),
|
||||||
|
(1e51, "sexdecillón"),
|
||||||
|
(1e54, "septendecillón"),
|
||||||
|
(1e57, "octodecillón"),
|
||||||
|
(1e60, "novendecillón"),
|
||||||
|
(1e63, "vigintillón"),
|
||||||
|
(1e66, "unvigintillón"),
|
||||||
|
(1e69, "uuovigintillón"),
|
||||||
|
(1e72, "tresvigintillón"),
|
||||||
|
(1e75, "quattuorvigintillón"),
|
||||||
|
(1e78, "quinquavigintillón"),
|
||||||
|
(1e81, "qesvigintillón"),
|
||||||
|
(1e84, "septemvigintillón"),
|
||||||
|
(1e87, "octovigintillón"),
|
||||||
|
(1e90, "novemvigintillón"),
|
||||||
|
(1e93, "trigintillón"),
|
||||||
|
(1e96, "untrigintillón"),
|
||||||
|
(1e99, "duotrigintillón"),
|
||||||
|
(1e102, "trestrigintillón"),
|
||||||
|
(1e105, "quattuortrigintillón"),
|
||||||
|
(1e108, "quinquatrigintillón"),
|
||||||
|
(1e111, "sestrigintillón"),
|
||||||
|
(1e114, "septentrigintillón"),
|
||||||
|
(1e117, "octotrigintillón"),
|
||||||
|
(1e120, "noventrigintillón"),
|
||||||
|
(1e123, "quadragintillón"),
|
||||||
|
(1e153, "quinquagintillón"),
|
||||||
|
(1e183, "sexagintillón"),
|
||||||
|
(1e213, "septuagintillón"),
|
||||||
|
(1e243, "octogintillón"),
|
||||||
|
(1e273, "nonagintillón"),
|
||||||
|
(1e303, "centillón"),
|
||||||
|
(1e306, "uncentillón"),
|
||||||
|
(1e309, "duocentillón"),
|
||||||
|
(1e312, "trescentillón"),
|
||||||
|
(1e333, "decicentillón"),
|
||||||
|
(1e336, "undecicentillón"),
|
||||||
|
(1e363, "viginticentillón"),
|
||||||
|
(1e366, "unviginticentillón"),
|
||||||
|
(1e393, "trigintacentillón"),
|
||||||
|
(1e423, "quadragintacentillón"),
|
||||||
|
(1e453, "quinquagintacentillón"),
|
||||||
|
(1e483, "sexagintacentillón"),
|
||||||
|
(1e513, "septuagintacentillón"),
|
||||||
|
(1e543, "ctogintacentillón"),
|
||||||
|
(1e573, "nonagintacentillón"),
|
||||||
|
(1e603, "ducentillón"),
|
||||||
|
(1e903, "trecentillón"),
|
||||||
|
(1e1203, "quadringentillón"),
|
||||||
|
(1e1503, "quingentillón"),
|
||||||
|
(1e1803, "sexcentillón"),
|
||||||
|
(1e2103, "septingentillón"),
|
||||||
|
(1e2403, "octingentillón"),
|
||||||
|
(1e2703, "nongentillón"),
|
||||||
|
(1e3003, "millinillón")
|
||||||
|
])
|
||||||
|
|
||||||
|
# TODO: female forms.
|
||||||
|
_ORDINAL_STRING_BASE_ES = {
|
||||||
|
1: 'primero',
|
||||||
|
2: 'segundo',
|
||||||
|
3: 'tercero',
|
||||||
|
4: 'cuarto',
|
||||||
|
5: 'quinto',
|
||||||
|
6: 'sexto',
|
||||||
|
7: 'séptimo',
|
||||||
|
8: 'octavo',
|
||||||
|
9: 'noveno',
|
||||||
|
10: 'décimo',
|
||||||
|
11: 'undécimo',
|
||||||
|
12: 'duodécimo',
|
||||||
|
13: 'decimotercero',
|
||||||
|
14: 'decimocuarto',
|
||||||
|
15: 'decimoquinto',
|
||||||
|
16: 'decimosexto',
|
||||||
|
17: 'decimoséptimo',
|
||||||
|
18: 'decimoctavo',
|
||||||
|
19: 'decimonoveno',
|
||||||
|
20: 'vigésimo',
|
||||||
|
30: 'trigésimo',
|
||||||
|
40: "cuadragésimo",
|
||||||
|
50: "quincuagésimo",
|
||||||
|
60: "sexagésimo",
|
||||||
|
70: "septuagésimo",
|
||||||
|
80: "octogésimo",
|
||||||
|
90: "nonagésimo",
|
||||||
|
10e3: "centésimó",
|
||||||
|
1e3: "milésimo"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
_SHORT_ORDINAL_STRING_ES = {
|
||||||
|
1e6: "millonésimo",
|
||||||
|
1e9: "milmillonésimo",
|
||||||
|
1e12: "billonésimo",
|
||||||
|
1e15: "milbillonésimo",
|
||||||
|
1e18: "trillonésimo",
|
||||||
|
1e21: "miltrillonésimo",
|
||||||
|
1e24: "cuatrillonésimo",
|
||||||
|
1e27: "milcuatrillonésimo",
|
||||||
|
1e30: "quintillonésimo",
|
||||||
|
1e33: "milquintillonésimo"
|
||||||
|
# TODO > 1e33
|
||||||
|
}
|
||||||
|
_SHORT_ORDINAL_STRING_ES.update(_ORDINAL_STRING_BASE_ES)
|
||||||
|
|
||||||
|
|
||||||
|
_LONG_ORDINAL_STRING_ES = {
|
||||||
|
1e6: "millonésimo",
|
||||||
|
1e12: "billionth",
|
||||||
|
1e18: "trillonésimo",
|
||||||
|
1e24: "cuatrillonésimo",
|
||||||
|
1e30: "quintillonésimo",
|
||||||
|
1e36: "sextillonésimo",
|
||||||
|
1e42: "septillonésimo",
|
||||||
|
1e48: "octillonésimo",
|
||||||
|
1e54: "nonillonésimo",
|
||||||
|
1e60: "decillonésimo"
|
||||||
|
# TODO > 1e60
|
||||||
|
}
|
||||||
|
_LONG_ORDINAL_STRING_ES.update(_ORDINAL_STRING_BASE_ES)
|
||||||
115
lingua_franca/lang/common_data_fa.py
Normal file
115
lingua_franca/lang/common_data_fa.py
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
from collections import OrderedDict
|
||||||
|
from .parse_common import invert_dict
|
||||||
|
|
||||||
|
_FUNCTION_NOT_IMPLEMENTED_WARNING = "تابع خواسته شده در زبان فارسی پیاده سازی نشده است."
|
||||||
|
|
||||||
|
|
||||||
|
_FRACTION_STRING_FA = {
|
||||||
|
2: 'دوم',
|
||||||
|
3: 'سوم',
|
||||||
|
4: 'چهارم',
|
||||||
|
5: 'پنجم',
|
||||||
|
6: 'ششم',
|
||||||
|
7: 'هفتم',
|
||||||
|
8: 'هشتم',
|
||||||
|
9: 'نهم',
|
||||||
|
10: 'دهم',
|
||||||
|
11: 'یازدهم',
|
||||||
|
12: 'دوازدهم',
|
||||||
|
13: 'سیزدهم',
|
||||||
|
14: 'چهاردهم',
|
||||||
|
15: 'پونزدهم',
|
||||||
|
16: 'شونزدهم',
|
||||||
|
17: 'هیفدهم',
|
||||||
|
18: 'هیجدهم',
|
||||||
|
19: 'نوزدهم',
|
||||||
|
20: 'بیستم'
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
_FARSI_ONES = [
|
||||||
|
"",
|
||||||
|
"یک",
|
||||||
|
"دو",
|
||||||
|
"سه",
|
||||||
|
"چهار",
|
||||||
|
"پنج",
|
||||||
|
"شش",
|
||||||
|
"هفت",
|
||||||
|
"هشت",
|
||||||
|
"نه",
|
||||||
|
"ده",
|
||||||
|
"یازده",
|
||||||
|
"دوازده",
|
||||||
|
"سیزده",
|
||||||
|
"چهارده",
|
||||||
|
"پونزده",
|
||||||
|
"شونزده",
|
||||||
|
"هیفده",
|
||||||
|
"هیجده",
|
||||||
|
"نوزده",
|
||||||
|
]
|
||||||
|
|
||||||
|
_FARSI_TENS = [
|
||||||
|
"",
|
||||||
|
"ده",
|
||||||
|
"بیست",
|
||||||
|
"سی",
|
||||||
|
"چهل",
|
||||||
|
"پنجاه",
|
||||||
|
"شصت",
|
||||||
|
"هفتاد",
|
||||||
|
"هشتاد",
|
||||||
|
"نود",
|
||||||
|
]
|
||||||
|
|
||||||
|
_FARSI_HUNDREDS = [
|
||||||
|
"",
|
||||||
|
"صد",
|
||||||
|
"دویست",
|
||||||
|
"سیصد",
|
||||||
|
"چهارصد",
|
||||||
|
"پانصد",
|
||||||
|
"ششصد",
|
||||||
|
"هفتصد",
|
||||||
|
"هشتصد",
|
||||||
|
"نهصد",
|
||||||
|
]
|
||||||
|
|
||||||
|
_FARSI_BIG = [
|
||||||
|
'',
|
||||||
|
'هزار',
|
||||||
|
'میلیون',
|
||||||
|
"میلیارد",
|
||||||
|
'تریلیون',
|
||||||
|
"تریلیارد",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
_FORMAL_VARIANT = {
|
||||||
|
'هفده': 'هیفده',
|
||||||
|
'هجده': 'هیجده',
|
||||||
|
'شانزده': 'شونزده',
|
||||||
|
'پانزده': 'پونزده',
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
_FARSI_FRAC = ["", "ده", "صد"]
|
||||||
|
_FARSI_FRAC_BIG = ["", "هزار", "میلیونی", "میلیاردی"]
|
||||||
|
|
||||||
|
_FARSI_SEPERATOR = ' و '
|
||||||
98
lingua_franca/lang/common_data_fr.py
Normal file
98
lingua_franca/lang/common_data_fr.py
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
# Undefined articles ["un", "une"] cannot be suppressed,
|
||||||
|
# in French, "un cheval" means "a horse" or "one horse".
|
||||||
|
_ARTICLES_FR = ["le", "la", "du", "de", "les", "des"]
|
||||||
|
|
||||||
|
_NUMBERS_FR = {
|
||||||
|
"zéro": 0,
|
||||||
|
"un": 1,
|
||||||
|
"une": 1,
|
||||||
|
"deux": 2,
|
||||||
|
"trois": 3,
|
||||||
|
"quatre": 4,
|
||||||
|
"cinq": 5,
|
||||||
|
"six": 6,
|
||||||
|
"sept": 7,
|
||||||
|
"huit": 8,
|
||||||
|
"neuf": 9,
|
||||||
|
"dix": 10,
|
||||||
|
"onze": 11,
|
||||||
|
"douze": 12,
|
||||||
|
"treize": 13,
|
||||||
|
"quatorze": 14,
|
||||||
|
"quinze": 15,
|
||||||
|
"seize": 16,
|
||||||
|
"vingt": 20,
|
||||||
|
"trente": 30,
|
||||||
|
"quarante": 40,
|
||||||
|
"cinquante": 50,
|
||||||
|
"soixante": 60,
|
||||||
|
"soixante-dix": 70,
|
||||||
|
"septante": 70,
|
||||||
|
"quatre-vingt": 80,
|
||||||
|
"quatre-vingts": 80,
|
||||||
|
"octante": 80,
|
||||||
|
"huitante": 80,
|
||||||
|
"quatre-vingt-dix": 90,
|
||||||
|
"nonante": 90,
|
||||||
|
"cent": 100,
|
||||||
|
"cents": 100,
|
||||||
|
"mille": 1000,
|
||||||
|
"mil": 1000,
|
||||||
|
"millier": 1000,
|
||||||
|
"milliers": 1000,
|
||||||
|
"million": 1000000,
|
||||||
|
"millions": 1000000,
|
||||||
|
"milliard": 1000000000,
|
||||||
|
"milliards": 1000000000}
|
||||||
|
|
||||||
|
_ORDINAL_ENDINGS_FR = ("er", "re", "ère", "nd", "nde" "ième", "ème", "e")
|
||||||
|
|
||||||
|
_NUM_STRING_FR = {
|
||||||
|
0: 'zéro',
|
||||||
|
1: 'un',
|
||||||
|
2: 'deux',
|
||||||
|
3: 'trois',
|
||||||
|
4: 'quatre',
|
||||||
|
5: 'cinq',
|
||||||
|
6: 'six',
|
||||||
|
7: 'sept',
|
||||||
|
8: 'huit',
|
||||||
|
9: 'neuf',
|
||||||
|
10: 'dix',
|
||||||
|
11: 'onze',
|
||||||
|
12: 'douze',
|
||||||
|
13: 'treize',
|
||||||
|
14: 'quatorze',
|
||||||
|
15: 'quinze',
|
||||||
|
16: 'seize',
|
||||||
|
20: 'vingt',
|
||||||
|
30: 'trente',
|
||||||
|
40: 'quarante',
|
||||||
|
50: 'cinquante',
|
||||||
|
60: 'soixante',
|
||||||
|
70: 'soixante-dix',
|
||||||
|
80: 'quatre-vingt',
|
||||||
|
90: 'quatre-vingt-dix'
|
||||||
|
}
|
||||||
|
|
||||||
|
_FRACTION_STRING_FR = {
|
||||||
|
2: 'demi',
|
||||||
|
3: 'tiers',
|
||||||
|
4: 'quart',
|
||||||
|
5: 'cinquième',
|
||||||
|
6: 'sixième',
|
||||||
|
7: 'septième',
|
||||||
|
8: 'huitième',
|
||||||
|
9: 'neuvième',
|
||||||
|
10: 'dixième',
|
||||||
|
11: 'onzième',
|
||||||
|
12: 'douzième',
|
||||||
|
13: 'treizième',
|
||||||
|
14: 'quatorzième',
|
||||||
|
15: 'quinzième',
|
||||||
|
16: 'seizième',
|
||||||
|
17: 'dix-septième',
|
||||||
|
18: 'dix-huitième',
|
||||||
|
19: 'dix-neuvième',
|
||||||
|
20: 'vingtième'
|
||||||
|
}
|
||||||
77
lingua_franca/lang/common_data_hu.py
Normal file
77
lingua_franca/lang/common_data_hu.py
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
_MONTHS_HU = ['január', 'február', 'március', 'április', 'május', 'június',
|
||||||
|
'július', 'augusztus', 'szeptember', 'október', 'november',
|
||||||
|
'december']
|
||||||
|
|
||||||
|
_NUM_STRING_HU = {
|
||||||
|
0: 'nulla',
|
||||||
|
1: 'egy',
|
||||||
|
2: 'kettő',
|
||||||
|
3: 'három',
|
||||||
|
4: 'négy',
|
||||||
|
5: 'öt',
|
||||||
|
6: 'hat',
|
||||||
|
7: 'hét',
|
||||||
|
8: 'nyolc',
|
||||||
|
9: 'kilenc',
|
||||||
|
10: 'tíz',
|
||||||
|
11: 'tizenegy',
|
||||||
|
12: 'tizenkettő',
|
||||||
|
13: 'tizenhárom',
|
||||||
|
14: 'tizennégy',
|
||||||
|
15: 'tizenöt',
|
||||||
|
16: 'tizenhat',
|
||||||
|
17: 'tizenhét',
|
||||||
|
18: 'tizennyolc',
|
||||||
|
19: 'tizenkilenc',
|
||||||
|
20: 'húsz',
|
||||||
|
30: 'harminc',
|
||||||
|
40: 'negyven',
|
||||||
|
50: 'ötven',
|
||||||
|
60: 'hatvan',
|
||||||
|
70: 'hetven',
|
||||||
|
80: 'nyolcvan',
|
||||||
|
90: 'kilencven',
|
||||||
|
100: 'száz'
|
||||||
|
}
|
||||||
|
|
||||||
|
# Hungarian uses "long scale"
|
||||||
|
# https://en.wikipedia.org/wiki/Long_and_short_scales
|
||||||
|
# Currently, numbers are limited to 1000000000000000000000000,
|
||||||
|
# but _NUM_POWERS_OF_TEN can be extended to include additional number words
|
||||||
|
|
||||||
|
_NUM_POWERS_OF_TEN = [
|
||||||
|
'', 'ezer', 'millió', 'milliárd', 'billió', 'billiárd', 'trillió',
|
||||||
|
'trilliárd'
|
||||||
|
]
|
||||||
|
|
||||||
|
_FRACTION_STRING_HU = {
|
||||||
|
2: 'fél',
|
||||||
|
3: 'harmad',
|
||||||
|
4: 'negyed',
|
||||||
|
5: 'ötöd',
|
||||||
|
6: 'hatod',
|
||||||
|
7: 'heted',
|
||||||
|
8: 'nyolcad',
|
||||||
|
9: 'kilenced',
|
||||||
|
10: 'tized',
|
||||||
|
11: 'tizenegyed',
|
||||||
|
12: 'tizenketted',
|
||||||
|
13: 'tizenharmad',
|
||||||
|
14: 'tizennegyed',
|
||||||
|
15: 'tizenötöd',
|
||||||
|
16: 'tizenhatod',
|
||||||
|
17: 'tizenheted',
|
||||||
|
18: 'tizennyolcad',
|
||||||
|
19: 'tizenkilenced',
|
||||||
|
20: 'huszad'
|
||||||
|
}
|
||||||
|
|
||||||
|
# Numbers below 2 thousand are written in one word in Hungarian
|
||||||
|
# Numbers above 2 thousand are separated by hyphens
|
||||||
|
# In some circumstances it may be better to separate individual words
|
||||||
|
# Set _EXTRA_SPACE_HU=" " for separating numbers below 2 thousand (
|
||||||
|
# orthographically incorrect)
|
||||||
|
# Set _EXTRA_SPACE_HU="" for correct spelling, this is standard
|
||||||
|
|
||||||
|
# _EXTRA_SPACE_HU = " "
|
||||||
|
_EXTRA_SPACE_HU = ""
|
||||||
321
lingua_franca/lang/common_data_it.py
Normal file
321
lingua_franca/lang/common_data_it.py
Normal file
@@ -0,0 +1,321 @@
|
|||||||
|
import collections
|
||||||
|
|
||||||
|
|
||||||
|
_SHORT_ORDINAL_STRING_IT = {
|
||||||
|
1: 'primo',
|
||||||
|
2: 'secondo',
|
||||||
|
3: 'terzo',
|
||||||
|
4: 'quarto',
|
||||||
|
5: 'quinto',
|
||||||
|
6: 'sesto',
|
||||||
|
7: 'settimo',
|
||||||
|
8: 'ottavo',
|
||||||
|
9: 'nono',
|
||||||
|
10: 'decimo',
|
||||||
|
11: 'undicesimo',
|
||||||
|
12: 'dodicesimo',
|
||||||
|
13: 'tredicesimo',
|
||||||
|
14: 'quattordicesimo',
|
||||||
|
15: 'quindicesimo',
|
||||||
|
16: 'sedicesimo',
|
||||||
|
17: 'diciassettesimo',
|
||||||
|
18: 'diciottesimo',
|
||||||
|
19: 'diciannovesimo',
|
||||||
|
20: 'ventesimo',
|
||||||
|
30: 'trentesimo',
|
||||||
|
40: 'quarantesimo',
|
||||||
|
50: 'cinquantesimo',
|
||||||
|
60: 'sessantesimo',
|
||||||
|
70: 'settantesimo',
|
||||||
|
80: 'ottantesimo',
|
||||||
|
90: 'novantesimo',
|
||||||
|
1e2: 'centesimo',
|
||||||
|
1e3: 'millesimo',
|
||||||
|
1e6: 'milionesimo',
|
||||||
|
1e9: 'miliardesimo',
|
||||||
|
1e12: 'trilionesimo',
|
||||||
|
1e15: 'quadrilionesimo',
|
||||||
|
1e18: 'quintilionesim',
|
||||||
|
1e21: 'sestilionesimo',
|
||||||
|
1e24: 'settilionesimo',
|
||||||
|
1e27: 'ottilionesimo',
|
||||||
|
1e30: 'nonilionesimo',
|
||||||
|
1e33: 'decilionesimo'
|
||||||
|
# TODO > 1e-33
|
||||||
|
}
|
||||||
|
|
||||||
|
# per i > 10e12 modificata solo la desinenza: da sistemare a fine debug
|
||||||
|
_LONG_ORDINAL_STRING_IT = {
|
||||||
|
1: 'primo',
|
||||||
|
2: 'secondo',
|
||||||
|
3: 'terzo',
|
||||||
|
4: 'quarto',
|
||||||
|
5: 'quinto',
|
||||||
|
6: 'sesto',
|
||||||
|
7: 'settimo',
|
||||||
|
8: 'ottavo',
|
||||||
|
9: 'nono',
|
||||||
|
10: 'decimo',
|
||||||
|
11: 'undicesimo',
|
||||||
|
12: 'dodicesimo',
|
||||||
|
13: 'tredicesimo',
|
||||||
|
14: 'quattordicesimo',
|
||||||
|
15: 'quindicesimo',
|
||||||
|
16: 'sedicesimo',
|
||||||
|
17: 'diciassettesimo',
|
||||||
|
18: 'diciottesimo',
|
||||||
|
19: 'diciannovesimo',
|
||||||
|
20: 'ventesimo',
|
||||||
|
30: 'trentesimo',
|
||||||
|
40: 'quarantesimo',
|
||||||
|
50: 'cinquantesimo',
|
||||||
|
60: 'sessantesimo',
|
||||||
|
70: 'settantesimo',
|
||||||
|
80: 'ottantesimo',
|
||||||
|
90: 'novantesimo',
|
||||||
|
1e2: 'centesimo',
|
||||||
|
1e3: 'millesimo',
|
||||||
|
1e6: 'milionesimo',
|
||||||
|
1e12: 'bilionesimo',
|
||||||
|
1e18: 'trilionesimo',
|
||||||
|
1e24: 'quadrilionesimo',
|
||||||
|
1e30: 'quintilionesimo',
|
||||||
|
1e36: 'sestilionesimo',
|
||||||
|
1e42: 'settilionesimo',
|
||||||
|
1e48: 'ottilionesimo',
|
||||||
|
1e54: 'nonilionesimo',
|
||||||
|
1e60: 'decilionesimo'
|
||||||
|
# TODO > 1e60
|
||||||
|
}
|
||||||
|
|
||||||
|
# Undefined articles ['un', 'una', 'un\''] cannot be suppressed,
|
||||||
|
# in Italian, 'un cavallo' means 'a horse' or 'one horse'.
|
||||||
|
_ARTICLES_IT = ['il', 'lo', 'la', 'i', 'gli', 'le']
|
||||||
|
|
||||||
|
_STRING_NUM_IT = {
|
||||||
|
'zero': 0,
|
||||||
|
'un': 1,
|
||||||
|
'uno': 1,
|
||||||
|
'una': 1,
|
||||||
|
'un\'': 1,
|
||||||
|
'due': 2,
|
||||||
|
'tre': 3,
|
||||||
|
'quattro': 4,
|
||||||
|
'cinque': 5,
|
||||||
|
'sei': 6,
|
||||||
|
'sette': 7,
|
||||||
|
'otto': 8,
|
||||||
|
'nove': 9,
|
||||||
|
'dieci': 10,
|
||||||
|
'undici': 11,
|
||||||
|
'dodici': 12,
|
||||||
|
'tredici': 13,
|
||||||
|
'quattordici': 14,
|
||||||
|
'quindici': 15,
|
||||||
|
'sedici': 16,
|
||||||
|
'diciassette': 17,
|
||||||
|
'diciotto': 18,
|
||||||
|
'diciannove': 19,
|
||||||
|
'venti': 20,
|
||||||
|
'vent': 20,
|
||||||
|
'trenta': 30,
|
||||||
|
'trent': 30,
|
||||||
|
'quaranta': 40,
|
||||||
|
'quarant': 40,
|
||||||
|
'cinquanta': 50,
|
||||||
|
'cinquant': 50,
|
||||||
|
'sessanta': 60,
|
||||||
|
'sessant': 60,
|
||||||
|
'settanta': 70,
|
||||||
|
'settant': 70,
|
||||||
|
'ottanta': 80,
|
||||||
|
'ottant': 80,
|
||||||
|
'novanta': 90,
|
||||||
|
'novant': 90,
|
||||||
|
'cento': 100,
|
||||||
|
'duecento': 200,
|
||||||
|
'trecento': 300,
|
||||||
|
'quattrocento': 400,
|
||||||
|
'cinquecento': 500,
|
||||||
|
'seicento': 600,
|
||||||
|
'settecento': 700,
|
||||||
|
'ottocento': 800,
|
||||||
|
'novecento': 900,
|
||||||
|
'mille': 1000,
|
||||||
|
'mila': 1000,
|
||||||
|
'centomila': 100000,
|
||||||
|
'milione': 1000000,
|
||||||
|
'miliardo': 1000000000,
|
||||||
|
'primo': 1,
|
||||||
|
'secondo': 2,
|
||||||
|
'mezzo': 0.5,
|
||||||
|
'mezza': 0.5,
|
||||||
|
'paio': 2,
|
||||||
|
'decina': 10,
|
||||||
|
'decine': 10,
|
||||||
|
'dozzina': 12,
|
||||||
|
'dozzine': 12,
|
||||||
|
'centinaio': 100,
|
||||||
|
'centinaia': 100,
|
||||||
|
'migliaio': 1000,
|
||||||
|
'migliaia': 1000
|
||||||
|
}
|
||||||
|
|
||||||
|
_NUM_STRING_IT = {
|
||||||
|
0: 'zero',
|
||||||
|
1: 'uno',
|
||||||
|
2: 'due',
|
||||||
|
3: 'tre',
|
||||||
|
4: 'quattro',
|
||||||
|
5: 'cinque',
|
||||||
|
6: 'sei',
|
||||||
|
7: 'sette',
|
||||||
|
8: 'otto',
|
||||||
|
9: 'nove',
|
||||||
|
10: 'dieci',
|
||||||
|
11: 'undici',
|
||||||
|
12: 'dodici',
|
||||||
|
13: 'tredici',
|
||||||
|
14: 'quattordici',
|
||||||
|
15: 'quindici',
|
||||||
|
16: 'sedici',
|
||||||
|
17: 'diciassette',
|
||||||
|
18: 'diciotto',
|
||||||
|
19: 'diciannove',
|
||||||
|
20: 'venti',
|
||||||
|
30: 'trenta',
|
||||||
|
40: 'quaranta',
|
||||||
|
50: 'cinquanta',
|
||||||
|
60: 'sessanta',
|
||||||
|
70: 'settanta',
|
||||||
|
80: 'ottanta',
|
||||||
|
90: 'novanta'
|
||||||
|
}
|
||||||
|
|
||||||
|
_FRACTION_STRING_IT = {
|
||||||
|
2: 'mezz',
|
||||||
|
3: 'terz',
|
||||||
|
4: 'quart',
|
||||||
|
5: 'quint',
|
||||||
|
6: 'sest',
|
||||||
|
7: 'settim',
|
||||||
|
8: 'ottav',
|
||||||
|
9: 'non',
|
||||||
|
10: 'decim',
|
||||||
|
11: 'undicesim',
|
||||||
|
12: 'dodicesim',
|
||||||
|
13: 'tredicesim',
|
||||||
|
14: 'quattordicesim',
|
||||||
|
15: 'quindicesim',
|
||||||
|
16: 'sedicesim',
|
||||||
|
17: 'diciassettesim',
|
||||||
|
18: 'diciottesim',
|
||||||
|
19: 'diciannovesim',
|
||||||
|
20: 'ventesim'
|
||||||
|
}
|
||||||
|
|
||||||
|
# fonte: http://tulengua.es/numeros-texto/default.aspx
|
||||||
|
_LONG_SCALE_IT = collections.OrderedDict([
|
||||||
|
(100, 'cento'),
|
||||||
|
(1000, 'mila'),
|
||||||
|
(1000000, 'milioni'),
|
||||||
|
(1e9, "miliardi"),
|
||||||
|
(1e12, "bilioni"),
|
||||||
|
(1e18, 'trilioni'),
|
||||||
|
(1e24, "quadrilioni"),
|
||||||
|
(1e30, "quintilioni"),
|
||||||
|
(1e36, "sestilioni"),
|
||||||
|
(1e42, "settilioni"),
|
||||||
|
(1e48, "ottillioni"),
|
||||||
|
(1e54, "nonillioni"),
|
||||||
|
(1e60, "decemillioni"),
|
||||||
|
(1e66, "undicilione"),
|
||||||
|
(1e72, "dodicilione"),
|
||||||
|
(1e78, "tredicilione"),
|
||||||
|
(1e84, "quattordicilione"),
|
||||||
|
(1e90, "quindicilione"),
|
||||||
|
(1e96, "sedicilione"),
|
||||||
|
(1e102, "diciasettilione"),
|
||||||
|
(1e108, "diciottilione"),
|
||||||
|
(1e114, "dicianovilione"),
|
||||||
|
(1e120, "vintilione"),
|
||||||
|
(1e306, "unquinquagintilione"),
|
||||||
|
(1e312, "duoquinquagintilione"),
|
||||||
|
(1e336, "sesquinquagintilione"),
|
||||||
|
(1e366, "unsexagintilione")
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
|
_SHORT_SCALE_IT = collections.OrderedDict([
|
||||||
|
(100, 'cento'),
|
||||||
|
(1000, 'mila'),
|
||||||
|
(1000000, 'milioni'),
|
||||||
|
(1e9, "miliardi"),
|
||||||
|
(1e12, 'bilioni'),
|
||||||
|
(1e15, "biliardi"),
|
||||||
|
(1e18, "trilioni"),
|
||||||
|
(1e21, "triliardi"),
|
||||||
|
(1e24, "quadrilioni"),
|
||||||
|
(1e27, "quadriliardi"),
|
||||||
|
(1e30, "quintilioni"),
|
||||||
|
(1e33, "quintiliardi"),
|
||||||
|
(1e36, "sestilioni"),
|
||||||
|
(1e39, "sestiliardi"),
|
||||||
|
(1e42, "settilioni"),
|
||||||
|
(1e45, "settiliardi"),
|
||||||
|
(1e48, "ottilioni"),
|
||||||
|
(1e51, "ottiliardi"),
|
||||||
|
(1e54, "nonilioni"),
|
||||||
|
(1e57, "noniliardi"),
|
||||||
|
(1e60, "decilioni"),
|
||||||
|
(1e63, "deciliardi"),
|
||||||
|
(1e66, "undicilioni"),
|
||||||
|
(1e69, "undiciliardi"),
|
||||||
|
(1e72, "dodicilioni"),
|
||||||
|
(1e75, "dodiciliardi"),
|
||||||
|
(1e78, "tredicilioni"),
|
||||||
|
(1e81, "trediciliardi"),
|
||||||
|
(1e84, "quattordicilioni"),
|
||||||
|
(1e87, "quattordiciliardi"),
|
||||||
|
(1e90, "quindicilioni"),
|
||||||
|
(1e93, "quindiciliardi"),
|
||||||
|
(1e96, "sedicilioni"),
|
||||||
|
(1e99, "sediciliardi"),
|
||||||
|
(1e102, "diciassettilioni"),
|
||||||
|
(1e105, "diciassettiliardi"),
|
||||||
|
(1e108, "diciottilioni"),
|
||||||
|
(1e111, "diciottiliardi"),
|
||||||
|
(1e114, "dicianovilioni"),
|
||||||
|
(1e117, "dicianoviliardi"),
|
||||||
|
(1e120, "vintilioni"),
|
||||||
|
(1e123, "vintiliardi"),
|
||||||
|
(1e153, "quinquagintillion"),
|
||||||
|
(1e183, "sexagintillion"),
|
||||||
|
(1e213, "septuagintillion"),
|
||||||
|
(1e243, "ottogintilioni"),
|
||||||
|
(1e273, "nonigintillioni"),
|
||||||
|
(1e303, "centilioni"),
|
||||||
|
(1e306, "uncentilioni"),
|
||||||
|
(1e309, "duocentilioni"),
|
||||||
|
(1e312, "trecentilioni"),
|
||||||
|
(1e333, "decicentilioni"),
|
||||||
|
(1e336, "undicicentilioni"),
|
||||||
|
(1e363, "viginticentilioni"),
|
||||||
|
(1e366, "unviginticentilioni"),
|
||||||
|
(1e393, "trigintacentilioni"),
|
||||||
|
(1e423, "quadragintacentillion"),
|
||||||
|
(1e453, "quinquagintacentillion"),
|
||||||
|
(1e483, "sexagintacentillion"),
|
||||||
|
(1e513, "septuagintacentillion"),
|
||||||
|
(1e543, "ctogintacentillion"),
|
||||||
|
(1e573, "nonagintacentillion"),
|
||||||
|
(1e603, "ducentillion"),
|
||||||
|
(1e903, "trecentillion"),
|
||||||
|
(1e1203, "quadringentillion"),
|
||||||
|
(1e1503, "quingentillion"),
|
||||||
|
(1e1803, "sescentillion"),
|
||||||
|
(1e2103, "septingentillion"),
|
||||||
|
(1e2403, "octingentillion"),
|
||||||
|
(1e2703, "nongentillion"),
|
||||||
|
(1e3003, "millinillion")
|
||||||
|
])
|
||||||
323
lingua_franca/lang/common_data_nl.py
Normal file
323
lingua_franca/lang/common_data_nl.py
Normal file
@@ -0,0 +1,323 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# Copyright 2019 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
from collections import OrderedDict
|
||||||
|
from .parse_common import invert_dict
|
||||||
|
|
||||||
|
_ARTICLES_NL = {'de', 'het'}
|
||||||
|
|
||||||
|
# Dutch cardinal words: every value from 0 through 19, followed by the
# round tens 20..90.
_NUM_STRING_NL = dict(enumerate((
    'nul', 'een', 'twee', 'drie', 'vier', 'vijf', 'zes', 'zeven', 'acht',
    'negen', 'tien', 'elf', 'twaalf', 'dertien', 'veertien', 'vijftien',
    'zestien', 'zeventien', 'achttien', 'negentien',
)))
_NUM_STRING_NL.update(zip(range(20, 100, 10), (
    'twintig', 'dertig', 'veertig', 'vijftig', 'zestig', 'zeventig',
    'tachtig', 'negentig',
)))
|
||||||
|
|
||||||
|
# Dutch denominator words for the fractions 1/2 .. 1/20, keyed by denominator.
_FRACTION_STRING_NL = dict(zip(range(2, 21), (
    'half', 'derde', 'vierde', 'vijfde', 'zesde', 'zevende', 'achtste',
    'negende', 'tiende', 'elfde', 'twaalfde', 'dertiende', 'veertiende',
    'vijftiende', 'zestiende', 'zeventiende', 'achttiende', 'negentiende',
    'twintigste',
)))
|
||||||
|
|
||||||
|
# Magnitude words for the long scale, ordered from smallest to largest.
# Keys and names are listed in two parallel tuples of equal length.
# NOTE(review): 1e312, 1e336 and 1e366 are beyond the float range and all
# evaluate to `inf`, so they share one key and only the last name is kept --
# confirm whether that is intended.
# NOTE(review): names from 1e30 upward use the English "-illion" ending while
# the smaller ones use Dutch "-iljoen" -- confirm which spelling is wanted.
_LONG_SCALE_NL = OrderedDict(zip(
    (100, 1000, 1000000, 1e12, 1e18, 1e24, 1e30, 1e36, 1e42, 1e48, 1e54,
     1e60, 1e66, 1e72, 1e78, 1e84, 1e90, 1e96, 1e102, 1e108, 1e114, 1e120,
     1e306, 1e312, 1e336, 1e366),
    ('honderd', 'duizend', 'miljoen', "biljoen", 'triljoen', "quadriljoen",
     "quintillion", "sextillion", "septillion", "octillion", "nonillion",
     "decillion", "undecillion", "duodecillion", "tredecillion",
     "quattuordecillion", "quinquadecillion", "sedecillion",
     "septendecillion", "octodecillion", "novendecillion", "vigintillion",
     "unquinquagintillion", "duoquinquagintillion", "sesquinquagintillion",
     "unsexagintillion"),
))
|
||||||
|
|
||||||
|
# Dutch magnitude words for the short scale, ordered from smallest to largest.
# Fixed misspelled names: "duovigintiljoen" (1e69), "sesvigintiljoen" (1e81)
# and "octogintacentiljoen" (1e543).
# NOTE(review): float literals from 1e309 upward overflow to `inf`, so all of
# those keys collide and only the last name ("milliniljoen") survives in the
# mapping -- confirm whether integer powers (10 ** 309, ...) are wanted.
# NOTE(review): 1e15 uses the English spelling "quadrillion" while its
# neighbours use "-iljoen" -- confirm.
_SHORT_SCALE_NL = OrderedDict([
    (100, 'honderd'),
    (1000, 'duizend'),
    (1000000, 'miljoen'),
    (1e9, "miljard"),
    (1e12, 'biljoen'),
    (1e15, "quadrillion"),
    (1e18, "quintiljoen"),
    (1e21, "sextiljoen"),
    (1e24, "septiljoen"),
    (1e27, "octiljoen"),
    (1e30, "noniljoen"),
    (1e33, "deciljoen"),
    (1e36, "undeciljoen"),
    (1e39, "duodeciljoen"),
    (1e42, "tredeciljoen"),
    (1e45, "quattuordeciljoen"),
    (1e48, "quinquadeciljoen"),
    (1e51, "sedeciljoen"),
    (1e54, "septendeciljoen"),
    (1e57, "octodeciljoen"),
    (1e60, "novendeciljoen"),
    (1e63, "vigintiljoen"),
    (1e66, "unvigintiljoen"),
    (1e69, "duovigintiljoen"),
    (1e72, "tresvigintiljoen"),
    (1e75, "quattuorvigintiljoen"),
    (1e78, "quinquavigintiljoen"),
    (1e81, "sesvigintiljoen"),
    (1e84, "septemvigintiljoen"),
    (1e87, "octovigintiljoen"),
    (1e90, "novemvigintiljoen"),
    (1e93, "trigintiljoen"),
    (1e96, "untrigintiljoen"),
    (1e99, "duotrigintiljoen"),
    (1e102, "trestrigintiljoen"),
    (1e105, "quattuortrigintiljoen"),
    (1e108, "quinquatrigintiljoen"),
    (1e111, "sestrigintiljoen"),
    (1e114, "septentrigintiljoen"),
    (1e117, "octotrigintiljoen"),
    (1e120, "noventrigintiljoen"),
    (1e123, "quadragintiljoen"),
    (1e153, "quinquagintiljoen"),
    (1e183, "sexagintiljoen"),
    (1e213, "septuagintiljoen"),
    (1e243, "octogintiljoen"),
    (1e273, "nonagintiljoen"),
    (1e303, "centiljoen"),
    (1e306, "uncentiljoen"),
    (1e309, "duocentiljoen"),
    (1e312, "trescentiljoen"),
    (1e333, "decicentiljoen"),
    (1e336, "undecicentiljoen"),
    (1e363, "viginticentiljoen"),
    (1e366, "unviginticentiljoen"),
    (1e393, "trigintacentiljoen"),
    (1e423, "quadragintacentiljoen"),
    (1e453, "quinquagintacentiljoen"),
    (1e483, "sexagintacentiljoen"),
    (1e513, "septuagintacentiljoen"),
    (1e543, "octogintacentiljoen"),
    (1e573, "nonagintacentiljoen"),
    (1e603, "ducentiljoen"),
    (1e903, "trecentiljoen"),
    (1e1203, "quadringentiljoen"),
    (1e1503, "quingentiljoen"),
    (1e1803, "sescentiljoen"),
    (1e2103, "septingentiljoen"),
    (1e2403, "octingentiljoen"),
    (1e2703, "nongentiljoen"),
    (1e3003, "milliniljoen"),
])
|
||||||
|
|
||||||
|
# Dutch ordinal words shared by the short- and long-scale ordinal tables.
# "honderdste" (hundredth) is keyed by 1e2; the previous key 10e3 is 10000.
_ORDINAL_STRING_BASE_NL = {
    1: 'eerste',
    2: 'tweede',
    3: 'derde',
    4: 'vierde',
    5: 'vijfde',
    6: 'zesde',
    7: 'zevende',
    8: 'achtste',
    9: 'negende',
    10: 'tiende',
    11: 'elfde',
    12: 'twaalfde',
    13: 'dertiende',
    14: 'veertiende',
    15: 'vijftiende',
    16: 'zestiende',
    17: 'zeventiende',
    18: 'achttiende',
    19: 'negentiende',
    20: 'twintigste',
    30: 'dertigste',
    40: "veertigste",
    50: "vijftigste",
    60: "zestigste",
    70: "zeventigste",
    80: "tachtigste",
    90: "negentigste",
    1e2: "honderdste",
    1e3: "duizendste",
}
|
||||||
|
|
||||||
|
# Dutch ordinal words for large magnitudes (short-scale table); the base
# ordinals are merged in by the update() call that follows this literal.
# Fixed spellings: "miljoenste" (1e6) and "triljardste" (1e21).
_SHORT_ORDINAL_STRING_NL = {
    1e6: "miljoenste",
    1e9: "miljardste",
    1e12: "biljoenste",
    1e15: "biljardste",
    1e18: "triljoenste",
    1e21: "triljardste",
    1e24: "quadriljoenste",
    1e27: "quadriljardste",
    1e30: "quintiljoenste",
    1e33: "quintiljardste"
    # TODO: add ordinals above 1e33
}
|
||||||
|
_SHORT_ORDINAL_STRING_NL.update(_ORDINAL_STRING_BASE_NL)
|
||||||
|
|
||||||
|
# Dutch ordinal words for large magnitudes (long-scale table); the base
# ordinals are merged in by the update() call that follows this literal.
# Fixed spellings: "miljoenste" (1e6) and "triljardste" (1e21).
_LONG_ORDINAL_STRING_NL = {
    1e6: "miljoenste",
    1e9: "miljardste",
    1e12: "biljoenste",
    1e15: "biljardste",
    1e18: "triljoenste",
    1e21: "triljardste",
    1e24: "quadriljoenste",
    1e27: "quadriljardste",
    1e30: "quintiljoenste",
    1e33: "quintiljardste"
    # TODO: add ordinals above 1e33
}
|
||||||
|
_LONG_ORDINAL_STRING_NL.update(_ORDINAL_STRING_BASE_NL)
|
||||||
|
|
||||||
|
# negate next number (-2 = 0 - 2)
|
||||||
|
_NEGATIVES_NL = {"min", "minus"}
|
||||||
|
|
||||||
|
# sum the next number (twenty two = 20 + 2)
|
||||||
|
# Tens (as words and as digit strings) to which the next number is added
# (twenty two = 20 + 2). 80 is spelled 'tachtig', matching _NUM_STRING_NL.
_SUMS_NL = {'twintig', '20', 'dertig', '30', 'veertig', '40', 'vijftig', '50',
            'zestig', '60', 'zeventig', '70', 'tachtig', '80', 'negentig',
            '90'}
|
||||||
|
|
||||||
|
_MULTIPLIES_LONG_SCALE_NL = set(_LONG_SCALE_NL.values())
|
||||||
|
|
||||||
|
_MULTIPLIES_SHORT_SCALE_NL = set(_SHORT_SCALE_NL.values())
|
||||||
|
|
||||||
|
# split sentence parse separately and sum ( 2 and a half = 2 + 0.5 )
|
||||||
|
_FRACTION_MARKER_NL = {"en"}
|
||||||
|
|
||||||
|
# decimal marker ( 1 point 5 = 1 + 0.5)
|
||||||
|
_DECIMAL_MARKER_NL = {"komma", "punt"}
|
||||||
|
|
||||||
|
# Result of invert_dict(_NUM_STRING_NL) -- presumably a word -> value lookup;
# invert_dict comes from parse_common, confirm there -- extended with a few
# extra quantity words.
_STRING_NUM_NL = invert_dict(_NUM_STRING_NL)
_STRING_NUM_NL.update(half=0.5, driekwart=0.75, anderhalf=1.5, paar=2)
|
||||||
|
|
||||||
|
_STRING_SHORT_ORDINAL_NL = invert_dict(_SHORT_ORDINAL_STRING_NL)
|
||||||
|
_STRING_LONG_ORDINAL_NL = invert_dict(_LONG_ORDINAL_STRING_NL)
|
||||||
|
|
||||||
|
# Dutch month names, January first.
_MONTHS_NL = ('januari februari maart april mei juni '
              'juli augustus september oktober november december').split()
|
||||||
|
|
||||||
|
# Second definition of _NUM_STRING_NL; it replaces the table defined earlier
# in this module for all code that runs after this point. Differences from
# the first table: 1 is written with accents ('één') and 100 is included.
# Fixed: 18 is 'achttien' (was misspelled 'actien').
_NUM_STRING_NL = {
    0: 'nul',
    1: 'één',
    2: 'twee',
    3: 'drie',
    4: 'vier',
    5: 'vijf',
    6: 'zes',
    7: 'zeven',
    8: 'acht',
    9: 'negen',
    10: 'tien',
    11: 'elf',
    12: 'twaalf',
    13: 'dertien',
    14: 'veertien',
    15: 'vijftien',
    16: 'zestien',
    17: 'zeventien',
    18: 'achttien',
    19: 'negentien',
    20: 'twintig',
    30: 'dertig',
    40: 'veertig',
    50: 'vijftig',
    60: 'zestig',
    70: 'zeventig',
    80: 'tachtig',
    90: 'negentig',
    100: 'honderd',
}
|
||||||
|
|
||||||
|
# Dutch uses "long scale" https://en.wikipedia.org/wiki/Long_and_short_scales
|
||||||
|
# Currently, numbers are limited to 1000000000000000000000000,
|
||||||
|
# but _NUM_POWERS_OF_TEN can be extended to include additional number words
|
||||||
|
|
||||||
|
|
||||||
|
# Names for successive powers of one thousand; index 0 is the empty string.
_NUM_POWERS_OF_TEN = [''] + (
    'duizend miljoen miljard biljoen biljard triljoen triljard'.split()
)
|
||||||
|
|
||||||
|
# Numbers below 1 million are written in one word in dutch, yielding very
|
||||||
|
# long words
|
||||||
|
# In some circumstances it may be better to separate individual words
|
||||||
|
# Set _EXTRA_SPACE_NL=" " for separating numbers below 1 million (
|
||||||
|
# orthographically incorrect)
|
||||||
|
# Set _EXTRA_SPACE_NL="" for correct spelling, this is standard
|
||||||
|
|
||||||
|
# _EXTRA_SPACE_NL = " "
|
||||||
|
_EXTRA_SPACE_NL = ""
|
||||||
497
lingua_franca/lang/common_data_pl.py
Normal file
497
lingua_franca/lang/common_data_pl.py
Normal file
@@ -0,0 +1,497 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
|
|
||||||
|
# Polish cardinal words: 0..19, then the round tens, then the round hundreds.
_NUM_STRING_PL = dict(enumerate((
    'zero', 'jeden', 'dwa', 'trzy', 'cztery', 'pięć', 'sześć', 'siedem',
    'osiem', 'dziewięć', 'dziesięć', 'jedenaście', 'dwanaście',
    'trzynaście', 'czternaście', 'piętnaście', 'szesnaście',
    'siedemnaście', 'osiemnaście', 'dziewiętnaście',
)))
_NUM_STRING_PL.update(zip(range(20, 100, 10), (
    'dwadzieścia', 'trzydzieści', 'czterdzieści', 'pięćdziesiąt',
    'sześćdziesiąt', 'siedemdziesiąt', 'osiemdziesiąt', 'dziewięćdziesiąt',
)))
_NUM_STRING_PL.update(zip(range(100, 1000, 100), (
    'sto', 'dwieście', 'trzysta', 'czterysta', 'pięćset', 'sześćset',
    'siedemset', 'osiemset', 'dziewięćset',
)))
|
||||||
|
|
||||||
|
|
||||||
|
# Polish denominator words keyed by denominator: 1..19, the round tens,
# then the round hundreds up to and including 1000.
_FRACTION_STRING_PL = dict(zip(range(1, 20), (
    'jedna', 'druga', 'trzecia', 'czwarta', 'piąta', 'szósta', 'siódma',
    'ósma', 'dziewiąta', 'dziesiąta', 'jedenasta', 'dwunasta', 'trzynasta',
    'czternasta', 'piętnasta', 'szesnasta', 'siedemnasta', 'osiemnasta',
    'dziewiętnasta',
)))
_FRACTION_STRING_PL.update(zip(range(20, 100, 10), (
    'dwudziesta', 'trzydziesta', 'czterdziesta', 'pięćdziesiąta',
    'sześćdziesiąta', 'siedemdziesiąta', 'osiemdziesiąta',
    'dziewięćdziesiąta',
)))
_FRACTION_STRING_PL.update(zip(range(100, 1100, 100), (
    'setna', 'dwusetna', 'trzysetna', 'czterysetna', 'pięćsetna',
    'sześćsetna', 'siedemsetna', 'osiemsetna', 'dziewięćsetna', 'tysięczna',
)))
|
||||||
|
|
||||||
|
# Polish words for the round hundreds and for larger magnitudes, ordered from
# smallest to largest.
# Fixed: 1e21 is "tryliard" (was "sekstilion"; _SHORT_ORDINAL_PL in this
# module already uses "tryliardowy" for 1e21), and the misspelled
# "ctogintacentylion" (1e543) is now "octogintacentylion".
# NOTE(review): float literals from 1e309 upward overflow to `inf`, so all of
# those keys collide and only the last name ("milinylion") survives in the
# mapping -- confirm whether integer powers (10 ** 309, ...) are wanted.
_SHORT_SCALE_PL = OrderedDict([
    (100, 'sto'),
    (200, 'dwieście'),
    (300, 'trzysta'),
    (400, 'czterysta'),
    (500, 'pięćset'),
    (600, 'sześćset'),
    (700, 'siedemset'),
    (800, 'osiemset'),
    (900, 'dziewięćset'),
    (1000, 'tysiąc'),
    (1000000, 'milion'),
    (1e9, "miliard"),
    (1e12, 'bilion'),
    (1e15, "biliard"),
    (1e18, "trylion"),
    (1e21, "tryliard"),
    (1e24, "kwadrylion"),
    (1e27, "kwadryliard"),
    (1e30, "kwintylion"),
    (1e33, "kwintyliard"),
    (1e36, "sekstylion"),
    (1e39, "sekstyliard"),
    (1e42, "septylion"),
    (1e45, "septyliard"),
    (1e48, "oktylion"),
    (1e51, "oktyliard"),
    (1e54, "nonilion"),
    (1e57, "noniliard"),
    (1e60, "decylion"),
    (1e63, "decyliard"),
    (1e66, "undecylion"),
    (1e69, "undecyliard"),
    (1e72, "duodecylion"),
    (1e75, "duodecyliard"),
    (1e78, "tredecylion"),
    (1e81, "tredecyliard"),
    (1e84, "kwartyduodecylion"),
    (1e87, "kwartyduodecyliard"),
    (1e90, "kwintyduodecylion"),
    (1e93, "kwintyduodecyliard"),
    (1e96, "seksdecylion"),
    (1e99, "seksdecyliard"),
    (1e102, "septydecylion"),
    (1e105, "septydecyliard"),
    (1e108, "oktodecylion"),
    (1e111, "oktodecyliard"),
    (1e114, "nondecylion"),
    (1e117, "nondecyliard"),
    (1e120, "wigintylion"),
    (1e123, "wigintyliard"),
    (1e153, "quinquagintylion"),
    (1e183, "trycyliard"),
    (1e213, "septuagintylion"),
    (1e243, "kwadragiliard"),
    (1e273, "nonagintylion"),
    (1e303, "centezylion"),
    (1e306, "uncentylion"),
    (1e309, "duocentylion"),
    (1e312, "trescentylion"),
    (1e333, "decicentylion"),
    (1e336, "undecicentylion"),
    (1e363, "viginticentylion"),
    (1e366, "unviginticentylion"),
    (1e393, "trigintacentylion"),
    (1e423, "quadragintacentylion"),
    (1e453, "quinquagintacentylion"),
    (1e483, "sexagintacentylion"),
    (1e513, "septuagintacentylion"),
    (1e543, "octogintacentylion"),
    (1e573, "nonagintacentylion"),
    (1e603, "centyliard"),
    (1e903, "trecentylion"),
    (1e1203, "quadringentylion"),
    (1e1503, "quingentylion"),
    (1e1803, "sescentylion"),
    (1e2103, "septingentylion"),
    (1e2403, "octingentylion"),
    (1e2703, "nongentylion"),
    (1e3003, "milinylion"),
])
|
||||||
|
|
||||||
|
|
||||||
|
# Polish ordinal words: 1..19, the round tens, then hundredth and thousandth
# (the last two are keyed by the float literals 1e2 and 1e3).
_ORDINAL_BASE_PL = dict(zip(range(1, 20), (
    'pierwszy', 'drugi', 'trzeci', 'czwarty', 'piąty', 'szósty', 'siódmy',
    'ósmy', 'dziewiąty', 'dziesiąty', 'jedenasty', 'dwunasty', 'trzynasty',
    'czternasty', 'piętnasty', 'szesnasty', 'siedemnasty', 'osiemnasty',
    'dziewiętnasty',
)))
_ORDINAL_BASE_PL.update(zip(range(20, 100, 10), (
    'dwudziesty', 'trzydziesty', "czterdziesty", "pięćdziesiąty",
    "sześćdziesiąty", "siedemdziesiąty", "osiemdziesiąty",
    "dziewięćdziesiąty",
)))
_ORDINAL_BASE_PL.update(((1e2, "setny"), (1e3, "tysięczny")))
|
||||||
|
|
||||||
|
|
||||||
|
# Polish ordinal adjectives for large magnitudes; the base ordinals are
# merged in by the update() call that follows this literal.
# Fixed spellings so the stems agree with _SHORT_SCALE_PL in this module:
# "sekstylionowy" (1e36, stem "sekstylion") and "nonilionowy" (1e54, stem
# "nonilion").
_SHORT_ORDINAL_PL = {
    1e6: "milionowy",
    1e9: "miliardowy",
    1e12: "bilionowy",
    1e15: "biliardowy",
    1e18: "trylionowy",
    1e21: "tryliardowy",
    1e24: "kwadrylionowy",
    1e27: "kwadryliardowy",
    1e30: "kwintylionowy",
    1e33: "kwintyliardowy",
    1e36: "sekstylionowy",
    1e42: "septylionowy",
    1e48: "oktylionowy",
    1e54: "nonilionowy",
    1e60: "decylionowy"
    # TODO: add ordinals above 1e60
}
|
||||||
|
_SHORT_ORDINAL_PL.update(_ORDINAL_BASE_PL)
|
||||||
|
|
||||||
|
# Alternative Polish ordinal forms: "-ej" forms for 1..9 and "-o" combining
# forms for 10..90.
# Fixed: 50 is 'pięćdziesięcio' (the 'ę' was missing), matching the spelling
# used for 60..90.
_ALT_ORDINALS_PL = {
    1: 'pierwszej',
    2: 'drugiej',
    3: 'trzeciej',
    4: 'czwartej',
    5: 'piątej',
    6: 'szóstej',
    7: 'siódmej',
    8: 'ósmej',
    9: 'dziewiątej',
    10: 'dziesięcio',
    11: 'jedenasto',
    12: 'dwunasto',
    13: 'trzynasto',
    14: 'czternasto',
    15: 'piętnasto',
    16: 'szesnasto',
    17: 'siedemnasto',
    18: 'osiemnasto',
    19: 'dziewiętnasto',
    20: 'dwudziesto',
    30: 'trzydziesto',
    40: 'czterdziesto',
    50: 'pięćdziesięcio',
    60: 'sześćdziesięcio',
    70: 'siedemdziesięcio',
    80: 'osiemdziesięcio',
    90: 'dziewięćdziesięcio',
}
|
||||||
|
|
||||||
|
# Maps Polish time-unit word forms to English plural unit names. The table is
# written per English unit and flattened into a form -> unit dict.
_TIME_UNITS_CONVERSION = {
    form: unit
    for unit, forms in (
        ('microseconds', ('mikrosekund', 'mikrosekundy')),
        ('milliseconds', ('milisekund', 'milisekundy')),
        ('seconds', ('sekunda', 'sekundy', 'sekund')),
        ('minutes', ('minuta', 'minuty', 'minut')),
        ('hours', ('godzina', 'godziny', 'godzin')),
        ('days', ('dzień', 'dni')),
        ('weeks', ('tydzień', 'tygodni', 'tygodnie', 'tygodniu')),
    )
    for form in forms
}
|
||||||
|
|
||||||
|
# Maps inflected Polish time-unit words to one canonical form per unit. The
# table is written per canonical form and flattened into a form -> canonical
# dict.
# NOTE(review): the millennium entries normalise to 'milenia' while every
# other unit normalises to its first listed form -- confirm this is intended.
_TIME_UNITS_NORMALIZATION = {
    form: canonical
    for canonical, forms in (
        ('mikrosekunda',
         ('mikrosekunda', 'mikrosekundę', 'mikrosekund', 'mikrosekundy')),
        ('milisekunda',
         ('milisekunda', 'milisekundę', 'milisekund', 'milisekundy')),
        ('sekunda', ('sekunda', 'sekundę', 'sekundy', 'sekund')),
        ('minuta', ('minuta', 'minutę', 'minut', 'minuty')),
        ('godzina', ('godzina', 'godzinę', 'godzin', 'godziny')),
        ('dzień', ('dzień', 'dni')),
        ('tydzień', ('tydzień', 'tygodni', 'tygodnie', 'tygodniu')),
        ('miesiąc', ('miesiąc', 'miesiące', 'miesięcy')),
        ('rok', ('rok', 'lata', 'lat')),
        ('dekada', ('dekada', 'dekad', 'dekady', 'dekadę')),
        ('wiek', ('wiek', 'wieki')),
        ('milenia', ('milenia', 'milenium')),
    )
    for form in forms
}
|
||||||
|
|
||||||
|
# Maps two Polish forms of each month name to the English month name. The
# table is written per English month and flattened into a form -> month dict.
_MONTHS_TO_EN = {
    form: english
    for english, forms in (
        ('January', ('styczeń', 'stycznia')),
        ('February', ('luty', 'lutego')),
        ('March', ('marzec', 'marca')),
        ('April', ('kwiecień', 'kwietnia')),
        ('May', ('maj', 'maja')),
        ('June', ('czerwiec', 'czerwca')),
        ('July', ('lipiec', 'lipca')),
        ('August', ('sierpień', 'sierpnia')),
        ('September', ('wrzesień', 'września')),
        ('October', ('październik', 'października')),
        ('November', ('listopad', 'listopada')),
        ('December', ('grudzień', 'grudnia')),
    )
    for form in forms
}
|
||||||
|
|
||||||
|
# Maps Polish weekday word forms (noun and adjective inflections) to a weekday
# index, 0 for Monday through 6 for Sunday. Each inner tuple lists the forms
# for one weekday; its position in the outer tuple is the index.
_DAYS_TO_EN = {
    form: weekday
    for weekday, forms in enumerate((
        ('poniedziałek', 'poniedziałkach', 'poniedziałkami', 'poniedziałki',
         'poniedziałkiem', 'poniedziałkom', 'poniedziałkowa',
         'poniedziałkową', 'poniedziałkowe', 'poniedziałkowego',
         'poniedziałkowej', 'poniedziałkowemu', 'poniedziałkowi',
         'poniedziałkowy', 'poniedziałkowych', 'poniedziałkowym',
         'poniedziałkowymi', 'poniedziałków', 'poniedziałku'),
        ('wtorek', 'wtorkach', 'wtorkami', 'wtorki', 'wtorkiem', 'wtorkom',
         'wtorkowa', 'wtorkową', 'wtorkowe', 'wtorkowego', 'wtorkowej',
         'wtorkowemu', 'wtorkowi', 'wtorkowy', 'wtorkowych', 'wtorkowym',
         'wtorkowymi', 'wtorków', 'wtorku'),
        ('środa', 'środach', 'środami', 'środą', 'środę', 'środo', 'środom',
         'środowa', 'środową', 'środowe', 'środowego', 'środowej',
         'środowemu', 'środowi', 'środowy', 'środowych', 'środowym',
         'środowymi', 'środy', 'środzie', 'śród'),
        ('czwartek', 'czwartkach', 'czwartkami', 'czwartki', 'czwartkiem',
         'czwartkom', 'czwartkowa', 'czwartkową', 'czwartkowe',
         'czwartkowego', 'czwartkowej', 'czwartkowemu', 'czwartkowi',
         'czwartkowy', 'czwartkowych', 'czwartkowym', 'czwartkowymi',
         'czwartków', 'czwartku'),
        ('piątek', 'piątkach', 'piątkami', 'piątki', 'piątkiem', 'piątkom',
         'piątkowa', 'piątkową', 'piątkowe', 'piątkowego', 'piątkowej',
         'piątkowemu', 'piątkowi', 'piątkowy', 'piątkowych', 'piątkowym',
         'piątkowymi', 'piątków', 'piątku'),
        ('sobocie', 'sobota', 'sobotach', 'sobotami', 'sobotą', 'sobotę',
         'sobotni', 'sobotnia', 'sobotnią', 'sobotnich', 'sobotnie',
         'sobotniego', 'sobotniej', 'sobotniemu', 'sobotnim', 'sobotnimi',
         'soboto', 'sobotom', 'soboty', 'sobót'),
        ('niedziel', 'niedziela', 'niedzielach', 'niedzielami', 'niedzielą',
         'niedziele', 'niedzielę', 'niedzieli', 'niedzielna', 'niedzielną',
         'niedzielne', 'niedzielnego', 'niedzielnej', 'niedzielnemu',
         'niedzielni', 'niedzielny', 'niedzielnych', 'niedzielnym',
         'niedzielnymi', 'niedzielo', 'niedzielom'),
    ))
    for form in forms
}
|
||||||
135
lingua_franca/lang/common_data_pt.py
Normal file
135
lingua_franca/lang/common_data_pt.py
Normal file
@@ -0,0 +1,135 @@
|
|||||||
|
_FUNCTION_NOT_IMPLEMENTED_WARNING = "esta função não foi implementada em 'pt'"
|
||||||
|
|
||||||
|
# Undefined articles ["um", "uma", "uns", "umas"] can not be suppressed,
|
||||||
|
# in PT, "um cavalo" means "a horse" or "one horse".
|
||||||
|
|
||||||
|
_ARTICLES_PT = ["o", "a", "os", "as"]
|
||||||
|
|
||||||
|
# word rules for gender
|
||||||
|
_FEMALE_ENDINGS_PT = ["a", "as"]
|
||||||
|
_MALE_ENDINGS_PT = ["o", "os"]
|
||||||
|
|
||||||
|
# special cases, word lookup for words not covered by above rule
|
||||||
|
# Explicit word -> gender lookup ("f" / "m") for words that the ending rules
# do not cover.
_GENDERS_PT = dict(mulher="f", mulheres="f", homem="m")
|
||||||
|
|
||||||
|
# context rules for gender
|
||||||
|
_MALE_DETERMINANTS_PT = ["o", "os", "este", "estes", "esse", "esses"]
|
||||||
|
# Feminine determiners; mirrors _MALE_DETERMINANTS_PT (singular "esta" was
# missing and plural "estas" was listed twice).
_FEMALE_DETERMINANTS_PT = ["a", "as", "esta", "estas", "essa", "essas"]
|
||||||
|
|
||||||
|
# Portuguese number words (cardinals, a few ordinals, and masculine/feminine
# variants) mapped to their numeric value.
# Fixed: the key for 1000000 is "milhão"; it previously contained a
# mis-encoded replacement character in place of "ã".
_NUMBERS_PT = {
    "zero": 0,
    "um": 1,
    "uma": 1,
    "uns": 1,
    "umas": 1,
    "primeiro": 1,
    "segundo": 2,
    "terceiro": 3,
    "dois": 2,
    "duas": 2,
    "tres": 3,
    "três": 3,
    "quatro": 4,
    "cinco": 5,
    "seis": 6,
    "sete": 7,
    "oito": 8,
    "nove": 9,
    "dez": 10,
    "onze": 11,
    "doze": 12,
    "treze": 13,
    "catorze": 14,
    "quinze": 15,
    "dezasseis": 16,
    "dezassete": 17,
    "dezoito": 18,
    "dezanove": 19,
    "vinte": 20,
    "trinta": 30,
    "quarenta": 40,
    "cinquenta": 50,
    "sessenta": 60,
    "setenta": 70,
    "oitenta": 80,
    "noventa": 90,
    "cem": 100,
    "cento": 100,
    "duzentos": 200,
    "duzentas": 200,
    "trezentos": 300,
    "trezentas": 300,
    "quatrocentos": 400,
    "quatrocentas": 400,
    "quinhentos": 500,
    "quinhentas": 500,
    "seiscentos": 600,
    "seiscentas": 600,
    "setecentos": 700,
    "setecentas": 700,
    "oitocentos": 800,
    "oitocentas": 800,
    "novecentos": 900,
    "novecentas": 900,
    "mil": 1000,
    "milhão": 1000000,
}
|
||||||
|
|
||||||
|
# Portuguese denominator words keyed by denominator: 2..20 followed by 30,
# 100 and 1000. Denominators 11..19 use the "<number> avos" form.
_FRACTION_STRING_PT = dict(zip(
    list(range(2, 21)) + [30, 100, 1000],
    ('meio', 'terço', 'quarto', 'quinto', 'sexto', 'sétimo', 'oitavo',
     'nono', 'décimo',
     'onze avos', 'doze avos', 'treze avos', 'catorze avos', 'quinze avos',
     'dezasseis avos', 'dezassete avos', 'dezoito avos', 'dezanove avos',
     'vigésimo', 'trigésimo', 'centésimo', 'milésimo'),
))
|
||||||
|
|
||||||
|
# Portuguese cardinal words: 0..19 followed by the round tens 20..90.
_NUM_STRING_PT = dict(enumerate((
    'zero', 'um', 'dois', 'três', 'quatro', 'cinco', 'seis', 'sete', 'oito',
    'nove', 'dez', 'onze', 'doze', 'treze', 'catorze', 'quinze',
    'dezasseis', 'dezassete', 'dezoito', 'dezanove',
)))
_NUM_STRING_PT.update(zip(range(20, 100, 10), (
    'vinte', 'trinta', 'quarenta', 'cinquenta', 'sessenta', 'setenta',
    'oitenta', 'noventa',
)))
|
||||||
304
lingua_franca/lang/common_data_ru.py
Normal file
304
lingua_franca/lang/common_data_ru.py
Normal file
@@ -0,0 +1,304 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
|
|
||||||
|
_NUM_STRING_RU = {
|
||||||
|
0: 'ноль',
|
||||||
|
1: 'один',
|
||||||
|
2: 'два',
|
||||||
|
3: 'три',
|
||||||
|
4: 'четыре',
|
||||||
|
5: 'пять',
|
||||||
|
6: 'шесть',
|
||||||
|
7: 'семь',
|
||||||
|
8: 'восемь',
|
||||||
|
9: 'девять',
|
||||||
|
10: 'десять',
|
||||||
|
11: 'одиннадцать',
|
||||||
|
12: 'двенадцать',
|
||||||
|
13: 'тринадцать',
|
||||||
|
14: 'четырнадцать',
|
||||||
|
15: 'пятнадцать',
|
||||||
|
16: 'шестнадцать',
|
||||||
|
17: 'семнадцать',
|
||||||
|
18: 'восемнадцать',
|
||||||
|
19: 'девятнадцать',
|
||||||
|
20: 'двадцать',
|
||||||
|
30: 'тридцать',
|
||||||
|
40: 'сорок',
|
||||||
|
50: 'пятьдесят',
|
||||||
|
60: 'шестьдесят',
|
||||||
|
70: 'семьдесят',
|
||||||
|
80: 'восемьдесят',
|
||||||
|
90: 'девяносто',
|
||||||
|
100: 'сто',
|
||||||
|
200: 'двести',
|
||||||
|
300: 'триста',
|
||||||
|
400: 'четыреста',
|
||||||
|
500: 'пятьсот',
|
||||||
|
600: 'шестьсот',
|
||||||
|
700: 'семьсот',
|
||||||
|
800: 'восемьсот',
|
||||||
|
900: 'девятьсот'
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
_FRACTION_STRING_RU = {
|
||||||
|
2: 'половина',
|
||||||
|
3: 'треть',
|
||||||
|
4: 'четверть',
|
||||||
|
5: 'пятая',
|
||||||
|
6: 'шестая',
|
||||||
|
7: 'седьмая',
|
||||||
|
8: 'восьмая',
|
||||||
|
9: 'девятая',
|
||||||
|
10: 'десятая',
|
||||||
|
11: 'одиннадцатая',
|
||||||
|
12: 'двенадцатая',
|
||||||
|
13: 'тринадцатая',
|
||||||
|
14: 'четырнадцатая',
|
||||||
|
15: 'пятнадцатая',
|
||||||
|
16: 'шестнадцатая',
|
||||||
|
17: 'семнадцатая',
|
||||||
|
18: 'восемнадцатая',
|
||||||
|
19: 'девятнадцатая',
|
||||||
|
20: 'двадцатая',
|
||||||
|
30: 'тридцатая',
|
||||||
|
40: 'сороковая',
|
||||||
|
50: 'пятидесятая',
|
||||||
|
60: 'шестидесятая',
|
||||||
|
70: 'семидесятая',
|
||||||
|
80: 'восьмидесятая',
|
||||||
|
90: 'девяностая',
|
||||||
|
1e2: 'сотая',
|
||||||
|
1e3: 'тысячная',
|
||||||
|
1e6: 'миллионная',
|
||||||
|
1e9: 'миллиардная'
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
_SHORT_SCALE_RU = OrderedDict([
|
||||||
|
(1e3, 'тысяча'),
|
||||||
|
(1e6, "миллион"),
|
||||||
|
(1e9, "миллиард"),
|
||||||
|
(1e12, "триллион"),
|
||||||
|
(1e15, "квадриллион"),
|
||||||
|
(1e18, "квинтиллион"),
|
||||||
|
(1e21, "секстиллион"),
|
||||||
|
(1e24, "септиллион"),
|
||||||
|
(1e27, "октиллион"),
|
||||||
|
(1e30, "нониллион"),
|
||||||
|
(1e33, "дециллион"),
|
||||||
|
(1e36, "ундециллион"),
|
||||||
|
(1e39, "дуодециллион"),
|
||||||
|
(1e42, "тредециллион"),
|
||||||
|
(1e45, "кваттордециллион"),
|
||||||
|
(1e48, "квиндециллион"),
|
||||||
|
(1e51, "сексдециллион"),
|
||||||
|
(1e54, "септендециллион"),
|
||||||
|
(1e57, "октодециллион"),
|
||||||
|
(1e60, "новемдециллион"),
|
||||||
|
(1e63, "вигинтиллион"),
|
||||||
|
(1e66, "унвигинтиллион"),
|
||||||
|
(1e69, "дуовигинтиллион"),
|
||||||
|
(1e72, "тревигинтиллион"),
|
||||||
|
(1e75, "кватторвигинтиллион"),
|
||||||
|
(1e78, "квинвигинтиллион"),
|
||||||
|
(1e81, "секснвигинтиллион"),
|
||||||
|
(1e84, "септенвигинтиллион"),
|
||||||
|
(1e87, "октовигинтиллион"),
|
||||||
|
(1e90, "новемвигинтиллион"),
|
||||||
|
(1e93, "тригинтиллион"),
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
|
_LONG_SCALE_RU = OrderedDict([
|
||||||
|
(1e3, 'тысяча'),
|
||||||
|
(1e6, "миллион"),
|
||||||
|
(1e9, "миллиард"),
|
||||||
|
(1e12, "биллион"),
|
||||||
|
(1e15, "биллиард"),
|
||||||
|
(1e18, "триллион"),
|
||||||
|
(1e21, "триллиард"),
|
||||||
|
(1e24, "квадриллион"),
|
||||||
|
(1e27, "квадриллиард"),
|
||||||
|
(1e30, "квинтиллион"),
|
||||||
|
(1e33, "квинтиллиард"),
|
||||||
|
(1e36, "секстиллион"),
|
||||||
|
(1e39, "секстиллиард"),
|
||||||
|
(1e42, "септиллион"),
|
||||||
|
(1e45, "септиллиард"),
|
||||||
|
(1e48, "октиллион"),
|
||||||
|
(1e51, "октиллиард"),
|
||||||
|
(1e54, "нониллион"),
|
||||||
|
(1e57, "нониллиард"),
|
||||||
|
(1e60, "дециллион"),
|
||||||
|
(1e63, "дециллиард"),
|
||||||
|
(1e66, "ундециллион"),
|
||||||
|
(1e72, "дуодециллион"),
|
||||||
|
(1e78, "тредециллион"),
|
||||||
|
(1e84, "кваттордециллион"),
|
||||||
|
(1e90, "квиндециллион"),
|
||||||
|
(1e96, "сексдециллион"),
|
||||||
|
(1e102, "септендециллион"),
|
||||||
|
(1e108, "октодециллион"),
|
||||||
|
(1e114, "новемдециллион"),
|
||||||
|
(1e120, "вигинтиллион"),
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
|
_ORDINAL_BASE_RU = {
|
||||||
|
1: 'первый',
|
||||||
|
2: 'второй',
|
||||||
|
3: 'третий',
|
||||||
|
4: 'четвёртый',
|
||||||
|
5: 'пятый',
|
||||||
|
6: 'шестой',
|
||||||
|
7: 'седьмой',
|
||||||
|
8: 'восьмой',
|
||||||
|
9: 'девятый',
|
||||||
|
10: 'десятый',
|
||||||
|
11: 'одиннадцатый',
|
||||||
|
12: 'двенадцатый',
|
||||||
|
13: 'тринадцатый',
|
||||||
|
14: 'четырнадцатый',
|
||||||
|
15: 'пятнадцатый',
|
||||||
|
16: 'шестнадцатый',
|
||||||
|
17: 'семнадцатый',
|
||||||
|
18: 'восемнадцатый',
|
||||||
|
19: 'девятнадцатый',
|
||||||
|
20: 'двадцатый',
|
||||||
|
30: 'тридцатый',
|
||||||
|
40: "сороковой",
|
||||||
|
50: "пятидесятый",
|
||||||
|
60: "шестидесятый",
|
||||||
|
70: "семидесятый",
|
||||||
|
80: "восьмидесятый",
|
||||||
|
90: "девяностый",
|
||||||
|
1e2: "сотый",
|
||||||
|
2e2: "двухсотый",
|
||||||
|
3e2: "трёхсотый",
|
||||||
|
4e2: "четырёхсотый",
|
||||||
|
5e2: "пятисотый",
|
||||||
|
6e2: "шестисотый",
|
||||||
|
7e2: "семисотый",
|
||||||
|
8e2: "восьмисотый",
|
||||||
|
9e2: "девятисотый",
|
||||||
|
1e3: "тысячный"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
_SHORT_ORDINAL_RU = {
|
||||||
|
1e6: "миллион",
|
||||||
|
1e9: "миллиард",
|
||||||
|
1e12: "триллион",
|
||||||
|
1e15: "квадриллион",
|
||||||
|
1e18: "квинтиллион",
|
||||||
|
1e21: "секстиллион",
|
||||||
|
1e24: "септиллион",
|
||||||
|
1e27: "октиллион",
|
||||||
|
1e30: "нониллион",
|
||||||
|
1e33: "дециллион",
|
||||||
|
1e36: "ундециллион",
|
||||||
|
1e39: "дуодециллион",
|
||||||
|
1e42: "тредециллион",
|
||||||
|
1e45: "кваттордециллион",
|
||||||
|
1e48: "квиндециллион",
|
||||||
|
1e51: "сексдециллион",
|
||||||
|
1e54: "септендециллион",
|
||||||
|
1e57: "октодециллион",
|
||||||
|
1e60: "новемдециллион",
|
||||||
|
1e63: "вигинтиллион"
|
||||||
|
}
|
||||||
|
_SHORT_ORDINAL_RU.update(_ORDINAL_BASE_RU)
|
||||||
|
|
||||||
|
|
||||||
|
_LONG_ORDINAL_RU = {
|
||||||
|
1e6: "миллион",
|
||||||
|
1e9: "миллиард",
|
||||||
|
1e12: "биллион",
|
||||||
|
1e15: "биллиард",
|
||||||
|
1e18: "триллион",
|
||||||
|
1e21: "триллиард",
|
||||||
|
1e24: "квадриллион",
|
||||||
|
1e27: "квадриллиард",
|
||||||
|
1e30: "квинтиллион",
|
||||||
|
1e33: "квинтиллиард",
|
||||||
|
1e36: "секстиллион",
|
||||||
|
1e39: "секстиллиард",
|
||||||
|
1e42: "септиллион",
|
||||||
|
1e45: "септиллиард",
|
||||||
|
1e48: "октиллион",
|
||||||
|
1e51: "октиллиард",
|
||||||
|
1e54: "нониллион",
|
||||||
|
1e57: "нониллиард",
|
||||||
|
1e60: "дециллион",
|
||||||
|
1e63: "дециллиард",
|
||||||
|
1e66: "ундециллион",
|
||||||
|
1e72: "дуодециллион",
|
||||||
|
1e78: "тредециллион",
|
||||||
|
1e84: "кваттордециллион",
|
||||||
|
1e90: "квиндециллион",
|
||||||
|
1e96: "сексдециллион",
|
||||||
|
1e102: "септендециллион",
|
||||||
|
1e108: "октодециллион",
|
||||||
|
1e114: "новемдециллион",
|
||||||
|
1e120: "вигинтиллион"
|
||||||
|
}
|
||||||
|
_LONG_ORDINAL_RU.update(_ORDINAL_BASE_RU)
|
||||||
|
|
||||||
|
# Months
|
||||||
|
|
||||||
|
_MONTHS_CONVERSION = {
|
||||||
|
0: "january",
|
||||||
|
1: "february",
|
||||||
|
2: "march",
|
||||||
|
3: "april",
|
||||||
|
4: "may",
|
||||||
|
5: "june",
|
||||||
|
6: "july",
|
||||||
|
7: "august",
|
||||||
|
8: "september",
|
||||||
|
9: "october",
|
||||||
|
10: "november",
|
||||||
|
11: "december"
|
||||||
|
}
|
||||||
|
|
||||||
|
_MONTHS_RU = ['январь', 'февраль', 'март', 'апрель', 'май', 'июнь',
|
||||||
|
'июль', 'август', 'сентябрь', 'октябрь', 'ноябрь',
|
||||||
|
'декабрь']
|
||||||
|
|
||||||
|
# Time
|
||||||
|
_TIME_UNITS_CONVERSION = {
|
||||||
|
'микросекунд': 'microseconds',
|
||||||
|
'милисекунд': 'milliseconds',
|
||||||
|
'секунда': 'seconds',
|
||||||
|
'секунды': 'seconds',
|
||||||
|
'секунд': 'seconds',
|
||||||
|
'минута': 'minutes',
|
||||||
|
'минуты': 'minutes',
|
||||||
|
'минут': 'minutes',
|
||||||
|
'час': 'hours',
|
||||||
|
'часа': 'hours',
|
||||||
|
'часов': 'hours',
|
||||||
|
'день': 'days',
|
||||||
|
'дня': 'days',
|
||||||
|
'дней': 'days',
|
||||||
|
'неделя': 'weeks',
|
||||||
|
'недели': 'weeks',
|
||||||
|
'недель': 'weeks'
|
||||||
|
}
|
||||||
173
lingua_franca/lang/common_data_sl.py
Normal file
173
lingua_franca/lang/common_data_sl.py
Normal file
@@ -0,0 +1,173 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the 'License');
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an 'AS IS' BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
|
|
||||||
|
_ARTICLES_SL = {}
|
||||||
|
|
||||||
|
|
||||||
|
_NUM_STRING_SL = {
|
||||||
|
0: 'nič',
|
||||||
|
1: 'ena',
|
||||||
|
2: 'dve',
|
||||||
|
3: 'tri',
|
||||||
|
4: 'štiri',
|
||||||
|
5: 'pet',
|
||||||
|
6: 'šest',
|
||||||
|
7: 'sedem',
|
||||||
|
8: 'osem',
|
||||||
|
9: 'devet',
|
||||||
|
10: 'deset',
|
||||||
|
11: 'enajst',
|
||||||
|
12: 'dvanajst',
|
||||||
|
13: 'trinajst',
|
||||||
|
14: 'štirinajst',
|
||||||
|
15: 'petnajst',
|
||||||
|
16: 'šestnajst',
|
||||||
|
17: 'sedemnajst',
|
||||||
|
18: 'osemnajst',
|
||||||
|
19: 'devetnajst',
|
||||||
|
20: 'dvajset',
|
||||||
|
30: 'trideset',
|
||||||
|
40: 'štirideset',
|
||||||
|
50: 'petdeset',
|
||||||
|
60: 'šestdeset',
|
||||||
|
70: 'sedemdeset',
|
||||||
|
80: 'osemdeset',
|
||||||
|
90: 'devetdeset'
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
_FRACTION_STRING_SL = {
|
||||||
|
2: 'polovica',
|
||||||
|
3: 'tretjina',
|
||||||
|
4: 'četrtina',
|
||||||
|
5: 'petina',
|
||||||
|
6: 'šestina',
|
||||||
|
7: 'sedmina',
|
||||||
|
8: 'osmina',
|
||||||
|
9: 'devetina',
|
||||||
|
10: 'desetina',
|
||||||
|
11: 'enajstina',
|
||||||
|
12: 'dvanajstina',
|
||||||
|
13: 'trinajstina',
|
||||||
|
14: 'štirinajstina',
|
||||||
|
15: 'petnajstina',
|
||||||
|
16: 'šestnajstina',
|
||||||
|
17: 'sedemnajstina',
|
||||||
|
18: 'osemnajstina',
|
||||||
|
19: 'devetnajstina',
|
||||||
|
20: 'dvajsetina'
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
_LONG_SCALE_SL = OrderedDict([
|
||||||
|
(100, 'sto'),
|
||||||
|
(1000, 'tisoč'),
|
||||||
|
(1000000, 'milijon'),
|
||||||
|
(1e12, 'bilijon'),
|
||||||
|
(1e18, 'trilijon'),
|
||||||
|
(1e24, 'kvadrilijon'),
|
||||||
|
(1e30, 'kvintilijon'),
|
||||||
|
(1e36, 'sekstilijon'),
|
||||||
|
(1e42, 'septilijon'),
|
||||||
|
(1e48, 'oktilijon'),
|
||||||
|
(1e54, 'nonilijon'),
|
||||||
|
(1e60, 'decilijon')
|
||||||
|
# TODO > 1e63
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
|
_SHORT_SCALE_SL = OrderedDict([
|
||||||
|
(100, 'sto'),
|
||||||
|
(1000, 'tisoč'),
|
||||||
|
(1000000, 'milijon'),
|
||||||
|
(1e9, 'bilijon'),
|
||||||
|
(1e12, 'trilijon'),
|
||||||
|
(1e15, 'kvadrilijon'),
|
||||||
|
(1e18, 'kvintilijon'),
|
||||||
|
(1e21, 'sekstilijon'),
|
||||||
|
(1e24, 'septilijon'),
|
||||||
|
(1e27, 'oktilijon'),
|
||||||
|
(1e30, 'nonilijon'),
|
||||||
|
(1e33, 'decilijon')
|
||||||
|
# TODO > 1e33
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
|
_ORDINAL_BASE_SL = {
|
||||||
|
1: 'prvi',
|
||||||
|
2: 'drugi',
|
||||||
|
3: 'tretji',
|
||||||
|
4: 'četrti',
|
||||||
|
5: 'peti',
|
||||||
|
6: 'šesti',
|
||||||
|
7: 'sedmi',
|
||||||
|
8: 'osmi',
|
||||||
|
9: 'deveti',
|
||||||
|
10: 'deseti',
|
||||||
|
11: 'enajsti',
|
||||||
|
12: 'dvanajsti',
|
||||||
|
13: 'trinajsti',
|
||||||
|
14: 'štirinajsti',
|
||||||
|
15: 'petnajsti',
|
||||||
|
16: 'šestnajsti',
|
||||||
|
17: 'sedemnajsti',
|
||||||
|
18: 'osemnajsti',
|
||||||
|
19: 'devetnajsti',
|
||||||
|
20: 'dvajseti',
|
||||||
|
30: 'trideseti',
|
||||||
|
40: 'štirideseti',
|
||||||
|
50: 'petdeseti',
|
||||||
|
60: 'šestdeseti',
|
||||||
|
70: 'sedemdeseti',
|
||||||
|
80: 'osemdeseti',
|
||||||
|
90: 'devetdeseti',
|
||||||
|
1e2: 'stoti',
|
||||||
|
1e3: 'tisoči'
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
_LONG_ORDINAL_SL = {
|
||||||
|
1e6: 'milijonti',
|
||||||
|
1e12: 'bilijonti',
|
||||||
|
1e18: 'trilijonti',
|
||||||
|
1e24: 'kvadrilijonti',
|
||||||
|
1e30: 'kvintiljonti',
|
||||||
|
1e36: 'sekstilijonti',
|
||||||
|
1e42: 'septilijonti',
|
||||||
|
1e48: 'oktilijonti',
|
||||||
|
1e54: 'nonilijonti',
|
||||||
|
1e60: 'decilijonti'
|
||||||
|
# TODO > 1e60
|
||||||
|
}
|
||||||
|
_LONG_ORDINAL_SL.update(_ORDINAL_BASE_SL)
|
||||||
|
|
||||||
|
|
||||||
|
_SHORT_ORDINAL_SL = {
|
||||||
|
1e6: 'milijonti',
|
||||||
|
1e9: 'bilijonti',
|
||||||
|
1e12: 'trilijonti',
|
||||||
|
1e15: 'kvadrilijonti',
|
||||||
|
1e18: 'kvintiljonti',
|
||||||
|
1e21: 'sekstilijonti',
|
||||||
|
1e24: 'septilijonti',
|
||||||
|
1e27: 'oktilijonti',
|
||||||
|
1e30: 'nonilijonti',
|
||||||
|
1e33: 'decilijonti'
|
||||||
|
# TODO > 1e33
|
||||||
|
}
|
||||||
|
_SHORT_ORDINAL_SL.update(_ORDINAL_BASE_SL)
|
||||||
72
lingua_franca/lang/common_data_sv.py
Normal file
72
lingua_franca/lang/common_data_sv.py
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
_FUNCTION_NOT_IMPLEMENTED_WARNING = "Denna funktion har inte implementerats i 'sv'"
|
||||||
|
|
||||||
|
_MONTHS_SV = ['januari', 'februari', 'mars', 'april', 'maj', 'juni',
|
||||||
|
'juli', 'augusti', 'september', 'oktober', 'november',
|
||||||
|
'december']
|
||||||
|
|
||||||
|
_NUM_STRING_SV = {
|
||||||
|
0: 'noll',
|
||||||
|
1: 'en',
|
||||||
|
2: 'två',
|
||||||
|
3: 'tre',
|
||||||
|
4: 'fyra',
|
||||||
|
5: 'fem',
|
||||||
|
6: 'sex',
|
||||||
|
7: 'sju',
|
||||||
|
8: 'åtta',
|
||||||
|
9: 'nio',
|
||||||
|
10: 'tio',
|
||||||
|
11: 'elva',
|
||||||
|
12: 'tolv',
|
||||||
|
13: 'tretton',
|
||||||
|
14: 'fjorton',
|
||||||
|
15: 'femton',
|
||||||
|
16: 'sexton',
|
||||||
|
17: 'sjutton',
|
||||||
|
18: 'arton',
|
||||||
|
19: 'nitton',
|
||||||
|
20: 'tjugo',
|
||||||
|
30: 'trettio',
|
||||||
|
40: 'fyrtio',
|
||||||
|
50: 'femtio',
|
||||||
|
60: 'sextio',
|
||||||
|
70: 'sjuttio',
|
||||||
|
80: 'åttio',
|
||||||
|
90: 'nittio',
|
||||||
|
100: 'hundra'
|
||||||
|
}
|
||||||
|
|
||||||
|
_NUM_POWERS_OF_TEN_SV = [
|
||||||
|
'hundra',
|
||||||
|
'tusen',
|
||||||
|
'miljon',
|
||||||
|
'miljard',
|
||||||
|
'biljon',
|
||||||
|
'biljard',
|
||||||
|
'triljon',
|
||||||
|
'triljard'
|
||||||
|
]
|
||||||
|
|
||||||
|
_FRACTION_STRING_SV = {
|
||||||
|
2: 'halv',
|
||||||
|
3: 'tredjedel',
|
||||||
|
4: 'fjärdedel',
|
||||||
|
5: 'femtedel',
|
||||||
|
6: 'sjättedel',
|
||||||
|
7: 'sjundedel',
|
||||||
|
8: 'åttondel',
|
||||||
|
9: 'niondel',
|
||||||
|
10: 'tiondel',
|
||||||
|
11: 'elftedel',
|
||||||
|
12: 'tolftedel',
|
||||||
|
13: 'trettondel',
|
||||||
|
14: 'fjortondel',
|
||||||
|
15: 'femtondel',
|
||||||
|
16: 'sextondel',
|
||||||
|
17: 'sjuttondel',
|
||||||
|
18: 'artondel',
|
||||||
|
19: 'nittondel',
|
||||||
|
20: 'tjugondel'
|
||||||
|
}
|
||||||
|
|
||||||
|
_EXTRA_SPACE_SV = " "
|
||||||
596
lingua_franca/lang/format_ca.py
Normal file
596
lingua_franca/lang/format_ca.py
Normal file
@@ -0,0 +1,596 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
from lingua_franca.lang.format_common import convert_to_mixed_fraction
|
||||||
|
from lingua_franca.lang.common_data_ca import _FRACTION_STRING_CA, \
|
||||||
|
_NUM_STRING_CA
|
||||||
|
from lingua_franca.internal import lookup_variant
|
||||||
|
from enum import IntEnum
|
||||||
|
|
||||||
|
|
||||||
|
class TimeVariantCA(IntEnum):
    """Ways of speaking a time of day that nice_time_ca can produce."""

    # Plain watch time: the hour followed by the minutes.
    DEFAULT = 0
    # Bell (quarter-based) Catalan time system.
    BELL = 1
    # Full bell Catalan time system, with finer-grained quarter phrases.
    FULL_BELL = 2
    # Spanish-like phrasing ("i quart", "i mitja", "menys quart").
    SPANISH_LIKE = 3
|
||||||
|
|
||||||
|
|
||||||
|
def nice_number_ca(number, speech, denominators=range(1, 21)):
    """ Catalan helper for nice_number

    Renders a number as a whole part plus a simple fraction.  For
    display 4.5 becomes "4 1/2"; for speech the fraction is named with
    the matching word from _FRACTION_STRING_CA.

    Args:
        number (int or float): the float to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No allowed denominator fits: fall back to a 3-decimal number.
        return str(round(number, 3))

    whole, num, den = mixed

    # A whole number reads the same for display and for speech.
    # TODO: Number grouping?  E.g. "1,000,000"
    if num == 0:
        return str(whole)

    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = _FRACTION_STRING_CA[den]  # spoken denominator
    if whole == 0:
        # Pure fraction: "un <den>" for a numerator of one, otherwise
        # the numerator as digits followed by the denominator word.
        if num == 1:
            spoken = 'un {}'.format(den_str)
        else:
            spoken = '{} {}'.format(num, den_str)
    elif num == 1:
        # Whole part and a single fraction, joined with a hyphen.
        spoken = '{}-{}'.format(whole, den_str)
    else:
        # Whole part "i" (and) several fractions.
        spoken = '{} i {} {}'.format(whole, num, den_str)

    # The denominator word takes a plural "s" after a numerator above one.
    return spoken + 's' if num > 1 else spoken
|
||||||
|
|
||||||
|
|
||||||
|
def pronounce_number_ca(number, places=2):
    """
    Convert a number to its spoken equivalent

    For example, '5.2' would return 'cinc coma dos'

    Args:
        number(float or int): the number to pronounce (under 100)
        places(int): maximum decimal places to speak
    Returns:
        (str): The pronounced number; numbers whose absolute value is
               100 or more are returned as plain digits.
    """
    from decimal import Decimal

    if abs(number) >= 100:
        # TODO: Support n > 100
        return str(number)

    result = ""
    if number < 0:
        result = "menys "
    number = abs(number)

    if number >= 20:
        tens = int(number - int(number) % 10)
        ones = int(number - tens)
        result += _NUM_STRING_CA[tens]
        if ones > 0:
            # Twenty joins its units with "-i-"; the other tens use a
            # plain hyphen.
            if tens == 20:
                result += "-i-" + _NUM_STRING_CA[ones]
            else:
                result += "-" + _NUM_STRING_CA[ones]
    else:
        result += _NUM_STRING_CA[int(number)]

    # Deal with the decimal part.  Catalan commonly writes a comma
    # instead of a dot; either way it is pronounced "coma".
    if not number == int(number) and places > 0:
        if abs(number) < 1.0 and (result == "menys " or not result):
            result += "zero"
        result += " coma"
        # str() switches to exponent notation for small floats
        # ("1e-05"), which has no "." to split on.  Going through
        # Decimal always yields plain positional digits and leaves
        # ordinary values such as "5.2" unchanged.
        _num_str = format(Decimal(str(number)), "f")
        _num_str = _num_str.split(".")[1][0:places]
        for char in _num_str:
            result += " " + _NUM_STRING_CA[int(char)]
    return result
|
||||||
|
|
||||||
|
|
||||||
|
# Spoken names of the quarters used by the bell-based variants.
_QUARTERS_CA = {1: "un quart", 2: "dos quarts", 3: "tres quarts"}

# FULL_BELL phrases for minutes 7-58 as (exclusive upper bound, phrase);
# the first entry whose bound exceeds the minute applies.
_FULL_BELL_STEPS_CA = (
    (9, "mig quart"),
    (12, "mig quart passat"),
    (14, "mig quart ben passat"),
    (17, "un quart"),
    (20, "un quart tocat"),
    (22, "un quart ben tocat"),
    (24, "un quart i mig"),
    (27, "un quart i mig passat"),
    (29, "un quart i mig ben passat"),
    (32, "dos quarts"),
    (35, "dos quarts tocats"),
    (37, "dos quarts ben tocats"),
    (39, "dos quarts i mig"),
    (42, "dos quarts i mig passats"),
    (44, "dos quarts i mig ben passats"),
    (47, "tres quarts"),
    (50, "tres quarts tocats"),
    (52, "tres quarts ben tocats"),
    (54, "tres quarts i mig"),
    (57, "tres quarts i mig passats"),
    (59, "tres quarts i mig ben passats"),
)


def _hour_with_article_ca(hour):
    """Return a 12-hour clock hour (0-11) with its feminine article."""
    if hour == 0:
        return "les dotze"
    if hour == 1:
        return "la una"
    if hour == 2:
        return "les dues"
    return "les " + pronounce_number_ca(hour)


def _day_period_ca(hour):
    """Return the day-period suffix for a 24-hour clock hour (0-23)."""
    if hour == 0:
        return " de la nit"
    if hour < 6:
        return " de la matinada"
    if hour < 12:
        return " del matí"
    if hour == 12:
        return " del migdia"
    if hour < 19:
        return " de la tarda"
    if hour < 21:
        return " del vespre"
    return " de la nit"


def _upcoming_hour_phrase_ca(dt):
    """Return " de <next hour> <day period>" for the bell-based variants."""
    next_hour = (dt.hour + 1) % 12
    if next_hour == 0:
        return " de dotze" + (
            " del migdia" if dt.hour == 11 else " de la nit")
    if next_hour == 1:
        return " d'una" + (
            " de la tarda" if dt.hour == 12 else " de la matinada")
    if next_hour == 2:
        return " de dues" + (
            " de la tarda" if dt.hour == 13 else " de la nit")
    if next_hour == 11:
        return " d'onze" + (
            " de la nit" if dt.hour == 22 else " del matí")

    # Remaining cases: dt.hour is one of 2-9 or 14-21.  The boundaries
    # sit one hour earlier than in _day_period_ca because the phrase
    # names the *following* hour.
    phrase = " de " + pronounce_number_ca(next_hour)
    if dt.hour < 5:
        phrase += " de la matinada"
    elif dt.hour < 11:
        phrase += " del matí"
    elif dt.hour <= 17:
        phrase += " de la tarda"
    elif dt.hour < 20:
        phrase += " del vespre"
    else:
        phrase += " de la nit"
    return phrase


def _bell_time_ca(dt):
    """Speak dt in the bell (quarter-based) Catalan time system."""
    minute = dt.minute
    quarter, extra = divmod(minute, 15)

    if quarter == 0:
        # Before the first quarter only minutes 7 and 8 ("mig quart")
        # are expressed relative to the next hour.
        prefix = "mig quart" if minute in (7, 8) else None
    elif extra == 0:
        prefix = _QUARTERS_CA[quarter]
    elif extra == 1:
        prefix = _QUARTERS_CA[quarter] + " i un minut"
    elif extra in (7, 8):
        prefix = _QUARTERS_CA[quarter] + " i mig"
    else:
        prefix = (_QUARTERS_CA[quarter] + " i "
                  + pronounce_number_ca(extra) + " minuts")

    if prefix is not None:
        return prefix + _upcoming_hour_phrase_ca(dt)

    # Minutes 0-6 and 9-14: current hour followed by the minutes.
    speak = _hour_with_article_ca(dt.hour % 12)
    if minute == 0:
        speak += " en punt"
    elif minute == 1:
        speak += " i un minut"
    else:
        speak += " i " + pronounce_number_ca(minute) + " minuts"
    return speak + _day_period_ca(dt.hour)


def _full_bell_time_ca(dt):
    """Speak dt in the full bell Catalan time system."""
    minute = dt.minute
    if 7 <= minute < 59:
        for upper, phrase in _FULL_BELL_STEPS_CA:
            if minute < upper:
                return phrase + _upcoming_hour_phrase_ca(dt)

    # Minutes 0-6 belong to the current hour, minute 59 to the next one.
    hour = dt.hour % 12
    if minute == 59:
        hour = (hour + 1) % 12
    speak = _hour_with_article_ca(hour)

    singular = hour == 1  # "la una" takes singular participles
    if minute < 2:
        speak += " en punt"
    elif minute < 5:
        speak += " tocada" if singular else " tocades"
    elif minute < 7:
        speak += " ben tocada" if singular else " ben tocades"

    # The day period follows the spoken hour, so the xx:59 roll-over
    # into the next period is checked in every range.
    if dt.hour == 0:
        speak += " de la matinada" if hour == 1 else " de la nit"
    elif dt.hour < 6:
        speak += " del matí" if hour == 6 else " de la matinada"
    elif dt.hour < 12:
        # hour is taken modulo 12, so 11:59 rolls over to 0, not 12.
        speak += " del migdia" if hour == 0 else " del matí"
    elif dt.hour == 12:
        speak += " de la tarda" if hour == 1 else " del migdia"
    elif dt.hour < 19:
        speak += " del vespre" if hour == 7 else " de la tarda"
    elif dt.hour < 21:
        speak += " de la nit" if hour == 9 else " del vespre"
    else:
        speak += " de la nit"
    return speak


def _spanish_like_time_ca(dt):
    """Speak dt with Spanish-like phrasing ("i quart", "menys quart")."""
    # Selected minutes of the second half-hour are spoken as "next hour
    # minus N", e.g. "tres menys quart".
    if dt.minute in (35, 40, 45, 50, 55):
        minute = dt.minute - 60
        hour = dt.hour + 1
    else:
        minute = dt.minute
        hour = dt.hour

    if hour == 0 or hour == 12:
        speak = "les dotze"
    elif hour == 1 or hour == 13:
        speak = "la una"
    elif hour < 13:
        speak = "les " + pronounce_number_ca(hour)
    else:
        speak = "les " + pronounce_number_ca(hour - 12)

    # Special minute phrases first, then the generic forms.
    if minute == 15:
        speak += " i quart"
    elif minute == 30:
        speak += " i mitja"
    elif minute == -15:
        speak += " menys quart"
    elif minute > 0:
        # e.g. "sis i nou"
        speak += " i " + pronounce_number_ca(minute)
    elif minute < 0:
        # pronounce_number_ca already says "menys", so no "i" is added
        speak += " " + pronounce_number_ca(minute)
    return speak


def _default_time_ca(dt, use_24hour, use_ampm):
    """Speak dt as plain watch time (hour followed by minutes)."""
    if use_24hour:
        # Simply speak the number; 1 and 2 take the feminine form when
        # talking about hours.
        if dt.hour == 1:
            speak = "la una"
        elif dt.hour == 2:
            speak = "les dues"
        elif dt.hour == 21:
            speak = "les vint-i-una"
        elif dt.hour == 22:
            speak = "les vint-i-dues"
        else:
            speak = "les " + pronounce_number_ca(dt.hour)
        if dt.minute > 0:
            speak += " i " + pronounce_number_ca(dt.minute)
        return speak

    if dt.hour == 0:
        speak = "les dotze"
    elif dt.hour == 1 or dt.hour == 13:
        speak = "la una"
    elif dt.hour == 2 or dt.hour == 14:
        speak = "les dues"
    elif dt.hour < 13:
        speak = "les " + pronounce_number_ca(dt.hour)
    else:
        speak = "les " + pronounce_number_ca(dt.hour - 12)

    if dt.minute == 0:
        speak += " en punt"
    else:
        speak += " i " + pronounce_number_ca(dt.minute)

    # TODO: review day-periods
    if use_ampm:
        # Equivalent of "in the morning", "in the afternoon", ...
        speak += _day_period_ca(dt.hour)
    return speak


@lookup_variant({
    "default": TimeVariantCA.DEFAULT,
    "traditional": TimeVariantCA.FULL_BELL,
    "bell": TimeVariantCA.BELL,
    "full_bell": TimeVariantCA.FULL_BELL,
    "spanish": TimeVariantCA.SPANISH_LIKE
})
def nice_time_ca(dt, speech=True, use_24hour=False, use_ampm=False,
                 variant=None):
    """
    Format a time to a comfortable human format

    For example, generate 'cinc trenta' for speech or '5:30' for
    text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
        variant (TimeVariantCA): speaking style; a false value selects
            TimeVariantCA.DEFAULT.  NOTE(review): the decorator
            presumably translates the listed variant names into these
            enum members -- confirm in lingua_franca.internal.
    Returns:
        (str): The formatted time string
    """
    variant = variant or TimeVariantCA.DEFAULT

    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        if use_ampm:
            # e.g. "3:01 AM" or "2:22 PM"
            string = dt.strftime("%I:%M %p")
        else:
            # e.g. "3:01" or "2:22"
            string = dt.strftime("%I:%M")
        if string[0] == '0':
            string = string[1:]  # strip leading zeros

    if not speech:
        return string

    # Generate a speakable version of the time.
    # Bell systems: https://en.wikipedia.org/wiki/Catalan_time_system
    if variant == TimeVariantCA.BELL:
        return _bell_time_ca(dt)
    if variant == TimeVariantCA.FULL_BELL:
        return _full_bell_time_ca(dt)
    if variant == TimeVariantCA.SPANISH_LIKE:
        return _spanish_like_time_ca(dt)
    # Default watch time system
    return _default_time_ca(dt, use_24hour, use_ampm)
|
||||||
47
lingua_franca/lang/format_common.py
Normal file
47
lingua_franca/lang/format_common.py
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
|
||||||
|
def convert_to_mixed_fraction(number, denominators=range(1, 21)):
    """
    Convert floats to components of a mixed fraction representation

    Returns the closest fractional representation using the
    provided denominators.  For example, 4.500002 would become
    the whole number 4, the numerator 1 and the denominator 2.

    A fractional part that rounds up to a full unit (e.g. 4.999 with
    denominator 1) is carried into the whole part, so the result is
    (5, 0, 1) rather than the improper fraction (4, 1, 1).

    Note: the whole part is obtained by truncation toward zero and the
    fraction from an absolute value, so for -1 < number < 0 the sign is
    not representable in the returned tuple (e.g. -0.5 -> (0, 1, 2)).

    Args:
        number (float): number for convert
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        whole, numerator, denominator (int): Integers of the mixed fraction,
        or None when no denominator matches within 0.01 accuracy
    """
    int_number = int(number)
    if int_number == number:
        return int_number, 0, 1  # whole number, no fraction

    frac_number = abs(number - int_number)
    if not denominators:
        denominators = range(1, 21)

    for denominator in denominators:
        numerator = frac_number * denominator
        if abs(numerator - round(numerator)) < 0.01:  # 0.01 accuracy
            break
    else:
        return None

    numerator = int(round(numerator))
    if numerator == denominator:
        # The fraction rounded up to exactly one unit: carry it into the
        # whole part (away from zero, since int() truncated toward zero).
        carry = 1 if number > 0 else -1
        return int_number + carry, 0, 1

    return int_number, numerator, denominator
|
||||||
389
lingua_franca/lang/format_cs.py
Normal file
389
lingua_franca/lang/format_cs.py
Normal file
@@ -0,0 +1,389 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
from lingua_franca.lang.format_common import convert_to_mixed_fraction
|
||||||
|
from lingua_franca.lang.common_data_cs import _NUM_STRING_CS, \
|
||||||
|
_FRACTION_STRING_CS, _LONG_SCALE_CS, _SHORT_SCALE_CS, _SHORT_ORDINAL_CS, _LONG_ORDINAL_CS
|
||||||
|
|
||||||
|
|
||||||
|
def nice_number_cs(number, speech=True, denominators=range(1, 21)):
    """ Czech helper for nice_number

    Formats a number as a mixed fraction.  For display (speech=False) the
    result looks like "4 1/2"; for speech the denominator is replaced by
    its entry in _FRACTION_STRING_CS and the whole part is joined with "a".

    Args:
        number (int or float): the float to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction found: fall back to a 3 decimal number
        return str(round(number, 3))

    whole, num, den = mixed

    # A whole number reads the same for display and for speech
    # TODO: Number grouping?  E.g. "1,000,000"
    if num == 0:
        return str(whole)

    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = _FRACTION_STRING_CS[den]
    if whole == 0 and num == 1:
        spoken = '{}'.format(den_str)
    elif whole == 0:
        spoken = '{} {}'.format(num, den_str)
    elif num == 1:
        spoken = '{} a {}'.format(whole, den_str)
    else:
        spoken = '{} a {} {}'.format(whole, num, den_str)

    # Adjust the ending of the denominator word by numerator size
    # (presumably Czech plural declension - confirm against the data table):
    # above four the last character is dropped, for 2-4 it becomes 'y'.
    if num > 4:
        spoken = spoken[:-1]
    elif num > 1:
        spoken = spoken[:-1] + 'y'

    return spoken
|
||||||
|
|
||||||
|
|
||||||
|
def pronounce_number_cs(number, places=2, short_scale=True, scientific=False,
                        ordinals=False):
    """
    Convert a number to its spoken Czech equivalent

    Infinite values are spoken as "nekonečno" / "záporné nekonečno".
    Negative values are prefixed with "mínus ", decimals are spoken digit
    by digit after "tečka".

    Args:
        number(float or int): the number to pronounce
        places(int): maximum decimal places to speak
        short_scale (bool) : use short (True) or long scale (False)
            https://en.wikipedia.org/wiki/Names_of_large_numbers
        scientific (bool): pronounce in scientific notation
        ordinals (bool): pronounce in ordinal form "first" instead of "one"
    Returns:
        (str): The pronounced number
    """
    num = number
    # deal with infinity
    if num == float("inf"):
        return "nekonečno"
    elif num == float("-inf"):
        return "záporné nekonečno"
    if scientific:
        number = '%E' % num
        n, power = number.replace("+", "").split("E")
        power = int(power)
        if power != 0:
            if ordinals:
                # This handles negatives of powers separately from the normal
                # handling since each call disables the scientific flag
                return '{}{} krát deset k {}{} mocnině'.format(
                    'záporné ' if float(n) < 0 else '',
                    pronounce_number_cs(
                        abs(float(n)), places, short_scale, False,
                        ordinals=False),
                    'záporné ' if power < 0 else '',
                    pronounce_number_cs(abs(power), places, short_scale,
                                        False, ordinals=True))
            else:
                # This handles negatives of powers separately from the normal
                # handling since each call disables the scientific flag
                return '{}{} krát deset na mocninu {}{}'.format(
                    'záporné ' if float(n) < 0 else '',
                    pronounce_number_cs(
                        abs(float(n)), places, short_scale, False),
                    'záporné ' if power < 0 else '',
                    pronounce_number_cs(abs(power), places, short_scale,
                                        False))

    if short_scale:
        number_names = _NUM_STRING_CS.copy()
        number_names.update(_SHORT_SCALE_CS)
    else:
        number_names = _NUM_STRING_CS.copy()
        number_names.update(_LONG_SCALE_CS)

    digits = [number_names[n] for n in range(0, 20)]

    tens = [number_names[n] for n in range(10, 100, 10)]

    if short_scale:
        hundreds = [_SHORT_SCALE_CS[n] for n in _SHORT_SCALE_CS.keys()]
    else:
        hundreds = [_LONG_SCALE_CS[n] for n in _LONG_SCALE_CS.keys()]

    # deal with negatives
    result = ""
    if num < 0:
        result = "záporné " if scientific else "mínus "
        num = abs(num)

    if not ordinals:
        try:
            # deal with 4 digits
            # usually if it's a 4 digit num it should be said like a date
            # i.e. 1972 => nineteen seventy two
            if len(str(num)) == 4 and isinstance(num, int):
                _num = str(num)
                # deal with 1000, 2000, 2001, 2100, 3123, etc
                # is skipped as the rest of the
                # function deals with this already
                if _num[1:4] == '000' or _num[1:3] == '00' or \
                        int(_num[0:2]) >= 20:
                    pass
                # deal with 1900, 1300, etc
                # i.e. 1900 => nineteen hundred
                elif _num[2:4] == '00':
                    first = number_names[int(_num[0:2])]
                    last = number_names[100]
                    # keep the sign prefix computed above
                    return result + first + " " + last
                # deal with 1960, 1961, etc
                # i.e. 1960 => nineteen sixty
                #      1961 => nineteen sixty one
                else:
                    first = number_names[int(_num[0:2])]
                    if _num[3:4] == '0':
                        last = number_names[int(_num[2:4])]
                    else:
                        second = number_names[int(_num[2:3])*10]
                        last = second + " " + number_names[int(_num[3:4])]
                    # keep the sign prefix computed above
                    return result + first + " " + last
        # exception used to catch any unforeseen edge cases
        # will default back to normal subroutine
        except Exception as e:
            # TODO this probably shouldn't go to stdout
            print('ERROR: Exception in pronounce_number_cs: {}'.format(
                repr(e)))

    # check for a direct match
    if num in number_names and not ordinals:
        if num > 90:
            result += "jedna "
        result += number_names[num]
    else:
        def _sub_thousand(n, ordinals=False):
            # Pronounce 0..999, optionally as an ordinal
            assert 0 <= n <= 999
            if n in _SHORT_ORDINAL_CS and ordinals:
                return _SHORT_ORDINAL_CS[n]
            if n <= 19:
                return digits[n]
            elif n <= 99:
                q, r = divmod(n, 10)
                return tens[q - 1] + (" " + _sub_thousand(r, ordinals) if r
                                      else "")
            else:
                q, r = divmod(n, 100)
                return digits[q] + " sto" + (
                    " a " + _sub_thousand(r, ordinals) if r else "")

        def _short_scale(n):
            # Pronounce n in groups of a thousand (short scale)
            if n >= max(_SHORT_SCALE_CS.keys()):
                return "nekonečno"
            ordi = ordinals

            if int(n) != n:
                ordi = False
            n = int(n)
            assert 0 <= n
            res = []
            for i, z in enumerate(_split_by(n, 1000)):
                if not z:
                    continue
                number = _sub_thousand(z, not i and ordi)

                if i:
                    if i >= len(hundreds):
                        return ""
                    number += " "
                    if ordi:

                        if i * 1000 in _SHORT_ORDINAL_CS:
                            if z == 1:
                                number = _SHORT_ORDINAL_CS[i * 1000]
                            else:
                                number += _SHORT_ORDINAL_CS[i * 1000]
                        else:
                            if n not in _SHORT_SCALE_CS:
                                num = int("1" + "0" * (len(str(n)) - 2))

                                number += _SHORT_SCALE_CS[num] + "tý"
                            else:
                                number = _SHORT_SCALE_CS[n] + "tý"
                    else:
                        number += hundreds[i]
                res.append(number)
                ordi = False

            return ", ".join(reversed(res))

        def _split_by(n, split=1000):
            # Split n into base-`split` groups, least significant first
            assert 0 <= n
            res = []
            while n:
                n, r = divmod(n, split)
                res.append(r)
            return res

        def _long_scale(n):
            # Pronounce n in groups of a million (long scale)
            if n >= max(_LONG_SCALE_CS.keys()):
                return "nekonečno"
            ordi = ordinals
            if int(n) != n:
                ordi = False
            n = int(n)
            assert 0 <= n
            res = []
            for i, z in enumerate(_split_by(n, 1000000)):
                if not z:
                    continue
                number = pronounce_number_cs(z, places, True, scientific,
                                             ordinals=ordi and not i)
                # strip off the comma after the thousand
                if i:
                    if i >= len(hundreds):
                        return ""
                    # plus one as we skip 'thousand'
                    # (and 'hundred', but this is excluded by index value)
                    number = number.replace(',', '')

                    if ordi:
                        if i * 1000000 in _LONG_ORDINAL_CS:
                            if z == 1:
                                number = _LONG_ORDINAL_CS[
                                    (i + 1) * 1000000]
                            else:
                                number += _LONG_ORDINAL_CS[
                                    (i + 1) * 1000000]
                        else:
                            if n not in _LONG_SCALE_CS:
                                num = int("1" + "0" * (len(str(n)) - 2))

                                number += " " + _LONG_SCALE_CS[
                                    num] + "tý"
                            else:
                                number = " " + _LONG_SCALE_CS[n] + "tý"
                    else:

                        number += " " + hundreds[i + 1]
                res.append(number)
            return ", ".join(reversed(res))

        if short_scale:
            result += _short_scale(num)
        else:
            result += _long_scale(num)

    # deal with scientific notation unpronounceable as number
    if not result and "e" in str(num):
        return pronounce_number_cs(num, places, short_scale, scientific=True)
    # Deal with fractional part
    elif not num == int(num) and places > 0:
        if abs(num) < 1.0 and (result == "mínus " or not result):
            result += "nula"
        result += " tečka"
        _num_str = str(num)
        _num_str = _num_str.split(".")[1][0:places]
        for char in _num_str:
            result += " " + number_names[int(char)]
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def nice_time_cs(dt, speech=True, use_24hour=True, use_ampm=False):
    """
    Format a time to a comfortable human format (Czech)

    For display the result is a clock string such as "03:01" (24-hour)
    or "3:01" / "3:01 AM" (12-hour, leading zero stripped).  For speech
    the hour and minute are spoken with pronounce_number_cs.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" / "2:22 PM" with am/pm, otherwise "3:01" / "2:22"
        string = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if string[0] == '0':
            string = string[1:]  # strip leading zeros

    if not speech:
        return string

    # Generate a speakable version of the time
    if use_24hour:
        hour_digits, minute_digits = string[0:2], string[3:5]

        # A leading zero is spoken as its own word, both for hour and minute
        if hour_digits[0] == '0':
            words = [pronounce_number_cs(0),
                     pronounce_number_cs(int(hour_digits[1]))]
        else:
            words = [pronounce_number_cs(int(hour_digits))]

        if minute_digits == '00':
            words.append("sto")
        elif minute_digits[0] == '0':
            words.append(pronounce_number_cs(0))
            words.append(pronounce_number_cs(int(minute_digits[1])))
        else:
            words.append(pronounce_number_cs(int(minute_digits)))
        return " ".join(words)

    minute = dt.minute
    if minute == 0 and dt.hour == 0:
        return "půlnoc"
    if minute == 0 and dt.hour == 12:
        return "poledne"

    hour = dt.hour % 12 or 12  # 12 hour clock and 0 is spoken as 12
    if minute == 15:
        speak = "čtvrt po " + pronounce_number_cs(hour)
    elif minute == 30:
        speak = "půl po " + pronounce_number_cs(hour)
    elif minute == 45:
        speak = "třičtvrtě na " + pronounce_number_cs(
            (dt.hour + 1) % 12 or 12)
    elif minute == 0:
        speak = pronounce_number_cs(hour)
        if not use_ampm:
            return speak + " hodin"
    else:
        speak = pronounce_number_cs(hour)
        if minute < 10:
            speak += " oh"
        speak += " " + pronounce_number_cs(minute)

    if use_ampm:
        speak += " p.m." if dt.hour > 11 else " a.m."

    return speak
|
||||||
339
lingua_franca/lang/format_da.py
Normal file
339
lingua_franca/lang/format_da.py
Normal file
@@ -0,0 +1,339 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
from lingua_franca.lang.format_common import convert_to_mixed_fraction
|
||||||
|
from lingua_franca.lang.common_data_da import _EXTRA_SPACE_DA, \
|
||||||
|
_FRACTION_STRING_DA, _MONTHS_DA, _NUM_POWERS_OF_TEN, _NUM_STRING_DA
|
||||||
|
from math import floor
|
||||||
|
|
||||||
|
|
||||||
|
def nice_number_da(number, speech=True, denominators=range(1, 21)):
    """ Danish helper for nice_number

    Formats a number as a mixed fraction.  For display (speech=False) the
    result looks like "4 1/2"; for speech the denominator is replaced by
    its entry in _FRACTION_STRING_DA, joined to the whole part with "og",
    and gets an "e" appended when the numerator is not 1.

    Args:
        number (int or float): the float to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction found: fall back to a 3 decimal number,
        # written with a decimal comma
        return str(round(number, 3)).replace(".", ",")

    whole, num, den = mixed

    # A whole number reads the same for display and for speech
    # TODO: Number grouping?  E.g. "1,000,000"
    if num == 0:
        return str(whole)

    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = _FRACTION_STRING_DA[den]
    singular = num == 1
    if whole == 0:
        if singular:
            return '{} {}'.format(num, den_str)
        return '{} {}e'.format(num, den_str)
    if singular:
        return '{} og {} {}'.format(whole, num, den_str)
    return '{} og {} {}e'.format(whole, num, den_str)
|
||||||
|
|
||||||
|
|
||||||
|
def pronounce_number_da(number, places=2, short_scale=True, scientific=False,
                        ordinals=False):
    """
    Convert a number to its spoken Danish equivalent

    Whole numbers are spoken in groups of three digits; for non-whole
    numbers the integer part is followed by " komma" and `places` decimal
    digits.  Values with magnitude of 1e24 or more are returned as
    str(number) unchanged.

    Args:
        number(float or int): the number to pronounce
        places(int): maximum decimal places to speak
        short_scale (bool) : use short (True) or long scale (False)
            https://en.wikipedia.org/wiki/Names_of_large_numbers
        scientific (bool): pronounce in scientific notation
        ordinals (bool): pronounce in ordinal form "first" instead of "one"
    Returns:
        (str): The pronounced number
    """
    # TODO short_scale, scientific and ordinals
    # currently ignored

    def pronounce_triplet_da(num):
        # Pronounce a group of up to three digits (0..999)
        result = ""
        num = floor(num)
        if num > 99:
            hundreds = num // 100
            if hundreds > 0:
                if hundreds == 1:
                    result += 'et' + 'hundrede' + _EXTRA_SPACE_DA
                else:
                    result += _NUM_STRING_DA[hundreds] + \
                        'hundrede' + _EXTRA_SPACE_DA
                num -= hundreds * 100
        if num == 0:
            result += ''  # do nothing
        elif num == 1:
            result += 'et'
        elif num <= 20:
            result += _NUM_STRING_DA[num] + _EXTRA_SPACE_DA
        elif num > 20:
            ones = num % 10
            tens = num - ones
            if ones > 0:
                result += _NUM_STRING_DA[ones] + _EXTRA_SPACE_DA
                if tens > 0:
                    result += 'og' + _EXTRA_SPACE_DA
            if tens > 0:
                result += _NUM_STRING_DA[tens] + _EXTRA_SPACE_DA

        return result

    def pronounce_fractional_da(num, places):
        # fixed number of places even with trailing zeros
        result = ""
        place = 10
        while places > 0:
            # doesn't work with 1.0001 and places = 2: int(
            # number*place) % 10 > 0 and places > 0:
            result += " " + _NUM_STRING_DA[int(num * place) % 10]
            place *= 10
            places -= 1
        return result

    def pronounce_whole_number_da(num, scale_level=0):
        # Pronounce the lowest three digits, then recurse on the rest with
        # the next scale level (thousand, million, ...)
        if num == 0:
            return ''

        num = floor(num)
        result = ''
        last_triplet = num % 1000

        if last_triplet == 1:
            if scale_level == 0:
                result += "en"
            elif scale_level == 1:
                result += 'et' + _EXTRA_SPACE_DA + 'tusinde' + _EXTRA_SPACE_DA
            else:
                result += "en " + _NUM_POWERS_OF_TEN[scale_level] + ' '
        elif last_triplet > 1:
            result += pronounce_triplet_da(last_triplet)
            if scale_level == 1:
                result += 'tusinde' + _EXTRA_SPACE_DA
            if scale_level >= 2:
                result += "og" + _NUM_POWERS_OF_TEN[scale_level]
            if scale_level >= 2:
                if scale_level % 2 == 0:
                    result += "er"  # MillionER
                result += "er "  # MilliardER, MillioneER

        # Integer floor division: float division would lose precision for
        # integers beyond 2**53, and values up to 1e24 are accepted here.
        num = num // 1000
        scale_level += 1
        return pronounce_whole_number_da(num,
                                         scale_level) + result + _EXTRA_SPACE_DA

    result = ""
    if abs(number) >= 1000000000000000000000000:  # cannot do more than this
        return str(number)
    elif number == 0:
        return str(_NUM_STRING_DA[0])
    elif number < 0:
        return "minus " + pronounce_number_da(abs(number), places)
    else:
        if number == int(number):
            return pronounce_whole_number_da(number)
        else:
            whole_number_part = floor(number)
            fractional_part = number - whole_number_part
            result += pronounce_whole_number_da(whole_number_part)
            if places > 0:
                result += " komma"
                result += pronounce_fractional_da(fractional_part, places)
            return result
|
||||||
|
|
||||||
|
|
||||||
|
def pronounce_ordinal_da(number):
    """
    This function pronounces a number as an ordinal

    1 -> første
    2 -> anden

    Values below 10 come from a fixed table; larger values are built from
    pronounce_number_da plus a suffix.  Negative or non-whole input is
    returned unchanged.  Whole-valued floats (e.g. 3.0) are accepted.

    Args:
        number (int): the number to format
    Returns:
        (str): The pronounced number string.
    """

    # ordinals for 1, 3, 7 and 8 are irregular
    # this produces the base form, it will have to be adapted for genus,
    # casus, numerus

    ordinals = ["nulte", "første", "anden", "tredie", "fjerde", "femte",
                "sjette", "syvende", "ottende", "niende", "tiende"]

    # only for whole positive numbers including zero
    if number < 0 or number != int(number):
        return number
    if number < 10:
        # int() so that a whole-valued float is usable as a list index
        return ordinals[int(number)]

    cardinal = pronounce_number_da(number)
    if 30 <= number < 40:
        return cardinal + "fte"
    # Avoid a doubled "e" when the cardinal already ends in one
    if cardinal[-1:] == 'e':
        return cardinal + "nde"
    return cardinal + "ende"
|
||||||
|
|
||||||
|
|
||||||
|
def nice_time_da(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format (Danish)

    For display the result is the strftime clock string ("%H:%M",
    "%I:%M" or "%I:%M %p").  For speech the hour and minute are spoken
    with pronounce_number_da, optionally followed by a day-period phrase.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        clock_format = "%H:%M"
    elif use_ampm:
        # e.g. "03:01 AM" or "02:22 PM"
        clock_format = "%I:%M %p"
    else:
        # e.g. "03:01" or "02:22"
        clock_format = "%I:%M"
    string = dt.strftime(clock_format)

    if not speech:
        return string

    hour, minute = dt.hour, dt.minute

    # Exact midnight and noon have their own words in 12-hour speech
    if not use_24hour and minute == 0:
        if hour == 0:
            return "midnat"
        if hour == 12:
            return "middag"

    # TODO: "half past 3", "a quarter of 4" and other idiomatic times

    # Spoken hour; one o'clock is "et" not "en"
    if use_24hour:
        speak = "et" if hour == 1 else pronounce_number_da(hour)
    elif hour == 0:
        speak = pronounce_number_da(12)
    elif hour == 1 or hour == 13:
        speak = 'et'
    elif hour < 13:
        speak = pronounce_number_da(hour)
    else:
        speak = pronounce_number_da(hour - 12)

    # Spoken minute, with a spoken zero for single digits
    if minute != 0:
        if minute < 10:
            speak += ' nul'
        speak += " " + pronounce_number_da(minute)

    if use_24hour:
        return speak  # ampm is ignored when use_24hour is true

    if use_ampm:
        if hour < 3:
            # 00:01 - 02:59 at night
            speak += " om natten"
        elif hour < 12:
            # 03:00 - 11:59 in the morning
            speak += " om morgenen"
        elif hour < 18:
            # 12:01 - 17:59 afternoon
            speak += " om eftermiddagen"
        elif hour < 22:
            # 18:00 - 21:59 evening
            speak += " om aftenen"
        else:
            # 22:00 - 23:59 at night
            speak += " om natten"

    return speak
|
||||||
|
|
||||||
|
|
||||||
|
def nice_response_da(text):
    """
    Make a Danish response text more speakable

    Two rewrites are applied:
      * a "^" token followed by a numeric token becomes "opløftet i"
        (to the power of)
      * when the text contains a month name, numeric ordinals such as
        "1." are expanded by _nice_ordinal_da

    Both rewrites are composed, so neither discards the other regardless
    of the order in which the month and the "^" appear.

    Args:
        text (str): the text to rewrite
    Returns:
        (str): the rewritten text; unchanged when nothing matched
    """
    words = text.split()

    # replace "^" with "opløftet i" (to the power of)
    power_replaced = False
    for idx, word in enumerate(words):
        if word == '^':
            wordNext = words[idx + 1] if idx + 1 < len(words) else ""
            if wordNext.isnumeric():
                words[idx] = "opløftet i"
                power_replaced = True
    if power_replaced:
        text = " ".join(words)

    # check for months and call _nice_ordinal_da for declension of
    # ordinals, working on the text that already has the "^" rewrite
    if any(word.lower() in _MONTHS_DA for word in words):
        text = _nice_ordinal_da(text)

    return text
|
||||||
|
|
||||||
|
|
||||||
|
def _nice_ordinal_da(text, speech=True):
|
||||||
|
# check for months for declension of ordinals before months
|
||||||
|
# depending on articles/prepositions
|
||||||
|
normalized_text = text
|
||||||
|
words = text.split()
|
||||||
|
|
||||||
|
for idx, word in enumerate(words):
|
||||||
|
wordNext = words[idx + 1] if idx + 1 < len(words) else ""
|
||||||
|
wordPrev = words[idx - 1] if idx > 0 else ""
|
||||||
|
if word[-1:] == ".":
|
||||||
|
if word[:-1].isdecimal():
|
||||||
|
if wordNext.lower() in _MONTHS_DA:
|
||||||
|
word = pronounce_ordinal_da(int(word[:-1]))
|
||||||
|
if wordPrev.lower() in ["om", "den", "fra", "til",
|
||||||
|
"(fra", "(om", "til"]:
|
||||||
|
word += "n"
|
||||||
|
elif wordPrev.lower() not in ["den"]:
|
||||||
|
word += "r"
|
||||||
|
words[idx] = word
|
||||||
|
normalized_text = " ".join(words)
|
||||||
|
return normalized_text
|
||||||
327
lingua_franca/lang/format_de.py
Normal file
327
lingua_franca/lang/format_de.py
Normal file
@@ -0,0 +1,327 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
from lingua_franca.lang.format_common import convert_to_mixed_fraction
|
||||||
|
from lingua_franca.lang.common_data_de import _EXTRA_SPACE_DE, \
|
||||||
|
_FRACTION_STRING_DE, _MONTHS_DE, _NUM_POWERS_OF_TEN_DE, _NUM_STRING_DE
|
||||||
|
from math import floor
|
||||||
|
|
||||||
|
|
||||||
|
def nice_number_de(number, speech=True, denominators=range(1, 21)):
    """ German helper for nice_number

    Formats a number as a mixed fraction.  For display (speech=False) the
    result looks like "4 1/2"; for speech the denominator is replaced by
    its entry in _FRACTION_STRING_DE, a numerator of one is spoken as
    "ein", and the whole part is joined with "und".

    Args:
        number (int or float): the float to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction found: fall back to a 3 decimal number,
        # written with a decimal comma
        return str(round(number, 3)).replace(".", ",")

    whole, num, den = mixed

    # A whole number reads the same for display and for speech
    # TODO: Number grouping?  E.g. "1,000,000"
    if num == 0:
        return str(whole)

    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = _FRACTION_STRING_DE[den]
    if whole == 0 and num == 1:
        return 'ein {}'.format(den_str)
    if whole == 0:
        return '{} {}'.format(num, den_str)
    if num == 1:
        return '{} und ein {}'.format(whole, den_str)
    return '{} und {} {}'.format(whole, num, den_str)
|
||||||
|
|
||||||
|
|
||||||
|
def pronounce_number_de(number, places=2, short_scale=True, scientific=False,
                        ordinals=False):
    """
    Convert a number to its spoken German equivalent.

    The whole part is spelled out, followed by " Komma" and exactly
    ``places`` decimal digits spoken one by one (padded with zeros).

    Args:
        number (float or int): the number to pronounce
        places (int): number of decimal places to speak
        short_scale (bool): accepted for interface compatibility,
            currently ignored
        scientific (bool): accepted for interface compatibility,
            currently ignored
        ordinals (bool): accepted for interface compatibility,
            currently ignored
    Returns:
        (str): The pronounced number. Numbers whose magnitude is 10**24 or
            more are returned as ``str(number)``.
    """
    # Function-scope import so the module's import block stays untouched.
    from decimal import Decimal

    # TODO short_scale, scientific and ordinals
    # currently ignored

    def pronounce_triplet_de(num):
        """Spell out a value from 0 to 999 (empty string for 0)."""
        result = ""
        num = floor(num)
        if num > 99:
            hundreds = num // 100
            if hundreds > 0:
                result += _NUM_STRING_DE[
                    hundreds] + _EXTRA_SPACE_DE + 'hundert' + _EXTRA_SPACE_DE
                num -= hundreds * 100
        if num == 0:
            result += ''  # do nothing
        elif num == 1:
            result += 'eins'  # need the s for the last digit
        elif num <= 20:
            result += _NUM_STRING_DE[num]
        else:
            # German order: ones first, then "und", then the tens.
            ones = num % 10
            tens = num - ones
            if ones > 0:
                result += _NUM_STRING_DE[ones] + _EXTRA_SPACE_DE
                if tens > 0:
                    result += 'und' + _EXTRA_SPACE_DE
            if tens > 0:
                result += _NUM_STRING_DE[tens] + _EXTRA_SPACE_DE
        return result

    def pronounce_fractional_de(num, places):
        """Speak exactly `places` decimal digits of num, zero padded."""
        # Take the digits from the decimal text of the number.  Multiplying
        # a float remainder by powers of ten picks up binary rounding
        # errors (1.15 used to yield the digits 1 and 4).
        decimal_text = format(Decimal(str(num)), 'f')
        digits = decimal_text.partition('.')[2][:places].ljust(places, '0')
        result = ""
        for digit in digits:
            result += " " + _NUM_STRING_DE[int(digit)]
            if digit == '1':
                # "1" is pronounced "eins" after the decimal point
                result += 's'
        return result

    def pronounce_whole_number_de(num, scale_level=0):
        """Spell out a whole number, one group of three digits per call."""
        if num == 0:
            return ''

        num = floor(num)
        result = ''
        last_triplet = num % 1000

        if last_triplet == 1:
            if scale_level == 0:
                result += "eins"
            elif scale_level == 1:
                result += 'ein' + _EXTRA_SPACE_DE + 'tausend' + _EXTRA_SPACE_DE
            else:
                result += "eine " + _NUM_POWERS_OF_TEN_DE[scale_level] + ' '
        elif last_triplet > 1:
            result += pronounce_triplet_de(last_triplet)
            if scale_level == 1:
                result += 'tausend' + _EXTRA_SPACE_DE
            if scale_level >= 2:
                result += " " + _NUM_POWERS_OF_TEN_DE[scale_level]
                if scale_level % 2 == 0:
                    result += "e"  # MillionE
                result += "n "  # MilliardeN, MillioneN

        # Integer division stays exact for values beyond float precision.
        num = num // 1000
        scale_level += 1
        return pronounce_whole_number_de(num, scale_level) + result

    if abs(number) >= 1000000000000000000000000:  # cannot do more than this
        return str(number)
    if number == 0:
        return str(_NUM_STRING_DE[0])
    if number < 0:
        return "minus " + pronounce_number_de(abs(number), places)
    if number == int(number):
        return pronounce_whole_number_de(number)

    whole_number_part = floor(number)
    if whole_number_part == 0:
        # The whole-number helper returns '' for zero; say the zero word so
        # the result does not start with " Komma".
        result = str(_NUM_STRING_DE[0])
    else:
        result = pronounce_whole_number_de(whole_number_part)
    if places > 0:
        result += " Komma"
        result += pronounce_fractional_de(number, places)
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def pronounce_ordinal_de(number):
    """
    Pronounce a number as a German ordinal in its base form.

    The result still has to be adapted for genus, casus and numerus by
    the caller.

    Args:
        number (int): the number to format; integral floats such as 3.0
            are accepted as well
    Returns:
        (str): The pronounced ordinal. Negative or non-whole input is
            returned unchanged (not converted to a string).
    """
    # Ordinals for 1, 3, 7 and 8 are irregular, so 0..8 are listed in full.
    ordinals = ("nullte", "erste", "zweite", "dritte", "vierte", "fünfte",
                "sechste", "siebte", "achte")

    # only for whole positive numbers including zero
    if number < 0 or number != int(number):
        return number

    # Normalise integral floats so they can be used as a sequence index.
    number = int(number)
    if number < 9:
        return ordinals[number]
    if number < 20:
        return pronounce_number_de(number) + "te"
    return pronounce_number_de(number) + "ste"
|
||||||
|
|
||||||
|
|
||||||
|
def nice_time_de(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format (German).

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): in 12-hour format, append the AM/PM marker for
            display or the time of day ("morgens", "nachmittags",
            "abends", "nachts") for speech; ignored when use_24hour is True
    Returns:
        (str): The formatted time string
    """
    if not speech:
        if use_24hour:
            # e.g. "03:01" or "14:22"
            return dt.strftime("%H:%M")
        # e.g. "3:01 AM" / "2:22 PM" with marker, "3:01" / "2:22" without
        shown = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        # strip leading zeros
        return shown[1:] if shown[0] == '0' else shown

    def spoken_hour(hour):
        # One o'clock is "ein Uhr", not "eins Uhr".
        return "ein" if hour == 1 else pronounce_number_de(hour)

    if use_24hour:
        spoken = spoken_hour(dt.hour) + " Uhr"
        # Zero minutes are not pronounced.
        if dt.minute != 0:
            spoken += " " + pronounce_number_de(dt.minute)
        return spoken  # ampm is ignored when use_24hour is true

    if dt.minute == 0 and dt.hour == 0:
        return "Mitternacht"
    if dt.minute == 0 and dt.hour == 12:
        return "Mittag"

    # Quarter and half hours are phrased relative to the NEXT hour.
    quarter_prefixes = {15: "viertel ", 30: "halb ", 45: "dreiviertel "}
    if dt.minute in quarter_prefixes:
        upcoming = (dt.hour + 1) % 12 or 12  # 0 is spoken as 12
        spoken = quarter_prefixes[dt.minute] + pronounce_number_de(upcoming)
    else:
        # 12 hour clock and 0 is spoken as 12
        spoken = spoken_hour(dt.hour % 12 or 12) + " Uhr"
        if dt.minute != 0:
            spoken += " " + pronounce_number_de(dt.minute)

    if use_ampm:
        if 3 <= dt.hour < 12:
            spoken += " morgens"  # 03:00 - 11:59, in the morning
        elif 12 <= dt.hour < 18:
            spoken += " nachmittags"  # 12:01 - 17:59, afternoon
        elif 18 <= dt.hour < 22:
            spoken += " abends"  # 18:00 - 21:59, evening
        else:
            spoken += " nachts"  # 22:00 - 02:59, at night

    return spoken
|
||||||
|
|
||||||
|
|
||||||
|
def nice_response_de(text):
    """
    Post-process a German response text for speech.

    Two independent rewrites are applied:
      * a standalone "^" followed by a numeric word becomes "hoch"
        (to the power of);
      * if the text contains a month name, it is passed through
        _nice_ordinal_de for declension of ordinals.

    Args:
        text (str): the response text
    Returns:
        (str): The processed text; returned unchanged when neither rewrite
            applies.
    """
    words = text.split()

    power_replaced = False
    for idx, word in enumerate(words):
        if word == '^':
            following = words[idx + 1] if idx + 1 < len(words) else ""
            if following.isnumeric():
                words[idx] = "hoch"
                power_replaced = True

    contains_month = any(word.lower() in _MONTHS_DE for word in words)

    # Apply the "^" rewrite first and the ordinal rewrite on its result.
    # Doing both inside one loop let a later "^" replacement re-join the
    # original words and throw away the ordinal normalisation.
    if power_replaced:
        text = " ".join(words)
    if contains_month:
        text = _nice_ordinal_de(text)
    return text
|
||||||
|
|
||||||
|
|
||||||
|
def _nice_ordinal_de(text, speech=True):
    """
    Replace "<digits>." in front of a month name with a declined ordinal.

    Args:
        text (str): the text to normalise
        speech (bool): not read anywhere in this function
    Returns:
        (str): The text with matching ordinals replaced; the input text
            itself when no word ends with a full stop.
    """
    # check for months for declension of ordinals before months
    # depending on articles/prepositions
    normalized_text = text
    words = text.split()

    for idx, word in enumerate(words):
        # Neighbours are read from `words`, so an already replaced
        # previous word is seen in its replaced form.
        wordNext = words[idx + 1] if idx + 1 < len(words) else ""
        wordPrev = words[idx - 1] if idx > 0 else ""
        if word[-1:] == ".":
            if word[:-1].isdecimal():
                if wordNext.lower() in _MONTHS_DE:
                    word = pronounce_ordinal_de(int(word[:-1]))
                    # After these prepositions the ordinal takes "-n".
                    if wordPrev.lower() in ["am", "dem", "vom", "zum",
                                            "(vom", "(am", "zum"]:
                        word += "n"
                    # Without a definite article the ordinal takes "-r".
                    elif wordPrev.lower() not in ["der", "die", "das"]:
                        word += "r"
                    words[idx] = word
            # NOTE(review): nesting of this re-join was reconstructed from
            # a whitespace-stripped listing; confirm against the
            # original file.
            normalized_text = " ".join(words)
    return normalized_text
|
||||||
386
lingua_franca/lang/format_en.py
Normal file
386
lingua_franca/lang/format_en.py
Normal file
@@ -0,0 +1,386 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
from lingua_franca.lang.format_common import convert_to_mixed_fraction
|
||||||
|
from lingua_franca.lang.common_data_en import _NUM_STRING_EN, \
|
||||||
|
_FRACTION_STRING_EN, _LONG_SCALE_EN, _SHORT_SCALE_EN, _SHORT_ORDINAL_EN, _LONG_ORDINAL_EN
|
||||||
|
|
||||||
|
|
||||||
|
def nice_number_en(number, speech=True, denominators=range(1, 21)):
    """ English helper for nice_number

    Formats a number as a mixed fraction, either for speech or for display.

    Args:
        number (int or float): the number to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string. When no mixed fraction can be found
            for the given denominators, the number rounded to three
            decimals.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction: fall back to a 3 decimal number.
        return str(round(number, 3))

    whole, numerator, denominator = mixed

    # A whole number reads the same for speech and for display.
    # TODO: Number grouping? E.g. "1,000,000"
    if numerator == 0:
        return str(whole)

    if not speech:
        return '{} {}/{}'.format(whole, numerator, denominator)

    fraction_name = _FRACTION_STRING_EN[denominator]
    # A single part is spoken with the article "a" instead of the digit.
    counted = 'a' if numerator == 1 else str(numerator)
    if whole == 0:
        spoken = '{} {}'.format(counted, fraction_name)
    else:
        spoken = '{} and {} {}'.format(whole, counted, fraction_name)

    # Several parts need the plural of the fraction word.
    if numerator > 1:
        spoken += 's'
    return spoken
|
||||||
|
|
||||||
|
|
||||||
|
def pronounce_number_en(number, places=2, short_scale=True, scientific=False,
                        ordinals=False):
    """
    Convert a number to its spoken English equivalent.

    Args:
        number (float or int): the number to pronounce
        places (int): maximum decimal places to speak
        short_scale (bool): use short (True) or long scale (False)
            https://en.wikipedia.org/wiki/Names_of_large_numbers
        scientific (bool): pronounce in scientific notation
        ordinals (bool): pronounce in ordinal form "first" instead of "one"
    Returns:
        (str): The pronounced number
    """
    num = number
    # deal with infinity
    if num == float("inf"):
        return "infinity"
    elif num == float("-inf"):
        return "negative infinity"
    if scientific:
        number = '%E' % num
        n, power = number.replace("+", "").split("E")
        power = int(power)
        if power != 0:
            if ordinals:
                # This handles negatives of powers separately from the normal
                # handling since each call disables the scientific flag
                return '{}{} times ten to the {}{} power'.format(
                    'negative ' if float(n) < 0 else '',
                    pronounce_number_en(
                        abs(float(n)), places, short_scale, False,
                        ordinals=False),
                    'negative ' if power < 0 else '',
                    pronounce_number_en(abs(power), places, short_scale,
                                        False, ordinals=True))
            else:
                # This handles negatives of powers separately from the normal
                # handling since each call disables the scientific flag
                return '{}{} times ten to the power of {}{}'.format(
                    'negative ' if float(n) < 0 else '',
                    pronounce_number_en(
                        abs(float(n)), places, short_scale, False),
                    'negative ' if power < 0 else '',
                    pronounce_number_en(abs(power), places, short_scale,
                                        False))

    if short_scale:
        number_names = _NUM_STRING_EN.copy()
        number_names.update(_SHORT_SCALE_EN)
    else:
        number_names = _NUM_STRING_EN.copy()
        number_names.update(_LONG_SCALE_EN)

    digits = [number_names[n] for n in range(0, 20)]

    tens = [number_names[n] for n in range(10, 100, 10)]

    if short_scale:
        hundreds = [_SHORT_SCALE_EN[n] for n in _SHORT_SCALE_EN.keys()]
    else:
        hundreds = [_LONG_SCALE_EN[n] for n in _LONG_SCALE_EN.keys()]

    # deal with negatives
    result = ""
    if num < 0:
        result = "negative " if scientific else "minus "
    num = abs(num)

    if not ordinals:
        try:
            # deal with 4 digits
            # usually if it's a 4 digit num it should be said like a date
            # i.e. 1972 => nineteen seventy two
            if len(str(num)) == 4 and isinstance(num, int):
                _num = str(num)
                # deal with 1000, 2000, 2001, 2100, 3123, etc
                # is skipped as the rest of the
                # function deals with this already
                if _num[1:4] == '000' or _num[1:3] == '00' or \
                        int(_num[0:2]) >= 20:
                    pass
                # deal with 1900, 1300, etc
                # i.e. 1900 => nineteen hundred
                elif _num[2:4] == '00':
                    first = number_names[int(_num[0:2])]
                    last = number_names[100]
                    # Keep the sign prefix collected above.
                    return result + first + " " + last
                # deal with 1960, 1961, etc
                # i.e. 1960 => nineteen sixty
                #      1961 => nineteen sixty one
                else:
                    first = number_names[int(_num[0:2])]
                    if _num[3:4] == '0':
                        last = number_names[int(_num[2:4])]
                    else:
                        second = number_names[int(_num[2:3])*10]
                        last = second + " " + number_names[int(_num[3:4])]
                    # Keep the sign prefix collected above.
                    return result + first + " " + last
        # exception used to catch any unforeseen edge cases
        # will default back to normal subroutine
        except Exception as e:
            # TODO this probably shouldn't go to stdout
            print('ERROR: Exception in pronounce_number_en: {}'.format(
                repr(e)))

    # check for a direct match
    if num in number_names and not ordinals:
        if num > 90:
            result += "one "
        result += number_names[num]
    else:
        def _sub_thousand(n, ordinals=False):
            """Pronounce 0..999, as an ordinal when requested."""
            assert 0 <= n <= 999
            if n in _SHORT_ORDINAL_EN and ordinals:
                return _SHORT_ORDINAL_EN[n]
            if n <= 19:
                return digits[n]
            elif n <= 99:
                q, r = divmod(n, 10)
                return tens[q - 1] + (" " + _sub_thousand(r, ordinals) if r
                                      else "")
            else:
                q, r = divmod(n, 100)
                return digits[q] + " hundred" + (
                    " and " + _sub_thousand(r, ordinals) if r else "")

        def _short_scale(n):
            """Pronounce the whole part of n in groups of one thousand."""
            if n >= max(_SHORT_SCALE_EN.keys()):
                return "infinity"
            ordi = ordinals

            # A number with a fractional part is never spoken as an ordinal.
            if int(n) != n:
                ordi = False
            n = int(n)
            assert 0 <= n
            res = []
            for i, z in enumerate(_split_by(n, 1000)):
                if not z:
                    continue
                number = _sub_thousand(z, not i and ordi)

                if i:
                    if i >= len(hundreds):
                        return ""
                    number += " "
                    if ordi:

                        if i * 1000 in _SHORT_ORDINAL_EN:
                            if z == 1:
                                number = _SHORT_ORDINAL_EN[i * 1000]
                            else:
                                number += _SHORT_ORDINAL_EN[i * 1000]
                        else:
                            if n not in _SHORT_SCALE_EN:
                                num = int("1" + "0" * (len(str(n)) - 2))

                                number += _SHORT_SCALE_EN[num] + "th"
                            else:
                                number = _SHORT_SCALE_EN[n] + "th"
                    else:
                        number += hundreds[i]
                res.append(number)
                # Only the first non-empty group may take the ordinal form.
                ordi = False

            return ", ".join(reversed(res))

        def _split_by(n, split=1000):
            """Split n into base-`split` groups, least significant first."""
            assert 0 <= n
            res = []
            while n:
                n, r = divmod(n, split)
                res.append(r)
            return res

        def _long_scale(n):
            """Pronounce the whole part of n in groups of one million."""
            if n >= max(_LONG_SCALE_EN.keys()):
                return "infinity"
            ordi = ordinals
            if int(n) != n:
                ordi = False
            n = int(n)
            assert 0 <= n
            res = []
            for i, z in enumerate(_split_by(n, 1000000)):
                if not z:
                    continue
                number = pronounce_number_en(z, places, True, scientific,
                                             ordinals=ordi and not i)
                # strip off the comma after the thousand
                if i:
                    if i >= len(hundreds):
                        return ""
                    # plus one as we skip 'thousand'
                    # (and 'hundred', but this is excluded by index value)
                    number = number.replace(',', '')

                    if ordi:
                        if i * 1000000 in _LONG_ORDINAL_EN:
                            if z == 1:
                                number = _LONG_ORDINAL_EN[
                                    (i + 1) * 1000000]
                            else:
                                number += _LONG_ORDINAL_EN[
                                    (i + 1) * 1000000]
                        else:
                            if n not in _LONG_SCALE_EN:
                                num = int("1" + "0" * (len(str(n)) - 2))

                                number += " " + _LONG_SCALE_EN[
                                    num] + "th"
                            else:
                                number = " " + _LONG_SCALE_EN[n] + "th"
                    else:

                        number += " " + hundreds[i + 1]
                res.append(number)
            return ", ".join(reversed(res))

        if short_scale:
            result += _short_scale(num)
        else:
            result += _long_scale(num)

    # deal with scientific notation unpronounceable as number
    if not result and "e" in str(num):
        return pronounce_number_en(num, places, short_scale, scientific=True)
    # Deal with fractional part
    elif not num == int(num) and places > 0:
        if abs(num) < 1.0 and (result == "minus " or not result):
            result += "zero"
        result += " point"
        _num_str = str(num)
        _num_str = _num_str.split(".")[1][0:places]
        for char in _num_str:
            result += " " + number_names[int(char)]
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def nice_time_en(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format (English).

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        clock = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" / "2:22 PM" with marker, "3:01" / "2:22" without
        clock = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if clock[0] == '0':
            clock = clock[1:]  # strip leading zeros

    if not speech:
        return clock

    # Generate a speakable version of the time
    if use_24hour:
        def digit_pair(pair):
            # A leading zero is spoken as its own digit.
            if pair[0] == '0':
                return (pronounce_number_en(0) + " " +
                        pronounce_number_en(int(pair[1])))
            return pronounce_number_en(int(pair))

        hour_text, minute_text = clock[0:2], clock[3:5]
        # Either "0 8 hundred" or "13 hundred"
        spoken = digit_pair(hour_text) + " "
        if minute_text == '00':
            spoken += "hundred"
        else:
            spoken += digit_pair(minute_text)
        return spoken

    if dt.minute == 0 and dt.hour == 0:
        return "midnight"
    if dt.minute == 0 and dt.hour == 12:
        return "noon"

    hour = dt.hour % 12 or 12  # 12 hour clock and 0 is spoken as 12
    if dt.minute == 15:
        spoken = "quarter past " + pronounce_number_en(hour)
    elif dt.minute == 30:
        spoken = "half past " + pronounce_number_en(hour)
    elif dt.minute == 45:
        upcoming = (dt.hour + 1) % 12 or 12
        spoken = "quarter to " + pronounce_number_en(upcoming)
    elif dt.minute == 0:
        spoken = pronounce_number_en(hour)
        # A full hour without am/pm marker is spoken as "... o'clock".
        if not use_ampm:
            return spoken + " o'clock"
    else:
        spoken = pronounce_number_en(hour)
        if dt.minute < 10:
            spoken += " oh"
        spoken += " " + pronounce_number_en(dt.minute)

    if use_ampm:
        spoken += " p.m." if dt.hour > 11 else " a.m."

    return spoken
|
||||||
269
lingua_franca/lang/format_es.py
Normal file
269
lingua_franca/lang/format_es.py
Normal file
@@ -0,0 +1,269 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
"""
|
||||||
|
Format functions for castillian (es-es)
|
||||||
|
|
||||||
|
"""
|
||||||
|
from lingua_franca.lang.format_common import convert_to_mixed_fraction
|
||||||
|
from lingua_franca.lang.common_data_es import _NUM_STRING_ES, \
|
||||||
|
_FRACTION_STRING_ES
|
||||||
|
|
||||||
|
|
||||||
|
def nice_number_es(number, speech=True, denominators=range(1, 21)):
    """ Spanish helper for nice_number

    Formats a number as a mixed fraction, either for speech or for display.

    Args:
        number (int or float): the number to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if mixed:
        whole, numerator, denominator = mixed
    else:
        # Give up, just represent as a 3 decimal number
        whole, numerator, denominator = round(number, 3), 0, 0

    if numerator == 0:
        # Not a fraction: only the separators need adapting.
        if speech:
            return str(whole).replace(".", ",")
        # Display form: space as thousands separator, decimal comma.
        return '{:,}'.format(whole).replace(",", " ").replace(".", ",")

    if not speech:
        return '{} {}/{}'.format(whole, numerator, denominator)

    fraction_name = _FRACTION_STRING_ES[denominator]
    if whole == 0:
        # no whole number: "un <fraction>" or "<n> <fraction>"
        if numerator == 1:
            spoken = 'un {}'.format(fraction_name)
        else:
            spoken = '{} {}'.format(numerator, fraction_name)
    elif numerator != 1:
        spoken = '{} y {} {}'.format(whole, numerator, fraction_name)
    elif denominator == 2:
        # a half is joined without a count: "<whole> y <fraction>"
        spoken = '{} y {}'.format(whole, fraction_name)
    else:
        spoken = '{} y 1 {}'.format(whole, fraction_name)

    # Plural "s" for several parts, except for a denominator of 3.
    if numerator > 1 and denominator != 3:
        spoken += 's'

    return spoken
|
||||||
|
|
||||||
|
|
||||||
|
def pronounce_number_es(number, places=2):
    """
    Convert a number to its spoken Spanish equivalent.

    Args:
        number (float or int): the number to pronounce (under 100)
        places (int): maximum decimal places to speak
    Returns:
        (str): The pronounced number. Numbers whose magnitude is 100 or
            more are returned as ``str(number)``.
    """
    # Function-scope import so the module's import block stays untouched.
    from decimal import Decimal

    if abs(number) >= 100:
        # TODO: support numbers above 100
        return str(number)

    result = ""
    if number < 0:
        result = "menos "
    number = abs(number)

    # 21 to 29 have a special, single-word form.  The upper bound is
    # exclusive so that values such as 29.5 are handled here as well
    # instead of falling through to a direct table lookup.
    if 20 <= number < 30:
        tens = int(number - int(number) % 10)
        ones = int(number - tens)
        result += _NUM_STRING_ES[tens]
        if ones > 0:
            # Drop the final letter of the tens word before appending
            # "i" + ones.  22, 23 and 26 carry an accent and are therefore
            # spelled out explicitly.
            result = result[:-1]
            if ones == 2:
                result += "idós"
            elif ones == 3:
                result += "itrés"
            elif ones == 6:
                result += "iséis"
            else:
                result += "i" + _NUM_STRING_ES[ones]
    elif number >= 30:  # from 30 upwards
        tens = int(number - int(number) % 10)
        ones = int(number - tens)
        result += _NUM_STRING_ES[tens]
        if ones > 0:
            result += " y " + _NUM_STRING_ES[ones]
    else:
        result += _NUM_STRING_ES[int(number)]

    # Deal with the decimal part.  It can be written with a comma or a
    # dot, but it is spoken as "coma".
    if not number == int(number) and places > 0:
        if abs(number) < 1.0 and (result == "menos " or not result):
            result += "cero"
        result += " coma"
        # Expand to plain positional notation first: str() switches to
        # scientific notation for very small values (e.g. "1e-05"), which
        # has no "." to split on.
        plain = format(Decimal(str(number)), "f")
        for char in plain.partition(".")[2][:places]:
            result += " " + _NUM_STRING_ES[int(char)]
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def nice_time_es(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Render a time of day in Spanish, either for display or for speech.

    For example 'las cinco y media' for speech or '5:30' for text display.

    Args:
        dt (datetime): time to format (assumed to be in the local timezone)
        speech (bool): return a spoken phrase (True) or a clock string (False)
        use_24hour (bool): use the 24-hour clock instead of the 12-hour clock
        use_ampm (bool): in 12-hour mode, add AM/PM (display) or the
            part of the day (speech)
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        clock = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" / "2:22 PM" with am/pm, "3:01" / "2:22" without
        clock = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if clock[0] == '0':
            clock = clock[1:]  # strip the leading zero

    if not speech:
        return clock

    if use_24hour:
        # In 24-hour speech no extra qualifier such as "de la noche",
        # "de la tarde" or "de la mañana" is added.
        # http://lema.rae.es/dpd/srv/search?id=YNoTWNJnAD6bhhVBf9
        if dt.hour == 1:
            spoken = "la una"
        else:
            spoken = "las " + pronounce_number_es(dt.hour)

        # 14:04 is read "las catorce cero cuatro"
        if dt.minute < 10:
            spoken += " cero " + pronounce_number_es(dt.minute)
        else:
            spoken += " " + pronounce_number_es(dt.minute)
        return spoken

    # 12-hour speech.  Minutes 35/40/45/50/55 are expressed as
    # "<next hour> menos <remaining minutes>", so they become negative
    # and the hour moves forward by one.
    if dt.minute in (35, 40, 45, 50, 55):
        minute = dt.minute - 60
        hour = dt.hour + 1
    else:
        minute = dt.minute
        hour = dt.hour

    if hour in (0, 12):
        spoken = "las doce"
    elif hour in (1, 13):
        spoken = "la una"
    elif hour < 13:
        spoken = "las " + pronounce_number_es(hour)
    else:
        spoken = "las " + pronounce_number_es(hour - 12)

    if minute == 15:
        spoken += " y cuarto"
    elif minute == 30:
        spoken += " y media"
    elif minute == -15:
        spoken += " menos cuarto"
    elif minute > 0:
        # "seis y nueve", "siete y veinticinco"
        spoken += " y " + pronounce_number_es(minute)
    elif minute < 0:
        # "siete menos veinte": the negative number already yields
        # "menos ...", so no "y" is inserted
        spoken += " " + pronounce_number_es(minute)
    elif not use_ampm:
        # exact hour without a part-of-day qualifier, e.g. 3:00
        spoken += " en punto"

    if use_ampm:
        # "de la noche" runs from nightfall to midnight, so starting it at
        # 21h is somewhat subjective; in Spain 20h is still "de la tarde".
        # 12h counts as morning/noon, so "de la tarde" starts at 13h.
        # http://lema.rae.es/dpd/srv/search?id=YNoTWNJnAD6bhhVBf9
        if 0 <= hour < 6:
            spoken += " de la madrugada"
        elif 6 <= hour < 13:
            spoken += " de la mañana"
        elif 13 <= hour < 21:
            spoken += " de la tarde"
        else:
            spoken += " de la noche"
    return spoken
|
||||||
301
lingua_franca/lang/format_fa.py
Normal file
301
lingua_franca/lang/format_fa.py
Normal file
@@ -0,0 +1,301 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
from lingua_franca.lang.format_common import convert_to_mixed_fraction
|
||||||
|
from lingua_franca.lang.common_data_fa import \
|
||||||
|
_FARSI_ONES, _FARSI_TENS, _FARSI_HUNDREDS, _FARSI_BIG, _FARSI_SEPERATOR, \
|
||||||
|
_FARSI_FRAC, _FARSI_FRAC_BIG, _FRACTION_STRING_FA, _FORMAL_VARIANT
|
||||||
|
import math
|
||||||
|
from lingua_franca.internal import lookup_variant
|
||||||
|
from enum import IntEnum
|
||||||
|
from functools import wraps
|
||||||
|
|
||||||
|
class NumberVariantFA(IntEnum):
    """Selectable renderings for Farsi number and time strings."""

    # Mapped from the "default" and "conversational" variant names.
    CONVERSATIONAL = 0
    # Mapped from the "formal" variant name; triggers the _FORMAL_VARIANT
    # substitutions in _apply_number_variant.
    FORMAL = 1
|
||||||
|
|
||||||
|
# Variant lookup keyed by the user-facing variant names.
# NOTE(review): presumably a decorator produced by lookup_variant; it is not
# used anywhere in the visible part of this module - confirm before removing.
lookup_number = lookup_variant(dict(
    default=NumberVariantFA.CONVERSATIONAL,
    conversational=NumberVariantFA.CONVERSATIONAL,
    formal=NumberVariantFA.FORMAL,
))
|
||||||
|
|
||||||
|
def _apply_number_variant(text, variant):
    """
    Rewrite an already formatted string for the requested variant.

    Only NumberVariantFA.FORMAL changes anything: every value of
    _FORMAL_VARIANT found in the text is replaced by its key, in the
    mapping's iteration order.

    Args:
        text (str): formatted number/time string
        variant: requested variant (normally a NumberVariantFA member)
    Returns:
        (str): the text, with substitutions applied for the formal variant
    """
    if variant != NumberVariantFA.FORMAL:
        return text

    for replacement, target in _FORMAL_VARIANT.items():
        text = text.replace(target, replacement)
    return text
|
||||||
|
|
||||||
|
def _handle_number_variant(func):
    """
    Decorator adding 'variant' post-processing to a Farsi formatter.

    The wrapped function is called unchanged.  When the caller passed
    'variant' as a keyword argument, the result is additionally run through
    _apply_number_variant; a positional 'variant' is not post-processed.

    Args:
        func (callable): formatter returning a string
    Returns:
        (callable): the wrapped formatter, carrying func's metadata
    """
    variant_names = {
        "default": NumberVariantFA.CONVERSATIONAL,
        "conversational": NumberVariantFA.CONVERSATIONAL,
        "formal": NumberVariantFA.FORMAL,
    }

    def wrapper(*args, **kwargs):
        result = func(*args, **kwargs)
        if 'variant' not in kwargs:
            return result
        return _apply_number_variant(result, kwargs['variant'])

    # Same stacking as "@wraps(func)" written above "@lookup_variant(...)":
    # lookup_variant is applied first, then func's metadata is copied on.
    # NOTE(review): lookup_variant presumably maps variant names to the enum
    # members before wrapper runs - confirm in lingua_franca.internal.
    return wraps(func)(lookup_variant(variant_names)(wrapper))
|
||||||
|
|
||||||
|
@_handle_number_variant
|
||||||
|
def nice_number_fa(number, speech=True, denominators=range(1, 21), variant=None):
    """ Farsi helper for nice_number

    Formats a number as a mixed fraction: a "whole num/den" string for
    display, or a phrase using the Farsi fraction words for speech.

    Args:
        number (int or float): the number to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
        variant: not read here; consumed by the _handle_number_variant
            decorator applied to this function
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # Give up, just represent as a 3 decimal number
        return str(round(number, 3))

    whole, num, den = mixed

    # No fractional part: same output for speech and display.
    # TODO: Number grouping? E.g. "1,000,000"
    if num == 0:
        return str(whole)

    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = _FRACTION_STRING_FA[den]
    if whole == 0 and num == 1:
        return 'یک {}'.format(den_str)
    if whole == 0:
        return '{} {}'.format(num, den_str)
    if num == 1:
        return '{} و یک {}'.format(whole, den_str)
    return '{} و {} {}'.format(whole, num, den_str)
|
||||||
|
|
||||||
|
|
||||||
|
def _float2tuple(value, _precision):
|
||||||
|
pre = int(value)
|
||||||
|
|
||||||
|
post = abs(value - pre) * 10**_precision
|
||||||
|
if abs(round(post) - post) < 0.01:
|
||||||
|
# We generally floor all values beyond our precision (rather than
|
||||||
|
# rounding), but in cases where we have something like 1.239999999,
|
||||||
|
# which is probably due to python's handling of floats, we actually
|
||||||
|
# want to consider it as 1.24 instead of 1.23
|
||||||
|
post = int(round(post))
|
||||||
|
else:
|
||||||
|
post = int(math.floor(post))
|
||||||
|
|
||||||
|
while post != 0:
|
||||||
|
x, y = divmod(post, 10)
|
||||||
|
if y != 0:
|
||||||
|
break
|
||||||
|
post = x
|
||||||
|
_precision -= 1
|
||||||
|
|
||||||
|
return pre, post, _precision
|
||||||
|
|
||||||
|
|
||||||
|
def _cardinal3(number):
    """
    Spell out a non-negative integer below 1000 in Farsi.

    Args:
        number (int): value to spell (callers pass 1..999)
    Returns:
        (str): words built from the ones/tens/hundreds tables joined with
            _FARSI_SEPERATOR
    """
    # NOTE(review): the bound is 19, so 19 itself is built from the tens
    # and ones tables below - confirm this is intended and not an
    # off-by-one against the length of _FARSI_ONES.
    if number < 19:
        return _FARSI_ONES[number]

    if number < 100:
        tens, ones = divmod(number, 10)
        words = _FARSI_TENS[tens]
        if ones != 0:
            words = words + _FARSI_SEPERATOR + _FARSI_ONES[ones]
        return words

    hundreds, remainder = divmod(number, 100)
    words = _FARSI_HUNDREDS[hundreds]
    if remainder != 0:
        words = words + _FARSI_SEPERATOR + _cardinal3(remainder)
    return words
|
||||||
|
|
||||||
|
def _cardinalPos(number):
    """
    Spell out a non-negative integer in Farsi, three digits at a time.

    The number is consumed from the least significant group of three
    digits upwards, one group per entry of _FARSI_BIG; empty groups are
    skipped and the pieces are joined with _FARSI_SEPERATOR, most
    significant first.

    Args:
        number (int): value to spell
    Returns:
        (str): the spelled number ('' when every group is zero)
    """
    remaining = number
    spoken = ''
    for scale_word in _FARSI_BIG:
        remaining, group = divmod(remaining, 1000)
        if group == 0:
            continue

        piece = _cardinal3(group)
        if group == 1 and scale_word == 'هزار':
            # exactly one thousand is just the scale word, without "one"
            piece = scale_word
        elif scale_word != '':
            piece += ' ' + scale_word

        spoken = piece if spoken == '' else piece + _FARSI_SEPERATOR + spoken
    return spoken
|
||||||
|
|
||||||
|
def _fractional(number, l):
|
||||||
|
if (number / 10**l == 0.5):
|
||||||
|
return "نیم"
|
||||||
|
x = _cardinalPos(number)
|
||||||
|
ld3, lm3 = divmod(l, 3)
|
||||||
|
ltext = (_FARSI_FRAC[lm3] + " " + _FARSI_FRAC_BIG[ld3]).strip() + 'م'
|
||||||
|
return x + " " + ltext
|
||||||
|
|
||||||
|
def _to_ordinal(number):
    """
    Spell out a number as a Farsi ordinal.

    The cardinal form (with zero decimal places) gets the ordinal suffix
    appended; a cardinal ending in 'سه' drops its final letter and takes
    'وم' instead.

    Args:
        number (int or float): value to convert
    Returns:
        (str): the ordinal word(s)
    """
    cardinal = _to_cardinal(number, 0)
    # the last letter is checked first; the one before it only if needed
    if cardinal[-1] == 'ه':
        if cardinal[-2] == 'س':
            return cardinal[:-1] + 'وم'
    return cardinal + 'م'
|
||||||
|
|
||||||
|
def _to_ordinal_num(value):
|
||||||
|
return str(value)+"م"
|
||||||
|
|
||||||
|
def _to_cardinal(number, places):
|
||||||
|
if number < 0:
|
||||||
|
return "منفی " + _to_cardinal(-number, places)
|
||||||
|
if (number == 0):
|
||||||
|
return "صفر"
|
||||||
|
x, y, l = _float2tuple(number, places)
|
||||||
|
if y == 0:
|
||||||
|
return _cardinalPos(x)
|
||||||
|
if x == 0:
|
||||||
|
return _fractional(y, l)
|
||||||
|
return _cardinalPos(x) + _FARSI_SEPERATOR + _fractional(y, l)
|
||||||
|
|
||||||
|
@_handle_number_variant
|
||||||
|
def pronounce_number_fa(number, places=2, scientific=False,
                        ordinals=False, variant=None):
    """
    Convert a number to its spoken Farsi equivalent

    Args:
        number (float or int): the number to pronounce
        places (int): maximum decimal places to speak
        scientific (bool): pronounce in scientific notation
        ordinals (bool): pronounce in ordinal form "first" instead of "one"
        variant: not read here; consumed by the _handle_number_variant
            decorator applied to this function
    Returns:
        (str): The pronounced number
    """
    # deal with infinity
    if number == float("inf"):
        return "بینهایت"
    elif number == float("-inf"):
        return "منفی بینهایت"

    if scientific:
        if number == 0:
            return "صفر"
        # Keep the formatted text in its own variable.  The numeric value
        # is still needed below when the exponent is 0 (e.g. 5 is
        # "5.000000E+00"); rebinding 'number' to this string made that
        # case fail with a TypeError further down.
        mantissa, power = ('%E' % number).replace("+", "").split("E")
        power = int(power)
        if power != 0:
            return '{}{} ضرب در ده به توان {}{}'.format(
                'منفی ' if float(mantissa) < 0 else '',
                pronounce_number_fa(
                    abs(float(mantissa)), places, False, ordinals=False),
                'منفی ' if power < 0 else '',
                pronounce_number_fa(abs(power), places, False, ordinals=False))

    if ordinals:
        return _to_ordinal(number)
    return _to_cardinal(number, places)
|
||||||
|
|
||||||
|
@_handle_number_variant
|
||||||
|
def nice_time_fa(dt, speech=True, use_24hour=False, use_ampm=False, variant=None):
    """
    Render a time of day in Farsi, either for display or for speech.

    Args:
        dt (datetime): time to format (assumed to be in the local timezone)
        speech (bool): return a spoken phrase (True) or a clock string (False)
        use_24hour (bool): use the 24-hour clock instead of the 12-hour clock
        use_ampm (bool): in 12-hour mode, add AM/PM (display) or the
            before/after-noon phrase (speech)
        variant: not read here; consumed by the _handle_number_variant
            decorator applied to this function
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        clock = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" / "2:22 PM" with am/pm, "3:01" / "2:22" without
        clock = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if clock[0] == '0':
            clock = clock[1:]  # strip the leading zero

    if not speech:
        return clock

    if use_24hour:
        # "HH:MM": int() ignores a leading zero, so "08" is spoken as 8
        spoken = pronounce_number_fa(int(clock[0:2]))
        minutes_text = clock[3:5]
        if minutes_text != '00':
            spoken += " و "
            spoken += pronounce_number_fa(int(minutes_text))
            spoken += ' دقیقه'
        return spoken

    # 12-hour speech: midnight and noon have dedicated words
    if dt.minute == 0 and dt.hour == 0:
        return "نیمه شب"
    if dt.minute == 0 and dt.hour == 12:
        return "ظهر"

    hour = dt.hour % 12 or 12  # 12 hour clock and 0 is spoken as 12
    if dt.minute == 15:
        spoken = pronounce_number_fa(hour) + " و ربع"
    elif dt.minute == 30:
        spoken = pronounce_number_fa(hour) + " و نیم"
    elif dt.minute == 45:
        # a quarter to the next hour
        next_hour = (dt.hour + 1) % 12 or 12
        spoken = "یه ربع به " + pronounce_number_fa(next_hour)
    elif dt.minute == 0:
        spoken = pronounce_number_fa(hour)
        if not use_ampm:
            return spoken
    else:
        spoken = pronounce_number_fa(hour)
        spoken += " و " + pronounce_number_fa(dt.minute) + ' دقیقه'

    if use_ampm:
        if dt.hour > 11:
            spoken += " بعد از ظهر"
        else:
            spoken += " قبل از ظهر"

    return spoken
|
||||||
251
lingua_franca/lang/format_fr.py
Normal file
251
lingua_franca/lang/format_fr.py
Normal file
@@ -0,0 +1,251 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
from lingua_franca.lang.format_common import convert_to_mixed_fraction
|
||||||
|
from lingua_franca.lang.common_data_fr import _NUM_STRING_FR, \
|
||||||
|
_FRACTION_STRING_FR
|
||||||
|
|
||||||
|
|
||||||
|
def nice_number_fr(number, speech=True, denominators=range(1, 21)):
    """ French helper for nice_number

    Formats a number as a mixed fraction.  Like 4.5 becomes "4 et demi"
    for speech and "4 1/2" for text.

    Args:
        number (int or float): the number to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if mixed:
        whole, num, den = mixed
    else:
        # Give up, just represent as a 3 decimal number
        whole, num, den = round(number, 3), 0, 0

    if num == 0:
        # not a fraction: only the decimal mark (and, for display, the
        # thousands separator) is adapted
        if speech:
            return str(whole).replace(".", ",")
        grouped = '{:,}'.format(whole)
        return grouped.replace(",", " ").replace(".", ",")

    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = _FRACTION_STRING_FR[den]
    if whole == 0:
        if num == 1:
            # "un demi", for example
            spoken = 'un {}'.format(den_str)
        else:
            # "quatre tiers", for example
            spoken = '{} {}'.format(num, den_str)
    elif num == 1:
        if den == 2:
            # "1 et demi", for example
            spoken = '{} et {}'.format(whole, den_str)
        else:
            # "1 et 1 tiers", for example
            spoken = '{} et 1 {}'.format(whole, den_str)
    else:
        # "2 et 3 quart", for example
        spoken = '{} et {} {}'.format(whole, num, den_str)

    # plural "s" for numerators above one, except for denominator 3
    # ("tiers")
    if num > 1 and den != 3:
        spoken += 's'

    return spoken
|
||||||
|
|
||||||
|
|
||||||
|
def pronounce_number_fr(number, places=2):
    """
    Convert a number to its spoken French equivalent

    For example, 5.2 would return 'cinq virgule deux'

    Args:
        number (float or int): the number to pronounce (under 100)
        places (int): maximum decimal places to speak
    Returns:
        (str): The pronounced number; numbers whose absolute value is 100
            or more are returned as str(number)
    """
    if abs(number) >= 100:
        # TODO: Support for numbers over 100
        return str(number)

    result = ""
    if number < 0:
        result = "moins "
    number = abs(number)

    # The wording is chosen from the integer part only, so that decimals
    # such as 16.5 or 71.5 use the same words as 16 and 71.
    whole = int(number)
    if whole > 16:
        ones = whole % 10
        tens = whole - ones
        if ones == 0:
            if number == 80:
                result += "quatre-vingts"
            else:
                result += _NUM_STRING_FR[tens]
        elif ones == 1 and 10 < tens <= 60:
            # vingt-et-un ... soixante-et-un
            result += _NUM_STRING_FR[tens] + "-et-" + _NUM_STRING_FR[ones]
        elif ones == 1 and tens == 70:
            result += "soixante-et-onze"
        elif tens in (70, 90):
            # 70s and 90s are built on 60 and 80 plus a number in 10..19
            result += _NUM_STRING_FR[tens - 10] + "-"
            if ones < 7:
                result += _NUM_STRING_FR[10 + ones]
            else:
                result += _NUM_STRING_FR[10] + "-" + _NUM_STRING_FR[ones]
        else:
            result += _NUM_STRING_FR[tens] + "-" + _NUM_STRING_FR[ones]
    else:
        result += _NUM_STRING_FR[whole]

    # Deal with decimal part
    if not number == int(number) and places > 0:
        if abs(number) < 1.0 and (result == "moins " or not result):
            result += "zéro"
        result += " virgule"
        # str() switches to exponent notation for small floats ("1e-05"),
        # which has no "." to split on; expand the same digits to plain
        # positional notation first.
        from decimal import Decimal
        decimals = format(Decimal(str(number)), "f").partition(".")[2]
        for char in decimals[0:places]:
            result += " " + _NUM_STRING_FR[int(char)]
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def nice_time_fr(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Render a time of day in French, either for display or for speech.

    For example 'cinq heures trente' for speech or '5:30' for text display.

    Args:
        dt (datetime): time to format (assumed to be in the local timezone)
        speech (bool): return a spoken phrase (True) or a clock string (False)
        use_24hour (bool): use the 24-hour clock instead of the 12-hour clock
        use_ampm (bool): in 12-hour mode, add AM/PM (display) or the
            part of the day (speech)
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" / "2:22 PM" with am/pm, "3:01" / "2:22" without
        string = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if string[0] == '0':
            string = string[1:]  # strip leading zeros

    if not speech:
        return string

    # Generate a speakable version of the time
    speak = ""
    if use_24hour:
        # "treize heures trente"
        if dt.hour == 0:
            speak += "minuit"
        elif dt.hour == 12:
            speak += "midi"
        elif dt.hour == 1:
            speak += "une heure"
        else:
            speak += pronounce_number_fr(dt.hour) + " heures"

        if dt.minute != 0:
            speak += " " + pronounce_number_fr(dt.minute)
        return speak

    # Prepare for "trois heures moins le quart": minutes 35/40/45/50/55
    # are counted down to the next hour.
    if dt.minute in (35, 40, 45, 50, 55):
        minute = dt.minute - 60
        # Wrap at midnight so that 23:45 refers to hour 0 ("minuit moins
        # le quart") instead of hour 24 ("douze heures ... du soir").
        hour = (dt.hour + 1) % 24
    else:
        minute = dt.minute
        hour = dt.hour

    if hour == 0:
        speak += "minuit"
    elif hour == 12:
        speak += "midi"
    elif hour == 1 or hour == 13:
        speak += "une heure"
    elif hour < 13:
        speak = pronounce_number_fr(hour) + " heures"
    else:
        speak = pronounce_number_fr(hour - 12) + " heures"

    if minute != 0:
        if minute == 15:
            speak += " et quart"
        elif minute == 30:
            speak += " et demi"
        elif minute == -15:
            speak += " moins le quart"
        else:
            # negative minutes come back as "moins ..." from
            # pronounce_number_fr, e.g. "sept heures moins vingt"
            speak += " " + pronounce_number_fr(minute)

    if use_ampm:
        # midnight and noon need no qualifier
        if hour > 17:
            speak += " du soir"
        elif hour > 12:
            speak += " de l'après-midi"
        elif hour > 0 and hour < 12:
            speak += " du matin"

    return speak
|
||||||
307
lingua_franca/lang/format_hu.py
Normal file
307
lingua_franca/lang/format_hu.py
Normal file
@@ -0,0 +1,307 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
from lingua_franca.lang.format_common import convert_to_mixed_fraction
|
||||||
|
from lingua_franca.lang.common_data_hu import _NUM_POWERS_OF_TEN, \
|
||||||
|
_EXTRA_SPACE_HU, _FRACTION_STRING_HU, _MONTHS_HU, _NUM_STRING_HU
|
||||||
|
from math import floor
|
||||||
|
|
||||||
|
|
||||||
|
def _get_vocal_type_hu(word):
|
||||||
|
# checks the vocal attributes of a word
|
||||||
|
vowels_high = len([char for char in word if char in 'eéiíöőüű'])
|
||||||
|
vowels_low = len([char for char in word if char in 'aáoóuú'])
|
||||||
|
if vowels_high != 0 and vowels_low != 0:
|
||||||
|
return 2 # 2: type is mixed
|
||||||
|
return 0 if vowels_high == 0 else 1 # 0: type is low, 1: is high
|
||||||
|
|
||||||
|
|
||||||
|
def nice_number_hu(number, speech=True, denominators=range(1, 21)):
    """ Hungarian helper for nice_number

    Formats a number as a mixed fraction.  Like 4.5 becomes "4 és fél"
    for speech and "4 1/2" for text.

    Args:
        number (int or float): the number to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # Give up, just represent as a 3 decimal number
        return str(round(number, 3)).replace(".", ",")

    whole, num, den = mixed

    # No fractional part: same output for speech and display.
    # TODO: Number grouping? E.g. "1,000,000"
    if num == 0:
        return str(whole)

    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = _FRACTION_STRING_HU[den]
    if whole == 0:
        if num != 1:
            return '{} {}'.format(num, den_str)
        # a half is spoken without the leading "egy"
        one = 'egy ' if den != 2 else ''
        return '{}{}'.format(one, den_str)

    if num == 1:
        # "<whole> és <half>" for halves, "<whole> egész egy <den>" otherwise
        pointOne = 'egész egy' if den != 2 else 'és'
        return '{} {} {}'.format(whole, pointOne, den_str)

    return '{} egész {} {}'.format(whole, num, den_str)
|
||||||
|
|
||||||
|
|
||||||
|
def pronounce_number_hu(number, places=2, short_scale=True, scientific=False,
                        ordinals=False):
    """
    Convert a number to its spoken Hungarian equivalent

    Args:
        number (float or int): the number to pronounce
        places (int): maximum decimal places to speak
        short_scale (bool): currently ignored
        scientific (bool): currently ignored
        ordinals (bool): currently ignored
    Returns:
        (str): The pronounced number; values of 10**24 or more in
            magnitude are returned as str(number)
    """
    # TODO short_scale, scientific and ordinals
    # currently ignored

    def pronounce_triplet_hu(num):
        """Spell out an integer below 1000 (one group of three digits)."""
        result = ""
        num = floor(num)
        if num > 99:
            # exact integer arithmetic; num is an int after floor()
            hundreds, num = divmod(num, 100)
            hundredConst = _EXTRA_SPACE_HU + 'száz' + _EXTRA_SPACE_HU
            if hundreds == 1:
                result += hundredConst
            elif hundreds == 2:
                result += 'két' + hundredConst
            else:
                result += _NUM_STRING_HU[hundreds] + hundredConst
        if num == 0:
            pass  # nothing left after the hundreds
        elif num <= 20:
            result += _NUM_STRING_HU[num]
        else:
            ones = num % 10
            tens = num - ones
            if tens > 0:
                if tens != 20:
                    result += _NUM_STRING_HU[tens] + _EXTRA_SPACE_HU
                else:
                    # 21..29 use the combining form of twenty
                    result += "huszon" + _EXTRA_SPACE_HU
            if ones > 0:
                result += _NUM_STRING_HU[ones] + _EXTRA_SPACE_HU
        return result

    def pronounce_whole_number_hu(num, scale_level=0):
        """Spell out a non-negative integer, lowest triplet last."""
        if num == 0:
            return ''

        num = floor(num)
        result = ''
        last_triplet = num % 1000

        if last_triplet == 1:
            if scale_level == 0:
                result += "egy"
            elif scale_level == 1:
                # one thousand is spoken without a leading "egy"
                result += _EXTRA_SPACE_HU + \
                    _NUM_POWERS_OF_TEN[1] + _EXTRA_SPACE_HU
            else:
                result += "egy" + _NUM_POWERS_OF_TEN[scale_level]
        elif last_triplet > 1:
            result += pronounce_triplet_hu(last_triplet)
            if scale_level != 0:
                # "two" takes its combining form before a scale word
                result = result.replace(_NUM_STRING_HU[2], 'két')
            if scale_level == 1:
                result += _NUM_POWERS_OF_TEN[1] + _EXTRA_SPACE_HU
            if scale_level >= 2:
                result += _NUM_POWERS_OF_TEN[scale_level]
            if scale_level > 0:
                result += '-'

        # Integer division keeps every digit.  Float division (num / 1000)
        # rounds integers above about 1.7e16, which are still inside the
        # supported range, and corrupted the higher triplets.
        num = num // 1000
        scale_level += 1
        return pronounce_whole_number_hu(num,
                                         scale_level) + result

    if abs(number) >= 1000000000000000000000000:  # cannot do more than this
        return str(number)
    if number == 0:
        return str(_NUM_STRING_HU[0])
    if number < 0:
        return "mínusz " + pronounce_number_hu(abs(number), places)
    if number == int(number):
        return pronounce_whole_number_hu(number).strip('-')

    # decimal number: "<whole> egész <fraction digits> <denominator>"
    result = ""
    whole_number_part = floor(number)
    fractional_part = number - whole_number_part
    if whole_number_part == 0:
        result += _NUM_STRING_HU[0]
    result += pronounce_whole_number_hu(whole_number_part)
    if places > 0:
        result += " egész "
        fraction = pronounce_whole_number_hu(
            round(fractional_part * 10 ** places))
        result += fraction.replace(_NUM_STRING_HU[2], 'két')
        fraction_suffixes = [
            'tized', 'század', 'ezred', 'tízezred', 'százezred']
        if places <= len(fraction_suffixes):
            result += ' ' + fraction_suffixes[places - 1]
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def pronounce_ordinal_hu(number):
    """
    Pronounce a number as a Hungarian ordinal.

    1 -> első
    2 -> második

    Args:
        number (int): the number to format; whole-valued floats such as
            3.0 are accepted as well
    Returns:
        (str): The pronounced ordinal. Negative or fractional input is
            handed back unchanged (it is NOT converted to a string).
    """
    ordinals = ["nulladik", "első", "második", "harmadik", "negyedik",
                "ötödik", "hatodik", "hetedik", "nyolcadik", "kilencedik",
                "tizedik"]
    big_ordinals = ["", "ezredik", "milliomodik"]

    # only for whole positive numbers including zero
    if number < 0 or number != int(number):
        return number

    # Work on a real int from here on: list indexing rejects floats (3.0
    # passes the check above) and integer arithmetic stays exact for very
    # large values, where float division would round.
    number = int(number)
    if number < 11:
        return ordinals[number]

    # concatenate parts and inflect them accordingly
    root = pronounce_number_hu(number)
    vtype = _get_vocal_type_hu(root)
    last_digit = number % 10
    if root == "húsz":
        root = "husz"
    if number % 1000000 == 0:
        return root.replace(_NUM_POWERS_OF_TEN[2], big_ordinals[2])
    if number % 1000 == 0:
        return root.replace(_NUM_POWERS_OF_TEN[1], big_ordinals[1])
    if last_digit == 1:
        return root + "edik"
    elif root[-1] == 'ő':
        return root[:-1] + 'edik'
    elif last_digit != 0:
        # swap the last occurrence of the cardinal digit word for its
        # ordinal form
        return ordinals[last_digit].join(
            root.rsplit(_NUM_STRING_HU[last_digit], 1))
    # round tens: the suffix depends on the vocal type of the root
    return root + "edik" if vtype == 1 else root + "adik"
|
||||||
|
|
||||||
|
|
||||||
|
def nice_time_hu(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format (Hungarian).

    Produces a spoken phrase built around "óra" for speech, or a compact
    clock string such as '5:30' for text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        display = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" / "2:22 PM" with am/pm, otherwise "3:01" / "2:22"
        display = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if display[0] == '0':
            display = display[1:]  # strip leading zeros

    if not speech:
        return display

    hour, minute = dt.hour, dt.minute

    # Generate a speakable version of the time
    if use_24hour:
        spoken = pronounce_number_hu(hour).replace(_NUM_STRING_HU[2], 'két')
        spoken += " óra"
        if minute != 0:  # zero minutes are not pronounced
            spoken += " " + pronounce_number_hu(minute)
        return spoken  # ampm is ignored when use_24hour is true

    if minute == 0 and hour == 0:
        return "éjfél"
    if minute == 0 and hour == 12:
        return "dél"
    # TODO: "half past 3", "a quarter of 4" and other idiomatic times

    # fold the hour onto the 12-hour clock face
    if hour == 0:
        clock_hour = 12
    elif hour < 13:
        clock_hour = hour
    else:
        clock_hour = hour - 12

    spoken = pronounce_number_hu(clock_hour).replace(_NUM_STRING_HU[2], 'két')
    spoken += " óra"
    if minute != 0:
        spoken += " " + pronounce_number_hu(minute)

    if use_ampm:
        if hour < 3:
            day_part = "éjjel "    # 00:01 - 02:59 éjjel/at night
        elif hour < 12:
            day_part = "reggel "   # 03:00 - 11:59 reggel/in the morning
        elif hour < 18:
            day_part = "délután "  # 12:01 - 17:59
        elif hour < 22:
            day_part = "este "     # 18:00 - 21:59 este/evening
        else:
            day_part = "éjjel "    # 22:00 - 23:59 éjjel/at night
        spoken = day_part + spoken

    return spoken
|
||||||
342
lingua_franca/lang/format_it.py
Normal file
342
lingua_franca/lang/format_it.py
Normal file
@@ -0,0 +1,342 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
from lingua_franca.lang.format_common import convert_to_mixed_fraction
|
||||||
|
from lingua_franca.lang.common_data_it import _NUM_STRING_IT, \
|
||||||
|
_FRACTION_STRING_IT, _LONG_SCALE_IT, _SHORT_SCALE_IT
|
||||||
|
|
||||||
|
|
||||||
|
def nice_number_it(number, speech=True, denominators=range(1, 21)):
    """ Italian helper for nice_number

    Formats a number as a mixed fraction: a spoken phrase for speech
    (whole part, "e", numerator and the inflected denominator word) or
    the "4 1/2" form for text display.

    Args:
        number (int or float): the float to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # Give up, just represent as a 3 decimal number
        return str(round(number, 3))

    whole, num, den = mixed

    # no fractional part: the whole number alone, for speech and display
    if num == 0:
        return str(whole)
    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    # denominator word (stem without the final vowel)
    den_str = _FRACTION_STRING_IT[den]

    # a numerator of one is spoken as "un", anything else as the digits
    fraction = '{} {}'.format('un' if num == 1 else num, den_str)
    if whole == 0:
        return_string = fraction
    else:
        return_string = '{} e {}'.format(whole, fraction)

    # plural / singular ending of the denominator
    return return_string + ('i' if num > 1 else 'o')
|
||||||
|
|
||||||
|
|
||||||
|
def pronounce_number_it(number, places=2, short_scale=False, scientific=False):
    """
    Convert a number to its spoken equivalent
    adapted to Italian from the en version

    For example, '5.2' would return 'cinque virgola due'

    Args:
        number (float or int): the number to pronounce
        places (int): maximum decimal places to speak
        short_scale (bool) : use short (True) or long scale (False)
            https://en.wikipedia.org/wiki/Names_of_large_numbers
        scientific (bool): pronounce in scientific notation
    Returns:
        (str): The pronounced number
    """
    num = number
    # handle infinity
    if num == float("inf"):
        return "infinito"
    elif num == float("-inf"):
        return "meno infinito"

    if scientific:
        # split the "%E" rendering into mantissa and exponent and pronounce
        # each part on its own (with the scientific flag switched off)
        number = '%E' % num
        n, power = number.replace("+", "").split("E")
        power = int(power)
        if power != 0:
            return '{}{} per dieci elevato alla {}{}'.format(
                'meno ' if float(n) < 0 else '',
                pronounce_number_it(abs(float(n)), places, short_scale, False),
                'meno ' if power < 0 else '',
                pronounce_number_it(abs(power), places, short_scale, False))

    # lookup table: base number words plus the scale words of the chosen
    # scale
    if short_scale:
        number_names = _NUM_STRING_IT.copy()
        number_names.update(_SHORT_SCALE_IT)
    else:
        number_names = _NUM_STRING_IT.copy()
        number_names.update(_LONG_SCALE_IT)

    # words for 0..19
    digits = [number_names[n] for n in range(0, 20)]

    # words for 10, 20, ..., 90
    tens = [number_names[n] for n in range(10, 100, 10)]

    # scale words in the iteration order of the scale dict
    # NOTE(review): indexing below presumably relies on that dict being
    # ordered by magnitude - confirm against common_data_it
    if short_scale:
        hundreds = [_SHORT_SCALE_IT[n] for n in _SHORT_SCALE_IT.keys()]
    else:
        hundreds = [_LONG_SCALE_IT[n] for n in _LONG_SCALE_IT.keys()]

    # deal with negatives
    result = ""
    if num < 0:
        result = "meno "
    num = abs(num)

    # check for a direct match
    if num in number_names:
        if num > 90:
            result += ""  # start of string (no-op: appends nothing)
        result += number_names[num]
    else:
        def _sub_thousand(n):
            # pronounce an integer in the range 0..999
            assert 0 <= n <= 999
            if n <= 19:
                return digits[n]
            elif n <= 99:
                q, r = divmod(n, 10)
                _deci = tens[q-1]
                _unit = r
                _partial = _deci
                if _unit > 0:
                    if _unit == 1 or _unit == 8:
                        # drop the final vowel of the tens word before
                        # 1 and 8 (ventuno, ventotto)
                        _partial = _partial[:-1]
                    _partial += number_names[_unit]
                return _partial
            else:
                q, r = divmod(n, 100)
                if q == 1:
                    _partial = "cento"
                else:
                    _partial = digits[q] + "cento"
                _partial += (
                    " " + _sub_thousand(r) if r else "")  # space after hundreds
                return _partial

        def _short_scale(n):
            # pronounce n in groups of three digits, one scale word per group
            if n >= max(_SHORT_SCALE_IT.keys()):
                return "numero davvero enorme"
            n = int(n)
            assert 0 <= n
            res = []
            for i, z in enumerate(_split_by(n, 1000)):
                if not z:
                    continue
                number = _sub_thousand(z)
                if i:
                    number += ""  # separator between magnitudes (empty)
                    number += hundreds[i]
                res.append(number)

            return ", ".join(reversed(res))

        def _split_by(n, split=1000):
            # split n into base-`split` groups, least significant first
            assert 0 <= n
            res = []
            while n:
                n, r = divmod(n, split)
                res.append(r)
            return res

        def _long_scale(n):
            # pronounce n in groups of six digits; each group is itself
            # pronounced with the short scale
            if n >= max(_LONG_SCALE_IT.keys()):
                return "numero davvero enorme"
            n = int(n)
            assert 0 <= n
            res = []
            for i, z in enumerate(_split_by(n, 1000000)):
                if not z:
                    continue
                number = pronounce_number_it(z, places, True, scientific)
                # strip off the comma after the thousand
                if i:
                    # plus one as we skip 'thousand'
                    # (and 'hundred', but this is excluded by index value)
                    number = number.replace(',', '')
                    number += " " + hundreds[i+1]
                res.append(number)
            return ", ".join(reversed(res))

        if short_scale:
            result += _short_scale(num)
        else:
            result += _long_scale(num)

    # normalise single, 'reasonable' scale words, also when they appear at
    # the start of the string
    if result == 'mila':
        result = 'mille'
    if result == 'milioni':
        result = 'un milione'
    if result == 'miliardi':
        result = 'un miliardo'
    if result[0:7] == 'unomila':
        result = result.replace('unomila', 'mille', 1)
    if result[0:10] == 'unomilioni':
        result = result.replace('unomilioni', 'un milione', 1)
    # if result[0:11] == 'unomiliardi':
    # result = result.replace('unomiliardi', 'un miliardo', 1)

    # Deal with fractional part
    if not num == int(num) and places > 0:
        if abs(num) < 1.0 and (result == "meno " or not result):
            result += "zero"
        result += " virgola"
        # speak the decimals digit by digit, at most `places` of them
        # NOTE(review): relies on str(num) containing a "." - a float that
        # str() renders in exponent form (e.g. 1e-05) would fail here
        _num_str = str(num)
        _num_str = _num_str.split(".")[1][0:places]
        for char in _num_str:
            result += " " + number_names[int(char)]
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def nice_time_it(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format
    adapted to Italian from the en version

    For example, generate 'cinque e trenta' for speech or '5:30' for
    text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        if use_ampm:
            # e.g. "3:01 AM" or "2:22 PM"
            string = dt.strftime("%I:%M %p")
        else:
            # e.g. "3:01" or "2:22"
            string = dt.strftime("%I:%M")
        if string[0] == '0':
            string = string[1:]  # strip leading zeros

    if not speech:
        return string

    # Generate a speakable version of the time
    if use_24hour:
        speak = ""
        # Either "zero 8 zerozero" or "13 zerozero"
        if string[0:2] == '00':
            speak += "zerozero"
        elif string[0] == '0':
            speak += pronounce_number_it(int(string[0])) + " "
            if int(string[1]) == 1:
                # one o'clock replaces the whole hour part with "una"
                speak = "una"
            else:
                speak += pronounce_number_it(int(string[1]))
        else:
            speak = pronounce_number_it(int(string[0:2]))

        # in Italian "13 e 25"
        speak += " e "

        if string[3:5] == '00':
            speak += "zerozero"
        else:
            if string[3] == '0':
                speak += pronounce_number_it(0) + " "
                speak += pronounce_number_it(int(string[4]))
            else:
                speak += pronounce_number_it(int(string[3:5]))
        return speak

    if dt.hour == 0 and dt.minute == 0:
        return "mezzanotte"
    if dt.hour == 12 and dt.minute == 0:
        return "mezzogiorno"
    # TODO: "10 e un quarto", "4 e tre quarti" and other idiomatic times

    if dt.hour == 0:
        speak = "mezzanotte"
    elif dt.hour == 1 or dt.hour == 13:
        speak = "una"
    elif dt.hour > 13:
        speak = pronounce_number_it(dt.hour-12)
    else:
        speak = pronounce_number_it(dt.hour)

    speak += " e"
    if dt.minute == 0:
        # on the hour: drop the " e" connector again
        speak = speak[:-2]
        if not use_ampm:
            speak += " in punto"
    elif dt.minute == 15:
        speak += " un quarto"
    elif dt.minute == 45:
        speak += " tre quarti"
    else:
        if dt.minute < 10:
            speak += " zero"
        speak += " " + pronounce_number_it(dt.minute)

    if use_ampm:
        # Hours before 04:00 deliberately get no part-of-day suffix; the
        # remaining hours are classified from late to early.
        if dt.hour < 4:
            pass
        elif dt.hour > 20:
            speak += " della notte"
        elif dt.hour > 17:
            speak += " della sera"
        elif dt.hour > 12:
            speak += " del pomeriggio"
        else:
            speak += " della mattina"

    return speak
|
||||||
337
lingua_franca/lang/format_nl.py
Normal file
337
lingua_franca/lang/format_nl.py
Normal file
@@ -0,0 +1,337 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
from .format_common import convert_to_mixed_fraction
|
||||||
|
from lingua_franca.lang.common_data_nl import _NUM_POWERS_OF_TEN, \
|
||||||
|
_NUM_STRING_NL, _FRACTION_STRING_NL, _EXTRA_SPACE_NL, _MONTHS_NL
|
||||||
|
from math import floor
|
||||||
|
|
||||||
|
|
||||||
|
def nice_number_nl(number, speech=True, denominators=range(1, 21)):
    """ Dutch helper for nice_number

    Formats a number as a mixed fraction: a spoken phrase for speech
    (whole part, "en", numerator and the denominator word) or the
    "4 1/2" form for text display.

    Args:
        number (int or float): the float to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # Give up, just represent as a 3 decimal number (decimal comma)
        return str(round(number, 3)).replace(".", ",")

    whole, num, den = mixed

    # no fractional part: the whole number alone, for speech and display
    # TODO: Number grouping? E.g. "1,000,000"
    if num == 0:
        return str(whole)
    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = _FRACTION_STRING_NL[den]

    # a numerator of one is spoken as "één", anything else as the digits
    fraction = '{} {}'.format('één' if num == 1 else num, den_str)
    if whole == 0:
        return fraction
    return '{} en {}'.format(whole, fraction)
|
||||||
|
|
||||||
|
|
||||||
|
def pronounce_number_nl(number, places=2, short_scale=True, scientific=False,
                        ordinals=False):
    """
    Convert a number to its spoken Dutch equivalent

    The whole part is spoken in groups of three digits; decimals are
    appended digit by digit after the word "komma".

    Args:
        number(float or int): the number to pronounce
        places(int): number of decimal places to speak
        short_scale (bool) : use short (True) or long scale (False)
            https://en.wikipedia.org/wiki/Names_of_large_numbers
            (currently ignored)
        scientific (bool): pronounce in scientific notation
            (currently ignored)
        ordinals (bool): pronounce in ordinal form "first" instead of "one"
            (currently ignored)
    Returns:
        (str): The pronounced number; values of 10**24 and above are
            returned as str(number)
    """
    # TODO short_scale, scientific and ordinals
    # currently ignored

    def pronounce_triplet_nl(num):
        # pronounce a group of up to three digits (hundreds, then ones
        # before tens joined with "en")
        result = ""
        num = floor(num)
        if num > 99:
            hundreds = floor(num / 100)
            if hundreds > 0:
                result += _NUM_STRING_NL[
                    hundreds] + _EXTRA_SPACE_NL + 'honderd' + _EXTRA_SPACE_NL
                num -= hundreds * 100
        if num == 0:
            result += ''  # do nothing
        elif num <= 20:
            result += _NUM_STRING_NL[num]  # + _EXTRA_SPACE_DA
        elif num > 20:
            ones = num % 10
            tens = num - ones
            if ones > 0:
                result += _NUM_STRING_NL[ones] + _EXTRA_SPACE_NL
                if tens > 0:
                    result += 'en' + _EXTRA_SPACE_NL
            if tens > 0:
                result += _NUM_STRING_NL[tens] + _EXTRA_SPACE_NL
        return result

    def pronounce_fractional_nl(num,
                                places):  # fixed number of places even with
        # trailing zeros
        result = ""
        place = 10
        while places > 0:  # doesn't work with 1.0001 and places = 2: int(
            # number*place) % 10 > 0 and places > 0:
            # pick the next decimal digit by scaling and taking it mod 10
            result += " " + _NUM_STRING_NL[int(num * place) % 10]
            if int(num * place) % 10 == 1:
                result += ''  # no-op: nothing extra is appended after a 1
            place *= 10
            places -= 1
        return result

    def pronounce_whole_number_nl(num, scale_level=0):
        # Recursively pronounce num three digits at a time; scale_level is
        # the index of the current group (0 = units, 1 = thousands, ...).
        if num == 0:
            return ''

        num = floor(num)
        result = ''
        last_triplet = num % 1000

        if last_triplet == 1:
            if scale_level == 0:
                # result is always '' at this point, so both branches
                # append the same word
                if result != '':
                    result += '' + 'één'
                else:
                    result += "één"
            elif scale_level == 1:
                result += 'één' + _EXTRA_SPACE_NL + 'duizend' + _EXTRA_SPACE_NL
            else:
                result += "één " + _NUM_POWERS_OF_TEN[scale_level] + ' '
        elif last_triplet > 1:
            result += pronounce_triplet_nl(last_triplet)
            if scale_level == 1:
                # result += _EXTRA_SPACE_DA
                result += 'duizend' + _EXTRA_SPACE_NL
            if scale_level >= 2:
                # if _EXTRA_SPACE_DA == '':
                # result += " "
                result += " " + _NUM_POWERS_OF_TEN[scale_level] + ' '
            if scale_level >= 2:
                # both appends below add an empty string (no plural suffix)
                if scale_level % 2 == 0:
                    result += ""  # Miljioen
                result += ""  # Miljard, Miljoen

        # recurse into the next, more significant group and prepend it
        num = floor(num / 1000)
        scale_level += 1
        return pronounce_whole_number_nl(num,
                                         scale_level) + result + ''

    result = ""
    if abs(number) >= 1000000000000000000000000:  # cannot do more than this
        return str(number)
    elif number == 0:
        return str(_NUM_STRING_NL[0])
    elif number < 0:
        return "min " + pronounce_number_nl(abs(number), places)
    else:
        if number == int(number):
            return pronounce_whole_number_nl(number)
        else:
            whole_number_part = floor(number)
            fractional_part = number - whole_number_part
            # NOTE(review): pronounce_whole_number_nl(0) returns '', so for
            # 0 < number < 1 the result starts with " komma" and has no
            # word for zero - confirm whether that is intended
            result += pronounce_whole_number_nl(whole_number_part)
            if places > 0:
                result += " komma"
                result += pronounce_fractional_nl(fractional_part, places)
            return result
|
||||||
|
|
||||||
|
|
||||||
|
def pronounce_ordinal_nl(number):
    """
    Pronounce a number as a Dutch ordinal.

    1 -> eerste
    2 -> tweede

    Args:
        number (int): the number to format; whole-valued floats such as
            3.0 are accepted as well
    Returns:
        (str): The pronounced ordinal. Negative or fractional input is
            handed back unchanged (it is NOT converted to a string).
    """
    # only entries 0..3 are looked up; larger ordinals are built from the
    # cardinal plus a suffix
    ordinals = ["nulste", "eerste", "tweede", "derde", "vierde", "vijfde",
                "zesde", "zevende", "achtste"]
    # only for whole positive numbers including zero
    if number < 0 or number != int(number):
        return number

    # list indexing rejects floats, and 3.0 passes the check above
    number = int(number)
    if number < 4:
        return ordinals[number]
    if number < 8:
        return pronounce_number_nl(number) + "de"
    if number < 9:
        return pronounce_number_nl(number) + "ste"
    if number < 20:
        return pronounce_number_nl(number) + "de"
    return pronounce_number_nl(number) + "ste"
|
||||||
|
|
||||||
|
|
||||||
|
def nice_time_nl(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format (Dutch).

    Produces a spoken phrase (e.g. built with "half", "kwart over",
    "voor", "over") for speech, or a clock string such as '5:30' for
    text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        display = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" / "2:22 PM" with am/pm, otherwise "3:01" / "2:22"
        display = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if display[0] == '0':
            display = display[1:]  # strip leading zeros

    if not speech:
        return display

    minute = dt.minute

    # Generate a speakable version of the time
    if use_24hour:
        spoken = pronounce_number_nl(dt.hour) + " uur"
        if minute != 0:  # zero minutes are not pronounced, 13:00 is
            # "13 uur" not "13 hundred hours"
            spoken += " " + pronounce_number_nl(minute)
        return spoken  # ampm is ignored when use_24hour is true

    if dt.hour == 0 and minute == 0:
        return "Middernacht"

    this_hour = dt.hour % 12
    if minute == 0:
        spoken = pronounce_number_nl(_fix_hour_nl(this_hour)) + " uur"
    elif minute == 30:
        # half past is expressed relative to the NEXT hour
        spoken = "half " + pronounce_number_nl(_fix_hour_nl(this_hour + 1))
    elif minute == 15:
        spoken = "kwart over " + pronounce_number_nl(_fix_hour_nl(this_hour))
    elif minute == 45:
        spoken = "kwart voor " + pronounce_number_nl(
            _fix_hour_nl(this_hour + 1))
    elif minute > 30:
        # minutes remaining before the next hour
        spoken = pronounce_number_nl(60 - minute) + " voor " + \
            pronounce_number_nl(_fix_hour_nl(this_hour + 1))
    else:
        # minutes elapsed since the current hour
        spoken = pronounce_number_nl(minute) + " over " + \
            pronounce_number_nl(_fix_hour_nl(this_hour))

    if use_ampm:
        spoken += nice_part_of_day_nl(dt)

    return spoken
|
||||||
|
|
||||||
|
|
||||||
|
def _fix_hour_nl(hour):
    """Wrap an hour onto the 12-hour clock face, giving 12 instead of 0."""
    wrapped = hour % 12
    return 12 if wrapped == 0 else wrapped
|
||||||
|
|
||||||
|
|
||||||
|
def nice_part_of_day_nl(dt, speech=True):
    """
    Return the Dutch part-of-day phrase for dt, with a leading space.

    Args:
        dt (datetime): the moment to classify by its hour
        speech (bool): unused
    Returns:
        (str): " 's nachts", " 's ochtends", " 's middags" or " 's avonds"
    Raises:
        ValueError: if dt.hour is 24 or more
    """
    # (exclusive upper hour bound, phrase), checked from early to late
    day_parts = (
        (6, " 's nachts"),
        (12, " 's ochtends"),
        (18, " 's middags"),
        (24, " 's avonds"),
    )
    for upper_bound, phrase in day_parts:
        if dt.hour < upper_bound:
            return phrase
    raise ValueError('dt.hour is bigger than 24')
|
||||||
|
|
||||||
|
|
||||||
|
def nice_response_nl(text):
    """
    Post-process a Dutch response text for speech.

    A "^" token that is followed by a numeric token is replaced with
    "tot de macht" (to the power of). If the text mentions a month name,
    numbers standing before months are spelled out via _nice_ordinal_nl.

    Args:
        text (str): the response text
    Returns:
        (str): the processed text; returned unchanged when nothing applies
    """
    words = text.split()

    # Rewrite the powers first. Doing this before the ordinal pass keeps
    # both rewrites: re-joining `words` after the ordinal pass would throw
    # away the spelled-out numbers again.
    power_replaced = False
    for idx, word in enumerate(words):
        if word == '^':
            following = words[idx + 1] if idx + 1 < len(words) else ""
            if following.isnumeric():
                words[idx] = "tot de macht"
                power_replaced = True
    if power_replaced:
        text = " ".join(words)

    # check for months and spell out the numbers in front of them (once)
    if any(word.lower() in _MONTHS_NL for word in words):
        text = _nice_ordinal_nl(text)

    return text
|
||||||
|
|
||||||
|
|
||||||
|
def _nice_ordinal_nl(text, speech=True):
    """
    Spell out numbers that stand directly before a month name.

    A number preceded by the article "de" becomes an ordinal, any other
    number before a month becomes a cardinal.

    Args:
        text (str): the text to normalise
        speech (bool): unused
    Returns:
        (str): the text with its words joined by single spaces; text
            without any words is returned unchanged
    """
    words = text.split()
    if not words:
        return text

    for idx, word in enumerate(words):
        # Only plain decimal tokens can go through int(). Testing the whole
        # token (not word[:-1]) also picks up single-digit days and keeps
        # tokens such as "15e" from raising ValueError.
        if not word.isdecimal():
            continue
        next_word = words[idx + 1] if idx + 1 < len(words) else ""
        if next_word.lower() in _MONTHS_NL:
            prev_word = words[idx - 1] if idx > 0 else ""
            # declension depends on the preceding article
            if prev_word == 'de':
                words[idx] = pronounce_ordinal_nl(int(word))
            else:
                words[idx] = pronounce_number_nl(int(word))

    return " ".join(words)
|
||||||
351
lingua_franca/lang/format_pl.py
Normal file
351
lingua_franca/lang/format_pl.py
Normal file
@@ -0,0 +1,351 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
from lingua_franca.lang.format_common import convert_to_mixed_fraction
|
||||||
|
from lingua_franca.lang.common_data_pl import _NUM_STRING_PL, \
|
||||||
|
_FRACTION_STRING_PL, _SHORT_SCALE_PL, _SHORT_ORDINAL_PL, _ALT_ORDINALS_PL
|
||||||
|
from lingua_franca.internal import FunctionNotLocalizedError
|
||||||
|
|
||||||
|
|
||||||
|
def nice_number_pl(number, speech=True, denominators=range(1, 21)):
    """ Polish helper for nice_number

    Formats a number as a mixed fraction: a spoken phrase for speech
    (whole part, "i", numerator and the denominator word) or the
    "4 1/2" form for text display.

    Args:
        number (int or float): the float to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # Give up, just represent as a 3 decimal number
        return str(round(number, 3))

    whole, num, den = mixed

    # no fractional part: the whole number alone, for speech and display
    # TODO: Number grouping? E.g. "1,000,000"
    if num == 0:
        return str(whole)
    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = _FRACTION_STRING_PL[den]
    spoken = '{} {}'.format(num, den_str)
    if whole != 0:
        spoken = '{} i {}'.format(whole, spoken)
    if num > 1:
        # plural: swap the last letter of the denominator word for 'e'
        spoken = spoken[:-1] + 'e'
    return spoken
|
||||||
|
|
||||||
|
|
||||||
|
def pronounce_number_pl(num, places=2, short_scale=True, scientific=False,
                        ordinals=False, scientific_run=False):
    """
    Convert a number to its spoken Polish equivalent.

    The decimal separator is spoken as "przecinek", followed by the decimal
    digits pronounced one by one.

    Args:
        num(float or int): the number to pronounce
        places(int): maximum decimal places to speak
        short_scale (bool) : only forwarded to the recursive calls; the body
            always reads the short scale table (_SHORT_SCALE_PL)
            https://en.wikipedia.org/wiki/Names_of_large_numbers
        scientific (bool): pronounce in scientific notation
        ordinals (bool): pronounce in ordinal form "first" instead of "one"
        scientific_run (bool): set to True by the scientific + ordinals
            branch on its recursive calls; makes _sub_thousand pick
            _ALT_ORDINALS_PL entries and "...ej" endings
    Returns:
        (str): The pronounced number
    """
    # deal with infinity
    if num == float("inf"):
        return "nieskończoność"
    elif num == float("-inf"):
        return "minus nieskończoność"
    if scientific:
        # Split the '%E' rendering into mantissa text and integer exponent.
        number = '%E' % num
        n, power = number.replace("+", "").split("E")
        power = int(power)
        # A zero exponent falls through to the regular pronunciation below.
        if power != 0:
            if ordinals:
                # This handles negatives of powers separately from the normal
                # handling since each call disables the scientific flag
                return '{}{} razy dziesięć do {}{} potęgi'.format(
                    'minus ' if float(n) < 0 else '',
                    pronounce_number_pl(
                        abs(float(n)), places, short_scale, False, ordinals=False, scientific_run=True),
                    'minus ' if power < 0 else '',
                    pronounce_number_pl(abs(power), places, short_scale, False, ordinals=True, scientific_run=True))
            else:
                # This handles negatives of powers separately from the normal
                # handling since each call disables the scientific flag
                return '{}{} razy dziesięć do potęgi {}{}'.format(
                    'minus ' if float(n) < 0 else '',
                    pronounce_number_pl(
                        abs(float(n)), places, short_scale, False),
                    'minus ' if power < 0 else '',
                    pronounce_number_pl(abs(power), places, short_scale, False))

    # Lookup table: basic number words merged with the scale words.
    number_names = _NUM_STRING_PL.copy()
    number_names.update(_SHORT_SCALE_PL)

    # Words for 0..19, indexed by value.
    digits = [number_names[n] for n in range(0, 20)]
    # Words for 10, 20, ..., 90 (ordinal forms when ordinals is set).
    if ordinals:
        tens = [_SHORT_ORDINAL_PL[n] for n in range(10, 100, 10)]
    else:
        tens = [number_names[n] for n in range(10, 100, 10)]
    # Scale words in the table's own key order; only len() is used below.
    hundreds = [_SHORT_SCALE_PL[n] for n in _SHORT_SCALE_PL.keys()]

    # deal with negatives
    result = ""
    if num < 0:
        result = "minus "
    num = abs(num)

    # check for a direct match
    if num in number_names and not ordinals:
        result += number_names[num]
    else:
        def _sub_thousand(n, ordinals=False, iteration=0):
            """Pronounce an integer in 0..999.

            ``iteration`` is the index of the thousands group being spoken;
            for groups above the lowest one _ALT_ORDINALS_PL is tried first.
            """
            assert 0 <= n <= 999

            # NOTE: n_mod is computed but not read anywhere below.
            _, n_mod = divmod(n, 10)
            if iteration > 0 and n in _ALT_ORDINALS_PL and ordinals:
                return _ALT_ORDINALS_PL[n]
            elif n in _SHORT_ORDINAL_PL and ordinals:
                return _SHORT_ORDINAL_PL[n] if not scientific_run \
                    else _ALT_ORDINALS_PL[n]
            if n <= 19:
                # In a scientific ordinal run the last letter becomes "ej".
                return digits[n] if not scientific_run or not ordinals\
                    else digits[n][:-1] + "ej"
            elif n <= 99:
                q, r = divmod(n, 10)
                tens_text = tens[q - 1]
                if scientific_run:
                    tens_text = tens_text[:-1] + "ej"
                return tens_text + (" " + _sub_thousand(r, ordinals) if r
                                    else "")
            else:
                q, r = divmod(n, 100)
                # Prefer a dedicated word for the hundreds (q*100) if the
                # table has one; otherwise fall back to the digit word.
                digit_name = digits[q]
                if q*100 in _NUM_STRING_PL:
                    digit_name = _NUM_STRING_PL[q*100]

                return digit_name + (
                    " " + _sub_thousand(r, ordinals) if r else "")

        def _short_scale(n):
            """Pronounce the integer part of n, one group of 1000 at a time."""
            if n >= max(_SHORT_SCALE_PL.keys()):
                return "nieskończoność"
            ordi = ordinals

            # Non-integers are never spoken as ordinals.
            if int(n) != n:
                ordi = False
            n = int(n)
            assert 0 <= n
            res = []
            # _split_by yields the groups lowest first; i is the group index.
            for i, z in enumerate(_split_by(n, 1000)):
                if not z:
                    continue
                number = _sub_thousand(z, ordi, iteration=i)

                if i:
                    # No scale word is available for this group: give up with
                    # an empty string (see the scientific fallback below).
                    if i >= len(hundreds):
                        return ""
                    number += " "
                    if ordi:
                        if i * 1000 in _SHORT_ORDINAL_PL:
                            if z == 1:
                                number = _SHORT_ORDINAL_PL[i * 1000]
                            else:
                                number += _SHORT_ORDINAL_PL[i * 1000]
                        else:
                            if n not in _SHORT_SCALE_PL:
                                # `num` here is local to _short_scale (it is
                                # assigned in this function), not the outer
                                # argument.
                                num = int("1" + "0" * (len(str(n)) - 2))

                                number += _SHORT_SCALE_PL[num] + "owa"
                            else:
                                number = _SHORT_SCALE_PL[n] + "ty"
                    else:
                        hundreds_text = _SHORT_SCALE_PL[float(pow(1000, i))]
                        if z != 1:
                            # z_mod: last digit of the group;
                            # n_main: tens digit of the group.
                            _, z_mod = divmod(z, 10)
                            _, z_mod_tens = divmod(z, 100)
                            n_main, _ = divmod(z_mod_tens, 10)
                            if i == 1:
                                # Thousands: append "e" for a last digit of
                                # 1..4 outside the teens, else "tysięcy".
                                if n_main != 1 and 5 > z_mod > 0:
                                    hundreds_text += "e"
                                else:
                                    hundreds_text = "tysięcy"
                            elif i > 1:
                                hundreds_text += "y" if 5 > z_mod > 0 else "ów"

                        number += hundreds_text
                res.append(number)
                # Only the lowest non-zero group is spoken as an ordinal.
                ordi = False

            return ", ".join(reversed(res))

        def _split_by(n, split=1000):
            """Return the base-``split`` digits of n, least significant first."""
            assert 0 <= n
            res = []
            while n:
                n, r = divmod(n, split)
                res.append(r)
            return res

        result += _short_scale(num)

    # deal with scientific notation unpronounceable as number
    if not result and "e" in str(num):
        return pronounce_number_pl(num, places, short_scale, scientific=True)
    # Deal with fractional part
    elif not num == int(num) and places > 0:
        if abs(num) < 1.0 and (result == "minus " or not result):
            result += "zero"
        result += " przecinek"
        # Speak at most `places` digits taken from the text after the ".".
        _num_str = str(num)
        _num_str = _num_str.split(".")[1][0:places]
        for char in _num_str:
            result += " " + number_names[int(char)]
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def nice_time_pl(dt, speech=True, use_24hour=True, use_ampm=False):
    """
    Format a time to a comfortable human format (Polish).

    The display form is always "HH:MM". For speech the hour is pronounced
    as an ordinal whose last letter is replaced by 'a', followed by the
    minutes; times in hour 00 end with " po północy".

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): accepted for interface compatibility; not read
        use_ampm (bool): accepted for interface compatibility; not read
    Returns:
        (str): The formatted time string
    """
    string = dt.strftime("%H:%M")
    if not speech:
        return string

    hour_text = string[0:2]
    minute_text = string[3:5]
    is_midnight_hour = hour_text == '00'

    # Hour: skipped entirely for 00, otherwise ordinal with an 'a' ending.
    speak = ""
    if not is_midnight_hour:
        speak = pronounce_number_pl(int(hour_text), ordinals=True)
        speak = speak[:-1] + 'a'
        speak += ' '

    # Minutes: a full hour is spoken as "zero zero".
    if minute_text == '00':
        speak += 'zero zero'
    else:
        speak += pronounce_number_pl(int(minute_text))

    if is_midnight_hour:
        speak += " po północy"
    return speak
|
||||||
|
|
||||||
|
|
||||||
|
def nice_duration_pl(duration, speech=True):
    """ Convert duration to a nice spoken timespan

    Args:
        duration: time span in seconds; it is split into whole days,
            hours, minutes and seconds
        speech (bool): only the spoken form is implemented
    Returns:
        str: timespan as a string ('' when every component is zero)
    Raises:
        FunctionNotLocalizedError: if ``speech`` is False
    """

    # TODO this is a kludge around the fact that only Polish has a
    # localized nice_duration()
    if not speech:
        raise FunctionNotLocalizedError

    def _unit_form(count, one, few, many):
        """Pick the noun form that agrees with ``count`` in Polish.

        1 takes the singular; counts ending in 2-4 (but not 12-14) take the
        "few" form; everything else, including 20, 21, 30, ..., takes the
        "many" form.
        """
        if count == 1:
            return one
        if 2 <= count % 10 <= 4 and not 12 <= count % 100 <= 14:
            return few
        return many

    days = int(duration // 86400)
    hours = int(duration // 3600 % 24)
    minutes = int(duration // 60 % 60)
    seconds = int(duration % 60)

    parts = []
    if days > 0:
        parts.append(pronounce_number_pl(days) + " "
                     + ('dzień' if days == 1 else 'dni'))
    if hours > 0:
        parts.append(get_pronounce_number_for_duration(hours) + " "
                     + _unit_form(hours, 'godzina', 'godziny', 'godzin'))
    if minutes > 0:
        parts.append(get_pronounce_number_for_duration(minutes) + " "
                     + _unit_form(minutes, 'minuta', 'minuty', 'minut'))
    if seconds > 0:
        parts.append(get_pronounce_number_for_duration(seconds) + " "
                     + _unit_form(seconds, 'sekunda', 'sekundy', 'sekund'))

    return " ".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
def get_pronounce_number_for_duration(num):
    """Pronounce ``num`` for use in a duration.

    Same as pronounce_number_pl, except that a result of exactly 'jeden'
    is replaced by 'jedna'.
    """
    spoken = pronounce_number_pl(num)
    if spoken == 'jeden':
        return 'jedna'
    return spoken
|
||||||
223
lingua_franca/lang/format_pt.py
Normal file
223
lingua_franca/lang/format_pt.py
Normal file
@@ -0,0 +1,223 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
from lingua_franca.lang.format_common import convert_to_mixed_fraction
|
||||||
|
from lingua_franca.lang.common_data_pt import _FRACTION_STRING_PT, \
|
||||||
|
_NUM_STRING_PT
|
||||||
|
|
||||||
|
|
||||||
|
def nice_number_pt(number, speech, denominators=range(1, 21)):
    """ Portuguese helper for nice_number

    Formats a number as a mixed fraction, e.g. "4 1/2" for display. For
    speech the fraction word is looked up in ``_FRACTION_STRING_PT``.

    Args:
        number (int or float): the value to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction was found: fall back to 3 decimal places.
        return str(round(number, 3))

    whole, num, den = mixed

    # Without a fractional part the result is the same in both modes.
    if num == 0:
        # TODO: Number grouping? E.g. "1,000,000"
        return str(whole)

    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    # denominator word
    den_str = _FRACTION_STRING_PT[den]

    if whole == 0:
        # pure fraction: "um <den>" for a single part, "<num> <den>" otherwise
        if num == 1:
            spoken = 'um {}'.format(den_str)
        else:
            spoken = '{} {}'.format(num, den_str)
    elif num == 1:
        # whole number plus a single part: "<whole> e <den>"
        spoken = '{} e {}'.format(whole, den_str)
    else:
        # whole number plus several parts: "<whole> e <num> <den>"
        spoken = '{} e {} {}'.format(whole, num, den_str)

    # plural
    return spoken + 's' if num > 1 else spoken
|
||||||
|
|
||||||
|
|
||||||
|
def pronounce_number_pt(number, places=2):
    """
    Convert a number to its spoken Portuguese equivalent.

    The decimal separator is spoken as "vírgula", followed by the decimal
    digits one by one.

    Args:
        number(float or int): the number to pronounce; values whose
            magnitude is 100 or more are returned as ``str(number)``
        places(int): maximum decimal places to speak
    Returns:
        (str): The pronounced number
    """
    if abs(number) >= 100:
        # TODO: Support n > 100
        return str(number)

    spoken = "menos " if number < 0 else ""
    number = abs(number)

    if number < 20:
        spoken += _NUM_STRING_PT[int(number)]
    else:
        tens_value = int(number - int(number) % 10)
        ones_value = int(number - tens_value)
        spoken += _NUM_STRING_PT[tens_value]
        if ones_value > 0:
            spoken += " e " + _NUM_STRING_PT[ones_value]

    # Decimal part: Portuguese writes it with a comma or a dot, but it is
    # always pronounced "vírgula".
    has_fraction = number != int(number)
    if has_fraction and places > 0:
        if abs(number) < 1.0 and (spoken == "menos " or not spoken):
            spoken += "zero"
        spoken += " vírgula"
        decimals = str(number).split(".")[1][0:places]
        spoken += "".join(" " + _NUM_STRING_PT[int(ch)] for ch in decimals)
    return spoken
|
||||||
|
|
||||||
|
|
||||||
|
def nice_time_pt(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format (Portuguese).

    Produces a display string such as '5:30', or a spoken phrase when
    ``speech`` is True.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" / "2:22 PM" with am/pm, "3:01" / "2:22" without
        string = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if string[0] == '0':
            string = string[1:]  # strip leading zeros

    if not speech:
        return string

    # Generate a speakable version of the time
    if use_24hour:
        # simply speak the numbers; hour 1 uses the feminine "uma"
        speak = "uma" if dt.hour == 1 else pronounce_number_pt(dt.hour)
        if dt.minute > 0:
            speak += " e " + pronounce_number_pt(dt.minute)
        return speak

    # 12-hour speech. Minutes 35/40/45/50/55 are counted back from the
    # next hour (negative minute value).
    if dt.minute in (35, 40, 45, 50, 55):
        minute = dt.minute - 60
        hour = dt.hour + 1
    else:
        minute = dt.minute
        hour = dt.hour

    if hour == 0:
        speak = "meia noite"
    elif hour == 12:
        speak = "meio dia"
    elif hour in (1, 13):
        # 1 and 2 are pronounced in female form when talking about hours
        speak = "uma"
    elif hour in (2, 14):
        speak = "duas"
    elif hour < 13:
        speak = pronounce_number_pt(hour)
    else:
        speak = pronounce_number_pt(hour - 12)

    if minute == 15:
        speak += " e um quarto"
    elif minute == 30:
        speak += " e meia"
    elif minute == -15:
        speak += " menos um quarto"
    elif minute > 0:
        speak += " e " + pronounce_number_pt(minute)
    elif minute < 0:
        speak += " " + pronounce_number_pt(minute)
    elif not use_ampm:
        # exact time, e.g. 3:00
        speak += " em ponto"

    if use_ampm:
        # daytime identifier (equivalent to "in the morning")
        if 0 < hour < 6:
            speak += " da madrugada"
        elif 6 <= hour < 12:
            speak += " da manhã"
        elif 13 <= hour < 21:
            speak += " da tarde"
        elif hour != 0 and hour != 12:
            speak += " da noite"
    return speak
|
||||||
474
lingua_franca/lang/format_ru.py
Normal file
474
lingua_franca/lang/format_ru.py
Normal file
@@ -0,0 +1,474 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
from lingua_franca.lang.format_common import convert_to_mixed_fraction
|
||||||
|
from lingua_franca.lang.common_data_ru import _NUM_STRING_RU, \
|
||||||
|
_FRACTION_STRING_RU, _LONG_SCALE_RU, _SHORT_SCALE_RU, _SHORT_ORDINAL_RU, _LONG_ORDINAL_RU
|
||||||
|
from lingua_franca.internal import FunctionNotLocalizedError
|
||||||
|
|
||||||
|
|
||||||
|
def nice_number_ru(number, speech=True, denominators=range(1, 21)):
    """ Russian helper for nice_number

    Formats a number as a mixed fraction, e.g. "4 1/2" for display. For
    speech the fraction word is looked up in ``_FRACTION_STRING_RU`` and
    its ending is adjusted to the numerator.

    Args:
        number (int or float): the value to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction was found: fall back to 3 decimal places.
        return str(round(number, 3))

    whole, num, den = mixed

    # Without a fractional part the result is the same in both modes.
    if num == 0:
        # TODO: Number grouping? E.g. "1,000,000"
        return str(whole)

    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = _FRACTION_STRING_RU[den]
    if whole == 0:
        if num == 1 and den <= 4:
            spoken = '{}'.format(den_str)
        else:
            spoken = '{} {}'.format(num, den_str)
    elif num == 1 and den == 2:
        spoken = '{} с половиной'.format(whole)
    else:
        spoken = '{} и {} {}'.format(whole, num, den_str)

    # Select the ending by numerator: index 0 for 2..4, index 1 for > 4.
    if 2 <= num <= 4:
        ending_index = 0
    elif num > 4:
        ending_index = 1
    else:
        return spoken

    # Denominators 2..4 swap one trailing letter, 5 and up swap two.
    if 2 <= den <= 4:
        cut, endings = 1, ('и', 'ей')
    elif den >= 5:
        cut, endings = 2, ('ые', 'ых')
    else:
        return spoken

    return spoken[:-cut] + endings[ending_index]
|
||||||
|
|
||||||
|
|
||||||
|
def pronounce_number_ru(number, places=2, short_scale=True, scientific=False,
                        ordinals=False):
    """
    Convert a number to its spoken Russian equivalent.

    The decimal separator is spoken as "точка", followed by the decimal
    digits pronounced one by one.

    Args:
        number(float or int): the number to pronounce
        places(int): maximum decimal places to speak
        short_scale (bool) : use short (True) or long scale (False)
            https://en.wikipedia.org/wiki/Names_of_large_numbers
        scientific (bool): pronounce in scientific notation
        ordinals (bool): pronounce in ordinal form "first" instead of "one"
    Returns:
        (str): The pronounced number
    """
    num = number
    # deal with infinity
    if num == float("inf"):
        return "бесконечность"
    elif num == float("-inf"):
        return "минус бесконечность"
    if scientific:
        # Split the '%E' rendering into mantissa text and integer exponent.
        number = '%E' % num
        n, power = number.replace("+", "").split("E")
        power = int(power)
        # A zero exponent falls through to the regular pronunciation below.
        if power != 0:
            if ordinals:
                # This handles negative powers separately from the normal
                # handling since each call disables the scientific flag
                return '{}{} на десять в {}{} степени'.format(
                    'минус ' if float(n) < 0 else '',
                    pronounce_number_ru(
                        abs(float(n)), places, short_scale, False, ordinals=True),
                    'минус ' if power < 0 else '',
                    pronounce_number_ru(abs(power), places, short_scale, False, ordinals=True))
            else:
                # This handles negative powers separately from the normal
                # handling since each call disables the scientific flag
                return '{}{} на десять в степени {}{}'.format(
                    'минус ' if float(n) < 0 else '',
                    pronounce_number_ru(
                        abs(float(n)), places, short_scale, False, ordinals=False),
                    'минус ' if power < 0 else '',
                    pronounce_number_ru(abs(power), places, short_scale, False, ordinals=False))

    # Lookup table: basic number words merged with the chosen scale words.
    if short_scale:
        number_names = _NUM_STRING_RU.copy()
        number_names.update(_SHORT_SCALE_RU)
    else:
        number_names = _NUM_STRING_RU.copy()
        number_names.update(_LONG_SCALE_RU)

    # Words for 0..19, indexed by value.
    digits = [number_names[n] for n in range(0, 20)]

    # Words for 10, 20, ..., 90.
    tens = [number_names[n] for n in range(10, 100, 10)]

    # Scale words in the table's own key order, indexed by group below.
    if short_scale:
        hundreds = [_SHORT_SCALE_RU[n] for n in _SHORT_SCALE_RU.keys()]
    else:
        hundreds = [_LONG_SCALE_RU[n] for n in _LONG_SCALE_RU.keys()]

    # deal with negative numbers
    result = ""
    if num < 0:
        result = "минус "
    num = abs(num)

    # check for a direct match
    if num in number_names and not ordinals:
        result += number_names[num]
    else:
        def _sub_thousand(n, ordinals=False):
            """Pronounce an integer in 0..999."""
            assert 0 <= n <= 999
            if n in _SHORT_ORDINAL_RU and ordinals:
                return _SHORT_ORDINAL_RU[n]
            if n <= 19:
                return digits[n]
            elif n <= 99:
                q, r = divmod(n, 10)
                return tens[q - 1] + (" " + _sub_thousand(r, ordinals) if r
                                      else "")
            else:
                q, r = divmod(n, 100)
                return _NUM_STRING_RU[q * 100] + (" " + _sub_thousand(r, ordinals) if r else "")

        def _short_scale(n):
            """Pronounce the integer part of n, one group of 1000 at a time."""
            if n > max(_SHORT_SCALE_RU.keys()):
                return "бесконечность"
            ordi = ordinals

            # Non-integers are never spoken as ordinals.
            if int(n) != n:
                ordi = False
            n = int(n)
            assert 0 <= n
            res = []
            # _split_by yields the groups lowest first; i is the group index.
            for i, z in enumerate(_split_by(n, 1000)):
                if not z:
                    continue
                # Only the lowest group (i == 0) may use ordinal words here.
                number = _sub_thousand(z, not i and ordi)

                if i:
                    # No scale word is available for this group: give up with
                    # an empty string (see the scientific fallback below).
                    if i >= len(hundreds):
                        return ""
                    if ordi:
                        if i * 1000 in _SHORT_ORDINAL_RU:
                            if z == 1:
                                number = _SHORT_ORDINAL_RU[i * 1000]
                            else:
                                if z > 5:
                                    number = number[:-1] + "и"
                                number += _SHORT_ORDINAL_RU[i * 1000]
                        else:
                            if n not in _SHORT_SCALE_RU:
                                # `num` here is local to _short_scale (it is
                                # assigned in this function), not the outer
                                # variable.
                                num = int("1" + "0" * (len(str(n)) // 3 * 3))

                                # Adjust the ending of the count word before
                                # the scale word is glued on.
                                if number[-3:] == "два":
                                    number = number[:-1] + "ух"
                                elif number[-2:] == "ри" or number[-2:] == "ре":
                                    number = number[:-1] + "ёх"
                                elif number[-1:] == "ь":
                                    number = number[:-1] + "и"

                                number += _SHORT_SCALE_RU[num] + "ный"
                            else:
                                number = _SHORT_SCALE_RU[n] + "ный"
                    elif z == 1:
                        # A single unit of the scale: the bare scale word.
                        number = hundreds[i - 1]
                    else:
                        if i == 1:
                            # Thousands: a trailing 1 or 2 (outside the
                            # teens) has its ending swapped to "на" / "е".
                            if z % 10 == 1 and z % 100 // 10 != 1:
                                number = number[:-2] + "на"
                            elif z % 10 == 2 and z % 100 // 10 != 1:
                                number = number[:-1] + "е"
                            number += " " + plural_ru(z, "тысяча", "тысячи", "тысяч")
                        elif 1 <= z % 10 <= 4 and z % 100 // 10 != 1:
                            number += " " + hundreds[i - 1] + "а"
                        else:
                            number += " " + hundreds[i - 1] + "ов"

                res.append(number)
                # Only the lowest non-zero group is spoken as an ordinal.
                ordi = False

            return " ".join(reversed(res))

        def _split_by(n, split=1000):
            """Return the base-``split`` digits of n, least significant first."""
            assert 0 <= n
            res = []
            while n:
                n, r = divmod(n, split)
                res.append(r)
            return res

        def _long_scale(n):
            """Pronounce the integer part of n in groups of 1000000."""
            if n >= max(_LONG_SCALE_RU.keys()):
                return "бесконечность"
            ordi = ordinals
            # Non-integers are never spoken as ordinals.
            if int(n) != n:
                ordi = False
            n = int(n)
            assert 0 <= n
            res = []
            for i, z in enumerate(_split_by(n, 1000000)):
                if not z:
                    continue
                # Each group is pronounced with the short scale; only the
                # lowest group (i == 0) may be ordinal.
                number = pronounce_number_ru(z, places, True, scientific,
                                             ordinals=ordi and not i)
                # strip off the comma after the thousand
                if i:
                    if i >= len(hundreds):
                        return ""
                    # plus one as we skip 'thousand'
                    # (and 'hundred', but this is excluded by index value)
                    number = number.replace(',', '')

                    if ordi:
                        if (i + 1) * 1000000 in _LONG_ORDINAL_RU:
                            if z == 1:
                                number = _LONG_ORDINAL_RU[
                                    (i + 1) * 1000000]
                            else:
                                number += _LONG_ORDINAL_RU[
                                    (i + 1) * 1000000]
                        else:
                            if n not in _LONG_SCALE_RU:
                                # `num` here is local to _long_scale.
                                num = int("1" + "0" * (len(str(n)) // 3 * 3))

                                if number[-3:] == "два":
                                    number = number[:-1] + "ух"
                                elif number[-2:] == "ри" or number[-2:] == "ре":
                                    number = number[:-1] + "ёх"
                                elif number[-1:] == "ь":
                                    number = number[:-1] + "и"

                                number += _LONG_SCALE_RU[num] + "ный"
                            else:
                                number = " " + _LONG_SCALE_RU[n] + "ный"
                    elif z == 1:
                        number = hundreds[i]
                    elif z <= 4:
                        number += " " + hundreds[i] + "а"
                    else:
                        number += " " + hundreds[i] + "ов"

                res.append(number)
            return " ".join(reversed(res))

        if short_scale:
            result += _short_scale(num)
        else:
            result += _long_scale(num)

    # deal with scientific notation unpronounceable as number
    if not result and "e" in str(num):
        return pronounce_number_ru(num, places, short_scale, scientific=True)
    # Deal with fractional part
    elif not num == int(num) and places > 0:
        if abs(num) < 1.0 and (result == "минус " or not result):
            result += "ноль"
        result += " точка"
        # Speak at most `places` digits taken from the text after the ".".
        _num_str = str(num)
        _num_str = _num_str.split(".")[1][0:places]
        for char in _num_str:
            result += " " + number_names[int(char)]
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def nice_time_ru(dt, speech=True, use_24hour=True, use_ampm=False):
    """
    Format a time to a comfortable human format (Russian).

    Produces a display string such as '5:30', or a spoken phrase when
    ``speech`` is True.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): append the part of day in 12-hour format
    Returns:
        (str): The formatted time string
    """
    def _day_part(hour):
        # Part-of-day suffix used in place of AM/PM.
        if hour < 4:
            return " ночи"
        if hour < 12:
            return " утра"
        if hour < 18:
            return " дня"
        return " вечера"

    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        # e.g. "3:01" or "2:22", optionally followed by the part of day
        string = dt.strftime("%I:%M")
        if use_ampm:
            string += _day_part(dt.hour)
        if string[0] == '0':
            string = string[1:]  # strip leading zeros

    if not speech:
        return string

    # Generate a speakable version of the time
    if use_24hour:
        hour_text = string[0:2]
        minute_text = string[3:5]

        # A leading zero of the hour is spoken as its own word.
        if hour_text[0] == '0':
            speak = pronounce_hour_ru(int(hour_text[0])) + " "
            speak += pronounce_number_ru(int(hour_text[1]))
        else:
            speak = pronounce_hour_ru(int(hour_text))

        speak += " "
        if minute_text == '00':
            speak += "ровно"
        elif minute_text[0] == '0':
            speak += pronounce_number_ru(0) + " "
            speak += pronounce_number_ru(int(minute_text[1]))
        else:
            speak += pronounce_number_ru(int(minute_text))
        return speak

    # 12-hour speech: exact midnight and noon have dedicated words.
    if dt.minute == 0 and dt.hour in (0, 12):
        return "полночь" if dt.hour == 0 else "полдень"

    hour = dt.hour % 12 or 12  # 12 hour clock and 0 is spoken as 12
    if dt.minute == 15:
        speak = pronounce_hour_ru(hour) + " с четвертью"
    elif dt.minute == 30:
        speak = pronounce_hour_ru(hour) + " с половиной"
    elif dt.minute == 45:
        next_hour = (dt.hour + 1) % 12 or 12
        speak = "без четверти " + pronounce_hour_ru(next_hour)
    else:
        speak = pronounce_hour_ru(hour)
        if dt.minute != 0:
            if dt.minute < 10:
                speak += " ноль"
            speak += " " + pronounce_number_ru(dt.minute)
        elif not use_ampm:
            # Full hour without part of day: add the word for "hour(s)",
            # except after the hour-1 word, which is returned as is.
            if dt.hour % 12 == 1:
                return speak
            return speak + " " + plural_ru(dt.hour % 12, "час", "часа", "часов")

    if use_ampm:
        speak += _day_part(dt.hour)

    return speak
|
||||||
|
|
||||||
|
|
||||||
|
def nice_duration_ru(duration, speech=True):
    """ Convert a duration to a nice spoken Russian timespan

    Args:
        duration: length of the timespan in seconds
        speech (bool): must be True, only the spoken form is implemented
    Returns:
        str: the non-zero days, hours, minutes and seconds, each followed
             by its declined unit name and joined with single spaces
             (an empty string when every component is zero)
    Raises:
        FunctionNotLocalizedError: when speech is False
    """
    if not speech:
        raise FunctionNotLocalizedError

    # (amount, numeral must be feminine, (one, few, many) unit forms)
    breakdown = (
        (int(duration // 86400), False, ("день", "дня", "дней")),
        (int(duration // 3600 % 24), False, ("час", "часа", "часов")),
        (int(duration // 60 % 60), True, ("минута", "минуты", "минут")),
        (int(duration % 60), True, ("секунда", "секунды", "секунд")),
    )

    pieces = []
    for amount, feminine, forms in breakdown:
        if amount <= 0:
            continue
        # "минута" and "секунда" are feminine, so "один"/"два" must agree
        if feminine:
            spoken = pronounce_number_feminine_ru(amount)
        else:
            spoken = pronounce_number_ru(amount)
        pieces.append(spoken + " " + plural_ru(amount, *forms))

    return " ".join(pieces)
|
||||||
|
|
||||||
|
|
||||||
|
def pronounce_hour_ru(num):
    """ Pronounce an hour value in Russian

    One o'clock is spoken as the bare word "час"; every other value is
    delegated to pronounce_number_ru.

    Args:
        num: the hour to pronounce
    Returns:
        str: the spoken hour
    """
    return "час" if num == 1 else pronounce_number_ru(num)
|
||||||
|
|
||||||
|
|
||||||
|
def pronounce_number_feminine_ru(num):
    """ Pronounce a number so that it agrees with a feminine noun

    Takes the output of pronounce_number_ru and rewrites its ending when
    the number ends in 1 or 2 (but not in 11 or 12): the last two letters
    become "на" for 1 and the last letter becomes "е" for 2.

    Args:
        num: the number to pronounce
    Returns:
        str: the spoken number in feminine form
    """
    spoken = pronounce_number_ru(num)

    last_two = num % 100
    if last_two // 10 == 1:
        # 10..19 keep the ending produced by pronounce_number_ru
        return spoken

    last_digit = last_two % 10
    if last_digit == 1:
        return spoken[:-2] + "на"
    if last_digit == 2:
        return spoken[:-1] + "е"
    return spoken
|
||||||
|
|
||||||
|
|
||||||
|
def plural_ru(num: int, one: str, few: str, many: str) -> str:
    """ Pick the Russian plural form that matches a number

    Args:
        num (int): the quantity the noun refers to
        one (str): form used for 1, 21, 31, ... (e.g. "день")
        few (str): form used for 2-4, 22-24, ... (e.g. "дня")
        many (str): form used for 0, 5-20, 25-30, ... (e.g. "дней")
    Returns:
        str: one of the three forms passed in
    """
    # The form depends on the magnitude only. Python's % returns a
    # non-negative result, so -1 % 100 would be 99 and select `many`.
    num = abs(num) % 100
    if num // 10 == 1:
        # 10..19 always take the "many" form
        return many

    last_digit = num % 10
    if last_digit == 1:
        return one
    if 2 <= last_digit <= 4:
        return few
    return many
|
||||||
419
lingua_franca/lang/format_sl.py
Normal file
419
lingua_franca/lang/format_sl.py
Normal file
@@ -0,0 +1,419 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
from lingua_franca.lang.common_data_sl import _NUM_STRING_SL, \
|
||||||
|
_FRACTION_STRING_SL, _LONG_SCALE_SL, _SHORT_SCALE_SL, _SHORT_ORDINAL_SL
|
||||||
|
from lingua_franca.lang.format_common import convert_to_mixed_fraction
|
||||||
|
|
||||||
|
|
||||||
|
def nice_number_sl(number, speech=True, denominators=range(1, 21)):
    """ Slovenian helper for nice_number

    Formats a float as a mixed fraction, e.g. 4.5 becomes "4 1/2" for
    text; for speech the denominator is replaced by its name from
    _FRACTION_STRING_SL and the parts are joined with "in".

    Args:
        number (int or float): the float to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction: fall back to a number with 3 decimals
        return str(round(number, 3))

    whole, num, den = mixed

    # A zero numerator is rendered the same way for speech and display
    if num == 0:
        return str(whole)
    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = _FRACTION_STRING_SL[den]
    if whole == 0:
        spoken = '{} {}'.format(num, den_str)
    else:
        spoken = '{} in {} {}'.format(whole, num, den_str)

    # The last character of the phrase is adjusted to the numerator
    remainder = num % 100
    if remainder == 1:
        return spoken
    stem = spoken[:-1]
    if remainder == 2:
        return stem + 'i'
    if remainder in (3, 4):
        return stem + 'e'
    return stem
|
||||||
|
|
||||||
|
|
||||||
|
def pronounce_number_sl(num, places=2, short_scale=True, scientific=False,
                        ordinals=False):
    """
    Convert a number to its spoken Slovenian equivalent

    For example, '5.2' would return 'pet celih dve'

    Args:
        num(float or int): the number to pronounce
        places(int): maximum decimal places to speak
        short_scale (bool) : use short (True) or long scale (False)
            https://en.wikipedia.org/wiki/Names_of_large_numbers
        scientific (bool): pronounce in scientific notation
        ordinals (bool): pronounce in ordinal form "first" instead of "one"
    Returns:
        (str): The pronounced number
    """
    # deal with infinity
    if num == float("inf"):
        return "neskončno"
    elif num == float("-inf"):
        return "minus neskončno"
    if scientific:
        # '%E' yields e.g. '1.500000E+03'; split it into mantissa and exponent
        number = '%E' % num
        n, power = number.replace("+", "").split("E")
        power = int(power)
        # with a zero exponent the number is spoken normally further below
        if power != 0:
            if ordinals:
                # This handles negatives of powers separately from the normal
                # handling since each call disables the scientific flag
                return '{}{} krat deset na {}{}'.format(
                    'minus ' if float(n) < 0 else '',
                    pronounce_number_sl(
                        abs(float(n)), places, short_scale, False, ordinals=False),
                    'minus ' if power < 0 else '',
                    pronounce_number_sl(abs(power), places, short_scale, False, ordinals=True))
            else:
                # This handles negatives of powers separately from the normal
                # handling since each call disables the scientific flag
                return '{}{} krat deset na {}{}'.format(
                    'minus ' if float(n) < 0 else '',
                    pronounce_number_sl(
                        abs(float(n)), places, short_scale, False),
                    'minus ' if power < 0 else '',
                    pronounce_number_sl(abs(power), places, short_scale, False))

    # lookup table: basic number names merged with the selected scale names
    if short_scale:
        number_names = _NUM_STRING_SL.copy()
        number_names.update(_SHORT_SCALE_SL)
    else:
        number_names = _NUM_STRING_SL.copy()
        number_names.update(_LONG_SCALE_SL)

    # names of 0..19, indexed by value
    digits = [number_names[n] for n in range(0, 20)]

    # names of 10, 20, ..., 90; tens[q - 1] is the name of q * 10
    tens = [number_names[n] for n in range(10, 100, 10)]

    # scale names in the iteration order of the scale dict
    # NOTE(review): indexing below presumes ascending key order - confirm
    if short_scale:
        hundreds = [_SHORT_SCALE_SL[n] for n in _SHORT_SCALE_SL.keys()]
    else:
        hundreds = [_LONG_SCALE_SL[n] for n in _LONG_SCALE_SL.keys()]

    # deal with negatives
    result = ""
    if num < 0:
        result = "minus "
    num = abs(num)

    # check for a direct match
    if num in number_names and not ordinals:
        result += number_names[num]
    else:
        def _sub_thousand(n, ordinals=False, is_male=False):
            # speak 0..999; `is_male` swaps the last letter of "two" for "a"
            assert 0 <= n <= 999
            if n in _SHORT_ORDINAL_SL and ordinals:
                return _SHORT_ORDINAL_SL[n]
            if n <= 19:
                if is_male and n == 2:
                    return digits[n][:-1] + "a"
                return digits[n]
            elif n <= 99:
                q, r = divmod(n, 10)
                sub = _sub_thousand(r, False)
                if r == 2:
                    sub = sub[:-1] + "a"
                # units come first, glued to the tens with "in"
                return ((sub + "in") if r else "") + (
                    tens[q - 1]) + ("i" if ordinals else "")
            else:
                q, r = divmod(n, 100)
                # exactly one hundred is just "sto", without a leading digit
                if q == 1:
                    qstr = ""
                else:
                    qstr = digits[q]
                return (qstr + "sto" + (
                    " " + _sub_thousand(r, ordinals) if r else ""))

        def _plural_hundreds(n, hundred, ordi=True):
            # decline the scale word `hundred` for the count `n`
            if hundred[-3:] != "jon":
                # scale words not ending in "jon" only get the ordinal "i"
                if ordi:
                    return hundred + "i"

                return hundred

            if n < 1000 or short_scale:
                if ordi:
                    return hundred + "ti"

                if n % 100 == 1:
                    return hundred
                elif n % 100 == 2:
                    return hundred + "a"
                elif n % 100 == 3 or n % 100 == 4:
                    return hundred + "i"
                else:
                    return hundred + "ov"
            else:
                # long scale, group >= 1000: "...jon" becomes "...jard"
                n //= 1000

                if ordi:
                    return hundred[:-3] + "jardti"

                if n % 100 == 1:
                    return hundred[:-3] + "jarda"
                elif n % 100 == 2:
                    return hundred[:-3] + "jardi"
                elif n % 100 == 3 or n % 100 == 4:
                    return hundred[:-3] + "jarde"
                else:
                    return hundred[:-3] + "jard"

        def _short_scale(n):
            # speak the integer part of n in groups of three digits
            if n >= max(_SHORT_SCALE_SL.keys()):
                return "neskončno"
            ordi = ordinals

            # a number with a fractional part is never spoken as an ordinal
            if int(n) != n:
                ordi = False
            n = int(n)
            assert 0 <= n
            res = []

            split = _split_by(n, 1000)
            # with a single non-zero group the scale word takes the ordinal form
            if ordinals and len([a for a in split if a > 0]) == 1:
                ordi_force = True
            else:
                ordi_force = False

            # split is least-significant group first; i is the group index
            for i, z in enumerate(split):
                if not z:
                    continue

                if z == 1 and i == 1:
                    # one thousand: only the scale word is spoken
                    number = ""
                elif z > 100 and z % 100 == 2:
                    number = _sub_thousand(z, not i and ordi, is_male=True)
                elif z > 100 and z % 100 == 3:
                    number = _sub_thousand(z, not i and ordi) + "je"
                elif z > 1 or i == 0 or ordi:
                    number = _sub_thousand(z, not i and ordi)
                else:
                    number = ""

                if i:
                    # no name available for a group this large
                    if i >= len(hundreds):
                        return ""
                    if z > 1:
                        number += " "
                    number += _plural_hundreds(
                        z, hundreds[i], True if ordi_force else not i and ordi)
                res.append(number)
                # only the first non-zero group may be spoken as an ordinal
                ordi = False

            return " ".join(reversed(res))

        def _split_by(n, split=1000):
            # break n into base-`split` groups, least significant first
            assert 0 <= n
            res = []
            while n:
                n, r = divmod(n, split)
                res.append(r)
            return res

        def _long_scale(n):
            # speak the integer part of n in groups of six digits
            if n >= max(_LONG_SCALE_SL.keys()):
                return "neskončno"
            ordi = ordinals
            if int(n) != n:
                ordi = False
            n = int(n)
            assert 0 <= n
            res = []

            split = _split_by(n, 1000000)
            if ordinals and len([a for a in split if a > 0]) == 1:
                ordi_force = True
            else:
                ordi_force = False

            for i, z in enumerate(split):
                if not z:
                    continue

                # each six-digit group is itself spoken with the short scale
                number = pronounce_number_sl(z, places, True, scientific)
                if z > 100:
                    add = number.split()[0] + " "
                else:
                    add = ""
                if z % 100 == 2 and i >= 1:
                    number = add + digits[2][:-1] + "a"
                if z % 100 == 3 and i >= 1:
                    number = add + digits[3] + "je"

                # strip off the comma after the thousand
                if i:
                    if i >= len(hundreds):
                        return ""
                    # plus one as we skip 'thousand'
                    # (and 'hundred', but this is excluded by index value)
                    hundred = _plural_hundreds(
                        z, hundreds[i + 1], True if ordi_force else ordi and not i)

                    if z >= 1000:
                        z //= 1000
                        number = pronounce_number_sl(z, places, True, scientific,
                                                     ordinals=True if ordi_force else ordi and not i)

                    if z == 1:
                        number = hundred
                    else:
                        number += " " + hundred
                res.append(number)
            return " ".join(reversed(res))

        if short_scale:
            result += _short_scale(num)
        else:
            result += _long_scale(num)

        # ordinals are spoken as a single word
        if ordinals:
            result = result.replace(" ", "")

    # deal with scientific notation unpronounceable as number
    if (not result or result == "neskončno") and "e" in str(num):
        return pronounce_number_sl(num, places, short_scale, scientific=True)
    # Deal with fractional part
    elif not num == int(num) and places > 0:
        if abs(num) < 1.0 and (result == "minus " or not result):
            result += "nič"

        # the word for "whole" is declined by the integer part
        if int(abs(num)) % 100 == 1:
            result += " cela"
        elif int(abs(num)) % 100 == 2:
            result += " celi"
        elif int(abs(num)) % 100 == 3 or int(abs(num)) % 100 == 4:
            result += " cele"
        else:
            result += " celih"

        # decimals are read digit by digit from the string form of num
        _num_str = str(num)
        _num_str = _num_str.split(".")[1][0:places]
        for char in _num_str:
            result += " " + number_names[int(char)]
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def nice_time_sl(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format in Slovenian

    For display the result is a clock string such as '5:30'; for speech
    the time is spoken relative to the hour ("... čez ...", "pol ...",
    "... do ...") or, in 24-hour mode, as hour followed by minutes.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" / "2:22 PM" with am/pm, otherwise "3:01" / "2:22"
        string = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if string[0] == '0':
            string = string[1:]  # strip leading zeros

    if not speech:
        return string

    def _hour_declension(hour):
        # hour name in the case used after "pol" and "do"
        name = pronounce_number_sl(hour)

        if hour == 1:
            return name[:-1] + "ih"
        if hour == 2 or hour == 4:
            return name + "h"
        if hour == 3:
            return name[:-1] + "eh"
        if hour == 7 or hour == 8:
            return name[:-2] + "mih"
        return name + "ih"

    # Generate a speakable version of the time
    if use_24hour:
        # "13 nič nič"
        hour_digits = string[0:2]
        minute_digits = string[3:5]
        spoken = pronounce_number_sl(int(hour_digits)) + " "
        if minute_digits == '00':
            return spoken + "nič nič"
        if minute_digits[0] == '0':
            # a leading zero in the minutes is spoken explicitly
            spoken += pronounce_number_sl(0) + " "
            spoken += pronounce_number_sl(int(minute_digits[1]))
            return spoken
        return spoken + pronounce_number_sl(int(minute_digits))

    if dt.hour == 0 and dt.minute == 0:
        return "polnoč"
    if dt.hour == 12 and dt.minute == 0:
        return "poldne"

    hour = dt.hour % 12 or 12  # 12 hour clock and 0 is spoken as 12
    if dt.minute == 0:
        spoken = pronounce_number_sl(hour)
    elif dt.minute < 30:
        spoken = pronounce_number_sl(
            dt.minute) + " čez " + pronounce_number_sl(hour)
    elif dt.minute == 30:
        upcoming = (dt.hour + 1) % 12 or 12
        spoken = "pol " + _hour_declension(upcoming)
    elif dt.minute > 30:
        upcoming = (dt.hour + 1) % 12 or 12
        spoken = pronounce_number_sl(
            60 - dt.minute) + " do " + _hour_declension(upcoming)

    if use_ampm:
        spoken += " p.m." if dt.hour > 11 else " a.m."

    return spoken
|
||||||
376
lingua_franca/lang/format_sv.py
Normal file
376
lingua_franca/lang/format_sv.py
Normal file
@@ -0,0 +1,376 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
from .format_common import convert_to_mixed_fraction
|
||||||
|
from lingua_franca.lang.common_data_sv import _EXTRA_SPACE_SV, \
|
||||||
|
_FRACTION_STRING_SV, _MONTHS_SV, _NUM_POWERS_OF_TEN_SV, _NUM_STRING_SV
|
||||||
|
from math import floor
|
||||||
|
|
||||||
|
|
||||||
|
def nice_number_sv(number, speech=True, denominators=range(1, 21)):
    """ Swedish helper for nice_number

    Formats a float as a mixed fraction, e.g. 4.5 becomes "4 1/2" for
    text; for speech the denominator is replaced by its name from
    _FRACTION_STRING_SV, a numerator of one is spoken as "en" and the
    whole part is joined with "och".

    Args:
        number (int or float): the float to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction: fall back to a number with 3 decimals
        return str(round(number, 3))

    whole, num, den = mixed

    # A zero numerator is rendered the same way for speech and display
    # TODO: Number grouping? E.g. "1,000,000"
    if num == 0:
        return str(whole)
    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = _FRACTION_STRING_SV[den]
    if num == 1:
        fraction = 'en {}'.format(den_str)
    else:
        fraction = '{} {}'.format(num, den_str)
    # plural ending of the fraction name
    if num > 1:
        fraction += 'ar'

    if whole == 0:
        return fraction
    return '{} och {}'.format(whole, fraction)
|
||||||
|
|
||||||
|
|
||||||
|
def pronounce_number_sv(number, places=2, short_scale=True, scientific=False,
                        ordinals=False):
    """
    Convert a number to its spoken Swedish equivalent

    The integer part is spoken in groups of three digits; a fractional
    part is spoken as " komma" followed by exactly `places` digits.

    Args:
        number (float or int): the number to pronounce
        places (int): number of decimal places to speak
        short_scale (bool): currently ignored
        scientific (bool): currently ignored
        ordinals (bool): currently ignored
    Returns:
        (str): The pronounced number; str(number) when its magnitude is
               too large to be pronounced
    """
    # TODO short_scale, scientific and ordinals
    # currently ignored

    def pronounce_triplet_sv(num):
        # speak an integer in 0..999; returns '' for 0
        result = ""
        num = floor(num)

        hundreds, num = divmod(num, 100)
        if hundreds == 1:
            result += 'ett' + 'hundra'
        elif hundreds > 1:
            result += _NUM_STRING_SV[hundreds] + 'hundra'

        if num == 1:
            result += 'ett'
        elif 1 < num <= 20:
            result += _NUM_STRING_SV[num]
        elif num > 20:
            ones = num % 10
            tens = num - ones
            result += _NUM_STRING_SV[tens]
            if ones > 0:
                result += _NUM_STRING_SV[ones]

        return result

    def pronounce_fractional_sv(num, places):
        # fixed number of places even with trailing zeros
        result = ""
        place = 10
        while places > 0:
            # doesn't work with 1.0001 and places = 2: int(
            # num*place) % 10 > 0 and places > 0:
            result += " " + _NUM_STRING_SV[int(num * place) % 10]
            place *= 10
            places -= 1
        return result

    def pronounce_whole_number_sv(num, scale_level=0):
        # speak the lowest three digits, then recurse on the rest with
        # the next scale word (tusen, miljon, ...); returns '' for 0
        if num == 0:
            return ''

        num = floor(num)
        result = ''
        last_triplet = num % 1000

        if last_triplet == 1:
            if scale_level == 0:
                result += 'en'
            elif scale_level == 1:
                result += 'ettusen' + _EXTRA_SPACE_SV
            else:
                result += 'en ' + \
                    _NUM_POWERS_OF_TEN_SV[scale_level] + _EXTRA_SPACE_SV
        elif last_triplet > 1:
            result += pronounce_triplet_sv(last_triplet)
            if scale_level == 1:
                result += 'tusen' + _EXTRA_SPACE_SV
            elif scale_level >= 2:
                # plural of the scale word, e.g. MiljonER
                result += _NUM_POWERS_OF_TEN_SV[scale_level]
                result += 'er' + _EXTRA_SPACE_SV

        # Integer division keeps every digit exact; float division loses
        # the low digits of integers above about 2**53 * 1000.
        return pronounce_whole_number_sv(num // 1000, scale_level + 1) + result

    if abs(number) >= 1000000000000000000000000:  # cannot do more than this
        return str(number)
    if number == 0:
        return str(_NUM_STRING_SV[0])
    if number < 0:
        return "minus " + pronounce_number_sv(abs(number), places)
    if number == int(number):
        return pronounce_whole_number_sv(number)

    whole_number_part = floor(number)
    fractional_part = number - whole_number_part
    # pronounce_whole_number_sv returns '' for 0, so a value such as 0.5
    # needs the word for zero spelled out before "komma"
    result = (pronounce_whole_number_sv(whole_number_part)
              or str(_NUM_STRING_SV[0]))
    if places > 0:
        result += " komma"
        result += pronounce_fractional_sv(fractional_part, places)
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def pronounce_ordinal_sv(number):
    """
    This function pronounces a number as a Swedish ordinal

    1 -> första
    2 -> andra
    11 -> elfte

    Args:
        number (int): the number to format
    Returns:
        (str): The pronounced number string. Negative or non-integral
               input is returned unchanged.
    """

    # Base forms for 0..19. The ordinals for 11-19 cannot be composed
    # from "tio" plus a unit ordinal, so they are listed explicitly.
    # This produces the base form, it will have to be adapted for genus,
    # casus, numerus
    ordinals = ["noll", "första", "andra", "tredje", "fjärde", "femte",
                "sjätte", "sjunde", "åttonde", "nionde", "tionde",
                "elfte", "tolfte", "trettonde", "fjortonde", "femtonde",
                "sextonde", "sjuttonde", "artonde", "nittonde"]

    if number < 0 or number != int(number):
        return number
    # accept integral floats such as 3.0, which cannot index a list
    number = int(number)
    if number == 0:
        return ordinals[0]

    # `tail` is the part spoken as an ordinal word: 10..19 as a unit,
    # otherwise only the last digit. `head` is spoken as a cardinal.
    tail = number % 100
    if not 10 <= tail <= 19:
        tail = number % 10
    head = number - tail

    result = ""
    if head > 0:
        result += pronounce_number_sv(head).rstrip()

    if tail > 0:
        result += ordinals[tail]
    elif result.endswith("o"):
        # round tens: tjugo -> tjugonde, trettio -> trettionde
        result += "nde"
    else:
        # e.g. hundra -> hundrade, tusen -> tusende
        result += 'de'

    return result
|
||||||
|
|
||||||
|
|
||||||
|
def nice_time_sv(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format in Swedish

    For example, generate 'halv ett' for speech or '12:30' for
    text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the time of day for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    elif use_ampm:
        # e.g. "03:01 AM" or "02:22 PM"
        string = dt.strftime("%I:%M %p")
    else:
        # e.g. "03:01" or "02:22"
        string = dt.strftime("%I:%M")

    if not speech:
        return string

    # Generate a speakable version of the time
    if use_24hour:
        # 01:00 is "ett" not "en"
        speak = "ett" if dt.hour == 1 else pronounce_number_sv(dt.hour)
        if dt.minute != 0:
            if dt.minute < 10:
                speak += ' noll'

            if dt.minute == 1:
                speak += ' ett'
            else:
                speak += " " + pronounce_number_sv(dt.minute)

        return speak  # ampm is ignored when use_24hour is true

    speak = ""
    hour = dt.hour
    minute = dt.minute

    if minute != 0:
        if minute < 30:
            # minutes past the current hour
            speak += 'kvart' if minute == 15 else pronounce_number_sv(minute)

            if minute == 1:
                speak += ' minut över '
            elif minute in (5, 10, 15):
                speak += ' över '
            else:
                speak += ' minuter över '
        elif minute > 30:
            # minutes remaining to the next hour
            remaining = 60 - minute
            speak += ('kvart' if remaining == 15
                      else pronounce_number_sv(remaining))

            # The singular applies when ONE minute remains (xx:59).
            # Testing minute == 1 here could never match.
            if remaining == 1:
                speak += ' minut i '
            elif remaining in (5, 10, 15):
                speak += ' i '
            else:
                speak += ' minuter i '

            hour = (hour + 1) % 12
        else:
            # half hours are named after the upcoming hour
            speak += 'halv '
            hour = (hour + 1) % 12

    if hour == 0 and minute == 0:
        return "midnatt"
    if hour == 12 and minute == 0:
        return "middag"
    # TODO: "half past 3", "a quarter of 4" and other idiomatic times

    if hour == 0:
        speak += pronounce_number_sv(12)
    elif hour <= 13:
        if hour == 1 or hour == 13:  # 01:00 and 13:00 is "ett"
            speak += 'ett'
        else:
            speak += pronounce_number_sv(hour)
    else:
        speak += pronounce_number_sv(hour - 12)

    if use_ampm:
        if dt.hour > 11:
            if dt.hour < 18:
                # 12:01 - 17:59 afternoon
                speak += " på eftermiddagen"
            elif dt.hour < 22:
                # 18:00 - 21:59 evening
                speak += " på kvällen"
            else:
                # 22:00 - 23:59 at night
                speak += " på natten"
        elif dt.hour < 3:
            # 00:01 - 02:59 at night
            speak += " på natten"
        else:
            # 03:00 - 11:59 in the morning
            speak += " på morgonen"

    return speak
|
||||||
|
|
||||||
|
|
||||||
|
def nice_response_sv(text):
    """ Make a response text easier to speak in Swedish

    Ordinals written as "<digits>." in front of a month name are spelled
    out via _nice_ordinal_sv, and a "^" that is followed by a numeric
    word is replaced with "upphöjt till" (to the power of).

    Args:
        text (str): the text to process
    Returns:
        (str): the processed text
    """
    words = text.split()

    # Spell out the ordinals first and re-split, so that the "^"
    # replacement below works on the updated words and does not discard
    # the ordinal replacement when the text is joined again.
    if any(word.lower() in _MONTHS_SV for word in words):
        text = _nice_ordinal_sv(text)
        words = text.split()

    replaced = False
    for idx, word in enumerate(words):
        if word != '^':
            continue
        word_next = words[idx + 1] if idx + 1 < len(words) else ""
        if word_next.isnumeric():
            words[idx] = "upphöjt till"
            replaced = True

    if replaced:
        text = " ".join(words)
    return text
|
||||||
|
|
||||||
|
|
||||||
|
def _nice_ordinal_sv(text, speech=True):
    """ Spell out ordinals that stand in front of a month name

    A word of the form "<digits>." followed by a word from _MONTHS_SV is
    replaced by pronounce_ordinal_sv of the digits plus a one-letter
    suffix chosen from the preceding word.

    Args:
        text (str): the text to process
        speech (bool): unused
    Returns:
        (str): the text with such ordinals spelled out; unchanged when
               no word ends with "."
    """
    # NOTE(review): the "n"/"r" suffixes look like they were carried over
    # from the German implementation - confirm they are wanted for Swedish
    n_suffix_after = ("om", "den", "från", "till", "(från", "(om")

    normalized_text = text
    words = text.split()

    for idx, word in enumerate(words):
        if word[-1:] != ".":
            continue

        following = words[idx + 1] if idx + 1 < len(words) else ""
        preceding = words[idx - 1] if idx > 0 else ""

        if word[:-1].isdecimal() and following.lower() in _MONTHS_SV:
            spoken = pronounce_ordinal_sv(int(word[:-1]))
            if preceding.lower() in n_suffix_after:
                spoken += "n"
            else:
                spoken += "r"
            words[idx] = spoken

        # any word ending in "." causes the text to be re-joined
        normalized_text = " ".join(words)

    return normalized_text
|
||||||
1132
lingua_franca/lang/parse_ca.py
Normal file
1132
lingua_franca/lang/parse_ca.py
Normal file
File diff suppressed because it is too large
Load Diff
387
lingua_franca/lang/parse_common.py
Normal file
387
lingua_franca/lang/parse_common.py
Normal file
@@ -0,0 +1,387 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
from collections import namedtuple
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
class Normalizer:
    """
    Configurable, language-agnostic utterance normalizer.

    Individual languages may subclass this if needed; normalize_XX should
    pass a valid config read from json.  Every option is read from
    ``self.config`` through the properties below, each of which supplies
    the fallback used when the key is absent.
    """
    _default_config = {}

    def __init__(self, config=None):
        # A missing or empty config falls back to the class-level default.
        self.config = config or self._default_config

    @staticmethod
    def tokenize(utterance):
        """Split an utterance on whitespace, detaching '%' and '#' from
        adjacent digits first.

        Args:
            utterance (str): text to split
        Returns:
            list: the whitespace separated tokens
        """
        # Split things like 12%
        utterance = re.sub(r"([0-9]+)([\%])", r"\1 \2", utterance)
        # Split things like #1
        utterance = re.sub(r"(\#)([0-9]+\b)", r"\1 \2", utterance)
        return utterance.split()

    @property
    def should_lowercase(self):
        """Config flag "lowercase" (default False)."""
        return self.config.get("lowercase", False)

    @property
    def should_numbers_to_digits(self):
        """Config flag "numbers_to_digits" (default True)."""
        return self.config.get("numbers_to_digits", True)

    @property
    def should_expand_contractions(self):
        """Config flag "expand_contractions" (default True)."""
        return self.config.get("expand_contractions", True)

    @property
    def should_remove_symbols(self):
        """Config flag "remove_symbols" (default False)."""
        return self.config.get("remove_symbols", False)

    @property
    def should_remove_accents(self):
        """Config flag "remove_accents" (default False)."""
        return self.config.get("remove_accents", False)

    @property
    def should_remove_articles(self):
        """Config flag "remove_articles" (default False)."""
        return self.config.get("remove_articles", False)

    @property
    def should_remove_stopwords(self):
        """Config flag "remove_stopwords" (default False)."""
        return self.config.get("remove_stopwords", False)

    @property
    def contractions(self):
        """Mapping of contraction -> expansion (default empty)."""
        return self.config.get("contractions", {})

    @property
    def word_replacements(self):
        """Mapping of word -> replacement (default empty)."""
        return self.config.get("word_replacements", {})

    @property
    def number_replacements(self):
        """Mapping of number word -> digits (default empty)."""
        return self.config.get("number_replacements", {})

    @property
    def accents(self):
        """Mapping of accented character -> plain character."""
        return self.config.get("accents",
                               {"á": "a", "à": "a", "ã": "a", "â": "a",
                                "é": "e", "è": "e", "ê": "e", "ẽ": "e",
                                "í": "i", "ì": "i", "î": "i", "ĩ": "i",
                                "ò": "o", "ó": "o", "ô": "o", "õ": "o",
                                "ú": "u", "ù": "u", "û": "u", "ũ": "u",
                                "Á": "A", "À": "A", "Ã": "A", "Â": "A",
                                "É": "E", "È": "E", "Ê": "E", "Ẽ": "E",
                                "Í": "I", "Ì": "I", "Î": "I", "Ĩ": "I",
                                "Ò": "O", "Ó": "O", "Ô": "O", "Õ": "O",
                                "Ú": "U", "Ù": "U", "Û": "U", "Ũ": "U"
                                })

    @property
    def stopwords(self):
        """Collection of stopwords (default empty)."""
        return self.config.get("stopwords", [])

    @property
    def articles(self):
        """Collection of articles (default empty)."""
        return self.config.get("articles", [])

    @property
    def symbols(self):
        """Collection of symbols that remove_symbols blanks out."""
        return self.config.get("symbols",
                               [";", "_", "!", "?", "<", ">",
                                "|", "(", ")", "=", "[", "]", "{",
                                "}", "»", "«", "*", "~", "^", "`"])

    def _replace_tokens(self, utterance, mapping):
        """Re-join the tokens of utterance, swapping those present in
        mapping for their mapped value."""
        return " ".join(mapping[w] if w in mapping else w
                        for w in self.tokenize(utterance))

    def _blank_tokens(self, utterance, unwanted):
        """Re-join the tokens of utterance, replacing those present in
        unwanted with an empty string (the separating spaces remain)."""
        return " ".join("" if w in unwanted else w
                        for w in self.tokenize(utterance))

    def expand_contractions(self, utterance):
        """ Expand common contractions, e.g. "isn't" -> "is not" """
        # The property is read once instead of once per token.
        return self._replace_tokens(utterance, self.contractions)

    def numbers_to_digits(self, utterance):
        """Replace tokens listed in number_replacements."""
        return self._replace_tokens(utterance, self.number_replacements)

    def remove_articles(self, utterance):
        """Blank out every token listed in articles."""
        return self._blank_tokens(utterance, self.articles)

    def remove_stopwords(self, utterance):
        """Blank out every token listed in stopwords, then drop an
        orphaned trailing hyphen."""
        utterance = self._blank_tokens(utterance, self.stopwords)
        # Remove trailing whitespaces from utterance along with orphaned
        # hyphens, more characters may be added later
        utterance = re.sub(r'- *$', '', utterance)
        return utterance

    def remove_symbols(self, utterance):
        """Replace every configured symbol with a single space."""
        for s in self.symbols:
            utterance = utterance.replace(s, " ")
        return utterance

    def remove_accents(self, utterance):
        """Replace every configured accented character by its plain form."""
        # Fetch the table once: the default is rebuilt on every property
        # access, which the per-iteration lookup used to trigger.
        accents = self.accents
        for s in accents:
            utterance = utterance.replace(s, accents[s])
        return utterance

    def replace_words(self, utterance):
        """Replace tokens listed in word_replacements."""
        return self._replace_tokens(utterance, self.word_replacements)

    def normalize(self, utterance="", remove_articles=None):
        """Run the configured normalization steps over utterance.

        Mutations (lowercase, contractions, numbers, word replacements)
        run first, then removals (symbols, accents, articles, stopwords),
        and finally runs of spaces are collapsed.

        Args:
            utterance (str): text to normalize
            remove_articles (bool): when truthy, articles are removed
                regardless of the config; otherwise the config decides
        Returns:
            str: the normalized utterance
        """
        # mutations
        if self.should_lowercase:
            utterance = utterance.lower()
        if self.should_expand_contractions:
            utterance = self.expand_contractions(utterance)
        if self.should_numbers_to_digits:
            utterance = self.numbers_to_digits(utterance)
        utterance = self.replace_words(utterance)

        # removals
        if self.should_remove_symbols:
            utterance = self.remove_symbols(utterance)
        if self.should_remove_accents:
            utterance = self.remove_accents(utterance)
        # TODO deprecate remove_articles param, backwards compat
        if remove_articles or self.should_remove_articles:
            utterance = self.remove_articles(utterance)
        if self.should_remove_stopwords:
            utterance = self.remove_stopwords(utterance)
        # remove extra spaces
        utterance = " ".join([w for w in utterance.split(" ") if w])
        return utterance
|
||||||
|
|
||||||
|
|
||||||
|
# Token is used by the number processing functions in this module.
# Parsing slices and divides the original text, so every word carries the
# position it had in the original input; that pairing is what this
# namedtuple records.
Token = namedtuple('Token', ('word', 'index'))
|
||||||
|
|
||||||
|
|
||||||
|
class ReplaceableNumber:
    """
    Similar to Token, this class is used in number parsing.

    Once we've found a number in a string, this class contains all
    the info about the value, and where it came from in the original text.
    In other words, it is the text, and the number that can replace it in
    the string.

    Instances are write-once: an attribute can be set while it does not
    exist yet, any later assignment raises.
    """

    def __init__(self, value, tokens: "list[Token]"):
        """
        Args:
            value: the parsed number (None or False mean "no number")
            tokens: the Token objects the value was parsed from
        """
        self.value = value
        self.tokens = tokens

    def __bool__(self):
        # 0 is a legitimate value, so only None and False count as empty.
        return self.value is not None and self.value is not False

    @property
    def start_index(self):
        """Index of the first token."""
        return self.tokens[0].index

    @property
    def end_index(self):
        """Index of the last token."""
        return self.tokens[-1].index

    @property
    def text(self):
        """The words of all tokens joined by single spaces."""
        return ' '.join([t.word for t in self.tokens])

    def __setattr__(self, key, value):
        # Anything that already resolves (instance attribute or property)
        # must not be rebound.
        if hasattr(self, key):
            raise Exception("Immutable!")
        super().__setattr__(key, value)

    def __str__(self):
        return "({v}, {t})".format(v=self.value, t=self.tokens)

    def __repr__(self):
        return "{n}({v}, {t})".format(n=self.__class__.__name__, v=self.value,
                                      t=self.tokens)
|
||||||
|
|
||||||
|
|
||||||
|
def tokenize(text):
    """
    Turn a string into Token objects, one per word.

    The words come from Normalizer.tokenize and each Token records the
    position of its word in that word list.

    Args:
        text str: Text to tokenize.

    Returns:
        [Token]

    """
    words = Normalizer.tokenize(text)
    return [Token(word, position) for position, word in enumerate(words)]
|
||||||
|
|
||||||
|
|
||||||
|
def partition_list(items, split_on):
    """
    Cut a sequence into runs, separated by the items matching split_on.

    Works similarly to str.partition, except that every matching item
    yields its own single-element partition and empty runs are omitted.

    Args:
        items: iterable of items to partition
        split_on callable:
            Should return a boolean. Each item will be passed to
            this callable in succession, and partitions will be
            created any time it returns True.

    Returns:
        [[any]]

    """
    partitions = []
    pending = []
    for element in items:
        if not split_on(element):
            pending.append(element)
            continue
        # A separator closes the run collected so far (if any) and then
        # stands alone as its own partition.
        if pending:
            partitions.append(pending)
        partitions.append([element])
        pending = []
    if pending:
        partitions.append(pending)
    return partitions
|
||||||
|
|
||||||
|
|
||||||
|
def invert_dict(original):
    """
    Build a new dictionary mapping each value of original to its key.

    When several keys share a value, the key visited last wins.

    Args:
        original dict: The dict like object to invert

    Returns:
        dict

    """
    inverted = {}
    for key, value in original.items():
        inverted[value] = key
    return inverted
|
||||||
|
|
||||||
|
|
||||||
|
def is_numeric(input_str):
    """
    Report whether a string can be converted by float().

    Args:
        input_str (str): string to test if a number
    Returns:
        (bool): True if a number, else False

    """
    try:
        float(input_str)
    except ValueError:
        return False
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def look_for_fractions(split_list):
    """
    Decide whether the pieces of a token split on '/' form a fraction.

    Args:
        split_list (list): list created by splitting on '/'
    Returns:
        (bool): True when there are exactly two pieces and both are
            numeric, otherwise False

    """
    if len(split_list) != 2:
        return False
    numerator, denominator = split_list
    return is_numeric(numerator) and is_numeric(denominator)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_numbers_generic(text, pronounce_handler, extract_handler,
                            short_scale=True, ordinals=False):
    """
    Takes in a string and extracts a list of numbers.
    Language agnostic, per language parsers need to be provided

    The text is consumed from the right: the handler's result is recorded,
    its last occurrence is blanked out of the working text, and the
    handler is asked again until it returns a falsy value or the working
    text stops changing.

    Args:
        text (str): the string to extract a number from
        pronounce_handler (function): function that pronounces a number
        extract_handler (function): function that extracts the last number
            present in a string; called as
            extract_handler(text, short_scale, ordinals)
        short_scale (bool): Use "short scale" or "long scale" for large
            numbers -- over a million. The default is short scale, which
            is now common in most English speaking countries.
            See https://en.wikipedia.org/wiki/Names_of_large_numbers
        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
    Returns:
        list: the extracted numbers, in the order they appear in the text
    """
    def replace_last(source, target, replacement):
        # handle duplicate occurrences, replace last one only
        return replacement.join(source.rsplit(target, 1))

    numbers = []
    to_parse = text
    extract = extract_handler(to_parse, short_scale, ordinals)
    # NOTE: any falsy result ends the loop, so an extracted 0 is treated
    # like "nothing found".
    while extract:
        numbers.append(extract)
        prev = to_parse
        num_txt = pronounce_handler(extract)
        extract = str(extract)
        if extract.endswith(".0"):
            extract = extract[:-2]

        # last biggest number was replaced, recurse to handle cases like
        # test one two 3
        to_parse = replace_last(to_parse, num_txt, extract)
        to_parse = replace_last(to_parse, extract, " ")
        if to_parse == prev:
            # avoid infinite loops, occasionally pronounced number may be
            # different from extracted text,
            # ie pronounce(0.5) != half and extract(half) == 0.5
            # TODO fix this
            break
        extract = extract_handler(to_parse, short_scale, ordinals)
    numbers.reverse()
    return numbers
|
||||||
1707
lingua_franca/lang/parse_cs.py
Normal file
1707
lingua_franca/lang/parse_cs.py
Normal file
File diff suppressed because it is too large
Load Diff
891
lingua_franca/lang/parse_da.py
Normal file
891
lingua_franca/lang/parse_da.py
Normal file
@@ -0,0 +1,891 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
from datetime import datetime
|
||||||
|
from dateutil.relativedelta import relativedelta
|
||||||
|
from lingua_franca.lang.parse_common import is_numeric, look_for_fractions, \
|
||||||
|
extract_numbers_generic, Normalizer
|
||||||
|
from lingua_franca.lang.common_data_da import _DA_NUMBERS
|
||||||
|
from lingua_franca.lang.format_da import pronounce_number_da
|
||||||
|
from lingua_franca.time import now_local
|
||||||
|
|
||||||
|
|
||||||
|
def extract_number_da(text, short_scale=True, ordinals=False):
    """
    Extract the first number found in a Danish utterance.

    The text is lowercased and the words "den" and "det" are dropped
    before scanning.  A token counts as a number when it consists of
    digits, is recognised by is_fractional_da or is_ordinal_da, is a key
    of _DA_NUMBERS (optionally multiplied by a following fraction word),
    or has the form "a/b".  Two numbers joined by "og" are added together.

    Args:
        text (str): the string to extract a number from
        short_scale (bool): unused, kept for API compatibility
        ordinals (bool): unused, kept for API compatibility
    Returns:
        (int) or (float): The value of extracted number, or False when
            nothing was found
    """
    # TODO: short_scale and ordinals don't do anything here.
    # The parameters are present in the function signature for API compatibility
    # reasons.

    text = text.lower()
    aWords = [word for word in text.split() if word not in ("den", "det")]
    and_pass = False
    valPreAnd = False
    val = False
    count = 0
    while count < len(aWords):
        word = aWords[count]
        if is_numeric(word):
            if word.isdigit():  # doesn't work with decimals
                val = float(word)
        elif is_fractional_da(word):
            val = is_fractional_da(word)
        elif is_ordinal_da(word):
            val = is_ordinal_da(word)
        elif word in _DA_NUMBERS:
            val = _DA_NUMBERS[word]
            wordNext = aWords[count + 1] if count + 1 < len(aWords) else ""
            valNext = is_fractional_da(wordNext)

            if valNext:
                # number word followed by a fraction word: multiply and
                # consume the fraction word
                val = val * valNext
                aWords[count + 1] = ""

        if not val:
            # look for fractions like "2/3"; a zero denominator is not a
            # usable fraction and must not raise ZeroDivisionError
            aPieces = word.split('/')
            if look_for_fractions(aPieces) and float(aPieces[1]) != 0:
                val = float(aPieces[0]) / float(aPieces[1])
            elif and_pass:
                # nothing to add after "og": keep the value found before it
                val = valPreAnd
                break
            else:
                count += 1
                continue

        aWords[count] = ""

        if and_pass:
            aWords[count - 1] = ''  # remove "og"
            val += valPreAnd
        elif count + 1 < len(aWords) and aWords[count + 1] == 'og':
            # remember this value and continue with the word after "og"
            and_pass = True
            valPreAnd = val
            val = False
            count += 2
            continue
        elif count + 2 < len(aWords) and aWords[count + 2] == 'og':
            # same, with one token between the number and "og"
            and_pass = True
            valPreAnd = val
            val = False
            count += 3
            continue

        break

    return val or False
|
||||||
|
|
||||||
|
|
||||||
|
def extract_datetime_da(text, anchorDate=None, default_time=None):
|
||||||
|
def clean_string(s):
|
||||||
|
"""
|
||||||
|
cleans the input string of unneeded punctuation
|
||||||
|
and capitalization among other things.
|
||||||
|
|
||||||
|
'am' is a preposition, so cannot currently be used
|
||||||
|
for 12 hour date format
|
||||||
|
"""
|
||||||
|
|
||||||
|
s = s.lower().replace('?', '').replace('.', '').replace(',', '') \
|
||||||
|
.replace(' den ', ' ').replace(' det ', ' ').replace(' om ',
|
||||||
|
' ').replace(
|
||||||
|
' om ', ' ') \
|
||||||
|
.replace(' på ', ' ').replace(' om ', ' ')
|
||||||
|
wordList = s.split()
|
||||||
|
|
||||||
|
for idx, word in enumerate(wordList):
|
||||||
|
if is_ordinal_da(word) is not False:
|
||||||
|
word = str(is_ordinal_da(word))
|
||||||
|
wordList[idx] = word
|
||||||
|
|
||||||
|
return wordList
|
||||||
|
|
||||||
|
def date_found():
|
||||||
|
return found or \
|
||||||
|
(
|
||||||
|
datestr != "" or timeStr != "" or
|
||||||
|
yearOffset != 0 or monthOffset != 0 or
|
||||||
|
dayOffset is True or hrOffset != 0 or
|
||||||
|
hrAbs or minOffset != 0 or
|
||||||
|
minAbs or secOffset != 0
|
||||||
|
)
|
||||||
|
|
||||||
|
if text == "":
|
||||||
|
return None
|
||||||
|
|
||||||
|
anchorDate = anchorDate or now_local()
|
||||||
|
found = False
|
||||||
|
daySpecified = False
|
||||||
|
dayOffset = False
|
||||||
|
monthOffset = 0
|
||||||
|
yearOffset = 0
|
||||||
|
dateNow = anchorDate
|
||||||
|
today = dateNow.strftime("%w")
|
||||||
|
currentYear = dateNow.strftime("%Y")
|
||||||
|
fromFlag = False
|
||||||
|
datestr = ""
|
||||||
|
hasYear = False
|
||||||
|
timeQualifier = ""
|
||||||
|
|
||||||
|
timeQualifiersList = ['tidlig',
|
||||||
|
'morgen',
|
||||||
|
'morgenen',
|
||||||
|
'formidag',
|
||||||
|
'formiddagen',
|
||||||
|
'eftermiddag',
|
||||||
|
'eftermiddagen',
|
||||||
|
'aften',
|
||||||
|
'aftenen',
|
||||||
|
'nat',
|
||||||
|
'natten']
|
||||||
|
markers = ['i', 'om', 'på', 'klokken', 'ved']
|
||||||
|
days = ['mandag', 'tirsdag', 'onsdag',
|
||||||
|
'torsdag', 'fredag', 'lørdag', 'søndag']
|
||||||
|
months = ['januar', 'februar', 'marts', 'april', 'maj', 'juni',
|
||||||
|
'juli', 'august', 'september', 'oktober', 'november',
|
||||||
|
'desember']
|
||||||
|
monthsShort = ['jan', 'feb', 'mar', 'apr', 'maj', 'juni', 'juli', 'aug',
|
||||||
|
'sep', 'okt', 'nov', 'des']
|
||||||
|
|
||||||
|
validFollowups = days + months + monthsShort
|
||||||
|
validFollowups.append("i dag")
|
||||||
|
validFollowups.append("morgen")
|
||||||
|
validFollowups.append("næste")
|
||||||
|
validFollowups.append("forige")
|
||||||
|
validFollowups.append("nu")
|
||||||
|
|
||||||
|
words = clean_string(text)
|
||||||
|
|
||||||
|
for idx, word in enumerate(words):
|
||||||
|
if word == "":
|
||||||
|
continue
|
||||||
|
wordPrevPrev = words[idx - 2] if idx > 1 else ""
|
||||||
|
wordPrev = words[idx - 1] if idx > 0 else ""
|
||||||
|
wordNext = words[idx + 1] if idx + 1 < len(words) else ""
|
||||||
|
wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
|
||||||
|
|
||||||
|
start = idx
|
||||||
|
used = 0
|
||||||
|
# save timequalifier for later
|
||||||
|
if word in timeQualifiersList:
|
||||||
|
timeQualifier = word
|
||||||
|
# parse today, tomorrow, day after tomorrow
|
||||||
|
elif word == "dag" and not fromFlag:
|
||||||
|
dayOffset = 0
|
||||||
|
used += 1
|
||||||
|
elif word == "morgen" and not fromFlag and wordPrev != "om" and \
|
||||||
|
wordPrev not in days: # morgen means tomorrow if not "am
|
||||||
|
# Morgen" and not [day of the week] morgen
|
||||||
|
dayOffset = 1
|
||||||
|
used += 1
|
||||||
|
elif word == "overmorgen" and not fromFlag:
|
||||||
|
dayOffset = 2
|
||||||
|
used += 1
|
||||||
|
# parse 5 days, 10 weeks, last week, next week
|
||||||
|
elif word == "dag" or word == "dage":
|
||||||
|
if wordPrev[0].isdigit():
|
||||||
|
dayOffset += int(wordPrev)
|
||||||
|
start -= 1
|
||||||
|
used = 2
|
||||||
|
elif word == "uge" or word == "uger" and not fromFlag:
|
||||||
|
if wordPrev[0].isdigit():
|
||||||
|
dayOffset += int(wordPrev) * 7
|
||||||
|
start -= 1
|
||||||
|
used = 2
|
||||||
|
elif wordPrev[:6] == "næste":
|
||||||
|
dayOffset = 7
|
||||||
|
start -= 1
|
||||||
|
used = 2
|
||||||
|
elif wordPrev[:5] == "forige":
|
||||||
|
dayOffset = -7
|
||||||
|
start -= 1
|
||||||
|
used = 2
|
||||||
|
# parse 10 months, next month, last month
|
||||||
|
elif word == "måned" and not fromFlag:
|
||||||
|
if wordPrev[0].isdigit():
|
||||||
|
monthOffset = int(wordPrev)
|
||||||
|
start -= 1
|
||||||
|
used = 2
|
||||||
|
elif wordPrev[:6] == "næste":
|
||||||
|
monthOffset = 1
|
||||||
|
start -= 1
|
||||||
|
used = 2
|
||||||
|
elif wordPrev[:5] == "forige":
|
||||||
|
monthOffset = -1
|
||||||
|
start -= 1
|
||||||
|
used = 2
|
||||||
|
# parse 5 years, next year, last year
|
||||||
|
elif word == "år" and not fromFlag:
|
||||||
|
if wordPrev[0].isdigit():
|
||||||
|
yearOffset = int(wordPrev)
|
||||||
|
start -= 1
|
||||||
|
used = 2
|
||||||
|
elif wordPrev[:6] == " næste":
|
||||||
|
yearOffset = 1
|
||||||
|
start -= 1
|
||||||
|
used = 2
|
||||||
|
elif wordPrev[:6] == "næste":
|
||||||
|
yearOffset = -1
|
||||||
|
start -= 1
|
||||||
|
used = 2
|
||||||
|
# parse Monday, Tuesday, etc., and next Monday,
|
||||||
|
# last Tuesday, etc.
|
||||||
|
elif word in days and not fromFlag:
|
||||||
|
d = days.index(word)
|
||||||
|
dayOffset = (d + 1) - int(today)
|
||||||
|
used = 1
|
||||||
|
if dayOffset < 0:
|
||||||
|
dayOffset += 7
|
||||||
|
if wordNext == "morgen":
|
||||||
|
# morgen means morning if preceded by
|
||||||
|
# the day of the week
|
||||||
|
words[idx + 1] = "tidlig"
|
||||||
|
if wordPrev[:6] == "næste":
|
||||||
|
dayOffset += 7
|
||||||
|
used += 1
|
||||||
|
start -= 1
|
||||||
|
elif wordPrev[:5] == "forige":
|
||||||
|
dayOffset -= 7
|
||||||
|
used += 1
|
||||||
|
start -= 1
|
||||||
|
# parse 15 of July, June 20th, Feb 18, 19 of February
|
||||||
|
elif word in months or word in monthsShort and not fromFlag:
|
||||||
|
try:
|
||||||
|
m = months.index(word)
|
||||||
|
except ValueError:
|
||||||
|
m = monthsShort.index(word)
|
||||||
|
used += 1
|
||||||
|
datestr = months[m]
|
||||||
|
if wordPrev and (wordPrev[0].isdigit() or
|
||||||
|
(wordPrev == "of" and wordPrevPrev[0].isdigit())):
|
||||||
|
if wordPrev == "of" and wordPrevPrev[0].isdigit():
|
||||||
|
datestr += " " + words[idx - 2]
|
||||||
|
used += 1
|
||||||
|
start -= 1
|
||||||
|
else:
|
||||||
|
datestr += " " + wordPrev
|
||||||
|
start -= 1
|
||||||
|
used += 1
|
||||||
|
if wordNext and wordNext[0].isdigit():
|
||||||
|
datestr += " " + wordNext
|
||||||
|
used += 1
|
||||||
|
hasYear = True
|
||||||
|
else:
|
||||||
|
hasYear = False
|
||||||
|
|
||||||
|
elif wordNext and wordNext[0].isdigit():
|
||||||
|
datestr += " " + wordNext
|
||||||
|
used += 1
|
||||||
|
if wordNextNext and wordNextNext[0].isdigit():
|
||||||
|
datestr += " " + wordNextNext
|
||||||
|
used += 1
|
||||||
|
hasYear = True
|
||||||
|
else:
|
||||||
|
hasYear = False
|
||||||
|
# parse 5 days from tomorrow, 10 weeks from next thursday,
|
||||||
|
# 2 months from July
|
||||||
|
|
||||||
|
if (
|
||||||
|
word == "fra" or word == "til" or word == "om") and wordNext \
|
||||||
|
in validFollowups:
|
||||||
|
used = 2
|
||||||
|
fromFlag = True
|
||||||
|
if wordNext == "morgenen" and \
|
||||||
|
wordPrev != "om" and \
|
||||||
|
wordPrev not in days:
|
||||||
|
# morgen means tomorrow if not "am Morgen" and not
|
||||||
|
# [day of the week] morgen:
|
||||||
|
dayOffset += 1
|
||||||
|
elif wordNext in days:
|
||||||
|
d = days.index(wordNext)
|
||||||
|
tmpOffset = (d + 1) - int(today)
|
||||||
|
used = 2
|
||||||
|
if tmpOffset < 0:
|
||||||
|
tmpOffset += 7
|
||||||
|
dayOffset += tmpOffset
|
||||||
|
elif wordNextNext and wordNextNext in days:
|
||||||
|
d = days.index(wordNextNext)
|
||||||
|
tmpOffset = (d + 1) - int(today)
|
||||||
|
used = 3
|
||||||
|
if wordNext[:6] == "næste":
|
||||||
|
tmpOffset += 7
|
||||||
|
used += 1
|
||||||
|
start -= 1
|
||||||
|
elif wordNext[:5] == "forige":
|
||||||
|
tmpOffset -= 7
|
||||||
|
used += 1
|
||||||
|
start -= 1
|
||||||
|
dayOffset += tmpOffset
|
||||||
|
if used > 0:
|
||||||
|
if start - 1 > 0 and words[start - 1].startswith("denne"):
|
||||||
|
start -= 1
|
||||||
|
used += 1
|
||||||
|
|
||||||
|
for i in range(0, used):
|
||||||
|
words[i + start] = ""
|
||||||
|
|
||||||
|
if start - 1 >= 0 and words[start - 1] in markers:
|
||||||
|
words[start - 1] = ""
|
||||||
|
found = True
|
||||||
|
daySpecified = True
|
||||||
|
|
||||||
|
# parse time
|
||||||
|
timeStr = ""
|
||||||
|
hrOffset = 0
|
||||||
|
minOffset = 0
|
||||||
|
secOffset = 0
|
||||||
|
hrAbs = None
|
||||||
|
minAbs = None
|
||||||
|
|
||||||
|
for idx, word in enumerate(words):
|
||||||
|
if word == "":
|
||||||
|
continue
|
||||||
|
|
||||||
|
wordPrevPrev = words[idx - 2] if idx > 1 else ""
|
||||||
|
wordPrev = words[idx - 1] if idx > 0 else ""
|
||||||
|
wordNext = words[idx + 1] if idx + 1 < len(words) else ""
|
||||||
|
wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
|
||||||
|
wordNextNextNext = words[idx + 3] if idx + 3 < len(words) else ""
|
||||||
|
wordNextNextNextNext = words[idx + 4] if idx + 4 < len(words) else ""
|
||||||
|
|
||||||
|
# parse noon, midnight, morning, afternoon, evening
|
||||||
|
used = 0
|
||||||
|
if word[:6] == "middag":
|
||||||
|
hrAbs = 12
|
||||||
|
used += 1
|
||||||
|
elif word[:11] == "midnat":
|
||||||
|
hrAbs = 0
|
||||||
|
used += 1
|
||||||
|
elif word == "morgenen" or (
|
||||||
|
wordPrev == "om" and word == "morgenen") or word == "tidlig":
|
||||||
|
if not hrAbs:
|
||||||
|
hrAbs = 8
|
||||||
|
used += 1
|
||||||
|
elif word[:11] == "eftermiddag":
|
||||||
|
if not hrAbs:
|
||||||
|
hrAbs = 15
|
||||||
|
used += 1
|
||||||
|
elif word[:5] == "aften":
|
||||||
|
if not hrAbs:
|
||||||
|
hrAbs = 19
|
||||||
|
used += 1
|
||||||
|
# parse half an hour, quarter hour
|
||||||
|
elif word == "time" and \
|
||||||
|
(wordPrev in markers or wordPrevPrev in markers):
|
||||||
|
if wordPrev[:4] == "halv":
|
||||||
|
minOffset = 30
|
||||||
|
elif wordPrev == "kvarter":
|
||||||
|
minOffset = 15
|
||||||
|
elif wordPrev == "trekvarter":
|
||||||
|
minOffset = 45
|
||||||
|
else:
|
||||||
|
hrOffset = 1
|
||||||
|
if wordPrevPrev in markers:
|
||||||
|
words[idx - 2] = ""
|
||||||
|
words[idx - 1] = ""
|
||||||
|
used += 1
|
||||||
|
hrAbs = -1
|
||||||
|
minAbs = -1
|
||||||
|
# parse 5:00 am, 12:00 p.m., etc
|
||||||
|
elif word[0].isdigit():
|
||||||
|
isTime = True
|
||||||
|
strHH = ""
|
||||||
|
strMM = ""
|
||||||
|
remainder = ""
|
||||||
|
if ':' in word:
|
||||||
|
# parse colons
|
||||||
|
# "3:00 in the morning"
|
||||||
|
stage = 0
|
||||||
|
length = len(word)
|
||||||
|
for i in range(length):
|
||||||
|
if stage == 0:
|
||||||
|
if word[i].isdigit():
|
||||||
|
strHH += word[i]
|
||||||
|
elif word[i] == ":":
|
||||||
|
stage = 1
|
||||||
|
else:
|
||||||
|
stage = 2
|
||||||
|
i -= 1
|
||||||
|
elif stage == 1:
|
||||||
|
if word[i].isdigit():
|
||||||
|
strMM += word[i]
|
||||||
|
else:
|
||||||
|
stage = 2
|
||||||
|
i -= 1
|
||||||
|
elif stage == 2:
|
||||||
|
remainder = word[i:].replace(".", "")
|
||||||
|
break
|
||||||
|
if remainder == "":
|
||||||
|
nextWord = wordNext.replace(".", "")
|
||||||
|
if nextWord == "am" or nextWord == "pm":
|
||||||
|
remainder = nextWord
|
||||||
|
used += 1
|
||||||
|
elif nextWord == "aften":
|
||||||
|
remainder = "pm"
|
||||||
|
used += 1
|
||||||
|
elif wordNext == "om" and wordNextNext == "morgenen":
|
||||||
|
remainder = "am"
|
||||||
|
used += 2
|
||||||
|
elif wordNext == "om" and wordNextNext == "eftermiddagen":
|
||||||
|
remainder = "pm"
|
||||||
|
used += 2
|
||||||
|
elif wordNext == "om" and wordNextNext == "aftenen":
|
||||||
|
remainder = "pm"
|
||||||
|
used += 2
|
||||||
|
elif wordNext == "morgen":
|
||||||
|
remainder = "am"
|
||||||
|
used += 1
|
||||||
|
elif wordNext == "eftermiddag":
|
||||||
|
remainder = "pm"
|
||||||
|
used += 1
|
||||||
|
elif wordNext == "aften":
|
||||||
|
remainder = "pm"
|
||||||
|
used += 1
|
||||||
|
elif wordNext == "i" and wordNextNext == "morgen":
|
||||||
|
remainder = "am"
|
||||||
|
used = 2
|
||||||
|
elif wordNext == "i" and wordNextNext == "eftermiddag":
|
||||||
|
remainder = "pm"
|
||||||
|
used = 2
|
||||||
|
elif wordNext == "i" and wordNextNext == "aften":
|
||||||
|
remainder = "pm"
|
||||||
|
used = 2
|
||||||
|
elif wordNext == "natten":
|
||||||
|
if strHH > 4:
|
||||||
|
remainder = "pm"
|
||||||
|
else:
|
||||||
|
remainder = "am"
|
||||||
|
used += 1
|
||||||
|
else:
|
||||||
|
if timeQualifier != "":
|
||||||
|
if strHH <= 12 and \
|
||||||
|
(timeQualifier == "aftenen" or
|
||||||
|
timeQualifier == "eftermiddagen"):
|
||||||
|
strHH += 12 # what happens when strHH is 24?
|
||||||
|
else:
|
||||||
|
# try to parse # s without colons
|
||||||
|
# 5 hours, 10 minutes etc.
|
||||||
|
length = len(word)
|
||||||
|
strNum = ""
|
||||||
|
remainder = ""
|
||||||
|
for i in range(length):
|
||||||
|
if word[i].isdigit():
|
||||||
|
strNum += word[i]
|
||||||
|
else:
|
||||||
|
remainder += word[i]
|
||||||
|
|
||||||
|
if remainder == "":
|
||||||
|
remainder = wordNext.replace(".", "").lstrip().rstrip()
|
||||||
|
|
||||||
|
if (
|
||||||
|
remainder == "pm" or
|
||||||
|
wordNext == "pm" or
|
||||||
|
remainder == "p.m." or
|
||||||
|
wordNext == "p.m."):
|
||||||
|
strHH = strNum
|
||||||
|
remainder = "pm"
|
||||||
|
used = 1
|
||||||
|
elif (
|
||||||
|
remainder == "am" or
|
||||||
|
wordNext == "am" or
|
||||||
|
remainder == "a.m." or
|
||||||
|
wordNext == "a.m."):
|
||||||
|
strHH = strNum
|
||||||
|
remainder = "am"
|
||||||
|
used = 1
|
||||||
|
else:
|
||||||
|
if wordNext == "time" and int(word) < 100:
|
||||||
|
# "in 3 hours"
|
||||||
|
hrOffset = int(word)
|
||||||
|
used = 2
|
||||||
|
isTime = False
|
||||||
|
hrAbs = -1
|
||||||
|
minAbs = -1
|
||||||
|
elif wordNext == "minut":
|
||||||
|
# "in 10 minutes"
|
||||||
|
minOffset = int(word)
|
||||||
|
used = 2
|
||||||
|
isTime = False
|
||||||
|
hrAbs = -1
|
||||||
|
minAbs = -1
|
||||||
|
elif wordNext == "sekund":
|
||||||
|
# in 5 seconds
|
||||||
|
secOffset = int(word)
|
||||||
|
used = 2
|
||||||
|
isTime = False
|
||||||
|
hrAbs = -1
|
||||||
|
minAbs = -1
|
||||||
|
|
||||||
|
elif wordNext == "time":
|
||||||
|
strHH = word
|
||||||
|
used += 1
|
||||||
|
isTime = True
|
||||||
|
if wordNextNext == timeQualifier:
|
||||||
|
strMM = ""
|
||||||
|
if wordNextNext[:11] == "eftermiddag":
|
||||||
|
used += 1
|
||||||
|
remainder = "pm"
|
||||||
|
elif wordNextNext == "om" and wordNextNextNext == \
|
||||||
|
"eftermiddagen":
|
||||||
|
used += 2
|
||||||
|
remainder = "pm"
|
||||||
|
elif wordNextNext[:5] == "aften":
|
||||||
|
used += 1
|
||||||
|
remainder = "pm"
|
||||||
|
elif wordNextNext == "om" and wordNextNextNext == \
|
||||||
|
"aftenen":
|
||||||
|
used += 2
|
||||||
|
remainder = "pm"
|
||||||
|
elif wordNextNext[:6] == "morgen":
|
||||||
|
used += 1
|
||||||
|
remainder = "am"
|
||||||
|
elif wordNextNext == "om" and wordNextNextNext == \
|
||||||
|
"morgenen":
|
||||||
|
used += 2
|
||||||
|
remainder = "am"
|
||||||
|
elif wordNextNext == "natten":
|
||||||
|
used += 1
|
||||||
|
if 8 <= int(word) <= 12:
|
||||||
|
remainder = "pm"
|
||||||
|
else:
|
||||||
|
remainder = "am"
|
||||||
|
|
||||||
|
elif is_numeric(wordNextNext):
|
||||||
|
strMM = wordNextNext
|
||||||
|
used += 1
|
||||||
|
if wordNextNextNext == timeQualifier:
|
||||||
|
if wordNextNextNext[:11] == "eftermiddag":
|
||||||
|
used += 1
|
||||||
|
remainder = "pm"
|
||||||
|
elif wordNextNextNext == "om" and \
|
||||||
|
wordNextNextNextNext == \
|
||||||
|
"eftermiddagen":
|
||||||
|
used += 2
|
||||||
|
remainder = "pm"
|
||||||
|
elif wordNextNextNext[:6] == "natten":
|
||||||
|
used += 1
|
||||||
|
remainder = "pm"
|
||||||
|
elif wordNextNextNext == "am" and \
|
||||||
|
wordNextNextNextNext == "natten":
|
||||||
|
used += 2
|
||||||
|
remainder = "pm"
|
||||||
|
elif wordNextNextNext[:7] == "morgenen":
|
||||||
|
used += 1
|
||||||
|
remainder = "am"
|
||||||
|
elif wordNextNextNext == "om" and \
|
||||||
|
wordNextNextNextNext == "morgenen":
|
||||||
|
used += 2
|
||||||
|
remainder = "am"
|
||||||
|
elif wordNextNextNext == "natten":
|
||||||
|
used += 1
|
||||||
|
if 8 <= int(word) <= 12:
|
||||||
|
remainder = "pm"
|
||||||
|
else:
|
||||||
|
remainder = "am"
|
||||||
|
|
||||||
|
elif wordNext == timeQualifier:
|
||||||
|
strHH = word
|
||||||
|
strMM = 00
|
||||||
|
isTime = True
|
||||||
|
if wordNext[:10] == "eftermidag":
|
||||||
|
used += 1
|
||||||
|
remainder = "pm"
|
||||||
|
elif wordNext == "om" and \
|
||||||
|
wordNextNext == "eftermiddanen":
|
||||||
|
used += 2
|
||||||
|
remainder = "pm"
|
||||||
|
elif wordNext[:7] == "aftenen":
|
||||||
|
used += 1
|
||||||
|
remainder = "pm"
|
||||||
|
elif wordNext == "om" and wordNextNext == "aftenen":
|
||||||
|
used += 2
|
||||||
|
remainder = "pm"
|
||||||
|
elif wordNext[:7] == "morgenen":
|
||||||
|
used += 1
|
||||||
|
remainder = "am"
|
||||||
|
elif wordNext == "ao" and wordNextNext == "morgenen":
|
||||||
|
used += 2
|
||||||
|
remainder = "am"
|
||||||
|
elif wordNext == "natten":
|
||||||
|
used += 1
|
||||||
|
if 8 <= int(word) <= 12:
|
||||||
|
remainder = "pm"
|
||||||
|
else:
|
||||||
|
remainder = "am"
|
||||||
|
|
||||||
|
# if timeQualifier != "":
|
||||||
|
# military = True
|
||||||
|
# else:
|
||||||
|
# isTime = False
|
||||||
|
|
||||||
|
strHH = int(strHH) if strHH else 0
|
||||||
|
strMM = int(strMM) if strMM else 0
|
||||||
|
strHH = strHH + 12 if remainder == "pm" and strHH < 12 else strHH
|
||||||
|
strHH = strHH - 12 if remainder == "am" and strHH >= 12 else strHH
|
||||||
|
if strHH > 24 or strMM > 59:
|
||||||
|
isTime = False
|
||||||
|
used = 0
|
||||||
|
if isTime:
|
||||||
|
hrAbs = strHH * 1
|
||||||
|
minAbs = strMM * 1
|
||||||
|
used += 1
|
||||||
|
if used > 0:
|
||||||
|
# removed parsed words from the sentence
|
||||||
|
for i in range(used):
|
||||||
|
words[idx + i] = ""
|
||||||
|
|
||||||
|
if wordPrev == "tidlig":
|
||||||
|
hrOffset = -1
|
||||||
|
words[idx - 1] = ""
|
||||||
|
idx -= 1
|
||||||
|
elif wordPrev == "sen":
|
||||||
|
hrOffset = 1
|
||||||
|
words[idx - 1] = ""
|
||||||
|
idx -= 1
|
||||||
|
if idx > 0 and wordPrev in markers:
|
||||||
|
words[idx - 1] = ""
|
||||||
|
if idx > 1 and wordPrevPrev in markers:
|
||||||
|
words[idx - 2] = ""
|
||||||
|
|
||||||
|
idx += used - 1
|
||||||
|
found = True
|
||||||
|
|
||||||
|
# check that we found a date
|
||||||
|
if not date_found():
|
||||||
|
return None
|
||||||
|
|
||||||
|
if dayOffset is False:
|
||||||
|
dayOffset = 0
|
||||||
|
|
||||||
|
# perform date manipulation
|
||||||
|
|
||||||
|
extractedDate = dateNow
|
||||||
|
extractedDate = extractedDate.replace(microsecond=0,
|
||||||
|
second=0,
|
||||||
|
minute=0,
|
||||||
|
hour=0)
|
||||||
|
if datestr != "":
|
||||||
|
en_months = ['january', 'february', 'march', 'april', 'may', 'june',
|
||||||
|
'july', 'august', 'september', 'october', 'november',
|
||||||
|
'december']
|
||||||
|
en_monthsShort = ['jan', 'feb', 'mar', 'apr', 'may', 'june', 'july',
|
||||||
|
'aug',
|
||||||
|
'sept', 'oct', 'nov', 'dec']
|
||||||
|
for idx, en_month in enumerate(en_months):
|
||||||
|
datestr = datestr.replace(months[idx], en_month)
|
||||||
|
for idx, en_month in enumerate(en_monthsShort):
|
||||||
|
datestr = datestr.replace(monthsShort[idx], en_month)
|
||||||
|
|
||||||
|
temp = datetime.strptime(datestr, "%B %d")
|
||||||
|
if extractedDate.tzinfo:
|
||||||
|
temp = temp.replace(tzinfo=extractedDate.tzinfo)
|
||||||
|
|
||||||
|
if not hasYear:
|
||||||
|
temp = temp.replace(year=extractedDate.year)
|
||||||
|
if extractedDate < temp:
|
||||||
|
extractedDate = extractedDate.replace(year=int(currentYear),
|
||||||
|
month=int(
|
||||||
|
temp.strftime(
|
||||||
|
"%m")),
|
||||||
|
day=int(temp.strftime(
|
||||||
|
"%d")))
|
||||||
|
else:
|
||||||
|
extractedDate = extractedDate.replace(
|
||||||
|
year=int(currentYear) + 1,
|
||||||
|
month=int(temp.strftime("%m")),
|
||||||
|
day=int(temp.strftime("%d")))
|
||||||
|
else:
|
||||||
|
extractedDate = extractedDate.replace(
|
||||||
|
year=int(temp.strftime("%Y")),
|
||||||
|
month=int(temp.strftime("%m")),
|
||||||
|
day=int(temp.strftime("%d")))
|
||||||
|
|
||||||
|
if timeStr != "":
|
||||||
|
temp = datetime(timeStr)
|
||||||
|
extractedDate = extractedDate.replace(hour=temp.strftime("%H"),
|
||||||
|
minute=temp.strftime("%M"),
|
||||||
|
second=temp.strftime("%S"))
|
||||||
|
|
||||||
|
if yearOffset != 0:
|
||||||
|
extractedDate = extractedDate + relativedelta(years=yearOffset)
|
||||||
|
if monthOffset != 0:
|
||||||
|
extractedDate = extractedDate + relativedelta(months=monthOffset)
|
||||||
|
if dayOffset != 0:
|
||||||
|
extractedDate = extractedDate + relativedelta(days=dayOffset)
|
||||||
|
|
||||||
|
if hrAbs is None and minAbs is None and default_time:
|
||||||
|
hrAbs = default_time.hour
|
||||||
|
minAbs = default_time.minute
|
||||||
|
|
||||||
|
if hrAbs != -1 and minAbs != -1:
|
||||||
|
|
||||||
|
extractedDate = extractedDate + relativedelta(hours=hrAbs or 0,
|
||||||
|
minutes=minAbs or 0)
|
||||||
|
if (hrAbs or minAbs) and datestr == "":
|
||||||
|
if not daySpecified and dateNow > extractedDate:
|
||||||
|
extractedDate = extractedDate + relativedelta(days=1)
|
||||||
|
if hrOffset != 0:
|
||||||
|
extractedDate = extractedDate + relativedelta(hours=hrOffset)
|
||||||
|
if minOffset != 0:
|
||||||
|
extractedDate = extractedDate + relativedelta(minutes=minOffset)
|
||||||
|
if secOffset != 0:
|
||||||
|
extractedDate = extractedDate + relativedelta(seconds=secOffset)
|
||||||
|
for idx, word in enumerate(words):
|
||||||
|
if words[idx] == "og" and words[idx - 1] == "" \
|
||||||
|
and words[idx + 1] == "":
|
||||||
|
words[idx] = ""
|
||||||
|
|
||||||
|
resultStr = " ".join(words)
|
||||||
|
resultStr = ' '.join(resultStr.split())
|
||||||
|
|
||||||
|
return [extractedDate, resultStr]
|
||||||
|
|
||||||
|
|
||||||
|
def is_fractional_da(input_str, short_scale=True):
    """
    Check whether the given Danish word denotes a fraction.

    Matching is case-insensitive. Words starting with "halv" yield 0.5,
    "trediedel" yields 1/3, and any other word ending in "del" is treated
    as "<number>del" where <number> is looked up in _DA_NUMBERS.

    Args:
        input_str (str): the string to check if fractional
        short_scale (bool): accepted for signature compatibility; unused
    Returns:
        (bool) or (float): False if not a fraction, otherwise the fraction

    """
    lowered = input_str.lower()

    if lowered.startswith("halv"):
        return 0.5

    if lowered == "trediedel":
        return 1.0 / 3

    if lowered.endswith("del"):
        # Strip the "del" suffix and look the stem up as a number word.
        stem = lowered[:-3]
        # A zero-valued stem cannot be a denominator; treat it as
        # "not a fraction" instead of raising ZeroDivisionError.
        if stem in _DA_NUMBERS and _DA_NUMBERS[stem]:
            return 1.0 / _DA_NUMBERS[stem]

    return False
|
||||||
|
|
||||||
|
|
||||||
|
def is_ordinal_da(input_str):
    """
    Check whether the given Danish word is an ordinal number.

    Irregular ordinals are matched by prefix. Everything else is handled
    by stripping an ordinal suffix ("nde", "ende" or "te") and looking the
    remaining stem up in _DA_NUMBERS, so only ordinals whose stem is a key
    of _DA_NUMBERS are recognised.

    Args:
        input_str (str): the string to check if ordinal
    Returns:
        (bool) or (int/float): False if not an ordinal, otherwise the
            number corresponding to the ordinal

    """
    lowerstr = input_str.lower()

    # Irregular forms, matched as prefixes. "tredje" and "ellevte" are the
    # modern spellings next to the legacy "tredie" / "elfte" entries.
    irregular = (
        ("første", 1),
        ("anden", 2),
        ("tredie", 3),
        ("tredje", 3),
        ("fjerde", 4),
        ("femte", 5),
        ("sjette", 6),
        ("elfte", 11),  # was returning 1
        ("ellevte", 11),
        ("tolvfte", 12),
    )
    for prefix, value in irregular:
        if lowerstr.startswith(prefix):
            return value

    # Try each suffix against the unmodified word. Stripping in place (as
    # before) meant "ende" could never match after "nde" had been removed,
    # so words like "syvende" were rejected.
    for suffix in ("nde", "ende", "te"):
        if lowerstr.endswith(suffix):
            stem = lowerstr[:-len(suffix)]
            if stem in _DA_NUMBERS:
                return _DA_NUMBERS[stem]

    return False
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_da(text, remove_articles=True):
    """Danish string normalization.

    Splits the text on whitespace (which also collapses repeated spaces),
    optionally drops the articles "den"/"det", and replaces every word that
    is a key of _DA_NUMBERS with the string form of its numeric value.

    Args:
        text (str): the text to normalize
        remove_articles (bool): drop "den" and "det" when True
    Returns:
        (str): the normalized words joined by single spaces
    """
    articles = ["den", "det"]
    kept = []
    for token in text.split():
        if remove_articles and token in articles:
            continue
        # Convert number words into digits
        if token in _DA_NUMBERS:
            token = str(_DA_NUMBERS[token])
        kept.append(token)
    return " ".join(kept)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_numbers_da(text, short_scale=True, ordinals=False):
    """
    Extract a list of numbers from a string.

    Thin wrapper: the work is delegated to extract_numbers_generic, which
    is handed the Danish pronounce/extract functions.

    Args:
        text (str): the string to extract numbers from
        short_scale (bool): forwarded unchanged to the generic extractor
            ("short scale" vs "long scale" naming of large numbers, see
            https://en.wikipedia.org/wiki/Names_of_large_numbers)
        ordinals (bool): forwarded unchanged; consider ordinal numbers
    Returns:
        list: whatever extract_numbers_generic returns (documented
            upstream as a list of extracted numbers)
    """
    options = {"short_scale": short_scale, "ordinals": ordinals}
    return extract_numbers_generic(text, pronounce_number_da,
                                   extract_number_da, **options)
|
||||||
|
|
||||||
|
|
||||||
|
class DanishNormalizer(Normalizer):
    """Danish normalizer; currently adds nothing to the base Normalizer.

    TODO implement language specific normalizer
    """
|
||||||
1025
lingua_franca/lang/parse_de.py
Normal file
1025
lingua_franca/lang/parse_de.py
Normal file
File diff suppressed because it is too large
Load Diff
1485
lingua_franca/lang/parse_en.py
Normal file
1485
lingua_franca/lang/parse_en.py
Normal file
File diff suppressed because it is too large
Load Diff
1110
lingua_franca/lang/parse_es.py
Normal file
1110
lingua_franca/lang/parse_es.py
Normal file
File diff suppressed because it is too large
Load Diff
381
lingua_franca/lang/parse_fa.py
Normal file
381
lingua_franca/lang/parse_fa.py
Normal file
@@ -0,0 +1,381 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
import json
|
||||||
|
from datetime import timedelta
|
||||||
|
|
||||||
|
from lingua_franca.internal import resolve_resource_file
|
||||||
|
from lingua_franca.lang.common_data_fa import (_FARSI_BIG, _FARSI_HUNDREDS,
|
||||||
|
_FARSI_ONES, _FARSI_TENS,
|
||||||
|
_FORMAL_VARIANT)
|
||||||
|
from lingua_franca.lang.parse_common import Normalizer
|
||||||
|
from lingua_franca.time import now_local
|
||||||
|
|
||||||
|
|
||||||
|
def _is_number(s):
|
||||||
|
try:
|
||||||
|
float(s)
|
||||||
|
return True
|
||||||
|
except ValueError:
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _parse_sentence(text):
    """Split text into words, folding runs of number words into tuples.

    Every key of _FORMAL_VARIANT found in the text is first replaced by its
    value. The text is then split on whitespace and scanned with a small
    state machine.

    Returns:
        list: items are either plain word strings or
            ``(number, [words that produced it])`` tuples.
    """
    for old, new in _FORMAL_VARIANT.items():
        text = text.replace(old, new)

    output = []
    total = 0      # value of the number being built, after magnitude words
    pending = 0    # ones/tens/hundreds collected since the last magnitude
    words = []     # source words of the number being built
    mode = 'init'

    def flush():
        """Emit the number under construction (if non-zero) and reset."""
        nonlocal total, pending, words, mode
        total += pending
        if total != 0:
            output.append((total, words))
        pending = 0
        total = 0
        words = []
        mode = 'init'

    for token in text.split():
        if token == "و":
            # Conjunction: either continues the current number or stands
            # alone as an ordinary word.
            if mode in ('num_ten', 'num_hundred', 'num_one'):
                mode += '_va'
                words.append(token)
            elif mode == 'num':
                words.append(token)
            else:
                flush()
                output.append(token)
        elif token == "نیم":
            # Adds 0.5 and always terminates the current number.
            words.append(token)
            total += 0.5
            flush()
        elif token in _FARSI_ONES:
            value = _FARSI_ONES.index(token)
            continues = (mode in ('init', 'num_hundred_va', 'num')
                         or (value < 10 and mode == 'num_ten_va'))
            if not continues:
                flush()
            words.append(token)
            pending += value
            mode = 'num_one'
        elif token in _FARSI_TENS:
            if mode not in ('init', 'num_hundred_va', 'num'):
                flush()
            words.append(token)
            pending += _FARSI_TENS.index(token) * 10
            mode = 'num_ten'
        elif token in _FARSI_HUNDREDS:
            if mode not in ('init', 'num'):
                flush()
            words.append(token)
            pending += _FARSI_HUNDREDS.index(token) * 100
            mode = 'num_hundred'
        elif token in _FARSI_BIG:
            words.append(token)
            exponent = _FARSI_BIG.index(token)
            # A bare magnitude word at index 1 counts as one of that unit.
            if mode == 'init' and exponent == 1:
                pending = 1
            pending *= 10 ** (3 * exponent)
            total += pending
            pending = 0
            mode = 'num'
        elif _is_number(token):
            # Literal digits: taken as-is and emitted immediately.
            words.append(token)
            total = float(token)
            flush()
        else:
            flush()
            output.append(token)

    if mode.startswith('num'):
        flush()
    return output
|
||||||
|
|
||||||
|
|
||||||
|
_time_units = {
|
||||||
|
'ثانیه': timedelta(seconds=1),
|
||||||
|
'دقیقه': timedelta(minutes=1),
|
||||||
|
'ساعت': timedelta(hours=1),
|
||||||
|
}
|
||||||
|
|
||||||
|
_date_units = {
|
||||||
|
'روز': timedelta(days=1),
|
||||||
|
'هفته': timedelta(weeks=1),
|
||||||
|
}
|
||||||
|
|
||||||
|
def extract_duration_fa(text):
    """
    Extract a duration from a Persian phrase.

    The text is run through _parse_sentence. Every number that is directly
    followed by a key of _time_units or _date_units (for example a number
    followed by 'دقیقه') contributes ``number * unit`` to the total.
    The conjunction "و" is skipped. All other words, including number
    words that were not attached to a unit, are returned as the remainder.

    A unit word that has no number in front of it is not counted as a
    duration; it is kept in the remainder.

    Args:
        text (str): string containing a duration

    Returns:
        (timedelta, str):
            A tuple containing the summed duration and the remaining text
            not consumed in the parsing, joined by single spaces. When no
            duration is found the first value is ``timedelta(0)``.
    """
    remainder = []
    pending = None  # last (value, words) tuple not yet attached to a unit
    duration = timedelta(0)

    def release_pending():
        """Hand an unused number's words back to the remainder."""
        nonlocal pending
        if pending:
            remainder.extend(pending[1])
        pending = None

    for item in _parse_sentence(text):
        if item == "و":
            continue
        if isinstance(item, tuple):
            # A new number supersedes an unused one; keep the old words.
            release_pending()
            pending = item
            continue

        unit = _time_units.get(item, _date_units.get(item))
        if unit is not None and pending is not None:
            duration += unit * pending[0]
            pending = None
        else:
            # Ordinary word, or a unit word with no number before it
            # (previously a TypeError on ``None[0]``).
            release_pending()
            remainder.append(item)

    # A trailing number with no unit was not consumed either.
    release_pending()
    return (duration, " ".join(remainder))
|
||||||
|
|
||||||
|
|
||||||
|
def extract_datetime_fa(text, anchorDate=None, default_time=None):
    """ Convert a Persian date reference into an exact datetime

    Recognises, as far as the code below shows:
      * the relative day words in ``daysDict`` (two days ago .. in two days)
      * weekday names (resolved to the next occurrence, today included)
      * the word in ``exactDict`` (the anchor date/time itself)
      * "<number> <unit> <next/previous word>" where the unit is a key of
        _date_units or _time_units; a missing number counts as 1
      * a part-of-day word from ``timesDict`` after a day was found

    If a reference date is not provided, the current local time is used.
    Words that took part in the date are consumed; the rest is returned.

    Args:
        text (str): string containing date words
        anchorDate (datetime): A reference date/time for "tomorrow", etc
        default_time (time): accepted for interface compatibility with the
            other languages; this implementation does not apply it

    Returns:
        (datetime, str): A tuple containing the datetime and the remaining
                         text not consumed in the parsing, or None if no
                         date or time related text was found.
    """
    if text == "":
        return None

    # Applied in order. The first entry turns the zero-width non-joiner
    # into a space so compound words can be re-joined below in one
    # canonical spelling.
    replacements = (
        ('\u200c', ' '),
        ('.', ''),
        ('،', ''),
        ('?', ''),
        ("پس فردا", "پسفردا"),
        ('یک شنبه', 'یکشنبه'),
        ('دو شنبه', 'دوشنبه'),
        ('سه شنبه', 'سهشنبه'),
        ('چهار شنبه', 'چهارشنبه'),
        ('پنج شنبه', 'پنجشنبه'),
        ('بعد از ظهر', 'بعدازظهر'),
    )
    text = text.lower()
    for old, new in replacements:
        text = text.replace(old, new)

    if not anchorDate:
        anchorDate = now_local()
    today = anchorDate.replace(hour=0, minute=0, second=0, microsecond=0)

    # Monday first, so list indices line up with datetime.weekday().
    weekday_names = [
        'دوشنبه',
        'سهشنبه',
        'چهارشنبه',
        'پنجشنبه',
        'جمعه',
        'شنبه',
        'یکشنبه',
    ]
    daysDict = {
        'پریروز': today + timedelta(days=-2),
        'دیروز': today + timedelta(days=-1),
        'امروز': today,
        'فردا': today + timedelta(days=1),
        'پسفردا': today + timedelta(days=2),
    }
    timesDict = {
        'صبح': timedelta(hours=8),
        'بعدازظهر': timedelta(hours=15),
    }
    exactDict = {
        'الان': anchorDate,
    }
    nextWords = ["بعد", "دیگه"]
    prevWords = ["پیش", "قبل"]

    mode = 'none'
    number_seen = None
    delta_seen = timedelta(0)
    remainder = []
    result = None

    def flush_number():
        """Return the words of an unused number to the remainder."""
        nonlocal number_seen
        if number_seen:
            remainder.extend(number_seen[1])
        number_seen = None

    def take_count():
        """Consume the pending number as a multiplier (default 1)."""
        nonlocal number_seen
        count = number_seen[0] if number_seen else 1
        number_seen = None
        return count

    for x in _parse_sentence(text):
        is_number = isinstance(x, tuple)

        if mode == 'finished':
            # Everything after a complete date is passed through. Number
            # tuples must be expanded to their words or the final join
            # would raise TypeError.
            flush_number()
            if is_number:
                remainder.extend(x[1])
            else:
                remainder.append(x)
            continue

        if x == 'و' and mode[:5] == 'delta':
            continue

        if is_number:
            flush_number()
            number_seen = x
            continue

        if x in weekday_names:
            # Modulo keeps Sunday-on-a-Sunday at 0 days, consistent with
            # every other weekday asked for on its own day.
            dayOffset = (weekday_names.index(x) - anchorDate.weekday()) % 7
            result = today + timedelta(days=dayOffset)
            mode = 'time'
            continue

        if x in exactDict:
            result = exactDict[x]
            mode = 'finished'
            continue

        if x in daysDict:
            result = daysDict[x]
            mode = 'time'
            continue

        if x in timesDict and mode == 'time':
            result += timesDict[x]
            mode = 'finished'  # was the typo 'finish', which never matched
            continue

        if x in _date_units:
            delta_seen += _date_units[x] * take_count()
            if mode != 'delta_time':
                mode = 'delta_date'
            continue

        if x in _time_units:
            delta_seen += _time_units[x] * take_count()
            mode = 'delta_time'
            continue

        if x in nextWords or x in prevWords:
            # Give up instead of incorrect result
            if mode == 'time':
                return None
            # "previous" words move backwards in time.
            sign = 1 if x in nextWords else -1
            if mode == 'delta_date':
                result = today + sign * delta_seen
                mode = 'time'
                continue
            if mode == 'delta_time':
                result = anchorDate + sign * delta_seen
                mode = 'finished'
                continue

        # Not part of a date expression: keep it (and any unused number).
        flush_number()
        remainder.append(x)

    if result is None:
        return None

    flush_number()
    return (result, " ".join(remainder))
|
||||||
|
|
||||||
|
def is_fractional_fa(input_str, short_scale=True):
    """
    This function takes the given text and checks if it is a fraction.

    Recognises the English words "whole", "half", "halve" and "quarter"
    (with an optional trailing "s"), the Persian words "نیم" and "نصف"
    (a half) and "ربع" (a quarter). If the module defines the ordinal
    tables _SHORT_ORDINAL_FA / _LONG_ORDINAL_FA, every ordinal above 2 in
    the selected table is accepted as a denominator as well.

    Args:
        input_str (str): the string to check if fractional
        short_scale (bool): use short scale if True, long scale if False
    Returns:
        (bool) or (float): False if not a fraction, otherwise the fraction

    """
    if input_str.endswith('s'):
        input_str = input_str[:-1]  # e.g. "fifths"

    fracts = {"whole": 1, "half": 2, "halve": 2, "quarter": 4,
              "نیم": 2, "نصف": 2, "ربع": 4}

    # The ordinal tables are not imported by this module; without this
    # guard every call raised NameError.
    try:
        ordinal_names = _SHORT_ORDINAL_FA if short_scale \
            else _LONG_ORDINAL_FA
    except NameError:
        ordinal_names = {}

    for num in ordinal_names:
        if num > 2:
            fracts[ordinal_names[num]] = num

    key = input_str.lower()
    if key in fracts:
        return 1.0 / fracts[key]
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def extract_numbers_fa(text, short_scale=True, ordinals=False):
    """
    Extract a list of numbers from a string.

    Runs the text through _parse_sentence and keeps the numeric value of
    every tuple item it produced, in order of appearance.

    Args:
        text (str): the string to extract numbers from
        short_scale (bool): accepted for interface compatibility; unused
        ordinals (bool): accepted for interface compatibility; unused
    Returns:
        list: list of extracted numbers
    """
    return [item[0] for item in _parse_sentence(text)
            if type(item) is tuple]
|
||||||
|
|
||||||
|
|
||||||
|
def extract_number_fa(text, ordinals=False):
    """
    Extract the first number found in a text string.

    Args:
        text (str): the string to search
        ordinals (bool): passed through to extract_numbers_fa
    Returns:
        (int) or (float) or False: the first entry of the list returned by
            extract_numbers_fa, or False if that list is empty

    """
    numbers = extract_numbers_fa(text, ordinals=ordinals)
    return numbers[0] if numbers else False
|
||||||
1090
lingua_franca/lang/parse_fr.py
Normal file
1090
lingua_franca/lang/parse_fr.py
Normal file
File diff suppressed because it is too large
Load Diff
26
lingua_franca/lang/parse_hu.py
Normal file
26
lingua_franca/lang/parse_hu.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
from lingua_franca.time import now_local
|
||||||
|
from lingua_franca.lang.parse_common import Normalizer
|
||||||
|
|
||||||
|
|
||||||
|
class HungarianNormalizer(Normalizer):
    """Hungarian normalizer; currently adds nothing to the base Normalizer.

    TODO implement language specific normalizer
    """
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_hu(text, remove_articles=True):
    """Hungarian string normalization.

    Creates a HungarianNormalizer and returns the result of its
    ``normalize(text, remove_articles)`` call.
    """
    normalizer = HungarianNormalizer()
    return normalizer.normalize(text, remove_articles)
|
||||||
1171
lingua_franca/lang/parse_it.py
Normal file
1171
lingua_franca/lang/parse_it.py
Normal file
File diff suppressed because it is too large
Load Diff
1339
lingua_franca/lang/parse_nl.py
Normal file
1339
lingua_franca/lang/parse_nl.py
Normal file
File diff suppressed because it is too large
Load Diff
1404
lingua_franca/lang/parse_pl.py
Normal file
1404
lingua_franca/lang/parse_pl.py
Normal file
File diff suppressed because it is too large
Load Diff
1089
lingua_franca/lang/parse_pt.py
Normal file
1089
lingua_franca/lang/parse_pt.py
Normal file
File diff suppressed because it is too large
Load Diff
1685
lingua_franca/lang/parse_ru.py
Normal file
1685
lingua_franca/lang/parse_ru.py
Normal file
File diff suppressed because it is too large
Load Diff
1
lingua_franca/lang/parse_sl.py
Normal file
1
lingua_franca/lang/parse_sl.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
# TODO implement parsing function
|
||||||
922
lingua_franca/lang/parse_sv.py
Normal file
922
lingua_franca/lang/parse_sv.py
Normal file
@@ -0,0 +1,922 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from dateutil.relativedelta import relativedelta
|
||||||
|
|
||||||
|
from lingua_franca.time import now_local
|
||||||
|
|
||||||
|
from .parse_common import (is_numeric, look_for_fractions, Normalizer,
|
||||||
|
tokenize, Token)
|
||||||
|
|
||||||
|
|
||||||
|
def _find_numbers_in_text(tokens):
    """Map each token to the number it represents, in reverse order.

    Each token's word is passed to extract_number_sv. A truthy result is
    paired with the token; otherwise the token is paired with None. Two
    special cases add a synthetic unit token (index -1):
      * 'kvart' (when it yields a number) is followed by 'timmar'
      * 'halvtimme' / 'halvtimma' become 30 followed by 'minuter'

    The list is returned reversed, so that a unit word comes before the
    numbers that belong to it.

    Args:
        tokens: Tokens to parse

    Returns:
        list of (number, token) tuples
    """
    forward = []
    for token in tokens:
        number = extract_number_sv(token.word)
        if number:
            forward.append((number, token))
            # Special case for quarter of an hour
            if token.word == 'kvart':
                forward.append((None, Token('timmar', index=-1)))
        elif token.word in ['halvtimme', 'halvtimma']:
            forward.append((30, token))
            forward.append((None, Token('minuter', index=-1)))
        else:
            forward.append((None, token))
    # Built front-to-back, handed out back-to-front.
    forward.reverse()
    return forward
|
||||||
|
|
||||||
|
|
||||||
|
def _combine_adjacent_numbers(number_map):
|
||||||
|
"""Combine adjacent numbers through multiplication.
|
||||||
|
|
||||||
|
Walks through a number map and joins adjasent numbers to handle cases
|
||||||
|
such as "en halvtimme" (one half hour).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
(list): simplified number_map
|
||||||
|
"""
|
||||||
|
simplified = []
|
||||||
|
skip = False
|
||||||
|
for i in range(len(number_map) - 1):
|
||||||
|
if skip:
|
||||||
|
skip = False
|
||||||
|
continue
|
||||||
|
if number_map[i][0] and number_map[i + 1][0]:
|
||||||
|
combined_number = number_map[i][0] * number_map[i + 1][0]
|
||||||
|
combined_tokens = (number_map[i][1], number_map[i + 1][1])
|
||||||
|
simplified.append((combined_number, combined_tokens))
|
||||||
|
skip = True
|
||||||
|
else:
|
||||||
|
simplified.append((number_map[i][0], (number_map[i][1],)))
|
||||||
|
|
||||||
|
if not skip:
|
||||||
|
simplified.append((number_map[-1][0], (number_map[-1][1],)))
|
||||||
|
return simplified
|
||||||
|
|
||||||
|
|
||||||
|
def extract_duration_sv(text):
    """
    Convert a Swedish phrase into a duration.

    The function handles durations from seconds up to days. Numbers that
    follow a unit word in the (reversed) number map are added to that unit;
    the unit words recognised are listed in ``state_words`` below.

    The words used in the duration are consumed, and the remaining words
    are returned joined by single spaces.

    Args:
        text (str): string containing a duration

    Returns:
        (timedelta, str) or None:
            A tuple containing the duration and the remaining text not
            consumed in the parsing, or None if no duration was found.
    """
    tokens = tokenize(text)
    # _find_numbers_in_text returns the entries in reverse order so that the
    # unit word ("minuter", "timmar", ...) is seen before its number.
    number_tok_map = _find_numbers_in_text(tokens)
    # Combine adjacent numbers
    simplified = _combine_adjacent_numbers(number_tok_map)

    states = {
        'days': 0,
        'hours': 0,
        'minutes': 0,
        'seconds': 0
    }

    # Parser state, mapping words that should set the parser to collect
    # numbers to a specific time "size"
    state_words = {
        'days': ('dygn', 'dag', 'dagar', 'dags'),
        'hours': ('timmar', 'timme', 'timma', 'timmes', 'timmas'),
        'minutes': ('minuter', 'minuters', 'minut', 'minuts'),
        'seconds': ('sekunder', 'sekunders', 'sekund', 'sekunds')
    }
    # Must be a real tuple: with a bare string the membership test below
    # would be a substring test and e.g. "o" or "ch" would count as binding.
    binding_words = ('och',)

    consumed = []
    state = None
    valid = False

    for num, toks in simplified:
        if state and num:
            states[state] += num
            consumed.extend(toks)
            valid = True  # If a state field got set this is valid duration
        elif num is None:
            for s in state_words:
                if toks[0].word in state_words[s]:
                    state = s
                    consumed.extend(toks)
                    break
            else:
                # Not a unit word: anything except a binding word ends the
                # current unit so later numbers are not attributed to it.
                if toks[0].word not in binding_words:
                    state = None

    td = timedelta(**states)
    remainder = ' '.join([t.word for t in tokens if t not in consumed])
    return (td, remainder) if valid else None
|
||||||
|
|
||||||
|
|
||||||
|
def extract_number_sv(text, short_scale=True, ordinals=False):
    """
    Extract the first number found in a Swedish text.

    Words are scanned left to right. A word counts as a number when it is
    numeric (per is_numeric), one of the ordinals "första".."sjätte", a
    fraction word (per is_fractional_sv), one of the cardinals
    "en"/"ett".."tio", or a fraction written like "2/3". A spelled-out
    cardinal directly followed by a fraction word is multiplied by that
    fraction, and a value followed by "och" is added to the next value found.

    Args:
        text (str): the string to extract a number from
        short_scale (bool): unused, kept for API compatibility
        ordinals (bool): unused, kept for API compatibility
    Returns:
        (int) or (float) or False: The value of the extracted number, or
            False when no (non-zero) value was found.
    """
    # TODO: short_scale and ordinals don't do anything here.
    # The parameters are present in the function signature for API
    # compatibility reasons.
    text = text.lower()
    aWords = text.split()
    and_pass = False  # True once a value followed by "och" has been stored
    valPreAnd = False  # the value found before "och"
    val = False
    count = 0
    while count < len(aWords):
        word = aWords[count]
        if is_numeric(word):
            val = float(word)
        elif word == "första":
            val = 1
        elif word == "andra":
            val = 2
        elif word == "tredje":
            val = 3
        elif word == "fjärde":
            val = 4
        elif word == "femte":
            val = 5
        elif word == "sjätte":
            val = 6
        elif is_fractional_sv(word):
            val = is_fractional_sv(word)
        else:
            if word == "en":
                val = 1
            if word == "ett":
                val = 1
            elif word == "två":
                val = 2
            elif word == "tre":
                val = 3
            elif word == "fyra":
                val = 4
            elif word == "fem":
                val = 5
            elif word == "sex":
                val = 6
            elif word == "sju":
                val = 7
            elif word == "åtta":
                val = 8
            elif word == "nio":
                val = 9
            elif word == "tio":
                val = 10
            if val:
                # A cardinal followed by a fraction word is scaled by it.
                if count < (len(aWords) - 1):
                    wordNext = aWords[count + 1]
                else:
                    wordNext = ""
                valNext = is_fractional_sv(wordNext)

                if valNext:
                    val = val * valNext
                    aWords[count + 1] = ""

        if not val:
            # look for fractions like "2/3"
            aPieces = word.split('/')
            if look_for_fractions(aPieces):
                val = float(aPieces[0]) / float(aPieces[1])
            elif and_pass:
                # added to value, quit here
                val = valPreAnd
                break
            else:
                # Not a number: move on to the next word.
                count += 1
                continue

        aWords[count] = ""

        if and_pass:
            aWords[count - 1] = ''  # remove "och"
            val += valPreAnd
        elif count + 1 < len(aWords) and aWords[count + 1] == 'och':
            # "<value> och ...": remember the value and keep scanning after
            # the "och".
            and_pass = True
            valPreAnd = val
            val = False
            count += 2
            continue
        elif count + 2 < len(aWords) and aWords[count + 2] == 'och':
            # Same, with one word between the value and "och".
            and_pass = True
            valPreAnd = val
            val = False
            count += 3
            continue

        break

    return val or False
|
||||||
|
|
||||||
|
|
||||||
|
def extract_datetime_sv(text, anchorDate=None, default_time=None):
    """Extract a date and/or time expression from a Swedish phrase.

    The phrase is scanned twice: first for date expressions (relative days,
    weeks, months, years, weekday and month names), then for time
    expressions. Words that were used are blanked out and the rest is
    returned as the remainder.

    NOTE(review): several keywords in the time section ("am", "pm",
    "hours", "o'clock", "in the morning", "early", ...) are English;
    presumably left over from the English parser this was adapted from.

    Args:
        text (str): the phrase to parse
        anchorDate (datetime): date to use as "now"; now_local() is used
            when it is not given
        default_time: object with ``hour`` and ``minute`` attributes, used
            when the phrase contains a date but no time

    Returns:
        [datetime, str] or None: the extracted datetime and the leftover
            text, or None if text is empty or nothing was recognised.
    """
    def clean_string(s):
        """
            cleans the input string of unneeded punctuation and capitalization
            among other things.
        """
        s = s.lower().replace('?', '').replace('.', '').replace(',', '') \
            .replace(' den ', ' ').replace(' en ', ' ')
        wordList = s.split()
        for idx, word in enumerate(wordList):
            word = word.replace("'s", "")

            ordinals = ["rd", "st", "nd", "th"]
            # The word may be empty after removing "'s"; slicing avoids an
            # IndexError in that case.
            if word[:1].isdigit():
                for ordinal in ordinals:
                    if ordinal in word:
                        word = word.replace(ordinal, "")
            wordList[idx] = word

        return wordList

    def date_found():
        return found or \
            (
                datestr != "" or timeStr != "" or
                yearOffset != 0 or monthOffset != 0 or
                dayOffset is True or hrOffset != 0 or
                hrAbs or minOffset != 0 or
                minAbs or secOffset != 0
            )

    if text == "":
        return None

    anchorDate = anchorDate or now_local()
    found = False
    daySpecified = False
    dayOffset = False
    monthOffset = 0
    yearOffset = 0
    dateNow = anchorDate
    today = dateNow.strftime("%w")
    currentYear = dateNow.strftime("%Y")
    fromFlag = False
    datestr = ""
    hasYear = False
    timeQualifier = ""

    timeQualifiersList = ['morgon', 'förmiddag', 'eftermiddag', 'kväll']
    markers = ['på', 'i', 'den här', 'kring', 'efter']
    days = ['måndag', 'tisdag', 'onsdag', 'torsdag',
            'fredag', 'lördag', 'söndag']
    months = ['januari', 'februari', 'mars', 'april', 'maj', 'juni',
              'juli', 'augusti', 'september', 'oktober', 'november',
              'december']
    monthsShort = ['jan', 'feb', 'mar', 'apr', 'may', 'june', 'july', 'aug',
                   'sept', 'oct', 'nov', 'dec']
    # Words that may follow "från"/"efter"; built once instead of per word.
    validFollowups = days + months + monthsShort + [
        "idag", "imorgon", "nästa", "förra", "nu"]

    words = clean_string(text)

    for idx, word in enumerate(words):
        if word == "":
            continue
        wordPrevPrev = words[idx - 2] if idx > 1 else ""
        wordPrev = words[idx - 1] if idx > 0 else ""
        wordNext = words[idx + 1] if idx + 1 < len(words) else ""
        wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""

        # this isn't in clean string because I don't want to save back to words
        word = word.rstrip('s')
        start = idx
        used = 0
        # save timequalifier for later
        if word in timeQualifiersList:
            timeQualifier = word
        # parse today, tomorrow, day after tomorrow
        elif word == "idag" and not fromFlag:
            dayOffset = 0
            used += 1
        elif word == "imorgon" and not fromFlag:
            dayOffset = 1
            used += 1
        elif (word == "morgondagen" or word == "morgondagens") \
                and not fromFlag:
            dayOffset = 1
            used += 1
        elif word == "övermorgon" and not fromFlag:
            dayOffset = 2
            used += 1
        # parse 5 days, 10 weeks, last week, next week
        elif word == "dag" or word == "dagar":
            # isdecimal() is False for "" and for mixed tokens, so int()
            # below cannot fail.
            if wordPrev.isdecimal():
                dayOffset += int(wordPrev)
                start -= 1
                used = 2
        elif (word == "vecka" or word == "veckor") and not fromFlag:
            if wordPrev.isdecimal():
                dayOffset += int(wordPrev) * 7
                start -= 1
                used = 2
            elif wordPrev == "nästa":
                dayOffset = 7
                start -= 1
                used = 2
            elif wordPrev == "förra":
                dayOffset = -7
                start -= 1
                used = 2
        # parse 10 months, next month, last month
        elif word == "månad" and not fromFlag:
            if wordPrev.isdecimal():
                monthOffset = int(wordPrev)
                start -= 1
                used = 2
            elif wordPrev == "nästa":
                monthOffset = 1
                start -= 1
                used = 2
            elif wordPrev == "förra":
                monthOffset = -1
                start -= 1
                used = 2
        # parse 5 years, next year, last year
        elif word == "år" and not fromFlag:
            if wordPrev.isdecimal():
                yearOffset = int(wordPrev)
                start -= 1
                used = 2
            elif wordPrev == "nästa":
                yearOffset = 1
                start -= 1
                used = 2
            elif wordPrev == "förra":
                yearOffset = -1
                start -= 1
                used = 2
        # parse Monday, Tuesday, etc., and next Monday,
        # last Tuesday, etc.
        elif word in days and not fromFlag:
            d = days.index(word)
            dayOffset = (d + 1) - int(today)
            used = 1
            if dayOffset < 0:
                dayOffset += 7
            if wordPrev == "nästa":
                dayOffset += 7
                used += 1
                start -= 1
            elif wordPrev == "förra":
                dayOffset -= 7
                used += 1
                start -= 1
        # parse 15 of July, June 20th, Feb 18, 19 of February
        elif (word in months or word in monthsShort) and not fromFlag:
            try:
                m = months.index(word)
            except ValueError:
                m = monthsShort.index(word)
            used += 1
            datestr = months[m]
            if wordPrev and (wordPrev[0].isdigit() or
                             (wordPrev == "of" and
                              wordPrevPrev[:1].isdigit())):
                if wordPrev == "of" and wordPrevPrev[:1].isdigit():
                    datestr += " " + words[idx - 2]
                    used += 1
                    start -= 1
                else:
                    datestr += " " + wordPrev
                start -= 1
                used += 1
                if wordNext and wordNext[0].isdigit():
                    datestr += " " + wordNext
                    used += 1
                    hasYear = True
                else:
                    hasYear = False

            elif wordNext and wordNext[0].isdigit():
                datestr += " " + wordNext
                used += 1
                if wordNextNext and wordNextNext[0].isdigit():
                    datestr += " " + wordNextNext
                    used += 1
                    hasYear = True
                else:
                    hasYear = False
        # parse 5 days from tomorrow, 10 weeks from next thursday,
        # 2 months from July
        if (word == "från" or word == "efter") and wordNext in validFollowups:
            used = 2
            fromFlag = True
            if wordNext == "imorgon":
                dayOffset += 1
            elif wordNext in days:
                d = days.index(wordNext)
                tmpOffset = (d + 1) - int(today)
                used = 2
                if tmpOffset < 0:
                    tmpOffset += 7
                dayOffset += tmpOffset
            elif wordNextNext and wordNextNext in days:
                d = days.index(wordNextNext)
                tmpOffset = (d + 1) - int(today)
                used = 3
                if wordNext == "nästa":
                    tmpOffset += 7
                    used += 1
                    start -= 1
                elif wordNext == "förra":
                    tmpOffset -= 7
                    used += 1
                    start -= 1
                dayOffset += tmpOffset
        if used > 0:
            if start - 1 > 0 and words[start - 1] == "denna":
                start -= 1
                used += 1

            for i in range(0, used):
                words[i + start] = ""

            if start - 1 >= 0 and words[start - 1] in markers:
                words[start - 1] = ""
            found = True
            daySpecified = True

    # parse time
    timeStr = ""
    hrOffset = 0
    minOffset = 0
    secOffset = 0
    hrAbs = None
    minAbs = None

    for idx, word in enumerate(words):
        if word == "":
            continue

        wordPrevPrev = words[idx - 2] if idx > 1 else ""
        wordPrev = words[idx - 1] if idx > 0 else ""
        wordNext = words[idx + 1] if idx + 1 < len(words) else ""
        wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
        wordNextNextNext = words[idx + 3] if idx + 3 < len(words) else ""
        # parse noon, midnight, morning, afternoon, evening
        used = 0
        if word == "middag":
            hrAbs = 12
            used += 1
        elif word == "midnatt":
            hrAbs = 0
            used += 1
        elif word == "morgon":
            if not hrAbs:
                hrAbs = 8
            used += 1
        elif word == "förmiddag":
            if not hrAbs:
                hrAbs = 10
            used += 1
        elif word == "eftermiddag":
            if not hrAbs:
                hrAbs = 15
            used += 1
        elif word == "kväll":
            if not hrAbs:
                hrAbs = 19
            used += 1
        # parse half an hour, quarter hour
        elif wordPrev in markers or wordPrevPrev in markers:
            if word == "halvtimme" or word == "halvtimma":
                minOffset = 30
            elif word == "kvart":
                minOffset = 15
            elif word == "timme" or word == "timma":
                hrOffset = 1
            words[idx - 1] = ""
            used += 1
            # -1 marks "no absolute time": only the offsets are applied.
            hrAbs = -1
            minAbs = -1
        # parse 5:00 am, 12:00 p.m., etc
        elif word[0].isdigit():
            isTime = True
            strHH = ""
            strMM = ""
            remainder = ""
            if ':' in word:
                # parse colons
                # "3:00 in the morning"
                stage = 0
                length = len(word)
                for i in range(length):
                    if stage == 0:
                        if word[i].isdigit():
                            strHH += word[i]
                        elif word[i] == ":":
                            stage = 1
                        else:
                            stage = 2
                            # NOTE: has no effect, the for loop reassigns i
                            i -= 1
                    elif stage == 1:
                        if word[i].isdigit():
                            strMM += word[i]
                        else:
                            stage = 2
                            # NOTE: has no effect, the for loop reassigns i
                            i -= 1
                    elif stage == 2:
                        remainder = word[i:].replace(".", "")
                        break
                if remainder == "":
                    nextWord = wordNext.replace(".", "")
                    if nextWord == "am" or nextWord == "pm":
                        remainder = nextWord
                        used += 1
                    elif nextWord == "tonight":
                        remainder = "pm"
                        used += 1
                    elif wordNext == "in" and wordNextNext == "the" and \
                            wordNextNextNext == "morning":
                        remainder = "am"
                        used += 3
                    elif wordNext == "in" and wordNextNext == "the" and \
                            wordNextNextNext == "afternoon":
                        remainder = "pm"
                        used += 3
                    elif wordNext == "in" and wordNextNext == "the" and \
                            wordNextNextNext == "evening":
                        remainder = "pm"
                        used += 3
                    elif wordNext == "in" and wordNextNext == "morning":
                        remainder = "am"
                        used += 2
                    elif wordNext == "in" and wordNextNext == "afternoon":
                        remainder = "pm"
                        used += 2
                    elif wordNext == "in" and wordNextNext == "evening":
                        remainder = "pm"
                        used += 2
                    elif wordNext == "this" and wordNextNext == "morning":
                        remainder = "am"
                        used = 2
                    elif wordNext == "this" and wordNextNext == "afternoon":
                        remainder = "pm"
                        used = 2
                    elif wordNext == "this" and wordNextNext == "evening":
                        remainder = "pm"
                        used = 2
                    elif wordNext == "at" and wordNextNext == "night":
                        # strHH is a digit string here; compare as a number.
                        if int(strHH) > 5:
                            remainder = "pm"
                        else:
                            remainder = "am"
                        used += 2
                    else:
                        # An afternoon/evening qualifier seen in the date
                        # pass moves an hour below 12 to the second half
                        # of the day. (The original compared the hour
                        # string with an int here, which raised TypeError.)
                        if timeQualifier in ("evening", "afternoon",
                                             "kväll", "eftermiddag") \
                                and int(strHH) < 12:
                            strHH = int(strHH) + 12
            else:
                # try to parse # s without colons
                # 5 hours, 10 minutes etc.
                length = len(word)
                strNum = ""
                remainder = ""
                for i in range(length):
                    if word[i].isdigit():
                        strNum += word[i]
                    else:
                        remainder += word[i]

                if remainder == "":
                    remainder = wordNext.replace(".", "").lstrip().rstrip()

                if (
                        remainder == "pm" or
                        wordNext == "pm" or
                        remainder == "p.m." or
                        wordNext == "p.m."):
                    strHH = strNum
                    remainder = "pm"
                    used = 1
                elif (
                        remainder == "am" or
                        wordNext == "am" or
                        remainder == "a.m." or
                        wordNext == "a.m."):
                    strHH = strNum
                    remainder = "am"
                    used = 1
                else:
                    if wordNext == "pm" or wordNext == "p.m.":
                        strHH = strNum
                        remainder = "pm"
                        used = 1
                    elif wordNext == "am" or wordNext == "a.m.":
                        strHH = strNum
                        remainder = "am"
                        used = 1
                    elif not word.isdecimal():
                        # e.g. "5km": every branch below calls int(word),
                        # which would raise ValueError; not a clock time.
                        isTime = False
                    elif (
                            int(word) > 100 and
                            (
                                wordPrev == "o" or
                                wordPrev == "oh"
                            )):
                        # 0800 hours (pronounced oh-eight-hundred)
                        # floor division keeps the minutes ("0830" -> 8:30)
                        strHH, strMM = divmod(int(word), 100)
                        if wordNext == "hours":
                            used += 1
                    elif (
                            wordNext == "hours" and
                            word[0] != '0' and
                            (
                                int(word) < 100 or
                                int(word) > 2400
                            )):
                        # "in 3 hours"
                        hrOffset = int(word)
                        used = 2
                        isTime = False
                        hrAbs = -1
                        minAbs = -1

                    elif wordNext == "minutes":
                        # "in 10 minutes"
                        minOffset = int(word)
                        used = 2
                        isTime = False
                        hrAbs = -1
                        minAbs = -1
                    elif wordNext == "seconds":
                        # in 5 seconds
                        secOffset = int(word)
                        used = 2
                        isTime = False
                        hrAbs = -1
                        minAbs = -1
                    elif int(word) > 100:
                        strHH, strMM = divmod(int(word), 100)
                        if wordNext == "hours":
                            used += 1
                    elif wordNext[:1].isdigit():
                        strHH = word
                        strMM = wordNext
                        used += 1
                        if wordNextNext == "hours":
                            used += 1
                    elif (
                            wordNext == "" or wordNext == "o'clock" or
                            (
                                wordNext == "in" and
                                (
                                    wordNextNext == "the" or
                                    wordNextNext == timeQualifier
                                )
                            )):
                        strHH = word
                        strMM = 00
                        if wordNext == "o'clock":
                            used += 1
                        if wordNext == "in" or wordNextNext == "in":
                            used += (1 if wordNext == "in" else 2)
                            # Word after the first occurrence of
                            # wordNextNext, or "" when there is none.
                            afterIdx = words.index(wordNextNext) + 1 \
                                if wordNextNext in words else len(words)
                            wordAfter = words[afterIdx] \
                                if afterIdx < len(words) else ""
                            if (wordNextNext and
                                    wordNextNext in timeQualifier or
                                    (wordAfter and
                                     wordAfter in timeQualifier)):
                                if (wordNextNext == "afternoon" or
                                        wordAfter == "afternoon"):
                                    remainder = "pm"
                                if (wordNextNext == "evening" or
                                        wordAfter == "evening"):
                                    remainder = "pm"
                                if (wordNextNext == "morning" or
                                        wordAfter == "morning"):
                                    remainder = "am"
                    else:
                        isTime = False

            strHH = int(strHH) if strHH else 0
            strMM = int(strMM) if strMM else 0
            strHH = strHH + 12 if remainder == "pm" and strHH < 12 else strHH
            strHH = strHH - 12 if remainder == "am" and strHH >= 12 else strHH
            if strHH > 24 or strMM > 59:
                isTime = False
                used = 0
            if isTime:
                hrAbs = strHH * 1
                minAbs = strMM * 1
                used += 1
        if used > 0:
            # removed parsed words from the sentence
            for i in range(used):
                words[idx + i] = ""

            if wordPrev == "o" or wordPrev == "oh":
                words[words.index(wordPrev)] = ""

            if wordPrev == "early":
                hrOffset = -1
                words[idx - 1] = ""
                idx -= 1
            elif wordPrev == "late":
                hrOffset = 1
                words[idx - 1] = ""
                idx -= 1
            if idx > 0 and wordPrev in markers:
                words[idx - 1] = ""
            if idx > 1 and wordPrevPrev in markers:
                words[idx - 2] = ""

            idx += used - 1
            found = True

    # check that we found a date
    if not date_found():
        return None

    if dayOffset is False:
        dayOffset = 0

    # perform date manipulation

    extractedDate = dateNow
    extractedDate = extractedDate.replace(microsecond=0,
                                          second=0,
                                          minute=0,
                                          hour=0)
    if datestr != "":
        temp = datetime.strptime(datestr, "%B %d")
        if not hasYear:
            # Copy tzinfo as well so the comparison below also works when
            # the anchor date is timezone-aware.
            temp = temp.replace(year=extractedDate.year,
                                tzinfo=extractedDate.tzinfo)
            if extractedDate < temp:
                extractedDate = extractedDate.replace(year=int(currentYear),
                                                      month=temp.month,
                                                      day=temp.day)
            else:
                extractedDate = extractedDate.replace(
                    year=int(currentYear) + 1,
                    month=temp.month,
                    day=temp.day)
        else:
            extractedDate = extractedDate.replace(
                year=temp.year,
                month=temp.month,
                day=temp.day)

    # NOTE(review): timeStr is never assigned anything but "" in this
    # function, so this branch looks unreachable; left as it was.
    if timeStr != "":
        temp = datetime(timeStr)
        extractedDate = extractedDate.replace(hour=temp.strftime("%H"),
                                              minute=temp.strftime("%M"),
                                              second=temp.strftime("%S"))

    if yearOffset != 0:
        extractedDate = extractedDate + relativedelta(years=yearOffset)
    if monthOffset != 0:
        extractedDate = extractedDate + relativedelta(months=monthOffset)
    if dayOffset != 0:
        extractedDate = extractedDate + relativedelta(days=dayOffset)

    if hrAbs is None and minAbs is None and default_time:
        hrAbs = default_time.hour
        minAbs = default_time.minute
    if hrAbs != -1 and minAbs != -1:
        extractedDate = extractedDate + relativedelta(hours=hrAbs or 0,
                                                      minutes=minAbs or 0)
        if (hrAbs or minAbs) and datestr == "":
            # A bare time that has already passed today means tomorrow.
            if not daySpecified and dateNow > extractedDate:
                extractedDate = extractedDate + relativedelta(days=1)
    if hrOffset != 0:
        extractedDate = extractedDate + relativedelta(hours=hrOffset)
    if minOffset != 0:
        extractedDate = extractedDate + relativedelta(minutes=minOffset)
    if secOffset != 0:
        extractedDate = extractedDate + relativedelta(seconds=secOffset)
    for idx, word in enumerate(words):
        # Drop an "and" left stranded between two consumed words; the
        # bounds check avoids an IndexError when it is the last word.
        if words[idx] == "and" and idx + 1 < len(words) and \
                words[idx - 1] == "" and words[idx + 1] == "":
            words[idx] = ""

    resultStr = " ".join(words)
    resultStr = ' '.join(resultStr.split())
    return [extractedDate, resultStr]
|
||||||
|
|
||||||
|
|
||||||
|
def is_fractional_sv(input_str, short_scale=True):
    """
    This function takes the given text and checks if it is a fraction.

    The word is lower-cased and the endings "ars", "ar", "a" and "s" are
    stripped (in that order, each at most once) before it is looked up, so
    inflected forms such as "halva" or "femtedelar" are recognised.

    Args:
        input_str (str): the string to check if fractional
        short_scale (bool): unused here
    Returns:
        (bool) or (float): False if not a fraction, otherwise the fraction

    """
    # Normalise case once so the membership test and the index lookup below
    # operate on the same string ("Halv" used to raise ValueError).
    word = input_str.lower()
    if word.endswith('ars'):
        word = word[:-3]  # e.g. "femtedelars"
    if word.endswith('ar'):
        word = word[:-2]  # e.g. "femtedelar"
    if word.endswith('a'):
        word = word[:-1]  # e.g. "halva"
    if word.endswith('s'):
        word = word[:-1]  # e.g. "halvs"

    # Position in the list gives the denominator: "hel" = 1/1, "halv" = 1/2...
    aFrac = ["hel", "halv", "tredjedel", "fjärdedel", "femtedel", "sjättedel",
             "sjundedel", "åttondel", "niondel", "tiondel", "elftedel",
             "tolftedel"]
    if word in aFrac:
        return 1.0 / (aFrac.index(word) + 1)
    if word == "kvart":
        return 1.0 / 4
    if word == "trekvart":
        return 3.0 / 4

    return False
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_sv(text, remove_articles=True):
    """Normalize a Swedish string.

    Collapses runs of whitespace and replaces the spelled-out numbers
    "noll" to "tjugo" (and "en", treated as "ett") with digits.

    Args:
        text (str): the string to normalize
        remove_articles (bool): not used by this implementation

    Returns:
        (str): the words of text joined by single spaces
    """
    # The position of a word in this list is the number it stands for.
    number_words = ["noll", "ett", "två", "tre", "fyra", "fem", "sex",
                    "sju", "åtta", "nio", "tio", "elva", "tolv",
                    "tretton", "fjorton", "femton", "sexton",
                    "sjutton", "arton", "nitton", "tjugo"]

    converted = []
    for token in text.split():  # split() also drops extra spaces
        if token == 'en':
            token = 'ett'
        # Convert numbers into digits, e.g. "två" -> "2"
        if token in number_words:
            token = str(number_words.index(token))
        converted.append(token)

    return ' '.join(converted)
|
||||||
|
|
||||||
|
|
||||||
|
class SwedishNormalizer(Normalizer):
    """Swedish normalizer; adds nothing to the Normalizer base class yet.

    TODO implement language specific normalizer
    """
|
||||||
269
lingua_franca/parse.py
Normal file
269
lingua_franca/parse.py
Normal file
@@ -0,0 +1,269 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2017 Mycroft AI Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
#
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
from difflib import SequenceMatcher
|
||||||
|
from warnings import warn
|
||||||
|
from lingua_franca.time import now_local
|
||||||
|
from lingua_franca.internal import populate_localized_function_dict, \
|
||||||
|
get_active_langs, get_full_lang_code, get_primary_lang_code, \
|
||||||
|
get_default_lang, localized_function, _raise_unsupported_language
|
||||||
|
|
||||||
|
# Names of the parse functions of this module.
# NOTE(review): presumably read by populate_localized_function_dict to find
# the per-language implementations; confirm in lingua_franca.internal.
_REGISTERED_FUNCTIONS = ("extract_numbers",
                         "extract_number",
                         "extract_duration",
                         "extract_datetime",
                         "normalize",
                         "get_gender",
                         "is_fractional",
                         "is_ordinal")

# Runs at import time for the languages that are active at that moment.
populate_localized_function_dict("parse", langs=get_active_langs())
|
||||||
|
|
||||||
|
|
||||||
|
def fuzzy_match(x: str, against: str) -> float:
    """Score how similar two strings are.

    Args:
        x: the string to compare
        against: the string to compare it with

    Returns:
        The difflib.SequenceMatcher ratio of the two strings: 1.0 for a
        perfect match, down to 0.0 for no match at all.
    """
    matcher = SequenceMatcher(isjunk=None, a=x, b=against)
    similarity = matcher.ratio()
    return similarity
|
||||||
|
|
||||||
|
|
||||||
|
def match_one(query, choices):
    """
        Pick the choice that is most similar to the query.

        When choices is a dictionary its keys are compared with the query
        and the value stored under the best key is returned. On equal
        scores the earliest choice wins.

        Args:
            query (str): string to test
            choices (list): list or dictionary of choices

        Returns:
            tuple: (best match, score)

        Raises:
            ValueError: if choices is neither a list nor a dict
    """
    from_dict = isinstance(choices, dict)
    if from_dict:
        candidates = list(choices.keys())
    elif isinstance(choices, list):
        candidates = choices
    else:
        raise ValueError('a list or dict of choices must be provided')

    # Start from the first candidate and keep whichever scores strictly
    # higher.
    winner = candidates[0]
    top_score = fuzzy_match(query, winner)
    for candidate in candidates[1:]:
        candidate_score = fuzzy_match(query, candidate)
        if candidate_score > top_score:
            winner, top_score = candidate, candidate_score

    if from_dict:
        return (choices[winner], top_score)
    return (winner, top_score)
|
||||||
|
|
||||||
|
|
||||||
|
@localized_function()
def extract_numbers(text, short_scale=True, ordinals=False, lang=''):
    """
        Takes in a string and extracts a list of numbers.

    This function has no body of its own.
    NOTE(review): presumably the localized_function decorator supplies the
    behaviour by calling a language-specific implementation; confirm in
    lingua_franca.internal.

    Args:
        text (str): the string to extract a number from
        short_scale (bool): Use "short scale" or "long scale" for large
            numbers -- over a million.  The default is short scale, which
            is now common in most English speaking countries.
            See https://en.wikipedia.org/wiki/Names_of_large_numbers
        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
        lang (str, optional): an optional BCP-47 language code, if omitted
                              the default language will be used.
    Returns:
        list: list of extracted numbers as floats, or empty list if none found
    """
|
||||||
|
|
||||||
|
|
||||||
|
@localized_function()
def extract_number(text, short_scale=True, ordinals=False, lang=''):
    """Takes in a string and extracts a number.

    This function has no body of its own.
    NOTE(review): presumably the localized_function decorator supplies the
    behaviour by calling a language-specific implementation; confirm in
    lingua_franca.internal.

    Args:
        text (str): the string to extract a number from
        short_scale (bool): Use "short scale" or "long scale" for large
            numbers -- over a million.  The default is short scale, which
            is now common in most English speaking countries.
            See https://en.wikipedia.org/wiki/Names_of_large_numbers
        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
        lang (str, optional): an optional BCP-47 language code, if omitted
                              the default language will be used.
    Returns:
        (int, float or False): The number extracted or False if the input
                               text contains no numbers
    """
|
||||||
|
|
||||||
|
|
||||||
|
@localized_function()
def extract_duration(text, lang=''):
    """Convert a phrase describing a duration into a timedelta.

    Convert things like:

    * "10 minute"
    * "2 and a half hours"
    * "3 days 8 hours 10 minutes and 49 seconds"

    into a timedelta representing the total duration.

    The words used in the duration will be consumed, and
    the remainder returned.

    As an example, "set a timer for 5 minutes" would return
    ``(timedelta(seconds=300), "set a timer for")``.

    Args:
        text (str): string containing a duration
        lang (str, optional): an optional BCP-47 language code, if omitted
            the default language will be used.

    Returns:
        (timedelta, str):
            A tuple containing the duration and the remaining text
            not consumed in the parsing. The first value will
            be None if no duration is found. The text returned
            will have whitespace stripped from the ends.
    """
|
||||||
|
|
||||||
|
|
||||||
|
@localized_function()
def extract_datetime(text, anchorDate=None, lang='', default_time=None):
    """
    Extracts date and time information from a sentence. Parses many of the
    common ways that humans express dates and times, including relative dates
    like "5 days from today", "tomorrow", and "Tuesday".

    Vague terms are given arbitrary values, like:
        - morning = 8 AM
        - afternoon = 3 PM
        - evening = 7 PM

    If a time isn't supplied or implied, the function defaults to 12 AM

    Args:
        text (str): the text to be interpreted
        anchorDate (:obj:`datetime`, optional): the date to be used for
            relative dating (for example, what does "tomorrow" mean?).
            Defaults to the current local date/time.
        lang (str, optional): an optional BCP-47 language code, if omitted
            the default language will be used.
        default_time (datetime.time): time to use if none was found in
            the input string.

    Returns:
        [:obj:`datetime`, :obj:`str`]: 'datetime' is the extracted date
            as a datetime object in the local timezone.
            'leftover_string' is the original phrase with all date and time
            related keywords stripped out. See examples for further
            clarification

            Returns 'None' if no date or time related text is found.

    Examples:

        >>> extract_datetime(
        ... "What is the weather like the day after tomorrow?",
        ... datetime(2017, 6, 30, 00, 00)
        ... )
        [datetime.datetime(2017, 7, 2, 0, 0), 'what is weather like']

        >>> extract_datetime(
        ... "Set up an appointment 2 weeks from Sunday at 5 pm",
        ... datetime(2016, 2, 19, 00, 00)
        ... )
        [datetime.datetime(2016, 3, 6, 17, 0), 'set up appointment']

        >>> extract_datetime(
        ... "Set up an appointment",
        ... datetime(2016, 2, 19, 00, 00)
        ... )
        None
    """
|
||||||
|
|
||||||
|
|
||||||
|
@localized_function()
def normalize(text, lang='', remove_articles=True):
    """Prepare a string for parsing.

    The given text is made ready for parsing by making numbers
    consistent, getting rid of contractions, etc.

    Args:
        text (str): the string to normalize
        lang (str, optional): an optional BCP-47 language code, if omitted
            the default language will be used.
        remove_articles (bool): whether to remove articles (like 'a', or
            'the'). True by default.

    Returns:
        (str): The normalized string.
    """
|
||||||
|
|
||||||
|
|
||||||
|
@localized_function()
def get_gender(word, context="", lang=''):
    """Guess the gender of a word.

    Some languages assign genders to specific words. This method will
    attempt to determine the gender, optionally using the provided context
    sentence.

    Args:
        word (str): The word to look up
        context (str, optional): String containing word, for context
        lang (str, optional): an optional BCP-47 language code, if omitted
            the default language will be used.

    Returns:
        str: The code "m" (male), "f" (female) or "n" (neutral) for the
            gender, or None if unknown/or unused in the given language.
    """
|
||||||
|
|
||||||
|
|
||||||
|
@localized_function()
def is_fractional(input_str, short_scale=True, lang=''):
    """
    This function takes the given text and checks if it is a fraction.
    Used by most of the number extractors.

    Will return False on phrases that *contain* a fraction. Only detects
    exact matches. To pull a fraction from a string, see extract_number()

    Args:
        input_str (str): the string to check if fractional
        short_scale (bool): use short scale if True, long scale if False
        lang (str, optional): an optional BCP-47 language code, if omitted
            the default language will be used.

    Returns:
        (bool) or (float): False if not a fraction, otherwise the fraction
    """
|
||||||
|
|
||||||
|
|
||||||
|
@localized_function()
def is_ordinal(input_str, lang=''):
    """Check whether the given text is an ordinal number.

    Args:
        input_str (str): the string to check if ordinal
        lang (str, optional): an optional BCP-47 language code, if omitted
            the default language will be used.

    Returns:
        (bool) or (float): False if not an ordinal, otherwise the number
            corresponding to the ordinal
    """
|
||||||
1
lingua_franca/res/text/ca-es/and.word
Normal file
1
lingua_franca/res/text/ca-es/and.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
i
|
||||||
130
lingua_franca/res/text/ca-es/date_time.json
Normal file
130
lingua_franca/res/text/ca-es/date_time.json
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
{
|
||||||
|
"decade_format": {
|
||||||
|
"1": {"match": "^\\d$", "format": "{x}"},
|
||||||
|
"2": {"match": "^1\\d$", "format": "{xx}"},
|
||||||
|
"3": {"match": "^\\d0$", "format": "{x0}"},
|
||||||
|
"4": {"match": "^2\\d$", "format": "vint-i-{x}"},
|
||||||
|
"5": {"match": "^[3-9]\\d$", "format": "{x0}-{x}"},
|
||||||
|
"default": "{number}"
|
||||||
|
},
|
||||||
|
"hundreds_format": {
|
||||||
|
"1": {"match": "^1\\d{2}$", "format": "{x_in_x00}-cent"},
|
||||||
|
"2": {"match": "^\\d{3}$", "format": "{x_in_x00}-cents"},
|
||||||
|
"default": "{number}"
|
||||||
|
},
|
||||||
|
"thousand_format": {
|
||||||
|
"1": {"match": "^10\\d\\d$", "format": "mil"},
|
||||||
|
"2": {"match": "^11\\d\\d$", "format": "mil cent"},
|
||||||
|
"3": {"match": "^1[2-9]\\d\\d$", "format": "mil {x_in_x00}-cents"},
|
||||||
|
"4": {"match": "^[2-9]0\\d{2}$", "format": "{x_in_x000} mil"},
|
||||||
|
"5": {"match": "^[2-9]1\\d{2}$", "format": "{x_in_x000} mil cent"},
|
||||||
|
"6": {"match": "^[2-9][2-9]\\d{2}$", "format": "{x_in_x000} mil {x_in_x00}-cents"},
|
||||||
|
"default": "{number}"
|
||||||
|
},
|
||||||
|
"year_format": {
|
||||||
|
"1": {"match": "^\\d\\d?$", "format": "{formatted_decade} {bc}"},
|
||||||
|
"2": {"match": "^\\d00$", "format": "{formatted_hundreds} {bc}"},
|
||||||
|
"3": {"match": "^\\d{3}$", "format": "{formatted_hundreds} {formatted_decade} {bc}"},
|
||||||
|
"4": {"match": "^\\d{2}00$", "format": "{formatted_thousand} {bc}"},
|
||||||
|
"5": {"match": "^\\d{4}$", "format": "{formatted_thousand} {formatted_decade} {bc}"},
|
||||||
|
"default": "{year} {bc}",
|
||||||
|
"bc": "a.C."
|
||||||
|
},
|
||||||
|
"date_format": {
|
||||||
|
"date_full": "{weekday}, {day} de {month} de {formatted_year}",
|
||||||
|
"date_full_no_year": "{weekday}, {day} de {month}",
|
||||||
|
"date_full_no_year_month": "{weekday}, dia {day}",
|
||||||
|
"today": "avui",
|
||||||
|
"tomorrow": "demà",
|
||||||
|
"yesterday": "ahir"
|
||||||
|
},
|
||||||
|
"date_time_format": {
|
||||||
|
"date_time": "{formatted_date} a {formatted_time}"
|
||||||
|
},
|
||||||
|
"weekday": {
|
||||||
|
"0": "dilluns",
|
||||||
|
"1": "dimarts",
|
||||||
|
"2": "dimecres",
|
||||||
|
"3": "dijous",
|
||||||
|
"4": "divendres",
|
||||||
|
"5": "dissabte",
|
||||||
|
"6": "diumenge"
|
||||||
|
},
|
||||||
|
"date": {
|
||||||
|
"1": "primer",
|
||||||
|
"2": "dos",
|
||||||
|
"3": "tres",
|
||||||
|
"4": "quatre",
|
||||||
|
"5": "cinc",
|
||||||
|
"6": "sis",
|
||||||
|
"7": "set",
|
||||||
|
"8": "vuit",
|
||||||
|
"9": "nou",
|
||||||
|
"10": "deu",
|
||||||
|
"11": "onze",
|
||||||
|
"12": "dotze",
|
||||||
|
"13": "tretze",
|
||||||
|
"14": "catorze",
|
||||||
|
"15": "quinze",
|
||||||
|
"16": "setze",
|
||||||
|
"17": "disset",
|
||||||
|
"18": "divuit",
|
||||||
|
"19": "dinou",
|
||||||
|
"20": "vint",
|
||||||
|
"21": "vint-i-u",
|
||||||
|
"22": "vint-i-dos",
|
||||||
|
"23": "vint-i-tres",
|
||||||
|
"24": "vint-i-quatre",
|
||||||
|
"25": "vint-i-cinc",
|
||||||
|
"26": "vint-i-sis",
|
||||||
|
"27": "vint-i-set",
|
||||||
|
"28": "vint-i-vuit",
|
||||||
|
"29": "vint-i-nou",
|
||||||
|
"30": "trenta",
|
||||||
|
"31": "trenta-u"
|
||||||
|
},
|
||||||
|
"month": {
|
||||||
|
"1": "gener",
|
||||||
|
"2": "febrer",
|
||||||
|
"3": "març",
|
||||||
|
"4": "abril",
|
||||||
|
"5": "maig",
|
||||||
|
"6": "juny",
|
||||||
|
"7": "juliol",
|
||||||
|
"8": "agost",
|
||||||
|
"9": "setembre",
|
||||||
|
"10": "octubre",
|
||||||
|
"11": "novembre",
|
||||||
|
"12": "desembre"
|
||||||
|
},
|
||||||
|
"number": {
|
||||||
|
"0": "zero",
|
||||||
|
"1": "u",
|
||||||
|
"2": "dos",
|
||||||
|
"3": "tres",
|
||||||
|
"4": "quatre",
|
||||||
|
"5": "cinc",
|
||||||
|
"6": "sis",
|
||||||
|
"7": "set",
|
||||||
|
"8": "vuit",
|
||||||
|
"9": "nou",
|
||||||
|
"10": "deu",
|
||||||
|
"11": "onze",
|
||||||
|
"12": "dotze",
|
||||||
|
"13": "tretze",
|
||||||
|
"14": "catorze",
|
||||||
|
"15": "quinze",
|
||||||
|
"16": "setze",
|
||||||
|
"17": "disset",
|
||||||
|
"18": "divuit",
|
||||||
|
"19": "dinou",
|
||||||
|
"20": "vint",
|
||||||
|
"30": "trenta",
|
||||||
|
"40": "quaranta",
|
||||||
|
"50": "cinquanta",
|
||||||
|
"60": "seixanta",
|
||||||
|
"70": "setanta",
|
||||||
|
"80": "vuitanta",
|
||||||
|
"90": "noranta"
|
||||||
|
}
|
||||||
|
}
|
||||||
43
lingua_franca/res/text/ca-es/date_time_test.json
Normal file
43
lingua_franca/res/text/ca-es/date_time_test.json
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
{
|
||||||
|
"test_nice_year": {
|
||||||
|
"1": {"datetime_param": "1, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "u a.C." },
|
||||||
|
"2": {"datetime_param": "10, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "deu a.C." },
|
||||||
|
"3": {"datetime_param": "92, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "noranta-dos a.C." },
|
||||||
|
"4": {"datetime_param": "803, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "vuit-cents tres" },
|
||||||
|
"5": {"datetime_param": "811, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "vuit-cents onze" },
|
||||||
|
"6": {"datetime_param": "454, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "quatre-cents cinquanta-quatre" },
|
||||||
|
"7": {"datetime_param": "1005, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mil cinc" },
|
||||||
|
"8": {"datetime_param": "1012, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mil dotze" },
|
||||||
|
"9": {"datetime_param": "1046, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mil quaranta-sis" },
|
||||||
|
"10": {"datetime_param": "1807, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mil vuit-cents set" },
|
||||||
|
"11": {"datetime_param": "1717, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mil set-cents disset" },
|
||||||
|
"12": {"datetime_param": "1988, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mil nou-cents vuitanta-vuit"},
|
||||||
|
"13": {"datetime_param": "2009, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dos mil nou"},
|
||||||
|
"14": {"datetime_param": "2018, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dos mil divuit"},
|
||||||
|
"15": {"datetime_param": "2021, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dos mil vint-i-u"},
|
||||||
|
"16": {"datetime_param": "2030, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dos mil trenta"},
|
||||||
|
"17": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "dos mil cent" },
|
||||||
|
"18": {"datetime_param": "1000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mil" },
|
||||||
|
"19": {"datetime_param": "2000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dos mil" },
|
||||||
|
"20": {"datetime_param": "3120, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "tres mil cent vint a.C." },
|
||||||
|
"21": {"datetime_param": "3241, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "tres mil dos-cents quaranta-u a.C." },
|
||||||
|
"22": {"datetime_param": "5200, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "cinc mil dos-cents" },
|
||||||
|
"23": {"datetime_param": "1100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mil cent" },
|
||||||
|
"24": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "dos mil cent" }
|
||||||
|
},
|
||||||
|
"test_nice_date": {
|
||||||
|
"1": {"datetime_param": "2017, 1, 31, 0, 2, 3", "now": "None", "assertEqual": "dimarts, trenta-u de gener de dos mil disset"},
|
||||||
|
"2": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2017, 1, 1, 0, 2, 3", "assertEqual": "diumenge, quatre de febrer de dos mil divuit"},
|
||||||
|
"3": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 1, 1, 0, 2, 3", "assertEqual": "diumenge, quatre de febrer"},
|
||||||
|
"4": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 1, 0, 2, 3", "assertEqual": "diumenge, dia quatre"},
|
||||||
|
"5": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 3, 0, 2, 3", "assertEqual": "demà"},
|
||||||
|
"6": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 4, 0, 2, 3", "assertEqual": "avui"},
|
||||||
|
"7": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 5, 0, 2, 3", "assertEqual": "ahir"},
|
||||||
|
"8": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 6, 0, 2, 3", "assertEqual": "diumenge, quatre de febrer"},
|
||||||
|
"9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "diumenge, quatre de febrer de dos mil divuit"}
|
||||||
|
},
|
||||||
|
"test_nice_date_time": {
|
||||||
|
"1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", "assertEqual": "dimarts, trenta-u de gener de dos mil disset a la una i vint-i-dos de la tarda"},
|
||||||
|
"2": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "True", "use_ampm": "False", "assertEqual": "dimarts, trenta-u de gener de dos mil disset a les tretze i vint-i-dos"}
|
||||||
|
}
|
||||||
|
}
|
||||||
1
lingua_franca/res/text/ca-es/day.word
Normal file
1
lingua_franca/res/text/ca-es/day.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
dia
|
||||||
1
lingua_franca/res/text/ca-es/days.word
Normal file
1
lingua_franca/res/text/ca-es/days.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
dies
|
||||||
1
lingua_franca/res/text/ca-es/hour.word
Normal file
1
lingua_franca/res/text/ca-es/hour.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
hora
|
||||||
1
lingua_franca/res/text/ca-es/hours.word
Normal file
1
lingua_franca/res/text/ca-es/hours.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
hores
|
||||||
1
lingua_franca/res/text/ca-es/minute.word
Normal file
1
lingua_franca/res/text/ca-es/minute.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
minut
|
||||||
1
lingua_franca/res/text/ca-es/minutes.word
Normal file
1
lingua_franca/res/text/ca-es/minutes.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
minuts
|
||||||
109
lingua_franca/res/text/ca-es/normalize.json
Normal file
109
lingua_franca/res/text/ca-es/normalize.json
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
{
|
||||||
|
"lowercase": false,
|
||||||
|
"numbers_to_digits": true,
|
||||||
|
"expand_contractions": false,
|
||||||
|
"remove_symbols": true,
|
||||||
|
"remove_accents": false,
|
||||||
|
"remove_articles": false,
|
||||||
|
"remove_stopwords": true,
|
||||||
|
"contractions": {},
|
||||||
|
"word_replacements": {},
|
||||||
|
"number_replacements": {
|
||||||
|
"catorze": "14",
|
||||||
|
"cent": "100",
|
||||||
|
"cents": "100",
|
||||||
|
"cinc": "5",
|
||||||
|
"cinc-centes": "500",
|
||||||
|
"cinc-cents": "500",
|
||||||
|
"cinquanta": "50",
|
||||||
|
"deu": "10",
|
||||||
|
"dinou": "19",
|
||||||
|
"setze": "16",
|
||||||
|
"disset": "17",
|
||||||
|
"dihuit": "18",
|
||||||
|
"divuit": "18",
|
||||||
|
"dos": "2",
|
||||||
|
"dos-centes": "200",
|
||||||
|
"dos-cents": "200",
|
||||||
|
"dotze": "12",
|
||||||
|
"dues": "2",
|
||||||
|
"dues-centes": "200",
|
||||||
|
"huitanta": "80",
|
||||||
|
"huit": "8",
|
||||||
|
"huit-centes": "800",
|
||||||
|
"huit-cents": "800",
|
||||||
|
"mil": "1000",
|
||||||
|
"milió": "1000000",
|
||||||
|
"nou": "9",
|
||||||
|
"nou-centes": "900",
|
||||||
|
"nou-cents": "900",
|
||||||
|
"noranta": "90",
|
||||||
|
"onze": "11",
|
||||||
|
"primer": "1",
|
||||||
|
"primera": "1",
|
||||||
|
"quaranta": "40",
|
||||||
|
"quatre": "4",
|
||||||
|
"quatre-centes": "400",
|
||||||
|
"quatre-cents": "400",
|
||||||
|
"quinze": "15",
|
||||||
|
"segon": "2",
|
||||||
|
"segona": "2",
|
||||||
|
"seixanta": "60",
|
||||||
|
"set": "7",
|
||||||
|
"set-centes": "700",
|
||||||
|
"set-cents": "700",
|
||||||
|
"setanta": "70",
|
||||||
|
"sis": "6",
|
||||||
|
"sis-centes": "600",
|
||||||
|
"sis-cents": "600",
|
||||||
|
"tercer": "3",
|
||||||
|
"trenta": "30",
|
||||||
|
"tres": "3",
|
||||||
|
"tres-centes": "300",
|
||||||
|
"tres-cents": "300",
|
||||||
|
"tretze": "13",
|
||||||
|
"u": "1",
|
||||||
|
"un": "1",
|
||||||
|
"una": "1",
|
||||||
|
"vint": "20",
|
||||||
|
"vuitanta": "80",
|
||||||
|
"vuit": "8",
|
||||||
|
"vuit-centes": "800",
|
||||||
|
"vuit-cents": "800",
|
||||||
|
"zero": "0"
|
||||||
|
},
|
||||||
|
"stopwords": [
|
||||||
|
"de",
|
||||||
|
"del",
|
||||||
|
"dels",
|
||||||
|
"ell",
|
||||||
|
"ella",
|
||||||
|
"ells",
|
||||||
|
"elles",
|
||||||
|
"jo",
|
||||||
|
"i",
|
||||||
|
"al",
|
||||||
|
"dins la",
|
||||||
|
"a la",
|
||||||
|
"nosaltres",
|
||||||
|
"dins el",
|
||||||
|
"para",
|
||||||
|
"aquest",
|
||||||
|
"aquesta",
|
||||||
|
"aquests",
|
||||||
|
"aquestes",
|
||||||
|
"aquell",
|
||||||
|
"aquella",
|
||||||
|
"aquells",
|
||||||
|
"aquelles",
|
||||||
|
"que"
|
||||||
|
],
|
||||||
|
"articles": [
|
||||||
|
"el",
|
||||||
|
"la",
|
||||||
|
"l",
|
||||||
|
"els",
|
||||||
|
"les",
|
||||||
|
"los"
|
||||||
|
]
|
||||||
|
}
|
||||||
1
lingua_franca/res/text/ca-es/or.word
Normal file
1
lingua_franca/res/text/ca-es/or.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
o
|
||||||
1
lingua_franca/res/text/ca-es/second.word
Normal file
1
lingua_franca/res/text/ca-es/second.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
segon
|
||||||
1
lingua_franca/res/text/ca-es/seconds.word
Normal file
1
lingua_franca/res/text/ca-es/seconds.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
segons
|
||||||
1
lingua_franca/res/text/cs-cz/and.word
Normal file
1
lingua_franca/res/text/cs-cz/and.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
a
|
||||||
129
lingua_franca/res/text/cs-cz/date_time.json
Normal file
129
lingua_franca/res/text/cs-cz/date_time.json
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
{
|
||||||
|
"decade_format": {
|
||||||
|
"1": {"match": "^\\d$", "format": "{x}"},
|
||||||
|
"2": {"match": "^1\\d$", "format": "{xx}"},
|
||||||
|
"3": {"match": "^\\d0$", "format": "{x0}"},
|
||||||
|
"4": {"match": "^[2-9]\\d$", "format": "{x0} {x}"},
|
||||||
|
"default": "{number}"
|
||||||
|
},
|
||||||
|
"hundreds_format": {
|
||||||
|
"1": {"match": "^\\d{3}$", "format": "{x_in_x00} sto"},
|
||||||
|
"default": "{number}"
|
||||||
|
},
|
||||||
|
"thousand_format": {
|
||||||
|
"1": {"match": "^\\d00\\d$", "format": "{x_in_x000} tisíc"},
|
||||||
|
"2": {"match": "^1\\d00$", "format": "{xx_in_xx00} sto"},
|
||||||
|
"3": {"match": "^\\d{2}00$", "format": "{x0_in_x000} {x_in_x00} sto"},
|
||||||
|
"4": {"match": "^(1\\d{3})|(\\d0\\d{2})$", "format": "{xx_in_xx00}"},
|
||||||
|
"5": {"match": "^\\d{4}$", "format": "{x0_in_x000} {x_in_x00}"},
|
||||||
|
"default": "{number}"
|
||||||
|
},
|
||||||
|
"year_format": {
|
||||||
|
"1": {"match": "^\\d\\d?$", "format": "{formatted_decade} {bc}"},
|
||||||
|
"2": {"match": "^\\d00$", "format": "{formatted_hundreds} {bc}"},
|
||||||
|
"3": {"match": "^\\d{3}$", "format": "{formatted_hundreds} {formatted_decade} {bc}"},
|
||||||
|
"4": {"match": "^\\d{2}00$", "format": "{formatted_thousand} {bc}"},
|
||||||
|
"5": {"match": "^\\d00\\d$", "format": "{formatted_thousand} {formatted_decade} {bc}"},
|
||||||
|
"6": {"match": "^\\d{2}0\\d$", "format": "{formatted_thousand} {formatted_decade} {bc}"},
|
||||||
|
"7": {"match": "^\\d{4}$", "format": "{formatted_thousand} {formatted_decade} {bc}"},
|
||||||
|
"default": "{year} {bc}",
|
||||||
|
"bc": "b.c."
|
||||||
|
},
|
||||||
|
"date_format": {
|
||||||
|
"date_full": "{weekday}, {month} {day}, {formatted_year}",
|
||||||
|
"date_full_no_year": "{weekday}, {month} {day}",
|
||||||
|
"date_full_no_year_month": "{weekday}, {day}",
|
||||||
|
"today": "dnes",
|
||||||
|
"tomorrow": "zítra",
|
||||||
|
"yesterday": "včera"
|
||||||
|
},
|
||||||
|
"date_time_format": {
|
||||||
|
"date_time": "{formatted_date} v {formatted_time}"
|
||||||
|
},
|
||||||
|
"weekday": {
|
||||||
|
"0": "pondělí",
|
||||||
|
"1": "úterý",
|
||||||
|
"2": "středa",
|
||||||
|
"3": "čtvrtek",
|
||||||
|
"4": "pátek",
|
||||||
|
"5": "sobota",
|
||||||
|
"6": "neděle"
|
||||||
|
},
|
||||||
|
"date": {
|
||||||
|
"1": "prvního",
|
||||||
|
"2": "druhého",
|
||||||
|
"3": "třetího",
|
||||||
|
"4": "čtvrtého",
|
||||||
|
"5": "pátého",
|
||||||
|
"6": "šestého",
|
||||||
|
"7": "sedmého",
|
||||||
|
"8": "osmého",
|
||||||
|
"9": "devátého",
|
||||||
|
"10": "desátého",
|
||||||
|
"11": "jedenáctého",
|
||||||
|
"12": "dvanáctého",
|
||||||
|
"13": "třináctého",
|
||||||
|
"14": "čtrnáctého",
|
||||||
|
"15": "patnáctého",
|
||||||
|
"16": "šestnáctého",
|
||||||
|
"17": "sedmnáctého",
|
||||||
|
"18": "osmnáctého",
|
||||||
|
"19": "devatenáctého",
|
||||||
|
"20": "dvacátého",
|
||||||
|
"21": "dvacátého-prvního",
|
||||||
|
"22": "dvacátého-druhého",
|
||||||
|
"23": "dvacátého-třetího",
|
||||||
|
"24": "dvacátého-čtvrtého",
|
||||||
|
"25": "dvacátého-pátého",
|
||||||
|
"26": "dvacátého-šestého",
|
||||||
|
"27": "dvacátého-sedmého",
|
||||||
|
"28": "dvacátého-osmého",
|
||||||
|
"29": "dvacátého-devátého",
|
||||||
|
"30": "třicátého",
|
||||||
|
"31": "třicátého-prvního"
|
||||||
|
},
|
||||||
|
"month": {
|
||||||
|
"1": "leden",
|
||||||
|
"2": "únor",
|
||||||
|
"3": "březen",
|
||||||
|
"4": "duben",
|
||||||
|
"5": "květen",
|
||||||
|
"6": "červen",
|
||||||
|
"7": "červenec",
|
||||||
|
"8": "srpen",
|
||||||
|
"9": "září",
|
||||||
|
"10": "říjen",
|
||||||
|
"11": "listopad",
|
||||||
|
"12": "prosinec"
|
||||||
|
},
|
||||||
|
"number": {
|
||||||
|
"0": "nula",
|
||||||
|
"1": "jedna",
|
||||||
|
"2": "dva",
|
||||||
|
"3": "tři",
|
||||||
|
"4": "čtyři",
|
||||||
|
"5": "pět",
|
||||||
|
"6": "šest",
|
||||||
|
"7": "sedm",
|
||||||
|
"8": "osm",
|
||||||
|
"9": "devět",
|
||||||
|
"10": "deset",
|
||||||
|
"11": "jedenáct",
|
||||||
|
"12": "dvanáct",
|
||||||
|
"13": "třináct",
|
||||||
|
"14": "čtrnáct",
|
||||||
|
"15": "patnáct",
|
||||||
|
"16": "šestnáct",
|
||||||
|
"17": "sedmnáct",
|
||||||
|
"18": "osmnáct",
|
||||||
|
"19": "devatenáct",
|
||||||
|
"20": "dvacet",
|
||||||
|
"30": "třicet",
|
||||||
|
"40": "čtyřicet",
|
||||||
|
"50": "padesát",
|
||||||
|
"60": "šedesát",
|
||||||
|
"70": "sedmdesát",
|
||||||
|
"80": "osmdesát",
|
||||||
|
"90": "devadesát"
|
||||||
|
}
|
||||||
|
}
|
||||||
43
lingua_franca/res/text/cs-cz/date_time_test.json
Normal file
43
lingua_franca/res/text/cs-cz/date_time_test.json
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
{
|
||||||
|
"test_nice_year": {
|
||||||
|
"1": {"datetime_param": "1, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "jedna b.c." },
|
||||||
|
"2": {"datetime_param": "10, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "deset b.c." },
|
||||||
|
"3": {"datetime_param": "92, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "devadesát dva b.c." },
|
||||||
|
"4": {"datetime_param": "803, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "osm sto tři" },
|
||||||
|
"5": {"datetime_param": "811, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "osm sto jedenáct" },
|
||||||
|
"6": {"datetime_param": "454, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "čtyři sto padesát čtyři" },
|
||||||
|
"7": {"datetime_param": "1005, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "jedna tisíc pět" },
|
||||||
|
"8": {"datetime_param": "1012, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "deset dvanáct" },
|
||||||
|
"9": {"datetime_param": "1046, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "deset čtyřicet šest" },
|
||||||
|
"10": {"datetime_param": "1807, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "osmnáct sedm" },
|
||||||
|
"11": {"datetime_param": "1717, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "sedmnáct sedmnáct" },
|
||||||
|
"12": {"datetime_param": "1988, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "devatenáct osmdesát osm"},
|
||||||
|
"13": {"datetime_param": "2009, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dva tisíc devět"},
|
||||||
|
"14": {"datetime_param": "2018, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dvacet osmnáct"},
|
||||||
|
"15": {"datetime_param": "2021, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dvacet dvacet jedna"},
|
||||||
|
"16": {"datetime_param": "2030, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dvacet třicet"},
|
||||||
|
"17": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "dvacet jedna sto" },
|
||||||
|
"18": {"datetime_param": "1000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "jedna tisíc" },
|
||||||
|
"19": {"datetime_param": "2000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dva tisíc" },
|
||||||
|
"20": {"datetime_param": "3120, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "třicet jedna dvacet b.c." },
|
||||||
|
"21": {"datetime_param": "3241, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "třicet dva čtyřicet jedna b.c." },
|
||||||
|
"22": {"datetime_param": "5200, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "padesát dva sto" },
|
||||||
|
"23": {"datetime_param": "1100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "jedenáct sto" },
|
||||||
|
"24": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "dvacet jedna sto" }
|
||||||
|
},
|
||||||
|
"test_nice_date": {
|
||||||
|
"1": {"datetime_param": "2017, 1, 31, 0, 2, 3", "now": "None", "assertEqual": "úterý, leden třicátého-prvního, dvacet sedmnáct"},
|
||||||
|
"2": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2017, 1, 1, 0, 2, 3", "assertEqual": "neděle, únor čtvrtého, dvacet osmnáct"},
|
||||||
|
"3": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 1, 1, 0, 2, 3", "assertEqual": "neděle, únor čtvrtého"},
|
||||||
|
"4": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 1, 0, 2, 3", "assertEqual": "neděle, čtvrtého"},
|
||||||
|
"5": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 3, 0, 2, 3", "assertEqual": "zítra"},
|
||||||
|
"6": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 4, 0, 2, 3", "assertEqual": "dnes"},
|
||||||
|
"7": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 5, 0, 2, 3", "assertEqual": "včera"},
|
||||||
|
"8": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 6, 0, 2, 3", "assertEqual": "neděle, únor čtvrtého"},
|
||||||
|
"9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "neděle, únor čtvrtého, dvacet osmnáct"}
|
||||||
|
},
|
||||||
|
"test_nice_date_time": {
|
||||||
|
"1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", "assertEqual": "úterý, leden třicátého-prvního, dvacet sedmnáct v jedna dvacet dva p.m."},
|
||||||
|
"2": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "True", "use_ampm": "False", "assertEqual": "úterý, leden třicátého-prvního, dvacet sedmnáct v třináct dvacet dva"}
|
||||||
|
}
|
||||||
|
}
|
||||||
1
lingua_franca/res/text/cs-cz/day.word
Normal file
1
lingua_franca/res/text/cs-cz/day.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
den
|
||||||
1
lingua_franca/res/text/cs-cz/days.word
Normal file
1
lingua_franca/res/text/cs-cz/days.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
dní
|
||||||
1
lingua_franca/res/text/cs-cz/hour.word
Normal file
1
lingua_franca/res/text/cs-cz/hour.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
hodina
|
||||||
1
lingua_franca/res/text/cs-cz/hours.word
Normal file
1
lingua_franca/res/text/cs-cz/hours.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
hodiny
|
||||||
1
lingua_franca/res/text/cs-cz/minute.word
Normal file
1
lingua_franca/res/text/cs-cz/minute.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
minuta
|
||||||
1
lingua_franca/res/text/cs-cz/minutes.word
Normal file
1
lingua_franca/res/text/cs-cz/minutes.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
minuty
|
||||||
46
lingua_franca/res/text/cs-cz/normalize.json
Normal file
46
lingua_franca/res/text/cs-cz/normalize.json
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
{
|
||||||
|
"lowercase": false,
|
||||||
|
"numbers_to_digits": true,
|
||||||
|
"expand_contractions": true,
|
||||||
|
"remove_symbols": false,
|
||||||
|
"remove_accents": false,
|
||||||
|
"remove_articles": false,
|
||||||
|
"remove_stopwords": false,
|
||||||
|
"contractions": {},
|
||||||
|
"word_replacements": {},
|
||||||
|
"number_replacements": {
|
||||||
|
"nula": "0",
|
||||||
|
"jedna": "1",
|
||||||
|
"dva": "2",
|
||||||
|
"dvě": "2",
|
||||||
|
"tři": "3",
|
||||||
|
"čtyři": "4",
|
||||||
|
"pět": "5",
|
||||||
|
"šest": "6",
|
||||||
|
"sedm": "7",
|
||||||
|
"sedum": "7",
|
||||||
|
"osm": "8",
|
||||||
|
"osum": "8",
|
||||||
|
"devět": "9",
|
||||||
|
"deset": "10",
|
||||||
|
"jedenáct": "11",
|
||||||
|
"dvanáct": "12",
|
||||||
|
"třináct": "13",
|
||||||
|
"čtrnáct": "14",
|
||||||
|
"patnáct": "15",
|
||||||
|
"šestnáct": "16",
|
||||||
|
"sedmnáct": "17",
|
||||||
|
"osmnáct": "18",
|
||||||
|
"devatenáct": "19",
|
||||||
|
"dvacet": "20",
|
||||||
|
"třicet": "30",
|
||||||
|
"čtyřicet": "40",
|
||||||
|
"padesát": "50",
|
||||||
|
"šedesát": "60",
|
||||||
|
"sedmdesát": "70",
|
||||||
|
"osmdesát": "80",
|
||||||
|
"devadesát": "90"
|
||||||
|
},
|
||||||
|
"stopwords": [],
|
||||||
|
"articles": []
|
||||||
|
}
|
||||||
1
lingua_franca/res/text/cs-cz/or.word
Normal file
1
lingua_franca/res/text/cs-cz/or.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
nebo
|
||||||
1
lingua_franca/res/text/cs-cz/second.word
Normal file
1
lingua_franca/res/text/cs-cz/second.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
sekunda
|
||||||
1
lingua_franca/res/text/cs-cz/seconds.word
Normal file
1
lingua_franca/res/text/cs-cz/seconds.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
sekundy
|
||||||
1
lingua_franca/res/text/da-dk/and.word
Normal file
1
lingua_franca/res/text/da-dk/and.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
og
|
||||||
132
lingua_franca/res/text/da-dk/date_time.json
Normal file
132
lingua_franca/res/text/da-dk/date_time.json
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
{
|
||||||
|
"decade_format": {
|
||||||
|
"1": {"match": "^1$", "format": "et"},
|
||||||
|
"2": {"match": "^\\d$", "format": "{x}"},
|
||||||
|
"3": {"match": "^1\\d$", "format": "{xx}"},
|
||||||
|
"4": {"match": "^\\d0$", "format": "{x0}"},
|
||||||
|
"5": {"match": "^[2-9]\\d$", "format": "{x} og {x0}"},
|
||||||
|
"default": "{number}"
|
||||||
|
},
|
||||||
|
"hundreds_format": {
|
||||||
|
"1": {"match": "^1\\d{2}$", "format": "et hundred"},
|
||||||
|
"2": {"match": "^\\d{3}$", "format": "{x_in_x00} hundred"},
|
||||||
|
"default": "{number}"
|
||||||
|
},
|
||||||
|
"thousand_format": {
|
||||||
|
"1": {"match": "^1[1-9]\\d{2}$", "format": "{xx_in_xx00} hundred"},
|
||||||
|
"2": {"match": "^1\\d{3}$", "format": "et tusind"},
|
||||||
|
"3": {"match": "^\\d{4}$", "format": "{x_in_x000} tusind"},
|
||||||
|
"default": "{number}"
|
||||||
|
},
|
||||||
|
"year_format": {
|
||||||
|
"1": {"match": "^\\d\\d?$", "format": "{formatted_decade} {bc}"},
|
||||||
|
"2": {"match": "^\\d00$", "format": "{formatted_hundreds} {bc}"},
|
||||||
|
"3": {"match": "^\\d{3}$", "format": "{formatted_hundreds} og {formatted_decade} {bc}"},
|
||||||
|
"4": {"match": "^(1\\d00)|([2-9]000)$", "format": "{formatted_thousand} {bc}"},
|
||||||
|
"5": {"match": "^(1\\d{3})|(\\d0\\d{2})$", "format": "{formatted_thousand} og {formatted_decade} {bc}"},
|
||||||
|
"6": {"match": "^\\d{4}$", "format": "{formatted_thousand} {formatted_hundreds} og {formatted_decade} {bc}"},
|
||||||
|
"default": "{year} {bc}",
|
||||||
|
"bc": "f.kr."
|
||||||
|
},
|
||||||
|
"date_format": {
|
||||||
|
"date_full": "{weekday}, den {day} {month}, {formatted_year}",
|
||||||
|
"date_full_no_year": "{weekday}, den {day} {month}",
|
||||||
|
"date_full_no_year_month": "{weekday}, den {day}",
|
||||||
|
"today": "i dag",
|
||||||
|
"tomorrow": "i morgen",
|
||||||
|
"yesterday": "i går"
|
||||||
|
},
|
||||||
|
"date_time_format": {
|
||||||
|
"date_time": "{formatted_date} klokken {formatted_time}"
|
||||||
|
},
|
||||||
|
"weekday": {
|
||||||
|
"0": "mandag",
|
||||||
|
"1": "tirsdag",
|
||||||
|
"2": "onsdag",
|
||||||
|
"3": "torsdag",
|
||||||
|
"4": "fredag",
|
||||||
|
"5": "lørdag",
|
||||||
|
"6": "søndag"
|
||||||
|
},
|
||||||
|
"date": {
|
||||||
|
"1": "første",
|
||||||
|
"2": "anden",
|
||||||
|
"3": "tredie",
|
||||||
|
"4": "fjerde",
|
||||||
|
"5": "femte",
|
||||||
|
"6": "sjette",
|
||||||
|
"7": "syvende",
|
||||||
|
"8": "ottende",
|
||||||
|
"9": "ninende",
|
||||||
|
"10": "tiende",
|
||||||
|
"11": "elvte",
|
||||||
|
"12": "tolvte",
|
||||||
|
"13": "trettende",
|
||||||
|
"14": "fjortende",
|
||||||
|
"15": "femtende",
|
||||||
|
"16": "sekstende",
|
||||||
|
"17": "syttende",
|
||||||
|
"18": "attende",
|
||||||
|
"19": "nittende",
|
||||||
|
"20": "tyvende",
|
||||||
|
"21": "en og tyvende",
|
||||||
|
"22": "to og tyvende",
|
||||||
|
"23": "tre og tyvende",
|
||||||
|
"24": "fire og tyvende",
|
||||||
|
"25": "fem og tyvende",
|
||||||
|
"26": "seks og tyvende",
|
||||||
|
"27": "syv og tyvende",
|
||||||
|
"28": "otte og tyvende",
|
||||||
|
"29": "ni og tyvende",
|
||||||
|
"30": "tredivte",
|
||||||
|
"31": "en og tredivte"
|
||||||
|
},
|
||||||
|
"month": {
|
||||||
|
"1": "januar",
|
||||||
|
"2": "februar",
|
||||||
|
"3": "marts",
|
||||||
|
"4": "april",
|
||||||
|
"5": "maj",
|
||||||
|
"6": "juni",
|
||||||
|
"7": "juli",
|
||||||
|
"8": "august",
|
||||||
|
"9": "september",
|
||||||
|
"10": "oktober",
|
||||||
|
"11": "november",
|
||||||
|
"12": "december"
|
||||||
|
},
|
||||||
|
"number": {
|
||||||
|
"0": "nul",
|
||||||
|
"1": "en",
|
||||||
|
"2": "to",
|
||||||
|
"3": "tre",
|
||||||
|
"4": "fire",
|
||||||
|
"5": "fem",
|
||||||
|
"6": "seks",
|
||||||
|
"7": "syv",
|
||||||
|
"8": "otte",
|
||||||
|
"9": "ni",
|
||||||
|
"10": "ti",
|
||||||
|
"11": "elve",
|
||||||
|
"12": "tolv",
|
||||||
|
"13": "tretten",
|
||||||
|
"14": "fjorten",
|
||||||
|
"15": "femten",
|
||||||
|
"16": "seksten",
|
||||||
|
"17": "sytten",
|
||||||
|
"18": "atten",
|
||||||
|
"19": "nitten",
|
||||||
|
"20": "tyve",
|
||||||
|
"30": "tredive",
|
||||||
|
"40": "fyrre",
|
||||||
|
"50": "halvtreds",
|
||||||
|
"60": "treds",
|
||||||
|
"70": "halvfjerds",
|
||||||
|
"80": "firs",
|
||||||
|
"90": "halvfems",
|
||||||
|
"100": "hundrede",
|
||||||
|
"1000": "tusind",
|
||||||
|
"2000": "to tusind"
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
32
lingua_franca/res/text/da-dk/date_time_test.json
Normal file
32
lingua_franca/res/text/da-dk/date_time_test.json
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
{
|
||||||
|
"test_nice_year": {
|
||||||
|
"1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "to tusind og sytten"},
|
||||||
|
"2": {"datetime_param": "1984, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "nitten hundred og fire og firs"},
|
||||||
|
"3": {"datetime_param": "1906, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "nitten hundred og seks"},
|
||||||
|
"4": {"datetime_param": "1802, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "atten hundred og to" },
|
||||||
|
"5": {"datetime_param": "806, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "otte hundred og seks" },
|
||||||
|
"6": {"datetime_param": "1800, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "atten hundred" },
|
||||||
|
"7": {"datetime_param": "1, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "et" },
|
||||||
|
"8": {"datetime_param": "103, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "et hundred og tre" },
|
||||||
|
"9": {"datetime_param": "1000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "et tusind" },
|
||||||
|
"10": {"datetime_param": "2000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "to tusind" },
|
||||||
|
"11": {"datetime_param": "99, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "ni og halvfems f.kr." },
|
||||||
|
"12": {"datetime_param": "5, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "fem f.kr." },
|
||||||
|
"13": {"datetime_param": "3120, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "tre tusind et hundred og tyve f.kr." },
|
||||||
|
"14": {"datetime_param": "3241, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "tre tusind to hundred og en og fyrre f.kr." }
|
||||||
|
},
|
||||||
|
"test_nice_date": {
|
||||||
|
"1": {"datetime_param": "2017, 1, 31, 0, 2, 3", "now": "None", "assertEqual": "tirsdag, den en og tredivte januar, to tusind og sytten"},
|
||||||
|
"2": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2017, 1, 1, 0, 2, 3", "assertEqual": "søndag, den fjerde februar, to tusind og atten"},
|
||||||
|
"3": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 1, 1, 0, 2, 3", "assertEqual": "søndag, den fjerde februar"},
|
||||||
|
"4": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 1, 0, 2, 3", "assertEqual": "søndag, den fjerde"},
|
||||||
|
"5": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 3, 0, 2, 3", "assertEqual": "i morgen"},
|
||||||
|
"6": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 4, 0, 2, 3", "assertEqual": "i dag"},
|
||||||
|
"7": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 5, 0, 2, 3", "assertEqual": "i går"},
|
||||||
|
"8": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 6, 0, 2, 3", "assertEqual": "søndag, den fjerde februar"},
|
||||||
|
"9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "søndag, den fjerde februar, to tusind og atten"}
|
||||||
|
},
|
||||||
|
"test_nice_date_time": {
|
||||||
|
"1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", "assertEqual": "tirsdag, den en og tredivte januar, to tusind og sytten klokken et toogtyve om eftermiddagen"}
|
||||||
|
}
|
||||||
|
}
|
||||||
1
lingua_franca/res/text/da-dk/day.word
Normal file
1
lingua_franca/res/text/da-dk/day.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
dag
|
||||||
1
lingua_franca/res/text/da-dk/days.word
Normal file
1
lingua_franca/res/text/da-dk/days.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
dage
|
||||||
1
lingua_franca/res/text/da-dk/hour.word
Normal file
1
lingua_franca/res/text/da-dk/hour.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
time
|
||||||
1
lingua_franca/res/text/da-dk/hours.word
Normal file
1
lingua_franca/res/text/da-dk/hours.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
timer
|
||||||
1
lingua_franca/res/text/da-dk/minute.word
Normal file
1
lingua_franca/res/text/da-dk/minute.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
minut
|
||||||
1
lingua_franca/res/text/da-dk/minutes.word
Normal file
1
lingua_franca/res/text/da-dk/minutes.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
minuter
|
||||||
1
lingua_franca/res/text/da-dk/or.word
Normal file
1
lingua_franca/res/text/da-dk/or.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
eller
|
||||||
1
lingua_franca/res/text/da-dk/second.word
Normal file
1
lingua_franca/res/text/da-dk/second.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
sekund
|
||||||
1
lingua_franca/res/text/da-dk/seconds.word
Normal file
1
lingua_franca/res/text/da-dk/seconds.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
sekunder
|
||||||
1
lingua_franca/res/text/de-de/and.word
Normal file
1
lingua_franca/res/text/de-de/and.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
und
|
||||||
136
lingua_franca/res/text/de-de/date_time.json
Normal file
136
lingua_franca/res/text/de-de/date_time.json
Normal file
@@ -0,0 +1,136 @@
|
|||||||
|
{
|
||||||
|
"decade_format": {
|
||||||
|
"1": {"match": "^\\d$", "format": "{x}"},
|
||||||
|
"2": {"match": "^1\\d$", "format": "{xx}"},
|
||||||
|
"3": {"match": "^\\d0$", "format": "{x0}"},
|
||||||
|
"4": {"match": "^[2-9]\\d$", "format": "{x} und {x0}"},
|
||||||
|
"default": "{number}"
|
||||||
|
},
|
||||||
|
"hundreds_format": {
|
||||||
|
"1": {"match": "^1\\d{2}$", "format": "hundert"},
|
||||||
|
"2": {"match": "^\\d{3}$", "format": "{x_in_x00} hundert"},
|
||||||
|
"default": "{number}"
|
||||||
|
},
|
||||||
|
"thousand_format": {
|
||||||
|
"1": {"match": "^10\\d\\d$", "format": "tausend"},
|
||||||
|
"2": {"match": "^\\d0\\d{2}$", "format": "{x_in_x000} tausend"},
|
||||||
|
"3": {"match": "^1\\d00$", "format": "{xx_in_xx00} hundert"},
|
||||||
|
"4": {"match": "^\\d{2}00$", "format": "{x_in_x000} tausend {x_in_x00} hundert"},
|
||||||
|
"5": {"match": "^\\d0\\d\\d$", "format": "{x_in_x000} tausend"},
|
||||||
|
"6": {"match": "^1\\d{3}$", "format": "{xx_in_xx00}"},
|
||||||
|
"7": {"match": "^\\d{4}$", "format": "{x_in_x000} tausend {x_in_x00} hundert"},
|
||||||
|
"default": "{number}"
|
||||||
|
},
|
||||||
|
"year_format": {
|
||||||
|
"1": {"match": "^1$", "format": "eins {bc}"},
|
||||||
|
"2": {"match": "^\\d{1}?$", "format": "{formatted_decade} {bc}"},
|
||||||
|
"3": {"match": "^\\d{2}?$", "format": "{formatted_decade} {bc}"},
|
||||||
|
"4": {"match": "^\\d00$", "format": "{formatted_hundreds} {bc}"},
|
||||||
|
"5": {"match": "^\\d{3}$", "format": "{formatted_hundreds} {formatted_decade} {bc}"},
|
||||||
|
"6": {"match": "^\\d{2}00$", "format": "{formatted_thousand} {bc}"},
|
||||||
|
"7": {"match": "^\\d00\\d$", "format": "{formatted_thousand} {formatted_decade} {bc}"},
|
||||||
|
"8": {"match": "^\\d{2}0\\d$", "format": "{formatted_thousand} hundert {formatted_decade} {bc}"},
|
||||||
|
"9": {"match": "^1[2-9]\\d{2}$", "format": "{formatted_thousand} hundert {formatted_decade} {bc}"},
|
||||||
|
"10": {"match": "^1\\d{3}$", "format": "{formatted_thousand} {formatted_decade} {bc}"},
|
||||||
|
"11": {"match": "^\\d{4}$", "format": "{formatted_thousand} {formatted_decade} {bc}"},
|
||||||
|
"default": "{year} {bc}",
|
||||||
|
"bc": "v.d.Z."
|
||||||
|
},
|
||||||
|
"date_format": {
|
||||||
|
"date_full": "{weekday}, {day} {month}, {formatted_year}",
|
||||||
|
"date_full_no_year": "{weekday}, {day} {month}",
|
||||||
|
"date_full_no_year_month": "{weekday}, {day}",
|
||||||
|
"today": "heute",
|
||||||
|
"tomorrow": "morgen",
|
||||||
|
"yesterday": "gestern"
|
||||||
|
},
|
||||||
|
"date_time_format": {
|
||||||
|
"date_time": "{formatted_date} um {formatted_time}"
|
||||||
|
},
|
||||||
|
"weekday": {
|
||||||
|
"0": "Montag",
|
||||||
|
"1": "Dienstag",
|
||||||
|
"2": "Mittwoch",
|
||||||
|
"3": "Donnerstag",
|
||||||
|
"4": "Freitag",
|
||||||
|
"5": "Samstag",
|
||||||
|
"6": "Sonntag"
|
||||||
|
},
|
||||||
|
"date": {
|
||||||
|
"1": "erster",
|
||||||
|
"2": "zweiter",
|
||||||
|
"3": "dritter",
|
||||||
|
"4": "vierter",
|
||||||
|
"5": "fünfter",
|
||||||
|
"6": "sechster",
|
||||||
|
"7": "siebter",
|
||||||
|
"8": "achter",
|
||||||
|
"9": "neunter",
|
||||||
|
"10": "zehnter",
|
||||||
|
"11": "elfter",
|
||||||
|
"12": "zwölfter",
|
||||||
|
"13": "dreizehnter",
|
||||||
|
"14": "vierzehnter",
|
||||||
|
"15": "fünfzehnter",
|
||||||
|
"16": "sechzehnter",
|
||||||
|
"17": "siebzehnter",
|
||||||
|
"18": "achtzehnter",
|
||||||
|
"19": "neunzehnter",
|
||||||
|
"20": "zwanzigster",
|
||||||
|
"21": "einundzwanzigster",
|
||||||
|
"22": "zweiundzwanzigster",
|
||||||
|
"23": "dreiundzwanzigster",
|
||||||
|
"24": "vierundzwanzigster",
|
||||||
|
"25": "fünfundzwanzigster",
|
||||||
|
"26": "sechsundzwanzigster",
|
||||||
|
"27": "siebenundzwanzigster",
|
||||||
|
"28": "achtundzwanzigster",
|
||||||
|
"29": "neunundzwanzigster",
|
||||||
|
"30": "dreißigster",
|
||||||
|
"31": "einunddreißigster"
|
||||||
|
},
|
||||||
|
"month": {
|
||||||
|
"1": "Januar",
|
||||||
|
"2": "Februar",
|
||||||
|
"3": "März",
|
||||||
|
"4": "April",
|
||||||
|
"5": "Mai",
|
||||||
|
"6": "Juni",
|
||||||
|
"7": "Juli",
|
||||||
|
"8": "August",
|
||||||
|
"9": "September",
|
||||||
|
"10": "Oktober",
|
||||||
|
"11": "November",
|
||||||
|
"12": "Dezember"
|
||||||
|
},
|
||||||
|
"number": {
|
||||||
|
"0": "null",
|
||||||
|
"1": "ein",
|
||||||
|
"2": "zwei",
|
||||||
|
"3": "drei",
|
||||||
|
"4": "vier",
|
||||||
|
"5": "fünf",
|
||||||
|
"6": "sechs",
|
||||||
|
"7": "sieben",
|
||||||
|
"8": "acht",
|
||||||
|
"9": "neun",
|
||||||
|
"10": "zehn",
|
||||||
|
"11": "elf",
|
||||||
|
"12": "zwölf",
|
||||||
|
"13": "dreizehn",
|
||||||
|
"14": "vierzehn",
|
||||||
|
"15": "fünfzehn",
|
||||||
|
"16": "sechzehn",
|
||||||
|
"17": "siebzehn",
|
||||||
|
"18": "achtzehn",
|
||||||
|
"19": "neunzehn",
|
||||||
|
"20": "zwanzig",
|
||||||
|
"30": "dreißig",
|
||||||
|
"40": "vierzig",
|
||||||
|
"50": "fünfzig",
|
||||||
|
"60": "sechzig",
|
||||||
|
"70": "siebzig",
|
||||||
|
"80": "achtzig",
|
||||||
|
"90": "neunzig"
|
||||||
|
}
|
||||||
|
}
|
||||||
43
lingua_franca/res/text/de-de/date_time_test.json
Normal file
43
lingua_franca/res/text/de-de/date_time_test.json
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
{
|
||||||
|
"test_nice_year": {
|
||||||
|
"1": {"datetime_param": "1, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "eins v.d.Z." },
|
||||||
|
"2": {"datetime_param": "10, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "zehn v.d.Z." },
|
||||||
|
"3": {"datetime_param": "92, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "zwei und neunzig v.d.Z." },
|
||||||
|
"4": {"datetime_param": "803, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "acht hundert drei" },
|
||||||
|
"5": {"datetime_param": "811, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "acht hundert elf" },
|
||||||
|
"6": {"datetime_param": "454, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "vier hundert vier und fünfzig" },
|
||||||
|
"7": {"datetime_param": "1005, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "tausend fünf" },
|
||||||
|
"8": {"datetime_param": "1012, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "tausend zwölf" },
|
||||||
|
"9": {"datetime_param": "1046, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "tausend sechs und vierzig" },
|
||||||
|
"10": {"datetime_param": "1807, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "achtzehn hundert sieben" },
|
||||||
|
"11": {"datetime_param": "1717, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "siebzehn hundert siebzehn" },
|
||||||
|
"12": {"datetime_param": "1988, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "neunzehn hundert acht und achtzig"},
|
||||||
|
"13": {"datetime_param": "2009, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "zwei tausend neun"},
|
||||||
|
"14": {"datetime_param": "2018, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "zwei tausend achtzehn"},
|
||||||
|
"15": {"datetime_param": "2021, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "zwei tausend ein und zwanzig"},
|
||||||
|
"16": {"datetime_param": "2030, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "zwei tausend dreißig"},
|
||||||
|
"17": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "zwei tausend ein hundert" },
|
||||||
|
"18": {"datetime_param": "1000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "tausend" },
|
||||||
|
"19": {"datetime_param": "2000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "zwei tausend" },
|
||||||
|
"20": {"datetime_param": "3120, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "drei tausend ein hundert zwanzig v.d.Z." },
|
||||||
|
"21": {"datetime_param": "3241, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "drei tausend zwei hundert ein und vierzig v.d.Z." },
|
||||||
|
"22": {"datetime_param": "5200, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "fünf tausend zwei hundert" },
|
||||||
|
"23": {"datetime_param": "1100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "elf hundert" },
|
||||||
|
"24": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "zwei tausend ein hundert" }
|
||||||
|
},
|
||||||
|
"test_nice_date": {
|
||||||
|
"1": {"datetime_param": "2017, 1, 31, 0, 2, 3", "now": "None", "assertEqual": "Dienstag, einunddreißigster Januar, zwei tausend siebzehn"},
|
||||||
|
"2": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2017, 1, 1, 0, 2, 3", "assertEqual": "Sonntag, vierter Februar, zwei tausend achtzehn"},
|
||||||
|
"3": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 1, 1, 0, 2, 3", "assertEqual": "Sonntag, vierter Februar"},
|
||||||
|
"4": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 1, 0, 2, 3", "assertEqual": "Sonntag, vierter"},
|
||||||
|
"5": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 3, 0, 2, 3", "assertEqual": "morgen"},
|
||||||
|
"6": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 4, 0, 2, 3", "assertEqual": "heute"},
|
||||||
|
"7": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 5, 0, 2, 3", "assertEqual": "gestern"},
|
||||||
|
"8": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 6, 0, 2, 3", "assertEqual": "Sonntag, vierter Februar"},
|
||||||
|
"9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "Sonntag, vierter Februar, zwei tausend achtzehn"}
|
||||||
|
},
|
||||||
|
"test_nice_date_time": {
|
||||||
|
"1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", "assertEqual": "Dienstag, einunddreißigster Januar, zwei tausend siebzehn um ein Uhr zweiundzwanzig nachmittags"},
|
||||||
|
"2": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "True", "use_ampm": "False", "assertEqual": "Dienstag, einunddreißigster Januar, zwei tausend siebzehn um dreizehn Uhr zweiundzwanzig"}
|
||||||
|
}
|
||||||
|
}
|
||||||
1
lingua_franca/res/text/de-de/day.word
Normal file
1
lingua_franca/res/text/de-de/day.word
Normal file
@@ -0,0 +1 @@
|
|||||||
|
Tag
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user