1
0
mirror of https://github.com/janvarev/Irene-Voice-Assistant.git synced 2025-11-26 22:50:58 +02:00

v5.1 - сделана функция core.all_num_to_text(text), позволяющая конвертировать все числа в тексте для произношения. Очень нужна для работы TTS silero. Опирается на написанную utils/all_num_to_text.

plugin_tts_silero_v3.py - обработка текста - конвертация чисел в строку. Параметры расстановки акцента и "ё" вынесены в опции
прикручена библиотека mycroftAI/lingua-franca для конвертации чисел в строку.
core.py - инициализация библиотеки lingua-franca
This commit is contained in:
janvarev
2022-04-18 12:55:33 +03:00
parent 9bf565384b
commit 213a6e8736
234 changed files with 31365 additions and 11 deletions

View File

@@ -2,7 +2,7 @@ Irene - russian offline voice assistant
MIT License MIT License
Copyright (c) 2021 Vladislav Janvarev Copyright (c) 2021-2022 Vladislav Janvarev
Copyright (c) 2020 EnjiRouz Copyright (c) 2020 EnjiRouz
Permission is hereby granted, free of charge, to any person obtaining a copy Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -54,4 +54,7 @@ timer.wav:
- licensed under Creative Commons 0 License - licensed under Creative Commons 0 License
- URL: https://freesound.org/people/AlphaDarkWolf/sounds/591109/ - URL: https://freesound.org/people/AlphaDarkWolf/sounds/591109/
MycroftAI/lingua-franca:
- licensed under Apache License 2.0
- URL: https://github.com/MycroftAI/lingua-franca

View File

@@ -0,0 +1,6 @@
from .internal import get_default_lang, set_default_lang, get_default_loc, \
get_active_langs, _set_active_langs, get_primary_lang_code, \
get_full_lang_code, resolve_resource_file, load_language, \
load_languages, unload_language, unload_languages, get_supported_langs
from lingua_franca import config

View File

@@ -0,0 +1,175 @@
# Copyright 2017 Mycroft AI, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
class Fragment(object):
    """Base node of a sentence tree; by itself it stands for an empty sentence."""

    def __init__(self, tree):
        """
        Wrap the raw data that this node represents.

        Args:
            tree (?): payload of the node; the concrete type is decided by
                the subclass that interprets it
        """
        self._tree = tree

    def tree(self):
        """Give back the wrapped payload exactly as it was passed in."""
        return self._tree

    def expand(self):
        """
        Expand this node into the sentences it can produce.

        Returns:
            List<List<str>>: one sentence that contains no tokens
        """
        return [[]]

    def __str__(self):
        # Delegate to the payload so printing a node shows its contents.
        return self._tree.__str__()

    def __repr__(self):
        return self._tree.__repr__()
class Word(Fragment):
    """
    Leaf of the sentence tree holding one token.

    Built from a single string.
    """

    def expand(self):
        """
        Produce the only sentence this leaf can yield.

        Returns:
            List<List<str>>: a list with one sentence whose single token
                is the wrapped string
        """
        only_sentence = [self._tree]
        return [only_sentence]
class Sentence(Fragment):
    """
    Concatenation of several fragments.

    Built from a List<Fragment>.
    """

    def expand(self):
        """
        Combine the expansions of all sub-fragments in every possible way.

        Returns:
            List<List<str>>: every sentence obtainable by picking one
                expansion per sub-fragment and joining them in order
        """
        combined = [[]]
        for part in self._tree:
            choices = part.expand()
            # Prefixes are consumed last-to-first, which fixes the order of
            # the resulting sentences; keep that order.
            combined = [prefix + choice
                        for prefix in reversed(combined)
                        for choice in choices]
        return combined
class Options(Fragment):
    """
    Set of alternative sub-sentences.

    Built from a List<Fragment>.
    """

    def expand(self):
        """
        Collect the expansions of every alternative, one after another.

        Returns:
            List<List<str>>: the sentences of all alternatives, in the
                order the alternatives are stored
        """
        return [sentence
                for alternative in self._tree
                for sentence in alternative.expand()]
class SentenceTreeParser(object):
    """
    Turn a flat token list into all sentences it describes.

    ['1', '(', '2', '|', '3', ')'] -> [['1', '2'], ['1', '3']]
    """

    def __init__(self, tokens):
        self.tokens = tokens

    def _parse(self):
        """
        Parse the whole token list into a sentence tree.

        ['1', '(', '2', '|', '3', ')'] -> ['1', ['2', '3']]
        """
        self._current_position = 0
        return self._parse_expr()

    def _parse_expr(self):
        """
        Parse from the current position up to the matching ')' or the end
        of the token list and return the alternatives found.

        ['1', '(', '2', '|', '3', ')'] -> ['1', [['2'], ['3']]]
        ['2', '|', '3'] -> [['2'], ['3']]
        """
        # The sentence being filled shares its token list with `current`.
        current = []
        branches = [Sentence(current)]
        while self._current_position < len(self.tokens):
            token = self.tokens[self._current_position]
            self._current_position += 1
            if token == ')':
                # Closes the sub-expression being parsed.
                break
            if token == '|':
                # Start collecting the next alternative.
                current = []
                branches.append(Sentence(current))
            elif token == '(':
                nested = self._parse_expr()
                if len(nested.tree()) == 1:
                    # A group with a single branch is ordinary bracketed
                    # text, so the parentheses are kept as tokens.
                    current.extend((Word('('), nested, Word(')')))
                else:
                    current.append(nested)
            else:
                # TODO anything special about {sth}?
                current.append(Word(token))
        return Options(branches)

    def _expand_tree(self, tree):
        """
        Expand a parsed tree into the full list of sentences.

        ['1', ['2', '3']] -> [['1', '2'], ['1', '3']]
        """
        return tree.expand()

    def expand_parentheses(self):
        """Parse the stored tokens and return every sentence they describe."""
        return self._expand_tree(self._parse())

2
lingua_franca/config.py Normal file
View File

@@ -0,0 +1,2 @@
# Library-wide switches. This module is imported as `config` by
# lingua_franca/__init__.py and lingua_franca/internal.py.
# NOTE(review): the code that reads these flags is not visible here --
# confirm how each one is used before changing a default.
load_langs_on_demand = False
inject_timezones = True

568
lingua_franca/format.py Normal file
View File

@@ -0,0 +1,568 @@
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import datetime
import json
import os
import re
from collections import namedtuple
from warnings import warn
from os.path import join
from lingua_franca.bracket_expansion import SentenceTreeParser
from lingua_franca.internal import localized_function, \
populate_localized_function_dict, get_active_langs, \
get_full_lang_code, get_default_lang, get_default_loc, \
is_supported_full_lang, _raise_unsupported_language, \
UnsupportedLanguageError, NoneLangWarning, InvalidLangWarning, \
FunctionNotLocalizedError
# Names of the formatter entry points in this module that take part in
# localization.
# NOTE(review): presumably read by populate_localized_function_dict, which
# is defined in lingua_franca.internal and not shown here -- confirm.
_REGISTERED_FUNCTIONS = ("nice_number",
                         "nice_time",
                         "pronounce_number",
                         "nice_response",
                         "nice_duration")
# Runs at import time, using whatever languages are active at that moment.
populate_localized_function_dict("format", langs=get_active_langs())
def _translate_word(name, lang=''):
    """ Look up the translation of a word resource

    Args:
        name (str): resource name of the word; it is also the value
                    returned when no translation can be read
        lang (str, optional): an optional BCP-47 language code, if omitted
                              the default language will be used.

    Returns:
        str: first non-comment line of the "<name>.word" resource, or
             `name` itself as a fallback
    """
    from lingua_franca.internal import resolve_resource_file

    if not lang:
        if lang is None:
            warn(NoneLangWarning)
        lang = get_default_loc()

    if is_supported_full_lang(lang):
        lang_code = lang
    else:
        lang_code = get_full_lang_code(lang)

    resource_path = resolve_resource_file(
        join("text", lang_code, name + ".word"))
    if resource_path:
        try:
            with open(resource_path, 'r', encoding='utf8') as handle:
                for raw_line in handle:
                    candidate = raw_line.strip()
                    # Lines starting with "#" are comments in .word files.
                    if not candidate.startswith("#"):
                        return candidate
        except Exception:
            # Best effort: any failure falls through to the default below.
            pass
    # No usable translation found: the resource name doubles as the word.
    return name
# Spoken building blocks of a number. Field names describe which digits of
# the number each entry is derived from (see DateTimeFormat._number_strings).
NUMBER_TUPLE = namedtuple(
    'number',
    ('x, xx, x0, x_in_x0, xxx, x00, x_in_x00, xx00, xx_in_xx00, x000, ' +
     'x_in_x000, x0_in_x000, x_in_0x00'))


class DateTimeFormat:
    """Format dates, date-times and years using per-language JSON data."""

    def __init__(self, config_path):
        """
        Args:
            config_path (str): directory containing one sub-directory per
                language code, each holding a date_time.json file
        """
        # Maps a language code to its loaded date_time.json content.
        self.lang_config = {}
        self.config_path = config_path

    def cache(self, lang):
        """
        Load the formatting data for `lang` unless it is already loaded.

        Falls back to the en-us data when the language has no file of its
        own, then pre-compiles the 'match' pattern of every numbered rule.

        Args:
            lang (str): language code naming the data directory
        """
        if lang in self.lang_config:
            return
        try:
            # Attempt to load the language-specific formatting data
            with open(self.config_path + '/' + lang + '/date_time.json',
                      'r', encoding='utf8') as lang_config_file:
                self.lang_config[lang] = json.loads(lang_config_file.read())
        except FileNotFoundError:
            # Fallback to English formatting. Read it as UTF-8 like the
            # primary file instead of relying on the platform default.
            with open(self.config_path + '/en-us/date_time.json',
                      'r', encoding='utf8') as lang_config_file:
                self.lang_config[lang] = json.loads(lang_config_file.read())

        for section in ('decade_format', 'hundreds_format',
                        'thousand_format', 'year_format'):
            rules = self.lang_config[lang][section]
            # Rules are keyed "1", "2", ... with no gaps.
            i = 1
            while rules.get(str(i)):
                rules[str(i)]['re'] = re.compile(rules[str(i)]['match'])
                i = i + 1

    def _number_strings(self, number, lang):
        """
        Build the spoken pieces of `number` for the given language.

        Every piece is looked up in the language's 'number' table and falls
        back to the plain digits when the table has no entry.

        Args:
            number (int): number to split up (only its last four digits
                are used)
            lang (str): language code that was loaded with cache()

        Returns:
            NUMBER_TUPLE: the spoken pieces
        """
        table = self.lang_config[lang]['number']

        def spoken(value):
            digits = str(value)
            return table.get(digits) or digits

        tens = int(number % 100 / 10)
        hundreds = int(number % 1000 / 100)
        leading_two = int(number % 10000 / 100)
        thousands = int(number % 10000 / 1000)

        return NUMBER_TUPLE(
            x=spoken(number % 10),
            xx=spoken(number % 100),
            x0=spoken(tens * 10),
            x_in_x0=spoken(tens),
            xxx=spoken(number % 1000),
            x00=spoken(hundreds * 100),
            x_in_x00=spoken(hundreds),
            xx00=spoken(leading_two * 100),
            xx_in_xx00=spoken(leading_two),
            x000=spoken(thousands * 1000),
            x_in_x000=spoken(thousands),
            x0_in_x000=spoken(thousands * 10),
            # Previously the digit fallback sat inside .get(), so a missing
            # table entry produced None instead of the digits.
            x_in_0x00=spoken(hundreds))

    def _format_string(self, number, format_section, lang):
        """
        Pick the format template of `format_section` that applies to `number`.

        Returns:
            str: 'format' of the first numbered rule whose pattern matches
                str(number), otherwise the section's 'default' template
        """
        section = self.lang_config[lang][format_section]
        i = 1
        while section.get(str(i)):
            rule = section[str(i)]
            if rule['re'].match(str(number)):
                return rule['format']
            i = i + 1
        return section['default']

    def _decade_format(self, number, number_tuple, lang):
        """Format the last two digits of `number`."""
        s = self._format_string(number % 100, 'decade_format', lang)
        return s.format(x=number_tuple.x, xx=number_tuple.xx,
                        x0=number_tuple.x0, x_in_x0=number_tuple.x_in_x0,
                        number=str(number % 100))

    def _number_format_hundreds(self, number, number_tuple, lang,
                                formatted_decade):
        """Format the last three digits of `number`."""
        s = self._format_string(number % 1000, 'hundreds_format', lang)
        return s.format(xxx=number_tuple.xxx, x00=number_tuple.x00,
                        x_in_x00=number_tuple.x_in_x00,
                        formatted_decade=formatted_decade,
                        number=str(number % 1000))

    def _number_format_thousand(self, number, number_tuple, lang,
                                formatted_decade, formatted_hundreds):
        """Format the last four digits of `number`."""
        s = self._format_string(number % 10000, 'thousand_format', lang)
        return s.format(x_in_x00=number_tuple.x_in_x00,
                        xx00=number_tuple.xx00,
                        xx_in_xx00=number_tuple.xx_in_xx00,
                        x000=number_tuple.x000,
                        x_in_x000=number_tuple.x_in_x000,
                        x0_in_x000=number_tuple.x0_in_x000,
                        x_in_0x00=number_tuple.x_in_0x00,
                        formatted_decade=formatted_decade,
                        formatted_hundreds=formatted_hundreds,
                        number=str(number % 10000))

    def date_format(self, dt, lang, now):
        """
        Format the date part of `dt`.

        Args:
            dt (datetime): date to format
            lang (str): language code that was loaded with cache()
            now (datetime): reference date; when given, the result is
                shortened (no year / no month) or replaced by the
                'tomorrow', 'today' or 'yesterday' template

        Returns:
            str: the formatted date
        """
        format_str = 'date_full'
        if now:
            if dt.year == now.year:
                format_str = 'date_full_no_year'
                if dt.month == now.month and dt.day > now.day:
                    format_str = 'date_full_no_year_month'

            tomorrow = now + datetime.timedelta(days=1)
            yesterday = now - datetime.timedelta(days=1)
            if tomorrow.date() == dt.date():
                format_str = 'tomorrow'
            elif now.date() == dt.date():
                format_str = 'today'
            elif yesterday.date() == dt.date():
                format_str = 'yesterday'

        return self.lang_config[lang]['date_format'][format_str].format(
            weekday=self.lang_config[lang]['weekday'][str(dt.weekday())],
            month=self.lang_config[lang]['month'][str(dt.month)],
            day=self.lang_config[lang]['date'][str(dt.day)],
            formatted_year=self.year_format(dt, lang, False))

    def date_time_format(self, dt, lang, now, use_24hour, use_ampm):
        """Format `dt` as a date followed by a time (time via nice_time)."""
        date_str = self.date_format(dt, lang, now)
        time_str = nice_time(dt, lang, use_24hour=use_24hour,
                             use_ampm=use_ampm)
        return self.lang_config[lang]['date_time_format']['date_time'].format(
            formatted_date=date_str, formatted_time=time_str)

    def year_format(self, dt, lang, bc):
        """
        Format the year of `dt`.

        Args:
            dt (datetime): date whose year is formatted
            lang (str): language code that was loaded with cache()
            bc (bool): include the language's 'bc' text

        Returns:
            str: the formatted year, with runs of spaces collapsed
        """
        number_tuple = self._number_strings(dt.year, lang)
        formatted_bc = (
            self.lang_config[lang]['year_format']['bc'] if bc else '')
        formatted_decade = self._decade_format(
            dt.year, number_tuple, lang)
        formatted_hundreds = self._number_format_hundreds(
            dt.year, number_tuple, lang, formatted_decade)
        formatted_thousand = self._number_format_thousand(
            dt.year, number_tuple, lang, formatted_decade, formatted_hundreds)

        s = self._format_string(dt.year, 'year_format', lang)

        return re.sub(' +', ' ',
                      s.format(
                          year=str(dt.year),
                          century=str(int(dt.year / 100)),
                          decade=str(dt.year % 100),
                          formatted_hundreds=formatted_hundreds,
                          formatted_decade=formatted_decade,
                          formatted_thousand=formatted_thousand,
                          bc=formatted_bc)).strip()
# Module-wide formatter; its language data is read from the "res/text"
# directory located next to this file.
date_time_format = DateTimeFormat(os.path.join(os.path.dirname(__file__),
                                               'res/text'))
@localized_function(run_own_code_on=[UnsupportedLanguageError])
def nice_number(number, lang='', speech=True, denominators=None):
    """Format a float as a human readable fraction

    Localized versions turn, for example, 4.5 into "4 and a half" for
    speech and "4 1/2" for text. The body below is the fallback that the
    decorator is told to run on UnsupportedLanguageError.

    Args:
        number (int or float): the float to format
        lang (str, optional): an optional BCP-47 language code, if omitted
                              the default language will be used.
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    # Fallback: plain string conversion, no fraction handling.
    return str(number)
@localized_function()
def nice_time(dt, lang='', speech=True, use_24hour=False,
              use_ampm=False, variant=None):
    """
    Format a time to a comfortable human format

    For example, generate 'five thirty' for speech or '5:30' for
    text display.

    This function has no body of its own; the localized_function decorator
    is documented as forwarding calls to the language-specific version.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        lang (str, optional): an optional BCP-47 language code, if omitted
                              the default language will be used.
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
        variant (string): alternative time system to be used, string must
                          match language specific mappings
    Returns:
        (str): The formatted time string
    """
@localized_function()
def pronounce_number(number, lang='', places=2, short_scale=True,
                     scientific=False, ordinals=False):
    """
    Convert a number to its spoken equivalent

    For example, '5' would be 'five'

    This function has no body of its own; the localized_function decorator
    is documented as forwarding calls to the language-specific version.

    Args:
        number: the number to pronounce
        lang (str, optional): an optional BCP-47 language code, if omitted
                              the default language will be used.
        places (int): number of decimal places to express, default 2
        short_scale (bool) : use short (True) or long scale (False)
            https://en.wikipedia.org/wiki/Names_of_large_numbers
        scientific (bool) : convert and pronounce in scientific notation
        ordinals (bool): pronounce in ordinal form "first" instead of "one"
    Returns:
        (str): The pronounced number
    """
def nice_date(dt, lang='', now=None):
    """
    Turn a datetime into a pronounceable date

    For example, generates 'tuesday, june the fifth, 2018'

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        lang (str, optional): an optional BCP-47 language code, if omitted
                              the default language will be used.
        now (datetime): Current date. If provided, the returned date for
            speech will be shortened accordingly: No year is returned if
            now is in the same year as dt, no month is returned if now is
            in the same month as dt. If now and dt is the same day,
            'today' is returned.
    Returns:
        (str): The formatted date string
    """
    locale_code = get_full_lang_code(lang)
    # The language data has to be loaded before the formatter can use it.
    date_time_format.cache(locale_code)
    formatted = date_time_format.date_format(dt, locale_code, now)
    return formatted
def nice_date_time(dt, lang='', now=None, use_24hour=False,
                   use_ampm=False):
    """
    Turn a datetime into a pronounceable date and time

    For example, generate 'tuesday, june the fifth, 2018 at five thirty'

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        lang (str, optional): an optional BCP-47 language code, if omitted
                              the default language will be used.
        now (datetime): Current date. If provided, the returned date for
            speech will be shortened accordingly: No year is returned if
            now is in the same year as dt, no month is returned if now is
            in the same month as dt. If now and dt is the same day,
            'today' is returned.
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted date time string
    """
    locale_code = get_full_lang_code(lang)
    # The language data has to be loaded before the formatter can use it.
    date_time_format.cache(locale_code)
    formatted = date_time_format.date_time_format(
        dt, locale_code, now, use_24hour, use_ampm)
    return formatted
def nice_year(dt, lang='', bc=False):
    """
    Turn the year of a datetime into a pronounceable year

    For example, generate 'nineteen-hundred and eighty-four' for year 1984

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        lang (str, optional): an optional BCP-47 language code, if omitted
                              the default language will be used.
        bc (bool): put B.C. after the year (python does not support dates
            B.C. in datetime)
    Returns:
        (str): The formatted year string
    """
    locale_code = get_full_lang_code(lang)
    # The language data has to be loaded before the formatter can use it.
    date_time_format.cache(locale_code)
    formatted = date_time_format.year_format(dt, locale_code, bc)
    return formatted
@localized_function(run_own_code_on=[FunctionNotLocalizedError])
def nice_duration(duration, lang='', speech=True):
    """ Render a duration in seconds as a spoken or displayed timespan

    Examples:
       duration = 60  ->  "1:00" or "one minute"
       duration = 163  ->  "2:43" or "two minutes forty three seconds"

    Args:
        duration: time, in seconds (a datetime.timedelta is accepted too)
        lang (str, optional): an optional BCP-47 language code, if omitted
                              the default language will be used.
        speech (bool): format for speech (True) or display (False)

    Returns:
        str: timespan as a string
    """
    if not lang:
        if lang is None:
            warn(NoneLangWarning)
        lang = get_default_loc()
    if not is_supported_full_lang(lang):
        # TODO deprecated; delete when 'lang=None' and 'lang=invalid' are
        # removed
        try:
            lang = get_full_lang_code(lang)
        except UnsupportedLanguageError:
            warn(InvalidLangWarning)
            lang = get_default_loc()

    if isinstance(duration, datetime.timedelta):
        duration = duration.total_seconds()

    # Do traditional rounding: 2.5->3, 3.5->4, plus this
    # helps in a few cases of where calculations generate
    # times like 2:59:59.9 instead of 3:00.
    duration += 0.5

    days = int(duration // 86400)
    hours = int(duration // 3600 % 24)
    minutes = int(duration // 60 % 60)
    seconds = int(duration % 60)

    if speech:
        def spoken(count, singular, plural):
            # "<number> <unit>", choosing the unit word by count.
            text = pronounce_number(count, lang) + " "
            if count == 1:
                return text + _translate_word(singular, lang)
            return text + _translate_word(plural, lang)

        out = ""
        if days > 0:
            # The day part carries its own trailing space; together with
            # the separator added below this yields two spaces after it.
            out = spoken(days, "day", "days") + " "
        smaller_units = ((hours, "hour", "hours"),
                         (minutes, "minute", "minutes"),
                         (seconds, "second", "seconds"))
        for count, singular, plural in smaller_units:
            if count > 0:
                if out:
                    out += " "
                out += spoken(count, singular, plural)
    else:
        # M:SS, MM:SS, H:MM:SS, Dd H:MM:SS format
        with_hours = hours > 0 or days > 0
        out = (str(days) + "d ") if days > 0 else ""
        if with_hours:
            out += str(hours) + ":"
        if with_hours and minutes < 10:
            # Minutes are zero-padded only when an hour field precedes them.
            out += "0"
        out += str(minutes) + ":"
        if seconds < 10:
            out += "0"
        out += str(seconds)

    return out
def join_list(items, connector, sep=None, lang=''):
    """ Join a list into a phrase using the given connector word

    Examples:
        join_list([1,2,3], "and") ->  "1, 2 and 3"
        join_list([1,2,3], "and", ";") ->  "1; 2 and 3"

    Args:
        items (array): items to be joined; each one is converted with str()
        connector (str): connecting word (resource name), like "and" or "or"
        sep (str, optional): separator character, default = ","
        lang (str, optional): an optional BCP-47 language code, if omitted
                              the default language will be used.
    Returns:
        str: the connected list phrase ("" for an empty list)
    """
    if not items:
        return ""
    if len(items) == 1:
        return str(items[0])

    # A space always follows the separator character.
    sep = sep + " " if sep else ", "

    leading = sep.join(str(item) for item in items[:-1])
    # The last item needs str() as well; concatenating it directly raised
    # TypeError for non-string items such as the ints in the examples.
    return (leading +
            " " + _translate_word(connector, lang) +
            " " + str(items[-1]))
def expand_parentheses(sent):
    """
    Expand a tokenized sentence containing "(a|b)" groups.

    ['1', '(', '2', '|', '3', ')'] -> [['1', '2'], ['1', '3']]

    For example:
        Will it (rain|pour) (today|tomorrow|)?
    ---->
        Will it rain today?
        Will it rain tomorrow?
        Will it rain?
        Will it pour today?
        Will it pour tomorrow?
        Will it pour?

    Args:
        sent (list<str>): List of tokens in sentence
    Returns:
        list<list<str>>: Multiple possible sentences from original
    """
    parser = SentenceTreeParser(sent)
    return parser.expand_parentheses()
def expand_options(parentheses_line: str) -> list:
    """
    Expand a line with option groups, e.g. 'test (a|b)' -> ['test a', 'test b']

    Args:
        parentheses_line: Input line to expand
    Returns:
        List of expanded possibilities
    """
    # Keep the delimiters as tokens of their own:
    # 'a(this|that)b' -> ['a', '(', 'this', '|', 'that', ')', 'b']
    tokens = re.split(r'([(|)])', parentheses_line)
    expanded = []
    for sentence_tokens in expand_parentheses(tokens):
        joined = ' '.join(sentence_tokens)
        # Collapse the whitespace runs introduced by joining the tokens.
        expanded.append(re.sub(r'\s+', ' ', joined).strip())
    return expanded
@localized_function()
def nice_response(text, lang=''):
    """
    In some languages, sanitizes certain numeric input for TTS

    Most of the time, this function will be called by any formatters
    which might need it. It's exposed here just in case you've got a clever
    use.

    As of July 2020, this function sanitizes some dates and "x ^ y"-formatted
    exponents in the following primary language codes:
        da de nl sv

    This function has no body of its own; the localized_function decorator
    is documented as forwarding calls to the language-specific version.

    Args:
        text (str): input text to sanitize
        lang (str, optional): an optional BCP-47 language code, if omitted
                              the default language will be used.

    Example:
        assertEqual(nice_response_de("dies ist der 31. mai"),
                    "dies ist der einunddreißigste mai")

        assertEqual(nice_response_de("10 ^ 2"),
                    "10 hoch 2")
    """

774
lingua_franca/internal.py Normal file
View File

@@ -0,0 +1,774 @@
import os.path
from functools import wraps
from importlib import import_module
from inspect import signature
from warnings import warn
from datetime import datetime
from lingua_franca import config
from lingua_franca.time import to_local
# Primary language codes accepted by the library.
# NOTE(review): "tr" is missing here although "tr-tr" / 'tr' appear in the
# two tables below -- confirm whether that is intentional.
_SUPPORTED_LANGUAGES = ("ca", "cs", "da", "de", "en", "es", "fr", "hu",
                        "it", "nl", "pl", "pt", "ru", "sl", "sv", "fa")

# Full (language-region) codes accepted by the library, all lower case.
_SUPPORTED_FULL_LOCALIZATIONS = ("ca-es", "cs-cz", "da-dk", "de-de",
                                 "en-au", "en-us", "es-es", "fr-fr",
                                 "hu-hu", "it-it", "nl-nl", "pl-pl",
                                 "fa-ir", "pt-pt", "ru-ru", "sl-si",
                                 "sv-se", "tr-tr")

# Full code to use when only a primary language code is given.
_DEFAULT_FULL_LANG_CODES = {'ca': 'ca-es',
                            'cs': 'cs-cz',
                            'da': 'da-dk',
                            'de': 'de-de',
                            'en': 'en-us',
                            'es': 'es-es',
                            'fa': 'fa-ir',
                            'fr': 'fr-fr',
                            'hu': 'hu-hu',
                            'it': 'it-it',
                            'nl': 'nl-nl',
                            'pl': 'pl-pl',
                            'pt': 'pt-pt',
                            'ru': 'ru-ru',
                            'sl': 'sl-si',
                            'sv': 'sv-se',
                            'tr': 'tr-tr'}

# Module state, changed through set_default_lang / _set_active_langs /
# load_language / unload_language below.
__default_lang = None  # primary code of the default language
__active_lang_code = None  # full code of the default language
__loaded_langs = []  # currently loaded language codes
# Iterated by _refresh_function_dict; its keys are passed to
# populate_localized_function_dict as module names.
_localized_functions = {}
# TODO the deprecation of 'lang=None' and 'lang=<invalid>' can refer to
# commit 35efd0661a178e82f6745ad17e10e607c0d83472 for the "proper" state
# of affairs, raising the errors below instead of deprecation warnings
# Once the deprecation is complete, functions which have had their default
# parameter changed from lang=None to lang='' should be switched back
class UnsupportedLanguageError(NotImplementedError):
    """Raised when a language code is not supported by Lingua Franca."""
class FunctionNotLocalizedError(NotImplementedError):
    """Error type for a function that lacks a localized implementation.

    NOTE(review): the code raising it is not visible in this part of the
    file -- confirm the exact conditions there.
    """
# Shared warning instances, passed to warnings.warn() wherever a caller
# supplies lang=None or a language code that is not supported.
NoneLangWarning = \
    DeprecationWarning("Lingua Franca is dropping support"
                       " for 'lang=None' as an explicit"
                       " argument.")
InvalidLangWarning = \
    DeprecationWarning("Invalid language code detected. Falling back on "
                       "default.\nThis behavior is deprecated. The 'lang' "
                       "parameter is optional, and only accepts supported "
                       "language codes, beginning with Lingua Franca 0.3.0")
def _raise_unsupported_language(language):
    """
    Always raise UnsupportedLanguageError for the given language.

    The message names the rejected language and lists every supported
    primary language code.

    Arguments:
        language: str
            The language that was supplied.
    """
    template = ("\nLanguage '{language}' is not yet "
                "supported by Lingua Franca. "
                "Supported language codes "
                "include the following:\n{supported}")
    message = template.format(language=language,
                              supported=' '.join(_SUPPORTED_LANGUAGES))
    raise UnsupportedLanguageError(message)
def get_supported_langs():
    """ Get every primary language code the library supports

    Returns:
        tuple(str): the module-level _SUPPORTED_LANGUAGES tuple
    """
    return _SUPPORTED_LANGUAGES
def get_active_langs():
    """ Get the currently-loaded language codes

    Returns:
        list(str): the module's own list object, not a copy
    """
    return __loaded_langs
def _set_active_langs(langs=None, override_default=True):
    """ Replace the list of loaded languages.

    Languages that were loaded before but are not listed here are dropped
    and duplicates are removed, keeping first occurrences. When a default
    language is set, it is switched to the first entry of the new list if
    `override_default` is true or the current default is no longer in the
    list; with an empty list the default is cleared instead.

    Arguments:
        langs: {list(str) or str} -- a list of language codes to load

    Keyword Arguments:
        override_default (bool) -- Change default language to first entry
                                   (default: True)

    Raises:
        TypeError: if `langs` is neither a str nor a list
    """
    global __loaded_langs, __default_lang

    if isinstance(langs, str):
        langs = [langs]
    if not isinstance(langs, list):
        raise(TypeError("lingua_franca.internal._set_active_langs expects"
                        " 'str' or 'list'"))

    # dict.fromkeys de-duplicates while preserving order.
    __loaded_langs = list(dict.fromkeys(langs))

    if __default_lang:
        default_is_stale = override_default or \
            get_primary_lang_code(__default_lang) not in __loaded_langs
        if default_is_stale:
            if __loaded_langs:
                set_default_lang(get_full_lang_code(__loaded_langs[0]))
            else:
                __default_lang = None
    _refresh_function_dict()
def _refresh_function_dict():
    """Re-populate the localized functions of every registered module
    for the currently loaded languages."""
    for module_name in _localized_functions:
        populate_localized_function_dict(module_name, langs=__loaded_langs)
def is_supported_lang(lang):
    """
    Arguments:
        lang (str): a primary language code, such as "en" (case insensitive)

    Returns:
        bool - True if the code is a supported primary language code;
               False otherwise, including for objects without .lower()
    """
    try:
        normalized = lang.lower()
    except AttributeError:
        return False
    return normalized in _SUPPORTED_LANGUAGES
def is_supported_full_lang(lang):
    """
    Arguments:
        lang (str): a full language code, such as "en-US" (case insensitive)

    Returns:
        bool - True if the code is a supported full language code;
               False otherwise, including for objects without .lower()
    """
    try:
        normalized = lang.lower()
    except AttributeError:
        return False
    return normalized in _SUPPORTED_FULL_LOCALIZATIONS
def load_language(lang):
    """Load `lang` and its functions into memory. Will only import those
    functions which belong to a loaded module. In other words, if you have
    lingua_franca.parse loaded, but *not* lingua_franca.format,
    running `load_language('es')` will only import the Spanish-language
    parsers, and not the formatters.

    The reverse is also true: importing a module, such as
    `import lingua_franca.parse`, will only import those functions
    which belong to currently-loaded languages.

    The first language loaded becomes the default when none is set yet.

    Arguments:
        lang (str): the language code to load (any supported lang code,
                    whether 'primary' or 'full')
                    Case-insensitive.

    Raises:
        TypeError: if `lang` is not a str
    """
    if not isinstance(lang, str):
        # Use the type's name: concatenating the type object itself raised
        # an unrelated "can only concatenate str" TypeError.
        raise TypeError("lingua_franca.load_language expects 'str' "
                        "(got " + type(lang).__name__ + ")")
    # The code tables are lower case; normalize so that e.g. "en-US" is
    # recognized, as the documented case-insensitivity promises.
    lang = lang.lower()
    if lang not in _SUPPORTED_LANGUAGES:
        if lang in _SUPPORTED_FULL_LOCALIZATIONS:
            # Languages are tracked by primary code.
            lang = get_primary_lang_code(lang)
    if lang not in __loaded_langs:
        __loaded_langs.append(lang)
        if not __default_lang:
            set_default_lang(lang)
        _set_active_langs(__loaded_langs)
def load_languages(langs):
    """Load several languages, one load_language() call per entry

    Args:
        langs (list[str]): language codes to load, in order
    """
    for code in langs:
        load_language(code)
def unload_language(lang):
    """Opposite of load_language()

    Unloading the default causes the next language in
    `lingua_franca.get_active_langs()` to become the default.

    Will not stop you from unloading the last language, as this may be
    desirable for some applications. A language that is not loaded is
    ignored.

    Args:
        lang (str): language code to unload
    """
    if lang not in __loaded_langs:
        return
    __loaded_langs.remove(lang)
    _set_active_langs(__loaded_langs)
def unload_languages(langs):
    """Opposite of load_languages()

    Removes every listed language that is currently loaded, then refreshes
    the active languages once. Languages that are not loaded are ignored,
    matching unload_language(); previously they raised ValueError.

    Args:
        langs (list[str]): language codes to unload
    """
    for lang in langs:
        if lang in __loaded_langs:
            __loaded_langs.remove(lang)
    _set_active_langs(__loaded_langs)
def get_default_lang():
    """ Get the current default language.

    This is the primary language code, such as 'en' or 'es'.
    For the current localization/full language code,
    such as 'en-US' or 'es-ES', call `get_default_loc()`

    See:
        https://en.wikipedia.org/wiki/IETF_language_tag

    Returns:
        str: A primary language code, e.g. ("en", or "pt"); None while no
             default has been set
    """
    return __default_lang
def get_default_loc():
    """ Get the current, localized BCP-47 language code, such as 'en-US'
    or 'es-ES'. For the default language *family* - which is passed to
    most parsers and formatters - call `get_default_lang`

    The 'localized' portion conforms to ISO 3166-1 alpha-2
    https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2

    Returns:
        str: the full language code; None while no default has been set
    """
    return __active_lang_code
def set_default_lang(lang_code):
    """ Set the active BCP-47 language code to be used in formatting/parsing

    Will choose a default localization if passed a primary language family
    (ex: `set_default_lang("en")` will default to "en-US")
    Will respect localization when passed a full lang code.

    For more information about valid lang codes, see get_default_lang()
    and get_default_loc()

    Args:
        lang_code (str): BCP-47 language code, e.g. "en-us" or "es-mx"
    """
    global __default_lang, __active_lang_code

    lang_code = lang_code.lower()
    primary = get_primary_lang_code(lang_code)
    if primary in _SUPPORTED_LANGUAGES:
        __default_lang = primary
    else:
        _raise_unsupported_language(lang_code)

    # make sure the default language is loaded.
    # also make sure the default language is at the front.
    # position doesn't matter here, but it clarifies things while debugging.
    if __default_lang in __loaded_langs:
        __loaded_langs.remove(__default_lang)
    __loaded_langs.insert(0, __default_lang)
    _refresh_function_dict()

    # Keep the caller's region when it is a known localization; otherwise
    # derive the full code from the primary language.
    __active_lang_code = lang_code if is_supported_full_lang(lang_code) \
        else get_full_lang_code(__default_lang)
# TODO remove this when invalid lang codes are removed (currently deprecated)
def get_primary_lang_code(lang=''):
    """ Get the primary language code for `lang`

    An empty `lang` is replaced by the default localization (with a
    deprecation warning when it is None). If the lookup raises
    UnsupportedLanguageError, a warning is issued and the default
    localization is returned as-is.

    Args:
        lang (str, optional): A BCP-47 language code

    Returns:
        str: the result of the primary-code lookup, or the default
             localization on failure
    """
    if not lang:
        if lang is None:
            warn(NoneLangWarning)
        lang = get_default_loc()
    try:
        return __get_primary_lang_code_deprecation_warning(lang)
    except UnsupportedLanguageError:
        warn(InvalidLangWarning)
        return get_default_loc()
def __get_primary_lang_code_deprecation_warning(lang=''):
    """ Get the primary language code

    Args:
        lang(str, optional): A BCP-47 language code
                             (If omitted, equivalent to
                             `lingua_franca.get_default_lang()`)

    Returns:
        str: A primary language family, such as "en", "de" or "pt"

    Raises:
        TypeError: if `lang` is given but is not a str
        ValueError: if `lang` is unknown and not shaped like "xx-..."
    """
    # split on the hyphen and only return the primary-language code
    # NOTE: This is typically a two character code.  The standard allows
    #       1, 2, 3 and 4 character codes.  In the future we can consider
    #       mapping from the 3 to 2 character codes, for example.  But for
    #       now we can just be careful in use.
    if not lang:
        return get_default_lang()
    if not isinstance(lang, str):
        # Use the type's name: concatenating the type object itself raised
        # an unrelated "can only concatenate str" TypeError.
        raise TypeError("lingua_franca.get_primary_lang_code() expects"
                        " an (optional)argument of type 'str', but got " +
                        type(lang).__name__)

    lang_code = lang.lower()
    if lang_code not in _SUPPORTED_FULL_LOCALIZATIONS and lang_code not in \
            _SUPPORTED_LANGUAGES:
        # We don't know this language code. Check if the input is
        # formatted like a language code, i.e. has a hyphen as its third
        # character.
        if lang == "-".join([lang[:2], lang[3:]]):
            warn("Unrecognized language code: '" + lang + "', but it appears "
                 "to be a valid language code. Returning the first two chars.")
            return lang_code.split("-")[0]
        raise ValueError("Invalid input: " + lang)
    return lang_code.split("-")[0]
# TODO remove this when invalid lang codes are removed (currently deprecated)
def get_full_lang_code(lang=''):
    """Resolve *lang* to a full language code.

    A falsy *lang* is replaced by ``get_default_loc()``; ``None`` additionally
    emits ``NoneLangWarning``. A value accepted by ``is_supported_full_lang``
    is returned unchanged. Anything else goes through
    ``__get_full_lang_code_deprecation_warning``; if that raises
    ``UnsupportedLanguageError``, ``InvalidLangWarning`` is emitted and the
    value of ``get_default_loc()`` is returned.
    """
    if lang is None:
        warn(NoneLangWarning)
    code = lang or get_default_loc()
    if is_supported_full_lang(code):
        return code
    try:
        return __get_full_lang_code_deprecation_warning(code)
    except UnsupportedLanguageError:
        warn(InvalidLangWarning)
        return get_default_loc()
def __get_full_lang_code_deprecation_warning(lang=''):
    """ Get the full language code

    Args:
        lang(str, optional): A BCP-47 language code. ``None`` selects the
                             currently active language code, lower-cased.

    Returns:
        str: A full language code, such as "en-us" or "de-de"

    Raises:
        TypeError: `lang` is neither ``None`` nor a `str`.
        UnsupportedLanguageError: `lang` is not a supported full
                                  localization (compared lower-cased) and has
                                  no entry in the default full-code table.
    """
    if lang is None:
        return __active_lang_code.lower()
    if not isinstance(lang, str):
        complaint = "get_full_lang_code expects str, " \
                    "got {}".format(type(lang))
        raise TypeError(complaint)

    # A recognised full localization is handed back exactly as given.
    if lang.lower() in _SUPPORTED_FULL_LOCALIZATIONS:
        return lang
    # Otherwise fall back to the default full code registered for `lang`.
    if lang in _DEFAULT_FULL_LANG_CODES:
        return _DEFAULT_FULL_LANG_CODES[lang]
    raise UnsupportedLanguageError(lang)
def localized_function(run_own_code_on=[type(None)]):
    """
    Decorator which finds localized functions, and calls them, from signatures
    defined in the top-level modules. See lingua_franca.format or .parse for
    examples of the decorator in action.

    Note that, by default, wrapped functions will never actually be executed.
    Rather, when they're called, their arguments will be passed directly to
    their localized equivalent, specified by the 'lang' parameter.

    The wrapper can be instructed to execute the wrapped function itself when
    a specified error is raised (see the argument 'run_own_code_on')

    For instance, this decorator wraps parse.extract_number(), which has no
    logic of its own. A call to

        extract_number('uno', lang='es')

    will locate and call

        lingua_franca.lang.parse_es.extract_number_es('uno')

    By contrast, here's the decorator above format.nice_number, with the param:

        @localized_function(run_own_code_on=[UnsupportedLanguageError])
        def nice_number(number, lang='', speech=True, denominators=None):

    Here, nice_number() itself will be executed in the event that the localizer
    raises an UnsupportedLanguageError.

    Arguments:
        run_own_code_on(list(type), optional)
            A list of Error types (ValueError, NotImplementedError, etc)
            which, if they are raised, will trigger the wrapped function's
            own code.

            If this argument is omitted, the function itself will never
            be run. Calls to the wrapped function will be passed to the
            appropriate, localized function.

    Returns:
        A decorator. The function it decorates must have a parameter named
        'lang'; the lookup of that parameter happens on every call.

    Raises:
        ValueError: `run_own_code_on` is not a list (or convertible to one)
            of callables that construct exception instances.
    """
    # Make sure everything in run_own_code_on is an Error or None
    BadTypeError = \
        ValueError("@localized_function(run_own_code_on=<>) expected an "
                   "Error type, or a list of Error types. Instead, it "
                   "received this value:\n" + str(run_own_code_on))
    # TODO deprecate these kwarg values 6-12 months after v0.3.0 releases

    if run_own_code_on != [None]:
        def is_error_type(_type):
            # The candidate is instantiated with no arguments. A falsy
            # instance (e.g. the None produced by NoneType) is accepted;
            # anything else must be a BaseException instance.
            if not callable(_type):
                return False
            _instance = _type()
            rval = isinstance(_instance, BaseException) if _instance else True
            del _instance
            return rval
        if not isinstance(run_own_code_on, list):
            try:
                run_own_code_on = list(run_own_code_on)
            except TypeError:
                raise BadTypeError
        if not all((is_error_type(e) for e in run_own_code_on)):
            raise BadTypeError

    # Begin wrapper
    def localized_function_decorator(func):
        # Wrapper's logic
        def _call_localized_function(func, *args, **kwargs):
            lang_code = None
            load_langs_on_demand = config.load_langs_on_demand
            unload_language_afterward = False
            func_signature = signature(func)
            func_params = list(func_signature.parameters)
            # list.index raises ValueError when `func` has no 'lang' parameter
            lang_param_index = func_params.index('lang')
            full_lang_code = None

            # Check if we need to add timezone awareness to any datetime object
            if config.inject_timezones:
                for key, value in kwargs.items():
                    if isinstance(value, datetime) and value.tzinfo is None:
                        kwargs[key] = to_local(value)
                for idx, value in enumerate(args):
                    if isinstance(value, datetime) and value.tzinfo is None:
                        args = (*args[:idx], to_local(value), *args[idx + 1:])

            # Check if we're passing a lang as a kwarg
            if 'lang' in kwargs.keys():
                lang_param = kwargs['lang']
                if lang_param is None:
                    warn(NoneLangWarning)
                    lang_code = get_default_lang()
                else:
                    lang_code = lang_param

            # Check if we're passing a lang as a positional arg
            elif lang_param_index < len(args):
                lang_param = args[lang_param_index]
                if lang_param is None:
                    warn(NoneLangWarning)
                    lang_code = get_default_lang()
                elif lang_param in _SUPPORTED_LANGUAGES or \
                        lang_param in _SUPPORTED_FULL_LOCALIZATIONS:
                    lang_code = args[lang_param_index]
                # Drop the positional slot that held the lang parameter
                args = args[:lang_param_index] + args[lang_param_index+1:]

            # Turns out, we aren't passing a lang code at all
            lang_code = lang_code or get_default_lang()
            if not lang_code:
                if load_langs_on_demand:
                    raise ModuleNotFoundError("No language module loaded "
                                              "and none specified.")
                else:
                    raise ModuleNotFoundError("No language module loaded.")

            if lang_code not in _SUPPORTED_LANGUAGES:
                try:
                    # Remember the code as given; it becomes full_lang_code
                    # if reducing it to a primary code succeeds.
                    tmp = lang_code
                    __use_tmp = True
                    lang_code = get_primary_lang_code(lang_code)
                except ValueError:
                    __error = \
                        UnsupportedLanguageError("\nLanguage '{language}' is not yet "
                                                 "supported by Lingua Franca. "
                                                 "Supported language codes "
                                                 "include the following:\n{supported}"
                                                 .format(
                                                     language=lang_code,
                                                     supported=_SUPPORTED_FULL_LOCALIZATIONS))
                    if UnsupportedLanguageError in run_own_code_on:
                        raise __error
                    else:
                        # Fall back to the default language with a warning
                        warn(DeprecationWarning("The following warning will "
                                                "become an exception in a future "
                                                "version of Lingua Franca." +
                                                str(__error)))
                        lang_code = get_default_lang()
                        full_lang_code = get_full_lang_code()
                        __use_tmp = False
                if lang_code not in _SUPPORTED_LANGUAGES:
                    _raise_unsupported_language(lang_code)
                if __use_tmp:
                    full_lang_code = tmp
                else:
                    full_lang_code = get_full_lang_code(lang_code)

            # Here comes the ugly business.
            _module_name = func.__module__.split('.')[-1]
            _module = import_module(".lang." + _module_name +
                                    "_" + lang_code, "lingua_franca")
            # The nonsense above gets you from lingua_franca.parse
            # to lingua_franca.lang.parse_xx
            if _module_name not in _localized_functions.keys():
                raise ModuleNotFoundError("Module lingua_franca." +
                                          _module_name + " not recognized")
            if lang_code not in _localized_functions[_module_name].keys():
                if load_langs_on_demand:
                    load_language(lang_code)
                    unload_language_afterward = True
                else:
                    raise ModuleNotFoundError(_module_name +
                                              " module of language '" +
                                              lang_code +
                                              "' is not currently loaded.")
            func_name = func.__name__.split('.')[-1]
            # At some point in the past, both the module and the language
            # were imported/loaded, respectively.
            # When that happened, we cached the *signature* of each
            # localized function.
            #
            # This is the crucial element that allows us to import funcs
            # on the fly.
            #
            # If we didn't find a localized function to correspond with
            # the wrapped function, we cached NotImplementedError in its
            # place.
            loc_signature = _localized_functions[_module_name][lang_code][func_name]
            if isinstance(loc_signature, type(NotImplementedError())):
                raise loc_signature

            # Now we have the appropriate localized module. Let's get
            # the localized function.
            try:
                localized_func = getattr(
                    _module, func_name + "_" + lang_code)
            except AttributeError:
                raise FunctionNotLocalizedError(func_name, lang_code)

            # We now have a localized function, such as
            # lingua_franca.parse.extract_datetime_en
            # Get 'lang' out of its parameters.
            if 'lang' in kwargs:
                del kwargs['lang']
            # NOTE(review): this removes every positional argument equal to
            # either language code, not only the lang slot - confirm intended.
            args = tuple(arg for arg in list(args) if
                         arg not in (lang_code, full_lang_code))

            # Now we call the function, ignoring any kwargs from the
            # wrapped function that aren't in the localized function.
            r_val = localized_func(*args,
                                   **{arg: val for arg, val
                                      in kwargs.items()
                                      if arg in loc_signature.parameters})

            # Unload all the stuff we just assembled and imported
            del localized_func
            del _module
            if unload_language_afterward:
                unload_language(lang_code)
            return r_val

        # Actual wrapper
        @wraps(func)
        def call_localized_function(*args, **kwargs):
            if run_own_code_on != [type(None)]:
                try:
                    return _call_localized_function(func, *args, **kwargs)
                except Exception as e:  # Intercept, check for run_own_code_on
                    if any((isinstance(e, error) for error in run_own_code_on)):
                        return func(*args, **kwargs)
                    else:
                        raise e
            else:  # don't intercept any exceptions
                return _call_localized_function(func, *args, **kwargs)
        return call_localized_function
    # NOTE(review): returning a local name cannot raise NotImplementedError,
    # so this handler looks unreachable - confirm before removing.
    try:
        return localized_function_decorator
    except NotImplementedError as e:
        warn(str(e))
        return
def populate_localized_function_dict(lf_module, langs=None):
    """Build and cache the table of localized function signatures.

    Used by the top-level modules to locate, cache, and call localized funcs.

    Arguments:
        lf_module(str) -- the name of the top-level module
        langs(iterable of str, optional) -- language codes to register.
            When omitted (or None), `get_active_langs()` is consulted at
            call time.

    Returns:
        Dict -- {primary_lang_code: {function_name(str): entry}}, where each
        entry is the `inspect.Signature` of the localized function or, when
        the language module lacks that function, a
        `FunctionNotLocalizedError` instance.

    Note:
        The dictionary returned can be used directly,
        but it's normally discarded. Rather, this function stores
        the dictionary in
        `lingua_franca.internal._localized_functions[lf_module]`,
        and its members are consulted via the `@localized_function` decorator.
        A language whose `lf_module` module cannot be imported keeps an empty
        entry and triggers a warning.
    """
    if langs is None:
        # Resolved here rather than in the signature: a default of
        # `get_active_langs()` would be evaluated only once, at definition.
        langs = get_active_langs()

    bad_lang_code = "Language code '{}' is registered with" \
                    " Lingua Franca, but its " + lf_module + " module" \
                    " could not be found."
    return_dict = {}
    for lang_code in langs:
        primary_lang_code = get_primary_lang_code(lang_code)
        return_dict[primary_lang_code] = {}

        # Prefer the language's own "not implemented" message; fall back to
        # a generic one if the common-data module or attribute is missing.
        try:
            lang_common_data = import_module(".lang.common_data_" + primary_lang_code,
                                             "lingua_franca")
            not_found_message = getattr(lang_common_data,
                                        "_FUNCTION_NOT_IMPLEMENTED_WARNING")
            del lang_common_data
        except Exception:
            not_found_message = "This function has not been implemented" \
                                " in the specified language."
        _FUNCTION_NOT_FOUND = FunctionNotLocalizedError(not_found_message)

        try:
            mod = import_module(".lang." + lf_module + "_" + primary_lang_code,
                                "lingua_franca")
        except ModuleNotFoundError:
            warn(Warning(bad_lang_code.format(primary_lang_code)))
            continue

        function_names = getattr(import_module("." + lf_module, "lingua_franca"),
                                 "_REGISTERED_FUNCTIONS")
        for function_name in function_names:
            try:
                function = getattr(mod, function_name
                                   + "_" + primary_lang_code)
                function_signature = signature(function)
                del function
            except AttributeError:
                function_signature = _FUNCTION_NOT_FOUND
                # TODO log these occurrences: "function 'function_name' not
                # implemented in language 'primary_lang_code'"
                #
                # Perhaps provide this info to autodocs, to help volunteers
                # identify the functions in need of localization
            return_dict[primary_lang_code][function_name] = function_signature

        del mod
    _localized_functions[lf_module] = return_dict
    return _localized_functions[lf_module]
def resolve_resource_file(res_name, data_dir=None):
    """Convert a resource into an absolute filename.

    Resource names are in the form: 'filename.ext'
    or 'path/filename.ext'

    Candidate locations are tried in this order and the first one that is
    an existing file is returned:

        1. `res_name` itself (e.g. a fully qualified user setting)
        2. ~/.mycroft/<res_name>
        3. <data_dir>/<res_name>, where `data_dir` defaults to
           /opt/mycroft/res/
        4. the 'res' folder next to this source file

    Args:
        res_name(str): a resource path/name
        data_dir(str, optional): directory used for step 3

    Returns:
        str: path to resource or None if no resource found
    """
    def _candidates():
        # Produced lazily so later paths are only built when earlier ones miss
        yield res_name
        yield os.path.expanduser("~/.mycroft/" + res_name)
        base_dir = data_dir or os.path.expanduser("/opt/mycroft/res/")
        yield os.path.expanduser(os.path.join(base_dir, res_name))
        packaged = os.path.join(os.path.dirname(__file__), 'res', res_name)
        yield os.path.abspath(os.path.normpath(packaged))

    for candidate in _candidates():
        if os.path.isfile(candidate):
            return candidate
    return None  # Resource cannot be resolved
def lookup_variant(mappings, key="variant"):
    """function decorator
    maps strings to Enums expected by language specific functions
    mappings can be used to translate values read from configuration files

    Only a keyword argument named `key` whose value is a `str` is
    translated; positional arguments and non-string values pass through
    unchanged.

    Example usage:

    @lookup_variant({
        "default": TimeVariant.DEFAULT,
        "traditional": TimeVariant.TRADITIONAL
    })
    def nice_time_XX(dt, speech=True, use_24hour=False, use_ampm=False,
                     variant=None):
        variant = variant or TimeVariant.DEFAULT
        (...)

    Args:
        mappings(dict): string -> value passed on to the wrapped function
        key(str): name of the keyword argument to translate

    Raises:
        ValueError: `mappings` is not a dict (at decoration time), or the
            wrapped function is called with a string that has no mapping.
    """
    if not isinstance(mappings, dict):
        raise ValueError

    # Begin wrapper
    def lang_variant_function_decorator(func):

        @wraps(func)
        def call_function(*args, **kwargs):
            requested = kwargs.get(key)
            if isinstance(requested, str):
                if requested not in mappings:
                    # Report the offending value, not the keyword's name
                    raise ValueError("Unknown variant, mapping does not "
                                     "exist for {v}".format(v=requested))
                kwargs[key] = mappings[requested]
            return func(*args, **kwargs)

        return call_function

    return lang_variant_function_decorator

View File

@@ -0,0 +1,72 @@
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from warnings import warn
from lingua_franca.internal import get_default_lang, \
set_default_lang, get_primary_lang_code as gplc, get_full_lang_code as gflc
def get_active_lang():
    """ Deprecated: return the value of `lingua_franca.get_default_lang()`

    Emits a DeprecationWarning on every call.

    Returns:
        The result of `get_default_lang()` (documented upstream as a
        BCP-47 language code, e.g. "en-us" or "pt-pt")
    """
    # Parenthesised so the adjacent literals form ONE message; written as
    # separate statements only the first fragment ever reached warn().
    _getlang = ("Direct imports from lingua_franca.lang"
                " have been deprecated. Use"
                " lingua_franca.get_default_lang()")
    warn(_getlang, DeprecationWarning)
    return get_default_lang()
def set_active_lang(lang_code):
    """ Deprecated: forward to `lingua_franca.set_default_lang()`

    Emits a DeprecationWarning on every call.

    Args:
        lang_code (str): BCP-47 language code, e.g. "en-us" or "es-mx"
    """
    # Parenthesised so the adjacent literals form ONE message; written as
    # separate statements only the first fragment ever reached warn().
    _setlang = ("Direct imports from lingua_franca.lang"
                " have been deprecated. Use"
                " lingua_franca.set_default_lang()")
    warn(_setlang, DeprecationWarning)
    set_default_lang(lang_code=lang_code)
def get_primary_lang_code(lang=None):
    """ Deprecated alias: warn, then delegate to the implementation
    imported from `lingua_franca.internal`.

    Args:
        lang (str, optional): A BCP-47 language code, or None for default

    Returns:
        str: A primary language family, such as "en", "de" or "pt"
    """
    deprecation_notice = ("Direct imports from lingua_franca.lang have been"
                          " deprecated. Use"
                          " lingua_franca.get_primary_lang_code()")
    warn(deprecation_notice, DeprecationWarning)
    primary = gplc(lang=lang)
    return primary
def get_full_lang_code(lang=None):
    """ Deprecated alias: warn, then delegate to the implementation
    imported from `lingua_franca.internal`.

    Args:
        lang (str, optional): A BCP-47 language code, or None for default

    Returns:
        str: A full language code, such as "en-us" or "de-de"
    """
    deprecation_notice = ("Direct imports from lingua_franca.lang have been"
                          " deprecated. Use"
                          " lingua_franca.get_full_lang_code()")
    warn(deprecation_notice, DeprecationWarning)
    full = gflc(lang=lang)
    return full

View File

@@ -0,0 +1,197 @@
# Message reported when a function has no Catalan implementation.
_FUNCTION_NOT_IMPLEMENTED_WARNING = (
    "aquesta funció encara no s'ha implementat en 'ca'"
)

# Definite articles only. The indefinite articles ("un", "una", "uns",
# "unes") cannot be suppressed: in Catalan "un cavall" means both
# "a horse" and "one horse".
_ARTICLES_CA = [
    "el", "la", "l", "lo",
    "els", "les", "los",
]

# Word-ending rules for gender
_FEMALE_ENDINGS_CA = [
    "a", "esa", "essa", "esses", "eses", "ena", "enes", "ques",
    "asi", "esi", "isi", "osi", "ut", "at", "eta", "etes",
    "tja", "tges", "ica", "iques", "ada", "ades",
]

_MALE_ENDINGS_CA = [
    "o", "os", "ll", "lls", "ig", "igs", "itjos", "rs",
    "et", "ets", "ès", "ns", "ic", "ics", "at", "ats",
]

# Special cases: explicit gender for words the ending rules do not cover
_GENDERS_CA = {"dones": "f", "home": "m", "pell": "f", "pells": "f"}
# Context rules for gender: determiners that mark the following noun as
# masculine or feminine.
# "meus" used to be listed twice while the singular "meu" was missing.
_MALE_DETERMINANTS_CA = ["el", "els", "l", "lo", "es", "aquest", "aquests",
                         "aquell", "aquells", "aqueix", "aqueixos",
                         "algun", "alguns", "este", "estos", "altre",
                         "mon", "mos", "mons", "meu", "meus"]

_FEMALE_DETERMINANTS_CA = ["la", "les", "sa", "ses", "aquesta", "aquestes",
                           "aquella", "aquelles", "aqueixa", "aqueixes",
                           "alguna", "algunes", "esta", "estes", "altra",
                           "ma", "mes", "meva", "meua", "meves"]
# Catalan number words -> value (cardinals, a few ordinals, and the
# hyphenated hundreds in masculine and feminine form).
_NUMBERS_CA = {
    "zero": 0,
    "u": 1,
    "un": 1,
    "una": 1,
    "uns": 1,
    "unes": 1,
    "primer": 1,
    "primera": 1,
    "segon": 2,
    "segona": 2,
    "tercer": 3,
    "tercera": 3,
    "dos": 2,
    "dues": 2,
    "tres": 3,
    "quatre": 4,
    "cinc": 5,
    "sis": 6,
    "set": 7,
    "vuit": 8,
    "huit": 8,
    "nou": 9,
    "deu": 10,
    "onze": 11,
    "dotze": 12,
    "tretze": 13,
    "catorze": 14,
    "quinze": 15,
    "setze": 16,
    "disset": 17,
    "divuit": 18,
    "dinou": 19,
    "vint": 20,
    "trenta": 30,
    "quaranta": 40,
    "cinquanta": 50,
    "seixanta": 60,
    "setanta": 70,
    "vuitanta": 80,
    "noranta": 90,
    "cent": 100,
    "cents": 100,
    "dos-cents": 200,
    "dues-centes": 200,
    "tres-cents": 300,
    "tres-centes": 300,
    "quatre-cents": 400,
    "quatre-centes": 400,
    "cinc-cents": 500,
    "cinc-centes": 500,
    "sis-cents": 600,
    "sis-centes": 600,
    # was "set--cents" (doubled hyphen), which no real input could match
    "set-cents": 700,
    "set-centes": 700,
    "vuit-cents": 800,
    "vuit-centes": 800,
    "nou-cents": 900,
    "nou-centes": 900,
    "mil": 1000,
    "milió": 1000000
}
# Denominator -> Catalan name of the fraction
_FRACTION_STRING_CA = {
    2: "mig", 3: "terç", 4: "quart", 5: "cinquè",
    6: "sisè", 7: "setè", 8: "vuitè", 9: "novè",
    10: "desè", 11: "onzè", 12: "dotzè", 13: "tretzè",
    14: "catorzè", 15: "quinzè", 16: "setzè", 17: "dissetè",
    18: "divuitè", 19: "dinovè", 20: "vintè",
    30: "trentè",
    100: "centè",
    1000: "milè",
}
# Value -> Catalan cardinal word (0-20, then the tens up to 90)
_NUM_STRING_CA = {
    0: "zero", 1: "un", 2: "dos", 3: "tres", 4: "quatre",
    5: "cinc", 6: "sis", 7: "set", 8: "vuit", 9: "nou",
    10: "deu", 11: "onze", 12: "dotze", 13: "tretze", 14: "catorze",
    15: "quinze", 16: "setze", 17: "disset", 18: "divuit", 19: "dinou",
    20: "vint", 30: "trenta", 40: "quaranta", 50: "cinquanta",
    60: "seixanta", 70: "setanta", 80: "vuitanta", 90: "noranta",
}
# Catalan words for the tens (including the "huitanta" variant) -> value
_TENS_CA = {
    "vint": 20, "trenta": 30, "quaranta": 40, "cinquanta": 50,
    "seixanta": 60, "setanta": 70, "vuitanta": 80, "huitanta": 80,
    "noranta": 90,
}

# Unit words (1-9, with variants) -> value
_AFTER_TENS_CA = {
    "u": 1, "un": 1,
    "dos": 2, "dues": 2,
    "tres": 3, "quatre": 4, "cinc": 5, "sis": 6, "set": 7,
    "vuit": 8, "huit": 8,
    "nou": 9,
}
# Multiplier words (2-9, with variants) -> value
_BEFORE_HUNDREDS_CA = {
    "dos": 2, "dues": 2,
    "tres": 3, "quatre": 4, "cinc": 5, "sis": 6, "set": 7,
    "vuit": 8, "huit": 8,
    "nou": 9,
}

# Every spelling of "hundred" -> 100
_HUNDREDS_CA = {"cent": 100, "cents": 100, "centes": 100}

View File

@@ -0,0 +1,305 @@
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from collections import OrderedDict
#_ARTICLES_CS = {}
# Value -> Czech cardinal word (0-20, then the tens up to 90)
_NUM_STRING_CS = {
    0: "nula", 1: "jedna", 2: "dva", 3: "tři", 4: "čtyři",
    5: "pět", 6: "šest", 7: "sedm", 8: "osm", 9: "devět",
    10: "deset", 11: "jedenáct", 12: "dvanáct", 13: "třináct",
    14: "čtrnáct", 15: "patnáct", 16: "šestnáct", 17: "sedmnáct",
    18: "osmnáct", 19: "devatenáct",
    20: "dvacet", 30: "třicet", 40: "čtyřicet", 50: "padesát",
    60: "šedesát", 70: "sedmdesát", 80: "osmdesát", 90: "devadesát",
}
# Denominator -> Czech name of the fraction
_FRACTION_STRING_CS = {
    2: "polovina", 3: "třetina", 4: "čtvrtina", 5: "pětina",
    6: "šestina", 7: "sedmina", 8: "osmina", 9: "devítina",
    10: "desetina", 11: "jedenáctina", 12: "dvanáctina",
    13: "třináctina", 14: "čtrnáctina", 15: "patnáctina",
    16: "šestnáctina", 17: "sedmnáctina", 18: "osmnáctina",
    19: "devatenáctina",
    20: "dvacetina", 30: "třicetina", 40: "čtyřicetina",
    50: "padesátina", 60: "šedesátina", 70: "sedmdesátina",
    80: "osmdesátina", 90: "devadesátina",
    1e2: "setina",
    1e3: "tisícina",
}
# Long-scale magnitude names in Czech, in ascending order.
# NOTE(review): 1e600 and 1e603 lie beyond the float range, so both
# literals evaluate to inf and end up sharing a single key.
_LONG_SCALE_CS = OrderedDict((
    (100, "sto"), (1000, "tisíc"), (1000000, "milion"),
    (1e9, "miliarda"), (1e12, "bilion"), (1e15, "biliarda"),
    (1e18, "trilion"), (1e21, "triliarda"),
    (1e24, "kvadrilion"), (1e27, "kvadriliarda"),
    (1e30, "kvintilion"), (1e33, "kvintiliarda"),
    (1e36, "sextilion"), (1e39, "sextiliarda"),
    (1e42, "septilion"), (1e45, "septiliarda"),
    (1e48, "oktilion"), (1e51, "oktiliarda"),
    (1e54, "nonilion"), (1e57, "noniliarda"),
    (1e60, "decilion"), (1e63, "deciliarda"),
    (1e120, "vigintilion"), (1e180, "trigintilion"),
    (1e303, "kvinkvagintiliarda"),
    (1e600, "centilion"), (1e603, "centiliarda"),
))
# Short-scale magnitude names, in ascending order.
# Spelling fixes relative to the previous table: "quadrdecillion",
# "uuovigintillion", "qesvigintillion" and "ctogintacentillion".
# NOTE(review): literals from 1e309 upward exceed the float range and all
# evaluate to inf, so those entries collapse into one key holding the last
# name - left as-is because consumers may rely on the float keys.
_SHORT_SCALE_CS = OrderedDict([
    (100, 'sto'),
    (1000, 'tisíc'),
    (1000000, 'million'),
    (1e9, "billion"),
    (1e12, 'trillion'),
    (1e15, "quadrillion"),
    (1e18, "quintillion"),
    (1e21, "sextillion"),
    (1e24, "septillion"),
    (1e27, "octillion"),
    (1e30, "nonillion"),
    (1e33, "decillion"),
    (1e36, "undecillion"),
    (1e39, "duodecillion"),
    (1e42, "tredecillion"),
    (1e45, "quattuordecillion"),
    (1e48, "quindecillion"),
    (1e51, "sexdecillion"),
    (1e54, "septendecillion"),
    (1e57, "octodecillion"),
    (1e60, "novemdecillion"),
    (1e63, "vigintillion"),
    (1e66, "unvigintillion"),
    (1e69, "duovigintillion"),
    (1e72, "tresvigintillion"),
    (1e75, "quattuorvigintillion"),
    (1e78, "quinquavigintillion"),
    (1e81, "sesvigintillion"),
    (1e84, "septemvigintillion"),
    (1e87, "octovigintillion"),
    (1e90, "novemvigintillion"),
    (1e93, "trigintillion"),
    (1e96, "untrigintillion"),
    (1e99, "duotrigintillion"),
    (1e102, "trestrigintillion"),
    (1e105, "quattuortrigintillion"),
    (1e108, "quinquatrigintillion"),
    (1e111, "sestrigintillion"),
    (1e114, "septentrigintillion"),
    (1e117, "octotrigintillion"),
    (1e120, "noventrigintillion"),
    (1e123, "quadragintillion"),
    (1e153, "quinquagintillion"),
    (1e183, "sexagintillion"),
    (1e213, "septuagintillion"),
    (1e243, "octogintillion"),
    (1e273, "nonagintillion"),
    (1e303, "centillion"),
    (1e306, "uncentillion"),
    (1e309, "duocentillion"),
    (1e312, "trescentillion"),
    (1e333, "decicentillion"),
    (1e336, "undecicentillion"),
    (1e363, "viginticentillion"),
    (1e366, "unviginticentillion"),
    (1e393, "trigintacentillion"),
    (1e423, "quadragintacentillion"),
    (1e453, "quinquagintacentillion"),
    (1e483, "sexagintacentillion"),
    (1e513, "septuagintacentillion"),
    (1e543, "octogintacentillion"),
    (1e573, "nonagintacentillion"),
    (1e603, "ducentillion"),
    (1e903, "trecentillion"),
    (1e1203, "quadringentillion"),
    (1e1503, "quingentillion"),
    (1e1803, "sescentillion"),
    (1e2103, "septingentillion"),
    (1e2403, "octingentillion"),
    (1e2703, "nongentillion"),
    (1e3003, "millinillion")
])
# Value -> Czech ordinal word (1-20, the tens, 100 and 1000)
_ORDINAL_BASE_CS = {
    1: "první", 2: "druhý", 3: "třetí", 4: "čtvrtý", 5: "pátý",
    6: "šestý", 7: "sedmý", 8: "osmý", 9: "devátý", 10: "desátý",
    11: "jedenáctý", 12: "dvanáctý", 13: "třináctý", 14: "čtrnáctý",
    15: "patnáctý", 16: "šestnáctý", 17: "sedmnáctý", 18: "osmnáctý",
    19: "devatenáctý",
    20: "dvacátý", 30: "třicátý", 40: "čtyřicátý", 50: "padesátý",
    60: "šedesátý", 70: "sedmdesátý", 80: "osmdesátý", 90: "devadesátý",
    1e2: "stý",
    1e3: "tisící",
}
# Short-scale ordinals for the large magnitudes; the base ordinals are
# merged in afterwards.
_SHORT_ORDINAL_CS = {
    1e6: "miliontý", 1e9: "billiontý", 1e12: "trilliontý",
    1e15: "quadrilliontý", 1e18: "quintilliontý", 1e21: "sextilliontý",
    1e24: "septilliontý", 1e27: "oktiliontý", 1e30: "nonilliontý",
    1e33: "decilliontý",
    # TODO > 1e33
}
_SHORT_ORDINAL_CS.update(_ORDINAL_BASE_CS)
# Long-scale ordinals for the large magnitudes; the base ordinals are
# merged in afterwards.
_LONG_ORDINAL_CS = {
    1e6: "miliontý",
    1e9: "miliardtý",
    1e12: "biliontý",
    1e15: "biliardtý",
    1e18: "triliontý",
    1e21: "triliardtý",
    1e24: "kvadriliontý",
    1e27: "kvadriliardtý",
    1e30: "kvintiliontý",
    1e33: "kvintiliardtý",
    1e36: "sextiliontý",
    1e39: "sextiliardtý",
    1e42: "septiliontý",
    1e45: "septiliardtý",
    # was the cardinal "oktilion"; every other entry is an ordinal in "-tý"
    1e48: "oktiliontý",
    1e51: "oktiliardtý",
    1e54: "noniliontý",
    1e57: "noniliardtý",
    1e60: "deciliontý"
    # TODO > 1e60
}
_LONG_ORDINAL_CS.update(_ORDINAL_BASE_CS)
# Months
# Zero-based month index -> English month name
_MONTHS_CONVERSION = {
    0: "january", 1: "february", 2: "march", 3: "april",
    4: "may", 5: "june", 6: "july", 7: "august",
    8: "september", 9: "october", 10: "november", 11: "december",
}

# Czech month names, January first
_MONTHS_CZECH = [
    "leden", "únor", "březen", "duben", "květen", "červen",
    "červenec", "srpen", "září", "říjen", "listopad", "prosinec",
]
# Time
# Inflected Czech time-unit word -> English unit name
_TIME_UNITS_CONVERSION = {
    "mikrosekund": "microseconds",
    "milisekund": "milliseconds",
    "sekundu": "seconds", "sekundy": "seconds", "sekund": "seconds",
    "minutu": "minutes", "minuty": "minutes", "minut": "minutes",
    "hodin": "hours",
    "den": "days",  # 1 day
    "dny": "days",  # 2-4 days
    "dnů": "days",  # 5+ days
    "dní": "days",  # 5+ days - different inflection
    "dne": "days",  # a half day
    "týden": "weeks", "týdny": "weeks", "týdnů": "weeks",
}

View File

@@ -0,0 +1,133 @@
# Message reported when a function has no Danish implementation.
_FUNCTION_NOT_IMPLEMENTED_WARNING = "Denne funktion er ikke implementeret i 'dk'."

# Danish number words -> value.
# 'fyrre' and 'hundrede' were previously misspelled ('fyrrre',
# 'hunderede') and so could never match real input.
_DA_NUMBERS = {
    'nul': 0,
    'en': 1,
    'et': 1,
    'to': 2,
    'tre': 3,
    'fire': 4,
    'fem': 5,
    'seks': 6,
    'syv': 7,
    'otte': 8,
    'ni': 9,
    'ti': 10,
    'elve': 11,
    'tolv': 12,
    'tretten': 13,
    'fjorten': 14,
    'femten': 15,
    'seksten': 16,
    'sytten': 17,
    'atten': 18,
    'nitten': 19,
    'tyve': 20,
    'enogtyve': 21,
    'toogtyve': 22,
    'treogtyve': 23,
    'fireogtyve': 24,
    'femogtyve': 25,
    'seksogtyve': 26,
    'syvogtyve': 27,
    'otteogtyve': 28,
    'niogtyve': 29,
    'tredive': 30,
    'enogtredive': 31,
    'fyrre': 40,
    'halvtres': 50,
    'tres': 60,
    'halvfjers': 70,
    'firs': 80,
    'halvfems': 90,
    'hundrede': 100,
    'tohundrede': 200,
    'trehundrede': 300,
    'firehundrede': 400,
    'femhundrede': 500,
    'sekshundrede': 600,
    'syvhundrede': 700,
    'ottehundrede': 800,
    'nihundrede': 900,
    'tusinde': 1000,
    'million': 1000000
}
# Danish month names, January first.
# The previous list carried German spellings ('märz', 'mai', 'dezember').
_MONTHS_DA = ['januar', 'februar', 'marts', 'april', 'maj', 'juni',
              'juli', 'august', 'september', 'oktober', 'november',
              'december']
# Value -> Danish cardinal word (0-20, the tens, and 100)
_NUM_STRING_DA = {
    0: "nul", 1: "en", 2: "to", 3: "tre", 4: "fire",
    5: "fem", 6: "seks", 7: "syv", 8: "otte", 9: "ni",
    10: "ti", 11: "elve", 12: "tolv", 13: "tretten", 14: "fjorten",
    15: "femten", 16: "seksten", 17: "sytten", 18: "atten", 19: "nitten",
    20: "tyve", 30: "tredive", 40: "fyrre", 50: "halvtres",
    60: "tres", 70: "halvfjers", 80: "firs", 90: "halvfems",
    100: "hundrede",
}
# Names of the magnitudes, smallest first
_NUM_POWERS_OF_TEN = [
    "hundred", "tusind",
    "million", "milliard",
    "billion", "billiard",
    "trillion", "trilliard",
]
# Denominator -> Danish name of the fraction.
# 'tredjedel' and 'sekstendedel' were previously misspelled
# ('trediedel', 'sejstendedel').
_FRACTION_STRING_DA = {
    2: 'halv',
    3: 'tredjedel',
    4: 'fjerdedel',
    5: 'femtedel',
    6: 'sjettedel',
    7: 'syvendedel',
    8: 'ottendedel',
    9: 'niendedel',
    10: 'tiendedel',
    11: 'elftedel',
    12: 'tolvtedel',
    13: 'trettendedel',
    14: 'fjortendedel',
    15: 'femtendedel',
    16: 'sekstendedel',
    17: 'syttendedel',
    18: 'attendedel',
    19: 'nittendedel',
    20: 'tyvendedel'
}
# Numbers below 1 million are written as a single word in Danish, which
# yields very long words.
# In some circumstances it may be better to separate the individual words:
#   _EXTRA_SPACE_DA = " "  separates numbers below 1 million
#                          (orthographically incorrect)
#   _EXTRA_SPACE_DA = ""   keeps the correct spelling; this is the standard
# _EXTRA_SPACE_DA = " "
_EXTRA_SPACE_DA = ""

View File

@@ -0,0 +1,135 @@
# German number words -> value
_DE_NUMBERS = {
    "null": 0,
    # every inflected form of "one"
    "ein": 1, "eins": 1, "eine": 1, "einer": 1,
    "einem": 1, "einen": 1, "eines": 1,
    "zwei": 2, "drei": 3, "vier": 4, "fünf": 5, "sechs": 6,
    "sieben": 7, "acht": 8, "neun": 9, "zehn": 10,
    "elf": 11, "zwölf": 12, "dreizehn": 13, "vierzehn": 14,
    "fünfzehn": 15, "sechzehn": 16, "siebzehn": 17, "achtzehn": 18,
    "neunzehn": 19,
    "zwanzig": 20,
    "einundzwanzig": 21, "zweiundzwanzig": 22, "dreiundzwanzig": 23,
    "vierundzwanzig": 24, "fünfundzwanzig": 25, "sechsundzwanzig": 26,
    "siebenundzwanzig": 27, "achtundzwanzig": 28, "neunundzwanzig": 29,
    "dreißig": 30,
    "einunddreißig": 31,
    "vierzig": 40, "fünfzig": 50, "sechzig": 60, "siebzig": 70,
    "achtzig": 80, "neunzig": 90,
    "hundert": 100,
    "zweihundert": 200, "dreihundert": 300, "vierhundert": 400,
    "fünfhundert": 500, "sechshundert": 600, "siebenhundert": 700,
    "achthundert": 800, "neunhundert": 900,
    "tausend": 1000,
    "million": 1000000,
}
# German month names, January first
_MONTHS_DE = [
    "januar", "februar", "märz", "april", "mai", "juni",
    "juli", "august", "september", "oktober", "november", "dezember",
]
# Value -> German cardinal word (0-20, the tens, and 100)
_NUM_STRING_DE = {
    0: "null",
    1: "ein",  # ein Viertel etc., nicht eins Viertel
    2: "zwei", 3: "drei", 4: "vier", 5: "fünf", 6: "sechs",
    7: "sieben", 8: "acht", 9: "neun", 10: "zehn",
    11: "elf", 12: "zwölf", 13: "dreizehn", 14: "vierzehn",
    15: "fünfzehn", 16: "sechzehn", 17: "siebzehn", 18: "achtzehn",
    19: "neunzehn",
    20: "zwanzig", 30: "dreißig", 40: "vierzig", 50: "fünfzig",
    60: "sechzig", 70: "siebzig", 80: "achtzig", 90: "neunzig",
    100: "hundert",
}
# German uses "long scale" https://en.wikipedia.org/wiki/Long_and_short_scales
# Currently, numbers are limited to 1000000000000000000000000,
# but _NUM_POWERS_OF_TEN can be extended to include additional number words
_NUM_POWERS_OF_TEN_DE = [
    "",
    "tausend",
    "Million", "Milliarde",
    "Billion", "Billiarde",
    "Trillion", "Trilliarde",
]
# Denominator -> German name of the fraction
_FRACTION_STRING_DE = {
    2: "halb", 3: "drittel", 4: "viertel", 5: "fünftel",
    6: "sechstel", 7: "siebtel", 8: "achtel", 9: "neuntel",
    10: "zehntel", 11: "elftel", 12: "zwölftel", 13: "dreizehntel",
    14: "vierzehntel", 15: "fünfzehntel", 16: "sechzehntel",
    17: "siebzehntel", 18: "achtzehntel", 19: "neunzehntel",
    20: "zwanzigstel",
}
# Numbers below 1 million are written as a single word in German, which
# yields very long words.
# In some circumstances it may be better to separate the individual words:
#   _EXTRA_SPACE_DE = " "  separates numbers below 1 million
#                          (orthographically incorrect)
#   _EXTRA_SPACE_DE = ""   keeps the correct spelling; this is the standard
# _EXTRA_SPACE_DE = " "
_EXTRA_SPACE_DE = ""

View File

@@ -0,0 +1,297 @@
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from collections import OrderedDict
from .parse_common import invert_dict
_FUNCTION_NOT_IMPLEMENTED_WARNING = "The requested function is not implemented in English."
_ARTICLES_EN = {'a', 'an', 'the'}
_NUM_STRING_EN = {
0: 'zero',
1: 'one',
2: 'two',
3: 'three',
4: 'four',
5: 'five',
6: 'six',
7: 'seven',
8: 'eight',
9: 'nine',
10: 'ten',
11: 'eleven',
12: 'twelve',
13: 'thirteen',
14: 'fourteen',
15: 'fifteen',
16: 'sixteen',
17: 'seventeen',
18: 'eighteen',
19: 'nineteen',
20: 'twenty',
30: 'thirty',
40: 'forty',
50: 'fifty',
60: 'sixty',
70: 'seventy',
80: 'eighty',
90: 'ninety'
}
_FRACTION_STRING_EN = {
2: 'half',
3: 'third',
4: 'forth',
5: 'fifth',
6: 'sixth',
7: 'seventh',
8: 'eigth',
9: 'ninth',
10: 'tenth',
11: 'eleventh',
12: 'twelveth',
13: 'thirteenth',
14: 'fourteenth',
15: 'fifteenth',
16: 'sixteenth',
17: 'seventeenth',
18: 'eighteenth',
19: 'nineteenth',
20: 'twentyith'
}
# Long-scale English magnitude names, keyed by the magnitude they denote and
# listed in ascending order.
# NOTE(review): a Python float literal above ~1.8e308 evaluates to inf, so the
# keys written as 1e312, 1e336 and 1e366 are all the same key (inf); the
# mapping keeps a single entry for them and the last value wins.
_LONG_SCALE_EN = OrderedDict([
(100, 'hundred'),
(1000, 'thousand'),
(1000000, 'million'),
(1e12, "billion"),
(1e18, 'trillion'),
(1e24, "quadrillion"),
(1e30, "quintillion"),
(1e36, "sextillion"),
(1e42, "septillion"),
(1e48, "octillion"),
(1e54, "nonillion"),
(1e60, "decillion"),
(1e66, "undecillion"),
(1e72, "duodecillion"),
(1e78, "tredecillion"),
(1e84, "quattuordecillion"),
(1e90, "quinquadecillion"),
(1e96, "sedecillion"),
(1e102, "septendecillion"),
(1e108, "octodecillion"),
(1e114, "novendecillion"),
(1e120, "vigintillion"),
(1e306, "unquinquagintillion"),
(1e312, "duoquinquagintillion"),
(1e336, "sesquinquagintillion"),
(1e366, "unsexagintillion")
])
_SHORT_SCALE_EN = OrderedDict([
(100, 'hundred'),
(1000, 'thousand'),
(1000000, 'million'),
(1e9, "billion"),
(1e12, 'trillion'),
(1e15, "quadrillion"),
(1e18, "quintillion"),
(1e21, "sextillion"),
(1e24, "septillion"),
(1e27, "octillion"),
(1e30, "nonillion"),
(1e33, "decillion"),
(1e36, "undecillion"),
(1e39, "duodecillion"),
(1e42, "tredecillion"),
(1e45, "quattuordecillion"),
(1e48, "quinquadecillion"),
(1e51, "sedecillion"),
(1e54, "septendecillion"),
(1e57, "octodecillion"),
(1e60, "novendecillion"),
(1e63, "vigintillion"),
(1e66, "unvigintillion"),
(1e69, "uuovigintillion"),
(1e72, "tresvigintillion"),
(1e75, "quattuorvigintillion"),
(1e78, "quinquavigintillion"),
(1e81, "qesvigintillion"),
(1e84, "septemvigintillion"),
(1e87, "octovigintillion"),
(1e90, "novemvigintillion"),
(1e93, "trigintillion"),
(1e96, "untrigintillion"),
(1e99, "duotrigintillion"),
(1e102, "trestrigintillion"),
(1e105, "quattuortrigintillion"),
(1e108, "quinquatrigintillion"),
(1e111, "sestrigintillion"),
(1e114, "septentrigintillion"),
(1e117, "octotrigintillion"),
(1e120, "noventrigintillion"),
(1e123, "quadragintillion"),
(1e153, "quinquagintillion"),
(1e183, "sexagintillion"),
(1e213, "septuagintillion"),
(1e243, "octogintillion"),
(1e273, "nonagintillion"),
(1e303, "centillion"),
(1e306, "uncentillion"),
(1e309, "duocentillion"),
(1e312, "trescentillion"),
(1e333, "decicentillion"),
(1e336, "undecicentillion"),
(1e363, "viginticentillion"),
(1e366, "unviginticentillion"),
(1e393, "trigintacentillion"),
(1e423, "quadragintacentillion"),
(1e453, "quinquagintacentillion"),
(1e483, "sexagintacentillion"),
(1e513, "septuagintacentillion"),
(1e543, "ctogintacentillion"),
(1e573, "nonagintacentillion"),
(1e603, "ducentillion"),
(1e903, "trecentillion"),
(1e1203, "quadringentillion"),
(1e1503, "quingentillion"),
(1e1803, "sescentillion"),
(1e2103, "septingentillion"),
(1e2403, "octingentillion"),
(1e2703, "nongentillion"),
(1e3003, "millinillion")
])
_ORDINAL_BASE_EN = {
1: 'first',
2: 'second',
3: 'third',
4: 'fourth',
5: 'fifth',
6: 'sixth',
7: 'seventh',
8: 'eighth',
9: 'ninth',
10: 'tenth',
11: 'eleventh',
12: 'twelfth',
13: 'thirteenth',
14: 'fourteenth',
15: 'fifteenth',
16: 'sixteenth',
17: 'seventeenth',
18: 'eighteenth',
19: 'nineteenth',
20: 'twentieth',
30: 'thirtieth',
40: "fortieth",
50: "fiftieth",
60: "sixtieth",
70: "seventieth",
80: "eightieth",
90: "ninetieth",
1e2: "hundredth",
1e3: "thousandth"
}
_SHORT_ORDINAL_EN = {
1e6: "millionth",
1e9: "billionth",
1e12: "trillionth",
1e15: "quadrillionth",
1e18: "quintillionth",
1e21: "sextillionth",
1e24: "septillionth",
1e27: "octillionth",
1e30: "nonillionth",
1e33: "decillionth"
# TODO > 1e33
}
_SHORT_ORDINAL_EN.update(_ORDINAL_BASE_EN)
_LONG_ORDINAL_EN = {
1e6: "millionth",
1e12: "billionth",
1e18: "trillionth",
1e24: "quadrillionth",
1e30: "quintillionth",
1e36: "sextillionth",
1e42: "septillionth",
1e48: "octillionth",
1e54: "nonillionth",
1e60: "decillionth"
# TODO > 1e60
}
_LONG_ORDINAL_EN.update(_ORDINAL_BASE_EN)
# negate next number (-2 = 0 - 2)
_NEGATIVES_EN = {"negative", "minus"}
# sum the next number (twenty two = 20 + 2)
_SUMS_EN = {'twenty', '20', 'thirty', '30', 'forty', '40', 'fifty', '50',
'sixty', '60', 'seventy', '70', 'eighty', '80', 'ninety', '90'}
def _generate_plurals_en(originals):
"""
Return a new set or dict containing the plural form of the original values,
In English this means all with 's' appended to them.
Args:
originals set(str) or dict(str, any): values to pluralize
Returns:
set(str) or dict(str, any)
"""
# TODO migrate to https://github.com/MycroftAI/lingua-franca/pull/36
if isinstance(originals, dict):
return {key + 's': value for key, value in originals.items()}
return {value + "s" for value in originals}
_MULTIPLIES_LONG_SCALE_EN = set(_LONG_SCALE_EN.values()) | \
_generate_plurals_en(_LONG_SCALE_EN.values())
_MULTIPLIES_SHORT_SCALE_EN = set(_SHORT_SCALE_EN.values()) | \
_generate_plurals_en(_SHORT_SCALE_EN.values())
# split sentence parse separately and sum ( 2 and a half = 2 + 0.5 )
_FRACTION_MARKER_EN = {"and"}
# decimal marker ( 1 point 5 = 1 + 0.5)
_DECIMAL_MARKER_EN = {"point", "dot"}
_STRING_NUM_EN = invert_dict(_NUM_STRING_EN)
_STRING_NUM_EN.update(_generate_plurals_en(_STRING_NUM_EN))
_SPOKEN_EXTRA_NUM_EN = {
"half": 0.5,
"halves": 0.5,
"couple": 2
}
_STRING_SHORT_ORDINAL_EN = invert_dict(_SHORT_ORDINAL_EN)
_STRING_LONG_ORDINAL_EN = invert_dict(_LONG_ORDINAL_EN)

View File

@@ -0,0 +1,313 @@
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# NOTE: This file has no use yet. It needs to be called from other functions
from collections import OrderedDict
_ARTICLES_ES = {'el', 'la', 'los', 'las'}
_NUM_STRING_ES = {
0: 'cero',
1: 'uno',
2: 'dos',
3: 'tres',
4: 'cuatro',
5: 'cinco',
6: 'seis',
7: 'siete',
8: 'ocho',
9: 'nueve',
10: 'diez',
11: 'once',
12: 'doce',
13: 'trece',
14: 'catorce',
15: 'quince',
16: 'dieciséis',
17: 'diecisete',
18: 'dieciocho',
19: 'diecinueve',
20: 'veinte',
30: 'treinta',
40: 'cuarenta',
50: 'cincuenta',
60: 'sesenta',
70: 'setenta',
80: 'ochenta',
90: 'noventa'
}
_STRING_NUM_ES = {
"cero": 0,
"un": 1,
"uno": 1,
"una": 1,
"dos": 2,
"tres": 3,
"trés": 3,
"cuatro": 4,
"cinco": 5,
"seis": 6,
"siete": 7,
"ocho": 8,
"nueve": 9,
"diez": 10,
"once": 11,
"doce": 12,
"trece": 13,
"catorce": 14,
"quince": 15,
"dieciseis": 16,
"dieciséis": 16,
"diecisiete": 17,
"dieciocho": 18,
"diecinueve": 19,
"veinte": 20,
"veintiuno": 21,
"veintid�s": 22,
"veintitr�s": 23,
"veintidos": 22,
"veintitres": 23,
"veintitrés": 23,
"veinticuatro": 24,
"veinticinco": 25,
"veintiséis": 26,
"veintiseis": 26,
"veintisiete": 27,
"veintiocho": 28,
"veintinueve": 29,
"treinta": 30,
"cuarenta": 40,
"cincuenta": 50,
"sesenta": 60,
"setenta": 70,
"ochenta": 80,
"noventa": 90,
"cien": 100,
"ciento": 100,
"doscientos": 200,
"doscientas": 200,
"trescientos": 300,
"trescientas": 300,
"cuatrocientos": 400,
"cuatrocientas": 400,
"quinientos": 500,
"quinientas": 500,
"seiscientos": 600,
"seiscientas": 600,
"setecientos": 700,
"setecientas": 700,
"ochocientos": 800,
"ochocientas": 800,
"novecientos": 900,
"novecientas": 900,
"mil": 1000}
_FRACTION_STRING_ES = {
2: 'medio',
3: 'tercio',
4: 'cuarto',
5: 'quinto',
6: 'sexto',
7: 'séptimo',
8: 'octavo',
9: 'noveno',
10: 'décimo',
11: 'onceavo',
12: 'doceavo',
13: 'treceavo',
14: 'catorceavo',
15: 'quinceavo',
16: 'dieciseisavo',
17: 'diecisieteavo',
18: 'dieciochoavo',
19: 'diecinueveavo',
20: 'veinteavo'
}
# https://www.grobauer.at/es_eur/zahlnamen.php
_LONG_SCALE_ES = OrderedDict([
(100, 'centena'),
(1000, 'millar'),
(1000000, 'millón'),
(1e9, "millardo"),
(1e12, "billón"),
(1e18, 'trillón'),
(1e24, "cuatrillón"),
(1e30, "quintillón"),
(1e36, "sextillón"),
(1e42, "septillón"),
(1e48, "octillón"),
(1e54, "nonillón"),
(1e60, "decillón"),
(1e66, "undecillón"),
(1e72, "duodecillón"),
(1e78, "tredecillón"),
(1e84, "cuatrodecillón"),
(1e90, "quindecillón"),
(1e96, "sexdecillón"),
(1e102, "septendecillón"),
(1e108, "octodecillón"),
(1e114, "novendecillón"),
(1e120, "vigintillón"),
(1e306, "unquinquagintillón"),
(1e312, "duoquinquagintillón"),
(1e336, "sexquinquagintillón"),
(1e366, "unsexagintillón")
])
_SHORT_SCALE_ES = OrderedDict([
(100, 'centena'),
(1000, 'millar'),
(1000000, 'millón'),
(1e9, "billón"),
(1e12, 'trillón'),
(1e15, "cuatrillón"),
(1e18, "quintillón"),
(1e21, "sextillón"),
(1e24, "septillón"),
(1e27, "octillón"),
(1e30, "nonillón"),
(1e33, "decillón"),
(1e36, "undecillón"),
(1e39, "duodecillón"),
(1e42, "tredecillón"),
(1e45, "cuatrodecillón"),
(1e48, "quindecillón"),
(1e51, "sexdecillón"),
(1e54, "septendecillón"),
(1e57, "octodecillón"),
(1e60, "novendecillón"),
(1e63, "vigintillón"),
(1e66, "unvigintillón"),
(1e69, "uuovigintillón"),
(1e72, "tresvigintillón"),
(1e75, "quattuorvigintillón"),
(1e78, "quinquavigintillón"),
(1e81, "qesvigintillón"),
(1e84, "septemvigintillón"),
(1e87, "octovigintillón"),
(1e90, "novemvigintillón"),
(1e93, "trigintillón"),
(1e96, "untrigintillón"),
(1e99, "duotrigintillón"),
(1e102, "trestrigintillón"),
(1e105, "quattuortrigintillón"),
(1e108, "quinquatrigintillón"),
(1e111, "sestrigintillón"),
(1e114, "septentrigintillón"),
(1e117, "octotrigintillón"),
(1e120, "noventrigintillón"),
(1e123, "quadragintillón"),
(1e153, "quinquagintillón"),
(1e183, "sexagintillón"),
(1e213, "septuagintillón"),
(1e243, "octogintillón"),
(1e273, "nonagintillón"),
(1e303, "centillón"),
(1e306, "uncentillón"),
(1e309, "duocentillón"),
(1e312, "trescentillón"),
(1e333, "decicentillón"),
(1e336, "undecicentillón"),
(1e363, "viginticentillón"),
(1e366, "unviginticentillón"),
(1e393, "trigintacentillón"),
(1e423, "quadragintacentillón"),
(1e453, "quinquagintacentillón"),
(1e483, "sexagintacentillón"),
(1e513, "septuagintacentillón"),
(1e543, "ctogintacentillón"),
(1e573, "nonagintacentillón"),
(1e603, "ducentillón"),
(1e903, "trecentillón"),
(1e1203, "quadringentillón"),
(1e1503, "quingentillón"),
(1e1803, "sexcentillón"),
(1e2103, "septingentillón"),
(1e2403, "octingentillón"),
(1e2703, "nongentillón"),
(1e3003, "millinillón")
])
# TODO: female forms.
_ORDINAL_STRING_BASE_ES = {
1: 'primero',
2: 'segundo',
3: 'tercero',
4: 'cuarto',
5: 'quinto',
6: 'sexto',
7: 'séptimo',
8: 'octavo',
9: 'noveno',
10: 'décimo',
11: 'undécimo',
12: 'duodécimo',
13: 'decimotercero',
14: 'decimocuarto',
15: 'decimoquinto',
16: 'decimosexto',
17: 'decimoséptimo',
18: 'decimoctavo',
19: 'decimonoveno',
20: 'vigésimo',
30: 'trigésimo',
40: "cuadragésimo",
50: "quincuagésimo",
60: "sexagésimo",
70: "septuagésimo",
80: "octogésimo",
90: "nonagésimo",
10e3: "centésimó",
1e3: "milésimo"
}
_SHORT_ORDINAL_STRING_ES = {
1e6: "millonésimo",
1e9: "milmillonésimo",
1e12: "billonésimo",
1e15: "milbillonésimo",
1e18: "trillonésimo",
1e21: "miltrillonésimo",
1e24: "cuatrillonésimo",
1e27: "milcuatrillonésimo",
1e30: "quintillonésimo",
1e33: "milquintillonésimo"
# TODO > 1e33
}
_SHORT_ORDINAL_STRING_ES.update(_ORDINAL_STRING_BASE_ES)
_LONG_ORDINAL_STRING_ES = {
1e6: "millonésimo",
1e12: "billionth",
1e18: "trillonésimo",
1e24: "cuatrillonésimo",
1e30: "quintillonésimo",
1e36: "sextillonésimo",
1e42: "septillonésimo",
1e48: "octillonésimo",
1e54: "nonillonésimo",
1e60: "decillonésimo"
# TODO > 1e60
}
_LONG_ORDINAL_STRING_ES.update(_ORDINAL_STRING_BASE_ES)

View File

@@ -0,0 +1,115 @@
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from collections import OrderedDict
from .parse_common import invert_dict
_FUNCTION_NOT_IMPLEMENTED_WARNING = "تابع خواسته شده در زبان فارسی پیاده سازی نشده است."
_FRACTION_STRING_FA = {
2: 'دوم',
3: 'سوم',
4: 'چهارم',
5: 'پنجم',
6: 'ششم',
7: 'هفتم',
8: 'هشتم',
9: 'نهم',
10: 'دهم',
11: 'یازدهم',
12: 'دوازدهم',
13: 'سیزدهم',
14: 'چهاردهم',
15: 'پونزدهم',
16: 'شونزدهم',
17: 'هیفدهم',
18: 'هیجدهم',
19: 'نوزدهم',
20: 'بیستم'
}
_FARSI_ONES = [
"",
"یک",
"دو",
"سه",
"چهار",
"پنج",
"شش",
"هفت",
"هشت",
"نه",
"ده",
"یازده",
"دوازده",
"سیزده",
"چهارده",
"پونزده",
"شونزده",
"هیفده",
"هیجده",
"نوزده",
]
_FARSI_TENS = [
"",
"ده",
"بیست",
"سی",
"چهل",
"پنجاه",
"شصت",
"هفتاد",
"هشتاد",
"نود",
]
_FARSI_HUNDREDS = [
"",
"صد",
"دویست",
"سیصد",
"چهارصد",
"پانصد",
"ششصد",
"هفتصد",
"هشتصد",
"نهصد",
]
_FARSI_BIG = [
'',
'هزار',
'میلیون',
"میلیارد",
'تریلیون',
"تریلیارد",
]
_FORMAL_VARIANT = {
'هفده': 'هیفده',
'هجده': 'هیجده',
'شانزده': 'شونزده',
'پانزده': 'پونزده',
}
_FARSI_FRAC = ["", "ده", "صد"]
_FARSI_FRAC_BIG = ["", "هزار", "میلیونی", "میلیاردی"]
_FARSI_SEPERATOR = ' و '

View File

@@ -0,0 +1,98 @@
# Indefinite articles ["un", "une"] cannot be suppressed:
# in French, "un cheval" means "a horse" or "one horse".
_ARTICLES_FR = ["le", "la", "du", "de", "les", "des"]
_NUMBERS_FR = {
"zéro": 0,
"un": 1,
"une": 1,
"deux": 2,
"trois": 3,
"quatre": 4,
"cinq": 5,
"six": 6,
"sept": 7,
"huit": 8,
"neuf": 9,
"dix": 10,
"onze": 11,
"douze": 12,
"treize": 13,
"quatorze": 14,
"quinze": 15,
"seize": 16,
"vingt": 20,
"trente": 30,
"quarante": 40,
"cinquante": 50,
"soixante": 60,
"soixante-dix": 70,
"septante": 70,
"quatre-vingt": 80,
"quatre-vingts": 80,
"octante": 80,
"huitante": 80,
"quatre-vingt-dix": 90,
"nonante": 90,
"cent": 100,
"cents": 100,
"mille": 1000,
"mil": 1000,
"millier": 1000,
"milliers": 1000,
"million": 1000000,
"millions": 1000000,
"milliard": 1000000000,
"milliards": 1000000000}
_ORDINAL_ENDINGS_FR = ("er", "re", "ère", "nd", "nde" "ième", "ème", "e")
_NUM_STRING_FR = {
0: 'zéro',
1: 'un',
2: 'deux',
3: 'trois',
4: 'quatre',
5: 'cinq',
6: 'six',
7: 'sept',
8: 'huit',
9: 'neuf',
10: 'dix',
11: 'onze',
12: 'douze',
13: 'treize',
14: 'quatorze',
15: 'quinze',
16: 'seize',
20: 'vingt',
30: 'trente',
40: 'quarante',
50: 'cinquante',
60: 'soixante',
70: 'soixante-dix',
80: 'quatre-vingt',
90: 'quatre-vingt-dix'
}
_FRACTION_STRING_FR = {
2: 'demi',
3: 'tiers',
4: 'quart',
5: 'cinquième',
6: 'sixième',
7: 'septième',
8: 'huitième',
9: 'neuvième',
10: 'dixième',
11: 'onzième',
12: 'douzième',
13: 'treizième',
14: 'quatorzième',
15: 'quinzième',
16: 'seizième',
17: 'dix-septième',
18: 'dix-huitième',
19: 'dix-neuvième',
20: 'vingtième'
}

View File

@@ -0,0 +1,77 @@
_MONTHS_HU = ['január', 'február', 'március', 'április', 'május', 'június',
'július', 'augusztus', 'szeptember', 'október', 'november',
'december']
_NUM_STRING_HU = {
0: 'nulla',
1: 'egy',
2: 'kettő',
3: 'három',
4: 'négy',
5: 'öt',
6: 'hat',
7: 'hét',
8: 'nyolc',
9: 'kilenc',
10: 'tíz',
11: 'tizenegy',
12: 'tizenkettő',
13: 'tizenhárom',
14: 'tizennégy',
15: 'tizenöt',
16: 'tizenhat',
17: 'tizenhét',
18: 'tizennyolc',
19: 'tizenkilenc',
20: 'húsz',
30: 'harminc',
40: 'negyven',
50: 'ötven',
60: 'hatvan',
70: 'hetven',
80: 'nyolcvan',
90: 'kilencven',
100: 'száz'
}
# Hungarian uses "long scale"
# https://en.wikipedia.org/wiki/Long_and_short_scales
# Currently, numbers are limited to 1000000000000000000000000,
# but _NUM_POWERS_OF_TEN can be extended to include additional number words
_NUM_POWERS_OF_TEN = [
'', 'ezer', 'millió', 'milliárd', 'billió', 'billiárd', 'trillió',
'trilliárd'
]
_FRACTION_STRING_HU = {
2: 'fél',
3: 'harmad',
4: 'negyed',
5: 'ötöd',
6: 'hatod',
7: 'heted',
8: 'nyolcad',
9: 'kilenced',
10: 'tized',
11: 'tizenegyed',
12: 'tizenketted',
13: 'tizenharmad',
14: 'tizennegyed',
15: 'tizenötöd',
16: 'tizenhatod',
17: 'tizenheted',
18: 'tizennyolcad',
19: 'tizenkilenced',
20: 'huszad'
}
# Numbers below 2 thousand are written as one word in Hungarian.
# Numbers above 2 thousand are separated by hyphens.
# In some circumstances it may be better to separate the individual words:
# Set _EXTRA_SPACE_HU=" " to separate numbers below 2 thousand
# (orthographically incorrect).
# Set _EXTRA_SPACE_HU="" for correct spelling; this is the standard.
# _EXTRA_SPACE_HU = " "
_EXTRA_SPACE_HU = ""

View File

@@ -0,0 +1,321 @@
import collections
_SHORT_ORDINAL_STRING_IT = {
1: 'primo',
2: 'secondo',
3: 'terzo',
4: 'quarto',
5: 'quinto',
6: 'sesto',
7: 'settimo',
8: 'ottavo',
9: 'nono',
10: 'decimo',
11: 'undicesimo',
12: 'dodicesimo',
13: 'tredicesimo',
14: 'quattordicesimo',
15: 'quindicesimo',
16: 'sedicesimo',
17: 'diciassettesimo',
18: 'diciottesimo',
19: 'diciannovesimo',
20: 'ventesimo',
30: 'trentesimo',
40: 'quarantesimo',
50: 'cinquantesimo',
60: 'sessantesimo',
70: 'settantesimo',
80: 'ottantesimo',
90: 'novantesimo',
1e2: 'centesimo',
1e3: 'millesimo',
1e6: 'milionesimo',
1e9: 'miliardesimo',
1e12: 'trilionesimo',
1e15: 'quadrilionesimo',
1e18: 'quintilionesim',
1e21: 'sestilionesimo',
1e24: 'settilionesimo',
1e27: 'ottilionesimo',
1e30: 'nonilionesimo',
1e33: 'decilionesimo'
# TODO > 1e-33
}
# per i > 10e12 modificata solo la desinenza: da sistemare a fine debug
_LONG_ORDINAL_STRING_IT = {
1: 'primo',
2: 'secondo',
3: 'terzo',
4: 'quarto',
5: 'quinto',
6: 'sesto',
7: 'settimo',
8: 'ottavo',
9: 'nono',
10: 'decimo',
11: 'undicesimo',
12: 'dodicesimo',
13: 'tredicesimo',
14: 'quattordicesimo',
15: 'quindicesimo',
16: 'sedicesimo',
17: 'diciassettesimo',
18: 'diciottesimo',
19: 'diciannovesimo',
20: 'ventesimo',
30: 'trentesimo',
40: 'quarantesimo',
50: 'cinquantesimo',
60: 'sessantesimo',
70: 'settantesimo',
80: 'ottantesimo',
90: 'novantesimo',
1e2: 'centesimo',
1e3: 'millesimo',
1e6: 'milionesimo',
1e12: 'bilionesimo',
1e18: 'trilionesimo',
1e24: 'quadrilionesimo',
1e30: 'quintilionesimo',
1e36: 'sestilionesimo',
1e42: 'settilionesimo',
1e48: 'ottilionesimo',
1e54: 'nonilionesimo',
1e60: 'decilionesimo'
# TODO > 1e60
}
# Indefinite articles ['un', 'una', 'un\''] cannot be suppressed:
# in Italian, 'un cavallo' means 'a horse' or 'one horse'.
_ARTICLES_IT = ['il', 'lo', 'la', 'i', 'gli', 'le']
_STRING_NUM_IT = {
'zero': 0,
'un': 1,
'uno': 1,
'una': 1,
'un\'': 1,
'due': 2,
'tre': 3,
'quattro': 4,
'cinque': 5,
'sei': 6,
'sette': 7,
'otto': 8,
'nove': 9,
'dieci': 10,
'undici': 11,
'dodici': 12,
'tredici': 13,
'quattordici': 14,
'quindici': 15,
'sedici': 16,
'diciassette': 17,
'diciotto': 18,
'diciannove': 19,
'venti': 20,
'vent': 20,
'trenta': 30,
'trent': 30,
'quaranta': 40,
'quarant': 40,
'cinquanta': 50,
'cinquant': 50,
'sessanta': 60,
'sessant': 60,
'settanta': 70,
'settant': 70,
'ottanta': 80,
'ottant': 80,
'novanta': 90,
'novant': 90,
'cento': 100,
'duecento': 200,
'trecento': 300,
'quattrocento': 400,
'cinquecento': 500,
'seicento': 600,
'settecento': 700,
'ottocento': 800,
'novecento': 900,
'mille': 1000,
'mila': 1000,
'centomila': 100000,
'milione': 1000000,
'miliardo': 1000000000,
'primo': 1,
'secondo': 2,
'mezzo': 0.5,
'mezza': 0.5,
'paio': 2,
'decina': 10,
'decine': 10,
'dozzina': 12,
'dozzine': 12,
'centinaio': 100,
'centinaia': 100,
'migliaio': 1000,
'migliaia': 1000
}
_NUM_STRING_IT = {
0: 'zero',
1: 'uno',
2: 'due',
3: 'tre',
4: 'quattro',
5: 'cinque',
6: 'sei',
7: 'sette',
8: 'otto',
9: 'nove',
10: 'dieci',
11: 'undici',
12: 'dodici',
13: 'tredici',
14: 'quattordici',
15: 'quindici',
16: 'sedici',
17: 'diciassette',
18: 'diciotto',
19: 'diciannove',
20: 'venti',
30: 'trenta',
40: 'quaranta',
50: 'cinquanta',
60: 'sessanta',
70: 'settanta',
80: 'ottanta',
90: 'novanta'
}
_FRACTION_STRING_IT = {
2: 'mezz',
3: 'terz',
4: 'quart',
5: 'quint',
6: 'sest',
7: 'settim',
8: 'ottav',
9: 'non',
10: 'decim',
11: 'undicesim',
12: 'dodicesim',
13: 'tredicesim',
14: 'quattordicesim',
15: 'quindicesim',
16: 'sedicesim',
17: 'diciassettesim',
18: 'diciottesim',
19: 'diciannovesim',
20: 'ventesim'
}
# fonte: http://tulengua.es/numeros-texto/default.aspx
_LONG_SCALE_IT = collections.OrderedDict([
(100, 'cento'),
(1000, 'mila'),
(1000000, 'milioni'),
(1e9, "miliardi"),
(1e12, "bilioni"),
(1e18, 'trilioni'),
(1e24, "quadrilioni"),
(1e30, "quintilioni"),
(1e36, "sestilioni"),
(1e42, "settilioni"),
(1e48, "ottillioni"),
(1e54, "nonillioni"),
(1e60, "decemillioni"),
(1e66, "undicilione"),
(1e72, "dodicilione"),
(1e78, "tredicilione"),
(1e84, "quattordicilione"),
(1e90, "quindicilione"),
(1e96, "sedicilione"),
(1e102, "diciasettilione"),
(1e108, "diciottilione"),
(1e114, "dicianovilione"),
(1e120, "vintilione"),
(1e306, "unquinquagintilione"),
(1e312, "duoquinquagintilione"),
(1e336, "sesquinquagintilione"),
(1e366, "unsexagintilione")
])
_SHORT_SCALE_IT = collections.OrderedDict([
(100, 'cento'),
(1000, 'mila'),
(1000000, 'milioni'),
(1e9, "miliardi"),
(1e12, 'bilioni'),
(1e15, "biliardi"),
(1e18, "trilioni"),
(1e21, "triliardi"),
(1e24, "quadrilioni"),
(1e27, "quadriliardi"),
(1e30, "quintilioni"),
(1e33, "quintiliardi"),
(1e36, "sestilioni"),
(1e39, "sestiliardi"),
(1e42, "settilioni"),
(1e45, "settiliardi"),
(1e48, "ottilioni"),
(1e51, "ottiliardi"),
(1e54, "nonilioni"),
(1e57, "noniliardi"),
(1e60, "decilioni"),
(1e63, "deciliardi"),
(1e66, "undicilioni"),
(1e69, "undiciliardi"),
(1e72, "dodicilioni"),
(1e75, "dodiciliardi"),
(1e78, "tredicilioni"),
(1e81, "trediciliardi"),
(1e84, "quattordicilioni"),
(1e87, "quattordiciliardi"),
(1e90, "quindicilioni"),
(1e93, "quindiciliardi"),
(1e96, "sedicilioni"),
(1e99, "sediciliardi"),
(1e102, "diciassettilioni"),
(1e105, "diciassettiliardi"),
(1e108, "diciottilioni"),
(1e111, "diciottiliardi"),
(1e114, "dicianovilioni"),
(1e117, "dicianoviliardi"),
(1e120, "vintilioni"),
(1e123, "vintiliardi"),
(1e153, "quinquagintillion"),
(1e183, "sexagintillion"),
(1e213, "septuagintillion"),
(1e243, "ottogintilioni"),
(1e273, "nonigintillioni"),
(1e303, "centilioni"),
(1e306, "uncentilioni"),
(1e309, "duocentilioni"),
(1e312, "trecentilioni"),
(1e333, "decicentilioni"),
(1e336, "undicicentilioni"),
(1e363, "viginticentilioni"),
(1e366, "unviginticentilioni"),
(1e393, "trigintacentilioni"),
(1e423, "quadragintacentillion"),
(1e453, "quinquagintacentillion"),
(1e483, "sexagintacentillion"),
(1e513, "septuagintacentillion"),
(1e543, "ctogintacentillion"),
(1e573, "nonagintacentillion"),
(1e603, "ducentillion"),
(1e903, "trecentillion"),
(1e1203, "quadringentillion"),
(1e1503, "quingentillion"),
(1e1803, "sescentillion"),
(1e2103, "septingentillion"),
(1e2403, "octingentillion"),
(1e2703, "nongentillion"),
(1e3003, "millinillion")
])

View File

@@ -0,0 +1,323 @@
# -*- coding: utf-8 -*-
#
# Copyright 2019 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from collections import OrderedDict
from .parse_common import invert_dict
_ARTICLES_NL = {'de', 'het'}
_NUM_STRING_NL = {
0: 'nul',
1: 'een',
2: 'twee',
3: 'drie',
4: 'vier',
5: 'vijf',
6: 'zes',
7: 'zeven',
8: 'acht',
9: 'negen',
10: 'tien',
11: 'elf',
12: 'twaalf',
13: 'dertien',
14: 'veertien',
15: 'vijftien',
16: 'zestien',
17: 'zeventien',
18: 'achttien',
19: 'negentien',
20: 'twintig',
30: 'dertig',
40: 'veertig',
50: 'vijftig',
60: 'zestig',
70: 'zeventig',
80: 'tachtig',
90: 'negentig'
}
_FRACTION_STRING_NL = {
2: 'half',
3: 'derde',
4: 'vierde',
5: 'vijfde',
6: 'zesde',
7: 'zevende',
8: 'achtste',
9: 'negende',
10: 'tiende',
11: 'elfde',
12: 'twaalfde',
13: 'dertiende',
14: 'veertiende',
15: 'vijftiende',
16: 'zestiende',
17: 'zeventiende',
18: 'achttiende',
19: 'negentiende',
20: 'twintigste'
}
_LONG_SCALE_NL = OrderedDict([
(100, 'honderd'),
(1000, 'duizend'),
(1000000, 'miljoen'),
(1e12, "biljoen"),
(1e18, 'triljoen'),
(1e24, "quadriljoen"),
(1e30, "quintillion"),
(1e36, "sextillion"),
(1e42, "septillion"),
(1e48, "octillion"),
(1e54, "nonillion"),
(1e60, "decillion"),
(1e66, "undecillion"),
(1e72, "duodecillion"),
(1e78, "tredecillion"),
(1e84, "quattuordecillion"),
(1e90, "quinquadecillion"),
(1e96, "sedecillion"),
(1e102, "septendecillion"),
(1e108, "octodecillion"),
(1e114, "novendecillion"),
(1e120, "vigintillion"),
(1e306, "unquinquagintillion"),
(1e312, "duoquinquagintillion"),
(1e336, "sesquinquagintillion"),
(1e366, "unsexagintillion")
])
_SHORT_SCALE_NL = OrderedDict([
(100, 'honderd'),
(1000, 'duizend'),
(1000000, 'miljoen'),
(1e9, "miljard"),
(1e12, 'biljoen'),
(1e15, "quadrillion"),
(1e18, "quintiljoen"),
(1e21, "sextiljoen"),
(1e24, "septiljoen"),
(1e27, "octiljoen"),
(1e30, "noniljoen"),
(1e33, "deciljoen"),
(1e36, "undeciljoen"),
(1e39, "duodeciljoen"),
(1e42, "tredeciljoen"),
(1e45, "quattuordeciljoen"),
(1e48, "quinquadeciljoen"),
(1e51, "sedeciljoen"),
(1e54, "septendeciljoen"),
(1e57, "octodeciljoen"),
(1e60, "novendeciljoen"),
(1e63, "vigintiljoen"),
(1e66, "unvigintiljoen"),
(1e69, "uuovigintiljoen"),
(1e72, "tresvigintiljoen"),
(1e75, "quattuorvigintiljoen"),
(1e78, "quinquavigintiljoen"),
(1e81, "qesvigintiljoen"),
(1e84, "septemvigintiljoen"),
(1e87, "octovigintiljoen"),
(1e90, "novemvigintiljoen"),
(1e93, "trigintiljoen"),
(1e96, "untrigintiljoen"),
(1e99, "duotrigintiljoen"),
(1e102, "trestrigintiljoen"),
(1e105, "quattuortrigintiljoen"),
(1e108, "quinquatrigintiljoen"),
(1e111, "sestrigintiljoen"),
(1e114, "septentrigintiljoen"),
(1e117, "octotrigintiljoen"),
(1e120, "noventrigintiljoen"),
(1e123, "quadragintiljoen"),
(1e153, "quinquagintiljoen"),
(1e183, "sexagintiljoen"),
(1e213, "septuagintiljoen"),
(1e243, "octogintiljoen"),
(1e273, "nonagintiljoen"),
(1e303, "centiljoen"),
(1e306, "uncentiljoen"),
(1e309, "duocentiljoen"),
(1e312, "trescentiljoen"),
(1e333, "decicentiljoen"),
(1e336, "undecicentiljoen"),
(1e363, "viginticentiljoen"),
(1e366, "unviginticentiljoen"),
(1e393, "trigintacentiljoen"),
(1e423, "quadragintacentiljoen"),
(1e453, "quinquagintacentiljoen"),
(1e483, "sexagintacentiljoen"),
(1e513, "septuagintacentiljoen"),
(1e543, "ctogintacentiljoen"),
(1e573, "nonagintacentiljoen"),
(1e603, "ducentiljoen"),
(1e903, "trecentiljoen"),
(1e1203, "quadringentiljoen"),
(1e1503, "quingentiljoen"),
(1e1803, "sescentiljoen"),
(1e2103, "septingentiljoen"),
(1e2403, "octingentiljoen"),
(1e2703, "nongentiljoen"),
(1e3003, "milliniljoen")
])
_ORDINAL_STRING_BASE_NL = {
1: 'eerste',
2: 'tweede',
3: 'derde',
4: 'vierde',
5: 'vijfde',
6: 'zesde',
7: 'zevende',
8: 'achtste',
9: 'negende',
10: 'tiende',
11: 'elfde',
12: 'twaalfde',
13: 'dertiende',
14: 'veertiende',
15: 'vijftiende',
16: 'zestiende',
17: 'zeventiende',
18: 'achttiende',
19: 'negentiende',
20: 'twintigste',
30: 'dertigste',
40: "veertigste",
50: "vijftigste",
60: "zestigste",
70: "zeventigste",
80: "tachtigste",
90: "negentigste",
10e3: "honderdste",
1e3: "duizendste"
}
_SHORT_ORDINAL_STRING_NL = {
1e6: "miloenste",
1e9: "miljardste",
1e12: "biljoenste",
1e15: "biljardste",
1e18: "triljoenste",
1e21: "trijardste",
1e24: "quadriljoenste",
1e27: "quadriljardste",
1e30: "quintiljoenste",
1e33: "quintiljardste"
# TODO > 1e-33
}
_SHORT_ORDINAL_STRING_NL.update(_ORDINAL_STRING_BASE_NL)
_LONG_ORDINAL_STRING_NL = {
1e6: "miloenste",
1e9: "miljardste",
1e12: "biljoenste",
1e15: "biljardste",
1e18: "triljoenste",
1e21: "trijardste",
1e24: "quadriljoenste",
1e27: "quadriljardste",
1e30: "quintiljoenste",
1e33: "quintiljardste"
# TODO > 1e60
}
_LONG_ORDINAL_STRING_NL.update(_ORDINAL_STRING_BASE_NL)
# negate next number (-2 = 0 - 2)
_NEGATIVES_NL = {"min", "minus"}
# sum the next number (twenty two = 20 + 2)
_SUMS_NL = {'twintig', '20', 'dertig', '30', 'veertig', '40', 'vijftig', '50',
'zestig', '60', 'zeventig', '70', 'techtig', '80', 'negentig',
'90'}
_MULTIPLIES_LONG_SCALE_NL = set(_LONG_SCALE_NL.values())
_MULTIPLIES_SHORT_SCALE_NL = set(_SHORT_SCALE_NL.values())
# split sentence parse separately and sum ( 2 and a half = 2 + 0.5 )
_FRACTION_MARKER_NL = {"en"}
# decimal marker ( 1 point 5 = 1 + 0.5)
_DECIMAL_MARKER_NL = {"komma", "punt"}
_STRING_NUM_NL = invert_dict(_NUM_STRING_NL)
_STRING_NUM_NL.update({
"half": 0.5,
"driekwart": 0.75,
"anderhalf": 1.5,
"paar": 2
})
_STRING_SHORT_ORDINAL_NL = invert_dict(_SHORT_ORDINAL_STRING_NL)
_STRING_LONG_ORDINAL_NL = invert_dict(_LONG_ORDINAL_STRING_NL)
_MONTHS_NL = ['januari', 'februari', 'maart', 'april', 'mei', 'juni',
'juli', 'augustus', 'september', 'oktober', 'november',
'december']
_NUM_STRING_NL = {
0: 'nul',
1: 'één',
2: 'twee',
3: 'drie',
4: 'vier',
5: 'vijf',
6: 'zes',
7: 'zeven',
8: 'acht',
9: 'negen',
10: 'tien',
11: 'elf',
12: 'twaalf',
13: 'dertien',
14: 'veertien',
15: 'vijftien',
16: 'zestien',
17: 'zeventien',
18: 'actien',
19: 'negentien',
20: 'twintig',
30: 'dertig',
40: 'veertig',
50: 'vijftig',
60: 'zestig',
70: 'zeventig',
80: 'tachtig',
90: 'negentig',
100: 'honderd'
}
# Dutch uses "long scale" https://en.wikipedia.org/wiki/Long_and_short_scales
# Currently, numbers are limited to 1000000000000000000000000,
# but _NUM_POWERS_OF_TEN can be extended to include additional number words
_NUM_POWERS_OF_TEN = [
'', 'duizend', 'miljoen', 'miljard', 'biljoen', 'biljard', 'triljoen',
'triljard'
]
# Numbers below 1 million are written as one word in Dutch, yielding very
# long words.
# In some circumstances it may be better to separate the individual words:
# Set _EXTRA_SPACE_NL=" " to separate numbers below 1 million
# (orthographically incorrect).
# Set _EXTRA_SPACE_NL="" for correct spelling; this is the standard.
# _EXTRA_SPACE_NL = " "
_EXTRA_SPACE_NL = ""

View File

@@ -0,0 +1,497 @@
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from collections import OrderedDict
# Polish cardinal number words: 0-20 individually, the tens 30-90 and the
# hundreds 100-900.
_NUM_STRING_PL = {
    0: 'zero',
    1: 'jeden',
    2: 'dwa',
    3: 'trzy',
    4: 'cztery',
    5: 'pięć',
    6: 'sześć',
    7: 'siedem',
    8: 'osiem',
    9: 'dziewięć',
    10: 'dziesięć',
    11: 'jedenaście',
    12: 'dwanaście',
    13: 'trzynaście',
    14: 'czternaście',
    15: 'piętnaście',
    16: 'szesnaście',
    17: 'siedemnaście',
    18: 'osiemnaście',
    19: 'dziewiętnaście',
    20: 'dwadzieścia',
    30: 'trzydzieści',
    40: 'czterdzieści',
    50: 'pięćdziesiąt',
    60: 'sześćdziesiąt',
    70: 'siedemdziesiąt',
    80: 'osiemdziesiąt',
    90: 'dziewięćdziesiąt',
    100: 'sto',
    200: 'dwieście',
    300: 'trzysta',
    400: 'czterysta',
    500: 'pięćset',
    600: 'sześćset',
    700: 'siedemset',
    800: 'osiemset',
    900: 'dziewięćset',
}
# Polish feminine number words keyed by integer (1-20, tens, hundreds, 1000);
# presumably used as the denominator word when speaking fractions -- confirm
# against the formatter that reads this table.
_FRACTION_STRING_PL = {
    1: 'jedna',
    2: 'druga',
    3: 'trzecia',
    4: 'czwarta',
    5: 'piąta',
    6: 'szósta',
    7: 'siódma',
    8: 'ósma',
    9: 'dziewiąta',
    10: 'dziesiąta',
    11: 'jedenasta',
    12: 'dwunasta',
    13: 'trzynasta',
    14: 'czternasta',
    15: 'piętnasta',
    16: 'szesnasta',
    17: 'siedemnasta',
    18: 'osiemnasta',
    19: 'dziewiętnasta',
    20: 'dwudziesta',
    30: 'trzydziesta',
    40: 'czterdziesta',
    50: 'pięćdziesiąta',
    60: 'sześćdziesiąta',
    70: 'siedemdziesiąta',
    80: 'osiemdziesiąta',
    90: 'dziewięćdziesiąta',
    100: 'setna',
    200: 'dwusetna',
    300: 'trzysetna',
    400: 'czterysetna',
    500: 'pięćsetna',
    600: 'sześćsetna',
    700: 'siedemsetna',
    800: 'osiemsetna',
    900: 'dziewięćsetna',
    1000: 'tysięczna',
}
# Polish scale words in ascending order: the hundreds, then the powers of
# one thousand.
#
# NOTE: Python float literals larger than the maximum finite double
# (about 1.8e308) evaluate to ``inf``, so every entry from 1e309 onward
# shares the single key ``inf`` and only the last of them ("milinylion")
# survives in the resulting mapping.
_SHORT_SCALE_PL = OrderedDict([
    (100, 'sto'),
    (200, 'dwieście'),
    (300, 'trzysta'),
    (400, 'czterysta'),
    (500, 'pięćset'),
    (600, 'sześćset'),
    (700, 'siedemset'),
    (800, 'osiemset'),
    (900, 'dziewięćset'),
    (1000, 'tysiąc'),
    (1000000, 'milion'),
    (1e9, "miliard"),
    (1e12, 'bilion'),
    (1e15, "biliard"),
    (1e18, "trylion"),
    # was "sekstilion"; 1e21 follows "trylion" (cf. "tryliardowy" in the
    # ordinal table)
    (1e21, "tryliard"),
    (1e24, "kwadrylion"),
    (1e27, "kwadryliard"),
    (1e30, "kwintylion"),
    (1e33, "kwintyliard"),
    (1e36, "sekstylion"),
    (1e39, "sekstyliard"),
    (1e42, "septylion"),
    (1e45, "septyliard"),
    (1e48, "oktylion"),
    (1e51, "oktyliard"),
    (1e54, "nonilion"),
    (1e57, "noniliard"),
    (1e60, "decylion"),
    (1e63, "decyliard"),
    (1e66, "undecylion"),
    (1e69, "undecyliard"),
    (1e72, "duodecylion"),
    (1e75, "duodecyliard"),
    (1e78, "tredecylion"),
    (1e81, "tredecyliard"),
    (1e84, "kwartyduodecylion"),
    (1e87, "kwartyduodecyliard"),
    (1e90, "kwintyduodecylion"),
    (1e93, "kwintyduodecyliard"),
    (1e96, "seksdecylion"),
    (1e99, "seksdecyliard"),
    (1e102, "septydecylion"),
    (1e105, "septydecyliard"),
    (1e108, "oktodecylion"),
    (1e111, "oktodecyliard"),
    (1e114, "nondecylion"),
    (1e117, "nondecyliard"),
    (1e120, "wigintylion"),
    (1e123, "wigintyliard"),
    (1e153, "quinquagintylion"),
    (1e183, "trycyliard"),
    (1e213, "septuagintylion"),
    (1e243, "kwadragiliard"),
    (1e273, "nonagintylion"),
    (1e303, "centezylion"),
    (1e306, "uncentylion"),
    (1e309, "duocentylion"),
    (1e312, "trescentylion"),
    (1e333, "decicentylion"),
    (1e336, "undecicentylion"),
    (1e363, "viginticentylion"),
    (1e366, "unviginticentylion"),
    (1e393, "trigintacentylion"),
    (1e423, "quadragintacentylion"),
    (1e453, "quinquagintacentylion"),
    (1e483, "sexagintacentylion"),
    (1e513, "septuagintacentylion"),
    (1e543, "octogintacentylion"),  # was missing its leading "o"
    (1e573, "nonagintacentylion"),
    (1e603, "centyliard"),
    (1e903, "trecentylion"),
    (1e1203, "quadringentylion"),
    (1e1503, "quingentylion"),
    (1e1803, "sescentylion"),
    (1e2103, "septingentylion"),
    (1e2403, "octingentylion"),
    (1e2703, "nongentylion"),
    (1e3003, "milinylion")
])
# Polish masculine ordinal words for 1-20, the tens, 100 and 1000.
_ORDINAL_BASE_PL = {
    1: 'pierwszy',
    2: 'drugi',
    3: 'trzeci',
    4: 'czwarty',
    5: 'piąty',
    6: 'szósty',
    7: 'siódmy',
    8: 'ósmy',
    9: 'dziewiąty',
    10: 'dziesiąty',
    11: 'jedenasty',
    12: 'dwunasty',
    13: 'trzynasty',
    14: 'czternasty',
    15: 'piętnasty',
    16: 'szesnasty',
    17: 'siedemnasty',
    18: 'osiemnasty',
    19: 'dziewiętnasty',
    20: 'dwudziesty',
    30: 'trzydziesty',
    40: "czterdziesty",
    50: "pięćdziesiąty",
    60: "sześćdziesiąty",
    70: "siedemdziesiąty",
    80: "osiemdziesiąty",
    90: "dziewięćdziesiąty",
    1e2: "setny",
    1e3: "tysięczny"
}
# Ordinal words for the large powers of ten; the base ordinals above are
# merged in by the update() call that follows the literal.
_SHORT_ORDINAL_PL = {
    1e6: "milionowy",
    1e9: "miliardowy",
    1e12: "bilionowy",
    1e15: "biliardowy",
    1e18: "trylionowy",
    1e21: "tryliardowy",
    1e24: "kwadrylionowy",
    1e27: "kwadryliardowy",
    1e30: "kwintylionowy",
    1e33: "kwintyliardowy",
    1e36: "sekstylionowy",  # was misspelled "sektylionowy"
    1e42: "septylionowy",
    1e48: "oktylionowy",
    1e54: "nonylionowy",
    1e60: "decylionowy"
    # TODO: the "-liard" ordinals above 1e33 and everything above 1e60
}
_SHORT_ORDINAL_PL.update(_ORDINAL_BASE_PL)
# Alternative Polish ordinal forms: inflected feminine forms for 1-9
# ("pierwszej", ...) and combining "-o" stems for 10-90 ("dziesięcio", ...).
_ALT_ORDINALS_PL = {
    1: 'pierwszej',
    2: 'drugiej',
    3: 'trzeciej',
    4: 'czwartej',
    5: 'piątej',
    6: 'szóstej',
    7: 'siódmej',
    8: 'ósmej',
    9: 'dziewiątej',
    10: 'dziesięcio',
    11: 'jedenasto',
    12: 'dwunasto',
    13: 'trzynasto',
    14: 'czternasto',
    15: 'piętnasto',
    16: 'szesnasto',
    17: 'siedemnasto',
    18: 'osiemnasto',
    19: 'dziewiętnasto',
    20: 'dwudziesto',
    30: 'trzydziesto',
    40: 'czterdziesto',
    50: 'pięćdziesięcio',  # was 'pięćdziesiecio' (missing ogonek on "ę")
    60: 'sześćdziesięcio',
    70: 'siedemdziesięcio',
    80: 'osiemdziesięcio',
    90: 'dziewięćdziesięcio',
}
# Polish duration-unit word forms mapped to English plural unit names
# (microseconds up to weeks). The values look like datetime.timedelta keyword
# names -- presumably they are used that way; confirm against the parser.
_TIME_UNITS_CONVERSION = {
    'mikrosekund': 'microseconds',
    'mikrosekundy': 'microseconds',
    'milisekund': 'milliseconds',
    'milisekundy': 'milliseconds',
    'sekunda': 'seconds',
    'sekundy': 'seconds',
    'sekund': 'seconds',
    'minuta': 'minutes',
    'minuty': 'minutes',
    'minut': 'minutes',
    'godzina': 'hours',
    'godziny': 'hours',
    'godzin': 'hours',
    'dzień': 'days',
    'dni': 'days',
    'tydzień': 'weeks',
    'tygodni': 'weeks',
    'tygodnie': 'weeks',
    'tygodniu': 'weeks',
}
# Maps inflected Polish time-unit words to one canonical form per unit
# (e.g. 'sekundę', 'sekundy', 'sekund' -> 'sekunda'). The canonical form is
# the nominative singular except for the millennium entries, which both map
# to 'milenia'.
_TIME_UNITS_NORMALIZATION = {
    'mikrosekunda': 'mikrosekunda',
    'mikrosekundę': 'mikrosekunda',
    'mikrosekund': 'mikrosekunda',
    'mikrosekundy': 'mikrosekunda',
    'milisekunda': 'milisekunda',
    'milisekundę': 'milisekunda',
    'milisekund': 'milisekunda',
    'milisekundy': 'milisekunda',
    'sekunda': 'sekunda',
    'sekundę': 'sekunda',
    'sekundy': 'sekunda',
    'sekund': 'sekunda',
    'minuta': 'minuta',
    'minutę': 'minuta',
    'minut': 'minuta',
    'minuty': 'minuta',
    'godzina': 'godzina',
    'godzinę': 'godzina',
    'godzin': 'godzina',
    'godziny': 'godzina',
    'dzień': 'dzień',
    'dni': 'dzień',
    'tydzień': 'tydzień',
    'tygodni': 'tydzień',
    'tygodnie': 'tydzień',
    'tygodniu': 'tydzień',
    'miesiąc': 'miesiąc',
    'miesiące': 'miesiąc',
    'miesięcy': 'miesiąc',
    'rok': 'rok',
    'lata': 'rok',
    'lat': 'rok',
    'dekada': 'dekada',
    'dekad': 'dekada',
    'dekady': 'dekada',
    'dekadę': 'dekada',
    'wiek': 'wiek',
    'wieki': 'wiek',
    'milenia': 'milenia',
    'milenium': 'milenia',
}
# Polish month names (nominative and genitive form of each month) mapped to
# the capitalised English month name.
_MONTHS_TO_EN = {
    'styczeń': 'January',
    'stycznia': 'January',
    'luty': 'February',
    'lutego': 'February',
    'marzec': 'March',
    'marca': 'March',
    'kwiecień': 'April',
    'kwietnia': 'April',
    'maj': 'May',
    'maja': 'May',
    'czerwiec': 'June',
    'czerwca': 'June',
    'lipiec': 'July',
    'lipca': 'July',
    'sierpień': 'August',
    'sierpnia': 'August',
    'wrzesień': 'September',
    'września': 'September',
    'październik': 'October',
    'października': 'October',
    'listopad': 'November',
    'listopada': 'November',
    'grudzień': 'December',
    'grudnia': 'December',
}
# Inflected Polish weekday nouns and adjectives mapped to a weekday index,
# 0 (Monday) through 6 (Sunday). Each string below lists every accepted
# form for one weekday; insertion order matches the original literal.
_DAYS_TO_EN = {
    form: weekday
    for weekday, forms in enumerate((
        # Monday
        'poniedziałek poniedziałkach poniedziałkami poniedziałki '
        'poniedziałkiem poniedziałkom poniedziałkowa poniedziałkową '
        'poniedziałkowe poniedziałkowego poniedziałkowej poniedziałkowemu '
        'poniedziałkowi poniedziałkowy poniedziałkowych poniedziałkowym '
        'poniedziałkowymi poniedziałków poniedziałku',
        # Tuesday
        'wtorek wtorkach wtorkami wtorki wtorkiem wtorkom wtorkowa '
        'wtorkową wtorkowe wtorkowego wtorkowej wtorkowemu wtorkowi '
        'wtorkowy wtorkowych wtorkowym wtorkowymi wtorków wtorku',
        # Wednesday
        'środa środach środami środą środę środo środom środowa środową '
        'środowe środowego środowej środowemu środowi środowy środowych '
        'środowym środowymi środy środzie śród',
        # Thursday
        'czwartek czwartkach czwartkami czwartki czwartkiem czwartkom '
        'czwartkowa czwartkową czwartkowe czwartkowego czwartkowej '
        'czwartkowemu czwartkowi czwartkowy czwartkowych czwartkowym '
        'czwartkowymi czwartków czwartku',
        # Friday
        'piątek piątkach piątkami piątki piątkiem piątkom piątkowa '
        'piątkową piątkowe piątkowego piątkowej piątkowemu piątkowi '
        'piątkowy piątkowych piątkowym piątkowymi piątków piątku',
        # Saturday
        'sobocie sobota sobotach sobotami sobotą sobotę sobotni sobotnia '
        'sobotnią sobotnich sobotnie sobotniego sobotniej sobotniemu '
        'sobotnim sobotnimi soboto sobotom soboty sobót',
        # Sunday
        'niedziel niedziela niedzielach niedzielami niedzielą niedziele '
        'niedzielę niedzieli niedzielna niedzielną niedzielne '
        'niedzielnego niedzielnej niedzielnemu niedzielni niedzielny '
        'niedzielnych niedzielnym niedzielnymi niedzielo niedzielom',
    ))
    for form in forms.split()
}

View File

@@ -0,0 +1,135 @@
# Portuguese message text meaning "this function has not been implemented
# in 'pt'".
_FUNCTION_NOT_IMPLEMENTED_WARNING = "esta função não foi implementada em 'pt'"
# Only the definite articles are listed. The indefinite articles
# ["um", "uma", "uns", "umas"] cannot be suppressed:
# in PT, "um cavalo" means both "a horse" and "one horse".
_ARTICLES_PT = ["o", "a", "os", "as"]
# word rules for gender: typical feminine and masculine word endings
_FEMALE_ENDINGS_PT = ["a", "as"]
_MALE_ENDINGS_PT = ["o", "os"]
# special cases: gender lookup ("f"/"m") for words not covered by the
# ending rules above
_GENDERS_PT = {
    "mulher": "f",
    "mulheres": "f",
    "homem": "m"
}
# Context rules for gender: determiners that mark the following word as
# masculine or feminine. The two lists are parallel
# (o/a, os/as, este/esta, estes/estas, esse/essa, esses/essas).
_MALE_DETERMINANTS_PT = ["o", "os", "este", "estes", "esse", "esses"]
# "esta" was previously missing ("estas" was listed twice instead).
_FEMALE_DETERMINANTS_PT = ["a", "as", "esta", "estas", "essa", "essas"]
# Portuguese number words (masculine and feminine forms, plus the first
# three ordinals) mapped to their integer value.
_NUMBERS_PT = {
    "zero": 0,
    "um": 1,
    "uma": 1,
    "uns": 1,
    "umas": 1,
    "primeiro": 1,
    "segundo": 2,
    "terceiro": 3,
    "dois": 2,
    "duas": 2,
    "tres": 3,
    "três": 3,
    "quatro": 4,
    "cinco": 5,
    "seis": 6,
    "sete": 7,
    "oito": 8,
    "nove": 9,
    "dez": 10,
    "onze": 11,
    "doze": 12,
    "treze": 13,
    "catorze": 14,
    "quinze": 15,
    "dezasseis": 16,
    "dezassete": 17,
    "dezoito": 18,
    "dezanove": 19,
    "vinte": 20,
    "trinta": 30,
    "quarenta": 40,
    "cinquenta": 50,
    "sessenta": 60,
    "setenta": 70,
    "oitenta": 80,
    "noventa": 90,
    "cem": 100,
    "cento": 100,
    "duzentos": 200,
    "duzentas": 200,
    "trezentos": 300,
    "trezentas": 300,
    "quatrocentos": 400,
    "quatrocentas": 400,
    "quinhentos": 500,
    "quinhentas": 500,
    "seiscentos": 600,
    "seiscentas": 600,
    "setecentos": 700,
    "setecentas": 700,
    "oitocentos": 800,
    "oitocentas": 800,
    "novecentos": 900,
    "novecentas": 900,
    "mil": 1000,
    # The "ã" had been replaced by U+FFFD (an encoding error), so the key
    # could never match real input.
    "milhão": 1000000}
# Portuguese denominator words keyed by denominator: dedicated words for
# 2-10, 20, 30, 100 and 1000, and the "<number> avos" form for 11-19.
_FRACTION_STRING_PT = {
    2: 'meio',
    3: 'terço',
    4: 'quarto',
    5: 'quinto',
    6: 'sexto',
    7: 'sétimo',
    8: 'oitavo',
    9: 'nono',
    10: 'décimo',
    11: 'onze avos',
    12: 'doze avos',
    13: 'treze avos',
    14: 'catorze avos',
    15: 'quinze avos',
    16: 'dezasseis avos',
    17: 'dezassete avos',
    18: 'dezoito avos',
    19: 'dezanove avos',
    20: 'vigésimo',
    30: 'trigésimo',
    100: 'centésimo',
    1000: 'milésimo'
}
# Portuguese cardinal number words: 0-20 individually, then the tens up to
# 90 (there is no entry for 100).
_NUM_STRING_PT = {
    0: 'zero',
    1: 'um',
    2: 'dois',
    3: 'três',
    4: 'quatro',
    5: 'cinco',
    6: 'seis',
    7: 'sete',
    8: 'oito',
    9: 'nove',
    10: 'dez',
    11: 'onze',
    12: 'doze',
    13: 'treze',
    14: 'catorze',
    15: 'quinze',
    16: 'dezasseis',
    17: 'dezassete',
    18: 'dezoito',
    19: 'dezanove',
    20: 'vinte',
    30: 'trinta',
    40: 'quarenta',
    50: 'cinquenta',
    60: 'sessenta',
    70: 'setenta',
    80: 'oitenta',
    90: 'noventa'
}

View File

@@ -0,0 +1,304 @@
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from collections import OrderedDict
# Russian cardinal number words: 0-20 individually, the tens 30-90 and the
# hundreds 100-900.
_NUM_STRING_RU = {
    0: 'ноль',
    1: 'один',
    2: 'два',
    3: 'три',
    4: 'четыре',
    5: 'пять',
    6: 'шесть',
    7: 'семь',
    8: 'восемь',
    9: 'девять',
    10: 'десять',
    11: 'одиннадцать',
    12: 'двенадцать',
    13: 'тринадцать',
    14: 'четырнадцать',
    15: 'пятнадцать',
    16: 'шестнадцать',
    17: 'семнадцать',
    18: 'восемнадцать',
    19: 'девятнадцать',
    20: 'двадцать',
    30: 'тридцать',
    40: 'сорок',
    50: 'пятьдесят',
    60: 'шестьдесят',
    70: 'семьдесят',
    80: 'восемьдесят',
    90: 'девяносто',
    100: 'сто',
    200: 'двести',
    300: 'триста',
    400: 'четыреста',
    500: 'пятьсот',
    600: 'шестьсот',
    700: 'семьсот',
    800: 'восемьсот',
    900: 'девятьсот'
}
# Russian denominator words keyed by denominator: special nouns for 2-4
# (half, third, quarter) and feminine ordinal forms for the rest.
# The keys 1e2 ... 1e9 are floats; they hash and compare equal to the
# matching integers.
_FRACTION_STRING_RU = {
    2: 'половина',
    3: 'треть',
    4: 'четверть',
    5: 'пятая',
    6: 'шестая',
    7: 'седьмая',
    8: 'восьмая',
    9: 'девятая',
    10: 'десятая',
    11: 'одиннадцатая',
    12: 'двенадцатая',
    13: 'тринадцатая',
    14: 'четырнадцатая',
    15: 'пятнадцатая',
    16: 'шестнадцатая',
    17: 'семнадцатая',
    18: 'восемнадцатая',
    19: 'девятнадцатая',
    20: 'двадцатая',
    30: 'тридцатая',
    40: 'сороковая',
    50: 'пятидесятая',
    60: 'шестидесятая',
    70: 'семидесятая',
    80: 'восьмидесятая',
    90: 'девяностая',
    1e2: 'сотая',
    1e3: 'тысячная',
    1e6: 'миллионная',
    1e9: 'миллиардная'
}
# Russian short-scale names for the powers of one thousand, in ascending
# order from 1e3 to 1e93.
_SHORT_SCALE_RU = OrderedDict([
    (1e3, 'тысяча'),
    (1e6, "миллион"),
    (1e9, "миллиард"),
    (1e12, "триллион"),
    (1e15, "квадриллион"),
    (1e18, "квинтиллион"),
    (1e21, "секстиллион"),
    (1e24, "септиллион"),
    (1e27, "октиллион"),
    (1e30, "нониллион"),
    (1e33, "дециллион"),
    (1e36, "ундециллион"),
    (1e39, "дуодециллион"),
    (1e42, "тредециллион"),
    (1e45, "кваттордециллион"),
    (1e48, "квиндециллион"),
    (1e51, "сексдециллион"),
    (1e54, "септендециллион"),
    (1e57, "октодециллион"),
    (1e60, "новемдециллион"),
    (1e63, "вигинтиллион"),
    (1e66, "унвигинтиллион"),
    (1e69, "дуовигинтиллион"),
    (1e72, "тревигинтиллион"),
    (1e75, "кватторвигинтиллион"),
    (1e78, "квинвигинтиллион"),
    (1e81, "сексвигинтиллион"),  # was "секснвигинтиллион" (stray "н")
    (1e84, "септенвигинтиллион"),
    (1e87, "октовигинтиллион"),
    (1e90, "новемвигинтиллион"),
    (1e93, "тригинтиллион"),
])
# Russian long-scale names in ascending order. Every power of one thousand
# is listed up to 1e66; above that only every second one (1e72, 1e78, ...,
# 1e120) has an entry.
_LONG_SCALE_RU = OrderedDict([
    (1e3, 'тысяча'),
    (1e6, "миллион"),
    (1e9, "миллиард"),
    (1e12, "биллион"),
    (1e15, "биллиард"),
    (1e18, "триллион"),
    (1e21, "триллиард"),
    (1e24, "квадриллион"),
    (1e27, "квадриллиард"),
    (1e30, "квинтиллион"),
    (1e33, "квинтиллиард"),
    (1e36, "секстиллион"),
    (1e39, "секстиллиард"),
    (1e42, "септиллион"),
    (1e45, "септиллиард"),
    (1e48, "октиллион"),
    (1e51, "октиллиард"),
    (1e54, "нониллион"),
    (1e57, "нониллиард"),
    (1e60, "дециллион"),
    (1e63, "дециллиард"),
    (1e66, "ундециллион"),
    (1e72, "дуодециллион"),
    (1e78, "тредециллион"),
    (1e84, "кваттордециллион"),
    (1e90, "квиндециллион"),
    (1e96, "сексдециллион"),
    (1e102, "септендециллион"),
    (1e108, "октодециллион"),
    (1e114, "новемдециллион"),
    (1e120, "вигинтиллион"),
])
# Russian masculine ordinal words for 1-20, the tens, the hundreds and 1000.
# The keys written as 1e2 ... 1e3 are floats; they hash and compare equal to
# the matching integers.
_ORDINAL_BASE_RU = {
    1: 'первый',
    2: 'второй',
    3: 'третий',
    4: 'четвёртый',
    5: 'пятый',
    6: 'шестой',
    7: 'седьмой',
    8: 'восьмой',
    9: 'девятый',
    10: 'десятый',
    11: 'одиннадцатый',
    12: 'двенадцатый',
    13: 'тринадцатый',
    14: 'четырнадцатый',
    15: 'пятнадцатый',
    16: 'шестнадцатый',
    17: 'семнадцатый',
    18: 'восемнадцатый',
    19: 'девятнадцатый',
    20: 'двадцатый',
    30: 'тридцатый',
    40: "сороковой",
    50: "пятидесятый",
    60: "шестидесятый",
    70: "семидесятый",
    80: "восьмидесятый",
    90: "девяностый",
    1e2: "сотый",
    2e2: "двухсотый",
    3e2: "трёхсотый",
    4e2: "четырёхсотый",
    5e2: "пятисотый",
    6e2: "шестисотый",
    7e2: "семисотый",
    8e2: "восьмисотый",
    9e2: "девятисотый",
    1e3: "тысячный"
}
# Short-scale entries for 1e6 ... 1e63; the base ordinals from
# _ORDINAL_BASE_RU are merged in by the update() call after the literal.
# NOTE(review): the values here are the cardinal nouns ("миллион"), not
# ordinal adjectives ("миллионный") -- confirm this is what callers expect.
_SHORT_ORDINAL_RU = {
    1e6: "миллион",
    1e9: "миллиард",
    1e12: "триллион",
    1e15: "квадриллион",
    1e18: "квинтиллион",
    1e21: "секстиллион",
    1e24: "септиллион",
    1e27: "октиллион",
    1e30: "нониллион",
    1e33: "дециллион",
    1e36: "ундециллион",
    1e39: "дуодециллион",
    1e42: "тредециллион",
    1e45: "кваттордециллион",
    1e48: "квиндециллион",
    1e51: "сексдециллион",
    1e54: "септендециллион",
    1e57: "октодециллион",
    1e60: "новемдециллион",
    1e63: "вигинтиллион"
}
_SHORT_ORDINAL_RU.update(_ORDINAL_BASE_RU)
# Long-scale entries for 1e6 ... 1e120; the base ordinals from
# _ORDINAL_BASE_RU are merged in by the update() call after the literal.
# NOTE(review): the values here are the cardinal nouns ("миллион"), not
# ordinal adjectives -- confirm this is what callers expect.
_LONG_ORDINAL_RU = {
    1e6: "миллион",
    1e9: "миллиард",
    1e12: "биллион",
    1e15: "биллиард",
    1e18: "триллион",
    1e21: "триллиард",
    1e24: "квадриллион",
    1e27: "квадриллиард",
    1e30: "квинтиллион",
    1e33: "квинтиллиард",
    1e36: "секстиллион",
    1e39: "секстиллиард",
    1e42: "септиллион",
    1e45: "септиллиард",
    1e48: "октиллион",
    1e51: "октиллиард",
    1e54: "нониллион",
    1e57: "нониллиард",
    1e60: "дециллион",
    1e63: "дециллиард",
    1e66: "ундециллион",
    1e72: "дуодециллион",
    1e78: "тредециллион",
    1e84: "кваттордециллион",
    1e90: "квиндециллион",
    1e96: "сексдециллион",
    1e102: "септендециллион",
    1e108: "октодециллион",
    1e114: "новемдециллион",
    1e120: "вигинтиллион"
}
_LONG_ORDINAL_RU.update(_ORDINAL_BASE_RU)
# Months
# Lower-case English month names keyed by zero-based month index
# (0 is January, 11 is December).
_MONTHS_CONVERSION = dict(enumerate((
    "january",
    "february",
    "march",
    "april",
    "may",
    "june",
    "july",
    "august",
    "september",
    "october",
    "november",
    "december",
)))
# Russian month names (nominative case) in calendar order, January first.
_MONTHS_RU = ['январь', 'февраль', 'март', 'апрель', 'май', 'июнь',
              'июль', 'август', 'сентябрь', 'октябрь', 'ноябрь',
              'декабрь']
# Time
# Russian duration-unit word forms mapped to English plural unit names
# (microseconds up to weeks).
_TIME_UNITS_CONVERSION = {
    'микросекунд': 'microseconds',
    # Misspelling (single "л") kept for backward compatibility; the
    # correctly spelled form follows it.
    'милисекунд': 'milliseconds',
    'миллисекунд': 'milliseconds',
    'секунда': 'seconds',
    'секунды': 'seconds',
    'секунд': 'seconds',
    'минута': 'minutes',
    'минуты': 'minutes',
    'минут': 'minutes',
    'час': 'hours',
    'часа': 'hours',
    'часов': 'hours',
    'день': 'days',
    'дня': 'days',
    'дней': 'days',
    'неделя': 'weeks',
    'недели': 'weeks',
    'недель': 'weeks'
}

View File

@@ -0,0 +1,173 @@
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from collections import OrderedDict
# Empty: no Slovenian articles are listed.
_ARTICLES_SL = {}
# Slovenian cardinal number words: 0-20 individually, then the tens up to 90.
_NUM_STRING_SL = {
    0: 'nič',
    1: 'ena',
    2: 'dve',
    3: 'tri',
    4: 'štiri',
    5: 'pet',
    6: 'šest',
    7: 'sedem',
    8: 'osem',
    9: 'devet',
    10: 'deset',
    11: 'enajst',
    12: 'dvanajst',
    13: 'trinajst',
    14: 'štirinajst',
    15: 'petnajst',
    16: 'šestnajst',
    17: 'sedemnajst',
    18: 'osemnajst',
    19: 'devetnajst',
    20: 'dvajset',
    30: 'trideset',
    40: 'štirideset',
    50: 'petdeset',
    60: 'šestdeset',
    70: 'sedemdeset',
    80: 'osemdeset',
    90: 'devetdeset'
}
# Slovenian fraction nouns (half, third, quarter, ...) keyed by denominator,
# covering 2 through 20.
_FRACTION_STRING_SL = {
    2: 'polovica',
    3: 'tretjina',
    4: 'četrtina',
    5: 'petina',
    6: 'šestina',
    7: 'sedmina',
    8: 'osmina',
    9: 'devetina',
    10: 'desetina',
    11: 'enajstina',
    12: 'dvanajstina',
    13: 'trinajstina',
    14: 'štirinajstina',
    15: 'petnajstina',
    16: 'šestnajstina',
    17: 'sedemnajstina',
    18: 'osemnajstina',
    19: 'devetnajstina',
    20: 'dvajsetina'
}
# Slovenian long-scale words in ascending order: 100, 1000, then one name
# per factor of a million (1e6, 1e12, ..., 1e60).
_LONG_SCALE_SL = OrderedDict([
    (100, 'sto'),
    (1000, 'tisoč'),
    (1000000, 'milijon'),
    (1e12, 'bilijon'),
    (1e18, 'trilijon'),
    (1e24, 'kvadrilijon'),
    (1e30, 'kvintilijon'),
    (1e36, 'sekstilijon'),
    (1e42, 'septilijon'),
    (1e48, 'oktilijon'),
    (1e54, 'nonilijon'),
    (1e60, 'decilijon')
    # TODO > 1e63
])
# Slovenian short-scale words in ascending order: 100, 1000, then one name
# per factor of a thousand (1e6, 1e9, ..., 1e33).
_SHORT_SCALE_SL = OrderedDict([
    (100, 'sto'),
    (1000, 'tisoč'),
    (1000000, 'milijon'),
    (1e9, 'bilijon'),
    (1e12, 'trilijon'),
    (1e15, 'kvadrilijon'),
    (1e18, 'kvintilijon'),
    (1e21, 'sekstilijon'),
    (1e24, 'septilijon'),
    (1e27, 'oktilijon'),
    (1e30, 'nonilijon'),
    (1e33, 'decilijon')
    # TODO > 1e33
])
# Slovenian ordinal words for 1-20, the tens, 100 and 1000.
_ORDINAL_BASE_SL = {
    1: 'prvi',
    2: 'drugi',
    3: 'tretji',
    4: 'četrti',
    5: 'peti',
    6: 'šesti',
    7: 'sedmi',
    8: 'osmi',
    9: 'deveti',
    10: 'deseti',
    11: 'enajsti',
    12: 'dvanajsti',
    13: 'trinajsti',
    14: 'štirinajsti',
    15: 'petnajsti',
    16: 'šestnajsti',
    17: 'sedemnajsti',
    18: 'osemnajsti',
    19: 'devetnajsti',
    20: 'dvajseti',
    30: 'trideseti',
    40: 'štirideseti',
    50: 'petdeseti',
    60: 'šestdeseti',
    70: 'sedemdeseti',
    80: 'osemdeseti',
    90: 'devetdeseti',
    1e2: 'stoti',
    1e3: 'tisoči'
}
# Long-scale ordinals for the large powers of ten; the base ordinals are
# merged in by the update() call after the literal.
_LONG_ORDINAL_SL = {
    1e6: 'milijonti',
    1e12: 'bilijonti',
    1e18: 'trilijonti',
    1e24: 'kvadrilijonti',
    1e30: 'kvintilijonti',  # was misspelled 'kvintiljonti'
    1e36: 'sekstilijonti',
    1e42: 'septilijonti',
    1e48: 'oktilijonti',
    1e54: 'nonilijonti',
    1e60: 'decilijonti'
    # TODO > 1e60
}
_LONG_ORDINAL_SL.update(_ORDINAL_BASE_SL)
# Short-scale ordinals for the large powers of ten; the base ordinals are
# merged in by the update() call after the literal.
_SHORT_ORDINAL_SL = {
    1e6: 'milijonti',
    1e9: 'bilijonti',
    1e12: 'trilijonti',
    1e15: 'kvadrilijonti',
    1e18: 'kvintilijonti',  # was misspelled 'kvintiljonti'
    1e21: 'sekstilijonti',
    1e24: 'septilijonti',
    1e27: 'oktilijonti',
    1e30: 'nonilijonti',
    1e33: 'decilijonti'
    # TODO > 1e33
}
_SHORT_ORDINAL_SL.update(_ORDINAL_BASE_SL)

View File

@@ -0,0 +1,72 @@
# Swedish message text meaning "this function has not been implemented
# in 'sv'".
_FUNCTION_NOT_IMPLEMENTED_WARNING = "Denna funktion har inte implementerats i 'sv'"
# Swedish month names in calendar order, January first.
_MONTHS_SV = ['januari', 'februari', 'mars', 'april', 'maj', 'juni',
              'juli', 'augusti', 'september', 'oktober', 'november',
              'december']
# Swedish cardinal number words: 0-20 individually, then the tens up to 100.
_NUM_STRING_SV = {
    0: 'noll',
    1: 'en',
    2: 'två',
    3: 'tre',
    4: 'fyra',
    5: 'fem',
    6: 'sex',
    7: 'sju',
    8: 'åtta',
    9: 'nio',
    10: 'tio',
    11: 'elva',
    12: 'tolv',
    13: 'tretton',
    14: 'fjorton',
    15: 'femton',
    16: 'sexton',
    17: 'sjutton',
    18: 'arton',
    19: 'nitton',
    20: 'tjugo',
    30: 'trettio',
    40: 'fyrtio',
    50: 'femtio',
    60: 'sextio',
    70: 'sjuttio',
    80: 'åttio',
    90: 'nittio',
    100: 'hundra'
}
# Swedish scale words in ascending order. Unlike a pure powers-of-1000
# list, index 0 is 'hundra' (hundred); 'tusen' (thousand) follows and each
# later entry is a further factor of one thousand (long scale).
_NUM_POWERS_OF_TEN_SV = [
    'hundra',
    'tusen',
    'miljon',
    'miljard',
    'biljon',
    'biljard',
    'triljon',
    'triljard'
]
# Swedish fraction words (half, third, quarter, ...) keyed by denominator,
# covering 2 through 20.
_FRACTION_STRING_SV = {
    2: 'halv',
    3: 'tredjedel',
    4: 'fjärdedel',
    5: 'femtedel',
    6: 'sjättedel',
    7: 'sjundedel',
    8: 'åttondel',
    9: 'niondel',
    10: 'tiondel',
    11: 'elftedel',
    12: 'tolftedel',
    13: 'trettondel',
    14: 'fjortondel',
    15: 'femtondel',
    16: 'sextondel',
    17: 'sjuttondel',
    18: 'artondel',
    19: 'nittondel',
    20: 'tjugondel'
}
# A single space; presumably the separator inserted between the parts of a
# composed Swedish number word -- confirm against the formatter.
_EXTRA_SPACE_SV = " "

View File

@@ -0,0 +1,596 @@
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from lingua_franca.lang.format_common import convert_to_mixed_fraction
from lingua_franca.lang.common_data_ca import _FRACTION_STRING_CA, \
_NUM_STRING_CA
from lingua_franca.internal import lookup_variant
from enum import IntEnum
class TimeVariantCA(IntEnum):
    # Selects which spoken Catalan time style nice_time_ca produces.
    # Plain watch-style reading; used when no variant is given.
    DEFAULT = 0
    # Quarter-based "bell" system with exact minute counts.
    BELL = 1
    # Quarter-based "bell" system with approximate wording
    # ("tocat", "passat", ...); also selected by the name "traditional".
    FULL_BELL = 2
    # "i quart" / "menys quart" style reading.
    SPANISH_LIKE = 3
def nice_number_ca(number, speech, denominators=range(1, 21)):
    """Catalan helper for nice_number.

    Formats a number as a mixed fraction, e.g. 4.5 becomes "4 1/2" for
    display. For speech the fraction is spelled with the Catalan
    denominator word from _FRACTION_STRING_CA.

    NOTE(review): for a non-zero whole part and a numerator of 1 the spoken
    form joins the parts with a hyphen ("4-<denominator>"), not with " i "
    -- confirm that this is intended.

    Args:
        number (int or float): the number to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction: fall back to a number with 3 decimals.
        return str(round(number, 3))

    whole, num, den = mixed

    # A whole number reads the same for speech and display.
    # TODO: Number grouping? E.g. "1,000,000"
    if num == 0:
        return str(whole)

    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = _FRACTION_STRING_CA[den]
    if whole == 0:
        if num == 1:
            # pure fraction with numerator one, e.g. "un desè"
            spoken = 'un {}'.format(den_str)
        else:
            # pure fraction, numerator kept as digits
            spoken = '{} {}'.format(num, den_str)
    elif num == 1:
        # whole part plus a single fractional unit
        spoken = '{}-{}'.format(whole, den_str)
    else:
        # whole part plus several fractional units, e.g. "20 i 3 desens"
        spoken = '{} i {} {}'.format(whole, num, den_str)

    # Pluralise the denominator word when the numerator is above one.
    if num > 1:
        spoken += 's'
    return spoken
def pronounce_number_ca(number, places=2):
    """
    Convert a number to its spoken equivalent

    For example, '5.2' would return 'cinc coma dos'

    Numbers whose absolute value is 100 or more are not spelled out; they
    are returned as ``str(number)``.

    Args:
        number(float or int): the number to pronounce (under 100)
        places(int): maximum decimal places to speak
    Returns:
        (str): The pronounced number
    """
    if abs(number) >= 100:
        # TODO: Support n > 100
        return str(number)

    result = ""
    if number < 0:
        result = "menys "
    number = abs(number)

    if number >= 20:
        tens = int(number - int(number) % 10)
        ones = int(number - tens)
        result += _NUM_STRING_CA[tens]
        if ones > 0:
            # the twenties insert "-i-" (vint-i-...); other tens only "-"
            if tens == 20:
                result += "-i-" + _NUM_STRING_CA[ones]
            else:
                result += "-" + _NUM_STRING_CA[ones]
    else:
        result += _NUM_STRING_CA[int(number)]

    # Deal with the decimal part. Catalan commonly uses the comma instead of
    # the dot; it can be written either way but is always pronounced "coma".
    if not number == int(number) and places > 0:
        if abs(number) < 1.0 and (result == "menys " or not result):
            result += "zero"
        result += " coma"
        _num_str = str(number)
        if "e" in _num_str.lower():
            # str() uses scientific notation for small values ("1e-05"),
            # which has no usable "digits after the dot"; expand it to
            # plain positional notation first.
            from decimal import Decimal
            _num_str = format(Decimal(_num_str), "f")
        _num_str = _num_str.split(".")[1][0:places]
        for char in _num_str:
            result += " " + _NUM_STRING_CA[int(char)]
    return result
# lookup_variant is defined elsewhere; presumably it lets callers pass the
# variant as one of the string names below -- confirm against its definition.
@lookup_variant({
    "default": TimeVariantCA.DEFAULT,
    "traditional": TimeVariantCA.FULL_BELL,
    "bell": TimeVariantCA.BELL,
    "full_bell": TimeVariantCA.FULL_BELL,
    "spanish": TimeVariantCA.SPANISH_LIKE
})
def nice_time_ca(dt, speech=True, use_24hour=False, use_ampm=False,
                 variant=None):
    """
    Format a time to a comfortable human format

    For display (speech=False) this returns digits such as '5:30'. For
    speech it builds a Catalan phrase in the style chosen by ``variant``.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
        variant (TimeVariantCA): speaking style; a falsy value selects
            TimeVariantCA.DEFAULT
    Returns:
        (str): The formatted time string
    """
    variant = variant or TimeVariantCA.DEFAULT

    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        if use_ampm:
            # e.g. "3:01 AM" or "2:22 PM"
            string = dt.strftime("%I:%M %p")
        else:
            # e.g. "3:01" or "2:22"
            string = dt.strftime("%I:%M")
        if string[0] == '0':
            string = string[1:]  # strip leading zeros

    if not speech:
        return string

    # Generate a speakable version of the time
    speak = ""
    if variant == TimeVariantCA.BELL:
        # Bell Catalan Time System
        # https://en.wikipedia.org/wiki/Catalan_time_system
        # First pick the quarter phrase for the minute. next_hour records
        # whether the phrase counts towards the following hour (True) or the
        # time is read as "<hour> i <minutes>" (False).
        if dt.minute < 7:
            next_hour = False
        elif dt.minute == 7 or dt.minute == 8:
            speak += "mig quart"
            next_hour = True
        elif dt.minute < 15:
            next_hour = False
        elif dt.minute == 15:
            speak += "un quart"
            next_hour = True
        elif dt.minute == 16:
            speak += "un quart i un minut"
            next_hour = True
        elif dt.minute < 21:
            speak += "un quart i " + pronounce_number_ca(
                dt.minute - 15) + " minuts"
            next_hour = True
        elif dt.minute == 22 or dt.minute == 23:
            speak += "un quart i mig"
            next_hour = True
        elif dt.minute < 30:
            speak += "un quart i " + pronounce_number_ca(
                dt.minute - 15) + " minuts"
            next_hour = True
        elif dt.minute == 30:
            speak += "dos quarts"
            next_hour = True
        elif dt.minute == 31:
            speak += "dos quarts i un minut"
            next_hour = True
        elif dt.minute < 37:
            speak += "dos quarts i " + pronounce_number_ca(
                dt.minute - 30) + " minuts"
            next_hour = True
        elif dt.minute == 37 or dt.minute == 38:
            speak += "dos quarts i mig"
            next_hour = True
        elif dt.minute < 45:
            speak += "dos quarts i " + pronounce_number_ca(
                dt.minute - 30) + " minuts"
            next_hour = True
        elif dt.minute == 45:
            speak += "tres quarts"
            next_hour = True
        elif dt.minute == 46:
            speak += "tres quarts i un minut"
            next_hour = True
        elif dt.minute < 52:
            speak += "tres quarts i " + pronounce_number_ca(
                dt.minute - 45) + " minuts"
            next_hour = True
        elif dt.minute == 52 or dt.minute == 53:
            speak += "tres quarts i mig"
            next_hour = True
        elif dt.minute > 53:
            speak += "tres quarts i " + pronounce_number_ca(
                dt.minute - 45) + " minuts"
            next_hour = True

        if next_hour == True:
            # From here on next_hour is reused as the hour number (0-11)
            # that the quarter phrase refers to.
            next_hour = (dt.hour + 1) % 12
            if next_hour == 0:
                speak += " de dotze"
                if dt.hour == 11:
                    speak += " del migdia"
                else:
                    speak += " de la nit"
            elif next_hour == 1:
                speak += " d'una"
                if dt.hour == 12:
                    speak += " de la tarda"
                else:
                    speak += " de la matinada"
            elif next_hour == 2:
                # NOTE(review): unlike " de dotze" / " d'una" above, this
                # literal and the two below ("d'onze", "de ") have no
                # leading space, so they attach directly to the quarter
                # phrase -- looks unintended; confirm.
                speak += "de dues"
                if dt.hour == 13:
                    speak += " de la tarda"
                else:
                    speak += " de la nit"
            elif next_hour == 11:
                speak += "d'onze"
                if dt.hour == 22:
                    speak += " de la nit"
                else:
                    speak += " del matí"
            else:
                speak += "de " + pronounce_number_ca(next_hour)
                # NOTE(review): the first test is equivalent to
                # dt.hour == 0, so hours 2-4 (and hour 20 further down) get
                # no day-period suffix -- possibly meant ">= 0"; confirm.
                if dt.hour == 0 and dt.hour < 5:
                    speak += " de la matinada"
                elif dt.hour >= 5 and dt.hour < 11:
                    speak += " del matí"
                elif dt.hour == 11:
                    speak += " del migdia"
                elif dt.hour >= 12 and dt.hour <= 17:
                    speak += " de la tarda"
                elif dt.hour >= 18 and dt.hour < 20:
                    speak += " del vespre"
                elif dt.hour >= 21 and dt.hour <= 23:
                    speak += " de la nit"
        else:
            # Minutes 0-6 and 9-14: "<hour> i <minutes> minuts".
            hour = dt.hour % 12
            if hour == 0:
                speak += "les dotze"
            elif hour == 1:
                speak += "la una"
            elif hour == 2:
                speak += "les dues"
            else:
                speak += "les " + pronounce_number_ca(hour)

            if dt.minute == 0:
                speak += " en punt"
            elif dt.minute == 1:
                speak += " i un minut"
            else:
                speak += " i " + pronounce_number_ca(dt.minute) + " minuts"

            # Day-period suffix; hour 11 matches none of these branches.
            if dt.hour == 0:
                speak += " de la nit"
            elif dt.hour >= 1 and dt.hour < 6:
                speak += " de la matinada"
            elif dt.hour >= 6 and dt.hour < 11:
                speak += " del matí"
            elif dt.hour == 12:
                speak += " del migdia"
            elif dt.hour >= 13 and dt.hour < 19:
                speak += " de la tarda"
            elif dt.hour >= 19 and dt.hour < 21:
                speak += " del vespre"
            elif dt.hour >= 21 and dt.hour <= 23:
                speak += " de la nit"

    elif variant == TimeVariantCA.FULL_BELL:
        # Full Bell Catalan Time System
        # https://en.wikipedia.org/wiki/Catalan_time_system
        # Same idea as BELL, but minutes are rounded to approximate phrases
        # instead of being counted exactly.
        # NOTE: the first "if" is separate from the chain that follows; both
        # set next_hour to False for minutes 0-1.
        if dt.minute < 2:
            # on the dot ("en punt")
            next_hour = False
        if dt.minute < 5:
            # just struck ("tocades")
            next_hour = False
        elif dt.minute < 7:
            # well past the hour ("ben tocades")
            next_hour = False
        elif dt.minute < 9:
            # half a quarter ("mig quart")
            speak += "mig quart"
            next_hour = True
        elif dt.minute < 12:
            # half a quarter, just past
            speak += "mig quart passat"
            next_hour = True
        elif dt.minute < 14:
            # half a quarter, well past
            speak += "mig quart ben passat"
            next_hour = True
        elif dt.minute < 17:
            speak += "un quart"
            next_hour = True
        elif dt.minute < 20:
            speak += "un quart tocat"
            next_hour = True
        elif dt.minute < 22:
            speak += "un quart ben tocat"
            next_hour = True
        elif dt.minute < 24:
            speak += "un quart i mig"
            next_hour = True
        elif dt.minute < 27:
            speak += "un quart i mig passat"
            next_hour = True
        elif dt.minute < 29:
            speak += "un quart i mig ben passat"
            next_hour = True
        elif dt.minute < 32:
            speak += "dos quarts"
            next_hour = True
        elif dt.minute < 35:
            speak += "dos quarts tocats"
            next_hour = True
        elif dt.minute < 37:
            speak += "dos quarts ben tocats"
            next_hour = True
        elif dt.minute < 39:
            speak += "dos quarts i mig"
            next_hour = True
        elif dt.minute < 42:
            speak += "dos quarts i mig passats"
            next_hour = True
        elif dt.minute < 44:
            speak += "dos quarts i mig ben passats"
            next_hour = True
        elif dt.minute < 47:
            speak += "tres quarts"
            next_hour = True
        elif dt.minute < 50:
            speak += "tres quarts tocats"
            next_hour = True
        elif dt.minute < 52:
            speak += "tres quarts ben tocats"
            next_hour = True
        elif dt.minute < 54:
            speak += "tres quarts i mig"
            next_hour = True
        elif dt.minute < 57:
            speak += "tres quarts i mig passats"
            next_hour = True
        elif dt.minute < 59:
            speak += "tres quarts i mig ben passats"
            next_hour = True
        elif dt.minute == 59:
            # read as the upcoming full hour (handled in the else branch)
            next_hour = False

        if next_hour == True:
            # next_hour is reused as the hour number (0-11) being approached.
            next_hour = (dt.hour + 1) % 12
            if next_hour == 0:
                speak += " de dotze"
                if dt.hour == 11:
                    speak += " del migdia"
                else:
                    speak += " de la nit"
            elif next_hour == 1:
                speak += " d'una"
                if dt.hour == 12:
                    speak += " de la tarda"
                else:
                    speak += " de la matinada"
            elif next_hour == 2:
                # NOTE(review): this literal and the two below have no
                # leading space, unlike " de dotze" / " d'una" -- confirm.
                speak += "de dues"
                if dt.hour == 13:
                    speak += " de la tarda"
                else:
                    speak += " de la nit"
            elif next_hour == 11:
                speak += "d'onze"
                if dt.hour == 22:
                    speak += " de la nit"
                else:
                    speak += " del matí"
            else:
                speak += "de " + pronounce_number_ca(next_hour)
                # NOTE(review): first test is equivalent to dt.hour == 0;
                # hours 2-4 and 20 get no suffix -- confirm intent.
                if dt.hour == 0 and dt.hour < 5:
                    speak += " de la matinada"
                elif dt.hour >= 5 and dt.hour < 11:
                    speak += " del matí"
                elif dt.hour == 11:
                    speak += " del migdia"
                elif dt.hour >= 12 and dt.hour <= 17:
                    speak += " de la tarda"
                elif dt.hour >= 18 and dt.hour < 20:
                    speak += " del vespre"
                elif dt.hour >= 21 and dt.hour <= 23:
                    speak += " de la nit"
        else:
            hour = dt.hour % 12
            if dt.minute == 59:
                # one minute to the hour: speak the upcoming hour
                hour = (hour + 1) % 12
            if hour == 0:
                speak += "les dotze"
            elif hour == 1:
                speak += "la una"
            elif hour == 2:
                speak += "les dues"
            else:
                speak += "les " + pronounce_number_ca(hour)

            # Minute 1 falls through to the "< 7" branch below, and minute
            # 59 matches none of them.
            if dt.minute == 0:
                speak += " en punt"
            elif dt.minute > 1 and dt.minute < 5:
                if hour == 1:
                    speak += " tocada"
                else:
                    speak += " tocades"
            elif dt.minute < 7:
                if hour == 1:
                    speak += " ben tocada"
                else:
                    speak += " ben tocades"

            # Day-period suffix, chosen from the real hour (dt.hour) with an
            # adjustment when the spoken hour was advanced at minute 59.
            # NOTE(review): hour is reduced modulo 12, so the "hour == 12"
            # test below can never be true -- confirm intent.
            if dt.hour == 0:
                if hour == 1:
                    speak += " de la matinada"
                else:
                    speak += " de la nit"
            elif dt.hour < 6:
                if hour == 6:
                    speak += " del matí"
                else:
                    speak += " de la matinada"
            elif dt.hour < 12:
                if hour == 12:
                    speak += " del migdia"
                else:
                    speak += " del matí"
            elif dt.hour == 12:
                if hour == 1:
                    speak += " de la tarda"
                else:
                    speak += " del migdia"
            elif dt.hour < 19:
                if hour == 7:
                    speak += " del vespre"
                else:
                    speak += " de la tarda"
            elif dt.hour < 21:
                if hour == 9:
                    speak += " de la nit"
                else:
                    speak += " del vespre"
            elif dt.hour <= 23:
                speak += " de la nit"

    elif variant == TimeVariantCA.SPANISH_LIKE:
        # Prepare for "tres menys quart" ("quarter to three"): selected
        # minutes are expressed as a negative offset from the next hour.
        if dt.minute == 35:
            minute = -25
            hour = dt.hour + 1
        elif dt.minute == 40:
            minute = -20
            hour = dt.hour + 1
        elif dt.minute == 45:
            minute = -15
            hour = dt.hour + 1
        elif dt.minute == 50:
            minute = -10
            hour = dt.hour + 1
        elif dt.minute == 55:
            minute = -5
            hour = dt.hour + 1
        else:
            minute = dt.minute
            hour = dt.hour

        if hour == 0 or hour == 12:
            speak += "les dotze"
        elif hour == 1 or hour == 13:
            speak += "la una"
        elif hour < 13:
            speak = "les " + pronounce_number_ca(hour)
        else:
            speak = "les " + pronounce_number_ca(hour - 12)

        if minute != 0:
            # the special cases: quarter past, half past, quarter to
            if minute == 15:
                speak += " i quart"
            elif minute == 30:
                speak += " i mitja"
            elif minute == -15:
                speak += " menys quart"
            else:  # e.g. nine past six, twenty-five past seven
                if minute > 0:
                    speak += " i " + pronounce_number_ca(minute)
                else:
                    # e.g. twenty to seven: no "i" is inserted; the negative
                    # value is passed straight to pronounce_number_ca
                    speak += " " + pronounce_number_ca(minute)

    # Default watch time system
    else:
        if use_24hour:
            # simply speak the number
            if dt.hour == 1:
                speak += "la una"
            elif dt.hour == 2:
                speak += "les dues"
            elif dt.hour == 21:
                speak += "les vint-i-una"
            elif dt.hour == 22:
                speak += "les vint-i-dues"
            else:
                speak += "les " + pronounce_number_ca(dt.hour)

            if dt.minute > 0:
                speak += " i " + pronounce_number_ca(dt.minute)
        else:
            # speak number and add daytime identifier
            # (equivalent to "in the morning")
            if dt.hour == 0:
                speak += "les dotze"
            # 1 and 2 are pronounced in female form when talking about hours
            elif dt.hour == 1 or dt.hour == 13:
                speak += "la una"
            elif dt.hour == 2 or dt.hour == 14:
                speak += "les dues"
            elif dt.hour < 13:
                speak = "les " + pronounce_number_ca(dt.hour)
            else:
                speak = "les " + pronounce_number_ca(dt.hour - 12)

            # exact time
            if dt.minute == 0:
                # 3:00
                speak += " en punt"
            # any other minute
            else:
                speak += " i " + pronounce_number_ca(dt.minute)

            # TODO: review day-periods
            if use_ampm:
                if dt.hour == 0:
                    speak += " de la nit"
                elif dt.hour >= 1 and dt.hour < 6:
                    speak += " de la matinada"
                elif dt.hour >= 6 and dt.hour < 12:
                    speak += " del matí"
                elif dt.hour == 12:
                    speak += " del migdia"
                elif dt.hour >= 13 and dt.hour <= 18:
                    speak += " de la tarda"
                elif dt.hour >= 19 and dt.hour < 21:
                    speak += " del vespre"
                elif dt.hour != 0 and dt.hour != 12:
                    speak += " de la nit"
    return speak

View File

@@ -0,0 +1,47 @@
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
def convert_to_mixed_fraction(number, denominators=range(1, 21)):
    """
    Convert floats to components of a mixed fraction representation

    Returns the closest fractional representation using the
    provided denominators.  For example, 4.500002 would become
    the whole number 4, the numerator 1 and the denominator 2

    Denominators are tried in iteration order and the first one whose
    numerator lies within 0.01 of an integer wins.  The fractional part is
    taken as an absolute value, so the sign is carried only by the whole
    part.

    Args:
        number (float): number for convert
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        whole, numerator, denominator (int): Integers of the mixed fraction,
        or None when none of the denominators fits within the 0.01 accuracy
    """
    int_number = int(number)
    if int_number == number:
        return int_number, 0, 1  # whole number, no fraction

    frac_number = abs(number - int_number)
    if not denominators:
        denominators = range(1, 21)

    for denominator in denominators:
        numerator = frac_number * denominator
        if abs(numerator - round(numerator)) < 0.01:  # 0.01 accuracy
            break
    else:
        return None

    numerator = int(round(numerator))
    if numerator == denominator:
        # The fraction rounded up to a full unit (e.g. 4.995 would otherwise
        # come back as 4 1/1).  Carry it into the whole part, moving away
        # from zero so negative inputs keep their direction.
        carried = int_number - 1 if number < 0 else int_number + 1
        return carried, 0, 1
    return int_number, numerator, denominator

View File

@@ -0,0 +1,389 @@
# -*- coding: utf-8 -*-
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from lingua_franca.lang.format_common import convert_to_mixed_fraction
from lingua_franca.lang.common_data_cs import _NUM_STRING_CS, \
_FRACTION_STRING_CS, _LONG_SCALE_CS, _SHORT_SCALE_CS, _SHORT_ORDINAL_CS, _LONG_ORDINAL_CS
def nice_number_cs(number, speech=True, denominators=range(1, 21)):
    """ Czech helper for nice_number

    Formats a number as a mixed fraction.  For display the result looks
    like "4 1/2"; for speech the denominator is replaced by its word from
    _FRACTION_STRING_CS, joined to the whole part with "a".

    Args:
        number (int or float): the float to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction: fall back to a 3 decimal number
        return str(round(number, 3))

    whole, num, den = mixed
    if num == 0:
        # Whole numbers read the same for speech and display
        # TODO: Number grouping?  E.g. "1,000,000"
        return str(whole)
    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = _FRACTION_STRING_CS[den]
    pieces = []
    if whole != 0:
        pieces.extend([str(whole), 'a'])
    if num != 1:
        pieces.append(str(num))
    pieces.append(den_str)
    spoken = ' '.join(pieces)

    # Adjust the ending of the fraction word to the numerator: drop the last
    # character for 5 and more, swap it for 'y' for 2 to 4.
    if num > 4:
        spoken = spoken[:-1]
    elif num > 1:
        spoken = spoken[:-1] + 'y'
    return spoken
def pronounce_number_cs(number, places=2, short_scale=True, scientific=False,
                        ordinals=False):
    """
    Convert a number to its spoken Czech equivalent

    Args:
        number (float or int): the number to pronounce
        places (int): maximum decimal places to speak
        short_scale (bool) : use short (True) or long scale (False)
            https://en.wikipedia.org/wiki/Names_of_large_numbers
        scientific (bool): pronounce in scientific notation
        ordinals (bool): pronounce in ordinal form "first" instead of "one"
    Returns:
        (str): The pronounced number
    """
    num = number
    # deal with infinity
    if num == float("inf"):
        return "nekonečno"
    elif num == float("-inf"):
        return "záporné nekonečno"
    if scientific:
        number = '%E' % num
        n, power = number.replace("+", "").split("E")
        power = int(power)
        if power != 0:
            if ordinals:
                # This handles negatives of powers separately from the normal
                # handling since each call disables the scientific flag
                return '{}{} krát deset k {}{} mocnině'.format(
                    'záporné ' if float(n) < 0 else '',
                    pronounce_number_cs(
                        abs(float(n)), places, short_scale, False,
                        ordinals=False),
                    'záporné ' if power < 0 else '',
                    pronounce_number_cs(abs(power), places, short_scale,
                                        False, ordinals=True))
            else:
                # This handles negatives of powers separately from the normal
                # handling since each call disables the scientific flag
                return '{}{} krát deset na mocninu {}{}'.format(
                    'záporné ' if float(n) < 0 else '',
                    pronounce_number_cs(
                        abs(float(n)), places, short_scale, False),
                    'záporné ' if power < 0 else '',
                    pronounce_number_cs(abs(power), places, short_scale,
                                        False))

    if short_scale:
        number_names = _NUM_STRING_CS.copy()
        number_names.update(_SHORT_SCALE_CS)
    else:
        number_names = _NUM_STRING_CS.copy()
        number_names.update(_LONG_SCALE_CS)

    digits = [number_names[n] for n in range(0, 20)]

    tens = [number_names[n] for n in range(10, 100, 10)]

    if short_scale:
        hundreds = [_SHORT_SCALE_CS[n] for n in _SHORT_SCALE_CS.keys()]
    else:
        hundreds = [_LONG_SCALE_CS[n] for n in _LONG_SCALE_CS.keys()]

    # deal with negatives
    result = ""
    if num < 0:
        result = "záporné " if scientific else "mínus "
    num = abs(num)

    if not ordinals:
        try:
            # deal with 4 digits
            # usually if it's a 4 digit num it should be said like a date
            # i.e. 1972 => nineteen seventy two
            if len(str(num)) == 4 and isinstance(num, int):
                _num = str(num)
                # deal with 1000, 2000, 2001, 2100, 3123, etc
                # is skipped as the rest of the
                # function deals with this already
                if _num[1:4] == '000' or _num[1:3] == '00' or \
                        int(_num[0:2]) >= 20:
                    pass
                # deal with 1900, 1300, etc
                # i.e. 1900 => nineteen hundred
                elif _num[2:4] == '00':
                    first = number_names[int(_num[0:2])]
                    last = number_names[100]
                    return first + " " + last
                # deal with 1960, 1961, etc
                # i.e. 1960 => nineteen sixty
                #      1961 => nineteen sixty one
                else:
                    first = number_names[int(_num[0:2])]
                    if _num[3:4] == '0':
                        last = number_names[int(_num[2:4])]
                    else:
                        second = number_names[int(_num[2:3])*10]
                        last = second + " " + number_names[int(_num[3:4])]
                    return first + " " + last
        # exception used to catch any unforeseen edge cases
        # will default back to normal subroutine
        except Exception as e:
            # TODO this probably shouldn't go to stdout
            # The placeholder is now actually filled in; previously the
            # literal "{}" was printed in front of the exception repr.
            print('ERROR: Exception in pronounce_number_cs: {}'.format(
                repr(e)))

    # check for a direct match
    if num in number_names and not ordinals:
        if num > 90:
            result += "jedna "
        result += number_names[num]
    else:
        def _sub_thousand(n, ordinals=False):
            # Pronounce 0..999, optionally as an ordinal
            assert 0 <= n <= 999
            if n in _SHORT_ORDINAL_CS and ordinals:
                return _SHORT_ORDINAL_CS[n]
            if n <= 19:
                return digits[n]
            elif n <= 99:
                q, r = divmod(n, 10)
                return tens[q - 1] + (" " + _sub_thousand(r, ordinals) if r
                                      else "")
            else:
                q, r = divmod(n, 100)
                return digits[q] + " sto" + (
                    " a " + _sub_thousand(r, ordinals) if r else "")

        def _short_scale(n):
            # Pronounce the integer part of n in groups of three digits
            if n >= max(_SHORT_SCALE_CS.keys()):
                return "nekonečno"
            ordi = ordinals

            if int(n) != n:
                ordi = False
            n = int(n)
            assert 0 <= n
            res = []
            for i, z in enumerate(_split_by(n, 1000)):
                if not z:
                    continue

                number = _sub_thousand(z, not i and ordi)

                if i:
                    if i >= len(hundreds):
                        return ""
                    number += " "
                    if ordi:

                        if i * 1000 in _SHORT_ORDINAL_CS:
                            if z == 1:
                                number = _SHORT_ORDINAL_CS[i * 1000]
                            else:
                                number += _SHORT_ORDINAL_CS[i * 1000]
                        else:
                            if n not in _SHORT_SCALE_CS:
                                num = int("1" + "0" * (len(str(n)) - 2))

                                number += _SHORT_SCALE_CS[num] + ""
                            else:
                                number = _SHORT_SCALE_CS[n] + ""
                    else:
                        number += hundreds[i]
                res.append(number)
                # only the lowest non-zero group takes the ordinal form
                ordi = False

            return ", ".join(reversed(res))

        def _split_by(n, split=1000):
            # Split n into base-`split` groups, least significant first
            assert 0 <= n
            res = []
            while n:
                n, r = divmod(n, split)
                res.append(r)
            return res

        def _long_scale(n):
            # Pronounce the integer part of n in groups of six digits
            if n >= max(_LONG_SCALE_CS.keys()):
                return "nekonečno"
            ordi = ordinals
            if int(n) != n:
                ordi = False
            n = int(n)
            assert 0 <= n
            res = []
            for i, z in enumerate(_split_by(n, 1000000)):
                if not z:
                    continue
                number = pronounce_number_cs(z, places, True, scientific,
                                             ordinals=ordi and not i)
                # strip off the comma after the thousand
                if i:
                    if i >= len(hundreds):
                        return ""
                    # plus one as we skip 'thousand'
                    # (and 'hundred', but this is excluded by index value)
                    number = number.replace(',', '')

                    if ordi:
                        if i * 1000000 in _LONG_ORDINAL_CS:
                            if z == 1:
                                number = _LONG_ORDINAL_CS[
                                    (i + 1) * 1000000]
                            else:
                                number += _LONG_ORDINAL_CS[
                                    (i + 1) * 1000000]
                        else:
                            if n not in _LONG_SCALE_CS:
                                num = int("1" + "0" * (len(str(n)) - 2))

                                number += " " + _LONG_SCALE_CS[
                                    num] + ""
                            else:
                                number = " " + _LONG_SCALE_CS[n] + ""
                    else:
                        number += " " + hundreds[i + 1]
                res.append(number)
            return ", ".join(reversed(res))

        if short_scale:
            result += _short_scale(num)
        else:
            result += _long_scale(num)

    # deal with scientific notation unpronounceable as number
    if not result and "e" in str(num):
        return pronounce_number_cs(num, places, short_scale, scientific=True)
    # Deal with fractional part
    elif not num == int(num) and places > 0:
        if abs(num) < 1.0 and (result == "mínus " or not result):
            result += "nula"
        result += " tečka"
        _num_str = str(num)
        _num_str = _num_str.split(".")[1][0:places]
        for char in _num_str:
            result += " " + number_names[int(char)]
    return result
def nice_time_cs(dt, speech=True, use_24hour=True, use_ampm=False):
    """
    Format a time for display or for Czech speech

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        clock = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" / "2:22 PM" with am/pm, "3:01" / "2:22" without
        clock = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if clock[0] == '0':
            clock = clock[1:]  # strip leading zeros

    if not speech:
        return clock

    # Generate a speakable version of the time
    if use_24hour:
        hh, mm = clock[0:2], clock[3:5]
        # A leading zero in the hour is spoken as its own digit
        if hh[0] == '0':
            spoken = pronounce_number_cs(int(hh[0])) + " "
            spoken += pronounce_number_cs(int(hh[1]))
        else:
            spoken = pronounce_number_cs(int(hh))

        spoken += " "
        if mm == '00':
            spoken += "sto"
        elif mm[0] == '0':
            spoken += pronounce_number_cs(0) + " "
            spoken += pronounce_number_cs(int(mm[1]))
        else:
            spoken += pronounce_number_cs(int(mm))
        return spoken

    minute = dt.minute
    if minute == 0 and dt.hour == 0:
        return "půlnoc"
    if minute == 0 and dt.hour == 12:
        return "poledne"

    hour = dt.hour % 12 or 12  # 12 hour clock and 0 is spoken as 12
    if minute == 15:
        spoken = "čtvrt po " + pronounce_number_cs(hour)
    elif minute == 30:
        spoken = "půl po " + pronounce_number_cs(hour)
    elif minute == 45:
        next_hour = (dt.hour + 1) % 12 or 12
        spoken = "třičtvrtě na " + pronounce_number_cs(next_hour)
    else:
        spoken = pronounce_number_cs(hour)
        if minute == 0:
            if not use_ampm:
                return spoken + " hodin"
        else:
            if minute < 10:
                spoken += " oh"
            spoken += " " + pronounce_number_cs(minute)

    if use_ampm:
        spoken += " p.m." if dt.hour > 11 else " a.m."
    return spoken

View File

@@ -0,0 +1,339 @@
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from lingua_franca.lang.format_common import convert_to_mixed_fraction
from lingua_franca.lang.common_data_da import _EXTRA_SPACE_DA, \
_FRACTION_STRING_DA, _MONTHS_DA, _NUM_POWERS_OF_TEN, _NUM_STRING_DA
from math import floor
def nice_number_da(number, speech=True, denominators=range(1, 21)):
    """ Danish helper for nice_number

    Formats a number as a mixed fraction.  For display the result looks
    like "4 1/2"; for speech the denominator is replaced by its word from
    _FRACTION_STRING_DA (with an "e" appended when the numerator is not 1)
    and joined to the whole part with "og".

    Args:
        number (int or float): the float to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction: fall back to a 3 decimal number with a
        # decimal comma
        return str(round(number, 3)).replace(".", ",")

    whole, num, den = mixed
    if num == 0:
        # Whole numbers read the same for speech and display
        # TODO: Number grouping?  E.g. "1,000,000"
        return str(whole)
    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = _FRACTION_STRING_DA[den]
    plural_suffix = '' if num == 1 else 'e'
    fraction = '{} {}{}'.format(num, den_str, plural_suffix)
    if whole == 0:
        return fraction
    return '{} og {}'.format(whole, fraction)
def pronounce_number_da(number, places=2, short_scale=True, scientific=False,
                        ordinals=False):
    """
    Convert a number to its spoken Danish equivalent

    Numbers whose absolute value is 10**24 or more are returned as
    str(number) unchanged.

    Args:
        number (float or int): the number to pronounce
        places (int): number of decimal places to speak
        short_scale (bool) : currently ignored
        scientific (bool): currently ignored
        ordinals (bool): currently ignored
    Returns:
        (str): The pronounced number
    """
    # TODO short_scale, scientific and ordinals
    # currently ignored

    def pronounce_triplet_da(num):
        # Pronounce a group of up to three digits (0..999)
        result = ""
        num = floor(num)
        if num > 99:
            hundreds = num // 100
            if hundreds > 0:
                if hundreds == 1:
                    result += 'et' + 'hundrede' + _EXTRA_SPACE_DA
                else:
                    result += _NUM_STRING_DA[hundreds] + \
                        'hundrede' + _EXTRA_SPACE_DA
                # continue with the remaining 0..99
                num -= hundreds * 100
        if num == 0:
            result += ''  # do nothing
        elif num == 1:
            result += 'et'
        elif num <= 20:
            result += _NUM_STRING_DA[num] + _EXTRA_SPACE_DA
        elif num > 20:
            ones = num % 10
            tens = num - ones
            if ones > 0:
                # ones are spoken before the tens, joined by "og"
                result += _NUM_STRING_DA[ones] + _EXTRA_SPACE_DA
                if tens > 0:
                    result += 'og' + _EXTRA_SPACE_DA
            if tens > 0:
                result += _NUM_STRING_DA[tens] + _EXTRA_SPACE_DA

        return result

    def pronounce_fractional_da(num, places):
        # fixed number of places even with trailing zeros
        # NOTE(review): int(num * place) truncates a float product, so
        # binary rounding may yield a digit one lower than written - confirm
        result = ""
        place = 10
        while places > 0:
            result += " " + _NUM_STRING_DA[int(num * place) % 10]
            place *= 10
            places -= 1
        return result

    def pronounce_whole_number_da(num, scale_level=0):
        # Recursively pronounce num three digits at a time; scale_level is
        # the index of the current group (0 = units, 1 = thousands, ...)
        if num == 0:
            return ''

        num = floor(num)
        result = ''
        last_triplet = num % 1000

        if last_triplet == 1:
            if scale_level == 0:
                result += "en"
            elif scale_level == 1:
                result += 'et' + _EXTRA_SPACE_DA + 'tusinde' + _EXTRA_SPACE_DA
            else:
                result += "en " + _NUM_POWERS_OF_TEN[scale_level] + ' '
        elif last_triplet > 1:
            result += pronounce_triplet_da(last_triplet)
            if scale_level == 1:
                result += 'tusinde' + _EXTRA_SPACE_DA
            if scale_level >= 2:
                result += "og" + _NUM_POWERS_OF_TEN[scale_level]
            if scale_level >= 2:
                if scale_level % 2 == 0:
                    result += "er"  # MillionER
                result += "er "  # MilliardER, MillioneER

        # Integer floor division keeps every digit; going through float
        # division loses precision above 2**53.
        num = num // 1000
        scale_level += 1
        return pronounce_whole_number_da(num,
                                         scale_level) + result + _EXTRA_SPACE_DA

    result = ""
    if abs(number) >= 1000000000000000000000000:  # cannot do more than this
        return str(number)
    elif number == 0:
        return str(_NUM_STRING_DA[0])
    elif number < 0:
        return "minus " + pronounce_number_da(abs(number), places)
    else:
        if number == int(number):
            return pronounce_whole_number_da(number)
        else:
            whole_number_part = floor(number)
            fractional_part = number - whole_number_part
            result += pronounce_whole_number_da(whole_number_part)
            if places > 0:
                result += " komma"
                result += pronounce_fractional_da(fractional_part, places)
            return result
def pronounce_ordinal_da(number):
    """
    This function pronounces a number as a Danish ordinal

    1 -> første
    2 -> anden

    Args:
        number (int): the number to format
    Returns:
        (str): The pronounced number string.  Negative or non-whole input
        is returned unchanged (not converted to a string).
    """
    # the ordinals below ten are looked up directly
    # this produces the base form, it will have to be adapted for genus,
    # casus, numerus
    # ("tiende" is kept from the original table; 10 itself goes through the
    # suffix rules below)
    ordinals = ["nulte", "første", "anden", "tredie", "fjerde", "femte",
                "sjette", "syvende", "ottende", "niende", "tiende"]

    # only for whole positive numbers including zero
    if number < 0 or number != int(number):
        return number
    if number < 10:
        # int() so that whole floats such as 3.0 can index the list
        return ordinals[int(number)]

    spoken = pronounce_number_da(number)
    if 30 <= number < 40:
        return spoken + "fte"
    # everything else: "nde" after a trailing "e", otherwise "ende"
    if spoken[-1:] == 'e':
        return spoken + "nde"
    return spoken + "ende"
def nice_time_da(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time for display or for Danish speech

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        fmt = "%H:%M"
    elif use_ampm:
        # e.g. "03:01 AM" or "02:22 PM"
        fmt = "%I:%M %p"
    else:
        # e.g. "03:01" or "02:22"
        fmt = "%I:%M"
    if not speech:
        return dt.strftime(fmt)

    # Generate a speakable version of the time
    hour, minute = dt.hour, dt.minute

    if use_24hour:
        # 01:00 is "et" not "en"
        spoken = "et" if hour == 1 else pronounce_number_da(hour)
    else:
        if minute == 0 and hour == 0:
            return "midnat"
        if minute == 0 and hour == 12:
            return "middag"
        # TODO: "half past 3", "a quarter of 4" and other idiomatic times
        if hour == 0:
            spoken = pronounce_number_da(12)
        elif hour in (1, 13):
            # 01:00 and 13:00 is "et"
            spoken = 'et'
        elif hour < 13:
            spoken = pronounce_number_da(hour)
        else:
            spoken = pronounce_number_da(hour - 12)

    if minute != 0:
        if minute < 10:
            spoken += ' nul'
        spoken += " " + pronounce_number_da(minute)

    if use_24hour:
        return spoken  # ampm is ignored when use_24hour is true

    if use_ampm:
        if hour < 3 or hour >= 22:
            # 22:00 - 02:59 at night
            spoken += " om natten"
        elif hour < 12:
            # 03:00 - 11:59 in the morning
            spoken += " om morgenen"
        elif hour < 18:
            # 12:01 - 17:59 afternoon
            spoken += " om eftermiddagen"
        else:
            # 18:00 - 21:59 evening
            spoken += " om aftenen"
    return spoken
def nice_response_da(text):
    """
    Make a Danish response text easier to speak

    Replaces a standalone "^" that is followed by a numeric word with
    "opløftet i" (to the power of) and, when the text contains a month
    name, passes it through _nice_ordinal_da for declension of ordinals.

    Args:
        text (str): the response text
    Returns:
        (str): the adapted text; returned unchanged when nothing applies
    """
    words = text.split()
    has_month = False
    replaced_power = False

    for idx, word in enumerate(words):
        if word.lower() in _MONTHS_DA:
            has_month = True

        if word == '^':
            wordNext = words[idx + 1] if idx + 1 < len(words) else ""
            if wordNext.isnumeric():
                words[idx] = "opløftet i"
                replaced_power = True

    # Apply the "^" replacement first and the ordinal pass on top of it, so
    # that neither change overwrites the other regardless of word order.
    if replaced_power:
        text = " ".join(words)
    if has_month:
        text = _nice_ordinal_da(text)
    return text
def _nice_ordinal_da(text, speech=True):
    """
    Spell out "<digits>." ordinals that directly precede a month name

    The ordinal gets an "n" or "r" ending depending on the word in front
    of it (article/preposition).

    Args:
        text (str): the text to normalize
        speech (bool): unused
    Returns:
        (str): the normalized text
    """
    normalized_text = text
    words = text.split()
    n_ending_after = ["om", "den", "fra", "til", "(fra", "(om", "til"]

    for idx, word in enumerate(words):
        if word[-1:] != ".":
            continue

        digits = word[:-1]
        following = words[idx + 1] if idx + 1 < len(words) else ""
        if digits.isdecimal() and following.lower() in _MONTHS_DA:
            preceding = words[idx - 1] if idx > 0 else ""
            ordinal = pronounce_ordinal_da(int(digits))
            if preceding.lower() in n_ending_after:
                ordinal += "n"
            elif preceding.lower() not in ["den"]:
                ordinal += "r"
            words[idx] = ordinal
        normalized_text = " ".join(words)
    return normalized_text

View File

@@ -0,0 +1,327 @@
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from lingua_franca.lang.format_common import convert_to_mixed_fraction
from lingua_franca.lang.common_data_de import _EXTRA_SPACE_DE, \
_FRACTION_STRING_DE, _MONTHS_DE, _NUM_POWERS_OF_TEN_DE, _NUM_STRING_DE
from math import floor
def nice_number_de(number, speech=True, denominators=range(1, 21)):
    """ German helper for nice_number

    Formats a number as a mixed fraction.  For display the result looks
    like "4 1/2"; for speech the denominator is replaced by its word from
    _FRACTION_STRING_DE, a numerator of 1 is spoken as "ein", and the
    fraction is joined to the whole part with "und".

    Args:
        number (int or float): the float to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction: fall back to a 3 decimal number with a
        # decimal comma
        return str(round(number, 3)).replace(".", ",")

    whole, num, den = mixed
    if num == 0:
        # Whole numbers read the same for speech and display
        # TODO: Number grouping?  E.g. "1,000,000"
        return str(whole)
    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = _FRACTION_STRING_DE[den]
    count = 'ein' if num == 1 else num
    fraction = '{} {}'.format(count, den_str)
    if whole == 0:
        return fraction
    return '{} und {}'.format(whole, fraction)
def pronounce_number_de(number, places=2, short_scale=True, scientific=False,
                        ordinals=False):
    """
    Convert a number to its spoken German equivalent

    Numbers whose absolute value is 10**24 or more are returned as
    str(number) unchanged.

    Args:
        number (float or int): the number to pronounce
        places (int): number of decimal places to speak
        short_scale (bool) : currently ignored
        scientific (bool): currently ignored
        ordinals (bool): currently ignored
    Returns:
        (str): The pronounced number
    """
    # TODO short_scale, scientific and ordinals
    # currently ignored

    def pronounce_triplet_de(num):
        # Pronounce a group of up to three digits (0..999)
        result = ""
        num = floor(num)
        if num > 99:
            hundreds = num // 100
            if hundreds > 0:
                result += _NUM_STRING_DE[
                    hundreds] + _EXTRA_SPACE_DE + 'hundert' + _EXTRA_SPACE_DE
                # continue with the remaining 0..99
                num -= hundreds * 100
        if num == 0:
            result += ''  # do nothing
        elif num == 1:
            result += 'eins'  # need the s for the last digit
        elif num <= 20:
            result += _NUM_STRING_DE[num]
        elif num > 20:
            ones = num % 10
            tens = num - ones
            if ones > 0:
                # ones are spoken before the tens, joined by "und"
                result += _NUM_STRING_DE[ones] + _EXTRA_SPACE_DE
                if tens > 0:
                    result += 'und' + _EXTRA_SPACE_DE
            if tens > 0:
                result += _NUM_STRING_DE[tens] + _EXTRA_SPACE_DE
        return result

    def pronounce_fractional_de(num, places):
        # fixed number of places even with trailing zeros
        # NOTE(review): int(num * place) truncates a float product, so
        # binary rounding may yield a digit one lower than written - confirm
        result = ""
        place = 10
        while places > 0:
            result += " " + _NUM_STRING_DE[int(num * place) % 10]
            if int(num * place) % 10 == 1:
                # "1" is pronounced "eins" after the decimal point
                result += 's'
            place *= 10
            places -= 1
        return result

    def pronounce_whole_number_de(num, scale_level=0):
        # Recursively pronounce num three digits at a time; scale_level is
        # the index of the current group (0 = units, 1 = thousands, ...)
        if num == 0:
            return ''

        num = floor(num)
        result = ''
        last_triplet = num % 1000

        if last_triplet == 1:
            if scale_level == 0:
                result += "eins"
            elif scale_level == 1:
                result += 'ein' + _EXTRA_SPACE_DE + 'tausend' + _EXTRA_SPACE_DE
            else:
                result += "eine " + _NUM_POWERS_OF_TEN_DE[scale_level] + ' '
        elif last_triplet > 1:
            result += pronounce_triplet_de(last_triplet)
            if scale_level == 1:
                result += 'tausend' + _EXTRA_SPACE_DE
            if scale_level >= 2:
                result += " " + _NUM_POWERS_OF_TEN_DE[scale_level]
            if scale_level >= 2:
                if scale_level % 2 == 0:
                    result += "e"  # MillionE
                result += "n "  # MilliardeN, MillioneN

        # Integer floor division keeps every digit; going through float
        # division loses precision above 2**53.
        num = num // 1000
        scale_level += 1
        return pronounce_whole_number_de(num, scale_level) + result

    result = ""
    if abs(number) >= 1000000000000000000000000:  # cannot do more than this
        return str(number)
    elif number == 0:
        return str(_NUM_STRING_DE[0])
    elif number < 0:
        return "minus " + pronounce_number_de(abs(number), places)
    else:
        if number == int(number):
            return pronounce_whole_number_de(number)
        else:
            whole_number_part = floor(number)
            fractional_part = number - whole_number_part
            result += pronounce_whole_number_de(whole_number_part)
            if places > 0:
                result += " Komma"
                result += pronounce_fractional_de(fractional_part, places)
            return result
def pronounce_ordinal_de(number):
    """
    This function pronounces a number as a German ordinal

    1 -> erste
    2 -> zweite

    Args:
        number (int): the number to format
    Returns:
        (str): The pronounced number string.  Negative or non-whole input
        is returned unchanged (not converted to a string).
    """
    # ordinals for 1, 3, 7 and 8 are irregular
    # this produces the base form, it will have to be adapted for genus,
    # casus, numerus
    ordinals = ["nullte", "erste", "zweite", "dritte", "vierte", "fünfte",
                "sechste", "siebte", "achte"]

    # only for whole positive numbers including zero
    if number < 0 or number != int(number):
        return number
    if number < 9:
        # int() so that whole floats such as 3.0 can index the list
        return ordinals[int(number)]
    # 9..19 take "te", everything from 20 up takes "ste"
    suffix = "te" if number < 20 else "ste"
    return pronounce_number_de(number) + suffix
def nice_time_de(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time for display or for German speech

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if not speech:
        if use_24hour:
            # e.g. "03:01" or "14:22"
            return dt.strftime("%H:%M")
        # e.g. "3:01 AM" / "2:22 PM" with am/pm, "3:01" / "2:22" without
        shown = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        # strip leading zeros
        return shown[1:] if shown[0] == '0' else shown

    # Generate a speakable version of the time
    hour, minute = dt.hour, dt.minute

    if use_24hour:
        # 01:00 is "ein Uhr" not "eins Uhr"
        spoken = "ein" if hour == 1 else pronounce_number_de(hour)
        spoken += " Uhr"
        # zero minutes are not pronounced, 13:00 is "13 Uhr"
        if minute != 0:
            spoken += " " + pronounce_number_de(minute)
        return spoken  # ampm is ignored when use_24hour is true

    if minute == 0 and hour == 0:
        return "Mitternacht"
    if minute == 0 and hour == 12:
        return "Mittag"

    quarter_words = {15: "viertel ", 30: "halb ", 45: "dreiviertel "}
    if minute in quarter_words:
        # sentence relative to next hour and 0 spoken as 12
        next_hour = (hour + 1) % 12 or 12
        spoken = quarter_words[minute] + pronounce_number_de(next_hour)
    else:
        hour_12 = hour % 12 or 12  # 12 hour clock and 0 is spoken as 12
        # 01:00 and 13:00 is "ein Uhr" not "eins Uhr"
        spoken = 'ein' if hour_12 == 1 else pronounce_number_de(hour_12)
        spoken += " Uhr"
        if minute != 0:
            spoken += " " + pronounce_number_de(minute)

    if use_ampm:
        if hour < 3 or hour >= 22:
            spoken += " nachts"  # 22:00 - 02:59 at night
        elif hour < 12:
            spoken += " morgens"  # 03:00 - 11:59 in the morning
        elif hour < 18:
            spoken += " nachmittags"  # 12:01 - 17:59 afternoon
        else:
            spoken += " abends"  # 18:00 - 21:59 evening
    return spoken
def nice_response_de(text):
    """
    Make a German response text easier to speak

    Replaces a standalone "^" that is followed by a numeric word with
    "hoch" (to the power of) and, when the text contains a month name,
    passes it through _nice_ordinal_de for declension of ordinals.

    Args:
        text (str): the response text
    Returns:
        (str): the adapted text; returned unchanged when nothing applies
    """
    words = text.split()
    has_month = False
    replaced_power = False

    for idx, word in enumerate(words):
        if word.lower() in _MONTHS_DE:
            has_month = True

        if word == '^':
            wordNext = words[idx + 1] if idx + 1 < len(words) else ""
            if wordNext.isnumeric():
                words[idx] = "hoch"
                replaced_power = True

    # Apply the "^" replacement first and the ordinal pass on top of it, so
    # that neither change overwrites the other regardless of word order.
    if replaced_power:
        text = " ".join(words)
    if has_month:
        text = _nice_ordinal_de(text)
    return text
def _nice_ordinal_de(text, speech=True):
    """
    Spell out "<digits>." ordinals that directly precede a month name

    The ordinal gets an "n" or "r" ending depending on the word in front
    of it (article/preposition); after "der", "die" or "das" it stays in
    its base form.

    Args:
        text (str): the text to normalize
        speech (bool): unused
    Returns:
        (str): the normalized text
    """
    normalized_text = text
    words = text.split()
    n_ending_after = ["am", "dem", "vom", "zum", "(vom", "(am", "zum"]

    for idx, word in enumerate(words):
        if word[-1:] != ".":
            continue

        digits = word[:-1]
        following = words[idx + 1] if idx + 1 < len(words) else ""
        if digits.isdecimal() and following.lower() in _MONTHS_DE:
            preceding = words[idx - 1] if idx > 0 else ""
            ordinal = pronounce_ordinal_de(int(digits))
            if preceding.lower() in n_ending_after:
                ordinal += "n"
            elif preceding.lower() not in ["der", "die", "das"]:
                ordinal += "r"
            words[idx] = ordinal
        normalized_text = " ".join(words)
    return normalized_text

View File

@@ -0,0 +1,386 @@
# -*- coding: utf-8 -*-
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from lingua_franca.lang.format_common import convert_to_mixed_fraction
from lingua_franca.lang.common_data_en import _NUM_STRING_EN, \
_FRACTION_STRING_EN, _LONG_SCALE_EN, _SHORT_SCALE_EN, _SHORT_ORDINAL_EN, _LONG_ORDINAL_EN
def nice_number_en(number, speech=True, denominators=range(1, 21)):
    """ English helper for nice_number

    Formats a number as a mixed fraction.  Like 4.5 becomes
    "4 and a half" for speech and "4 1/2" for text

    Args:
        number (int or float): the float to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction: fall back to a 3 decimal number
        return str(round(number, 3))

    whole, num, den = mixed
    if num == 0:
        # Whole numbers read the same for speech and display
        # TODO: Number grouping?  E.g. "1,000,000"
        return str(whole)
    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = _FRACTION_STRING_EN[den]
    count = 'a' if num == 1 else num
    fraction = '{} {}'.format(count, den_str)
    if num > 1:
        fraction += 's'  # plural fraction word
    if whole == 0:
        return fraction
    return '{} and {}'.format(whole, fraction)
def pronounce_number_en(number, places=2, short_scale=True, scientific=False,
                        ordinals=False):
    """
    Convert a number to its spoken equivalent

    For example, 5.2 would return 'five point two'

    Four digit integers below 2000 that are not multiples of 1000 (and not
    of the form x00y) are spoken like years when ordinals is False,
    e.g. 1972 => 'nineteen seventy two'.

    Args:
        number (float or int): the number to pronounce
        places (int): maximum decimal places to speak
        short_scale (bool): use short (True) or long scale (False)
            https://en.wikipedia.org/wiki/Names_of_large_numbers
        scientific (bool): pronounce in scientific notation
        ordinals (bool): pronounce in ordinal form "first" instead of "one"
    Returns:
        (str): The pronounced number
    """
    num = number
    # deal with infinity
    if num == float("inf"):
        return "infinity"
    elif num == float("-inf"):
        return "negative infinity"
    if scientific:
        number = '%E' % num
        n, power = number.replace("+", "").split("E")
        power = int(power)
        if power != 0:
            if ordinals:
                # This handles negatives of powers separately from the normal
                # handling since each call disables the scientific flag
                return '{}{} times ten to the {}{} power'.format(
                    'negative ' if float(n) < 0 else '',
                    pronounce_number_en(
                        abs(float(n)), places, short_scale, False,
                        ordinals=False),
                    'negative ' if power < 0 else '',
                    pronounce_number_en(
                        abs(power), places, short_scale, False,
                        ordinals=True))
            else:
                # This handles negatives of powers separately from the normal
                # handling since each call disables the scientific flag
                return '{}{} times ten to the power of {}{}'.format(
                    'negative ' if float(n) < 0 else '',
                    pronounce_number_en(
                        abs(float(n)), places, short_scale, False),
                    'negative ' if power < 0 else '',
                    pronounce_number_en(
                        abs(power), places, short_scale, False))

    if short_scale:
        number_names = _NUM_STRING_EN.copy()
        number_names.update(_SHORT_SCALE_EN)
    else:
        number_names = _NUM_STRING_EN.copy()
        number_names.update(_LONG_SCALE_EN)

    digits = [number_names[n] for n in range(0, 20)]

    tens = [number_names[n] for n in range(10, 100, 10)]

    # scale names in the iteration order of the selected scale table
    if short_scale:
        hundreds = [_SHORT_SCALE_EN[n] for n in _SHORT_SCALE_EN.keys()]
    else:
        hundreds = [_LONG_SCALE_EN[n] for n in _LONG_SCALE_EN.keys()]

    # deal with negatives
    result = ""
    if num < 0:
        result = "negative " if scientific else "minus "
    num = abs(num)

    if not ordinals:
        try:
            # deal with 4 digits
            # usually if it's a 4 digit num it should be said like a date
            # i.e. 1972 => nineteen seventy two
            if len(str(num)) == 4 and isinstance(num, int):
                _num = str(num)
                # deal with 1000, 2000, 2001, 2100, 3123, etc
                # is skipped as the rest of the
                # function deals with this already
                if _num[1:4] == '000' or _num[1:3] == '00' or \
                        int(_num[0:2]) >= 20:
                    pass
                # deal with 1900, 1300, etc
                # i.e. 1900 => nineteen hundred
                elif _num[2:4] == '00':
                    first = number_names[int(_num[0:2])]
                    last = number_names[100]
                    # `result` carries the sign prefix; the original dropped
                    # it here, so -1900 was spoken as a positive number
                    return result + first + " " + last
                # deal with 1960, 1961, etc
                # i.e. 1960 => nineteen sixty
                #      1961 => nineteen sixty one
                else:
                    first = number_names[int(_num[0:2])]
                    if _num[3:4] == '0':
                        last = number_names[int(_num[2:4])]
                    else:
                        second = number_names[int(_num[2:3]) * 10]
                        last = second + " " + number_names[int(_num[3:4])]
                    # keep the sign prefix here as well
                    return result + first + " " + last
        # exception used to catch any unforeseen edge cases
        # will default back to normal subroutine
        except Exception as e:
            # TODO this probably shouldn't go to stdout
            print('ERROR: Exception in pronounce_number_en: {}'.format(
                repr(e)))

    # check for a direct match
    if num in number_names and not ordinals:
        if num > 90:
            # "hundred", "thousand", ... are spoken as "one hundred", ...
            result += "one "
        result += number_names[num]
    else:
        def _sub_thousand(n, ordinals=False):
            # pronounce 0..999, optionally ending in an ordinal word
            assert 0 <= n <= 999
            if n in _SHORT_ORDINAL_EN and ordinals:
                return _SHORT_ORDINAL_EN[n]
            if n <= 19:
                return digits[n]
            elif n <= 99:
                q, r = divmod(n, 10)
                return tens[q - 1] + (" " + _sub_thousand(r, ordinals) if r
                                      else "")
            else:
                q, r = divmod(n, 100)
                return digits[q] + " hundred" + (
                    " and " + _sub_thousand(r, ordinals) if r else "")

        def _short_scale(n):
            # pronounce the integer part of n in groups of a thousand
            if n >= max(_SHORT_SCALE_EN.keys()):
                return "infinity"
            ordi = ordinals

            if int(n) != n:
                # ordinals only make sense for whole numbers
                ordi = False
            n = int(n)
            assert 0 <= n
            res = []
            for i, z in enumerate(_split_by(n, 1000)):
                if not z:
                    continue

                number = _sub_thousand(z, not i and ordi)

                if i:
                    if i >= len(hundreds):
                        return ""
                    number += " "
                    if ordi:
                        if i * 1000 in _SHORT_ORDINAL_EN:
                            if z == 1:
                                number = _SHORT_ORDINAL_EN[i * 1000]
                            else:
                                number += _SHORT_ORDINAL_EN[i * 1000]
                        else:
                            if n not in _SHORT_SCALE_EN:
                                num = int("1" + "0" * (len(str(n)) - 2))

                                number += _SHORT_SCALE_EN[num] + "th"
                            else:
                                number = _SHORT_SCALE_EN[n] + "th"
                    else:
                        number += hundreds[i]
                res.append(number)
                # only the lowest non-zero group may take the ordinal form
                ordi = False

            return ", ".join(reversed(res))

        def _split_by(n, split=1000):
            # split n into base-`split` groups, least significant first
            assert 0 <= n
            res = []
            while n:
                n, r = divmod(n, split)
                res.append(r)
            return res

        def _long_scale(n):
            # pronounce the integer part of n in groups of a million
            if n >= max(_LONG_SCALE_EN.keys()):
                return "infinity"
            ordi = ordinals
            if int(n) != n:
                ordi = False
            n = int(n)
            assert 0 <= n
            res = []
            for i, z in enumerate(_split_by(n, 1000000)):
                if not z:
                    continue
                number = pronounce_number_en(z, places, True, scientific,
                                             ordinals=ordi and not i)
                # strip off the comma after the thousand
                if i:
                    if i >= len(hundreds):
                        return ""
                    # plus one as we skip 'thousand'
                    # (and 'hundred', but this is excluded by index value)
                    number = number.replace(',', '')

                    if ordi:
                        if i * 1000000 in _LONG_ORDINAL_EN:
                            if z == 1:
                                number = _LONG_ORDINAL_EN[
                                    (i + 1) * 1000000]
                            else:
                                number += _LONG_ORDINAL_EN[
                                    (i + 1) * 1000000]
                        else:
                            if n not in _LONG_SCALE_EN:
                                num = int("1" + "0" * (len(str(n)) - 2))

                                number += " " + _LONG_SCALE_EN[
                                    num] + "th"
                            else:
                                number = " " + _LONG_SCALE_EN[n] + "th"
                    else:
                        number += " " + hundreds[i + 1]
                res.append(number)
            return ", ".join(reversed(res))

        if short_scale:
            result += _short_scale(num)
        else:
            result += _long_scale(num)

    # deal with scientific notation unpronounceable as number
    if not result and "e" in str(num):
        return pronounce_number_en(num, places, short_scale, scientific=True)
    # Deal with fractional part
    elif not num == int(num) and places > 0:
        if abs(num) < 1.0 and (result == "minus " or not result):
            result += "zero"
        result += " point"
        _num_str = str(num)
        _num_str = _num_str.split(".")[1][0:places]
        for char in _num_str:
            result += " " + number_names[int(char)]
    return result
def nice_time_en(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format

    For example, generate 'five thirty' for speech or '5:30' for
    text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        clock = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" / "2:22 PM" with am/pm, otherwise "3:01" / "2:22"
        clock = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if clock[0] == '0':
            clock = clock[1:]  # strip leading zeros

    if not speech:
        return clock

    # Generate a speakable version of the time
    if use_24hour:
        # Either "zero eight hundred" or "thirteen hundred"
        if clock[0] == '0':
            hour_words = (pronounce_number_en(int(clock[0])) + " " +
                          pronounce_number_en(int(clock[1])))
        else:
            hour_words = pronounce_number_en(int(clock[0:2]))

        minute_digits = clock[3:5]
        if minute_digits == '00':
            minute_words = "hundred"
        elif clock[3] == '0':
            minute_words = (pronounce_number_en(0) + " " +
                            pronounce_number_en(int(clock[4])))
        else:
            minute_words = pronounce_number_en(int(minute_digits))
        return hour_words + " " + minute_words

    minute = dt.minute
    if minute == 0:
        if dt.hour == 0:
            return "midnight"
        if dt.hour == 12:
            return "noon"

    hour = dt.hour % 12 or 12  # 12 hour clock and 0 is spoken as 12
    if minute == 45:
        upcoming_hour = (dt.hour + 1) % 12 or 12
        spoken = "quarter to " + pronounce_number_en(upcoming_hour)
    elif minute == 30:
        spoken = "half past " + pronounce_number_en(hour)
    elif minute == 15:
        spoken = "quarter past " + pronounce_number_en(hour)
    elif minute == 0:
        spoken = pronounce_number_en(hour)
        if not use_ampm:
            return spoken + " o'clock"
    else:
        spoken = pronounce_number_en(hour)
        if minute < 10:
            spoken += " oh"
        spoken += " " + pronounce_number_en(minute)

    if use_ampm:
        spoken += " p.m." if dt.hour > 11 else " a.m."
    return spoken

View File

@@ -0,0 +1,269 @@
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Format functions for castillian (es-es)
"""
from lingua_franca.lang.format_common import convert_to_mixed_fraction
from lingua_franca.lang.common_data_es import _NUM_STRING_ES, \
_FRACTION_STRING_ES
def nice_number_es(number, speech=True, denominators=range(1, 21)):
    """ Spanish helper for nice_number

    Render a number as a mixed fraction, e.g. 4.5 becomes "4 y medio"
    for speech and "4 1/2" for display. Decimal points are written as
    commas and, for display, thousands are separated by spaces.

    Args:
        number (int or float): the value to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    result = convert_to_mixed_fraction(number, denominators)
    if not result:
        # Give up, just represent as a 3 decimal number
        whole, num, den = round(number, 3), 0, 0
    else:
        whole, num, den = result

    if not speech:
        if num == 0:
            strNumber = '{:,}'.format(whole)
            strNumber = strNumber.replace(",", " ")
            strNumber = strNumber.replace(".", ",")
            return strNumber
        return '{} {}/{}'.format(whole, num, den)

    if num == 0:
        # if the number is not a fraction, nothing to do
        return str(whole).replace(".", ",")

    den_str = _FRACTION_STRING_ES[den]
    if whole == 0:
        # if there is no whole number
        if num == 1:
            # if numerator is 1, return "un medio", for example
            strNumber = 'un {}'.format(den_str)
        else:
            # else return "2 tercios", for example
            strNumber = '{} {}'.format(num, den_str)
    elif num == 1:
        # if there is a whole number and numerator is 1
        if den == 2:
            # if denominator is 2, return "1 y medio", for example
            strNumber = '{} y {}'.format(whole, den_str)
        else:
            # else return "1 y 1 tercio", for example
            strNumber = '{} y 1 {}'.format(whole, den_str)
    else:
        # else return "2 y 3 cuartos", for example
        strNumber = '{} y {} {}'.format(whole, num, den_str)

    if num > 1:
        # Pluralise every denominator. The previous `den != 3` exclusion
        # suits French "tiers", which already ends in "s"; the Spanish
        # word is "tercio" and needs the plural like the others.
        strNumber += 's'
    return strNumber
def pronounce_number_es(number, places=2):
    """
    Convert a number to its spoken equivalent

    For example, 5.2 would return 'cinco coma dos'

    Args:
        number (float or int): the number to pronounce; values whose
            absolute value is 100 or more are returned as str(number)
        places (int): maximum decimal places to speak
    Returns:
        (str): The pronounced number
    """
    if abs(number) >= 100:
        # TODO: support numbers of 100 and above
        return str(number)

    result = ""
    if number < 0:
        result = "menos "
    number = abs(number)

    # Select the branch from the integer part so that non-integers such as
    # 29.5 take the "veinti..." path instead of a direct table lookup of 29.
    whole = int(number)
    ones = whole % 10
    tens = whole - ones

    # 21 to 29 have a special, single-word pronunciation
    if 20 <= whole <= 29:
        result += _NUM_STRING_ES[tens]
        if ones > 0:
            # drop the final "e" of "veinte" to build 21 - 29, taking into
            # account the accented exceptions 22, 23 and 26
            result = result[:-1]
            if ones == 2:
                result += "idós"
            elif ones == 3:
                result += "itrés"
            elif ones == 6:
                result += "iséis"
            else:
                result += "i" + _NUM_STRING_ES[ones]
    elif whole >= 30:  # from 30 upwards
        result += _NUM_STRING_ES[tens]
        if ones > 0:
            result += " y " + _NUM_STRING_ES[ones]
    else:
        result += _NUM_STRING_ES[whole]

    # Deal with decimal part. In Spanish the decimal separator may be
    # written as a comma or a dot, but it is pronounced "coma".
    if not number == int(number) and places > 0:
        if abs(number) < 1.0 and (result == "menos " or not result):
            result += "cero"
        result += " coma"
        _num_str = str(number)
        _num_str = _num_str.split(".")[1][0:places]
        for char in _num_str:
            result += " " + _NUM_STRING_ES[int(char)]
    return result
def nice_time_es(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format

    For example, generate 'cinco treinta' for speech or '5:30' for
    text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        clock = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" / "2:22 PM" with am/pm, otherwise "3:01" / "2:22"
        clock = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if clock[0] == '0':
            clock = clock[1:]  # strip leading zeros

    if not speech:
        return clock

    # Generate a speakable version of the time
    if use_24hour:
        # In 24h format no extra qualifier such as "la noche", "la tarde"
        # or "la mañana" is spoken
        # http://lema.rae.es/dpd/srv/search?id=YNoTWNJnAD6bhhVBf9
        if dt.hour == 1:
            spoken = "la una"
        else:
            spoken = "las " + pronounce_number_es(dt.hour)
        # 14:04 is spoken "las catorce cero cuatro"
        if dt.minute < 10:
            spoken += " cero " + pronounce_number_es(dt.minute)
        else:
            spoken += " " + pronounce_number_es(dt.minute)
        return spoken

    # Minutes 35, 40, 45, 50 and 55 are counted back from the next hour
    # ("tres menos cuarto")
    if dt.minute in (35, 40, 45, 50, 55):
        minute = dt.minute - 60
        hour = dt.hour + 1
    else:
        minute = dt.minute
        hour = dt.hour

    if hour in (0, 12):
        spoken = "las doce"
    elif hour in (1, 13):
        spoken = "la una"
    elif hour < 13:
        spoken = "las " + pronounce_number_es(hour)
    else:
        spoken = "las " + pronounce_number_es(hour - 12)

    # special minute wordings first, then the generic ones
    if minute == 15:
        spoken += " y cuarto"
    elif minute == 30:
        spoken += " y media"
    elif minute == -15:
        spoken += " menos cuarto"
    elif minute > 0:
        # "seis y nueve", "siete y veinticinco"
        spoken += " y " + pronounce_number_es(minute)
    elif minute < 0:
        # "las siete menos veinte": no "y" before the negative minutes
        spoken += " " + pronounce_number_es(minute)

    # on the hour, without a part-of-day qualifier, e.g. 3:00
    if minute == 0 and not use_ampm:
        spoken += " en punto"

    if use_ampm:
        # "de la noche" runs from nightfall to midnight, so starting it at
        # 21h is somewhat subjective; 12h counts as "de la mañana" and
        # "de la tarde" starts at 13h.
        # http://lema.rae.es/dpd/srv/search?id=YNoTWNJnAD6bhhVBf9
        if 0 <= hour < 6:
            spoken += " de la madrugada"
        elif 6 <= hour < 13:
            spoken += " de la mañana"
        elif 13 <= hour < 21:
            spoken += " de la tarde"
        else:
            spoken += " de la noche"
    return spoken

View File

@@ -0,0 +1,301 @@
# -*- coding: utf-8 -*-
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from lingua_franca.lang.format_common import convert_to_mixed_fraction
from lingua_franca.lang.common_data_fa import \
_FARSI_ONES, _FARSI_TENS, _FARSI_HUNDREDS, _FARSI_BIG, _FARSI_SEPERATOR, \
_FARSI_FRAC, _FARSI_FRAC_BIG, _FRACTION_STRING_FA, _FORMAL_VARIANT
import math
from lingua_franca.internal import lookup_variant
from enum import IntEnum
from functools import wraps
class NumberVariantFA(IntEnum):
    """Spoken-number style that the Farsi formatters can be asked for."""
    # Output is returned as produced by the formatter.
    CONVERSATIONAL = 0
    # Output is rewritten by _apply_number_variant using _FORMAL_VARIANT.
    FORMAL = 1
# Object built by lookup_variant from the variant-name table below.
# NOTE(review): presumably a decorator that maps a string `variant` keyword
# to the enum member - confirm against lingua_franca.internal.lookup_variant.
# It is not referenced by the other definitions visible in this module.
lookup_number = lookup_variant({
    "default": NumberVariantFA.CONVERSATIONAL,
    "conversational": NumberVariantFA.CONVERSATIONAL,
    "formal": NumberVariantFA.FORMAL,
})
def _apply_number_variant(text, variant):
    """Rewrite text for the requested number variant.

    For NumberVariantFA.FORMAL every value of _FORMAL_VARIANT found in the
    text is replaced by its key; any other variant returns text unchanged.

    Args:
        text (str): formatted text
        variant: requested variant, compared against NumberVariantFA.FORMAL
    Returns:
        (str): the possibly rewritten text
    """
    if variant != NumberVariantFA.FORMAL:
        return text
    for formal_form, conversational_form in _FORMAL_VARIANT.items():
        text = text.replace(conversational_form, formal_form)
    return text
def _handle_number_variant(func):
    """Decorator adding `variant` post-processing to a Farsi formatter.

    The wrapped function is called unchanged; when the call carried a
    `variant` keyword, its result is passed through _apply_number_variant.
    The wrapper is itself wrapped by lookup_variant with the variant-name
    table (presumably translating string variant names into
    NumberVariantFA members - confirm against lookup_variant).

    Args:
        func (callable): formatter returning a string
    Returns:
        (callable): the decorated formatter carrying func's metadata
    """
    copy_metadata = wraps(func)
    resolve_variant = lookup_variant({
        "default": NumberVariantFA.CONVERSATIONAL,
        "conversational": NumberVariantFA.CONVERSATIONAL,
        "formal": NumberVariantFA.FORMAL,
    })

    def wrapper(*args, **kwargs):
        spoken = func(*args, **kwargs)
        if 'variant' not in kwargs:
            return spoken
        return _apply_number_variant(spoken, kwargs['variant'])

    # same nesting as stacking @wraps(func) above @lookup_variant(...)
    return copy_metadata(resolve_variant(wrapper))
@_handle_number_variant
def nice_number_fa(number, speech=True, denominators=range(1, 21), variant=None):
    """ Farsi helper for nice_number

    Render a number as a mixed fraction: whole part, numerator and the
    spoken denominator name for speech, or "4 1/2" style text for display.

    Args:
        number (int or float): the value to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
        variant: number variant, consumed by the _handle_number_variant
            decorator
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction: fall back to a 3 decimal representation
        return str(round(number, 3))

    whole, num, den = mixed
    if num == 0:
        # Whole number, identical for speech and display
        # TODO: Number grouping? E.g. "1,000,000"
        return str(whole)
    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    fraction_name = _FRACTION_STRING_FA[den]
    if whole == 0 and num == 1:
        return 'یک {}'.format(fraction_name)
    if whole == 0:
        return '{} {}'.format(num, fraction_name)
    if num == 1:
        return '{} و یک {}'.format(whole, fraction_name)
    return '{} و {} {}'.format(whole, num, fraction_name)
def _float2tuple(value, _precision):
pre = int(value)
post = abs(value - pre) * 10**_precision
if abs(round(post) - post) < 0.01:
# We generally floor all values beyond our precision (rather than
# rounding), but in cases where we have something like 1.239999999,
# which is probably due to python's handling of floats, we actually
# want to consider it as 1.24 instead of 1.23
post = int(round(post))
else:
post = int(math.floor(post))
while post != 0:
x, y = divmod(post, 10)
if y != 0:
break
post = x
_precision -= 1
return pre, post, _precision
def _cardinal3(number):
    """Spell out a number below 1000 from the Farsi word tables.

    Args:
        number (int): value to spell, expected to be in 0..999
    Returns:
        (str): the words joined with _FARSI_SEPERATOR
    """
    # NOTE(review): 19 is not covered by this test and is therefore built
    # from the tens and ones tables - confirm whether _FARSI_ONES has its
    # own entry for 19 and the bound should be 20.
    if number < 19:
        return _FARSI_ONES[number]

    if number >= 100:
        hundreds, remainder = divmod(number, 100)
        if remainder == 0:
            return _FARSI_HUNDREDS[hundreds]
        return (_FARSI_HUNDREDS[hundreds] + _FARSI_SEPERATOR +
                _cardinal3(remainder))

    tens, units = divmod(number, 10)
    if units == 0:
        return _FARSI_TENS[tens]
    return _FARSI_TENS[tens] + _FARSI_SEPERATOR + _FARSI_ONES[units]
def _cardinalPos(number):
    """Spell out a non-negative integer in groups of three digits.

    The number is consumed from the least significant group upwards, one
    group per entry of _FARSI_BIG; zero groups are skipped. A group equal
    to 1 whose scale word is 'هزار' is spoken as the scale word alone.

    Args:
        number (int): value to spell
    Returns:
        (str): the spelled number, '' when every group is zero
    """
    remaining = number
    spoken = ''
    for scale_word in _FARSI_BIG:
        remaining, group = divmod(remaining, 1000)
        if group == 0:
            continue

        group_words = _cardinal3(group)
        if group == 1 and scale_word == 'هزار':
            group_words = scale_word
        elif scale_word != '':
            group_words = group_words + ' ' + scale_word

        # higher groups are prepended to what has been built so far
        if spoken == '':
            spoken = group_words
        else:
            spoken = group_words + _FARSI_SEPERATOR + spoken
    return spoken
def _fractional(number, l):
if (number / 10**l == 0.5):
return "نیم"
x = _cardinalPos(number)
ld3, lm3 = divmod(l, 3)
ltext = (_FARSI_FRAC[lm3] + " " + _FARSI_FRAC_BIG[ld3]).strip() + 'م'
return x + " " + ltext
def _to_ordinal(number):
    """Return the ordinal wording for number.

    The cardinal form (with zero decimal places) gets the suffix 'م';
    a cardinal ending in 'سه' instead has its last letter replaced by 'وم'.

    Args:
        number (int or float): value to convert
    Returns:
        (str): the ordinal wording
    """
    cardinal = _to_cardinal(number, 0)
    ends_with_three = cardinal[-1] == 'ه' and cardinal[-2] == 'س'
    if not ends_with_three:
        return cardinal + 'م'
    return cardinal[:-1] + 'وم'
def _to_ordinal_num(value):
return str(value)+"م"
def _to_cardinal(number, places):
if number < 0:
return "منفی " + _to_cardinal(-number, places)
if (number == 0):
return "صفر"
x, y, l = _float2tuple(number, places)
if y == 0:
return _cardinalPos(x)
if x == 0:
return _fractional(y, l)
return _cardinalPos(x) + _FARSI_SEPERATOR + _fractional(y, l)
@_handle_number_variant
def pronounce_number_fa(number, places=2, scientific=False,
                        ordinals=False, variant=None):
    """
    Convert a number to its spoken Farsi equivalent

    Args:
        number (float or int): the number to pronounce
        places (int): maximum decimal places to speak
        scientific (bool): pronounce in scientific notation
        ordinals (bool): pronounce in ordinal form instead of cardinal
        variant: number variant, consumed by the _handle_number_variant
            decorator
    Returns:
        (str): The pronounced number
    """
    # deal with infinity
    if number == float("inf"):
        return "بینهایت"
    elif number == float("-inf"):
        return "منفی بینهایت"

    if scientific:
        if number == 0:
            return "صفر"
        # Keep the formatted text in its own variable: the original
        # overwrote `number` with this string, so an exponent of 0 fell
        # through to _to_cardinal/_to_ordinal with a str and raised
        # TypeError on the `number < 0` comparison.
        mantissa, power = ('%E' % number).replace("+", "").split("E")
        power = int(power)
        if power != 0:
            # Signs are spoken here because the recursive calls receive
            # absolute values and have the scientific flag disabled
            return '{}{} ضرب در ده به توان {}{}'.format(
                'منفی ' if float(mantissa) < 0 else '',
                pronounce_number_fa(
                    abs(float(mantissa)), places, False, ordinals=False),
                'منفی ' if power < 0 else '',
                pronounce_number_fa(abs(power), places, False, ordinals=False))

    if ordinals:
        return _to_ordinal(number)
    return _to_cardinal(number, places)
@_handle_number_variant
def nice_time_fa(dt, speech=True, use_24hour=False, use_ampm=False, variant=None):
    """
    Format a time to a comfortable human format

    Produces spoken Farsi wording for speech or digits such as '5:30'
    for text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
        variant: number variant, consumed by the _handle_number_variant
            decorator
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        clock = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" / "2:22 PM" with am/pm, otherwise "3:01" / "2:22"
        clock = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if clock[0] == '0':
            clock = clock[1:]  # strip leading zeros

    if not speech:
        return clock

    # Generate a speakable version of the time
    if use_24hour:
        # int() discards a leading zero, so "08" and "8" are spoken alike
        spoken = pronounce_number_fa(int(clock[0:2]))
        if clock[3:5] != '00':
            spoken += " و "
            spoken += pronounce_number_fa(int(clock[3:5]))
            spoken += ' دقیقه'
        return spoken

    minute = dt.minute
    if minute == 0:
        if dt.hour == 0:
            return "نیمه شب"
        if dt.hour == 12:
            return "ظهر"

    hour = dt.hour % 12 or 12  # 12 hour clock and 0 is spoken as 12
    if minute == 15:
        spoken = pronounce_number_fa(hour) + " و ربع"
    elif minute == 30:
        spoken = pronounce_number_fa(hour) + " و نیم"
    elif minute == 45:
        upcoming_hour = (dt.hour + 1) % 12 or 12
        spoken = "یه ربع به " + pronounce_number_fa(upcoming_hour)
    elif minute == 0:
        spoken = pronounce_number_fa(hour)
        if not use_ampm:
            return spoken
    else:
        spoken = pronounce_number_fa(hour)
        spoken += " و " + pronounce_number_fa(minute) + ' دقیقه'

    if use_ampm:
        spoken += " بعد از ظهر" if dt.hour > 11 else " قبل از ظهر"
    return spoken

View File

@@ -0,0 +1,251 @@
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from lingua_franca.lang.format_common import convert_to_mixed_fraction
from lingua_franca.lang.common_data_fr import _NUM_STRING_FR, \
_FRACTION_STRING_FR
def nice_number_fr(number, speech=True, denominators=range(1, 21)):
    """ French helper for nice_number

    Render a number as a mixed fraction, e.g. 4.5 becomes "4 et demi"
    for speech and "4 1/2" for display. Decimal points are written as
    commas and, for display, thousands are separated by spaces.

    Args:
        number (int or float): the value to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if mixed:
        whole, num, den = mixed
    else:
        # Give up, just represent as a 3 decimal number
        whole, num, den = round(number, 3), 0, 0

    if num == 0:
        # no fractional part to name
        if speech:
            return str(whole).replace(".", ",")
        grouped = '{:,}'.format(whole)
        return grouped.replace(",", " ").replace(".", ",")

    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = _FRACTION_STRING_FR[den]
    if whole == 0:
        if num == 1:
            # "un demi", for example
            spoken = 'un {}'.format(den_str)
        else:
            # "2 tiers", for example
            spoken = '{} {}'.format(num, den_str)
    elif num != 1:
        # "2 et 3 quarts", for example
        spoken = '{} et {} {}'.format(whole, num, den_str)
    elif den == 2:
        # "1 et demi", for example
        spoken = '{} et {}'.format(whole, den_str)
    else:
        # "1 et 1 tiers", for example
        spoken = '{} et 1 {}'.format(whole, den_str)

    # plural "s", except for denominator 3 ("tiers")
    if num > 1 and den != 3:
        spoken += 's'
    return spoken
def pronounce_number_fr(number, places=2):
    """
    Convert a number to its spoken equivalent

    For example, 5.2 would return 'cinq virgule deux'

    Args:
        number (float or int): the number to pronounce; values whose
            absolute value is 100 or more are returned as str(number)
        places (int): maximum decimal places to speak
    Returns:
        (str): The pronounced number
    """
    if abs(number) >= 100:
        # TODO: Support for numbers over 100
        return str(number)

    result = ""
    if number < 0:
        result = "moins "
    number = abs(number)

    # Branch on the integer part only. Comparing the raw float made 16.5
    # come out as "dix-six" and let 71.x / 80.x miss their special forms.
    whole = int(number)
    if whole > 16:
        ones = whole % 10
        tens = whole - ones
        if ones != 0:
            if 10 < tens <= 60 and ones == 1:
                # vingt-et-un, trente-et-un, ...
                result += _NUM_STRING_FR[tens] + "-et-" + _NUM_STRING_FR[ones]
            elif whole == 71:
                result += "soixante-et-onze"
            elif tens in (70, 90):
                # 7x and 9x are built on 60 and 80 plus a number from 11-19
                result += _NUM_STRING_FR[tens - 10] + "-"
                if ones < 7:
                    result += _NUM_STRING_FR[10 + ones]
                else:
                    result += _NUM_STRING_FR[10] + "-" + _NUM_STRING_FR[ones]
            else:
                result += _NUM_STRING_FR[tens] + "-" + _NUM_STRING_FR[ones]
        elif whole == 80:
            # takes the plural "s" when no unit follows
            result += "quatre-vingts"
        else:
            result += _NUM_STRING_FR[tens]
    else:
        result += _NUM_STRING_FR[whole]

    # Deal with decimal part
    if not number == int(number) and places > 0:
        if abs(number) < 1.0 and (result == "moins " or not result):
            result += "zéro"
        result += " virgule"
        _num_str = str(number)
        _num_str = _num_str.split(".")[1][0:places]
        for char in _num_str:
            result += " " + _NUM_STRING_FR[int(char)]
    return result
def nice_time_fr(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format

    For example, generate 'cinq heures trente' for speech or '5:30' for
    text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the part of day for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        if use_ampm:
            # e.g. "3:01 AM" or "2:22 PM"
            string = dt.strftime("%I:%M %p")
        else:
            # e.g. "3:01" or "2:22"
            string = dt.strftime("%I:%M")
        if string[0] == '0':
            string = string[1:]  # strip leading zeros

    if not speech:
        return string

    # Generate a speakable version of the time
    speak = ""
    if use_24hour:
        # "13 heures trente"
        if dt.hour == 0:
            speak += "minuit"
        elif dt.hour == 12:
            speak += "midi"
        elif dt.hour == 1:
            speak += "une heure"
        else:
            speak += pronounce_number_fr(dt.hour) + " heures"
        if dt.minute != 0:
            speak += " " + pronounce_number_fr(dt.minute)
        return speak

    # Prepare for "trois heures moins le quart": minutes 35, 40, 45, 50
    # and 55 are counted back from the next hour.
    if dt.minute in (35, 40, 45, 50, 55):
        minute = dt.minute - 60
        # Wrap past midnight. Without the modulo, 23:45 gave hour 24 and
        # was spoken "douze heures moins le quart" (plus "du soir" with
        # use_ampm) instead of "minuit moins le quart".
        hour = (dt.hour + 1) % 24
    else:
        minute = dt.minute
        hour = dt.hour

    if hour == 0:
        speak += "minuit"
    elif hour == 12:
        speak += "midi"
    elif hour == 1 or hour == 13:
        speak += "une heure"
    elif hour < 13:
        speak = pronounce_number_fr(hour) + " heures"
    else:
        speak = pronounce_number_fr(hour - 12) + " heures"

    if minute != 0:
        if minute == 15:
            speak += " et quart"
        elif minute == 30:
            speak += " et demi"
        elif minute == -15:
            speak += " moins le quart"
        else:
            # negative minutes are spoken as "moins ..." by
            # pronounce_number_fr
            speak += " " + pronounce_number_fr(minute)

    if use_ampm:
        # "minuit" and "midi" take no part-of-day qualifier
        if hour > 17:
            speak += " du soir"
        elif hour > 12:
            speak += " de l'après-midi"
        elif hour > 0 and hour < 12:
            speak += " du matin"
    return speak

View File

@@ -0,0 +1,307 @@
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from lingua_franca.lang.format_common import convert_to_mixed_fraction
from lingua_franca.lang.common_data_hu import _NUM_POWERS_OF_TEN, \
_EXTRA_SPACE_HU, _FRACTION_STRING_HU, _MONTHS_HU, _NUM_STRING_HU
from math import floor
def _get_vocal_type_hu(word):
# checks the vocal attributes of a word
vowels_high = len([char for char in word if char in 'eéiíöőüű'])
vowels_low = len([char for char in word if char in 'aáoóuú'])
if vowels_high != 0 and vowels_low != 0:
return 2 # 2: type is mixed
return 0 if vowels_high == 0 else 1 # 0: type is low, 1: is high
def nice_number_hu(number, speech=True, denominators=range(1, 21)):
    """ Hungarian helper for nice_number

    Render a number as a mixed fraction, e.g. 4.5 becomes "4 és fél"
    for speech and "4 1/2" for display.

    Args:
        number (int or float): the value to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction: 3 decimals with a decimal comma
        return str(round(number, 3)).replace(".", ",")

    whole, num, den = mixed
    if num == 0:
        # Whole number, identical for speech and display
        # TODO: Number grouping? E.g. "1,000,000"
        return str(whole)
    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = _FRACTION_STRING_HU[den]
    is_half = den == 2
    if whole == 0:
        if num != 1:
            return '{} {}'.format(num, den_str)
        # a lone half is spoken without the leading "egy"
        leading_one = '' if is_half else 'egy '
        return '{}{}'.format(leading_one, den_str)

    if num == 1:
        # "x és fél" for halves, "x egész egy ..." otherwise
        joiner = 'és' if is_half else 'egész egy'
        return '{} {} {}'.format(whole, joiner, den_str)
    return '{} egész {} {}'.format(whole, num, den_str)
def pronounce_number_hu(number, places=2, short_scale=True, scientific=False,
ordinals=False):
"""
Convert a number to it's spoken equivalent
For example, '5.2' would return 'five point two'
Args:
number(float or int): the number to pronounce (under 100)
places(int): maximum decimal places to speak
short_scale (bool) : use short (True) or long scale (False)
https://en.wikipedia.org/wiki/Names_of_large_numbers
scientific (bool): pronounce in scientific notation
ordinals (bool): pronounce in ordinal form "first" instead of "one"
Returns:
(str): The pronounced number
"""
# NOTE: the docstring example above is the English one; the words emitted
# here are Hungarian. The "(under 100)" remark is also outdated: the code
# below only falls back to plain digits at a magnitude of 10**24.
# TODO short_scale, scientific and ordinals
# currently ignored
# Helper: spells one group of up to three digits (hundreds, tens, ones).
def pronounce_triplet_hu(num):
result = ""
num = floor(num)
# Hundreds: 1 is just 'száz', 2 takes the 'két' prefix, every other digit
# is looked up in _NUM_STRING_HU.
if num > 99:
hundreds = floor(num / 100)
if hundreds > 0:
hundredConst = _EXTRA_SPACE_HU + 'száz' + _EXTRA_SPACE_HU
if hundreds == 1:
result += hundredConst
elif hundreds == 2:
result += 'két' + hundredConst
else:
result += _NUM_STRING_HU[hundreds] + hundredConst
num -= hundreds * 100
# Remainder below 100
if num == 0:
result += '' # do nothing
elif num <= 20:
# 1..20 are looked up directly
result += _NUM_STRING_HU[num] # + _EXTRA_SPACE_DA
elif num > 20:
# tens word (the prefix 'huszon' for 20) followed by the ones word
ones = num % 10
tens = num - ones
if tens > 0:
if tens != 20:
result += _NUM_STRING_HU[tens] + _EXTRA_SPACE_HU
else:
result += "huszon" + _EXTRA_SPACE_HU
if ones > 0:
result += _NUM_STRING_HU[ones] + _EXTRA_SPACE_HU
return result
# Helper: spells a whole number one group of three digits at a time.
# scale_level is the index of the current group (0 = lowest three digits);
# the higher groups are produced by the recursive call at the end and are
# prepended to this group's text.
def pronounce_whole_number_hu(num, scale_level=0):
# recursion stops once no digits remain
if num == 0:
return ''
num = floor(num)
result = ''
last_triplet = num % 1000
# A group equal to 1 is special-cased per scale level
if last_triplet == 1:
if scale_level == 0:
# result is still '' here, so both branches append "egy"
if result != '':
result += '' + "egy"
else:
result += "egy"
elif scale_level == 1:
# for the thousands group only the scale word is emitted, without "egy"
result += _EXTRA_SPACE_HU + \
_NUM_POWERS_OF_TEN[1] + _EXTRA_SPACE_HU
else:
result += "egy" + _NUM_POWERS_OF_TEN[scale_level]
elif last_triplet > 1:
result += pronounce_triplet_hu(last_triplet)
# in front of a scale word the word for 2 is replaced by 'két'
if scale_level != 0:
result = result.replace(_NUM_STRING_HU[2], 'két')
if scale_level == 1:
result += _NUM_POWERS_OF_TEN[1] + _EXTRA_SPACE_HU
if scale_level >= 2:
result += _NUM_POWERS_OF_TEN[scale_level]
# groups above the units are followed by a '-' separator; the caller strips
# leading/trailing '-' for whole numbers
if scale_level > 0:
result += '-'
num = floor(num / 1000)
scale_level += 1
return pronounce_whole_number_hu(num,
scale_level) + result
result = ""
# Magnitudes of 10**24 and above are returned as plain digits
if abs(number) >= 1000000000000000000000000: # cannot do more than this
return str(number)
elif number == 0:
return str(_NUM_STRING_HU[0])
elif number < 0:
# negative: prefix "mínusz" and recurse on the absolute value
# (only `places` is forwarded to the recursive call)
return "mínusz " + pronounce_number_hu(abs(number), places)
else:
if number == int(number):
return pronounce_whole_number_hu(number).strip('-')
else:
# fractional value: "<whole> egész <fraction> <denominator word>"
whole_number_part = floor(number)
fractional_part = number - whole_number_part
# the helper returns '' for 0, so the zero word is added explicitly
if whole_number_part == 0:
result += _NUM_STRING_HU[0]
result += pronounce_whole_number_hu(whole_number_part)
if places > 0:
result += " egész "
# the fraction is rounded to `places` digits and read as a whole number
fraction = pronounce_whole_number_hu(
round(fractional_part * 10 ** places))
result += fraction.replace(_NUM_STRING_HU[2], 'két')
# denominator words exist for 1..5 places; nothing is appended beyond that
fraction_suffixes = [
'tized', 'század', 'ezred', 'tízezred', 'százezred']
if places <= len(fraction_suffixes):
result += ' ' + fraction_suffixes[places - 1]
return result
def pronounce_ordinal_hu(number):
    """Pronounce a whole, non-negative number as a Hungarian ordinal.

    1 -> "első"
    2 -> "második"

    Args:
        number (int or float): the number to format; integral floats such
            as 3.0 are accepted
    Returns:
        (str): The pronounced ordinal. Negative or non-integral input is
        returned unchanged (not converted to a string).
    """
    ordinals = ["nulladik", "első", "második", "harmadik", "negyedik",
                "ötödik", "hatodik", "hetedik", "nyolcadik", "kilencedik",
                "tizedik"]
    big_ordinals = ["", "ezredik", "milliomodik"]

    # only for whole positive numbers including zero
    if number < 0 or number != int(number):
        return number

    # The guard above lets integral floats through; normalise them so they
    # can be used as a list index and so the digit arithmetic is exact.
    number = int(number)
    if number < 11:
        return ordinals[number]

    # concatenate parts and inflect them accordingly
    root = pronounce_number_hu(number)
    vtype = _get_vocal_type_hu(root)
    last_digit = number % 10
    if root == "húsz":
        root = "husz"
    if number % 1000000 == 0:
        return root.replace(_NUM_POWERS_OF_TEN[2], big_ordinals[2])
    if number % 1000 == 0:
        return root.replace(_NUM_POWERS_OF_TEN[1], big_ordinals[1])
    if last_digit == 1:
        return root + "edik"
    if root[-1] == 'ő':
        return root[:-1] + 'edik'
    if last_digit != 0:
        # swap the last occurrence of the final digit's word for its ordinal
        return ordinals[last_digit].join(
            root.rsplit(_NUM_STRING_HU[last_digit], 1))
    # round tens/hundreds: the suffix follows the vowel type of the root
    return root + "edik" if vtype == 1 else root + "adik"
def nice_time_hu(dt, speech=True, use_24hour=False, use_ampm=False):
    """Format a time of day in Hungarian.

    Produces a spoken phrase when ``speech`` is True, otherwise a clock
    string such as "5:30".

    Args:
        dt (datetime): time to format (assumed to be local time already)
        speech (bool): spoken form (True) or display form (False)
        use_24hour (bool): 24-hour instead of 12-hour output
        use_ampm (bool): add the part of day in 12-hour output
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" / "2:22 PM" with am/pm, "3:01" / "2:22" without
        string = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if string[0] == '0':
            string = string[1:]  # strip leading zeros
    if not speech:
        return string

    # Generate a speakable version of the time
    if use_24hour:
        hour_words = pronounce_number_hu(dt.hour)
        speak = hour_words.replace(_NUM_STRING_HU[2], 'két') + " óra"
        if dt.minute != 0:  # zero minutes are not pronounced
            speak += " " + pronounce_number_hu(dt.minute)
        return speak  # ampm is ignored when use_24hour is true

    if dt.minute == 0 and dt.hour == 0:
        return "éjfél"
    if dt.minute == 0 and dt.hour == 12:
        return "dél"

    # TODO: "half past 3", "a quarter of 4" and other idiomatic times
    if dt.hour == 0:
        hour_12 = 12
    elif dt.hour < 13:
        hour_12 = dt.hour
    else:
        hour_12 = dt.hour - 12
    speak = pronounce_number_hu(hour_12).replace(_NUM_STRING_HU[2], 'két')
    speak += " óra"
    if dt.minute != 0:
        speak += " " + pronounce_number_hu(dt.minute)

    if use_ampm:
        if dt.hour < 3 or dt.hour >= 22:
            part_of_day = "éjjel "    # 22:00 - 02:59 éjjel/at night
        elif dt.hour < 12:
            part_of_day = "reggel "   # 03:00 - 11:59 reggel/in the morning
        elif dt.hour < 18:
            part_of_day = "délután "  # 12:01 - 17:59
        else:
            part_of_day = "este "     # 18:00 - 21:59 este/evening
        speak = part_of_day + speak
    return speak

View File

@@ -0,0 +1,342 @@
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from lingua_franca.lang.format_common import convert_to_mixed_fraction
from lingua_franca.lang.common_data_it import _NUM_STRING_IT, \
_FRACTION_STRING_IT, _LONG_SCALE_IT, _SHORT_SCALE_IT
def nice_number_it(number, speech=True, denominators=range(1, 21)):
    """Format a number as an Italian mixed fraction.

    4.5 becomes "4 e un mezzo" for speech and "4 1/2" for text.

    Args:
        number (int or float): the value to format
        speech (bool): spoken form (True) or display form (False)
        denominators (iter of ints): denominators to try, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # Give up, just represent as a 3 decimal number
        return str(round(number, 3))

    whole, num, den = mixed
    if num == 0:
        # whole value: same output for speech and display
        return str(whole)
    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    # denominator word (without its final vowel, which is added below)
    den_str = _FRACTION_STRING_IT[den]
    if whole == 0:
        if num == 1:
            spoken = 'un {}'.format(den_str)          # e.g. one tenth
        else:
            spoken = '{} {}'.format(num, den_str)     # e.g. three halves
    elif num == 1:
        spoken = '{} e un {}'.format(whole, den_str)  # whole plus one part
    else:
        spoken = '{} e {} {}'.format(whole, num, den_str)

    # singular/plural ending of the denominator
    ending = 'i' if num > 1 else 'o'
    return spoken + ending
def pronounce_number_it(number, places=2, short_scale=False, scientific=False):
"""
Convert a number to it's spoken equivalent
adapted to italian fron en version
For example, '5.2' would return 'cinque virgola due'
Args:
num(float or int): the number to pronounce (under 100)
places(int): maximum decimal places to speak
short_scale (bool) : use short (True) or long scale (False)
https://en.wikipedia.org/wiki/Names_of_large_numbers
scientific (bool): pronounce in scientific notation
Returns:
(str): The pronounced number
"""
# NOTE: the parameter documented as `num` above is actually named `number`;
# `num` is only the local working copy created on the next line.
num = number
# handle infinity
if num == float("inf"):
return "infinito"
elif num == float("-inf"):
return "meno infinito"
# Scientific notation: split the '%E' rendering into mantissa and exponent
# and pronounce both parts separately. A zero exponent falls through to the
# normal path below.
if scientific:
number = '%E' % num
n, power = number.replace("+", "").split("E")
power = int(power)
if power != 0:
return '{}{} per dieci elevato alla {}{}'.format(
'meno ' if float(n) < 0 else '',
pronounce_number_it(abs(float(n)), places, short_scale, False),
'meno ' if power < 0 else '',
pronounce_number_it(abs(power), places, short_scale, False))
# Build the word table: the basic number words plus the scale names of the
# selected scale.
if short_scale:
number_names = _NUM_STRING_IT.copy()
number_names.update(_SHORT_SCALE_IT)
else:
number_names = _NUM_STRING_IT.copy()
number_names.update(_LONG_SCALE_IT)
digits = [number_names[n] for n in range(0, 20)]
tens = [number_names[n] for n in range(10, 100, 10)]
# despite its name, `hundreds` holds the scale words in dictionary order
if short_scale:
hundreds = [_SHORT_SCALE_IT[n] for n in _SHORT_SCALE_IT.keys()]
else:
hundreds = [_LONG_SCALE_IT[n] for n in _LONG_SCALE_IT.keys()]
# deal with negatives
result = ""
if num < 0:
result = "meno "
num = abs(num)
# check for a direct match
if num in number_names:
if num > 90:
result += "" # start of string (no-op)
result += number_names[num]
else:
# Helper: spells 0..999
def _sub_thousand(n):
assert 0 <= n <= 999
if n <= 19:
return digits[n]
elif n <= 99:
q, r = divmod(n, 10)
_deci = tens[q-1]
_unit = r
_partial = _deci
if _unit > 0:
if _unit == 1 or _unit == 8:
_partial = _partial[:-1] # drop the final vowel, e.g. ventuno, ventotto
_partial += number_names[_unit]
return _partial
else:
q, r = divmod(n, 100)
if q == 1:
_partial = "cento"
else:
_partial = digits[q] + "cento"
_partial += (
" " + _sub_thousand(r) if r else "") # separate the hundreds
return _partial
# Helper: short scale, one scale word per group of three digits
def _short_scale(n):
if n >= max(_SHORT_SCALE_IT.keys()):
return "numero davvero enorme"
n = int(n)
assert 0 <= n
res = []
for i, z in enumerate(_split_by(n, 1000)):
if not z:
continue
number = _sub_thousand(z)
if i:
number += "" # separate orders of magnitude (no-op)
number += hundreds[i]
res.append(number)
return ", ".join(reversed(res))
# Helper: splits n into base-`split` groups, least significant first
def _split_by(n, split=1000):
assert 0 <= n
res = []
while n:
n, r = divmod(n, split)
res.append(r)
return res
# Helper: long scale, one scale word per group of six digits; every group is
# itself pronounced through a short-scale recursive call
def _long_scale(n):
if n >= max(_LONG_SCALE_IT.keys()):
return "numero davvero enorme"
n = int(n)
assert 0 <= n
res = []
for i, z in enumerate(_split_by(n, 1000000)):
if not z:
continue
number = pronounce_number_it(z, places, True, scientific)
# strip off the comma after the thousand
if i:
# plus one as we skip 'thousand'
# (and 'hundred', but this is excluded by index value)
number = number.replace(',', '')
number += " " + hundreds[i+1]
res.append(number)
return ", ".join(reversed(res))
if short_scale:
result += _short_scale(num)
else:
result += _long_scale(num)
# normalise single, 'reasonable' scale words, also at the start of the string
if result == 'mila':
result = 'mille'
if result == 'milioni':
result = 'un milione'
if result == 'miliardi':
result = 'un miliardo'
if result[0:7] == 'unomila':
result = result.replace('unomila', 'mille', 1)
if result[0:10] == 'unomilioni':
result = result.replace('unomilioni', 'un milione', 1)
# if result[0:11] == 'unomiliardi':
# result = result.replace('unomiliardi', 'un miliardo', 1)
# Deal with fractional part
# (the decimals are read digit by digit from str(num), truncated to `places`)
if not num == int(num) and places > 0:
if abs(num) < 1.0 and (result == "meno " or not result):
result += "zero"
result += " virgola"
_num_str = str(num)
_num_str = _num_str.split(".")[1][0:places]
for char in _num_str:
result += " " + number_names[int(char)]
return result
def nice_time_it(dt, speech=True, use_24hour=False, use_ampm=False):
    """Format a time of day in Italian.

    Produces e.g. 'cinque e trenta' for speech or '5:30' for display.

    Args:
        dt (datetime): time to format (assumed to be local time already)
        speech (bool): spoken form (True) or display form (False)
        use_24hour (bool): 24-hour instead of 12-hour output
        use_ampm (bool): add the part of day in 12-hour output
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" / "2:22 PM" with am/pm, "3:01" / "2:22" without
        string = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if string[0] == '0':
            string = string[1:]  # strip leading zeros
    if not speech:
        return string

    # Generate a speakable version of the time
    if use_24hour:
        hh = string[0:2]
        mm = string[3:5]

        # hours: "zerozero", "una", "zero <digit>" or the plain number
        if hh == '00':
            speak = "zerozero"
        elif hh[0] == '0':
            if int(hh[1]) == 1:
                speak = "una"
            else:
                speak = pronounce_number_it(int(hh[0])) + " "
                speak += pronounce_number_it(int(hh[1]))
        else:
            speak = pronounce_number_it(int(hh))

        # in Italian hours and minutes are joined with "e": "13 e 25"
        speak += " e "
        if mm == '00':
            speak += "zerozero"
        elif mm[0] == '0':
            speak += pronounce_number_it(0) + " "
            speak += pronounce_number_it(int(mm[1]))
        else:
            speak += pronounce_number_it(int(mm))
        return speak

    if dt.minute == 0 and dt.hour == 0:
        return "mezzanotte"
    if dt.minute == 0 and dt.hour == 12:
        return "mezzogiorno"

    # TODO: "10 e un quarto", "4 e tre quarti" and other idiomatic times
    if dt.hour == 0:
        speak = "mezzanotte"
    elif dt.hour in (1, 13):
        speak = "una"
    elif dt.hour > 13:
        speak = pronounce_number_it(dt.hour - 12)
    else:
        speak = pronounce_number_it(dt.hour)

    if dt.minute == 0:
        # on the hour there is no " e"; "in punto" only without am/pm
        if not use_ampm:
            speak += " in punto"
    elif dt.minute == 15:
        speak += " e un quarto"
    elif dt.minute == 45:
        speak += " e tre quarti"
    else:
        speak += " e"
        if dt.minute < 10:
            speak += " zero"
        speak += " " + pronounce_number_it(dt.minute)

    if use_ampm:
        # before 04:00 no part of day is appended
        if dt.hour > 20:
            speak += " della notte"
        elif dt.hour > 17:
            speak += " della sera"
        elif dt.hour > 12:
            speak += " del pomeriggio"
        elif dt.hour >= 4:
            speak += " della mattina"
    return speak

View File

@@ -0,0 +1,337 @@
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from .format_common import convert_to_mixed_fraction
from lingua_franca.lang.common_data_nl import _NUM_POWERS_OF_TEN, \
_NUM_STRING_NL, _FRACTION_STRING_NL, _EXTRA_SPACE_NL, _MONTHS_NL
from math import floor
def nice_number_nl(number, speech=True, denominators=range(1, 21)):
    """Format a number as a Dutch mixed fraction.

    For speech the result is "<whole> en <numerator> <fraction word>",
    for text it is e.g. "4 1/2".

    Args:
        number (int or float): the value to format
        speech (bool): spoken form (True) or display form (False)
        denominators (iter of ints): denominators to try, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction: show 3 decimals with a decimal comma instead
        return str(round(number, 3)).replace(".", ",")

    whole, num, den = mixed
    if num == 0:
        # whole value: same output for speech and display
        # TODO: Number grouping? E.g. "1,000,000"
        return str(whole)
    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = _FRACTION_STRING_NL[den]
    # a numerator of one is spoken as the word "één", others stay digits
    numerator = 'één' if num == 1 else str(num)
    if whole == 0:
        return '{} {}'.format(numerator, den_str)
    return '{} en {} {}'.format(whole, numerator, den_str)
def pronounce_number_nl(number, places=2, short_scale=True, scientific=False,
ordinals=False):
"""
Convert a number to it's spoken equivalent
For example, '5.2' would return 'five point two'
Args:
number(float or int): the number to pronounce (under 100)
places(int): maximum decimal places to speak
short_scale (bool) : use short (True) or long scale (False)
https://en.wikipedia.org/wiki/Names_of_large_numbers
scientific (bool): pronounce in scientific notation
ordinals (bool): pronounce in ordinal form "first" instead of "one"
Returns:
(str): The pronounced number
"""
# NOTE: the docstring example above is the English one; the words emitted
# here are Dutch. The "(under 100)" remark is also outdated: the code below
# only falls back to plain digits at a magnitude of 10**24.
# TODO short_scale, scientific and ordinals
# currently ignored
# Helper: spells one group of up to three digits (hundreds, tens, ones).
def pronounce_triplet_nl(num):
result = ""
num = floor(num)
if num > 99:
hundreds = floor(num / 100)
if hundreds > 0:
result += _NUM_STRING_NL[
hundreds] + _EXTRA_SPACE_NL + 'honderd' + _EXTRA_SPACE_NL
num -= hundreds * 100
# Remainder below 100
if num == 0:
result += '' # do nothing
elif num <= 20:
# 1..20 are looked up directly
result += _NUM_STRING_NL[num] # + _EXTRA_SPACE_DA
elif num > 20:
# above 20 the ones word comes first, then 'en', then the tens word
ones = num % 10
tens = num - ones
if ones > 0:
result += _NUM_STRING_NL[ones] + _EXTRA_SPACE_NL
if tens > 0:
result += 'en' + _EXTRA_SPACE_NL
if tens > 0:
result += _NUM_STRING_NL[tens] + _EXTRA_SPACE_NL
return result
# Helper: reads exactly `places` decimals digit by digit, trailing zeros
# included.
def pronounce_fractional_nl(num,
places): # fixed number of places even with
# trailing zeros
result = ""
place = 10
while places > 0: # doesn't work with 1.0001 and places = 2: int(
# number*place) % 10 > 0 and places > 0:
result += " " + _NUM_STRING_NL[int(num * place) % 10]
if int(num * place) % 10 == 1:
# no-op: nothing is appended for the digit 1
result += '' # "1" is pronounced "eins" after the decimal
# point
place *= 10
places -= 1
return result
# Helper: spells a whole number one group of three digits at a time.
# scale_level is the index of the current group (0 = lowest three digits);
# the higher groups are produced by the recursive call at the end and are
# prepended to this group's text.
def pronounce_whole_number_nl(num, scale_level=0):
# recursion stops once no digits remain
if num == 0:
return ''
num = floor(num)
result = ''
last_triplet = num % 1000
# A group equal to 1 is special-cased per scale level
if last_triplet == 1:
if scale_level == 0:
# result is still '' here, so both branches append 'één'
if result != '':
result += '' + 'één'
else:
result += "één"
elif scale_level == 1:
result += 'één' + _EXTRA_SPACE_NL + 'duizend' + _EXTRA_SPACE_NL
else:
result += "één " + _NUM_POWERS_OF_TEN[scale_level] + ' '
elif last_triplet > 1:
result += pronounce_triplet_nl(last_triplet)
if scale_level == 1:
# result += _EXTRA_SPACE_DA
result += 'duizend' + _EXTRA_SPACE_NL
if scale_level >= 2:
# if _EXTRA_SPACE_DA == '':
# result += " "
result += " " + _NUM_POWERS_OF_TEN[scale_level] + ' '
# the two appends below add empty strings and therefore have no effect
if scale_level >= 2:
if scale_level % 2 == 0:
result += "" # Miljioen
result += "" # Miljard, Miljoen
num = floor(num / 1000)
scale_level += 1
return pronounce_whole_number_nl(num,
scale_level) + result + ''
result = ""
# Magnitudes of 10**24 and above are returned as plain digits
if abs(number) >= 1000000000000000000000000: # cannot do more than this
return str(number)
elif number == 0:
return str(_NUM_STRING_NL[0])
elif number < 0:
# negative: prefix "min" and recurse on the absolute value
# (only `places` is forwarded to the recursive call)
return "min " + pronounce_number_nl(abs(number), places)
else:
if number == int(number):
return pronounce_whole_number_nl(number)
else:
# fractional value: "<whole> komma <digit> <digit> ..."
# NOTE(review): the whole-number helper returns '' for 0, so a value such
# as 0.5 appears to start with " komma" - confirm against callers/tests.
whole_number_part = floor(number)
fractional_part = number - whole_number_part
result += pronounce_whole_number_nl(whole_number_part)
if places > 0:
result += " komma"
result += pronounce_fractional_nl(fractional_part, places)
return result
def pronounce_ordinal_nl(number):
    """Pronounce a whole, non-negative number as a Dutch ordinal.

    1 -> "eerste"
    2 -> "tweede"

    Args:
        number (int or float): the number to format; integral floats such
            as 3.0 are accepted
    Returns:
        (str): The pronounced ordinal. Negative or non-integral input is
        returned unchanged (not converted to a string).
    """
    ordinals = ["nulste", "eerste", "tweede", "derde", "vierde", "vijfde",
                "zesde", "zevende", "achtste"]

    # only for whole positive numbers including zero
    if number < 0 or number != int(number):
        return number

    # The guard above lets integral floats through; normalise them so they
    # can be used as a list index.
    number = int(number)
    if number < 4:
        # irregular forms come from the table
        return ordinals[number]

    # 8 and everything from 20 upwards take "-ste", the rest take "-de"
    suffix = "ste" if number == 8 or number >= 20 else "de"
    return pronounce_number_nl(number) + suffix
def nice_time_nl(dt, speech=True, use_24hour=False, use_ampm=False):
    """Format a time of day in Dutch.

    Produces a spoken phrase when ``speech`` is True, otherwise a clock
    string such as "5:30".

    Args:
        dt (datetime): time to format (assumed to be local time already)
        speech (bool): spoken form (True) or display form (False)
        use_24hour (bool): 24-hour instead of 12-hour output
        use_ampm (bool): add the part of day in 12-hour output
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" / "2:22 PM" with am/pm, "3:01" / "2:22" without
        string = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if string[0] == '0':
            string = string[1:]  # strip leading zeros
    if not speech:
        return string

    # Generate a speakable version of the time
    if use_24hour:
        speak = pronounce_number_nl(dt.hour) + " uur"
        if dt.minute != 0:
            # zero minutes are not pronounced, 13:00 is "13 uur"
            speak += " " + pronounce_number_nl(dt.minute)
        return speak  # ampm is ignored when use_24hour is true

    if dt.hour == 0 and dt.minute == 0:
        return "Middernacht"

    hour = dt.hour % 12
    minute = dt.minute
    if minute == 0:
        speak = pronounce_number_nl(_fix_hour_nl(hour)) + " uur"
    elif minute == 30:
        # "half" refers to the upcoming hour
        speak = "half " + pronounce_number_nl(_fix_hour_nl(hour + 1))
    elif minute == 15:
        speak = "kwart over " + pronounce_number_nl(_fix_hour_nl(hour))
    elif minute == 45:
        speak = "kwart voor " + pronounce_number_nl(_fix_hour_nl(hour + 1))
    elif minute > 30:
        # minutes remaining until the upcoming hour
        speak = pronounce_number_nl(60 - minute) + " voor "
        speak += pronounce_number_nl(_fix_hour_nl(hour + 1))
    else:
        # minutes elapsed since the current hour
        speak = pronounce_number_nl(minute) + " over "
        speak += pronounce_number_nl(_fix_hour_nl(hour))

    if use_ampm:
        speak += nice_part_of_day_nl(dt)
    return speak
def _fix_hour_nl(hour):
    """Wrap an hour onto the 12-hour clock face, mapping 0 to 12."""
    return hour % 12 or 12
def nice_part_of_day_nl(dt, speech=True):
    """Return the Dutch part-of-day phrase (with leading space) for dt.hour.

    Args:
        dt (datetime): the time whose ``hour`` selects the phrase
        speech (bool): unused
    Returns:
        (str): " 's nachts", " 's ochtends", " 's middags" or " 's avonds"
    Raises:
        ValueError: if ``dt.hour`` is not below 24
    """
    # (exclusive upper hour bound, phrase), checked in ascending order
    day_parts = (
        (6, " 's nachts"),
        (12, " 's ochtends"),
        (18, " 's middags"),
        (24, " 's avonds"),
    )
    for upper_bound, phrase in day_parts:
        if dt.hour < upper_bound:
            return phrase
    raise ValueError('dt.hour is bigger than 24')
def nice_response_nl(text):
    """Post-process a Dutch response so it reads well when spoken.

    Two rewrites are applied:
      * a "^" that is followed by a numeric word becomes "tot de macht"
        (to the power of);
      * if the text contains a month name, numbers in front of months are
        spelled out by _nice_ordinal_nl.

    Both rewrites are applied regardless of the order in which the month
    and the "^" occur; previously whichever came last overwrote the result
    of the other. Text that needs neither rewrite is returned untouched.

    Args:
        text (str): the response text
    Returns:
        (str): the rewritten text
    """
    words = text.split()
    has_month = False
    replaced_power = False

    for idx, word in enumerate(words):
        if word.lower() in _MONTHS_NL:
            has_month = True
        if word == '^':
            wordNext = words[idx + 1] if idx + 1 < len(words) else ""
            if wordNext.isnumeric():
                words[idx] = "tot de macht"
                replaced_power = True

    if replaced_power:
        text = " ".join(words)
    if has_month:
        # runs on the text that already carries the "^" replacement
        text = _nice_ordinal_nl(text)
    return text
def _nice_ordinal_nl(text, speech=True):
    """Spell out day numbers that directly precede a Dutch month name.

    A purely numeric word followed by a month is replaced by its ordinal
    when the preceding word is "de", otherwise by its cardinal.

    The numeric test is made on the whole word: the previous test on
    ``word[:-1]`` skipped single-digit days and let tokens such as "3e"
    through to ``int()``, which raised ValueError.

    Args:
        text (str): the text to normalise
        speech (bool): unused
    Returns:
        (str): the words of ``text`` joined by single spaces, with the
        matching numbers spelled out
    """
    words = text.split()
    for idx, word in enumerate(words):
        if not word.isdecimal():
            continue
        wordNext = words[idx + 1] if idx + 1 < len(words) else ""
        if wordNext.lower() not in _MONTHS_NL:
            continue
        wordPrev = words[idx - 1] if idx > 0 else ""
        if wordPrev == 'de':
            words[idx] = pronounce_ordinal_nl(int(word))
        else:
            words[idx] = pronounce_number_nl(int(word))
    return " ".join(words)

View File

@@ -0,0 +1,351 @@
# -*- coding: utf-8 -*-
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from lingua_franca.lang.format_common import convert_to_mixed_fraction
from lingua_franca.lang.common_data_pl import _NUM_STRING_PL, \
_FRACTION_STRING_PL, _SHORT_SCALE_PL, _SHORT_ORDINAL_PL, _ALT_ORDINALS_PL
from lingua_franca.internal import FunctionNotLocalizedError
def nice_number_pl(number, speech=True, denominators=range(1, 21)):
    """Format a number as a Polish mixed fraction.

    For speech the result is "<whole> i <numerator> <fraction word>",
    for text it is e.g. "4 1/2".

    Args:
        number (int or float): the value to format
        speech (bool): spoken form (True) or display form (False)
        denominators (iter of ints): denominators to try, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # Give up, just represent as a 3 decimal number
        return str(round(number, 3))

    whole, num, den = mixed
    if num == 0:
        # whole value: same output for speech and display
        # TODO: Number grouping? E.g. "1,000,000"
        return str(whole)
    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = _FRACTION_STRING_PL[den]
    if whole == 0:
        spoken = '{} {}'.format(num, den_str)
    else:
        spoken = '{} i {} {}'.format(whole, num, den_str)
    if num > 1:
        # plural: the last character of the fraction word becomes 'e'
        spoken = spoken[:-1] + 'e'
    return spoken
def pronounce_number_pl(num, places=2, short_scale=True, scientific=False,
ordinals=False, scientific_run=False):
"""
Convert a number to it's spoken equivalent
For example, '5.2' would return 'five point two'
Args:
num(float or int): the number to pronounce (under 100)
places(int): maximum decimal places to speak
short_scale (bool) : use short (True) or long scale (False)
https://en.wikipedia.org/wiki/Names_of_large_numbers
scientific (bool): pronounce in scientific notation
ordinals (bool): pronounce in ordinal form "first" instead of "one"
Returns:
(str): The pronounced number
"""
# NOTE: the docstring example above is the English one; the words emitted
# here are Polish. `scientific_run` is not documented above: it is set to
# True by the recursive calls of the ordinal scientific form below and makes
# _sub_thousand use _ALT_ORDINALS_PL / the "...ej" endings.
# NOTE(review): `short_scale` is only forwarded to recursive calls; the word
# tables below are always built from _SHORT_SCALE_PL.
# deal with infinity
if num == float("inf"):
return "nieskończoność"
elif num == float("-inf"):
return "minus nieskończoność"
# Scientific notation: split the '%E' rendering into mantissa and exponent
# and pronounce both parts separately. A zero exponent falls through to the
# normal path below.
if scientific:
number = '%E' % num
n, power = number.replace("+", "").split("E")
power = int(power)
if power != 0:
if ordinals:
# This handles negatives of powers separately from the normal
# handling since each call disables the scientific flag
return '{}{} razy dziesięć do {}{} potęgi'.format(
'minus ' if float(n) < 0 else '',
pronounce_number_pl(
abs(float(n)), places, short_scale, False, ordinals=False, scientific_run=True),
'minus ' if power < 0 else '',
pronounce_number_pl(abs(power), places, short_scale, False, ordinals=True, scientific_run=True))
else:
# This handles negatives of powers separately from the normal
# handling since each call disables the scientific flag
return '{}{} razy dziesięć do potęgi {}{}'.format(
'minus ' if float(n) < 0 else '',
pronounce_number_pl(
abs(float(n)), places, short_scale, False),
'minus ' if power < 0 else '',
pronounce_number_pl(abs(power), places, short_scale, False))
# Word tables: basic number words plus the short-scale names
number_names = _NUM_STRING_PL.copy()
number_names.update(_SHORT_SCALE_PL)
digits = [number_names[n] for n in range(0, 20)]
if ordinals:
tens = [_SHORT_ORDINAL_PL[n] for n in range(10, 100, 10)]
else:
tens = [number_names[n] for n in range(10, 100, 10)]
# despite its name, `hundreds` holds the scale words in dictionary order
hundreds = [_SHORT_SCALE_PL[n] for n in _SHORT_SCALE_PL.keys()]
# deal with negatives
result = ""
if num < 0:
result = "minus "
num = abs(num)
# check for a direct match
if num in number_names and not ordinals:
result += number_names[num]
else:
# Helper: spells 0..999, optionally as an ordinal. `iteration` is the
# index of the thousand-group being spelled (0 = lowest group).
def _sub_thousand(n, ordinals=False, iteration=0):
assert 0 <= n <= 999
_, n_mod = divmod(n, 10)
# ordinal lookups take precedence over the generic spelling below
if iteration > 0 and n in _ALT_ORDINALS_PL and ordinals:
return _ALT_ORDINALS_PL[n]
elif n in _SHORT_ORDINAL_PL and ordinals:
return _SHORT_ORDINAL_PL[n] if not scientific_run \
else _ALT_ORDINALS_PL[n]
if n <= 19:
return digits[n] if not scientific_run or not ordinals\
else digits[n][:-1] + "ej"
elif n <= 99:
q, r = divmod(n, 10)
tens_text = tens[q - 1]
if scientific_run:
tens_text = tens_text[:-1] + "ej"
return tens_text + (" " + _sub_thousand(r, ordinals) if r
else "")
else:
q, r = divmod(n, 100)
digit_name = digits[q]
# prefer a dedicated word for the round hundred when the table has one
if q*100 in _NUM_STRING_PL:
digit_name = _NUM_STRING_PL[q*100]
return digit_name + (
" " + _sub_thousand(r, ordinals) if r else "")
# Helper: spells the whole part group by group (three digits per group),
# appending the inflected scale word to every group above the lowest one.
def _short_scale(n):
if n >= max(_SHORT_SCALE_PL.keys()):
return "nieskończoność"
# ordinal form is only used for whole numbers
ordi = ordinals
if int(n) != n:
ordi = False
n = int(n)
assert 0 <= n
res = []
for i, z in enumerate(_split_by(n, 1000)):
if not z:
continue
number = _sub_thousand(z, ordi, iteration=i)
if i:
# no scale word available for this group: give up with an empty result
if i >= len(hundreds):
return ""
number += " "
if ordi:
if i * 1000 in _SHORT_ORDINAL_PL:
if z == 1:
number = _SHORT_ORDINAL_PL[i * 1000]
else:
number += _SHORT_ORDINAL_PL[i * 1000]
else:
if n not in _SHORT_SCALE_PL:
num = int("1" + "0" * (len(str(n)) - 2))
number += _SHORT_SCALE_PL[num] + "owa"
else:
number = _SHORT_SCALE_PL[n] + "ty"
else:
# cardinal: pick the scale word and inflect it by the last digits of the group
hundreds_text = _SHORT_SCALE_PL[float(pow(1000, i))]
if z != 1:
_, z_mod = divmod(z, 10)
_, z_mod_tens = divmod(z, 100)
n_main, _ = divmod(z_mod_tens, 10)
if i == 1:
if n_main != 1 and 5 > z_mod > 0:
hundreds_text += "e"
else:
hundreds_text = "tysięcy"
elif i > 1:
hundreds_text += "y" if 5 > z_mod > 0 else "ów"
number += hundreds_text
res.append(number)
# NOTE(review): presumably only the lowest non-zero group is meant to get
# the ordinal form, hence the flag is cleared here - confirm.
ordi = False
return ", ".join(reversed(res))
# Helper: splits n into base-`split` groups, least significant first
def _split_by(n, split=1000):
assert 0 <= n
res = []
while n:
n, r = divmod(n, split)
res.append(r)
return res
result += _short_scale(num)
# deal with scientific notation unpronounceable as number
if not result and "e" in str(num):
return pronounce_number_pl(num, places, short_scale, scientific=True)
# Deal with fractional part
# (the decimals are read digit by digit from str(num), truncated to `places`)
elif not num == int(num) and places > 0:
if abs(num) < 1.0 and (result == "minus " or not result):
result += "zero"
result += " przecinek"
_num_str = str(num)
_num_str = _num_str.split(".")[1][0:places]
for char in _num_str:
result += " " + number_names[int(char)]
return result
def nice_time_pl(dt, speech=True, use_24hour=True, use_ampm=False):
    """Format a time of day in Polish.

    The output is always based on the 24-hour clock; ``use_24hour`` and
    ``use_ampm`` are accepted but not consulted.

    Args:
        dt (datetime): time to format (assumed to be local time already)
        speech (bool): spoken form (True) or display form (False)
        use_24hour (bool): unused
        use_ampm (bool): unused
    Returns:
        (str): The formatted time string
    """
    string = dt.strftime("%H:%M")
    if not speech:
        return string

    # Generate a speakable version of the time
    hours = string[0:2]
    minutes = string[3:5]
    after_midnight = hours == '00'

    if after_midnight:
        # hour 00 is not spoken; " po północy" is appended at the end
        speak = ""
    else:
        # ordinal hour with its last character replaced by 'a'
        hour_words = pronounce_number_pl(int(hours), ordinals=True)
        speak = hour_words[:-1] + 'a' + ' '

    if minutes == '00':
        speak += 'zero zero'
    else:
        speak += pronounce_number_pl(int(minutes))

    if after_midnight:
        speak += " po północy"
    return speak
def _duration_unit_pl(count, singular, few, many):
    """Pick the Polish noun form that agrees with the number *count*."""
    if count == 1:
        return singular
    # 2-4, 22-24, 32-34, ... take the "few" form, except the teens 12-14
    if 2 <= count % 10 <= 4 and not 12 <= count % 100 <= 14:
        return few
    return many


def nice_duration_pl(duration, speech=True):
    """Convert a duration to a spoken Polish timespan.

    The duration is split into days, hours, minutes and seconds; zero
    components are left out and the rest are joined by single spaces.
    Each unit noun is inflected to agree with its number, so values such
    as 20 or 21 get the genitive plural ("minut"), not "minuty".

    Args:
        duration (int, float or timedelta-like): length in seconds, or an
            object with a ``total_seconds()`` method
        speech (bool): only the spoken form is localized
    Returns:
        str: timespan as a string ('' for a zero duration)
    Raises:
        FunctionNotLocalizedError: if ``speech`` is False
    """
    # TODO this is a kludge around the fact that only Polish has a
    # localized nice_duration()
    if not speech:
        raise FunctionNotLocalizedError

    # accept datetime.timedelta (or anything shaped like it) as well
    if hasattr(duration, "total_seconds"):
        duration = duration.total_seconds()

    days = int(duration // 86400)
    hours = int(duration // 3600 % 24)
    minutes = int(duration // 60 % 60)
    seconds = int(duration % 60)

    parts = []
    if days > 0:
        day_noun = 'dzień' if days == 1 else 'dni'
        parts.append(pronounce_number_pl(days) + " " + day_noun)
    if hours > 0:
        hour_noun = _duration_unit_pl(hours, 'godzina', 'godziny', 'godzin')
        parts.append(
            get_pronounce_number_for_duration(hours) + " " + hour_noun)
    if minutes > 0:
        minute_noun = _duration_unit_pl(minutes, 'minuta', 'minuty', 'minut')
        parts.append(
            get_pronounce_number_for_duration(minutes) + " " + minute_noun)
    if seconds > 0:
        second_noun = _duration_unit_pl(
            seconds, 'sekunda', 'sekundy', 'sekund')
        parts.append(
            get_pronounce_number_for_duration(seconds) + " " + second_noun)
    return " ".join(parts)
def get_pronounce_number_for_duration(num):
    """Pronounce *num* for use in a duration, turning 'jeden' into 'jedna'."""
    spoken = pronounce_number_pl(num)
    if spoken == 'jeden':
        return 'jedna'
    return spoken

View File

@@ -0,0 +1,223 @@
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from lingua_franca.lang.format_common import convert_to_mixed_fraction
from lingua_franca.lang.common_data_pt import _FRACTION_STRING_PT, \
_NUM_STRING_PT
def nice_number_pt(number, speech, denominators=range(1, 21)):
    """ Portuguese helper for nice_number

    Formats a float as a mixed fraction that is easy to read or speak,
    e.g. 4.5 becomes "4 e meio" for speech and "4 1/2" for text.

    Args:
        number (int or float): the float to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction: fall back to a number rounded to 3 decimals
        return str(round(number, 3))

    whole, num, den = mixed
    if num == 0:
        # Whole numbers look the same for speech and display
        # TODO: Number grouping?  E.g. "1,000,000"
        return str(whole)
    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    # denominator word, e.g. "meio", "décimo"
    den_str = _FRACTION_STRING_PT[den]
    if num == 1:
        # "um décimo" on its own, but just "... e meio" after a whole part
        fraction = den_str if whole != 0 else 'um {}'.format(den_str)
    else:
        fraction = '{} {}'.format(num, den_str)

    spoken = fraction if whole == 0 else '{} e {}'.format(whole, fraction)
    # plural
    if num > 1:
        spoken += 's'
    return spoken
def pronounce_number_pt(number, places=2):
    """
    Convert a number to its spoken Portuguese equivalent

    For example, 5.2 would return 'cinco vírgula dois'

    Args:
        number(float or int): the number to pronounce (under 100)
        places(int): maximum decimal places to speak
    Returns:
        (str): The pronounced number; numbers whose absolute value is 100
               or more are returned as their plain str() form
    """
    if abs(number) >= 100:
        # TODO: Support n > 100
        return str(number)

    result = "menos " if number < 0 else ""
    number = abs(number)

    if number < 20:
        result += _NUM_STRING_PT[int(number)]
    else:
        tens = int(number - int(number) % 10)
        ones = int(number - tens)
        result += _NUM_STRING_PT[tens]
        if ones > 0:
            result += " e " + _NUM_STRING_PT[ones]

    # In Portuguese the decimal separator is spoken as "vírgula" (comma),
    # whichever way it is written.
    has_fraction = number != int(number)
    if has_fraction and places > 0:
        if abs(number) < 1.0 and (result == "menos " or not result):
            result += "zero"
        result += " vírgula"
        decimals = str(number).split(".")[1][0:places]
        result += "".join(" " + _NUM_STRING_PT[int(digit)]
                          for digit in decimals)
    return result
def nice_time_pt(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format (Portuguese)

    For example, generate 'cinco e meia' for speech or '5:30' for
    text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        if use_ampm:
            # e.g. "3:01 AM" or "2:22 PM"
            string = dt.strftime("%I:%M %p")
        else:
            # e.g. "3:01" or "2:22"
            string = dt.strftime("%I:%M")
        if string[0] == '0':
            string = string[1:]  # strip leading zeros

    if not speech:
        return string

    # Generate a speakable version of the time
    speak = ""
    if use_24hour:
        # simply speak the number ("uma" is the feminine form used for hours)
        if dt.hour == 1:
            speak += "uma"
        else:
            speak += pronounce_number_pt(dt.hour)

        # equivalent to "quarter past ten"
        if dt.minute > 0:
            speak += " e " + pronounce_number_pt(dt.minute)
    else:
        # From 35 minutes past, on the five-minute marks, count down to the
        # next hour ("X menos ...").
        if dt.minute in (35, 40, 45, 50, 55):
            minute = dt.minute - 60
            # Wrap around midnight: 23:45 counts down to hour 0
            # ("meia noite"), not to a non-existent hour 24.
            hour = (dt.hour + 1) % 24
        else:
            minute = dt.minute
            hour = dt.hour

        if hour == 0:
            speak += "meia noite"
        elif hour == 12:
            speak += "meio dia"
        # 1 and 2 are pronounced in female form when talking about hours
        elif hour == 1 or hour == 13:
            speak += "uma"
        elif hour == 2 or hour == 14:
            speak += "duas"
        elif hour < 13:
            speak = pronounce_number_pt(hour)
        else:
            speak = pronounce_number_pt(hour - 12)

        if minute != 0:
            if minute == 15:
                speak += " e um quarto"
            elif minute == 30:
                speak += " e meia"
            elif minute == -15:
                speak += " menos um quarto"
            elif minute > 0:
                speak += " e " + pronounce_number_pt(minute)
            else:
                # negative minutes are pronounced as "menos <n>"
                speak += " " + pronounce_number_pt(minute)

        # exact time, e.g. 3:00
        if minute == 0 and not use_ampm:
            speak += " em ponto"

        # add daytime identifier (equivalent to "in the morning")
        if use_ampm:
            if hour > 0 and hour < 6:
                speak += " da madrugada"
            elif hour >= 6 and hour < 12:
                speak += " da manhã"
            elif hour >= 13 and hour < 21:
                speak += " da tarde"
            elif hour != 0 and hour != 12:
                speak += " da noite"
    return speak

View File

@@ -0,0 +1,474 @@
# -*- coding: utf-8 -*-
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from lingua_franca.lang.format_common import convert_to_mixed_fraction
from lingua_franca.lang.common_data_ru import _NUM_STRING_RU, \
_FRACTION_STRING_RU, _LONG_SCALE_RU, _SHORT_SCALE_RU, _SHORT_ORDINAL_RU, _LONG_ORDINAL_RU
from lingua_franca.internal import FunctionNotLocalizedError
def nice_number_ru(number, speech=True, denominators=range(1, 21)):
    """ Russian helper for nice_number

    Formats a float as a mixed fraction that is easy to read or speak,
    e.g. 4.5 becomes "4 с половиной" for speech and "4 1/2" for text.

    Args:
        number (int or float): the float to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction: fall back to a number rounded to 3 decimals
        return str(round(number, 3))

    whole, num, den = mixed
    if num == 0:
        # Whole numbers look the same for speech and display
        # TODO: Number grouping?  E.g. "1,000,000"
        return str(whole)
    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = _FRACTION_STRING_RU[den]
    if whole != 0:
        if num == 1 and den == 2:
            spoken = '{} с половиной'.format(whole)
        else:
            spoken = '{} и {} {}'.format(whole, num, den_str)
    elif num == 1 and den <= 4:
        # a single half/third/quarter is spoken without the numerator
        spoken = '{}'.format(den_str)
    else:
        spoken = '{} {}'.format(num, den_str)

    # Replace the ending of the denominator word to agree with the numerator
    few = 2 <= num <= 4
    many = num > 4
    if 2 <= den <= 4:
        if few:
            spoken = spoken[:-1] + 'и'
        elif many:
            spoken = spoken[:-1] + 'ей'
    elif den >= 5:
        if few:
            spoken = spoken[:-2] + 'ые'
        elif many:
            spoken = spoken[:-2] + 'ых'
    return spoken
def pronounce_number_ru(number, places=2, short_scale=True, scientific=False,
ordinals=False):
"""
Convert a number to it's spoken equivalent
For example, '5.2' would return 'five point two'
Args:
number(float or int): the number to pronounce (under 100)
places(int): maximum decimal places to speak
short_scale (bool) : use short (True) or long scale (False)
https://en.wikipedia.org/wiki/Names_of_large_numbers
scientific (bool): pronounce in scientific notation
ordinals (bool): pronounce in ordinal form "first" instead of "one"
Returns:
(str): The pronounced number
"""
# NOTE(review): the docstring example is English and the "under 100" remark
# looks stale - the code below emits Russian words and uses the scale tables
# for larger values. Confirm and update the docstring.
num = number
# deal with infinity
if num == float("inf"):
return "бесконечность"
elif num == float("-inf"):
return "минус бесконечность"
# Scientific notation: format with '%E', split into mantissa and power, and
# pronounce both parts recursively with the scientific flag switched off.
if scientific:
number = '%E' % num
n, power = number.replace("+", "").split("E")
power = int(power)
if power != 0:
if ordinals:
# This handles negative powers separately from the normal
# handling since each call disables the scientific flag
return '{}{} на десять в {}{} степени'.format(
'минус ' if float(n) < 0 else '',
pronounce_number_ru(
abs(float(n)), places, short_scale, False, ordinals=True),
'минус ' if power < 0 else '',
pronounce_number_ru(abs(power), places, short_scale, False, ordinals=True))
else:
# This handles negative powers separately from the normal
# handling since each call disables the scientific flag
return '{}{} на десять в степени {}{}'.format(
'минус ' if float(n) < 0 else '',
pronounce_number_ru(
abs(float(n)), places, short_scale, False, ordinals=False),
'минус ' if power < 0 else '',
pronounce_number_ru(abs(power), places, short_scale, False, ordinals=False))
# Lookup table of number names: the basic names plus the scale words of the
# requested scale.
if short_scale:
number_names = _NUM_STRING_RU.copy()
number_names.update(_SHORT_SCALE_RU)
else:
number_names = _NUM_STRING_RU.copy()
number_names.update(_LONG_SCALE_RU)
# digits: names for 0..19; tens: names for 10, 20, ..., 90
digits = [number_names[n] for n in range(0, 20)]
tens = [number_names[n] for n in range(10, 100, 10)]
# hundreds: the scale words, in the key order of the scale dictionary
if short_scale:
hundreds = [_SHORT_SCALE_RU[n] for n in _SHORT_SCALE_RU.keys()]
else:
hundreds = [_LONG_SCALE_RU[n] for n in _LONG_SCALE_RU.keys()]
# deal with negative numbers
result = ""
if num < 0:
result = "минус "
num = abs(num)
# check for a direct match
if num in number_names and not ordinals:
result += number_names[num]
else:
# Pronounce 0 <= n <= 999. In ordinal mode a value present in
# _SHORT_ORDINAL_RU is returned directly from that table.
def _sub_thousand(n, ordinals=False):
assert 0 <= n <= 999
if n in _SHORT_ORDINAL_RU and ordinals:
return _SHORT_ORDINAL_RU[n]
if n <= 19:
return digits[n]
elif n <= 99:
q, r = divmod(n, 10)
return tens[q - 1] + (" " + _sub_thousand(r, ordinals) if r
else "")
else:
q, r = divmod(n, 100)
return _NUM_STRING_RU[q * 100] + (" " + _sub_thousand(r, ordinals) if r else "")
# Short scale: the integer part is split into groups of three digits
# (least significant first, see _split_by); each non-zero group is
# pronounced and given its scale word, then the groups are joined in
# reverse order.
# NOTE(review): the overflow guard here uses ">" while _long_scale uses
# ">=" - confirm which one is intended.
def _short_scale(n):
if n > max(_SHORT_SCALE_RU.keys()):
return "бесконечность"
ordi = ordinals
# a number with a fractional part is never pronounced as an ordinal
if int(n) != n:
ordi = False
n = int(n)
assert 0 <= n
res = []
for i, z in enumerate(_split_by(n, 1000)):
if not z:
continue
number = _sub_thousand(z, not i and ordi)
if i:
# more groups than available scale words: give up with an empty
# string (the caller then falls back to scientific notation when
# str(num) contains "e")
if i >= len(hundreds):
return ""
if ordi:
if i * 1000 in _SHORT_ORDINAL_RU:
if z == 1:
number = _SHORT_ORDINAL_RU[i * 1000]
else:
if z > 5:
number = number[:-1] + "и"
number += _SHORT_ORDINAL_RU[i * 1000]
else:
if n not in _SHORT_SCALE_RU:
num = int("1" + "0" * (len(str(n)) // 3 * 3))
# rewrite the ending of the count word before the scale word is
# appended
if number[-3:] == "два":
number = number[:-1] + "ух"
elif number[-2:] == "ри" or number[-2:] == "ре":
number = number[:-1] + "ёх"
elif number[-1:] == "ь":
number = number[:-1] + "и"
number += _SHORT_SCALE_RU[num] + "ный"
else:
number = _SHORT_SCALE_RU[n] + "ный"
elif z == 1:
number = hundreds[i - 1]
else:
if i == 1:
# thousands: 1 and 2 get the endings "на" / "е" (except in the
# teens), and the word for "thousand" is chosen by plural_ru
if z % 10 == 1 and z % 100 // 10 != 1:
number = number[:-2] + "на"
elif z % 10 == 2 and z % 100 // 10 != 1:
number = number[:-1] + "е"
number += " " + plural_ru(z, "тысяча", "тысячи", "тысяч")
elif 1 <= z % 10 <= 4 and z % 100 // 10 != 1:
number += " " + hundreds[i - 1] + "а"
else:
number += " " + hundreds[i - 1] + "ов"
res.append(number)
ordi = False
return " ".join(reversed(res))
# Split a non-negative integer into base-`split` groups, least significant
# group first (0 yields an empty list).
def _split_by(n, split=1000):
assert 0 <= n
res = []
while n:
n, r = divmod(n, split)
res.append(r)
return res
# Long scale: same idea as _short_scale but with groups of six digits;
# each group is pronounced through a recursive short-scale call.
def _long_scale(n):
if n >= max(_LONG_SCALE_RU.keys()):
return "бесконечность"
ordi = ordinals
if int(n) != n:
ordi = False
n = int(n)
assert 0 <= n
res = []
for i, z in enumerate(_split_by(n, 1000000)):
if not z:
continue
number = pronounce_number_ru(z, places, True, scientific,
ordinals=ordi and not i)
# strip off the comma after the thousand
if i:
if i >= len(hundreds):
return ""
# plus one as we skip 'thousand'
# (and 'hundred', but this is excluded by index value)
number = number.replace(',', '')
if ordi:
if (i + 1) * 1000000 in _LONG_ORDINAL_RU:
if z == 1:
number = _LONG_ORDINAL_RU[
(i + 1) * 1000000]
else:
number += _LONG_ORDINAL_RU[
(i + 1) * 1000000]
else:
if n not in _LONG_SCALE_RU:
num = int("1" + "0" * (len(str(n)) // 3 * 3))
if number[-3:] == "два":
number = number[:-1] + "ух"
elif number[-2:] == "ри" or number[-2:] == "ре":
number = number[:-1] + "ёх"
elif number[-1:] == "ь":
number = number[:-1] + "и"
number += _LONG_SCALE_RU[num] + "ный"
else:
number = " " + _LONG_SCALE_RU[n] + "ный"
elif z == 1:
number = hundreds[i]
elif z <= 4:
number += " " + hundreds[i] + "а"
else:
number += " " + hundreds[i] + "ов"
res.append(number)
return " ".join(reversed(res))
if short_scale:
result += _short_scale(num)
else:
result += _long_scale(num)
# deal with scientific notation unpronounceable as number
if not result and "e" in str(num):
return pronounce_number_ru(num, places, short_scale, scientific=True)
# Deal with fractional part
elif not num == int(num) and places > 0:
if abs(num) < 1.0 and (result == "минус " or not result):
result += "ноль"
result += " точка"
# the decimals are read digit by digit, at most `places` of them, taken
# from the str() form of the number
_num_str = str(num)
_num_str = _num_str.split(".")[1][0:places]
for char in _num_str:
result += " " + number_names[int(char)]
return result
def nice_time_ru(dt, speech=True, use_24hour=True, use_ampm=False):
    """
    Format a time to a comfortable human format (Russian)

    For example, generate 'пять с половиной' for speech or '5:30' for
    text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): add the Russian part-of-day word in 12-hour format
    Returns:
        (str): The formatted time string
    """
    def _day_period():
        # Part-of-day word (with leading space) used instead of AM/PM
        if dt.hour < 4:
            return " ночи"
        if dt.hour < 12:
            return " утра"
        if dt.hour < 18:
            return " дня"
        return " вечера"

    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        # e.g. "3:01" or "2:22", optionally followed by the part of day
        string = dt.strftime("%I:%M")
        if use_ampm:
            string += _day_period()
        if string[0] == '0':
            string = string[1:]  # strip leading zeros

    if not speech:
        return string

    # Generate a speakable version of the time
    if use_24hour:
        # The hour is read digit by digit when it has a leading zero
        if string[0] == '0':
            speak = pronounce_hour_ru(int(string[0])) + " "
            speak += pronounce_number_ru(int(string[1]))
        else:
            speak = pronounce_hour_ru(int(string[0:2]))

        speak += " "
        minute_text = string[3:5]
        if minute_text == '00':
            speak += "ровно"
        elif string[3] == '0':
            # minutes with a leading zero are read digit by digit as well
            speak += pronounce_number_ru(0) + " "
            speak += pronounce_number_ru(int(string[4]))
        else:
            speak += pronounce_number_ru(int(minute_text))
        return speak

    # 12-hour speech
    if dt.minute == 0:
        if dt.hour == 0:
            return "полночь"
        if dt.hour == 12:
            return "полдень"

    hour = dt.hour % 12 or 12  # 12 hour clock and 0 is spoken as 12
    if dt.minute == 15:
        speak = pronounce_hour_ru(hour) + " с четвертью"
    elif dt.minute == 30:
        speak = pronounce_hour_ru(hour) + " с половиной"
    elif dt.minute == 45:
        next_hour = (dt.hour + 1) % 12 or 12
        speak = "без четверти " + pronounce_hour_ru(next_hour)
    else:
        speak = pronounce_hour_ru(hour)
        if dt.minute != 0:
            if dt.minute < 10:
                speak += " ноль"
            speak += " " + pronounce_number_ru(dt.minute)
        elif not use_ampm:
            # full hour: pronounce_hour_ru already returns "час" for one
            # o'clock, otherwise append the word for "hour(s)"
            if dt.hour % 12 == 1:
                return speak
            return speak + " " + plural_ru(dt.hour % 12,
                                           "час", "часа", "часов")

    if use_ampm:
        speak += _day_period()
    return speak
def nice_duration_ru(duration, speech=True):
    """ Convert a duration to a nice spoken Russian timespan

    Args:
        duration: the timespan in seconds
        speech (bool): must be true; a display format is not localized
    Returns:
        str: timespan as a string (empty when no unit is greater than zero)
    Raises:
        FunctionNotLocalizedError: if speech is false
    """
    if not speech:
        raise FunctionNotLocalizedError

    days = int(duration // 86400)
    hours = int(duration // 3600 % 24)
    minutes = int(duration // 60 % 60)
    seconds = int(duration % 60)

    parts = []
    if days > 0:
        parts.append(pronounce_number_ru(days) + " "
                     + plural_ru(days, "день", "дня", "дней"))
    if hours > 0:
        parts.append(pronounce_number_ru(hours) + " "
                     + plural_ru(hours, "час", "часа", "часов"))
    # minutes and seconds use the feminine number forms
    if minutes > 0:
        parts.append(pronounce_number_feminine_ru(minutes) + " "
                     + plural_ru(minutes, "минута", "минуты", "минут"))
    if seconds > 0:
        parts.append(pronounce_number_feminine_ru(seconds) + " "
                     + plural_ru(seconds, "секунда", "секунды", "секунд"))
    return " ".join(parts)
def pronounce_hour_ru(num):
    """Pronounce an hour value: 1 is spoken as "час", anything else is
    passed to pronounce_number_ru."""
    return "час" if num == 1 else pronounce_number_ru(num)
def pronounce_number_feminine_ru(num):
    """Pronounce a number in the feminine form.

    The result of pronounce_number_ru is adjusted when the number ends in
    1 or 2 (but not 11 or 12): the last two characters become "на" for 1,
    and the last character becomes "е" for 2.
    """
    spoken = pronounce_number_ru(num)
    tens, units = divmod(num % 100, 10)
    if tens != 1:
        if units == 1:
            return spoken[:-2] + "на"
        if units == 2:
            return spoken[:-1] + "е"
    return spoken
def plural_ru(num: int, one: str, few: str, many: str):
    """Pick the Russian plural form that agrees with ``num``.

    Args:
        num (int): the count
        one (str): form for counts ending in 1 (except 11)
        few (str): form for counts ending in 2-4 (except 12-14)
        many (str): form for everything else
    Returns:
        (str): one of ``one``, ``few`` or ``many``
    """
    tens, units = divmod(num % 100, 10)
    if tens == 1:
        # 10..19 always take the "many" form
        return many
    if units == 1:
        return one
    return few if 2 <= units <= 4 else many

View File

@@ -0,0 +1,419 @@
# -*- coding: utf-8 -*-
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from lingua_franca.lang.common_data_sl import _NUM_STRING_SL, \
_FRACTION_STRING_SL, _LONG_SCALE_SL, _SHORT_SCALE_SL, _SHORT_ORDINAL_SL
from lingua_franca.lang.format_common import convert_to_mixed_fraction
def nice_number_sl(number, speech=True, denominators=range(1, 21)):
    """ Slovenian helper for nice_number

    Formats a float as a mixed fraction that is easy to read or speak,
    e.g. 4.5 becomes "4 in 1 polovica" for speech and "4 1/2" for text.

    Args:
        number (int or float): the float to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction: fall back to a number rounded to 3 decimals
        return str(round(number, 3))

    whole, num, den = mixed
    if num == 0:
        # Whole numbers look the same for speech and display
        return str(whole)
    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = _FRACTION_STRING_SL[den]
    if whole == 0:
        spoken = '{} {}'.format(num, den_str)
    else:
        spoken = '{} in {} {}'.format(whole, num, den_str)

    # Adjust the last character of the denominator word to the numerator
    remainder = num % 100
    if remainder == 1:
        return spoken
    ending = {2: 'i', 3: 'e', 4: 'e'}.get(remainder, '')
    return spoken[:-1] + ending
def pronounce_number_sl(num, places=2, short_scale=True, scientific=False,
ordinals=False):
"""
Convert a number to it's spoken equivalent
For example, '5.2' would return 'pet celih dve'
Args:
num(float or int): the number to pronounce (under 100)
places(int): maximum decimal places to speak
short_scale (bool) : use short (True) or long scale (False)
https://en.wikipedia.org/wiki/Names_of_large_numbers
scientific (bool): pronounce in scientific notation
ordinals (bool): pronounce in ordinal form "first" instead of "one"
Returns:
(str): The pronounced number
"""
# NOTE(review): the "under 100" remark in the docstring looks stale - the code
# below uses the scale tables for larger values. Confirm and update.
# deal with infinity
if num == float("inf"):
return "neskončno"
elif num == float("-inf"):
return "minus neskončno"
# Scientific notation: format with '%E', split into mantissa and power, and
# pronounce both parts recursively with the scientific flag switched off.
if scientific:
number = '%E' % num
n, power = number.replace("+", "").split("E")
power = int(power)
if power != 0:
if ordinals:
# This handles negatives of powers separately from the normal
# handling since each call disables the scientific flag
return '{}{} krat deset na {}{}'.format(
'minus ' if float(n) < 0 else '',
pronounce_number_sl(
abs(float(n)), places, short_scale, False, ordinals=False),
'minus ' if power < 0 else '',
pronounce_number_sl(abs(power), places, short_scale, False, ordinals=True))
else:
# This handles negatives of powers separately from the normal
# handling since each call disables the scientific flag
return '{}{} krat deset na {}{}'.format(
'minus ' if float(n) < 0 else '',
pronounce_number_sl(
abs(float(n)), places, short_scale, False),
'minus ' if power < 0 else '',
pronounce_number_sl(abs(power), places, short_scale, False))
# Lookup table of number names: the basic names plus the scale words of the
# requested scale.
if short_scale:
number_names = _NUM_STRING_SL.copy()
number_names.update(_SHORT_SCALE_SL)
else:
number_names = _NUM_STRING_SL.copy()
number_names.update(_LONG_SCALE_SL)
# digits: names for 0..19; tens: names for 10, 20, ..., 90
digits = [number_names[n] for n in range(0, 20)]
tens = [number_names[n] for n in range(10, 100, 10)]
# hundreds: the scale words, in the key order of the scale dictionary
if short_scale:
hundreds = [_SHORT_SCALE_SL[n] for n in _SHORT_SCALE_SL.keys()]
else:
hundreds = [_LONG_SCALE_SL[n] for n in _LONG_SCALE_SL.keys()]
# deal with negatives
result = ""
if num < 0:
result = "minus "
num = abs(num)
# check for a direct match
if num in number_names and not ordinals:
result += number_names[num]
else:
# Pronounce 0 <= n <= 999. In ordinal mode a value present in
# _SHORT_ORDINAL_SL is returned directly from that table; with is_male
# set, the word for 2 gets an "a" ending.
def _sub_thousand(n, ordinals=False, is_male=False):
assert 0 <= n <= 999
if n in _SHORT_ORDINAL_SL and ordinals:
return _SHORT_ORDINAL_SL[n]
if n <= 19:
if is_male and n == 2:
return digits[n][:-1] + "a"
return digits[n]
elif n <= 99:
# units are spoken before the tens, glued together with "in"
q, r = divmod(n, 10)
sub = _sub_thousand(r, False)
if r == 2:
sub = sub[:-1] + "a"
return ((sub + "in") if r else "") + (
tens[q - 1]) + ("i" if ordinals else "")
else:
# a single hundred is just "sto", otherwise the digit name is prefixed
q, r = divmod(n, 100)
if q == 1:
qstr = ""
else:
qstr = digits[q]
return (qstr + "sto" + (
" " + _sub_thousand(r, ordinals) if r else ""))
# Choose the ending of a scale word for the group value n. Words that do
# not end in "jon" only get an "i" appended in ordinal mode; "...jon" words
# are inflected by n % 100, and on the long scale groups of 1000 or more
# switch the "jon" ending to a "jard" form.
def _plural_hundreds(n, hundred, ordi=True):
if hundred[-3:] != "jon":
if ordi:
return hundred + "i"
return hundred
if n < 1000 or short_scale:
if ordi:
return hundred + "ti"
if n % 100 == 1:
return hundred
elif n % 100 == 2:
return hundred + "a"
elif n % 100 == 3 or n % 100 == 4:
return hundred + "i"
else:
return hundred + "ov"
else:
n //= 1000
if ordi:
return hundred[:-3] + "jardti"
if n % 100 == 1:
return hundred[:-3] + "jarda"
elif n % 100 == 2:
return hundred[:-3] + "jardi"
elif n % 100 == 3 or n % 100 == 4:
return hundred[:-3] + "jarde"
else:
return hundred[:-3] + "jard"
# Short scale: the integer part is split into groups of three digits
# (least significant first, see _split_by); each non-zero group is
# pronounced and given its scale word, then the groups are joined in
# reverse order.
def _short_scale(n):
if n >= max(_SHORT_SCALE_SL.keys()):
return "neskončno"
ordi = ordinals
# a number with a fractional part is never pronounced as an ordinal
if int(n) != n:
ordi = False
n = int(n)
assert 0 <= n
res = []
split = _split_by(n, 1000)
# ordi_force: ordinal requested and exactly one group is non-zero, so the
# scale word itself takes the ordinal ending
if ordinals and len([a for a in split if a > 0]) == 1:
ordi_force = True
else:
ordi_force = False
for i, z in enumerate(split):
if not z:
continue
if z == 1 and i == 1:
number = ""
elif z > 100 and z % 100 == 2:
number = _sub_thousand(z, not i and ordi, is_male=True)
elif z > 100 and z % 100 == 3:
number = _sub_thousand(z, not i and ordi) + "je"
elif z > 1 or i == 0 or ordi:
number = _sub_thousand(z, not i and ordi)
else:
number = ""
if i:
# more groups than available scale words: give up with an empty string
if i >= len(hundreds):
return ""
if z > 1:
number += " "
number += _plural_hundreds(
z, hundreds[i], True if ordi_force else not i and ordi)
res.append(number)
ordi = False
return " ".join(reversed(res))
# Split a non-negative integer into base-`split` groups, least significant
# group first (0 yields an empty list).
def _split_by(n, split=1000):
assert 0 <= n
res = []
while n:
n, r = divmod(n, split)
res.append(r)
return res
# Long scale: same idea as _short_scale but with groups of six digits;
# each group is pronounced through a recursive short-scale call.
def _long_scale(n):
if n >= max(_LONG_SCALE_SL.keys()):
return "neskončno"
ordi = ordinals
if int(n) != n:
ordi = False
n = int(n)
assert 0 <= n
res = []
split = _split_by(n, 1000000)
if ordinals and len([a for a in split if a > 0]) == 1:
ordi_force = True
else:
ordi_force = False
for i, z in enumerate(split):
if not z:
continue
number = pronounce_number_sl(z, places, True, scientific)
# keep the first word of the pronunciation as a prefix when the group is
# above 100, then special-case groups ending in 2 or 3
if z > 100:
add = number.split()[0] + " "
else:
add = ""
if z % 100 == 2 and i >= 1:
number = add + digits[2][:-1] + "a"
if z % 100 == 3 and i >= 1:
number = add + digits[3] + "je"
# strip off the comma after the thousand
if i:
if i >= len(hundreds):
return ""
# plus one as we skip 'thousand'
# (and 'hundred', but this is excluded by index value)
hundred = _plural_hundreds(
z, hundreds[i + 1], True if ordi_force else ordi and not i)
if z >= 1000:
z //= 1000
number = pronounce_number_sl(z, places, True, scientific,
ordinals=True if ordi_force else ordi and not i)
if z == 1:
number = hundred
else:
number += " " + hundred
res.append(number)
return " ".join(reversed(res))
if short_scale:
result += _short_scale(num)
else:
result += _long_scale(num)
# in ordinal mode all spaces are removed from the result
if ordinals:
result = result.replace(" ", "")
# deal with scientific notation unpronounceable as number
if (not result or result == "neskončno") and "e" in str(num):
return pronounce_number_sl(num, places, short_scale, scientific=True)
# Deal with fractional part
elif not num == int(num) and places > 0:
if abs(num) < 1.0 and (result == "minus " or not result):
result += "nič"
# the word introducing the decimals is chosen by the integer part % 100
if int(abs(num)) % 100 == 1:
result += " cela"
elif int(abs(num)) % 100 == 2:
result += " celi"
elif int(abs(num)) % 100 == 3 or int(abs(num)) % 100 == 4:
result += " cele"
else:
result += " celih"
# the decimals are read digit by digit, at most `places` of them, taken
# from the str() form of the number
_num_str = str(num)
_num_str = _num_str.split(".")[1][0:places]
for char in _num_str:
result += " " + number_names[int(char)]
return result
def nice_time_sl(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format (Slovenian)

    For example, generate 'pet trideset' for speech or '5:30' for
    text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if use_24hour:
        # e.g. "03:01" or "14:22"
        string = dt.strftime("%H:%M")
    else:
        # e.g. "3:01 AM" / "2:22 PM" with am/pm, otherwise "3:01" / "2:22"
        string = dt.strftime("%I:%M %p" if use_ampm else "%I:%M")
        if string[0] == '0':
            string = string[1:]  # strip leading zeros

    if not speech:
        return string

    # hour -> (characters to drop from the end, suffix to append)
    declension_rules = {
        1: (1, "ih"),
        2: (0, "h"),
        4: (0, "h"),
        3: (1, "eh"),
        7: (2, "mih"),
        8: (2, "mih"),
    }

    def _hour_declension(hour):
        # Inflected hour word used after "pol" and "do"
        spoken_hour = pronounce_number_sl(hour)
        cut, suffix = declension_rules.get(hour, (0, "ih"))
        stem = spoken_hour[:-cut] if cut else spoken_hour
        return stem + suffix

    # Generate a speakable version of the time
    if use_24hour:
        # "13 nič nič"
        speak = pronounce_number_sl(int(string[0:2])) + " "
        minute_text = string[3:5]
        if minute_text == '00':
            return speak + "nič nič"
        if string[3] == '0':
            # minutes with a leading zero are read digit by digit
            speak += pronounce_number_sl(0) + " "
            return speak + pronounce_number_sl(int(string[4]))
        return speak + pronounce_number_sl(int(minute_text))

    if dt.minute == 0:
        if dt.hour == 0:
            return "polnoč"
        if dt.hour == 12:
            return "poldne"

    hour = dt.hour % 12 or 12  # 12 hour clock and 0 is spoken as 12
    if dt.minute == 0:
        speak = pronounce_number_sl(hour)
    elif dt.minute < 30:
        # minutes past the hour
        speak = pronounce_number_sl(dt.minute) + " čez " \
            + pronounce_number_sl(hour)
    elif dt.minute == 30:
        # half of the next hour
        next_hour = (dt.hour + 1) % 12 or 12
        speak = "pol " + _hour_declension(next_hour)
    else:
        # minutes to the next hour
        next_hour = (dt.hour + 1) % 12 or 12
        speak = pronounce_number_sl(60 - dt.minute) + " do " \
            + _hour_declension(next_hour)

    if use_ampm:
        speak += " p.m." if dt.hour > 11 else " a.m."
    return speak

View File

@@ -0,0 +1,376 @@
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from .format_common import convert_to_mixed_fraction
from lingua_franca.lang.common_data_sv import _EXTRA_SPACE_SV, \
_FRACTION_STRING_SV, _MONTHS_SV, _NUM_POWERS_OF_TEN_SV, _NUM_STRING_SV
from math import floor
def nice_number_sv(number, speech=True, denominators=range(1, 21)):
    """ Swedish helper for nice_number

    Formats a float as a mixed fraction that is easy to read or speak,
    e.g. 4.5 becomes "4 och en halv" for speech and "4 1/2" for text.

    Args:
        number (int or float): the float to format
        speech (bool): format for speech (True) or display (False)
        denominators (iter of ints): denominators to use, default [1 .. 20]
    Returns:
        (str): The formatted string.
    """
    mixed = convert_to_mixed_fraction(number, denominators)
    if not mixed:
        # No usable fraction: fall back to a number rounded to 3 decimals
        return str(round(number, 3))

    whole, num, den = mixed
    if num == 0:
        # Whole numbers look the same for speech and display
        # TODO: Number grouping?  E.g. "1,000,000"
        return str(whole)
    if not speech:
        return '{} {}/{}'.format(whole, num, den)

    den_str = _FRACTION_STRING_SV[den]
    # a numerator of one is spoken as "en", anything else stays a numeral
    count = 'en' if num == 1 else num
    fraction = '{} {}'.format(count, den_str)
    spoken = fraction if whole == 0 else '{} och {}'.format(whole, fraction)
    # plural
    if num > 1:
        spoken += 'ar'
    return spoken
def pronounce_number_sv(number, places=2, short_scale=True, scientific=False,
                        ordinals=False):
    """
    Convert a number to its spoken Swedish equivalent

    For example, 5.2 would return 'fem komma två'

    Args:
        number(float or int): the number to pronounce
        places(int): number of decimal places to speak
        short_scale (bool) : use short (True) or long scale (False)
            https://en.wikipedia.org/wiki/Names_of_large_numbers
            (currently ignored)
        scientific (bool): pronounce in scientific notation
            (currently ignored)
        ordinals (bool): pronounce in ordinal form "first" instead of "one"
            (currently ignored)
    Returns:
        (str): The pronounced number; numbers whose absolute value is
               10**24 or more are returned as their plain str() form
    """
    # TODO short_scale, scientific and ordinals
    # currently ignored

    def pronounce_triplet_sv(num):
        # Pronounce an integer 0..999 as a single compound word
        result = ""
        num = floor(num)
        if num > 99:
            hundreds = num // 100
            if hundreds == 1:
                result += 'ett' + 'hundra'
            else:
                result += _NUM_STRING_SV[hundreds] + 'hundra'
            num -= hundreds * 100

        if num == 1:
            result += 'ett'
        elif 1 < num <= 20:
            result += _NUM_STRING_SV[num]
        elif num > 20:
            ones = num % 10
            tens = num - ones
            result += _NUM_STRING_SV[tens]
            if ones > 0:
                result += _NUM_STRING_SV[ones]
        return result

    def pronounce_fractional_sv(num, places):
        # fixed number of places even with trailing zeros
        # NOTE(review): digits are extracted with float arithmetic, so a
        # binary rounding error can yield a neighbouring digit - confirm
        # whether that matters for callers.
        result = ""
        place = 10
        while places > 0:
            result += " " + _NUM_STRING_SV[int(num * place) % 10]
            place *= 10
            places -= 1
        return result

    def pronounce_whole_number_sv(num, scale_level=0):
        # Pronounce the lowest three digits with the scale word for
        # scale_level, then recurse on the remaining higher digits.
        if num == 0:
            return ''

        num = floor(num)
        result = ''
        last_triplet = num % 1000

        if last_triplet == 1:
            if scale_level == 0:
                result += 'en'
            elif scale_level == 1:
                result += 'ettusen' + _EXTRA_SPACE_SV
            else:
                result += 'en ' + \
                    _NUM_POWERS_OF_TEN_SV[scale_level] + _EXTRA_SPACE_SV
        elif last_triplet > 1:
            result += pronounce_triplet_sv(last_triplet)
            if scale_level == 1:
                result += 'tusen' + _EXTRA_SPACE_SV
            if scale_level >= 2:
                # plural of the scale word, e.g. MiljonER
                result += _NUM_POWERS_OF_TEN_SV[scale_level]
                result += 'er' + _EXTRA_SPACE_SV

        # Integer floor division: float division would lose the low digits
        # of integers that exceed float precision.
        return pronounce_whole_number_sv(num // 1000, scale_level + 1) + result

    if abs(number) >= 1000000000000000000000000:  # cannot do more than this
        return str(number)
    if number == 0:
        return str(_NUM_STRING_SV[0])
    if number < 0:
        return "minus " + pronounce_number_sv(abs(number), places)
    if number == int(number):
        return pronounce_whole_number_sv(number)

    whole_number_part = floor(number)
    fractional_part = number - whole_number_part
    # A whole part of zero pronounces as '' above; say "noll" explicitly so
    # that 0.5 becomes "noll komma fem" rather than " komma fem".
    result = pronounce_whole_number_sv(whole_number_part) \
        or str(_NUM_STRING_SV[0])
    if places > 0:
        result += " komma"
        result += pronounce_fractional_sv(fractional_part, places)
    return result
def pronounce_ordinal_sv(number):
    """
    This function pronounces a number as a Swedish ordinal

    1 -> första
    2 -> andra

    Args:
        number (int): the number to format
    Returns:
        (str): The pronounced number string. Negative or non-integral
               input is returned unchanged.
    """
    # ordinals for 1, 3, 7 and 8 are irregular
    # this produces the base form, it will have to be adapted for genus,
    # casus, numerus
    ordinals = ["noll", "första", "andra", "tredje", "fjärde", "femte",
                "sjätte", "sjunde", "åttonde", "nionde", "tionde"]

    if number < 0 or number != int(number):
        return number

    # Integral floats such as 3.0 are accepted; convert so the value can be
    # used as a list index.
    number = int(number)

    # 0..10 come straight from the table (10 is "tionde", not a bare "de")
    if number <= 10:
        return ordinals[number]

    ones = number % 10
    tens = number - ones

    result = pronounce_number_sv(tens).rstrip()
    if ones > 0:
        result += ordinals[ones]
    else:
        result += 'de'
    return result
def nice_time_sv(dt, speech=True, use_24hour=False, use_ampm=False):
    """
    Format a time to a comfortable human format in Swedish.

    For example, generate 'kvart över ett' for speech or '01:15' for
    text display.

    Args:
        dt (datetime): date to format (assumes already in local timezone)
        speech (bool): format for speech (default/True) or display (False)
        use_24hour (bool): output in 24-hour/military or 12-hour format
        use_ampm (bool): include the am/pm for 12-hour format
    Returns:
        (str): The formatted time string
    """
    if not speech:
        if use_24hour:
            # e.g. "03:01" or "14:22"
            return dt.strftime("%H:%M")
        if use_ampm:
            # e.g. "03:01 AM" or "02:22 PM"
            return dt.strftime("%I:%M %p")
        # e.g. "03:01" or "02:22"
        return dt.strftime("%I:%M")

    # Generate a speakable version of the time
    speak = ""
    minute = dt.minute

    if use_24hour:
        if dt.hour == 1:
            speak += "ett"  # 01:00 is "ett" not "en"
        else:
            speak += pronounce_number_sv(dt.hour)
        if minute != 0:
            if minute < 10:
                speak += ' noll'
            if minute == 1:
                speak += ' ett'
            else:
                speak += " " + pronounce_number_sv(minute)
        return speak  # ampm is ignored when use_24hour is true

    hour = dt.hour
    if minute != 0:
        if minute < 30:
            # minutes past the hour
            if minute != 15:
                speak += pronounce_number_sv(minute)
            else:
                speak += 'kvart'

            if minute == 1:
                speak += ' minut över '
            elif minute not in (5, 10, 15):
                speak += ' minuter över '
            else:
                speak += ' över '
        elif minute > 30:
            # minutes remaining to the next hour
            remaining = 60 - minute
            if minute != 45:
                speak += pronounce_number_sv(remaining)
            else:
                speak += 'kvart'

            # The singular applies when ONE minute remains (xx:59); the
            # previous test on minute == 1 could never be true here.
            if remaining == 1:
                speak += ' minut i '
            elif minute not in (45, 50, 55):
                speak += ' minuter i '
            else:
                speak += ' i '
            hour = (hour + 1) % 12
        else:
            # exactly half past: expressed relative to the next hour
            speak += 'halv '
            hour = (hour + 1) % 12

    if hour == 0 and minute == 0:
        return "midnatt"
    if hour == 12 and minute == 0:
        return "middag"

    if hour == 0:
        speak += pronounce_number_sv(12)
    elif hour <= 13:
        if hour == 1 or hour == 13:  # 01:00 and 13:00 is "ett"
            speak += 'ett'
        else:
            speak += pronounce_number_sv(hour)
    else:
        speak += pronounce_number_sv(hour - 12)

    if use_ampm:
        # The day period is chosen from the real hour, not the shifted one.
        if dt.hour > 11:
            if dt.hour < 18:
                # 12:00 - 17:59 afternoon
                speak += " på eftermiddagen"
            elif dt.hour < 22:
                # 18:00 - 21:59 evening
                speak += " på kvällen"
            else:
                # 22:00 - 23:59 at night
                speak += " på natten"
        elif dt.hour < 3:
            # 00:00 - 02:59 at night
            speak += " på natten"
        else:
            # 03:00 - 11:59 in the morning
            speak += " på morgonen"

    return speak
def nice_response_sv(text):
    """
    Post-process a Swedish response for speech.

    * a "^" followed by a numeric word is spoken as "upphöjt till"
      (to the power of)
    * if the text mentions a month, ordinals in front of months are
      expanded via _nice_ordinal_sv

    Both rewrites are applied regardless of the order in which a month
    and a "^" appear in the text.

    Args:
        text (str): the response text
    Returns:
        (str): the adapted text; returned unchanged when nothing applies
    """
    words = text.split()

    power_replaced = False
    for idx, word in enumerate(words):
        if word == '^':
            wordNext = words[idx + 1] if idx + 1 < len(words) else ""
            if wordNext.isnumeric():
                words[idx] = "upphöjt till"
                power_replaced = True

    has_month = any(word.lower() in _MONTHS_SV for word in words)

    # Rebuild the text only when a "^" was actually replaced, so that
    # untouched input keeps its original spacing.
    if power_replaced:
        text = " ".join(words)
    # Expand ordinals last, on top of the power replacement, so neither
    # rewrite overwrites the other.
    if has_month:
        text = _nice_ordinal_sv(text)
    return text
def _nice_ordinal_sv(text, speech=True):
    """
    Spell out "<digits>." tokens that directly precede a month name.

    The ordinal gets an "n" suffix when the preceding word is one of a
    fixed set of articles/prepositions, and an "r" suffix otherwise.

    Args:
        text (str): the text to adapt
        speech (bool): not used by this function
    Returns:
        (str): the text with whitespace-joined words if at least one
               ordinal was replaced, otherwise the input unchanged
    """
    n_suffix_after = ("om", "den", "från", "till", "(från", "(om")

    tokens = text.split()
    adapted = text
    for pos, token in enumerate(tokens):
        # Only "<digits>." tokens are candidates.
        if not token.endswith("."):
            continue
        digits = token[:-1]
        if not digits.isdecimal():
            continue

        following = tokens[pos + 1] if pos + 1 < len(tokens) else ""
        if following.lower() not in _MONTHS_SV:
            continue

        preceding = tokens[pos - 1] if pos > 0 else ""
        spoken = pronounce_ordinal_sv(int(digits))
        if preceding.lower() in n_suffix_after:
            spoken += "n"
        else:
            spoken += "r"

        tokens[pos] = spoken
        adapted = " ".join(tokens)
    return adapted

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,387 @@
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from collections import namedtuple
import re
class Normalizer:
    """
    Configurable utterance normalizer.

    Individual languages may subclass this if needed; normalize_XX should
    pass a valid config read from json.

    Every behaviour is driven by the config dict: the ``should_*``
    properties read boolean switches, the remaining properties read the
    replacement tables and word lists. Missing keys fall back to the
    defaults visible in each property.
    """
    _default_config = {}

    def __init__(self, config=None):
        """
        Args:
            config (dict): normalization settings; a falsy value selects
                the class level default config
        """
        self.config = config or self._default_config

    @staticmethod
    def tokenize(utterance):
        """Split an utterance into words, detaching "%" and "#" from digits."""
        # Split things like 12%
        utterance = re.sub(r"([0-9]+)([\%])", r"\1 \2", utterance)
        # Split things like #1
        utterance = re.sub(r"(\#)([0-9]+\b)", r"\1 \2", utterance)
        return utterance.split()

    @property
    def should_lowercase(self):
        return self.config.get("lowercase", False)

    @property
    def should_numbers_to_digits(self):
        return self.config.get("numbers_to_digits", True)

    @property
    def should_expand_contractions(self):
        return self.config.get("expand_contractions", True)

    @property
    def should_remove_symbols(self):
        return self.config.get("remove_symbols", False)

    @property
    def should_remove_accents(self):
        return self.config.get("remove_accents", False)

    @property
    def should_remove_articles(self):
        return self.config.get("remove_articles", False)

    @property
    def should_remove_stopwords(self):
        return self.config.get("remove_stopwords", False)

    @property
    def contractions(self):
        return self.config.get("contractions", {})

    @property
    def word_replacements(self):
        return self.config.get("word_replacements", {})

    @property
    def number_replacements(self):
        return self.config.get("number_replacements", {})

    @property
    def accents(self):
        # Each row lists the acute, grave, tilde and circumflex variants
        # of one vowel. The tilde "e" entries must be real characters: an
        # empty-string key would make str.replace insert the replacement
        # between every character of the utterance.
        return self.config.get("accents",
                               {"á": "a", "à": "a", "ã": "a", "â": "a",
                                "é": "e", "è": "e", "ê": "e", "ẽ": "e",
                                "í": "i", "ì": "i", "î": "i", "ĩ": "i",
                                "ò": "o", "ó": "o", "ô": "o", "õ": "o",
                                "ú": "u", "ù": "u", "û": "u", "ũ": "u",
                                "Á": "A", "À": "A", "Ã": "A", "Â": "A",
                                "É": "E", "È": "E", "Ê": "E", "Ẽ": "E",
                                "Í": "I", "Ì": "I", "Î": "I", "Ĩ": "I",
                                "Ò": "O", "Ó": "O", "Ô": "O", "Õ": "O",
                                "Ú": "U", "Ù": "U", "Û": "U", "Ũ": "U"
                                })

    @property
    def stopwords(self):
        return self.config.get("stopwords", [])

    @property
    def articles(self):
        return self.config.get("articles", [])

    @property
    def symbols(self):
        return self.config.get("symbols",
                               [";", "_", "!", "?", "<", ">",
                                "|", "(", ")", "=", "[", "]", "{",
                                "}", "»", "«", "*", "~", "^", "`"])

    def _replace_tokens(self, utterance, mapping):
        """Replace every token that is a key of mapping by its value."""
        words = self.tokenize(utterance)
        return " ".join(mapping[w] if w in mapping else w for w in words)

    def _blank_tokens(self, utterance, unwanted):
        """Replace every token found in unwanted by an empty string."""
        words = self.tokenize(utterance)
        # Blanked tokens leave doubled spaces behind; normalize() removes
        # them at the very end.
        return " ".join("" if w in unwanted else w for w in words)

    def expand_contractions(self, utterance):
        """ Expand common contractions, e.g. "isn't" -> "is not" """
        return self._replace_tokens(utterance, self.contractions)

    def numbers_to_digits(self, utterance):
        """Replace tokens listed in the number_replacements table."""
        return self._replace_tokens(utterance, self.number_replacements)

    def remove_articles(self, utterance):
        """Blank out every token listed as an article."""
        return self._blank_tokens(utterance, self.articles)

    def remove_stopwords(self, utterance):
        """Blank out every stopword and drop an orphaned trailing hyphen."""
        utterance = self._blank_tokens(utterance, self.stopwords)
        # Remove trailing whitespaces from utterance along with orphaned
        # hyphens, more characters may be added later
        return re.sub(r'- *$', '', utterance)

    def remove_symbols(self, utterance):
        """Replace every configured symbol by a space."""
        for s in self.symbols:
            utterance = utterance.replace(s, " ")
        return utterance

    def remove_accents(self, utterance):
        """Replace accented characters using the accents table."""
        accents = self.accents
        for accented, plain in accents.items():
            utterance = utterance.replace(accented, plain)
        return utterance

    def replace_words(self, utterance):
        """Replace tokens listed in the word_replacements table."""
        return self._replace_tokens(utterance, self.word_replacements)

    def normalize(self, utterance="", remove_articles=None):
        """
        Run the configured normalization steps over an utterance.

        Args:
            utterance (str): the text to normalize
            remove_articles (bool): kept for backwards compatibility; a
                truthy value forces article removal, otherwise the config
                decides
        Returns:
            (str): the normalized text with single spaces between words
        """
        # mutations
        if self.should_lowercase:
            utterance = utterance.lower()
        if self.should_expand_contractions:
            utterance = self.expand_contractions(utterance)
        if self.should_numbers_to_digits:
            utterance = self.numbers_to_digits(utterance)
        utterance = self.replace_words(utterance)

        # removals
        if self.should_remove_symbols:
            utterance = self.remove_symbols(utterance)
        if self.should_remove_accents:
            utterance = self.remove_accents(utterance)
        # TODO deprecate remove_articles param, backwards compat
        if remove_articles or self.should_remove_articles:
            utterance = self.remove_articles(utterance)
        if self.should_remove_stopwords:
            utterance = self.remove_stopwords(utterance)

        # remove extra spaces
        return " ".join([w for w in utterance.split(" ") if w])
# Token is intended to be used in the number processing functions in
# this module. The parsing requires slicing and dividing of the original
# text. To ensure things parse correctly, we need to know where text came
# from in the original input, hence this namedtuple.
# Fields: `word` is the token text, `index` its position in the input.
Token = namedtuple('Token', 'word index')
class ReplaceableNumber:
    """
    Similar to Token, this class is used in number parsing.

    Once we've found a number in a string, this class contains all
    the info about the value, and where it came from in the original text.
    In other words, it is the text, and the number that can replace it in
    the string.

    Instances are write-once: assigning to an attribute that already
    exists raises an exception.
    """

    # The annotation is a string so that it is a valid type hint and is
    # not evaluated when the class body is executed.
    def __init__(self, value, tokens: "list[Token]"):
        """
        Args:
            value: the extracted number (None or False mean "no number")
            tokens: the tokens the value was read from, in text order
        """
        self.value = value
        self.tokens = tokens

    def __bool__(self):
        # 0 is a legitimate value, so only None and False count as empty.
        return self.value is not None and self.value is not False

    @property
    def start_index(self):
        """Index of the first token that belongs to the number."""
        return self.tokens[0].index

    @property
    def end_index(self):
        """Index of the last token that belongs to the number."""
        return self.tokens[-1].index

    @property
    def text(self):
        """The words of all tokens joined by single spaces."""
        return ' '.join([t.word for t in self.tokens])

    def __setattr__(self, key, value):
        # Allow the first assignment of each attribute (done in __init__)
        # and reject every later one.
        try:
            getattr(self, key)
        except AttributeError:
            super().__setattr__(key, value)
        else:
            raise Exception("Immutable!")

    def __str__(self):
        return "({v}, {t})".format(v=self.value, t=self.tokens)

    def __repr__(self):
        return "{n}({v}, {t})".format(n=self.__class__.__name__, v=self.value,
                                      t=self.tokens)
def tokenize(text):
    """
    Turn a string into a list of Token objects.

    The words come from Normalizer.tokenize; each Token records the word
    together with its position in that word list.

    Args:
        text str: Text to tokenize.
    Returns:
        [Token]
    """
    tokens = []
    for position, piece in enumerate(Normalizer.tokenize(text)):
        tokens.append(Token(piece, position))
    return tokens
def partition_list(items, split_on):
    """
    Partition a list of items around separator elements.

    Every item for which split_on returns True becomes a one-element
    group of its own; the runs of items between separators become the
    other groups. Empty runs are left out of the result.

    Args:
        items: iterable of items to partition
        split_on callable:
            Called once per item, in order; a truthy result marks the
            item as a separator.
    Returns:
        [[any]]
    """
    groups = []
    pending = []
    for element in items:
        if not split_on(element):
            pending.append(element)
            continue
        # Flush the run collected so far, then emit the separator itself.
        if pending:
            groups.append(pending)
        groups.append([element])
        pending = []
    if pending:
        groups.append(pending)
    return groups
def invert_dict(original):
    """
    Build a dictionary that maps each value of the input to its key.

    When several keys share a value, the key seen last wins.

    Args:
        original dict: The dict like object to invert
    Returns:
        dict
    """
    inverted = {}
    for key, value in original.items():
        inverted[value] = key
    return inverted
def is_numeric(input_str):
    """
    Test whether a string can be converted by float().

    Args:
        input_str (str): string to test if a number
    Returns:
        (bool): True if a number, else False
    """
    try:
        float(input_str)
    except ValueError:
        return False
    else:
        return True
def look_for_fractions(split_list):
    """
    Decide whether the pieces of a '/'-split word form a fraction.

    Args:
        split_list (list): list created by splitting on '/'
    Returns:
        (bool): True if there are exactly two pieces and both are
                numeric, otherwise False
    """
    if len(split_list) != 2:
        return False
    if not is_numeric(split_list[0]):
        return False
    return bool(is_numeric(split_list[1]))
def extract_numbers_generic(text, pronounce_handler, extract_handler,
                            short_scale=True, ordinals=False):
    """
    Collect every number found in a string.

    Language agnostic: the per-language work is delegated to the two
    handlers. The extract handler is called repeatedly; after each hit the
    number is removed from the working text so the next call finds the
    one before it.

    Args:
        text (str): the string to extract a number from
        pronounce_handler (function): function that pronounces a number
        extract_handler (function): function that extracts the last number
                                    present in a string
        short_scale (bool): Use "short scale" or "long scale" for large
            numbers -- over a million.  The default is short scale, which
            is now common in most English speaking countries.
            See https://en.wikipedia.org/wiki/Names_of_large_numbers
        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
    Returns:
        list: the extracted numbers, in the order they appear in the text
    """
    def _swap_last(haystack, needle, substitute):
        # handle duplicate occurrences, replace last one only
        return substitute.join(haystack.rsplit(needle, 1))

    found = []
    remaining = text
    current = extract_handler(text, short_scale, ordinals)
    while current:
        found.append(current)
        before = remaining

        spoken = pronounce_handler(current)
        digits = str(current)
        if digits.endswith(".0"):
            digits = digits[:-2]

        # Turn the spoken form into digits, then blank those digits so
        # the handler moves on, e.g. for "test one two 3".
        remaining = _swap_last(remaining, spoken, digits)
        remaining = _swap_last(remaining, digits, " ")

        if remaining == before:
            # avoid infinite loops, occasionally pronounced number may be
            # different from extracted text,
            # ie pronounce(0.5) != half and extract(half) == 0.5
            # TODO fix this
            current = False
        else:
            current = extract_handler(remaining, short_scale, ordinals)

    found.reverse()
    return found

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,891 @@
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from datetime import datetime
from dateutil.relativedelta import relativedelta
from lingua_franca.lang.parse_common import is_numeric, look_for_fractions, \
extract_numbers_generic, Normalizer
from lingua_franca.lang.common_data_da import _DA_NUMBERS
from lingua_franca.lang.format_da import pronounce_number_da
from lingua_franca.time import now_local
def extract_number_da(text, short_scale=True, ordinals=False):
"""
Extract a single number from a Danish utterance.
The text is lower-cased, the words "den" and "det" are dropped and the
remaining words are scanned from left to right. Digits, fraction words,
ordinal words, entries of _DA_NUMBERS and "a/b" style fractions are
recognized; two numbers joined by "og" are added together.
Args:
text (str): the string to extract a number from
short_scale (bool): unused, kept for API compatibility
ordinals (bool): unused, kept for API compatibility
Returns:
(int) or (float): The value of the extracted number, or False when no
number was found
"""
# TODO: short_scale and ordinals don't do anything here.
# The parameters are present in the function signature for API compatibility
# reasons.
text = text.lower()
aWords = text.split()
aWords = [word for word in aWords if
word not in ["den", "det"]]
# and_pass is set once a number followed by "og" has been stashed in
# valPreAnd; the next number found is then added to it.
and_pass = False
valPreAnd = False
val = False
count = 0
while count < len(aWords):
word = aWords[count]
if is_numeric(word):
# NOTE(review): numeric words that are not pure digits (e.g. "2.5")
# leave val untouched here and are then skipped below.
if word.isdigit(): # doesn't work with decimals
val = float(word)
elif is_fractional_da(word):
val = is_fractional_da(word)
elif is_ordinal_da(word):
val = is_ordinal_da(word)
else:
if word in _DA_NUMBERS:
val = _DA_NUMBERS[word]
if count < (len(aWords) - 1):
wordNext = aWords[count + 1]
else:
wordNext = ""
# a number word followed by a fraction word is multiplied with it
valNext = is_fractional_da(wordNext)
if valNext:
val = val * valNext
aWords[count + 1] = ""
if not val:
# look for fractions like "2/3"
aPieces = word.split('/')
# if (len(aPieces) == 2 and is_numeric(aPieces[0])
# and is_numeric(aPieces[1])):
if look_for_fractions(aPieces):
val = float(aPieces[0]) / float(aPieces[1])
elif and_pass:
# added to value, quit here
val = valPreAnd
break
else:
count += 1
continue
# the current word has been consumed
aWords[count] = ""
if and_pass:
aWords[count - 1] = '' # remove "og"
val += valPreAnd
elif count + 1 < len(aWords) and aWords[count + 1] == 'og':
and_pass = True
valPreAnd = val
val = False
count += 2
continue
elif count + 2 < len(aWords) and aWords[count + 2] == 'og':
and_pass = True
valPreAnd = val
val = False
count += 3
continue
break
return val or False
def extract_datetime_da(text, anchorDate=None, default_time=None):
# Extract a date and/or time from a Danish utterance.
#
# Args:
#     text: the utterance to parse
#     anchorDate: reference datetime; now_local() is used when falsy
#     default_time: object with .hour/.minute, applied when the text gave
#         no absolute hour or minute
# Returns:
#     None when text is empty or nothing date-like was found, otherwise
#     [extracted datetime, remaining text with the parsed words removed]
#
# The function works in three passes: (1) date words, (2) time words,
# (3) applying the collected offsets to the anchor date.
def clean_string(s):
"""
cleans the input string of unneeded punctuation
and capitalization among other things.
'am' is a preposition, so cannot currently be used
for 12 hour date format
"""
# NOTE(review): the chain below contains .replace('', ' '). Replacing the
# empty string inserts a space between every character, so the following
# split() would yield single characters. The literal looks like it lost
# its content - confirm against the upstream file.
s = s.lower().replace('?', '').replace('.', '').replace(',', '') \
.replace(' den ', ' ').replace(' det ', ' ').replace(' om ',
' ').replace(
' om ', ' ') \
.replace('', ' ').replace(' om ', ' ')
wordList = s.split()
# ordinal words are replaced by their number, as a string
for idx, word in enumerate(wordList):
if is_ordinal_da(word) is not False:
word = str(is_ordinal_da(word))
wordList[idx] = word
return wordList
def date_found():
# True as soon as any date or time component has been set
return found or \
(
datestr != "" or timeStr != "" or
yearOffset != 0 or monthOffset != 0 or
dayOffset is True or hrOffset != 0 or
hrAbs or minOffset != 0 or
minAbs or secOffset != 0
)
if text == "":
return None
anchorDate = anchorDate or now_local()
found = False
daySpecified = False
dayOffset = False
monthOffset = 0
yearOffset = 0
dateNow = anchorDate
today = dateNow.strftime("%w")
currentYear = dateNow.strftime("%Y")
fromFlag = False
datestr = ""
hasYear = False
timeQualifier = ""
# NOTE(review): 'formidag' is spelled with one "d" while 'formiddagen'
# has two - probably a typo; confirm.
timeQualifiersList = ['tidlig',
'morgen',
'morgenen',
'formidag',
'formiddagen',
'eftermiddag',
'eftermiddagen',
'aften',
'aftenen',
'nat',
'natten']
# NOTE(review): the empty-string entry looks like a literal that lost its
# content; it makes every blanked word count as a marker. Confirm.
markers = ['i', 'om', '', 'klokken', 'ved']
days = ['mandag', 'tirsdag', 'onsdag',
'torsdag', 'fredag', 'lørdag', 'søndag']
# NOTE(review): 'desember' / 'des' - the Danish spelling is presumably
# 'december' / 'dec'; confirm.
months = ['januar', 'februar', 'marts', 'april', 'maj', 'juni',
'juli', 'august', 'september', 'oktober', 'november',
'desember']
monthsShort = ['jan', 'feb', 'mar', 'apr', 'maj', 'juni', 'juli', 'aug',
'sep', 'okt', 'nov', 'des']
validFollowups = days + months + monthsShort
# NOTE(review): "i dag" contains a space and can never equal a single
# word produced by split().
validFollowups.append("i dag")
validFollowups.append("morgen")
validFollowups.append("næste")
validFollowups.append("forige")
validFollowups.append("nu")
words = clean_string(text)
# ---- pass 1: dates ----
for idx, word in enumerate(words):
if word == "":
continue
wordPrevPrev = words[idx - 2] if idx > 1 else ""
wordPrev = words[idx - 1] if idx > 0 else ""
wordNext = words[idx + 1] if idx + 1 < len(words) else ""
wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
# start/used describe the slice of words consumed by this match
start = idx
used = 0
# save timequalifier for later
if word in timeQualifiersList:
timeQualifier = word
# parse today, tomorrow, day after tomorrow
elif word == "dag" and not fromFlag:
dayOffset = 0
used += 1
elif word == "morgen" and not fromFlag and wordPrev != "om" and \
wordPrev not in days: # morgen means tomorrow if not "am
# Morgen" and not [day of the week] morgen
dayOffset = 1
used += 1
elif word == "overmorgen" and not fromFlag:
dayOffset = 2
used += 1
# parse 5 days, 10 weeks, last week, next week
# NOTE(review): in the branches below wordPrev[0] raises IndexError when
# wordPrev is "" (first word, or a previously blanked word).
elif word == "dag" or word == "dage":
if wordPrev[0].isdigit():
dayOffset += int(wordPrev)
start -= 1
used = 2
# NOTE(review): "and" binds tighter than "or", so "not fromFlag" only
# guards the "uger" alternative.
elif word == "uge" or word == "uger" and not fromFlag:
if wordPrev[0].isdigit():
dayOffset += int(wordPrev) * 7
start -= 1
used = 2
elif wordPrev[:6] == "næste":
dayOffset = 7
start -= 1
used = 2
# NOTE(review): a 5-character slice can never equal the 6-character
# "forige", so this branch (and the same test further down) is dead.
elif wordPrev[:5] == "forige":
dayOffset = -7
start -= 1
used = 2
# parse 10 months, next month, last month
elif word == "måned" and not fromFlag:
if wordPrev[0].isdigit():
monthOffset = int(wordPrev)
start -= 1
used = 2
elif wordPrev[:6] == "næste":
monthOffset = 1
start -= 1
used = 2
elif wordPrev[:5] == "forige":
monthOffset = -1
start -= 1
used = 2
# parse 5 years, next year, last year
elif word == "år" and not fromFlag:
if wordPrev[0].isdigit():
yearOffset = int(wordPrev)
start -= 1
used = 2
# NOTE(review): " næste" has a leading space and cannot match a word from
# split(); the next branch then maps "næste" to -1. Looks like the two
# branches were meant to be "next year" (+1) and "last year" (-1).
elif wordPrev[:6] == " næste":
yearOffset = 1
start -= 1
used = 2
elif wordPrev[:6] == "næste":
yearOffset = -1
start -= 1
used = 2
# parse Monday, Tuesday, etc., and next Monday,
# last Tuesday, etc.
elif word in days and not fromFlag:
d = days.index(word)
dayOffset = (d + 1) - int(today)
used = 1
if dayOffset < 0:
dayOffset += 7
if wordNext == "morgen":
# morgen means morning if preceded by
# the day of the week
words[idx + 1] = "tidlig"
if wordPrev[:6] == "næste":
dayOffset += 7
used += 1
start -= 1
elif wordPrev[:5] == "forige":
dayOffset -= 7
used += 1
start -= 1
# parse 15 of July, June 20th, Feb 18, 19 of February
# NOTE(review): same operator-precedence caveat as for "uge"/"uger".
elif word in months or word in monthsShort and not fromFlag:
try:
m = months.index(word)
except ValueError:
m = monthsShort.index(word)
used += 1
datestr = months[m]
# NOTE(review): "of" is an English word; presumably a leftover from the
# English parser.
if wordPrev and (wordPrev[0].isdigit() or
(wordPrev == "of" and wordPrevPrev[0].isdigit())):
if wordPrev == "of" and wordPrevPrev[0].isdigit():
datestr += " " + words[idx - 2]
used += 1
start -= 1
else:
datestr += " " + wordPrev
start -= 1
used += 1
if wordNext and wordNext[0].isdigit():
datestr += " " + wordNext
used += 1
hasYear = True
else:
hasYear = False
elif wordNext and wordNext[0].isdigit():
datestr += " " + wordNext
used += 1
if wordNextNext and wordNextNext[0].isdigit():
datestr += " " + wordNextNext
used += 1
hasYear = True
else:
hasYear = False
# parse 5 days from tomorrow, 10 weeks from next thursday,
# 2 months from July
if (
word == "fra" or word == "til" or word == "om") and wordNext \
in validFollowups:
used = 2
fromFlag = True
if wordNext == "morgenen" and \
wordPrev != "om" and \
wordPrev not in days:
# morgen means tomorrow if not "am Morgen" and not
# [day of the week] morgen:
dayOffset += 1
elif wordNext in days:
d = days.index(wordNext)
tmpOffset = (d + 1) - int(today)
used = 2
if tmpOffset < 0:
tmpOffset += 7
dayOffset += tmpOffset
elif wordNextNext and wordNextNext in days:
d = days.index(wordNextNext)
tmpOffset = (d + 1) - int(today)
used = 3
if wordNext[:6] == "næste":
tmpOffset += 7
used += 1
start -= 1
elif wordNext[:5] == "forige":
tmpOffset -= 7
used += 1
start -= 1
dayOffset += tmpOffset
if used > 0:
# NOTE(review): this test uses "> 0" while the marker test below uses
# ">= 0"; a "denne" at index 0 is therefore not consumed.
if start - 1 > 0 and words[start - 1].startswith("denne"):
start -= 1
used += 1
# blank out the words that were consumed
for i in range(0, used):
words[i + start] = ""
if start - 1 >= 0 and words[start - 1] in markers:
words[start - 1] = ""
found = True
daySpecified = True
# parse time
# ---- pass 2: times ----
timeStr = ""
hrOffset = 0
minOffset = 0
secOffset = 0
hrAbs = None
minAbs = None
for idx, word in enumerate(words):
if word == "":
continue
wordPrevPrev = words[idx - 2] if idx > 1 else ""
wordPrev = words[idx - 1] if idx > 0 else ""
wordNext = words[idx + 1] if idx + 1 < len(words) else ""
wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
wordNextNextNext = words[idx + 3] if idx + 3 < len(words) else ""
wordNextNextNextNext = words[idx + 4] if idx + 4 < len(words) else ""
# parse noon, midnight, morning, afternoon, evening
used = 0
if word[:6] == "middag":
hrAbs = 12
used += 1
# NOTE(review): an 11-character slice equals the 6-character "midnat"
# only for the exact word; inflected forms do not match.
elif word[:11] == "midnat":
hrAbs = 0
used += 1
elif word == "morgenen" or (
wordPrev == "om" and word == "morgenen") or word == "tidlig":
if not hrAbs:
hrAbs = 8
used += 1
elif word[:11] == "eftermiddag":
if not hrAbs:
hrAbs = 15
used += 1
elif word[:5] == "aften":
if not hrAbs:
hrAbs = 19
used += 1
# parse half an hour, quarter hour
elif word == "time" and \
(wordPrev in markers or wordPrevPrev in markers):
if wordPrev[:4] == "halv":
minOffset = 30
elif wordPrev == "kvarter":
minOffset = 15
elif wordPrev == "trekvarter":
minOffset = 45
else:
hrOffset = 1
if wordPrevPrev in markers:
words[idx - 2] = ""
words[idx - 1] = ""
used += 1
# -1 marks "relative offset only, no absolute time" (see the
# hrAbs != -1 test in pass 3)
hrAbs = -1
minAbs = -1
# parse 5:00 am, 12:00 p.m., etc
elif word[0].isdigit():
isTime = True
strHH = ""
strMM = ""
remainder = ""
if ':' in word:
# parse colons
# "3:00 in the morning"
stage = 0
length = len(word)
# NOTE(review): "i -= 1" inside a for-range loop has no effect on the
# next iteration.
for i in range(length):
if stage == 0:
if word[i].isdigit():
strHH += word[i]
elif word[i] == ":":
stage = 1
else:
stage = 2
i -= 1
elif stage == 1:
if word[i].isdigit():
strMM += word[i]
else:
stage = 2
i -= 1
elif stage == 2:
remainder = word[i:].replace(".", "")
break
if remainder == "":
nextWord = wordNext.replace(".", "")
if nextWord == "am" or nextWord == "pm":
remainder = nextWord
used += 1
elif nextWord == "aften":
remainder = "pm"
used += 1
elif wordNext == "om" and wordNextNext == "morgenen":
remainder = "am"
used += 2
elif wordNext == "om" and wordNextNext == "eftermiddagen":
remainder = "pm"
used += 2
elif wordNext == "om" and wordNextNext == "aftenen":
remainder = "pm"
used += 2
elif wordNext == "morgen":
remainder = "am"
used += 1
elif wordNext == "eftermiddag":
remainder = "pm"
used += 1
elif wordNext == "aften":
remainder = "pm"
used += 1
elif wordNext == "i" and wordNextNext == "morgen":
remainder = "am"
used = 2
elif wordNext == "i" and wordNextNext == "eftermiddag":
remainder = "pm"
used = 2
elif wordNext == "i" and wordNextNext == "aften":
remainder = "pm"
used = 2
# NOTE(review): strHH is still a str here, so "strHH > 4", "strHH <= 12"
# and "strHH += 12" below raise TypeError on Python 3.
elif wordNext == "natten":
if strHH > 4:
remainder = "pm"
else:
remainder = "am"
used += 1
else:
if timeQualifier != "":
if strHH <= 12 and \
(timeQualifier == "aftenen" or
timeQualifier == "eftermiddagen"):
strHH += 12 # what happens when strHH is 24?
else:
# try to parse # s without colons
# 5 hours, 10 minutes etc.
length = len(word)
strNum = ""
remainder = ""
for i in range(length):
if word[i].isdigit():
strNum += word[i]
else:
remainder += word[i]
if remainder == "":
remainder = wordNext.replace(".", "").lstrip().rstrip()
if (
remainder == "pm" or
wordNext == "pm" or
remainder == "p.m." or
wordNext == "p.m."):
strHH = strNum
remainder = "pm"
used = 1
elif (
remainder == "am" or
wordNext == "am" or
remainder == "a.m." or
wordNext == "a.m."):
strHH = strNum
remainder = "am"
used = 1
else:
if wordNext == "time" and int(word) < 100:
# "in 3 hours"
hrOffset = int(word)
used = 2
isTime = False
hrAbs = -1
minAbs = -1
elif wordNext == "minut":
# "in 10 minutes"
minOffset = int(word)
used = 2
isTime = False
hrAbs = -1
minAbs = -1
elif wordNext == "sekund":
# in 5 seconds
secOffset = int(word)
used = 2
isTime = False
hrAbs = -1
minAbs = -1
elif wordNext == "time":
strHH = word
used += 1
isTime = True
if wordNextNext == timeQualifier:
strMM = ""
if wordNextNext[:11] == "eftermiddag":
used += 1
remainder = "pm"
elif wordNextNext == "om" and wordNextNextNext == \
"eftermiddagen":
used += 2
remainder = "pm"
elif wordNextNext[:5] == "aften":
used += 1
remainder = "pm"
elif wordNextNext == "om" and wordNextNextNext == \
"aftenen":
used += 2
remainder = "pm"
elif wordNextNext[:6] == "morgen":
used += 1
remainder = "am"
elif wordNextNext == "om" and wordNextNextNext == \
"morgenen":
used += 2
remainder = "am"
elif wordNextNext == "natten":
used += 1
if 8 <= int(word) <= 12:
remainder = "pm"
else:
remainder = "am"
elif is_numeric(wordNextNext):
strMM = wordNextNext
used += 1
if wordNextNextNext == timeQualifier:
if wordNextNextNext[:11] == "eftermiddag":
used += 1
remainder = "pm"
elif wordNextNextNext == "om" and \
wordNextNextNextNext == \
"eftermiddagen":
used += 2
remainder = "pm"
elif wordNextNextNext[:6] == "natten":
used += 1
remainder = "pm"
elif wordNextNextNext == "am" and \
wordNextNextNextNext == "natten":
used += 2
remainder = "pm"
# NOTE(review): a 7-character slice can never equal the 8-character
# "morgenen"; the same applies to the wordNext[:7] test further down.
elif wordNextNextNext[:7] == "morgenen":
used += 1
remainder = "am"
elif wordNextNextNext == "om" and \
wordNextNextNextNext == "morgenen":
used += 2
remainder = "am"
elif wordNextNextNext == "natten":
used += 1
if 8 <= int(word) <= 12:
remainder = "pm"
else:
remainder = "am"
elif wordNext == timeQualifier:
strHH = word
strMM = 00
isTime = True
# NOTE(review): "eftermidag", "eftermiddanen" and "ao" below look like
# typos for "eftermiddag", "eftermiddagen" and "om"; confirm.
if wordNext[:10] == "eftermidag":
used += 1
remainder = "pm"
elif wordNext == "om" and \
wordNextNext == "eftermiddanen":
used += 2
remainder = "pm"
elif wordNext[:7] == "aftenen":
used += 1
remainder = "pm"
elif wordNext == "om" and wordNextNext == "aftenen":
used += 2
remainder = "pm"
elif wordNext[:7] == "morgenen":
used += 1
remainder = "am"
elif wordNext == "ao" and wordNextNext == "morgenen":
used += 2
remainder = "am"
elif wordNext == "natten":
used += 1
if 8 <= int(word) <= 12:
remainder = "pm"
else:
remainder = "am"
# if timeQualifier != "":
# military = True
# else:
# isTime = False
# convert the collected digit strings and apply the am/pm shift
strHH = int(strHH) if strHH else 0
strMM = int(strMM) if strMM else 0
strHH = strHH + 12 if remainder == "pm" and strHH < 12 else strHH
strHH = strHH - 12 if remainder == "am" and strHH >= 12 else strHH
if strHH > 24 or strMM > 59:
isTime = False
used = 0
if isTime:
hrAbs = strHH * 1
minAbs = strMM * 1
used += 1
if used > 0:
# removed parsed words from the sentence
for i in range(used):
words[idx + i] = ""
# NOTE(review): the "idx -= 1" / "idx += used - 1" adjustments do not
# influence the enumerate() loop; they only shift the indices used in the
# marker clean-up below.
if wordPrev == "tidlig":
hrOffset = -1
words[idx - 1] = ""
idx -= 1
elif wordPrev == "sen":
hrOffset = 1
words[idx - 1] = ""
idx -= 1
if idx > 0 and wordPrev in markers:
words[idx - 1] = ""
if idx > 1 and wordPrevPrev in markers:
words[idx - 2] = ""
idx += used - 1
found = True
# check that we found a date
if not date_found():
return None
if dayOffset is False:
dayOffset = 0
# perform date manipulation
# ---- pass 3: build the result ----
extractedDate = dateNow
extractedDate = extractedDate.replace(microsecond=0,
second=0,
minute=0,
hour=0)
if datestr != "":
# translate the Danish month names to English so strptime can read them
# NOTE(review): the short-name loop also rewrites substrings of full
# names (e.g. 'sep' -> 'sept' inside "september"), and the "%B %d" format
# does not accept the year that is appended when hasYear is set; verify.
en_months = ['january', 'february', 'march', 'april', 'may', 'june',
'july', 'august', 'september', 'october', 'november',
'december']
en_monthsShort = ['jan', 'feb', 'mar', 'apr', 'may', 'june', 'july',
'aug',
'sept', 'oct', 'nov', 'dec']
for idx, en_month in enumerate(en_months):
datestr = datestr.replace(months[idx], en_month)
for idx, en_month in enumerate(en_monthsShort):
datestr = datestr.replace(monthsShort[idx], en_month)
temp = datetime.strptime(datestr, "%B %d")
if extractedDate.tzinfo:
temp = temp.replace(tzinfo=extractedDate.tzinfo)
if not hasYear:
# pick this year if the date is still ahead, otherwise next year
temp = temp.replace(year=extractedDate.year)
if extractedDate < temp:
extractedDate = extractedDate.replace(year=int(currentYear),
month=int(
temp.strftime(
"%m")),
day=int(temp.strftime(
"%d")))
else:
extractedDate = extractedDate.replace(
year=int(currentYear) + 1,
month=int(temp.strftime("%m")),
day=int(temp.strftime("%d")))
else:
extractedDate = extractedDate.replace(
year=int(temp.strftime("%Y")),
month=int(temp.strftime("%m")),
day=int(temp.strftime("%d")))
# NOTE(review): timeStr is never assigned a non-empty value in this
# function, so this branch looks unreachable; datetime(timeStr) would
# also raise TypeError for a string argument.
if timeStr != "":
temp = datetime(timeStr)
extractedDate = extractedDate.replace(hour=temp.strftime("%H"),
minute=temp.strftime("%M"),
second=temp.strftime("%S"))
if yearOffset != 0:
extractedDate = extractedDate + relativedelta(years=yearOffset)
if monthOffset != 0:
extractedDate = extractedDate + relativedelta(months=monthOffset)
if dayOffset != 0:
extractedDate = extractedDate + relativedelta(days=dayOffset)
if hrAbs is None and minAbs is None and default_time:
hrAbs = default_time.hour
minAbs = default_time.minute
if hrAbs != -1 and minAbs != -1:
extractedDate = extractedDate + relativedelta(hours=hrAbs or 0,
minutes=minAbs or 0)
# a bare time that already passed today refers to tomorrow
if (hrAbs or minAbs) and datestr == "":
if not daySpecified and dateNow > extractedDate:
extractedDate = extractedDate + relativedelta(days=1)
if hrOffset != 0:
extractedDate = extractedDate + relativedelta(hours=hrOffset)
if minOffset != 0:
extractedDate = extractedDate + relativedelta(minutes=minOffset)
if secOffset != 0:
extractedDate = extractedDate + relativedelta(seconds=secOffset)
# drop an "og" that is left between two consumed words
# NOTE(review): words[idx + 1] raises IndexError when "og" is the last
# word, and words[idx - 1] wraps to the last word when idx is 0.
for idx, word in enumerate(words):
if words[idx] == "og" and words[idx - 1] == "" \
and words[idx + 1] == "":
words[idx] = ""
resultStr = " ".join(words)
resultStr = ' '.join(resultStr.split())
return [extractedDate, resultStr]
def is_fractional_da(input_str, short_scale=True):
    """
    Check whether the given Danish word is a fraction.

    Recognized are words starting with "halv", the word for one third
    (in both its spellings) and a number word from _DA_NUMBERS followed
    by "del". The comparison is case-insensitive.

    Args:
        input_str (str): the string to check if fractional
        short_scale (bool): unused, kept for API compatibility
    Returns:
        (bool) or (float): False if not a fraction, otherwise the fraction
    """
    lowered = input_str.lower()

    if lowered.startswith("halv"):
        return 0.5

    # "tredjedel" is the current spelling, "trediedel" the older one.
    if lowered in ("tredjedel", "trediedel"):
        return 1.0 / 3

    if lowered.endswith('del'):
        stem = lowered[:-3]
        # A zero denominator is not a fraction; skipping it avoids a
        # ZeroDivisionError.
        if stem in _DA_NUMBERS and _DA_NUMBERS[stem]:
            return 1.0 / _DA_NUMBERS[stem]

    return False
def is_ordinal_da(input_str):
    """
    Return the number a Danish ordinal word stands for, or False.

    Irregular ordinals are matched by prefix first; after that the suffixes
    "nde", "ende" and "te" are stripped in turn and the remaining stem is
    looked up in _DA_NUMBERS, so only ordinals whose stem appears in that
    table are recognised.

    Args:
        input_str (str): the string to check if ordinal
    Returns:
        (bool) or (int): False if not an ordinal, otherwise the number
            corresponding to the ordinal
    """
    lowerstr = input_str.lower()
    # Prefix -> value for the ordinals handled explicitly.
    irregular = (
        ("første", 1),
        ("anden", 2),
        ("tredie", 3),
        ("fjerde", 4),
        ("femte", 5),
        ("sjette", 6),
        ("elfte", 11),  # previously returned 1, which is wrong for "eleventh"
        ("tolvfte", 12),
    )
    for prefix, value in irregular:
        if lowerstr.startswith(prefix):
            return value
    # The suffix checks run in sequence on the progressively shortened
    # string, exactly as before: a failed lookup keeps the stripped stem.
    if lowerstr[-3:] == "nde":
        lowerstr = lowerstr[:-3]
        if lowerstr in _DA_NUMBERS:
            return _DA_NUMBERS[lowerstr]
    if lowerstr[-4:] in ["ende"]:
        lowerstr = lowerstr[:-4]
        if lowerstr in _DA_NUMBERS:
            return _DA_NUMBERS[lowerstr]
    if lowerstr[-2:] == "te":
        lowerstr = lowerstr[:-2]
        if lowerstr in _DA_NUMBERS:
            return _DA_NUMBERS[lowerstr]
    return False
def normalize_da(text, remove_articles=True):
    """ Danish string normalization

    Splits on whitespace, optionally drops the words "den" and "det",
    replaces words found in _DA_NUMBERS by their digits and joins the
    result with single spaces.
    """
    dropped = ("den", "det")
    pieces = []
    for token in text.split():  # split() also collapses extra spaces
        if remove_articles and token in dropped:
            continue
        # Convert number words into digits
        if token in _DA_NUMBERS:
            token = str(_DA_NUMBERS[token])
        pieces.append(token)
    return " ".join(pieces)
def extract_numbers_da(text, short_scale=True, ordinals=False):
    """
    Takes in a Danish string and extracts a list of numbers.

    This is a thin wrapper: the work is delegated to extract_numbers_generic,
    which is handed the Danish helpers pronounce_number_da and
    extract_number_da.

    Args:
        text (str): the string to extract a number from
        short_scale (bool): Use "short scale" or "long scale" for large
            numbers -- over a million. Passed through unchanged.
            See https://en.wikipedia.org/wiki/Names_of_large_numbers
        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3.
            Passed through unchanged.
    Returns:
        list: whatever extract_numbers_generic returns
            (presumably a list of floats -- confirm against that helper)
    """
    return extract_numbers_generic(text, pronounce_number_da, extract_number_da,
                                   short_scale=short_scale, ordinals=ordinals)
class DanishNormalizer(Normalizer):
    """Danish normalizer; adds nothing to the Normalizer base class yet.

    TODO implement language specific normalizer
    """

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,381 @@
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import json
from datetime import timedelta
from lingua_franca.internal import resolve_resource_file
from lingua_franca.lang.common_data_fa import (_FARSI_BIG, _FARSI_HUNDREDS,
_FARSI_ONES, _FARSI_TENS,
_FORMAL_VARIANT)
from lingua_franca.lang.parse_common import Normalizer
from lingua_franca.time import now_local
def _is_number(s):
try:
float(s)
return True
except ValueError:
return False
def _parse_sentence(text):
    """Tokenize Farsi text, folding runs of number words into tuples.

    Args:
        text (str): the sentence to parse
    Returns:
        list: the words of the sentence in order; every recognised number is
            replaced by a ``(value, words)`` tuple where ``words`` is the list
            of tokens that produced ``value``. All other words stay as str.
    """
    # Rewrite each key of _FORMAL_VARIANT to its value before splitting.
    for key, value in _FORMAL_VARIANT.items():
        text = text.replace(key, value)
    ar = text.split()
    result = []
    current_number = 0   # value of the groups already closed by a "big" word
    current_words = []   # tokens consumed by the number being built
    s = 0                # value of the group currently being built
    step = 10            # not referenced again in this function
    # mode is one of 'init', 'num_one', 'num_ten', 'num_hundred', the same
    # three with a '_va' suffix (a connecting "و" was just seen), or 'num'
    # (a _FARSI_BIG word was just seen).
    mode = 'init'

    def finish_num():
        # Close the number being built: emit it when non-zero and reset
        # all of the parser state shared with the enclosing function.
        nonlocal current_number
        nonlocal s
        nonlocal result
        nonlocal mode
        nonlocal current_words
        current_number += s
        if current_number != 0:
            result.append((current_number, current_words))
        s = 0
        current_number = 0
        current_words = []
        mode = 'init'

    for x in ar:
        if x == "و":
            # "و" ("and") joins the parts of a number; outside a number it
            # is an ordinary word.
            if mode == 'num_ten' or mode == 'num_hundred' or mode == 'num_one':
                mode += '_va'
                current_words.append(x)
            elif mode == 'num':
                current_words.append(x)
            else:
                finish_num()
                result.append(x)
        elif x == "نیم":
            # "نیم" adds one half and always ends the current number.
            current_words.append(x)
            current_number += 0.5
            finish_num()
        elif x in _FARSI_ONES:
            # The list position is used as the numeric value.
            t = _FARSI_ONES.index(x)
            if mode != 'init' and mode != 'num_hundred_va' and mode != 'num':
                # After "<tens> و" only a value below 10 continues the number.
                if not(t < 10 and mode == 'num_ten_va'):
                    finish_num()
            current_words.append(x)
            s += t
            mode = 'num_one'
        elif x in _FARSI_TENS:
            if mode != 'init' and mode != 'num_hundred_va' and mode != 'num':
                finish_num()
            current_words.append(x)
            s += _FARSI_TENS.index(x)*10
            mode = 'num_ten'
        elif x in _FARSI_HUNDREDS:
            if mode != 'init' and mode != 'num':
                finish_num()
            current_words.append(x)
            s += _FARSI_HUNDREDS.index(x)*100
            mode = 'num_hundred'
        elif x in _FARSI_BIG:
            # Scale the pending group by 1000**d and bank it.
            current_words.append(x)
            d = _FARSI_BIG.index(x)
            if mode == 'init' and d == 1:
                # A bare word at index 1 with no leading count means 1 * 10**3.
                s = 1
            s *= 10**(3*d)
            current_number += s
            s = 0
            mode = 'num'
        elif _is_number(x):
            # A literal numeric token replaces whatever was accumulated in
            # current_number and closes the number immediately.
            current_words.append(x)
            current_number = float(x)
            finish_num()
        else:
            finish_num()
            result.append(x)
    # Flush a number that runs to the end of the sentence.
    if mode[:3] == 'num':
        finish_num()
    return result
# Sub-day unit words mapped to the length of one such unit.
_time_units = {
    'ثانیه': timedelta(seconds=1),  # one second
    'دقیقه': timedelta(minutes=1),  # one minute
    'ساعت': timedelta(hours=1),  # one hour
}
# Day-or-longer unit words mapped to the length of one such unit.
_date_units = {
    'روز': timedelta(days=1),  # one day
    'هفته': timedelta(weeks=1),  # one week
}
def extract_duration_fa(text):
    """
    Convert a Farsi phrase into a duration.

    Every "<number> <unit>" pair found in the text (units are the keys of
    _time_units and _date_units) is added to the total. The words used in
    the duration are consumed and the remaining words are returned.

    A unit word that is not preceded by a number is treated as ordinary
    text and kept in the remainder (previously this raised TypeError).

    Args:
        text (str): string containing a duration
    Returns:
        (timedelta, str):
            A tuple containing the duration and the remaining text
            not consumed in the parsing. The duration is timedelta(0)
            when no duration is found. The connector "و" is never part
            of the remainder.
    """
    remainder = []
    current_number = None  # pending (value, words) tuple awaiting its unit
    result = timedelta(0)
    for x in _parse_sentence(text):
        if x == "و":
            continue
        # Number tuples must be handled before any dict lookup: they hold
        # a list and are therefore not hashable.
        if isinstance(x, tuple):
            current_number = x
            continue
        unit = _time_units.get(x)
        if unit is None:
            unit = _date_units.get(x)
        if unit is not None and current_number is not None:
            result += unit * current_number[0]
            current_number = None
            continue
        # Ordinary word (or a unit with no quantity): a pending number was
        # not part of a duration, so give its words back to the remainder.
        if current_number:
            remainder.extend(current_number[1])
        remainder.append(x)
        current_number = None
    return (result, " ".join(remainder))
def extract_datetime_fa(text, anchorDate=None, default_time=None):
    """ Convert a Farsi date reference into an exact datetime

    Recognises relative day words (day before yesterday .. day after
    tomorrow), weekday names, "now", a time-of-day word following a day,
    and "<number> <unit> <next/previous word>" offsets. If a reference date
    is not provided, the current local time is used. Words used to define
    the date are consumed and the remaining words are returned.

    Args:
        text (str): string containing date words
        anchorDate (datetime): A reference date/time for "tomorrow", etc
        default_time (time): accepted for API compatibility; this
            implementation does not use it
    Returns:
        (datetime, str) or None: the datetime (None when no date words were
            recognised) and the remaining text. None is returned for empty
            input and when a next/previous word directly follows a day,
            where the parser gives up rather than guess.
    """
    if text == "":
        return None

    # Applied strictly in this order. The zero-width non-joiner is turned
    # into a space first so that the joined spellings below match; it is
    # written as an escape because the character is invisible in editors.
    replacements = (
        ('\u200c', ' '),
        ('.', ''),
        ('،', ''),
        ('?', ''),
        ("پس فردا", "پسفردا"),
        ('یک شنبه', 'یکشنبه'),
        ('دو شنبه', 'دوشنبه'),
        ('سه شنبه', 'سهشنبه'),
        ('چهار شنبه', 'چهارشنبه'),
        ('پنج شنبه', 'پنجشنبه'),
        ('بعد از ظهر', 'بعدازظهر'),
    )
    text = text.lower()
    for old, new in replacements:
        text = text.replace(old, new)

    if not anchorDate:
        anchorDate = now_local()
    today = anchorDate.replace(hour=0, minute=0, second=0, microsecond=0)
    today_weekday = int(anchorDate.strftime("%w"))  # 0 = Sunday
    weekday_names = [
        'دوشنبه',
        'سهشنبه',
        'چهارشنبه',
        'پنجشنبه',
        'جمعه',
        'شنبه',
        'یکشنبه',
    ]
    daysDict = {
        'پریروز': today + timedelta(days=-2),
        'دیروز': today + timedelta(days=-1),
        'امروز': today,
        'فردا': today + timedelta(days=1),
        'پسفردا': today + timedelta(days=2),
    }
    timesDict = {
        'صبح': timedelta(hours=8),
        'بعدازظهر': timedelta(hours=15),
    }
    exactDict = {
        'الان': anchorDate,
    }
    nextWords = ["بعد", "دیگه"]
    prevWords = ["پیش", "قبل"]

    ar = _parse_sentence(text)
    # mode: 'none' -> nothing seen yet, 'time' -> a day is known and a
    # time-of-day word may follow, 'delta_date'/'delta_time' -> collecting
    # "<n> <unit>" offsets, 'finished' -> everything else is remainder.
    mode = 'none'
    number_seen = None
    delta_seen = timedelta(0)
    remainder = []
    result = None
    for x in ar:
        # Number tuples hold a list, so they must never reach a dict lookup.
        is_number = isinstance(x, tuple)
        handled = True
        if mode == 'finished':
            # Numbers are given back as their original words so that the
            # final join only ever sees strings.
            if is_number:
                remainder.extend(x[1])
            else:
                remainder.append(x)
        elif x == 'و' and mode[:5] == 'delta':
            pass
        elif is_number:
            number_seen = x
        elif x in weekday_names:
            # weekday_names starts on Monday, %w starts on Sunday.
            dayOffset = (weekday_names.index(x) + 1) - today_weekday
            if dayOffset < 0:
                dayOffset += 7
            result = today + timedelta(days=dayOffset)
            mode = 'time'
        elif x in exactDict:
            result = exactDict[x]
            mode = 'finished'
        elif x in daysDict:
            result = daysDict[x]
            mode = 'time'
        elif x in timesDict and mode == 'time':
            result += timesDict[x]
            # was 'finish', a state no other branch tests for
            mode = 'finished'
        elif x in _date_units:
            k = 1
            if number_seen:
                k = number_seen[0]
                number_seen = None
            delta_seen += _date_units[x] * k
            if mode != 'delta_time':
                mode = 'delta_date'
        elif x in _time_units:
            k = 1
            if number_seen:
                k = number_seen[0]
                number_seen = None
            delta_seen += _time_units[x] * k
            mode = 'delta_time'
        elif x in nextWords or x in prevWords:
            # Give up instead of incorrect result
            if mode == 'time':
                return None
            # The direction was previously computed but never applied, so
            # "ago" phrases moved forward in time.
            sign = 1 if x in nextWords else -1
            if mode == 'delta_date':
                result = today + sign * delta_seen
                mode = 'time'
            elif mode == 'delta_time':
                result = anchorDate + sign * delta_seen
                mode = 'finished'
            else:
                handled = False
        else:
            handled = False

        if handled:
            continue
        # Unrecognised word: a pending number was not part of a date, so
        # its words go back into the remainder ahead of this word.
        if number_seen:
            remainder.extend(number_seen[1])
            number_seen = None
        remainder.append(x)
    return (result, " ".join(remainder))
def is_fractional_fa(input_str, short_scale=True):
    """
    This function takes the given text and checks if it is a fraction.

    The previous body looked words up in _SHORT_ORDINAL_FA and
    _LONG_ORDINAL_FA, names this module neither defines nor imports, so
    every call raised NameError. Only the self-contained table is used now:
    the word for one half that _parse_sentence also recognises, plus the
    English entries the table already carried.

    Args:
        input_str (str): the string to check if fractional
        short_scale (bool): accepted for API compatibility; not used
    Returns:
        (bool) or (float): False if not a fraction, otherwise the fraction
    """
    if input_str.endswith('s', -1):
        input_str = input_str[:len(input_str) - 1]  # e.g. "halves"

    # word -> denominator
    fracts = {"whole": 1, "half": 2, "halve": 2, "quarter": 4, "نیم": 2}

    denominator = fracts.get(input_str.lower())
    if denominator is None:
        return False
    return 1.0 / denominator
def extract_numbers_fa(text, short_scale=True, ordinals=False):
    """
    Collect every number that _parse_sentence finds in a Farsi string.

    Args:
        text (str): the string to extract numbers from
        short_scale (bool): accepted for API compatibility; not used here
        ordinals (bool): accepted for API compatibility; not used here
    Returns:
        list: the numeric value of each number, in order of appearance
    """
    # Numbers come back from the parser as (value, words) tuples; every
    # other token is a plain string.
    return [token[0] for token in _parse_sentence(text)
            if type(token) is tuple]
def extract_number_fa(text, ordinals=False):
    """
    Return the first number found in a Farsi string.

    Args:
        text (str): the string to search
        ordinals (bool): forwarded to extract_numbers_fa
    Returns:
        (int) or (float) or False: The extracted number or False if no number
                                   was found
    """
    numbers = extract_numbers_fa(text, ordinals=ordinals)
    return numbers[0] if numbers else False

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,26 @@
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from lingua_franca.time import now_local
from lingua_franca.lang.parse_common import Normalizer
class HungarianNormalizer(Normalizer):
    """Hungarian normalizer; adds nothing to the Normalizer base class yet.

    TODO implement language specific normalizer
    """
def normalize_hu(text, remove_articles=True):
    """ Hungarian string normalization

    Builds a fresh HungarianNormalizer and returns the result of its
    normalize() call, passing both arguments through unchanged.
    """
    return HungarianNormalizer().normalize(text, remove_articles)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1 @@
# TODO implement parsing function

View File

@@ -0,0 +1,922 @@
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
from lingua_franca.time import now_local
from .parse_common import (is_numeric, look_for_fractions, Normalizer,
tokenize, Token)
def _find_numbers_in_text(tokens):
    """Map every token to the number it stands for, in reverse token order.

    Tokens that yield no number are paired with None. The list is reversed
    so that, when parsing a duration, the unit ("minuter", "timmar", ...)
    is met before the numbers that belong to it.

    Args:
        tokens: Tokens to parse

    Returns:
        list of (number, token) tuples
    """
    forward = []
    for token in tokens:
        number = extract_number_sv(token.word)
        if number:
            forward.append((number, token))
            # Special case for quarter of an hour: supply the unit word.
            if token.word == 'kvart':
                forward.append((None, Token('timmar', index=-1)))
        elif token.word in ['halvtimme', 'halvtimma']:
            # A half hour is rewritten as 30 + a synthetic minutes unit.
            forward.append((30, token))
            forward.append((None, Token('minuter', index=-1)))
        else:
            forward.append((None, token))
    # Appending and reversing once gives the same order as inserting each
    # entry at the front.
    forward.reverse()
    return forward
def _combine_adjacent_numbers(number_map):
"""Combine adjacent numbers through multiplication.
Walks through a number map and joins adjasent numbers to handle cases
such as "en halvtimme" (one half hour).
Returns:
(list): simplified number_map
"""
simplified = []
skip = False
for i in range(len(number_map) - 1):
if skip:
skip = False
continue
if number_map[i][0] and number_map[i + 1][0]:
combined_number = number_map[i][0] * number_map[i + 1][0]
combined_tokens = (number_map[i][1], number_map[i + 1][1])
simplified.append((combined_number, combined_tokens))
skip = True
else:
simplified.append((number_map[i][0], (number_map[i][1],)))
if not skip:
simplified.append((number_map[-1][0], (number_map[-1][1],)))
return simplified
def extract_duration_sv(text):
    """
    Convert a Swedish phrase into a duration.

    The function handles durations from seconds up to days.
    Convert things like:
        "10 minuter"
        "en halvtimme"
        "3 dagar 8 timmar 10 minuter och 49 sekunder"
    into a timedelta.

    The words used in the duration will be consumed, and
    the remainder returned.

    Args:
        text (str): string containing a duration
    Returns:
        (timedelta, str) or None:
                    A tuple containing the duration and the remaining text
                    not consumed in the parsing, or None if no duration
                    is found.
    """
    tokens = tokenize(text)
    number_tok_map = _find_numbers_in_text(tokens)
    # Combine adjacent numbers
    simplified = _combine_adjacent_numbers(number_tok_map)

    states = {
        'days': 0,
        'hours': 0,
        'minutes': 0,
        'seconds': 0
    }

    # Parser state, mapping words that should set the parser to collect
    # numbers to a specific time "size"
    state_words = {
        'days': ('dygn', 'dag', 'dagar', 'dags'),
        'hours': ('timmar', 'timme', 'timma', 'timmes', 'timmas'),
        'minutes': ('minuter', 'minuters', 'minut', 'minuts'),
        'seconds': ('sekunder', 'sekunders', 'sekund', 'sekunds')
    }
    # Must be a real tuple: with the bare string 'och' the membership test
    # below was a substring test, so "o", "c", "h", "oc" and "ch" were all
    # accepted as binding words.
    binding_words = ('och',)

    consumed = []
    state = None
    valid = False

    # The map is in reverse token order, so a unit is seen before the
    # numbers that belong to it.
    for num, toks in simplified:
        if state and num:
            states[state] += num
            consumed.extend(toks)
            valid = True  # If a state field got set this is valid duration
        elif num is None:
            for s in state_words:
                if toks[0].word in state_words[s]:
                    state = s
                    consumed.extend(toks)
                    break
            else:
                # Any word other than a binding word ends the current unit.
                if toks[0].word not in binding_words:
                    state = None

    td = timedelta(**states)
    remainder = ' '.join([t.word for t in tokens if t not in consumed])
    return (td, remainder) if valid else None
def extract_number_sv(text, short_scale=True, ordinals=False):
    """
    Extract the first number from a Swedish string.

    Recognises numeric tokens, the ordinals "första".."sjätte", fraction
    words accepted by is_fractional_sv, the number words "en"/"ett".."tio",
    "a/b" fractions, and "<number> och <number>" sums.

    Args:
        text (str): the string to search
        short_scale (bool): accepted for API compatibility; not used
        ordinals (bool): accepted for API compatibility; not used
    Returns:
        (int) or (float) or False: The value of extracted number, or False
            when nothing (or a zero value) was found
    """
    # TODO: short_scale and ordinals don't do anything here.
    # The parameters are present in the function signature for API
    # compatibility reasons.
    text = text.lower()
    aWords = text.split()
    and_pass = False    # True once "<number> och" has been seen
    valPreAnd = False   # the number that preceded "och"

    val = False
    count = 0
    while count < len(aWords):
        word = aWords[count]
        if is_numeric(word):
            val = float(word)
        elif word == "första":
            val = 1
        elif word == "andra":
            val = 2
        elif word == "tredje":
            val = 3
        elif word == "fjärde":
            val = 4
        elif word == "femte":
            val = 5
        elif word == "sjätte":
            val = 6
        elif is_fractional_sv(word):
            val = is_fractional_sv(word)
        else:
            if word == "en":
                val = 1
            if word == "ett":
                val = 1
            elif word == "två":
                val = 2
            elif word == "tre":
                val = 3
            elif word == "fyra":
                val = 4
            elif word == "fem":
                val = 5
            elif word == "sex":
                val = 6
            elif word == "sju":
                val = 7
            elif word == "åtta":
                val = 8
            elif word == "nio":
                val = 9
            elif word == "tio":
                val = 10
            if val:
                # A number word followed by a fraction word multiplies,
                # e.g. "tre fjärdedelar"; the fraction word is consumed.
                if count < (len(aWords) - 1):
                    wordNext = aWords[count + 1]
                else:
                    wordNext = ""
                valNext = is_fractional_sv(wordNext)

                if valNext:
                    val = val * valNext
                    aWords[count + 1] = ""

        if not val:
            # look for fractions like "2/3"
            aPieces = word.split('/')
            if look_for_fractions(aPieces):
                val = float(aPieces[0]) / float(aPieces[1])
            elif and_pass:
                # nothing numeric follows "och": keep the earlier number
                val = valPreAnd
                break
            else:
                count += 1
                continue

        aWords[count] = ""

        if and_pass:
            aWords[count - 1] = ''  # remove "och"
            val += valPreAnd
        elif count + 1 < len(aWords) and aWords[count + 1] == 'och':
            # "<number> och ...": remember the number, resume after "och"
            and_pass = True
            valPreAnd = val
            val = False
            count += 2
            continue
        elif count + 2 < len(aWords) and aWords[count + 2] == 'och':
            # same, with one word between the number and "och"
            and_pass = True
            valPreAnd = val
            val = False
            count += 3
            continue

        break

    return val or False
def extract_datetime_sv(text, anchorDate=None, default_time=None):
"""
Extract a date/time from a Swedish phrase.

The words are scanned twice: a first pass looks for date words (relative
days, weekday and month names, week/month/year offsets), a second pass
looks for time words and clock times. Consumed words are blanked out and
the rest is returned as the remainder.

Args:
text (str): string containing date words
anchorDate (datetime): reference date/time; now_local() when falsy
default_time: object whose .hour and .minute are used when the text
contains no time
Returns:
[datetime, str] or None: the extracted datetime and the text that was
not consumed, or None when text is empty or nothing date-related
was found.
"""
# NOTE(review): the indentation of this function is missing in this copy;
# the statements below are kept exactly as found, only comments were added.
def clean_string(s):
"""
cleans the input string of unneeded punctuation and capitalization
among other things.
"""
s = s.lower().replace('?', '').replace('.', '').replace(',', '') \
.replace(' den ', ' ').replace(' en ', ' ')
wordList = s.split()
for idx, word in enumerate(wordList):
word = word.replace("'s", "")
ordinals = ["rd", "st", "nd", "th"]
if word[0].isdigit():
for ordinal in ordinals:
if ordinal in word:
word = word.replace(ordinal, "")
wordList[idx] = word
return wordList
def date_found():
return found or \
(
datestr != "" or timeStr != "" or
yearOffset != 0 or monthOffset != 0 or
dayOffset is True or hrOffset != 0 or
hrAbs or minOffset != 0 or
minAbs or secOffset != 0
)
if text == "":
return None
anchorDate = anchorDate or now_local()
found = False
daySpecified = False
dayOffset = False
monthOffset = 0
yearOffset = 0
dateNow = anchorDate
today = dateNow.strftime("%w")
currentYear = dateNow.strftime("%Y")
fromFlag = False
datestr = ""
hasYear = False
timeQualifier = ""
timeQualifiersList = ['morgon', 'förmiddag', 'eftermiddag', 'kväll']
markers = ['', 'i', 'den här', 'kring', 'efter']
days = ['måndag', 'tisdag', 'onsdag', 'torsdag',
'fredag', 'lördag', 'söndag']
months = ['januari', 'februari', 'mars', 'april', 'maj', 'juni',
'juli', 'augusti', 'september', 'oktober', 'november',
'december']
# NOTE(review): these abbreviations are the English ones.
monthsShort = ['jan', 'feb', 'mar', 'apr', 'may', 'june', 'july', 'aug',
'sept', 'oct', 'nov', 'dec']
words = clean_string(text)
# First pass: date words.
for idx, word in enumerate(words):
if word == "":
continue
wordPrevPrev = words[idx - 2] if idx > 1 else ""
wordPrev = words[idx - 1] if idx > 0 else ""
wordNext = words[idx + 1] if idx + 1 < len(words) else ""
wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
# this isn't in clean string because I don't want to save back to words
word = word.rstrip('s')
start = idx
used = 0
# save timequalifier for later
if word in timeQualifiersList:
timeQualifier = word
# parse today, tomorrow, day after tomorrow
elif word == "idag" and not fromFlag:
dayOffset = 0
used += 1
elif word == "imorgon" and not fromFlag:
dayOffset = 1
used += 1
# NOTE(review): "and" binds tighter than "or", so the fromFlag guard only
# applies to the second comparison here (same pattern on the "vecka" and
# month-name branches below).
elif word == "morgondagen" or word == "morgondagens" and not fromFlag:
dayOffset = 1
used += 1
elif word == "övermorgon" and not fromFlag:
dayOffset = 2
used += 1
# parse 5 days, 10 weeks, last week, next week
elif word == "dag" or word == "dagar":
# NOTE(review): wordPrev is "" for the first word, so wordPrev[0] raises
# IndexError there; the week/month/year branches index it the same way.
if wordPrev[0].isdigit():
dayOffset += int(wordPrev)
start -= 1
used = 2
elif word == "vecka" or word == "veckor" and not fromFlag:
if wordPrev[0].isdigit():
dayOffset += int(wordPrev) * 7
start -= 1
used = 2
elif wordPrev == "nästa":
dayOffset = 7
start -= 1
used = 2
elif wordPrev == "förra":
dayOffset = -7
start -= 1
used = 2
# parse 10 months, next month, last month
elif word == "månad" and not fromFlag:
if wordPrev[0].isdigit():
monthOffset = int(wordPrev)
start -= 1
used = 2
elif wordPrev == "nästa":
monthOffset = 1
start -= 1
used = 2
elif wordPrev == "förra":
monthOffset = -1
start -= 1
used = 2
# parse 5 years, next year, last year
elif word == "år" and not fromFlag:
if wordPrev[0].isdigit():
yearOffset = int(wordPrev)
start -= 1
used = 2
elif wordPrev == "nästa":
yearOffset = 1
start -= 1
used = 2
elif wordPrev == "förra":
yearOffset = -1
start -= 1
used = 2
# parse Monday, Tuesday, etc., and next Monday,
# last Tuesday, etc.
elif word in days and not fromFlag:
d = days.index(word)
dayOffset = (d + 1) - int(today)
used = 1
if dayOffset < 0:
dayOffset += 7
if wordPrev == "nästa":
dayOffset += 7
used += 1
start -= 1
elif wordPrev == "förra":
dayOffset -= 7
used += 1
start -= 1
# parse 15 of July, June 20th, Feb 18, 19 of February
elif word in months or word in monthsShort and not fromFlag:
try:
m = months.index(word)
except ValueError:
m = monthsShort.index(word)
used += 1
datestr = months[m]
if wordPrev and (wordPrev[0].isdigit() or
(wordPrev == "of" and wordPrevPrev[0].isdigit())):
if wordPrev == "of" and wordPrevPrev[0].isdigit():
datestr += " " + words[idx - 2]
used += 1
start -= 1
else:
datestr += " " + wordPrev
start -= 1
used += 1
if wordNext and wordNext[0].isdigit():
datestr += " " + wordNext
used += 1
hasYear = True
else:
hasYear = False
elif wordNext and wordNext[0].isdigit():
datestr += " " + wordNext
used += 1
if wordNextNext and wordNextNext[0].isdigit():
datestr += " " + wordNextNext
used += 1
hasYear = True
else:
hasYear = False
# parse 5 days from tomorrow, 10 weeks from next thursday,
# 2 months from July
validFollowups = days + months + monthsShort
validFollowups.append("idag")
validFollowups.append("imorgon")
validFollowups.append("nästa")
validFollowups.append("förra")
validFollowups.append("nu")
if (word == "från" or word == "efter") and wordNext in validFollowups:
used = 2
fromFlag = True
if wordNext == "imorgon":
dayOffset += 1
elif wordNext in days:
d = days.index(wordNext)
tmpOffset = (d + 1) - int(today)
used = 2
if tmpOffset < 0:
tmpOffset += 7
dayOffset += tmpOffset
elif wordNextNext and wordNextNext in days:
d = days.index(wordNextNext)
tmpOffset = (d + 1) - int(today)
used = 3
if wordNext == "nästa":
tmpOffset += 7
used += 1
start -= 1
elif wordNext == "förra":
tmpOffset -= 7
used += 1
start -= 1
dayOffset += tmpOffset
if used > 0:
if start - 1 > 0 and words[start - 1] == "denna":
start -= 1
used += 1
# blank out the consumed words
for i in range(0, used):
words[i + start] = ""
if start - 1 >= 0 and words[start - 1] in markers:
words[start - 1] = ""
found = True
daySpecified = True
# parse time
timeStr = ""
hrOffset = 0
minOffset = 0
secOffset = 0
hrAbs = None
minAbs = None
# Second pass: time words. hrAbs/minAbs of -1 mark "relative offset only".
for idx, word in enumerate(words):
if word == "":
continue
wordPrevPrev = words[idx - 2] if idx > 1 else ""
wordPrev = words[idx - 1] if idx > 0 else ""
wordNext = words[idx + 1] if idx + 1 < len(words) else ""
wordNextNext = words[idx + 2] if idx + 2 < len(words) else ""
# parse noon, midnight, morning, afternoon, evening
used = 0
if word == "middag":
hrAbs = 12
used += 1
elif word == "midnatt":
hrAbs = 0
used += 1
elif word == "morgon":
if not hrAbs:
hrAbs = 8
used += 1
elif word == "förmiddag":
if not hrAbs:
hrAbs = 10
used += 1
elif word == "eftermiddag":
if not hrAbs:
hrAbs = 15
used += 1
elif word == "kväll":
if not hrAbs:
hrAbs = 19
used += 1
# parse half an hour, quarter hour
# NOTE(review): markers contains "", and wordPrev/wordPrevPrev are "" at
# the start of the sentence and after consumed words, so this branch is
# entered for such words before the digit branch below can be reached.
elif wordPrev in markers or wordPrevPrev in markers:
if word == "halvtimme" or word == "halvtimma":
minOffset = 30
elif word == "kvart":
minOffset = 15
elif word == "timme" or word == "timma":
hrOffset = 1
words[idx - 1] = ""
used += 1
hrAbs = -1
minAbs = -1
# parse 5:00 am, 12:00 p.m., etc
# NOTE(review): the keywords tested in this branch ("am", "pm", "tonight",
# "in the morning", "hours", "minutes", "o'clock", ...) are English.
elif word[0].isdigit():
isTime = True
strHH = ""
strMM = ""
remainder = ""
if ':' in word:
# parse colons
# "3:00 in the morning"
stage = 0
length = len(word)
for i in range(length):
if stage == 0:
if word[i].isdigit():
strHH += word[i]
elif word[i] == ":":
stage = 1
else:
stage = 2
i -= 1
elif stage == 1:
if word[i].isdigit():
strMM += word[i]
else:
stage = 2
i -= 1
elif stage == 2:
remainder = word[i:].replace(".", "")
break
if remainder == "":
nextWord = wordNext.replace(".", "")
if nextWord == "am" or nextWord == "pm":
remainder = nextWord
used += 1
elif nextWord == "tonight":
remainder = "pm"
used += 1
elif wordNext == "in" and wordNextNext == "the" and \
words[idx + 3] == "morning":
remainder = "am"
used += 3
elif wordNext == "in" and wordNextNext == "the" and \
words[idx + 3] == "afternoon":
remainder = "pm"
used += 3
elif wordNext == "in" and wordNextNext == "the" and \
words[idx + 3] == "evening":
remainder = "pm"
used += 3
elif wordNext == "in" and wordNextNext == "morning":
remainder = "am"
used += 2
elif wordNext == "in" and wordNextNext == "afternoon":
remainder = "pm"
used += 2
elif wordNext == "in" and wordNextNext == "evening":
remainder = "pm"
used += 2
elif wordNext == "this" and wordNextNext == "morning":
remainder = "am"
used = 2
elif wordNext == "this" and wordNextNext == "afternoon":
remainder = "pm"
used = 2
elif wordNext == "this" and wordNextNext == "evening":
remainder = "pm"
used = 2
elif wordNext == "at" and wordNextNext == "night":
# NOTE(review): strHH is still a str here (built character by character
# above), so comparing it with an int raises TypeError on Python 3; the
# "strHH <= 12" / "strHH += 12" lines below have the same problem.
if strHH > 5:
remainder = "pm"
else:
remainder = "am"
used += 2
else:
if timeQualifier != "":
if strHH <= 12 and \
(timeQualifier == "evening" or
timeQualifier == "afternoon"):
strHH += 12
else:
# try to parse # s without colons
# 5 hours, 10 minutes etc.
length = len(word)
strNum = ""
remainder = ""
for i in range(length):
if word[i].isdigit():
strNum += word[i]
else:
remainder += word[i]
if remainder == "":
remainder = wordNext.replace(".", "").lstrip().rstrip()
if (
remainder == "pm" or
wordNext == "pm" or
remainder == "p.m." or
wordNext == "p.m."):
strHH = strNum
remainder = "pm"
used = 1
elif (
remainder == "am" or
wordNext == "am" or
remainder == "a.m." or
wordNext == "a.m."):
strHH = strNum
remainder = "am"
used = 1
else:
if wordNext == "pm" or wordNext == "p.m.":
strHH = strNum
remainder = "pm"
used = 1
elif wordNext == "am" or wordNext == "a.m.":
strHH = strNum
remainder = "am"
used = 1
elif (
int(word) > 100 and
(
wordPrev == "o" or
wordPrev == "oh"
)):
# 0800 hours (pronounced oh-eight-hundred)
# NOTE(review): "/" is true division on Python 3, so strHH is a float
# here and strMM works out to 0.0 (same in the "int(word) > 100" branch).
strHH = int(word) / 100
strMM = int(word) - strHH * 100
if wordNext == "hours":
used += 1
elif (
wordNext == "hours" and
word[0] != '0' and
(
# NOTE(review): a value cannot be both < 100 and > 2400, so this branch
# is never taken.
int(word) < 100 and
int(word) > 2400
)):
# "in 3 hours"
hrOffset = int(word)
used = 2
isTime = False
hrAbs = -1
minAbs = -1
elif wordNext == "minutes":
# "in 10 minutes"
minOffset = int(word)
used = 2
isTime = False
hrAbs = -1
minAbs = -1
elif wordNext == "seconds":
# in 5 seconds
secOffset = int(word)
used = 2
isTime = False
hrAbs = -1
minAbs = -1
elif int(word) > 100:
strHH = int(word) / 100
strMM = int(word) - strHH * 100
if wordNext == "hours":
used += 1
elif wordNext[0].isdigit():
strHH = word
strMM = wordNext
used += 1
if wordNextNext == "hours":
used += 1
elif (
wordNext == "" or wordNext == "o'clock" or
(
wordNext == "in" and
(
wordNextNext == "the" or
wordNextNext == timeQualifier
)
)):
strHH = word
strMM = 00
if wordNext == "o'clock":
used += 1
if wordNext == "in" or wordNextNext == "in":
used += (1 if wordNext == "in" else 2)
if (wordNextNext and
wordNextNext in timeQualifier or
(words[words.index(wordNextNext) + 1] and
words[words.index(wordNextNext) + 1] in
timeQualifier)):
if (wordNextNext == "afternoon" or
(len(words) >
words.index(wordNextNext) + 1 and
words[words.index(
wordNextNext) + 1] == "afternoon")):
remainder = "pm"
if (wordNextNext == "evening" or
(len(words) >
(words.index(wordNextNext) + 1) and
words[words.index(
wordNextNext) + 1] == "evening")):
remainder = "pm"
if (wordNextNext == "morning" or
(len(words) >
words.index(wordNextNext) + 1 and
words[words.index(
wordNextNext) + 1] == "morning")):
remainder = "am"
else:
isTime = False
# normalise to integers and apply the am/pm marker
strHH = int(strHH) if strHH else 0
strMM = int(strMM) if strMM else 0
strHH = strHH + 12 if remainder == "pm" and strHH < 12 else strHH
strHH = strHH - 12 if remainder == "am" and strHH >= 12 else strHH
if strHH > 24 or strMM > 59:
isTime = False
used = 0
if isTime:
hrAbs = strHH * 1
minAbs = strMM * 1
used += 1
if used > 0:
# removed parsed words from the sentence
for i in range(used):
words[idx + i] = ""
if wordPrev == "o" or wordPrev == "oh":
words[words.index(wordPrev)] = ""
if wordPrev == "early":
hrOffset = -1
words[idx - 1] = ""
idx -= 1
elif wordPrev == "late":
hrOffset = 1
words[idx - 1] = ""
idx -= 1
if idx > 0 and wordPrev in markers:
words[idx - 1] = ""
if idx > 1 and wordPrevPrev in markers:
words[idx - 2] = ""
idx += used - 1
found = True
# check that we found a date
if not date_found():
return None
if dayOffset is False:
dayOffset = 0
# perform date manipulation
extractedDate = dateNow
extractedDate = extractedDate.replace(microsecond=0,
second=0,
minute=0,
hour=0)
if datestr != "":
temp = datetime.strptime(datestr, "%B %d")
if not hasYear:
temp = temp.replace(year=extractedDate.year)
if extractedDate < temp:
extractedDate = extractedDate.replace(year=int(currentYear),
month=int(
temp.strftime(
"%m")),
day=int(temp.strftime(
"%d")))
else:
extractedDate = extractedDate.replace(
year=int(currentYear) + 1,
month=int(temp.strftime("%m")),
day=int(temp.strftime("%d")))
else:
extractedDate = extractedDate.replace(
year=int(temp.strftime("%Y")),
month=int(temp.strftime("%m")),
day=int(temp.strftime("%d")))
# NOTE(review): timeStr is only ever assigned "" in this function, so this
# branch does not run as written.
if timeStr != "":
temp = datetime(timeStr)
extractedDate = extractedDate.replace(hour=temp.strftime("%H"),
minute=temp.strftime("%M"),
second=temp.strftime("%S"))
if yearOffset != 0:
extractedDate = extractedDate + relativedelta(years=yearOffset)
if monthOffset != 0:
extractedDate = extractedDate + relativedelta(months=monthOffset)
if dayOffset != 0:
extractedDate = extractedDate + relativedelta(days=dayOffset)
if hrAbs is None and minAbs is None and default_time:
hrAbs = default_time.hour
minAbs = default_time.minute
if hrAbs != -1 and minAbs != -1:
extractedDate = extractedDate + relativedelta(hours=hrAbs or 0,
minutes=minAbs or 0)
if (hrAbs or minAbs) and datestr == "":
if not daySpecified and dateNow > extractedDate:
extractedDate = extractedDate + relativedelta(days=1)
if hrOffset != 0:
extractedDate = extractedDate + relativedelta(hours=hrOffset)
if minOffset != 0:
extractedDate = extractedDate + relativedelta(minutes=minOffset)
if secOffset != 0:
extractedDate = extractedDate + relativedelta(seconds=secOffset)
# NOTE(review): this drops an isolated English "and"; words[idx + 1] is
# out of range when that word is the last one.
for idx, word in enumerate(words):
if words[idx] == "and" and words[idx - 1] == "" and words[
idx + 1] == "":
words[idx] = ""
resultStr = " ".join(words)
resultStr = ' '.join(resultStr.split())
return [extractedDate, resultStr]
def is_fractional_sv(input_str, short_scale=True):
    """
    Check whether a Swedish word names a fraction.

    The word is matched case-insensitively. Before the lookup, the endings
    "ars", "ar", "a" and "s" are stripped, each at most once and in that
    order, so inflected forms such as "femtedelar" or "halva" are reduced
    to the base forms listed in the table below.

    Args:
        input_str (str): the string to check if fractional
        short_scale (bool): use short scale if True, long scale if False
            (accepted for interface parity; not used by this function)
    Returns:
        (bool) or (float): False if not a fraction, otherwise the fraction
    """
    # Lower-case once so that suffix stripping, the membership test and the
    # index lookup all see the same string. Previously only the membership
    # test was lower-cased, so input like "Halv" passed the test and then
    # raised ValueError in list.index().
    word = input_str.lower()

    if word.endswith('ars'):
        word = word[:-3]  # e.g. "femtedelars"
    if word.endswith('ar'):
        word = word[:-2]  # e.g. "femtedelar"
    if word.endswith('a'):
        word = word[:-1]  # e.g. "halva"
    if word.endswith('s'):
        word = word[:-1]  # e.g. "halvs"

    # Position in this list is the denominator minus one: "hel" -> 1/1,
    # "halv" -> 1/2, "tredjedel" -> 1/3, and so on up to 1/12.
    fractions = ["hel", "halv", "tredjedel", "fjärdedel", "femtedel",
                 "sjättedel", "sjundedel", "åttondel", "niondel", "tiondel",
                 "elftedel", "tolftedel"]
    if word in fractions:
        return 1.0 / (fractions.index(word) + 1)

    # Quarter-based words that do not fit the 1/n table.
    if word == "kvart":
        return 1.0 / 4
    if word == "trekvart":
        return 3.0 / 4
    return False
def normalize_sv(text, remove_articles=True):
    """Normalize a Swedish string by replacing small number words with digits.

    The text is split on whitespace (which also collapses repeated spaces),
    "en" is treated as "ett", and every word from "noll" to "tjugo" is
    replaced by its digit form. All other words are passed through as-is.

    Args:
        text (str): the string to normalize
        remove_articles (bool): accepted for interface parity; this
            function does not use it
    Returns:
        (str): the words re-joined with single spaces
    """
    # Index in this list is the numeric value of the word.
    number_words = ["noll", "ett", "två", "tre", "fyra", "fem", "sex",
                    "sju", "åtta", "nio", "tio", "elva", "tolv",
                    "tretton", "fjorton", "femton", "sexton",
                    "sjutton", "arton", "nitton", "tjugo"]

    converted = []
    for token in text.split():
        # "en" and "ett" both mean one; fold them together before lookup.
        if token == 'en':
            token = 'ett'
        if token in number_words:
            token = str(number_words.index(token))
        converted.append(token)
    return " ".join(converted)
class SwedishNormalizer(Normalizer):
    """Placeholder normalizer for Swedish.

    The class body is empty, so everything is inherited unchanged from
    ``Normalizer``.

    TODO: implement a language-specific normalizer.
    """

269
lingua_franca/parse.py Normal file
View File

@@ -0,0 +1,269 @@
#
# Copyright 2017 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
#
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from difflib import SequenceMatcher
from warnings import warn
from lingua_franca.time import now_local
from lingua_franca.internal import populate_localized_function_dict, \
get_active_langs, get_full_lang_code, get_primary_lang_code, \
get_default_lang, localized_function, _raise_unsupported_language
# Names of the parse entry points. Each name matches a function decorated
# with @localized_function() further down in this module.
# NOTE(review): presumably consumed by lingua_franca.internal when wiring up
# the per-language implementations -- confirm there.
_REGISTERED_FUNCTIONS = ("extract_numbers",
                         "extract_number",
                         "extract_duration",
                         "extract_datetime",
                         "normalize",
                         "get_gender",
                         "is_fractional",
                         "is_ordinal")
# Runs at import time for whichever languages are active at that moment.
# NOTE(review): presumably loads the "parse" implementations for those
# languages -- confirm in lingua_franca.internal.
populate_localized_function_dict("parse", langs=get_active_langs())
def fuzzy_match(x: str, against: str) -> float:
    """Score how similar two strings are.

    Args:
        x: the string being tested
        against: the string to compare it with
    Returns:
        The ``SequenceMatcher`` similarity ratio: 1.0 for identical
        strings, down to 0.0 when nothing matches.
    """
    matcher = SequenceMatcher(None, x, against)
    similarity = matcher.ratio()
    return similarity
def match_one(query, choices):
    """
    Pick the choice that most closely resembles the query.

    Args:
        query (str): string to test
        choices (list or dict): candidates to compare against; for a dict
            the keys are compared and the matching value is returned
    Returns:
        tuple: (best match, score)
    Raises:
        ValueError: if choices is neither a list nor a dict
        IndexError: if choices is empty
    """
    from_mapping = isinstance(choices, dict)
    if not from_mapping and not isinstance(choices, list):
        raise ValueError('a list or dict of choices must be provided')

    candidates = list(choices.keys()) if from_mapping else choices

    # Seed with the first candidate; later ones must score strictly higher
    # to replace it, so the earliest candidate wins a tie.
    winner = candidates[0]
    winner_score = fuzzy_match(query, winner)
    for candidate in candidates[1:]:
        candidate_score = fuzzy_match(query, candidate)
        if candidate_score > winner_score:
            winner, winner_score = candidate, candidate_score

    if from_mapping:
        return (choices[winner], winner_score)
    return (winner, winner_score)
@localized_function()
def extract_numbers(text, short_scale=True, ordinals=False, lang=''):
    """
    Take in a string and extract a list of numbers.

    Args:
        text (str): the string to extract numbers from
        short_scale (bool): Use "short scale" or "long scale" for large
            numbers -- over a million. The default is short scale, which
            is now common in most English speaking countries.
            See https://en.wikipedia.org/wiki/Names_of_large_numbers
        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
        lang (str, optional): an optional BCP-47 language code, if omitted
            the default language will be used.
    Returns:
        list: list of extracted numbers as floats, or empty list if none found
    """
    # NOTE(review): no body beyond the docstring; presumably the
    # localized_function decorator supplies the per-language behavior --
    # confirm in lingua_franca.internal.
@localized_function()
def extract_number(text, short_scale=True, ordinals=False, lang=''):
    """Take in a string and extract a single number.

    Args:
        text (str): the string to extract a number from
        short_scale (bool): Use "short scale" or "long scale" for large
            numbers -- over a million. The default is short scale, which
            is now common in most English speaking countries.
            See https://en.wikipedia.org/wiki/Names_of_large_numbers
        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
        lang (str, optional): an optional BCP-47 language code, if omitted
            the default language will be used.
    Returns:
        (int, float or False): The number extracted or False if the input
            text contains no numbers
    """
    # NOTE(review): no body beyond the docstring; presumably the
    # localized_function decorator supplies the per-language behavior --
    # confirm in lingua_franca.internal.
@localized_function()
def extract_duration(text, lang=''):
    """ Convert a spoken-language phrase into a duration

    Convert things like:
        * "10 minute"
        * "2 and a half hours"
        * "3 days 8 hours 10 minutes and 49 seconds"
    into a timedelta representing the total duration.

    The words used in the duration will be consumed, and
    the remainder returned.

    As an example, "set a timer for 5 minutes" would return
    ``(timedelta(seconds=300), "set a timer for")``.

    Args:
        text (str): string containing a duration
        lang (str, optional): an optional BCP-47 language code, if omitted
            the default language will be used.
    Returns:
        (timedelta, str):
            A tuple containing the duration and the remaining text
            not consumed in the parsing. The first value will
            be None if no duration is found. The text returned
            will have whitespace stripped from the ends.
    """
    # NOTE(review): no body beyond the docstring; presumably the
    # localized_function decorator supplies the per-language behavior --
    # confirm in lingua_franca.internal.
@localized_function()
def extract_datetime(text, anchorDate=None, lang='', default_time=None):
    """
    Extract date and time information from a sentence. Parses many of the
    common ways that humans express dates and times, including relative dates
    like "5 days from today", "tomorrow", and "Tuesday".

    Vague terms are given arbitrary values, like:
        - morning = 8 AM
        - afternoon = 3 PM
        - evening = 7 PM

    If a time isn't supplied or implied, the function defaults to 12 AM

    Args:
        text (str): the text to be interpreted
        anchorDate (:obj:`datetime`, optional): the date to be used for
            relative dating (for example, what does "tomorrow" mean?).
            Defaults to the current local date/time.
        lang (str, optional): an optional BCP-47 language code, if omitted
            the default language will be used.
        default_time (datetime.time): time to use if none was found in
            the input string.
    Returns:
        [:obj:`datetime`, :obj:`str`]: 'datetime' is the extracted date
            as a datetime object in the local timezone.
            'leftover_string' is the original phrase with all date and time
            related keywords stripped out. See examples for further
            clarification.
            Returns 'None' if no date or time related text is found.
    Examples:
        >>> extract_datetime(
        ... "What is the weather like the day after tomorrow?",
        ... datetime(2017, 6, 30, 00, 00)
        ... )
        [datetime.datetime(2017, 7, 2, 0, 0), 'what is weather like']
        >>> extract_datetime(
        ... "Set up an appointment 2 weeks from Sunday at 5 pm",
        ... datetime(2016, 2, 19, 00, 00)
        ... )
        [datetime.datetime(2016, 3, 6, 17, 0), 'set up appointment']
        >>> extract_datetime(
        ... "Set up an appointment",
        ... datetime(2016, 2, 19, 00, 00)
        ... )
        None
    """
    # NOTE(review): no body beyond the docstring; presumably the
    # localized_function decorator supplies the per-language behavior --
    # confirm in lingua_franca.internal.
@localized_function()
def normalize(text, lang='', remove_articles=True):
    """Prepare a string for parsing

    This function prepares the given text for parsing by making
    numbers consistent, getting rid of contractions, etc.

    Args:
        text (str): the string to normalize
        lang (str, optional): an optional BCP-47 language code, if omitted
            the default language will be used.
        remove_articles (bool): whether to remove articles (like 'a', or
            'the'). True by default.
    Returns:
        (str): The normalized string.
    """
    # NOTE(review): no body beyond the docstring; presumably the
    # localized_function decorator supplies the per-language behavior --
    # confirm in lingua_franca.internal.
@localized_function()
def get_gender(word, context="", lang=''):
    """ Guess the gender of a word

    Some languages assign genders to specific words. This method will attempt
    to determine the gender, optionally using the provided context sentence.

    Args:
        word (str): The word to look up
        context (str, optional): String containing word, for context
        lang (str, optional): an optional BCP-47 language code, if omitted
            the default language will be used.
    Returns:
        str: The code "m" (male), "f" (female) or "n" (neutral) for the gender,
            or None if unknown or unused in the given language.
    """
    # NOTE(review): no body beyond the docstring; presumably the
    # localized_function decorator supplies the per-language behavior --
    # confirm in lingua_franca.internal.
@localized_function()
def is_fractional(input_str, short_scale=True, lang=''):
    """
    Check whether the given text is a fraction.

    Used by most of the number extractors.
    Will return False on phrases that *contain* a fraction. Only detects
    exact matches. To pull a fraction from a string, see extract_number()

    Args:
        input_str (str): the string to check if fractional
        short_scale (bool): use short scale if True, long scale if False
        lang (str, optional): an optional BCP-47 language code, if omitted
            the default language will be used.
    Returns:
        (bool) or (float): False if not a fraction, otherwise the fraction
    """
    # NOTE(review): no body beyond the docstring; presumably the
    # localized_function decorator supplies the per-language behavior --
    # confirm in lingua_franca.internal.
@localized_function()
def is_ordinal(input_str, lang=''):
    """
    Check whether the given text is an ordinal number.

    Args:
        input_str (str): the string to check if ordinal
        lang (str, optional): an optional BCP-47 language code, if omitted
            the default language will be used.
    Returns:
        (bool) or (float): False if not an ordinal, otherwise the number
            corresponding to the ordinal
    """
    # NOTE(review): no body beyond the docstring; presumably the
    # localized_function decorator supplies the per-language behavior --
    # confirm in lingua_franca.internal.

View File

@@ -0,0 +1 @@
i

View File

@@ -0,0 +1,130 @@
{
"decade_format": {
"1": {"match": "^\\d$", "format": "{x}"},
"2": {"match": "^1\\d$", "format": "{xx}"},
"3": {"match": "^\\d0$", "format": "{x0}"},
"4": {"match": "^2\\d$", "format": "vint-i-{x}"},
"5": {"match": "^[3-9]\\d$", "format": "{x0}-{x}"},
"default": "{number}"
},
"hundreds_format": {
"1": {"match": "^1\\d{2}$", "format": "{x_in_x00}-cent"},
"2": {"match": "^\\d{3}$", "format": "{x_in_x00}-cents"},
"default": "{number}"
},
"thousand_format": {
"1": {"match": "^10\\d\\d$", "format": "mil"},
"2": {"match": "^11\\d\\d$", "format": "mil cent"},
"3": {"match": "^1[2-9]\\d\\d$", "format": "mil {x_in_x00}-cents"},
"4": {"match": "^[2-9]0\\d{2}$", "format": "{x_in_x000} mil"},
"5": {"match": "^[2-9]1\\d{2}$", "format": "{x_in_x000} mil cent"},
"6": {"match": "^[2-9][2-9]\\d{2}$", "format": "{x_in_x000} mil {x_in_x00}-cents"},
"default": "{number}"
},
"year_format": {
"1": {"match": "^\\d\\d?$", "format": "{formatted_decade} {bc}"},
"2": {"match": "^\\d00$", "format": "{formatted_hundreds} {bc}"},
"3": {"match": "^\\d{3}$", "format": "{formatted_hundreds} {formatted_decade} {bc}"},
"4": {"match": "^\\d{2}00$", "format": "{formatted_thousand} {bc}"},
"5": {"match": "^\\d{4}$", "format": "{formatted_thousand} {formatted_decade} {bc}"},
"default": "{year} {bc}",
"bc": "a.C."
},
"date_format": {
"date_full": "{weekday}, {day} de {month} de {formatted_year}",
"date_full_no_year": "{weekday}, {day} de {month}",
"date_full_no_year_month": "{weekday}, dia {day}",
"today": "avui",
"tomorrow": "demà",
"yesterday": "ahir"
},
"date_time_format": {
"date_time": "{formatted_date} a {formatted_time}"
},
"weekday": {
"0": "dilluns",
"1": "dimarts",
"2": "dimecres",
"3": "dijous",
"4": "divendres",
"5": "dissabte",
"6": "diumenge"
},
"date": {
"1": "primer",
"2": "dos",
"3": "tres",
"4": "quatre",
"5": "cinc",
"6": "sis",
"7": "set",
"8": "vuit",
"9": "nou",
"10": "deu",
"11": "onze",
"12": "dotze",
"13": "tretze",
"14": "catorze",
"15": "quinze",
"16": "setze",
"17": "disset",
"18": "divuit",
"19": "dinou",
"20": "vint",
"21": "vint-i-u",
"22": "vint-i-dos",
"23": "vint-i-tres",
"24": "vint-i-quatre",
"25": "vint-i-cinc",
"26": "vint-i-sis",
"27": "vint-i-set",
"28": "vint-i-vuit",
"29": "vint-i-nou",
"30": "trenta",
"31": "trenta-u"
},
"month": {
"1": "gener",
"2": "febrer",
"3": "març",
"4": "abril",
"5": "maig",
"6": "juny",
"7": "juliol",
"8": "agost",
"9": "setembre",
"10": "octubre",
"11": "novembre",
"12": "desembre"
},
"number": {
"0": "zero",
"1": "u",
"2": "dos",
"3": "tres",
"4": "quatre",
"5": "cinc",
"6": "sis",
"7": "set",
"8": "vuit",
"9": "nou",
"10": "deu",
"11": "onze",
"12": "dotze",
"13": "tretze",
"14": "catorze",
"15": "quinze",
"16": "setze",
"17": "disset",
"18": "divuit",
"19": "dinou",
"20": "vint",
"30": "trenta",
"40": "quaranta",
"50": "cinquanta",
"60": "seixanta",
"70": "setanta",
"80": "vuitanta",
"90": "noranta"
}
}

View File

@@ -0,0 +1,43 @@
{
"test_nice_year": {
"1": {"datetime_param": "1, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "u a.C." },
"2": {"datetime_param": "10, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "deu a.C." },
"3": {"datetime_param": "92, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "noranta-dos a.C." },
"4": {"datetime_param": "803, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "vuit-cents tres" },
"5": {"datetime_param": "811, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "vuit-cents onze" },
"6": {"datetime_param": "454, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "quatre-cents cinquanta-quatre" },
"7": {"datetime_param": "1005, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mil cinc" },
"8": {"datetime_param": "1012, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mil dotze" },
"9": {"datetime_param": "1046, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mil quaranta-sis" },
"10": {"datetime_param": "1807, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mil vuit-cents set" },
"11": {"datetime_param": "1717, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mil set-cents disset" },
"12": {"datetime_param": "1988, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mil nou-cents vuitanta-vuit"},
"13": {"datetime_param": "2009, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dos mil nou"},
"14": {"datetime_param": "2018, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dos mil divuit"},
"15": {"datetime_param": "2021, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dos mil vint-i-u"},
"16": {"datetime_param": "2030, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dos mil trenta"},
"17": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "dos mil cent" },
"18": {"datetime_param": "1000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mil" },
"19": {"datetime_param": "2000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dos mil" },
"20": {"datetime_param": "3120, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "tres mil cent vint a.C." },
"21": {"datetime_param": "3241, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "tres mil dos-cents quaranta-u a.C." },
"22": {"datetime_param": "5200, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "cinc mil dos-cents" },
"23": {"datetime_param": "1100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mil cent" },
"24": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "dos mil cent" }
},
"test_nice_date": {
"1": {"datetime_param": "2017, 1, 31, 0, 2, 3", "now": "None", "assertEqual": "dimarts, trenta-u de gener de dos mil disset"},
"2": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2017, 1, 1, 0, 2, 3", "assertEqual": "diumenge, quatre de febrer de dos mil divuit"},
"3": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 1, 1, 0, 2, 3", "assertEqual": "diumenge, quatre de febrer"},
"4": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 1, 0, 2, 3", "assertEqual": "diumenge, dia quatre"},
"5": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 3, 0, 2, 3", "assertEqual": "demà"},
"6": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 4, 0, 2, 3", "assertEqual": "avui"},
"7": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 5, 0, 2, 3", "assertEqual": "ahir"},
"8": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 6, 0, 2, 3", "assertEqual": "diumenge, quatre de febrer"},
"9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "diumenge, quatre de febrer de dos mil divuit"}
},
"test_nice_date_time": {
"1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", "assertEqual": "dimarts, trenta-u de gener de dos mil disset a la una i vint-i-dos de la tarda"},
"2": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "True", "use_ampm": "False", "assertEqual": "dimarts, trenta-u de gener de dos mil disset a les tretze i vint-i-dos"}
}
}

View File

@@ -0,0 +1 @@
dia

View File

@@ -0,0 +1 @@
dies

View File

@@ -0,0 +1 @@
hora

View File

@@ -0,0 +1 @@
hores

View File

@@ -0,0 +1 @@
minut

View File

@@ -0,0 +1 @@
minuts

View File

@@ -0,0 +1,109 @@
{
"lowercase": false,
"numbers_to_digits": true,
"expand_contractions": false,
"remove_symbols": true,
"remove_accents": false,
"remove_articles": false,
"remove_stopwords": true,
"contractions": {},
"word_replacements": {},
"number_replacements": {
"catorze": "14",
"cent": "100",
"cents": "100",
"cinc": "5",
"cinc-centes": "500",
"cinc-cents": "500",
"cinquanta": "50",
"deu": "10",
"dinou": "19",
"setze": "16",
"disset": "17",
"dihuit": "18",
"divuit": "18",
"dos": "2",
"dos-centes": "200",
"dos-cents": "200",
"dotze": "12",
"dues": "2",
"dues-centes": "200",
"huitanta": "80",
"huit": "8",
"huit-centes": "800",
"huit-cents": "800",
"mil": "1000",
"milió": "1000000",
"nou": "9",
"nou-centes": "900",
"nou-cents": "900",
"noranta": "90",
"onze": "11",
"primer": "1",
"primera": "1",
"quaranta": "40",
"quatre": "4",
"quatre-centes": "400",
"quatre-cents": "400",
"quinze": "15",
"segon": "2",
"segona": "2",
"seixanta": "60",
"set": "7",
"set-centes": "700",
"set-cents": "700",
"setanta": "70",
"sis": "6",
"sis-centes": "600",
"sis-cents": "600",
"tercer": "3",
"trenta": "30",
"tres": "3",
"tres-centes": "300",
"tres-cents": "300",
"tretze": "13",
"u": "1",
"un": "1",
"una": "1",
"vint": "20",
"vuitanta": "80",
"vuit": "8",
"vuit-centes": "800",
"vuit-cents": "800",
"zero": "0"
},
"stopwords": [
"de",
"del",
"dels",
"ell",
"ella",
"ells",
"elles",
"jo",
"i",
"al",
"dins la",
"a la",
"nosaltres",
"dins el",
"para",
"aquest",
"aquesta",
"aquests",
"aquestes",
"aquell",
"aquella",
"aquells",
"aquelles",
"que"
],
"articles": [
"el",
"la",
"l",
"els",
"les",
"los"
]
}

View File

@@ -0,0 +1 @@
o

View File

@@ -0,0 +1 @@
segon

View File

@@ -0,0 +1 @@
segons

View File

@@ -0,0 +1 @@
a

View File

@@ -0,0 +1,129 @@
{
"decade_format": {
"1": {"match": "^\\d$", "format": "{x}"},
"2": {"match": "^1\\d$", "format": "{xx}"},
"3": {"match": "^\\d0$", "format": "{x0}"},
"4": {"match": "^[2-9]\\d$", "format": "{x0} {x}"},
"default": "{number}"
},
"hundreds_format": {
"1": {"match": "^\\d{3}$", "format": "{x_in_x00} sto"},
"default": "{number}"
},
"thousand_format": {
"1": {"match": "^\\d00\\d$", "format": "{x_in_x000} tisíc"},
"2": {"match": "^1\\d00$", "format": "{xx_in_xx00} sto"},
"3": {"match": "^\\d{2}00$", "format": "{x0_in_x000} {x_in_x00} sto"},
"4": {"match": "^(1\\d{3})|(\\d0\\d{2})$", "format": "{xx_in_xx00}"},
"5": {"match": "^\\d{4}$", "format": "{x0_in_x000} {x_in_x00}"},
"default": "{number}"
},
"year_format": {
"1": {"match": "^\\d\\d?$", "format": "{formatted_decade} {bc}"},
"2": {"match": "^\\d00$", "format": "{formatted_hundreds} {bc}"},
"3": {"match": "^\\d{3}$", "format": "{formatted_hundreds} {formatted_decade} {bc}"},
"4": {"match": "^\\d{2}00$", "format": "{formatted_thousand} {bc}"},
"5": {"match": "^\\d00\\d$", "format": "{formatted_thousand} {formatted_decade} {bc}"},
"6": {"match": "^\\d{2}0\\d$", "format": "{formatted_thousand} {formatted_decade} {bc}"},
"7": {"match": "^\\d{4}$", "format": "{formatted_thousand} {formatted_decade} {bc}"},
"default": "{year} {bc}",
"bc": "b.c."
},
"date_format": {
"date_full": "{weekday}, {month} {day}, {formatted_year}",
"date_full_no_year": "{weekday}, {month} {day}",
"date_full_no_year_month": "{weekday}, {day}",
"today": "dnes",
"tomorrow": "zítra",
"yesterday": "včera"
},
"date_time_format": {
"date_time": "{formatted_date} v {formatted_time}"
},
"weekday": {
"0": "pondělí",
"1": "úterý",
"2": "středa",
"3": "čtvrtek",
"4": "pátek",
"5": "sobota",
"6": "neděle"
},
"date": {
"1": "prvního",
"2": "druhého",
"3": "třetího",
"4": "čtvrtého",
"5": "pátého",
"6": "šestého",
"7": "sedmého",
"8": "osmého",
"9": "devátého",
"10": "desátého",
"11": "jedenáctého",
"12": "dvanáctého",
"13": "třináctého",
"14": "čtrnáctého",
"15": "patnáctého",
"16": "šestnáctého",
"17": "sedmnáctého",
"18": "osmnáctého",
"19": "devatenáctého",
"20": "dvacátého",
"21": "dvacátého-prvního",
"22": "dvacátého-druhého",
"23": "dvacátého-třetího",
"24": "dvacátého-čtvrtého",
"25": "dvacátého-pátého",
"26": "dvacátého-šestého",
"27": "dvacátého-sedmého",
"28": "dvacátého-osmého",
"29": "dvacátého-devátého",
"30": "třicátého",
"31": "třicátého-prvního"
},
"month": {
"1": "leden",
"2": "únor",
"3": "březen",
"4": "duben",
"5": "květen",
"6": "červen",
"7": "červenec",
"8": "srpen",
"9": "září",
"10": "říjen",
"11": "listopad",
"12": "prosinec"
},
"number": {
"0": "nula",
"1": "jedna",
"2": "dva",
"3": "tři",
"4": "čtyři",
"5": "pět",
"6": "šest",
"7": "sedm",
"8": "osm",
"9": "devět",
"10": "deset",
"11": "jedenáct",
"12": "dvanáct",
"13": "třináct",
"14": "čtrnáct",
"15": "patnáct",
"16": "šestnáct",
"17": "sedmnáct",
"18": "osmnáct",
"19": "devatenáct",
"20": "dvacet",
"30": "třicet",
"40": "čtyřicet",
"50": "padesát",
"60": "šedesát",
"70": "sedmdesát",
"80": "osmdesát",
"90": "devadesát"
}
}

View File

@@ -0,0 +1,43 @@
{
"test_nice_year": {
"1": {"datetime_param": "1, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "jedna b.c." },
"2": {"datetime_param": "10, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "deset b.c." },
"3": {"datetime_param": "92, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "devadesát dva b.c." },
"4": {"datetime_param": "803, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "osm sto tři" },
"5": {"datetime_param": "811, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "osm sto jedenáct" },
"6": {"datetime_param": "454, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "čtyři sto padesát čtyři" },
"7": {"datetime_param": "1005, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "jedna tisíc pět" },
"8": {"datetime_param": "1012, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "deset dvanáct" },
"9": {"datetime_param": "1046, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "deset čtyřicet šest" },
"10": {"datetime_param": "1807, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "osmnáct sedm" },
"11": {"datetime_param": "1717, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "sedmnáct sedmnáct" },
"12": {"datetime_param": "1988, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "devatenáct osmdesát osm"},
"13": {"datetime_param": "2009, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dva tisíc devět"},
"14": {"datetime_param": "2018, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dvacet osmnáct"},
"15": {"datetime_param": "2021, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dvacet dvacet jedna"},
"16": {"datetime_param": "2030, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dvacet třicet"},
"17": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "dvacet jedna sto" },
"18": {"datetime_param": "1000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "jedna tisíc" },
"19": {"datetime_param": "2000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dva tisíc" },
"20": {"datetime_param": "3120, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "třicet jedna dvacet b.c." },
"21": {"datetime_param": "3241, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "třicet dva čtyřicet jedna b.c." },
"22": {"datetime_param": "5200, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "padesát dva sto" },
"23": {"datetime_param": "1100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "jedenáct sto" },
"24": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "dvacet jedna sto" }
},
"test_nice_date": {
"1": {"datetime_param": "2017, 1, 31, 0, 2, 3", "now": "None", "assertEqual": "úterý, leden třicátého-prvního, dvacet sedmnáct"},
"2": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2017, 1, 1, 0, 2, 3", "assertEqual": "neděle, únor čtvrtého, dvacet osmnáct"},
"3": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 1, 1, 0, 2, 3", "assertEqual": "neděle, únor čtvrtého"},
"4": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 1, 0, 2, 3", "assertEqual": "neděle, čtvrtého"},
"5": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 3, 0, 2, 3", "assertEqual": "zítra"},
"6": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 4, 0, 2, 3", "assertEqual": "dnes"},
"7": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 5, 0, 2, 3", "assertEqual": "včera"},
"8": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 6, 0, 2, 3", "assertEqual": "neděle, únor čtvrtého"},
"9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "neděle, únor čtvrtého, dvacet osmnáct"}
},
"test_nice_date_time": {
"1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", "assertEqual": "úterý, leden třicátého-prvního, dvacet sedmnáct v jedna dvacet dva p.m."},
"2": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "True", "use_ampm": "False", "assertEqual": "úterý, leden třicátého-prvního, dvacet sedmnáct v třináct dvacet dva"}
}
}

View File

@@ -0,0 +1 @@
den

View File

@@ -0,0 +1 @@
dní

View File

@@ -0,0 +1 @@
hodina

View File

@@ -0,0 +1 @@
hodiny

View File

@@ -0,0 +1 @@
minuta

View File

@@ -0,0 +1 @@
minuty

View File

@@ -0,0 +1,46 @@
{
"lowercase": false,
"numbers_to_digits": true,
"expand_contractions": true,
"remove_symbols": false,
"remove_accents": false,
"remove_articles": false,
"remove_stopwords": false,
"contractions": {},
"word_replacements": {},
"number_replacements": {
"nula": "0",
"jedna": "1",
"dva": "2",
"dvě": "2",
"tři": "3",
"čtyři": "4",
"pět": "5",
"šest": "6",
"sedm": "7",
"sedum": "7",
"osm": "8",
"osum": "8",
"devět": "9",
"deset": "10",
"jedenáct": "11",
"dvanáct": "12",
"třináct": "13",
"čtrnáct": "14",
"patnáct": "15",
"šestnáct": "16",
"sedmnáct": "17",
"osmnáct": "18",
"devatenáct": "19",
"dvacet": "20",
"třicet": "30",
"čtyřicet": "40",
"padesát": "50",
"šedesát": "60",
"sedmdesát": "70",
"osmdesát": "80",
"devadesát": "90"
},
"stopwords": [],
"articles": []
}

View File

@@ -0,0 +1 @@
nebo

View File

@@ -0,0 +1 @@
sekunda

View File

@@ -0,0 +1 @@
sekundy

View File

@@ -0,0 +1 @@
og

View File

@@ -0,0 +1,132 @@
{
"decade_format": {
"1": {"match": "^1$", "format": "et"},
"2": {"match": "^\\d$", "format": "{x}"},
"3": {"match": "^1\\d$", "format": "{xx}"},
"4": {"match": "^\\d0$", "format": "{x0}"},
"5": {"match": "^[2-9]\\d$", "format": "{x} og {x0}"},
"default": "{number}"
},
"hundreds_format": {
"1": {"match": "^1\\d{2}$", "format": "et hundred"},
"2": {"match": "^\\d{3}$", "format": "{x_in_x00} hundred"},
"default": "{number}"
},
"thousand_format": {
"1": {"match": "^1[1-9]\\d{2}$", "format": "{xx_in_xx00} hundred"},
"2": {"match": "^1\\d{3}$", "format": "et tusind"},
"3": {"match": "^\\d{4}$", "format": "{x_in_x000} tusind"},
"default": "{number}"
},
"year_format": {
"1": {"match": "^\\d\\d?$", "format": "{formatted_decade} {bc}"},
"2": {"match": "^\\d00$", "format": "{formatted_hundreds} {bc}"},
"3": {"match": "^\\d{3}$", "format": "{formatted_hundreds} og {formatted_decade} {bc}"},
"4": {"match": "^(1\\d00)|([2-9]000)$", "format": "{formatted_thousand} {bc}"},
"5": {"match": "^(1\\d{3})|(\\d0\\d{2})$", "format": "{formatted_thousand} og {formatted_decade} {bc}"},
"6": {"match": "^\\d{4}$", "format": "{formatted_thousand} {formatted_hundreds} og {formatted_decade} {bc}"},
"default": "{year} {bc}",
"bc": "f.kr."
},
"date_format": {
"date_full": "{weekday}, den {day} {month}, {formatted_year}",
"date_full_no_year": "{weekday}, den {day} {month}",
"date_full_no_year_month": "{weekday}, den {day}",
"today": "i dag",
"tomorrow": "i morgen",
"yesterday": "i går"
},
"date_time_format": {
"date_time": "{formatted_date} klokken {formatted_time}"
},
"weekday": {
"0": "mandag",
"1": "tirsdag",
"2": "onsdag",
"3": "torsdag",
"4": "fredag",
"5": "lørdag",
"6": "søndag"
},
"date": {
"1": "første",
"2": "anden",
"3": "tredie",
"4": "fjerde",
"5": "femte",
"6": "sjette",
"7": "syvende",
"8": "ottende",
"9": "niende",
"10": "tiende",
"11": "elvte",
"12": "tolvte",
"13": "trettende",
"14": "fjortende",
"15": "femtende",
"16": "sekstende",
"17": "syttende",
"18": "attende",
"19": "nittende",
"20": "tyvende",
"21": "en og tyvende",
"22": "to og tyvende",
"23": "tre og tyvende",
"24": "fire og tyvende",
"25": "fem og tyvende",
"26": "seks og tyvende",
"27": "syv og tyvende",
"28": "otte og tyvende",
"29": "ni og tyvende",
"30": "tredivte",
"31": "en og tredivte"
},
"month": {
"1": "januar",
"2": "februar",
"3": "marts",
"4": "april",
"5": "maj",
"6": "juni",
"7": "juli",
"8": "august",
"9": "september",
"10": "oktober",
"11": "november",
"12": "december"
},
"number": {
"0": "nul",
"1": "en",
"2": "to",
"3": "tre",
"4": "fire",
"5": "fem",
"6": "seks",
"7": "syv",
"8": "otte",
"9": "ni",
"10": "ti",
"11": "elve",
"12": "tolv",
"13": "tretten",
"14": "fjorten",
"15": "femten",
"16": "seksten",
"17": "sytten",
"18": "atten",
"19": "nitten",
"20": "tyve",
"30": "tredive",
"40": "fyrre",
"50": "halvtreds",
"60": "treds",
"70": "halvfjerds",
"80": "firs",
"90": "halvfems",
"100": "hundrede",
"1000": "tusind",
"2000": "to tusind"
}
}

View File

@@ -0,0 +1,32 @@
{
"test_nice_year": {
"1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "to tusind og sytten"},
"2": {"datetime_param": "1984, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "nitten hundred og fire og firs"},
"3": {"datetime_param": "1906, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "nitten hundred og seks"},
"4": {"datetime_param": "1802, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "atten hundred og to" },
"5": {"datetime_param": "806, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "otte hundred og seks" },
"6": {"datetime_param": "1800, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "atten hundred" },
"7": {"datetime_param": "1, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "et" },
"8": {"datetime_param": "103, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "et hundred og tre" },
"9": {"datetime_param": "1000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "et tusind" },
"10": {"datetime_param": "2000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "to tusind" },
"11": {"datetime_param": "99, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "ni og halvfems f.kr." },
"12": {"datetime_param": "5, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "fem f.kr." },
"13": {"datetime_param": "3120, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "tre tusind et hundred og tyve f.kr." },
"14": {"datetime_param": "3241, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "tre tusind to hundred og en og fyrre f.kr." }
},
"test_nice_date": {
"1": {"datetime_param": "2017, 1, 31, 0, 2, 3", "now": "None", "assertEqual": "tirsdag, den en og tredivte januar, to tusind og sytten"},
"2": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2017, 1, 1, 0, 2, 3", "assertEqual": "søndag, den fjerde februar, to tusind og atten"},
"3": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 1, 1, 0, 2, 3", "assertEqual": "søndag, den fjerde februar"},
"4": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 1, 0, 2, 3", "assertEqual": "søndag, den fjerde"},
"5": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 3, 0, 2, 3", "assertEqual": "i morgen"},
"6": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 4, 0, 2, 3", "assertEqual": "i dag"},
"7": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 5, 0, 2, 3", "assertEqual": "i går"},
"8": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 6, 0, 2, 3", "assertEqual": "søndag, den fjerde februar"},
"9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "søndag, den fjerde februar, to tusind og atten"}
},
"test_nice_date_time": {
"1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", "assertEqual": "tirsdag, den en og tredivte januar, to tusind og sytten klokken et toogtyve om eftermiddagen"}
}
}

View File

@@ -0,0 +1 @@
dag

View File

@@ -0,0 +1 @@
dage

View File

@@ -0,0 +1 @@
time

View File

@@ -0,0 +1 @@
timer

View File

@@ -0,0 +1 @@
minut

View File

@@ -0,0 +1 @@
minuter

View File

@@ -0,0 +1 @@
eller

View File

@@ -0,0 +1 @@
sekund

View File

@@ -0,0 +1 @@
sekunder

View File

@@ -0,0 +1 @@
und

View File

@@ -0,0 +1,136 @@
{
"decade_format": {
"1": {"match": "^\\d$", "format": "{x}"},
"2": {"match": "^1\\d$", "format": "{xx}"},
"3": {"match": "^\\d0$", "format": "{x0}"},
"4": {"match": "^[2-9]\\d$", "format": "{x} und {x0}"},
"default": "{number}"
},
"hundreds_format": {
"1": {"match": "^1\\d{2}$", "format": "hundert"},
"2": {"match": "^\\d{3}$", "format": "{x_in_x00} hundert"},
"default": "{number}"
},
"thousand_format": {
"1": {"match": "^10\\d\\d$", "format": "tausend"},
"2": {"match": "^\\d0\\d{2}$", "format": "{x_in_x000} tausend"},
"3": {"match": "^1\\d00$", "format": "{xx_in_xx00} hundert"},
"4": {"match": "^\\d{2}00$", "format": "{x_in_x000} tausend {x_in_x00} hundert"},
"5": {"match": "^\\d0\\d\\d$", "format": "{x_in_x000} tausend"},
"6": {"match": "^1\\d{3}$", "format": "{xx_in_xx00}"},
"7": {"match": "^\\d{4}$", "format": "{x_in_x000} tausend {x_in_x00} hundert"},
"default": "{number}"
},
"year_format": {
"1": {"match": "^1$", "format": "eins {bc}"},
"2": {"match": "^\\d{1}?$", "format": "{formatted_decade} {bc}"},
"3": {"match": "^\\d{2}?$", "format": "{formatted_decade} {bc}"},
"4": {"match": "^\\d00$", "format": "{formatted_hundreds} {bc}"},
"5": {"match": "^\\d{3}$", "format": "{formatted_hundreds} {formatted_decade} {bc}"},
"6": {"match": "^\\d{2}00$", "format": "{formatted_thousand} {bc}"},
"7": {"match": "^\\d00\\d$", "format": "{formatted_thousand} {formatted_decade} {bc}"},
"8": {"match": "^\\d{2}0\\d$", "format": "{formatted_thousand} hundert {formatted_decade} {bc}"},
"9": {"match": "^1[2-9]\\d{2}$", "format": "{formatted_thousand} hundert {formatted_decade} {bc}"},
"10": {"match": "^1\\d{3}$", "format": "{formatted_thousand} {formatted_decade} {bc}"},
"11": {"match": "^\\d{4}$", "format": "{formatted_thousand} {formatted_decade} {bc}"},
"default": "{year} {bc}",
"bc": "v.d.Z."
},
"date_format": {
"date_full": "{weekday}, {day} {month}, {formatted_year}",
"date_full_no_year": "{weekday}, {day} {month}",
"date_full_no_year_month": "{weekday}, {day}",
"today": "heute",
"tomorrow": "morgen",
"yesterday": "gestern"
},
"date_time_format": {
"date_time": "{formatted_date} um {formatted_time}"
},
"weekday": {
"0": "Montag",
"1": "Dienstag",
"2": "Mittwoch",
"3": "Donnerstag",
"4": "Freitag",
"5": "Samstag",
"6": "Sonntag"
},
"date": {
"1": "erster",
"2": "zweiter",
"3": "dritter",
"4": "vierter",
"5": "fünfter",
"6": "sechster",
"7": "siebter",
"8": "achter",
"9": "neunter",
"10": "zehnter",
"11": "elfter",
"12": "zwölfter",
"13": "dreizehnter",
"14": "vierzehnter",
"15": "fünfzehnter",
"16": "sechzehnter",
"17": "siebzehnter",
"18": "achtzehnter",
"19": "neunzehnter",
"20": "zwanzigster",
"21": "einundzwanzigster",
"22": "zweiundzwanzigster",
"23": "dreiundzwanzigster",
"24": "vierundzwanzigster",
"25": "fünfundzwanzigster",
"26": "sechsundzwanzigster",
"27": "siebenundzwanzigster",
"28": "achtundzwanzigster",
"29": "neunundzwanzigster",
"30": "dreißigster",
"31": "einunddreißigster"
},
"month": {
"1": "Januar",
"2": "Februar",
"3": "März",
"4": "April",
"5": "Mai",
"6": "Juni",
"7": "Juli",
"8": "August",
"9": "September",
"10": "Oktober",
"11": "November",
"12": "Dezember"
},
"number": {
"0": "null",
"1": "ein",
"2": "zwei",
"3": "drei",
"4": "vier",
"5": "fünf",
"6": "sechs",
"7": "sieben",
"8": "acht",
"9": "neun",
"10": "zehn",
"11": "elf",
"12": "zwölf",
"13": "dreizehn",
"14": "vierzehn",
"15": "fünfzehn",
"16": "sechzehn",
"17": "siebzehn",
"18": "achtzehn",
"19": "neunzehn",
"20": "zwanzig",
"30": "dreißig",
"40": "vierzig",
"50": "fünfzig",
"60": "sechzig",
"70": "siebzig",
"80": "achtzig",
"90": "neunzig"
}
}

View File

@@ -0,0 +1,43 @@
{
"test_nice_year": {
"1": {"datetime_param": "1, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "eins v.d.Z." },
"2": {"datetime_param": "10, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "zehn v.d.Z." },
"3": {"datetime_param": "92, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "zwei und neunzig v.d.Z." },
"4": {"datetime_param": "803, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "acht hundert drei" },
"5": {"datetime_param": "811, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "acht hundert elf" },
"6": {"datetime_param": "454, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "vier hundert vier und fünfzig" },
"7": {"datetime_param": "1005, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "tausend fünf" },
"8": {"datetime_param": "1012, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "tausend zwölf" },
"9": {"datetime_param": "1046, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "tausend sechs und vierzig" },
"10": {"datetime_param": "1807, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "achtzehn hundert sieben" },
"11": {"datetime_param": "1717, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "siebzehn hundert siebzehn" },
"12": {"datetime_param": "1988, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "neunzehn hundert acht und achtzig"},
"13": {"datetime_param": "2009, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "zwei tausend neun"},
"14": {"datetime_param": "2018, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "zwei tausend achtzehn"},
"15": {"datetime_param": "2021, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "zwei tausend ein und zwanzig"},
"16": {"datetime_param": "2030, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "zwei tausend dreißig"},
"17": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "zwei tausend ein hundert" },
"18": {"datetime_param": "1000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "tausend" },
"19": {"datetime_param": "2000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "zwei tausend" },
"20": {"datetime_param": "3120, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "drei tausend ein hundert zwanzig v.d.Z." },
"21": {"datetime_param": "3241, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "drei tausend zwei hundert ein und vierzig v.d.Z." },
"22": {"datetime_param": "5200, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "fünf tausend zwei hundert" },
"23": {"datetime_param": "1100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "elf hundert" },
"24": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "zwei tausend ein hundert" }
},
"test_nice_date": {
"1": {"datetime_param": "2017, 1, 31, 0, 2, 3", "now": "None", "assertEqual": "Dienstag, einunddreißigster Januar, zwei tausend siebzehn"},
"2": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2017, 1, 1, 0, 2, 3", "assertEqual": "Sonntag, vierter Februar, zwei tausend achtzehn"},
"3": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 1, 1, 0, 2, 3", "assertEqual": "Sonntag, vierter Februar"},
"4": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 1, 0, 2, 3", "assertEqual": "Sonntag, vierter"},
"5": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 3, 0, 2, 3", "assertEqual": "morgen"},
"6": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 4, 0, 2, 3", "assertEqual": "heute"},
"7": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 5, 0, 2, 3", "assertEqual": "gestern"},
"8": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 6, 0, 2, 3", "assertEqual": "Sonntag, vierter Februar"},
"9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "Sonntag, vierter Februar, zwei tausend achtzehn"}
},
"test_nice_date_time": {
"1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", "assertEqual": "Dienstag, einunddreißigster Januar, zwei tausend siebzehn um ein Uhr zweiundzwanzig nachmittags"},
"2": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "True", "use_ampm": "False", "assertEqual": "Dienstag, einunddreißigster Januar, zwei tausend siebzehn um dreizehn Uhr zweiundzwanzig"}
}
}

View File

@@ -0,0 +1 @@
Tag

Some files were not shown because too many files have changed in this diff Show More