# Source code for mlconjug.PyVerbiste

# -*- coding: utf-8 -*-

"""
PyVerbiste.

| A Python library for conjugating verbs in French, English, Spanish, Italian, Portuguese and Romanian (more soon).
| It contains conjugation data generated by machine learning models using the python library mlconjug.
| More information about mlconjug at https://pypi.org/project/mlconjug/


| The conjugation data conforms to the XML schema defined by Verbiste.
| More information on Verbiste at https://perso.b2b2c.ca/~sarrazip/dev/conjug_manager.html

"""

__author__ = 'SekouD'
__author_email__ = 'sekoud.python@gmail.com'


import copy
import xml.etree.ElementTree as ET
import json
from collections import OrderedDict
import pkg_resources


# Package name handed to pkg_resources when locating the bundled data files.
_RESOURCE_PACKAGE = __name__

# Accepted values for the ``language`` argument of ConjugManager.
# ConjugManager maps 'default' to 'fr'.
_LANGUAGES = ('default', 'fr', 'en', 'es', 'it', 'pt', 'ro')

# Package-relative location of the verb list of each language.
_VERBS_RESOURCE_PATH = {'fr': '/'.join(('data', 'conjug_manager', 'verbs-fr.json')),
                        'it': '/'.join(('data', 'conjug_manager', 'verbs-it.json')),
                        'es': '/'.join(('data', 'conjug_manager', 'verbs-es.json')),
                        'en': '/'.join(('data', 'conjug_manager', 'verbs-en.json')),
                        'pt': '/'.join(('data', 'conjug_manager', 'verbs-pt.json')),
                        'ro': '/'.join(('data', 'conjug_manager', 'verbs-ro.json')),}

# Package-relative location of the conjugation templates of each language.
_CONJUGATIONS_RESOURCE_PATH = {'fr': '/'.join(('data', 'conjug_manager', 'conjugation-fr.json')),
                               'it': '/'.join(('data', 'conjug_manager', 'conjugation-it.json')),
                               'es': '/'.join(('data', 'conjug_manager', 'conjugation-es.json')),
                               'en': '/'.join(('data', 'conjug_manager', 'conjugation-en.json')),
                               'pt': '/'.join(('data', 'conjug_manager', 'conjugation-pt.json')),
                               'ro': '/'.join(('data', 'conjug_manager', 'conjugation-ro.json')),}

# Person labels in conjugation order: 1st/2nd/3rd person singular, then plural.
# Every six-item 'pronoun' tuple below is indexed in this same order.
_ABBREVS = ("1s", "2s", "3s", "1p", "2p", "3p")

# Subject labels per language. The Verb* classes select the inner tuple with
# their ``subject`` attribute ('abbrev' or 'pronoun') and use its items as
# dictionary keys for the conjugated forms.
_PRONOUNS = {'fr': {'abbrev': _ABBREVS,
                   'pronoun': ("je", "tu", "il (elle, on)", "nous", "vous", "ils (elles)")},
             'it': {'abbrev': _ABBREVS,
                   'pronoun': ('io', 'tu', 'egli/ella', 'noi', 'voi', 'essi/esse')},
             'es': {'abbrev': _ABBREVS,
                   'pronoun': ('yo', 'tú', 'él', 'nosotros', 'vosotros', 'ellos')},
             # 'we' is the first person plural (1p) and 'you' the second (2p);
             # the tuple has to follow the order of _ABBREVS.
             'en': {'abbrev': _ABBREVS,
                   'pronoun': ('I', 'you', 'he/she/it', 'we', 'you', 'they')},
             'pt': {'abbrev': _ABBREVS,
                   'pronoun': ('eu', 'tu', 'ele', 'nós', 'vós', 'eles')},
             'ro': {'abbrev': _ABBREVS,
                   'pronoun': ('eu', 'tu', 'el/ea', 'noi', 'voi', 'ei/ele')}
             }

# Subject labels for imperative tenses, which have fewer persons.
# NOTE(review): these labels end up as dictionary keys, so identical labels
# (e.g. the empty strings below) overwrite one another - confirm this is intended.
_IMPERATIVE_PRONOUNS = {'fr': {'abbrev': ("2s", "1p", "2p"),
                              'pronoun': ("", "", "")},
                        'it': None,
                        'es': {'abbrev': ("2s", "3s", "1p", "2p", "3p"),
                              'pronoun': ('tú', 'él', 'nosotros', 'vosotros', 'ellos')},
                        'en': {'abbrev': ("2s", "1p", "2p"),
                              'pronoun': ("", "let's", "")},
                        'pt': None,
                        'ro': {'abbrev': ("2s", "2p"),
                              'pronoun': ("tu", "voi")},
                        }

# Only the English entry is read by the classes in this file: VerbEn joins it
# to the subject label for the 'indicative present continuous' tense.
_AUXILIARIES = {'fr':None,
                'it': 'non',
                'es': 'no',
                'en':  {'abbrev': _ABBREVS,
                    'pronoun': ('am', 'are', 'is', 'are', 'are', 'are')},
                'pt': 'não',
                'ro': 'nu'}

# Labels for the gendered forms of the French past participle (used by VerbFr).
_GENDER = {'fr': {'abbrev': ("ms", "mp", "fs", "fp"),
                 'pronoun': ("masculin singulier", "masculin pluriel", "feminin singulier", "feminin pluriel")},
           'it': None,
           'es': None,
           'en': None,
           'pt': None,
           'ro': None}

# Negation word used as the key of negative imperative forms.
_NEGATION = {'fr': 'ne',
             'it': 'non',
             'es': 'no',
             'en': "don't",
             'pt': 'não',
             'ro': 'nu'}

class ConjugManager:
    """
    This is the class handling the mlconjug json files.

    :param language: string.
        | The language of the conjugator. The default value is fr for French.
        | The allowed values are: fr, en, es, it, pt, ro.
    :raises ValueError: if ``language`` is not a supported language.

    """
    def __init__(self, language='default'):
        if language not in _LANGUAGES:
            message = 'Unsupported language.\nThe allowed languages are fr, en, es, it, pt, ro.'
            # ``_`` is the gettext hook; this module does not define it itself.
            # Fall back to the untranslated text when it is not installed so
            # that the caller still gets the intended ValueError.
            try:
                translate = _
            except NameError:
                translate = None
            if callable(translate):
                message = translate(message)
            raise ValueError(message)
        self.language = 'fr' if language == 'default' else language
        self.verbs = {}
        self.conjugations = OrderedDict()
        verbs_file = pkg_resources.resource_filename(_RESOURCE_PACKAGE, _VERBS_RESOURCE_PATH[self.language])
        self._load_verbs(verbs_file)
        # Must run after the verbs are loaded: the endings are derived from them.
        self._allowed_endings = self._detect_allowed_endings()
        conjugations_file = pkg_resources.resource_filename(_RESOURCE_PACKAGE,
                                                            _CONJUGATIONS_RESOURCE_PATH[self.language])
        self._load_conjugations(conjugations_file)
        self.templates = sorted(self.conjugations.keys())

    def __repr__(self):
        return '{0}.{1}(language={2})'.format(__name__, self.__class__.__name__, self.language)

    def _load_verbs(self, verbs_file):
        """
        Loads and parses the verbs from the json file into ``self.verbs``.

        :param verbs_file: string or path object.
            Path to the verbs json file.

        """
        with open(verbs_file, 'r', encoding='utf-8') as file:
            self.verbs = json.load(file)

    def _load_conjugations(self, conjugations_file):
        """
        Loads and parses the conjugations from the json file into ``self.conjugations``.

        :param conjugations_file: string or path object.
            Path to the conjugation json file.

        """
        with open(conjugations_file, 'r', encoding='utf-8') as file:
            self.conjugations = json.load(file)

    def _detect_allowed_endings(self):
        """
        | Detects the allowed endings for verbs in the supported languages.
        | All the supported languages except for English restrict the form a verb can take.
        | As English is much more productive and varied in the morphology of its verbs, any word is allowed as a verb.

        :return: set or bool.
            | A set containing the last two characters of the first word of every known verb
              that is at least two characters long.
            | For English the value True is returned instead of a set.

        """
        if self.language == 'en':
            return True
        return {verb.split(' ')[0][-2:] for verb in self.verbs if len(verb) >= 2}

    def is_valid_verb(self, verb):
        """
        | Checks if the verb is a valid verb in the given language.
        | English words are always treated as possible verbs.
        | Verbs in other languages are filtered by their endings.

        :param verb: string.
            The verb to conjugate.
        :return: bool.
            True if the verb is a valid verb in the language. False otherwise.

        """
        if self.language == 'en':
            return True
        return verb[-2:] in self._allowed_endings

    def get_verb_info(self, verb):
        """
        Gets verb information and returns a VerbInfo instance.

        :param verb: string.
            Verb to conjugate.
        :return: VerbInfo object or None.
            None is returned when the verb is not in the loaded verb list.

        """
        if verb not in self.verbs:
            return None
        entry = self.verbs[verb]
        return VerbInfo(verb, entry['root'], entry['template'])

    def get_conjug_info(self, template):
        """
        Gets conjugation information corresponding to the given template.

        :param template: string.
            Name of the verb ending pattern.
        :return: OrderedDict or None.
            | A deep copy of the conjugated suffixes of the template, so that the caller
              may modify it without altering the loaded data.
            | None is returned when the template is unknown.

        """
        if template not in self.conjugations:
            return None
        return copy.deepcopy(self.conjugations[template])
class Verbiste(ConjugManager):
    """
    This is the class handling the Verbiste xml files.

    :param language: string.
        | The language of the conjugator. The default value is fr for French.
        | The allowed values are: fr, en, es, it, pt, ro.

    """

    @staticmethod
    def _xml_path(json_path):
        """
        Returns the path of the xml file sitting next to a json resource.

        Only a trailing ``.json`` extension is swapped; replacing every occurrence of
        'json' would also corrupt directory names that happen to contain it.

        :param json_path: string.
            Path ending in ``.json``. Any other path is returned unchanged.
        :return: string.
            The same path with an ``.xml`` extension.

        """
        if json_path.endswith('.json'):
            return json_path[:-len('json')] + 'xml'
        return json_path

    def _load_verbs(self, verbs_file):
        """
        Loads and parses the verbs from the xml file into ``self.verbs``.

        :param verbs_file: string.
            Path to the verbs json file; the xml file with the same base name is read.

        """
        self.verbs = self._parse_verbs(self._xml_path(verbs_file))

    def _parse_verbs(self, file):
        """
        Parses the XML file.

        :param file: string or file object.
            XML file containing the verbs.
        :return: dict.
            A dict mapping every verb of the file to its template and root.

        """
        verbs_dic = {}
        xml = ET.parse(file)
        for verb in xml.findall("v"):
            verb_name = verb.find("i").text
            template = verb.find("t").text
            # The part of the template after ':' is the ending to strip from the verb.
            # With an empty ending the slice below yields an empty root;
            # VerbInfo then substitutes the infinitive for it.
            index = - len(template[template.index(":") + 1:])
            root = verb_name[:index]
            verbs_dic[verb_name] = {"template": template, "root": root}
        return verbs_dic

    def _load_conjugations(self, conjugations_file):
        """
        Loads and parses the conjugations from the xml file into ``self.conjugations``.

        :param conjugations_file: string.
            Path to the conjugation json file; the xml file with the same base name is read.

        """
        self.conjugations = self._parse_conjugations(self._xml_path(conjugations_file))

    def _parse_conjugations(self, file):
        """
        Parses the XML file.

        :param file: string or file object.
            XML file containing the conjugation templates.
        :return: dict.
            | A dict mapping every template name to an OrderedDict of moods,
              each mood being an OrderedDict of tenses.
            | Dashes in tense tags are replaced by spaces in the tense names.

        """
        conjugations_dic = {}
        xml = ET.parse(file)
        for template in xml.findall("template"):
            moods = OrderedDict()
            conjugations_dic[template.get("name")] = moods
            for mood in template:
                tenses = OrderedDict()
                moods[mood.tag] = tenses
                for tense in mood:
                    tenses[tense.tag.replace('-', ' ')] = self._load_tense(tense)
        return conjugations_dic

    def _load_tense(self, tense):
        """
        Loads and parses the inflected forms of the tense from the xml file.

        :param tense: xml element.
            The tense being processed; its children are the persons of the tense.
        :return: None, string or list.
            | None if the tense has no person, or has a single person without an ``i`` tag.
            | The text of the ``i`` tag if the tense has a single person.
            | Otherwise a list of (person index, text) tuples, where the text is None
              for persons without an ``i`` tag.

        """
        persons = list(tense)
        if not persons:
            return None
        if len(persons) == 1:
            form = persons[0].find("i")
            return None if form is None else form.text
        conjug = []
        for pers, term in enumerate(persons):
            form = term.find("i")
            conjug.append((pers, None if form is None else form.text))
        return conjug
class VerbInfo:
    """
    This class defines the Verbiste verb information structure.

    :param infinitive: string.
        Infinitive form of the verb.
    :param root: string.
        Lexical root of the verb. A falsy root is replaced by the infinitive.
    :param template: string.
        Name of the verb ending pattern.

    """
    __slots__ = ('infinitive', 'root', 'template')

    def __init__(self, infinitive, root, template):
        self.infinitive = infinitive
        # An empty or missing root means the whole infinitive acts as the root.
        self.root = root if root else infinitive
        self.template = template

    def __repr__(self):
        parts = (__name__, self.__class__.__name__, self.infinitive, self.root, self.template)
        return '{0}.{1}({2}, {3}, {4})'.format(*parts)

    def __eq__(self, other):
        if isinstance(other, VerbInfo):
            return (self.infinitive == other.infinitive
                    and self.root == other.root
                    and self.template == other.template)
        # Let Python try the reflected comparison for foreign types.
        return NotImplemented
class Verb:
    """
    This class defines the Verb Object.
    TODO: Make the conjugated forms iterable by implementing the iterator protocol.

    :param verb_info: VerbInfo Object.
    :param conjug_info: OrderedDict.
        | Maps moods to tenses and tenses to suffixes.
        | It is modified in place: the suffixes are replaced by the conjugated forms.
    :param subject: string.
        Toggles abbreviated or full pronouns.
        The default value is 'abbrev'.
        Select 'pronoun' for full pronouns.
    :param predicted: bool.
        Indicates if the conjugation information was predicted by the model or retrieved from the dataset.

    """
    __slots__ = ('name', 'verb_info', 'conjug_info', 'subject', 'predicted', 'confidence_score')

    language = 'default'

    def __init__(self, verb_info, conjug_info, subject='abbrev', predicted=False):
        self.name = verb_info.infinitive
        self.verb_info = verb_info
        self.conjug_info = conjug_info
        self.subject = subject
        self.predicted = predicted
        self.confidence_score = None
        self._load_conjug()

    def __repr__(self):
        return '{0}.{1}({2})'.format(__name__, self.__class__.__name__, self.name)

    def iterate(self):
        """
        Iterates over all conjugated forms and returns a list of tuples of those conjugated forms.

        :return: list.
            | (mood, tense, form) tuples for tenses holding a single form.
            | (mood, tense, person, form) tuples for the other tenses.
            | Tenses without any form (None) are skipped.

        """
        iterate_results = []
        for mood, tenses in self.conjug_info.items():
            for tense, persons in tenses.items():
                if isinstance(persons, str):
                    iterate_results.append((mood, tense, persons))
                elif persons is not None:
                    # _load_conjug leaves empty tenses as None; they have nothing to yield.
                    iterate_results.extend((mood, tense, pers, form) for pers, form in persons.items())
        return iterate_results

    def _load_conjug(self):
        """
        | Populates the inflected forms of the verb.
        | This is the generic version of this method.
        | It does not add personal pronouns to the conjugated forms.
        | This method can handle any new language if the conjugation structure conforms to the Verbiste XML Schema.

        """
        for mood, tense in self.conjug_info.items():
            for tense_name, persons in tense.items():
                if isinstance(persons, list):
                    persons_dict = OrderedDict()
                    for pers, term in persons:
                        # Only six-person tenses get a person label; other tenses share the key ''.
                        key = _ABBREVS[pers] if len(persons) == 6 else ''
                        if term is None:
                            # Missing form: record it in the result, not in the input list.
                            persons_dict[key] = None
                        elif self.verb_info.root == self.verb_info.template[1:]:
                            # The root is the whole template ending: the term is already the full form.
                            persons_dict[key] = term
                        else:
                            persons_dict[key] = self.verb_info.root + term
                    self.conjug_info[mood][tense_name] = persons_dict
                elif isinstance(persons, str):
                    self.conjug_info[mood][tense_name] = self.verb_info.root + persons
class VerbFr(Verb):
    """
    This class defines the French Verb Object.

    """
    __slots__ = ()

    language = 'fr'

    def _person_key(self, tense_name, persons, pers, term):
        """
        Returns the dictionary key under which the form of one person is stored.

        :param tense_name: string. Name of the tense being processed.
        :param persons: list. All (person index, suffix) pairs of the tense.
        :param pers: int. Index of the person in the tense.
        :param term: string or None. Suffix of the person; used as the key of last resort.
        :return: The subject label, the gender label, the imperative label, or the term itself.

        """
        if len(persons) == 6:
            return _PRONOUNS[self.language][self.subject][pers]
        if tense_name == 'Participe Passé':
            return _GENDER[self.language][self.subject][pers]
        if tense_name == 'Imperatif Présent':
            return _IMPERATIVE_PRONOUNS[self.language][self.subject][pers]
        return term

    def _load_conjug(self):
        """
        | Populates the inflected forms of the verb.
        | Adds personal pronouns to the inflected verbs.

        """
        info = self.verb_info
        for mood, tenses in self.conjug_info.items():
            for tense_name, persons in tenses.items():
                if isinstance(persons, str):
                    # Single-form tense: the suffix is appended to the root.
                    self.conjug_info[mood][tense_name] = info.root + persons
                    continue
                if not isinstance(persons, list):
                    continue
                forms = OrderedDict()
                for pers, term in persons:
                    key = self._person_key(tense_name, persons, pers, term)
                    if term is None:
                        forms[key] = None
                    elif info.root == info.template[1:]:
                        # The term already is the complete form.
                        forms[key] = term
                    else:
                        forms[key] = info.root + term
                self.conjug_info[mood][tense_name] = forms
class VerbEn(Verb):
    """
    This class defines the English Verb Object.

    """
    __slots__ = ()

    language = 'en'

    def _person_key(self, tense_name, persons, pers, term):
        """
        Returns the dictionary key under which the form of one person is stored.

        :param tense_name: string. Name of the tense being processed.
        :param persons: list. All (person index, suffix) pairs of the tense.
        :param pers: int. Index of the person in the tense.
        :param term: string or None. Suffix of the person; used as the key of last resort.
        :return: The subject label (followed by the auxiliary label for the present continuous),
            the imperative label, or the term itself.

        """
        if tense_name == 'indicative present continuous':
            subject_label = _PRONOUNS[self.language][self.subject][pers]
            auxiliary_label = _AUXILIARIES[self.language][self.subject][pers]
            return " ".join((subject_label, auxiliary_label,))
        if len(persons) == 6:
            return _PRONOUNS[self.language][self.subject][pers]
        if tense_name == 'imperative present':
            return _IMPERATIVE_PRONOUNS[self.language][self.subject][pers]
        return term

    def _load_conjug(self):
        """
        | Populates the inflected forms of the verb.
        | Adds personal pronouns to the inflected verbs.
        | The present infinitive is prefixed with 'to '.

        """
        info = self.verb_info
        for mood, tenses in self.conjug_info.items():
            for tense_name, persons in tenses.items():
                if isinstance(persons, str):
                    prefix = 'to ' if tense_name == 'infinitive present' else ''
                    self.conjug_info[mood][tense_name] = prefix + info.root + persons
                    continue
                if not isinstance(persons, list):
                    continue
                forms = OrderedDict()
                for pers, term in persons:
                    key = self._person_key(tense_name, persons, pers, term)
                    if term is None:
                        forms[key] = None
                    elif info.root == info.template[1:]:
                        # The term already is the complete form.
                        forms[key] = term
                    else:
                        forms[key] = info.root + term
                self.conjug_info[mood][tense_name] = forms
class VerbEs(Verb):
    """
    This class defines the Spanish Verb Object.

    """
    __slots__ = ()

    language = 'es'

    def _person_key(self, tense_name, persons, pers):
        """
        Returns the dictionary key under which the form of one person is stored.

        :param tense_name: string. Name of the tense being processed.
        :param persons: list. All (person index, suffix) pairs of the tense.
        :param pers: int. Index of the person in the tense.
        :return: string.
            The subject label, the imperative label (followed by the negation
            for the negative imperative), or an empty string.

        """
        if len(persons) == 6:
            return _PRONOUNS[self.language][self.subject][pers]
        if tense_name == 'Imperativo Afirmativo':
            return _IMPERATIVE_PRONOUNS[self.language][self.subject][pers]
        if tense_name == 'Imperativo non':
            imperative_label = _IMPERATIVE_PRONOUNS[self.language][self.subject][pers]
            return ' '.join((imperative_label, _NEGATION[self.language]))
        return ''

    def _load_conjug(self):
        """
        | Populates the inflected forms of the verb.
        | Adds personal pronouns to the inflected verbs.

        """
        info = self.verb_info
        for mood, tenses in self.conjug_info.items():
            for tense_name, persons in tenses.items():
                if isinstance(persons, str):
                    # Single-form tense: the suffix is appended to the root.
                    self.conjug_info[mood][tense_name] = info.root + persons
                    continue
                if not isinstance(persons, list):
                    continue
                forms = OrderedDict()
                for pers, term in persons:
                    key = self._person_key(tense_name, persons, pers)
                    if term is None:
                        forms[key] = None
                    elif info.root == info.template[1:]:
                        # The term already is the complete form.
                        forms[key] = term
                    else:
                        forms[key] = info.root + term
                self.conjug_info[mood][tense_name] = forms
class VerbIt(Verb):
    """
    This class defines the Italian Verb Object.

    """
    __slots__ = ()

    language = 'it'

    def _person_key(self, tense_name, persons, pers):
        """
        Returns the dictionary key under which the form of one person is stored.

        :param tense_name: string. Name of the tense being processed.
        :param persons: list. All (person index, suffix) pairs of the tense.
        :param pers: int. Index of the person in the tense.
        :return: string.
            The subject label for six-person tenses other than the imperatives,
            the negation for 'Imperativo non', an empty string otherwise
            (which includes 'Imperativo Imperativo').

        """
        if len(persons) == 6 and not tense_name.startswith('Imperativo'):
            return _PRONOUNS[self.language][self.subject][pers]
        if tense_name == 'Imperativo non':
            return _NEGATION[self.language]
        return ''

    def _load_conjug(self):
        """
        | Populates the inflected forms of the verb.
        | Adds personal pronouns to the inflected verbs.

        """
        info = self.verb_info
        for mood, tenses in self.conjug_info.items():
            for tense_name, persons in tenses.items():
                if isinstance(persons, str):
                    # Single-form tense: the suffix is appended to the root.
                    self.conjug_info[mood][tense_name] = info.root + persons
                    continue
                if not isinstance(persons, list):
                    continue
                forms = OrderedDict()
                for pers, term in persons:
                    key = self._person_key(tense_name, persons, pers)
                    if term is None:
                        forms[key] = None
                    elif info.root == info.template[1:]:
                        # The term already is the complete form.
                        forms[key] = term
                    else:
                        forms[key] = info.root + term
                self.conjug_info[mood][tense_name] = forms
class VerbPt(Verb):
    """
    This class defines the Portuguese Verb Object.

    """
    __slots__ = ()

    language = 'pt'

    def _person_key(self, tense_name, persons, pers):
        """
        Returns the dictionary key under which the form of one person is stored.

        :param tense_name: string. Name of the tense being processed.
        :param persons: list. All (person index, suffix) pairs of the tense.
        :param pers: int. Index of the person in the tense.
        :return: string.
            The subject label for six-person tenses other than the imperatives,
            the negation for 'Imperativo Negativo', an empty string otherwise
            (which includes 'Imperativo Afirmativo').

        """
        if len(persons) == 6 and not tense_name.startswith('Imperativo'):
            return _PRONOUNS[self.language][self.subject][pers]
        if tense_name == 'Imperativo Negativo':
            return _NEGATION[self.language]
        return ''

    def _load_conjug(self):
        """
        | Populates the inflected forms of the verb.
        | Adds personal pronouns to the inflected verbs.

        """
        info = self.verb_info
        for mood, tenses in self.conjug_info.items():
            for tense_name, persons in tenses.items():
                if isinstance(persons, str):
                    # Single-form tense: the suffix is appended to the root.
                    self.conjug_info[mood][tense_name] = info.root + persons
                    continue
                if not isinstance(persons, list):
                    continue
                forms = OrderedDict()
                for pers, term in persons:
                    key = self._person_key(tense_name, persons, pers)
                    if term is None:
                        forms[key] = None
                    elif info.root == info.template[1:]:
                        # The term already is the complete form.
                        forms[key] = term
                    else:
                        forms[key] = info.root + term
                self.conjug_info[mood][tense_name] = forms
class VerbRo(Verb):
    """
    This class defines the Romanian Verb Object.

    """
    __slots__ = ()

    language = 'ro'

    def _person_key(self, tense_name, persons, pers):
        """
        Returns the dictionary key under which the form of one person is stored.

        :param tense_name: string. Name of the tense being processed.
        :param persons: list. All (person index, suffix) pairs of the tense.
        :param pers: int. Index of the person in the tense.
        :return: string.
            The subject label, the imperative label, the negation for the
            negative imperative, or an empty string.

        """
        if len(persons) == 6:
            return _PRONOUNS[self.language][self.subject][pers]
        if tense_name == 'Imperativ Imperativ':
            return _IMPERATIVE_PRONOUNS[self.language][self.subject][pers]
        if tense_name == 'Imperativ Negativ':
            return _NEGATION[self.language]
        return ''

    def _load_conjug(self):
        """
        | Populates the inflected forms of the verb.
        | Adds personal pronouns to the inflected verbs.
        | The infinitive is prefixed with 'a '.

        """
        info = self.verb_info
        for mood, tenses in self.conjug_info.items():
            for tense_name, persons in tenses.items():
                if isinstance(persons, str):
                    prefix = 'a ' if tense_name == 'Infinitiv Infinitiv' else ''
                    self.conjug_info[mood][tense_name] = prefix + info.root + persons
                    continue
                if not isinstance(persons, list):
                    continue
                forms = OrderedDict()
                for pers, term in persons:
                    key = self._person_key(tense_name, persons, pers)
                    if term is None:
                        forms[key] = None
                    elif info.root == info.template[1:]:
                        # The term already is the complete form.
                        forms[key] = term
                    else:
                        forms[key] = info.root + term
                self.conjug_info[mood][tense_name] = forms
# Running this module as a script does nothing; it only provides definitions.
if __name__ == "__main__": pass