Skip to content

Languages

A human written or spoken language, defined by an ISO-639 three-letter code. This list is arbitrarily limited for some weird upstream technical reasons, but we'll happily accept pull requests for additional languages once there is a specific need for them to be supported.

Additional values can be introduced in FtM type enumerations within minor releases.

Attribute Value Detail
name language Used in schema definitions
label Language plural: Languages
group languages Used in search indexing to query all properties of a given type
matchable False Suitable for use in entity matching
pivot Suitable for use as a pivot point for connecting to other entities

Data reference

Code Label
ara Arabic
nor Norwegian
cnr Montenegrin
eng English
fra French
deu German
rus Russian
spa Spanish
nld Dutch
ron Romanian
kat Georgian
tur Turkish
ltz Luxembourgish
ell Greek
lit Lithuanian
ukr Ukrainian
zho Chinese
bel Belarusian
bul Bulgarian
bos Bosnian
jpn Japanese
ces Czech
lav Latvian
por Portuguese
pol Polish
hye Armenian
hrv Croatian
hin Hindi
heb Hebrew
uzb Uzbek
mon Mongolian
urd Urdu
sqi Albanian
kor Korean
isl Icelandic
ita Italian
est Estonian
fas Persian
swa Swahili
slv Slovenian
slk Slovak
aze Azerbaijani
tgk Tajik
kaz Kazakh
tuk Turkmen
kir Kyrgyz
hun Hungarian
dan Danish
afr Afrikaans
swe Swedish
srp Serbian
ind Indonesian
kan Kannada
mkd Macedonian
mlt Maltese
msa Malay
fin Finnish
cat Catalan
nep Nepali
tgl Tagalog
fil Filipino
mya Burmese
khm Khmer
ben Bangla

followthemoney.types.LanguageType

Bases: EnumType

A human written language. This list is arbitrarily limited for some weird upstream technical reasons, but we'll happily accept pull requests for additional languages once there is a specific need for them to be supported.

Source code in followthemoney/types/language.py
class LanguageType(EnumType):
    """A human written language. This list is arbitrarily limited for some
    weird upstream technical reasons, but we'll happily accept pull requests
    for additional languages once there is a specific need for them to be
    supported.

    Values are three-letter language codes. Only codes contained in
    ``LANGUAGES`` are accepted by ``clean_text`` and offered by
    ``_locale_names``.
    """

    name = const("language")
    group = const("languages")
    label = _("Language")
    plural = _("Languages")
    matchable = False
    max_length = 16

    # Language whitelist
    LANGUAGES = [
        "eng",
        "fra",
        "deu",
        "rus",
        "spa",
        "nld",
        "ron",
        "kat",
        "ara",
        "tur",
        "ltz",
        "ell",
        "lit",
        "ukr",
        "zho",
        "bel",
        "bul",
        "bos",
        "jpn",
        "ces",
        "lav",
        "por",
        "pol",
        "hye",
        "hrv",
        "hin",
        "heb",
        "uzb",
        "mon",
        "urd",
        "sqi",
        "kor",
        "isl",
        "ita",
        "est",
        "nor",
        "fas",
        "swa",
        "slv",
        "slk",
        "aze",
        "tgk",
        "kaz",
        "tuk",
        "kir",
        "hun",
        "dan",
        "afr",
        "swe",
        "srp",
        "ind",
        "kan",
        "mkd",
        "mlt",
        "msa",
        "fin",
        "cat",
        "nep",
        "tgl",
        "fil",
        "mya",
        "khm",
        "cnr",
        "ben",
    ]
    # The built-in list above is passed as the second argument to
    # get_env_list together with the FTM_LANGUAGES key -- presumably it is the
    # fallback used when that setting is absent (confirm against the helper).
    LANGUAGES = get_env_list("FTM_LANGUAGES", LANGUAGES)
    # Normalise whatever came back so membership tests compare like with like.
    LANGUAGES = [lang.lower().strip() for lang in LANGUAGES]

    def _locale_names(self, locale: Locale) -> EnumValues:
        """Build the code -> display label mapping for ``locale``.

        The result contains exactly the codes in ``LANGUAGES``. Labels are
        resolved in this order of precedence:

        1. the hard-coded overrides below (when the code is whitelisted),
        2. the label found in ``locale.languages`` for a key that
           ``iso_639_alpha3`` converts to the whitelisted code,
        3. the code itself, when no label could be found.
        """
        overrides = {
            "ara": gettext("Arabic"),
            "nor": gettext("Norwegian"),
            "cnr": gettext("Montenegrin"),
        }
        # Only keep overrides for whitelisted codes; otherwise a restricted
        # FTM_LANGUAGES setting would still advertise codes that clean_text()
        # rejects. Overrides stay first so the default ordering is unchanged.
        names = {
            code: label
            for code, label in overrides.items()
            if code in self.LANGUAGES
        }
        # Seed the remaining codes with the code itself; an entry whose label
        # still equals its code is treated as "not yet resolved" below.
        for lang in self.LANGUAGES:
            names.setdefault(lang, lang)
        for locale_code, label in locale.languages.items():
            alpha3 = iso_639_alpha3(locale_code)
            if alpha3 is None or alpha3 not in self.LANGUAGES:
                continue
            # First label wins; never overwrite an override or an earlier hit.
            if names[alpha3] == alpha3:
                names[alpha3] = label
        return names

    def clean_text(
        self,
        text: str,
        fuzzy: bool = False,
        format: Optional[str] = None,
        proxy: Optional["EntityProxy"] = None,
    ) -> Optional[str]:
        """Convert ``text`` to a whitelisted three-letter language code.

        ``text`` is passed through ``iso_639_alpha3``; the resulting code is
        returned when it is contained in ``LANGUAGES``, otherwise ``None`` is
        returned. ``fuzzy``, ``format`` and ``proxy`` are accepted but not
        used by this implementation.
        """
        code = iso_639_alpha3(text)
        if code not in self.LANGUAGES:
            return None
        return code