Skip to content

Languages

A human written or spoken language, defined by an ISO-639 three-letter code. This list is arbitrarily limited for some weird upstream technical reasons, but we'll happily accept pull requests for additional languages once there is a specific need for them to be supported.

Additional values can be introduced in FtM type enumerations within minor releases.

Attribute Value Detail
name language Used in schema definitions
label Language plural: Languages
group languages Used in search indexing to query all properties of a given type
matchable False Suitable for use in entity matching
pivot Suitable for use as a pivot point for connecting to other entities

Data reference

Code Label
ara Arabic
nor Norwegian
cnr Montenegrin
afr Afrikaans
amh Amharic
aze Azerbaijani
bel Belarusian
ben Bangla
bos Bosnian
bul Bulgarian
cat Catalan
ces Czech
dan Danish
deu German
ell Greek
eng English
est Estonian
fas Persian
fil Filipino
fin Finnish
fra French
gle Irish
heb Hebrew
hin Hindi
hrv Croatian
hun Hungarian
hye Armenian
ind Indonesian
isl Icelandic
ita Italian
jpn Japanese
kan Kannada
kat Georgian
kaz Kazakh
khm Khmer
kir Kyrgyz
kor Korean
lao Lao
lav Latvian
lit Lithuanian
ltz Luxembourgish
mkd Macedonian
mlt Maltese
mon Mongolian
msa Malay
mya Burmese
nep Nepali
nld Dutch
pol Polish
por Portuguese
pus Pashto
ron Romanian
rus Russian
sin Sinhala
slk Slovak
slv Slovenian
spa Spanish
sqi Albanian
srp Serbian
swa Swahili
swe Swedish
tam Tamil
tel Telugu
tgk Tajik
tgl Tagalog
tha Thai
tuk Turkmen
tur Turkish
ukr Ukrainian
urd Urdu
uzb Uzbek
vie Vietnamese
zho Chinese

followthemoney.types.LanguageType

Bases: EnumType

A human written language. This list is arbitrarily limited for some weird upstream technical reasons, but we'll happily accept pull requests for additional languages once there is a specific need for them to be supported.

Source code in followthemoney/types/language.py
class LanguageType(EnumType):
    """A human written language, identified by a three-letter language code.

    The set of supported languages is arbitrarily limited for some weird
    upstream technical reasons, but we'll happily accept pull requests for
    additional languages once there is a specific need for them to be
    supported.
    """

    name = "language"
    group = "languages"
    label = _("Language")
    plural = _("Languages")
    matchable = False
    max_length = 16

    # Default allow-list of accepted three-letter language codes.
    LANGUAGES = [
        "afr",
        "amh",
        "ara",
        "aze",
        "bel",
        "ben",
        "bos",
        "bul",
        "cat",
        "ces",
        "cnr",
        "dan",
        "deu",
        "ell",
        "eng",
        "est",
        "fas",
        "fil",
        "fin",
        "fra",
        "gle",
        "heb",
        "hin",
        "hrv",
        "hun",
        "hye",
        "ind",
        "isl",
        "ita",
        "jpn",
        "kan",
        "kat",
        "kaz",
        "khm",
        "kir",
        "kor",
        "lao",
        "lav",
        "lit",
        "ltz",
        "mkd",
        "mlt",
        "mon",
        "msa",
        "mya",
        "nep",
        "nld",
        "nor",
        "pol",
        "por",
        "pus",
        "ron",
        "rus",
        "sin",
        "slk",
        "slv",
        "spa",
        "sqi",
        "srp",
        "swa",
        "swe",
        "tam",
        "tel",
        "tgk",
        "tgl",
        "tha",
        "tuk",
        "tur",
        "ukr",
        "urd",
        "uzb",
        "vie",
        "zho",
    ]
    # NOTE(review): get_env_list presumably lets the FTM_LANGUAGES setting
    # replace the defaults above - confirm against its definition. Whatever it
    # returns is normalised to lower case with surrounding whitespace removed.
    LANGUAGES = [
        lang.lower().strip() for lang in get_env_list("FTM_LANGUAGES", LANGUAGES)
    ]

    def _locale_names(self, locale: Locale) -> EnumValues:
        """Build the mapping of language code to display label for `locale`.

        Three codes carry explicitly translated labels. Every other supported
        code starts out labelled with the code itself and is then replaced by
        the first matching label found in ``locale.languages``.
        """
        # Labels that are set by hand and never overridden by locale data.
        fixed_labels = {
            "ara": gettext("Arabic"),
            "nor": gettext("Norwegian"),
            "cnr": gettext("Montenegrin"),
        }
        names = dict(fixed_labels)
        # Placeholder: a code mapped to itself means "no label found yet".
        for supported in self.LANGUAGES:
            names.setdefault(supported, supported)

        for locale_code, display_name in locale.languages.items():
            alpha3 = iso_639_alpha3(locale_code)
            if alpha3 not in self.LANGUAGES:
                continue
            # Only fill in entries that still hold their placeholder value.
            if names[alpha3] != alpha3:
                continue
            names[alpha3] = display_name
        return names

    def clean_text(
        self,
        text: str,
        fuzzy: bool = False,
        format: Optional[str] = None,
        proxy: Optional["EntityProxy"] = None,
    ) -> Optional[str]:
        """Convert `text` to a supported language code.

        The value is passed through ``iso_639_alpha3``; the result is returned
        when it is one of ``LANGUAGES``, otherwise ``None`` is returned. The
        `fuzzy`, `format` and `proxy` arguments are not used here.
        """
        candidate = iso_639_alpha3(text)
        return candidate if candidate in self.LANGUAGES else None