Skip to content

E-Mail Addresses

| Attribute | Value | Detail |
| --- | --- | --- |
| name | email | Used in schema definitions |
| label | E-Mail Address | plural: E-Mail Addresses |
| group | emails | Used in search indexing to query all properties of a given type |
| matchable | yes | Suitable for use in entity matching |
| pivot | yes | Suitable for use as a pivot point for connecting to other entities |

Python API

followthemoney.types.EmailType

Bases: PropertyType

Internet mail address (e.g. user@example.com). These are notoriously hard to validate, but we use an irresponsibly simple rule and hope for the best.

Source code in followthemoney/types/email.py
class EmailType(PropertyType):
    """Internet mail address (e.g. user@example.com). These are notoriously hard
    to validate, but we use an irresponsibly simple rule and hope for the best."""

    # Exactly one "@", no whitespace anywhere, and a domain part that ends in
    # a dot followed by one or more word characters.
    REGEX_RAW = r"^[^@\s]+@[^@\s]+\.\w+$"
    REGEX = re.compile(REGEX_RAW)
    # Type name, used in schema definitions.
    name = "email"
    # Group name, used in search indexing to query all properties of a type.
    group = "emails"
    label = _("E-Mail Address")
    plural = _("E-Mail Addresses")
    # Suitable for use in entity matching.
    matchable = True
    # Suitable for use as a pivot point for connecting to other entities.
    pivot = True

    # NOTE: no DNS lookup is performed on the domain; the checks below are
    # purely syntactic.

    def validate(
        self, value: str, fuzzy: bool = False, format: Optional[str] = None
    ) -> bool:
        """Check to see if this is a valid email address.

        Args:
            value: The candidate address.
            fuzzy: Not used by this implementation.
            format: Not used by this implementation.

        Returns:
            True if the sanitized value matches ``REGEX`` and its domain part
            is at least four characters long and contains a dot; False
            otherwise (including when ``sanitize_text`` returns None).
        """
        # TODO: adopt email.utils.parseaddr
        email = sanitize_text(value)
        if email is None or not self.REGEX.match(email):
            return False
        _, domain = email.rsplit("@", 1)
        # Shortest accepted domain has the shape "a.bc" or "ab.c".
        if len(domain) < 4 or "." not in domain:
            return False
        return True

    def clean_text(
        self,
        text: str,
        fuzzy: bool = False,
        format: Optional[str] = None,
        proxy: Optional["EntityProxy"] = None,
    ) -> Optional[str]:
        """Parse and normalize an email address.

        The mailbox part is kept as given; the domain part is reduced to a
        host name (if it looks like a URL), lower-cased, stripped of trailing
        dots and IDNA-encoded to ASCII.

        Args:
            text: The raw address.
            fuzzy: Not used by this implementation.
            format: Not used by this implementation.
            proxy: Not used by this implementation.

        Returns:
            The normalized address, or None if this is not an email address
            or its domain cannot be parsed or IDNA-encoded.
        """
        email = strip_quotes(text)
        if email is None or not self.REGEX.match(email):
            return None
        mailbox, domain = email.rsplit("@", 1)
        # TODO: https://pypi.python.org/pypi/publicsuffix/
        # handle URLs by extracting the domain name
        try:
            domain = urlparse(domain).hostname or domain
        except ValueError:
            # urlparse rejects malformed hosts (e.g. unbalanced "[" / "]");
            # treat those as "not an email address" instead of raising.
            return None
        domain = domain.lower().rstrip(".")
        # handle unicode
        try:
            domain = domain.encode("idna").decode("ascii")
        except UnicodeError:
            return None
        return "@".join((mailbox, domain))

clean_text(text, fuzzy=False, format=None, proxy=None)

Parse and normalize an email address.

Returns None if this is not an email address.

Source code in followthemoney/types/email.py
def clean_text(
    self,
    text: str,
    fuzzy: bool = False,
    format: Optional[str] = None,
    proxy: Optional["EntityProxy"] = None,
) -> Optional[str]:
    """Parse and normalize an email address.

    The mailbox part is kept as given; the domain part is reduced to a host
    name (if it looks like a URL), lower-cased, stripped of trailing dots and
    IDNA-encoded to ASCII.

    Args:
        text: The raw address.
        fuzzy: Not used by this implementation.
        format: Not used by this implementation.
        proxy: Not used by this implementation.

    Returns:
        The normalized address, or None if this is not an email address or
        its domain cannot be parsed or IDNA-encoded.
    """
    email = strip_quotes(text)
    if email is None or not self.REGEX.match(email):
        return None
    mailbox, domain = email.rsplit("@", 1)
    # TODO: https://pypi.python.org/pypi/publicsuffix/
    # handle URLs by extracting the domain name
    try:
        domain = urlparse(domain).hostname or domain
    except ValueError:
        # urlparse rejects malformed hosts (e.g. unbalanced "[" / "]");
        # treat those as "not an email address" instead of raising.
        return None
    domain = domain.lower().rstrip(".")
    # handle unicode
    try:
        domain = domain.encode("idna").decode("ascii")
    except UnicodeError:
        return None
    return "@".join((mailbox, domain))

validate(value, fuzzy=False, format=None)

Check to see if this is a valid email address.

Source code in followthemoney/types/email.py
def validate(
    self, value: str, fuzzy: bool = False, format: Optional[str] = None
) -> bool:
    """Report whether ``value`` looks like an email address.

    The value is passed through ``sanitize_text`` and must then match
    ``self.REGEX``; in addition, the part after the last "@" has to be at
    least four characters long and contain a dot. ``fuzzy`` and ``format``
    are accepted but not used here.
    """
    # TODO: adopt email.utils.parseaddr
    candidate = sanitize_text(value)
    if candidate is None:
        return False
    if self.REGEX.match(candidate) is None:
        return False
    # Everything after the final "@" is the domain part.
    host = candidate.rsplit("@", 1)[1]
    return len(host) >= 4 and "." in host