Skip to content

Addresses

Attribute Value Detail
name address Used in schema definitions
label Address plural: Addresses
group addresses Used in search indexing to query all properties of a given type
matchable Suitable for use in entity matching
pivot Suitable for use as a pivot point for connecting to other entities

Python API

followthemoney.types.AddressType

Bases: PropertyType

A geographic address used to describe a location of a residence or post box. There is no specified order for the sub-parts of an address (e.g. street, city, postal code), and we should consider introducing an Address schema type to retain fidelity in cases where address parts are specified.

Source code in followthemoney/types/address.py
class AddressType(PropertyType):
    """A geographic address used to describe a location of a residence or post
    box. There is no specified order for the sub-parts of an address (e.g. street,
    city, postal code), and we should consider introducing an Address schema type
    to retain fidelity in cases where address parts are specified."""

    # Separators that `clean_text` rewrites to ", ": CRLF/LF line breaks,
    # upper-case <BR/> and <BR> tags, tabs, "ESQ.," / "ESQ," and semicolons.
    LINE_BREAKS = re.compile(r"(\r\n|\n|<BR/>|<BR>|\t|ESQ\.,|ESQ,|;)")
    # A comma, at most one whitespace character, then another comma or a
    # period; `clean_text` collapses each such run to a single ", ".
    COMMATA = re.compile(r"(,\s?[,\.])")
    name = "address"
    group = "addresses"
    label = _("Address")
    plural = _("Addresses")
    matchable = True
    pivot = True

    def clean_text(
        self,
        text: str,
        fuzzy: bool = False,
        format: Optional[str] = None,
        proxy: Optional["EntityProxy"] = None,
    ) -> Optional[str]:
        """Basic clean-up.

        Replaces every ``LINE_BREAKS`` match and then every ``COMMATA`` match
        with ``", "`` and passes the result through ``collapse_spaces``.

        ``fuzzy``, ``format`` and ``proxy`` are accepted but not consulted by
        this implementation.

        Returns the cleaned address, or ``None`` if nothing is left.
        """
        address = self.LINE_BREAKS.sub(", ", text)
        address = self.COMMATA.sub(", ", address)
        # `collapse_spaces` may yield None or an empty string; both are
        # reported as "no value".
        return collapse_spaces(address) or None

    def compare(self, left: str, right: str) -> float:
        """Score the similarity of two address strings.

        Both values are passed through ``normalize_address``; if either one
        normalises to ``None`` the score is ``0.0``. Otherwise the score is
        whatever ``levenshtein_similarity`` returns for the two normalised
        forms with ``max_edits=3``.
        """
        left_norm = normalize_address(left)
        right_norm = normalize_address(right)
        if left_norm is None or right_norm is None:
            return 0.0
        return levenshtein_similarity(left_norm, right_norm, max_edits=3)

    def _specificity(self, value: str) -> float:
        """Return ``dampen(10, 60, value)`` as the specificity of ``value``."""
        # NOTE(review): presumably 10 and 60 are lower/upper length bounds for
        # the dampening curve -- confirm against `dampen`.
        return dampen(10, 60, value)

    def node_id(self, value: str) -> Optional[str]:
        """Return a graph node identifier for ``value``.

        The identifier is ``"addr:"`` followed by the slug of the normalised
        address, so that spellings which normalise to the same slug share one
        node. Returns ``None`` when no slug can be derived.
        """
        slug = slugify(normalize_address(value))
        if slug is None:
            return None
        # Use the slug, not the raw input: the raw value would give every
        # formatting variant of the same address its own node.
        return f"addr:{slug}"

clean_text(text, fuzzy=False, format=None, proxy=None)

Basic clean-up.

Source code in followthemoney/types/address.py
def clean_text(
    self,
    text: str,
    fuzzy: bool = False,
    format: Optional[str] = None,
    proxy: Optional["EntityProxy"] = None,
) -> Optional[str]:
    """Tidy up the separators and spacing of a raw address string.

    Every ``self.LINE_BREAKS`` match, then every ``self.COMMATA`` match, is
    replaced by ``", "`` and the result is run through ``collapse_spaces``.
    ``fuzzy``, ``format`` and ``proxy`` are accepted but unused here.

    Returns the cleaned string, or ``None`` when it is ``None`` or empty.
    """
    with_commas = self.LINE_BREAKS.sub(", ", text)
    single_commas = self.COMMATA.sub(", ", with_commas)
    result = collapse_spaces(single_commas)
    # Only a non-empty result counts as a usable address.
    if result is not None and len(result):
        return result
    return None