Statement data model

`followthemoney.statement.statement.Statement`

Bases: object

A single statement about a property relevant to an entity.

For example, this could be used to say: "In dataset A, entity X has the property name set to 'John Smith'. I first observed this at K, and last saw it at L."

Null property values are not supported. This might need to change if we want to support making property-less entities.

Source code in followthemoney/statement/statement.py

class Statement(object):
    """A single statement about a property relevant to an entity.

    For example, this could be used to say: "In dataset A, entity X has the
    property `name` set to 'John Smith'. I first observed this at K, and last
    saw it at L."

    Null property values are not supported. This might need to change if we
    want to support making property-less entities.

    Hashing, equality and ordering of statements are all derived from the
    statement `id`.
    """

    BASE = BASE_ID

    __slots__ = [
        "id",
        "entity_id",
        "canonical_id",
        "prop",
        "schema",
        "value",
        "dataset",
        "lang",
        "original_value",
        "external",
        "first_seen",
        "last_seen",
        "origin",
    ]

    def __init__(
        self,
        entity_id: str,
        prop: str,
        schema: str,
        value: str,
        dataset: str,
        lang: Optional[str] = None,
        original_value: Optional[str] = None,
        first_seen: Optional[str] = None,
        external: bool = False,
        id: Optional[str] = None,
        canonical_id: Optional[str] = None,
        last_seen: Optional[str] = None,
        origin: Optional[str] = None,
    ):
        """Store the statement fields and derive the defaults.

        `canonical_id` falls back to `entity_id`, `last_seen` falls back to
        `first_seen`, and a missing `id` is computed with `generate_key()`.
        """
        self.entity_id = entity_id
        self.canonical_id = canonical_id or entity_id
        self.prop = prop
        self.schema = schema
        self.value = value
        self.dataset = dataset
        self.lang = lang
        self.original_value = original_value
        self.first_seen = first_seen
        self.last_seen = last_seen or first_seen
        self.external = external
        self.origin = origin
        # The key depends on the fields assigned above, so it is derived last.
        if id is None:
            id = self.generate_key()
        self.id = id

    @property
    def prop_type(self) -> str:
        """The type of the property, e.g. 'string', 'number', 'url'."""
        return get_prop_type(self.schema, self.prop)

    def to_dict(self) -> StatementDict:
        """Return all statement fields as a plain dictionary."""
        return {
            "canonical_id": self.canonical_id,
            "entity_id": self.entity_id,
            "prop": self.prop,
            "schema": self.schema,
            "value": self.value,
            "dataset": self.dataset,
            "lang": self.lang,
            "original_value": self.original_value,
            "first_seen": self.first_seen,
            "last_seen": self.last_seen,
            "external": self.external,
            "origin": self.origin,
            "id": self.id,
        }

    def to_csv_row(self) -> Dict[str, Optional[str]]:
        """Return the dictionary form with `external` rendered via `bool_text`
        and an added `prop_type` column."""
        data = cast(Dict[str, Optional[str]], self.to_dict())
        data["external"] = bool_text(self.external)
        data["prop_type"] = get_prop_type(self.schema, self.prop)
        return data

    def to_db_row(self) -> Dict[str, Any]:
        """Return the dictionary form with both timestamps passed through
        `iso_datetime` and an added `prop_type` column."""
        data = cast(Dict[str, Any], self.to_dict())
        # NOTE(review): iso_datetime presumably parses the ISO strings into
        # datetime objects for the database layer - confirm against the helper.
        data["first_seen"] = iso_datetime(self.first_seen)
        data["last_seen"] = iso_datetime(self.last_seen)
        data["prop_type"] = get_prop_type(self.schema, self.prop)
        return data

    def __hash__(self) -> int:
        """Hash the statement by its ID, warning if no ID is set."""
        if self.id is None:
            warnings.warn(
                "Hashing a statement without an ID results in undefined behaviour",
                RuntimeWarning,
            )
        return hash(self.id)

    def __repr__(self) -> str:
        return "<Statement(%r, %r, %r)>" % (self.entity_id, self.prop, self.value)

    def __eq__(self, other: Any) -> bool:
        """Two statements are equal when their IDs are equal.

        Any object exposing an `id` attribute is compared by that ID. For
        objects without one (e.g. `None` or a string), `NotImplemented` is
        returned so that Python falls back to its default comparison instead
        of raising `AttributeError`.
        """
        try:
            other_id = other.id
        except AttributeError:
            return NotImplemented
        return not self.id != other_id

    def __lt__(self, other: Any) -> bool:
        """Order statements so that BASE_ID statements sort first, then by ID."""
        self_key = (self.prop != BASE_ID, self.id or "")
        other_key = (other.prop != BASE_ID, other.id or "")
        return self_key < other_key

    def clone(self: Self) -> "Statement":
        """Make a deep copy of the given statement."""
        return Statement.from_dict(self.to_dict())

    def generate_key(self) -> Optional[str]:
        """Compute the statement ID from this statement's own key fields."""
        return self.make_key(
            self.dataset,
            self.entity_id,
            self.prop,
            self.value,
            self.external,
        )

    @classmethod
    def make_key(
        cls,
        dataset: str,
        entity_id: str,
        prop: str,
        value: str,
        external: Optional[bool],
    ) -> Optional[str]:
        """Hash the key properties of a statement record to make a unique ID.

        Returns None when either `prop` or `value` is None; otherwise the
        SHA-1 hex digest of the dot-joined key fields.
        """
        if prop is None or value is None:
            return None
        key = f"{dataset}.{entity_id}.{prop}.{value}"
        if external:
            # We consider the external flag in key composition to avoid race conditions
            # where a certain entity might be emitted as external while it is already
            # linked in to the graph via another route.
            key = f"{key}.ext"
        return hashlib.sha1(key.encode(HASH_ENCODING)).hexdigest()

    @classmethod
    def from_dict(cls, data: StatementDict) -> "Statement":
        """Build a statement from a dictionary as produced by `to_dict()`.

        The keys `entity_id`, `prop`, `schema`, `value` and `dataset` are
        required; all other keys are optional.
        """
        return cls(
            entity_id=data["entity_id"],
            prop=data["prop"],
            schema=data["schema"],
            value=data["value"],
            dataset=data["dataset"],
            lang=data.get("lang", None),
            original_value=data.get("original_value", None),
            first_seen=data.get("first_seen", None),
            external=data.get("external", False),
            id=data.get("id", None),
            canonical_id=data.get("canonical_id", None),
            last_seen=data.get("last_seen", None),
            origin=data.get("origin", None),
        )

    @classmethod
    def from_db_row(cls, row: Row[Any]) -> "Statement":
        """Build a statement from a database row, passing both timestamps
        through `datetime_iso`."""
        return cls(
            id=row.id,
            canonical_id=row.canonical_id,
            entity_id=row.entity_id,
            prop=row.prop,
            schema=row.schema,
            value=row.value,
            dataset=row.dataset,
            lang=row.lang,
            original_value=row.original_value,
            first_seen=datetime_iso(row.first_seen),
            external=row.external,
            last_seen=datetime_iso(row.last_seen),
            origin=row.origin,
        )

    @classmethod
    def from_entity(
        cls,
        entity: "EntityProxy",
        dataset: str,
        first_seen: Optional[str] = None,
        last_seen: Optional[str] = None,
        external: bool = False,
        origin: Optional[str] = None,
    ) -> Generator["Statement", None, None]:
        """Yield the statements that describe the given entity.

        A `StatementEntity` yields its own statements unchanged. Any other
        entity yields one BASE_ID statement carrying the entity ID, followed
        by one statement per property value.

        Raises:
            ValueError: if the entity has no ID.
        """
        # Imported here rather than at module level; presumably to avoid a
        # circular import between the two modules.
        from followthemoney.statement.entity import StatementEntity

        if entity.id is None:
            raise ValueError("Cannot create statements for entity without ID!")

        # If the entity is already a StatementEntity, we return its statements directly.
        if isinstance(entity, StatementEntity):
            yield from entity.statements
            return

        yield cls(
            entity_id=entity.id,
            prop=BASE_ID,
            schema=entity.schema.name,
            value=entity.id,
            dataset=dataset,
            external=external,
            first_seen=first_seen,
            last_seen=last_seen,
            origin=origin,
        )
        for prop, value in entity.itervalues():
            yield cls(
                entity_id=entity.id,
                prop=prop.name,
                schema=entity.schema.name,
                value=value,
                dataset=dataset,
                external=external,
                first_seen=first_seen,
                last_seen=last_seen,
                origin=origin,
            )

`clone()`

Make a deep copy of the given statement.

Source code in followthemoney/statement/statement.py

def clone(self: Self) -> "Statement":
    """Return a new statement rebuilt from this statement's dictionary form."""
    snapshot = self.to_dict()
    return Statement.from_dict(snapshot)

`make_key(dataset, entity_id, prop, value, external)` `classmethod`

Hash the key properties of a statement record to make a unique ID.

Source code in followthemoney/statement/statement.py

@classmethod
def make_key(
    cls,
    dataset: str,
    entity_id: str,
    prop: str,
    value: str,
    external: Optional[bool],
) -> Optional[str]:
    """Derive a statement ID by hashing the fields that identify it.

    Gives back None if `prop` or `value` is missing; otherwise the SHA-1 hex
    digest of the dot-separated key fields.
    """
    if None in (prop, value):
        return None
    # The external flag is part of the key to avoid race conditions where an
    # entity might be emitted as external while it is already linked in to
    # the graph via another route.
    suffix = ".ext" if external else ""
    raw_key = f"{dataset}.{entity_id}.{prop}.{value}{suffix}"
    encoded = raw_key.encode(HASH_ENCODING)
    return hashlib.sha1(encoded).hexdigest()

`prop_type()`

The type of the property, e.g. 'string', 'number', 'url'.

Source code in followthemoney/statement/statement.py

@property
def prop_type(self) -> str:
    """Look up the type of this statement's property, e.g. 'string',
    'number', 'url'."""
    schema_name, prop_name = self.schema, self.prop
    return get_prop_type(schema_name, prop_name)

`followthemoney.statement.entity.StatementEntity`

Bases: EntityProxy

An entity object that can link to a set of datasets that it is sourced from.

Source code in followthemoney/statement/entity.py

class StatementEntity(EntityProxy):
    """An entity object that can link to a set of datasets that it is sourced from.

    Property values are stored as `Statement` objects, grouped by property
    name in `_statements`.
    """

    __slots__ = (
        "schema",
        "id",
        "_caption",
        "extra_referents",
        "dataset",
        "last_change",
        "_statements",
    )

    def __init__(self, dataset: Dataset, data: Dict[str, Any], cleaned: bool = True):
        """Build an entity from its dictionary form.

        Reads the keys `schema`, `id`, `referents`, `last_change`,
        `properties` and `statements` from `data`.

        Raises:
            InvalidData: if no schema can be resolved for the entity.
        """
        # Work on a copy, as keys are popped from the dictionary below.
        data = dict(data or {})
        schema = Model.instance().get(data.pop("schema", None))
        if schema is None:
            raise InvalidData(gettext("No schema for entity."))
        self.schema = schema

        self._caption: Optional[str] = None
        """A pre-computed label for this entity."""

        self.extra_referents: Set[str] = set(data.pop("referents", []))
        """The IDs of all entities which are included in this canonical entity."""

        self.last_change: Optional[str] = data.get("last_change", None)
        """The last time this entity was changed."""

        self.dataset = dataset
        """The default dataset for new statements."""

        self.id: Optional[str] = data.pop("id", None)
        self._statements: Dict[str, Set[Statement]] = {}

        properties = data.pop("properties", None)
        if isinstance(properties, Mapping):
            for key, value in properties.items():
                self.add(key, value, cleaned=cleaned, quiet=True)

        for stmt_data in data.pop("statements", []):
            stmt = Statement.from_dict(stmt_data)
            if self.id is not None:
                stmt.canonical_id = self.id
            self.add_statement(stmt)

    @property
    def _properties(self) -> Dict[str, List[str]]:  # type: ignore
        """Map each property name to the values of its statements (values
        are not de-duplicated here)."""
        return {p: [s.value for s in v] for p, v in self._statements.items()}

    def _iter_stmt(self) -> Generator[Statement, None, None]:
        """Yield every stored statement, filling in a missing entity ID or
        statement ID on the way."""
        for stmts in self._statements.values():
            for stmt in stmts:
                if stmt.entity_id is None and self.id is not None:
                    stmt.entity_id = self.id
                    stmt.id = stmt.generate_key()
                if stmt.id is None:
                    stmt.id = stmt.generate_key()
                yield stmt

    @property
    def statements(self) -> Generator[Statement, None, None]:
        """Return all statements for this entity, with extra ID statement.

        The extra statement is only emitted when the entity has an ID. It
        uses the BASE_ID property and carries a checksum over the schema name
        and the sorted statement IDs as its value.
        """
        ids: List[str] = []
        last_seen: Set[str] = set()
        first_seen: Set[str] = set()
        for stmt in self._iter_stmt():
            yield stmt
            if stmt.id is not None:
                ids.append(stmt.id)
            if stmt.last_seen is not None:
                last_seen.add(stmt.last_seen)
            if stmt.first_seen is not None:
                first_seen.add(stmt.first_seen)
        if self.id is not None:
            digest = sha1(self.schema.name.encode(HASH_ENCODING))
            for stmt_id in sorted(ids):
                digest.update(stmt_id.encode(HASH_ENCODING))
            checksum = digest.hexdigest()
            # This is to make the last_change value stable across
            # serialisation:
            first = self.last_change or min(first_seen, default=None)
            yield Statement(
                canonical_id=self.id,
                entity_id=self.id,
                prop=BASE_ID,
                schema=self.schema.name,
                value=checksum,
                dataset=self.dataset.name,
                first_seen=first,
                last_seen=max(last_seen, default=None),
            )

    @property
    def first_seen(self) -> Optional[str]:
        """The smallest `first_seen` of any statement, or None if none is set."""
        seen = (s.first_seen for s in self._iter_stmt() if s.first_seen is not None)
        return min(seen, default=None)

    @property
    def last_seen(self) -> Optional[str]:
        """The largest `last_seen` of any statement, or None if none is set."""
        seen = (s.last_seen for s in self._iter_stmt() if s.last_seen is not None)
        return max(seen, default=None)

    @property
    def datasets(self) -> Set[str]:
        """The names of all datasets referenced by the entity's statements."""
        datasets: Set[str] = set()
        for stmt in self._iter_stmt():
            datasets.add(stmt.dataset)
        return datasets

    @property
    def referents(self) -> Set[str]:
        """The extra referents plus every statement entity ID that differs
        from this entity's own ID."""
        referents: Set[str] = set(self.extra_referents)
        for stmt in self._iter_stmt():
            if stmt.entity_id is not None and stmt.entity_id != self.id:
                referents.add(stmt.entity_id)
        return referents

    @property
    def key_prefix(self) -> Optional[str]:
        """The name of the entity's default dataset."""
        return self.dataset.name

    @key_prefix.setter
    def key_prefix(self, dataset: Optional[str]) -> None:
        raise NotImplementedError()

    def add_statement(self, stmt: Statement) -> None:
        """Attach an existing statement to the entity.

        If the statement's schema is not already covered by the entity's
        schema, the entity schema is replaced by the common schema of both.
        BASE_ID statements are not stored; they only advance `last_change`.

        Raises:
            InvalidData: if no common schema exists.
        """
        schema = self.schema
        if schema.name != stmt.schema and not schema.is_a(stmt.schema):
            try:
                self.schema = schema.model.common_schema(schema, stmt.schema)
            except InvalidData as exc:
                raise InvalidData(f"{self.id}: {exc}") from exc

        if stmt.prop == BASE_ID:
            if stmt.first_seen is not None:
                # The last_change attribute describes the latest checksum change
                # of any emitted component of the entity, which is stored in the BASE
                # field.
                if self.last_change is None:
                    self.last_change = stmt.first_seen
                else:
                    self.last_change = max(self.last_change, stmt.first_seen)
        else:
            # Any new value may change the caption, so drop the cached one.
            self._caption = None
            if stmt.prop not in self._statements:
                self._statements[stmt.prop] = set()
            self._statements[stmt.prop].add(stmt)

    def get(self, prop: P, quiet: bool = False) -> List[str]:
        """Return the distinct values of the given property (empty list if
        the property is unknown or has no statements)."""
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None or prop_name not in self._statements:
            return []
        return list({s.value for s in self._statements[prop_name]})

    def get_statements(self, prop: P, quiet: bool = False) -> List[Statement]:
        """Return the statements stored for the given property."""
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None or prop_name not in self._statements:
            return []
        return list(self._statements[prop_name])

    def set(
        self,
        prop: P,
        values: Values,
        cleaned: bool = False,
        quiet: bool = False,
        fuzzy: bool = False,
        format: Optional[str] = None,
        lang: Optional[str] = None,
        original_value: Optional[str] = None,
        origin: Optional[str] = None,
    ) -> None:
        """Replace all statements of the given property with the new values.

        The remaining arguments are forwarded unchanged to `add()`.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return
        if self._statements.pop(prop_name, None) is not None:
            # The cached caption may have been taken from the statements just
            # dropped. `add()` only resets it when at least one value is
            # actually added, so reset it here as well.
            self._caption = None
        return self.add(
            prop,
            values,
            cleaned=cleaned,
            quiet=quiet,
            fuzzy=fuzzy,
            format=format,
            lang=lang,
            original_value=original_value,
            origin=origin,
        )

    def add(
        self,
        prop: P,
        values: Values,
        cleaned: bool = False,
        quiet: bool = False,
        fuzzy: bool = False,
        format: Optional[str] = None,
        lang: Optional[str] = None,
        original_value: Optional[str] = None,
        origin: Optional[str] = None,
    ) -> None:
        """Add one statement per value to the given property.

        Nothing happens if the property name cannot be resolved. Each value
        produced by `string_list` is handed to `unsafe_add()`.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return None
        prop = self.schema.properties[prop_name]
        for value in string_list(values, sanitize=not cleaned):
            self.unsafe_add(
                prop,
                value,
                cleaned=cleaned,
                fuzzy=fuzzy,
                format=format,
                quiet=quiet,
                lang=lang,
                original_value=original_value,
                origin=origin,
            )
        return None

    def unsafe_add(
        self,
        prop: Property,
        value: Optional[str],
        cleaned: bool = False,
        fuzzy: bool = False,
        format: Optional[str] = None,
        quiet: bool = False,
        schema: Optional[str] = None,
        dataset: Optional[str] = None,
        seen: Optional[str] = None,
        lang: Optional[str] = None,
        original_value: Optional[str] = None,
        origin: Optional[str] = None,
    ) -> Optional[str]:
        """Add a single statement to the entity and return its cleaned value.

        Returns None without adding anything when the value is empty, when
        cleaning rejects it, or when `prop` is a stub and `quiet` is set.

        Raises:
            InvalidData: if `prop` is a stub property and `quiet` is not set,
                or if the entity has no ID.
        """
        if value is None or len(value) == 0:
            return None

        # Don't allow setting the reverse properties:
        if prop.stub:
            if quiet:
                return None
            msg = gettext("Stub property (%s): %s")
            raise InvalidData(msg % (self.schema, prop))

        if lang is not None:
            lang = registry.language.clean_text(lang)

        clean: Optional[str] = value
        if not cleaned:
            clean = prop.type.clean_text(value, proxy=self, fuzzy=fuzzy, format=format)

        if clean is None:
            return None

        # Keep the raw input around when cleaning has altered it.
        if original_value is None and clean != value:
            original_value = value

        if self.id is None:
            raise InvalidData("Cannot add statement to entity without ID!")
        stmt = Statement(
            entity_id=self.id,
            prop=prop.name,
            schema=schema or self.schema.name,
            value=clean,
            dataset=dataset or self.dataset.name,
            lang=lang,
            original_value=original_value,
            first_seen=seen,
            origin=origin,
        )
        self.add_statement(stmt)
        return clean

    def pop(self, prop: P, quiet: bool = True) -> List[str]:
        """Remove all statements of the given property and return their
        distinct values."""
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None or prop_name not in self._statements:
            return []
        if prop_name in self.schema.caption:
            self._caption = None
        return list({s.value for s in self._statements.pop(prop_name, [])})

    def remove(self, prop: P, value: str, quiet: bool = True) -> None:
        """Remove every statement of the given property that has this value."""
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is not None and prop_name in self._statements:
            stmts = {s for s in self._statements[prop_name] if s.value != value}
            if stmts:
                self._statements[prop_name] = stmts
            else:
                # Do not keep an empty set behind, otherwise `properties` and
                # `iterprops()` would still report a property with no values.
                del self._statements[prop_name]
            if prop_name in self.schema.caption:
                self._caption = None

    def itervalues(self) -> Generator[Tuple[Property, str], None, None]:
        """Yield a `(property, value)` pair for each distinct value."""
        for name, statements in self._statements.items():
            prop = self.schema.properties[name]
            for value in set((s.value for s in statements)):
                yield (prop, value)

    def get_type_values(
        self, type_: PropertyType, matchable: bool = False
    ) -> List[str]:
        """Return the distinct values of all properties of the given type."""
        combined: Set[str] = set()
        for stmt in self.get_type_statements(type_, matchable=matchable):
            combined.add(stmt.value)
        return list(combined)

    def get_type_statements(
        self, type_: PropertyType, matchable: bool = False
    ) -> List[Statement]:
        """Return the statements of all properties of the given type. With
        `matchable` set, properties that are not matchable are skipped."""
        combined = []
        for prop_name, statements in self._statements.items():
            prop = self.schema.properties[prop_name]
            if matchable and not prop.matchable:
                continue
            if prop.type == type_:
                for statement in statements:
                    combined.append(statement)
        return combined

    @property
    def properties(self) -> Dict[str, List[str]]:
        """Map each property name to the list of its distinct values."""
        return {p: list({s.value for s in vs}) for p, vs in self._statements.items()}

    @property
    def caption(self) -> str:
        """The user-facing label to be used for this entity. This checks a list
        of properties defined by the schema (caption) and returns the first
        available value. If no caption is available, return the schema label.

        This implementation prefers statements where the language property is that
        of the preferred system language."""
        if self._caption is None:
            for prop_ in self.schema.caption:
                stmts = self._statements.get(prop_)
                if stmts is None:
                    continue
                prop = self.schema.properties[prop_]
                if prop.type == registry.name and len(stmts) > 1:
                    values = [LangStr(s.value, lang=s.lang) for s in stmts]
                    name = pick_lang_name(values)
                    if name is not None:
                        self._caption = name
                        return self._caption

                # Otherwise take the value of the first statement in sort order.
                for stmt in sorted(stmts):
                    self._caption = stmt.value
                    return self._caption
            if self._caption is None:
                self._caption = self.schema.label
        return self._caption

    def iterprops(self) -> List[Property]:
        """Return the property objects for which statements are stored."""
        return [self.schema.properties[p] for p in self._statements.keys()]

    def clone(self: SE) -> SE:
        """Create a new entity with the same schema, ID and dataset that
        holds the same statement objects.

        NOTE(review): `extra_referents` is not carried over, and the
        statement objects are shared with the original rather than copied.
        """
        data = {"schema": self.schema.name, "id": self.id}
        cloned = type(self)(self.dataset, data)
        for stmt in self._iter_stmt():
            cloned.add_statement(stmt)
        return cloned

    def merge(self: SE, other: EntityProxy) -> SE:
        """Merge the values of another entity into this one and return self.

        Statements of another `StatementEntity` are re-pointed at this
        entity's ID via `canonical_id` and added as they are; values of any
        other entity are added as new statements.

        Raises:
            InvalidData: if the two schemata have no common schema.
        """
        try:
            self.schema = self.schema.model.common_schema(self.schema, other.schema)
        except InvalidData as e:
            msg = "Cannot merge entities with id %s: %s"
            raise InvalidData(msg % (self.id, e)) from e

        if not isinstance(other, StatementEntity):
            for prop, value in other.itervalues():
                self.unsafe_add(prop, value, cleaned=True, quiet=True)
            return self
        for stmt in other._iter_stmt():
            if self.id is not None:
                stmt.canonical_id = self.id
            self.add_statement(stmt)
        self.extra_referents.update(other.extra_referents)
        return self

    def to_context_dict(self) -> Dict[str, Any]:
        """Return a dictionary representation of the entity for context."""
        data: Dict[str, Any] = {
            "id": self.id,
            "caption": self.caption,
            "schema": self.schema.name,
        }
        referents: Set[Optional[str]] = set(self.extra_referents)
        datasets = set(self.datasets)
        origins: Set[str] = set()
        first_seen = None
        last_seen = None
        for stmts in self._statements.values():
            for stmt in stmts:
                if stmt.first_seen is not None:
                    if first_seen is None or stmt.first_seen < first_seen:
                        first_seen = stmt.first_seen
                if stmt.last_seen is not None:
                    if last_seen is None or stmt.last_seen > last_seen:
                        last_seen = stmt.last_seen
                if stmt.entity_id is not None and stmt.entity_id != self.id:
                    referents.add(stmt.entity_id)
                datasets.add(stmt.dataset)
                if stmt.origin is not None:
                    origins.add(stmt.origin)

        data["referents"] = list(referents)
        data["datasets"] = list(datasets)
        # Optional keys are only emitted when there is something to report.
        if origins:
            data["origin"] = list(origins)

        if first_seen is not None:
            data["first_seen"] = first_seen
        if last_seen is not None:
            data["last_seen"] = last_seen
        if self.last_change is not None:
            data["last_change"] = self.last_change
        return data

    def to_dict(self) -> Dict[str, Any]:
        """Return the context dictionary plus a `properties` mapping."""
        data = self.to_context_dict()
        data["properties"] = self.properties
        return data

    def to_statement_dict(self) -> Dict[str, Any]:
        """Return a dictionary representation of the entity's statements."""
        data = self.to_context_dict()
        data["statements"] = [stmt.to_dict() for stmt in self.statements]
        return data

    def _checksum_digest(self) -> "_Hash":
        """Create a SHA1 digest of the entity's ID, schema and properties for
        change detection. This is returned as a hashlib digest object so that
        it can be subclassed."""
        digest = sha1()
        if self.id is not None:
            digest.update(self.id.encode(HASH_ENCODING))
        statement_ids: List[str] = []
        for stmts in self._statements.values():
            for stmt in stmts:
                if stmt.id is not None:
                    statement_ids.append(stmt.id)
        # Sort the IDs so the digest does not depend on set iteration order.
        for stmt_id in sorted(statement_ids):
            digest.update(stmt_id.encode(HASH_ENCODING))
            digest.update(b"\x1e")
        return digest

    def __len__(self) -> int:
        """The number of stored statements, plus one."""
        return sum(1 for _ in self._iter_stmt()) + 1

    @classmethod
    def from_dict(
        cls: Type[SE],
        data: Dict[str, Any],
        cleaned: bool = True,
        default_dataset: Optional[Dataset] = None,
    ) -> SE:
        """Build an entity from a dictionary, using `DefaultDataset` when no
        dataset is given."""
        # Exists only for backwards compatibility.
        dataset = default_dataset or DefaultDataset
        return cls(dataset, data, cleaned=cleaned)

    @classmethod
    def from_data(
        cls: Type[SE],
        dataset: Dataset,
        data: Dict[str, Any],
        cleaned: bool = True,
    ) -> SE:
        """Build an entity for the given dataset from its dictionary form."""
        return cls(dataset, data, cleaned=cleaned)

    @classmethod
    def from_statements(
        cls: Type[SE],
        dataset: Dataset,
        statements: Iterable[Statement],
    ) -> SE:
        """Build an entity from a stream of statements.

        The entity schema is the common schema of all statement schemata.
        BASE_ID statements are not stored; their `first_seen` values
        determine `last_change`.

        Raises:
            InvalidData: if the schemata cannot be reconciled, or if no
                schema is found at all (e.g. for an empty iterable).
        """
        model = Model.instance()
        canonical_id: Optional[str] = None
        schemata: Set[str] = set()
        first_seens: Set[str] = set()
        props: Dict[str, Set[Statement]] = {}
        for stmt in statements:
            schemata.add(stmt.schema)
            canonical_id = stmt.canonical_id or canonical_id or stmt.entity_id
            if stmt.prop == BASE_ID:
                if stmt.first_seen is not None:
                    first_seens.add(stmt.first_seen)
            else:
                if stmt.prop not in props:
                    props[stmt.prop] = set()
                props[stmt.prop].add(stmt)

        schema: Optional[Schema] = None
        for name in schemata:
            if schema is None:
                schema = model.get(name)
            elif schema.name != name:
                try:
                    schema = model.common_schema(schema, name)
                except InvalidData as exc:
                    raise InvalidData(f"{canonical_id}: {exc}") from exc

        if schema is None:
            err = "No valid schema for entity: %s %r" % (canonical_id, schemata)
            raise InvalidData(err)

        data = {"schema": schema, "id": canonical_id}
        obj = cls(dataset, data)
        obj.last_change = max(first_seens, default=None)
        # The grouped statements are installed directly instead of going
        # through add_statement() one by one.
        obj._statements = {p: s for p, s in props.items()}
        return obj

`dataset = dataset` `instance-attribute`

The default dataset for new statements.

`extra_referents = set(data.pop('referents', []))` `instance-attribute`

The IDs of all entities which are included in this canonical entity.

`last_change = data.get('last_change', None)` `instance-attribute`

The last time this entity was changed.

`caption()`

The user-facing label to be used for this entity. This checks a list of properties defined by the schema (caption) and returns the first available value. If no caption is available, return the schema label.

This implementation prefers statements where the language property is that of the preferred system language.

Source code in followthemoney/statement/entity.py

@property
def caption(self) -> str:
    """Return the user-facing label for this entity, computing and caching
    it on first access.

    The caption properties named by the schema are checked in order and the
    first one that yields a value wins. For name-typed properties with more
    than one statement, `pick_lang_name` makes the choice (preferring the
    preferred system language, per the original documentation); otherwise
    the first statement in sort order is used. If nothing yields a value,
    the schema label is used."""
    if self._caption is not None:
        return self._caption

    for prop_name in self.schema.caption:
        candidates = self._statements.get(prop_name)
        if candidates is None:
            continue
        prop = self.schema.properties[prop_name]
        if prop.type == registry.name and len(candidates) > 1:
            lang_values = [LangStr(s.value, lang=s.lang) for s in candidates]
            picked = pick_lang_name(lang_values)
            if picked is not None:
                self._caption = picked
                return self._caption

        ordered = sorted(candidates)
        if ordered:
            self._caption = ordered[0].value
            return self._caption

    # No caption property produced a value: fall back to the schema label.
    self._caption = self.schema.label
    return self._caption

`statements()`

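Yield all statements for this entity. If the entity has an ID, an extra ID statement follows, whose value is a checksum over the schema name and the statement IDs.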

Source code in followthemoney/statement/entity.py

@property
def statements(self) -> Generator[Statement, None, None]:
    """Yield every statement of the entity, followed by one extra ID
    statement when the entity has an ID.

    The extra statement uses the BASE_ID property; its value is a SHA-1
    checksum over the schema name and the sorted statement IDs."""
    stmt_ids: List[str] = []
    first_seens: Set[str] = set()
    last_seens: Set[str] = set()
    for stmt in self._iter_stmt():
        yield stmt
        if stmt.id is not None:
            stmt_ids.append(stmt.id)
        if stmt.last_seen is not None:
            last_seens.add(stmt.last_seen)
        if stmt.first_seen is not None:
            first_seens.add(stmt.first_seen)

    if self.id is None:
        return

    hasher = sha1(self.schema.name.encode(HASH_ENCODING))
    for stmt_id in sorted(stmt_ids):
        hasher.update(stmt_id.encode(HASH_ENCODING))
    # Preferring last_change keeps the value stable across serialisation.
    earliest = self.last_change or min(first_seens, default=None)
    latest = max(last_seens, default=None)
    yield Statement(
        canonical_id=self.id,
        entity_id=self.id,
        prop=BASE_ID,
        schema=self.schema.name,
        value=hasher.hexdigest(),
        dataset=self.dataset.name,
        first_seen=earliest,
        last_seen=latest,
    )

`to_context_dict()`

Return a dictionary representation of the entity for context.

Source code in followthemoney/statement/entity.py

def to_context_dict(self) -> Dict[str, Any]:
    """Build the context dictionary describing this entity.

    The result always carries ``id``, ``caption``, ``schema``, ``referents``
    and ``datasets``. ``origin``, ``first_seen``, ``last_seen`` and
    ``last_change`` are only included when a value for them is available.
    """
    context: Dict[str, Any] = {
        "id": self.id,
        "caption": self.caption,
        "schema": self.schema.name,
    }
    referent_ids: Set[Optional[str]] = set(self.extra_referents)
    dataset_names = set(self.datasets)
    origin_names: Set[str] = set()
    earliest = None
    latest = None

    every_stmt = (s for group in self._statements.values() for s in group)
    for stmt in every_stmt:
        seen_first = stmt.first_seen
        if seen_first is not None and (earliest is None or seen_first < earliest):
            earliest = seen_first
        seen_last = stmt.last_seen
        if seen_last is not None and (latest is None or seen_last > latest):
            latest = seen_last
        # Statements contributed under a different entity ID are referents.
        source_id = stmt.entity_id
        if not (source_id is None or source_id == self.id):
            referent_ids.add(source_id)
        dataset_names.add(stmt.dataset)
        if stmt.origin is not None:
            origin_names.add(stmt.origin)

    context["referents"] = list(referent_ids)
    context["datasets"] = list(dataset_names)
    if origin_names:
        context["origin"] = list(origin_names)

    # Optional timestamps are emitted in a fixed order, skipping missing ones.
    optional_fields = (
        ("first_seen", earliest),
        ("last_seen", latest),
        ("last_change", self.last_change),
    )
    for key, val in optional_fields:
        if val is not None:
            context[key] = val
    return context

`to_statement_dict()`

Return a dictionary representation of the entity's statements.

Source code in followthemoney/statement/entity.py

def to_statement_dict(self) -> Dict[str, Any]:
    """Return the context dictionary extended with a ``statements`` list.

    Each entry of ``statements`` is the ``to_dict()`` form of one statement
    yielded by ``self.statements``.
    """
    payload = self.to_context_dict()
    serialised = [statement.to_dict() for statement in self.statements]
    payload["statements"] = serialised
    return payload

`unsafe_add(prop, value, cleaned=False, fuzzy=False, format=None, quiet=False, schema=None, dataset=None, seen=None, lang=None, original_value=None, origin=None)`

Add a statement to the entity, cleaning the value first unless it is already marked as cleaned.

Source code in followthemoney/statement/entity.py

def unsafe_add(
    self,
    prop: Property,
    value: Optional[str],
    cleaned: bool = False,
    fuzzy: bool = False,
    format: Optional[str] = None,
    quiet: bool = False,
    schema: Optional[str] = None,
    dataset: Optional[str] = None,
    seen: Optional[str] = None,
    lang: Optional[str] = None,
    original_value: Optional[str] = None,
    origin: Optional[str] = None,
) -> Optional[str]:
    """Add a single property value to the entity as a statement.

    Unless ``cleaned`` is set, the value is first passed through the property
    type's ``clean_text``. Returns the value that was stored, or ``None`` when
    nothing was added (empty value, value rejected by cleaning, or a stub
    property with ``quiet`` set).

    Raises ``InvalidData`` for a stub property when ``quiet`` is false, and
    when the entity has no ID.
    """
    if value is None or not len(value):
        return None

    if prop.stub:
        # Reverse (stub) properties must never be set directly.
        if not quiet:
            msg = gettext("Stub property (%s): %s")
            raise InvalidData(msg % (self.schema, prop))
        return None

    lang_code = lang if lang is None else registry.language.clean_text(lang)

    text: Optional[str]
    if cleaned:
        text = value
    else:
        text = prop.type.clean_text(value, proxy=self, fuzzy=fuzzy, format=format)
    if text is None:
        return None

    # Remember the raw input only when cleaning actually changed it and the
    # caller did not supply an original value.
    raw_value = original_value
    if raw_value is None and text != value:
        raw_value = value

    if self.id is None:
        raise InvalidData("Cannot add statement to entity without ID!")

    self.add_statement(
        Statement(
            entity_id=self.id,
            prop=prop.name,
            schema=schema or self.schema.name,
            value=text,
            dataset=dataset or self.dataset.name,
            lang=lang_code,
            original_value=raw_value,
            first_seen=seen,
            origin=origin,
        )
    )
    return text

Statement data model

followthemoney.statement.statement.Statement

clone()

make_key(dataset, entity_id, prop, value, external) classmethod

prop_type()

followthemoney.statement.entity.StatementEntity

dataset = dataset instance-attribute

extra_referents = set(data.pop('referents', [])) instance-attribute

last_change = data.get('last_change', None) instance-attribute

caption()

statements()

to_context_dict()

to_statement_dict()

unsafe_add(prop, value, cleaned=False, fuzzy=False, format=None, quiet=False, schema=None, dataset=None, seen=None, lang=None, original_value=None, origin=None)

`followthemoney.statement.statement.Statement`

`clone()`

`make_key(dataset, entity_id, prop, value, external)` `classmethod`

`prop_type()`

`followthemoney.statement.entity.StatementEntity`

`dataset = dataset` `instance-attribute`

`extra_referents = set(data.pop('referents', []))` `instance-attribute`

`last_change = data.get('last_change', None)` `instance-attribute`

`caption()`

`statements()`

`to_context_dict()`

`to_statement_dict()`

`unsafe_add(prop, value, cleaned=False, fuzzy=False, format=None, quiet=False, schema=None, dataset=None, seen=None, lang=None, original_value=None, origin=None)`