followthemoney.proxy

  1import logging
  2from typing import (
  3    TYPE_CHECKING,
  4    Any,
  5    Dict,
  6    Generator,
  7    List,
  8    Optional,
  9    Set,
 10    Tuple,
 11    Union,
 12    Type,
 13    TypeVar,
 14    cast,
 15)
 16import warnings
 17from itertools import product
 18from banal import ensure_dict
 19
 20from followthemoney.exc import InvalidData
 21from followthemoney.types import registry
 22from followthemoney.types.common import PropertyType
 23from followthemoney.property import Property
 24from followthemoney.rdf import SKOS, RDF, Literal, URIRef, Identifier
 25from followthemoney.util import sanitize_text, gettext
 26from followthemoney.util import merge_context, value_list, make_entity_id
 27
 28if TYPE_CHECKING:
 29    from followthemoney.model import Model
 30
# Module-level logger, named after this module.
 31log = logging.getLogger(__name__)
# A property reference as accepted by the proxy API: either a ``Property``
# object or the property's name given as a string.
 32P = Union[Property, str]
# An RDF statement as yielded by ``EntityProxy.triples()``: a 3-tuple of
# ``Identifier`` terms.
 33Triple = Tuple[Identifier, Identifier, Identifier]
# Type variable bound to ``EntityProxy``; lets ``clone``, ``merge`` and
# ``from_dict`` be annotated as returning the caller's own (sub)class.
 34E = TypeVar("E", bound="EntityProxy")
 35
 36
 37class EntityProxy(object):
 38    """A wrapper object for an entity, with utility functions for the
 39    introspection and manipulation of its properties.
 40
 41    This is the main working object in the library, used to generate, validate
 42    and emit data."""
 43
 44    __slots__ = ["schema", "id", "key_prefix", "context", "_properties", "_size"]
 45
 46    def __init__(
 47        self,
 48        model: "Model",
 49        data: Dict[str, Any],
 50        key_prefix: Optional[str] = None,
 51        cleaned: bool = True,
 52    ):
 53        data = dict(data or {})
 54        properties = data.pop("properties", {})
 55        if not cleaned:
 56            properties = ensure_dict(properties)
 57
 58        #: The schema definition for this entity, which implies the properties
 59        #: That can be set on it.
 60        schema = model.get(data.pop("schema", None))
 61        if schema is None:
 62            raise InvalidData(gettext("No schema for entity."))
 63        self.schema = schema
 64
 65        #: When using :meth:`~make_id` to generate a natural key for this entity,
 66        #: the prefix will be added to the ID as a salt to make it easier to keep
 67        #: IDs unique across datasets. This is somewhat redundant following the
 68        #: introduction of :class:`~followthemoney.namespace.Namespace`.
 69        self.key_prefix = key_prefix
 70
 71        #: A unique identifier for this entity, usually a hashed natural key,
 72        #: a UUID, or a very simple slug. Can be signed using a
 73        #: :class:`~followthemoney.namespace.Namespace`.
 74        self.id = data.pop("id", None)
 75        if not cleaned:
 76            self.id = sanitize_text(self.id)
 77
 78        #: If the input dictionary for the entity proxy contains fields other
 79        #: than ``id``, ``schema`` or ``properties``, they will be kept in here
 80        #: and re-added upon serialization.
 81        self.context = data
 82        self._properties: Dict[str, List[str]] = {}
 83        self._size = 0
 84
 85        for key, values in properties.items():
 86            if key not in self.schema.properties:
 87                continue
 88            if cleaned:
 89                # This does not call `self.add` as it might be called millions of times
 90                # in some context and we want to avoid the performance overhead of
 91                # doing so.
 92                seen: Set[str] = set()
 93                seen_add = seen.add
 94                unique_values = [v for v in values if not (v in seen or seen_add(v))]
 95                self._properties[key] = unique_values
 96                self._size += sum([len(v) for v in unique_values])
 97            else:
 98                self.add(key, values, quiet=True)
 99
100    def make_id(self, *parts: Any) -> Optional[str]:
101        """Generate a (hopefully unique) ID for the given entity, composed
102        of the given components, and the :attr:`~key_prefix` defined in
103        the proxy.
104        """
105        self.id = make_entity_id(*parts, key_prefix=self.key_prefix)
106        return self.id
107
108    def _prop_name(self, prop: P, quiet: bool = False) -> Optional[str]:
109        # This is pretty unwound because it gets called a *lot*.
110        if prop in self.schema.properties:
111            return cast(str, prop)
112        try:
113            obj = cast(Property, prop)
114            if obj.name in self.schema.properties:
115                return obj.name
116        except AttributeError:
117            pass
118        if quiet:
119            return None
120        msg = gettext("Unknown property (%s): %s")
121        raise InvalidData(msg % (self.schema, prop))
122
123    def get(self, prop: P, quiet: bool = False) -> List[str]:
124        """Get all values of a property.
125
126        :param prop: can be given as a name or an instance of
127            :class:`~followthemoney.property.Property`.
128        :param quiet: a reference to an non-existent property will return
129            an empty list instead of raising an error.
130        :return: A list of values.
131        """
132        prop_name = self._prop_name(prop, quiet=quiet)
133        if prop_name is None:
134            return []
135        return self._properties.get(prop_name, [])
136
137    def first(self, prop: P, quiet: bool = False) -> Optional[str]:
138        """Get only the first value set for the property.
139
140        :param prop: can be given as a name or an instance of
141            :class:`~followthemoney.property.Property`.
142        :param quiet: a reference to an non-existent property will return
143            an empty list instead of raising an error.
144        :return: A value, or ``None``.
145        """
146        for value in self.get(prop, quiet=quiet):
147            return value
148        return None
149
150    def has(self, prop: P, quiet: bool = False) -> bool:
151        """Check to see if the given property has at least one value set.
152
153        :param prop: can be given as a name or an instance of
154            :class:`~followthemoney.property.Property`.
155        :param quiet: a reference to an non-existent property will return
156            an empty list instead of raising an error.
157        :return: a boolean.
158        """
159        prop_name = self._prop_name(prop, quiet=quiet)
160        return prop_name in self._properties
161
162    def add(
163        self,
164        prop: P,
165        values: Any,
166        cleaned: bool = False,
167        quiet: bool = False,
168        fuzzy: bool = False,
169        format: Optional[str] = None,
170    ) -> None:
171        """Add the given value(s) to the property if they are valid for
172        the type of the property.
173
174        :param prop: can be given as a name or an instance of
175            :class:`~followthemoney.property.Property`.
176        :param values: either a single value, or a list of values to be added.
177        :param cleaned: should the data be normalised before adding it.
178        :param quiet: a reference to an non-existent property will return
179            an empty list instead of raising an error.
180        :param fuzzy: when normalising the data, should fuzzy matching be allowed.
181        :param format: when normalising the data, formatting for a date.
182        """
183        prop_name = self._prop_name(prop, quiet=quiet)
184        if prop_name is None:
185            return None
186        prop = self.schema.properties[prop_name]
187
188        # Don't allow setting the reverse properties:
189        if prop.stub:
190            if quiet:
191                return None
192            msg = gettext("Stub property (%s): %s")
193            raise InvalidData(msg % (self.schema, prop))
194
195        for value in value_list(values):
196            if not cleaned:
197                format = format or prop.format
198                value = prop.type.clean(value, proxy=self, fuzzy=fuzzy, format=format)
199            self.unsafe_add(prop, value, cleaned=True)
200        return None
201
202    def unsafe_add(
203        self,
204        prop: Property,
205        value: Optional[str],
206        cleaned: bool = False,
207        fuzzy: bool = False,
208        format: Optional[str] = None,
209    ) -> Optional[str]:
210        """A version of `add()` to be used only in type-checking code. This accepts
211        only a single value, and performs input cleaning on the premise that the
212        value is already valid unicode. Returns the value that has been added."""
213        if not cleaned and value is not None:
214            format = format or prop.format
215            value = prop.type.clean_text(value, fuzzy=fuzzy, format=format, proxy=self)
216
217        if value is None:
218            return None
219
220        # Somewhat hacky: limit the maximum size of any particular
221        # field to avoid overloading upstream aleph/elasticsearch.
222        value_size = len(value)
223        if prop.type.max_size is not None:
224            if self._size + value_size > prop.type.max_size:
225                # msg = "[%s] too large. Rejecting additional values."
226                # log.warning(msg, prop.name)
227                return None
228        self._size += value_size
229        self._properties.setdefault(prop.name, list())
230
231        if value not in self._properties[prop.name]:
232            self._properties[prop.name].append(value)
233
234        return value
235
236    def set(
237        self,
238        prop: P,
239        values: Any,
240        cleaned: bool = False,
241        quiet: bool = False,
242        fuzzy: bool = False,
243        format: Optional[str] = None,
244    ) -> None:
245        """Replace the values of the property with the given value(s).
246
247        :param prop: can be given as a name or an instance of
248            :class:`~followthemoney.property.Property`.
249        :param values: either a single value, or a list of values to be added.
250        :param cleaned: should the data be normalised before adding it.
251        :param quiet: a reference to an non-existent property will return
252            an empty list instead of raising an error.
253        """
254        prop_name = self._prop_name(prop, quiet=quiet)
255        if prop_name is None:
256            return
257        self._properties.pop(prop_name, None)
258        return self.add(
259            prop, values, cleaned=cleaned, quiet=quiet, fuzzy=fuzzy, format=format
260        )
261
262    def pop(self, prop: P, quiet: bool = True) -> List[str]:
263        """Remove all the values from the given property and return them.
264
265        :param prop: can be given as a name or an instance of
266            :class:`~followthemoney.property.Property`.
267        :param quiet: a reference to an non-existent property will return
268            an empty list instead of raising an error.
269        :return: a list of values, possibly empty.
270        """
271        prop_name = self._prop_name(prop, quiet=quiet)
272        if prop_name is None or prop_name not in self._properties:
273            return []
274        return list(self._properties.pop(prop_name))
275
276    def remove(self, prop: P, value: str, quiet: bool = True) -> None:
277        """Remove a single value from the given property. If it is not there,
278        no action takes place.
279
280        :param prop: can be given as a name or an instance of
281            :class:`~followthemoney.property.Property`.
282        :param value: will not be cleaned before checking.
283        :param quiet: a reference to an non-existent property will return
284            an empty list instead of raising an error.
285        """
286        prop_name = self._prop_name(prop, quiet=quiet)
287        if prop_name is not None and prop_name in self._properties:
288            try:
289                self._properties[prop_name].remove(value)
290            except (KeyError, ValueError):
291                pass
292
293    def iterprops(self) -> List[Property]:
294        """Iterate across all the properties for which a value is set in
295        the proxy (but do not return their values)."""
296        return [self.schema.properties[p] for p in self._properties.keys()]
297
298    def itervalues(self) -> Generator[Tuple[Property, str], None, None]:
299        """Iterate across all values in the proxy one by one, each given as a
300        tuple of the property and the value."""
301        for name, values in self._properties.items():
302            prop = self.schema.properties[name]
303            for value in values:
304                yield (prop, value)
305
306    def edgepairs(self) -> Generator[Tuple[str, str], None, None]:
307        """Return all the possible pairs of values for the edge source and target if
308        the schema allows for an edge representation of the entity."""
309        if self.schema.source_prop is not None and self.schema.target_prop is not None:
310            sources = self.get(self.schema.source_prop)
311            targets = self.get(self.schema.target_prop)
312            for (source, target) in product(sources, targets):
313                yield (source, target)
314
315    def get_type_values(
316        self, type_: PropertyType, matchable: bool = False
317    ) -> List[str]:
318        """All values of a particular type associated with a the entity. For
319        example, this lets you return all countries linked to an entity, rather
320        than manually checking each property to see if it contains countries.
321
322        :param type_: The type object to be searched.
323        :param matchable: Whether to return only property values marked as matchable.
324        """
325        combined = set()
326        for prop_name, values in self._properties.items():
327            prop = self.schema.properties[prop_name]
328            if matchable and not prop.matchable:
329                continue
330            if prop.type == type_:
331                combined.update(values)
332        return list(combined)
333
334    @property
335    def names(self) -> List[str]:
336        """Get the set of all name-type values set of the entity."""
337        return self.get_type_values(registry.name)
338
339    @property
340    def countries(self) -> List[str]:
341        """Get the set of all country-type values set of the entity."""
342        return self.get_type_values(registry.country)
343
344    @property
345    def temporal_start(self) -> Optional[Tuple[Property, str]]:
346        """Get a date that can be used to represent the start of the entity in a
347        timeline. If there are multiple possible dates, the earliest date is
348        returned."""
349        values = []
350
351        for prop in self.schema.temporal_start_props:
352            values += [(prop, value) for value in self.get(prop.name)]
353
354        values.sort(key=lambda tuple: tuple[1])
355        return next(iter(values), None)
356
357    @property
358    def temporal_end(self) -> Optional[Tuple[Property, str]]:
359        """Get a date that can be used to represent the end of the entity in a timeline.
360        If therer are multiple possible dates, the latest date is returned."""
361        values = []
362
363        for prop in self.schema.temporal_end_props:
364            values += [(prop, value) for value in self.get(prop.name)]
365
366        values.sort(reverse=True, key=lambda tuple: tuple[1])
367        return next(iter(values), None)
368
369    def get_type_inverted(self, matchable: bool = False) -> Dict[str, List[str]]:
370        """Return all the values of the entity arranged into a mapping with the
371        group name of their property type. These groups include ``countries``,
372        ``addresses``, ``emails``, etc."""
373        data: Dict[str, List[str]] = {}
374        for group, type_ in registry.groups.items():
375            values = self.get_type_values(type_, matchable=matchable)
376            if len(values):
377                data[group] = values
378        return data
379
380    def triples(self, qualified: bool = True) -> Generator[Triple, None, None]:
381        """Serialise the entity into a set of RDF triple statements. The
382        statements include the property values, an ``RDF#type`` definition
383        that refers to the entity schema, and a ``SKOS#prefLabel`` with the
384        entity caption."""
385        if self.id is None or self.schema is None:
386            return
387        uri = registry.entity.rdf(self.id)
388        yield (uri, RDF.type, self.schema.uri)
389        if qualified:
390            caption = self.caption
391            if caption != self.schema.label:
392                yield (uri, SKOS.prefLabel, Literal(caption))
393        for prop, value in self.itervalues():
394            value = prop.type.rdf(value)
395            if qualified:
396                yield (uri, prop.uri, value)
397            else:
398                yield (uri, URIRef(prop.name), value)
399
400    @property
401    def caption(self) -> str:
402        """The user-facing label to be used for this entity. This checks a list
403        of properties defined by the schema (caption) and returns the first
404        available value. If no caption is available, return the schema label."""
405        for prop in self.schema.caption:
406            for value in self.get(prop):
407                return value
408        return self.schema.label
409
410    @property
411    def country_hints(self) -> Set[str]:
412        """Some property types, such as phone numbers and IBAN codes imply a
413        country that may be associated with the entity. This list can be used
414        for a more generous matching approach than the actual country values."""
415        countries = set(self.countries)
416        if not len(countries):
417            for (prop, value) in self.itervalues():
418                hint = prop.type.country_hint(value)
419                if hint is not None:
420                    countries.add(hint)
421        return countries
422
423    @property
424    def properties(self) -> Dict[str, List[str]]:
425        """Return a mapping of the properties and set values of the entity."""
426        return {p: list(vs) for p, vs in self._properties.items()}
427
428    def to_dict(self) -> Dict[str, Any]:
429        """Serialise the proxy into a dictionary with the defined properties, ID,
430        schema and any contextual values that were handed in initially. The resulting
431        dictionary can be used to make a new proxy, and it is commonly written to disk
432        or a database."""
433        data = dict(self.context)
434        extra = {
435            "id": self.id,
436            "schema": self.schema.name,
437            "properties": self.properties,
438        }
439        data.update(extra)
440        return data
441
442    def to_full_dict(self, matchable: bool = False) -> Dict[str, Any]:
443        """Return a serialised version of the entity with inverted type groups mixed
444        in. See :meth:`~get_type_inverted`."""
445        data = self.to_dict()
446        data.update(self.get_type_inverted(matchable=matchable))
447        return data
448
449    def clone(self: E) -> E:
450        """Make a deep copy of the current entity proxy."""
451        return self.__class__.from_dict(self.schema.model, self.to_dict())
452
453    def merge(self: E, other: E) -> E:
454        """Merge another entity proxy into this one. This will try and find
455        the common schema between both entities and then add all property
456        values from the other entity into this one."""
457        model = self.schema.model
458        self.id = self.id or other.id
459        try:
460            self.schema = model.common_schema(self.schema, other.schema)
461        except InvalidData as e:
462            msg = "Cannot merge entities with id %s: %s"
463            raise InvalidData(msg % (self.id, e))
464
465        self.context = merge_context(self.context, other.context)
466        for prop, values in other._properties.items():
467            self.add(prop, values, cleaned=True, quiet=True)
468        return self
469
470    def __str__(self) -> str:
471        return self.caption
472
473    def __repr__(self) -> str:
474        return "<E(%r,%r)>" % (self.id, str(self))
475
476    def __len__(self) -> int:
477        return self._size
478
479    def __hash__(self) -> int:
480        if not self.id:
481            warnings.warn(
482                "Hashing an EntityProxy without an ID results in undefined behaviour",
483                RuntimeWarning,
484            )
485        return hash(self.id)
486
487    def __eq__(self, other: Any) -> bool:
488        try:
489            if self.id is None or other.id is None:
490                warnings.warn(
491                    "Comparing EntityProxys without IDs results in undefined behaviour",
492                    RuntimeWarning,
493                )
494            return bool(self.id == other.id)
495        except AttributeError:
496            return False
497
498    @classmethod
499    def from_dict(
500        cls: Type[E],
501        model: "Model",
502        data: Dict[str, Any],
503        cleaned: bool = True,
504    ) -> E:
505        """Instantiate a proxy based on the given model and serialised dictionary.
506
507        Use :meth:`followthemoney.model.Model.get_proxy` instead."""
508        return cls(model, data, cleaned=cleaned)
log = <Logger followthemoney.proxy (WARNING)>
P = typing.Union[followthemoney.property.Property, str]
Triple = typing.Tuple[rdflib.term.Identifier, rdflib.term.Identifier, rdflib.term.Identifier]
class EntityProxy:
 38class EntityProxy(object):
 39    """A wrapper object for an entity, with utility functions for the
 40    introspection and manipulation of its properties.
 41
 42    This is the main working object in the library, used to generate, validate
 43    and emit data."""
 44
 45    __slots__ = ["schema", "id", "key_prefix", "context", "_properties", "_size"]
 46
 47    def __init__(
 48        self,
 49        model: "Model",
 50        data: Dict[str, Any],
 51        key_prefix: Optional[str] = None,
 52        cleaned: bool = True,
 53    ):
 54        data = dict(data or {})
 55        properties = data.pop("properties", {})
 56        if not cleaned:
 57            properties = ensure_dict(properties)
 58
 59        #: The schema definition for this entity, which implies the properties
 60        #: That can be set on it.
 61        schema = model.get(data.pop("schema", None))
 62        if schema is None:
 63            raise InvalidData(gettext("No schema for entity."))
 64        self.schema = schema
 65
 66        #: When using :meth:`~make_id` to generate a natural key for this entity,
 67        #: the prefix will be added to the ID as a salt to make it easier to keep
 68        #: IDs unique across datasets. This is somewhat redundant following the
 69        #: introduction of :class:`~followthemoney.namespace.Namespace`.
 70        self.key_prefix = key_prefix
 71
 72        #: A unique identifier for this entity, usually a hashed natural key,
 73        #: a UUID, or a very simple slug. Can be signed using a
 74        #: :class:`~followthemoney.namespace.Namespace`.
 75        self.id = data.pop("id", None)
 76        if not cleaned:
 77            self.id = sanitize_text(self.id)
 78
 79        #: If the input dictionary for the entity proxy contains fields other
 80        #: than ``id``, ``schema`` or ``properties``, they will be kept in here
 81        #: and re-added upon serialization.
 82        self.context = data
 83        self._properties: Dict[str, List[str]] = {}
 84        self._size = 0
 85
 86        for key, values in properties.items():
 87            if key not in self.schema.properties:
 88                continue
 89            if cleaned:
 90                # This does not call `self.add` as it might be called millions of times
 91                # in some context and we want to avoid the performance overhead of
 92                # doing so.
 93                seen: Set[str] = set()
 94                seen_add = seen.add
 95                unique_values = [v for v in values if not (v in seen or seen_add(v))]
 96                self._properties[key] = unique_values
 97                self._size += sum([len(v) for v in unique_values])
 98            else:
 99                self.add(key, values, quiet=True)
100
101    def make_id(self, *parts: Any) -> Optional[str]:
102        """Generate a (hopefully unique) ID for the given entity, composed
103        of the given components, and the :attr:`~key_prefix` defined in
104        the proxy.
105        """
106        self.id = make_entity_id(*parts, key_prefix=self.key_prefix)
107        return self.id
108
109    def _prop_name(self, prop: P, quiet: bool = False) -> Optional[str]:
110        # This is pretty unwound because it gets called a *lot*.
111        if prop in self.schema.properties:
112            return cast(str, prop)
113        try:
114            obj = cast(Property, prop)
115            if obj.name in self.schema.properties:
116                return obj.name
117        except AttributeError:
118            pass
119        if quiet:
120            return None
121        msg = gettext("Unknown property (%s): %s")
122        raise InvalidData(msg % (self.schema, prop))
123
124    def get(self, prop: P, quiet: bool = False) -> List[str]:
125        """Get all values of a property.
126
127        :param prop: can be given as a name or an instance of
128            :class:`~followthemoney.property.Property`.
129        :param quiet: a reference to an non-existent property will return
130            an empty list instead of raising an error.
131        :return: A list of values.
132        """
133        prop_name = self._prop_name(prop, quiet=quiet)
134        if prop_name is None:
135            return []
136        return self._properties.get(prop_name, [])
137
138    def first(self, prop: P, quiet: bool = False) -> Optional[str]:
139        """Get only the first value set for the property.
140
141        :param prop: can be given as a name or an instance of
142            :class:`~followthemoney.property.Property`.
143        :param quiet: a reference to an non-existent property will return
144            an empty list instead of raising an error.
145        :return: A value, or ``None``.
146        """
147        for value in self.get(prop, quiet=quiet):
148            return value
149        return None
150
151    def has(self, prop: P, quiet: bool = False) -> bool:
152        """Check to see if the given property has at least one value set.
153
154        :param prop: can be given as a name or an instance of
155            :class:`~followthemoney.property.Property`.
156        :param quiet: a reference to an non-existent property will return
157            an empty list instead of raising an error.
158        :return: a boolean.
159        """
160        prop_name = self._prop_name(prop, quiet=quiet)
161        return prop_name in self._properties
162
163    def add(
164        self,
165        prop: P,
166        values: Any,
167        cleaned: bool = False,
168        quiet: bool = False,
169        fuzzy: bool = False,
170        format: Optional[str] = None,
171    ) -> None:
172        """Add the given value(s) to the property if they are valid for
173        the type of the property.
174
175        :param prop: can be given as a name or an instance of
176            :class:`~followthemoney.property.Property`.
177        :param values: either a single value, or a list of values to be added.
178        :param cleaned: should the data be normalised before adding it.
179        :param quiet: a reference to an non-existent property will return
180            an empty list instead of raising an error.
181        :param fuzzy: when normalising the data, should fuzzy matching be allowed.
182        :param format: when normalising the data, formatting for a date.
183        """
184        prop_name = self._prop_name(prop, quiet=quiet)
185        if prop_name is None:
186            return None
187        prop = self.schema.properties[prop_name]
188
189        # Don't allow setting the reverse properties:
190        if prop.stub:
191            if quiet:
192                return None
193            msg = gettext("Stub property (%s): %s")
194            raise InvalidData(msg % (self.schema, prop))
195
196        for value in value_list(values):
197            if not cleaned:
198                format = format or prop.format
199                value = prop.type.clean(value, proxy=self, fuzzy=fuzzy, format=format)
200            self.unsafe_add(prop, value, cleaned=True)
201        return None
202
203    def unsafe_add(
204        self,
205        prop: Property,
206        value: Optional[str],
207        cleaned: bool = False,
208        fuzzy: bool = False,
209        format: Optional[str] = None,
210    ) -> Optional[str]:
211        """A version of `add()` to be used only in type-checking code. This accepts
212        only a single value, and performs input cleaning on the premise that the
213        value is already valid unicode. Returns the value that has been added."""
214        if not cleaned and value is not None:
215            format = format or prop.format
216            value = prop.type.clean_text(value, fuzzy=fuzzy, format=format, proxy=self)
217
218        if value is None:
219            return None
220
221        # Somewhat hacky: limit the maximum size of any particular
222        # field to avoid overloading upstream aleph/elasticsearch.
223        value_size = len(value)
224        if prop.type.max_size is not None:
225            if self._size + value_size > prop.type.max_size:
226                # msg = "[%s] too large. Rejecting additional values."
227                # log.warning(msg, prop.name)
228                return None
229        self._size += value_size
230        self._properties.setdefault(prop.name, list())
231
232        if value not in self._properties[prop.name]:
233            self._properties[prop.name].append(value)
234
235        return value
236
237    def set(
238        self,
239        prop: P,
240        values: Any,
241        cleaned: bool = False,
242        quiet: bool = False,
243        fuzzy: bool = False,
244        format: Optional[str] = None,
245    ) -> None:
246        """Replace the values of the property with the given value(s).
247
248        :param prop: can be given as a name or an instance of
249            :class:`~followthemoney.property.Property`.
250        :param values: either a single value, or a list of values to be added.
251        :param cleaned: should the data be normalised before adding it.
252        :param quiet: a reference to an non-existent property will return
253            an empty list instead of raising an error.
254        """
255        prop_name = self._prop_name(prop, quiet=quiet)
256        if prop_name is None:
257            return
258        self._properties.pop(prop_name, None)
259        return self.add(
260            prop, values, cleaned=cleaned, quiet=quiet, fuzzy=fuzzy, format=format
261        )
262
263    def pop(self, prop: P, quiet: bool = True) -> List[str]:
264        """Remove all the values from the given property and return them.
265
266        :param prop: can be given as a name or an instance of
267            :class:`~followthemoney.property.Property`.
268        :param quiet: a reference to an non-existent property will return
269            an empty list instead of raising an error.
270        :return: a list of values, possibly empty.
271        """
272        prop_name = self._prop_name(prop, quiet=quiet)
273        if prop_name is None or prop_name not in self._properties:
274            return []
275        return list(self._properties.pop(prop_name))
276
277    def remove(self, prop: P, value: str, quiet: bool = True) -> None:
278        """Remove a single value from the given property. If it is not there,
279        no action takes place.
280
281        :param prop: can be given as a name or an instance of
282            :class:`~followthemoney.property.Property`.
283        :param value: will not be cleaned before checking.
284        :param quiet: a reference to an non-existent property will return
285            an empty list instead of raising an error.
286        """
287        prop_name = self._prop_name(prop, quiet=quiet)
288        if prop_name is not None and prop_name in self._properties:
289            try:
290                self._properties[prop_name].remove(value)
291            except (KeyError, ValueError):
292                pass
293
294    def iterprops(self) -> List[Property]:
295        """Iterate across all the properties for which a value is set in
296        the proxy (but do not return their values)."""
297        return [self.schema.properties[p] for p in self._properties.keys()]
298
299    def itervalues(self) -> Generator[Tuple[Property, str], None, None]:
300        """Iterate across all values in the proxy one by one, each given as a
301        tuple of the property and the value."""
302        for name, values in self._properties.items():
303            prop = self.schema.properties[name]
304            for value in values:
305                yield (prop, value)
306
307    def edgepairs(self) -> Generator[Tuple[str, str], None, None]:
308        """Return all the possible pairs of values for the edge source and target if
309        the schema allows for an edge representation of the entity."""
310        if self.schema.source_prop is not None and self.schema.target_prop is not None:
311            sources = self.get(self.schema.source_prop)
312            targets = self.get(self.schema.target_prop)
313            for (source, target) in product(sources, targets):
314                yield (source, target)
315
316    def get_type_values(
317        self, type_: PropertyType, matchable: bool = False
318    ) -> List[str]:
319        """All values of a particular type associated with a the entity. For
320        example, this lets you return all countries linked to an entity, rather
321        than manually checking each property to see if it contains countries.
322
323        :param type_: The type object to be searched.
324        :param matchable: Whether to return only property values marked as matchable.
325        """
326        combined = set()
327        for prop_name, values in self._properties.items():
328            prop = self.schema.properties[prop_name]
329            if matchable and not prop.matchable:
330                continue
331            if prop.type == type_:
332                combined.update(values)
333        return list(combined)
334
335    @property
336    def names(self) -> List[str]:
337        """Get the set of all name-type values set of the entity."""
338        return self.get_type_values(registry.name)
339
340    @property
341    def countries(self) -> List[str]:
342        """Get the set of all country-type values set of the entity."""
343        return self.get_type_values(registry.country)
344
345    @property
346    def temporal_start(self) -> Optional[Tuple[Property, str]]:
347        """Get a date that can be used to represent the start of the entity in a
348        timeline. If there are multiple possible dates, the earliest date is
349        returned."""
350        values = []
351
352        for prop in self.schema.temporal_start_props:
353            values += [(prop, value) for value in self.get(prop.name)]
354
355        values.sort(key=lambda tuple: tuple[1])
356        return next(iter(values), None)
357
358    @property
359    def temporal_end(self) -> Optional[Tuple[Property, str]]:
360        """Get a date that can be used to represent the end of the entity in a timeline.
361        If therer are multiple possible dates, the latest date is returned."""
362        values = []
363
364        for prop in self.schema.temporal_end_props:
365            values += [(prop, value) for value in self.get(prop.name)]
366
367        values.sort(reverse=True, key=lambda tuple: tuple[1])
368        return next(iter(values), None)
369
370    def get_type_inverted(self, matchable: bool = False) -> Dict[str, List[str]]:
371        """Return all the values of the entity arranged into a mapping with the
372        group name of their property type. These groups include ``countries``,
373        ``addresses``, ``emails``, etc."""
374        data: Dict[str, List[str]] = {}
375        for group, type_ in registry.groups.items():
376            values = self.get_type_values(type_, matchable=matchable)
377            if len(values):
378                data[group] = values
379        return data
380
381    def triples(self, qualified: bool = True) -> Generator[Triple, None, None]:
382        """Serialise the entity into a set of RDF triple statements. The
383        statements include the property values, an ``RDF#type`` definition
384        that refers to the entity schema, and a ``SKOS#prefLabel`` with the
385        entity caption."""
386        if self.id is None or self.schema is None:
387            return
388        uri = registry.entity.rdf(self.id)
389        yield (uri, RDF.type, self.schema.uri)
390        if qualified:
391            caption = self.caption
392            if caption != self.schema.label:
393                yield (uri, SKOS.prefLabel, Literal(caption))
394        for prop, value in self.itervalues():
395            value = prop.type.rdf(value)
396            if qualified:
397                yield (uri, prop.uri, value)
398            else:
399                yield (uri, URIRef(prop.name), value)
400
401    @property
402    def caption(self) -> str:
403        """The user-facing label to be used for this entity. This checks a list
404        of properties defined by the schema (caption) and returns the first
405        available value. If no caption is available, return the schema label."""
406        for prop in self.schema.caption:
407            for value in self.get(prop):
408                return value
409        return self.schema.label
410
411    @property
412    def country_hints(self) -> Set[str]:
413        """Some property types, such as phone numbers and IBAN codes imply a
414        country that may be associated with the entity. This list can be used
415        for a more generous matching approach than the actual country values."""
416        countries = set(self.countries)
417        if not len(countries):
418            for (prop, value) in self.itervalues():
419                hint = prop.type.country_hint(value)
420                if hint is not None:
421                    countries.add(hint)
422        return countries
423
424    @property
425    def properties(self) -> Dict[str, List[str]]:
426        """Return a mapping of the properties and set values of the entity."""
427        return {p: list(vs) for p, vs in self._properties.items()}
428
429    def to_dict(self) -> Dict[str, Any]:
430        """Serialise the proxy into a dictionary with the defined properties, ID,
431        schema and any contextual values that were handed in initially. The resulting
432        dictionary can be used to make a new proxy, and it is commonly written to disk
433        or a database."""
434        data = dict(self.context)
435        extra = {
436            "id": self.id,
437            "schema": self.schema.name,
438            "properties": self.properties,
439        }
440        data.update(extra)
441        return data
442
443    def to_full_dict(self, matchable: bool = False) -> Dict[str, Any]:
444        """Return a serialised version of the entity with inverted type groups mixed
445        in. See :meth:`~get_type_inverted`."""
446        data = self.to_dict()
447        data.update(self.get_type_inverted(matchable=matchable))
448        return data
449
450    def clone(self: E) -> E:
451        """Make a deep copy of the current entity proxy."""
452        return self.__class__.from_dict(self.schema.model, self.to_dict())
453
454    def merge(self: E, other: E) -> E:
455        """Merge another entity proxy into this one. This will try and find
456        the common schema between both entities and then add all property
457        values from the other entity into this one."""
458        model = self.schema.model
459        self.id = self.id or other.id
460        try:
461            self.schema = model.common_schema(self.schema, other.schema)
462        except InvalidData as e:
463            msg = "Cannot merge entities with id %s: %s"
464            raise InvalidData(msg % (self.id, e))
465
466        self.context = merge_context(self.context, other.context)
467        for prop, values in other._properties.items():
468            self.add(prop, values, cleaned=True, quiet=True)
469        return self
470
471    def __str__(self) -> str:
472        return self.caption
473
474    def __repr__(self) -> str:
475        return "<E(%r,%r)>" % (self.id, str(self))
476
477    def __len__(self) -> int:
478        return self._size
479
480    def __hash__(self) -> int:
481        if not self.id:
482            warnings.warn(
483                "Hashing an EntityProxy without an ID results in undefined behaviour",
484                RuntimeWarning,
485            )
486        return hash(self.id)
487
488    def __eq__(self, other: Any) -> bool:
489        try:
490            if self.id is None or other.id is None:
491                warnings.warn(
492                    "Comparing EntityProxys without IDs results in undefined behaviour",
493                    RuntimeWarning,
494                )
495            return bool(self.id == other.id)
496        except AttributeError:
497            return False
498
499    @classmethod
500    def from_dict(
501        cls: Type[E],
502        model: "Model",
503        data: Dict[str, Any],
504        cleaned: bool = True,
505    ) -> E:
506        """Instantiate a proxy based on the given model and serialised dictionary.
507
508        Use :meth:`followthemoney.model.Model.get_proxy` instead."""
509        return cls(model, data, cleaned=cleaned)

A wrapper object for an entity, with utility functions for the introspection and manipulation of its properties.

This is the main working object in the library, used to generate, validate and emit data.

EntityProxy( model: followthemoney.model.Model, data: Dict[str, Any], key_prefix: Optional[str] = None, cleaned: bool = True)
47    def __init__(
48        self,
49        model: "Model",
50        data: Dict[str, Any],
51        key_prefix: Optional[str] = None,
52        cleaned: bool = True,
53    ):
54        data = dict(data or {})
55        properties = data.pop("properties", {})
56        if not cleaned:
57            properties = ensure_dict(properties)
58
59        #: The schema definition for this entity, which implies the properties
60        #: That can be set on it.
61        schema = model.get(data.pop("schema", None))
62        if schema is None:
63            raise InvalidData(gettext("No schema for entity."))
64        self.schema = schema
65
66        #: When using :meth:`~make_id` to generate a natural key for this entity,
67        #: the prefix will be added to the ID as a salt to make it easier to keep
68        #: IDs unique across datasets. This is somewhat redundant following the
69        #: introduction of :class:`~followthemoney.namespace.Namespace`.
70        self.key_prefix = key_prefix
71
72        #: A unique identifier for this entity, usually a hashed natural key,
73        #: a UUID, or a very simple slug. Can be signed using a
74        #: :class:`~followthemoney.namespace.Namespace`.
75        self.id = data.pop("id", None)
76        if not cleaned:
77            self.id = sanitize_text(self.id)
78
79        #: If the input dictionary for the entity proxy contains fields other
80        #: than ``id``, ``schema`` or ``properties``, they will be kept in here
81        #: and re-added upon serialization.
82        self.context = data
83        self._properties: Dict[str, List[str]] = {}
84        self._size = 0
85
86        for key, values in properties.items():
87            if key not in self.schema.properties:
88                continue
89            if cleaned:
90                # This does not call `self.add` as it might be called millions of times
91                # in some context and we want to avoid the performance overhead of
92                # doing so.
93                seen: Set[str] = set()
94                seen_add = seen.add
95                unique_values = [v for v in values if not (v in seen or seen_add(v))]
96                self._properties[key] = unique_values
97                self._size += sum([len(v) for v in unique_values])
98            else:
99                self.add(key, values, quiet=True)
schema
key_prefix
id
context
def make_id(self, *parts: Any) -> Optional[str]:
101    def make_id(self, *parts: Any) -> Optional[str]:
102        """Generate a (hopefully unique) ID for the given entity, composed
103        of the given components, and the :attr:`~key_prefix` defined in
104        the proxy.
105        """
106        self.id = make_entity_id(*parts, key_prefix=self.key_prefix)
107        return self.id

Generate a (hopefully unique) ID for the given entity, composed of the given components, and the ~key_prefix defined in the proxy.

def get( self, prop: Union[followthemoney.property.Property, str], quiet: bool = False) -> List[str]:
124    def get(self, prop: P, quiet: bool = False) -> List[str]:
125        """Get all values of a property.
126
127        :param prop: can be given as a name or an instance of
128            :class:`~followthemoney.property.Property`.
129        :param quiet: a reference to an non-existent property will return
130            an empty list instead of raising an error.
131        :return: A list of values.
132        """
133        prop_name = self._prop_name(prop, quiet=quiet)
134        if prop_name is None:
135            return []
136        return self._properties.get(prop_name, [])

Get all values of a property.

Parameters
  • prop: can be given as a name or an instance of ~followthemoney.property.Property.
  • quiet: a reference to a non-existent property will return an empty list instead of raising an error.
Returns

A list of values.

def first( self, prop: Union[followthemoney.property.Property, str], quiet: bool = False) -> Optional[str]:
138    def first(self, prop: P, quiet: bool = False) -> Optional[str]:
139        """Get only the first value set for the property.
140
141        :param prop: can be given as a name or an instance of
142            :class:`~followthemoney.property.Property`.
143        :param quiet: a reference to an non-existent property will return
144            an empty list instead of raising an error.
145        :return: A value, or ``None``.
146        """
147        for value in self.get(prop, quiet=quiet):
148            return value
149        return None

Get only the first value set for the property.

Parameters
  • prop: can be given as a name or an instance of ~followthemoney.property.Property.
  • quiet: a reference to a non-existent property will return None instead of raising an error.
Returns

A value, or None.

def has( self, prop: Union[followthemoney.property.Property, str], quiet: bool = False) -> bool:
151    def has(self, prop: P, quiet: bool = False) -> bool:
152        """Check to see if the given property has at least one value set.
153
154        :param prop: can be given as a name or an instance of
155            :class:`~followthemoney.property.Property`.
156        :param quiet: a reference to an non-existent property will return
157            an empty list instead of raising an error.
158        :return: a boolean.
159        """
160        prop_name = self._prop_name(prop, quiet=quiet)
161        return prop_name in self._properties

Check to see if the given property has at least one value set.

Parameters
  • prop: can be given as a name or an instance of ~followthemoney.property.Property.
  • quiet: a reference to a non-existent property will return False instead of raising an error.
Returns

a boolean.

def add( self, prop: Union[followthemoney.property.Property, str], values: Any, cleaned: bool = False, quiet: bool = False, fuzzy: bool = False, format: Optional[str] = None) -> None:
163    def add(
164        self,
165        prop: P,
166        values: Any,
167        cleaned: bool = False,
168        quiet: bool = False,
169        fuzzy: bool = False,
170        format: Optional[str] = None,
171    ) -> None:
172        """Add the given value(s) to the property if they are valid for
173        the type of the property.
174
175        :param prop: can be given as a name or an instance of
176            :class:`~followthemoney.property.Property`.
177        :param values: either a single value, or a list of values to be added.
178        :param cleaned: should the data be normalised before adding it.
179        :param quiet: a reference to an non-existent property will return
180            an empty list instead of raising an error.
181        :param fuzzy: when normalising the data, should fuzzy matching be allowed.
182        :param format: when normalising the data, formatting for a date.
183        """
184        prop_name = self._prop_name(prop, quiet=quiet)
185        if prop_name is None:
186            return None
187        prop = self.schema.properties[prop_name]
188
189        # Don't allow setting the reverse properties:
190        if prop.stub:
191            if quiet:
192                return None
193            msg = gettext("Stub property (%s): %s")
194            raise InvalidData(msg % (self.schema, prop))
195
196        for value in value_list(values):
197            if not cleaned:
198                format = format or prop.format
199                value = prop.type.clean(value, proxy=self, fuzzy=fuzzy, format=format)
200            self.unsafe_add(prop, value, cleaned=True)
201        return None

Add the given value(s) to the property if they are valid for the type of the property.

Parameters
  • prop: can be given as a name or an instance of ~followthemoney.property.Property.
  • values: either a single value, or a list of values to be added.
  • cleaned: whether the values are already normalised; if not, each value is cleaned by the property type before it is added.
  • quiet: a reference to a non-existent property will be ignored instead of raising an error.
  • fuzzy: when normalising the data, should fuzzy matching be allowed.
  • format: when normalising the data, formatting for a date.
def unsafe_add( self, prop: followthemoney.property.Property, value: Optional[str], cleaned: bool = False, fuzzy: bool = False, format: Optional[str] = None) -> Optional[str]:
203    def unsafe_add(
204        self,
205        prop: Property,
206        value: Optional[str],
207        cleaned: bool = False,
208        fuzzy: bool = False,
209        format: Optional[str] = None,
210    ) -> Optional[str]:
211        """A version of `add()` to be used only in type-checking code. This accepts
212        only a single value, and performs input cleaning on the premise that the
213        value is already valid unicode. Returns the value that has been added."""
214        if not cleaned and value is not None:
215            format = format or prop.format
216            value = prop.type.clean_text(value, fuzzy=fuzzy, format=format, proxy=self)
217
218        if value is None:
219            return None
220
221        # Somewhat hacky: limit the maximum size of any particular
222        # field to avoid overloading upstream aleph/elasticsearch.
223        value_size = len(value)
224        if prop.type.max_size is not None:
225            if self._size + value_size > prop.type.max_size:
226                # msg = "[%s] too large. Rejecting additional values."
227                # log.warning(msg, prop.name)
228                return None
229        self._size += value_size
230        self._properties.setdefault(prop.name, list())
231
232        if value not in self._properties[prop.name]:
233            self._properties[prop.name].append(value)
234
235        return value

A version of add() to be used only in type-checking code. This accepts only a single value, and performs input cleaning on the premise that the value is already valid unicode. Returns the value that has been added.

def set( self, prop: Union[followthemoney.property.Property, str], values: Any, cleaned: bool = False, quiet: bool = False, fuzzy: bool = False, format: Optional[str] = None) -> None:
237    def set(
238        self,
239        prop: P,
240        values: Any,
241        cleaned: bool = False,
242        quiet: bool = False,
243        fuzzy: bool = False,
244        format: Optional[str] = None,
245    ) -> None:
246        """Replace the values of the property with the given value(s).
247
248        :param prop: can be given as a name or an instance of
249            :class:`~followthemoney.property.Property`.
250        :param values: either a single value, or a list of values to be added.
251        :param cleaned: should the data be normalised before adding it.
252        :param quiet: a reference to an non-existent property will return
253            an empty list instead of raising an error.
254        """
255        prop_name = self._prop_name(prop, quiet=quiet)
256        if prop_name is None:
257            return
258        self._properties.pop(prop_name, None)
259        return self.add(
260            prop, values, cleaned=cleaned, quiet=quiet, fuzzy=fuzzy, format=format
261        )

Replace the values of the property with the given value(s).

Parameters
  • prop: can be given as a name or an instance of ~followthemoney.property.Property.
  • values: either a single value, or a list of values to be added.
  • cleaned: whether the values are already normalised; if not, each value is cleaned by the property type before it is added.
  • quiet: a reference to a non-existent property will be ignored instead of raising an error.
def pop( self, prop: Union[followthemoney.property.Property, str], quiet: bool = True) -> List[str]:
263    def pop(self, prop: P, quiet: bool = True) -> List[str]:
264        """Remove all the values from the given property and return them.
265
266        :param prop: can be given as a name or an instance of
267            :class:`~followthemoney.property.Property`.
268        :param quiet: a reference to an non-existent property will return
269            an empty list instead of raising an error.
270        :return: a list of values, possibly empty.
271        """
272        prop_name = self._prop_name(prop, quiet=quiet)
273        if prop_name is None or prop_name not in self._properties:
274            return []
275        return list(self._properties.pop(prop_name))

Remove all the values from the given property and return them.

Parameters
  • prop: can be given as a name or an instance of ~followthemoney.property.Property.
  • quiet: a reference to a non-existent property will return an empty list instead of raising an error.
Returns

a list of values, possibly empty.

def remove( self, prop: Union[followthemoney.property.Property, str], value: str, quiet: bool = True) -> None:
277    def remove(self, prop: P, value: str, quiet: bool = True) -> None:
278        """Remove a single value from the given property. If it is not there,
279        no action takes place.
280
281        :param prop: can be given as a name or an instance of
282            :class:`~followthemoney.property.Property`.
283        :param value: will not be cleaned before checking.
284        :param quiet: a reference to an non-existent property will return
285            an empty list instead of raising an error.
286        """
287        prop_name = self._prop_name(prop, quiet=quiet)
288        if prop_name is not None and prop_name in self._properties:
289            try:
290                self._properties[prop_name].remove(value)
291            except (KeyError, ValueError):
292                pass

Remove a single value from the given property. If it is not there, no action takes place.

Parameters
  • prop: can be given as a name or an instance of ~followthemoney.property.Property.
  • value: will not be cleaned before checking.
  • quiet: a reference to a non-existent property will be ignored instead of raising an error.
def iterprops(self) -> List[followthemoney.property.Property]:
294    def iterprops(self) -> List[Property]:
295        """Iterate across all the properties for which a value is set in
296        the proxy (but do not return their values)."""
297        return [self.schema.properties[p] for p in self._properties.keys()]

Iterate across all the properties for which a value is set in the proxy (but do not return their values).

def itervalues( self) -> Generator[Tuple[followthemoney.property.Property, str], NoneType, NoneType]:
299    def itervalues(self) -> Generator[Tuple[Property, str], None, None]:
300        """Iterate across all values in the proxy one by one, each given as a
301        tuple of the property and the value."""
302        for name, values in self._properties.items():
303            prop = self.schema.properties[name]
304            for value in values:
305                yield (prop, value)

Iterate across all values in the proxy one by one, each given as a tuple of the property and the value.

def edgepairs(self) -> Generator[Tuple[str, str], NoneType, NoneType]:
307    def edgepairs(self) -> Generator[Tuple[str, str], None, None]:
308        """Return all the possible pairs of values for the edge source and target if
309        the schema allows for an edge representation of the entity."""
310        if self.schema.source_prop is not None and self.schema.target_prop is not None:
311            sources = self.get(self.schema.source_prop)
312            targets = self.get(self.schema.target_prop)
313            for (source, target) in product(sources, targets):
314                yield (source, target)

Return all the possible pairs of values for the edge source and target if the schema allows for an edge representation of the entity.

def get_type_values( self, type_: followthemoney.types.common.PropertyType, matchable: bool = False) -> List[str]:
316    def get_type_values(
317        self, type_: PropertyType, matchable: bool = False
318    ) -> List[str]:
319        """All values of a particular type associated with a the entity. For
320        example, this lets you return all countries linked to an entity, rather
321        than manually checking each property to see if it contains countries.
322
323        :param type_: The type object to be searched.
324        :param matchable: Whether to return only property values marked as matchable.
325        """
326        combined = set()
327        for prop_name, values in self._properties.items():
328            prop = self.schema.properties[prop_name]
329            if matchable and not prop.matchable:
330                continue
331            if prop.type == type_:
332                combined.update(values)
333        return list(combined)

All values of a particular type associated with the entity. For example, this lets you return all countries linked to an entity, rather than manually checking each property to see if it contains countries.

Parameters
  • type_: The type object to be searched.
  • matchable: Whether to return only property values marked as matchable.
names: List[str]
335    @property
336    def names(self) -> List[str]:
337        """Get the set of all name-type values set of the entity."""
338        return self.get_type_values(registry.name)

Get the set of all name-type values set of the entity.

countries: List[str]
340    @property
341    def countries(self) -> List[str]:
342        """Get the set of all country-type values set of the entity."""
343        return self.get_type_values(registry.country)

Get the set of all country-type values set of the entity.

temporal_start: Optional[Tuple[followthemoney.property.Property, str]]
345    @property
346    def temporal_start(self) -> Optional[Tuple[Property, str]]:
347        """Get a date that can be used to represent the start of the entity in a
348        timeline. If there are multiple possible dates, the earliest date is
349        returned."""
350        values = []
351
352        for prop in self.schema.temporal_start_props:
353            values += [(prop, value) for value in self.get(prop.name)]
354
355        values.sort(key=lambda tuple: tuple[1])
356        return next(iter(values), None)

Get a date that can be used to represent the start of the entity in a timeline. If there are multiple possible dates, the earliest date is returned.

temporal_end: Optional[Tuple[followthemoney.property.Property, str]]
358    @property
359    def temporal_end(self) -> Optional[Tuple[Property, str]]:
360        """Get a date that can be used to represent the end of the entity in a timeline.
361        If therer are multiple possible dates, the latest date is returned."""
362        values = []
363
364        for prop in self.schema.temporal_end_props:
365            values += [(prop, value) for value in self.get(prop.name)]
366
367        values.sort(reverse=True, key=lambda tuple: tuple[1])
368        return next(iter(values), None)

Get a date that can be used to represent the end of the entity in a timeline. If there are multiple possible dates, the latest date is returned.

def get_type_inverted(self, matchable: bool = False) -> Dict[str, List[str]]:
370    def get_type_inverted(self, matchable: bool = False) -> Dict[str, List[str]]:
371        """Return all the values of the entity arranged into a mapping with the
372        group name of their property type. These groups include ``countries``,
373        ``addresses``, ``emails``, etc."""
374        data: Dict[str, List[str]] = {}
375        for group, type_ in registry.groups.items():
376            values = self.get_type_values(type_, matchable=matchable)
377            if len(values):
378                data[group] = values
379        return data

Return all the values of the entity arranged into a mapping with the group name of their property type. These groups include countries, addresses, emails, etc.

def triples( self, qualified: bool = True) -> Generator[Tuple[rdflib.term.Identifier, rdflib.term.Identifier, rdflib.term.Identifier], NoneType, NoneType]:
381    def triples(self, qualified: bool = True) -> Generator[Triple, None, None]:
382        """Serialise the entity into a set of RDF triple statements. The
383        statements include the property values, an ``RDF#type`` definition
384        that refers to the entity schema, and a ``SKOS#prefLabel`` with the
385        entity caption."""
386        if self.id is None or self.schema is None:
387            return
388        uri = registry.entity.rdf(self.id)
389        yield (uri, RDF.type, self.schema.uri)
390        if qualified:
391            caption = self.caption
392            if caption != self.schema.label:
393                yield (uri, SKOS.prefLabel, Literal(caption))
394        for prop, value in self.itervalues():
395            value = prop.type.rdf(value)
396            if qualified:
397                yield (uri, prop.uri, value)
398            else:
399                yield (uri, URIRef(prop.name), value)

Serialise the entity into a set of RDF triple statements. The statements include the property values, an RDF#type definition that refers to the entity schema, and a SKOS#prefLabel with the entity caption.

caption: str
401    @property
402    def caption(self) -> str:
403        """The user-facing label to be used for this entity. This checks a list
404        of properties defined by the schema (caption) and returns the first
405        available value. If no caption is available, return the schema label."""
406        for prop in self.schema.caption:
407            for value in self.get(prop):
408                return value
409        return self.schema.label

The user-facing label to be used for this entity. This checks a list of properties defined by the schema (caption) and returns the first available value. If no caption is available, return the schema label.

country_hints: Set[str]
411    @property
412    def country_hints(self) -> Set[str]:
413        """Some property types, such as phone numbers and IBAN codes imply a
414        country that may be associated with the entity. This list can be used
415        for a more generous matching approach than the actual country values."""
416        countries = set(self.countries)
417        if not len(countries):
418            for (prop, value) in self.itervalues():
419                hint = prop.type.country_hint(value)
420                if hint is not None:
421                    countries.add(hint)
422        return countries

Some property types, such as phone numbers and IBAN codes imply a country that may be associated with the entity. This list can be used for a more generous matching approach than the actual country values.

properties: Dict[str, List[str]]
424    @property
425    def properties(self) -> Dict[str, List[str]]:
426        """Return a mapping of the properties and set values of the entity."""
427        return {p: list(vs) for p, vs in self._properties.items()}

Return a mapping of the properties and set values of the entity.

def to_dict(self) -> Dict[str, Any]:
429    def to_dict(self) -> Dict[str, Any]:
430        """Serialise the proxy into a dictionary with the defined properties, ID,
431        schema and any contextual values that were handed in initially. The resulting
432        dictionary can be used to make a new proxy, and it is commonly written to disk
433        or a database."""
434        data = dict(self.context)
435        extra = {
436            "id": self.id,
437            "schema": self.schema.name,
438            "properties": self.properties,
439        }
440        data.update(extra)
441        return data

Serialise the proxy into a dictionary with the defined properties, ID, schema and any contextual values that were handed in initially. The resulting dictionary can be used to make a new proxy, and it is commonly written to disk or a database.

def to_full_dict(self, matchable: bool = False) -> Dict[str, Any]:
443    def to_full_dict(self, matchable: bool = False) -> Dict[str, Any]:
444        """Return a serialised version of the entity with inverted type groups mixed
445        in. See :meth:`~get_type_inverted`."""
446        data = self.to_dict()
447        data.update(self.get_type_inverted(matchable=matchable))
448        return data

Return a serialised version of the entity with inverted type groups mixed in. See get_type_inverted().

def clone(self: ~E) -> ~E:
450    def clone(self: E) -> E:
451        """Make a deep copy of the current entity proxy."""
452        return self.__class__.from_dict(self.schema.model, self.to_dict())

Make a deep copy of the current entity proxy.

def merge(self: ~E, other: ~E) -> ~E:
454    def merge(self: E, other: E) -> E:
455        """Merge another entity proxy into this one. This will try and find
456        the common schema between both entities and then add all property
457        values from the other entity into this one."""
458        model = self.schema.model
459        self.id = self.id or other.id
460        try:
461            self.schema = model.common_schema(self.schema, other.schema)
462        except InvalidData as e:
463            msg = "Cannot merge entities with id %s: %s"
464            raise InvalidData(msg % (self.id, e))
465
466        self.context = merge_context(self.context, other.context)
467        for prop, values in other._properties.items():
468            self.add(prop, values, cleaned=True, quiet=True)
469        return self

Merge another entity proxy into this one. This will try and find the common schema between both entities and then add all property values from the other entity into this one.

@classmethod
def from_dict( cls: Type[~E], model: followthemoney.model.Model, data: Dict[str, Any], cleaned: bool = True) -> ~E:
499    @classmethod
500    def from_dict(
501        cls: Type[E],
502        model: "Model",
503        data: Dict[str, Any],
504        cleaned: bool = True,
505    ) -> E:
506        """Instantiate a proxy based on the given model and serialised dictionary.
507
508        Use :meth:`followthemoney.model.Model.get_proxy` instead."""
509        return cls(model, data, cleaned=cleaned)

Instantiate a proxy based on the given model and serialised dictionary.

Use followthemoney.model.Model.get_proxy() instead.