followthemoney.proxy

  1import logging
  2from typing import (
  3    TYPE_CHECKING,
  4    Any,
  5    Dict,
  6    Generator,
  7    List,
  8    Optional,
  9    Set,
 10    Tuple,
 11    Union,
 12    Type,
 13    TypeVar,
 14    cast,
 15)
 16import warnings
 17from itertools import product
 18from banal import ensure_dict
 19
 20from followthemoney.exc import InvalidData
 21from followthemoney.types import registry
 22from followthemoney.types.common import PropertyType
 23from followthemoney.property import Property
 24from followthemoney.rdf import SKOS, RDF, Literal, URIRef, Identifier
 25from followthemoney.util import sanitize_text, gettext
 26from followthemoney.util import merge_context, value_list, make_entity_id
 27
 28if TYPE_CHECKING:
 29    from followthemoney.model import Model
 30
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# A property reference as accepted by the proxy methods: either a
# ``Property`` object or the name of a property as a string.
P = Union[Property, str]
# An RDF statement expressed as a 3-tuple of identifiers.
Triple = Tuple[Identifier, Identifier, Identifier]
# Type variable bound to ``EntityProxy`` so that methods annotated with it
# are typed as returning the (sub)class they were invoked on.
E = TypeVar("E", bound="EntityProxy")
 35
 36
 37class EntityProxy(object):
 38    """A wrapper object for an entity, with utility functions for the
 39    introspection and manipulation of its properties.
 40
 41    This is the main working object in the library, used to generate, validate
 42    and emit data."""
 43
 44    __slots__ = ["schema", "id", "key_prefix", "context", "_properties", "_size"]
 45
 46    def __init__(
 47        self,
 48        model: "Model",
 49        data: Dict[str, Any],
 50        key_prefix: Optional[str] = None,
 51        cleaned: bool = True,
 52    ):
 53        data = dict(data or {})
 54        properties = data.pop("properties", {})
 55        if not cleaned:
 56            properties = ensure_dict(properties)
 57
 58        #: The schema definition for this entity, which implies the properties
 59        #: That can be set on it.
 60        schema = model.get(data.pop("schema", None))
 61        if schema is None:
 62            raise InvalidData(gettext("No schema for entity."))
 63        self.schema = schema
 64
 65        #: When using :meth:`~make_id` to generate a natural key for this entity,
 66        #: the prefix will be added to the ID as a salt to make it easier to keep
 67        #: IDs unique across datasets. This is somewhat redundant following the
 68        #: introduction of :class:`~followthemoney.namespace.Namespace`.
 69        self.key_prefix = key_prefix
 70
 71        #: A unique identifier for this entity, usually a hashed natural key,
 72        #: a UUID, or a very simple slug. Can be signed using a
 73        #: :class:`~followthemoney.namespace.Namespace`.
 74        self.id = data.pop("id", None)
 75        if not cleaned:
 76            self.id = sanitize_text(self.id)
 77
 78        #: If the input dictionary for the entity proxy contains fields other
 79        #: than ``id``, ``schema`` or ``properties``, they will be kept in here
 80        #: and re-added upon serialization.
 81        self.context = data
 82        self._properties: Dict[str, List[str]] = {}
 83        self._size = 0
 84
 85        for key, values in properties.items():
 86            if key not in self.schema.properties:
 87                continue
 88            if cleaned:
 89                # This does not call `self.add` as it might be called millions of times
 90                # in some context and we want to avoid the performance overhead of
 91                # doing so.
 92                seen: Set[str] = set()
 93                seen_add = seen.add
 94                unique_values = [v for v in values if not (v in seen or seen_add(v))]
 95                self._properties[key] = unique_values
 96                self._size += sum([len(v) for v in unique_values])
 97            else:
 98                self.add(key, values, quiet=True)
 99
100    def make_id(self, *parts: Any) -> Optional[str]:
101        """Generate a (hopefully unique) ID for the given entity, composed
102        of the given components, and the :attr:`~key_prefix` defined in
103        the proxy.
104        """
105        self.id = make_entity_id(*parts, key_prefix=self.key_prefix)
106        return self.id
107
108    def _prop_name(self, prop: P, quiet: bool = False) -> Optional[str]:
109        # This is pretty unwound because it gets called a *lot*.
110        if prop in self.schema.properties:
111            return cast(str, prop)
112        try:
113            obj = cast(Property, prop)
114            if obj.name in self.schema.properties:
115                return obj.name
116        except AttributeError:
117            pass
118        if quiet:
119            return None
120        msg = gettext("Unknown property (%s): %s")
121        raise InvalidData(msg % (self.schema, prop))
122
123    def get(self, prop: P, quiet: bool = False) -> List[str]:
124        """Get all values of a property.
125
126        :param prop: can be given as a name or an instance of
127            :class:`~followthemoney.property.Property`.
128        :param quiet: a reference to an non-existent property will return
129            an empty list instead of raising an error.
130        :return: A list of values.
131        """
132        prop_name = self._prop_name(prop, quiet=quiet)
133        if prop_name is None:
134            return []
135        return self._properties.get(prop_name, [])
136
137    def first(self, prop: P, quiet: bool = False) -> Optional[str]:
138        """Get only the first value set for the property.
139
140        :param prop: can be given as a name or an instance of
141            :class:`~followthemoney.property.Property`.
142        :param quiet: a reference to an non-existent property will return
143            an empty list instead of raising an error.
144        :return: A value, or ``None``.
145        """
146        for value in self.get(prop, quiet=quiet):
147            return value
148        return None
149
150    def has(self, prop: P, quiet: bool = False) -> bool:
151        """Check to see if the given property has at least one value set.
152
153        :param prop: can be given as a name or an instance of
154            :class:`~followthemoney.property.Property`.
155        :param quiet: a reference to an non-existent property will return
156            an empty list instead of raising an error.
157        :return: a boolean.
158        """
159        prop_name = self._prop_name(prop, quiet=quiet)
160        return prop_name in self._properties
161
162    def add(
163        self,
164        prop: P,
165        values: Any,
166        cleaned: bool = False,
167        quiet: bool = False,
168        fuzzy: bool = False,
169        format: Optional[str] = None,
170    ) -> None:
171        """Add the given value(s) to the property if they are valid for
172        the type of the property.
173
174        :param prop: can be given as a name or an instance of
175            :class:`~followthemoney.property.Property`.
176        :param values: either a single value, or a list of values to be added.
177        :param cleaned: should the data be normalised before adding it.
178        :param quiet: a reference to an non-existent property will return
179            an empty list instead of raising an error.
180        :param fuzzy: when normalising the data, should fuzzy matching be allowed.
181        :param format: when normalising the data, formatting for a date.
182        """
183        prop_name = self._prop_name(prop, quiet=quiet)
184        if prop_name is None:
185            return None
186        prop = self.schema.properties[prop_name]
187
188        # Don't allow setting the reverse properties:
189        if prop.stub:
190            if quiet:
191                return None
192            msg = gettext("Stub property (%s): %s")
193            raise InvalidData(msg % (self.schema, prop))
194
195        for value in value_list(values):
196            if not cleaned:
197                value = prop.type.clean(value, proxy=self, fuzzy=fuzzy, format=format)
198            self.unsafe_add(prop, value, cleaned=True)
199        return None
200
201    def unsafe_add(
202        self,
203        prop: Property,
204        value: Optional[str],
205        cleaned: bool = False,
206        fuzzy: bool = False,
207        format: Optional[str] = None,
208    ) -> None:
209        """A version of `add()` to be used only in type-checking code. This accepts
210        only a single value, and performs input cleaning on the premise that the
211        value is already valid unicode."""
212        if not cleaned and value is not None:
213            value = prop.type.clean_text(value, fuzzy=fuzzy, format=format, proxy=self)
214        if value is not None:
215            # Somewhat hacky: limit the maximum size of any particular
216            # field to avoid overloading upstream aleph/elasticsearch.
217            value_size = len(value)
218            if prop.type.max_size is not None:
219                if self._size + value_size > prop.type.max_size:
220                    # msg = "[%s] too large. Rejecting additional values."
221                    # log.warning(msg, prop.name)
222                    return None
223            self._size += value_size
224            self._properties.setdefault(prop.name, list())
225            if value not in self._properties[prop.name]:
226                self._properties[prop.name].append(value)
227        return None
228
229    def set(
230        self,
231        prop: P,
232        values: Any,
233        cleaned: bool = False,
234        quiet: bool = False,
235        fuzzy: bool = False,
236        format: Optional[str] = None,
237    ) -> None:
238        """Replace the values of the property with the given value(s).
239
240        :param prop: can be given as a name or an instance of
241            :class:`~followthemoney.property.Property`.
242        :param values: either a single value, or a list of values to be added.
243        :param cleaned: should the data be normalised before adding it.
244        :param quiet: a reference to an non-existent property will return
245            an empty list instead of raising an error.
246        """
247        prop_name = self._prop_name(prop, quiet=quiet)
248        if prop_name is None:
249            return
250        self._properties.pop(prop_name, None)
251        return self.add(
252            prop, values, cleaned=cleaned, quiet=quiet, fuzzy=fuzzy, format=format
253        )
254
255    def pop(self, prop: P, quiet: bool = True) -> List[str]:
256        """Remove all the values from the given property and return them.
257
258        :param prop: can be given as a name or an instance of
259            :class:`~followthemoney.property.Property`.
260        :param quiet: a reference to an non-existent property will return
261            an empty list instead of raising an error.
262        :return: a list of values, possibly empty.
263        """
264        prop_name = self._prop_name(prop, quiet=quiet)
265        if prop_name is None or prop_name not in self._properties:
266            return []
267        return list(self._properties.pop(prop_name))
268
269    def remove(self, prop: P, value: str, quiet: bool = True) -> None:
270        """Remove a single value from the given property. If it is not there,
271        no action takes place.
272
273        :param prop: can be given as a name or an instance of
274            :class:`~followthemoney.property.Property`.
275        :param value: will not be cleaned before checking.
276        :param quiet: a reference to an non-existent property will return
277            an empty list instead of raising an error.
278        """
279        prop_name = self._prop_name(prop, quiet=quiet)
280        if prop_name is not None and prop_name in self._properties:
281            try:
282                self._properties[prop_name].remove(value)
283            except (KeyError, ValueError):
284                pass
285
286    def iterprops(self) -> List[Property]:
287        """Iterate across all the properties for which a value is set in
288        the proxy (but do not return their values)."""
289        return [self.schema.properties[p] for p in self._properties.keys()]
290
291    def itervalues(self) -> Generator[Tuple[Property, str], None, None]:
292        """Iterate across all values in the proxy one by one, each given as a
293        tuple of the property and the value."""
294        for name, values in self._properties.items():
295            prop = self.schema.properties[name]
296            for value in values:
297                yield (prop, value)
298
299    def edgepairs(self) -> Generator[Tuple[str, str], None, None]:
300        """Return all the possible pairs of values for the edge source and target if
301        the schema allows for an edge representation of the entity."""
302        if self.schema.source_prop is not None and self.schema.target_prop is not None:
303            sources = self.get(self.schema.source_prop)
304            targets = self.get(self.schema.target_prop)
305            for (source, target) in product(sources, targets):
306                yield (source, target)
307
308    def get_type_values(
309        self, type_: PropertyType, matchable: bool = False
310    ) -> List[str]:
311        """All values of a particular type associated with a the entity. For
312        example, this lets you return all countries linked to an entity, rather
313        than manually checking each property to see if it contains countries.
314
315        :param type_: The type object to be searched.
316        :param matchable: Whether to return only property values marked as matchable.
317        """
318        combined = set()
319        for prop_name, values in self._properties.items():
320            prop = self.schema.properties[prop_name]
321            if matchable and not prop.matchable:
322                continue
323            if prop.type == type_:
324                combined.update(values)
325        return list(combined)
326
327    @property
328    def names(self) -> List[str]:
329        """Get the set of all name-type values set of the entity."""
330        return self.get_type_values(registry.name)
331
332    @property
333    def countries(self) -> List[str]:
334        """Get the set of all country-type values set of the entity."""
335        return self.get_type_values(registry.country)
336
337    @property
338    def temporal_start(self) -> Optional[Tuple[Property, str]]:
339        """Get a date that can be used to represent the start of the entity in a
340        timeline. If there are multiple possible dates, the earliest date is
341        returned."""
342        values = []
343
344        for prop in self.schema.temporal_start_props:
345            values += [(prop, value) for value in self.get(prop.name)]
346
347        values.sort(key=lambda tuple: tuple[1])
348        return next(iter(values), None)
349
350    @property
351    def temporal_end(self) -> Optional[Tuple[Property, str]]:
352        """Get a date that can be used to represent the end of the entity in a timeline.
353        If therer are multiple possible dates, the latest date is returned."""
354        values = []
355
356        for prop in self.schema.temporal_end_props:
357            values += [(prop, value) for value in self.get(prop.name)]
358
359        values.sort(reverse=True, key=lambda tuple: tuple[1])
360        return next(iter(values), None)
361
362    def get_type_inverted(self, matchable: bool = False) -> Dict[str, List[str]]:
363        """Return all the values of the entity arranged into a mapping with the
364        group name of their property type. These groups include ``countries``,
365        ``addresses``, ``emails``, etc."""
366        data: Dict[str, List[str]] = {}
367        for group, type_ in registry.groups.items():
368            values = self.get_type_values(type_, matchable=matchable)
369            if len(values):
370                data[group] = values
371        return data
372
373    def triples(self, qualified: bool = True) -> Generator[Triple, None, None]:
374        """Serialise the entity into a set of RDF triple statements. The
375        statements include the property values, an ``RDF#type`` definition
376        that refers to the entity schema, and a ``SKOS#prefLabel`` with the
377        entity caption."""
378        if self.id is None or self.schema is None:
379            return
380        uri = registry.entity.rdf(self.id)
381        yield (uri, RDF.type, self.schema.uri)
382        if qualified:
383            caption = self.caption
384            if caption != self.schema.label:
385                yield (uri, SKOS.prefLabel, Literal(caption))
386        for prop, value in self.itervalues():
387            value = prop.type.rdf(value)
388            if qualified:
389                yield (uri, prop.uri, value)
390            else:
391                yield (uri, URIRef(prop.name), value)
392
393    @property
394    def caption(self) -> str:
395        """The user-facing label to be used for this entity. This checks a list
396        of properties defined by the schema (caption) and returns the first
397        available value. If no caption is available, return the schema label."""
398        for prop in self.schema.caption:
399            for value in self.get(prop):
400                return value
401        return self.schema.label
402
403    @property
404    def country_hints(self) -> Set[str]:
405        """Some property types, such as phone numbers and IBAN codes imply a
406        country that may be associated with the entity. This list can be used
407        for a more generous matching approach than the actual country values."""
408        countries = set(self.countries)
409        if not len(countries):
410            for (prop, value) in self.itervalues():
411                hint = prop.type.country_hint(value)
412                if hint is not None:
413                    countries.add(hint)
414        return countries
415
416    @property
417    def properties(self) -> Dict[str, List[str]]:
418        """Return a mapping of the properties and set values of the entity."""
419        return {p: list(vs) for p, vs in self._properties.items()}
420
421    def to_dict(self) -> Dict[str, Any]:
422        """Serialise the proxy into a dictionary with the defined properties, ID,
423        schema and any contextual values that were handed in initially. The resulting
424        dictionary can be used to make a new proxy, and it is commonly written to disk
425        or a database."""
426        data = dict(self.context)
427        data.update(
428            {"id": self.id, "schema": self.schema.name, "properties": self.properties}
429        )
430        return data
431
432    def to_full_dict(self, matchable: bool = False) -> Dict[str, Any]:
433        """Return a serialised version of the entity with inverted type groups mixed
434        in. See :meth:`~get_type_inverted`."""
435        data = self.to_dict()
436        data.update(self.get_type_inverted(matchable=matchable))
437        return data
438
439    def clone(self: E) -> E:
440        """Make a deep copy of the current entity proxy."""
441        return self.__class__.from_dict(self.schema.model, self.to_dict())
442
443    def merge(self: E, other: E) -> E:
444        """Merge another entity proxy into this one. This will try and find
445        the common schema between both entities and then add all property
446        values from the other entity into this one."""
447        model = self.schema.model
448        self.id = self.id or other.id
449        try:
450            self.schema = model.common_schema(self.schema, other.schema)
451        except InvalidData as e:
452            msg = "Cannot merge entities with id %s: %s"
453            raise InvalidData(msg % (self.id, e))
454
455        self.context = merge_context(self.context, other.context)
456        for prop, values in other._properties.items():
457            self.add(prop, values, cleaned=True, quiet=True)
458        return self
459
460    def __str__(self) -> str:
461        return self.caption
462
463    def __repr__(self) -> str:
464        return "<E(%r,%r)>" % (self.id, str(self))
465
466    def __len__(self) -> int:
467        return self._size
468
469    def __hash__(self) -> int:
470        if not self.id:
471            warnings.warn(
472                "Hashing an EntityProxy without an ID results in undefined behaviour",
473                RuntimeWarning,
474            )
475        return hash(self.id)
476
477    def __eq__(self, other: Any) -> bool:
478        try:
479            if self.id is None or other.id is None:
480                warnings.warn(
481                    "Comparing EntityProxys without IDs results in undefined behaviour",
482                    RuntimeWarning,
483                )
484            return bool(self.id == other.id)
485        except AttributeError:
486            return False
487
488    @classmethod
489    def from_dict(
490        cls: Type[E],
491        model: "Model",
492        data: Dict[str, Any],
493        cleaned: bool = True,
494    ) -> E:
495        """Instantiate a proxy based on the given model and serialised dictionary.
496
497        Use :meth:`followthemoney.model.Model.get_proxy` instead."""
498        return cls(model, data, cleaned=cleaned)
log = <Logger followthemoney.proxy (WARNING)>
P = typing.Union[followthemoney.property.Property, str]
Triple = typing.Tuple[rdflib.term.Identifier, rdflib.term.Identifier, rdflib.term.Identifier]
class EntityProxy:
 38class EntityProxy(object):
 39    """A wrapper object for an entity, with utility functions for the
 40    introspection and manipulation of its properties.
 41
 42    This is the main working object in the library, used to generate, validate
 43    and emit data."""
 44
 45    __slots__ = ["schema", "id", "key_prefix", "context", "_properties", "_size"]
 46
 47    def __init__(
 48        self,
 49        model: "Model",
 50        data: Dict[str, Any],
 51        key_prefix: Optional[str] = None,
 52        cleaned: bool = True,
 53    ):
 54        data = dict(data or {})
 55        properties = data.pop("properties", {})
 56        if not cleaned:
 57            properties = ensure_dict(properties)
 58
 59        #: The schema definition for this entity, which implies the properties
 60        #: That can be set on it.
 61        schema = model.get(data.pop("schema", None))
 62        if schema is None:
 63            raise InvalidData(gettext("No schema for entity."))
 64        self.schema = schema
 65
 66        #: When using :meth:`~make_id` to generate a natural key for this entity,
 67        #: the prefix will be added to the ID as a salt to make it easier to keep
 68        #: IDs unique across datasets. This is somewhat redundant following the
 69        #: introduction of :class:`~followthemoney.namespace.Namespace`.
 70        self.key_prefix = key_prefix
 71
 72        #: A unique identifier for this entity, usually a hashed natural key,
 73        #: a UUID, or a very simple slug. Can be signed using a
 74        #: :class:`~followthemoney.namespace.Namespace`.
 75        self.id = data.pop("id", None)
 76        if not cleaned:
 77            self.id = sanitize_text(self.id)
 78
 79        #: If the input dictionary for the entity proxy contains fields other
 80        #: than ``id``, ``schema`` or ``properties``, they will be kept in here
 81        #: and re-added upon serialization.
 82        self.context = data
 83        self._properties: Dict[str, List[str]] = {}
 84        self._size = 0
 85
 86        for key, values in properties.items():
 87            if key not in self.schema.properties:
 88                continue
 89            if cleaned:
 90                # This does not call `self.add` as it might be called millions of times
 91                # in some context and we want to avoid the performance overhead of
 92                # doing so.
 93                seen: Set[str] = set()
 94                seen_add = seen.add
 95                unique_values = [v for v in values if not (v in seen or seen_add(v))]
 96                self._properties[key] = unique_values
 97                self._size += sum([len(v) for v in unique_values])
 98            else:
 99                self.add(key, values, quiet=True)
100
101    def make_id(self, *parts: Any) -> Optional[str]:
102        """Generate a (hopefully unique) ID for the given entity, composed
103        of the given components, and the :attr:`~key_prefix` defined in
104        the proxy.
105        """
106        self.id = make_entity_id(*parts, key_prefix=self.key_prefix)
107        return self.id
108
109    def _prop_name(self, prop: P, quiet: bool = False) -> Optional[str]:
110        # This is pretty unwound because it gets called a *lot*.
111        if prop in self.schema.properties:
112            return cast(str, prop)
113        try:
114            obj = cast(Property, prop)
115            if obj.name in self.schema.properties:
116                return obj.name
117        except AttributeError:
118            pass
119        if quiet:
120            return None
121        msg = gettext("Unknown property (%s): %s")
122        raise InvalidData(msg % (self.schema, prop))
123
124    def get(self, prop: P, quiet: bool = False) -> List[str]:
125        """Get all values of a property.
126
127        :param prop: can be given as a name or an instance of
128            :class:`~followthemoney.property.Property`.
129        :param quiet: a reference to an non-existent property will return
130            an empty list instead of raising an error.
131        :return: A list of values.
132        """
133        prop_name = self._prop_name(prop, quiet=quiet)
134        if prop_name is None:
135            return []
136        return self._properties.get(prop_name, [])
137
138    def first(self, prop: P, quiet: bool = False) -> Optional[str]:
139        """Get only the first value set for the property.
140
141        :param prop: can be given as a name or an instance of
142            :class:`~followthemoney.property.Property`.
143        :param quiet: a reference to an non-existent property will return
144            an empty list instead of raising an error.
145        :return: A value, or ``None``.
146        """
147        for value in self.get(prop, quiet=quiet):
148            return value
149        return None
150
151    def has(self, prop: P, quiet: bool = False) -> bool:
152        """Check to see if the given property has at least one value set.
153
154        :param prop: can be given as a name or an instance of
155            :class:`~followthemoney.property.Property`.
156        :param quiet: a reference to an non-existent property will return
157            an empty list instead of raising an error.
158        :return: a boolean.
159        """
160        prop_name = self._prop_name(prop, quiet=quiet)
161        return prop_name in self._properties
162
163    def add(
164        self,
165        prop: P,
166        values: Any,
167        cleaned: bool = False,
168        quiet: bool = False,
169        fuzzy: bool = False,
170        format: Optional[str] = None,
171    ) -> None:
172        """Add the given value(s) to the property if they are valid for
173        the type of the property.
174
175        :param prop: can be given as a name or an instance of
176            :class:`~followthemoney.property.Property`.
177        :param values: either a single value, or a list of values to be added.
178        :param cleaned: should the data be normalised before adding it.
179        :param quiet: a reference to an non-existent property will return
180            an empty list instead of raising an error.
181        :param fuzzy: when normalising the data, should fuzzy matching be allowed.
182        :param format: when normalising the data, formatting for a date.
183        """
184        prop_name = self._prop_name(prop, quiet=quiet)
185        if prop_name is None:
186            return None
187        prop = self.schema.properties[prop_name]
188
189        # Don't allow setting the reverse properties:
190        if prop.stub:
191            if quiet:
192                return None
193            msg = gettext("Stub property (%s): %s")
194            raise InvalidData(msg % (self.schema, prop))
195
196        for value in value_list(values):
197            if not cleaned:
198                value = prop.type.clean(value, proxy=self, fuzzy=fuzzy, format=format)
199            self.unsafe_add(prop, value, cleaned=True)
200        return None
201
202    def unsafe_add(
203        self,
204        prop: Property,
205        value: Optional[str],
206        cleaned: bool = False,
207        fuzzy: bool = False,
208        format: Optional[str] = None,
209    ) -> None:
210        """A version of `add()` to be used only in type-checking code. This accepts
211        only a single value, and performs input cleaning on the premise that the
212        value is already valid unicode."""
213        if not cleaned and value is not None:
214            value = prop.type.clean_text(value, fuzzy=fuzzy, format=format, proxy=self)
215        if value is not None:
216            # Somewhat hacky: limit the maximum size of any particular
217            # field to avoid overloading upstream aleph/elasticsearch.
218            value_size = len(value)
219            if prop.type.max_size is not None:
220                if self._size + value_size > prop.type.max_size:
221                    # msg = "[%s] too large. Rejecting additional values."
222                    # log.warning(msg, prop.name)
223                    return None
224            self._size += value_size
225            self._properties.setdefault(prop.name, list())
226            if value not in self._properties[prop.name]:
227                self._properties[prop.name].append(value)
228        return None
229
230    def set(
231        self,
232        prop: P,
233        values: Any,
234        cleaned: bool = False,
235        quiet: bool = False,
236        fuzzy: bool = False,
237        format: Optional[str] = None,
238    ) -> None:
239        """Replace the values of the property with the given value(s).
240
241        :param prop: can be given as a name or an instance of
242            :class:`~followthemoney.property.Property`.
243        :param values: either a single value, or a list of values to be added.
244        :param cleaned: should the data be normalised before adding it.
245        :param quiet: a reference to an non-existent property will return
246            an empty list instead of raising an error.
247        """
248        prop_name = self._prop_name(prop, quiet=quiet)
249        if prop_name is None:
250            return
251        self._properties.pop(prop_name, None)
252        return self.add(
253            prop, values, cleaned=cleaned, quiet=quiet, fuzzy=fuzzy, format=format
254        )
255
256    def pop(self, prop: P, quiet: bool = True) -> List[str]:
257        """Remove all the values from the given property and return them.
258
259        :param prop: can be given as a name or an instance of
260            :class:`~followthemoney.property.Property`.
261        :param quiet: a reference to an non-existent property will return
262            an empty list instead of raising an error.
263        :return: a list of values, possibly empty.
264        """
265        prop_name = self._prop_name(prop, quiet=quiet)
266        if prop_name is None or prop_name not in self._properties:
267            return []
268        return list(self._properties.pop(prop_name))
269
270    def remove(self, prop: P, value: str, quiet: bool = True) -> None:
271        """Remove a single value from the given property. If it is not there,
272        no action takes place.
273
274        :param prop: can be given as a name or an instance of
275            :class:`~followthemoney.property.Property`.
276        :param value: will not be cleaned before checking.
277        :param quiet: a reference to an non-existent property will return
278            an empty list instead of raising an error.
279        """
280        prop_name = self._prop_name(prop, quiet=quiet)
281        if prop_name is not None and prop_name in self._properties:
282            try:
283                self._properties[prop_name].remove(value)
284            except (KeyError, ValueError):
285                pass
286
287    def iterprops(self) -> List[Property]:
288        """Iterate across all the properties for which a value is set in
289        the proxy (but do not return their values)."""
290        return [self.schema.properties[p] for p in self._properties.keys()]
291
292    def itervalues(self) -> Generator[Tuple[Property, str], None, None]:
293        """Iterate across all values in the proxy one by one, each given as a
294        tuple of the property and the value."""
295        for name, values in self._properties.items():
296            prop = self.schema.properties[name]
297            for value in values:
298                yield (prop, value)
299
300    def edgepairs(self) -> Generator[Tuple[str, str], None, None]:
301        """Return all the possible pairs of values for the edge source and target if
302        the schema allows for an edge representation of the entity."""
303        if self.schema.source_prop is not None and self.schema.target_prop is not None:
304            sources = self.get(self.schema.source_prop)
305            targets = self.get(self.schema.target_prop)
306            for (source, target) in product(sources, targets):
307                yield (source, target)
308
309    def get_type_values(
310        self, type_: PropertyType, matchable: bool = False
311    ) -> List[str]:
312        """All values of a particular type associated with a the entity. For
313        example, this lets you return all countries linked to an entity, rather
314        than manually checking each property to see if it contains countries.
315
316        :param type_: The type object to be searched.
317        :param matchable: Whether to return only property values marked as matchable.
318        """
319        combined = set()
320        for prop_name, values in self._properties.items():
321            prop = self.schema.properties[prop_name]
322            if matchable and not prop.matchable:
323                continue
324            if prop.type == type_:
325                combined.update(values)
326        return list(combined)
327
328    @property
329    def names(self) -> List[str]:
330        """Get the set of all name-type values set of the entity."""
331        return self.get_type_values(registry.name)
332
333    @property
334    def countries(self) -> List[str]:
335        """Get the set of all country-type values set of the entity."""
336        return self.get_type_values(registry.country)
337
338    @property
339    def temporal_start(self) -> Optional[Tuple[Property, str]]:
340        """Get a date that can be used to represent the start of the entity in a
341        timeline. If there are multiple possible dates, the earliest date is
342        returned."""
343        values = []
344
345        for prop in self.schema.temporal_start_props:
346            values += [(prop, value) for value in self.get(prop.name)]
347
348        values.sort(key=lambda tuple: tuple[1])
349        return next(iter(values), None)
350
351    @property
352    def temporal_end(self) -> Optional[Tuple[Property, str]]:
353        """Get a date that can be used to represent the end of the entity in a timeline.
354        If therer are multiple possible dates, the latest date is returned."""
355        values = []
356
357        for prop in self.schema.temporal_end_props:
358            values += [(prop, value) for value in self.get(prop.name)]
359
360        values.sort(reverse=True, key=lambda tuple: tuple[1])
361        return next(iter(values), None)
362
363    def get_type_inverted(self, matchable: bool = False) -> Dict[str, List[str]]:
364        """Return all the values of the entity arranged into a mapping with the
365        group name of their property type. These groups include ``countries``,
366        ``addresses``, ``emails``, etc."""
367        data: Dict[str, List[str]] = {}
368        for group, type_ in registry.groups.items():
369            values = self.get_type_values(type_, matchable=matchable)
370            if len(values):
371                data[group] = values
372        return data
373
374    def triples(self, qualified: bool = True) -> Generator[Triple, None, None]:
375        """Serialise the entity into a set of RDF triple statements. The
376        statements include the property values, an ``RDF#type`` definition
377        that refers to the entity schema, and a ``SKOS#prefLabel`` with the
378        entity caption."""
379        if self.id is None or self.schema is None:
380            return
381        uri = registry.entity.rdf(self.id)
382        yield (uri, RDF.type, self.schema.uri)
383        if qualified:
384            caption = self.caption
385            if caption != self.schema.label:
386                yield (uri, SKOS.prefLabel, Literal(caption))
387        for prop, value in self.itervalues():
388            value = prop.type.rdf(value)
389            if qualified:
390                yield (uri, prop.uri, value)
391            else:
392                yield (uri, URIRef(prop.name), value)
393
394    @property
395    def caption(self) -> str:
396        """The user-facing label to be used for this entity. This checks a list
397        of properties defined by the schema (caption) and returns the first
398        available value. If no caption is available, return the schema label."""
399        for prop in self.schema.caption:
400            for value in self.get(prop):
401                return value
402        return self.schema.label
403
404    @property
405    def country_hints(self) -> Set[str]:
406        """Some property types, such as phone numbers and IBAN codes imply a
407        country that may be associated with the entity. This list can be used
408        for a more generous matching approach than the actual country values."""
409        countries = set(self.countries)
410        if not len(countries):
411            for (prop, value) in self.itervalues():
412                hint = prop.type.country_hint(value)
413                if hint is not None:
414                    countries.add(hint)
415        return countries
416
417    @property
418    def properties(self) -> Dict[str, List[str]]:
419        """Return a mapping of the properties and set values of the entity."""
420        return {p: list(vs) for p, vs in self._properties.items()}
421
422    def to_dict(self) -> Dict[str, Any]:
423        """Serialise the proxy into a dictionary with the defined properties, ID,
424        schema and any contextual values that were handed in initially. The resulting
425        dictionary can be used to make a new proxy, and it is commonly written to disk
426        or a database."""
427        data = dict(self.context)
428        data.update(
429            {"id": self.id, "schema": self.schema.name, "properties": self.properties}
430        )
431        return data
432
433    def to_full_dict(self, matchable: bool = False) -> Dict[str, Any]:
434        """Return a serialised version of the entity with inverted type groups mixed
435        in. See :meth:`~get_type_inverted`."""
436        data = self.to_dict()
437        data.update(self.get_type_inverted(matchable=matchable))
438        return data
439
440    def clone(self: E) -> E:
441        """Make a deep copy of the current entity proxy."""
442        return self.__class__.from_dict(self.schema.model, self.to_dict())
443
444    def merge(self: E, other: E) -> E:
445        """Merge another entity proxy into this one. This will try and find
446        the common schema between both entities and then add all property
447        values from the other entity into this one."""
448        model = self.schema.model
449        self.id = self.id or other.id
450        try:
451            self.schema = model.common_schema(self.schema, other.schema)
452        except InvalidData as e:
453            msg = "Cannot merge entities with id %s: %s"
454            raise InvalidData(msg % (self.id, e))
455
456        self.context = merge_context(self.context, other.context)
457        for prop, values in other._properties.items():
458            self.add(prop, values, cleaned=True, quiet=True)
459        return self
460
461    def __str__(self) -> str:
462        return self.caption
463
464    def __repr__(self) -> str:
465        return "<E(%r,%r)>" % (self.id, str(self))
466
467    def __len__(self) -> int:
468        return self._size
469
470    def __hash__(self) -> int:
471        if not self.id:
472            warnings.warn(
473                "Hashing an EntityProxy without an ID results in undefined behaviour",
474                RuntimeWarning,
475            )
476        return hash(self.id)
477
478    def __eq__(self, other: Any) -> bool:
479        try:
480            if self.id is None or other.id is None:
481                warnings.warn(
482                    "Comparing EntityProxys without IDs results in undefined behaviour",
483                    RuntimeWarning,
484                )
485            return bool(self.id == other.id)
486        except AttributeError:
487            return False
488
489    @classmethod
490    def from_dict(
491        cls: Type[E],
492        model: "Model",
493        data: Dict[str, Any],
494        cleaned: bool = True,
495    ) -> E:
496        """Instantiate a proxy based on the given model and serialised dictionary.
497
498        Use :meth:`followthemoney.model.Model.get_proxy` instead."""
499        return cls(model, data, cleaned=cleaned)

A wrapper object for an entity, with utility functions for the introspection and manipulation of its properties.

This is the main working object in the library, used to generate, validate and emit data.

EntityProxy( model: followthemoney.model.Model, data: Dict[str, Any], key_prefix: Optional[str] = None, cleaned: bool = True)
47    def __init__(
48        self,
49        model: "Model",
50        data: Dict[str, Any],
51        key_prefix: Optional[str] = None,
52        cleaned: bool = True,
53    ):
54        data = dict(data or {})
55        properties = data.pop("properties", {})
56        if not cleaned:
57            properties = ensure_dict(properties)
58
59        #: The schema definition for this entity, which implies the properties
60        #: That can be set on it.
61        schema = model.get(data.pop("schema", None))
62        if schema is None:
63            raise InvalidData(gettext("No schema for entity."))
64        self.schema = schema
65
66        #: When using :meth:`~make_id` to generate a natural key for this entity,
67        #: the prefix will be added to the ID as a salt to make it easier to keep
68        #: IDs unique across datasets. This is somewhat redundant following the
69        #: introduction of :class:`~followthemoney.namespace.Namespace`.
70        self.key_prefix = key_prefix
71
72        #: A unique identifier for this entity, usually a hashed natural key,
73        #: a UUID, or a very simple slug. Can be signed using a
74        #: :class:`~followthemoney.namespace.Namespace`.
75        self.id = data.pop("id", None)
76        if not cleaned:
77            self.id = sanitize_text(self.id)
78
79        #: If the input dictionary for the entity proxy contains fields other
80        #: than ``id``, ``schema`` or ``properties``, they will be kept in here
81        #: and re-added upon serialization.
82        self.context = data
83        self._properties: Dict[str, List[str]] = {}
84        self._size = 0
85
86        for key, values in properties.items():
87            if key not in self.schema.properties:
88                continue
89            if cleaned:
90                # This does not call `self.add` as it might be called millions of times
91                # in some context and we want to avoid the performance overhead of
92                # doing so.
93                seen: Set[str] = set()
94                seen_add = seen.add
95                unique_values = [v for v in values if not (v in seen or seen_add(v))]
96                self._properties[key] = unique_values
97                self._size += sum([len(v) for v in unique_values])
98            else:
99                self.add(key, values, quiet=True)
schema
key_prefix
id
context
def make_id(self, *parts: Any) -> Optional[str]:
101    def make_id(self, *parts: Any) -> Optional[str]:
102        """Generate a (hopefully unique) ID for the given entity, composed
103        of the given components, and the :attr:`~key_prefix` defined in
104        the proxy.
105        """
106        self.id = make_entity_id(*parts, key_prefix=self.key_prefix)
107        return self.id

Generate a (hopefully unique) ID for the given entity, composed of the given components, and the ~key_prefix defined in the proxy.

def get( self, prop: Union[followthemoney.property.Property, str], quiet: bool = False) -> List[str]:
124    def get(self, prop: P, quiet: bool = False) -> List[str]:
125        """Get all values of a property.
126
127        :param prop: can be given as a name or an instance of
128            :class:`~followthemoney.property.Property`.
129        :param quiet: a reference to an non-existent property will return
130            an empty list instead of raising an error.
131        :return: A list of values.
132        """
133        prop_name = self._prop_name(prop, quiet=quiet)
134        if prop_name is None:
135            return []
136        return self._properties.get(prop_name, [])

Get all values of a property.

Parameters
  • prop: can be given as a name or an instance of ~followthemoney.property.Property.
  • quiet: a reference to a non-existent property will return an empty list instead of raising an error.
Returns

A list of values.

def first( self, prop: Union[followthemoney.property.Property, str], quiet: bool = False) -> Optional[str]:
138    def first(self, prop: P, quiet: bool = False) -> Optional[str]:
139        """Get only the first value set for the property.
140
141        :param prop: can be given as a name or an instance of
142            :class:`~followthemoney.property.Property`.
143        :param quiet: a reference to an non-existent property will return
144            an empty list instead of raising an error.
145        :return: A value, or ``None``.
146        """
147        for value in self.get(prop, quiet=quiet):
148            return value
149        return None

Get only the first value set for the property.

Parameters
  • prop: can be given as a name or an instance of ~followthemoney.property.Property.
  • quiet: a reference to a non-existent property will return None instead of raising an error.
Returns

A value, or None.

def has( self, prop: Union[followthemoney.property.Property, str], quiet: bool = False) -> bool:
151    def has(self, prop: P, quiet: bool = False) -> bool:
152        """Check to see if the given property has at least one value set.
153
154        :param prop: can be given as a name or an instance of
155            :class:`~followthemoney.property.Property`.
156        :param quiet: a reference to an non-existent property will return
157            an empty list instead of raising an error.
158        :return: a boolean.
159        """
160        prop_name = self._prop_name(prop, quiet=quiet)
161        return prop_name in self._properties

Check to see if the given property has at least one value set.

Parameters
  • prop: can be given as a name or an instance of ~followthemoney.property.Property.
  • quiet: a reference to a non-existent property will return False instead of raising an error.
Returns

a boolean.

def add( self, prop: Union[followthemoney.property.Property, str], values: Any, cleaned: bool = False, quiet: bool = False, fuzzy: bool = False, format: Optional[str] = None) -> None:
163    def add(
164        self,
165        prop: P,
166        values: Any,
167        cleaned: bool = False,
168        quiet: bool = False,
169        fuzzy: bool = False,
170        format: Optional[str] = None,
171    ) -> None:
172        """Add the given value(s) to the property if they are valid for
173        the type of the property.
174
175        :param prop: can be given as a name or an instance of
176            :class:`~followthemoney.property.Property`.
177        :param values: either a single value, or a list of values to be added.
178        :param cleaned: should the data be normalised before adding it.
179        :param quiet: a reference to an non-existent property will return
180            an empty list instead of raising an error.
181        :param fuzzy: when normalising the data, should fuzzy matching be allowed.
182        :param format: when normalising the data, formatting for a date.
183        """
184        prop_name = self._prop_name(prop, quiet=quiet)
185        if prop_name is None:
186            return None
187        prop = self.schema.properties[prop_name]
188
189        # Don't allow setting the reverse properties:
190        if prop.stub:
191            if quiet:
192                return None
193            msg = gettext("Stub property (%s): %s")
194            raise InvalidData(msg % (self.schema, prop))
195
196        for value in value_list(values):
197            if not cleaned:
198                value = prop.type.clean(value, proxy=self, fuzzy=fuzzy, format=format)
199            self.unsafe_add(prop, value, cleaned=True)
200        return None

Add the given value(s) to the property if they are valid for the type of the property.

Parameters
  • prop: can be given as a name or an instance of ~followthemoney.property.Property.
  • values: either a single value, or a list of values to be added.
  • cleaned: should the data be normalised before adding it.
  • quiet: a reference to a non-existent property will be ignored instead of raising an error.
  • fuzzy: when normalising the data, should fuzzy matching be allowed.
  • format: when normalising the data, formatting for a date.
def unsafe_add( self, prop: followthemoney.property.Property, value: Optional[str], cleaned: bool = False, fuzzy: bool = False, format: Optional[str] = None) -> None:
202    def unsafe_add(
203        self,
204        prop: Property,
205        value: Optional[str],
206        cleaned: bool = False,
207        fuzzy: bool = False,
208        format: Optional[str] = None,
209    ) -> None:
210        """A version of `add()` to be used only in type-checking code. This accepts
211        only a single value, and performs input cleaning on the premise that the
212        value is already valid unicode."""
213        if not cleaned and value is not None:
214            value = prop.type.clean_text(value, fuzzy=fuzzy, format=format, proxy=self)
215        if value is not None:
216            # Somewhat hacky: limit the maximum size of any particular
217            # field to avoid overloading upstream aleph/elasticsearch.
218            value_size = len(value)
219            if prop.type.max_size is not None:
220                if self._size + value_size > prop.type.max_size:
221                    # msg = "[%s] too large. Rejecting additional values."
222                    # log.warning(msg, prop.name)
223                    return None
224            self._size += value_size
225            self._properties.setdefault(prop.name, list())
226            if value not in self._properties[prop.name]:
227                self._properties[prop.name].append(value)
228        return None

A version of add() to be used only in type-checking code. This accepts only a single value, and performs input cleaning on the premise that the value is already valid unicode.

def set( self, prop: Union[followthemoney.property.Property, str], values: Any, cleaned: bool = False, quiet: bool = False, fuzzy: bool = False, format: Optional[str] = None) -> None:
230    def set(
231        self,
232        prop: P,
233        values: Any,
234        cleaned: bool = False,
235        quiet: bool = False,
236        fuzzy: bool = False,
237        format: Optional[str] = None,
238    ) -> None:
239        """Replace the values of the property with the given value(s).
240
241        :param prop: can be given as a name or an instance of
242            :class:`~followthemoney.property.Property`.
243        :param values: either a single value, or a list of values to be added.
244        :param cleaned: should the data be normalised before adding it.
245        :param quiet: a reference to an non-existent property will return
246            an empty list instead of raising an error.
247        """
248        prop_name = self._prop_name(prop, quiet=quiet)
249        if prop_name is None:
250            return
251        self._properties.pop(prop_name, None)
252        return self.add(
253            prop, values, cleaned=cleaned, quiet=quiet, fuzzy=fuzzy, format=format
254        )

Replace the values of the property with the given value(s).

Parameters
  • prop: can be given as a name or an instance of ~followthemoney.property.Property.
  • values: either a single value, or a list of values to be added.
  • cleaned: should the data be normalised before adding it.
  • quiet: a reference to a non-existent property will be ignored instead of raising an error.
def pop( self, prop: Union[followthemoney.property.Property, str], quiet: bool = True) -> List[str]:
256    def pop(self, prop: P, quiet: bool = True) -> List[str]:
257        """Remove all the values from the given property and return them.
258
259        :param prop: can be given as a name or an instance of
260            :class:`~followthemoney.property.Property`.
261        :param quiet: a reference to an non-existent property will return
262            an empty list instead of raising an error.
263        :return: a list of values, possibly empty.
264        """
265        prop_name = self._prop_name(prop, quiet=quiet)
266        if prop_name is None or prop_name not in self._properties:
267            return []
268        return list(self._properties.pop(prop_name))

Remove all the values from the given property and return them.

Parameters
  • prop: can be given as a name or an instance of ~followthemoney.property.Property.
  • quiet: a reference to a non-existent property will return an empty list instead of raising an error.
Returns

a list of values, possibly empty.

def remove( self, prop: Union[followthemoney.property.Property, str], value: str, quiet: bool = True) -> None:
270    def remove(self, prop: P, value: str, quiet: bool = True) -> None:
271        """Remove a single value from the given property. If it is not there,
272        no action takes place.
273
274        :param prop: can be given as a name or an instance of
275            :class:`~followthemoney.property.Property`.
276        :param value: will not be cleaned before checking.
277        :param quiet: a reference to an non-existent property will return
278            an empty list instead of raising an error.
279        """
280        prop_name = self._prop_name(prop, quiet=quiet)
281        if prop_name is not None and prop_name in self._properties:
282            try:
283                self._properties[prop_name].remove(value)
284            except (KeyError, ValueError):
285                pass

Remove a single value from the given property. If it is not there, no action takes place.

Parameters
  • prop: can be given as a name or an instance of ~followthemoney.property.Property.
  • value: will not be cleaned before checking.
  • quiet: a reference to a non-existent property will be ignored instead of raising an error.
def iterprops(self) -> List[followthemoney.property.Property]:
287    def iterprops(self) -> List[Property]:
288        """Iterate across all the properties for which a value is set in
289        the proxy (but do not return their values)."""
290        return [self.schema.properties[p] for p in self._properties.keys()]

Iterate across all the properties for which a value is set in the proxy (but do not return their values).

def itervalues( self) -> Generator[Tuple[followthemoney.property.Property, str], NoneType, NoneType]:
292    def itervalues(self) -> Generator[Tuple[Property, str], None, None]:
293        """Iterate across all values in the proxy one by one, each given as a
294        tuple of the property and the value."""
295        for name, values in self._properties.items():
296            prop = self.schema.properties[name]
297            for value in values:
298                yield (prop, value)

Iterate across all values in the proxy one by one, each given as a tuple of the property and the value.

def edgepairs(self) -> Generator[Tuple[str, str], NoneType, NoneType]:
300    def edgepairs(self) -> Generator[Tuple[str, str], None, None]:
301        """Return all the possible pairs of values for the edge source and target if
302        the schema allows for an edge representation of the entity."""
303        if self.schema.source_prop is not None and self.schema.target_prop is not None:
304            sources = self.get(self.schema.source_prop)
305            targets = self.get(self.schema.target_prop)
306            for (source, target) in product(sources, targets):
307                yield (source, target)

Return all the possible pairs of values for the edge source and target if the schema allows for an edge representation of the entity.

def get_type_values( self, type_: followthemoney.types.common.PropertyType, matchable: bool = False) -> List[str]:
309    def get_type_values(
310        self, type_: PropertyType, matchable: bool = False
311    ) -> List[str]:
312        """All values of a particular type associated with a the entity. For
313        example, this lets you return all countries linked to an entity, rather
314        than manually checking each property to see if it contains countries.
315
316        :param type_: The type object to be searched.
317        :param matchable: Whether to return only property values marked as matchable.
318        """
319        combined = set()
320        for prop_name, values in self._properties.items():
321            prop = self.schema.properties[prop_name]
322            if matchable and not prop.matchable:
323                continue
324            if prop.type == type_:
325                combined.update(values)
326        return list(combined)

All values of a particular type associated with the entity. For example, this lets you return all countries linked to an entity, rather than manually checking each property to see if it contains countries.

Parameters
  • type_: The type object to be searched.
  • matchable: Whether to return only property values marked as matchable.
names: List[str]
328    @property
329    def names(self) -> List[str]:
330        """Get the set of all name-type values set of the entity."""
331        return self.get_type_values(registry.name)

Get all name-type values set on the entity.

countries: List[str]
333    @property
334    def countries(self) -> List[str]:
335        """Get the set of all country-type values set of the entity."""
336        return self.get_type_values(registry.country)

Get all country-type values set on the entity.

temporal_start: Optional[Tuple[followthemoney.property.Property, str]]
338    @property
339    def temporal_start(self) -> Optional[Tuple[Property, str]]:
340        """Get a date that can be used to represent the start of the entity in a
341        timeline. If there are multiple possible dates, the earliest date is
342        returned."""
343        values = []
344
345        for prop in self.schema.temporal_start_props:
346            values += [(prop, value) for value in self.get(prop.name)]
347
348        values.sort(key=lambda tuple: tuple[1])
349        return next(iter(values), None)

Get a date that can be used to represent the start of the entity in a timeline. If there are multiple possible dates, the earliest date is returned.

temporal_end: Optional[Tuple[followthemoney.property.Property, str]]
351    @property
352    def temporal_end(self) -> Optional[Tuple[Property, str]]:
353        """Get a date that can be used to represent the end of the entity in a timeline.
354        If therer are multiple possible dates, the latest date is returned."""
355        values = []
356
357        for prop in self.schema.temporal_end_props:
358            values += [(prop, value) for value in self.get(prop.name)]
359
360        values.sort(reverse=True, key=lambda tuple: tuple[1])
361        return next(iter(values), None)

Get a date that can be used to represent the end of the entity in a timeline. If there are multiple possible dates, the latest date is returned.

def get_type_inverted(self, matchable: bool = False) -> Dict[str, List[str]]:
363    def get_type_inverted(self, matchable: bool = False) -> Dict[str, List[str]]:
364        """Return all the values of the entity arranged into a mapping with the
365        group name of their property type. These groups include ``countries``,
366        ``addresses``, ``emails``, etc."""
367        data: Dict[str, List[str]] = {}
368        for group, type_ in registry.groups.items():
369            values = self.get_type_values(type_, matchable=matchable)
370            if len(values):
371                data[group] = values
372        return data

Return all the values of the entity arranged into a mapping with the group name of their property type. These groups include countries, addresses, emails, etc.

def triples( self, qualified: bool = True) -> Generator[Tuple[rdflib.term.Identifier, rdflib.term.Identifier, rdflib.term.Identifier], NoneType, NoneType]:
374    def triples(self, qualified: bool = True) -> Generator[Triple, None, None]:
375        """Serialise the entity into a set of RDF triple statements. The
376        statements include the property values, an ``RDF#type`` definition
377        that refers to the entity schema, and a ``SKOS#prefLabel`` with the
378        entity caption."""
379        if self.id is None or self.schema is None:
380            return
381        uri = registry.entity.rdf(self.id)
382        yield (uri, RDF.type, self.schema.uri)
383        if qualified:
384            caption = self.caption
385            if caption != self.schema.label:
386                yield (uri, SKOS.prefLabel, Literal(caption))
387        for prop, value in self.itervalues():
388            value = prop.type.rdf(value)
389            if qualified:
390                yield (uri, prop.uri, value)
391            else:
392                yield (uri, URIRef(prop.name), value)

Serialise the entity into a set of RDF triple statements. The statements include the property values, an RDF#type definition that refers to the entity schema, and a SKOS#prefLabel with the entity caption.

caption: str
394    @property
395    def caption(self) -> str:
396        """The user-facing label to be used for this entity. This checks a list
397        of properties defined by the schema (caption) and returns the first
398        available value. If no caption is available, return the schema label."""
399        for prop in self.schema.caption:
400            for value in self.get(prop):
401                return value
402        return self.schema.label

The user-facing label to be used for this entity. This checks a list of properties defined by the schema (caption) and returns the first available value. If no caption is available, return the schema label.

country_hints: Set[str]
404    @property
405    def country_hints(self) -> Set[str]:
406        """Some property types, such as phone numbers and IBAN codes imply a
407        country that may be associated with the entity. This list can be used
408        for a more generous matching approach than the actual country values."""
409        countries = set(self.countries)
410        if not len(countries):
411            for (prop, value) in self.itervalues():
412                hint = prop.type.country_hint(value)
413                if hint is not None:
414                    countries.add(hint)
415        return countries

Some property types, such as phone numbers and IBAN codes imply a country that may be associated with the entity. This list can be used for a more generous matching approach than the actual country values.

properties: Dict[str, List[str]]
417    @property
418    def properties(self) -> Dict[str, List[str]]:
419        """Return a mapping of the properties and set values of the entity."""
420        return {p: list(vs) for p, vs in self._properties.items()}

Return a mapping of the properties and set values of the entity.

def to_dict(self) -> Dict[str, Any]:
def to_dict(self) -> Dict[str, Any]:
    """Serialise the proxy into a plain dictionary.

    The result starts as a copy of the contextual values that were handed in
    initially and is then given the ``id``, ``schema`` (by name) and
    ``properties`` keys, which override any context entries of the same
    name. The resulting dictionary can be used to make a new proxy, and it
    is commonly written to disk or a database."""
    serialised = dict(self.context)
    serialised["id"] = self.id
    serialised["schema"] = self.schema.name
    serialised["properties"] = self.properties
    return serialised

Serialise the proxy into a dictionary with the defined properties, ID, schema and any contextual values that were handed in initially. The resulting dictionary can be used to make a new proxy, and it is commonly written to disk or a database.

def to_full_dict(self, matchable: bool = False) -> Dict[str, Any]:
def to_full_dict(self, matchable: bool = False) -> Dict[str, Any]:
    """Return the serialised entity with the inverted type groups mixed in.

    The output of :meth:`~to_dict` is updated with the mapping produced by
    :meth:`~get_type_inverted`, to which ``matchable`` is passed through.
    Keys from the inverted mapping override same-named serialised keys."""
    payload = self.to_dict()
    inverted = self.get_type_inverted(matchable=matchable)
    payload.update(inverted)
    return payload

Return a serialised version of the entity with inverted type groups mixed in. See get_type_inverted().

def clone(self: ~E) -> ~E:
def clone(self: E) -> E:
    """Make a copy of the current entity proxy.

    The proxy is serialised with ``to_dict`` and a new instance of the same
    class is built from that dictionary via ``from_dict``, using the model
    of the current schema."""
    factory = self.__class__.from_dict
    return factory(self.schema.model, self.to_dict())

Make a deep copy of the current entity proxy.

def merge(self: ~E, other: ~E) -> ~E:
def merge(self: E, other: E) -> E:
    """Merge another entity proxy into this one, in place.

    The ID of ``other`` is adopted when this proxy has none. The schema is
    replaced by the common schema of both entities as determined by the
    model, the two contexts are combined with ``merge_context``, and all
    property values of ``other`` are added to this proxy.

    Returns this proxy. Raises ``InvalidData`` if the model cannot find a
    common schema; the model's original error is attached as the cause."""
    model = self.schema.model
    # Note: the ID fallback happens before the schema check, so the error
    # message below reports whichever ID is available.
    self.id = self.id or other.id
    try:
        self.schema = model.common_schema(self.schema, other.schema)
    except InvalidData as e:
        msg = "Cannot merge entities with id %s: %s"
        # Chain explicitly so the traceback shows the schema error as the
        # direct cause rather than as an incidental exception context.
        raise InvalidData(msg % (self.id, e)) from e

    self.context = merge_context(self.context, other.context)
    for prop, values in other._properties.items():
        # Values were already stored on ``other``, so they are added as
        # cleaned and in quiet mode.
        self.add(prop, values, cleaned=True, quiet=True)
    return self

Merge another entity proxy into this one. This will try to find the common schema between both entities and then add all property values from the other entity into this one.

@classmethod
def from_dict( cls: Type[~E], model: followthemoney.model.Model, data: Dict[str, Any], cleaned: bool = True) -> ~E:
@classmethod
def from_dict(
    cls: Type[E],
    model: "Model",
    data: Dict[str, Any],
    cleaned: bool = True,
) -> E:
    """Build a proxy of this class from a model and a serialised dictionary.

    The arguments are handed straight to the class constructor, with
    ``cleaned`` passed by keyword.

    Use :meth:`followthemoney.model.Model.get_proxy` instead."""
    proxy = cls(model, data, cleaned=cleaned)
    return proxy

Instantiate a proxy based on the given model and serialised dictionary.

Use followthemoney.model.Model.get_proxy() instead.