followthemoney.proxy
import logging
from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    Generator,
    List,
    Optional,
    Set,
    Tuple,
    Union,
    Type,
    TypeVar,
    cast,
)
import warnings
from itertools import product
from banal import ensure_dict

from followthemoney.exc import InvalidData
from followthemoney.types import registry
from followthemoney.types.common import PropertyType
from followthemoney.property import Property
from followthemoney.rdf import SKOS, RDF, Literal, URIRef, Identifier
from followthemoney.util import sanitize_text, gettext
from followthemoney.util import merge_context, value_list, make_entity_id

if TYPE_CHECKING:
    from followthemoney.model import Model

log = logging.getLogger(__name__)
# A property may be referenced either by object or by name.
P = Union[Property, str]
Triple = Tuple[Identifier, Identifier, Identifier]
E = TypeVar("E", bound="EntityProxy")


class EntityProxy(object):
    """A wrapper object for an entity, with utility functions for the
    introspection and manipulation of its properties.

    This is the main working object in the library, used to generate, validate
    and emit data."""

    __slots__ = ["schema", "id", "key_prefix", "context", "_properties", "_size"]

    def __init__(
        self,
        model: "Model",
        data: Dict[str, Any],
        key_prefix: Optional[str] = None,
        cleaned: bool = True,
    ):
        """Build a proxy from a model and a serialised entity dictionary.

        :param model: used to look up the schema named in ``data["schema"]``.
        :param data: entity dictionary; ``id``, ``schema`` and ``properties``
            are consumed, all remaining keys are kept as :attr:`context`.
        :param key_prefix: salt used by :meth:`make_id`.
        :param cleaned: if true, the property values are trusted and only
            de-duplicated; otherwise they are passed through :meth:`add`.
        :raises InvalidData: if the model returns no schema for the entity.
        """
        data = dict(data or {})
        properties = data.pop("properties", None)
        if not cleaned:
            properties = ensure_dict(properties)
        elif properties is None:
            # Tolerate ``"properties": None`` instead of failing on ``.items()``.
            properties = {}

        #: The schema definition for this entity, which implies the properties
        #: that can be set on it.
        schema = model.get(data.pop("schema", None))
        if schema is None:
            raise InvalidData(gettext("No schema for entity."))
        self.schema = schema

        #: When using :meth:`~make_id` to generate a natural key for this entity,
        #: the prefix will be added to the ID as a salt to make it easier to keep
        #: IDs unique across datasets. This is somewhat redundant following the
        #: introduction of :class:`~followthemoney.namespace.Namespace`.
        self.key_prefix = key_prefix

        #: A unique identifier for this entity, usually a hashed natural key,
        #: a UUID, or a very simple slug. Can be signed using a
        #: :class:`~followthemoney.namespace.Namespace`.
        self.id = data.pop("id", None)
        if not cleaned:
            self.id = sanitize_text(self.id)

        #: If the input dictionary for the entity proxy contains fields other
        #: than ``id``, ``schema`` or ``properties``, they will be kept in here
        #: and re-added upon serialization.
        self.context = data
        self._properties: Dict[str, List[str]] = {}
        self._size = 0

        for key, values in properties.items():
            if key not in self.schema.properties:
                continue
            if cleaned:
                # This does not call `self.add` as it might be called millions of
                # times in some context and we want to avoid the performance
                # overhead of doing so. `dict.fromkeys` de-duplicates while
                # keeping the first occurrence of each value in order.
                unique_values = list(dict.fromkeys(values))
                if not unique_values:
                    # Like `add()`, never keep a property without any values.
                    continue
                self._properties[key] = unique_values
                self._size += sum(len(v) for v in unique_values)
            else:
                self.add(key, values, quiet=True)

    def make_id(self, *parts: Any) -> Optional[str]:
        """Generate a (hopefully unique) ID for the given entity, composed
        of the given components, and the :attr:`~key_prefix` defined in
        the proxy. The generated ID is stored on the proxy and returned.
        """
        self.id = make_entity_id(*parts, key_prefix=self.key_prefix)
        return self.id

    def _prop_name(self, prop: P, quiet: bool = False) -> Optional[str]:
        """Resolve a property (name or object) to a property name that exists
        in the schema. Returns ``None`` for unknown properties if ``quiet`` is
        set, otherwise raises :class:`InvalidData`."""
        # This is pretty unwound because it gets called a *lot*.
        if prop in self.schema.properties:
            return cast(str, prop)
        try:
            obj = cast(Property, prop)
            if obj.name in self.schema.properties:
                return obj.name
        except AttributeError:
            pass
        if quiet:
            return None
        msg = gettext("Unknown property (%s): %s")
        raise InvalidData(msg % (self.schema, prop))

    def get(self, prop: P, quiet: bool = False) -> List[str]:
        """Get all values of a property.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param quiet: a reference to an non-existent property will return
            an empty list instead of raising an error.
        :return: A list of values. For a property with values this is the
            list held by the proxy itself, not a copy.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return []
        return self._properties.get(prop_name, [])

    def first(self, prop: P, quiet: bool = False) -> Optional[str]:
        """Get only the first value set for the property.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param quiet: a reference to an non-existent property will return
            ``None`` instead of raising an error.
        :return: A value, or ``None``.
        """
        values = self.get(prop, quiet=quiet)
        return values[0] if values else None

    def has(self, prop: P, quiet: bool = False) -> bool:
        """Check to see if the given property has at least one value set.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param quiet: a reference to an non-existent property will return
            ``False`` instead of raising an error.
        :return: a boolean.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return False
        # An empty list means "no value", not "has a value".
        return bool(self._properties.get(prop_name))

    def add(
        self,
        prop: P,
        values: Any,
        cleaned: bool = False,
        quiet: bool = False,
        fuzzy: bool = False,
        format: Optional[str] = None,
    ) -> None:
        """Add the given value(s) to the property if they are valid for
        the type of the property.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param values: either a single value, or a list of values to be added.
        :param cleaned: the values are already normalised; skip cleaning them.
        :param quiet: a reference to an non-existent or stub property will be
            ignored instead of raising an error.
        :param fuzzy: when normalising the data, should fuzzy matching be allowed.
        :param format: when normalising the data, formatting for a date.
        :raises InvalidData: for unknown or stub properties, unless ``quiet``.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return None
        prop = self.schema.properties[prop_name]

        # Don't allow setting the reverse properties:
        if prop.stub:
            if quiet:
                return None
            msg = gettext("Stub property (%s): %s")
            raise InvalidData(msg % (self.schema, prop))

        for value in value_list(values):
            if not cleaned:
                value = prop.type.clean(value, proxy=self, fuzzy=fuzzy, format=format)
            self.unsafe_add(prop, value, cleaned=True)
        return None

    def unsafe_add(
        self,
        prop: Property,
        value: Optional[str],
        cleaned: bool = False,
        fuzzy: bool = False,
        format: Optional[str] = None,
    ) -> None:
        """A version of `add()` to be used only in type-checking code. This accepts
        only a single value, and performs input cleaning on the premise that the
        value is already valid unicode.

        ``None`` values, values already present on the property and values
        that would exceed the size limit of the property type are ignored
        silently."""
        if not cleaned and value is not None:
            value = prop.type.clean_text(value, fuzzy=fuzzy, format=format, proxy=self)
        if value is None:
            return None

        existing = self._properties.get(prop.name)
        if existing is not None and value in existing:
            # Already stored: it must not be counted towards the size again.
            return None

        # Somewhat hacky: limit the maximum size of any particular
        # field to avoid overloading upstream aleph/elasticsearch.
        # NOTE: the limit is compared against the accumulated size of all
        # values on the proxy, not only those of this property.
        value_size = len(value)
        max_size = prop.type.max_size
        if max_size is not None and self._size + value_size > max_size:
            return None

        self._size += value_size
        if existing is None:
            self._properties[prop.name] = [value]
        else:
            existing.append(value)
        return None

    def set(
        self,
        prop: P,
        values: Any,
        cleaned: bool = False,
        quiet: bool = False,
        fuzzy: bool = False,
        format: Optional[str] = None,
    ) -> None:
        """Replace the values of the property with the given value(s).

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param values: either a single value, or a list of values to be added.
        :param cleaned: the values are already normalised; skip cleaning them.
        :param quiet: a reference to an non-existent property will be
            ignored instead of raising an error.
        :param fuzzy: when normalising the data, should fuzzy matching be allowed.
        :param format: when normalising the data, formatting for a date.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return None
        # Go through `pop()` so the size of the old values is released.
        self.pop(prop_name)
        return self.add(
            prop, values, cleaned=cleaned, quiet=quiet, fuzzy=fuzzy, format=format
        )

    def pop(self, prop: P, quiet: bool = True) -> List[str]:
        """Remove all the values from the given property and return them.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param quiet: a reference to an non-existent property will return
            an empty list instead of raising an error.
        :return: a list of values, possibly empty.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return []
        values = self._properties.pop(prop_name, None)
        if values is None:
            return []
        # Keep `len(proxy)` in sync with the values that are actually stored.
        self._size = max(0, self._size - sum(len(v) for v in values))
        return list(values)

    def remove(self, prop: P, value: str, quiet: bool = True) -> None:
        """Remove a single value from the given property. If it is not there,
        no action takes place.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param value: will not be cleaned before checking.
        :param quiet: a reference to an non-existent property will be
            ignored instead of raising an error.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return None
        values = self._properties.get(prop_name)
        if values is None:
            return None
        try:
            values.remove(value)
        except ValueError:
            # The value was not set on the property.
            return None
        self._size = max(0, self._size - len(value))
        if not values:
            # Do not leave an empty list behind for a property without values.
            del self._properties[prop_name]
        return None

    def iterprops(self) -> List[Property]:
        """Iterate across all the properties for which a value is set in
        the proxy (but do not return their values)."""
        return [self.schema.properties[name] for name in self._properties]

    def itervalues(self) -> Generator[Tuple[Property, str], None, None]:
        """Iterate across all values in the proxy one by one, each given as a
        tuple of the property and the value."""
        for name, values in self._properties.items():
            prop = self.schema.properties[name]
            for value in values:
                yield (prop, value)

    def edgepairs(self) -> Generator[Tuple[str, str], None, None]:
        """Return all the possible pairs of values for the edge source and target if
        the schema allows for an edge representation of the entity."""
        if self.schema.source_prop is not None and self.schema.target_prop is not None:
            sources = self.get(self.schema.source_prop)
            targets = self.get(self.schema.target_prop)
            yield from product(sources, targets)

    def get_type_values(
        self, type_: PropertyType, matchable: bool = False
    ) -> List[str]:
        """All values of a particular type associated with a the entity. For
        example, this lets you return all countries linked to an entity, rather
        than manually checking each property to see if it contains countries.

        :param type_: The type object to be searched.
        :param matchable: Whether to return only property values marked as matchable.
        :return: the distinct values; they are collected in a set, so their
            order is not defined.
        """
        combined: Set[str] = set()
        for prop_name, values in self._properties.items():
            prop = self.schema.properties[prop_name]
            if matchable and not prop.matchable:
                continue
            if prop.type == type_:
                combined.update(values)
        return list(combined)

    @property
    def names(self) -> List[str]:
        """Get the set of all name-type values set of the entity."""
        return self.get_type_values(registry.name)

    @property
    def countries(self) -> List[str]:
        """Get the set of all country-type values set of the entity."""
        return self.get_type_values(registry.country)

    @property
    def temporal_start(self) -> Optional[Tuple[Property, str]]:
        """Get a date that can be used to represent the start of the entity in a
        timeline. If there are multiple possible dates, the earliest date is
        returned."""
        values: List[Tuple[Property, str]] = []
        for prop in self.schema.temporal_start_props:
            values.extend((prop, value) for value in self.get(prop.name))
        # Values are compared as strings; the first smallest one wins.
        return min(values, key=lambda item: item[1], default=None)

    @property
    def temporal_end(self) -> Optional[Tuple[Property, str]]:
        """Get a date that can be used to represent the end of the entity in a timeline.
        If there are multiple possible dates, the latest date is returned."""
        values: List[Tuple[Property, str]] = []
        for prop in self.schema.temporal_end_props:
            values.extend((prop, value) for value in self.get(prop.name))
        # Values are compared as strings; the first largest one wins.
        return max(values, key=lambda item: item[1], default=None)

    def get_type_inverted(self, matchable: bool = False) -> Dict[str, List[str]]:
        """Return all the values of the entity arranged into a mapping with the
        group name of their property type. These groups include ``countries``,
        ``addresses``, ``emails``, etc. Groups without values are left out."""
        data: Dict[str, List[str]] = {}
        for group, type_ in registry.groups.items():
            values = self.get_type_values(type_, matchable=matchable)
            if values:
                data[group] = values
        return data

    def triples(self, qualified: bool = True) -> Generator[Triple, None, None]:
        """Serialise the entity into a set of RDF triple statements. The
        statements include the property values, an ``RDF#type`` definition
        that refers to the entity schema, and a ``SKOS#prefLabel`` with the
        entity caption.

        Nothing is emitted for a proxy without an ID. The ``prefLabel`` is
        only emitted if ``qualified`` is set and the caption differs from the
        schema label; unqualified statements use the bare property name as
        the predicate."""
        if self.id is None or self.schema is None:
            return
        uri = registry.entity.rdf(self.id)
        yield (uri, RDF.type, self.schema.uri)
        if qualified:
            caption = self.caption
            if caption != self.schema.label:
                yield (uri, SKOS.prefLabel, Literal(caption))
        for prop, value in self.itervalues():
            value = prop.type.rdf(value)
            if qualified:
                yield (uri, prop.uri, value)
            else:
                yield (uri, URIRef(prop.name), value)

    @property
    def caption(self) -> str:
        """The user-facing label to be used for this entity. This checks a list
        of properties defined by the schema (caption) and returns the first
        available value. If no caption is available, return the schema label."""
        for prop in self.schema.caption:
            for value in self.get(prop):
                return value
        return self.schema.label

    @property
    def country_hints(self) -> Set[str]:
        """Some property types, such as phone numbers and IBAN codes imply a
        country that may be associated with the entity. This list can be used
        for a more generous matching approach than the actual country values.

        Hints are only derived if the entity has no country values at all."""
        countries = set(self.countries)
        if not countries:
            for prop, value in self.itervalues():
                hint = prop.type.country_hint(value)
                if hint is not None:
                    countries.add(hint)
        return countries

    @property
    def properties(self) -> Dict[str, List[str]]:
        """Return a mapping of the properties and set values of the entity.
        The value lists are copies and can be modified freely."""
        return {name: list(values) for name, values in self._properties.items()}

    def to_dict(self) -> Dict[str, Any]:
        """Serialise the proxy into a dictionary with the defined properties, ID,
        schema and any contextual values that were handed in initially. The resulting
        dictionary can be used to make a new proxy, and it is commonly written to disk
        or a database."""
        data = dict(self.context)
        data.update(
            {"id": self.id, "schema": self.schema.name, "properties": self.properties}
        )
        return data

    def to_full_dict(self, matchable: bool = False) -> Dict[str, Any]:
        """Return a serialised version of the entity with inverted type groups mixed
        in. See :meth:`~get_type_inverted`."""
        data = self.to_dict()
        data.update(self.get_type_inverted(matchable=matchable))
        return data

    def clone(self: E) -> E:
        """Make a copy of the current entity proxy by serialising it with
        :meth:`to_dict` and loading the result again.

        Note that the :attr:`key_prefix` is not part of the serialised form
        and is therefore not carried over to the copy."""
        return self.__class__.from_dict(self.schema.model, self.to_dict())

    def merge(self: E, other: E) -> E:
        """Merge another entity proxy into this one. This will try and find
        the common schema between both entities and then add all property
        values from the other entity into this one.

        :raises InvalidData: if the two schemata have no common schema.
        :return: this proxy, to allow chaining."""
        model = self.schema.model
        self.id = self.id or other.id
        try:
            self.schema = model.common_schema(self.schema, other.schema)
        except InvalidData as e:
            msg = "Cannot merge entities with id %s: %s"
            # Chain the cause so the original schema error stays visible.
            raise InvalidData(msg % (self.id, e)) from e

        self.context = merge_context(self.context, other.context)
        for prop, values in other._properties.items():
            self.add(prop, values, cleaned=True, quiet=True)
        return self

    def __str__(self) -> str:
        return self.caption

    def __repr__(self) -> str:
        return "<E(%r,%r)>" % (self.id, str(self))

    def __len__(self) -> int:
        # Accumulated length of all values stored on the proxy.
        return self._size

    def __hash__(self) -> int:
        if not self.id:
            warnings.warn(
                "Hashing an EntityProxy without an ID results in undefined behaviour",
                RuntimeWarning,
            )
        return hash(self.id)

    def __eq__(self, other: Any) -> bool:
        # Proxies are compared by ID only; objects without an `id` are unequal.
        try:
            if self.id is None or other.id is None:
                warnings.warn(
                    "Comparing EntityProxys without IDs results in undefined behaviour",
                    RuntimeWarning,
                )
            return bool(self.id == other.id)
        except AttributeError:
            return False

    @classmethod
    def from_dict(
        cls: Type[E],
        model: "Model",
        data: Dict[str, Any],
        cleaned: bool = True,
    ) -> E:
        """Instantiate a proxy based on the given model and serialised dictionary.

        Use :meth:`followthemoney.model.Model.get_proxy` instead."""
        return cls(model, data, cleaned=cleaned)
class EntityProxy(object):
    """A wrapper object for an entity, with utility functions for the
    introspection and manipulation of its properties.

    This is the main working object in the library, used to generate, validate
    and emit data."""

    __slots__ = ["schema", "id", "key_prefix", "context", "_properties", "_size"]

    def __init__(
        self,
        model: "Model",
        data: Dict[str, Any],
        key_prefix: Optional[str] = None,
        cleaned: bool = True,
    ):
        """Build a proxy from a model and a serialised entity dictionary.

        :param model: used to look up the schema named in ``data["schema"]``.
        :param data: entity dictionary; ``id``, ``schema`` and ``properties``
            are consumed, all remaining keys are kept as :attr:`context`.
        :param key_prefix: salt used by :meth:`make_id`.
        :param cleaned: if true, the property values are trusted and only
            de-duplicated; otherwise they are passed through :meth:`add`.
        :raises InvalidData: if the model returns no schema for the entity.
        """
        data = dict(data or {})
        properties = data.pop("properties", None)
        if not cleaned:
            properties = ensure_dict(properties)
        elif properties is None:
            # Tolerate ``"properties": None`` instead of failing on ``.items()``.
            properties = {}

        #: The schema definition for this entity, which implies the properties
        #: that can be set on it.
        schema = model.get(data.pop("schema", None))
        if schema is None:
            raise InvalidData(gettext("No schema for entity."))
        self.schema = schema

        #: When using :meth:`~make_id` to generate a natural key for this entity,
        #: the prefix will be added to the ID as a salt to make it easier to keep
        #: IDs unique across datasets. This is somewhat redundant following the
        #: introduction of :class:`~followthemoney.namespace.Namespace`.
        self.key_prefix = key_prefix

        #: A unique identifier for this entity, usually a hashed natural key,
        #: a UUID, or a very simple slug. Can be signed using a
        #: :class:`~followthemoney.namespace.Namespace`.
        self.id = data.pop("id", None)
        if not cleaned:
            self.id = sanitize_text(self.id)

        #: If the input dictionary for the entity proxy contains fields other
        #: than ``id``, ``schema`` or ``properties``, they will be kept in here
        #: and re-added upon serialization.
        self.context = data
        self._properties: Dict[str, List[str]] = {}
        self._size = 0

        for key, values in properties.items():
            if key not in self.schema.properties:
                continue
            if cleaned:
                # This does not call `self.add` as it might be called millions of
                # times in some context and we want to avoid the performance
                # overhead of doing so. `dict.fromkeys` de-duplicates while
                # keeping the first occurrence of each value in order.
                unique_values = list(dict.fromkeys(values))
                if not unique_values:
                    # Like `add()`, never keep a property without any values.
                    continue
                self._properties[key] = unique_values
                self._size += sum(len(v) for v in unique_values)
            else:
                self.add(key, values, quiet=True)

    def make_id(self, *parts: Any) -> Optional[str]:
        """Generate a (hopefully unique) ID for the given entity, composed
        of the given components, and the :attr:`~key_prefix` defined in
        the proxy. The generated ID is stored on the proxy and returned.
        """
        self.id = make_entity_id(*parts, key_prefix=self.key_prefix)
        return self.id

    def _prop_name(self, prop: P, quiet: bool = False) -> Optional[str]:
        """Resolve a property (name or object) to a property name that exists
        in the schema. Returns ``None`` for unknown properties if ``quiet`` is
        set, otherwise raises :class:`InvalidData`."""
        # This is pretty unwound because it gets called a *lot*.
        if prop in self.schema.properties:
            return cast(str, prop)
        try:
            obj = cast(Property, prop)
            if obj.name in self.schema.properties:
                return obj.name
        except AttributeError:
            pass
        if quiet:
            return None
        msg = gettext("Unknown property (%s): %s")
        raise InvalidData(msg % (self.schema, prop))

    def get(self, prop: P, quiet: bool = False) -> List[str]:
        """Get all values of a property.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param quiet: a reference to an non-existent property will return
            an empty list instead of raising an error.
        :return: A list of values. For a property with values this is the
            list held by the proxy itself, not a copy.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return []
        return self._properties.get(prop_name, [])

    def first(self, prop: P, quiet: bool = False) -> Optional[str]:
        """Get only the first value set for the property.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param quiet: a reference to an non-existent property will return
            ``None`` instead of raising an error.
        :return: A value, or ``None``.
        """
        values = self.get(prop, quiet=quiet)
        return values[0] if values else None

    def has(self, prop: P, quiet: bool = False) -> bool:
        """Check to see if the given property has at least one value set.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param quiet: a reference to an non-existent property will return
            ``False`` instead of raising an error.
        :return: a boolean.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return False
        # An empty list means "no value", not "has a value".
        return bool(self._properties.get(prop_name))

    def add(
        self,
        prop: P,
        values: Any,
        cleaned: bool = False,
        quiet: bool = False,
        fuzzy: bool = False,
        format: Optional[str] = None,
    ) -> None:
        """Add the given value(s) to the property if they are valid for
        the type of the property.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param values: either a single value, or a list of values to be added.
        :param cleaned: the values are already normalised; skip cleaning them.
        :param quiet: a reference to an non-existent or stub property will be
            ignored instead of raising an error.
        :param fuzzy: when normalising the data, should fuzzy matching be allowed.
        :param format: when normalising the data, formatting for a date.
        :raises InvalidData: for unknown or stub properties, unless ``quiet``.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return None
        prop = self.schema.properties[prop_name]

        # Don't allow setting the reverse properties:
        if prop.stub:
            if quiet:
                return None
            msg = gettext("Stub property (%s): %s")
            raise InvalidData(msg % (self.schema, prop))

        for value in value_list(values):
            if not cleaned:
                value = prop.type.clean(value, proxy=self, fuzzy=fuzzy, format=format)
            self.unsafe_add(prop, value, cleaned=True)
        return None

    def unsafe_add(
        self,
        prop: Property,
        value: Optional[str],
        cleaned: bool = False,
        fuzzy: bool = False,
        format: Optional[str] = None,
    ) -> None:
        """A version of `add()` to be used only in type-checking code. This accepts
        only a single value, and performs input cleaning on the premise that the
        value is already valid unicode.

        ``None`` values, values already present on the property and values
        that would exceed the size limit of the property type are ignored
        silently."""
        if not cleaned and value is not None:
            value = prop.type.clean_text(value, fuzzy=fuzzy, format=format, proxy=self)
        if value is None:
            return None

        existing = self._properties.get(prop.name)
        if existing is not None and value in existing:
            # Already stored: it must not be counted towards the size again.
            return None

        # Somewhat hacky: limit the maximum size of any particular
        # field to avoid overloading upstream aleph/elasticsearch.
        # NOTE: the limit is compared against the accumulated size of all
        # values on the proxy, not only those of this property.
        value_size = len(value)
        max_size = prop.type.max_size
        if max_size is not None and self._size + value_size > max_size:
            return None

        self._size += value_size
        if existing is None:
            self._properties[prop.name] = [value]
        else:
            existing.append(value)
        return None

    def set(
        self,
        prop: P,
        values: Any,
        cleaned: bool = False,
        quiet: bool = False,
        fuzzy: bool = False,
        format: Optional[str] = None,
    ) -> None:
        """Replace the values of the property with the given value(s).

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param values: either a single value, or a list of values to be added.
        :param cleaned: the values are already normalised; skip cleaning them.
        :param quiet: a reference to an non-existent property will be
            ignored instead of raising an error.
        :param fuzzy: when normalising the data, should fuzzy matching be allowed.
        :param format: when normalising the data, formatting for a date.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return None
        # Go through `pop()` so the size of the old values is released.
        self.pop(prop_name)
        return self.add(
            prop, values, cleaned=cleaned, quiet=quiet, fuzzy=fuzzy, format=format
        )

    def pop(self, prop: P, quiet: bool = True) -> List[str]:
        """Remove all the values from the given property and return them.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param quiet: a reference to an non-existent property will return
            an empty list instead of raising an error.
        :return: a list of values, possibly empty.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return []
        values = self._properties.pop(prop_name, None)
        if values is None:
            return []
        # Keep `len(proxy)` in sync with the values that are actually stored.
        self._size = max(0, self._size - sum(len(v) for v in values))
        return list(values)

    def remove(self, prop: P, value: str, quiet: bool = True) -> None:
        """Remove a single value from the given property. If it is not there,
        no action takes place.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param value: will not be cleaned before checking.
        :param quiet: a reference to an non-existent property will be
            ignored instead of raising an error.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return None
        values = self._properties.get(prop_name)
        if values is None:
            return None
        try:
            values.remove(value)
        except ValueError:
            # The value was not set on the property.
            return None
        self._size = max(0, self._size - len(value))
        if not values:
            # Do not leave an empty list behind for a property without values.
            del self._properties[prop_name]
        return None

    def iterprops(self) -> List[Property]:
        """Iterate across all the properties for which a value is set in
        the proxy (but do not return their values)."""
        return [self.schema.properties[name] for name in self._properties]

    def itervalues(self) -> Generator[Tuple[Property, str], None, None]:
        """Iterate across all values in the proxy one by one, each given as a
        tuple of the property and the value."""
        for name, values in self._properties.items():
            prop = self.schema.properties[name]
            for value in values:
                yield (prop, value)

    def edgepairs(self) -> Generator[Tuple[str, str], None, None]:
        """Return all the possible pairs of values for the edge source and target if
        the schema allows for an edge representation of the entity."""
        if self.schema.source_prop is not None and self.schema.target_prop is not None:
            sources = self.get(self.schema.source_prop)
            targets = self.get(self.schema.target_prop)
            yield from product(sources, targets)

    def get_type_values(
        self, type_: PropertyType, matchable: bool = False
    ) -> List[str]:
        """All values of a particular type associated with a the entity. For
        example, this lets you return all countries linked to an entity, rather
        than manually checking each property to see if it contains countries.

        :param type_: The type object to be searched.
        :param matchable: Whether to return only property values marked as matchable.
        :return: the distinct values; they are collected in a set, so their
            order is not defined.
        """
        combined: Set[str] = set()
        for prop_name, values in self._properties.items():
            prop = self.schema.properties[prop_name]
            if matchable and not prop.matchable:
                continue
            if prop.type == type_:
                combined.update(values)
        return list(combined)

    @property
    def names(self) -> List[str]:
        """Get the set of all name-type values set of the entity."""
        return self.get_type_values(registry.name)

    @property
    def countries(self) -> List[str]:
        """Get the set of all country-type values set of the entity."""
        return self.get_type_values(registry.country)

    @property
    def temporal_start(self) -> Optional[Tuple[Property, str]]:
        """Get a date that can be used to represent the start of the entity in a
        timeline. If there are multiple possible dates, the earliest date is
        returned."""
        values: List[Tuple[Property, str]] = []
        for prop in self.schema.temporal_start_props:
            values.extend((prop, value) for value in self.get(prop.name))
        # Values are compared as strings; the first smallest one wins.
        return min(values, key=lambda item: item[1], default=None)

    @property
    def temporal_end(self) -> Optional[Tuple[Property, str]]:
        """Get a date that can be used to represent the end of the entity in a timeline.
        If there are multiple possible dates, the latest date is returned."""
        values: List[Tuple[Property, str]] = []
        for prop in self.schema.temporal_end_props:
            values.extend((prop, value) for value in self.get(prop.name))
        # Values are compared as strings; the first largest one wins.
        return max(values, key=lambda item: item[1], default=None)

    def get_type_inverted(self, matchable: bool = False) -> Dict[str, List[str]]:
        """Return all the values of the entity arranged into a mapping with the
        group name of their property type. These groups include ``countries``,
        ``addresses``, ``emails``, etc. Groups without values are left out."""
        data: Dict[str, List[str]] = {}
        for group, type_ in registry.groups.items():
            values = self.get_type_values(type_, matchable=matchable)
            if values:
                data[group] = values
        return data

    def triples(self, qualified: bool = True) -> Generator[Triple, None, None]:
        """Serialise the entity into a set of RDF triple statements. The
        statements include the property values, an ``RDF#type`` definition
        that refers to the entity schema, and a ``SKOS#prefLabel`` with the
        entity caption.

        Nothing is emitted for a proxy without an ID. The ``prefLabel`` is
        only emitted if ``qualified`` is set and the caption differs from the
        schema label; unqualified statements use the bare property name as
        the predicate."""
        if self.id is None or self.schema is None:
            return
        uri = registry.entity.rdf(self.id)
        yield (uri, RDF.type, self.schema.uri)
        if qualified:
            caption = self.caption
            if caption != self.schema.label:
                yield (uri, SKOS.prefLabel, Literal(caption))
        for prop, value in self.itervalues():
            value = prop.type.rdf(value)
            if qualified:
                yield (uri, prop.uri, value)
            else:
                yield (uri, URIRef(prop.name), value)

    @property
    def caption(self) -> str:
        """The user-facing label to be used for this entity. This checks a list
        of properties defined by the schema (caption) and returns the first
        available value. If no caption is available, return the schema label."""
        for prop in self.schema.caption:
            for value in self.get(prop):
                return value
        return self.schema.label

    @property
    def country_hints(self) -> Set[str]:
        """Some property types, such as phone numbers and IBAN codes imply a
        country that may be associated with the entity. This list can be used
        for a more generous matching approach than the actual country values.

        Hints are only derived if the entity has no country values at all."""
        countries = set(self.countries)
        if not countries:
            for prop, value in self.itervalues():
                hint = prop.type.country_hint(value)
                if hint is not None:
                    countries.add(hint)
        return countries

    @property
    def properties(self) -> Dict[str, List[str]]:
        """Return a mapping of the properties and set values of the entity.
        The value lists are copies and can be modified freely."""
        return {name: list(values) for name, values in self._properties.items()}

    def to_dict(self) -> Dict[str, Any]:
        """Serialise the proxy into a dictionary with the defined properties, ID,
        schema and any contextual values that were handed in initially. The resulting
        dictionary can be used to make a new proxy, and it is commonly written to disk
        or a database."""
        data = dict(self.context)
        data.update(
            {"id": self.id, "schema": self.schema.name, "properties": self.properties}
        )
        return data

    def to_full_dict(self, matchable: bool = False) -> Dict[str, Any]:
        """Return a serialised version of the entity with inverted type groups mixed
        in. See :meth:`~get_type_inverted`."""
        data = self.to_dict()
        data.update(self.get_type_inverted(matchable=matchable))
        return data

    def clone(self: E) -> E:
        """Make a copy of the current entity proxy by serialising it with
        :meth:`to_dict` and loading the result again.

        Note that the :attr:`key_prefix` is not part of the serialised form
        and is therefore not carried over to the copy."""
        return self.__class__.from_dict(self.schema.model, self.to_dict())

    def merge(self: E, other: E) -> E:
        """Merge another entity proxy into this one. This will try and find
        the common schema between both entities and then add all property
        values from the other entity into this one.

        :raises InvalidData: if the two schemata have no common schema.
        :return: this proxy, to allow chaining."""
        model = self.schema.model
        self.id = self.id or other.id
        try:
            self.schema = model.common_schema(self.schema, other.schema)
        except InvalidData as e:
            msg = "Cannot merge entities with id %s: %s"
            # Chain the cause so the original schema error stays visible.
            raise InvalidData(msg % (self.id, e)) from e

        self.context = merge_context(self.context, other.context)
        for prop, values in other._properties.items():
            self.add(prop, values, cleaned=True, quiet=True)
        return self

    def __str__(self) -> str:
        return self.caption

    def __repr__(self) -> str:
        return "<E(%r,%r)>" % (self.id, str(self))

    def __len__(self) -> int:
        # Accumulated length of all values stored on the proxy.
        return self._size

    def __hash__(self) -> int:
        if not self.id:
            warnings.warn(
                "Hashing an EntityProxy without an ID results in undefined behaviour",
                RuntimeWarning,
            )
        return hash(self.id)

    def __eq__(self, other: Any) -> bool:
        # Proxies are compared by ID only; objects without an `id` are unequal.
        try:
            if self.id is None or other.id is None:
                warnings.warn(
                    "Comparing EntityProxys without IDs results in undefined behaviour",
                    RuntimeWarning,
                )
            return bool(self.id == other.id)
        except AttributeError:
            return False

    @classmethod
    def from_dict(
        cls: Type[E],
        model: "Model",
        data: Dict[str, Any],
        cleaned: bool = True,
    ) -> E:
        """Instantiate a proxy based on the given model and serialised dictionary.

        Use :meth:`followthemoney.model.Model.get_proxy` instead."""
        return cls(model, data, cleaned=cleaned)
A wrapper object for an entity, with utility functions for the introspection and manipulation of its properties.
This is the main working object in the library, used to generate, validate and emit data.
def __init__(
    self,
    model: "Model",
    data: Dict[str, Any],
    key_prefix: Optional[str] = None,
    cleaned: bool = True,
):
    """Build a proxy from a serialised entity dictionary.

    :param model: the model used to resolve the ``schema`` name in ``data``.
    :param data: entity dictionary; ``id``, ``schema`` and ``properties`` are
        consumed, every other key is retained as context.
    :param key_prefix: prefix handed to :meth:`~make_id` when generating IDs.
    :param cleaned: when true, the values are stored after a plain
        de-duplication; otherwise the ID is sanitised and every value is
        routed through :meth:`~add`.
    :raises InvalidData: if the model yields no schema for the entity.
    """
    payload = dict(data or {})
    raw_properties = payload.pop("properties", {})
    if not cleaned:
        raw_properties = ensure_dict(raw_properties)

    #: The schema definition for this entity; it determines which
    #: properties can be set on the proxy.
    resolved = model.get(payload.pop("schema", None))
    if resolved is None:
        raise InvalidData(gettext("No schema for entity."))
    self.schema = resolved

    #: Prefix used by :meth:`~make_id` as a salt when generating a natural
    #: key, to help keep IDs unique across datasets. Somewhat redundant
    #: since the introduction of :class:`~followthemoney.namespace.Namespace`.
    self.key_prefix = key_prefix

    #: A unique identifier for this entity, usually a hashed natural key,
    #: a UUID, or a very simple slug. Can be signed using a
    #: :class:`~followthemoney.namespace.Namespace`.
    entity_id = payload.pop("id", None)
    self.id = entity_id if cleaned else sanitize_text(entity_id)

    #: Any input fields other than ``id``, ``schema`` or ``properties``;
    #: they are kept here and re-added upon serialization.
    self.context = payload
    self._properties: Dict[str, List[str]] = {}
    self._size = 0

    for name, values in raw_properties.items():
        if name not in self.schema.properties:
            continue
        if not cleaned:
            self.add(name, values, quiet=True)
            continue
        # Deliberately bypasses `self.add`: this constructor may run millions
        # of times and the per-value overhead of `add` is not acceptable here.
        deduped = list(dict.fromkeys(values))
        self._properties[name] = deduped
        self._size += sum(map(len, deduped))
def make_id(self, *parts: Any) -> Optional[str]:
    """Derive an ID from the given components and store it on the proxy.

    The components are combined with the proxy's :attr:`~key_prefix`; the
    result replaces :attr:`~id` and is also returned.
    """
    generated = make_entity_id(*parts, key_prefix=self.key_prefix)
    self.id = generated
    return self.id
Generate a (hopefully unique) ID for the given entity, composed of the given components, and the ~key_prefix defined in the proxy.
def get(self, prop: P, quiet: bool = False) -> List[str]:
    """Return every value stored for a property.

    :param prop: a property name or an instance of
        :class:`~followthemoney.property.Property`.
    :param quiet: when the property cannot be resolved, return an empty
        list instead of raising an error.
    :return: the list of values; empty if nothing is set.
    """
    name = self._prop_name(prop, quiet=quiet)
    if name is None:
        return []
    try:
        return self._properties[name]
    except KeyError:
        return []
Get all values of a property.
Parameters
- prop: can be given as a name or an instance of ~followthemoney.property.Property.
- quiet: a reference to a non-existent property will return an empty list instead of raising an error.
Returns
A list of values.
def first(self, prop: P, quiet: bool = False) -> Optional[str]:
    """Return the first value stored for a property, if any.

    :param prop: a property name or an instance of
        :class:`~followthemoney.property.Property`.
    :param quiet: passed through to :meth:`~get`.
    :return: the first value, or ``None`` when :meth:`~get` yields nothing.
    """
    return next(iter(self.get(prop, quiet=quiet)), None)
Get only the first value set for the property.
Parameters
- prop: can be given as a name or an instance of ~followthemoney.property.Property.
- quiet: a reference to a non-existent property will return None instead of raising an error.
Returns
A value, or None.
def has(self, prop: P, quiet: bool = False) -> bool:
    """Check to see if the given property has at least one value set.

    :param prop: can be given as a name or an instance of
        :class:`~followthemoney.property.Property`.
    :param quiet: a reference to a non-existent property will return
        ``False`` instead of raising an error.
    :return: ``True`` only if one or more values are stored.
    """
    prop_name = self._prop_name(prop, quiet=quiet)
    # Test the stored list itself rather than key presence: a key can map
    # to an empty list, which must not count as "has a value". An
    # unresolved property (``None``) is never a key, so it is False too.
    return bool(self._properties.get(prop_name))
Check to see if the given property has at least one value set.
Parameters
- prop: can be given as a name or an instance of ~followthemoney.property.Property.
- quiet: a reference to a non-existent property will return False instead of raising an error.
Returns
a boolean.
def add(
    self,
    prop: P,
    values: Any,
    cleaned: bool = False,
    quiet: bool = False,
    fuzzy: bool = False,
    format: Optional[str] = None,
) -> None:
    """Append one or more values to a property.

    :param prop: a property name or an instance of
        :class:`~followthemoney.property.Property`.
    :param values: a single value or a list of values.
    :param cleaned: if true, the values are passed on as given and the
        property type's ``clean`` step is skipped.
    :param quiet: if true, an unresolvable or stub property is skipped
        without raising an error.
    :param fuzzy: forwarded to the type's ``clean`` step.
    :param format: forwarded to the type's ``clean`` step (date format).
    :raises InvalidData: for a stub property when ``quiet`` is false.
    """
    name = self._prop_name(prop, quiet=quiet)
    if name is None:
        return None
    resolved = self.schema.properties[name]

    # Reverse (stub) properties cannot be set on the proxy.
    if resolved.stub and quiet:
        return None
    if resolved.stub:
        template = gettext("Stub property (%s): %s")
        raise InvalidData(template % (self.schema, resolved))

    for raw in value_list(values):
        if cleaned:
            item = raw
        else:
            item = resolved.type.clean(raw, proxy=self, fuzzy=fuzzy, format=format)
        self.unsafe_add(resolved, item, cleaned=True)
    return None
Add the given value(s) to the property if they are valid for the type of the property.
Parameters
- prop: can be given as a name or an instance of ~followthemoney.property.Property.
- values: either a single value, or a list of values to be added.
- cleaned: whether the values are already normalised; if true, they are not cleaned again before being added.
- quiet: a reference to a non-existent property will be silently ignored instead of raising an error.
- fuzzy: when normalising the data, should fuzzy matching be allowed.
- format: when normalising the data, formatting for a date.
def unsafe_add(
    self,
    prop: Property,
    value: Optional[str],
    cleaned: bool = False,
    fuzzy: bool = False,
    format: Optional[str] = None,
) -> None:
    """A version of `add()` to be used only in type-checking code. This accepts
    only a single value, and performs input cleaning on the premise that the
    value is already valid unicode.

    :param prop: the property object to store the value under.
    :param value: the value to add; ``None`` is ignored.
    :param cleaned: if true, skip the type's ``clean_text`` step.
    :param fuzzy: forwarded to ``clean_text``.
    :param format: forwarded to ``clean_text``.
    """
    if not cleaned and value is not None:
        value = prop.type.clean_text(value, fuzzy=fuzzy, format=format, proxy=self)
    if value is None:
        return None

    existing = self._properties.get(prop.name)
    if existing is not None and value in existing:
        # Already stored: nothing is appended, so the size counter must not
        # grow either (otherwise repeated adds inflate ``_size``).
        return None

    # Somewhat hacky: limit the maximum size of any particular
    # field to avoid overloading upstream aleph/elasticsearch.
    value_size = len(value)
    max_size = prop.type.max_size
    if max_size is not None and self._size + value_size > max_size:
        return None

    self._size += value_size
    if existing is None:
        self._properties[prop.name] = [value]
    else:
        existing.append(value)
    return None
A version of add() to be used only in type-checking code. This accepts only a single value, and performs input cleaning on the premise that the value is already valid unicode.
def set(
    self,
    prop: P,
    values: Any,
    cleaned: bool = False,
    quiet: bool = False,
    fuzzy: bool = False,
    format: Optional[str] = None,
) -> None:
    """Replace the values of the property with the given value(s).

    :param prop: can be given as a name or an instance of
        :class:`~followthemoney.property.Property`.
    :param values: either a single value, or a list of values to be added.
    :param cleaned: forwarded to :meth:`~add`.
    :param quiet: a reference to a non-existent property leaves the proxy
        unchanged instead of raising an error.
    :param fuzzy: forwarded to :meth:`~add`.
    :param format: forwarded to :meth:`~add`.
    """
    prop_name = self._prop_name(prop, quiet=quiet)
    if prop_name is None:
        return None
    discarded = self._properties.pop(prop_name, None)
    if discarded:
        # Keep the size counter in step with what is actually stored; it is
        # the value reported by ``len()`` and used for the size limit.
        self._size -= sum(len(value) for value in discarded)
    return self.add(
        prop, values, cleaned=cleaned, quiet=quiet, fuzzy=fuzzy, format=format
    )
Replace the values of the property with the given value(s).
Parameters
- prop: can be given as a name or an instance of ~followthemoney.property.Property.
- values: either a single value, or a list of values to be added.
- cleaned: whether the values are already normalised; if true, they are not cleaned again before being added.
- quiet: a reference to a non-existent property will leave the entity unchanged instead of raising an error.
def pop(self, prop: P, quiet: bool = True) -> List[str]:
    """Remove all the values from the given property and return them.

    :param prop: can be given as a name or an instance of
        :class:`~followthemoney.property.Property`.
    :param quiet: a reference to a non-existent property will return
        an empty list instead of raising an error.
    :return: a list of values, possibly empty.
    """
    prop_name = self._prop_name(prop, quiet=quiet)
    if prop_name is None or prop_name not in self._properties:
        return []
    removed = list(self._properties.pop(prop_name))
    # The removed values no longer count towards the proxy's size.
    self._size -= sum(len(value) for value in removed)
    return removed
Remove all the values from the given property and return them.
Parameters
- prop: can be given as a name or an instance of ~followthemoney.property.Property.
- quiet: a reference to a non-existent property will return an empty list instead of raising an error.
Returns
a list of values, possibly empty.
def remove(self, prop: P, value: str, quiet: bool = True) -> None:
    """Remove a single value from the given property. If it is not there,
    no action takes place.

    :param prop: can be given as a name or an instance of
        :class:`~followthemoney.property.Property`.
    :param value: will not be cleaned before checking.
    :param quiet: a reference to a non-existent property is ignored
        instead of raising an error.
    """
    prop_name = self._prop_name(prop, quiet=quiet)
    if prop_name is None:
        return None
    stored = self._properties.get(prop_name)
    if stored is None or value not in stored:
        return None
    stored.remove(value)
    # Keep the size counter in step with the stored values.
    self._size -= len(value)
    if not stored:
        # Drop the key with its last value so the property no longer
        # appears as set (e.g. in key-based checks or serialisation).
        del self._properties[prop_name]
    return None
Remove a single value from the given property. If it is not there, no action takes place.
Parameters
- prop: can be given as a name or an instance of ~followthemoney.property.Property.
- value: will not be cleaned before checking.
- quiet: a reference to a non-existent property will be ignored instead of raising an error.
def iterprops(self) -> List[Property]:
    """List the property objects that have an entry on this proxy.

    Only the properties are returned, not their values.
    """
    found = []
    for name in self._properties:
        found.append(self.schema.properties[name])
    return found
Iterate across all the properties for which a value is set in the proxy (but do not return their values).
def itervalues(self) -> Generator[Tuple[Property, str], None, None]:
    """Yield every stored value as a ``(property, value)`` tuple.

    Values are produced property by property, in stored order.
    """
    schema_props = self.schema.properties
    for name in self._properties:
        owner = schema_props[name]
        yield from ((owner, item) for item in self._properties[name])
Iterate across all values in the proxy one by one, each given as a tuple of the property and the value.
def edgepairs(self) -> Generator[Tuple[str, str], None, None]:
    """Yield each ``(source, target)`` combination for an edge-like entity.

    Nothing is yielded unless the schema defines both a source and a
    target property.
    """
    source_prop = self.schema.source_prop
    target_prop = self.schema.target_prop
    if source_prop is None or target_prop is None:
        return
    origins = self.get(source_prop)
    destinations = self.get(target_prop)
    yield from product(origins, destinations)
Return all the possible pairs of values for the edge source and target if the schema allows for an edge representation of the entity.
def get_type_values(
    self, type_: PropertyType, matchable: bool = False
) -> List[str]:
    """Collect the distinct values of every property with the given type.

    For example, this returns all countries linked to an entity without
    inspecting each country-typed property by hand.

    :param type_: The type object to be searched.
    :param matchable: Whether to return only property values marked as matchable.
    :return: the de-duplicated values, in no particular order.
    """
    collected = set()
    for name, values in self._properties.items():
        candidate = self.schema.properties[name]
        if (not matchable or candidate.matchable) and candidate.type == type_:
            collected.update(values)
    return list(collected)
All values of a particular type associated with the entity. For example, this lets you return all countries linked to an entity, rather than manually checking each property to see if it contains countries.
Parameters
- type_: The type object to be searched.
- matchable: Whether to return only property values marked as matchable.
@property
def names(self) -> List[str]:
    """All distinct name-typed values set on the entity."""
    name_type = registry.name
    return self.get_type_values(name_type)
Get all distinct name-type values set on the entity.
@property
def countries(self) -> List[str]:
    """All distinct country-typed values set on the entity."""
    country_type = registry.country
    return self.get_type_values(country_type)
Get all distinct country-type values set on the entity.
@property
def temporal_start(self) -> Optional[Tuple[Property, str]]:
    """Pick the date that marks the start of the entity on a timeline.

    Values of all the schema's temporal start properties are considered and
    the smallest one is returned together with its property; ``None`` if no
    such value is set.
    """
    candidates = [
        (prop, value)
        for prop in self.schema.temporal_start_props
        for value in self.get(prop.name)
    ]
    # On ties the earliest-listed candidate wins, as with a stable sort.
    return min(candidates, key=lambda pair: pair[1], default=None)
Get a date that can be used to represent the start of the entity in a timeline. If there are multiple possible dates, the earliest date is returned.
@property
def temporal_end(self) -> Optional[Tuple[Property, str]]:
    """Pick the date that marks the end of the entity on a timeline.

    Values of all the schema's temporal end properties are considered and
    the largest one is returned together with its property; ``None`` if no
    such value is set.
    """
    candidates = [
        (prop, value)
        for prop in self.schema.temporal_end_props
        for value in self.get(prop.name)
    ]
    # On ties the earliest-listed candidate wins, as with a stable sort.
    return max(candidates, key=lambda pair: pair[1], default=None)
Get a date that can be used to represent the end of the entity in a timeline. If there are multiple possible dates, the latest date is returned.
def get_type_inverted(self, matchable: bool = False) -> Dict[str, List[str]]:
    """Group the entity's values by the group name of their property type.

    Group names include ``countries``, ``addresses``, ``emails``, etc.
    Groups without any value are left out of the result.

    :param matchable: passed through to :meth:`~get_type_values`.
    """
    by_group = {
        group: self.get_type_values(type_, matchable=matchable)
        for group, type_ in registry.groups.items()
    }
    return {group: found for group, found in by_group.items() if len(found)}
Return all the values of the entity arranged into a mapping with the group name of their property type. These groups include countries, addresses, emails, etc.
def triples(self, qualified: bool = True) -> Generator[Triple, None, None]:
    """Emit the entity as RDF triple statements.

    The output consists of an ``RDF#type`` statement pointing at the schema,
    optionally a ``SKOS#prefLabel`` with the caption, and one statement per
    property value. Nothing is emitted if the entity has no ID or schema.

    :param qualified: if true, the caption label is included (when it differs
        from the schema label) and property URIs are used as predicates;
        otherwise predicates are built from the bare property names.
    """
    if self.id is None or self.schema is None:
        return
    subject = registry.entity.rdf(self.id)
    yield (subject, RDF.type, self.schema.uri)
    if qualified:
        label = self.caption
        if label != self.schema.label:
            yield (subject, SKOS.prefLabel, Literal(label))
    for prop, raw in self.itervalues():
        obj = prop.type.rdf(raw)
        predicate = prop.uri if qualified else URIRef(prop.name)
        yield (subject, predicate, obj)
Serialise the entity into a set of RDF triple statements. The statements include the property values, an RDF#type definition that refers to the entity schema, and a SKOS#prefLabel with the entity caption.
@property
def country_hints(self) -> Set[str]:
    """Countries that may be associated with the entity.

    The explicit country values are returned when there are any. Otherwise
    each stored value is passed to its type's ``country_hint`` and the
    non-``None`` results are collected (phone numbers and IBAN codes, for
    instance, can imply a country).
    """
    hints = set(self.countries)
    if hints:
        return hints
    for prop, value in self.itervalues():
        guess = prop.type.country_hint(value)
        if guess is not None:
            hints.add(guess)
    return hints
Some property types, such as phone numbers and IBAN codes imply a country that may be associated with the entity. This list can be used for a more generous matching approach than the actual country values.
@property
def properties(self) -> Dict[str, List[str]]:
    """A fresh mapping of property names to copies of their value lists."""
    snapshot: Dict[str, List[str]] = {}
    for name, values in self._properties.items():
        snapshot[name] = list(values)
    return snapshot
Return a mapping of the properties and set values of the entity.
def to_dict(self) -> Dict[str, Any]:
    """Serialise the proxy into a plain dictionary.

    The result starts from a copy of the context and then sets ``id``,
    ``schema`` (the schema name) and ``properties``. It can be used to make
    a new proxy, and is commonly written to disk or a database.
    """
    serialised = dict(self.context)
    serialised["id"] = self.id
    serialised["schema"] = self.schema.name
    serialised["properties"] = self.properties
    return serialised
Serialise the proxy into a dictionary with the defined properties, ID, schema and any contextual values that were handed in initially. The resulting dictionary can be used to make a new proxy, and it is commonly written to disk or a database.
def to_full_dict(self, matchable: bool = False) -> Dict[str, Any]:
    """Serialise the entity and mix in its inverted type groups.

    The result is :meth:`~to_dict` extended with the mapping returned by
    :meth:`~get_type_inverted`.

    :param matchable: passed through to :meth:`~get_type_inverted`.
    """
    combined = self.to_dict()
    inverted = self.get_type_inverted(matchable=matchable)
    for group, values in inverted.items():
        combined[group] = values
    return combined
Return a serialised version of the entity with inverted type groups mixed in. See ~get_type_inverted().
def clone(self: E) -> E:
    """Make a copy of the current entity proxy.

    The copy is rebuilt from :meth:`~to_dict` using the same model and
    class. ``key_prefix`` is not part of the serialised form, so it is
    carried over explicitly; otherwise :meth:`~make_id` on the copy would
    generate IDs without the prefix.

    :return: a new proxy of the same class.
    """
    duplicate = self.__class__.from_dict(self.schema.model, self.to_dict())
    duplicate.key_prefix = self.key_prefix
    return duplicate
Make a deep copy of the current entity proxy.
def merge(self: E, other: E) -> E:
    """Fold another entity proxy into this one, in place.

    The ID of ``other`` is adopted if this proxy has none, the schema is
    replaced by the common schema of both, the contexts are merged, and all
    property values of ``other`` are added to this proxy.

    :param other: the proxy whose data is merged in.
    :return: this proxy.
    :raises InvalidData: if no common schema can be determined.
    """
    model = self.schema.model
    if not self.id:
        self.id = other.id
    try:
        self.schema = model.common_schema(self.schema, other.schema)
    except InvalidData as e:
        template = "Cannot merge entities with id %s: %s"
        raise InvalidData(template % (self.id, e))

    self.context = merge_context(self.context, other.context)
    for name, incoming in other._properties.items():
        # Values come from another proxy, so they are not cleaned again.
        self.add(name, incoming, cleaned=True, quiet=True)
    return self
Merge another entity proxy into this one. This will try and find the common schema between both entities and then add all property values from the other entity into this one.
@classmethod
def from_dict(
    cls: Type[E],
    model: "Model",
    data: Dict[str, Any],
    cleaned: bool = True,
) -> E:
    """Construct a proxy of this class from a model and a serialised dictionary.

    Use :meth:`followthemoney.model.Model.get_proxy` instead.

    :param model: the model handed to the constructor.
    :param data: the serialised entity.
    :param cleaned: forwarded to the constructor.
    """
    proxy = cls(model, data, cleaned=cleaned)
    return proxy
Instantiate a proxy based on the given model and serialised dictionary.
Use followthemoney.model.Model.get_proxy() instead.