followthemoney.proxy
import logging
from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    Generator,
    List,
    Optional,
    Set,
    Tuple,
    Union,
    Type,
    TypeVar,
    cast,
)
import warnings
from itertools import product
from banal import ensure_dict

from followthemoney.exc import InvalidData
from followthemoney.types import registry
from followthemoney.types.common import PropertyType
from followthemoney.property import Property
from followthemoney.rdf import SKOS, RDF, Literal, URIRef, Identifier
from followthemoney.util import sanitize_text, gettext
from followthemoney.util import merge_context, value_list, make_entity_id

if TYPE_CHECKING:
    from followthemoney.model import Model

log = logging.getLogger(__name__)
P = Union[Property, str]
Triple = Tuple[Identifier, Identifier, Identifier]
E = TypeVar("E", bound="EntityProxy")


class EntityProxy(object):
    """A wrapper object for an entity, with utility functions for the
    introspection and manipulation of its properties.

    This is the main working object in the library, used to generate, validate
    and emit data."""

    __slots__ = ["schema", "id", "key_prefix", "context", "_properties", "_size"]

    def __init__(
        self,
        model: "Model",
        data: Dict[str, Any],
        key_prefix: Optional[str] = None,
        cleaned: bool = True,
    ):
        """Build a proxy from a serialised entity dictionary.

        :param model: the model used to resolve the ``schema`` named in ``data``.
        :param data: a dictionary with ``id``, ``schema`` and ``properties``
            keys; any other keys are retained as :attr:`~context`.
        :param key_prefix: salt used by :meth:`~make_id`.
        :param cleaned: if true, the property values are trusted and stored
            without going through :meth:`~add`; if false, the ID is sanitised
            and every value is cleaned via :meth:`~add`.
        :raises InvalidData: if the model returns no schema for the entity.
        """
        data = dict(data or {})
        properties = data.pop("properties", {})
        if not cleaned:
            properties = ensure_dict(properties)

        #: The schema definition for this entity, which implies the properties
        #: That can be set on it.
        schema = model.get(data.pop("schema", None))
        if schema is None:
            raise InvalidData(gettext("No schema for entity."))
        self.schema = schema

        #: When using :meth:`~make_id` to generate a natural key for this entity,
        #: the prefix will be added to the ID as a salt to make it easier to keep
        #: IDs unique across datasets. This is somewhat redundant following the
        #: introduction of :class:`~followthemoney.namespace.Namespace`.
        self.key_prefix = key_prefix

        #: A unique identifier for this entity, usually a hashed natural key,
        #: a UUID, or a very simple slug. Can be signed using a
        #: :class:`~followthemoney.namespace.Namespace`.
        self.id = data.pop("id", None)
        if not cleaned:
            self.id = sanitize_text(self.id)

        #: If the input dictionary for the entity proxy contains fields other
        #: than ``id``, ``schema`` or ``properties``, they will be kept in here
        #: and re-added upon serialization.
        self.context = data
        self._properties: Dict[str, List[str]] = {}
        self._size = 0

        for key, values in properties.items():
            if key not in self.schema.properties:
                continue
            if cleaned:
                # This does not call `self.add` as it might be called millions of
                # times in some context and we want to avoid the performance
                # overhead of doing so. `dict.fromkeys` de-duplicates while
                # keeping the first-seen order of the values.
                unique_values = list(dict.fromkeys(values))
                self._properties[key] = unique_values
                self._size += sum(len(v) for v in unique_values)
            else:
                self.add(key, values, quiet=True)

    def make_id(self, *parts: Any) -> Optional[str]:
        """Generate a (hopefully unique) ID for the given entity, composed
        of the given components, and the :attr:`~key_prefix` defined in
        the proxy. The result is stored in :attr:`~id` and returned.
        """
        self.id = make_entity_id(*parts, key_prefix=self.key_prefix)
        return self.id

    def _prop_name(self, prop: P, quiet: bool = False) -> Optional[str]:
        """Resolve a property name or object to a name valid for the schema.

        :raises InvalidData: if the property is unknown and ``quiet`` is false.
        """
        # This is pretty unwound because it gets called a *lot*.
        if prop in self.schema.properties:
            return cast(str, prop)
        try:
            obj = cast(Property, prop)
            if obj.name in self.schema.properties:
                return obj.name
        except AttributeError:
            pass
        if quiet:
            return None
        msg = gettext("Unknown property (%s): %s")
        raise InvalidData(msg % (self.schema, prop))

    def _drop_values(self, prop_name: str) -> List[str]:
        """Detach and return all values stored under ``prop_name``, releasing
        their share of the size tracked in ``_size``."""
        values = self._properties.pop(prop_name, [])
        self._size -= sum(len(v) for v in values)
        return values

    def get(self, prop: P, quiet: bool = False) -> List[str]:
        """Get all values of a property.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param quiet: a reference to an non-existent property will return
            an empty list instead of raising an error.
        :return: A list of values.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return []
        return self._properties.get(prop_name, [])

    def first(self, prop: P, quiet: bool = False) -> Optional[str]:
        """Get only the first value set for the property.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param quiet: a reference to an non-existent property will return
            ``None`` instead of raising an error.
        :return: A value, or ``None``.
        """
        return next(iter(self.get(prop, quiet=quiet)), None)

    def has(self, prop: P, quiet: bool = False) -> bool:
        """Check to see if the given property has at least one value set.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param quiet: a reference to an non-existent property will return
            ``False`` instead of raising an error.
        :return: a boolean.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return False
        # A key holding an empty list does not count as "has a value".
        return bool(self._properties.get(prop_name))

    def add(
        self,
        prop: P,
        values: Any,
        cleaned: bool = False,
        quiet: bool = False,
        fuzzy: bool = False,
        format: Optional[str] = None,
    ) -> None:
        """Add the given value(s) to the property if they are valid for
        the type of the property.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param values: either a single value, or a list of values to be added.
        :param cleaned: the values are already normalised; if false, each value
            is passed through the property type's ``clean`` before being added.
        :param quiet: a reference to an non-existent or stub property will be
            ignored instead of raising an error.
        :param fuzzy: when normalising the data, should fuzzy matching be allowed.
        :param format: when normalising the data, formatting for a date.
        :raises InvalidData: for unknown or stub properties, unless ``quiet``.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return None
        prop_obj = self.schema.properties[prop_name]

        # Don't allow setting the reverse properties:
        if prop_obj.stub:
            if quiet:
                return None
            msg = gettext("Stub property (%s): %s")
            raise InvalidData(msg % (self.schema, prop_obj))

        if not cleaned:
            # The fallback format is the same for every value, resolve it once.
            format = format or prop_obj.format
        for value in value_list(values):
            if not cleaned:
                value = prop_obj.type.clean(
                    value, proxy=self, fuzzy=fuzzy, format=format
                )
            self.unsafe_add(prop_obj, value, cleaned=True)
        return None

    def unsafe_add(
        self,
        prop: Property,
        value: Optional[str],
        cleaned: bool = False,
        fuzzy: bool = False,
        format: Optional[str] = None,
    ) -> Optional[str]:
        """A version of `add()` to be used only in type-checking code. This accepts
        only a single value, and performs input cleaning on the premise that the
        value is already valid unicode.

        :return: the value now stored for the property (also when it was already
            present), or ``None`` if it was empty or rejected by the size limit.
        """
        if not cleaned and value is not None:
            format = format or prop.format
            value = prop.type.clean_text(value, fuzzy=fuzzy, format=format, proxy=self)

        if value is None:
            return None

        existing = self._properties.get(prop.name)
        if existing is not None and value in existing:
            # Already stored: nothing to add, and it must not count twice
            # towards the size limit.
            return value

        # Somewhat hacky: limit the maximum size of any particular
        # field to avoid overloading upstream aleph/elasticsearch.
        value_size = len(value)
        if prop.type.total_size is not None:
            if self._size + value_size > prop.type.total_size:
                # msg = "[%s] too large. Rejecting additional values."
                # log.warning(msg, prop.name)
                return None
        self._size += value_size
        if existing is None:
            self._properties[prop.name] = [value]
        else:
            existing.append(value)

        return value

    def set(
        self,
        prop: P,
        values: Any,
        cleaned: bool = False,
        quiet: bool = False,
        fuzzy: bool = False,
        format: Optional[str] = None,
    ) -> None:
        """Replace the values of the property with the given value(s).

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param values: either a single value, or a list of values to be added.
        :param cleaned: the values are already normalised and need no cleaning.
        :param quiet: a reference to an non-existent property will be ignored
            instead of raising an error.
        :param fuzzy: when normalising the data, should fuzzy matching be allowed.
        :param format: when normalising the data, formatting for a date.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return
        # Release the old values (and their size) before adding the new ones.
        self._drop_values(prop_name)
        return self.add(
            prop, values, cleaned=cleaned, quiet=quiet, fuzzy=fuzzy, format=format
        )

    def pop(self, prop: P, quiet: bool = True) -> List[str]:
        """Remove all the values from the given property and return them.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param quiet: a reference to an non-existent property will return
            an empty list instead of raising an error.
        :return: a list of values, possibly empty.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return []
        return self._drop_values(prop_name)

    def remove(self, prop: P, value: str, quiet: bool = True) -> None:
        """Remove a single value from the given property. If it is not there,
        no action takes place.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param value: will not be cleaned before checking.
        :param quiet: a reference to an non-existent property will be ignored
            instead of raising an error.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return
        values = self._properties.get(prop_name)
        if values is None or value not in values:
            return
        values.remove(value)
        self._size -= len(value)
        if not values:
            # Do not keep an empty list around: the property is now unset.
            del self._properties[prop_name]

    def iterprops(self) -> List[Property]:
        """Iterate across all the properties for which a value is set in
        the proxy (but do not return their values)."""
        return [self.schema.properties[p] for p in self._properties]

    def itervalues(self) -> Generator[Tuple[Property, str], None, None]:
        """Iterate across all values in the proxy one by one, each given as a
        tuple of the property and the value."""
        for name, values in self._properties.items():
            prop = self.schema.properties[name]
            for value in values:
                yield (prop, value)

    def edgepairs(self) -> Generator[Tuple[str, str], None, None]:
        """Return all the possible pairs of values for the edge source and target if
        the schema allows for an edge representation of the entity."""
        if self.schema.source_prop is not None and self.schema.target_prop is not None:
            sources = self.get(self.schema.source_prop)
            targets = self.get(self.schema.target_prop)
            yield from product(sources, targets)

    def get_type_values(
        self, type_: PropertyType, matchable: bool = False
    ) -> List[str]:
        """All values of a particular type associated with a the entity. For
        example, this lets you return all countries linked to an entity, rather
        than manually checking each property to see if it contains countries.

        :param type_: The type object to be searched.
        :param matchable: Whether to return only property values marked as matchable.
        :return: the distinct values, in the order they were first encountered.
        """
        # A dict is used as an ordered set so that the result is deterministic.
        combined: Dict[str, None] = {}
        for prop_name, values in self._properties.items():
            prop = self.schema.properties[prop_name]
            if matchable and not prop.matchable:
                continue
            if prop.type == type_:
                combined.update(dict.fromkeys(values))
        return list(combined)

    @property
    def names(self) -> List[str]:
        """Get the set of all name-type values set of the entity."""
        return self.get_type_values(registry.name)

    @property
    def countries(self) -> List[str]:
        """Get the set of all country-type values set of the entity."""
        return self.get_type_values(registry.country)

    @property
    def temporal_start(self) -> Optional[Tuple[Property, str]]:
        """Get a date that can be used to represent the start of the entity in a
        timeline. If there are multiple possible dates, the earliest date is
        returned."""
        values: List[Tuple[Property, str]] = []
        for prop in self.schema.temporal_start_props:
            values += [(prop, value) for value in self.get(prop.name)]
        # On ties `min` keeps the first candidate, like a stable sort would.
        return min(values, key=lambda item: item[1], default=None)

    @property
    def temporal_end(self) -> Optional[Tuple[Property, str]]:
        """Get a date that can be used to represent the end of the entity in a timeline.
        If there are multiple possible dates, the latest date is returned."""
        values: List[Tuple[Property, str]] = []
        for prop in self.schema.temporal_end_props:
            values += [(prop, value) for value in self.get(prop.name)]
        # On ties `max` keeps the first candidate, like a stable reverse sort.
        return max(values, key=lambda item: item[1], default=None)

    def get_type_inverted(self, matchable: bool = False) -> Dict[str, List[str]]:
        """Return all the values of the entity arranged into a mapping with the
        group name of their property type. These groups include ``countries``,
        ``addresses``, ``emails``, etc. Groups without values are left out."""
        data: Dict[str, List[str]] = {}
        for group, type_ in registry.groups.items():
            values = self.get_type_values(type_, matchable=matchable)
            if values:
                data[group] = values
        return data

    def triples(self, qualified: bool = True) -> Generator[Triple, None, None]:
        """Serialise the entity into a set of RDF triple statements. The
        statements include the property values, an ``RDF#type`` definition
        that refers to the entity schema, and a ``SKOS#prefLabel`` with the
        entity caption.

        :param qualified: if true, emit the caption label and use the property
            URIs as predicates; otherwise use the bare property names.
        """
        if self.id is None or self.schema is None:
            return
        uri = registry.entity.rdf(self.id)
        yield (uri, RDF.type, self.schema.uri)
        if qualified:
            caption = self.caption
            # The schema label is only the fallback caption, don't emit it.
            if caption != self.schema.label:
                yield (uri, SKOS.prefLabel, Literal(caption))
        for prop, value in self.itervalues():
            node = prop.type.rdf(value)
            if qualified:
                yield (uri, prop.uri, node)
            else:
                yield (uri, URIRef(prop.name), node)

    @property
    def caption(self) -> str:
        """The user-facing label to be used for this entity. This checks a list
        of properties defined by the schema (caption) and returns the first
        available value. If no caption is available, return the schema label."""
        for prop in self.schema.caption:
            for value in self.get(prop):
                return value
        return self.schema.label

    @property
    def country_hints(self) -> Set[str]:
        """Some property types, such as phone numbers and IBAN codes imply a
        country that may be associated with the entity. This list can be used
        for a more generous matching approach than the actual country values.

        Hints are only derived when the entity has no country values at all."""
        countries = set(self.countries)
        if not countries:
            for prop, value in self.itervalues():
                hint = prop.type.country_hint(value)
                if hint is not None:
                    countries.add(hint)
        return countries

    @property
    def properties(self) -> Dict[str, List[str]]:
        """Return a mapping of the properties and set values of the entity.
        The value lists are copies and can be modified by the caller."""
        return {p: list(vs) for p, vs in self._properties.items()}

    def to_dict(self) -> Dict[str, Any]:
        """Serialise the proxy into a dictionary with the defined properties, ID,
        schema and any contextual values that were handed in initially. The resulting
        dictionary can be used to make a new proxy, and it is commonly written to disk
        or a database."""
        data = dict(self.context)
        extra = {
            "id": self.id,
            "schema": self.schema.name,
            "properties": self.properties,
        }
        data.update(extra)
        return data

    def to_full_dict(self, matchable: bool = False) -> Dict[str, Any]:
        """Return a serialised version of the entity with inverted type groups mixed
        in. See :meth:`~get_type_inverted`."""
        data = self.to_dict()
        data.update(self.get_type_inverted(matchable=matchable))
        return data

    def clone(self: E) -> E:
        """Make a copy of the current entity proxy by serialising it with
        :meth:`~to_dict` and loading the result with :meth:`~from_dict`."""
        return self.__class__.from_dict(self.schema.model, self.to_dict())

    def merge(self: E, other: E) -> E:
        """Merge another entity proxy into this one. This will try and find
        the common schema between both entities and then add all property
        values from the other entity into this one.

        :raises InvalidData: if the two schemata have no common schema.
        :return: this proxy, to allow chaining.
        """
        model = self.schema.model
        self.id = self.id or other.id
        try:
            self.schema = model.common_schema(self.schema, other.schema)
        except InvalidData as e:
            msg = "Cannot merge entities with id %s: %s"
            raise InvalidData(msg % (self.id, e)) from e

        self.context = merge_context(self.context, other.context)
        for prop, values in other._properties.items():
            self.add(prop, values, cleaned=True, quiet=True)
        return self

    def __str__(self) -> str:
        return self.caption

    def __repr__(self) -> str:
        return "<E(%r,%r)>" % (self.id, str(self))

    def __len__(self) -> int:
        # The combined length of all stored values, not the number of values.
        return self._size

    def __hash__(self) -> int:
        if not self.id:
            warnings.warn(
                "Hashing an EntityProxy without an ID results in undefined behaviour",
                RuntimeWarning,
            )
        return hash(self.id)

    def __eq__(self, other: Any) -> bool:
        # Proxies are considered equal when their IDs are; objects without an
        # ``id`` attribute are never equal to a proxy.
        try:
            if self.id is None or other.id is None:
                warnings.warn(
                    "Comparing EntityProxys without IDs results in undefined behaviour",
                    RuntimeWarning,
                )
            return bool(self.id == other.id)
        except AttributeError:
            return False

    @classmethod
    def from_dict(
        cls: Type[E],
        model: "Model",
        data: Dict[str, Any],
        cleaned: bool = True,
    ) -> E:
        """Instantiate a proxy based on the given model and serialised dictionary.

        Use :meth:`followthemoney.model.Model.get_proxy` instead."""
        return cls(model, data, cleaned=cleaned)
class EntityProxy(object):
    """A wrapper object for an entity, with utility functions for the
    introspection and manipulation of its properties.

    This is the main working object in the library, used to generate, validate
    and emit data."""

    __slots__ = ["schema", "id", "key_prefix", "context", "_properties", "_size"]

    def __init__(
        self,
        model: "Model",
        data: Dict[str, Any],
        key_prefix: Optional[str] = None,
        cleaned: bool = True,
    ):
        """Build a proxy from a serialised entity dictionary.

        :param model: the model used to resolve the ``schema`` named in ``data``.
        :param data: a dictionary with ``id``, ``schema`` and ``properties``
            keys; any other keys are retained as :attr:`~context`.
        :param key_prefix: salt used by :meth:`~make_id`.
        :param cleaned: if true, the property values are trusted and stored
            without going through :meth:`~add`; if false, the ID is sanitised
            and every value is cleaned via :meth:`~add`.
        :raises InvalidData: if the model returns no schema for the entity.
        """
        data = dict(data or {})
        properties = data.pop("properties", {})
        if not cleaned:
            properties = ensure_dict(properties)

        #: The schema definition for this entity, which implies the properties
        #: That can be set on it.
        schema = model.get(data.pop("schema", None))
        if schema is None:
            raise InvalidData(gettext("No schema for entity."))
        self.schema = schema

        #: When using :meth:`~make_id` to generate a natural key for this entity,
        #: the prefix will be added to the ID as a salt to make it easier to keep
        #: IDs unique across datasets. This is somewhat redundant following the
        #: introduction of :class:`~followthemoney.namespace.Namespace`.
        self.key_prefix = key_prefix

        #: A unique identifier for this entity, usually a hashed natural key,
        #: a UUID, or a very simple slug. Can be signed using a
        #: :class:`~followthemoney.namespace.Namespace`.
        self.id = data.pop("id", None)
        if not cleaned:
            self.id = sanitize_text(self.id)

        #: If the input dictionary for the entity proxy contains fields other
        #: than ``id``, ``schema`` or ``properties``, they will be kept in here
        #: and re-added upon serialization.
        self.context = data
        self._properties: Dict[str, List[str]] = {}
        self._size = 0

        for key, values in properties.items():
            if key not in self.schema.properties:
                continue
            if cleaned:
                # This does not call `self.add` as it might be called millions of
                # times in some context and we want to avoid the performance
                # overhead of doing so. `dict.fromkeys` de-duplicates while
                # keeping the first-seen order of the values.
                unique_values = list(dict.fromkeys(values))
                self._properties[key] = unique_values
                self._size += sum(len(v) for v in unique_values)
            else:
                self.add(key, values, quiet=True)

    def make_id(self, *parts: Any) -> Optional[str]:
        """Generate a (hopefully unique) ID for the given entity, composed
        of the given components, and the :attr:`~key_prefix` defined in
        the proxy. The result is stored in :attr:`~id` and returned.
        """
        self.id = make_entity_id(*parts, key_prefix=self.key_prefix)
        return self.id

    def _prop_name(self, prop: P, quiet: bool = False) -> Optional[str]:
        """Resolve a property name or object to a name valid for the schema.

        :raises InvalidData: if the property is unknown and ``quiet`` is false.
        """
        # This is pretty unwound because it gets called a *lot*.
        if prop in self.schema.properties:
            return cast(str, prop)
        try:
            obj = cast(Property, prop)
            if obj.name in self.schema.properties:
                return obj.name
        except AttributeError:
            pass
        if quiet:
            return None
        msg = gettext("Unknown property (%s): %s")
        raise InvalidData(msg % (self.schema, prop))

    def _drop_values(self, prop_name: str) -> List[str]:
        """Detach and return all values stored under ``prop_name``, releasing
        their share of the size tracked in ``_size``."""
        values = self._properties.pop(prop_name, [])
        self._size -= sum(len(v) for v in values)
        return values

    def get(self, prop: P, quiet: bool = False) -> List[str]:
        """Get all values of a property.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param quiet: a reference to an non-existent property will return
            an empty list instead of raising an error.
        :return: A list of values.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return []
        return self._properties.get(prop_name, [])

    def first(self, prop: P, quiet: bool = False) -> Optional[str]:
        """Get only the first value set for the property.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param quiet: a reference to an non-existent property will return
            ``None`` instead of raising an error.
        :return: A value, or ``None``.
        """
        return next(iter(self.get(prop, quiet=quiet)), None)

    def has(self, prop: P, quiet: bool = False) -> bool:
        """Check to see if the given property has at least one value set.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param quiet: a reference to an non-existent property will return
            ``False`` instead of raising an error.
        :return: a boolean.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return False
        # A key holding an empty list does not count as "has a value".
        return bool(self._properties.get(prop_name))

    def add(
        self,
        prop: P,
        values: Any,
        cleaned: bool = False,
        quiet: bool = False,
        fuzzy: bool = False,
        format: Optional[str] = None,
    ) -> None:
        """Add the given value(s) to the property if they are valid for
        the type of the property.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param values: either a single value, or a list of values to be added.
        :param cleaned: the values are already normalised; if false, each value
            is passed through the property type's ``clean`` before being added.
        :param quiet: a reference to an non-existent or stub property will be
            ignored instead of raising an error.
        :param fuzzy: when normalising the data, should fuzzy matching be allowed.
        :param format: when normalising the data, formatting for a date.
        :raises InvalidData: for unknown or stub properties, unless ``quiet``.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return None
        prop_obj = self.schema.properties[prop_name]

        # Don't allow setting the reverse properties:
        if prop_obj.stub:
            if quiet:
                return None
            msg = gettext("Stub property (%s): %s")
            raise InvalidData(msg % (self.schema, prop_obj))

        if not cleaned:
            # The fallback format is the same for every value, resolve it once.
            format = format or prop_obj.format
        for value in value_list(values):
            if not cleaned:
                value = prop_obj.type.clean(
                    value, proxy=self, fuzzy=fuzzy, format=format
                )
            self.unsafe_add(prop_obj, value, cleaned=True)
        return None

    def unsafe_add(
        self,
        prop: Property,
        value: Optional[str],
        cleaned: bool = False,
        fuzzy: bool = False,
        format: Optional[str] = None,
    ) -> Optional[str]:
        """A version of `add()` to be used only in type-checking code. This accepts
        only a single value, and performs input cleaning on the premise that the
        value is already valid unicode.

        :return: the value now stored for the property (also when it was already
            present), or ``None`` if it was empty or rejected by the size limit.
        """
        if not cleaned and value is not None:
            format = format or prop.format
            value = prop.type.clean_text(value, fuzzy=fuzzy, format=format, proxy=self)

        if value is None:
            return None

        existing = self._properties.get(prop.name)
        if existing is not None and value in existing:
            # Already stored: nothing to add, and it must not count twice
            # towards the size limit.
            return value

        # Somewhat hacky: limit the maximum size of any particular
        # field to avoid overloading upstream aleph/elasticsearch.
        value_size = len(value)
        if prop.type.total_size is not None:
            if self._size + value_size > prop.type.total_size:
                # msg = "[%s] too large. Rejecting additional values."
                # log.warning(msg, prop.name)
                return None
        self._size += value_size
        if existing is None:
            self._properties[prop.name] = [value]
        else:
            existing.append(value)

        return value

    def set(
        self,
        prop: P,
        values: Any,
        cleaned: bool = False,
        quiet: bool = False,
        fuzzy: bool = False,
        format: Optional[str] = None,
    ) -> None:
        """Replace the values of the property with the given value(s).

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param values: either a single value, or a list of values to be added.
        :param cleaned: the values are already normalised and need no cleaning.
        :param quiet: a reference to an non-existent property will be ignored
            instead of raising an error.
        :param fuzzy: when normalising the data, should fuzzy matching be allowed.
        :param format: when normalising the data, formatting for a date.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return
        # Release the old values (and their size) before adding the new ones.
        self._drop_values(prop_name)
        return self.add(
            prop, values, cleaned=cleaned, quiet=quiet, fuzzy=fuzzy, format=format
        )

    def pop(self, prop: P, quiet: bool = True) -> List[str]:
        """Remove all the values from the given property and return them.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param quiet: a reference to an non-existent property will return
            an empty list instead of raising an error.
        :return: a list of values, possibly empty.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return []
        return self._drop_values(prop_name)

    def remove(self, prop: P, value: str, quiet: bool = True) -> None:
        """Remove a single value from the given property. If it is not there,
        no action takes place.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param value: will not be cleaned before checking.
        :param quiet: a reference to an non-existent property will be ignored
            instead of raising an error.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return
        values = self._properties.get(prop_name)
        if values is None or value not in values:
            return
        values.remove(value)
        self._size -= len(value)
        if not values:
            # Do not keep an empty list around: the property is now unset.
            del self._properties[prop_name]

    def iterprops(self) -> List[Property]:
        """Iterate across all the properties for which a value is set in
        the proxy (but do not return their values)."""
        return [self.schema.properties[p] for p in self._properties]

    def itervalues(self) -> Generator[Tuple[Property, str], None, None]:
        """Iterate across all values in the proxy one by one, each given as a
        tuple of the property and the value."""
        for name, values in self._properties.items():
            prop = self.schema.properties[name]
            for value in values:
                yield (prop, value)

    def edgepairs(self) -> Generator[Tuple[str, str], None, None]:
        """Return all the possible pairs of values for the edge source and target if
        the schema allows for an edge representation of the entity."""
        if self.schema.source_prop is not None and self.schema.target_prop is not None:
            sources = self.get(self.schema.source_prop)
            targets = self.get(self.schema.target_prop)
            yield from product(sources, targets)

    def get_type_values(
        self, type_: PropertyType, matchable: bool = False
    ) -> List[str]:
        """All values of a particular type associated with a the entity. For
        example, this lets you return all countries linked to an entity, rather
        than manually checking each property to see if it contains countries.

        :param type_: The type object to be searched.
        :param matchable: Whether to return only property values marked as matchable.
        :return: the distinct values, in the order they were first encountered.
        """
        # A dict is used as an ordered set so that the result is deterministic.
        combined: Dict[str, None] = {}
        for prop_name, values in self._properties.items():
            prop = self.schema.properties[prop_name]
            if matchable and not prop.matchable:
                continue
            if prop.type == type_:
                combined.update(dict.fromkeys(values))
        return list(combined)

    @property
    def names(self) -> List[str]:
        """Get the set of all name-type values set of the entity."""
        return self.get_type_values(registry.name)

    @property
    def countries(self) -> List[str]:
        """Get the set of all country-type values set of the entity."""
        return self.get_type_values(registry.country)

    @property
    def temporal_start(self) -> Optional[Tuple[Property, str]]:
        """Get a date that can be used to represent the start of the entity in a
        timeline. If there are multiple possible dates, the earliest date is
        returned."""
        values: List[Tuple[Property, str]] = []
        for prop in self.schema.temporal_start_props:
            values += [(prop, value) for value in self.get(prop.name)]
        # On ties `min` keeps the first candidate, like a stable sort would.
        return min(values, key=lambda item: item[1], default=None)

    @property
    def temporal_end(self) -> Optional[Tuple[Property, str]]:
        """Get a date that can be used to represent the end of the entity in a timeline.
        If there are multiple possible dates, the latest date is returned."""
        values: List[Tuple[Property, str]] = []
        for prop in self.schema.temporal_end_props:
            values += [(prop, value) for value in self.get(prop.name)]
        # On ties `max` keeps the first candidate, like a stable reverse sort.
        return max(values, key=lambda item: item[1], default=None)

    def get_type_inverted(self, matchable: bool = False) -> Dict[str, List[str]]:
        """Return all the values of the entity arranged into a mapping with the
        group name of their property type. These groups include ``countries``,
        ``addresses``, ``emails``, etc. Groups without values are left out."""
        data: Dict[str, List[str]] = {}
        for group, type_ in registry.groups.items():
            values = self.get_type_values(type_, matchable=matchable)
            if values:
                data[group] = values
        return data

    def triples(self, qualified: bool = True) -> Generator[Triple, None, None]:
        """Serialise the entity into a set of RDF triple statements. The
        statements include the property values, an ``RDF#type`` definition
        that refers to the entity schema, and a ``SKOS#prefLabel`` with the
        entity caption.

        :param qualified: if true, emit the caption label and use the property
            URIs as predicates; otherwise use the bare property names.
        """
        if self.id is None or self.schema is None:
            return
        uri = registry.entity.rdf(self.id)
        yield (uri, RDF.type, self.schema.uri)
        if qualified:
            caption = self.caption
            # The schema label is only the fallback caption, don't emit it.
            if caption != self.schema.label:
                yield (uri, SKOS.prefLabel, Literal(caption))
        for prop, value in self.itervalues():
            node = prop.type.rdf(value)
            if qualified:
                yield (uri, prop.uri, node)
            else:
                yield (uri, URIRef(prop.name), node)

    @property
    def caption(self) -> str:
        """The user-facing label to be used for this entity. This checks a list
        of properties defined by the schema (caption) and returns the first
        available value. If no caption is available, return the schema label."""
        for prop in self.schema.caption:
            for value in self.get(prop):
                return value
        return self.schema.label

    @property
    def country_hints(self) -> Set[str]:
        """Some property types, such as phone numbers and IBAN codes imply a
        country that may be associated with the entity. This list can be used
        for a more generous matching approach than the actual country values.

        Hints are only derived when the entity has no country values at all."""
        countries = set(self.countries)
        if not countries:
            for prop, value in self.itervalues():
                hint = prop.type.country_hint(value)
                if hint is not None:
                    countries.add(hint)
        return countries

    @property
    def properties(self) -> Dict[str, List[str]]:
        """Return a mapping of the properties and set values of the entity.
        The value lists are copies and can be modified by the caller."""
        return {p: list(vs) for p, vs in self._properties.items()}

    def to_dict(self) -> Dict[str, Any]:
        """Serialise the proxy into a dictionary with the defined properties, ID,
        schema and any contextual values that were handed in initially. The resulting
        dictionary can be used to make a new proxy, and it is commonly written to disk
        or a database."""
        data = dict(self.context)
        extra = {
            "id": self.id,
            "schema": self.schema.name,
            "properties": self.properties,
        }
        data.update(extra)
        return data

    def to_full_dict(self, matchable: bool = False) -> Dict[str, Any]:
        """Return a serialised version of the entity with inverted type groups mixed
        in. See :meth:`~get_type_inverted`."""
        data = self.to_dict()
        data.update(self.get_type_inverted(matchable=matchable))
        return data

    def clone(self: E) -> E:
        """Make a copy of the current entity proxy by serialising it with
        :meth:`~to_dict` and loading the result with :meth:`~from_dict`."""
        return self.__class__.from_dict(self.schema.model, self.to_dict())

    def merge(self: E, other: E) -> E:
        """Merge another entity proxy into this one. This will try and find
        the common schema between both entities and then add all property
        values from the other entity into this one.

        :raises InvalidData: if the two schemata have no common schema.
        :return: this proxy, to allow chaining.
        """
        model = self.schema.model
        self.id = self.id or other.id
        try:
            self.schema = model.common_schema(self.schema, other.schema)
        except InvalidData as e:
            msg = "Cannot merge entities with id %s: %s"
            raise InvalidData(msg % (self.id, e)) from e

        self.context = merge_context(self.context, other.context)
        for prop, values in other._properties.items():
            self.add(prop, values, cleaned=True, quiet=True)
        return self

    def __str__(self) -> str:
        return self.caption

    def __repr__(self) -> str:
        return "<E(%r,%r)>" % (self.id, str(self))

    def __len__(self) -> int:
        # The combined length of all stored values, not the number of values.
        return self._size

    def __hash__(self) -> int:
        if not self.id:
            warnings.warn(
                "Hashing an EntityProxy without an ID results in undefined behaviour",
                RuntimeWarning,
            )
        return hash(self.id)

    def __eq__(self, other: Any) -> bool:
        # Proxies are considered equal when their IDs are; objects without an
        # ``id`` attribute are never equal to a proxy.
        try:
            if self.id is None or other.id is None:
                warnings.warn(
                    "Comparing EntityProxys without IDs results in undefined behaviour",
                    RuntimeWarning,
                )
            return bool(self.id == other.id)
        except AttributeError:
            return False

    @classmethod
    def from_dict(
        cls: Type[E],
        model: "Model",
        data: Dict[str, Any],
        cleaned: bool = True,
    ) -> E:
        """Instantiate a proxy based on the given model and serialised dictionary.

        Use :meth:`followthemoney.model.Model.get_proxy` instead."""
        return cls(model, data, cleaned=cleaned)
A wrapper object for an entity, with utility functions for the introspection and manipulation of its properties.
This is the main working object in the library, used to generate, validate and emit data.
def __init__(
    self,
    model: "Model",
    data: Dict[str, Any],
    key_prefix: Optional[str] = None,
    cleaned: bool = True,
):
    """Build a proxy from a serialised entity dictionary.

    :param model: the model used to resolve the ``schema`` name in ``data``.
    :param data: a mapping with ``id``, ``schema`` and ``properties`` keys; any
        other keys are retained as context.
    :param key_prefix: salt used by :meth:`~make_id`.
    :param cleaned: if true, property values are stored as given (only
        de-duplicated); otherwise they are passed through :meth:`~add`.
    :raises InvalidData: if the model returns no schema for the entity.
    """
    payload = dict(data or {})
    raw_properties = payload.pop("properties", {})
    if not cleaned:
        raw_properties = ensure_dict(raw_properties)

    #: The schema definition for this entity, which implies the properties
    #: that can be set on it.
    resolved = model.get(payload.pop("schema", None))
    if resolved is None:
        raise InvalidData(gettext("No schema for entity."))
    self.schema = resolved

    #: When using :meth:`~make_id` to generate a natural key for this entity,
    #: the prefix will be added to the ID as a salt to make it easier to keep
    #: IDs unique across datasets. This is somewhat redundant following the
    #: introduction of :class:`~followthemoney.namespace.Namespace`.
    self.key_prefix = key_prefix

    #: A unique identifier for this entity, usually a hashed natural key,
    #: a UUID, or a very simple slug. Can be signed using a
    #: :class:`~followthemoney.namespace.Namespace`.
    entity_id = payload.pop("id", None)
    self.id = entity_id if cleaned else sanitize_text(entity_id)

    #: If the input dictionary for the entity proxy contains fields other
    #: than ``id``, ``schema`` or ``properties``, they will be kept in here
    #: and re-added upon serialization.
    self.context = payload
    self._properties: Dict[str, List[str]] = {}
    self._size = 0

    for key, values in raw_properties.items():
        if key not in self.schema.properties:
            continue
        if not cleaned:
            self.add(key, values, quiet=True)
            continue
        # This bypasses `self.add` on purpose: the constructor may be called
        # millions of times and the per-value overhead would add up.
        # `dict.fromkeys` drops duplicates while keeping first-seen order.
        deduped = list(dict.fromkeys(values))
        self._properties[key] = deduped
        self._size += sum(len(v) for v in deduped)
def make_id(self, *parts: Any) -> Optional[str]:
    """Derive an ID from the given components and store it on the proxy.

    The components are passed to ``make_entity_id`` together with the
    :attr:`~key_prefix` of this proxy; the result is assigned to ``self.id``
    and returned.
    """
    generated = make_entity_id(*parts, key_prefix=self.key_prefix)
    self.id = generated
    return generated
Generate a (hopefully unique) ID for the given entity, composed
of the given components, and the ~key_prefix
defined in
the proxy.
def get(self, prop: P, quiet: bool = False) -> List[str]:
    """Return every value stored for a property.

    :param prop: a property name or a
        :class:`~followthemoney.property.Property` instance.
    :param quiet: passed on to the property name lookup; when that lookup
        yields no name, an empty list is returned.
    :return: the list of values, empty if none are set.
    """
    name = self._prop_name(prop, quiet=quiet)
    if name is not None:
        return self._properties.get(name, [])
    return []
Get all values of a property.
Parameters
- prop: can be given as a name or an instance of
~followthemoney.property.Property
. - quiet: a reference to a non-existent property will return an empty list instead of raising an error.
Returns
A list of values.
def first(self, prop: P, quiet: bool = False) -> Optional[str]:
    """Return the first value stored for a property, if there is one.

    :param prop: a property name or a
        :class:`~followthemoney.property.Property` instance.
    :param quiet: passed through to :meth:`~get`.
    :return: the first value, or ``None`` when the property has no values.
    """
    values = self.get(prop, quiet=quiet)
    return next(iter(values), None)
Get only the first value set for the property.
Parameters
- prop: can be given as a name or an instance of
~followthemoney.property.Property
. - quiet: a reference to a non-existent property will return None instead of raising an error.
Returns
A value, or
None
.
def has(self, prop: P, quiet: bool = False) -> bool:
    """Check to see if the given property has at least one value set.

    :param prop: can be given as a name or an instance of
        :class:`~followthemoney.property.Property`.
    :param quiet: passed on to the property name lookup; when that lookup
        yields no name, ``False`` is returned.
    :return: ``True`` only if one or more values are stored for the property.
    """
    prop_name = self._prop_name(prop, quiet=quiet)
    # A key can exist with an empty list (e.g. after the last value has been
    # removed), so test the stored values rather than key membership.
    return bool(self._properties.get(prop_name))
Check to see if the given property has at least one value set.
Parameters
- prop: can be given as a name or an instance of
~followthemoney.property.Property
. - quiet: a reference to a non-existent property will return False instead of raising an error.
Returns
a boolean.
def add(
    self,
    prop: P,
    values: Any,
    cleaned: bool = False,
    quiet: bool = False,
    fuzzy: bool = False,
    format: Optional[str] = None,
) -> None:
    """Append one or more values to a property.

    :param prop: a property name or a
        :class:`~followthemoney.property.Property` instance.
    :param values: a single value or a list of values.
    :param cleaned: if true, the values are stored without being passed
        through the property type's ``clean`` method.
    :param quiet: if true, unknown and stub properties are skipped silently.
    :param fuzzy: passed to the type's ``clean`` method.
    :param format: passed to the type's ``clean`` method; falls back to the
        format defined on the property.
    :raises InvalidData: if the property is a stub and ``quiet`` is false.
    """
    name = self._prop_name(prop, quiet=quiet)
    if name is None:
        return None
    resolved = self.schema.properties[name]

    # Reverse (stub) properties cannot be written to.
    if resolved.stub:
        if not quiet:
            msg = gettext("Stub property (%s): %s")
            raise InvalidData(msg % (self.schema, resolved))
        return None

    for raw in value_list(values):
        if cleaned:
            value = raw
        else:
            format = format or resolved.format
            value = resolved.type.clean(raw, proxy=self, fuzzy=fuzzy, format=format)
        self.unsafe_add(resolved, value, cleaned=True)
    return None
Add the given value(s) to the property if they are valid for the type of the property.
Parameters
- prop: can be given as a name or an instance of
~followthemoney.property.Property
. - values: either a single value, or a list of values to be added.
- cleaned: whether the values are already normalised; if false, each value is cleaned before it is added.
- quiet: a reference to a non-existent property will be ignored instead of raising an error.
- fuzzy: when normalising the data, should fuzzy matching be allowed.
- format: when normalising the data, formatting for a date.
def unsafe_add(
    self,
    prop: Property,
    value: Optional[str],
    cleaned: bool = False,
    fuzzy: bool = False,
    format: Optional[str] = None,
) -> Optional[str]:
    """A version of `add()` to be used only in type-checking code. This accepts
    only a single value, and performs input cleaning on the premise that the
    value is already valid unicode.

    :param prop: the property to add the value to.
    :param value: the value to add; ``None`` is ignored.
    :param cleaned: if true, the value is stored without calling the type's
        ``clean_text`` method.
    :param fuzzy: passed to ``clean_text``.
    :param format: passed to ``clean_text``; falls back to ``prop.format``.
    :return: the stored value, or ``None`` if nothing was stored (the value
        was empty after cleaning or the size limit would be exceeded).
    """
    if not cleaned and value is not None:
        format = format or prop.format
        value = prop.type.clean_text(value, fuzzy=fuzzy, format=format, proxy=self)

    if value is None:
        return None

    existing = self._properties.get(prop.name)
    if existing is not None and value in existing:
        # Already stored: do not count it against the size budget again.
        return value

    # Somewhat hacky: limit the maximum size of any particular
    # field to avoid overloading upstream aleph/elasticsearch.
    value_size = len(value)
    limit = prop.type.total_size
    if limit is not None and self._size + value_size > limit:
        return None

    self._size += value_size
    if existing is None:
        self._properties[prop.name] = [value]
    else:
        existing.append(value)
    return value
A version of add()
to be used only in type-checking code. This accepts
only a single value, and performs input cleaning on the premise that the
value is already valid unicode. Returns the value that has been added.
def set(
    self,
    prop: P,
    values: Any,
    cleaned: bool = False,
    quiet: bool = False,
    fuzzy: bool = False,
    format: Optional[str] = None,
) -> None:
    """Replace the values of the property with the given value(s).

    :param prop: can be given as a name or an instance of
        :class:`~followthemoney.property.Property`.
    :param values: either a single value, or a list of values to be added.
    :param cleaned: passed through to :meth:`~add`.
    :param quiet: passed on to the property name lookup and to :meth:`~add`;
        when the lookup yields no name, nothing is changed.
    :param fuzzy: passed through to :meth:`~add`.
    :param format: passed through to :meth:`~add`.
    """
    prop_name = self._prop_name(prop, quiet=quiet)
    if prop_name is None:
        return
    discarded = self._properties.pop(prop_name, None)
    if discarded:
        # Keep the size counter in step with the values actually stored.
        self._size -= sum(len(value) for value in discarded)
    return self.add(
        prop, values, cleaned=cleaned, quiet=quiet, fuzzy=fuzzy, format=format
    )
Replace the values of the property with the given value(s).
Parameters
- prop: can be given as a name or an instance of
~followthemoney.property.Property
. - values: either a single value, or a list of values to be added.
- cleaned: whether the values are already normalised; if false, each value is cleaned before it is added.
- quiet: a reference to a non-existent property will be ignored instead of raising an error.
def pop(self, prop: P, quiet: bool = True) -> List[str]:
    """Remove all the values from the given property and return them.

    :param prop: can be given as a name or an instance of
        :class:`~followthemoney.property.Property`.
    :param quiet: passed on to the property name lookup; when that lookup
        yields no name, an empty list is returned.
    :return: a list of values, possibly empty.
    """
    prop_name = self._prop_name(prop, quiet=quiet)
    if prop_name is None or prop_name not in self._properties:
        return []
    removed = list(self._properties.pop(prop_name))
    # Keep the size counter in step with the values actually stored.
    self._size -= sum(len(value) for value in removed)
    return removed
Remove all the values from the given property and return them.
Parameters
- prop: can be given as a name or an instance of
~followthemoney.property.Property
. - quiet: a reference to a non-existent property will return an empty list instead of raising an error.
Returns
a list of values, possibly empty.
def remove(self, prop: P, value: str, quiet: bool = True) -> None:
    """Remove a single value from the given property. If it is not there,
    no action takes place.

    :param prop: can be given as a name or an instance of
        :class:`~followthemoney.property.Property`.
    :param value: will not be cleaned before checking.
    :param quiet: passed on to the property name lookup; when that lookup
        yields no name, nothing is changed.
    """
    prop_name = self._prop_name(prop, quiet=quiet)
    if prop_name is None:
        return
    stored = self._properties.get(prop_name)
    if not stored or value not in stored:
        return
    stored.remove(value)
    # Keep the size counter in step with the values actually stored.
    self._size -= len(value)
    if not stored:
        # Do not leave an empty list behind: it would make the property look
        # set to key-based checks and show up in the serialised properties.
        del self._properties[prop_name]
Remove a single value from the given property. If it is not there, no action takes place.
Parameters
- prop: can be given as a name or an instance of
~followthemoney.property.Property
. - value: will not be cleaned before checking.
. - quiet: a reference to a non-existent property will be ignored instead of raising an error.
def iterprops(self) -> List[Property]:
    """List the schema properties that have an entry in this proxy.

    Only the property objects are returned, not their values.
    """
    schema_props = self.schema.properties
    return [schema_props[name] for name in self._properties]
Iterate across all the properties for which a value is set in the proxy (but do not return their values).
def itervalues(self) -> Generator[Tuple[Property, str], None, None]:
    """Yield every stored value as a ``(property, value)`` tuple, one value at
    a time, grouped by property."""
    for name, stored in self._properties.items():
        prop = self.schema.properties[name]
        yield from ((prop, value) for value in stored)
Iterate across all values in the proxy one by one, each given as a tuple of the property and the value.
def edgepairs(self) -> Generator[Tuple[str, str], None, None]:
    """Yield each ``(source, target)`` combination of the values stored in the
    schema's edge source and target properties. Nothing is yielded if the
    schema does not define both properties."""
    source_prop = self.schema.source_prop
    target_prop = self.schema.target_prop
    if source_prop is None or target_prop is None:
        return
    yield from product(self.get(source_prop), self.get(target_prop))
Return all the possible pairs of values for the edge source and target if the schema allows for an edge representation of the entity.
def get_type_values(
    self, type_: PropertyType, matchable: bool = False
) -> List[str]:
    """Collect the distinct values of all properties that have a given type.

    For example, this returns all countries linked to an entity without
    checking each country-typed property by hand.

    :param type_: the type object to search for.
    :param matchable: if true, skip properties not marked as matchable.
    :return: the de-duplicated values, in no particular order.
    """
    schema_props = self.schema.properties
    unique = set()
    for name, stored in self._properties.items():
        prop = schema_props[name]
        if matchable and not prop.matchable:
            continue
        if not (prop.type == type_):
            continue
        unique.update(stored)
    return list(unique)
All values of a particular type associated with the entity. For example, this lets you return all countries linked to an entity, rather than manually checking each property to see if it contains countries.
Parameters
- type_: The type object to be searched.
- matchable: Whether to return only property values marked as matchable.
@property
def names(self) -> List[str]:
    """All distinct values of the entity's name-typed properties."""
    name_type = registry.name
    return self.get_type_values(name_type)
Get all distinct name-type values set on the entity.
@property
def countries(self) -> List[str]:
    """All distinct values of the entity's country-typed properties."""
    country_type = registry.country
    return self.get_type_values(country_type)
Get all distinct country-type values set on the entity.
@property
def temporal_start(self) -> Optional[Tuple[Property, str]]:
    """Get a date that can be used to represent the start of the entity in a
    timeline. If there are multiple possible dates, the earliest date is
    returned.

    :return: a ``(property, value)`` tuple, or ``None`` if none of the
        schema's temporal start properties has a value.
    """
    candidates = [
        (prop, value)
        for prop in self.schema.temporal_start_props
        for value in self.get(prop.name)
    ]
    # `min` returns the first of several equal minima, which is the same
    # element a stable ascending sort would put first.
    return min(candidates, key=lambda pair: pair[1], default=None)
Get a date that can be used to represent the start of the entity in a timeline. If there are multiple possible dates, the earliest date is returned.
@property
def temporal_end(self) -> Optional[Tuple[Property, str]]:
    """Get a date that can be used to represent the end of the entity in a
    timeline. If there are multiple possible dates, the latest date is
    returned.

    :return: a ``(property, value)`` tuple, or ``None`` if none of the
        schema's temporal end properties has a value.
    """
    candidates = [
        (prop, value)
        for prop in self.schema.temporal_end_props
        for value in self.get(prop.name)
    ]
    # `max` returns the first of several equal maxima, which is the same
    # element a stable descending sort would put first.
    return max(candidates, key=lambda pair: pair[1], default=None)
Get a date that can be used to represent the end of the entity in a timeline. If there are multiple possible dates, the latest date is returned.
def get_type_inverted(self, matchable: bool = False) -> Dict[str, List[str]]:
    """Group the entity's values by the group name of their property type.

    Every ``(group, type)`` pair in ``registry.groups`` is looked up with
    :meth:`~get_type_values`; groups without values are left out. Group names
    include ``countries``, ``addresses``, ``emails``, etc.

    :param matchable: passed through to :meth:`~get_type_values`.
    """
    inverted: Dict[str, List[str]] = {}
    for group, type_ in registry.groups.items():
        found = self.get_type_values(type_, matchable=matchable)
        if found:
            inverted[group] = found
    return inverted
Return all the values of the entity arranged into a mapping with the
group name of their property type. These groups include countries
,
addresses
, emails
, etc.
def triples(self, qualified: bool = True) -> Generator[Triple, None, None]:
    """Yield the entity as RDF triple statements.

    The first triple is the ``RDF#type`` of the entity, pointing at the schema
    URI. When ``qualified`` is true and the caption differs from the schema
    label, a ``SKOS#prefLabel`` triple follows. Then one triple is emitted per
    property value. Nothing is yielded if the entity has no ID or no schema.

    :param qualified: if true, use each property's URI as the predicate;
        otherwise use a ``URIRef`` built from the bare property name.
    """
    if self.id is None or self.schema is None:
        return
    subject = registry.entity.rdf(self.id)
    yield (subject, RDF.type, self.schema.uri)
    if qualified:
        label = self.caption
        if label != self.schema.label:
            yield (subject, SKOS.prefLabel, Literal(label))
    for prop, raw in self.itervalues():
        obj = prop.type.rdf(raw)
        predicate = prop.uri if qualified else URIRef(prop.name)
        yield (subject, predicate, obj)
Serialise the entity into a set of RDF triple statements. The
statements include the property values, an RDF#type
definition
that refers to the entity schema, and a SKOS#prefLabel
with the
entity caption.
@property
def country_hints(self) -> Set[str]:
    """Countries that may be associated with the entity.

    If the entity has explicit country values, those are returned. Otherwise
    each stored value is passed to its property type's ``country_hint`` and
    every non-``None`` result is collected (phone numbers and IBAN codes, for
    instance, can imply a country). This allows a more generous matching
    approach than the actual country values.
    """
    hints = set(self.countries)
    if hints:
        return hints
    for prop, value in self.itervalues():
        implied = prop.type.country_hint(value)
        if implied is not None:
            hints.add(implied)
    return hints
Some property types, such as phone numbers and IBAN codes imply a country that may be associated with the entity. This list can be used for a more generous matching approach than the actual country values.
@property
def properties(self) -> Dict[str, List[str]]:
    """A mapping of property names to a copy of their stored value lists."""
    snapshot: Dict[str, List[str]] = {}
    for name, stored in self._properties.items():
        snapshot[name] = list(stored)
    return snapshot
Return a mapping of the properties and set values of the entity.
def to_dict(self) -> Dict[str, Any]:
    """Serialise the proxy to a plain dictionary.

    The result starts from a copy of the context and then sets ``id``,
    ``schema`` (the schema name) and ``properties``, overriding any context
    entries with those keys. The dictionary can be used to make a new proxy,
    and it is commonly written to disk or a database.
    """
    return {
        **self.context,
        "id": self.id,
        "schema": self.schema.name,
        "properties": self.properties,
    }
Serialise the proxy into a dictionary with the defined properties, ID, schema and any contextual values that were handed in initially. The resulting dictionary can be used to make a new proxy, and it is commonly written to disk or a database.
def to_full_dict(self, matchable: bool = False) -> Dict[str, Any]:
    """Serialise the entity and mix in its inverted type groups.

    The result is :meth:`~to_dict` updated with the output of
    :meth:`~get_type_inverted`.

    :param matchable: passed through to :meth:`~get_type_inverted`.
    """
    return {**self.to_dict(), **self.get_type_inverted(matchable=matchable)}
Return a serialised version of the entity with inverted type groups mixed
in. See ~get_type_inverted()
.
def clone(self: E) -> E:
    """Create a copy of this proxy by serialising it with :meth:`~to_dict` and
    loading the result through ``from_dict`` on the same class and model."""
    model = self.schema.model
    serialised = self.to_dict()
    return self.__class__.from_dict(model, serialised)
Make a deep copy of the current entity proxy.
def merge(self: E, other: E) -> E:
    """Merge another entity proxy into this one. This will try and find
    the common schema between both entities and then add all property
    values from the other entity into this one.

    :param other: the proxy whose ID (if this one has none), context and
        property values are merged into this proxy.
    :return: this proxy, after merging.
    :raises InvalidData: if the model cannot find a common schema; in that
        case this proxy is left unchanged.
    """
    model = self.schema.model
    merged_id = self.id or other.id
    try:
        merged_schema = model.common_schema(self.schema, other.schema)
    except InvalidData as exc:
        msg = "Cannot merge entities with id %s: %s"
        # Chain the original error so the schema conflict stays traceable.
        raise InvalidData(msg % (merged_id, exc)) from exc

    # Only change the proxy once the schema check has passed, so a failed
    # merge does not leave it half-updated.
    self.id = merged_id
    self.schema = merged_schema
    self.context = merge_context(self.context, other.context)
    for prop, values in other._properties.items():
        self.add(prop, values, cleaned=True, quiet=True)
    return self
Merge another entity proxy into this one. This will try and find the common schema between both entities and then add all property values from the other entity into this one.
@classmethod
def from_dict(
    cls: Type[E],
    model: "Model",
    data: Dict[str, Any],
    cleaned: bool = True,
) -> E:
    """Create a proxy of this class from a model and a serialised dictionary.

    Use :meth:`followthemoney.model.Model.get_proxy` instead.

    :param model: passed to the constructor.
    :param data: the serialised entity, passed to the constructor.
    :param cleaned: passed to the constructor as a keyword argument.
    """
    proxy = cls(model, data, cleaned=cleaned)
    return proxy
Instantiate a proxy based on the given model and serialised dictionary.
Use followthemoney.model.Model.get_proxy()
instead.