Skip to content

Numbers

Numeric data is usually stored in FtM for descriptive reasons more than to facilitate quantitative analysis. The number type reflects this. Stored as strings, FtM numbers can consist of a value and a unit, e.g. 37 % or 9 t. Numbers are expected to use a . as their decimal separator; a , separator can be used to group thousands, lakhs or crores (these grouping separators are removed for storage).

Some unit strings (including those for area and currency) are normalized using rigour.units.normalize_unit (e.g. sqm -> m²).

Attribute Value Detail
name number Used in schema definitions
label Number plural: Numbers
max_length 250 Space to be allocated in fixed-length database definitions
matchable Suitable for use in entity matching
pivot Suitable for use as a pivot point for connecting to other entities

Python API

followthemoney.types.NumberType

Bases: PropertyType

A numeric value, like the size of a piece of land, or the value of a contract. Since all property values in FtM are strings, this is also a string and there is no specified format (e.g. 1,000.00 vs. 1.000,00).

In the future we might want to enable annotations for format, units, or even to introduce a separate property type for monetary values.

Source code in followthemoney/types/number.py
class NumberType(PropertyType):
    """A numeric value, like the size of a piece of land, or the value of a
    contract. Since all property values in FtM are strings, this is also a
    string and there is no specified format (e.g. `1,000.00` vs. `1.000,00`).

    In the future we might want to enable annotations for format, units, or
    even to introduce a separate property type for monetary values."""

    # Decimal mark used in the number strings returned by `parse`.
    DECIMAL = "."
    # Default digit-grouping character: removed by `parse`, inserted by `caption`.
    SEPARATOR = ","
    # Number of fractional digits `caption` shows for non-integral values.
    PRECISION = 2

    # Group 1: an optionally signed number (whitespace may surround the sign).
    # Group 2: an optional unit, i.e. a run of non-whitespace characters that
    # does not start with a digit.
    _NUM_UNIT_RE = (
        f"(\\s?\\-?\\s?\\d+(?:{re.escape(DECIMAL)}\\d+)?)\\s*([^\\s\\d][^\\s]*)?"
    )
    NUM_UNIT_RE = re.compile(_NUM_UNIT_RE, re.UNICODE)
    # Format specs used by `caption`, e.g. "{:,.2f}" and "{:,d}".
    _FLOAT_FMT = "{:" + SEPARATOR + "." + str(PRECISION) + "f}"
    _INT_FMT = "{:" + SEPARATOR + "d}"

    name = const("number")
    label = _("Number")
    plural = _("Numbers")
    matchable = False

    def node_id(self, value: str) -> None:
        """Always return None for number values."""
        return None

    def parse(
        self, value: str, decimal: str = DECIMAL, separator: str = SEPARATOR
    ) -> Tuple[Optional[str], Optional[str]]:
        """Parse a number into a numeric value and a unit. The numeric value is
        aligned with the decimal and separator settings. The unit is stripped of
        whitespace and returned as a string. If no unit is found, None is
        returned. If no number is found, None is returned for both values.

        The pattern is anchored at the start of the string only, so anything
        that follows the number and the (optional) unit is ignored.

        Args:
            value (str): The string to parse.
            decimal (str): The character used as the decimal separator.
            separator (str): The character used to separate thousands, lakhs, or crores.

        Returns:
            A tuple of (number, unit), where number is a string and unit is a string or None.
        """
        # Drop grouping characters first, then map the caller's decimal mark
        # onto the canonical one that the pattern expects.
        value = value.replace(separator, "")
        if decimal != self.DECIMAL:
            value = value.replace(decimal, self.DECIMAL)
        match = self.NUM_UNIT_RE.match(value)
        if not match:
            return None, None
        number, unit = match.groups()
        if unit is not None:
            unit = unit.strip()
            if len(unit) == 0:
                unit = None
        # TODO: We could have a lookup table for common units, e.g. kg, m, etc. to
        # convert them to a standard form.
        # The pattern admits any whitespace around the sign (tabs, no-break
        # spaces, ...), not just " ". Remove all of it so that the result can
        # always be handed to float()/int().
        number = "".join(number.split())
        if number == "":
            number = None
        return number, unit

    def to_number(self, value: str) -> Optional[float]:
        """Convert a number string to a float. The string is parsed and the unit is
        discarded if present.

        Args:
            value (str): The string to convert.

        Returns:
            Optional[float]: The parsed float value, or None if parsing fails.
        """
        # Deliberately best-effort: any failure while parsing or converting
        # (including a non-string input) is reported as None.
        try:
            number, _unit = self.parse(value)
            if number is None:
                return None
            return float(number)
        except Exception:
            return None

    def caption(self, value: str, format: Optional[str] = None) -> str:
        """Return a caption for the number. This is used for display purposes.

        Values written without a decimal part are formatted from their exact
        integer value, so large integers keep all of their digits. If the
        value cannot be parsed or converted, it is returned unchanged.

        Args:
            value (str): The string to format.
            format (Optional[str]): An optional format string to use for formatting the number.

        Returns:
            str: The formatted number string, possibly with a unit.
        """
        number, unit = self.parse(value)
        if number is None:
            return value
        try:
            fnumber = float(number)
            # A float only represents integers exactly up to 2**53; keep the
            # exact integer whenever the text has no fractional part.
            exact = int(number) if self.DECIMAL not in number else None
        except ValueError:
            return value
        if format is not None:
            # Custom formats always receive the float value.
            number = format.format(fnumber)
        elif exact is not None:
            number = self._INT_FMT.format(exact)
        elif fnumber.is_integer():
            # e.g. "5.0" is shown as "5".
            number = self._INT_FMT.format(int(fnumber))
        else:
            number = self._FLOAT_FMT.format(fnumber)
        if unit is not None:
            return f"{number} {unit}"
        return number

caption(value, format=None)

Return a caption for the number. This is used for display purposes.

Parameters:

Name Type Description Default
value str

The string to format.

required
format Optional[str]

An optional format string to use for formatting the number.

None

Returns:

Name Type Description
str str

The formatted number string, possibly with a unit.

Source code in followthemoney/types/number.py
def caption(self, value: str, format: Optional[str] = None) -> str:
    """Return a caption for the number. This is used for display purposes.

    Values written without a decimal part are formatted from their exact
    integer value, so large integers keep all of their digits. If the value
    cannot be parsed or converted, it is returned unchanged.

    Args:
        value (str): The string to format.
        format (Optional[str]): An optional format string to use for formatting the number.

    Returns:
        str: The formatted number string, possibly with a unit.
    """
    number, unit = self.parse(value)
    if number is None:
        return value
    try:
        fnumber = float(number)
        # A float only represents integers exactly up to 2**53; keep the
        # exact integer whenever the text has no fractional part.
        exact = int(number) if self.DECIMAL not in number else None
    except ValueError:
        return value
    if format is not None:
        # Custom formats always receive the float value.
        number = format.format(fnumber)
    elif exact is not None:
        number = self._INT_FMT.format(exact)
    elif fnumber.is_integer():
        # e.g. "5.0" is shown as "5".
        number = self._INT_FMT.format(int(fnumber))
    else:
        number = self._FLOAT_FMT.format(fnumber)
    if unit is not None:
        return f"{number} {unit}"
    return number

parse(value, decimal=DECIMAL, separator=SEPARATOR)

Parse a number into a numeric value and a unit. The numeric value is aligned with the decimal and separator settings. The unit is stripped of whitespace and returned as a string. If no unit is found, None is returned. If no number is found, None is returned for both values.

Parameters:

Name Type Description Default
value str

The string to parse.

required
decimal str

The character used as the decimal separator.

DECIMAL
separator str

The character used to separate thousands, lakhs, or crores.

SEPARATOR

Returns:

Type Description
Tuple[Optional[str], Optional[str]]

A tuple of (number, unit), where number is a string and unit is a string or None.

Source code in followthemoney/types/number.py
def parse(
    self, value: str, decimal: str = DECIMAL, separator: str = SEPARATOR
) -> Tuple[Optional[str], Optional[str]]:
    """Parse a number into a numeric value and a unit. The numeric value is
    aligned with the decimal and separator settings. The unit is stripped of
    whitespace and returned as a string. If no unit is found, None is
    returned. If no number is found, None is returned for both values.

    The pattern is matched from the start of the string only, so anything
    that follows the number and the (optional) unit is ignored.

    Args:
        value (str): The string to parse.
        decimal (str): The character used as the decimal separator.
        separator (str): The character used to separate thousands, lakhs, or crores.

    Returns:
        A tuple of (number, unit), where number is a string and unit is a string or None.
    """
    # Drop grouping characters first, then map the caller's decimal mark onto
    # the canonical one that the pattern expects.
    value = value.replace(separator, "")
    if decimal != self.DECIMAL:
        value = value.replace(decimal, self.DECIMAL)
    match = self.NUM_UNIT_RE.match(value)
    if not match:
        return None, None
    number, unit = match.groups()
    if unit is not None:
        unit = unit.strip()
        if len(unit) == 0:
            unit = None
    # TODO: We could have a lookup table for common units, e.g. kg, m, etc. to
    # convert them to a standard form.
    # Whitespace captured with the number is not necessarily a plain " "
    # (tabs, no-break spaces, ...). Remove all of it so that the result can
    # always be handed to float()/int().
    number = "".join(number.split())
    if number == "":
        number = None
    return number, unit

to_number(value)

Convert a number string to a float. The string is parsed and the unit is discarded if present.

Parameters:

Name Type Description Default
value str

The string to convert.

required

Returns:

Type Description
Optional[float]

Optional[float]: The parsed float value, or None if parsing fails.

Source code in followthemoney/types/number.py
def to_number(self, value: str) -> Optional[float]:
    """Turn a number string into a float, ignoring any unit.

    The value is run through ``self.parse`` and only the numeric part is
    converted.

    Args:
        value (str): The string to convert.

    Returns:
        Optional[float]: The float value, or None when no number was found
        or any step of the conversion raised.
    """
    try:
        numeric, _unit = self.parse(value)
        return float(numeric) if numeric is not None else None
    except Exception:
        # Best-effort conversion: every failure is reported as None.
        return None