# Source code for ctapipe.core.qualityquery

"""
Data Quality selection
"""

__all__ = ["QualityQuery", "QualityCriteriaError"]

import numpy as np  # for use in selection functions

from .component import Component
from .expression_engine import ExpressionEngine
from .traits import List, Tuple, Unicode


class QualityCriteriaError(TypeError):
    """Raised to signal a problem with a user-defined selection criterion."""


class QualityQuery(Component):
    """
    Manage a set of user-configurable selection criteria.

    The criteria (configurable at runtime or in a config file) all operate on
    the same type of input. Each time the instance is called, it returns a
    boolean array telling whether or not each criterion passed. It also keeps
    track of the total number of times each criterion passed on its own, as
    well as of the cumulative result, i.e. the number of times a criterion
    passed together with all criteria listed before it (criteria applied
    in order).
    """

    quality_criteria = List(
        Tuple(Unicode(), Unicode()),
        help=(
            "list of tuples of ('<description>', '<expression string>') to accept "
            "(select) a given data value.  E.g. ``[('mycut', 'x > 3'),]``. "
            "You may use ``numpy`` as ``np`` and ``astropy.units`` as ``u``,"
            " but no other modules."
        ),
    ).tag(config=True)

    def __init__(self, config=None, parent=None, **kwargs):
        """
        Set up the criteria names, the expression engine and the counters.

        ``config``, ``parent`` and ``**kwargs`` are forwarded unchanged to the
        parent class constructor.

        Raises
        ------
        ValueError
            If any configured expression string contains the text ``lambda``.
        """
        super().__init__(config=config, parent=parent, **kwargs)

        # Reject old-style lambda expressions first, so that the migration hint
        # is raised before the expressions are handed to the expression engine.
        for _, expr in self.quality_criteria:
            if "lambda" in expr:
                raise ValueError(
                    "As of ctapipe 0.16, do not give lambda expressions"
                    " to QualityQuery. Directly give the expression."
                    " E.g. instead of `lambda p: p.hillas.width.value > 0`"
                    " use `parameters.hillas.width.value > 0`"
                )

        # split the (name, expression) pairs into two parallel lists
        self.criteria_names = [n for (n, _) in self.quality_criteria]
        self.expressions = [e for (_, e) in self.quality_criteria]

        self.engine = ExpressionEngine(self.quality_criteria)

        # arrays for recording overall statistics; index 0 is the extra
        # "TOTAL" entry, indices 1.. follow the order of the criteria
        n = len(self.quality_criteria) + 1
        self._counts = np.zeros(n, dtype=np.int64)
        self._cumulative_counts = np.zeros(n, dtype=np.int64)

    def to_table(self, functions=False):
        """
        Return a tabular view of the latest quality summary

        The columns are
        - *criteria*: name of each criterion, preceded by a ``TOTAL`` row
        - *counts*: counts of each criterion independently
        - *cumulative_counts*: counts of cumulative application of each
          criterion in order
        - *func*: expression string of each criterion (only if ``functions``
          is true)

        Parameters
        ----------
        functions: bool
            include the expression string as a column

        Returns
        -------
        astropy.table.Table
        """
        from astropy.table import Table

        cols = {
            "criteria": ["TOTAL"] + self.criteria_names,
            "counts": self._counts,
            "cumulative_counts": self._cumulative_counts,
        }
        if functions:
            # the TOTAL row has no expression of its own; it always passes
            cols["func"] = ["True"] + self.expressions
        return Table(cols)

    def _repr_html_(self):
        """display nicely in Jupyter notebooks"""
        return self.to_table()._repr_html_()

    def __str__(self):
        """Print a formatted string representation of the entire table."""
        return str(self.to_table())

    def __call__(self, **kwargs) -> np.ndarray:
        """
        Evaluate every criterion once and update the counters

        Parameters
        ----------
        **kwargs:
            Are passed as locals to evaluate the given expression

        Returns
        -------
        np.ndarray:
            array of booleans with results of each selection criterion in order
            (the internal TOTAL entry is not included)
        """
        # add 1 for total; index 0 stays True so that TOTAL counts every call
        result = np.ones(len(self.quality_criteria) + 1, dtype=bool)

        for i, res in enumerate(self.engine(kwargs), start=1):
            result[i] = res

        self._counts += result.astype(int)
        # cumulative product over booleans: entry i is 1 only if all entries
        # up to and including i passed
        self._cumulative_counts += result.cumprod()
        return result[1:]  # strip off TOTAL criterion, since redundant

    def get_table_mask(self, table):
        """
        Get a boolean mask for the entries that pass the quality checks.

        The per-criterion and cumulative counters of this instance are
        increased by the number of table entries passing each criterion.

        Parameters
        ----------
        table : `~astropy.table.Table`
            Table with columns matching the expressions used in the
            `QualityQuery.quality_criteria`.

        Returns
        -------
        mask : np.ndarray[bool]
            Boolean mask of valid entries.
        """
        # one row per criterion plus a leading all-True row for TOTAL
        n_criteria = len(self.quality_criteria) + 1
        result = np.ones((n_criteria, len(table)), dtype=bool)

        for i, res in enumerate(self.engine(table), start=1):
            result[i] = res

        self._counts += np.count_nonzero(result, axis=1)
        # Running logical AND down the criteria axis: row i is True where the
        # entry passed criterion i and every criterion before it. This gives
        # the same non-zero pattern as a cumulative product but stays boolean.
        passed_so_far = np.logical_and.accumulate(result, axis=0)
        self._cumulative_counts += np.count_nonzero(passed_so_far, axis=1)
        return np.all(result, axis=0)