# Source code for ctapipe.core.qualityquery

"""
Data Quality selection
"""

# Public names of this module, i.e. what a star-import of it exports.
__all__ = ["QualityQuery", "QualityCriteriaError"]

import numpy as np  # for use in selection functions

from .component import Component
from .expression_engine import ExpressionEngine
from .traits import List, Tuple, Unicode


class QualityCriteriaError(TypeError):
    """Signal a problem with a user-defined selection criterion."""
class QualityQuery(Component):
    """
    Manage a set of user-configurable selection criteria.

    The criteria can be configured at runtime or in a config file and all
    operate on the same type of input. Each time the instance is called, it
    returns a boolean array telling whether or not each criterion passed.
    It also keeps track of the total number of times each criterion passed,
    as well as of the cumulative product of the criteria (i.e. the criteria
    applied in order).
    """

    quality_criteria = List(
        Tuple(Unicode(), Unicode()),
        help=(
            "list of tuples of ('<description', 'expression string') to accept "
            "(select) a given data value. E.g. ``[('mycut', 'x > 3'),]``. "
            "You may use ``numpy`` as ``np`` and ``astropy.units`` as ``u``,"
            " but no other modules."
        ),
    ).tag(config=True)

    def __init__(self, config=None, parent=None, **kwargs):
        """
        Set up the expression engine and the pass counters.

        Parameters
        ----------
        config, parent, **kwargs
            Forwarded unchanged to ``Component.__init__``.

        Raises
        ------
        ValueError
            If any configured expression contains the text ``lambda``.
        """
        super().__init__(config=config, parent=parent, **kwargs)

        # split the (name, expression) pairs into two parallel lists
        self.criteria_names = [n for (n, _) in self.quality_criteria]
        self.expressions = [e for (_, e) in self.quality_criteria]

        self.engine = ExpressionEngine(self.quality_criteria)
        for _, expr in self.quality_criteria:
            if "lambda" in expr:
                raise ValueError(
                    "As of ctapipe 0.16, do not give lambda expressions"
                    " to QualityQuery. Directly give the expression."
                    " E.g. instead of `lambda p: p.hillas.width.value > 0`"
                    " use `parameters.hillas.width.value > 0`"
                )

        # arrays for recording overall statistics; slot 0 is the extra
        # "TOTAL" entry that counts every tested entry, the criteria follow
        n = len(self.quality_criteria) + 1
        self._counts = np.zeros(n, dtype=np.int64)
        self._cumulative_counts = np.zeros(n, dtype=np.int64)

    def to_table(self, functions=False):
        """
        Return a tabular view of the latest quality summary.

        The columns are

        - *criteria*: name of each criterion, preceded by a ``TOTAL`` row
        - *counts*: counts of each criterion independently
        - *cumulative_counts*: counts of cumulative application of each
          criterion in order
        - *func*: the expression strings (only if ``functions`` is true)

        Parameters
        ----------
        functions : bool
            include the expression strings as a column

        Returns
        -------
        astropy.table.Table
        """
        # imported here rather than at module level, as in the original code
        from astropy.table import Table

        cols = {
            "criteria": ["TOTAL"] + self.criteria_names,
            "counts": self._counts,
            "cumulative_counts": self._cumulative_counts,
        }
        if functions:
            # "True" stands in as the expression of the TOTAL row
            cols["func"] = ["True"] + self.expressions
        return Table(cols)

    def _repr_html_(self):
        """Display nicely in Jupyter notebooks."""
        return self.to_table()._repr_html_()

    def __str__(self):
        """Return a formatted string representation of the entire table."""
        return str(self.to_table())

    def __call__(self, **kwargs) -> np.ndarray:
        """
        Evaluate every criterion once and update the pass counters.

        Parameters
        ----------
        **kwargs
            Are passed as locals to evaluate the given expressions.

        Returns
        -------
        np.ndarray
            array of booleans with the result of each selection criterion,
            in order
        """
        # add 1 for total: slot 0 stays True so it counts every call
        result = np.ones(len(self.quality_criteria) + 1, dtype=bool)

        for i, res in enumerate(self.engine(kwargs), start=1):
            result[i] = res

        self._counts += result.astype(int)
        # the running product is 1 only while all criteria so far passed
        self._cumulative_counts += result.cumprod()
        return result[1:]  # strip off TOTAL criterion, since redundant

    def get_table_mask(self, table):
        """
        Get a boolean mask for the entries that pass the quality checks.

        The pass counters are updated with the number of table entries
        that passed each criterion.

        Parameters
        ----------
        table : `~astropy.table.Table`
            Table with columns matching the expressions used in the
            `QualityQuery.quality_criteria`.

        Returns
        -------
        mask : np.ndarray[bool]
            Boolean mask of valid entries.
        """
        # one row per criterion, one column per table entry; row 0 is the
        # always-true TOTAL row
        n_criteria = len(self.quality_criteria) + 1
        result = np.ones((n_criteria, len(table)), dtype=bool)

        for i, res in enumerate(self.engine(table), start=1):
            result[i] = res

        self._counts += np.count_nonzero(result, axis=1)
        # Running AND over the criteria: same non-zero pattern as a
        # cumulative product, but the intermediate array stays boolean
        # instead of being promoted to an integer dtype.
        passed_so_far = np.logical_and.accumulate(result, axis=0)
        self._cumulative_counts += np.count_nonzero(passed_so_far, axis=1)
        return np.all(result, axis=0)