"""
Handles reading of different event/waveform containing files
"""
import warnings
from abc import abstractmethod
from typing import Dict, Generator, List, Tuple
from traitlets.config.loader import LazyConfigValue
from ctapipe.atmosphere import AtmosphereDensityProfile
from ..containers import (
ArrayEventContainer,
ObservationBlockContainer,
SchedulingBlockContainer,
SimulationConfigContainer,
)
from ..core import Provenance, ToolConfigurationError
from ..core.component import Component, find_config_in_hierarchy
from ..core.traits import CInt, Int, Path, Set, TraitError, Undefined
from ..instrument import SubarrayDescription
from .datalevels import DataLevel
__all__ = ["EventSource"]
[docs]class EventSource(Component):
"""
Parent class for EventSources.
EventSources read input files and generate `~ctapipe.containers.ArrayEventContainer`
instances when iterated over.
A new EventSource should be created for each type of event file read
into ctapipe, e.g. sim_telarray files are read by the `~ctapipe.io.SimTelEventSource`.
EventSource provides a common high-level interface for accessing event
information from different data sources (simulation or different camera
file formats). Creating an EventSource for a new
file format or other event source ensures that data can be accessed in a common way,
irregardless of the file format or data origin.
EventSource itself is an abstract class, but will create an
appropriate subclass if a compatible source is found for the given
``input_url``.
>>> EventSource(input_url="dataset://gamma_prod5.simtel.zst")
<ctapipe.io.simteleventsource.SimTelEventSource ...>
An ``EventSource`` can also be created through the configuration system,
by passing ``config`` or ``parent`` as appropriate.
E.g. if using ``EventSource`` inside of a ``Tool``, you would do:
>>> self.source = EventSource(parent=self) # doctest: +SKIP
To loop through the events in a file:
>>> source = EventSource(input_url="dataset://gamma_prod5.simtel.zst", max_events=2)
>>> for event in source:
... print(event.count)
0
1
**NOTE**: Every time a new loop is started through the source,
it tries to restart from the first event, which might not be supported
by the event source.
It is encouraged to use ``EventSource`` in a context manager to ensure
the correct cleanups are performed when you are finished with the source:
>>> with EventSource(input_url="dataset://gamma_prod5.simtel.zst", max_events=2) as source:
... for event in source:
... print(event.count)
0
1
**NOTE**: EventSource implementations should not reuse the same ArrayEventContainer,
as these are mutable and may lead to errors when analyzing multiple events.
Attributes
----------
input_url : str
Path to the input event file.
max_events : int
Maximum number of events to loop through in generator
allowed_tels: Set or None
Ids of the telescopes to be included in the data.
If given, only this subset of telescopes will be present in the
generated events. If None, all available telescopes are used.
"""
#: ctapipe_io entry points may provide EventSource implementations
plugin_entry_point = "ctapipe_io"
input_url = Path(help="Path to the input file containing events.").tag(config=True)
max_events = Int(
None,
allow_none=True,
help="Maximum number of events that will be read from the file",
).tag(config=True)
allowed_tels = Set(
trait=CInt(),
default_value=None,
allow_none=True,
help=(
"list of allowed tel_ids, others will be ignored. "
"If None, all telescopes in the input stream "
"will be included"
),
).tag(config=True)
def __new__(cls, input_url=Undefined, config=None, parent=None, **kwargs):
"""
Returns a compatible subclass for given input url, either
directly or via config / parent
"""
# needed to break recursion, as __new__ of subclass will also
# call this method
if cls is not EventSource:
return super().__new__(cls)
# check we have at least one of these to be able to determine the subclass
if input_url in {None, Undefined} and config is None and parent is None:
raise ValueError("One of `input_url`, `config`, `parent` is required")
if input_url in {None, Undefined}:
input_url = cls._find_input_url_in_config(config=config, parent=parent)
subcls = cls._find_compatible_source(input_url)
return super().__new__(subcls)
def __init__(self, input_url=None, config=None, parent=None, **kwargs):
"""
Class to handle generic input files. Enables obtaining the "source"
generator, regardless of the type of file (either hessio or camera
file).
Parameters
----------
config : traitlets.loader.Config
Configuration specified by config file or cmdline arguments.
Used to set traitlet values.
Set to None if no configuration to pass.
tool : ctapipe.core.Tool
Tool executable that is calling this component.
Passes the correct logger to the component.
Set to None if no Tool to pass.
kwargs
"""
# traitlets differentiates between not getting the kwarg
# and getting the kwarg with a None value.
# the latter overrides the value in the config with None, the former
# enables getting it from the config.
if input_url not in {None, Undefined}:
kwargs["input_url"] = input_url
super().__init__(config=config, parent=parent, **kwargs)
self.metadata = dict(is_simulation=False)
self.log.info(f"INPUT PATH = {self.input_url}")
if self.max_events:
self.log.info(f"Max events being read = {self.max_events}")
Provenance().add_input_file(str(self.input_url), role="DL0/Event")
[docs] @staticmethod
@abstractmethod
def is_compatible(file_path):
"""
Abstract method to be defined in child class.
Perform a set of checks to see if the input file is compatible
with this file event_source.
Parameters
----------
file_path : str
File path to the event file.
Returns
-------
compatible : bool
True if file is compatible, False if it is incompatible
"""
@property
def is_stream(self):
"""
Bool indicating if input is a stream. If it is then it is incompatible
with `ctapipe.io.eventseeker.EventSeeker`.
TODO: Define a method to detect if it is a stream
Returns
-------
bool
If True, then input is a stream.
"""
return False
@property
@abstractmethod
def subarray(self) -> SubarrayDescription:
"""
Obtain the subarray from the EventSource
Returns
-------
ctapipe.instrument.SubarrayDecription
"""
@property
def simulation_config(self) -> Dict[int, SimulationConfigContainer]:
"""The simulation configurations of all observations provided by the
EventSource, or None if the source does not provide simulated data
Returns
-------
Dict[int,ctapipe.containers.SimulationConfigContainer] | None
"""
return None
@property
@abstractmethod
def observation_blocks(self) -> Dict[int, ObservationBlockContainer]:
"""
Obtain the ObservationConfigurations from the EventSource, indexed by obs_id
"""
pass
@property
@abstractmethod
def scheduling_blocks(self) -> Dict[int, SchedulingBlockContainer]:
"""
Obtain the ObservationConfigurations from the EventSource, indexed by obs_id
"""
pass
@property
@abstractmethod
def is_simulation(self) -> bool:
"""
Whether the currently opened file is simulated
Returns
-------
bool
"""
@property
@abstractmethod
def datalevels(self) -> Tuple[DataLevel]:
"""
The datalevels provided by this event source
Returns
-------
tuple[ctapipe.io.DataLevel]
"""
[docs] def has_any_datalevel(self, datalevels) -> bool:
"""
Check if any of `datalevels` is in self.datalevels
Parameters
----------
datalevels: Iterable
Iterable of datalevels
"""
return any(dl in self.datalevels for dl in datalevels)
@property
def obs_ids(self) -> List[int]:
"""
The observation ids of the runs located in the file
Unmerged files should only contain a single obs id.
Returns
-------
list[int]
"""
return list(self.observation_blocks.keys())
@property
def atmosphere_density_profile(self) -> AtmosphereDensityProfile:
"""atmosphere density profile that can be integrated to
convert between h_max and X_max. This should correspond
either to what was used in a simualtion, or a measurment
for use with observed data.
Returns
-------
AtmosphereDensityProfile:
profile to be used
"""
return None
@abstractmethod
def _generator(self) -> Generator[ArrayEventContainer, None, None]:
"""
Abstract method to be defined in child class.
Generator where the filling of the `ctapipe.containers` occurs.
Returns
-------
generator
"""
def __iter__(self):
"""
Generator that iterates through `_generator`, but keeps track of
`self.max_events`.
Returns
-------
generator
"""
for event in self._generator():
yield event
if self.max_events and event.count >= self.max_events - 1:
break
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
pass
@classmethod
def _find_compatible_source(cls, input_url):
if input_url == "" or input_url in {None, Undefined}:
raise ToolConfigurationError("EventSource: No input_url was specified")
# validate input url with the traitel validate method
# to make sure it's compatible and to raise the correct error
input_url = EventSource.input_url.validate(obj=None, value=input_url)
available_classes = cls.non_abstract_subclasses()
for name, subcls in available_classes.items():
try:
if subcls.is_compatible(input_url):
return subcls
except Exception as e:
warnings.warn(f"{name}.is_compatible raised exception: {e}")
# provide a more helpful error for non-existing input_url
if not input_url.exists():
raise TraitError(
f"input_url {input_url} is not an existing file "
" and no EventSource implementation claimed compatibility"
)
raise ValueError(
"Cannot find compatible EventSource for \n"
"\turl:{}\n"
"in available EventSources:\n"
"\t{}".format(input_url, [c for c in available_classes])
)
[docs] @classmethod
def from_url(cls, input_url, **kwargs):
"""
Find compatible EventSource for input_url via the `is_compatible`
method of the EventSource
Parameters
----------
input_url : str
Filename or URL pointing to an event file
kwargs
Named arguments for the EventSource
Returns
-------
instance
Instance of a compatible EventSource subclass
"""
subcls = cls._find_compatible_source(input_url)
return subcls(input_url=input_url, **kwargs)
@classmethod
def _find_input_url_in_config(cls, config=None, parent=None):
if config is None and parent is None:
raise ValueError("One of config or parent must be provided")
if config is not None and parent is not None:
raise ValueError("Only one of config or parent must be provided")
input_url = None
# config was passed
if config is not None:
if not isinstance(config.input_url, LazyConfigValue):
input_url = config.input_url
elif not isinstance(config.EventSource.input_url, LazyConfigValue):
input_url = config.EventSource.input_url
else:
input_url = cls.input_url.default_value
# parent was passed
else:
# first look at appropriate position in the config hierarcy
input_url = find_config_in_hierarchy(parent, "EventSource", "input_url")
# if not found, check top level
if isinstance(input_url, LazyConfigValue):
if not isinstance(parent.config.EventSource.input_url, LazyConfigValue):
input_url = parent.config.EventSource.input_url
else:
input_url = cls.input_url.default_value
return input_url
[docs] @classmethod
def from_config(cls, config=None, parent=None, **kwargs):
"""
Find compatible EventSource for the EventSource.input_url traitlet
specified via the config.
This method is typically used in Tools, where the input_url is chosen via
the command line using the traitlet configuration system.
Parameters
----------
config : traitlets.config.loader.Config
Configuration created in the Tool
kwargs
Named arguments for the EventSource
Returns
-------
instance
Instance of a compatible EventSource subclass
"""
input_url = cls._find_input_url_in_config(config=config, parent=parent)
return cls.from_url(input_url, config=config, parent=parent, **kwargs)
[docs] def close(self):
"""Close this event source.
No-op by default. Should be overriden by sources needing a cleanup-step
"""
pass