Histogram aggregation with HistogramAggregator#

This tutorial shows how to:

  1. Build an event table with camera-like data (images and peak times) and some invalid values.

  2. Configure and run HistogramAggregator in chunks.

  3. Access histogram counts, bin edges, summary statistics, and valid-event counts (n_events).

  4. Plot one pixel histogram from the selected chunks and both gain channels for both image and peak_time columns.

  5. Plot the same histogram using Hist’s built-in plotting functionality after filling a Hist object with the aggregated histogram counts and variances.

  6. Plot the integral over all pixels for both channels using Hist’s built-in plotting functionality.

  7. Compare no-flow, underflow-only, overflow-only, and both-flow histograms to see how the outer bins behave.

  • Chunk 0, pixel 10, Chunk 1, pixel 10
  • Peak Time - Chunk 0, pixel 10, Peak Time - Chunk 1, pixel 10
  • Chunk 0, Pixel 10, Chunk 0, Pixel 11, Chunk 0, Pixel 12
  • Chunk 0, High Gain, Low Gain, Integral over all pixels
  • HistogramAggregator output: chunk 0, pixel 10, Under/overflow behavior
Number of chunks: 2
histogram shape per chunk: (50, 2, 100)
bin edges shape per chunk: (51,)
bin centers shape per chunk: (50,)
n_events shape per chunk: (2, 100)

 16 import matplotlib.pyplot as plt
 17 import numpy as np
 18 from astropy.table import Table
 19 from astropy.time import Time
 20 from traitlets.config import Config
 21
 22 from ctapipe.containers import ChunkHistogramContainer
 23 from ctapipe.monitoring.aggregator import HistogramAggregator
 24
 25
 26 # -------------------------------------------------------------------
 27 # Create synthetic event-wise camera data
 28 # -------------------------------------------------------------------
 29 rng = np.random.default_rng(42)
 30
 31 n_events = 2000
 32 n_channels = 2
 33 n_pixels = 100
 34
 35 times = Time(
 36     np.linspace(60117.911, 60117.9258, num=n_events),
 37     scale="tai",
 38     format="mjd",
 39 )
 40 event_ids = np.arange(n_events)
 41 images = rng.normal(loc=85.0, scale=10.0, size=(n_events, n_channels, n_pixels))
 42 images[:, 1, :] -= 15  # Simulate lower gain channel by shifting the mean down by 15
 43 peak_time = rng.normal(loc=20.0, scale=2.0, size=(n_events, n_channels, n_pixels))
 44
 45 # Add a few invalid values to demonstrate n_events behavior.
 46 images[3, 0, 10] = np.nan
 47 images[15, 0, 10] = np.nan
 48 peak_time[5, 0, 10] = np.nan
 49 peak_time[35, 1, 10] = np.nan
 50
 51 # Optional static mask over sample dimensions (channel, pixel).
 52 # Here we exclude channel 1, pixel 99 for all events.
 53 masked_elements_of_sample = np.zeros((n_channels, n_pixels), dtype=bool)
 54 masked_elements_of_sample[1, 99] = True
 55
 56 table = Table(
 57     [times, event_ids, images, peak_time],
 58     names=("time", "event_id", "image", "peak_time"),
 59 )
 60
 61
 62 # -------------------------------------------------------------------
 63 # Configure and run histogram aggregation
 64 # -------------------------------------------------------------------
 65 config_image = Config(
 66     {
 67         "HistogramAggregator": {
 68             "chunking_type": "SizeChunking",
 69             "axis_definition": {
 70                 "class_name": "Regular",
 71                 "bins": 50,
 72                 "start": 40.0,
 73                 "stop": 110.0,
 74                 "name": "image",
 75             },
 76             "axis_names": ["channel", "pixel"],
 77         },
 78         "SizeChunking": {"chunk_size": 1000},
 79     }
 80 )
 81
 82 aggregator_image = HistogramAggregator(config=config_image)
 83 result = aggregator_image(
 84     table=table,
 85     col_name="image",
 86     masked_elements_of_sample=masked_elements_of_sample,
 87 )
 88
 89 config_peak_time = Config(
 90     {
 91         "HistogramAggregator": {
 92             "chunking_type": "SizeChunking",
 93             "axis_definition": {
 94                 "class_name": "Regular",
 95                 "bins": 50,
 96                 "start": 2.0,
 97                 "stop": 38.0,
 98                 "name": "peak_time",
 99             },
100             "axis_names": ["channel", "pixel"],
101         },
102         "SizeChunking": {"chunk_size": 1000},
103     }
104 )
105
106 aggregator_peak_time = HistogramAggregator(config=config_peak_time)
107 result_peak_time = aggregator_peak_time(
108     table=table,
109     col_name="peak_time",
110     masked_elements_of_sample=masked_elements_of_sample,
111 )
112
113 print(f"Number of chunks: {len(result)}")
114 print(f"histogram shape per chunk: {result[0]['histogram'].shape}")
115 print(f"bin edges shape per chunk: {result[0].meta['bin_edges'].shape}")
116 print(f"bin centers shape per chunk: {result[0].meta['bin_centers'].shape}")
117 print(f"n_events shape per chunk: {result[0]['n_events'].shape}")
118
119
120 # -------------------------------------------------------------------
121 # Plot the histograms for one pixel with two gain channels
122 # -------------------------------------------------------------------
123 # We aggreagted the histograms in two chunks of 1000 events each, so we have two histograms per gain channel
124 # for the selected pixel. We will plot both chunks for the selected pixel and gain channels
125 # on the same axes for comparison, and then do the same for the peak_time column in a separate figure.
126
127 pixel_index = 10
128 gain_label = {0: "High Gain", 1: "Low Gain"}
129
130 fig, axes = plt.subplots(1, 2, figsize=(12, 4), sharey=True)
131 for chunk_index, ax in enumerate(axes):
132     bin_edges = result[chunk_index].meta["bin_edges"]
133     bin_centers = result[chunk_index].meta["bin_centers"]
134     channel_handles = []
135
136     for channel_index in range(n_channels):
137         counts = result[chunk_index]["histogram"][:, channel_index, pixel_index]
138         valid_events = result[chunk_index]["n_events"][channel_index, pixel_index]
139
140         line = ax.stairs(
141             counts,
142             bin_edges,
143             label=f"{gain_label[channel_index]} (n_events={valid_events})",
144         )
145         channel_handles.append(line)
146         color = line.get_edgecolor()
147
148         # Plot bin variances as error bars (use sqrt of variance for error) at bin centers
149         bin_errors = np.sqrt(counts)
150         ax.errorbar(
151             bin_centers,
152             counts,
153             yerr=bin_errors,
154             fmt="none",
155             color=color,
156             elinewidth=1.0,
157             capsize=3,
158             alpha=0.6,
159         )
160
161     ax.set_title(f"Chunk {chunk_index}, pixel {pixel_index}")
162     ax.set_xlabel("image value")
163     ax.set_ylabel("Counts")
164
165     ax.legend(
166         handles=channel_handles,
167         loc="upper left",
168         fontsize=8,
169     )
170
171 plt.show()
172
173
174 # -------------------------------------------------------------------
175 # Plot peak_time histograms in a separate figure
176 # -------------------------------------------------------------------
177 fig, axes = plt.subplots(1, 2, figsize=(12, 4), sharey=True)
178 for chunk_index, ax in enumerate(axes):
179     bin_edges = result_peak_time[chunk_index].meta["bin_edges"]
180     bin_centers = result_peak_time[chunk_index].meta["bin_centers"]
181
182     channel_handles = []
183
184     for channel_index in range(n_channels):
185         counts = result_peak_time[chunk_index]["histogram"][
186             :, channel_index, pixel_index
187         ]
188         valid_events = result_peak_time[chunk_index]["n_events"][
189             channel_index, pixel_index
190         ]
191
192         line = ax.stairs(
193             counts,
194             bin_edges,
195             label=f"{gain_label[channel_index]} (n_events={valid_events})",
196         )
197         channel_handles.append(line)
198         color = line.get_edgecolor()
199
200         # Plot bin variances as error bars (use sqrt of variance for error) at bin centers
201         bin_errors = np.sqrt(counts)
202         ax.errorbar(
203             bin_centers,
204             counts,
205             yerr=bin_errors,
206             fmt="none",
207             color=color,
208             elinewidth=1.0,
209             capsize=3,
210             alpha=0.6,
211         )
212
213     ax.set_title(f"Peak Time - Chunk {chunk_index}, pixel {pixel_index}")
214     ax.set_xlabel("peak_time value")
215     ax.set_ylabel("Counts")
216     ax.legend(
217         handles=channel_handles,
218         loc="upper left",
219         fontsize=8,
220     )
221
222 plt.show()
223
224
225 # -------------------------------------------------------------------
226 # Build a Hist object from serialized axis metadata and plot it
227 # -------------------------------------------------------------------
228
229 # Reconstruct the histogram axis from metadata stored by HistogramAggregator.
230 # In this tutorial, axis_definition uses hist.axis.Regular.
231 chunk_index = 0
232 chunk_histograms_container = ChunkHistogramContainer(
233     **dict(zip(result[chunk_index].colnames, result[chunk_index]))
234 )
235 chunk_histograms_container.meta = result.meta
236
237 # Plot three nearby pixels using Hist's built-in plotting functionality.
238 # Requires 'hist[plot]' to be installed in the environment. Reconstruct
239 # the full histogram as a Hist object for the chunk using hist_from_container method.
240 full_hist = HistogramAggregator.hist_from_container(chunk_histograms_container)
241 pixels_to_plot = [pixel_index, pixel_index + 1, pixel_index + 2]
242 fig, axes = plt.subplots(1, len(pixels_to_plot), figsize=(15, 4), sharey=True)
243
244 for ax, pixel_to_plot in zip(axes, pixels_to_plot):
245     for channel_index in range(n_channels):
246         h = full_hist[{"channel": channel_index, "pixel": pixel_to_plot}]
247         h.name = gain_label[channel_index]
248
249         plt.sca(ax)
250         h.plot(histtype="step", yerr=True, label=h.name)
251
252     ax.set_title(f"Chunk {chunk_index}, Pixel {pixel_to_plot}")
253     ax.set_xlabel("image value")
254     ax.legend(fontsize=8, loc="upper left")
255
256 axes[0].set_ylabel("Counts")
257 plt.tight_layout()
258 plt.show()
259
260 # ------------------------------------------------------------------------------------------------
261 # Plot the integral over all pixels for both channels using Hist's built-in plotting functionality
262 # ------------------------------------------------------------------------------------------------
263 h = full_hist
264
265 fig, ax = plt.subplots(
266     1,
267     3,
268     figsize=(20, 4),
269     gridspec_kw={"width_ratios": [1.15, 1.15, 1.4]},
270 )
271 fig.suptitle(f"Chunk {chunk_index}")
272 h[:, 0, :].plot2d(ax=ax[0], norm="log")
273 h[:, 1, :].plot2d(ax=ax[1], norm="log")
274 channel_stack = h.integrate("pixel").stack("channel")
275 channel_stack[0].name = "High Gain"
276 channel_stack[1].name = "Low Gain"
277 channel_stack.plot(ax=ax[2], legend=True)
278
279 ax[0].set_title(channel_stack[0].name)
280 ax[1].set_title(channel_stack[1].name)
281 ax[2].set_title("Integral over all pixels")
282 for heatmap_ax in ax[:2]:
283     heatmap_ax.set_xlabel("image value")
284     heatmap_ax.set_ylabel("Pixel")
285     heatmap_ax.set_yticks([25, 50, 75, 100], labels=["25", "50", "75", "100"])
286
287 ax[2].set_xlabel("image value")
288 fig.subplots_adjust(wspace=0.5, top=0.88)
289 plt.show()
290
291 # ----------------------------------------------------------------------
292 # Demonstrate underflow/overflow via HistogramAggregator axis_definition
293 # ----------------------------------------------------------------------
294
295 FLOW_CONFIGS = {
296     "No flow bins": {"underflow": False, "overflow": False},
297     "Underflow only": {"underflow": True, "overflow": False},
298     "Overflow only": {"underflow": False, "overflow": True},
299     "With flow bins": {"underflow": True, "overflow": True},
300 }
301
302 BASE_AXIS = {
303     "class_name": "Regular",
304     "bins": 20,
305     "start": 75.0,
306     "stop": 95.0,
307 }
308
309 CHUNKING = {
310     "chunking_type": "SizeChunking",
311     "SizeChunking": {"chunk_size": 1000},
312 }
313 # Run all configurations and extract histograms/counts
314 results = {}
315 histograms = {}
316 flow_counts = {}
317
318 for label, flow_options in FLOW_CONFIGS.items():
319     config = Config(
320         {
321             "HistogramAggregator": {
322                 "chunking_type": "SizeChunking",
323                 "axis_definition": {
324                     **BASE_AXIS,
325                     **flow_options,
326                 },
327                 "axis_names": ["channel", "pixel"],
328             },
329             "SizeChunking": {"chunk_size": 1000},
330         }
331     )
332
333     # Aggregate the histogram which will return an astropy table
334     result = HistogramAggregator(config=config)(
335         table=table,
336         col_name="image",
337         masked_elements_of_sample=masked_elements_of_sample,
338     )
339     # Create a Hist object from the aggregated histogram and
340     # metadata for the selected chunk using the hist_from_tablerow method.
341     histogram_cont = HistogramAggregator.hist_from_tablerow(result[chunk_index])
342     histogram = histogram_cont[{"channel": 0, "pixel": pixel_index}]
343
344     flow_view = histogram.view(flow=True)
345     axis_kwargs = result.meta["axis_kwargs"]
346
347     results[label] = result
348     histograms[label] = histogram
349     flow_counts[label] = {
350         "underflow": (int(flow_view[0]) if axis_kwargs.get("underflow") else 0),
351         "in_range": int(histogram.values().sum()),
352         "overflow": (int(flow_view[-1]) if axis_kwargs.get("overflow") else 0),
353     }
354
355 valid_events = results["With flow bins"][chunk_index]["n_events"][0, pixel_index]
356
357 fig, axes = plt.subplots(1, 2, figsize=(12, 4))
358
359 styles = {
360     "No flow bins": "-",
361     "Underflow only": "--",
362     "Overflow only": "-.",
363     "With flow bins": ":",
364 }
365
366 # Left: histogram comparison
367 for label, histogram in histograms.items():
368     axes[0].stairs(
369         histogram.values(),
370         histogram.axes[0].edges,
371         linestyle=styles[label],
372         label=label,
373     )
374
375 axes[0].set_title(
376     f"HistogramAggregator output: chunk {chunk_index}, pixel {pixel_index}"
377 )
378 axes[0].set_xlabel("image value")
379 axes[0].set_ylabel("Counts")
380
381 axis_margin = 0.05 * (BASE_AXIS["stop"] - BASE_AXIS["start"])
382 axes[0].set_xlim(
383     BASE_AXIS["start"] - axis_margin,
384     BASE_AXIS["stop"] + axis_margin,
385 )
386
387 axes[0].legend(fontsize=8, loc="upper left")
388
389 # Right: flow-bin behavior
390 x = np.arange(3)
391 labels = ["underflow", "in-range", "overflow"]
392
393 bar_offsets = {
394     "No flow bins": -0.18,
395     "Underflow only": 0.00,
396     "Overflow only": 0.18,
397     "With flow bins": 0.36,
398 }
399
400 bar_width = 0.18
401
402 for label, offset in bar_offsets.items():
403     counts = flow_counts[label]
404
405     axes[1].bar(
406         x + offset,
407         [
408             counts["underflow"],
409             counts["in_range"],
410             counts["overflow"],
411         ],
412         width=bar_width,
413         label=label,
414     )
415
416 axes[1].set_xticks(x + 0.09, labels)
417 axes[1].set_ylabel("Event count")
418 axes[1].set_title("Under/overflow behavior")
419 axes[1].legend(fontsize=8)
420
421 plt.tight_layout()
422 plt.show()

Total running time of the script: (0 minutes 2.038 seconds)

Gallery generated by Sphinx-Gallery