From 5a80172d5b48c114366e4f31bf9a5dea80fc6c55 Mon Sep 17 00:00:00 2001
From: Reinhold Willcox <reinhold.willcox@gmail.com>
Date: Wed, 18 Mar 2026 14:14:00 +0100
Subject: [PATCH 1/4] added compasUtils file

---
 compas_python_utils/compasUtils.py | 355 +++++++++++++++++++++++++++++
 1 file changed, 355 insertions(+)
 create mode 100644 compas_python_utils/compasUtils.py

diff --git a/compas_python_utils/compasUtils.py b/compas_python_utils/compasUtils.py
new file mode 100644
index 000000000..2cc126c68
--- /dev/null
+++ b/compas_python_utils/compasUtils.py
@@ -0,0 +1,355 @@
+import h5py as h5
+import numpy as np
+import pandas as pd
+from numpy.dtypes import StringDType
+
+
+########################################################################
+# ## Function to print the data from a given COMPAS HDF5 group in a readable pandas template
+
+def printCompasDetails(data, *seeds, mask=()):
+    """
+    Function to print the full Compas output for given seeds, optionally with an additional mask
+    """
+    list_of_keys = list(data.keys())
+
+    # Check if seed parameter exists - if not, just print without (e.g RunDetails)
+    if ('SEED' in list_of_keys) | ('SEED>MT' in list_of_keys): # Most output files 
+        #SEED>MT is a relic from older versions, but we leave this in for backwards compatibility
+
+        # Set the seed name parameter, mask on seeds as needed, and set the index
+        seedVariableName='SEED' if ('SEED' in list_of_keys) else 'SEED>MT'
+        list_of_keys.remove(seedVariableName) # this is the index above, don't want to include it
+    
+        allSeeds = data[seedVariableName][()]
+        seedsMask = np.isin(allSeeds, seeds)
+        if len(seeds) == 0: # if any seed is included, do not reset the mask
+            seedsMask = np.ones_like(allSeeds).astype(bool)
+        if mask == ():
+            mask = np.ones_like(allSeeds).astype(bool)
+        mask &= seedsMask
+
+        df = pd.DataFrame.from_dict({param: data[param][()][mask] for param in list(data.keys())}).set_index(seedVariableName).T
+
+    else: # No seed parameter, so do custom print for Run Details
+
+        # Get just the keys without the -Derivation suffix - those will be a second column
+        keys_not_derivations = []
+        for key in list_of_keys:
+            if '-Derivation' not in key:
+                keys_not_derivations.append(key)
+        
+        # Some parameter values are string types, formatted as np.bytes_, need to convert back
+        def convert_strings(param_array):
+            if isinstance(param_array[0], np.bytes_):
+                return param_array.astype(str)
+            else:
+                return param_array
+
+        df_keys = pd.DataFrame.from_dict({param: convert_strings(data[param][()]) for param in keys_not_derivations }).T
+        nCols = df_keys.shape[1] # Required only because if we combine RDs, we get many columns (should fix later)
+        df_keys.columns = ['Parameter']*nCols
+        df_drvs = pd.DataFrame.from_dict({param: convert_strings(data[param+'-Derivation'][()]) for param in keys_not_derivations }).T
+        df_drvs.columns = ['Derivation']*nCols
+        df = pd.concat([df_keys, df_drvs], axis=1)
+
+    # Add units as first col
+    units_dict = {key:data[key].attrs['units'].astype(str) for key in list_of_keys}
+    df.insert(loc=0, column='(units)', value=pd.Series(units_dict))
+    return df
+
+
+
+########################################################################
+# ## Get event histories of MT data, SN data, and combined MT, SN data
+
+def getMtEvents(MT):                                     
+    """
+    This function takes in the `BSE_RLOF` output category from COMPAS, and returns the information
+    on the Mass Transfer (MT) events that happen for each seed. The events do not have to be in order, 
+    either chronologically or by seed, this function will reorder them as required.
+    
+    OUT:
+        returnedSeeds (list): ordered list of the unique seeds in the MT file
+        returnedEvents (list): list of sublists, where each sublist contains all the MT events for a given seed.
+            MT event tuples take the form :
+            (stellarTypePrimary, stellarTypeSecondary, isRlof1, isRlof2, isCEE, isMrg)
+        returnTimes (list): is a list of sublists of times of each of the MT events
+    """
+    mtSeeds = MT['SEED'][()]
+    mtTimes = MT['Time<MT'][()]
+    mtPrimaryStype = MT['Stellar_Type(1)<MT'][()]
+    mtSecondaryStype = MT['Stellar_Type(2)<MT'][()]
+    mtIsRlof1 = MT['RLOF(1)>MT'][()] == 1
+    mtIsRlof2 = MT['RLOF(2)>MT'][()] == 1
+    mtIsCEE = MT['CEE>MT'][()] == 1
+    mtIsMrg = MT['Merger'][()] == 1
+
+    # We want the return arrays sorted by seed, so sort here.
+    mtSeedsInds = np.lexsort((mtTimes, mtSeeds)) # sort by seeds then times - lexsort sorts by the last column first...
+    mtSeeds = mtSeeds[mtSeedsInds]  
+    mtTimes = mtTimes[mtSeedsInds]  
+    mtPrimaryStype = mtPrimaryStype[mtSeedsInds]
+    mtSecondaryStype = mtSecondaryStype[mtSeedsInds]
+    mtIsRlof1 = mtIsRlof1[mtSeedsInds]
+    mtIsRlof2 = mtIsRlof2[mtSeedsInds]
+    mtIsCEE = mtIsCEE[mtSeedsInds]
+    mtIsMrg = mtIsMrg[mtSeedsInds]
+
+    # Process the MT events
+    returnedSeeds = []                                      # array of seeds - will only contain seeds that have MT events
+    returnedEvents = []                                     # array of MT events for each seed in returnedSeeds
+    returnedTimes = []                                      # array of times for each event in returnedEvents (for each seed in returnedSeeds)
+
+    lastSeed = -1                                           # initialize most recently processed seed
+    for seedIndex, thisSeed in enumerate(mtSeeds):          # iterate over all RLOF file entries
+        thisTime = mtTimes[seedIndex]                       # time for this RLOF file entry
+        thisEvent = (mtPrimaryStype[seedIndex], mtSecondaryStype[seedIndex], 
+                     mtIsRlof1[seedIndex], mtIsRlof2[seedIndex], mtIsCEE[seedIndex], mtIsMrg[seedIndex])  # construct event tuple
+
+        # If this is an entirely new seed:
+        if thisSeed != lastSeed:                            # same seed as last seed processed?
+            returnedSeeds.append(thisSeed)                  # no - new seed, record it
+            returnedTimes.append([thisTime])                # initialize the list of event times for this seed
+            returnedEvents.append([thisEvent])              # initialize the list of events for this seed
+            lastSeed = thisSeed                             # update the latest seed
+
+        # Add event, if it is not a duplicate
+        try:
+            eventIndex = returnedEvents[-1].index(thisEvent)  # find eventIndex of this particular event tuple in the array of events for this seed 
+            if thisTime > returnedTimes[-1][eventIndex]:      # ^ if event is not a duplicate, this will throw a ValueError 
+                returnedTimes[-1][eventIndex] = thisTime      # if event is duplicate, update time to the later of the duplicates
+        except ValueError:                                    # event is not a duplicate:
+            returnedEvents[-1].append(thisEvent)              # record new event tuple for this seed
+            returnedTimes[-1].append(thisTime)                # record new event time for this seed
+
+    return returnedSeeds, returnedEvents, returnedTimes       # see above for description
+
+
+def getSnEvents(SN):                                     
+    """
+    This function takes in the `BSE_Supernovae` output category from COMPAS, and returns the information
+    on the Supernova (SN) events that happen for each seed. The events do not have to be in order chronologically,
+    this function will reorder them as required.
+    
+    OUT:
+        returnedSeeds (list): ordered list of all the unique seeds in the SN file
+        returnedEvents (list): list of sublists, where each sublist contains all the SN events for a given seed.
+            SN event tuples take the form :
+            (stellarTypeProgenitor, stellarTypeRemnant, whichStarIsProgenitor, isBinaryUnbound)
+        returnedTimes (list): is a list of sublists of times of each of the SN events
+    """
+    snSeeds = SN['SEED'][()]
+    snTimes = SN['Time'][()]
+    snProgStype = SN['Stellar_Type_Prev(SN)'][()]
+    snRemnStype = SN['Stellar_Type(SN)'][()]
+    snWhichProg = SN['Supernova_State'][()]
+    snIsUnbound = SN['Unbound'][()] == 1
+
+    # We want the return arrays sorted by seed, so sort here.
+    snSeedsInds = np.lexsort((snTimes, snSeeds)) # sort by seeds then times - lexsort sorts by the last column first...
+    snSeeds = snSeeds[snSeedsInds]  
+    snTimes = snTimes[snSeedsInds]  
+    snProgStype = snProgStype[snSeedsInds]
+    snRemnStype = snRemnStype[snSeedsInds]
+    snWhichProg = snWhichProg[snSeedsInds]
+    snIsUnbound = snIsUnbound[snSeedsInds]
+    
+    # Process the SN events
+    returnedSeeds = []                                      # array of seeds - will only contain seeds that have SN events
+    returnedEvents = []                                     # array of SN events for each seed in returnedSeeds
+    returnedTimes = []                                      # array of times for each event in returnedEvents (for each seed in returnedSeeds)
+
+    lastSeed = -1                                           # initialize most recently processed seed
+    for seedIndex, thisSeed in enumerate(snSeeds):          # iterate over all SN file entries
+        thisTime = snTimes[seedIndex]                       # time for this SN file entry
+        thisEvent = (snProgStype[seedIndex], snRemnStype[seedIndex], 
+                     snWhichProg[seedIndex], snIsUnbound[seedIndex]) # construct event tuple
+               
+        # If this is an entirely new seed:
+        if thisSeed != lastSeed:                            # same seed as last seed processed?
+            returnedSeeds.append(thisSeed)                  # no - new seed, record it
+            returnedTimes.append([thisTime])                #   initialize the list of event times for this seed
+            returnedEvents.append([thisEvent])              #   initialize the list of events for this seed
+            lastSeed = thisSeed                             #   update the latest seed
+        else:                                               # yes - second SN event for this seed
+            returnedTimes[-1].append(thisTime)              #   append time at end of array
+            returnedEvents[-1].append(thisEvent)            #   append event at end of array
+                
+    return returnedSeeds, returnedEvents, returnedTimes     # see above for description
+
+
+def getEventHistory(h5file, exclude_null=False):
+    """
+    Get the event history for all seeds, including both RLOF and SN events, in chronological order.
+    IN:
+        h5file (h5.File() type): COMPAS HDF5 output file
+        exclude_null (bool): whether or not to exclude seeds which undergo no RLOF or SN events
+    OUT:
+        returnedSeeds (list): ordered list of all seeds in the output
+        returnedEvents (list): a list (corresponding to the ordered seeds above) of the 
+            collected SN and MT events from the getMtEvents and getSnEvents functions above,
+            themselves ordered chronologically
+    """
+    SP = h5file['BSE_System_Parameters']
+    MT = h5file['BSE_RLOF']
+    SN = h5file['BSE_Supernovae']
+    allSeeds = SP['SEED'][()]                                              # get all seeds
+    mtSeeds, mtEvents, mtTimes = getMtEvents(MT)                           # get MT events
+    snSeeds, snEvents, snTimes = getSnEvents(SN)                           # get SN events
+
+    numMtSeeds = len(mtSeeds)                                               # number of MT events
+    numSnSeeds = len(snSeeds)                                               # number of SN events
+
+    if numMtSeeds < 1 and numSnSeeds < 1: return []                         # no events - return empty history
+
+    returnedSeeds = []                                                      # array of seeds - will only contain seeds that have events (of any type)
+    returnedEvents = []                                                     # array of events - same size as returnedSeeds (includes event times)
+
+    eventOrdering = ['RL', 'CE', 'SN', 'MG']                                # order of preference for simultaneous events
+
+    mtIndex = 0                                                             # index into MT events arrays
+    snIndex = 0                                                             # index into SN events arrays
+
+    if exclude_null:
+        seedsToIterate = np.sort(np.unique(np.append(mtSeeds, snSeeds)))    # iterate over all the seeds that have either MT or SN events
+    else:
+        seedsToIterate = allSeeds
+        
+    idxOrdered = np.argsort(seedsToIterate)
+    returnedSeeds = [None] * np.size(seedsToIterate)                        # array of seeds - will only contain seeds that have events (of any type)
+    returnedEvents = [None] * np.size(seedsToIterate)                       # array of events - same size as returnedSeeds (includes event times)
+
+    for idx in idxOrdered:
+        seed = seedsToIterate[idx]
+        seedEvents = []                                                     # initialise the events for the seed being processed
+
+        # Collect any MT events for this seed, add the time of the event and the event type
+        while mtIndex < numMtSeeds and mtSeeds[mtIndex] == seed:
+            for eventIndex, event in enumerate(mtEvents[mtIndex]):
+                _, _, isRL1, isRL2, isCEE, isMrg = event
+                eventKey = 'MG' if isMrg else 'CE' if isCEE else 'RL'       # event type: Mrg, CEE, RLOF 1->2, RLOF 2->1
+                seedEvents.append((eventKey, mtTimes[mtIndex][eventIndex], *mtEvents[mtIndex][eventIndex]))
+            mtIndex += 1
+
+        # Collect any SN events for this seed, add the time of the event and the event type 
+        while snIndex < numSnSeeds and snSeeds[snIndex] == seed:
+            for eventIndex, event in enumerate(snEvents[snIndex]):
+                eventKey = 'SN' 
+                seedEvents.append((eventKey, snTimes[snIndex][eventIndex], *snEvents[snIndex][eventIndex]))
+            snIndex += 1
+
+        seedEvents.sort(key=lambda ev:(ev[1], eventOrdering.index(ev[0])))  # sort the events by time and event type (MT before SN if at the same time)
+
+        returnedSeeds[idx] = seed                                           # record the seed in the seeds array being returned
+        returnedEvents[idx] = seedEvents                                    # record the events for this seed in the events array being returned
+
+    return returnedSeeds, returnedEvents                                    # see above for details
+
+
+
+###########################################
+# ## Produce strings of the event histories
+
+stellarTypeDict = {
+    0:  'MS',
+    1:  'MS',
+    2:  'HG',
+    3:  'GB',
+    4:  'GB',
+    5:  'GB',
+    6:  'GB',
+    7:  'HE',
+    8:  'HE',
+    9:  'HE',
+    10: 'WD',
+    11: 'WD',
+    12: 'WD',
+    13: 'NS',
+    14: 'BH',
+    15: 'MR',
+    16: 'MS',
+}
+
+def buildEventString(events, useIntStypes=False):
+    """
+    Function to produce a string representing the event history of a single binary for quick readability.
+    NOTE: unvectorized, do the vectorization later for a speed up
+    IN:
+        events (list of tuples): events output from getEventHistory()
+    OUT:
+        eventString (string): string representing the event history of the binary
+    
+    MT strings look like: 
+        P>S, P<S, or P=S where P is primary type, S is secondary type, 
+        and >, < is RLOF (1->2 or 1<-2) or otherwise = for CEE or & for Merger
+
+    SN strings look like:
+        P*SR for star1 the SN progenitor,or 
+        R*SP for star2 the SN progenitor,
+        where P is progenitor type, R is remnant type, 
+        S is state (I for intact, U for unbound)
+
+    Event strings for the same seed are separated by the undesrcore character ('_')
+    """
+    def _remap_stype(int_stype):
+        if useIntStypes:
+            return str(int_stype)
+        else:
+            return stellarTypeDict[int_stype]
+    
+    # Empty event
+    if len(events) == 0:
+        return 'NA'
+        
+    eventStr = ''
+    for event in events:
+        if event[0] == 'SN':                                               # SN event
+            _, time, stypeP, stypeC, whichSN, isUnbound = event
+            if whichSN == 1:                                               # Progenitor or Remnant depending upon which star is the SN
+                charL = _remap_stype(stypeP)
+                charR = _remap_stype(stypeC)
+            else:
+                charL = _remap_stype(stypeC)
+                charR = _remap_stype(stypeP)
+            charM = '*u' if isUnbound else '*i'                            # unbound or intact
+        else:                                                              # Any of the MT events 
+            _, time, stype1, stype2, isRL1, isRL2, isCEE, isMrg = event
+            charL = _remap_stype(stype1)                                   # primary stellar type
+            charR = _remap_stype(stype2)                                   # secondary stellar type
+            charM =      '&' if isMrg \
+                    else '=' if isCEE \
+                    else '<' if isRL2 \
+                    else '>'                                               # event type: CEE, RLOF 2->1, RLOF 1->2
+        eventStr += "{}{}{}_".format(charL, charM, charR)                   # event string for this star, _ is event separator
+
+    return np.array(eventStr[:-1], dtype=np.str_)                          # return event string for this star (pop the last underscore first)
+
+def getEventStrings(h5file=None, allEvents=None, useIntStypes=False):
+    """
+    Function to calculate the event history strings for either the entire Compas output, or some list of events
+    IN: One of
+        h5file (h5.File() type): COMPAS HDF5 output file
+        allEvents (list of tuples)
+    OUT:
+        eventStrings (list): list of strings of the event history of each seed
+    """
+
+    # If output is 
+    if (h5file == None) & (allEvents == None):
+        return 
+    elif (allEvents == None):
+        _, allEvents = getEventHistory(h5file)
+
+    eventStrings = np.zeros(len(allEvents), dtype=StringDType())
+    for ii, eventsForGivenSeed in enumerate(allEvents):  
+        eventString = buildEventString(eventsForGivenSeed, useIntStypes=useIntStypes)
+        eventStrings[ii] = eventString                                     # append event string for this star (pop the last underscore first)
+
+    return eventStrings
+
+""
+
+
+""
+

From b401059f3f64db65a701890e2b0b984b5aeb011f Mon Sep 17 00:00:00 2001
From: Reinhold Willcox <reinhold.willcox@gmail.com>
Date: Fri, 26 Jun 2026 17:52:09 +0200
Subject: [PATCH 2/4] big cleanup of post processing utils

---
 compas_python_utils/compasUtils.py     | 355 --------------------
 compas_python_utils/debugging_utils.py | 440 +++++++++++++++++++++++++
 2 files changed, 440 insertions(+), 355 deletions(-)
 delete mode 100644 compas_python_utils/compasUtils.py
 create mode 100644 compas_python_utils/debugging_utils.py

diff --git a/compas_python_utils/compasUtils.py b/compas_python_utils/compasUtils.py
deleted file mode 100644
index 2cc126c68..000000000
--- a/compas_python_utils/compasUtils.py
+++ /dev/null
@@ -1,355 +0,0 @@
-import h5py as h5
-import numpy as np
-import pandas as pd
-from numpy.dtypes import StringDType
-
-
-########################################################################
-# ## Function to print the data from a given COMPAS HDF5 group in a readable pandas template
-
-def printCompasDetails(data, *seeds, mask=()):
-    """
-    Function to print the full Compas output for given seeds, optionally with an additional mask
-    """
-    list_of_keys = list(data.keys())
-
-    # Check if seed parameter exists - if not, just print without (e.g RunDetails)
-    if ('SEED' in list_of_keys) | ('SEED>MT' in list_of_keys): # Most output files 
-        #SEED>MT is a relic from older versions, but we leave this in for backwards compatibility
-
-        # Set the seed name parameter, mask on seeds as needed, and set the index
-        seedVariableName='SEED' if ('SEED' in list_of_keys) else 'SEED>MT'
-        list_of_keys.remove(seedVariableName) # this is the index above, don't want to include it
-    
-        allSeeds = data[seedVariableName][()]
-        seedsMask = np.isin(allSeeds, seeds)
-        if len(seeds) == 0: # if any seed is included, do not reset the mask
-            seedsMask = np.ones_like(allSeeds).astype(bool)
-        if mask == ():
-            mask = np.ones_like(allSeeds).astype(bool)
-        mask &= seedsMask
-
-        df = pd.DataFrame.from_dict({param: data[param][()][mask] for param in list(data.keys())}).set_index(seedVariableName).T
-
-    else: # No seed parameter, so do custom print for Run Details
-
-        # Get just the keys without the -Derivation suffix - those will be a second column
-        keys_not_derivations = []
-        for key in list_of_keys:
-            if '-Derivation' not in key:
-                keys_not_derivations.append(key)
-        
-        # Some parameter values are string types, formatted as np.bytes_, need to convert back
-        def convert_strings(param_array):
-            if isinstance(param_array[0], np.bytes_):
-                return param_array.astype(str)
-            else:
-                return param_array
-
-        df_keys = pd.DataFrame.from_dict({param: convert_strings(data[param][()]) for param in keys_not_derivations }).T
-        nCols = df_keys.shape[1] # Required only because if we combine RDs, we get many columns (should fix later)
-        df_keys.columns = ['Parameter']*nCols
-        df_drvs = pd.DataFrame.from_dict({param: convert_strings(data[param+'-Derivation'][()]) for param in keys_not_derivations }).T
-        df_drvs.columns = ['Derivation']*nCols
-        df = pd.concat([df_keys, df_drvs], axis=1)
-
-    # Add units as first col
-    units_dict = {key:data[key].attrs['units'].astype(str) for key in list_of_keys}
-    df.insert(loc=0, column='(units)', value=pd.Series(units_dict))
-    return df
-
-
-
-########################################################################
-# ## Get event histories of MT data, SN data, and combined MT, SN data
-
-def getMtEvents(MT):                                     
-    """
-    This function takes in the `BSE_RLOF` output category from COMPAS, and returns the information
-    on the Mass Transfer (MT) events that happen for each seed. The events do not have to be in order, 
-    either chronologically or by seed, this function will reorder them as required.
-    
-    OUT:
-        returnedSeeds (list): ordered list of the unique seeds in the MT file
-        returnedEvents (list): list of sublists, where each sublist contains all the MT events for a given seed.
-            MT event tuples take the form :
-            (stellarTypePrimary, stellarTypeSecondary, isRlof1, isRlof2, isCEE, isMrg)
-        returnTimes (list): is a list of sublists of times of each of the MT events
-    """
-    mtSeeds = MT['SEED'][()]
-    mtTimes = MT['Time<MT'][()]
-    mtPrimaryStype = MT['Stellar_Type(1)<MT'][()]
-    mtSecondaryStype = MT['Stellar_Type(2)<MT'][()]
-    mtIsRlof1 = MT['RLOF(1)>MT'][()] == 1
-    mtIsRlof2 = MT['RLOF(2)>MT'][()] == 1
-    mtIsCEE = MT['CEE>MT'][()] == 1
-    mtIsMrg = MT['Merger'][()] == 1
-
-    # We want the return arrays sorted by seed, so sort here.
-    mtSeedsInds = np.lexsort((mtTimes, mtSeeds)) # sort by seeds then times - lexsort sorts by the last column first...
-    mtSeeds = mtSeeds[mtSeedsInds]  
-    mtTimes = mtTimes[mtSeedsInds]  
-    mtPrimaryStype = mtPrimaryStype[mtSeedsInds]
-    mtSecondaryStype = mtSecondaryStype[mtSeedsInds]
-    mtIsRlof1 = mtIsRlof1[mtSeedsInds]
-    mtIsRlof2 = mtIsRlof2[mtSeedsInds]
-    mtIsCEE = mtIsCEE[mtSeedsInds]
-    mtIsMrg = mtIsMrg[mtSeedsInds]
-
-    # Process the MT events
-    returnedSeeds = []                                      # array of seeds - will only contain seeds that have MT events
-    returnedEvents = []                                     # array of MT events for each seed in returnedSeeds
-    returnedTimes = []                                      # array of times for each event in returnedEvents (for each seed in returnedSeeds)
-
-    lastSeed = -1                                           # initialize most recently processed seed
-    for seedIndex, thisSeed in enumerate(mtSeeds):          # iterate over all RLOF file entries
-        thisTime = mtTimes[seedIndex]                       # time for this RLOF file entry
-        thisEvent = (mtPrimaryStype[seedIndex], mtSecondaryStype[seedIndex], 
-                     mtIsRlof1[seedIndex], mtIsRlof2[seedIndex], mtIsCEE[seedIndex], mtIsMrg[seedIndex])  # construct event tuple
-
-        # If this is an entirely new seed:
-        if thisSeed != lastSeed:                            # same seed as last seed processed?
-            returnedSeeds.append(thisSeed)                  # no - new seed, record it
-            returnedTimes.append([thisTime])                # initialize the list of event times for this seed
-            returnedEvents.append([thisEvent])              # initialize the list of events for this seed
-            lastSeed = thisSeed                             # update the latest seed
-
-        # Add event, if it is not a duplicate
-        try:
-            eventIndex = returnedEvents[-1].index(thisEvent)  # find eventIndex of this particular event tuple in the array of events for this seed 
-            if thisTime > returnedTimes[-1][eventIndex]:      # ^ if event is not a duplicate, this will throw a ValueError 
-                returnedTimes[-1][eventIndex] = thisTime      # if event is duplicate, update time to the later of the duplicates
-        except ValueError:                                    # event is not a duplicate:
-            returnedEvents[-1].append(thisEvent)              # record new event tuple for this seed
-            returnedTimes[-1].append(thisTime)                # record new event time for this seed
-
-    return returnedSeeds, returnedEvents, returnedTimes       # see above for description
-
-
-def getSnEvents(SN):                                     
-    """
-    This function takes in the `BSE_Supernovae` output category from COMPAS, and returns the information
-    on the Supernova (SN) events that happen for each seed. The events do not have to be in order chronologically,
-    this function will reorder them as required.
-    
-    OUT:
-        returnedSeeds (list): ordered list of all the unique seeds in the SN file
-        returnedEvents (list): list of sublists, where each sublist contains all the SN events for a given seed.
-            SN event tuples take the form :
-            (stellarTypeProgenitor, stellarTypeRemnant, whichStarIsProgenitor, isBinaryUnbound)
-        returnedTimes (list): is a list of sublists of times of each of the SN events
-    """
-    snSeeds = SN['SEED'][()]
-    snTimes = SN['Time'][()]
-    snProgStype = SN['Stellar_Type_Prev(SN)'][()]
-    snRemnStype = SN['Stellar_Type(SN)'][()]
-    snWhichProg = SN['Supernova_State'][()]
-    snIsUnbound = SN['Unbound'][()] == 1
-
-    # We want the return arrays sorted by seed, so sort here.
-    snSeedsInds = np.lexsort((snTimes, snSeeds)) # sort by seeds then times - lexsort sorts by the last column first...
-    snSeeds = snSeeds[snSeedsInds]  
-    snTimes = snTimes[snSeedsInds]  
-    snProgStype = snProgStype[snSeedsInds]
-    snRemnStype = snRemnStype[snSeedsInds]
-    snWhichProg = snWhichProg[snSeedsInds]
-    snIsUnbound = snIsUnbound[snSeedsInds]
-    
-    # Process the SN events
-    returnedSeeds = []                                      # array of seeds - will only contain seeds that have SN events
-    returnedEvents = []                                     # array of SN events for each seed in returnedSeeds
-    returnedTimes = []                                      # array of times for each event in returnedEvents (for each seed in returnedSeeds)
-
-    lastSeed = -1                                           # initialize most recently processed seed
-    for seedIndex, thisSeed in enumerate(snSeeds):          # iterate over all SN file entries
-        thisTime = snTimes[seedIndex]                       # time for this SN file entry
-        thisEvent = (snProgStype[seedIndex], snRemnStype[seedIndex], 
-                     snWhichProg[seedIndex], snIsUnbound[seedIndex]) # construct event tuple
-               
-        # If this is an entirely new seed:
-        if thisSeed != lastSeed:                            # same seed as last seed processed?
-            returnedSeeds.append(thisSeed)                  # no - new seed, record it
-            returnedTimes.append([thisTime])                #   initialize the list of event times for this seed
-            returnedEvents.append([thisEvent])              #   initialize the list of events for this seed
-            lastSeed = thisSeed                             #   update the latest seed
-        else:                                               # yes - second SN event for this seed
-            returnedTimes[-1].append(thisTime)              #   append time at end of array
-            returnedEvents[-1].append(thisEvent)            #   append event at end of array
-                
-    return returnedSeeds, returnedEvents, returnedTimes     # see above for description
-
-
-def getEventHistory(h5file, exclude_null=False):
-    """
-    Get the event history for all seeds, including both RLOF and SN events, in chronological order.
-    IN:
-        h5file (h5.File() type): COMPAS HDF5 output file
-        exclude_null (bool): whether or not to exclude seeds which undergo no RLOF or SN events
-    OUT:
-        returnedSeeds (list): ordered list of all seeds in the output
-        returnedEvents (list): a list (corresponding to the ordered seeds above) of the 
-            collected SN and MT events from the getMtEvents and getSnEvents functions above,
-            themselves ordered chronologically
-    """
-    SP = h5file['BSE_System_Parameters']
-    MT = h5file['BSE_RLOF']
-    SN = h5file['BSE_Supernovae']
-    allSeeds = SP['SEED'][()]                                              # get all seeds
-    mtSeeds, mtEvents, mtTimes = getMtEvents(MT)                           # get MT events
-    snSeeds, snEvents, snTimes = getSnEvents(SN)                           # get SN events
-
-    numMtSeeds = len(mtSeeds)                                               # number of MT events
-    numSnSeeds = len(snSeeds)                                               # number of SN events
-
-    if numMtSeeds < 1 and numSnSeeds < 1: return []                         # no events - return empty history
-
-    returnedSeeds = []                                                      # array of seeds - will only contain seeds that have events (of any type)
-    returnedEvents = []                                                     # array of events - same size as returnedSeeds (includes event times)
-
-    eventOrdering = ['RL', 'CE', 'SN', 'MG']                                # order of preference for simultaneous events
-
-    mtIndex = 0                                                             # index into MT events arrays
-    snIndex = 0                                                             # index into SN events arrays
-
-    if exclude_null:
-        seedsToIterate = np.sort(np.unique(np.append(mtSeeds, snSeeds)))    # iterate over all the seeds that have either MT or SN events
-    else:
-        seedsToIterate = allSeeds
-        
-    idxOrdered = np.argsort(seedsToIterate)
-    returnedSeeds = [None] * np.size(seedsToIterate)                        # array of seeds - will only contain seeds that have events (of any type)
-    returnedEvents = [None] * np.size(seedsToIterate)                       # array of events - same size as returnedSeeds (includes event times)
-
-    for idx in idxOrdered:
-        seed = seedsToIterate[idx]
-        seedEvents = []                                                     # initialise the events for the seed being processed
-
-        # Collect any MT events for this seed, add the time of the event and the event type
-        while mtIndex < numMtSeeds and mtSeeds[mtIndex] == seed:
-            for eventIndex, event in enumerate(mtEvents[mtIndex]):
-                _, _, isRL1, isRL2, isCEE, isMrg = event
-                eventKey = 'MG' if isMrg else 'CE' if isCEE else 'RL'       # event type: Mrg, CEE, RLOF 1->2, RLOF 2->1
-                seedEvents.append((eventKey, mtTimes[mtIndex][eventIndex], *mtEvents[mtIndex][eventIndex]))
-            mtIndex += 1
-
-        # Collect any SN events for this seed, add the time of the event and the event type 
-        while snIndex < numSnSeeds and snSeeds[snIndex] == seed:
-            for eventIndex, event in enumerate(snEvents[snIndex]):
-                eventKey = 'SN' 
-                seedEvents.append((eventKey, snTimes[snIndex][eventIndex], *snEvents[snIndex][eventIndex]))
-            snIndex += 1
-
-        seedEvents.sort(key=lambda ev:(ev[1], eventOrdering.index(ev[0])))  # sort the events by time and event type (MT before SN if at the same time)
-
-        returnedSeeds[idx] = seed                                           # record the seed in the seeds array being returned
-        returnedEvents[idx] = seedEvents                                    # record the events for this seed in the events array being returned
-
-    return returnedSeeds, returnedEvents                                    # see above for details
-
-
-
-###########################################
-# ## Produce strings of the event histories
-
-stellarTypeDict = {
-    0:  'MS',
-    1:  'MS',
-    2:  'HG',
-    3:  'GB',
-    4:  'GB',
-    5:  'GB',
-    6:  'GB',
-    7:  'HE',
-    8:  'HE',
-    9:  'HE',
-    10: 'WD',
-    11: 'WD',
-    12: 'WD',
-    13: 'NS',
-    14: 'BH',
-    15: 'MR',
-    16: 'MS',
-}
-
-def buildEventString(events, useIntStypes=False):
-    """
-    Function to produce a string representing the event history of a single binary for quick readability.
-    NOTE: unvectorized, do the vectorization later for a speed up
-    IN:
-        events (list of tuples): events output from getEventHistory()
-    OUT:
-        eventString (string): string representing the event history of the binary
-    
-    MT strings look like: 
-        P>S, P<S, or P=S where P is primary type, S is secondary type, 
-        and >, < is RLOF (1->2 or 1<-2) or otherwise = for CEE or & for Merger
-
-    SN strings look like:
-        P*SR for star1 the SN progenitor,or 
-        R*SP for star2 the SN progenitor,
-        where P is progenitor type, R is remnant type, 
-        S is state (I for intact, U for unbound)
-
-    Event strings for the same seed are separated by the undesrcore character ('_')
-    """
-    def _remap_stype(int_stype):
-        if useIntStypes:
-            return str(int_stype)
-        else:
-            return stellarTypeDict[int_stype]
-    
-    # Empty event
-    if len(events) == 0:
-        return 'NA'
-        
-    eventStr = ''
-    for event in events:
-        if event[0] == 'SN':                                               # SN event
-            _, time, stypeP, stypeC, whichSN, isUnbound = event
-            if whichSN == 1:                                               # Progenitor or Remnant depending upon which star is the SN
-                charL = _remap_stype(stypeP)
-                charR = _remap_stype(stypeC)
-            else:
-                charL = _remap_stype(stypeC)
-                charR = _remap_stype(stypeP)
-            charM = '*u' if isUnbound else '*i'                            # unbound or intact
-        else:                                                              # Any of the MT events 
-            _, time, stype1, stype2, isRL1, isRL2, isCEE, isMrg = event
-            charL = _remap_stype(stype1)                                   # primary stellar type
-            charR = _remap_stype(stype2)                                   # secondary stellar type
-            charM =      '&' if isMrg \
-                    else '=' if isCEE \
-                    else '<' if isRL2 \
-                    else '>'                                               # event type: CEE, RLOF 2->1, RLOF 1->2
-        eventStr += "{}{}{}_".format(charL, charM, charR)                   # event string for this star, _ is event separator
-
-    return np.array(eventStr[:-1], dtype=np.str_)                          # return event string for this star (pop the last underscore first)
-
-def getEventStrings(h5file=None, allEvents=None, useIntStypes=False):
-    """
-    Function to calculate the event history strings for either the entire Compas output, or some list of events
-    IN: One of
-        h5file (h5.File() type): COMPAS HDF5 output file
-        allEvents (list of tuples)
-    OUT:
-        eventStrings (list): list of strings of the event history of each seed
-    """
-
-    # If output is 
-    if (h5file == None) & (allEvents == None):
-        return 
-    elif (allEvents == None):
-        _, allEvents = getEventHistory(h5file)
-
-    eventStrings = np.zeros(len(allEvents), dtype=StringDType())
-    for ii, eventsForGivenSeed in enumerate(allEvents):  
-        eventString = buildEventString(eventsForGivenSeed, useIntStypes=useIntStypes)
-        eventStrings[ii] = eventString                                     # append event string for this star (pop the last underscore first)
-
-    return eventStrings
-
-""
-
-
-""
-
diff --git a/compas_python_utils/debugging_utils.py b/compas_python_utils/debugging_utils.py
new file mode 100644
index 000000000..76498c96c
--- /dev/null
+++ b/compas_python_utils/debugging_utils.py
@@ -0,0 +1,440 @@
+import h5py as h5
+import numpy as np
+import pandas as pd
+from numpy.dtypes import StringDType
+from typing import NewType
+
+""
+# New Types
+MaskNdarray = NewType('MaskNdarray', np.ndarray[bool])
+H5File = NewType('H5File', h5._hl.files.File)
+H5Group = NewType('H5Group', h5._hl.group.Group)
+MtEventTuple = NewType('MtEventTuple', tuple[ np.ndarray[int], np.ndarray[int], np.ndarray[bool], np.ndarray[bool], np.ndarray[bool], np.ndarray[bool]])
+SnEventTuple = NewType('SnEventTuple', tuple[ np.ndarray[int], np.ndarray[int], np.ndarray[bool], np.ndarray[bool]])
+EventHistoryString = NewType('EventHistoryString', np.array(np.str_))
+
+
+########################################################################
+# ## Function to print the data from a given COMPAS HDF5 group in a readable pandas template
+
+def print_compas_details_dataframe(data: H5Group, 
+                                   *seeds: int, 
+                                   mask: MaskNdarray=()) -> pd.DataFrame:
+    """Return a pd.DataFrame for the COMPAS output contained in an H5Group.
+
+    Parameters
+    ----------
+    data : H5Group
+        an H5Group containing COMPAS data
+    seeds : int or array_like of ints, optional
+        seeds of specific systems to include (default is to include all seeds)
+    mask : array_like of booleans, optional
+        boolean mask to filter out specific systems (default is no mask)
+        Must be same length as data['SEED']
+
+    Returns 
+    -------
+    compas_details : pd.DataFrame
+        The values of each parameter in the H5Group, excluding those removed by `seeds` or `mask`.
+
+    Notes
+    -----
+    Designed for easier visualization in a jupyter notebook, for inspection / debugging purposes. 
+    If both `seeds` and `mask` are supplied, resulting systems must pass both filters.
+
+    Examples
+    --------
+    >>> mt_data = h5.File('COMPAS_Output.h5')['BSE_RLOF']) 
+    >>> print_compas_details_dataframe(mt_data)
+    [output of all BSE_RLOF events]
+    
+    >>> mt_seeds = mt_data['SEED'][()]
+    >>> print_compas_details_dataframe(mt_data, mt_seeds[:50])
+    [output of all BSE_RLOF events occuring in the first 50 seeds]
+    
+    >>> cee_events = mt_data['CEE>MT'][()] == 1 # needed to convert to boolean mask
+    >>> print_compas_details_dataframe(mt_data, mt_seeds[:50], mask=cee_events)
+    [output of all Common Envelope events occuring in the first 50 seeds]
+    """
+    list_of_keys = list(data.keys())
+
+    # Check if SEED parameter exists in data
+    if ('SEED' in list_of_keys) | ('SEED>MT' in list_of_keys): 
+        seed_variable_name='SEED' if ('SEED' in list_of_keys) else 'SEED>MT' #SEED>MT is a relic from older versions, but we leave this in for backwards compatibility
+
+        # If `seeds` or `mask` arguments supplied, create the relevant mask
+        all_seeds = data[seed_variable_name][()]
+        seeds_mask = np.isin(all_seeds, seeds)
+        if len(seeds) == 0: # If `seeds` argument is not supplied, set the default mask
+            seeds_mask = np.ones_like(all_seeds).astype(bool)
+        if mask == ():
+            mask = np.ones_like(all_seeds).astype(bool)
+        mask &= seeds_mask
+        df = pd.DataFrame.from_dict({param: data[param][()][mask] for param in list_of_keys}).set_index(seed_variable_name).T
+
+    else: # No seed parameter, currently only applies to the RunDetails H5Group 
+        keys_not_derivations = [key for key in list_of_keys  if '-Derivation' not in key] # Get just the keys without the -Derivation suffix
+        
+        # Some parameter values are string types, formatted as np.bytes_, need to convert back
+        def convert_strings(param_array):
+            if isinstance(param_array[0], np.bytes_):
+                return param_array.astype(str)
+            else:
+                return param_array
+
+        df_keys = pd.DataFrame.from_dict({param: convert_strings(data[param][()]) for param in keys_not_derivations }).T
+        nCols = df_keys.shape[1] # Required only because if we have combined RunDetails output from a previous h5copy, we get many columns (should fix later)
+        df_keys.columns = ['Parameter']*nCols
+        df_drvs = pd.DataFrame.from_dict({param: convert_strings(data[param+'-Derivation'][()]) for param in keys_not_derivations }).T
+        df_drvs.columns = ['Derivation']*nCols
+        df = pd.concat([df_keys, df_drvs], axis=1)
+
+    # Add units as first col
+    units_dict = {key:data[key].attrs['units'].astype(str) for key in list_of_keys}
+    df.insert(loc=0, column='(units)', value=pd.Series(units_dict))
+    return df
+
+
+
+########################################################################
+# ## Get event histories of MT data, SN data, and combined MT, SN data
+
+def get_mt_data_tuple(mt_data: H5Group) -> tuple[list, list, list]:
+    """Calculates the EventTuple for the BSE_RLOF output H5Group.
+
+    Parameters
+    ----------
+    mt_data : H5Group
+        the COMPAS output H5Group corresponding to BSE_RLOF
+
+    Returns
+    -------
+    returned_seeds : list 
+        an ordered list of the unique seeds in the mt_data file, 
+    returned_events : list
+        a list of sublists, one sublist per seed, where each sublist 
+        contains all the MtEventTuples for the given seed
+    returned_times : list
+        a list of sublists of times of each of the mt_data events.
+
+    Notes
+    -----
+    MtEventTuples take the form: (stellar_type_primary, stellar_type_secondary, is_rlof1, is_rlof2, is_cee, is_mrg).
+    The events in the input do not have to be ordered chronologically, this function orders them, in the event
+    that the input was coallated from a previous h5copy command. The output is not meant to be used generally,
+    it is just a feeder for get_event_history below.
+    """
+    mt_seeds = mt_data['SEED'][()]
+
+    mt_times = mt_data['Time<MT'][()]
+    mt_primary_stype = mt_data['Stellar_Type(1)<MT'][()]
+    mt_secondary_stype = mt_data['Stellar_Type(2)<MT'][()]
+    mt_is_rlof1 = mt_data['RLOF(1)>MT'][()] == 1
+    mt_is_rlof2 = mt_data['RLOF(2)>MT'][()] == 1
+    mt_is_cee = mt_data['CEE>MT'][()] == 1
+    mt_is_mrg = mt_data['Merger'][()] == 1
+
+    # We want the return arrays sorted by seed, so sort here.
+    mt_seeds_idx = np.lexsort((mt_times, mt_seeds)) # sort by seeds then times - lexsort sorts by the last column first...
+    mt_seeds = mt_seeds[mt_seeds_idx]  
+    mt_times = mt_times[mt_seeds_idx]  
+    mt_primary_stype = mt_primary_stype[mt_seeds_idx]
+    mt_secondary_stype = mt_secondary_stype[mt_seeds_idx]
+    mt_is_rlof1 = mt_is_rlof1[mt_seeds_idx]
+    mt_is_rlof2 = mt_is_rlof2[mt_seeds_idx]
+    mt_is_cee = mt_is_cee[mt_seeds_idx]
+    mt_is_mrg = mt_is_mrg[mt_seeds_idx]
+
+    # Process the mt_data events
+    returned_seeds = []                                      # array of seeds - will only contain seeds that have mt_data events
+    returned_events = []                                     # array of mt_data events for each seed in returned_seeds
+    returned_times = []                                      # array of times for each event in returned_events (for each seed in returned_seeds)
+
+    last_seed = -1                                           # initialize most recently processed seed
+    for seed_index, this_seed in enumerate(mt_seeds):          # iterate over all RLOF file entries
+        this_time = mt_times[seed_index]                       # time for this RLOF file entry
+        this_event = (mt_primary_stype[seed_index], mt_secondary_stype[seed_index], 
+                     mt_is_rlof1[seed_index], mt_is_rlof2[seed_index], mt_is_cee[seed_index], mt_is_mrg[seed_index])  # construct event tuple
+
+        # If this is an entirely new seed:
+        if this_seed != last_seed:                            # same seed as last seed processed?
+            returned_seeds.append(this_seed)                  # no - new seed, record it
+            returned_times.append([this_time])                # initialize the list of event times for this seed
+            returned_events.append([this_event])              # initialize the list of events for this seed
+            last_seed = this_seed                             # update the latest seed
+
+        # Add event, if it is not a duplicate
+        try:
+            event_index = returned_events[-1].index(this_event)  # find event_index of this particular event tuple in the array of events for this seed 
+            if this_time > returned_times[-1][event_index]:      # ^ if event is not a duplicate, this will throw a ValueError 
+                returned_times[-1][event_index] = this_time      # if event is duplicate, update time to the later of the duplicates
+        except ValueError:                                    # event is not a duplicate:
+            returned_events[-1].append(this_event)              # record new event tuple for this seed
+            returned_times[-1].append(this_time)                # record new event time for this seed
+
+    return returned_seeds, returned_events, returned_times       # see above for description
+
+
+def get_sn_data_tuple(sn_data: H5Group) -> tuple[list, list, list]:
+    """Calculates the EventTuple for the BSE_Supernovae output H5Group.
+
+    Parameters
+    ----------
+    sn_data : H5Group
+        the COMPAS output H5Group corresponding to BSE_Supernovae
+
+    Returns
+    -------
+    returned_seeds : list 
+        an ordered list of the unique seeds in the sn_data file, 
+    returned_events : list
+        a list of sublists, one sublist per seed, where each sublist 
+        contains all the SnEventTuples for the given seed
+    returned_times : list
+        a list of sublists of times of each of the sn_data events.
+
+    Notes
+    -----
+    SnEventTuples take the form: (stellar_type_progenitor, stellar_type_remnant, which_starIsProgenitor, is_binary_unbound).
+    The events in the input do not have to be ordered chronologically, this function orders them, in the event
+    that the input was coallated from a previous h5copy command. The output is not meant to be used generally,
+    it is just a feeder for get_event_history below.
+    """
+    sn_seeds = sn_data['SEED'][()]
+    sn_times = sn_data['Time'][()]
+    sn_prog_stype = sn_data['Stellar_Type_Prev(SN)'][()]
+    sn_remn_stype = sn_data['Stellar_Type(SN)'][()]
+    sn_which_prog = sn_data['Supernova_State'][()]
+    sn_is_unbound = sn_data['Unbound'][()] == 1
+
+    # We want the return arrays sorted by seed, so sort here.
+    sn_seeds_idx = np.lexsort((sn_times, sn_seeds)) # sort by seeds then times - lexsort sorts by the last column first...
+    sn_seeds = sn_seeds[sn_seeds_idx]  
+    sn_times = sn_times[sn_seeds_idx]  
+    sn_prog_stype = sn_prog_stype[sn_seeds_idx]
+    sn_remn_stype = sn_remn_stype[sn_seeds_idx]
+    sn_which_prog = sn_which_prog[sn_seeds_idx]
+    sn_is_unbound = sn_is_unbound[sn_seeds_idx]
+    
+    # Process the sn_data events
+    returned_seeds = []                                      # array of seeds - will only contain seeds that have sn_data events
+    returned_events = []                                     # array of sn_data events for each seed in returned_seeds
+    returned_times = []                                      # array of times for each event in returned_events (for each seed in returned_seeds)
+
+    last_seed = -1                                           # initialize most recently processed seed
+    for seed_index, this_seed in enumerate(sn_seeds):          # iterate over all sn_data file entries
+        this_time = sn_times[seed_index]                       # time for this sn_data file entry
+        this_event = (sn_prog_stype[seed_index], sn_remn_stype[seed_index], 
+                     sn_which_prog[seed_index], sn_is_unbound[seed_index]) # construct event tuple
+               
+        # If this is an entirely new seed:
+        if this_seed != last_seed:                            # same seed as last seed processed?
+            returned_seeds.append(this_seed)                  # no - new seed, record it
+            returned_times.append([this_time])                #   initialize the list of event times for this seed
+            returned_events.append([this_event])              #   initialize the list of events for this seed
+            last_seed = this_seed                             #   update the latest seed
+        else:                                               # yes - second sn_data event for this seed
+            returned_times[-1].append(this_time)              #   append time at end of array
+            returned_events[-1].append(this_event)            #   append event at end of array
+                
+    return returned_seeds, returned_events, returned_times     # see above for description
+
+
+def get_event_history(data: H5File, include_null: bool=True) -> tuple[list, list]:
+    """Get the event history for all seeds, including both MT and SN events, in chronological order.
+
+    Parameters
+    ----------
+    data : H5File
+        the COMPAS output H5file 
+    include_null : bool
+        whether to include seeds which undergo no MT or SN events (default is True)
+
+    Returns 
+    -------
+    returned_seeds : list
+        an ordered list of all the unique seeds in the output file
+    returned_events : list                                    
+        a list where each element is a chronological set of events corresponding to the associated seed
+    """
+    sp_data = data['BSE_System_Parameters']
+    mt_data = data['BSE_RLOF']
+    sn_data = data['BSE_Supernovae']
+    all_seeds = sp_data['SEED'][()]                                           # get all seeds
+    mt_seeds, mt_events, mt_times = get_mt_data_tuple(mt_data)                # get mt data tuple
+    sn_seeds, sn_events, sn_times = get_sn_data_tuple(sn_data)                # get sn data tuple
+
+    num_mt_seeds = len(mt_seeds)                                              # number of mt_data events
+    num_sn_seeds = len(sn_seeds)                                              # number of sn_data events
+
+    if num_mt_seeds < 1 and num_sn_seeds < 1: return []                       # no events - return empty history
+
+    returned_seeds = []                                                       # array of seeds - will only contain seeds that have events (of any type)
+    returned_events = []                                                      # array of events - same size as returned_seeds (includes event times)
+
+    event_ordering = ['RL', 'CE', 'SN', 'MG']                                 # order of preference for simultaneous events
+
+    mt_index = 0                                                              # index into mt_data events arrays
+    sn_index = 0                                                              # index into sn_data events arrays
+
+    if include_null:
+        seeds_to_iterate = all_seeds
+    else:
+        seeds_to_iterate = np.sort(np.unique(np.append(mt_seeds, sn_seeds)))  # iterate over all the seeds that have either mt_data or sn_data events
+        
+    idx_ordered = np.argsort(seeds_to_iterate)
+    returned_seeds = [None] * np.size(seeds_to_iterate)                       # array of seeds - will only contain seeds that have events (of any type)
+    returned_events = [None] * np.size(seeds_to_iterate)                      # array of events - same size as returned_seeds (includes event times)
+
+    for idx in idx_ordered:
+        seed = seeds_to_iterate[idx]
+        seed_events = []                                                      # initialise the events for the seed being processed
+
+        # Collect any mt_data events for this seed, add the time of the event and the event type
+        while mt_index < num_mt_seeds and mt_seeds[mt_index] == seed:
+            for event_index, event in enumerate(mt_events[mt_index]):
+                _, _, is_rl1, is_rl2, is_cee, is_mrg = event
+                event_key = 'MG' if is_mrg else 'CE' if is_cee else 'RL'      # event type: Merger, CEE, RLOF 1->2, RLOF 2->1
+                seed_events.append((event_key, mt_times[mt_index][event_index], *mt_events[mt_index][event_index]))
+            mt_index += 1
+
+        # Collect any sn_data events for this seed, add the time of the event and the event type 
+        while sn_index < num_sn_seeds and sn_seeds[sn_index] == seed:
+            for event_index, event in enumerate(sn_events[sn_index]):
+                event_key = 'SN' 
+                seed_events.append((event_key, sn_times[sn_index][event_index], *sn_events[sn_index][event_index]))
+            sn_index += 1
+
+        seed_events.sort(key=lambda ev:(ev[1], event_ordering.index(ev[0])))  # sort the events by time and event type (mt_data before sn_data if at the same time)
+        returned_seeds[idx] = seed                                            # record the seed in the seeds array being returned
+        returned_events[idx] = seed_events                                    # record the events for this seed in the events array being returned
+    return returned_seeds, returned_events                                    
+
+
+###########################################
+# ## Produce strings of the event histories
+
+stellar_type_dict = {
+    0:  'MS',
+    1:  'MS',
+    2:  'HG',
+    3:  'GB',
+    4:  'GB',
+    5:  'GB',
+    6:  'GB',
+    7:  'HE',
+    8:  'HE',
+    9:  'HE',
+    10: 'WD',
+    11: 'WD',
+    12: 'WD',
+    13: 'NS',
+    14: 'BH',
+    15: 'MR',
+    16: 'MS',
+}
+
+
+def build_event_string(events: list, use_int_stypes: bool=False) -> EventHistoryString:
+    """Produce a string representation of the event history of a single binary.
+
+    Parameters
+    ----------
+    events : list
+        list of mixed MtEventTuple and SnEventTuple's
+    use_int_stypes : bool, optional
+        whether to report stellar types as integers 
+        (default is False, report them as 2-char stellar types)
+
+    Returns 
+    -------
+    event_str : EventHistoryString
+        condensed string representation of the event history of a binary, in array form
+
+    Notes
+    -----
+    MT strings look like: 
+        P>S, P<S, or P=S where P is primary type, S is secondary type, 
+        and '>', '<' is RLOF (star1 -> star2 or star1 <- star2) 
+        or otherwise '=' for CEE or '&' for Merger.
+    SN strings look like:
+        P*SR if star1 is the SN progenitor, or 
+        R*SP if star2 is the SN progenitor,
+        where P is progenitor type, R is remnant type, 
+        S is state ('i' for intact, 'u' for unbound)
+    Event strings for the same seed are separated by an undesrcore '_'
+    
+    TODO
+    ----
+    Currently unvectorized, do the vectorization later for a speed up
+    """
+    def _remap_stype(int_stype):
+        if use_int_stypes:
+            return str(int_stype)
+        else:
+            return stellar_type_dict[int_stype]
+    
+    # Empty event
+    if len(events) == 0:
+        return 'NA'
+        
+    event_str = ''
+    for event in events:
+        if event[0] == 'SN':                                                # SN event
+            _, time, stype_p, stype_c, which_sn, is_unbound = event
+            if which_sn == 1:                                                # Progenitor or Remnant depending upon which star is the SN
+                char_l = _remap_stype(stype_p)
+                char_r = _remap_stype(stype_c)
+            else:
+                char_l = _remap_stype(stype_c)
+                char_r = _remap_stype(stype_p)
+            char_m = '*u' if is_unbound else '*i'                           # unbound or intact
+        else:                                                               # Any of the MT events 
+            _, time, stype_1, stype_2, is_rl1, is_rl2, is_cee, is_mrg = event
+            char_l = _remap_stype(stype_1)                                   # primary stellar type
+            char_r = _remap_stype(stype_2)                                   # secondary stellar type
+            char_m = '&' if is_mrg \
+                     else '=' if is_cee \
+                     else '<' if is_rl2 \
+                     else '>'                                                # event type: CEE, RLOF 2->1, RLOF 1->2
+        event_str += "{}{}{}_".format(char_l, char_m, char_r)                # event string for this star, _ is event separator
+    event_str = np.array(event_str[:-1], dtype=np.str_)                      # return event string for this star (pop the last underscore first)
+    return event_str
+
+
+def get_event_strings(data: H5File=None, all_events: list=None, use_int_stypes: bool=False):
+    """Calculate the event history strings for a COMPAS population. 
+
+    Parameters
+    ----------
+    data : H5File, optional
+        the COMPAS output H5file 
+    all_events : list, optional                                    
+        a list where each element is a chronological set of events corresponding to the associated seed,
+        one of the outputs of get_event_history(data)
+    use_int_stypes : bool, optional
+
+    Returns
+    -------
+    event_strings : array of EventHistoryString's
+        strings of the event history of each system
+
+    Notes
+    -----
+    Exactly one of data or all_events must be included. If neither, nothign is returned.
+    """
+
+    # If output is 
+    if (data == None) & (all_events == None):
+        return 
+    elif (all_events == None):
+        _, all_events = get_event_history(data)
+
+    event_strings = np.zeros(len(all_events), dtype=StringDType())
+    for ii, events_for_given_seed in enumerate(all_events):  
+        event_string = build_event_string(events_for_given_seed, use_int_stypes=use_int_stypes)
+        event_strings[ii] = event_string                                     # append event string for this star (pop the last underscore first)
+    return event_strings
+
+""
+

From 11caeb8785a4091af80003e6290caa1607a4ad53 Mon Sep 17 00:00:00 2001
From: Reinhold Willcox <reinhold.willcox@gmail.com>
Date: Mon, 29 Jun 2026 12:05:36 +0200
Subject: [PATCH 3/4] addressed nicolas' concerns

---
 compas_python_utils/debugging_utils.py | 436 +++++++++++++++----------
 1 file changed, 272 insertions(+), 164 deletions(-)

diff --git a/compas_python_utils/debugging_utils.py b/compas_python_utils/debugging_utils.py
index 76498c96c..15a9826a2 100644
--- a/compas_python_utils/debugging_utils.py
+++ b/compas_python_utils/debugging_utils.py
@@ -9,17 +9,46 @@
 MaskNdarray = NewType('MaskNdarray', np.ndarray[bool])
 H5File = NewType('H5File', h5._hl.files.File)
 H5Group = NewType('H5Group', h5._hl.group.Group)
-MtEventTuple = NewType('MtEventTuple', tuple[ np.ndarray[int], np.ndarray[int], np.ndarray[bool], np.ndarray[bool], np.ndarray[bool], np.ndarray[bool]])
-SnEventTuple = NewType('SnEventTuple', tuple[ np.ndarray[int], np.ndarray[int], np.ndarray[bool], np.ndarray[bool]])
+MtEventTuple = NewType('MtEventTuple',
+                       tuple[np.ndarray[int],
+                             np.ndarray[int],
+                             np.ndarray[bool],
+                             np.ndarray[bool],
+                             np.ndarray[bool],
+                             np.ndarray[bool]])
+SnEventTuple = NewType('SnEventTuple',
+                       tuple[np.ndarray[int],
+                             np.ndarray[int],
+                             np.ndarray[bool],
+                             np.ndarray[bool]])
 EventHistoryString = NewType('EventHistoryString', np.array(np.str_))
 
 
 ########################################################################
 # ## Function to print the data from a given COMPAS HDF5 group in a readable pandas template
 
-def print_compas_details_dataframe(data: H5Group, 
-                                   *seeds: int, 
-                                   mask: MaskNdarray=()) -> pd.DataFrame:
+def convert_bytes_array_to_strings(param_array):
+    """Check and convert np.bytes_ array to strings.
+
+    Parameters
+    ----------
+    param_array : np.ndarray[np.bytes_ | str]
+        a numpy array containing either np.bytes_ or strings
+
+    Returns
+    -------
+    param_array : np.ndarray[str]
+        the same array, but converted to strings if not already
+    """
+    if np.issubdtype(param_array.dtype, np.bytes_):
+        return param_array.astype(str)
+    else:
+        return param_array
+
+
+def print_compas_details_dataframe(data: H5Group,
+                                   *seeds: int,
+                                   mask: MaskNdarray = ()) -> pd.DataFrame:
     """Return a pd.DataFrame for the COMPAS output contained in an H5Group.
 
     Parameters
@@ -32,70 +61,69 @@ def print_compas_details_dataframe(data: H5Group,
         boolean mask to filter out specific systems (default is no mask)
         Must be same length as data['SEED']
 
-    Returns 
+    Returns
     -------
     compas_details : pd.DataFrame
         The values of each parameter in the H5Group, excluding those removed by `seeds` or `mask`.
 
     Notes
     -----
-    Designed for easier visualization in a jupyter notebook, for inspection / debugging purposes. 
+    Designed for easier visualization in a jupyter notebook, for inspection / debugging purposes.
     If both `seeds` and `mask` are supplied, resulting systems must pass both filters.
 
     Examples
     --------
-    >>> mt_data = h5.File('COMPAS_Output.h5')['BSE_RLOF']) 
+    >>> mt_data = h5.File('COMPAS_Output.h5')['BSE_RLOF']
     >>> print_compas_details_dataframe(mt_data)
     [output of all BSE_RLOF events]
-    
+
     >>> mt_seeds = mt_data['SEED'][()]
     >>> print_compas_details_dataframe(mt_data, mt_seeds[:50])
     [output of all BSE_RLOF events occuring in the first 50 seeds]
-    
+
     >>> cee_events = mt_data['CEE>MT'][()] == 1 # needed to convert to boolean mask
     >>> print_compas_details_dataframe(mt_data, mt_seeds[:50], mask=cee_events)
     [output of all Common Envelope events occuring in the first 50 seeds]
     """
-    list_of_keys = list(data.keys())
-
     # Check if SEED parameter exists in data
-    if ('SEED' in list_of_keys) | ('SEED>MT' in list_of_keys): 
-        seed_variable_name='SEED' if ('SEED' in list_of_keys) else 'SEED>MT' #SEED>MT is a relic from older versions, but we leave this in for backwards compatibility
+    if ('SEED' or 'SEED>MT') in data:
+        # SEED>MT is a relic from older versions, but we leave this in for
+        # backwards compatibility
+        seed_variable_name = 'SEED' if ('SEED' in data) else 'SEED>MT'
 
         # If `seeds` or `mask` arguments supplied, create the relevant mask
         all_seeds = data[seed_variable_name][()]
         seeds_mask = np.isin(all_seeds, seeds)
-        if len(seeds) == 0: # If `seeds` argument is not supplied, set the default mask
+        if len(seeds) == 0:  # If `seeds` argument is not supplied, set the default mask
             seeds_mask = np.ones_like(all_seeds).astype(bool)
         if mask == ():
             mask = np.ones_like(all_seeds).astype(bool)
         mask &= seeds_mask
-        df = pd.DataFrame.from_dict({param: data[param][()][mask] for param in list_of_keys}).set_index(seed_variable_name).T
-
-    else: # No seed parameter, currently only applies to the RunDetails H5Group 
-        keys_not_derivations = [key for key in list_of_keys  if '-Derivation' not in key] # Get just the keys without the -Derivation suffix
-        
-        # Some parameter values are string types, formatted as np.bytes_, need to convert back
-        def convert_strings(param_array):
-            if isinstance(param_array[0], np.bytes_):
-                return param_array.astype(str)
-            else:
-                return param_array
-
-        df_keys = pd.DataFrame.from_dict({param: convert_strings(data[param][()]) for param in keys_not_derivations }).T
-        nCols = df_keys.shape[1] # Required only because if we have combined RunDetails output from a previous h5copy, we get many columns (should fix later)
-        df_keys.columns = ['Parameter']*nCols
-        df_drvs = pd.DataFrame.from_dict({param: convert_strings(data[param+'-Derivation'][()]) for param in keys_not_derivations }).T
-        df_drvs.columns = ['Derivation']*nCols
+        df = pd.DataFrame.from_dict(
+            {param: data[param][()][mask] for param in data}).set_index(seed_variable_name).T
+
+    else:  # No seed parameter, currently only applies to the RunDetails H5Group
+        # Get just the keys without the -Derivation suffix
+        keys_not_derivations = [
+            key for key in data if '-Derivation' not in key]
+
+        df_keys = pd.DataFrame.from_dict({param: convert_bytes_array_to_strings(
+            data[param][()]) for param in keys_not_derivations}).T
+        # Required only because if we have combined RunDetails output from a
+        # previous h5copy, we get many columns (should fix later)
+        nCols = df_keys.shape[1]
+        df_keys.columns = ['Parameter'] * nCols
+        df_drvs = pd.DataFrame.from_dict({param: convert_bytes_array_to_strings(
+            data[param + '-Derivation'][()]) for param in keys_not_derivations}).T
+        df_drvs.columns = ['Derivation'] * nCols
         df = pd.concat([df_keys, df_drvs], axis=1)
 
     # Add units as first col
-    units_dict = {key:data[key].attrs['units'].astype(str) for key in list_of_keys}
+    units_dict = {key: data[key].attrs['units'].astype(str) for key in data}
     df.insert(loc=0, column='(units)', value=pd.Series(units_dict))
     return df
 
 
-
 ########################################################################
 # ## Get event histories of MT data, SN data, and combined MT, SN data
 
@@ -109,10 +137,10 @@ def get_mt_data_tuple(mt_data: H5Group) -> tuple[list, list, list]:
 
     Returns
     -------
-    returned_seeds : list 
-        an ordered list of the unique seeds in the mt_data file, 
+    returned_seeds : list
+        an ordered list of the unique seeds in the mt_data file
     returned_events : list
-        a list of sublists, one sublist per seed, where each sublist 
+        a list of sublists, one sublist per seed, where each sublist
         contains all the MtEventTuples for the given seed
     returned_times : list
         a list of sublists of times of each of the mt_data events.
@@ -135,9 +163,10 @@ def get_mt_data_tuple(mt_data: H5Group) -> tuple[list, list, list]:
     mt_is_mrg = mt_data['Merger'][()] == 1
 
     # We want the return arrays sorted by seed, so sort here.
-    mt_seeds_idx = np.lexsort((mt_times, mt_seeds)) # sort by seeds then times - lexsort sorts by the last column first...
-    mt_seeds = mt_seeds[mt_seeds_idx]  
-    mt_times = mt_times[mt_seeds_idx]  
+    # sort by seeds then times - lexsort sorts by the last column first...
+    mt_seeds_idx = np.lexsort((mt_times, mt_seeds))
+    mt_seeds = mt_seeds[mt_seeds_idx]
+    mt_times = mt_times[mt_seeds_idx]
     mt_primary_stype = mt_primary_stype[mt_seeds_idx]
     mt_secondary_stype = mt_secondary_stype[mt_seeds_idx]
     mt_is_rlof1 = mt_is_rlof1[mt_seeds_idx]
@@ -146,33 +175,56 @@ def get_mt_data_tuple(mt_data: H5Group) -> tuple[list, list, list]:
     mt_is_mrg = mt_is_mrg[mt_seeds_idx]
 
     # Process the mt_data events
-    returned_seeds = []                                      # array of seeds - will only contain seeds that have mt_data events
-    returned_events = []                                     # array of mt_data events for each seed in returned_seeds
-    returned_times = []                                      # array of times for each event in returned_events (for each seed in returned_seeds)
-
-    last_seed = -1                                           # initialize most recently processed seed
-    for seed_index, this_seed in enumerate(mt_seeds):          # iterate over all RLOF file entries
-        this_time = mt_times[seed_index]                       # time for this RLOF file entry
-        this_event = (mt_primary_stype[seed_index], mt_secondary_stype[seed_index], 
-                     mt_is_rlof1[seed_index], mt_is_rlof2[seed_index], mt_is_cee[seed_index], mt_is_mrg[seed_index])  # construct event tuple
+    # array of seeds - will only contain seeds that have mt_data events
+    returned_seeds = []
+    # array of mt_data events for each seed in returned_seeds
+    returned_events = []
+    # array of times for each event in returned_events (for each seed in
+    # returned_seeds)
+    returned_times = []
+
+    # initialize most recently processed seed
+    last_seed = -1
+    for seed_index, this_seed in enumerate(
+            mt_seeds):          # iterate over all RLOF file entries
+        # time for this RLOF file entry
+        this_time = mt_times[seed_index]
+        this_event = (
+            mt_primary_stype[seed_index],
+            mt_secondary_stype[seed_index],
+            mt_is_rlof1[seed_index],
+            mt_is_rlof2[seed_index],
+            mt_is_cee[seed_index],
+            mt_is_mrg[seed_index])  # construct event tuple
 
         # If this is an entirely new seed:
         if this_seed != last_seed:                            # same seed as last seed processed?
-            returned_seeds.append(this_seed)                  # no - new seed, record it
-            returned_times.append([this_time])                # initialize the list of event times for this seed
-            returned_events.append([this_event])              # initialize the list of events for this seed
+            # no - new seed, record it
+            returned_seeds.append(this_seed)
+            # initialize the list of event times for this seed
+            returned_times.append([this_time])
+            # initialize the list of events for this seed
+            returned_events.append([this_event])
             last_seed = this_seed                             # update the latest seed
 
         # Add event, if it is not a duplicate
         try:
-            event_index = returned_events[-1].index(this_event)  # find event_index of this particular event tuple in the array of events for this seed 
-            if this_time > returned_times[-1][event_index]:      # ^ if event is not a duplicate, this will throw a ValueError 
-                returned_times[-1][event_index] = this_time      # if event is duplicate, update time to the later of the duplicates
+            # find event_index of this particular event tuple in the array of
+            # events for this seed
+            event_index = returned_events[-1].index(this_event)
+            # ^ if event is not a duplicate, this will throw a ValueError
+            if this_time > returned_times[-1][event_index]:
+                # if event is duplicate, update time to the later of the
+                # duplicates
+                returned_times[-1][event_index] = this_time
         except ValueError:                                    # event is not a duplicate:
-            returned_events[-1].append(this_event)              # record new event tuple for this seed
-            returned_times[-1].append(this_time)                # record new event time for this seed
+            # record new event tuple for this seed
+            returned_events[-1].append(this_event)
+            # record new event time for this seed
+            returned_times[-1].append(this_time)
 
-    return returned_seeds, returned_events, returned_times       # see above for description
+    # see above for description
+    return returned_seeds, returned_events, returned_times
 
 
 def get_sn_data_tuple(sn_data: H5Group) -> tuple[list, list, list]:
@@ -185,10 +237,10 @@ def get_sn_data_tuple(sn_data: H5Group) -> tuple[list, list, list]:
 
     Returns
     -------
-    returned_seeds : list 
-        an ordered list of the unique seeds in the sn_data file, 
+    returned_seeds : list
+        an ordered list of the unique seeds in the sn_data file,
     returned_events : list
-        a list of sublists, one sublist per seed, where each sublist 
+        a list of sublists, one sublist per seed, where each sublist
         contains all the SnEventTuples for the given seed
     returned_times : list
         a list of sublists of times of each of the sn_data events.
@@ -208,123 +260,168 @@ def get_sn_data_tuple(sn_data: H5Group) -> tuple[list, list, list]:
     sn_is_unbound = sn_data['Unbound'][()] == 1
 
     # We want the return arrays sorted by seed, so sort here.
-    sn_seeds_idx = np.lexsort((sn_times, sn_seeds)) # sort by seeds then times - lexsort sorts by the last column first...
-    sn_seeds = sn_seeds[sn_seeds_idx]  
-    sn_times = sn_times[sn_seeds_idx]  
+    # sort by seeds then times - lexsort sorts by the last column first...
+    sn_seeds_idx = np.lexsort((sn_times, sn_seeds))
+    sn_seeds = sn_seeds[sn_seeds_idx]
+    sn_times = sn_times[sn_seeds_idx]
     sn_prog_stype = sn_prog_stype[sn_seeds_idx]
     sn_remn_stype = sn_remn_stype[sn_seeds_idx]
     sn_which_prog = sn_which_prog[sn_seeds_idx]
     sn_is_unbound = sn_is_unbound[sn_seeds_idx]
-    
+
     # Process the sn_data events
-    returned_seeds = []                                      # array of seeds - will only contain seeds that have sn_data events
-    returned_events = []                                     # array of sn_data events for each seed in returned_seeds
-    returned_times = []                                      # array of times for each event in returned_events (for each seed in returned_seeds)
-
-    last_seed = -1                                           # initialize most recently processed seed
-    for seed_index, this_seed in enumerate(sn_seeds):          # iterate over all sn_data file entries
-        this_time = sn_times[seed_index]                       # time for this sn_data file entry
-        this_event = (sn_prog_stype[seed_index], sn_remn_stype[seed_index], 
-                     sn_which_prog[seed_index], sn_is_unbound[seed_index]) # construct event tuple
-               
+    # array of seeds - will only contain seeds that have sn_data events
+    returned_seeds = []
+    # array of sn_data events for each seed in returned_seeds
+    returned_events = []
+    # array of times for each event in returned_events (for each seed in
+    # returned_seeds)
+    returned_times = []
+
+    # initialize most recently processed seed
+    last_seed = -1
+    for seed_index, this_seed in enumerate(
+            sn_seeds):          # iterate over all sn_data file entries
+        # time for this sn_data file entry
+        this_time = sn_times[seed_index]
+        this_event = (
+            sn_prog_stype[seed_index],
+            sn_remn_stype[seed_index],
+            sn_which_prog[seed_index],
+            sn_is_unbound[seed_index])  # construct event tuple
+
         # If this is an entirely new seed:
         if this_seed != last_seed:                            # same seed as last seed processed?
-            returned_seeds.append(this_seed)                  # no - new seed, record it
-            returned_times.append([this_time])                #   initialize the list of event times for this seed
-            returned_events.append([this_event])              #   initialize the list of events for this seed
-            last_seed = this_seed                             #   update the latest seed
+            # no - new seed, record it
+            returned_seeds.append(this_seed)
+            # initialize the list of event times for this seed
+            returned_times.append([this_time])
+            # initialize the list of events for this seed
+            returned_events.append([this_event])
+            last_seed = this_seed  # update the latest seed
         else:                                               # yes - second sn_data event for this seed
-            returned_times[-1].append(this_time)              #   append time at end of array
-            returned_events[-1].append(this_event)            #   append event at end of array
-                
-    return returned_seeds, returned_events, returned_times     # see above for description
+            returned_times[-1].append(this_time)  # append time at end of array
+            # append event at end of array
+            returned_events[-1].append(this_event)
 
+    # see above for description
+    return returned_seeds, returned_events, returned_times
 
-def get_event_history(data: H5File, include_null: bool=True) -> tuple[list, list]:
+
+def get_event_history(
+        data: H5File, include_null: bool = True) -> tuple[list, list]:
     """Get the event history for all seeds, including both MT and SN events, in chronological order.
 
     Parameters
     ----------
     data : H5File
-        the COMPAS output H5file 
+        the COMPAS output H5file
     include_null : bool
         whether to include seeds which undergo no MT or SN events (default is True)
 
-    Returns 
+    Returns
     -------
     returned_seeds : list
         an ordered list of all the unique seeds in the output file
-    returned_events : list                                    
+    returned_events : list
         a list where each element is a chronological set of events corresponding to the associated seed
     """
     sp_data = data['BSE_System_Parameters']
     mt_data = data['BSE_RLOF']
     sn_data = data['BSE_Supernovae']
-    all_seeds = sp_data['SEED'][()]                                           # get all seeds
-    mt_seeds, mt_events, mt_times = get_mt_data_tuple(mt_data)                # get mt data tuple
-    sn_seeds, sn_events, sn_times = get_sn_data_tuple(sn_data)                # get sn data tuple
-
-    num_mt_seeds = len(mt_seeds)                                              # number of mt_data events
-    num_sn_seeds = len(sn_seeds)                                              # number of sn_data events
-
-    if num_mt_seeds < 1 and num_sn_seeds < 1: return []                       # no events - return empty history
-
-    returned_seeds = []                                                       # array of seeds - will only contain seeds that have events (of any type)
-    returned_events = []                                                      # array of events - same size as returned_seeds (includes event times)
-
-    event_ordering = ['RL', 'CE', 'SN', 'MG']                                 # order of preference for simultaneous events
-
-    mt_index = 0                                                              # index into mt_data events arrays
-    sn_index = 0                                                              # index into sn_data events arrays
+    # get all seeds
+    all_seeds = sp_data['SEED'][()]
+    mt_seeds, mt_events, mt_times = get_mt_data_tuple(
+        mt_data)                # get mt data tuple
+    sn_seeds, sn_events, sn_times = get_sn_data_tuple(
+        sn_data)                # get sn data tuple
+
+    # number of mt_data events
+    num_mt_seeds = len(mt_seeds)
+    # number of sn_data events
+    num_sn_seeds = len(sn_seeds)
+
+    if num_mt_seeds < 1 and num_sn_seeds < 1:
+        return []                       # no events - return empty history
+
+    # array of seeds - will only contain seeds that have events (of any type)
+    returned_seeds = []
+    # array of events - same size as returned_seeds (includes event times)
+    returned_events = []
+
+    # order of preference for simultaneous events
+    event_ordering = ['RL', 'CE', 'SN', 'MG']
+
+    # index into mt_data events arrays
+    mt_index = 0
+    # index into sn_data events arrays
+    sn_index = 0
 
     if include_null:
         seeds_to_iterate = all_seeds
     else:
-        seeds_to_iterate = np.sort(np.unique(np.append(mt_seeds, sn_seeds)))  # iterate over all the seeds that have either mt_data or sn_data events
-        
+        # iterate over all the seeds that have either mt_data or sn_data events
+        seeds_to_iterate = np.sort(np.unique(np.append(mt_seeds, sn_seeds)))
+
     idx_ordered = np.argsort(seeds_to_iterate)
-    returned_seeds = [None] * np.size(seeds_to_iterate)                       # array of seeds - will only contain seeds that have events (of any type)
-    returned_events = [None] * np.size(seeds_to_iterate)                      # array of events - same size as returned_seeds (includes event times)
+    # array of seeds - will only contain seeds that have events (of any type)
+    returned_seeds = [None] * np.size(seeds_to_iterate)
+    # array of events - same size as returned_seeds (includes event times)
+    returned_events = [None] * np.size(seeds_to_iterate)
 
     for idx in idx_ordered:
         seed = seeds_to_iterate[idx]
-        seed_events = []                                                      # initialise the events for the seed being processed
+        # initialise the events for the seed being processed
+        seed_events = []
 
-        # Collect any mt_data events for this seed, add the time of the event and the event type
+        # Collect any mt_data events for this seed, add the time of the event
+        # and the event type
         while mt_index < num_mt_seeds and mt_seeds[mt_index] == seed:
             for event_index, event in enumerate(mt_events[mt_index]):
                 _, _, is_rl1, is_rl2, is_cee, is_mrg = event
-                event_key = 'MG' if is_mrg else 'CE' if is_cee else 'RL'      # event type: Merger, CEE, RLOF 1->2, RLOF 2->1
-                seed_events.append((event_key, mt_times[mt_index][event_index], *mt_events[mt_index][event_index]))
+                # event type: Merger, CEE, RLOF 1->2, RLOF 2->1
+                event_key = 'MG' if is_mrg else 'CE' if is_cee else 'RL'
+                seed_events.append(
+                    (event_key,
+                     mt_times[mt_index][event_index],
+                        *mt_events[mt_index][event_index]))
             mt_index += 1
 
-        # Collect any sn_data events for this seed, add the time of the event and the event type 
+        # Collect any sn_data events for this seed, add the time of the event
+        # and the event type
         while sn_index < num_sn_seeds and sn_seeds[sn_index] == seed:
             for event_index, event in enumerate(sn_events[sn_index]):
-                event_key = 'SN' 
-                seed_events.append((event_key, sn_times[sn_index][event_index], *sn_events[sn_index][event_index]))
+                event_key = 'SN'
+                seed_events.append(
+                    (event_key,
+                     sn_times[sn_index][event_index],
+                        *sn_events[sn_index][event_index]))
             sn_index += 1
 
-        seed_events.sort(key=lambda ev:(ev[1], event_ordering.index(ev[0])))  # sort the events by time and event type (mt_data before sn_data if at the same time)
-        returned_seeds[idx] = seed                                            # record the seed in the seeds array being returned
-        returned_events[idx] = seed_events                                    # record the events for this seed in the events array being returned
-    return returned_seeds, returned_events                                    
+        # sort the events by time and event type (mt_data before sn_data if at
+        # the same time)
+        seed_events.sort(key=lambda ev: (ev[1], event_ordering.index(ev[0])))
+        # record the seed in the seeds array being returned
+        returned_seeds[idx] = seed
+        # record the events for this seed in the events array being returned
+        returned_events[idx] = seed_events
+    return returned_seeds, returned_events
 
 
 ###########################################
 # ## Produce strings of the event histories
 
-stellar_type_dict = {
-    0:  'MS',
-    1:  'MS',
-    2:  'HG',
-    3:  'GB',
-    4:  'GB',
-    5:  'GB',
-    6:  'GB',
-    7:  'HE',
-    8:  'HE',
-    9:  'HE',
+simplified_stellar_type_dict = {
+    0: 'MS',
+    1: 'MS',
+    2: 'HG',
+    3: 'GB',
+    4: 'GB',
+    5: 'GB',
+    6: 'GB',
+    7: 'HE',
+    8: 'HE',
+    9: 'HE',
     10: 'WD',
     11: 'WD',
     12: 'WD',
@@ -335,7 +432,9 @@ def get_event_history(data: H5File, include_null: bool=True) -> tuple[list, list
 }
 
 
-def build_event_string(events: list, use_int_stypes: bool=False) -> EventHistoryString:
+def build_event_string(
+        events: list,
+        use_int_stypes: bool = False) -> EventHistoryString:
     """Produce a string representation of the event history of a single binary.
 
     Parameters
@@ -343,27 +442,27 @@ def build_event_string(events: list, use_int_stypes: bool=False) -> EventHistory
     events : list
         list of mixed MtEventTuple and SnEventTuple's
     use_int_stypes : bool, optional
-        whether to report stellar types as integers 
+        whether to report stellar types as integers
         (default is False, report them as 2-char stellar types)
 
-    Returns 
+    Returns
     -------
     event_str : EventHistoryString
         condensed string representation of the event history of a binary, in array form
 
     Notes
     -----
-    MT strings look like: 
-        P>S, P<S, or P=S where P is primary type, S is secondary type, 
-        and '>', '<' is RLOF (star1 -> star2 or star1 <- star2) 
+    MT strings look like:
+        P>S, P<S, or P=S where P is primary type, S is secondary type,
+        and '>', '<' is RLOF (star1 -> star2 or star1 <- star2)
         or otherwise '=' for CEE or '&' for Merger.
     SN strings look like:
-        P*SR if star1 is the SN progenitor, or 
+        P*SR if star1 is the SN progenitor, or
         R*SP if star2 is the SN progenitor,
-        where P is progenitor type, R is remnant type, 
+        where P is progenitor type, R is remnant type,
         S is state ('i' for intact, 'u' for unbound)
     Event strings for the same seed are separated by an undesrcore '_'
-    
+
     TODO
     ----
     Currently unvectorized, do the vectorization later for a speed up
@@ -372,44 +471,50 @@ def _remap_stype(int_stype):
         if use_int_stypes:
             return str(int_stype)
         else:
-            return stellar_type_dict[int_stype]
-    
+            return simplified_stellar_type_dict[int_stype]
+
     # Empty event
     if len(events) == 0:
         return 'NA'
-        
+
     event_str = ''
     for event in events:
         if event[0] == 'SN':                                                # SN event
             _, time, stype_p, stype_c, which_sn, is_unbound = event
-            if which_sn == 1:                                                # Progenitor or Remnant depending upon which star is the SN
+            # Progenitor or Remnant depending upon which star is the SN
+            if which_sn == 1:
                 char_l = _remap_stype(stype_p)
                 char_r = _remap_stype(stype_c)
             else:
                 char_l = _remap_stype(stype_c)
                 char_r = _remap_stype(stype_p)
             char_m = '*u' if is_unbound else '*i'                           # unbound or intact
-        else:                                                               # Any of the MT events 
+        else:                                                               # Any of the MT events
             _, time, stype_1, stype_2, is_rl1, is_rl2, is_cee, is_mrg = event
-            char_l = _remap_stype(stype_1)                                   # primary stellar type
-            char_r = _remap_stype(stype_2)                                   # secondary stellar type
-            char_m = '&' if is_mrg \
-                     else '=' if is_cee \
-                     else '<' if is_rl2 \
-                     else '>'                                                # event type: CEE, RLOF 2->1, RLOF 1->2
-        event_str += "{}{}{}_".format(char_l, char_m, char_r)                # event string for this star, _ is event separator
-    event_str = np.array(event_str[:-1], dtype=np.str_)                      # return event string for this star (pop the last underscore first)
+            # primary stellar type
+            char_l = _remap_stype(stype_1)
+            # secondary stellar type
+            char_r = _remap_stype(stype_2)
+            # event type: CEE, RLOF 2->1, RLOF 1->2
+            char_m = '&' if is_mrg else '=' if is_cee else '<' if is_rl2 else '>'
+        # event string for this star, _ is event separator
+        event_str += "{}{}{}_".format(char_l, char_m, char_r)
+    # return event string for this star (pop the last underscore first)
+    event_str = np.array(event_str[:-1], dtype=np.str_)
     return event_str
 
 
-def get_event_strings(data: H5File=None, all_events: list=None, use_int_stypes: bool=False):
-    """Calculate the event history strings for a COMPAS population. 
+def get_event_strings(
+        data: H5File = None,
+        all_events: list = None,
+        use_int_stypes: bool = False):
+    """Calculate the event history strings for a COMPAS population.
 
     Parameters
     ----------
     data : H5File, optional
-        the COMPAS output H5file 
-    all_events : list, optional                                    
+        the COMPAS output H5file
+    all_events : list, optional
         a list where each element is a chronological set of events corresponding to the associated seed,
         one of the outputs of get_event_history(data)
     use_int_stypes : bool, optional
@@ -421,20 +526,23 @@ def get_event_strings(data: H5File=None, all_events: list=None, use_int_stypes:
 
     Notes
     -----
-    Exactly one of data or all_events must be included. If neither, nothign is returned.
+    Exactly one of data or all_events must be included. If neither, nothing is returned.
     """
 
-    # If output is 
-    if (data == None) & (all_events == None):
-        return 
-    elif (all_events == None):
+    # If output is
+    if (data is None) & (all_events is None):
+        return
+    elif (all_events is None):
         _, all_events = get_event_history(data)
 
     event_strings = np.zeros(len(all_events), dtype=StringDType())
-    for ii, events_for_given_seed in enumerate(all_events):  
-        event_string = build_event_string(events_for_given_seed, use_int_stypes=use_int_stypes)
-        event_strings[ii] = event_string                                     # append event string for this star (pop the last underscore first)
+    for ii, events_for_given_seed in enumerate(all_events):
+        event_string = build_event_string(
+            events_for_given_seed,
+            use_int_stypes=use_int_stypes)
+        # append event string for this star (pop the last underscore first)
+        event_strings[ii] = event_string
     return event_strings
 
-""
 
+""

From ec95efcbb19aa89494842e72f72a17d450c1b669 Mon Sep 17 00:00:00 2001
From: Reinhold Willcox <reinhold.willcox@gmail.com>
Date: Mon, 29 Jun 2026 12:13:42 +0200
Subject: [PATCH 4/4] starting the process of adding unit testing

---
 compas_python_utils/debugging_utils.py | 14 +++++++-------
 setup.py                               |  1 +
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/compas_python_utils/debugging_utils.py b/compas_python_utils/debugging_utils.py
index 15a9826a2..4047d10f8 100644
--- a/compas_python_utils/debugging_utils.py
+++ b/compas_python_utils/debugging_utils.py
@@ -4,8 +4,7 @@
 from numpy.dtypes import StringDType
 from typing import NewType
 
-""
-# New Types
+### New Types
 MaskNdarray = NewType('MaskNdarray', np.ndarray[bool])
 H5File = NewType('H5File', h5._hl.files.File)
 H5Group = NewType('H5Group', h5._hl.group.Group)
@@ -24,8 +23,7 @@
 EventHistoryString = NewType('EventHistoryString', np.array(np.str_))
 
 
-########################################################################
-# ## Function to print the data from a given COMPAS HDF5 group in a readable pandas template
+### Function to print the data from a given COMPAS HDF5 group in a readable pandas template
 
 def convert_bytes_array_to_strings(param_array):
     """Check and convert np.bytes_ array to strings.
@@ -124,8 +122,7 @@ def print_compas_details_dataframe(data: H5Group,
     return df
 
 
-########################################################################
-# ## Get event histories of MT data, SN data, and combined MT, SN data
+### Get event histories of MT data, SN data, and combined MT, SN data
 
 def get_mt_data_tuple(mt_data: H5Group) -> tuple[list, list, list]:
     """Calculates the EventTuple for the BSE_RLOF output H5Group.
@@ -544,5 +541,8 @@ def get_event_strings(
         event_strings[ii] = event_string
     return event_strings
 
+def main():
+    return
 
-""
+if __name__ == "__main__":
+    main()
diff --git a/setup.py b/setup.py
index 8cf5abe20..29eef5c43 100644
--- a/setup.py
+++ b/setup.py
@@ -128,6 +128,7 @@ def find_version(version_file=read(CPP_VERSION_FILE)):
                 f"compas_h5view= {NAME}.h5view:main",
                 f"compas_h5copy= {NAME}.h5copy:main",
                 f"compas_h5sample= {NAME}.h5sample:main",
+                f"compas_h5sample= {NAME}.debugging_utils:main",
                 f"compas_plot_detailed_evolution={NAME}.detailed_evolution_plotter.plot_detailed_evolution:main",
                 f"compas_run_submit={NAME}.preprocessing.runSubmit:main",
                 f"compas_sample_stroopwafel={NAME}.preprocessing.stroopwafelInterface:main",