From 5a80172d5b48c114366e4f31bf9a5dea80fc6c55 Mon Sep 17 00:00:00 2001 From: Reinhold Willcox Date: Wed, 18 Mar 2026 14:14:00 +0100 Subject: [PATCH 1/4] added compasUtils file --- compas_python_utils/compasUtils.py | 355 +++++++++++++++++++++++++++++ 1 file changed, 355 insertions(+) create mode 100644 compas_python_utils/compasUtils.py diff --git a/compas_python_utils/compasUtils.py b/compas_python_utils/compasUtils.py new file mode 100644 index 000000000..2cc126c68 --- /dev/null +++ b/compas_python_utils/compasUtils.py @@ -0,0 +1,355 @@ +import h5py as h5 +import numpy as np +import pandas as pd +from numpy.dtypes import StringDType + + +######################################################################## +# ## Function to print the data from a given COMPAS HDF5 group in a readable pandas template + +def printCompasDetails(data, *seeds, mask=()): + """ + Function to print the full Compas output for given seeds, optionally with an additional mask + """ + list_of_keys = list(data.keys()) + + # Check if seed parameter exists - if not, just print without (e.g RunDetails) + if ('SEED' in list_of_keys) | ('SEED>MT' in list_of_keys): # Most output files + #SEED>MT is a relic from older versions, but we leave this in for backwards compatibility + + # Set the seed name parameter, mask on seeds as needed, and set the index + seedVariableName='SEED' if ('SEED' in list_of_keys) else 'SEED>MT' + list_of_keys.remove(seedVariableName) # this is the index above, don't want to include it + + allSeeds = data[seedVariableName][()] + seedsMask = np.isin(allSeeds, seeds) + if len(seeds) == 0: # if any seed is included, do not reset the mask + seedsMask = np.ones_like(allSeeds).astype(bool) + if mask == (): + mask = np.ones_like(allSeeds).astype(bool) + mask &= seedsMask + + df = pd.DataFrame.from_dict({param: data[param][()][mask] for param in list(data.keys())}).set_index(seedVariableName).T + + else: # No seed parameter, so do custom print for Run Details + + # Get just the keys without the -Derivation suffix - those will be a second column + keys_not_derivations = [] + for key in list_of_keys: + if '-Derivation' not in key: + keys_not_derivations.append(key) + + # Some parameter values are string types, formatted as np.bytes_, need to convert back + def convert_strings(param_array): + if isinstance(param_array[0], np.bytes_): + return param_array.astype(str) + else: + return param_array + + df_keys = pd.DataFrame.from_dict({param: convert_strings(data[param][()]) for param in keys_not_derivations }).T + nCols = df_keys.shape[1] # Required only because if we combine RDs, we get many columns (should fix later) + df_keys.columns = ['Parameter']*nCols + df_drvs = pd.DataFrame.from_dict({param: convert_strings(data[param+'-Derivation'][()]) for param in keys_not_derivations }).T + df_drvs.columns = ['Derivation']*nCols + df = pd.concat([df_keys, df_drvs], axis=1) + + # Add units as first col + units_dict = {key:data[key].attrs['units'].astype(str) for key in list_of_keys} + df.insert(loc=0, column='(units)', value=pd.Series(units_dict)) + return df + + + +######################################################################## +# ## Get event histories of MT data, SN data, and combined MT, SN data + +def getMtEvents(MT): + """ + This function takes in the `BSE_RLOF` output category from COMPAS, and returns the information + on the Mass Transfer (MT) events that happen for each seed. The events do not have to be in order, + either chronologically or by seed, this function will reorder them as required. + + OUT: + returnedSeeds (list): ordered list of the unique seeds in the MT file + returnedEvents (list): list of sublists, where each sublist contains all the MT events for a given seed. + MT event tuples take the form : + (stellarTypePrimary, stellarTypeSecondary, isRlof1, isRlof2, isCEE, isMrg) + returnTimes (list): is a list of sublists of times of each of the MT events + """ + mtSeeds = MT['SEED'][()] + mtTimes = MT['TimeMT'][()] == 1 + mtIsRlof2 = MT['RLOF(2)>MT'][()] == 1 + mtIsCEE = MT['CEE>MT'][()] == 1 + mtIsMrg = MT['Merger'][()] == 1 + + # We want the return arrays sorted by seed, so sort here. + mtSeedsInds = np.lexsort((mtTimes, mtSeeds)) # sort by seeds then times - lexsort sorts by the last column first... + mtSeeds = mtSeeds[mtSeedsInds] + mtTimes = mtTimes[mtSeedsInds] + mtPrimaryStype = mtPrimaryStype[mtSeedsInds] + mtSecondaryStype = mtSecondaryStype[mtSeedsInds] + mtIsRlof1 = mtIsRlof1[mtSeedsInds] + mtIsRlof2 = mtIsRlof2[mtSeedsInds] + mtIsCEE = mtIsCEE[mtSeedsInds] + mtIsMrg = mtIsMrg[mtSeedsInds] + + # Process the MT events + returnedSeeds = [] # array of seeds - will only contain seeds that have MT events + returnedEvents = [] # array of MT events for each seed in returnedSeeds + returnedTimes = [] # array of times for each event in returnedEvents (for each seed in returnedSeeds) + + lastSeed = -1 # initialize most recently processed seed + for seedIndex, thisSeed in enumerate(mtSeeds): # iterate over all RLOF file entries + thisTime = mtTimes[seedIndex] # time for this RLOF file entry + thisEvent = (mtPrimaryStype[seedIndex], mtSecondaryStype[seedIndex], + mtIsRlof1[seedIndex], mtIsRlof2[seedIndex], mtIsCEE[seedIndex], mtIsMrg[seedIndex]) # construct event tuple + + # If this is an entirely new seed: + if thisSeed != lastSeed: # same seed as last seed processed? + returnedSeeds.append(thisSeed) # no - new seed, record it + returnedTimes.append([thisTime]) # initialize the list of event times for this seed + returnedEvents.append([thisEvent]) # initialize the list of events for this seed + lastSeed = thisSeed # update the latest seed + + # Add event, if it is not a duplicate + try: + eventIndex = returnedEvents[-1].index(thisEvent) # find eventIndex of this particular event tuple in the array of events for this seed + if thisTime > returnedTimes[-1][eventIndex]: # ^ if event is not a duplicate, this will throw a ValueError + returnedTimes[-1][eventIndex] = thisTime # if event is duplicate, update time to the later of the duplicates + except ValueError: # event is not a duplicate: + returnedEvents[-1].append(thisEvent) # record new event tuple for this seed + returnedTimes[-1].append(thisTime) # record new event time for this seed + + return returnedSeeds, returnedEvents, returnedTimes # see above for description + + +def getSnEvents(SN): + """ + This function takes in the `BSE_Supernovae` output category from COMPAS, and returns the information + on the Supernova (SN) events that happen for each seed. The events do not have to be in order chronologically, + this function will reorder them as required. + + OUT: + returnedSeeds (list): ordered list of all the unique seeds in the SN file + returnedEvents (list): list of sublists, where each sublist contains all the SN events for a given seed. + SN event tuples take the form : + (stellarTypeProgenitor, stellarTypeRemnant, whichStarIsProgenitor, isBinaryUnbound) + returnedTimes (list): is a list of sublists of times of each of the SN events + """ + snSeeds = SN['SEED'][()] + snTimes = SN['Time'][()] + snProgStype = SN['Stellar_Type_Prev(SN)'][()] + snRemnStype = SN['Stellar_Type(SN)'][()] + snWhichProg = SN['Supernova_State'][()] + snIsUnbound = SN['Unbound'][()] == 1 + + # We want the return arrays sorted by seed, so sort here. + snSeedsInds = np.lexsort((snTimes, snSeeds)) # sort by seeds then times - lexsort sorts by the last column first... + snSeeds = snSeeds[snSeedsInds] + snTimes = snTimes[snSeedsInds] + snProgStype = snProgStype[snSeedsInds] + snRemnStype = snRemnStype[snSeedsInds] + snWhichProg = snWhichProg[snSeedsInds] + snIsUnbound = snIsUnbound[snSeedsInds] + + # Process the SN events + returnedSeeds = [] # array of seeds - will only contain seeds that have SN events + returnedEvents = [] # array of SN events for each seed in returnedSeeds + returnedTimes = [] # array of times for each event in returnedEvents (for each seed in returnedSeeds) + + lastSeed = -1 # initialize most recently processed seed + for seedIndex, thisSeed in enumerate(snSeeds): # iterate over all SN file entries + thisTime = snTimes[seedIndex] # time for this SN file entry + thisEvent = (snProgStype[seedIndex], snRemnStype[seedIndex], + snWhichProg[seedIndex], snIsUnbound[seedIndex]) # construct event tuple + + # If this is an entirely new seed: + if thisSeed != lastSeed: # same seed as last seed processed? + returnedSeeds.append(thisSeed) # no - new seed, record it + returnedTimes.append([thisTime]) # initialize the list of event times for this seed + returnedEvents.append([thisEvent]) # initialize the list of events for this seed + lastSeed = thisSeed # update the latest seed + else: # yes - second SN event for this seed + returnedTimes[-1].append(thisTime) # append time at end of array + returnedEvents[-1].append(thisEvent) # append event at end of array + + return returnedSeeds, returnedEvents, returnedTimes # see above for description + + +def getEventHistory(h5file, exclude_null=False): + """ + Get the event history for all seeds, including both RLOF and SN events, in chronological order. + IN: + h5file (h5.File() type): COMPAS HDF5 output file + exclude_null (bool): whether or not to exclude seeds which undergo no RLOF or SN events + OUT: + returnedSeeds (list): ordered list of all seeds in the output + returnedEvents (list): a list (corresponding to the ordered seeds above) of the + collected SN and MT events from the getMtEvents and getSnEvents functions above, + themselves ordered chronologically + """ + SP = h5file['BSE_System_Parameters'] + MT = h5file['BSE_RLOF'] + SN = h5file['BSE_Supernovae'] + allSeeds = SP['SEED'][()] # get all seeds + mtSeeds, mtEvents, mtTimes = getMtEvents(MT) # get MT events + snSeeds, snEvents, snTimes = getSnEvents(SN) # get SN events + + numMtSeeds = len(mtSeeds) # number of MT events + numSnSeeds = len(snSeeds) # number of SN events + + if numMtSeeds < 1 and numSnSeeds < 1: return [] # no events - return empty history + + returnedSeeds = [] # array of seeds - will only contain seeds that have events (of any type) + returnedEvents = [] # array of events - same size as returnedSeeds (includes event times) + + eventOrdering = ['RL', 'CE', 'SN', 'MG'] # order of preference for simultaneous events + + mtIndex = 0 # index into MT events arrays + snIndex = 0 # index into SN events arrays + + if exclude_null: + seedsToIterate = np.sort(np.unique(np.append(mtSeeds, snSeeds))) # iterate over all the seeds that have either MT or SN events + else: + seedsToIterate = allSeeds + + idxOrdered = np.argsort(seedsToIterate) + returnedSeeds = [None] * np.size(seedsToIterate) # array of seeds - will only contain seeds that have events (of any type) + returnedEvents = [None] * np.size(seedsToIterate) # array of events - same size as returnedSeeds (includes event times) + + for idx in idxOrdered: + seed = seedsToIterate[idx] + seedEvents = [] # initialise the events for the seed being processed + + # Collect any MT events for this seed, add the time of the event and the event type + while mtIndex < numMtSeeds and mtSeeds[mtIndex] == seed: + for eventIndex, event in enumerate(mtEvents[mtIndex]): + _, _, isRL1, isRL2, isCEE, isMrg = event + eventKey = 'MG' if isMrg else 'CE' if isCEE else 'RL' # event type: Mrg, CEE, RLOF 1->2, RLOF 2->1 + seedEvents.append((eventKey, mtTimes[mtIndex][eventIndex], *mtEvents[mtIndex][eventIndex])) + mtIndex += 1 + + # Collect any SN events for this seed, add the time of the event and the event type + while snIndex < numSnSeeds and snSeeds[snIndex] == seed: + for eventIndex, event in enumerate(snEvents[snIndex]): + eventKey = 'SN' + seedEvents.append((eventKey, snTimes[snIndex][eventIndex], *snEvents[snIndex][eventIndex])) + snIndex += 1 + + seedEvents.sort(key=lambda ev:(ev[1], eventOrdering.index(ev[0]))) # sort the events by time and event type (MT before SN if at the same time) + + returnedSeeds[idx] = seed # record the seed in the seeds array being returned + returnedEvents[idx] = seedEvents # record the events for this seed in the events array being returned + + return returnedSeeds, returnedEvents # see above for details + + + +########################################### +# ## Produce strings of the event histories + +stellarTypeDict = { + 0: 'MS', + 1: 'MS', + 2: 'HG', + 3: 'GB', + 4: 'GB', + 5: 'GB', + 6: 'GB', + 7: 'HE', + 8: 'HE', + 9: 'HE', + 10: 'WD', + 11: 'WD', + 12: 'WD', + 13: 'NS', + 14: 'BH', + 15: 'MR', + 16: 'MS', +} + +def buildEventString(events, useIntStypes=False): + """ + Function to produce a string representing the event history of a single binary for quick readability. + NOTE: unvectorized, do the vectorization later for a speed up + IN: + events (list of tuples): events output from getEventHistory() + OUT: + eventString (string): string representing the event history of the binary + + MT strings look like: + P>S, P, < is RLOF (1->2 or 1<-2) or otherwise = for CEE or & for Merger + + SN strings look like: + P*SR for star1 the SN progenitor,or + R*SP for star2 the SN progenitor, + where P is progenitor type, R is remnant type, + S is state (I for intact, U for unbound) + + Event strings for the same seed are separated by the undesrcore character ('_') + """ + def _remap_stype(int_stype): + if useIntStypes: + return str(int_stype) + else: + return stellarTypeDict[int_stype] + + # Empty event + if len(events) == 0: + return 'NA' + + eventStr = '' + for event in events: + if event[0] == 'SN': # SN event + _, time, stypeP, stypeC, whichSN, isUnbound = event + if whichSN == 1: # Progenitor or Remnant depending upon which star is the SN + charL = _remap_stype(stypeP) + charR = _remap_stype(stypeC) + else: + charL = _remap_stype(stypeC) + charR = _remap_stype(stypeP) + charM = '*u' if isUnbound else '*i' # unbound or intact + else: # Any of the MT events + _, time, stype1, stype2, isRL1, isRL2, isCEE, isMrg = event + charL = _remap_stype(stype1) # primary stellar type + charR = _remap_stype(stype2) # secondary stellar type + charM = '&' if isMrg \ + else '=' if isCEE \ + else '<' if isRL2 \ + else '>' # event type: CEE, RLOF 2->1, RLOF 1->2 + eventStr += "{}{}{}_".format(charL, charM, charR) # event string for this star, _ is event separator + + return np.array(eventStr[:-1], dtype=np.str_) # return event string for this star (pop the last underscore first) + +def getEventStrings(h5file=None, allEvents=None, useIntStypes=False): + """ + Function to calculate the event history strings for either the entire Compas output, or some list of events + IN: One of + h5file (h5.File() type): COMPAS HDF5 output file + allEvents (list of tuples) + OUT: + eventStrings (list): list of strings of the event history of each seed + """ + + # If output is + if (h5file == None) & (allEvents == None): + return + elif (allEvents == None): + _, allEvents = getEventHistory(h5file) + + eventStrings = np.zeros(len(allEvents), dtype=StringDType()) + for ii, eventsForGivenSeed in enumerate(allEvents): + eventString = buildEventString(eventsForGivenSeed, useIntStypes=useIntStypes) + eventStrings[ii] = eventString # append event string for this star (pop the last underscore first) + + return eventStrings + +"" + + +"" + From b401059f3f64db65a701890e2b0b984b5aeb011f Mon Sep 17 00:00:00 2001 From: Reinhold Willcox Date: Fri, 26 Jun 2026 17:52:09 +0200 Subject: [PATCH 2/4] big cleanup of post processing utils --- compas_python_utils/compasUtils.py | 355 -------------------- compas_python_utils/debugging_utils.py | 440 +++++++++++++++++++++++++ 2 files changed, 440 insertions(+), 355 deletions(-) delete mode 100644 compas_python_utils/compasUtils.py create mode 100644 compas_python_utils/debugging_utils.py diff --git a/compas_python_utils/compasUtils.py b/compas_python_utils/compasUtils.py deleted file mode 100644 index 2cc126c68..000000000 --- a/compas_python_utils/compasUtils.py +++ /dev/null @@ -1,355 +0,0 @@ -import h5py as h5 -import numpy as np -import pandas as pd -from numpy.dtypes import StringDType - - -######################################################################## -# ## Function to print the data from a given COMPAS HDF5 group in a readable pandas template - -def printCompasDetails(data, *seeds, mask=()): - """ - Function to print the full Compas output for given seeds, optionally with an additional mask - """ - list_of_keys = list(data.keys()) - - # Check if seed parameter exists - if not, just print without (e.g RunDetails) - if ('SEED' in list_of_keys) | ('SEED>MT' in list_of_keys): # Most output files - #SEED>MT is a relic from older versions, but we leave this in for backwards compatibility - - # Set the seed name parameter, mask on seeds as needed, and set the index - seedVariableName='SEED' if ('SEED' in list_of_keys) else 'SEED>MT' - list_of_keys.remove(seedVariableName) # this is the index above, don't want to include it - - allSeeds = data[seedVariableName][()] - seedsMask = np.isin(allSeeds, seeds) - if len(seeds) == 0: # if any seed is included, do not reset the mask - seedsMask = np.ones_like(allSeeds).astype(bool) - if mask == (): - mask = np.ones_like(allSeeds).astype(bool) - mask &= seedsMask - - df = pd.DataFrame.from_dict({param: data[param][()][mask] for param in list(data.keys())}).set_index(seedVariableName).T - - else: # No seed parameter, so do custom print for Run Details - - # Get just the keys without the -Derivation suffix - those will be a second column - keys_not_derivations = [] - for key in list_of_keys: - if '-Derivation' not in key: - keys_not_derivations.append(key) - - # Some parameter values are string types, formatted as np.bytes_, need to convert back - def convert_strings(param_array): - if isinstance(param_array[0], np.bytes_): - return param_array.astype(str) - else: - return param_array - - df_keys = pd.DataFrame.from_dict({param: convert_strings(data[param][()]) for param in keys_not_derivations }).T - nCols = df_keys.shape[1] # Required only because if we combine RDs, we get many columns (should fix later) - df_keys.columns = ['Parameter']*nCols - df_drvs = pd.DataFrame.from_dict({param: convert_strings(data[param+'-Derivation'][()]) for param in keys_not_derivations }).T - df_drvs.columns = ['Derivation']*nCols - df = pd.concat([df_keys, df_drvs], axis=1) - - # Add units as first col - units_dict = {key:data[key].attrs['units'].astype(str) for key in list_of_keys} - df.insert(loc=0, column='(units)', value=pd.Series(units_dict)) - return df - - - -######################################################################## -# ## Get event histories of MT data, SN data, and combined MT, SN data - -def getMtEvents(MT): - """ - This function takes in the `BSE_RLOF` output category from COMPAS, and returns the information - on the Mass Transfer (MT) events that happen for each seed. The events do not have to be in order, - either chronologically or by seed, this function will reorder them as required. - - OUT: - returnedSeeds (list): ordered list of the unique seeds in the MT file - returnedEvents (list): list of sublists, where each sublist contains all the MT events for a given seed. - MT event tuples take the form : - (stellarTypePrimary, stellarTypeSecondary, isRlof1, isRlof2, isCEE, isMrg) - returnTimes (list): is a list of sublists of times of each of the MT events - """ - mtSeeds = MT['SEED'][()] - mtTimes = MT['TimeMT'][()] == 1 - mtIsRlof2 = MT['RLOF(2)>MT'][()] == 1 - mtIsCEE = MT['CEE>MT'][()] == 1 - mtIsMrg = MT['Merger'][()] == 1 - - # We want the return arrays sorted by seed, so sort here. - mtSeedsInds = np.lexsort((mtTimes, mtSeeds)) # sort by seeds then times - lexsort sorts by the last column first... - mtSeeds = mtSeeds[mtSeedsInds] - mtTimes = mtTimes[mtSeedsInds] - mtPrimaryStype = mtPrimaryStype[mtSeedsInds] - mtSecondaryStype = mtSecondaryStype[mtSeedsInds] - mtIsRlof1 = mtIsRlof1[mtSeedsInds] - mtIsRlof2 = mtIsRlof2[mtSeedsInds] - mtIsCEE = mtIsCEE[mtSeedsInds] - mtIsMrg = mtIsMrg[mtSeedsInds] - - # Process the MT events - returnedSeeds = [] # array of seeds - will only contain seeds that have MT events - returnedEvents = [] # array of MT events for each seed in returnedSeeds - returnedTimes = [] # array of times for each event in returnedEvents (for each seed in returnedSeeds) - - lastSeed = -1 # initialize most recently processed seed - for seedIndex, thisSeed in enumerate(mtSeeds): # iterate over all RLOF file entries - thisTime = mtTimes[seedIndex] # time for this RLOF file entry - thisEvent = (mtPrimaryStype[seedIndex], mtSecondaryStype[seedIndex], - mtIsRlof1[seedIndex], mtIsRlof2[seedIndex], mtIsCEE[seedIndex], mtIsMrg[seedIndex]) # construct event tuple - - # If this is an entirely new seed: - if thisSeed != lastSeed: # same seed as last seed processed? - returnedSeeds.append(thisSeed) # no - new seed, record it - returnedTimes.append([thisTime]) # initialize the list of event times for this seed - returnedEvents.append([thisEvent]) # initialize the list of events for this seed - lastSeed = thisSeed # update the latest seed - - # Add event, if it is not a duplicate - try: - eventIndex = returnedEvents[-1].index(thisEvent) # find eventIndex of this particular event tuple in the array of events for this seed - if thisTime > returnedTimes[-1][eventIndex]: # ^ if event is not a duplicate, this will throw a ValueError - returnedTimes[-1][eventIndex] = thisTime # if event is duplicate, update time to the later of the duplicates - except ValueError: # event is not a duplicate: - returnedEvents[-1].append(thisEvent) # record new event tuple for this seed - returnedTimes[-1].append(thisTime) # record new event time for this seed - - return returnedSeeds, returnedEvents, returnedTimes # see above for description - - -def getSnEvents(SN): - """ - This function takes in the `BSE_Supernovae` output category from COMPAS, and returns the information - on the Supernova (SN) events that happen for each seed. The events do not have to be in order chronologically, - this function will reorder them as required. - - OUT: - returnedSeeds (list): ordered list of all the unique seeds in the SN file - returnedEvents (list): list of sublists, where each sublist contains all the SN events for a given seed. - SN event tuples take the form : - (stellarTypeProgenitor, stellarTypeRemnant, whichStarIsProgenitor, isBinaryUnbound) - returnedTimes (list): is a list of sublists of times of each of the SN events - """ - snSeeds = SN['SEED'][()] - snTimes = SN['Time'][()] - snProgStype = SN['Stellar_Type_Prev(SN)'][()] - snRemnStype = SN['Stellar_Type(SN)'][()] - snWhichProg = SN['Supernova_State'][()] - snIsUnbound = SN['Unbound'][()] == 1 - - # We want the return arrays sorted by seed, so sort here. - snSeedsInds = np.lexsort((snTimes, snSeeds)) # sort by seeds then times - lexsort sorts by the last column first... - snSeeds = snSeeds[snSeedsInds] - snTimes = snTimes[snSeedsInds] - snProgStype = snProgStype[snSeedsInds] - snRemnStype = snRemnStype[snSeedsInds] - snWhichProg = snWhichProg[snSeedsInds] - snIsUnbound = snIsUnbound[snSeedsInds] - - # Process the SN events - returnedSeeds = [] # array of seeds - will only contain seeds that have SN events - returnedEvents = [] # array of SN events for each seed in returnedSeeds - returnedTimes = [] # array of times for each event in returnedEvents (for each seed in returnedSeeds) - - lastSeed = -1 # initialize most recently processed seed - for seedIndex, thisSeed in enumerate(snSeeds): # iterate over all SN file entries - thisTime = snTimes[seedIndex] # time for this SN file entry - thisEvent = (snProgStype[seedIndex], snRemnStype[seedIndex], - snWhichProg[seedIndex], snIsUnbound[seedIndex]) # construct event tuple - - # If this is an entirely new seed: - if thisSeed != lastSeed: # same seed as last seed processed? - returnedSeeds.append(thisSeed) # no - new seed, record it - returnedTimes.append([thisTime]) # initialize the list of event times for this seed - returnedEvents.append([thisEvent]) # initialize the list of events for this seed - lastSeed = thisSeed # update the latest seed - else: # yes - second SN event for this seed - returnedTimes[-1].append(thisTime) # append time at end of array - returnedEvents[-1].append(thisEvent) # append event at end of array - - return returnedSeeds, returnedEvents, returnedTimes # see above for description - - -def getEventHistory(h5file, exclude_null=False): - """ - Get the event history for all seeds, including both RLOF and SN events, in chronological order. - IN: - h5file (h5.File() type): COMPAS HDF5 output file - exclude_null (bool): whether or not to exclude seeds which undergo no RLOF or SN events - OUT: - returnedSeeds (list): ordered list of all seeds in the output - returnedEvents (list): a list (corresponding to the ordered seeds above) of the - collected SN and MT events from the getMtEvents and getSnEvents functions above, - themselves ordered chronologically - """ - SP = h5file['BSE_System_Parameters'] - MT = h5file['BSE_RLOF'] - SN = h5file['BSE_Supernovae'] - allSeeds = SP['SEED'][()] # get all seeds - mtSeeds, mtEvents, mtTimes = getMtEvents(MT) # get MT events - snSeeds, snEvents, snTimes = getSnEvents(SN) # get SN events - - numMtSeeds = len(mtSeeds) # number of MT events - numSnSeeds = len(snSeeds) # number of SN events - - if numMtSeeds < 1 and numSnSeeds < 1: return [] # no events - return empty history - - returnedSeeds = [] # array of seeds - will only contain seeds that have events (of any type) - returnedEvents = [] # array of events - same size as returnedSeeds (includes event times) - - eventOrdering = ['RL', 'CE', 'SN', 'MG'] # order of preference for simultaneous events - - mtIndex = 0 # index into MT events arrays - snIndex = 0 # index into SN events arrays - - if exclude_null: - seedsToIterate = np.sort(np.unique(np.append(mtSeeds, snSeeds))) # iterate over all the seeds that have either MT or SN events - else: - seedsToIterate = allSeeds - - idxOrdered = np.argsort(seedsToIterate) - returnedSeeds = [None] * np.size(seedsToIterate) # array of seeds - will only contain seeds that have events (of any type) - returnedEvents = [None] * np.size(seedsToIterate) # array of events - same size as returnedSeeds (includes event times) - - for idx in idxOrdered: - seed = seedsToIterate[idx] - seedEvents = [] # initialise the events for the seed being processed - - # Collect any MT events for this seed, add the time of the event and the event type - while mtIndex < numMtSeeds and mtSeeds[mtIndex] == seed: - for eventIndex, event in enumerate(mtEvents[mtIndex]): - _, _, isRL1, isRL2, isCEE, isMrg = event - eventKey = 'MG' if isMrg else 'CE' if isCEE else 'RL' # event type: Mrg, CEE, RLOF 1->2, RLOF 2->1 - seedEvents.append((eventKey, mtTimes[mtIndex][eventIndex], *mtEvents[mtIndex][eventIndex])) - mtIndex += 1 - - # Collect any SN events for this seed, add the time of the event and the event type - while snIndex < numSnSeeds and snSeeds[snIndex] == seed: - for eventIndex, event in enumerate(snEvents[snIndex]): - eventKey = 'SN' - seedEvents.append((eventKey, snTimes[snIndex][eventIndex], *snEvents[snIndex][eventIndex])) - snIndex += 1 - - seedEvents.sort(key=lambda ev:(ev[1], eventOrdering.index(ev[0]))) # sort the events by time and event type (MT before SN if at the same time) - - returnedSeeds[idx] = seed # record the seed in the seeds array being returned - returnedEvents[idx] = seedEvents # record the events for this seed in the events array being returned - - return returnedSeeds, returnedEvents # see above for details - - - -########################################### -# ## Produce strings of the event histories - -stellarTypeDict = { - 0: 'MS', - 1: 'MS', - 2: 'HG', - 3: 'GB', - 4: 'GB', - 5: 'GB', - 6: 'GB', - 7: 'HE', - 8: 'HE', - 9: 'HE', - 10: 'WD', - 11: 'WD', - 12: 'WD', - 13: 'NS', - 14: 'BH', - 15: 'MR', - 16: 'MS', -} - -def buildEventString(events, useIntStypes=False): - """ - Function to produce a string representing the event history of a single binary for quick readability. - NOTE: unvectorized, do the vectorization later for a speed up - IN: - events (list of tuples): events output from getEventHistory() - OUT: - eventString (string): string representing the event history of the binary - - MT strings look like: - P>S, P, < is RLOF (1->2 or 1<-2) or otherwise = for CEE or & for Merger - - SN strings look like: - P*SR for star1 the SN progenitor,or - R*SP for star2 the SN progenitor, - where P is progenitor type, R is remnant type, - S is state (I for intact, U for unbound) - - Event strings for the same seed are separated by the undesrcore character ('_') - """ - def _remap_stype(int_stype): - if useIntStypes: - return str(int_stype) - else: - return stellarTypeDict[int_stype] - - # Empty event - if len(events) == 0: - return 'NA' - - eventStr = '' - for event in events: - if event[0] == 'SN': # SN event - _, time, stypeP, stypeC, whichSN, isUnbound = event - if whichSN == 1: # Progenitor or Remnant depending upon which star is the SN - charL = _remap_stype(stypeP) - charR = _remap_stype(stypeC) - else: - charL = _remap_stype(stypeC) - charR = _remap_stype(stypeP) - charM = '*u' if isUnbound else '*i' # unbound or intact - else: # Any of the MT events - _, time, stype1, stype2, isRL1, isRL2, isCEE, isMrg = event - charL = _remap_stype(stype1) # primary stellar type - charR = _remap_stype(stype2) # secondary stellar type - charM = '&' if isMrg \ - else '=' if isCEE \ - else '<' if isRL2 \ - else '>' # event type: CEE, RLOF 2->1, RLOF 1->2 - eventStr += "{}{}{}_".format(charL, charM, charR) # event string for this star, _ is event separator - - return np.array(eventStr[:-1], dtype=np.str_) # return event string for this star (pop the last underscore first) - -def getEventStrings(h5file=None, allEvents=None, useIntStypes=False): - """ - Function to calculate the event history strings for either the entire Compas output, or some list of events - IN: One of - h5file (h5.File() type): COMPAS HDF5 output file - allEvents (list of tuples) - OUT: - eventStrings (list): list of strings of the event history of each seed - """ - - # If output is - if (h5file == None) & (allEvents == None): - return - elif (allEvents == None): - _, allEvents = getEventHistory(h5file) - - eventStrings = np.zeros(len(allEvents), dtype=StringDType()) - for ii, eventsForGivenSeed in enumerate(allEvents): - eventString = buildEventString(eventsForGivenSeed, useIntStypes=useIntStypes) - eventStrings[ii] = eventString # append event string for this star (pop the last underscore first) - - return eventStrings - -"" - - -"" - diff --git a/compas_python_utils/debugging_utils.py b/compas_python_utils/debugging_utils.py new file mode 100644 index 000000000..76498c96c --- /dev/null +++ b/compas_python_utils/debugging_utils.py @@ -0,0 +1,440 @@ +import h5py as h5 +import numpy as np +import pandas as pd +from numpy.dtypes import StringDType +from typing import NewType + +"" +# New Types +MaskNdarray = NewType('MaskNdarray', np.ndarray[bool]) +H5File = NewType('H5File', h5._hl.files.File) +H5Group = NewType('H5Group', h5._hl.group.Group) +MtEventTuple = NewType('MtEventTuple', tuple[ np.ndarray[int], np.ndarray[int], np.ndarray[bool], np.ndarray[bool], np.ndarray[bool], np.ndarray[bool]]) +SnEventTuple = NewType('SnEventTuple', tuple[ np.ndarray[int], np.ndarray[int], np.ndarray[bool], np.ndarray[bool]]) +EventHistoryString = NewType('EventHistoryString', np.array(np.str_)) + + +######################################################################## +# ## Function to print the data from a given COMPAS HDF5 group in a readable pandas template + +def print_compas_details_dataframe(data: H5Group, + *seeds: int, + mask: MaskNdarray=()) -> pd.DataFrame: + """Return a pd.DataFrame for the COMPAS output contained in an H5Group. + + Parameters + ---------- + data : H5Group + an H5Group containing COMPAS data + seeds : int or array_like of ints, optional + seeds of specific systems to include (default is to include all seeds) + mask : array_like of booleans, optional + boolean mask to filter out specific systems (default is no mask) + Must be same length as data['SEED'] + + Returns + ------- + compas_details : pd.DataFrame + The values of each parameter in the H5Group, excluding those removed by `seeds` or `mask`. + + Notes + ----- + Designed for easier visualization in a jupyter notebook, for inspection / debugging purposes. + If both `seeds` and `mask` are supplied, resulting systems must pass both filters. + + Examples + -------- + >>> mt_data = h5.File('COMPAS_Output.h5')['BSE_RLOF']) + >>> print_compas_details_dataframe(mt_data) + [output of all BSE_RLOF events] + + >>> mt_seeds = mt_data['SEED'][()] + >>> print_compas_details_dataframe(mt_data, mt_seeds[:50]) + [output of all BSE_RLOF events occuring in the first 50 seeds] + + >>> cee_events = mt_data['CEE>MT'][()] == 1 # needed to convert to boolean mask + >>> print_compas_details_dataframe(mt_data, mt_seeds[:50], mask=cee_events) + [output of all Common Envelope events occuring in the first 50 seeds] + """ + list_of_keys = list(data.keys()) + + # Check if SEED parameter exists in data + if ('SEED' in list_of_keys) | ('SEED>MT' in list_of_keys): + seed_variable_name='SEED' if ('SEED' in list_of_keys) else 'SEED>MT' #SEED>MT is a relic from older versions, but we leave this in for backwards compatibility + + # If `seeds` or `mask` arguments supplied, create the relevant mask + all_seeds = data[seed_variable_name][()] + seeds_mask = np.isin(all_seeds, seeds) + if len(seeds) == 0: # If `seeds` argument is not supplied, set the default mask + seeds_mask = np.ones_like(all_seeds).astype(bool) + if mask == (): + mask = np.ones_like(all_seeds).astype(bool) + mask &= seeds_mask + df = pd.DataFrame.from_dict({param: data[param][()][mask] for param in list_of_keys}).set_index(seed_variable_name).T + + else: # No seed parameter, currently only applies to the RunDetails H5Group + keys_not_derivations = [key for key in list_of_keys if '-Derivation' not in key] # Get just the keys without the -Derivation suffix + + # Some parameter values are string types, formatted as np.bytes_, need to convert back + def convert_strings(param_array): + if isinstance(param_array[0], np.bytes_): + return param_array.astype(str) + else: + return param_array + + df_keys = pd.DataFrame.from_dict({param: convert_strings(data[param][()]) for param in keys_not_derivations }).T + nCols = df_keys.shape[1] # Required only because if we have combined RunDetails output from a previous h5copy, we get many columns (should fix later) + df_keys.columns = ['Parameter']*nCols + df_drvs = pd.DataFrame.from_dict({param: convert_strings(data[param+'-Derivation'][()]) for param in keys_not_derivations }).T + df_drvs.columns = ['Derivation']*nCols + df = pd.concat([df_keys, df_drvs], axis=1) + + # Add units as first col + units_dict = {key:data[key].attrs['units'].astype(str) for key in list_of_keys} + df.insert(loc=0, column='(units)', value=pd.Series(units_dict)) + return df + + + +######################################################################## +# ## Get event histories of MT data, SN data, and combined MT, SN data + +def get_mt_data_tuple(mt_data: H5Group) -> tuple[list, list, list]: + """Calculates the EventTuple for the BSE_RLOF output H5Group. + + Parameters + ---------- + mt_data : H5Group + the COMPAS output H5Group corresponding to BSE_RLOF + + Returns + ------- + returned_seeds : list + an ordered list of the unique seeds in the mt_data file, + returned_events : list + a list of sublists, one sublist per seed, where each sublist + contains all the MtEventTuples for the given seed + returned_times : list + a list of sublists of times of each of the mt_data events. + + Notes + ----- + MtEventTuples take the form: (stellar_type_primary, stellar_type_secondary, is_rlof1, is_rlof2, is_cee, is_mrg). + The events in the input do not have to be ordered chronologically, this function orders them, in the event + that the input was coallated from a previous h5copy command. The output is not meant to be used generally, + it is just a feeder for get_event_history below. + """ + mt_seeds = mt_data['SEED'][()] + + mt_times = mt_data['TimeMT'][()] == 1 + mt_is_rlof2 = mt_data['RLOF(2)>MT'][()] == 1 + mt_is_cee = mt_data['CEE>MT'][()] == 1 + mt_is_mrg = mt_data['Merger'][()] == 1 + + # We want the return arrays sorted by seed, so sort here. + mt_seeds_idx = np.lexsort((mt_times, mt_seeds)) # sort by seeds then times - lexsort sorts by the last column first... + mt_seeds = mt_seeds[mt_seeds_idx] + mt_times = mt_times[mt_seeds_idx] + mt_primary_stype = mt_primary_stype[mt_seeds_idx] + mt_secondary_stype = mt_secondary_stype[mt_seeds_idx] + mt_is_rlof1 = mt_is_rlof1[mt_seeds_idx] + mt_is_rlof2 = mt_is_rlof2[mt_seeds_idx] + mt_is_cee = mt_is_cee[mt_seeds_idx] + mt_is_mrg = mt_is_mrg[mt_seeds_idx] + + # Process the mt_data events + returned_seeds = [] # array of seeds - will only contain seeds that have mt_data events + returned_events = [] # array of mt_data events for each seed in returned_seeds + returned_times = [] # array of times for each event in returned_events (for each seed in returned_seeds) + + last_seed = -1 # initialize most recently processed seed + for seed_index, this_seed in enumerate(mt_seeds): # iterate over all RLOF file entries + this_time = mt_times[seed_index] # time for this RLOF file entry + this_event = (mt_primary_stype[seed_index], mt_secondary_stype[seed_index], + mt_is_rlof1[seed_index], mt_is_rlof2[seed_index], mt_is_cee[seed_index], mt_is_mrg[seed_index]) # construct event tuple + + # If this is an entirely new seed: + if this_seed != last_seed: # same seed as last seed processed? + returned_seeds.append(this_seed) # no - new seed, record it + returned_times.append([this_time]) # initialize the list of event times for this seed + returned_events.append([this_event]) # initialize the list of events for this seed + last_seed = this_seed # update the latest seed + + # Add event, if it is not a duplicate + try: + event_index = returned_events[-1].index(this_event) # find event_index of this particular event tuple in the array of events for this seed + if this_time > returned_times[-1][event_index]: # ^ if event is not a duplicate, this will throw a ValueError + returned_times[-1][event_index] = this_time # if event is duplicate, update time to the later of the duplicates + except ValueError: # event is not a duplicate: + returned_events[-1].append(this_event) # record new event tuple for this seed + returned_times[-1].append(this_time) # record new event time for this seed + + return returned_seeds, returned_events, returned_times # see above for description + + +def get_sn_data_tuple(sn_data: H5Group) -> tuple[list, list, list]: + """Calculates the EventTuple for the BSE_Supernovae output H5Group. + + Parameters + ---------- + sn_data : H5Group + the COMPAS output H5Group corresponding to BSE_Supernovae + + Returns + ------- + returned_seeds : list + an ordered list of the unique seeds in the sn_data file, + returned_events : list + a list of sublists, one sublist per seed, where each sublist + contains all the SnEventTuples for the given seed + returned_times : list + a list of sublists of times of each of the sn_data events. + + Notes + ----- + SnEventTuples take the form: (stellar_type_progenitor, stellar_type_remnant, which_starIsProgenitor, is_binary_unbound). + The events in the input do not have to be ordered chronologically, this function orders them, in the event + that the input was coallated from a previous h5copy command. The output is not meant to be used generally, + it is just a feeder for get_event_history below. + """ + sn_seeds = sn_data['SEED'][()] + sn_times = sn_data['Time'][()] + sn_prog_stype = sn_data['Stellar_Type_Prev(SN)'][()] + sn_remn_stype = sn_data['Stellar_Type(SN)'][()] + sn_which_prog = sn_data['Supernova_State'][()] + sn_is_unbound = sn_data['Unbound'][()] == 1 + + # We want the return arrays sorted by seed, so sort here. + sn_seeds_idx = np.lexsort((sn_times, sn_seeds)) # sort by seeds then times - lexsort sorts by the last column first... + sn_seeds = sn_seeds[sn_seeds_idx] + sn_times = sn_times[sn_seeds_idx] + sn_prog_stype = sn_prog_stype[sn_seeds_idx] + sn_remn_stype = sn_remn_stype[sn_seeds_idx] + sn_which_prog = sn_which_prog[sn_seeds_idx] + sn_is_unbound = sn_is_unbound[sn_seeds_idx] + + # Process the sn_data events + returned_seeds = [] # array of seeds - will only contain seeds that have sn_data events + returned_events = [] # array of sn_data events for each seed in returned_seeds + returned_times = [] # array of times for each event in returned_events (for each seed in returned_seeds) + + last_seed = -1 # initialize most recently processed seed + for seed_index, this_seed in enumerate(sn_seeds): # iterate over all sn_data file entries + this_time = sn_times[seed_index] # time for this sn_data file entry + this_event = (sn_prog_stype[seed_index], sn_remn_stype[seed_index], + sn_which_prog[seed_index], sn_is_unbound[seed_index]) # construct event tuple + + # If this is an entirely new seed: + if this_seed != last_seed: # same seed as last seed processed? + returned_seeds.append(this_seed) # no - new seed, record it + returned_times.append([this_time]) # initialize the list of event times for this seed + returned_events.append([this_event]) # initialize the list of events for this seed + last_seed = this_seed # update the latest seed + else: # yes - second sn_data event for this seed + returned_times[-1].append(this_time) # append time at end of array + returned_events[-1].append(this_event) # append event at end of array + + return returned_seeds, returned_events, returned_times # see above for description + + +def get_event_history(data: H5File, include_null: bool=True) -> tuple[list, list]: + """Get the event history for all seeds, including both MT and SN events, in chronological order. + + Parameters + ---------- + data : H5File + the COMPAS output H5file + include_null : bool + whether to include seeds which undergo no MT or SN events (default is True) + + Returns + ------- + returned_seeds : list + an ordered list of all the unique seeds in the output file + returned_events : list + a list where each element is a chronological set of events corresponding to the associated seed + """ + sp_data = data['BSE_System_Parameters'] + mt_data = data['BSE_RLOF'] + sn_data = data['BSE_Supernovae'] + all_seeds = sp_data['SEED'][()] # get all seeds + mt_seeds, mt_events, mt_times = get_mt_data_tuple(mt_data) # get mt data tuple + sn_seeds, sn_events, sn_times = get_sn_data_tuple(sn_data) # get sn data tuple + + num_mt_seeds = len(mt_seeds) # number of mt_data events + num_sn_seeds = len(sn_seeds) # number of sn_data events + + if num_mt_seeds < 1 and num_sn_seeds < 1: return [] # no events - return empty history + + returned_seeds = [] # array of seeds - will only contain seeds that have events (of any type) + returned_events = [] # array of events - same size as returned_seeds (includes event times) + + event_ordering = ['RL', 'CE', 'SN', 'MG'] # order of preference for simultaneous events + + mt_index = 0 # index into mt_data events arrays + sn_index = 0 # index into sn_data events arrays + + if include_null: + seeds_to_iterate = all_seeds + else: + seeds_to_iterate = np.sort(np.unique(np.append(mt_seeds, sn_seeds))) # iterate over all the seeds that have either mt_data or sn_data events + + idx_ordered = np.argsort(seeds_to_iterate) + returned_seeds = [None] * np.size(seeds_to_iterate) # array of seeds - will only contain seeds that have events (of any type) + returned_events = [None] * np.size(seeds_to_iterate) # array of events - same size as returned_seeds (includes event times) + + for idx in idx_ordered: + seed = seeds_to_iterate[idx] + seed_events = [] # initialise the events for the seed being processed + + # Collect any mt_data events for this seed, add the time of the event and the event type + while mt_index < num_mt_seeds and mt_seeds[mt_index] == seed: + for event_index, event in enumerate(mt_events[mt_index]): + _, _, is_rl1, is_rl2, is_cee, is_mrg = event + event_key = 'MG' if is_mrg else 'CE' if is_cee else 'RL' # event type: Merger, CEE, RLOF 1->2, RLOF 2->1 + seed_events.append((event_key, mt_times[mt_index][event_index], *mt_events[mt_index][event_index])) + mt_index += 1 + + # Collect any sn_data events for this seed, add the time of the event and the event type + while sn_index < num_sn_seeds and sn_seeds[sn_index] == seed: + for event_index, event in enumerate(sn_events[sn_index]): + event_key = 'SN' + seed_events.append((event_key, sn_times[sn_index][event_index], *sn_events[sn_index][event_index])) + sn_index += 1 + + seed_events.sort(key=lambda ev:(ev[1], event_ordering.index(ev[0]))) # sort the events by time and event type (mt_data before sn_data if at the same time) + returned_seeds[idx] = seed # record the seed in the seeds array being returned + returned_events[idx] = seed_events # record the events for this seed in the events array being returned + return returned_seeds, returned_events + + +########################################### +# ## Produce strings of the event histories + +stellar_type_dict = { + 0: 'MS', + 1: 'MS', + 2: 'HG', + 3: 'GB', + 4: 'GB', + 5: 'GB', + 6: 'GB', + 7: 'HE', + 8: 'HE', + 9: 'HE', + 10: 'WD', + 11: 'WD', + 12: 'WD', + 13: 'NS', + 14: 'BH', + 15: 'MR', + 16: 'MS', +} + + +def build_event_string(events: list, use_int_stypes: bool=False) -> EventHistoryString: + """Produce a string representation of the event history of a single binary. + + Parameters + ---------- + events : list + list of mixed MtEventTuple and SnEventTuple's + use_int_stypes : bool, optional + whether to report stellar types as integers + (default is False, report them as 2-char stellar types) + + Returns + ------- + event_str : EventHistoryString + condensed string representation of the event history of a binary, in array form + + Notes + ----- + MT strings look like: + P>S, P', '<' is RLOF (star1 -> star2 or star1 <- star2) + or otherwise '=' for CEE or '&' for Merger. + SN strings look like: + P*SR if star1 is the SN progenitor, or + R*SP if star2 is the SN progenitor, + where P is progenitor type, R is remnant type, + S is state ('i' for intact, 'u' for unbound) + Event strings for the same seed are separated by an undesrcore '_' + + TODO + ---- + Currently unvectorized, do the vectorization later for a speed up + """ + def _remap_stype(int_stype): + if use_int_stypes: + return str(int_stype) + else: + return stellar_type_dict[int_stype] + + # Empty event + if len(events) == 0: + return 'NA' + + event_str = '' + for event in events: + if event[0] == 'SN': # SN event + _, time, stype_p, stype_c, which_sn, is_unbound = event + if which_sn == 1: # Progenitor or Remnant depending upon which star is the SN + char_l = _remap_stype(stype_p) + char_r = _remap_stype(stype_c) + else: + char_l = _remap_stype(stype_c) + char_r = _remap_stype(stype_p) + char_m = '*u' if is_unbound else '*i' # unbound or intact + else: # Any of the MT events + _, time, stype_1, stype_2, is_rl1, is_rl2, is_cee, is_mrg = event + char_l = _remap_stype(stype_1) # primary stellar type + char_r = _remap_stype(stype_2) # secondary stellar type + char_m = '&' if is_mrg \ + else '=' if is_cee \ + else '<' if is_rl2 \ + else '>' # event type: CEE, RLOF 2->1, RLOF 1->2 + event_str += "{}{}{}_".format(char_l, char_m, char_r) # event string for this star, _ is event separator + event_str = np.array(event_str[:-1], dtype=np.str_) # return event string for this star (pop the last underscore first) + return event_str + + +def get_event_strings(data: H5File=None, all_events: list=None, use_int_stypes: bool=False): + """Calculate the event history strings for a COMPAS population. + + Parameters + ---------- + data : H5File, optional + the COMPAS output H5file + all_events : list, optional + a list where each element is a chronological set of events corresponding to the associated seed, + one of the outputs of get_event_history(data) + use_int_stypes : bool, optional + + Returns + ------- + event_strings : array of EventHistoryString's + strings of the event history of each system + + Notes + ----- + Exactly one of data or all_events must be included. If neither, nothign is returned. + """ + + # If output is + if (data == None) & (all_events == None): + return + elif (all_events == None): + _, all_events = get_event_history(data) + + event_strings = np.zeros(len(all_events), dtype=StringDType()) + for ii, events_for_given_seed in enumerate(all_events): + event_string = build_event_string(events_for_given_seed, use_int_stypes=use_int_stypes) + event_strings[ii] = event_string # append event string for this star (pop the last underscore first) + return event_strings + +"" + From 11caeb8785a4091af80003e6290caa1607a4ad53 Mon Sep 17 00:00:00 2001 From: Reinhold Willcox Date: Mon, 29 Jun 2026 12:05:36 +0200 Subject: [PATCH 3/4] addressed nicolas' concerns --- compas_python_utils/debugging_utils.py | 436 +++++++++++++++---------- 1 file changed, 272 insertions(+), 164 deletions(-) diff --git a/compas_python_utils/debugging_utils.py b/compas_python_utils/debugging_utils.py index 76498c96c..15a9826a2 100644 --- a/compas_python_utils/debugging_utils.py +++ b/compas_python_utils/debugging_utils.py @@ -9,17 +9,46 @@ MaskNdarray = NewType('MaskNdarray', np.ndarray[bool]) H5File = NewType('H5File', h5._hl.files.File) H5Group = NewType('H5Group', h5._hl.group.Group) -MtEventTuple = NewType('MtEventTuple', tuple[ np.ndarray[int], np.ndarray[int], np.ndarray[bool], np.ndarray[bool], np.ndarray[bool], np.ndarray[bool]]) -SnEventTuple = NewType('SnEventTuple', tuple[ np.ndarray[int], np.ndarray[int], np.ndarray[bool], np.ndarray[bool]]) +MtEventTuple = NewType('MtEventTuple', + tuple[np.ndarray[int], + np.ndarray[int], + np.ndarray[bool], + np.ndarray[bool], + np.ndarray[bool], + np.ndarray[bool]]) +SnEventTuple = NewType('SnEventTuple', + tuple[np.ndarray[int], + np.ndarray[int], + np.ndarray[bool], + np.ndarray[bool]]) EventHistoryString = NewType('EventHistoryString', np.array(np.str_)) ######################################################################## # ## Function to print the data from a given COMPAS HDF5 group in a readable pandas template -def print_compas_details_dataframe(data: H5Group, - *seeds: int, - mask: MaskNdarray=()) -> pd.DataFrame: +def convert_bytes_array_to_strings(param_array): + """Check and convert np.bytes_ array to strings. + + Parameters + ---------- + param_array : np.ndarray[np.bytes_ | str] + a numpy array containing either np.bytes_ or strings + + Returns + ------- + param_array : np.ndarray[str] + the same array, but converted to strings if not already + """ + if np.issubdtype(param_array.dtype, np.bytes_): + return param_array.astype(str) + else: + return param_array + + +def print_compas_details_dataframe(data: H5Group, + *seeds: int, + mask: MaskNdarray = ()) -> pd.DataFrame: """Return a pd.DataFrame for the COMPAS output contained in an H5Group. Parameters @@ -32,70 +61,69 @@ def print_compas_details_dataframe(data: H5Group, boolean mask to filter out specific systems (default is no mask) Must be same length as data['SEED'] - Returns + Returns ------- compas_details : pd.DataFrame The values of each parameter in the H5Group, excluding those removed by `seeds` or `mask`. Notes ----- - Designed for easier visualization in a jupyter notebook, for inspection / debugging purposes. + Designed for easier visualization in a jupyter notebook, for inspection / debugging purposes. If both `seeds` and `mask` are supplied, resulting systems must pass both filters. Examples -------- - >>> mt_data = h5.File('COMPAS_Output.h5')['BSE_RLOF']) + >>> mt_data = h5.File('COMPAS_Output.h5')['BSE_RLOF'] >>> print_compas_details_dataframe(mt_data) [output of all BSE_RLOF events] - + >>> mt_seeds = mt_data['SEED'][()] >>> print_compas_details_dataframe(mt_data, mt_seeds[:50]) [output of all BSE_RLOF events occuring in the first 50 seeds] - + >>> cee_events = mt_data['CEE>MT'][()] == 1 # needed to convert to boolean mask >>> print_compas_details_dataframe(mt_data, mt_seeds[:50], mask=cee_events) [output of all Common Envelope events occuring in the first 50 seeds] """ - list_of_keys = list(data.keys()) - # Check if SEED parameter exists in data - if ('SEED' in list_of_keys) | ('SEED>MT' in list_of_keys): - seed_variable_name='SEED' if ('SEED' in list_of_keys) else 'SEED>MT' #SEED>MT is a relic from older versions, but we leave this in for backwards compatibility + if ('SEED' or 'SEED>MT') in data: + # SEED>MT is a relic from older versions, but we leave this in for + # backwards compatibility + seed_variable_name = 'SEED' if ('SEED' in data) else 'SEED>MT' # If `seeds` or `mask` arguments supplied, create the relevant mask all_seeds = data[seed_variable_name][()] seeds_mask = np.isin(all_seeds, seeds) - if len(seeds) == 0: # If `seeds` argument is not supplied, set the default mask + if len(seeds) == 0: # If `seeds` argument is not supplied, set the default mask seeds_mask = np.ones_like(all_seeds).astype(bool) if mask == (): mask = np.ones_like(all_seeds).astype(bool) mask &= seeds_mask - df = pd.DataFrame.from_dict({param: data[param][()][mask] for param in list_of_keys}).set_index(seed_variable_name).T - - else: # No seed parameter, currently only applies to the RunDetails H5Group - keys_not_derivations = [key for key in list_of_keys if '-Derivation' not in key] # Get just the keys without the -Derivation suffix - - # Some parameter values are string types, formatted as np.bytes_, need to convert back - def convert_strings(param_array): - if isinstance(param_array[0], np.bytes_): - return param_array.astype(str) - else: - return param_array - - df_keys = pd.DataFrame.from_dict({param: convert_strings(data[param][()]) for param in keys_not_derivations }).T - nCols = df_keys.shape[1] # Required only because if we have combined RunDetails output from a previous h5copy, we get many columns (should fix later) - df_keys.columns = ['Parameter']*nCols - df_drvs = pd.DataFrame.from_dict({param: convert_strings(data[param+'-Derivation'][()]) for param in keys_not_derivations }).T - df_drvs.columns = ['Derivation']*nCols + df = pd.DataFrame.from_dict( + {param: data[param][()][mask] for param in data}).set_index(seed_variable_name).T + + else: # No seed parameter, currently only applies to the RunDetails H5Group + # Get just the keys without the -Derivation suffix + keys_not_derivations = [ + key for key in data if '-Derivation' not in key] + + df_keys = pd.DataFrame.from_dict({param: convert_bytes_array_to_strings( + data[param][()]) for param in keys_not_derivations}).T + # Required only because if we have combined RunDetails output from a + # previous h5copy, we get many columns (should fix later) + nCols = df_keys.shape[1] + df_keys.columns = ['Parameter'] * nCols + df_drvs = pd.DataFrame.from_dict({param: convert_bytes_array_to_strings( + data[param + '-Derivation'][()]) for param in keys_not_derivations}).T + df_drvs.columns = ['Derivation'] * nCols df = pd.concat([df_keys, df_drvs], axis=1) # Add units as first col - units_dict = {key:data[key].attrs['units'].astype(str) for key in list_of_keys} + units_dict = {key: data[key].attrs['units'].astype(str) for key in data} df.insert(loc=0, column='(units)', value=pd.Series(units_dict)) return df - ######################################################################## # ## Get event histories of MT data, SN data, and combined MT, SN data @@ -109,10 +137,10 @@ def get_mt_data_tuple(mt_data: H5Group) -> tuple[list, list, list]: Returns ------- - returned_seeds : list - an ordered list of the unique seeds in the mt_data file, + returned_seeds : list + an ordered list of the unique seeds in the mt_data file returned_events : list - a list of sublists, one sublist per seed, where each sublist + a list of sublists, one sublist per seed, where each sublist contains all the MtEventTuples for the given seed returned_times : list a list of sublists of times of each of the mt_data events. @@ -135,9 +163,10 @@ def get_mt_data_tuple(mt_data: H5Group) -> tuple[list, list, list]: mt_is_mrg = mt_data['Merger'][()] == 1 # We want the return arrays sorted by seed, so sort here. - mt_seeds_idx = np.lexsort((mt_times, mt_seeds)) # sort by seeds then times - lexsort sorts by the last column first... - mt_seeds = mt_seeds[mt_seeds_idx] - mt_times = mt_times[mt_seeds_idx] + # sort by seeds then times - lexsort sorts by the last column first... + mt_seeds_idx = np.lexsort((mt_times, mt_seeds)) + mt_seeds = mt_seeds[mt_seeds_idx] + mt_times = mt_times[mt_seeds_idx] mt_primary_stype = mt_primary_stype[mt_seeds_idx] mt_secondary_stype = mt_secondary_stype[mt_seeds_idx] mt_is_rlof1 = mt_is_rlof1[mt_seeds_idx] @@ -146,33 +175,56 @@ def get_mt_data_tuple(mt_data: H5Group) -> tuple[list, list, list]: mt_is_mrg = mt_is_mrg[mt_seeds_idx] # Process the mt_data events - returned_seeds = [] # array of seeds - will only contain seeds that have mt_data events - returned_events = [] # array of mt_data events for each seed in returned_seeds - returned_times = [] # array of times for each event in returned_events (for each seed in returned_seeds) - - last_seed = -1 # initialize most recently processed seed - for seed_index, this_seed in enumerate(mt_seeds): # iterate over all RLOF file entries - this_time = mt_times[seed_index] # time for this RLOF file entry - this_event = (mt_primary_stype[seed_index], mt_secondary_stype[seed_index], - mt_is_rlof1[seed_index], mt_is_rlof2[seed_index], mt_is_cee[seed_index], mt_is_mrg[seed_index]) # construct event tuple + # array of seeds - will only contain seeds that have mt_data events + returned_seeds = [] + # array of mt_data events for each seed in returned_seeds + returned_events = [] + # array of times for each event in returned_events (for each seed in + # returned_seeds) + returned_times = [] + + # initialize most recently processed seed + last_seed = -1 + for seed_index, this_seed in enumerate( + mt_seeds): # iterate over all RLOF file entries + # time for this RLOF file entry + this_time = mt_times[seed_index] + this_event = ( + mt_primary_stype[seed_index], + mt_secondary_stype[seed_index], + mt_is_rlof1[seed_index], + mt_is_rlof2[seed_index], + mt_is_cee[seed_index], + mt_is_mrg[seed_index]) # construct event tuple # If this is an entirely new seed: if this_seed != last_seed: # same seed as last seed processed? - returned_seeds.append(this_seed) # no - new seed, record it - returned_times.append([this_time]) # initialize the list of event times for this seed - returned_events.append([this_event]) # initialize the list of events for this seed + # no - new seed, record it + returned_seeds.append(this_seed) + # initialize the list of event times for this seed + returned_times.append([this_time]) + # initialize the list of events for this seed + returned_events.append([this_event]) last_seed = this_seed # update the latest seed # Add event, if it is not a duplicate try: - event_index = returned_events[-1].index(this_event) # find event_index of this particular event tuple in the array of events for this seed - if this_time > returned_times[-1][event_index]: # ^ if event is not a duplicate, this will throw a ValueError - returned_times[-1][event_index] = this_time # if event is duplicate, update time to the later of the duplicates + # find event_index of this particular event tuple in the array of + # events for this seed + event_index = returned_events[-1].index(this_event) + # ^ if event is not a duplicate, this will throw a ValueError + if this_time > returned_times[-1][event_index]: + # if event is duplicate, update time to the later of the + # duplicates + returned_times[-1][event_index] = this_time except ValueError: # event is not a duplicate: - returned_events[-1].append(this_event) # record new event tuple for this seed - returned_times[-1].append(this_time) # record new event time for this seed + # record new event tuple for this seed + returned_events[-1].append(this_event) + # record new event time for this seed + returned_times[-1].append(this_time) - return returned_seeds, returned_events, returned_times # see above for description + # see above for description + return returned_seeds, returned_events, returned_times def get_sn_data_tuple(sn_data: H5Group) -> tuple[list, list, list]: @@ -185,10 +237,10 @@ def get_sn_data_tuple(sn_data: H5Group) -> tuple[list, list, list]: Returns ------- - returned_seeds : list - an ordered list of the unique seeds in the sn_data file, + returned_seeds : list + an ordered list of the unique seeds in the sn_data file, returned_events : list - a list of sublists, one sublist per seed, where each sublist + a list of sublists, one sublist per seed, where each sublist contains all the SnEventTuples for the given seed returned_times : list a list of sublists of times of each of the sn_data events. @@ -208,123 +260,168 @@ def get_sn_data_tuple(sn_data: H5Group) -> tuple[list, list, list]: sn_is_unbound = sn_data['Unbound'][()] == 1 # We want the return arrays sorted by seed, so sort here. - sn_seeds_idx = np.lexsort((sn_times, sn_seeds)) # sort by seeds then times - lexsort sorts by the last column first... - sn_seeds = sn_seeds[sn_seeds_idx] - sn_times = sn_times[sn_seeds_idx] + # sort by seeds then times - lexsort sorts by the last column first... + sn_seeds_idx = np.lexsort((sn_times, sn_seeds)) + sn_seeds = sn_seeds[sn_seeds_idx] + sn_times = sn_times[sn_seeds_idx] sn_prog_stype = sn_prog_stype[sn_seeds_idx] sn_remn_stype = sn_remn_stype[sn_seeds_idx] sn_which_prog = sn_which_prog[sn_seeds_idx] sn_is_unbound = sn_is_unbound[sn_seeds_idx] - + # Process the sn_data events - returned_seeds = [] # array of seeds - will only contain seeds that have sn_data events - returned_events = [] # array of sn_data events for each seed in returned_seeds - returned_times = [] # array of times for each event in returned_events (for each seed in returned_seeds) - - last_seed = -1 # initialize most recently processed seed - for seed_index, this_seed in enumerate(sn_seeds): # iterate over all sn_data file entries - this_time = sn_times[seed_index] # time for this sn_data file entry - this_event = (sn_prog_stype[seed_index], sn_remn_stype[seed_index], - sn_which_prog[seed_index], sn_is_unbound[seed_index]) # construct event tuple - + # array of seeds - will only contain seeds that have sn_data events + returned_seeds = [] + # array of sn_data events for each seed in returned_seeds + returned_events = [] + # array of times for each event in returned_events (for each seed in + # returned_seeds) + returned_times = [] + + # initialize most recently processed seed + last_seed = -1 + for seed_index, this_seed in enumerate( + sn_seeds): # iterate over all sn_data file entries + # time for this sn_data file entry + this_time = sn_times[seed_index] + this_event = ( + sn_prog_stype[seed_index], + sn_remn_stype[seed_index], + sn_which_prog[seed_index], + sn_is_unbound[seed_index]) # construct event tuple + # If this is an entirely new seed: if this_seed != last_seed: # same seed as last seed processed? - returned_seeds.append(this_seed) # no - new seed, record it - returned_times.append([this_time]) # initialize the list of event times for this seed - returned_events.append([this_event]) # initialize the list of events for this seed - last_seed = this_seed # update the latest seed + # no - new seed, record it + returned_seeds.append(this_seed) + # initialize the list of event times for this seed + returned_times.append([this_time]) + # initialize the list of events for this seed + returned_events.append([this_event]) + last_seed = this_seed # update the latest seed else: # yes - second sn_data event for this seed - returned_times[-1].append(this_time) # append time at end of array - returned_events[-1].append(this_event) # append event at end of array - - return returned_seeds, returned_events, returned_times # see above for description + returned_times[-1].append(this_time) # append time at end of array + # append event at end of array + returned_events[-1].append(this_event) + # see above for description + return returned_seeds, returned_events, returned_times -def get_event_history(data: H5File, include_null: bool=True) -> tuple[list, list]: + +def get_event_history( + data: H5File, include_null: bool = True) -> tuple[list, list]: """Get the event history for all seeds, including both MT and SN events, in chronological order. Parameters ---------- data : H5File - the COMPAS output H5file + the COMPAS output H5file include_null : bool whether to include seeds which undergo no MT or SN events (default is True) - Returns + Returns ------- returned_seeds : list an ordered list of all the unique seeds in the output file - returned_events : list + returned_events : list a list where each element is a chronological set of events corresponding to the associated seed """ sp_data = data['BSE_System_Parameters'] mt_data = data['BSE_RLOF'] sn_data = data['BSE_Supernovae'] - all_seeds = sp_data['SEED'][()] # get all seeds - mt_seeds, mt_events, mt_times = get_mt_data_tuple(mt_data) # get mt data tuple - sn_seeds, sn_events, sn_times = get_sn_data_tuple(sn_data) # get sn data tuple - - num_mt_seeds = len(mt_seeds) # number of mt_data events - num_sn_seeds = len(sn_seeds) # number of sn_data events - - if num_mt_seeds < 1 and num_sn_seeds < 1: return [] # no events - return empty history - - returned_seeds = [] # array of seeds - will only contain seeds that have events (of any type) - returned_events = [] # array of events - same size as returned_seeds (includes event times) - - event_ordering = ['RL', 'CE', 'SN', 'MG'] # order of preference for simultaneous events - - mt_index = 0 # index into mt_data events arrays - sn_index = 0 # index into sn_data events arrays + # get all seeds + all_seeds = sp_data['SEED'][()] + mt_seeds, mt_events, mt_times = get_mt_data_tuple( + mt_data) # get mt data tuple + sn_seeds, sn_events, sn_times = get_sn_data_tuple( + sn_data) # get sn data tuple + + # number of mt_data events + num_mt_seeds = len(mt_seeds) + # number of sn_data events + num_sn_seeds = len(sn_seeds) + + if num_mt_seeds < 1 and num_sn_seeds < 1: + return [] # no events - return empty history + + # array of seeds - will only contain seeds that have events (of any type) + returned_seeds = [] + # array of events - same size as returned_seeds (includes event times) + returned_events = [] + + # order of preference for simultaneous events + event_ordering = ['RL', 'CE', 'SN', 'MG'] + + # index into mt_data events arrays + mt_index = 0 + # index into sn_data events arrays + sn_index = 0 if include_null: seeds_to_iterate = all_seeds else: - seeds_to_iterate = np.sort(np.unique(np.append(mt_seeds, sn_seeds))) # iterate over all the seeds that have either mt_data or sn_data events - + # iterate over all the seeds that have either mt_data or sn_data events + seeds_to_iterate = np.sort(np.unique(np.append(mt_seeds, sn_seeds))) + idx_ordered = np.argsort(seeds_to_iterate) - returned_seeds = [None] * np.size(seeds_to_iterate) # array of seeds - will only contain seeds that have events (of any type) - returned_events = [None] * np.size(seeds_to_iterate) # array of events - same size as returned_seeds (includes event times) + # array of seeds - will only contain seeds that have events (of any type) + returned_seeds = [None] * np.size(seeds_to_iterate) + # array of events - same size as returned_seeds (includes event times) + returned_events = [None] * np.size(seeds_to_iterate) for idx in idx_ordered: seed = seeds_to_iterate[idx] - seed_events = [] # initialise the events for the seed being processed + # initialise the events for the seed being processed + seed_events = [] - # Collect any mt_data events for this seed, add the time of the event and the event type + # Collect any mt_data events for this seed, add the time of the event + # and the event type while mt_index < num_mt_seeds and mt_seeds[mt_index] == seed: for event_index, event in enumerate(mt_events[mt_index]): _, _, is_rl1, is_rl2, is_cee, is_mrg = event - event_key = 'MG' if is_mrg else 'CE' if is_cee else 'RL' # event type: Merger, CEE, RLOF 1->2, RLOF 2->1 - seed_events.append((event_key, mt_times[mt_index][event_index], *mt_events[mt_index][event_index])) + # event type: Merger, CEE, RLOF 1->2, RLOF 2->1 + event_key = 'MG' if is_mrg else 'CE' if is_cee else 'RL' + seed_events.append( + (event_key, + mt_times[mt_index][event_index], + *mt_events[mt_index][event_index])) mt_index += 1 - # Collect any sn_data events for this seed, add the time of the event and the event type + # Collect any sn_data events for this seed, add the time of the event + # and the event type while sn_index < num_sn_seeds and sn_seeds[sn_index] == seed: for event_index, event in enumerate(sn_events[sn_index]): - event_key = 'SN' - seed_events.append((event_key, sn_times[sn_index][event_index], *sn_events[sn_index][event_index])) + event_key = 'SN' + seed_events.append( + (event_key, + sn_times[sn_index][event_index], + *sn_events[sn_index][event_index])) sn_index += 1 - seed_events.sort(key=lambda ev:(ev[1], event_ordering.index(ev[0]))) # sort the events by time and event type (mt_data before sn_data if at the same time) - returned_seeds[idx] = seed # record the seed in the seeds array being returned - returned_events[idx] = seed_events # record the events for this seed in the events array being returned - return returned_seeds, returned_events + # sort the events by time and event type (mt_data before sn_data if at + # the same time) + seed_events.sort(key=lambda ev: (ev[1], event_ordering.index(ev[0]))) + # record the seed in the seeds array being returned + returned_seeds[idx] = seed + # record the events for this seed in the events array being returned + returned_events[idx] = seed_events + return returned_seeds, returned_events ########################################### # ## Produce strings of the event histories -stellar_type_dict = { - 0: 'MS', - 1: 'MS', - 2: 'HG', - 3: 'GB', - 4: 'GB', - 5: 'GB', - 6: 'GB', - 7: 'HE', - 8: 'HE', - 9: 'HE', +simplified_stellar_type_dict = { + 0: 'MS', + 1: 'MS', + 2: 'HG', + 3: 'GB', + 4: 'GB', + 5: 'GB', + 6: 'GB', + 7: 'HE', + 8: 'HE', + 9: 'HE', 10: 'WD', 11: 'WD', 12: 'WD', @@ -335,7 +432,9 @@ def get_event_history(data: H5File, include_null: bool=True) -> tuple[list, list } -def build_event_string(events: list, use_int_stypes: bool=False) -> EventHistoryString: +def build_event_string( + events: list, + use_int_stypes: bool = False) -> EventHistoryString: """Produce a string representation of the event history of a single binary. Parameters @@ -343,27 +442,27 @@ def build_event_string(events: list, use_int_stypes: bool=False) -> EventHistory events : list list of mixed MtEventTuple and SnEventTuple's use_int_stypes : bool, optional - whether to report stellar types as integers + whether to report stellar types as integers (default is False, report them as 2-char stellar types) - Returns + Returns ------- event_str : EventHistoryString condensed string representation of the event history of a binary, in array form Notes ----- - MT strings look like: - P>S, P', '<' is RLOF (star1 -> star2 or star1 <- star2) + MT strings look like: + P>S, P', '<' is RLOF (star1 -> star2 or star1 <- star2) or otherwise '=' for CEE or '&' for Merger. SN strings look like: - P*SR if star1 is the SN progenitor, or + P*SR if star1 is the SN progenitor, or R*SP if star2 is the SN progenitor, - where P is progenitor type, R is remnant type, + where P is progenitor type, R is remnant type, S is state ('i' for intact, 'u' for unbound) Event strings for the same seed are separated by an undesrcore '_' - + TODO ---- Currently unvectorized, do the vectorization later for a speed up @@ -372,44 +471,50 @@ def _remap_stype(int_stype): if use_int_stypes: return str(int_stype) else: - return stellar_type_dict[int_stype] - + return simplified_stellar_type_dict[int_stype] + # Empty event if len(events) == 0: return 'NA' - + event_str = '' for event in events: if event[0] == 'SN': # SN event _, time, stype_p, stype_c, which_sn, is_unbound = event - if which_sn == 1: # Progenitor or Remnant depending upon which star is the SN + # Progenitor or Remnant depending upon which star is the SN + if which_sn == 1: char_l = _remap_stype(stype_p) char_r = _remap_stype(stype_c) else: char_l = _remap_stype(stype_c) char_r = _remap_stype(stype_p) char_m = '*u' if is_unbound else '*i' # unbound or intact - else: # Any of the MT events + else: # Any of the MT events _, time, stype_1, stype_2, is_rl1, is_rl2, is_cee, is_mrg = event - char_l = _remap_stype(stype_1) # primary stellar type - char_r = _remap_stype(stype_2) # secondary stellar type - char_m = '&' if is_mrg \ - else '=' if is_cee \ - else '<' if is_rl2 \ - else '>' # event type: CEE, RLOF 2->1, RLOF 1->2 - event_str += "{}{}{}_".format(char_l, char_m, char_r) # event string for this star, _ is event separator - event_str = np.array(event_str[:-1], dtype=np.str_) # return event string for this star (pop the last underscore first) + # primary stellar type + char_l = _remap_stype(stype_1) + # secondary stellar type + char_r = _remap_stype(stype_2) + # event type: CEE, RLOF 2->1, RLOF 1->2 + char_m = '&' if is_mrg else '=' if is_cee else '<' if is_rl2 else '>' + # event string for this star, _ is event separator + event_str += "{}{}{}_".format(char_l, char_m, char_r) + # return event string for this star (pop the last underscore first) + event_str = np.array(event_str[:-1], dtype=np.str_) return event_str -def get_event_strings(data: H5File=None, all_events: list=None, use_int_stypes: bool=False): - """Calculate the event history strings for a COMPAS population. +def get_event_strings( + data: H5File = None, + all_events: list = None, + use_int_stypes: bool = False): + """Calculate the event history strings for a COMPAS population. Parameters ---------- data : H5File, optional - the COMPAS output H5file - all_events : list, optional + the COMPAS output H5file + all_events : list, optional a list where each element is a chronological set of events corresponding to the associated seed, one of the outputs of get_event_history(data) use_int_stypes : bool, optional @@ -421,20 +526,23 @@ def get_event_strings(data: H5File=None, all_events: list=None, use_int_stypes: Notes ----- - Exactly one of data or all_events must be included. If neither, nothign is returned. + Exactly one of data or all_events must be included. If neither, nothing is returned. """ - # If output is - if (data == None) & (all_events == None): - return - elif (all_events == None): + # If output is + if (data is None) & (all_events is None): + return + elif (all_events is None): _, all_events = get_event_history(data) event_strings = np.zeros(len(all_events), dtype=StringDType()) - for ii, events_for_given_seed in enumerate(all_events): - event_string = build_event_string(events_for_given_seed, use_int_stypes=use_int_stypes) - event_strings[ii] = event_string # append event string for this star (pop the last underscore first) + for ii, events_for_given_seed in enumerate(all_events): + event_string = build_event_string( + events_for_given_seed, + use_int_stypes=use_int_stypes) + # append event string for this star (pop the last underscore first) + event_strings[ii] = event_string return event_strings -"" +"" From ec95efcbb19aa89494842e72f72a17d450c1b669 Mon Sep 17 00:00:00 2001 From: Reinhold Willcox Date: Mon, 29 Jun 2026 12:13:42 +0200 Subject: [PATCH 4/4] starting the process of adding unit testing --- compas_python_utils/debugging_utils.py | 14 +++++++------- setup.py | 1 + 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/compas_python_utils/debugging_utils.py b/compas_python_utils/debugging_utils.py index 15a9826a2..4047d10f8 100644 --- a/compas_python_utils/debugging_utils.py +++ b/compas_python_utils/debugging_utils.py @@ -4,8 +4,7 @@ from numpy.dtypes import StringDType from typing import NewType -"" -# New Types +### New Types MaskNdarray = NewType('MaskNdarray', np.ndarray[bool]) H5File = NewType('H5File', h5._hl.files.File) H5Group = NewType('H5Group', h5._hl.group.Group) @@ -24,8 +23,7 @@ EventHistoryString = NewType('EventHistoryString', np.array(np.str_)) -######################################################################## -# ## Function to print the data from a given COMPAS HDF5 group in a readable pandas template +### Function to print the data from a given COMPAS HDF5 group in a readable pandas template def convert_bytes_array_to_strings(param_array): """Check and convert np.bytes_ array to strings. @@ -124,8 +122,7 @@ def print_compas_details_dataframe(data: H5Group, return df -######################################################################## -# ## Get event histories of MT data, SN data, and combined MT, SN data +### Get event histories of MT data, SN data, and combined MT, SN data def get_mt_data_tuple(mt_data: H5Group) -> tuple[list, list, list]: """Calculates the EventTuple for the BSE_RLOF output H5Group. @@ -544,5 +541,8 @@ def get_event_strings( event_strings[ii] = event_string return event_strings +def main(): + return -"" +if __name__ == "__main__": + main() diff --git a/setup.py b/setup.py index 8cf5abe20..29eef5c43 100644 --- a/setup.py +++ b/setup.py @@ -128,6 +128,7 @@ def find_version(version_file=read(CPP_VERSION_FILE)): f"compas_h5view= {NAME}.h5view:main", f"compas_h5copy= {NAME}.h5copy:main", f"compas_h5sample= {NAME}.h5sample:main", + f"compas_h5sample= {NAME}.debugging_utils:main", f"compas_plot_detailed_evolution={NAME}.detailed_evolution_plotter.plot_detailed_evolution:main", f"compas_run_submit={NAME}.preprocessing.runSubmit:main", f"compas_sample_stroopwafel={NAME}.preprocessing.stroopwafelInterface:main",