Source code for tardis.io.atom_data.base

import logging
from dataclasses import dataclass

import numpy as np
import pandas as pd
from astropy.units import Quantity

from tardis import constants as const
from tardis.io.atom_data.collision_data import (
    ChiantiCollisionData,
    CMFGENCollisionData,
)
from tardis.io.atom_data.macro_atom_data import MacroAtomData
from tardis.io.atom_data.nlte_data import NLTEData
from tardis.io.atom_data.util import resolve_atom_data_fname
from tardis.plasma.properties.continuum_processes.rates import (
    get_ground_state_multi_index,
)


[docs] class AtomDataNotPreparedError(Exception): pass
[docs] class AtomDataMissingError(Exception): pass
logger = logging.getLogger(__name__)
[docs] class AtomData: """ Class for storing atomic data Parameters ---------- atom_data : pandas.DataFrame A DataFrame containing the *basic atomic data* with: index : atomic_number columns : symbol, name, mass[u]. ionization_data : pandas.DataFrame A DataFrame containing the *ionization data* with: index : atomic_number, ion_number columns : ionization_energy[eV]. It is important to note here is that `ion_number` describes the *final ion state* e.g. H I - H II is described with ion=1 levels : pandas.DataFrame A DataFrame containing the *levels data* with: index : numerical index columns : atomic_number, ion_number, level_number, energy[eV], g[1], metastable. lines : pandas.DataFrame A DataFrame containing the *lines data* with: index : numerical index columns : line_id, atomic_number, ion_number, level_number_lower, level_number_upper, wavelength[angstrom], nu[Hz], f_lu[1], f_ul[1], B_ul[?], B_ul[?], A_ul[1/s]. macro_atom_data : A DataFrame containing the *macro atom data* with: index : numerical index columns : atomic_number, ion_number, source_level_number, destination_level_number, transition_line_id, transition_type, transition_probability; macro_atom_references : A DataFrame containing the *macro atom references* with: index : numerical index columns : atomic_number, ion_number, source_level_number, count_down, count_up, count_total. Refer to the docs: http://tardis.readthedocs.io/en/latest/physics/plasma/macroatom.html collision_data : (pandas.DataFrame, np.array) A DataFrame containing the *electron collisions data* with: index : atomic_number, ion_number, level_number_lower, level_number_upper columns : e_col_id, delta_e, g_ratio, c_ul; collision_data_temperatures : np.array An array with the collision temperatures. zeta_data : A DataFrame containing the *zeta data* for the nebular ionization calculation (i.e., the fraction of recombinations that go directly to the ground state) with: index : atomic_number, ion_charge columns : temperatures[K] synpp_refs : ? photoionization_data : pandas.DataFrame A DataFrame containing the *photoionization data* with: index : numerical index columns : atomic_number, ion_number, level_number, nu[Hz], x_sect[cm^2] two_photon_data : pandas.DataFrame A DataFrame containing the *two photon decay data* with: index: atomic_number, ion_number, level_number_lower, level_number_upper columns: A_ul[1/s], nu0[Hz], alpha, beta, gamma decay_radiation_data : pandas.DataFrame A dataframe containing the *decay radiation data* with: index: Isotope names columns: atomic_number, element, Rad energy, Rad intensity decay mode. Curated from nndc linelist_atoms : pandas.DataFrame A DataFrame containing a linelist of input atoms linelist_molecules : pandas.DataFrame A DataFrame containing a linelist of input molecules molecule_data : MolecularData A class containing the *molecular data* with: equilibrium_constants, partition_functions, dissociation_energies Attributes ---------- prepared : bool atom_data : pandas.DataFrame ionization_data : pandas.DataFrame macro_atom_data_all : pandas.DataFrame macro_atom_references_all : pandas.DataFrame collision_data : pandas.DataFrame collision_data_temperatures : numpy.array zeta_data : pandas.DataFrame synpp_refs : pandas.DataFrame photoionization_data : pandas.DataFrame two_photon_data : pandas.DataFrame decay_radiation_data : pandas.DataFrame linelist_atoms : pandas.DataFrame linelist_molecules : pandas.DataFrame molecule_data : MolecularData Methods ------- from_hdf: Function to read the atom data from a TARDIS atom HDF Store prepare_atom_data: Prepares the atom data to set the lines, levels and if requested macro atom data. This function mainly cuts the `levels` and `lines` by discarding any data that is not needed (any data for atoms that are not needed Notes ----- 1. The units of some columns are given in the square brackets. They are **NOT** the parts of columns' names! """ hdf_names = [ "atom_data", "ionization_data", "levels", "lines", "macro_atom_data", "macro_atom_references", "zeta_data", "collision_data", "collision_data_temperatures", "synpp_refs", "photoionization_data", "yg_data", "two_photon_data", "linelist_atoms", "linelist_molecules", "decay_radiation_data", ] # List of tuples of the related dataframes. # Either all or none of the related dataframes must be given related_groups = [ ("macro_atom_data_all", "macro_atom_references_all"), ("collision_data", "collision_data_temperatures"), ]
[docs] @classmethod def from_hdf(cls, fname=None): """ Function to read the atom data from a TARDIS atom HDF Store Parameters ---------- fname : Path, optional Path to the HDFStore file or name of known atom data file (default: None) """ dataframes = {} nonavailable = [] fname = resolve_atom_data_fname(fname) with pd.HDFStore(fname, "r") as store: for name in cls.hdf_names: try: dataframes[name] = store.select(name) except KeyError: logger.debug(f"Dataframe does not contain {name} column") nonavailable.append(name) if "metadata" in store: carsus_version_str = ( store["metadata"].loc[("format", "version")].value ) carsus_version = tuple(map(int, carsus_version_str.split("."))) if carsus_version == (1, 0): # Checks for various collisional data from Carsus files if "collisions_data" in store: try: dataframes["collision_data_temperatures"] = store[ "collisions_metadata" ].temperatures if "cmfgen" in store["collisions_metadata"].dataset: dataframes["yg_data"] = store["collisions_data"] dataframes["collision_data"] = "dummy value" elif ( "chianti" in store["collisions_metadata"].dataset ): dataframes["collision_data"] = store[ "collisions_data" ] else: raise KeyError( "Atomic Data Collisions Not a Valid Chanti or CMFGEN Carsus Data File" ) except KeyError as e: logger.warning( "Atomic Data is not a Valid Carsus Atomic Data File" ) raise dataframes["levels"] = store["levels_data"] dataframes["lines"] = store["lines_data"] else: raise ValueError( f"Current carsus version, {carsus_version}, is not supported." ) if "linelist_atoms" in store: dataframes["linelist_atoms"] = store["linelist_atoms"] if "linelist_molecules" in store: dataframes["linelist_molecules"] = store["linelist_molecules"] if "molecules" in store: molecule_data = MoleculeData( store["molecules/equilibrium_constants"], store["molecules/partition_functions"], store["molecules/dissociation_energies"], ) else: molecule_data = None atom_data = cls(**dataframes, molecule_data=molecule_data) atom_data.uuid1 = cls.get_attributes_from_store(store, "uuid1") atom_data.md5 = cls.get_attributes_from_store(store, "md5") atom_data.version = cls.get_attributes_from_store( store, "database_version" ) # TODO: strore data sources as attributes in carsus logger.info( f"Reading Atom Data with: UUID = {atom_data.uuid1} MD5 = {atom_data.md5} " ) if nonavailable: logger.info( "Non provided Atomic Data: {}".format( ", ".join(nonavailable) ) ) return atom_data
def __init__( self, atom_data, ionization_data, levels=None, lines=None, macro_atom_data=None, macro_atom_references=None, zeta_data=None, collision_data=None, collision_data_temperatures=None, synpp_refs=None, photoionization_data=None, yg_data=None, two_photon_data=None, linelist_atoms=None, linelist_molecules=None, decay_radiation_data=None, molecule_data=None, ): self.prepared = False # CONVERT VALUES TO CGS UNITS # Convert atomic masses to CGS # We have to use constants.u because astropy uses # different values for the unit u and the constant. # This is changed in later versions of astropy ( # the value of constants.u is used in all cases) atom_data.loc[:, "mass"] = atom_data["mass"].values * const.u.cgs.value # Convert ionization energies to CGS ionization_data = ionization_data.squeeze() ionization_data[:] = Quantity(ionization_data[:], "eV").cgs.value # Convert energy to CGS levels.loc[:, "energy"] = Quantity( levels["energy"].values, "eV" ).cgs.value # Create a new columns with wavelengths in the CGS units lines["wavelength_cm"] = Quantity( lines["wavelength"], "angstrom" ).cgs.value # SET ATTRIBUTES self.atom_data = atom_data self.ionization_data = ionization_data self.levels = levels # Cast to float so that Numba can use the values in numpy functions self.levels.energy = self.levels.energy.astype(np.float64) self.lines = lines collected_macro_atom_data = MacroAtomData( macro_atom_data, macro_atom_references ) # Rename these (drop "_all") when `prepare_atom_data` is removed! self.macro_atom_data_all = ( collected_macro_atom_data.transition_probability_data ) self.macro_atom_references_all = ( collected_macro_atom_data.block_reference_data ) self.zeta_data = zeta_data chianti_collision_data = ChiantiCollisionData( collision_data, collision_data_temperatures ) cmfgen_collision_data = CMFGENCollisionData( yg_data, collision_data_temperatures ) self.collision_data = chianti_collision_data.data self.collision_data_temperatures = chianti_collision_data.temperatures self.synpp_refs = synpp_refs self.photoionization_data = photoionization_data self.yg_data = cmfgen_collision_data.data self.two_photon_data = two_photon_data if linelist_atoms is not None: self.linelist_atoms = linelist_atoms if linelist_molecules is not None: self.linelist_molecules = linelist_molecules if molecule_data is not None: self.molecule_data = molecule_data if decay_radiation_data is not None: self.decay_radiation_data = decay_radiation_data self._check_related() # ADDITIONAL ATTRIBUTES self.selected_atomic_numbers = None self.nlte_data = None self.photo_ion_block_references = None self.photo_ion_unique_index = None self.lines_upper2macro_reference_idx = None self.lines_lower2macro_reference_idx = None # VERSIONING self.uuid1 = None self.md5 = None self.version = None def _check_related(self): """ Check that either all or none of the related dataframes are given. """ for group in self.related_groups: check_list = [name for name in group if getattr(self, name) is None] if len(check_list) != 0 and len(check_list) != len(group): raise AtomDataMissingError( f'The following dataframes from the related group [{", ".join(group)}]' f'were not given: {", ".join(check_list)}' )
[docs] def prepare_atom_data( self, selected_atomic_numbers, line_interaction_type, nlte_species, continuum_interaction_species, ): """ Prepares the atom data to set the lines, levels and if requested macro atom data. This function mainly cuts the `levels` and `lines` by discarding any data that is not needed (any data for atoms that are not needed Parameters ---------- selected_atoms : set set of selected atom numbers, e.g. set([14, 26]) line_interaction_type : str can be 'scatter', 'downbranch' or 'macroatom' """ if not self.prepared: self.prepared = True else: raise AtomDataNotPreparedError("AtomData was already prepared") self.selected_atomic_numbers = selected_atomic_numbers self._check_selected_atomic_numbers() # cutting levels_lines self.prepare_lines() ( tmp_lines_lower2level_idx, tmp_lines_upper2level_idx, ) = self.prepare_line_level_indexes() self.prepare_macro_atom_data( line_interaction_type, tmp_lines_lower2level_idx, tmp_lines_upper2level_idx, ) if len(continuum_interaction_species) > 0: self.prepare_continuum_interaction_data( continuum_interaction_species ) self.nlte_data = NLTEData(self, nlte_species)
[docs] def prepare_lines(self): """Prepare line data""" self.lines = self.lines[ self.lines.index.isin( self.selected_atomic_numbers, level="atomic_number" ) ] self.lines = self.lines.sort_values(by="wavelength")
[docs] def prepare_line_level_indexes(self): levels_index = pd.Series( np.arange(len(self.levels), dtype=int), index=self.levels.index ) tmp_lines_lower2level_idx = self.lines.index.droplevel( "level_number_upper" ) self.lines_lower2level_idx = ( levels_index.loc[tmp_lines_lower2level_idx].astype(np.int64).values ) tmp_lines_upper2level_idx = self.lines.index.droplevel( "level_number_lower" ) self.lines_upper2level_idx = ( levels_index.loc[tmp_lines_upper2level_idx].astype(np.int64).values ) return tmp_lines_lower2level_idx, tmp_lines_upper2level_idx
[docs] def prepare_continuum_interaction_data(self, continuum_interaction_species): """ Prepares the atom data for the continuum interaction Parameters ---------- continuum_interaction : ContinuumInteraction The continuum interaction object """ # photoionization_data = atomic_data.photoionization_data.set_index( # ["atomic_number", "ion_number", "level_number"] # ) mask_selected_species = self.photoionization_data.index.droplevel( "level_number" ).isin(continuum_interaction_species) self.photoionization_data = self.photoionization_data[ mask_selected_species ] self.photo_ion_block_references = np.pad( self.photoionization_data.nu.groupby(level=[0, 1, 2]) .count() .values.cumsum(), [1, 0], ) self.photo_ion_unique_index = self.photoionization_data.index.unique() nu_ion_threshold = ( self.photoionization_data.groupby(level=[0, 1, 2]).first().nu ) source_idx = self.macro_atom_references.loc[ self.photo_ion_unique_index ].references_idx destination_idx = self.macro_atom_references.loc[ get_ground_state_multi_index(self.photo_ion_unique_index) ].references_idx photo_ion_levels_idx = pd.DataFrame( { "source_level_idx": source_idx.values, "destination_level_idx": destination_idx.values, }, index=self.photo_ion_unique_index, ) self.level2continuum_edge_idx = pd.Series( np.arange(len(nu_ion_threshold)), nu_ion_threshold.sort_values(ascending=False).index, name="continuum_idx", ) level_idxs2continuum_idx = photo_ion_levels_idx.copy() level_idxs2continuum_idx[ "continuum_idx" ] = self.level2continuum_edge_idx self.level_idxs2continuum_idx = level_idxs2continuum_idx.set_index( ["source_level_idx", "destination_level_idx"] )
[docs] def prepare_macro_atom_data( self, line_interaction_type, tmp_lines_lower2level_idx, tmp_lines_upper2level_idx, ): if ( self.macro_atom_data_all is not None and not line_interaction_type == "scatter" ): self.macro_atom_data = self.macro_atom_data_all.loc[ self.macro_atom_data_all["atomic_number"].isin( self.selected_atomic_numbers ) ].copy() self.macro_atom_references = self.macro_atom_references_all[ self.macro_atom_references_all.index.isin( self.selected_atomic_numbers, level="atomic_number" ) ].copy() if line_interaction_type == "downbranch": self.macro_atom_data = self.macro_atom_data.loc[ self.macro_atom_data["transition_type"] == -1 ] self.macro_atom_references = self.macro_atom_references.loc[ self.macro_atom_references["count_down"] > 0 ] self.macro_atom_references.loc[ :, "count_total" ] = self.macro_atom_references["count_down"] self.macro_atom_references.loc[ :, "block_references" ] = np.hstack( ( 0, np.cumsum( self.macro_atom_references["count_down"].values[:-1] ), ) ) elif line_interaction_type == "macroatom": self.macro_atom_references.loc[ :, "block_references" ] = np.hstack( ( 0, np.cumsum( self.macro_atom_references["count_total"].values[ :-1 ] ), ) ) self.macro_atom_references.loc[:, "references_idx"] = np.arange( len(self.macro_atom_references) ) lines_index = pd.Series( np.arange(len(self.lines), dtype=int), index=self.lines.set_index("line_id").index, ) self.macro_atom_data.loc[:, "lines_idx"] = lines_index.loc[ self.macro_atom_data["transition_line_id"] ].values self.lines_upper2macro_reference_idx = ( self.macro_atom_references.loc[ tmp_lines_upper2level_idx, "references_idx" ] .astype(np.int64) .values ) if line_interaction_type == "macroatom": self.lines_lower2macro_reference_idx = ( self.macro_atom_references.loc[ tmp_lines_lower2level_idx, "references_idx" ] .astype(np.int64) .values ) # Sets all tmp_macro_destination_level_idx = pd.MultiIndex.from_arrays( [ self.macro_atom_data["atomic_number"], self.macro_atom_data["ion_number"], self.macro_atom_data["destination_level_number"], ] ) tmp_macro_source_level_idx = pd.MultiIndex.from_arrays( [ self.macro_atom_data["atomic_number"], self.macro_atom_data["ion_number"], self.macro_atom_data["source_level_number"], ] ) self.macro_atom_data.loc[:, "destination_level_idx"] = ( self.macro_atom_references.loc[ tmp_macro_destination_level_idx, "references_idx" ] .astype(np.int64) .values ) self.macro_atom_data.loc[:, "source_level_idx"] = ( self.macro_atom_references.loc[ tmp_macro_source_level_idx, "references_idx" ] .astype(np.int64) .values ) elif line_interaction_type == "downbranch": # Sets all the destination levels to -1 to indicate that they # are not used in downbranch calculations self.macro_atom_data.loc[:, "destination_level_idx"] = -1 if self.yg_data is not None: self.yg_data = self.yg_data.reindex( self.selected_atomic_numbers, level=0 )
def _check_selected_atomic_numbers(self): selected_atomic_numbers = self.selected_atomic_numbers available_atomic_numbers = np.unique( self.ionization_data.index.get_level_values(0) ) atomic_number_check = np.isin( selected_atomic_numbers, available_atomic_numbers ) if not all(atomic_number_check): missing_atom_mask = np.logical_not(atomic_number_check) missing_atomic_numbers = selected_atomic_numbers[missing_atom_mask] missing_numbers_str = ",".join(missing_atomic_numbers.astype("str")) msg = f"For atomic numbers {missing_numbers_str} there is no atomic data." raise AtomDataMissingError(msg) def __repr__(self): return f"<Atomic Data UUID={self.uuid1} MD5={self.md5} Lines={self.lines.line_id.count():d} Levels={self.levels.energy.count():d}>"
[docs] def get_attributes_from_store(store, store_key): """Gets atom_data attributes, throws error and sets to None if they are not available. Parameters ---------- store : pd.HDFStore Data source store_key : str HDFStore value to check """ try: attribute = store.root._v_attrs[store_key] if hasattr(attribute, "decode"): attribute = attribute.decode("ascii") except KeyError: logger.debug( f"{store_key} not available for Atom Data. Setting value to None" ) attribute = None return attribute
[docs] @dataclass class MoleculeData: """ Class to hold molecular data. Held by the AtomData object. equilibrium_constants : pandas.DataFrame A DataFrame containing the *molecular equilibrium constants* with: index: molecule columns: temperatures partition_functions : pandas.DataFrame A DataFrame containing the *molecular partition functions* with: index: molecule columns: temperatures dissociation_energies : pandas.DataFrame A DataFrame containing the *molecular dissociation energies* with: index: molecule """ equilibrium_constants: pd.DataFrame partition_functions: pd.DataFrame dissociation_energies: pd.DataFrame