Source code for janus_core.calculations.descriptors

"""Calculate MLIP descriptors for structures."""

from __future__ import annotations

from collections.abc import Sequence
from typing import Any

from ase import Atoms
import numpy as np

from janus_core.calculations.base import BaseCalculation
from janus_core.helpers.janus_types import (
    Architectures,
    ASEReadArgs,
    ASEWriteArgs,
    Devices,
    MaybeSequence,
    PathLike,
)
from janus_core.helpers.mlip_calculators import check_calculator
from janus_core.helpers.struct_io import output_structs
from janus_core.helpers.utils import none_to_dict


class Descriptors(BaseCalculation):
    """
    Prepare and calculate MLIP descriptors for structures.

    Parameters
    ----------
    struct : MaybeSequence[Atoms] | None
        ASE Atoms structure(s) to calculate descriptors for. Required if
        `struct_path` is None. Default is None.
    struct_path : PathLike | None
        Path of structure to calculate descriptors for. Required if `struct` is
        None. Default is None.
    arch : Architectures
        MLIP architecture to use for calculations. Default is "mace_mp".
    device : Devices
        Device to run MLIP model on. Default is "cpu".
    model_path : PathLike | None
        Path to MLIP model. Default is `None`.
    read_kwargs : ASEReadArgs | None
        Keyword arguments to pass to ase.io.read. By default,
        read_kwargs["index"] is ":", so all images are read.
    calc_kwargs : dict[str, Any] | None
        Keyword arguments to pass to the selected calculator. Default is {}.
    set_calc : bool | None
        Whether to set (new) calculators for structures. Default is None.
    attach_logger : bool
        Whether to attach a logger. Default is False.
    log_kwargs : dict[str, Any] | None
        Keyword arguments to pass to `config_logger`. Default is {}.
    track_carbon : bool
        Whether to track carbon emissions of calculation. Default is True.
    tracker_kwargs : dict[str, Any] | None
        Keyword arguments to pass to `config_tracker`. Default is {}.
    invariants_only : bool
        Whether only the invariant descriptors should be returned. Default is True.
    calc_per_element : bool
        Whether to calculate mean descriptors for each element. Default is False.
    calc_per_atom : bool
        Whether to calculate descriptors for each atom. Default is False.
    write_results : bool
        True to write out structure with results of calculations. Default is False.
    write_kwargs : ASEWriteArgs | None
        Keyword arguments to pass to ase.io.write if saving structure with results
        of calculations. Default is {}.

    Methods
    -------
    run()
        Calculate descriptors for structure(s)
    """

    def __init__(
        self,
        struct: MaybeSequence[Atoms] | None = None,
        struct_path: PathLike | None = None,
        arch: Architectures = "mace_mp",
        device: Devices = "cpu",
        model_path: PathLike | None = None,
        read_kwargs: ASEReadArgs | None = None,
        calc_kwargs: dict[str, Any] | None = None,
        set_calc: bool | None = None,
        attach_logger: bool = False,
        log_kwargs: dict[str, Any] | None = None,
        track_carbon: bool = True,
        tracker_kwargs: dict[str, Any] | None = None,
        invariants_only: bool = True,
        calc_per_element: bool = False,
        calc_per_atom: bool = False,
        write_results: bool = False,
        write_kwargs: ASEWriteArgs | None = None,
    ) -> None:
        """
        Initialise class.

        Parameters
        ----------
        struct : MaybeSequence[Atoms] | None
            ASE Atoms structure(s) to calculate descriptors for. Required if
            `struct_path` is None. Default is None.
        struct_path : PathLike | None
            Path of structure to calculate descriptors for. Required if `struct`
            is None. Default is None.
        arch : Architectures
            MLIP architecture to use for calculations. Default is "mace_mp".
        device : Devices
            Device to run MLIP model on. Default is "cpu".
        model_path : PathLike | None
            Path to MLIP model. Default is `None`.
        read_kwargs : ASEReadArgs | None
            Keyword arguments to pass to ase.io.read. By default,
            read_kwargs["index"] is ":", so all images are read.
        calc_kwargs : dict[str, Any] | None
            Keyword arguments to pass to the selected calculator. Default is {}.
        set_calc : bool | None
            Whether to set (new) calculators for structures. Default is None.
        attach_logger : bool
            Whether to attach a logger. Default is False.
        log_kwargs : dict[str, Any] | None
            Keyword arguments to pass to `config_logger`. Default is {}.
        track_carbon : bool
            Whether to track carbon emissions of calculation. Default is True.
        tracker_kwargs : dict[str, Any] | None
            Keyword arguments to pass to `config_tracker`. Default is {}.
        invariants_only : bool
            Whether only the invariant descriptors should be returned. Default is
            True.
        calc_per_element : bool
            Whether to calculate mean descriptors for each element. Default is
            False.
        calc_per_atom : bool
            Whether to calculate descriptors for each atom. Default is False.
        write_results : bool
            True to write out structure with results of calculations. Default is
            False.
        write_kwargs : ASEWriteArgs | None
            Keyword arguments to pass to ase.io.write if saving structure with
            results of calculations. Default is {}.

        Raises
        ------
        ValueError
            If `struct` is a single structure without a calculator, or a sequence
            that is empty or contains any image without a calculator.
        """
        read_kwargs, write_kwargs = none_to_dict(read_kwargs, write_kwargs)

        self.invariants_only = invariants_only
        self.calc_per_element = calc_per_element
        self.calc_per_atom = calc_per_atom
        self.write_results = write_results
        self.write_kwargs = write_kwargs

        # Read all images by default
        read_kwargs.setdefault("index", ":")

        # Initialise structures and logging
        super().__init__(
            calc_name=__name__,
            struct=struct,
            struct_path=struct_path,
            arch=arch,
            device=device,
            model_path=model_path,
            read_kwargs=read_kwargs,
            sequence_allowed=True,
            calc_kwargs=calc_kwargs,
            set_calc=set_calc,
            attach_logger=attach_logger,
            log_kwargs=log_kwargs,
            track_carbon=track_carbon,
            tracker_kwargs=tracker_kwargs,
        )

        # Normalise to a list of images so single structures and sequences
        # share the same validation path.
        if isinstance(self.struct, Atoms):
            images = [self.struct]
        elif isinstance(self.struct, Sequence):
            images = list(self.struct)
        else:
            images = None

        if images is not None:
            # Every image is later passed to `_calc_descriptors`, which uses
            # `image.calc`, so a single missing calculator must be rejected here.
            if not images or not all(image.calc for image in images):
                raise ValueError("Please attach a calculator to `struct`.")

            for image in images:
                check_calculator(image.calc, "get_descriptors")

        # Set output file
        self.write_kwargs.setdefault("filename", None)
        self.write_kwargs["filename"] = self._build_filename(
            "descriptors.extxyz", filename=self.write_kwargs["filename"]
        ).absolute()

    def run(self) -> None:
        """
        Calculate descriptors for structure(s).

        Descriptors are stored on each structure by `_calc_descriptors`. If a
        tracker is attached, the emissions of the task are saved in
        ``info["emissions"]`` of each structure. Structures are then passed to
        `output_structs`, which receives `write_results` and `write_kwargs`.
        """
        if self.logger:
            self.logger.info("Starting descriptors calculation")
            self.logger.info("invariants_only: %s", self.invariants_only)
            self.logger.info("calc_per_element: %s", self.calc_per_element)
            self.logger.info("calc_per_atom: %s", self.calc_per_atom)
        if self.tracker:
            self.tracker.start_task("Descriptors")

        if isinstance(self.struct, Sequence):
            for struct in self.struct:
                self._calc_descriptors(struct)
        else:
            self._calc_descriptors(self.struct)

        if self.logger:
            self.logger.info("Descriptors calculation complete")
        if self.tracker:
            emissions = self.tracker.stop_task().emissions
            if isinstance(self.struct, Sequence):
                for image in self.struct:
                    image.info["emissions"] = emissions
            else:
                self.struct.info["emissions"] = emissions
            self.tracker.stop()

        output_structs(
            self.struct,
            struct_path=self.struct_path,
            write_results=self.write_results,
            write_kwargs=self.write_kwargs,
        )

    def _calc_descriptors(self, struct: Atoms) -> None:
        """
        Calculate MLIP descriptors for a given structure.

        The mean over all descriptor values is saved in
        ``struct.info["{label}descriptor"]``, where ``label`` is ``"{arch}_"`` if
        the calculator parameters contain ``"arch"``, and empty otherwise. The
        same prefix is used for the optional per-element entries
        (``struct.info["{label}{element}_descriptor"]``) and per-atom entries
        (``struct.arrays["{label}descriptors"]``).

        Parameters
        ----------
        struct : Atoms
            Structure to calculate descriptors for.
        """
        if "arch" in struct.calc.parameters:
            arch = struct.calc.parameters["arch"]
            label = f"{arch}_"
        else:
            label = ""

        # Calculate mean descriptor and save mean
        descriptors = struct.calc.get_descriptors(
            struct, invariants_only=self.invariants_only
        )
        struct.info[f"{label}descriptor"] = np.mean(descriptors)

        if self.calc_per_element:
            # Read symbols once rather than iterating over atoms per element
            symbols = struct.get_chemical_symbols()
            for element in sorted(set(symbols)):
                pattern = [
                    index for index, symbol in enumerate(symbols) if symbol == element
                ]
                # Rows of `descriptors` are indexed by atom index
                struct.info[f"{label}{element}_descriptor"] = np.mean(
                    descriptors[pattern, :]
                )

        if self.calc_per_atom:
            # Use `label`, not `arch`, which is unbound if "arch" is not set
            struct.arrays[f"{label}descriptors"] = np.mean(descriptors, axis=1)