"""Parser for mlip train."""importjsonfrompathlibimportPathfromtypingimportAnyfromaiida.engineimportExitCodefromaiida.ormimportDict,FolderDatafromaiida.orm.nodes.process.processimportProcessNodefromaiida.parsers.parserimportParserfromaiida_mlip.data.modelimportModelData
class TrainParser(Parser):
    """
    Parser class for parsing output of a training calculation.

    Parameters
    ----------
    node : aiida.orm.nodes.process.process.ProcessNode
        ProcessNode of calculation.

    Methods
    -------
    __init__(node: aiida.orm.nodes.process.process.ProcessNode)
        Initialize the TrainParser instance.

    parse(**kwargs: Any) -> ExitCode:
        Parse outputs, store results in the database.

    _get_remote_dirs(mlip_dict: dict) -> dict:
        Get the remote directories based on mlip config file.

    _validate_retrieved_files(output_filename: str, model_name: str) -> bool:
        Validate that the expected files have been retrieved.

    _save_models(model_output: Path, compiled_model_output: Path) -> None:
        Save model and compiled model as outputs.

    _parse_results(result_name: Path) -> None:
        Parse the results file and store the results dictionary.

    _save_folders(remote_dirs: dict) -> None:
        Save log and checkpoint folders as outputs.
    """

    # Seed used in the results file name when the config file does not set
    # ``seed``. This is the value that used to be hard-coded in the file name.
    _DEFAULT_SEED = 123

    def __init__(self, node: ProcessNode):
        """
        Initialize the TrainParser instance.

        Parameters
        ----------
        node : aiida.orm.nodes.process.process.ProcessNode
            ProcessNode of calculation.
        """
        super().__init__(node)

    def parse(self, **kwargs: Any) -> ExitCode:
        """
        Parse outputs and store results in the database.

        Parameters
        ----------
        **kwargs : Any
            Any keyword arguments.

        Returns
        -------
        ExitCode
            ``ExitCode(0)`` on success, or ``ERROR_MISSING_OUTPUT_FILES`` when
            the expected files were not retrieved.
        """
        mlip_dict = self.node.inputs.mlip_config.as_dictionary
        model_name = mlip_dict["name"]
        output_filename = self.node.get_option("output_filename")

        # Validate first, so that a failed run yields an exit code before any
        # path in the working directory is built or read.
        if not self._validate_retrieved_files(output_filename, model_name):
            return self.exit_codes.ERROR_MISSING_OUTPUT_FILES

        remote_dirs = self._get_remote_dirs(mlip_dict)
        model_dir = remote_dirs["model"]

        # The results file name embeds the training seed; fall back to the
        # default when the config omits it or sets it to null.
        seed = mlip_dict.get("seed")
        if seed is None:
            seed = self._DEFAULT_SEED
        result_name = remote_dirs["results"] / f"{model_name}_run-{seed}_train.txt"

        self._save_models(
            model_dir / f"{model_name}.model",
            model_dir / f"{model_name}_compiled.model",
        )
        self._parse_results(result_name)
        self._save_folders(remote_dirs)

        return ExitCode(0)

    def _get_remote_dirs(self, mlip_dict: dict) -> dict:
        """
        Get the remote directories based on mlip config file.

        Parameters
        ----------
        mlip_dict : dict
            Dictionary containing mlip config file.

        Returns
        -------
        dict
            Paths keyed by ``"log"``, ``"checkpoint"``, ``"results"`` and
            ``"model"``, each resolved against the remote working directory.
        """
        # NOTE(review): these paths are later read with local file APIs
        # (``open``, ``FolderData(tree=...)``), so the remote working
        # directory must be reachable from where the parser runs - confirm.
        rem_dir = Path(self.node.get_remote_workdir())

        # (kind, default sub-directory); the config may override each one
        # through a "<kind>_dir" key. The empty default for "model" resolves
        # to the working directory itself.
        defaults = (
            ("log", "logs"),
            ("checkpoint", "checkpoints"),
            ("results", "results"),
            ("model", ""),
        )
        return {
            typ: rem_dir / mlip_dict.get(f"{typ}_dir", default)
            for typ, default in defaults
        }

    def _validate_retrieved_files(self, output_filename: str, model_name: str) -> bool:
        """
        Validate that the expected files have been retrieved.

        Parameters
        ----------
        output_filename : str
            The expected output filename.
        model_name : str
            The name of the model as found in the config file key `name`.

        Returns
        -------
        bool
            True if the expected files are retrieved, False otherwise.
        """
        files_retrieved = self.retrieved.list_object_names()
        files_expected = {output_filename, f"{model_name}.model"}

        if not files_expected.issubset(files_retrieved):
            self.logger.error(
                f"Found files '{files_retrieved}', expected to find '{files_expected}'"
            )
            return False
        return True

    def _save_models(self, model_output: Path, compiled_model_output: Path) -> None:
        """
        Save model and compiled model as outputs.

        Parameters
        ----------
        model_output : Path
            Path to the model output file.
        compiled_model_output : Path
            Path to the compiled model output file.
        """
        architecture = "mace_mp"

        model = ModelData.from_local(model_output, architecture=architecture)
        compiled_model = ModelData.from_local(
            compiled_model_output, architecture=architecture
        )

        self.out("model", model)
        self.out("compiled_model", compiled_model)

    def _parse_results(self, result_name: Path) -> None:
        """
        Parse the results file and store the results dictionary.

        The file is read line by line and the last line that decodes to a
        JSON object is stored as the ``results_dict`` output.

        Parameters
        ----------
        result_name : Path
            Path to the result file.

        Raises
        ------
        ValueError
            If no line of the file decodes to a JSON object.
        """
        last_dict = None
        with open(result_name, encoding="utf-8") as file:
            for line in file:
                try:
                    parsed = json.loads(line.strip())
                except json.JSONDecodeError:
                    continue
                # json.loads also accepts bare numbers, strings, lists and
                # null; only a JSON object is a usable results dictionary.
                if isinstance(parsed, dict):
                    last_dict = parsed

        if last_dict is None:
            raise ValueError("No valid dictionary in the file")

        self.out("results_dict", Dict(last_dict))

    def _save_folders(self, remote_dirs: dict) -> None:
        """
        Save log and checkpoint folders as outputs.

        Parameters
        ----------
        remote_dirs : dict
            Dictionary of remote folders.
        """
        log_node = FolderData(tree=remote_dirs["log"])
        self.out("logs", log_node)

        checkpoint_node = FolderData(tree=remote_dirs["checkpoint"])
        self.out("checkpoints", checkpoint_node)