Source code for ml4chem.data.utils

from ase.io import Trajectory
from io import StringIO
import random


def _write_subset(images, indices, filename):
    """Write ``images[i]`` for each ``i`` in ``indices`` to a new trajectory.

    The trajectory is closed even if writing one of the images fails.
    """
    traj = Trajectory(filename, mode="w")
    try:
        for index in indices:
            traj.write(images[index])
    finally:
        traj.close()


def split_data(
    images,
    training_name="training_images.traj",
    test_name="test_images.traj",
    randomize=True,
    test_set=20,
    logfile="data_split.log",
):
    """Split a set of images into training and test trajectory files.

    The image indices are (optionally) shuffled and partitioned into two
    disjoint groups that together cover every image exactly once. Each group
    is written to its own trajectory file, and the indices that went into
    each group are recorded in a log file.

    Parameters
    ----------
    images : str or object
        A path to an ASE trajectory file or a list of Atoms objects.
    training_name : str, optional
        Name of the training set trajectory file, by default
        'training_images.traj'
    test_name : str, optional
        Name of the test set file, by default 'test_images.traj'
    randomize : bool, optional
        Randomize indices of images, by default True
    test_set : int, optional
        Percentage of the data to be used as test set, by default 20. The
        number of test images is rounded down. When ``test_set`` is not
        positive no test file is written.
    logfile : str, optional
        Log file name, by default 'data_split.log'

    Raises
    ------
    ValueError
        If ``test_set`` is larger than 100.
    """
    if test_set > 100:
        raise ValueError(
            "test_set is a percentage and cannot exceed 100, got %r" % (test_set,)
        )

    # Remember whether the trajectory was opened here so it can be closed.
    opened_here = isinstance(images, str)
    if opened_here:
        images = Trajectory(images)

    try:
        total_length = len(images)
        test_length = int(test_set * total_length / 100) if test_set > 0 else 0
        training_length = total_length - test_length

        indices = list(range(total_length))
        if randomize:
            random.shuffle(indices)

        # Split at a single boundary so the two groups are disjoint and
        # complete. Slicing the tail with negative indices would drop the
        # last image and misbehave when test_length is 0.
        training_images = indices[:training_length]
        test_images = indices[training_length:]

        with open(logfile, "w") as log:
            log.write("Training set\n")
            _write_subset(images, training_images, training_name)
            log.write(str(training_images))
            log.write("\n")

            log.write("Test set\n")
            if test_set > 0:
                _write_subset(images, test_images, test_name)
                log.write(str(test_images))
    finally:
        if opened_here:
            images.close()
def ase_to_xyz(atoms, comment="", file=True):
    """Serialize an atoms object to xyz-formatted text.

    The output starts with the number of atoms and the comment line, followed
    by one line per atom holding its chemical symbol and x, y, z coordinates.
    This function is useful to save xyz to DataFrame.

    Parameters
    ----------
    atoms : object
        Object providing ``get_chemical_symbols()`` and a ``positions``
        attribute that yields one (x, y, z) triple per atom.
    comment : str, optional
        Text placed on the second line of the output, by default ''.
    file : bool, optional
        If True (default) return the in-memory ``StringIO`` buffer; otherwise
        return its content as a string.

    Returns
    -------
    io.StringIO or str
        The xyz data. The returned buffer is not rewound: its position is
        left at the end of the written text.
    """
    elements = atoms.get_chemical_symbols()

    buffer = StringIO()
    buffer.write("%d\n%s\n" % (len(elements), comment))
    buffer.writelines(
        "%-2s %22.15f %22.15f %22.15f\n" % (element, px, py, pz)
        for element, (px, py, pz) in zip(elements, atoms.positions)
    )

    if not file:
        return buffer.getvalue()
    return buffer