Source code for ml4chem.atomistic.features.gaussian

import dask
import datetime
import logging
import os
import time
import dask.array as da
import numpy as np
import pandas as pd
from ase.data import atomic_numbers
from collections import OrderedDict
from ml4chem.atomistic.features.cutoff import Cosine
from ml4chem.atomistic.features.base import AtomisticFeatures
from ml4chem.data.serialization import dump, load
from ml4chem.data.preprocessing import Preprocessing
from ml4chem.utils import get_chunks, get_neighborlist, convert_elapsed_time

logger = logging.getLogger()


[docs]class Gaussian(AtomisticFeatures):
    """Behler-Parrinello symmetry functions

    This class builds local chemical environments for atoms based on the
    Behler-Parrinello Gaussian type symmetry functions. It is modular enough
    that can be used just for creating feature spaces.

    Parameters
    ----------
    cutoff : float
        Cutoff radius used for computing features.
    cutofffxn : object
        A Cutoff function object.
    normalized : bool
        Set it to true if the features are being normalized with respect to the
        cutoff radius.
    preprocessor : str
        Use some scaling method to preprocess the data. Default MinMaxScaler.
    custom : dict, opt
        Create custom symmetry functions, and override defaults. Default is
        None. The structure of the dictionary is as follows:

        >>> custom = {'G2': {'etas': etas},
                      'G3': {'etas': a_etas, 'zetas': zetas, 'gammas': gammas}}

    save_preprocessor : str
        Save preprocessor to file.
    scheduler : str
        The scheduler to be used with the dask backend.
    filename : str
        Path to save database. Note that if the filename exists, the features
        will be loaded without being recomputed.
    overwrite : bool
        If overwrite is set to True, ml4chem will not try to load existing
        databases. Default is True.
    angular_type : str
        Compute "G3" or "G4" angular symmetry functions.
    weighted : bool
        True if applying weighted feature of Gaussian function. See Ref. 2.
    batch_size : int
        Number of data points per batch to use for training. Default is None.

    References
    ----------
    1. Behler, J. Atom-centered symmetry functions for constructing
       high-dimensional neural network potentials. J. Chem. Phys. 134, 074106
       (2011).
    2. Gastegger, M., Schwiedrzik, L., Bittermann, M., Berzsenyi, F. &
       Marquetand, P. wACSF—Weighted atom-centered symmetry functions as
       descriptors in machine learning potentials. J. Chem. Phys. 148, 241709
       (2018).
    """

    NAME = "Gaussian"

[docs]    @classmethod
    def name(cls):
        """Returns name of class"""
        return cls.NAME

    def __init__(
        self,
        cutoff=6.5,
        cutofffxn=None,
        normalized=True,
        preprocessor=("MinMaxScaler", None),
        custom=None,
        save_preprocessor="ml4chem",
        scheduler="distributed",
        filename="features.db",
        overwrite=True,
        angular_type="G3",
        weighted=False,
        batch_size=None,
    ):
        super(Gaussian, self).__init__()

        cutoff_keys = ["radial", "angular"]
        if isinstance(cutoff, (int, float)):
            cutoff = {cutoff_key: cutoff for cutoff_key in cutoff_keys}

        self.normalized = normalized
        self.filename = filename
        self.scheduler = scheduler
        self.preprocessor = preprocessor
        self.save_preprocessor = save_preprocessor
        self.overwrite = overwrite
        self.angular_type = angular_type.upper()
        self.weighted = weighted
        self.batch_size = batch_size

        # Let's add parameters that are going to be stored in the .params json
        # file.
        self.params = OrderedDict()
        self.params["name"] = self.name()

        # We verify that values of parameters are list otherwise they cannot be
        # serialized by json.

        # These keys are very likely to exist when doing inference
        keys = ["user_input", "GP"]

        if custom is None:
            custom = {key: custom for key in keys}
        elif (
            custom is not None and len(list(set(keys).intersection(custom.keys()))) == 0
        ):
            for value in custom.values():
                for k, v in value.items():
                    if isinstance(v, list) is False:
                        value[k] = v.tolist()
            custom = {"user_input": custom}

        self.custom = custom

        # This is a very general way of not forgetting to save variables
        _params = vars()

        # Delete useless variables
        delete = [
            "self",
            "scheduler",
            "overwrite",
            "k",
            "v",
            "value",
            "keys",
            "batch_size",
            "__class__",
            "cutoff_keys",
        ]

        for param in delete:
            try:
                del _params[param]
            except KeyError:
                # In case the variable does not exist we just pass.
                pass

        for k, v in _params.items():
            if v is not None:
                self.params[k] = v

        self.cutoff = cutoff
        self.cutofffxn = {}

        if cutofffxn is None:
            for cutoff_key in cutoff_keys:
                self.cutofffxn[cutoff_key] = Cosine(cutoff=self.cutoff[cutoff_key])
        else:
            raise RuntimeError("This case is not implemented yet...")

[docs]    def calculate(self, images=None, purpose="training", data=None, svm=False, GP=None):
        """Calculate the features per atom in an atoms objects

        Parameters
        ----------
        image : dict
            Hashed images using the Data class.
        purpose : str
            The supported purposes are: 'training', 'inference'.
        data : obj
            data object
        svm : bool
            Whether or not these features are going to be used for kernel
            methods.

        Returns
        -------
        feature_space : dict
            A dictionary with key hash and value as a list with the following
            structure: {'hash': [('H', [vector]]}
        reference_space : dict
            A reference space useful for SVM models.
        """

        client = dask.distributed.get_client()
        logger.info(" ")
        logger.info("Featurization")
        logger.info("=============")
        now = datetime.datetime.now()
        logger.info("Module accessed on {}.".format(now.strftime("%Y-%m-%d %H:%M:%S")))
        logger.info(f"Module name: {self.name()}.")

        # FIXME the block below should become a function.
        if os.path.isfile(self.filename) and self.overwrite is False:
            logger.warning(f"Loading features from {self.filename}.")
            logger.info(" ")
            svm_keys = [b"feature_space", b"reference_space"]
            data = load(self.filename)

            data_hashes = list(data.keys())
            image_hashes = list(images.keys())

            if image_hashes == data_hashes:
                # Check if both lists are the same.
                return data
            elif any(i in image_hashes for i in data_hashes):
                # Check if any of the elem
                _data = {}
                for hash in image_hashes:
                    _data[hash] = data[hash]
                return _data

            if svm_keys == list(data.keys()):
                feature_space = data[svm_keys[0]]
                reference_space = data[svm_keys[1]]
                return feature_space, reference_space

        initial_time = time.time()

        # Verify that we know the unique element symbols
        if data.unique_element_symbols is None:
            logger.info(f"Getting unique element symbols for {purpose}")

            unique_element_symbols = data.get_unique_element_symbols(
                images, purpose=purpose
            )

            unique_element_symbols = unique_element_symbols[purpose]

            logger.info(f"Unique chemical elements: {unique_element_symbols}")

        elif isinstance(data.unique_element_symbols, dict):
            unique_element_symbols = data.unique_element_symbols[purpose]

            logger.info(f"Unique chemical elements: {unique_element_symbols}")

        # we make the features
        if GP is None:
            self.GP = self.custom.get("GP", None)
        else:
            self.GP = GP

        if self.GP is None:
            custom = self.custom.get("user_input", None)
            self.GP = self.make_symmetry_functions(
                unique_element_symbols, custom=custom, angular_type=self.angular_type
            )
            self.custom.update({"GP": self.GP})
        else:
            logger.info("Using parameters from file to create symmetry functions...\n")

        self.print_features_params(self.GP)

        symbol = data.unique_element_symbols[purpose][0]
        sample = np.zeros(len(self.GP[symbol]))

        self.dimension = len(sample)

        preprocessor = Preprocessing(self.preprocessor, purpose=purpose)
        preprocessor.set(purpose=purpose)

        # We start populating computations to get atomic features.
        logger.info("")
        logger.info("Embarrassingly parallel computation of atomic features...")

        stacked_features = []
        atoms_index_map = []  # This list is used to reconstruct images from atoms.

        if self.batch_size is None:
            self.batch_size = data.get_total_number_atoms()

        chunks = get_chunks(images, self.batch_size, svm=svm)

        ini = end = 0
        for chunk in chunks:
            images_ = OrderedDict(chunk)
            intermediate = []

            for image in images_.items():
                _, image = image
                end = ini + len(image)
                atoms_index_map.append(list(range(ini, end)))
                ini = end
                for atom in image:
                    index = atom.index
                    symbol = atom.symbol

                    cutoff_keys = ["radial", "angular"]
                    n_symbols, neighborpositions = {}, {}

                    if isinstance(self.cutoff, dict):
                        for cutoff_key in cutoff_keys:
                            nl = get_neighborlist(image, cutoff=self.cutoff[cutoff_key])
                            # n_indices: neighbor indices for central atom_i.
                            # n_offsets: neighbor offsets for central atom_i.
                            n_indices, n_offsets = nl[atom.index]

                            n_symbols_ = np.array(image.get_chemical_symbols())[
                                n_indices
                            ]
                            n_symbols[cutoff_key] = n_symbols_

                            neighborpositions_ = image.positions[n_indices] + np.dot(
                                n_offsets, image.get_cell()
                            )
                            neighborpositions[cutoff_key] = neighborpositions_
                    else:
                        for cutoff_key in cutoff_keys:
                            nl = get_neighborlist(image, cutoff=self.cutoff)
                            # n_indices: neighbor indices for central atom_i.
                            # n_offsets: neighbor offsets for central atom_i.
                            n_indices, n_offsets = nl[atom.index]

                            n_symbols_ = np.array(image.get_chemical_symbols())[
                                n_indices
                            ]
                            n_symbols[cutoff_key] = n_symbols_

                            neighborpositions_ = image.positions[n_indices] + np.dot(
                                n_offsets, image.get_cell()
                            )
                            neighborpositions[cutoff_key] = neighborpositions_

                    afp = self.get_atomic_features(
                        atom,
                        index,
                        symbol,
                        n_symbols,
                        neighborpositions,
                        image_molecule=image,
                        weighted=self.weighted,
                        n_indices=n_indices,
                    )

                    intermediate.append(afp)

            intermediate = client.persist(intermediate, scheduler=self.scheduler)
            stacked_features += intermediate
            del intermediate

        scheduler_time = time.time() - initial_time

        dask.distributed.wait(stacked_features)

        h, m, s = convert_elapsed_time(scheduler_time)
        logger.info(
            "... finished in {} hours {} minutes {:.2f}" " seconds.".format(h, m, s)
        )

        logger.info("")

        if self.preprocessor is not None:

            scaled_feature_space = []

            # To take advantage of dask_ml we need to convert our numpy array
            # into a dask array.
            logger.info("Converting features to dask array...")
            stacked_features = [
                da.from_delayed(lazy, dtype=float, shape=sample.shape)
                for lazy in stacked_features
            ]
            layout = {0: tuple(len(i) for i in atoms_index_map), 1: -1}
            # stacked_features = dask.array.stack(stacked_features, axis=0).rechunk(layout)
            stacked_features = da.stack(stacked_features, axis=0).rechunk(layout)

            logger.info(
                "Shape of array is {} and chunks {}.".format(
                    stacked_features.shape, stacked_features.chunks
                )
            )

            # Note that dask_ml by default convert the output of .fit
            # in a concrete value.
            if purpose == "training":
                stacked_features = preprocessor.fit(
                    stacked_features, scheduler=self.scheduler
                )
            else:
                stacked_features = preprocessor.transform(stacked_features)

            atoms_index_map = [client.scatter(indices) for indices in atoms_index_map]
            # stacked_features = [client.scatter(features) for features in stacked_features]
            stacked_features = client.scatter(stacked_features, broadcast=True)

            logger.info("Stacking features using atoms index map...")

            for indices in atoms_index_map:
                features = client.submit(
                    self.stack_features, *(indices, stacked_features)
                )

                # features = self.stack_features(indices, stacked_features)

                scaled_feature_space.append(features)

        else:
            scaled_feature_space = []
            atoms_index_map = [client.scatter(chunk) for chunk in atoms_index_map]
            stacked_features = client.scatter(stacked_features, broadcast=True)

            for indices in atoms_index_map:
                features = client.submit(
                    self.stack_features, *(indices, stacked_features)
                )
                scaled_feature_space.append(features)

            scaled_feature_space = client.gather(scaled_feature_space)

        # Clean
        del stacked_features

        # Restack images
        feature_space = []

        if svm and purpose == "training":
            logger.info("Building array with reference space.")
            reference_space = []

            for i, image in enumerate(images.items()):
                restacked = client.submit(
                    self.restack_image, *(i, image, scaled_feature_space, svm)
                )

                # image = (hash, ase_image) -> tuple
                for atom in image[1]:
                    restacked_atom = client.submit(
                        self.restack_atom, *(i, atom, scaled_feature_space)
                    )
                    reference_space.append(restacked_atom)

                feature_space.append(restacked)

            reference_space = client.gather(reference_space)

        elif svm is False and purpose == "training":
            for i, image in enumerate(images.items()):
                restacked = client.submit(
                    self.restack_image, *(i, image, scaled_feature_space, svm)
                )
                feature_space.append(restacked)

        else:
            try:
                for i, image in enumerate(images.items()):
                    restacked = client.submit(
                        self.restack_image, *(i, image, scaled_feature_space, svm)
                    )
                    feature_space.append(restacked)

            except UnboundLocalError:
                # scaled_feature_space does not exist.
                for i, image in enumerate(images.items()):
                    restacked = client.submit(
                        self.restack_image, *(i, image, feature_space, svm)
                    )
                    feature_space.append(restacked)

        feature_space = client.gather(feature_space)
        feature_space = OrderedDict(feature_space)

        fp_time = time.time() - initial_time

        h, m, s = convert_elapsed_time(fp_time)

        logger.info(
            "Featurization finished in {} hours {} minutes {:.2f}"
            " seconds.".format(h, m, s)
        )

        if svm and purpose == "training":
            client.restart()  # Reclaims memory aggressively
            preprocessor.save_to_file(preprocessor, self.save_preprocessor)

            if self.filename is not None:
                logger.info(f"features saved to {self.filename}.")
                data = {"feature_space": feature_space}
                data.update({"reference_space": reference_space})
                dump(data, filename=self.filename)
                self.feature_space = feature_space
                self.reference_space = reference_space

            return self.feature_space, self.reference_space

        elif svm is False and purpose == "training":
            client.restart()  # Reclaims memory aggressively
            preprocessor.save_to_file(preprocessor, self.save_preprocessor)

            if self.filename is not None:
                logger.info(f"features saved to {self.filename}.")
                dump(feature_space, filename=self.filename)
                self.feature_space = feature_space

            return self.feature_space
        else:
            self.feature_space = feature_space
            return self.feature_space

[docs]    def to_pandas(self):
        """Convert features to pandas DataFrame"""
        return pd.DataFrame.from_dict(self.feature_space, orient="index")

[docs]    def stack_features(self, indices, stacked_features):
        """Stack features """

        features = []
        for index in indices:
            features.append(stacked_features[index])

        return features

    @dask.delayed
    def get_atomic_features(
        self,
        atom,
        index,
        symbol,
        n_symbols,
        neighborpositions,
        n_indices=None,
        image_molecule=None,
        weighted=False,
    ):
        """Delayed class method to compute atomic features

        Parameters
        ----------
        atom : object
            An ASE atom object.
        image : ase object, list
            List of atoms in an image.
        index : int
            Index of atom in atoms object.
        symbol : str
            Chemical symbol of atom in atoms object.
        n_symbols : ndarray of str
            Array of neighbors' symbols.
        neighborpositions : ndarray of float
            Array of Cartesian atomic positions.
        image_molecule : ase object, list
            List of atoms in an image.
        weighted : bool
            True if applying weighted feature of Gaussian function. See Ref.
            2.
        """
        cutoff_keys = ["radial", "angular"]
        num_symmetries = len(self.GP[symbol])
        Ri = atom.position
        features = [None] * num_symmetries

        # See https://listserv.brown.edu/cgi-bin/wa?A2=ind1904&L=AMP-USERS&P=19048
        # n_numbers = [atomic_numbers[symbol] for symbol in n_symbols]
        n_numbers = [
            atomic_numbers[item]
            for cutoff_key in cutoff_keys
            for item in n_symbols[cutoff_key]
        ]

        for count in range(num_symmetries):
            GP = self.GP[symbol][count]

            if GP["type"] == "G2":
                feature = calculate_G2(
                    n_numbers,
                    n_symbols["radial"],
                    neighborpositions["radial"],
                    GP["symbol"],
                    GP["eta"],
                    self.cutoff["radial"],
                    self.cutofffxn["radial"],
                    Ri,
                    image_molecule=image_molecule,
                    n_indices=n_indices,
                    normalized=self.normalized,
                    weighted=weighted,
                )
            elif GP["type"] == "G3":
                feature = calculate_G3(
                    n_numbers,
                    n_symbols["angular"],
                    neighborpositions["angular"],
                    GP["symbols"],
                    GP["gamma"],
                    GP["zeta"],
                    GP["eta"],
                    self.cutoff["angular"],
                    self.cutofffxn["angular"],
                    Ri,
                    normalized=self.normalized,
                    image_molecule=image_molecule,
                    n_indices=n_indices,
                    weighted=weighted,
                )
            elif GP["type"] == "G4":
                feature = calculate_G4(
                    n_numbers,
                    n_symbols,
                    neighborpositions,
                    GP["symbols"],
                    GP["gamma"],
                    GP["zeta"],
                    GP["eta"],
                    self.cutoff["angular"],
                    self.cutofffxn["angular"],
                    Ri,
                    normalized=self.normalized,
                    image_molecule=image_molecule,
                    n_indices=n_indices,
                    weighted=weighted,
                )
            else:
                raise NotImplementedError(
                    "The requested symmetry function is not implemented yet..."
                )
            features[count] = feature

        return np.array(features)

[docs]    def make_symmetry_functions(self, symbols, custom=None, angular_type="G3"):
        """Function to make symmetry functions

        This method needs at least unique symbols and defaults set to true.
        Parameters

        ----------
        symbols : list
            List of strings with chemical symbols to create symmetry functions.
            >>> symbols = ['H', 'O']

        custom : dict, opt
            Create custom symmetry functions, and override defaults. Default is
            None. The structure of the dictionary is as follows:

            >>> custom = {'G2': {'etas': etas},
                          'G3': {'etas': a_etas, 'zetas': zetas, 'gammas': gammas}}

        angular_type : str
            Compute "G3" or "G4" angular symmetry functions.

        Return
        ------
        GP : dict
            Symmetry function parameters.
        """

        GP = {}

        if custom is None:
            logger.warning("Making default symmetry functions...")

            for symbol in symbols:
                # Radial
                etas = np.logspace(np.log10(0.05), np.log10(5.0), num=4)
                _GP = self.get_symmetry_functions(type="G2", etas=etas, symbols=symbols)

                # Angular
                etas = [0.005]
                zetas = [1.0, 4.0]
                gammas = [1.0, -1.0]
                _GP += self.get_symmetry_functions(
                    type=angular_type,
                    symbols=symbols,
                    etas=etas,
                    zetas=zetas,
                    gammas=gammas,
                )

                GP[symbol] = _GP
        else:
            logger.warning("Making custom symmetry functions...")
            types = sorted(custom.keys())

            for symbol in symbols:
                _GP = []
                for type_ in types:
                    if type_.upper() == "G2":
                        keys = ["Rs"]

                        kwargs = {}
                        for key in keys:
                            val = custom[type_].get(key, None)
                            if val is not None:
                                kwargs[key] = val

                        _GP += self.get_symmetry_functions(
                            type=type_,
                            etas=custom[type_]["etas"],
                            symbols=symbols,
                            **kwargs,
                        )
                    else:
                        keys = ["gammas", "Rs_a", "thetas"]

                        kwargs = {}
                        for key in keys:
                            val = custom[type_].get(key, None)
                            if val is not None:
                                kwargs[key] = val

                        _GP += self.get_symmetry_functions(
                            type=type_,
                            symbols=symbols,
                            etas=custom[type_]["etas"],
                            zetas=custom[type_]["zetas"],
                            **kwargs,
                        )
                GP[symbol] = _GP

        return GP

[docs]    def get_symmetry_functions(self, type, symbols, etas=None, zetas=None, gammas=None):
        """Get requested symmetry functions

        Parameters
        ----------
        type : str
            The desired symmetry function: 'G2', 'G3', or 'G4'.
        symbols : list
            List of chemical symbols.
        etas : list
            List of etas to build the Gaussian function.
        zetas : list
            List of zetas to build the Gaussian function.
        gammas : list
            List of gammas to build the Gaussian function.
        """

        supported_angular_symmetry_functions = ["G3", "G4"]

        if type == "G2":
            GP = [
                {"type": "G2", "symbol": symbol, "eta": eta}
                for eta in etas
                for symbol in symbols
            ]
            return GP

        elif type in supported_angular_symmetry_functions:
            GP = []
            for eta in etas:
                for zeta in zetas:
                    for gamma in gammas:
                        for idx1, sym1 in enumerate(symbols):
                            for sym2 in symbols[idx1:]:
                                pairs = sorted([sym1, sym2])
                                GP.append(
                                    {
                                        "type": type,
                                        "symbols": pairs,
                                        "eta": eta,
                                        "gamma": gamma,
                                        "zeta": zeta,
                                    }
                                )
            return GP
        else:
            raise RuntimeError(
                "The requested type of symmetry function is not supported."
            )

[docs]    def print_features_params(self, GP):
        """Print features parameters"""

        logger.info("Number of features per chemical element:")
        for symbol, v in GP.items():
            logger.info("    - {}: {}.".format(symbol, len(v)))

        _symbols = []
        for symbol, value in GP.items():
            logger.info(" ")
            logger.info(f"Symmetry function parameters for {symbol} atom:")
            underline = "---------------------------------------"

            for char in range(len(symbol)):
                underline += "-"

            logger.info(underline)
            logging.info(
                "{:^5} {:^12} {:4.4} {}".format("#", "Symbol", "Type", "Parameters")
            )

            if symbol not in _symbols:
                _symbols.append(symbol)
                for i, v in enumerate(value):
                    type_ = v["type"]
                    eta = v["eta"]
                    if type_ == "G2":
                        symbol = v["symbol"]
                        params = "{:^5} {:12.2} {:^4.4} eta: {:.4f}".format(
                            i, symbol, type_, eta
                        )
                    else:
                        symbol = str(v["symbols"])[1:-1].replace("'", "")
                        gamma = v["gamma"]
                        zeta = v["zeta"]
                        params = (
                            "{:^5} {:12} {:^4.5} eta: {:.4f} "
                            "gamma: {:7.4f} zeta: {:.4f}".format(
                                i, symbol, type_, eta, gamma, zeta
                            )
                        )

                    logging.info(params)


[docs]def calculate_G2(
    n_numbers,
    neighborsymbols,
    neighborpositions,
    center_symbol,
    eta,
    cutoff,
    cutofffxn,
    Ri,
    image_molecule=None,
    n_indices=None,
    normalized=True,
    weighted=False,
):
    """Calculate G2 symmetry function.

    These correspond to 2 body, or radial interactions.

    Parameters
    ----------
    n_symbols : list of int
        List of neighbors' chemical numbers.
    neighborsymbols : list of str
        List of symbols of all neighbor atoms.
    neighborpositions : list of list of floats
        List of Cartesian atomic positions.
    center_symbol : str
        Chemical symbol of the center atom.
    eta : float
        Parameter of Gaussian symmetry functions.
    cutoff : float
        Cutoff radius.
    cutofffxn : object
        Cutoff function.
    Ri : list
        Position of the center atom. Should be fed as a list of three floats.
    normalized : bool
        Whether or not the symmetry function is normalized.
    image_molecule : ase object, list
        List of atoms in an image.
    n_indices : list
        List of indices of neighboring atoms from the image object.
    weighted : bool
        True if applying weighted feature of Gaussian function. See Ref. 2.


    Returns
    -------
    feature : float
        Radial feature.
    """
    feature = 0.0
    num_neighbors = len(neighborpositions)

    # Are we normalizing the feature?
    if normalized:
        Rc = cutoff
    else:
        Rc = 1.0
    for count in range(num_neighbors):
        symbol = neighborsymbols[count]
        Rj = neighborpositions[count]

        if symbol == center_symbol:
            Rij = np.linalg.norm(Rj - Ri)

            feature += np.exp(-eta * (Rij ** 2.0) / (Rc ** 2.0)) * cutofffxn(Rij)

            if weighted:
                weighted_atom = image_molecule[n_indices[count]].number
                feature *= weighted_atom

    return feature


[docs]def calculate_G3(
    n_numbers,
    neighborsymbols,
    neighborpositions,
    G_elements,
    gamma,
    zeta,
    eta,
    cutoff,
    cutofffxn,
    Ri,
    normalized=True,
    image_molecule=None,
    n_indices=None,
    weighted=False,
):
    """Calculate G3 symmetry function.

    These are 3 body or angular interactions.

    Parameters
    ----------
    n_symbols : list of int
        List of neighbors' chemical numbers.
    neighborsymbols : list of str
        List of symbols of neighboring atoms.
    neighborpositions : list of list of floats
        List of Cartesian atomic positions of neighboring atoms.
    G_elements : list of str
        A list of two members, each member is the chemical species of one of
        the neighboring atoms forming the triangle with the center atom.
    gamma : float
        Parameter of Gaussian symmetry functions.
    zeta : float
        Parameter of Gaussian symmetry functions.
    eta : float
        Parameter of Gaussian symmetry functions.
    cutoff : float
        Cutoff radius.
    cutofffxn : object
        Cutoff function.
    Ri : list
        Position of the center atom. Should be fed as a list of three floats.
    normalized : bool
        Whether or not the symmetry function is normalized.
    image_molecule : ase object, list
        List of atoms in an image.
    n_indices : list
        List of indices of neighboring atoms from the image object.
    weighted : bool
        True if applying weighted feature of Gaussian function. See Ref. 2.

    Returns
    -------
    feature : float
        G3 feature value.
    """
    feature = 0.0
    counts = range(len(neighborpositions))

    # Are we normalizing the feature?
    if normalized:
        Rc = cutoff
    else:
        Rc = 1.0

    for j in counts:
        for k in counts[(j + 1) :]:
            els = sorted([neighborsymbols[j], neighborsymbols[k]])
            if els != G_elements:
                continue

            Rij_vector = neighborpositions[j] - Ri
            Rij = np.linalg.norm(Rij_vector)
            Rik_vector = neighborpositions[k] - Ri
            Rik = np.linalg.norm(Rik_vector)
            Rjk_vector = neighborpositions[k] - neighborpositions[j]
            Rjk = np.linalg.norm(Rjk_vector)
            cos_theta_ijk = np.dot(Rij_vector, Rik_vector) / Rij / Rik
            term = (1.0 + gamma * cos_theta_ijk) ** zeta
            term *= np.exp(-eta * (Rij ** 2.0 + Rik ** 2.0 + Rjk ** 2.0) / (Rc ** 2.0))
            if weighted:
                term *= weighted_h(image_molecule, n_indices)
            term *= cutofffxn(Rij)
            term *= cutofffxn(Rik)
            term *= cutofffxn(Rjk)
            feature += term
    feature *= 2.0 ** (1.0 - zeta)
    return feature


[docs]def calculate_G4(
    n_numbers,
    neighborsymbols,
    neighborpositions,
    G_elements,
    gamma,
    zeta,
    eta,
    cutoff,
    cutofffxn,
    Ri,
    normalized=True,
    image_molecule=None,
    n_indices=None,
    weighted=False,
):
    """Calculate G4 symmetry function.

    These are 3 body or angular interactions.

    Parameters
    ----------
    n_symbols : list of int
        List of neighbors' chemical numbers.
    neighborsymbols : list of str
        List of symbols of neighboring atoms.
    neighborpositions : list of list of floats
        List of Cartesian atomic positions of neighboring atoms.
    G_elements : list of str
        A list of two members, each member is the chemical species of one of
        the neighboring atoms forming the triangle with the center atom.
    gamma : float
        Parameter of Gaussian symmetry functions.
    zeta : float
        Parameter of Gaussian symmetry functions.
    eta : float
        Parameter of Gaussian symmetry functions.
    cutoff : float
        Cutoff radius.
    cutofffxn : object
        Cutoff function.
    Ri : list
        Position of the center atom. Should be fed as a list of three floats.
    normalized : bool
        Whether or not the symmetry function is normalized.
    image_molecule : ase object, list
        List of atoms in an image.
    n_indices : list
        List of indices of neighboring atoms from the image object.
    weighted : bool
        True if applying weighted feature of Gaussian function. See Ref. 2.

    Returns
    -------
    feature : float
        G4 feature value.

    Notes
    -----
    The difference between the calculate_G3 and the calculate_G4 function is
    that calculate_G4 accounts for bond angles of 180 degrees.
    """
    feature = 0.0
    counts = range(len(neighborpositions))
    # Are we normalizing the feature?
    if normalized:
        Rc = cutoff
    else:
        Rc = 1.0
    for j in counts:
        for k in counts[(j + 1) :]:
            els = sorted([neighborsymbols[j], neighborsymbols[k]])
            if els != G_elements:
                continue

            Rij_vector = neighborpositions[j] - Ri
            Rij = np.linalg.norm(Rij_vector)
            Rik_vector = neighborpositions[k] - Ri
            Rik = np.linalg.norm(Rik_vector)
            cos_theta_ijk = np.dot(Rij_vector, Rik_vector) / Rij / Rik
            term = (1.0 + gamma * cos_theta_ijk) ** zeta
            term *= np.exp(-eta * (Rij ** 2.0 + Rik ** 2.0) / (Rc ** 2.0))

            if weighted:
                term *= weighted_h(image_molecule, n_indices)

            term *= cutofffxn(Rij)
            term *= cutofffxn(Rik)
            feature += term
    feature *= 2.0 ** (1.0 - zeta)
    return feature


[docs]def weighted_h(image_molecule, n_indices):
    """ Calculate the atomic numbers of neighboring atoms for a molecule,
    then multiplies each neighor atomic number by each other.

    Parameters
    ----------
    image_molecule : ase object, list
        List of atoms in an image.
    n_indices : list
        List of indices of neighboring atoms from the image object.
    """
    atomic_numbers = 1.0

    for i in n_indices:
        atomic_numbers *= image_molecule[i].number

    return atomic_numbers