import dask
import datetime
import logging
import os
import time
import dask.array as da
import numpy as np
import pandas as pd
from ase.data import atomic_numbers
from collections import OrderedDict
from ml4chem.atomistic.features.cutoff import Cosine
from ml4chem.atomistic.features.base import AtomisticFeatures
from ml4chem.data.serialization import dump, load
from ml4chem.data.preprocessing import Preprocessing
from ml4chem.utils import get_chunks, get_neighborlist, convert_elapsed_time
logger = logging.getLogger()
[docs]class Gaussian(AtomisticFeatures):
"""Behler-Parrinello symmetry functions
This class builds local chemical environments for atoms based on the
Behler-Parrinello Gaussian type symmetry functions. It is modular enough
that can be used just for creating feature spaces.
Parameters
----------
cutoff : float
Cutoff radius used for computing features.
cutofffxn : object
A Cutoff function object.
normalized : bool
Set it to true if the features are being normalized with respect to the
cutoff radius.
preprocessor : str
Use some scaling method to preprocess the data. Default MinMaxScaler.
custom : dict, opt
Create custom symmetry functions, and override defaults. Default is
None. The structure of the dictionary is as follows:
>>> custom = {'G2': {'etas': etas},
'G3': {'etas': a_etas, 'zetas': zetas, 'gammas': gammas}}
save_preprocessor : str
Save preprocessor to file.
scheduler : str
The scheduler to be used with the dask backend.
filename : str
Path to save database. Note that if the filename exists, the features
will be loaded without being recomputed.
overwrite : bool
If overwrite is set to True, ml4chem will not try to load existing
databases. Default is True.
angular_type : str
Compute "G3" or "G4" angular symmetry functions.
weighted : bool
True if applying weighted feature of Gaussian function. See Ref. 2.
batch_size : int
Number of data points per batch to use for training. Default is None.
References
----------
1. Behler, J. Atom-centered symmetry functions for constructing
high-dimensional neural network potentials. J. Chem. Phys. 134, 074106
(2011).
2. Gastegger, M., Schwiedrzik, L., Bittermann, M., Berzsenyi, F. &
Marquetand, P. wACSF—Weighted atom-centered symmetry functions as
descriptors in machine learning potentials. J. Chem. Phys. 148, 241709
(2018).
"""
NAME = "Gaussian"
[docs] @classmethod
def name(cls):
"""Returns name of class"""
return cls.NAME
def __init__(
self,
cutoff=6.5,
cutofffxn=None,
normalized=True,
preprocessor=("MinMaxScaler", None),
custom=None,
save_preprocessor="ml4chem",
scheduler="distributed",
filename="features.db",
overwrite=True,
angular_type="G3",
weighted=False,
batch_size=None,
):
super(Gaussian, self).__init__()
cutoff_keys = ["radial", "angular"]
if isinstance(cutoff, (int, float)):
cutoff = {cutoff_key: cutoff for cutoff_key in cutoff_keys}
self.normalized = normalized
self.filename = filename
self.scheduler = scheduler
self.preprocessor = preprocessor
self.save_preprocessor = save_preprocessor
self.overwrite = overwrite
self.angular_type = angular_type.upper()
self.weighted = weighted
self.batch_size = batch_size
# Let's add parameters that are going to be stored in the .params json
# file.
self.params = OrderedDict()
self.params["name"] = self.name()
# We verify that values of parameters are list otherwise they cannot be
# serialized by json.
# These keys are very likely to exist when doing inference
keys = ["user_input", "GP"]
if custom is None:
custom = {key: custom for key in keys}
elif (
custom is not None and len(list(set(keys).intersection(custom.keys()))) == 0
):
for value in custom.values():
for k, v in value.items():
if isinstance(v, list) is False:
value[k] = v.tolist()
custom = {"user_input": custom}
self.custom = custom
# This is a very general way of not forgetting to save variables
_params = vars()
# Delete useless variables
delete = [
"self",
"scheduler",
"overwrite",
"k",
"v",
"value",
"keys",
"batch_size",
"__class__",
"cutoff_keys",
]
for param in delete:
try:
del _params[param]
except KeyError:
# In case the variable does not exist we just pass.
pass
for k, v in _params.items():
if v is not None:
self.params[k] = v
self.cutoff = cutoff
self.cutofffxn = {}
if cutofffxn is None:
for cutoff_key in cutoff_keys:
self.cutofffxn[cutoff_key] = Cosine(cutoff=self.cutoff[cutoff_key])
else:
raise RuntimeError("This case is not implemented yet...")
[docs] def calculate(self, images=None, purpose="training", data=None, svm=False, GP=None):
"""Calculate the features per atom in an atoms objects
Parameters
----------
image : dict
Hashed images using the Data class.
purpose : str
The supported purposes are: 'training', 'inference'.
data : obj
data object
svm : bool
Whether or not these features are going to be used for kernel
methods.
Returns
-------
feature_space : dict
A dictionary with key hash and value as a list with the following
structure: {'hash': [('H', [vector]]}
reference_space : dict
A reference space useful for SVM models.
"""
client = dask.distributed.get_client()
logger.info(" ")
logger.info("Featurization")
logger.info("=============")
now = datetime.datetime.now()
logger.info("Module accessed on {}.".format(now.strftime("%Y-%m-%d %H:%M:%S")))
logger.info(f"Module name: {self.name()}.")
# FIXME the block below should become a function.
if os.path.isfile(self.filename) and self.overwrite is False:
logger.warning(f"Loading features from {self.filename}.")
logger.info(" ")
svm_keys = [b"feature_space", b"reference_space"]
data = load(self.filename)
data_hashes = list(data.keys())
image_hashes = list(images.keys())
if image_hashes == data_hashes:
# Check if both lists are the same.
return data
elif any(i in image_hashes for i in data_hashes):
# Check if any of the elem
_data = {}
for hash in image_hashes:
_data[hash] = data[hash]
return _data
if svm_keys == list(data.keys()):
feature_space = data[svm_keys[0]]
reference_space = data[svm_keys[1]]
return feature_space, reference_space
initial_time = time.time()
# Verify that we know the unique element symbols
if data.unique_element_symbols is None:
logger.info(f"Getting unique element symbols for {purpose}")
unique_element_symbols = data.get_unique_element_symbols(
images, purpose=purpose
)
unique_element_symbols = unique_element_symbols[purpose]
logger.info(f"Unique chemical elements: {unique_element_symbols}")
elif isinstance(data.unique_element_symbols, dict):
unique_element_symbols = data.unique_element_symbols[purpose]
logger.info(f"Unique chemical elements: {unique_element_symbols}")
# we make the features
if GP is None:
self.GP = self.custom.get("GP", None)
else:
self.GP = GP
if self.GP is None:
custom = self.custom.get("user_input", None)
self.GP = self.make_symmetry_functions(
unique_element_symbols, custom=custom, angular_type=self.angular_type
)
self.custom.update({"GP": self.GP})
else:
logger.info("Using parameters from file to create symmetry functions...\n")
self.print_features_params(self.GP)
symbol = data.unique_element_symbols[purpose][0]
sample = np.zeros(len(self.GP[symbol]))
self.dimension = len(sample)
preprocessor = Preprocessing(self.preprocessor, purpose=purpose)
preprocessor.set(purpose=purpose)
# We start populating computations to get atomic features.
logger.info("")
logger.info("Embarrassingly parallel computation of atomic features...")
stacked_features = []
atoms_index_map = [] # This list is used to reconstruct images from atoms.
if self.batch_size is None:
self.batch_size = data.get_total_number_atoms()
chunks = get_chunks(images, self.batch_size, svm=svm)
ini = end = 0
for chunk in chunks:
images_ = OrderedDict(chunk)
intermediate = []
for image in images_.items():
_, image = image
end = ini + len(image)
atoms_index_map.append(list(range(ini, end)))
ini = end
for atom in image:
index = atom.index
symbol = atom.symbol
cutoff_keys = ["radial", "angular"]
n_symbols, neighborpositions = {}, {}
if isinstance(self.cutoff, dict):
for cutoff_key in cutoff_keys:
nl = get_neighborlist(image, cutoff=self.cutoff[cutoff_key])
# n_indices: neighbor indices for central atom_i.
# n_offsets: neighbor offsets for central atom_i.
n_indices, n_offsets = nl[atom.index]
n_symbols_ = np.array(image.get_chemical_symbols())[
n_indices
]
n_symbols[cutoff_key] = n_symbols_
neighborpositions_ = image.positions[n_indices] + np.dot(
n_offsets, image.get_cell()
)
neighborpositions[cutoff_key] = neighborpositions_
else:
for cutoff_key in cutoff_keys:
nl = get_neighborlist(image, cutoff=self.cutoff)
# n_indices: neighbor indices for central atom_i.
# n_offsets: neighbor offsets for central atom_i.
n_indices, n_offsets = nl[atom.index]
n_symbols_ = np.array(image.get_chemical_symbols())[
n_indices
]
n_symbols[cutoff_key] = n_symbols_
neighborpositions_ = image.positions[n_indices] + np.dot(
n_offsets, image.get_cell()
)
neighborpositions[cutoff_key] = neighborpositions_
afp = self.get_atomic_features(
atom,
index,
symbol,
n_symbols,
neighborpositions,
image_molecule=image,
weighted=self.weighted,
n_indices=n_indices,
)
intermediate.append(afp)
intermediate = client.persist(intermediate, scheduler=self.scheduler)
stacked_features += intermediate
del intermediate
scheduler_time = time.time() - initial_time
dask.distributed.wait(stacked_features)
h, m, s = convert_elapsed_time(scheduler_time)
logger.info(
"... finished in {} hours {} minutes {:.2f}" " seconds.".format(h, m, s)
)
logger.info("")
if self.preprocessor is not None:
scaled_feature_space = []
# To take advantage of dask_ml we need to convert our numpy array
# into a dask array.
logger.info("Converting features to dask array...")
stacked_features = [
da.from_delayed(lazy, dtype=float, shape=sample.shape)
for lazy in stacked_features
]
layout = {0: tuple(len(i) for i in atoms_index_map), 1: -1}
# stacked_features = dask.array.stack(stacked_features, axis=0).rechunk(layout)
stacked_features = da.stack(stacked_features, axis=0).rechunk(layout)
logger.info(
"Shape of array is {} and chunks {}.".format(
stacked_features.shape, stacked_features.chunks
)
)
# Note that dask_ml by default convert the output of .fit
# in a concrete value.
if purpose == "training":
stacked_features = preprocessor.fit(
stacked_features, scheduler=self.scheduler
)
else:
stacked_features = preprocessor.transform(stacked_features)
atoms_index_map = [client.scatter(indices) for indices in atoms_index_map]
# stacked_features = [client.scatter(features) for features in stacked_features]
stacked_features = client.scatter(stacked_features, broadcast=True)
logger.info("Stacking features using atoms index map...")
for indices in atoms_index_map:
features = client.submit(
self.stack_features, *(indices, stacked_features)
)
# features = self.stack_features(indices, stacked_features)
scaled_feature_space.append(features)
else:
scaled_feature_space = []
atoms_index_map = [client.scatter(chunk) for chunk in atoms_index_map]
stacked_features = client.scatter(stacked_features, broadcast=True)
for indices in atoms_index_map:
features = client.submit(
self.stack_features, *(indices, stacked_features)
)
scaled_feature_space.append(features)
scaled_feature_space = client.gather(scaled_feature_space)
# Clean
del stacked_features
# Restack images
feature_space = []
if svm and purpose == "training":
logger.info("Building array with reference space.")
reference_space = []
for i, image in enumerate(images.items()):
restacked = client.submit(
self.restack_image, *(i, image, scaled_feature_space, svm)
)
# image = (hash, ase_image) -> tuple
for atom in image[1]:
restacked_atom = client.submit(
self.restack_atom, *(i, atom, scaled_feature_space)
)
reference_space.append(restacked_atom)
feature_space.append(restacked)
reference_space = client.gather(reference_space)
elif svm is False and purpose == "training":
for i, image in enumerate(images.items()):
restacked = client.submit(
self.restack_image, *(i, image, scaled_feature_space, svm)
)
feature_space.append(restacked)
else:
try:
for i, image in enumerate(images.items()):
restacked = client.submit(
self.restack_image, *(i, image, scaled_feature_space, svm)
)
feature_space.append(restacked)
except UnboundLocalError:
# scaled_feature_space does not exist.
for i, image in enumerate(images.items()):
restacked = client.submit(
self.restack_image, *(i, image, feature_space, svm)
)
feature_space.append(restacked)
feature_space = client.gather(feature_space)
feature_space = OrderedDict(feature_space)
fp_time = time.time() - initial_time
h, m, s = convert_elapsed_time(fp_time)
logger.info(
"Featurization finished in {} hours {} minutes {:.2f}"
" seconds.".format(h, m, s)
)
if svm and purpose == "training":
client.restart() # Reclaims memory aggressively
preprocessor.save_to_file(preprocessor, self.save_preprocessor)
if self.filename is not None:
logger.info(f"features saved to {self.filename}.")
data = {"feature_space": feature_space}
data.update({"reference_space": reference_space})
dump(data, filename=self.filename)
self.feature_space = feature_space
self.reference_space = reference_space
return self.feature_space, self.reference_space
elif svm is False and purpose == "training":
client.restart() # Reclaims memory aggressively
preprocessor.save_to_file(preprocessor, self.save_preprocessor)
if self.filename is not None:
logger.info(f"features saved to {self.filename}.")
dump(feature_space, filename=self.filename)
self.feature_space = feature_space
return self.feature_space
else:
self.feature_space = feature_space
return self.feature_space
[docs] def to_pandas(self):
"""Convert features to pandas DataFrame"""
return pd.DataFrame.from_dict(self.feature_space, orient="index")
[docs] def stack_features(self, indices, stacked_features):
"""Stack features """
features = []
for index in indices:
features.append(stacked_features[index])
return features
@dask.delayed
def get_atomic_features(
self,
atom,
index,
symbol,
n_symbols,
neighborpositions,
n_indices=None,
image_molecule=None,
weighted=False,
):
"""Delayed class method to compute atomic features
Parameters
----------
atom : object
An ASE atom object.
image : ase object, list
List of atoms in an image.
index : int
Index of atom in atoms object.
symbol : str
Chemical symbol of atom in atoms object.
n_symbols : ndarray of str
Array of neighbors' symbols.
neighborpositions : ndarray of float
Array of Cartesian atomic positions.
image_molecule : ase object, list
List of atoms in an image.
weighted : bool
True if applying weighted feature of Gaussian function. See Ref.
2.
"""
cutoff_keys = ["radial", "angular"]
num_symmetries = len(self.GP[symbol])
Ri = atom.position
features = [None] * num_symmetries
# See https://listserv.brown.edu/cgi-bin/wa?A2=ind1904&L=AMP-USERS&P=19048
# n_numbers = [atomic_numbers[symbol] for symbol in n_symbols]
n_numbers = [
atomic_numbers[item]
for cutoff_key in cutoff_keys
for item in n_symbols[cutoff_key]
]
for count in range(num_symmetries):
GP = self.GP[symbol][count]
if GP["type"] == "G2":
feature = calculate_G2(
n_numbers,
n_symbols["radial"],
neighborpositions["radial"],
GP["symbol"],
GP["eta"],
self.cutoff["radial"],
self.cutofffxn["radial"],
Ri,
image_molecule=image_molecule,
n_indices=n_indices,
normalized=self.normalized,
weighted=weighted,
)
elif GP["type"] == "G3":
feature = calculate_G3(
n_numbers,
n_symbols["angular"],
neighborpositions["angular"],
GP["symbols"],
GP["gamma"],
GP["zeta"],
GP["eta"],
self.cutoff["angular"],
self.cutofffxn["angular"],
Ri,
normalized=self.normalized,
image_molecule=image_molecule,
n_indices=n_indices,
weighted=weighted,
)
elif GP["type"] == "G4":
feature = calculate_G4(
n_numbers,
n_symbols,
neighborpositions,
GP["symbols"],
GP["gamma"],
GP["zeta"],
GP["eta"],
self.cutoff["angular"],
self.cutofffxn["angular"],
Ri,
normalized=self.normalized,
image_molecule=image_molecule,
n_indices=n_indices,
weighted=weighted,
)
else:
raise NotImplementedError(
"The requested symmetry function is not implemented yet..."
)
features[count] = feature
return np.array(features)
[docs] def make_symmetry_functions(self, symbols, custom=None, angular_type="G3"):
"""Function to make symmetry functions
This method needs at least unique symbols and defaults set to true.
Parameters
----------
symbols : list
List of strings with chemical symbols to create symmetry functions.
>>> symbols = ['H', 'O']
custom : dict, opt
Create custom symmetry functions, and override defaults. Default is
None. The structure of the dictionary is as follows:
>>> custom = {'G2': {'etas': etas},
'G3': {'etas': a_etas, 'zetas': zetas, 'gammas': gammas}}
angular_type : str
Compute "G3" or "G4" angular symmetry functions.
Return
------
GP : dict
Symmetry function parameters.
"""
GP = {}
if custom is None:
logger.warning("Making default symmetry functions...")
for symbol in symbols:
# Radial
etas = np.logspace(np.log10(0.05), np.log10(5.0), num=4)
_GP = self.get_symmetry_functions(type="G2", etas=etas, symbols=symbols)
# Angular
etas = [0.005]
zetas = [1.0, 4.0]
gammas = [1.0, -1.0]
_GP += self.get_symmetry_functions(
type=angular_type,
symbols=symbols,
etas=etas,
zetas=zetas,
gammas=gammas,
)
GP[symbol] = _GP
else:
logger.warning("Making custom symmetry functions...")
types = sorted(custom.keys())
for symbol in symbols:
_GP = []
for type_ in types:
if type_.upper() == "G2":
keys = ["Rs"]
kwargs = {}
for key in keys:
val = custom[type_].get(key, None)
if val is not None:
kwargs[key] = val
_GP += self.get_symmetry_functions(
type=type_,
etas=custom[type_]["etas"],
symbols=symbols,
**kwargs,
)
else:
keys = ["gammas", "Rs_a", "thetas"]
kwargs = {}
for key in keys:
val = custom[type_].get(key, None)
if val is not None:
kwargs[key] = val
_GP += self.get_symmetry_functions(
type=type_,
symbols=symbols,
etas=custom[type_]["etas"],
zetas=custom[type_]["zetas"],
**kwargs,
)
GP[symbol] = _GP
return GP
[docs] def get_symmetry_functions(self, type, symbols, etas=None, zetas=None, gammas=None):
"""Get requested symmetry functions
Parameters
----------
type : str
The desired symmetry function: 'G2', 'G3', or 'G4'.
symbols : list
List of chemical symbols.
etas : list
List of etas to build the Gaussian function.
zetas : list
List of zetas to build the Gaussian function.
gammas : list
List of gammas to build the Gaussian function.
"""
supported_angular_symmetry_functions = ["G3", "G4"]
if type == "G2":
GP = [
{"type": "G2", "symbol": symbol, "eta": eta}
for eta in etas
for symbol in symbols
]
return GP
elif type in supported_angular_symmetry_functions:
GP = []
for eta in etas:
for zeta in zetas:
for gamma in gammas:
for idx1, sym1 in enumerate(symbols):
for sym2 in symbols[idx1:]:
pairs = sorted([sym1, sym2])
GP.append(
{
"type": type,
"symbols": pairs,
"eta": eta,
"gamma": gamma,
"zeta": zeta,
}
)
return GP
else:
raise RuntimeError(
"The requested type of symmetry function is not supported."
)
[docs] def print_features_params(self, GP):
"""Print features parameters"""
logger.info("Number of features per chemical element:")
for symbol, v in GP.items():
logger.info(" - {}: {}.".format(symbol, len(v)))
_symbols = []
for symbol, value in GP.items():
logger.info(" ")
logger.info(f"Symmetry function parameters for {symbol} atom:")
underline = "---------------------------------------"
for char in range(len(symbol)):
underline += "-"
logger.info(underline)
logging.info(
"{:^5} {:^12} {:4.4} {}".format("#", "Symbol", "Type", "Parameters")
)
if symbol not in _symbols:
_symbols.append(symbol)
for i, v in enumerate(value):
type_ = v["type"]
eta = v["eta"]
if type_ == "G2":
symbol = v["symbol"]
params = "{:^5} {:12.2} {:^4.4} eta: {:.4f}".format(
i, symbol, type_, eta
)
else:
symbol = str(v["symbols"])[1:-1].replace("'", "")
gamma = v["gamma"]
zeta = v["zeta"]
params = (
"{:^5} {:12} {:^4.5} eta: {:.4f} "
"gamma: {:7.4f} zeta: {:.4f}".format(
i, symbol, type_, eta, gamma, zeta
)
)
logging.info(params)
[docs]def calculate_G2(
n_numbers,
neighborsymbols,
neighborpositions,
center_symbol,
eta,
cutoff,
cutofffxn,
Ri,
image_molecule=None,
n_indices=None,
normalized=True,
weighted=False,
):
"""Calculate G2 symmetry function.
These correspond to 2 body, or radial interactions.
Parameters
----------
n_symbols : list of int
List of neighbors' chemical numbers.
neighborsymbols : list of str
List of symbols of all neighbor atoms.
neighborpositions : list of list of floats
List of Cartesian atomic positions.
center_symbol : str
Chemical symbol of the center atom.
eta : float
Parameter of Gaussian symmetry functions.
cutoff : float
Cutoff radius.
cutofffxn : object
Cutoff function.
Ri : list
Position of the center atom. Should be fed as a list of three floats.
normalized : bool
Whether or not the symmetry function is normalized.
image_molecule : ase object, list
List of atoms in an image.
n_indices : list
List of indices of neighboring atoms from the image object.
weighted : bool
True if applying weighted feature of Gaussian function. See Ref. 2.
Returns
-------
feature : float
Radial feature.
"""
feature = 0.0
num_neighbors = len(neighborpositions)
# Are we normalizing the feature?
if normalized:
Rc = cutoff
else:
Rc = 1.0
for count in range(num_neighbors):
symbol = neighborsymbols[count]
Rj = neighborpositions[count]
if symbol == center_symbol:
Rij = np.linalg.norm(Rj - Ri)
feature += np.exp(-eta * (Rij ** 2.0) / (Rc ** 2.0)) * cutofffxn(Rij)
if weighted:
weighted_atom = image_molecule[n_indices[count]].number
feature *= weighted_atom
return feature
[docs]def calculate_G3(
n_numbers,
neighborsymbols,
neighborpositions,
G_elements,
gamma,
zeta,
eta,
cutoff,
cutofffxn,
Ri,
normalized=True,
image_molecule=None,
n_indices=None,
weighted=False,
):
"""Calculate G3 symmetry function.
These are 3 body or angular interactions.
Parameters
----------
n_symbols : list of int
List of neighbors' chemical numbers.
neighborsymbols : list of str
List of symbols of neighboring atoms.
neighborpositions : list of list of floats
List of Cartesian atomic positions of neighboring atoms.
G_elements : list of str
A list of two members, each member is the chemical species of one of
the neighboring atoms forming the triangle with the center atom.
gamma : float
Parameter of Gaussian symmetry functions.
zeta : float
Parameter of Gaussian symmetry functions.
eta : float
Parameter of Gaussian symmetry functions.
cutoff : float
Cutoff radius.
cutofffxn : object
Cutoff function.
Ri : list
Position of the center atom. Should be fed as a list of three floats.
normalized : bool
Whether or not the symmetry function is normalized.
image_molecule : ase object, list
List of atoms in an image.
n_indices : list
List of indices of neighboring atoms from the image object.
weighted : bool
True if applying weighted feature of Gaussian function. See Ref. 2.
Returns
-------
feature : float
G3 feature value.
"""
feature = 0.0
counts = range(len(neighborpositions))
# Are we normalizing the feature?
if normalized:
Rc = cutoff
else:
Rc = 1.0
for j in counts:
for k in counts[(j + 1) :]:
els = sorted([neighborsymbols[j], neighborsymbols[k]])
if els != G_elements:
continue
Rij_vector = neighborpositions[j] - Ri
Rij = np.linalg.norm(Rij_vector)
Rik_vector = neighborpositions[k] - Ri
Rik = np.linalg.norm(Rik_vector)
Rjk_vector = neighborpositions[k] - neighborpositions[j]
Rjk = np.linalg.norm(Rjk_vector)
cos_theta_ijk = np.dot(Rij_vector, Rik_vector) / Rij / Rik
term = (1.0 + gamma * cos_theta_ijk) ** zeta
term *= np.exp(-eta * (Rij ** 2.0 + Rik ** 2.0 + Rjk ** 2.0) / (Rc ** 2.0))
if weighted:
term *= weighted_h(image_molecule, n_indices)
term *= cutofffxn(Rij)
term *= cutofffxn(Rik)
term *= cutofffxn(Rjk)
feature += term
feature *= 2.0 ** (1.0 - zeta)
return feature
[docs]def calculate_G4(
n_numbers,
neighborsymbols,
neighborpositions,
G_elements,
gamma,
zeta,
eta,
cutoff,
cutofffxn,
Ri,
normalized=True,
image_molecule=None,
n_indices=None,
weighted=False,
):
"""Calculate G4 symmetry function.
These are 3 body or angular interactions.
Parameters
----------
n_symbols : list of int
List of neighbors' chemical numbers.
neighborsymbols : list of str
List of symbols of neighboring atoms.
neighborpositions : list of list of floats
List of Cartesian atomic positions of neighboring atoms.
G_elements : list of str
A list of two members, each member is the chemical species of one of
the neighboring atoms forming the triangle with the center atom.
gamma : float
Parameter of Gaussian symmetry functions.
zeta : float
Parameter of Gaussian symmetry functions.
eta : float
Parameter of Gaussian symmetry functions.
cutoff : float
Cutoff radius.
cutofffxn : object
Cutoff function.
Ri : list
Position of the center atom. Should be fed as a list of three floats.
normalized : bool
Whether or not the symmetry function is normalized.
image_molecule : ase object, list
List of atoms in an image.
n_indices : list
List of indices of neighboring atoms from the image object.
weighted : bool
True if applying weighted feature of Gaussian function. See Ref. 2.
Returns
-------
feature : float
G4 feature value.
Notes
-----
The difference between the calculate_G3 and the calculate_G4 function is
that calculate_G4 accounts for bond angles of 180 degrees.
"""
feature = 0.0
counts = range(len(neighborpositions))
# Are we normalizing the feature?
if normalized:
Rc = cutoff
else:
Rc = 1.0
for j in counts:
for k in counts[(j + 1) :]:
els = sorted([neighborsymbols[j], neighborsymbols[k]])
if els != G_elements:
continue
Rij_vector = neighborpositions[j] - Ri
Rij = np.linalg.norm(Rij_vector)
Rik_vector = neighborpositions[k] - Ri
Rik = np.linalg.norm(Rik_vector)
cos_theta_ijk = np.dot(Rij_vector, Rik_vector) / Rij / Rik
term = (1.0 + gamma * cos_theta_ijk) ** zeta
term *= np.exp(-eta * (Rij ** 2.0 + Rik ** 2.0) / (Rc ** 2.0))
if weighted:
term *= weighted_h(image_molecule, n_indices)
term *= cutofffxn(Rij)
term *= cutofffxn(Rik)
feature += term
feature *= 2.0 ** (1.0 - zeta)
return feature
[docs]def weighted_h(image_molecule, n_indices):
""" Calculate the atomic numbers of neighboring atoms for a molecule,
then multiplies each neighor atomic number by each other.
Parameters
----------
image_molecule : ase object, list
List of atoms in an image.
n_indices : list
List of indices of neighboring atoms from the image object.
"""
atomic_numbers = 1.0
for i in n_indices:
atomic_numbers *= image_molecule[i].number
return atomic_numbers